Source code for hed.tools.remodeling.operations.summarize_hed_tags_op

""" Summarize the HED tags in collection of tabular files.  """

from hed.models.tabular_input import TabularInput
from hed.tools.analysis.hed_tag_counts import HedTagCounts
from hed.tools.analysis.event_manager import EventManager
from hed.tools.analysis.hed_tag_manager import HedTagManager
from hed.tools.remodeling.operations.base_op import BaseOp
from hed.tools.remodeling.operations.base_summary import BaseSummary


class SummarizeHedTagsOp(BaseOp):
    """ Summarize the HED tags in a collection of tabular files.

    Required remodeling parameters:
        - **summary_name** (*str*): The name of the summary.
        - **summary_filename** (*str*): Base filename of the summary.
        - **tags** (*dict*): Specifies how to organize the tag output.

    Optional remodeling parameters:
        - **append_timecode** (*bool*): Stored on the operation for use elsewhere (default False).
        - **include_context** (*bool*): Forwarded to HedTagManager.get_hed_objs (default True).
        - **replace_defs** (*bool*): Forwarded to HedTagManager.get_hed_objs (default True).
        - **remove_types** (*list*): Forwarded to the HedTagManager constructor
          (default ["Condition-variable", "Task"]).

    The purpose of this op is to produce a summary of the occurrences of HED tags
    organized in a specified manner.

    """

    PARAMS = {
        "operation": "summarize_hed_tags",
        "required_parameters": {
            "summary_name": str,
            "summary_filename": str,
            "tags": dict
        },
        "optional_parameters": {
            "append_timecode": bool,
            "include_context": bool,
            "replace_defs": bool,
            "remove_types": list
        }
    }

    SUMMARY_TYPE = "hed_tag_summary"

    def __init__(self, parameters):
        """ Constructor for the summarize_hed_tags operation.

        Parameters:
            parameters (dict): Dictionary with the parameter values for required and optional parameters.

        :raises KeyError:
            - If a required parameter is missing.
            - If an unexpected parameter is provided.

        :raises TypeError:
            - If a parameter has the wrong type.

        """
        super().__init__(self.PARAMS, parameters)
        self.summary_name = parameters['summary_name']
        self.summary_filename = parameters['summary_filename']
        self.tags = parameters['tags']
        self.append_timecode = parameters.get('append_timecode', False)
        self.include_context = parameters.get('include_context', True)
        self.replace_defs = parameters.get("replace_defs", True)
        self.remove_types = parameters.get("remove_types", ["Condition-variable", "Task"])

    def do_op(self, dispatcher, df, name, sidecar=None):
        """ Summarize the HED tags present in the dataset.

        Parameters:
            dispatcher (Dispatcher): Manages the operation I/O.
            df (DataFrame): The DataFrame to be remodeled.
            name (str): Unique identifier for the dataframe -- often the original file path.
            sidecar (Sidecar or file-like): Only needed for HED operations.

        Returns:
            DataFrame: A copy of df.

        Side effect:
            Creates (if needed) and updates the summary stored in dispatcher.summary_dicts
            under this operation's summary_name.

        """
        df_new = df.copy()
        summary = dispatcher.summary_dicts.get(self.summary_name, None)
        if not summary:
            # First file seen for this summary name: register a fresh summary with the dispatcher.
            summary = HedTagSummary(self)
            dispatcher.summary_dicts[self.summary_name] = summary
        # Build the info dictionary once; post-processing the copy a second time is unnecessary.
        new_info = {'df': dispatcher.post_proc_data(df_new), 'name': name,
                    'schema': dispatcher.hed_schema, 'sidecar': sidecar}
        summary.update_summary(new_info)
        return df_new


class HedTagSummary(BaseSummary):
    """ Accumulate per-file HED tag counts and format them as summaries.

    Each call to update_summary stores a HedTagCounts object in summary_dict
    under the name of the tabular file it was computed from.

    """

    def __init__(self, sum_op):
        """ Constructor for the HED tag summary.

        Parameters:
            sum_op (SummarizeHedTagsOp): The operation whose settings (tags, include_context,
                replace_defs, remove_types) control this summary.

        """
        super().__init__(sum_op)
        self.sum_op = sum_op

    def update_summary(self, new_info):
        """ Update the summary for a given tabular input file.

        Parameters:
            new_info (dict): A dictionary with the parameters needed to update a summary.

        Notes:
            - The summary needs a "name" str, a "schema", a "df", and a "sidecar".

        """
        counts = HedTagCounts(new_info['name'], total_events=len(new_info['df']))
        input_data = TabularInput(new_info['df'], sidecar=new_info['sidecar'], name=new_info['name'])
        tag_man = HedTagManager(EventManager(input_data, new_info['schema']),
                                remove_types=self.sum_op.remove_types)
        hed_objs = tag_man.get_hed_objs(include_context=self.sum_op.include_context,
                                        replace_defs=self.sum_op.replace_defs)
        for hed in hed_objs:
            counts.update_event_counts(hed, new_info['name'])
        # A later update for the same file name replaces the earlier counts.
        self.summary_dict[new_info["name"]] = counts

    def get_details_dict(self, tag_counts):
        """ Return the summary-specific information in a dictionary.

        Parameters:
            tag_counts (HedTagCounts): Contains the counts of tags in the dataset.

        Returns:
            dict: Dictionary with the summary results. "Specifics" holds "Main tags"
                (one entry per category in the operation's tags) and "Other tags"
                (the tags that organize_tags reported as unmatched).

        """
        template, unmatched = tag_counts.organize_tags(self.sum_op.tags)
        details = {key: self._get_details(key_list, template, verbose=True)
                   for key, key_list in self.sum_op.tags.items()}
        leftovers = [value.get_info(verbose=True) for value in unmatched]
        file_names = list(tag_counts.files.keys())
        return {"Name": tag_counts.name, "Total events": tag_counts.total_events,
                "Total files": len(file_names),
                "Files": file_names,
                "Specifics": {"Main tags": details, "Other tags": leftovers}}

    def _get_result_string(self, name, result, indent=BaseSummary.DISPLAY_INDENT):
        """ Return a formatted string with the summary for the indicated name.

        Parameters:
            name (str): Identifier (usually the filename) of the individual file.
            result (dict): The dictionary of the summary results indexed by name.
            indent (str): A string containing spaces used for indentation (usually 3 spaces).

        Returns:
            str: The results in a printable format ready to be saved to a text file.

        Notes:
            This calls _get_dataset_string to get the overall summary string and
            _get_individual_string to get an individual summary string.

        """
        if name == 'Dataset':
            return self._get_dataset_string(result, indent=indent)
        return self._get_individual_string(result, indent=indent)

    def merge_all_info(self):
        """ Create a HedTagCounts containing the overall dataset HED tag summary.

        Returns:
            HedTagCounts: The overall dataset summary object for HED tag counts.

        """
        all_counts = HedTagCounts('Dataset')
        for counts in self.summary_dict.values():
            all_counts.merge_tag_dicts(counts.tag_dict)
            # Only the file names matter here; the stored value is a placeholder.
            for file_name in counts.files.keys():
                all_counts.files[file_name] = ""
            all_counts.total_events = all_counts.total_events + counts.total_events
        return all_counts

    @staticmethod
    def _get_dataset_string(result, indent=BaseSummary.DISPLAY_INDENT):
        """ Return a string with the overall summary for all the tabular files.

        Parameters:
            result (dict): Dictionary of merged summary information.
            indent (str): String of blanks used as the amount to indent for readability.

        Returns:
            str: Formatted string suitable for saving in a file or printing.

        """
        # Default to an empty list so a missing 'Files' entry reports 0 instead of raising on len(0).
        sum_list = [f"Dataset: Total events={result.get('Total events', 0)} "
                    f"Total files={len(result.get('Files', []))}"]
        sum_list = sum_list + HedTagSummary._get_tag_list(result, indent=indent)
        return "\n".join(sum_list)

    @staticmethod
    def _get_individual_string(result, indent=BaseSummary.DISPLAY_INDENT):
        """ Return a string with the summary for an individual tabular file.

        Parameters:
            result (dict): Dictionary of summary information for a particular tabular file.
            indent (str): String of blanks used as the amount to indent for readability.

        Returns:
            str: Formatted string suitable for saving in a file or printing.

        """
        sum_list = [f"Total events={result.get('Total events', 0)}"]
        sum_list = sum_list + HedTagSummary._get_tag_list(result, indent=indent)
        return "\n".join(sum_list)

    @staticmethod
    def _tag_details(tags):
        """ Return display strings of the form tag[events,files] for a list of tag dictionaries.

        Parameters:
            tags (list): Dictionaries each having 'tag', 'events', and 'files' entries.

        Returns:
            list: One formatted string per tag, giving the event count and the number of files.

        """
        return [f"{tag['tag']}[{tag['events']},{len(tag['files'])}]" for tag in tags]

    @staticmethod
    def _get_tag_list(result, indent=BaseSummary.DISPLAY_INDENT):
        """ Return the lines describing the main and other tags of a summary.

        Parameters:
            result (dict): Summary dictionary with a "Specifics" entry holding "Main tags" and "Other tags".
            indent (str): String of blanks used as the amount to indent for readability.

        Returns:
            list: Lines of text (without trailing newlines) ready to be joined.

        """
        tag_info = result["Specifics"]
        sum_list = [f"\n{indent}Main tags[events,files]:"]
        for category, tags in tag_info['Main tags'].items():
            sum_list.append(f"{indent}{indent}{category}:")
            # A category with no tags still gets its heading line.
            if tags:
                sum_list.append(f"{indent}{indent}{indent}{' '.join(HedTagSummary._tag_details(tags))}")
        if tag_info['Other tags']:
            sum_list.append(f"{indent}Other tags[events,files]:")
            sum_list.append(f"{indent}{indent}{' '.join(HedTagSummary._tag_details(tag_info['Other tags']))}")
        return sum_list

    @staticmethod
    def _get_details(key_list, template, verbose=False):
        """ Return the info for every tag count filed under the given keys.

        Parameters:
            key_list (list): Tag names whose lowercase forms are keys of template.
            template (dict): Maps lowercase tag names to lists of tag count objects.
            verbose (bool): Passed through to each tag count's get_info.

        Returns:
            list: The get_info results, in key_list order.

        """
        key_details = []
        for item in key_list:
            key_details.extend(tag_cnt.get_info(verbose=verbose) for tag_cnt in template[item.lower()])
        return key_details