Source code for hed.tools.remodeling.operations.summarize_definitions_op

""" Summarize the type_defs in the dataset. """

from hed import TabularInput
from hed.tools.remodeling.operations.base_op import BaseOp
from hed.tools.remodeling.operations.base_summary import BaseSummary
from hed.models.def_expand_gather import DefExpandGatherer


[docs]class SummarizeDefinitionsOp(BaseOp): """ Summarize the type_defs in the dataset. Required remodeling parameters: - **summary_name** (*str*): The name of the summary. - **summary_filename** (*str*): Base filename of the summary. The purpose is to produce a summary of the values in a tabular file. """ PARAMS = { "operation": "summarize_definitions", "required_parameters": { "summary_name": str, "summary_filename": str }, "optional_parameters": { "append_timecode": bool } } SUMMARY_TYPE = 'type_defs'
[docs] def __init__(self, parameters): """ Constructor for the summarize column values operation. Parameters: parameters (dict): Dictionary with the parameter values for required and optional parameters. :raises KeyError: - If a required parameter is missing. - If an unexpected parameter is provided. :raises TypeError: - If a parameter has the wrong type. """ super().__init__(self.PARAMS, parameters) self.summary_name = parameters['summary_name'] self.summary_filename = parameters['summary_filename'] self.append_timecode = parameters.get('append_timecode', False)
[docs] def do_op(self, dispatcher, df, name, sidecar=None): """ Create summaries of type_defs Parameters: dispatcher (Dispatcher): Manages the operation I/O. df (DataFrame): The DataFrame to be remodeled. name (str): Unique identifier for the dataframe -- often the original file path. sidecar (Sidecar or file-like): Only needed for HED operations. Returns: DataFrame: a copy of df Side-effect: Updates the relevant summary. """ df_new = df.copy() summary = dispatcher.summary_dicts.setdefault(self.summary_name, DefinitionSummary(self, dispatcher.hed_schema)) summary.update_summary({'df': dispatcher.post_proc_data(df_new), 'name': name, 'sidecar': sidecar, 'schema': dispatcher.hed_schema}) return df_new
[docs]class DefinitionSummary(BaseSummary):
[docs] def __init__(self, sum_op, hed_schema, known_defs=None): super().__init__(sum_op) self.def_gatherer = DefExpandGatherer(hed_schema, known_defs=known_defs)
[docs] def update_summary(self, new_info): """ Update the summary for a given tabular input file. Parameters: new_info (dict): A dictionary with the parameters needed to update a summary. Notes: - The summary needs a "name" str, a "schema" and a "Sidecar". """ data_input = TabularInput(new_info['df'], sidecar=new_info['sidecar'], name=new_info['name']) series, def_dict = data_input.series_a, data_input.get_def_dict(new_info['schema']) self.def_gatherer.process_def_expands(series, def_dict)
@staticmethod def _build_summary_dict(items_dict, title, process_func, display_description=False): summary_dict = {} items = {} for key, value in items_dict.items(): if process_func: value = process_func(value) if "#" in str(value): key = key + "/#" if display_description: description, value = DefinitionSummary._remove_description(value) items[key] = {"description": description, "contents": str(value)} elif isinstance(value, list): items[key] = [str(x) for x in value] else: items[key] = str(value) summary_dict[title] = items return summary_dict
[docs] def get_details_dict(self, def_gatherer): """ Return the summary-specific information in a dictionary. Parameters: def_gatherer (DefExpandGatherer): Contains the resolved dictionaries. Returns: dict: dictionary with the summary results. """ known_defs_summary = self._build_summary_dict(def_gatherer.def_dict, "Known Definitions", None, display_description=True) ambiguous_defs_summary = self._build_summary_dict(def_gatherer.ambiguous_defs, "Ambiguous Definitions", def_gatherer.get_ambiguous_group) errors_summary = self._build_summary_dict(def_gatherer.errors, "Errors", None) known_defs_summary.update(ambiguous_defs_summary) known_defs_summary.update(errors_summary) return {"Name": "", "Total events": 0, "Total files": 0, "Files": [], "Specifics": known_defs_summary}
# return known_defs_summary
[docs] def merge_all_info(self): """ Create an Object containing the definition summary. Returns: Object - the overall summary object for type_defs. """ return self.def_gatherer
def _get_result_string(self, name, result, indent=BaseSummary.DISPLAY_INDENT): """ Return a formatted string with the summary for the indicated name. Parameters: name (str): Identifier (usually the filename) of the individual file. result (dict): The dictionary of the summary results indexed by name. indent (str): A string containing spaces used for indentation (usually 3 spaces). Returns: str - The results in a printable format ready to be saved to a text file. Notes: This calls _get_dataset_string to get the overall summary string and _get_individual_string to get an individual summary string. """ if name == "Dataset": return self._get_dataset_string(result, indent=indent) return self._get_individual_string(result, indent=indent) @staticmethod def _nested_dict_to_string(data, indent, level=1): result = [] for key, value in data.items(): if isinstance(value, dict): result.append(f"{indent * level}{key}: {len(value)} items") result.append(DefinitionSummary._nested_dict_to_string(value, indent, level + 1)) elif isinstance(value, list): result.append(f"{indent * level}{key}:") for item in value: result.append(f"{indent * (level + 1)}{item}") else: result.append(f"{indent * level}{key}: {value}") return "\n".join(result) @staticmethod def _get_dataset_string(summary_dict, indent=BaseSummary.DISPLAY_INDENT): return DefinitionSummary._nested_dict_to_string(summary_dict, indent) @staticmethod def _remove_description(def_entry): def_group = def_entry.contents.copy() description = "" desc_tag = def_group.find_tags({"description"}, include_groups=False) if desc_tag: def_group.remove(desc_tag) desc_tag = desc_tag[0] description = desc_tag.extension return description, def_group @staticmethod def _get_individual_string(result, indent=BaseSummary.DISPLAY_INDENT): """ Return a string with the summary for an individual tabular file. Parameters: result (dict): Dictionary of summary information for a particular tabular file. indent (str): String of blanks used as the amount to indent for readability. Returns: str: Formatted string suitable for saving in a file or printing. """ return ""