""" Create a JSON sidecar from column values in a collection of tabular files. """
import json
from hed.tools import TabularSummary
from hed.tools.remodeling.operations.base_op import BaseOp
from hed.tools.remodeling.operations.base_summary import BaseSummary
class SummarizeSidecarFromEventsOp(BaseOp):
    """ Create a JSON sidecar from column values in a collection of tabular files.

    Required remodeling parameters:
        - **summary_name** (*str*): The name of the summary.
        - **summary_filename** (*str*): Base filename of the summary.
        - **skip_columns** (*list*): Names of columns to skip in the summary.
        - **value_columns** (*list*): Names of columns to treat as value columns rather than categorical columns.

    Optional remodeling parameters:
        - **append_timecode** (*bool*): Stored on the operation as ``append_timecode`` (False when omitted).

    The purpose is to produce a JSON sidecar template for annotating a dataset with HED tags.

    """

    PARAMS = {
        "operation": "summarize_sidecar_from_events",
        "required_parameters": {
            "summary_name": str,
            "summary_filename": str,
            "skip_columns": list,
            "value_columns": list,
        },
        "optional_parameters": {
            "append_timecode": bool
        }
    }

    SUMMARY_TYPE = "events_to_sidecar"

    def __init__(self, parameters):
        """ Constructor for summarize sidecar from events operation.

        Parameters:
            parameters (dict): Dictionary with the parameter values for required and optional parameters.

        :raises KeyError:
            - If a required parameter is missing.
            - If an unexpected parameter is provided.

        :raises TypeError:
            - If a parameter has the wrong type.

        """
        # The base class receives PARAMS together with the supplied values before
        # any of them are read here.
        super().__init__(self.PARAMS, parameters)
        self.summary_name = parameters['summary_name']
        self.summary_filename = parameters['summary_filename']
        self.skip_columns = parameters['skip_columns']
        self.value_columns = parameters['value_columns']
        # Optional parameter: absent means False.
        self.append_timecode = parameters.get('append_timecode', False)

    def do_op(self, dispatcher, df, name, sidecar=None):
        """ Extract a sidecar from events file.

        Parameters:
            dispatcher (Dispatcher): The dispatcher object for managing the operations.
            df (DataFrame): The tabular file to be remodeled.
            name (str): Unique identifier for the dataframe -- often the original file path.
            sidecar (Sidecar or file-like): Not needed for this operation.

        Returns:
            DataFrame: A copy of df.

        Side effect:
            Updates the associated summary if applicable.

        """
        df_new = df.copy()

        # One summary object is shared by every file processed under this summary
        # name; it is created and registered with the dispatcher on first use.
        summary = dispatcher.summary_dicts.get(self.summary_name, None)
        if not summary:
            summary = EventsToSidecarSummary(self)
            dispatcher.summary_dicts[self.summary_name] = summary

        # The summary is given the dispatcher's post-processed form of the copy,
        # while the caller gets the copy itself back.
        summary.update_summary({'df': dispatcher.post_proc_data(df_new), 'name': name})
        return df_new
class EventsToSidecarSummary(BaseSummary):
    """ Manage the per-file summaries used to build a JSON sidecar template.

    Each tabular file gets its own TabularSummary, stored in ``summary_dict`` under
    the file's name (``summary_dict`` is presumably created by BaseSummary -- it is
    not defined in this class).

    """

    def __init__(self, sum_op):
        """ Constructor for the events-to-sidecar summary manager.

        Parameters:
            sum_op (BaseOp): Operation this summary belongs to; must provide
                ``value_columns`` and ``skip_columns``.

        """
        super().__init__(sum_op)
        self.value_cols = sum_op.value_columns
        self.skip_cols = sum_op.skip_columns

    def update_summary(self, new_info):
        """ Update the summary for a given tabular input file.

        Parameters:
            new_info (dict): A dictionary with the parameters needed to update a summary.

        Notes:
            - The summary needs a "name" str and a "df".
            - Any summary previously stored under the same name is replaced.

        """
        file_name = new_info["name"]
        tab_sum = TabularSummary(value_cols=self.value_cols, skip_cols=self.skip_cols, name=file_name)
        tab_sum.update(new_info['df'], file_name)
        self.summary_dict[file_name] = tab_sum

    def get_details_dict(self, summary_info):
        """ Return the summary-specific information.

        Parameters:
            summary_info (TabularSummary): Summary to return info from.

        Returns:
            dict: Has keys "Name", "Total events", "Total files", "Files" and "Specifics".
                "Specifics" holds "Categorical info", "Value info", "Skip columns"
                and the extracted "Sidecar" template.

        """
        specifics = {"Categorical info": summary_info.categorical_info,
                     "Value info": summary_info.value_info,
                     "Skip columns": summary_info.skip_cols,
                     "Sidecar": summary_info.extract_sidecar_template()}
        return {"Name": summary_info.name,
                "Total events": summary_info.total_events,
                "Total files": summary_info.total_files,
                "Files": list(summary_info.files.keys()),
                "Specifics": specifics}

    def merge_all_info(self):
        """ Merge summary information from all the files.

        Returns:
            TabularSummary: Consolidated summary of information, named 'Dataset'.

        """
        all_sum = TabularSummary(name='Dataset')
        # Only the stored summaries are needed; the file-name keys are not used.
        for tab_sum in self.summary_dict.values():
            all_sum.update_summary(tab_sum)
        return all_sum

    def _get_result_string(self, name, result, indent=BaseSummary.DISPLAY_INDENT):
        """ Return a formatted string with the summary for the indicated name.

        Parameters:
            name (str): Identifier (usually the filename) of the individual file.
            result (dict): The dictionary of the summary results indexed by name.
            indent (str): A string containing spaces used for indentation (usually 3 spaces).

        Returns:
            str: The results in a printable format ready to be saved to a text file.

        Notes:
            This calls _get_dataset_string to get the overall summary string and
            _get_individual_string to get an individual summary string.

        """
        # "Dataset" is the name given to the merged summary in merge_all_info.
        if name == "Dataset":
            return self._get_dataset_string(result, indent=indent)
        return self._get_individual_string(result, indent=indent)

    @staticmethod
    def _get_dataset_string(result, indent=BaseSummary.DISPLAY_INDENT):
        """ Return a string with the overall summary for all the tabular files.

        Parameters:
            result (dict): Dictionary of merged summary information.
            indent (str): String of blanks used as the amount to indent for readability.

        Returns:
            str: Formatted string suitable for saving in a file or printing.

        Notes:
            Missing entries fall back to 0, an empty list or an empty dict.

        """
        specifics = result.get("Specifics", {})
        # Show the value column names as a plain list rather than a dict_keys repr.
        value_columns = list(specifics.get('Value info', {}).keys())
        sum_list = [f"Dataset: Total events={result.get('Total events', 0)} "
                    f"Total files={result.get('Total files', 0)}",
                    f"Skip columns: {str(specifics.get('Skip columns', []))}",
                    f"Value columns: {str(value_columns)}",
                    f"Sidecar:\n{json.dumps(specifics.get('Sidecar', {}), indent=indent)}"]
        return "\n".join(sum_list)

    @staticmethod
    def _get_individual_string(result, indent=BaseSummary.DISPLAY_INDENT):
        """ Return a string with the summary for an individual tabular file.

        Parameters:
            result (dict): Dictionary of summary information for a particular tabular file.
            indent (str): String of blanks used as the amount to indent for readability.

        Returns:
            str: Formatted string suitable for saving in a file or printing.

        Notes:
            Missing entries fall back to 0, an empty list or an empty dict.

        """
        specifics = result.get("Specifics", {})
        # Show the value column names as a plain list rather than a dict_keys repr.
        value_columns = list(specifics.get('Value info', {}).keys())
        # The key must match the "Skip columns" entry written by get_details_dict.
        sum_list = [f"Total events={result.get('Total events', 0)}",
                    f"Skip columns: {str(specifics.get('Skip columns', []))}",
                    f"Value columns: {str(value_columns)}",
                    f"Sidecar:\n{json.dumps(specifics.get('Sidecar', {}), indent=indent)}"]
        return "\n".join(sum_list)