# Source code for hed.tools.remodeling.operations.base_summary

""" Abstract base class for the contents of summary operations. """

import os
from abc import ABC, abstractmethod
import json
from hed.tools.util.io_util import get_timestamp


class BaseSummary(ABC):
    """ Abstract base class for summary contents. Should not be instantiated.

    Parameters:
        sum_op (BaseOp): Operation corresponding to this summary.

    """

    # NOTE(review): the original documentation describes the indent as "usually 3 spaces" --
    # confirm that this literal was not whitespace-collapsed.
    DISPLAY_INDENT = " "
    INDIVIDUAL_SUMMARIES_PATH = 'individual_summaries'

    def __init__(self, sum_op):
        """ Store the operation and start with an empty per-file summary dictionary.

        Parameters:
            sum_op (BaseOp): Operation corresponding to this summary.

        """
        self.op = sum_op
        self.summary_dict = {}

    def get_summary_details(self, include_individual=True):
        """ Return a dictionary with the details for individual files and the overall dataset.

        Parameters:
            include_individual (bool): If True, summaries for individual files are included.

        Returns:
            dict: A dictionary with 'Dataset' and 'Individual files' keys.

        Notes:
            - The 'Dataset' value is either a string or a dictionary with the overall summary.
            - The 'Individual files' value is a dictionary whose keys are file names and whose values are
              the corresponding summaries.

        Users are expected to provide merge_all_info and get_details_dict to support this.

        """
        merged_counts = self.merge_all_info()
        if merged_counts:
            details = self.get_details_dict(merged_counts)
        else:
            details = "Overall summary unavailable"

        summary_details = {"Dataset": details, "Individual files": {}}
        if include_individual:
            for name, count in self.summary_dict.items():
                summary_details["Individual files"][name] = self.get_details_dict(count)
        return summary_details

    def get_summary(self, individual_summaries="separate"):
        """ Return a summary dictionary with the information.

        Parameters:
            individual_summaries (str): "separate", "consolidated", or "none".

        Returns:
            dict: Dictionary with "Dataset" and "Individual files" keys.

        Notes:
            The individual_summaries value is processed as follows:
            - "separate" means that the individual summaries are to be in separate files.
            - "consolidated" means that the individual summaries are in the same file as the overall summary.
            - "none" means that only the overall summary is produced.

        """
        include_individual = individual_summaries in ("separate", "consolidated")
        summary_details = self.get_summary_details(include_individual=include_individual)
        dataset_summary = {"Summary name": self.op.summary_name,
                           "Summary type": self.op.SUMMARY_TYPE,
                           "Summary filename": self.op.summary_filename,
                           "Overall summary": summary_details['Dataset']}
        summary = {"Dataset": dataset_summary, "Individual files": {}}
        if summary_details["Individual files"]:
            summary["Individual files"] = self.get_individual(summary_details["Individual files"],
                                                              separately=individual_summaries == "separate")
        return summary

    def get_individual(self, summary_details, separately=True):
        """ Return the individual file summaries, optionally wrapped with identifying information.

        Parameters:
            summary_details (dict): Dictionary whose keys are file names and whose values are their summaries.
            separately (bool): If True, each summary is wrapped in a dictionary that also holds the
                summary name, type, and filename. If False, the summaries are passed through unchanged.

        Returns:
            dict: Dictionary with the same keys as summary_details.

        """
        individual_dict = {}
        for name, name_summary in summary_details.items():
            if separately:
                # NOTE(review): the "summary type" key is lower case here but "Summary type" in get_summary.
                # It is kept as is because it is part of the emitted output -- confirm before changing.
                individual_dict[name] = {"Summary name": self.op.summary_name,
                                         "summary type": self.op.SUMMARY_TYPE,
                                         "Summary filename": self.op.summary_filename,
                                         "File summary": name_summary}
            else:
                individual_dict[name] = name_summary
        return individual_dict

    def get_text_summary_details(self, include_individual=True):
        """ Return the summary details with each entry converted to a display string.

        Parameters:
            include_individual (bool): If True, summaries for individual files are included.

        Returns:
            dict: Dictionary with 'Dataset' (str) and 'Individual files' (dict of str) keys.

        Notes:
            The strings are produced by _get_result_string.

        """
        result = self.get_summary_details(include_individual=include_individual)
        summary_details = {"Dataset": self._get_result_string("Dataset", result.get("Dataset", "")),
                           "Individual files": {}}
        if include_individual:
            for name, individual_result in result.get("Individual files", {}).items():
                summary_details["Individual files"][name] = self._get_result_string(name, individual_result)
        return summary_details

    def get_text_summary(self, individual_summaries="separate"):
        """ Return the summary in text form.

        Parameters:
            individual_summaries (str): "separate", "consolidated", or "none".

        Returns:
            dict: Dictionary with a "Dataset" key (str) and, only when individual_summaries is "separate",
                an "Individual files" key mapping file names to their text summaries.

        Notes:
            When individual_summaries is "consolidated", the individual summaries are appended to
            the "Dataset" string rather than returned under their own key.

        """
        include_individual = individual_summaries in ("separate", "consolidated")
        summary_details = self.get_text_summary_details(include_individual=include_individual)

        # The same identifying header starts the dataset summary and every separate file summary.
        header = f"Summary name: {self.op.summary_name}\n" + \
                 f"Summary type: {self.op.SUMMARY_TYPE}\n" + \
                 f"Summary filename: {self.op.summary_filename}\n\n"
        summary = {"Dataset": header + f"Overall summary:\n{summary_details['Dataset']}"}

        if individual_summaries == "separate":
            summary["Individual files"] = {}
            for name, name_summary in summary_details["Individual files"].items():
                summary["Individual files"][name] = header + f"Summary for {name}:\n{name_summary}"
        elif include_individual:
            ind_str = "\n\n".join(f"{name}:\n{name_summary}\n"
                                  for name, name_summary in summary_details["Individual files"].items())
            summary['Dataset'] = summary["Dataset"] + f"\n\nIndividual files:\n\n{ind_str}"
        return summary

    def save(self, save_dir, file_formats=('.txt',), individual_summaries="separate", task_name=""):
        """ Save the summary in each of the requested file formats.

        Parameters:
            save_dir (str): Path to the directory in which the summaries will be saved.
            file_formats (list or tuple): Extensions (including the .) to save: '.txt' and/or '.json'.
            individual_summaries (str): "separate", "consolidated", or "none".
            task_name (str): Name of task to be included in file name if multiple tasks.

        Notes:
            Formats other than '.txt' and '.json' are skipped.

        """
        for file_format in file_formats:
            if file_format == '.txt':
                summary = self.get_text_summary(individual_summaries=individual_summaries)
            elif file_format == '.json':
                summary = self.get_summary(individual_summaries=individual_summaries)
            else:
                continue
            self._save_summary_files(save_dir, file_format, summary, individual_summaries, task_name=task_name)

    def _save_summary_files(self, save_dir, file_format, summary, individual_summaries, task_name=''):
        """ Save the files in the appropriate format.

        Parameters:
            save_dir (str): Path to the directory in which the summaries will be saved.
            file_format (str): String representing the extension (including .), '.txt' or '.json'.
            summary (dict): Dictionary of summaries (has "Dataset" and possibly "Individual files" keys).
            individual_summaries (str): "separate", "consolidated", or "none".
            task_name (str): Name of task to be included in file name if multiple tasks.

        """
        if self.op.append_timecode:
            time_stamp = '_' + get_timestamp()
        else:
            time_stamp = ''
        if task_name:
            task_name = "_" + task_name

        this_save = os.path.join(save_dir, self.op.summary_name + '/')
        os.makedirs(os.path.realpath(this_save), exist_ok=True)
        filename = os.path.realpath(os.path.join(this_save,
                                                 self.op.summary_filename + task_name + time_stamp + file_format))
        individual = summary.get("Individual files", {})

        # With no individual summaries to write, only the overall summary is saved.
        if individual_summaries == "none" or not individual:
            self.dump_summary(filename, summary["Dataset"])
            return
        # Consolidated output puts the overall and the individual summaries in one file.
        if individual_summaries == "consolidated":
            self.dump_summary(filename, summary)
            return

        # Otherwise the overall summary gets its own file and each individual summary goes in a subdirectory.
        self.dump_summary(filename, summary["Dataset"])
        individual_dir = os.path.join(this_save, self.INDIVIDUAL_SUMMARIES_PATH + '/')
        os.makedirs(os.path.realpath(individual_dir), exist_ok=True)
        for name, sum_str in individual.items():
            filename = self._get_summary_filepath(individual_dir, name, task_name, time_stamp, file_format)
            self.dump_summary(filename, sum_str)

    def _get_summary_filepath(self, individual_dir, name, task_name, time_stamp, file_format):
        """ Return the filepath for the summary including the timestamp.

        Parameters:
            individual_dir (str): Path of the directory in which the summary should be stored.
            name (str): Path of the original file from which the summary was extracted.
            task_name (str): Task name if separate summaries for different tasks or the empty string if not separated.
            time_stamp (str): Formatted date-time string to be included in the filename of the summary.
            file_format (str): Extension (including the .) of the summary file.

        Returns:
            str: Full path name of the summary.

        Notes:
            Only the base name of name (without its extension) is used. A counter starting at 1 is
            incremented until the resulting path does not refer to an existing file.

        """
        this_name = os.path.splitext(os.path.basename(name))[0]
        count = 1
        while True:
            filename = f"{self.op.summary_filename}_{this_name}{task_name}_{count}{time_stamp}{file_format}"
            filename = os.path.realpath(os.path.join(individual_dir, filename))
            if not os.path.isfile(filename):
                return filename
            count = count + 1

    def _get_result_string(self, name, result, indent=DISPLAY_INDENT):
        """ Return a formatted string with the summary for the indicated name.

        Parameters:
            name (str): Identifier (usually the filename) of the individual file.
            result (dict): The dictionary of the summary results indexed by name.
            indent (str): A string containing spaces used for indentation (defaults to DISPLAY_INDENT).

        Returns:
            str: The results in a printable format ready to be saved to a text file.

        Notes:
            This method should be overridden by each summary.

        """
        return f"\n{name}\n{indent}{str(result)}"

    @staticmethod
    def dump_summary(filename, summary):
        """ Write a summary to a file, serializing it to JSON if it is not already a string.

        Parameters:
            filename (str): Path of the file to write (overwritten if it exists).
            summary (str or dict): Text to write as is, or an object to be written as indented JSON.

        Notes:
            The file is written as UTF-8 so that the output does not depend on the platform default encoding.

        """
        if not isinstance(summary, str):
            summary = json.dumps(summary, indent=4)
        with open(filename, 'w', encoding='utf-8') as text_file:
            text_file.write(summary)

    @abstractmethod
    def get_details_dict(self, summary_info):
        """ Return the summary-specific information.

        Parameters:
            summary_info (object): Summary to return info from.

        Returns:
            dict: Dictionary with the results.

        Notes:
            Abstract method to be implemented by each individual summary.

        Notes:
            The expected return value is a dictionary of the form:

                {"Name": "", "Total events": 0, "Total files": 0, "Files": [], "Specifics": {}}

        """
        raise NotImplementedError

    @abstractmethod
    def merge_all_info(self):
        """ Return merged information.

        Returns:
            object: Consolidated summary of information.

        Notes:
            Abstract method to be implemented by each individual summary.

        """
        raise NotImplementedError

    @abstractmethod
    def update_summary(self, summary_dict):
        """ Method to update summary for a given tabular input.

        Parameters:
            summary_dict (dict): A summary-specific dictionary with the update information.

        """
        raise NotImplementedError