# Source code for hed.tools.remodeling.operations.base_summary

""" Abstract base class for the contents of summary operations. """

import os
from abc import ABC, abstractmethod
import json
from hed.tools.util.io_util import get_timestamp


class BaseSummary(ABC):
    """ Abstract base class for summary contents. Should not be instantiated.

    Parameters:
        sum_op (BaseOp):  Operation corresponding to this summary.

    Notes:
        - Per-file summary information is held in summary_dict, keyed by file name.
        - Subclasses must implement get_details_dict, merge_all_info, and update_summary.

    """

    DISPLAY_INDENT = "   "
    INDIVIDUAL_SUMMARIES_PATH = 'individual_summaries'

    def __init__(self, sum_op):
        """ Store the owning operation and start with no per-file summaries.

        Parameters:
            sum_op (BaseOp):  Operation corresponding to this summary.

        """
        self.op = sum_op
        self.summary_dict = {}

    def get_summary_details(self, include_individual=True):
        """ Return a dictionary with the details for individual files and the overall dataset.

        Parameters:
            include_individual (bool):  If True, summaries for individual files are included.

        Returns:
            dict - a dictionary with 'Dataset' and 'Individual files' keys.

        Notes:
            - The 'Dataset' value is either a string or a dictionary with the overall summary.
            - The 'Individual files' value is a dictionary whose keys are file names and values are
                   their corresponding summaries.

        Users are expected to provide merge_all_info and get_details_dict to support this.

        """
        merged_info = self.merge_all_info()
        if merged_info:
            details = self.get_details_dict(merged_info)
        else:
            # A falsy merge result means there is nothing to summarize overall.
            details = "Overall summary unavailable"

        individual = {}
        if include_individual:
            individual = {name: self.get_details_dict(info) for name, info in self.summary_dict.items()}
        return {"Dataset": details, "Individual files": individual}

    def get_summary(self, individual_summaries="separate"):
        """ Return a summary dictionary with the information.

        Parameters:
            individual_summaries (str): "separate", "consolidated", or "none"

        Returns:
            dict - dictionary with "Dataset" and "Individual files" keys.

        Notes: The individual_summaries value is processed as follows
           -  "separate" individual summaries are to be in separate files
           -  "consolidated" means that the individual summaries are in same file as overall summary
           -  "none" means that only the overall summary is produced.

        """
        include_individual = individual_summaries in ("separate", "consolidated")
        summary_details = self.get_summary_details(include_individual=include_individual)
        dataset_summary = {"Summary name": self.op.summary_name, "Summary type": self.op.SUMMARY_TYPE,
                           "Summary filename": self.op.summary_filename,
                           "Overall summary": summary_details['Dataset']}
        summary = {"Dataset": dataset_summary, "Individual files": {}}
        if summary_details["Individual files"]:
            summary["Individual files"] = self.get_individual(summary_details["Individual files"],
                                                              separately=individual_summaries == "separate")
        return summary

    def get_individual(self, summary_details, separately=True):
        """ Return the individual file summaries, optionally wrapped with identifying header fields.

        Parameters:
            summary_details (dict):  Dictionary of individual summaries keyed by file name.
            separately (bool):  If True, each summary is wrapped with the summary name, type, and filename
                                so that it is self-describing when written to its own file.

        Returns:
            dict - dictionary keyed by file name with the (possibly wrapped) individual summaries.

        """
        if not separately:
            return dict(summary_details)
        # NOTE(review): the "summary type" key is lowercase here but "Summary type" in get_summary.
        # It is left as is because changing it would alter the output that existing consumers read.
        return {name: {"Summary name": self.op.summary_name, "summary type": self.op.SUMMARY_TYPE,
                       "Summary filename": self.op.summary_filename, "File summary": name_summary}
                for name, name_summary in summary_details.items()}

    def get_text_summary_details(self, include_individual=True):
        """ Return the summary details with each entry formatted as a display string.

        Parameters:
            include_individual (bool):  If True, formatted summaries for individual files are included.

        Returns:
            dict - dictionary with 'Dataset' (str) and 'Individual files' (dict of str keyed by name) keys.

        """
        result = self.get_summary_details(include_individual=include_individual)
        summary_details = {"Dataset": self._get_result_string("Dataset", result.get("Dataset", "")),
                           "Individual files": {}}
        if include_individual:
            for name, individual_result in result.get("Individual files", {}).items():
                summary_details["Individual files"][name] = self._get_result_string(name, individual_result)
        return summary_details

    def get_text_summary(self, individual_summaries="separate"):
        """ Return the summary in text form.

        Parameters:
            individual_summaries (str): "separate", "consolidated", or "none"

        Returns:
            dict - dictionary with a "Dataset" key and, only for "separate", an "Individual files" key.

        Notes:
            - For "separate" each individual summary is its own string with its own header.
            - For "consolidated" the individual summaries are appended to the "Dataset" string.
            - For "none" only the overall summary appears.

        """
        include_individual = individual_summaries in ("separate", "consolidated")
        summary_details = self.get_text_summary_details(include_individual=include_individual)
        # The same identifying header starts the dataset summary and every separate individual summary.
        header = f"Summary name: {self.op.summary_name}\n" + \
                 f"Summary type: {self.op.SUMMARY_TYPE}\n" + \
                 f"Summary filename: {self.op.summary_filename}\n\n"
        summary = {"Dataset": header + f"Overall summary:\n{summary_details['Dataset']}"}
        if individual_summaries == "separate":
            summary["Individual files"] = {
                name: header + f"Summary for {name}:\n{name_summary}"
                for name, name_summary in summary_details["Individual files"].items()}
        elif include_individual:
            ind_str = "\n\n".join(f"{name}:\n{name_summary}\n"
                                  for name, name_summary in summary_details["Individual files"].items())
            summary['Dataset'] = summary["Dataset"] + f"\n\nIndividual files:\n\n{ind_str}"

        return summary

    def save(self, save_dir, file_formats=('.txt',), individual_summaries="separate", task_name=""):
        """ Save the summary in each of the requested formats.

        Parameters:
            save_dir (str): Path to the directory in which the summaries will be saved.
            file_formats (list or tuple): Extensions (including .) to save. Only '.txt' and '.json' are written.
            individual_summaries (str): "separate", "consolidated", or "none".
            task_name (str): Name of task to be included in file name if multiple tasks.

        Notes:
            - Formats other than '.txt' and '.json' are skipped without error.
            - The default is an immutable tuple so that it cannot be altered between calls.

        """
        for file_format in file_formats:
            if file_format == '.txt':
                summary = self.get_text_summary(individual_summaries=individual_summaries)
            elif file_format == '.json':
                summary = self.get_summary(individual_summaries=individual_summaries)
            else:
                continue
            self._save_summary_files(save_dir, file_format, summary, individual_summaries, task_name=task_name)

    def _save_summary_files(self, save_dir, file_format, summary, individual_summaries, task_name=''):
        """ Save the files in the appropriate format.

        Parameters:
            save_dir (str): Path to the directory in which the summaries will be saved.
            file_format (str): string representing the extension (including .), '.txt' or '.json'.
            summary (dictionary): Dictionary of summaries (has "Dataset" and "Individual files" keys).
            individual_summaries (str): "separate", "consolidated", or "none".
            task_name (str): Name of task to be included in file name if multiple tasks.

        Notes:
            - Files go in a subdirectory of save_dir named for the operation's summary_name.
            - Separate individual summaries go in a further INDIVIDUAL_SUMMARIES_PATH subdirectory.

        """
        if self.op.append_timecode:
            time_stamp = '_' + get_timestamp()
        else:
            time_stamp = ''
        if task_name:
            task_name = "_" + task_name
        this_save = os.path.join(save_dir, self.op.summary_name + '/')
        os.makedirs(os.path.realpath(this_save), exist_ok=True)
        filename = os.path.realpath(os.path.join(this_save,
                                                 self.op.summary_filename + task_name + time_stamp + file_format))
        individual = summary.get("Individual files", {})
        if individual_summaries == "none" or not individual:
            # Nothing separate to write (text "consolidated" has already folded individuals into Dataset).
            self.dump_summary(filename, summary["Dataset"])
            return
        if individual_summaries == "consolidated":
            self.dump_summary(filename, summary)
            return
        self.dump_summary(filename, summary["Dataset"])
        individual_dir = os.path.join(this_save, self.INDIVIDUAL_SUMMARIES_PATH + '/')
        os.makedirs(os.path.realpath(individual_dir), exist_ok=True)
        for name, sum_str in individual.items():
            filename = self._get_summary_filepath(individual_dir, name, task_name, time_stamp, file_format)
            self.dump_summary(filename, sum_str)

    def _get_summary_filepath(self, individual_dir, name, task_name, time_stamp, file_format):
        """ Return the filepath for the summary including the timestamp

        Parameters:
            individual_dir (str):  path of the directory in which the summary should be stored.
            name (str): Path of the original file from which the summary was extracted.
            task_name (str): Task name if separate summaries for different tasks or the empty string if not separated.
            time_stamp (str):  Formatted date-time string to be included in the filename of the summary.
            file_format (str): Extension (including .) of the summary file.

        Returns:
            str: Full path name of the summary.

        Notes:
            A counter starting at 1 is part of the name and is increased until no file of that name exists.

        """
        this_name = os.path.splitext(os.path.basename(name))[0]
        count = 1
        while True:
            filename = f"{self.op.summary_filename}_{this_name}{task_name}_{count}{time_stamp}{file_format}"
            filename = os.path.realpath(os.path.join(individual_dir, filename))
            if not os.path.isfile(filename):
                return filename
            count = count + 1

    def _get_result_string(self, name, result, indent=DISPLAY_INDENT):
        """ Return a formatted string with the summary for the indicated name.

        Parameters:
            name (str):  Identifier (usually the filename) of the individual file.
            result (dict): The dictionary of the summary results indexed by name.
            indent (str): A string containing spaces used for indentation (usually 3 spaces).

        Returns:
            str - The results in a printable format ready to be saved to a text file.

        Notes:
            This method should be overridden by each summary.

        """
        return f"\n{name}\n{indent}{str(result)}"

    @staticmethod
    def dump_summary(filename, summary):
        """ Write a summary to a file, serializing it to JSON first if it is not already a string.

        Parameters:
            filename (str):  Path of the file to be written (overwritten if it exists).
            summary (str or object):  Text to write as is, or a JSON-serializable object.

        """
        if not isinstance(summary, str):
            summary = json.dumps(summary, indent=4)
        # An explicit encoding keeps the output independent of the platform default and
        # prevents encoding errors when the text contains non-ASCII characters.
        with open(filename, 'w', encoding='utf-8') as text_file:
            text_file.write(summary)

    @abstractmethod
    def get_details_dict(self, summary_info):
        """ Return the summary-specific information.

        Parameters:
            summary_info (object):  Summary to return info from

        Returns:
            dict: dictionary with the results.

        Notes:
            Abstract method to be implemented by each individual summary.

        Notes:
            The expected return value is a dictionary of the form:

               {"Name": "", "Total events": 0, "Total files": 0, "Files": [], "Specifics": {}}

        """
        raise NotImplementedError

    @abstractmethod
    def merge_all_info(self):
        """ Return merged information.

        Returns:
           object:  Consolidated summary of information.

        Notes:
            Abstract method to be implemented by each individual summary.

        """
        raise NotImplementedError

    @abstractmethod
    def update_summary(self, summary_dict):
        """ Method to update summary for a given tabular input.

        Parameters:
            summary_dict (dict):  A summary specific dictionary with the update information.

        """
        raise NotImplementedError