Source code for hed.tools.remodeling.operations.number_groups_op

""" Implementation in progress. """

from hed.tools.remodeling.operations.base_op import BaseOp


# TODO: This class is under development


class NumberGroupsOp(BaseOp):
    """ Add numbers to groups of events in a dataframe. (Implementation in progress.)

    Required remodeling parameters:
        - **number_column_name** (*str*): Name of the column that is to hold the group numbers.
        - **source_column** (*str*): Name of the column whose values are matched against start and stop.
        - **start** (*dict*): Must have exactly the keys "values" and "inclusion".
        - **stop** (*dict*): Must have exactly the keys "values" and "inclusion".

    Optional remodeling parameters:
        - **overwrite** (*bool*): If False (the default), do_op raises when number_column_name already exists.

    Notes:
        - In start and stop, "values" is iterated and each element must occur in source_column, while
          "inclusion" must be "include" or "exclude".
        - Presumably start/stop values mark where a group begins/ends -- the numbering itself is not
          implemented yet, so this cannot be confirmed from the code.

    """

    PARAMS = {
        "operation": "number_groups",
        "required_parameters": {
            "number_column_name": str,
            "source_column": str,
            "start": dict,
            "stop": dict
        },
        "optional_parameters": {"overwrite": bool}
    }

    def __init__(self, parameters):
        """ Constructor for the number groups operation.

        Parameters:
            parameters (dict): Dictionary with the parameter values for the operation.

        Raises:
            KeyError:
                - If start or stop is missing "values" or "inclusion" (MissingRequiredParameters).
                - If start or stop has any other key (BadParameter).
            ValueError: If an "inclusion" entry is not "include" or "exclude" (BadValue).

        Notes:
            - The BaseOp constructor is called first with PARAMS and may perform its own checks.

        """
        super().__init__(self.PARAMS, parameters)
        self.number_column_name = parameters['number_column_name']
        self.source_column = parameters['source_column']
        self.start = parameters['start']
        self.stop = parameters['stop']
        self.start_stop_test = {"values": list, "inclusion": str}
        self.inclusion_test = ["include", "exclude"]

        required = set(self.start_stop_test)
        for param_to_test in (self.start, self.stop):
            required_missing = required.difference(param_to_test)
            if required_missing:
                raise KeyError("MissingRequiredParameters",
                               f"Specified {param_to_test} for number_rows requires parameters "
                               f"{list(required_missing)}")
            for param_name, param_value in param_to_test.items():
                if param_name not in required:
                    # NOTE(review): self.operation is not assigned in this class; presumably
                    # BaseOp.__init__ sets it -- confirm.
                    raise KeyError("BadParameter",
                                   f"{param_name} not a required or optional parameter for {self.operation}")
                # TODO: The types in self.start_stop_test are not enforced yet, so a non-list "values"
                # entry is accepted here. Enabling the check would add a TypeError to the constructor.
                # Use `and` (not bitwise `&`) so the membership test only runs for the inclusion key.
                if param_name == 'inclusion' and param_value not in self.inclusion_test:
                    raise ValueError("BadValue" f" {param_name} must be one of {self.inclusion_test} not {param_value}")
        self.overwrite = parameters.get('overwrite', False)

    def do_op(self, dispatcher, df, name, sidecar=None):
        """ Add numbers to groups of events in dataframe.

        Parameters:
            dispatcher (Dispatcher): Manages the operation I/O. (Not used by the current implementation.)
            df (DataFrame): The DataFrame to be remodeled.
            name (str): Unique identifier for the dataframe -- often the original file path.
            sidecar (Sidecar or file-like): Only needed for HED operations. (Not used here.)

        Returns:
            Dataframe - a new dataframe after processing.

        Raises:
            ValueError:
                - If number_column_name already exists in df and overwrite is False (ExistingNumberColumn).
                - If source_column is not a column of df (MissingSourceColumn).
                - If any start or stop value does not occur in source_column (MissingValue).

        Notes:
            - The numbering is not implemented yet: after validation an unmodified copy of df is returned
              and df itself is left untouched.

        """
        # An existing number column is only acceptable when overwriting was requested.
        if self.number_column_name in df.columns and self.overwrite is False:
            raise ValueError("ExistingNumberColumn",
                             f"Column {self.number_column_name} already exists in event file.", "")

        if self.source_column not in df.columns:
            raise ValueError("MissingSourceColumn",
                             f"Column {self.source_column} does not exist in event file {name}.", "")

        # Every start and stop value must occur in the source column. The column is converted to a
        # list once rather than once per tested element.
        source_values = df[self.source_column].tolist()
        for label, spec in (("Start", self.start), ("Stop", self.stop)):
            missing = [element for element in spec['values'] if element not in source_values]
            if missing:
                raise ValueError("MissingValue",
                                 f"{label} value(s) {missing} does not exist in {self.source_column} "
                                 f"of event file {name}")

        df_new = df.copy()
        # TODO: Planned implementation, kept for reference. np, tuple_to_range and get_indices are
        # not imported in the visible part of this module.
        # # create number column
        # df_new[self.number_column_name] = np.nan
        #
        # # find group indices
        # indices = tuple_to_range(
        #     get_indices(df, self.source_column, self.start['values'], self.stop['values']),
        #     [self.start['inclusion'], self.stop['inclusion']])
        # for i, group in enumerate(indices):
        #     df_new.loc[group, self.number_column_name] = i + 1

        return df_new