Source code for hed.tools.remodeling.operations.reorder_columns_op

""" Reorder columns in a tabular file. """
from hed.tools.remodeling.operations.base_op import BaseOp


[docs]class ReorderColumnsOp(BaseOp): """ Reorder columns in a tabular file. Required parameters: - column_order (*list*): The names of the columns to be reordered. - ignore_missing (*bool*): If false and a column in column_order is not in df, skip the column - keep_others (*bool*): If true, columns not in column_order are placed at end. """ PARAMS = { "operation": "reorder_columns", "required_parameters": { "column_order": list, "ignore_missing": bool, "keep_others": bool }, "optional_parameters": {} }
[docs] def __init__(self, parameters): """ Constructor for reorder columns operation. Parameters: parameters (dict): Dictionary with the parameter values for required and optional parameters. :raises KeyError: - If a required parameter is missing. - If an unexpected parameter is provided. :raises TypeError: - If a parameter has the wrong type. """ super().__init__(self.PARAMS, parameters) self.column_order = parameters['column_order'] self.ignore_missing = parameters['ignore_missing'] self.keep_others = parameters['keep_others']
[docs] def do_op(self, dispatcher, df, name, sidecar=None): """ Reorder columns as specified in event dictionary. Parameters: dispatcher (Dispatcher): Manages the operation I/O. df (DataFrame): The DataFrame to be remodeled. name (str): Unique identifier for the dataframe -- often the original file path. sidecar (Sidecar or file-like): Not needed for this operation. Returns: Dataframe: A new dataframe after processing. :raises ValueError: - When ignore_missing is false and column_order has columns not in the data. """ df_new = df.copy() current_columns = list(df_new.columns) missing_columns = set(self.column_order).difference(set(df_new.columns)) ordered = self.column_order if missing_columns and not self.ignore_missing: raise ValueError("MissingReorderedColumns", f"{str(missing_columns)} are not in dataframe columns " f" [{str(df_new.columns)}] and not ignored.") elif missing_columns: ordered = [elem for elem in self.column_order if elem not in list(missing_columns)] if self.keep_others: ordered += [elem for elem in current_columns if elem not in ordered] df_new = df_new.loc[:, ordered] return df_new