""" Create tabular file factors from type variables. """
import pandas as pd
import numpy as np
from hed.tools.remodeling.operations.base_op import BaseOp
from hed.models.tabular_input import TabularInput
from hed.tools.analysis.event_manager import EventManager
from hed.tools.analysis.hed_type_manager import HedTypeManager
# TODO: restricted factor values are not implemented yet.
class FactorHedTypeOp(BaseOp):
    """ Create tabular file factors from type variables and append to tabular data.

    Required remodeling parameters:
        - **type_tag** (*str*): HED tag used to find the factors (most commonly `condition-variable`).
        - **type_values** (*list*): Factor values to include. If empty all values of that type_tag are used.

    """

    # Operation specification handed to the BaseOp constructor together with the actual parameters.
    PARAMS = {
        "operation": "factor_hed_type",
        "required_parameters": {
            "type_tag": str,
            "type_values": list
        },
        "optional_parameters": {}
    }

    def __init__(self, parameters):
        """ Constructor for the factor HED type operation.

        Parameters:
            parameters (dict): Actual values of the parameters for the operation.

        :raises KeyError:
            - If a required parameter is missing.
            - If an unexpected parameter is provided.

        :raises TypeError:
            - If a parameter has the wrong type.

        :raises ValueError:
            - If the specification is missing a valid operation.

        """
        # Parameter checking is delegated to the base class, which receives PARAMS and the actual values.
        super().__init__(self.PARAMS, parameters)
        self.type_tag = parameters["type_tag"]
        self.type_values = parameters["type_values"]

    def do_op(self, dispatcher, df, name, sidecar=None):
        """ Factor columns based on HED type and append to tabular data.

        Parameters:
            dispatcher (Dispatcher): Manages the operation I/O.
            df (DataFrame): The DataFrame to be remodeled.
            name (str): Unique identifier for the dataframe -- often the original file path.
            sidecar (Sidecar or file-like): Only needed for HED operations.

        Returns:
            DataFrame: A new DataFrame that includes the factors.

        Notes:
            - The factors are requested with one-hot encoding.
            - Factor columns are appended only if at least one factor column is produced;
              otherwise the result contains just the columns of the tabular input.
            - Every 'n/a' value in the result is replaced by NaN.

        """
        input_data = TabularInput(df, sidecar=sidecar, name=name)
        # Work on a copy of the tabular input's dataframe; the factor columns are concatenated to it.
        df_list = [input_data.dataframe.copy()]
        var_manager = HedTypeManager(EventManager(input_data, dispatcher.hed_schema))
        var_manager.add_type(self.type_tag.lower())
        df_factors = var_manager.get_factor_vectors(self.type_tag, self.type_values, factor_encoding="one-hot")
        # NOTE(review): the None check is defensive -- confirm whether get_factor_vectors can
        # return None (e.g., when the type tag has no values) rather than an empty DataFrame.
        if df_factors is not None and len(df_factors.columns) > 0:
            df_list.append(df_factors)
        df_new = pd.concat(df_list, axis=1)
        # np.nan is the canonical spelling; the np.NaN alias was removed in NumPy 2.0.
        df_new.replace('n/a', np.nan, inplace=True)
        return df_new