from functools import partial
import pandas as pd
from hed.models.sidecar import Sidecar
from hed.models.tabular_input import TabularInput
from hed.models.hed_string import HedString
from hed.models.definition_dict import DefinitionDict
def get_assembled(tabular_file, sidecar, hed_schema, extra_def_dicts=None, join_columns=True,
                  shrink_defs=False, expand_defs=True):
    """ Build HedString objects for a tabular file, optionally expanding or shrinking definitions.

    Parameters:
        tabular_file (str or TabularInput): A TabularInput, or a str that is handed to the
            TabularInput constructor together with the sidecar.
        sidecar (str or Sidecar): A Sidecar, or a str that is handed to the Sidecar constructor.
            If falsy, no definitions are gathered.
        hed_schema (HedSchema): The schema passed to every HedString and to the sidecar's
            definition lookup.
        extra_def_dicts (list of DefinitionDict or None): Forwarded to sidecar.get_def_dict.
        join_columns (bool): If True, build one HedString per entry of tabular_file.series_a.
            Otherwise build one list of HedStrings per row of tabular_file.dataframe_a.
        shrink_defs (bool): Shrink any def-expand tags found. Ignored when expand_defs is True.
        expand_defs (bool): Expand any def tags found. Takes precedence over shrink_defs.

    Returns:
        tuple:
            hed_strings (list): A list of HedStrings, or a list of lists of HedStrings
                when join_columns is False.
            def_dict (DefinitionDict or None): The definitions from the sidecar, or None
                when no sidecar was given.
    """
    if isinstance(sidecar, str):
        sidecar = Sidecar(sidecar)
    if isinstance(tabular_file, str):
        tabular_file = TabularInput(tabular_file, sidecar)

    def_dict = sidecar.get_def_dict(hed_schema=hed_schema, extra_def_dicts=extra_def_dicts) if sidecar else None

    def _assemble(text):
        # Expansion wins when both flags are set, mirroring the parameter defaults.
        hed_string = HedString(text, hed_schema, def_dict)
        if expand_defs:
            return hed_string.expand_defs()
        if shrink_defs:
            return hed_string.shrink_defs()
        return hed_string

    if join_columns:
        return [_assemble(text) for text in tabular_file.series_a], def_dict

    rows = tabular_file.dataframe_a.itertuples(index=False)
    return [[_assemble(cell) for cell in row] for row in rows], def_dict
def shrink_defs(df, hed_schema, columns=None):
    """ Shrink (in place) any def-expand tags found in the specified columns in the dataframe.

    Only cells whose text contains 'Def-expand/' (case-insensitive) are rewritten.
    Missing (NaN/None) cells are left untouched.

    Parameters:
        df (pd.DataFrame or pd.Series): The dataframe or series to modify.
        hed_schema (HedSchema or None): The schema to use to identify defs.
        columns (list or None): The columns to modify on the dataframe. If None, all columns
            are processed. Ignored when df is a Series.
    """
    shrinker = partial(_shrink_defs, hed_schema=hed_schema)

    if isinstance(df, pd.Series):
        # na=False keeps the mask strictly boolean even when the series has missing values.
        mask = df.str.contains('Def-expand/', case=False, na=False)
        df[mask] = df[mask].apply(shrinker)
        return

    if columns is None:
        columns = df.columns

    for column in columns:
        mask = df[column].str.contains('Def-expand/', case=False, na=False)
        # Write through a single .loc call: chained indexing (df[column][mask] = ...) may
        # assign into a temporary copy and leave df unchanged.
        df.loc[mask, column] = df.loc[mask, column].apply(shrinker)
def expand_defs(df, hed_schema, def_dict, columns=None):
    """ Expand (in place) any def tags found in the dataframe.

    Only cells whose text contains 'Def/' (case-insensitive) are rewritten.
    Missing (NaN/None) cells are left untouched.

    Parameters:
        df (pd.DataFrame or pd.Series): The dataframe or series to modify.
        hed_schema (HedSchema or None): The schema to use to identify defs.
        def_dict (DefinitionDict): The definitions to expand.
        columns (list or None): The columns to modify on the dataframe. If None, all columns
            are processed. Ignored when df is a Series.
    """
    expander = partial(_expand_defs, hed_schema=hed_schema, def_dict=def_dict)

    if isinstance(df, pd.Series):
        # na=False keeps the mask strictly boolean even when the series has missing values.
        mask = df.str.contains('Def/', case=False, na=False)
        df[mask] = df[mask].apply(expander)
        return

    if columns is None:
        columns = df.columns

    for column in columns:
        mask = df[column].str.contains('Def/', case=False, na=False)
        # Single .loc assignment so the write lands on df itself rather than on a copy.
        df.loc[mask, column] = df.loc[mask, column].apply(expander)
def _convert_to_form(hed_string, hed_schema, tag_form):
    """ Parse hed_string with hed_schema and return the text of its tag_form representation. """
    parsed = HedString(hed_string, hed_schema)
    converted = parsed.get_as_form(tag_form)
    return str(converted)
def _shrink_defs(hed_string, hed_schema):
    """ Parse hed_string with hed_schema, shrink its defs, and return the result as text. """
    parsed = HedString(hed_string, hed_schema)
    shrunk = parsed.shrink_defs()
    return str(shrunk)
def _expand_defs(hed_string, hed_schema, def_dict):
    """ Parse hed_string with hed_schema and def_dict, expand its defs, and return the result as text. """
    parsed = HedString(hed_string, hed_schema, def_dict)
    expanded = parsed.expand_defs()
    return str(expanded)
def process_def_expands(hed_strings, hed_schema, known_defs=None, ambiguous_defs=None):
    """ Gather def-expand tags from the strings and compare them with the known definitions.

    This is a thin wrapper that builds a DefExpandGatherer and returns the result of its
    process_def_expands method.

    Parameters:
        hed_strings (list or pd.Series): A list of HED strings to process.
        hed_schema (HedSchema): The schema to use.
        known_defs (DefinitionDict or list or str or None):
            A DefinitionDict or anything its constructor takes. These are the known
            definitions going in, that must match perfectly.
        ambiguous_defs (dict): A dictionary containing ambiguous definitions.
            Format TBD. Currently def name key: list of lists of HED tags values.

    Returns:
        tuple: A tuple containing the DefinitionDict, ambiguous definitions, and errors.
    """
    # Imported at call time rather than module level -- presumably to avoid a circular
    # import; confirm before hoisting.
    from hed.models.def_expand_gather import DefExpandGatherer

    gatherer = DefExpandGatherer(hed_schema, known_defs, ambiguous_defs)
    result = gatherer.process_def_expands(hed_strings)
    return result