""" A group of BIDS files with specified suffix name. """
import os
from hed.errors.error_reporter import ErrorHandler
from hed.validator.sidecar_validator import SidecarValidator
from hed.tools.analysis.tabular_summary import TabularSummary
from hed.tools.bids.bids_tabular_file import BidsTabularFile
from hed.tools.bids.bids_sidecar_file import BidsSidecarFile
from hed.tools.util import io_util
class BidsFileGroup:
    """ Container for BIDS files with a specified suffix.

    Attributes:
        root_path (str): Real root path of the BIDS dataset.
        suffix (str): The file suffix specifying the class of file represented in this group (e.g., events).
        obj_type (str): Type of file in this group (e.g., tabular).
        exclude_dirs (list): Directory names that are skipped when searching for files.
        sidecar_dict (dict): A dictionary of BidsSidecarFile objects for this suffix, keyed by real path.
        datafile_dict (dict): A dictionary of BidsTabularFile objects keyed by real path
            (empty when obj_type is not 'tabular').
        sidecar_dir_dict (dict): Dictionary whose keys are real directory paths and whose values are
            lists of the BidsSidecarFile objects in the corresponding directory.

    """

    def __init__(self, root_path, suffix="_events", obj_type="tabular", exclude_dirs=None):
        """ Constructor for a BidsFileGroup.

        Parameters:
            root_path (str): Path of the root of the BIDS dataset.
            suffix (str): Suffix indicating the type this group represents (e.g. events, or channels, etc.).
            obj_type (str): Indicates the type of underlying file that represents the contents.
            exclude_dirs (list or None): Directories to exclude. If None, the default
                ['sourcedata', 'derivatives', 'code', 'stimuli'] is used.

        """
        # A fresh list is created for every instance so that the default is never shared between groups.
        if exclude_dirs is None:
            exclude_dirs = ['sourcedata', 'derivatives', 'code', 'stimuli']

        self.root_path = os.path.realpath(root_path)
        self.suffix = suffix
        self.obj_type = obj_type
        self.exclude_dirs = exclude_dirs

        # Both sidecar dictionaries must exist before get_sidecars_from_path can be used.
        self.sidecar_dict = self._make_sidecar_dict()
        self.sidecar_dir_dict = self._make_sidecar_dir_dict()

        # Each sidecar's contents are set from the list of sidecar paths that apply to it.
        for sidecar in self.sidecar_dict.values():
            applicable_paths = self.get_sidecars_from_path(sidecar)
            sidecar.set_contents(content_info=applicable_paths)

        self.datafile_dict = self._make_datafile_dict()
        for data_file in self.datafile_dict.values():
            applicable_paths = self.get_sidecars_from_path(data_file)
            if applicable_paths:
                # The list is ordered from the root downward, so the last entry is the one closest to the file.
                data_file.sidecar = self.sidecar_dict[applicable_paths[-1]]

    def get_sidecars_from_path(self, obj):
        """ Return applicable sidecars for the object.

        Parameters:
            obj (BidsTabularFile or BidsSidecarFile): The BIDS file object to get the sidecars for.

        Returns:
            list: A list of the paths for applicable sidecars for obj starting at the root.

        Notes:
            - At most one sidecar is taken from each directory on the path from the root to obj.

        """
        path_components = [self.root_path] + io_util.get_path_components(self.root_path, obj.file_path)
        sidecar_paths = []
        current_path = ''
        # Walk one directory level at a time, starting with the dataset root itself.
        for component in path_components:
            current_path = os.path.realpath(os.path.join(current_path, component))
            sidecar = self._get_sidecar_for_obj(obj, current_path)
            if sidecar:
                sidecar_paths.append(sidecar.file_path)
        return sidecar_paths

    def _get_sidecar_for_obj(self, obj, current_path):
        """ Return a single BidsSidecarFile relevant to obj from the sidecars in the current path.

        Parameters:
            obj (BidsFile): A file whose sidecars are to be found.
            current_path (str): The path of the directory whose sidecars are to be checked.

        Returns:
            BidsSidecarFile or None: The first BidsSidecarFile in current_path relevant to obj, if any.

        """
        for sidecar in self.sidecar_dir_dict.get(current_path) or []:
            if sidecar.is_sidecar_for(obj):
                return sidecar
        return None

    def summarize(self, value_cols=None, skip_cols=None):
        """ Return a TabularSummary of group files.

        Parameters:
            value_cols (list): Column names designated as value columns.
            skip_cols (list): Column names designated as columns to skip.

        Returns:
            TabularSummary or None: A summary of the number of values in different columns if tabular group.

        Notes:
            - The columns that are not value_cols or skip_col are summarized by counting
              the number of times each unique value appears in that column.

        """
        if self.obj_type != 'tabular':
            return None
        info = TabularSummary(value_cols=value_cols, skip_cols=skip_cols)
        # The summary is updated from the list of data file paths (the keys of datafile_dict).
        info.update(list(self.datafile_dict))
        return info

    def validate_sidecars(self, hed_schema, extra_def_dicts=None, check_for_warnings=True):
        """ Validate the contents of each sidecar in the group.

        Parameters:
            hed_schema (HedSchema): HED schema for validation.
            extra_def_dicts (DefinitionDict): Extra definitions.
            check_for_warnings (bool): If True, include warnings in the check.

        Returns:
            list: A list of validation issues found. Each issue is a dictionary.

        """
        error_handler = ErrorHandler(check_for_warnings)
        validator = SidecarValidator(hed_schema)
        issues = []
        for sidecar in self.sidecar_dict.values():
            # Issues are reported under the sidecar's file name rather than its full path.
            name = os.path.basename(sidecar.file_path)
            issues += validator.validate(sidecar.contents, extra_def_dicts=extra_def_dicts, name=name,
                                         error_handler=error_handler)
        return issues

    def validate_datafiles(self, hed_schema, extra_def_dicts=None, check_for_warnings=True, keep_contents=False):
        """ Validate the datafiles and return an error list.

        Parameters:
            hed_schema (HedSchema): Schema to apply to the validation.
            extra_def_dicts (DefinitionDict): Extra definitions that come from outside.
            check_for_warnings (bool): If True, include warnings in the check.
            keep_contents (bool): If True, the underlying data files are read and their contents retained.

        Returns:
            list: A list of validation issues found. Each issue is a dictionary.

        """
        error_handler = ErrorHandler(check_for_warnings)
        issues = []
        for data_file in self.datafile_dict.values():
            # NOTE(review): overwrite=False presumably avoids re-reading contents that are already set -- confirm.
            data_file.set_contents(overwrite=False)
            name = os.path.basename(data_file.file_path)
            issues += data_file.contents.validate(hed_schema, extra_def_dicts=extra_def_dicts, name=name,
                                                  error_handler=error_handler)
            if not keep_contents:
                data_file.clear_contents()
        return issues

    def _make_datafile_dict(self):
        """ Get a dictionary of objects corresponding to the underlying obj_type with underlying contents unset.

        Returns:
            dict: A dictionary of BidsTabularFile objects keyed by real path. The dictionary is empty
                when obj_type is not 'tabular', so callers can always iterate over the result.

        """
        # Only tabular files are supported; skip the directory scan entirely for any other type.
        if self.obj_type != "tabular":
            return {}
        files = io_util.get_file_list(self.root_path, name_suffix=self.suffix, extensions=['.tsv'],
                                      exclude_dirs=self.exclude_dirs)
        return {os.path.realpath(file): BidsTabularFile(file) for file in files}

    def _make_sidecar_dict(self):
        """ Create a dictionary of BidsSidecarFile objects for the specified entity type.

        Returns:
            dict: A dictionary of BidsSidecarFile objects keyed by real path for the specified suffix type.

        Notes:
            - This function creates the sidecars, but does not set their contents.

        """
        files = io_util.get_file_list(self.root_path, name_suffix=self.suffix,
                                      extensions=['.json'], exclude_dirs=self.exclude_dirs)
        real_paths = (os.path.realpath(file) for file in files)
        return {real_path: BidsSidecarFile(real_path) for real_path in real_paths}

    def _make_sidecar_dir_dict(self):
        """ Create a dictionary with real paths of directories as keys and lists of sidecars as values.

        Returns:
            dict: A dictionary of lists of BidsSidecarFile objects, keyed by real directory path.

        Notes:
            - The sidecar objects are looked up in sidecar_dict, which must already be set.

        """
        dir_dict = io_util.get_dir_dictionary(self.root_path, name_suffix=self.suffix, extensions=['.json'],
                                              exclude_dirs=self.exclude_dirs)
        sidecar_dir_dict = {}
        for this_dir, dir_list in dir_dict.items():
            sidecar_dir_dict[os.path.realpath(this_dir)] = \
                [self.sidecar_dict[os.path.realpath(s_file)] for s_file in dir_list]
        return sidecar_dir_dict