Source code for hed.tools.bids.bids_dataset

""" The contents of a BIDS dataset. """

import os
from hed.schema.hed_schema import HedSchema
from hed.schema.hed_schema_group import HedSchemaGroup
from hed.tools.bids.bids_file_group import BidsFileGroup
from hed.tools.bids import bids_util
from hed.tools.util import io_util


[docs]class BidsDataset:
    """ A BIDS dataset representation primarily focused on HED evaluation.

    Attributes:
        root_path (str):  Real root path of the BIDS dataset.
        schema (HedSchema or HedSchemaGroup):  The schema used for evaluation.
        file_groups (dict):  A dictionary of BidsFileGroup objects with a given file suffix.

    """

[docs]    def __init__(self, root_path, schema=None, suffixes=['events', 'participants'],
                 exclude_dirs=['sourcedata', 'derivatives', 'code', 'stimuli']):
        """ Constructor for a BIDS dataset.

        Parameters:
            root_path (str):  Root path of the BIDS dataset.
            schema (HedSchema or HedSchemaGroup):  A schema that overrides the one specified in dataset.
            suffixes (list or None): File name suffixes of items to include.
                If None or empty, then ['_events', 'participants'] is assumed.
            exclude_dirs=['sourcedata', 'derivatives', 'code', 'phenotype']:

        """
        self.root_path = os.path.realpath(root_path)
        if schema:
            self.schema = schema
        else:
            self.schema = bids_util.get_schema_from_description(self.root_path)

        self.exclude_dirs = exclude_dirs
        self.suffixes = suffixes
        self.file_groups = self._set_file_groups()
        self.bad_files = []

[docs]    def get_file_group(self, suffix):
        """ Return the file group of files with the specified suffix.

        Parameters:
            suffix (str):  Suffix of the BidsFileGroup to be returned.

        Returns:
            BidsFileGroup or None:  The requested tabular group.
        """
        return self.file_groups.get(suffix, None)

[docs]    def validate(self, check_for_warnings=False, schema=None):
        """ Validate the dataset.

        Parameters:
            check_for_warnings (bool):  If True, check for warnings.
            schema (HedSchema or HedSchemaGroup or None):  The schema used for validation.

        Returns:
            list:  List of issues encountered during validation. Each issue is a dictionary.

        """
        issues = []
        if schema:
            this_schema = schema
        elif self.schema:
            this_schema = self.schema
        else:
            return [{"code": "SCHEMA_LOAD_FAILED",
                     "message": "BIDS dataset_description.json has invalid HEDVersion and passed schema was invalid}"}]
        for suffix, group in self.file_groups.items():
            if group.has_hed:
                issues += group.validate(this_schema, check_for_warnings=check_for_warnings)
        return issues

[docs]    def get_summary(self):
        """ Return an abbreviated summary of the dataset. """
        summary = {"dataset": self.root_path,
                   "hed_schema_versions": self.schema.get_schema_versions(),
                   "file_group_types": f"{str(list(self.file_groups.keys()))}"}
        return summary

    def _set_file_groups(self):
        file_paths = io_util.get_file_list(self.root_path, extensions=['.tsv', '.json'],
                                           exclude_dirs=self.exclude_dirs, name_suffix=self.suffixes)
        file_dict = bids_util.group_by_suffix(file_paths)

        file_groups = {}
        for suffix, files in file_dict.items():
            file_group = BidsFileGroup.create_file_group(self.root_path, files, suffix)
            if file_group:
                file_groups[suffix] = file_group

        self.suffixes = list(file_groups.keys())
        return file_groups