""" A dictionary of BIDS files keyed to entity-value pairs. """
from hed.errors.exceptions import HedFileError
from hed.tools.bids.bids_file import BidsFile
from hed.tools.analysis.file_dictionary import FileDictionary
class BidsFileDictionary(FileDictionary):
    """ A dictionary of BidsFile keyed by entity pairs.

    The keys are simplified entity key-value pairs and the values are BidsFile objects.

    """

    def __init__(self, collection_name, files, entities=('sub', 'ses', 'task', 'run')):
        """ Create the dictionary keyed to entities.

        Parameters:
            collection_name (str): Name of this collection.
            files (list or dict): File paths (str) or BidsFile objects to include. If a dict, its values are used.
            entities (tuple): Entity names to use in creating the keys.

        :raises HedFileError:
            - If files has inappropriate values.

        Notes:
            - This function is used for cross listing BIDS style files for different studies.

        Examples:
            If entities is ('sub', 'ses', 'task', 'run'), a typical key might be sub-001_ses-01_task-memory_run-01.

        """
        super().__init__(collection_name, None, None, separator='_')
        self.entities = entities
        self._file_dict = self.make_dict(files, entities)

    @property
    def key_list(self):
        """ The dictionary keys. """
        return list(self._file_dict)

    @property
    def file_dict(self):
        """ Dictionary of keys and paths. """
        return {key: file.file_path for key, file in self._file_dict.items()}

    @property
    def file_list(self):
        """ Paths of the files in the list. """
        return [file.file_path for file in self._file_dict.values()]

    def get_file_path(self, key):
        """ Return the file path corresponding to key.

        Parameters:
            key (str): The key to use to look up the file in this dictionary.

        Returns:
            str: The file_path of the BidsFile stored under key.

        Notes:
            - None is returned if the key is not present.

        """
        if key in self._file_dict:
            return self._file_dict[key].file_path
        return None

    def iter_files(self):
        """ Iterator over the files in this dictionary.

        Yields:
            tuple:
                - str: The next entity-based key.
                - BidsFile: The next BidsFile.

        """
        yield from self._file_dict.items()

    def key_diffs(self, other_dict):
        """ Return the symmetric key difference with another file dictionary.

        Parameters:
            other_dict (FileDictionary): A file dictionary object.

        Returns:
            list: The symmetric difference of the keys in this dictionary and the other one.

        """
        diffs = set(self._file_dict).symmetric_difference(other_dict._file_dict)
        return list(diffs)

    def get_new_dict(self, name, files):
        """ Create a dictionary with these files.

        Parameters:
            name (str): Name of this dictionary.
            files (list or dict): List or dictionary of files. These could be paths or objects.

        Returns:
            BidsFileDictionary: The newly created dictionary.

        Notes:
            - The new dictionary uses the same type of entities for keys as this dictionary.

        """
        return BidsFileDictionary(name, files, entities=self.entities)

    def make_dict(self, files, entities):
        """ Make a dictionary from files or a dict.

        Parameters:
            files (list or dict): List or dictionary of file-like objs to use. If a dict, only its values are used.
            entities (tuple): Tuple of entity names to use as keys, e.g. ('sub', 'run').

        Returns:
            dict: A dictionary whose keys are entity keys and values are BidsFile objects.

        :raises HedFileError:
            - If files is neither a list nor a dict.
            - If an item is not recognizable as a Bids file.
            - If two files produce the same entity key.

        """
        if isinstance(files, dict):
            files = files.values()
        elif not isinstance(files, list):
            raise HedFileError("BadArgument", "make_bids_file_dict expects a list or dict", [])

        file_dict = {}
        for the_file in files:
            the_file = self._correct_file(the_file)
            key = the_file.get_key(entities)
            # Keys must be unique: two files mapping to one key would silently shadow each other.
            if key in file_dict:
                raise HedFileError("NonUniqueFileKeys",
                                   f"dictionary key {key} is associated with {the_file} and {file_dict[key]}", "")
            file_dict[key] = the_file
        return file_dict

    def make_query(self, query_dict=None):
        """ Return a dictionary of files matching query.

        Parameters:
            query_dict (dict or None): A dictionary whose keys are entities and whose values are entity values
                to match. If None, the query {'sub': '*'} is used.

        Returns:
            dict: A dictionary of the entries (key to BidsFile) in this dictionary that match the query.

        Notes:
            - A query dictionary key is a valid BIDS entity name such as sub or task.
            - A query dictionary value may be a string or a list.
            - A query value string should contain a specific value of the entity or a '*' indicating any value matches.
            - A query value list should be a list of valid values for the corresponding entity.

        """
        # None sentinel avoids sharing one mutable default dict across calls.
        if query_dict is None:
            query_dict = {'sub': '*'}
        return {key: file for key, file in self._file_dict.items()
                if self.match_query(query_dict, file.entity_dict)}

    def split_by_entity(self, entity):
        """ Split this dictionary based on an entity.

        Parameters:
            entity (str): Entity name (for example task).

        Returns:
            tuple:
                - dict: A dictionary with unique values of entity as keys and BidsFileDictionary objs as values.
                - BidsFileDictionary or None: The files that don't have entity in their entity_dict, or None
                  if there are no such files.

        Notes:
            - This function is used for analysis where a single subject or single type of task is being analyzed.

        """
        raw_split, leftovers = self._split_dict_by_entity(self._file_dict, entity)
        split_dict = {entity_value: self.get_new_dict(f"{self.name}_{entity_value}", entity_dict)
                      for entity_value, entity_dict in raw_split.items()}
        leftover_dict = self.get_new_dict(self.name + "_left_overs", leftovers) if leftovers else None
        return split_dict, leftover_dict

    @staticmethod
    def match_query(query_dict, entity_dict):
        """ Return True if query has a match in dictionary.

        Parameters:
            query_dict (dict): A dictionary representing a query about entities.
            entity_dict (dict): A dictionary containing the entity representation for a BIDS file.

        Returns:
            bool: True if the query matches the entities representing the file.

        Notes:
            - A query is a dictionary whose keys are entity names and whose values are specific entity values,
              lists of acceptable entity values, or '*'.
            - Every entity named in the query must be present in entity_dict for a match.
            - Query values that are neither str nor list place no restriction beyond presence of the entity.

        Examples:
            {'sub': '001', 'run': '*'} requests all runs from subject 001.

        """
        for entity, query_value in query_dict.items():
            if entity not in entity_dict:
                return False
            entity_value = entity_dict[entity]
            if isinstance(query_value, str):
                # '*' is a wildcard; any other string must equal the file's value for this entity.
                if query_value != '*' and entity_value != query_value:
                    return False
            elif isinstance(query_value, list) and entity_value not in query_value:
                return False
        return True

    @staticmethod
    def _split_dict_by_entity(file_dict, entity):
        """ Split a dict of BidsFile based on an entity.

        Parameters:
            file_dict (dict): Dictionary of BidsFile keyed by entity keys.
            entity (str): Name of the entity whose values determine the split.

        Returns:
            tuple:
                - dict: Dictionary of dictionaries with first-level keys constructed from the unique values of entity.
                - dict: Dictionary of BidsFile that do not have the entity.

        """
        split_dict = {}
        leftovers = {}
        for key, file in file_dict.items():
            if entity in file.entity_dict:
                split_dict.setdefault(file.entity_dict[entity], {})[key] = file
            else:
                leftovers[key] = file
        return split_dict, leftovers

    @classmethod
    def _correct_file(cls, the_file):
        """ Transform to BidsFile if needed.

        Parameters:
            the_file (str or BidsFile): If a str, create a new BidsFile object, otherwise pass the original on.

        Returns:
            BidsFile: Either the original file or a newly created BidsFile.

        :raises HedFileError:
            - If the_file isn't str or BidsFile.

        """
        if isinstance(the_file, str):
            return BidsFile(the_file)
        if not isinstance(the_file, BidsFile):
            raise HedFileError("BadBidsFileArgument",
                               f"_correct_file expects file path or BidsFile but found {str(the_file)}", [])
        return the_file