Source code for hed.models.string_util

""" Utilities for manipulating HedString objects. """
import re
from hed.models.hed_string import HedString


def gather_descriptions(hed_string):
    """Strip the description tags out of a HedString and return their text.

    Parameters:
        hed_string (HedString): The string to search. It is modified in place.

    Returns:
        str: The extension of every description tag found, each ending in a
            period (one is appended when missing), joined by single spaces.
            Empty when no description tag is found.

    Side effect:
        The description tags are removed from the input HedString.

    """
    description_tags = hed_string.find_tags({"description"}, recursive=True, include_groups=0)

    sentences = []
    for tag in description_tags:
        text = tag.extension
        # Make every description read as a full sentence before joining.
        if not text.endswith("."):
            text = text + "."
        sentences.append(text)

    hed_string.remove(description_tags)
    return " ".join(sentences)
def split_base_tags(hed_string, base_tags, remove_group=False):
    """Move the tags matching the given base tags out of a HedString.

    Parameters:
        hed_string (HedString): The string to split. Matching items are removed from it in place.
        base_tags (list of str): Base tag names to look for (casefolded before the search).
            This matches the base tag itself, NOT the terms above it.
        remove_group (bool, optional): If True, take out the group that holds a matching tag
            rather than only the tag. Defaults to False.

    Returns:
        tuple: Two HedString objects:
            - The input hed_string (the same object) with the matches removed.
            - A new HedString, built with the same schema, holding the removed items.

    """
    folded_tags = [name.casefold() for name in base_tags]

    # With include_groups=2 the search yields (tag, containing group) pairs; with 0, tags only.
    matches = hed_string.find_tags(folded_tags, recursive=True,
                                   include_groups=2 if remove_group else 0)

    if remove_group:
        to_remove = []
        for tag, group in matches:
            if isinstance(group, HedString):
                # The container is a HedString itself (presumably the top-level string),
                # so only the tag is taken out.
                to_remove.append(tag)
            else:
                to_remove.append(group)
        matches = to_remove

    if matches:
        hed_string.remove(matches)

    return hed_string, HedString("", hed_string._schema, _contents=matches)
def split_def_tags(hed_string, def_names, remove_group=False):
    """Move the def tags with the given names out of a HedString.

    This does NOT handle def-expand tags currently.

    Parameters:
        hed_string (HedString): The string to split. Matching items are removed from it in place.
        def_names (list of str): Def names to search for. A name can optionally include a value.
        remove_group (bool, optional): If True, take out the group that holds a matching tag
            rather than only the tag. Defaults to False.

    Returns:
        tuple: Two HedString objects:
            - The input hed_string (the same object) with the matches removed.
            - A new HedString, built with the same schema, holding the removed items.

    """
    # Each name becomes a casefolded "def/<name>" pattern for the wildcard search.
    patterns = []
    for def_name in def_names:
        patterns.append(f"def/{def_name}".casefold())

    # With include_groups=2 the search yields (tag, containing group) pairs; with 0, tags only.
    group_mode = 2 if remove_group else 0
    matches = hed_string.find_wildcard_tags(patterns, recursive=True, include_groups=group_mode)

    if remove_group:
        # When the container is a HedString itself (presumably the top-level string),
        # only the tag is taken out; otherwise the whole containing group is.
        matches = [group if not isinstance(group, HedString) else tag
                   for tag, group in matches]

    if matches:
        hed_string.remove(matches)

    remainder = hed_string
    extracted = HedString("", hed_string._schema, _contents=matches)
    return remainder, extracted
# Patterns used by cleanup_empties, compiled once at import time instead of on every call.
_LEADING_COMMAS_RE = re.compile(r'^\s*,+')
_TRAILING_COMMA_RE = re.compile(r',\s*$')
_REPEATED_COMMAS_RE = re.compile(r',\s*,+')
_EMPTY_PARENS_RE = re.compile(r'\(\s*\)')
# Parentheses holding nothing but commas/whitespace. A single character class keeps the
# match linear from each "(" (no adjacent overlapping quantifiers to backtrack over).
_COMMA_ONLY_PARENS_RE = re.compile(r'\([\s,]*\)')
_TRAILING_IN_GROUP_RE = re.compile(r'[\s,]+\)')
_LEADING_IN_GROUP_RE = re.compile(r'\(\s*,+')


def cleanup_empties(string_in: str) -> str:
    """Remove empty groups and stray commas from a comma-separated, parenthesized string.

    The following clean-ups are applied repeatedly until the string stops changing:
        1. Parentheses containing only whitespace are removed.
        2. Parentheses containing only commas and whitespace are removed.
        3. Commas at the very start or very end of the string are removed.
        4. Runs of commas separated only by whitespace collapse to a single comma.
        5. Commas and whitespace directly before a closing parenthesis are removed.

    Afterwards, commas directly after an opening parenthesis are removed and the
    result is stripped of leading and trailing whitespace.

    Parameters:
        string_in (str): The string to clean up.

    Returns:
        str: The cleaned-up string.

    Examples:
        >>> cleanup_empties("A, (), B")
        'A, B'
        >>> cleanup_empties("(A, )")
        '(A)'

    """
    result = string_in
    previous_result = None

    # Removing one empty piece can expose another (e.g. "(( ), ,)"), so iterate to a fixed point.
    while result != previous_result:
        previous_result = result

        # Steps 1 and 2: drop groups that are empty or hold only commas/whitespace.
        result = _EMPTY_PARENS_RE.sub('', result)
        result = _COMMA_ONLY_PARENS_RE.sub('', result)

        # Step 3: drop commas at the start and end of the string.
        result = _LEADING_COMMAS_RE.sub('', result)
        result = _TRAILING_COMMA_RE.sub('', result)

        # Step 4: collapse repeated commas.
        result = _REPEATED_COMMAS_RE.sub(',', result)

        # Step 5: drop commas/whitespace sitting right before a closing parenthesis.
        result = _TRAILING_IN_GROUP_RE.sub(')', result)

    # Drop commas sitting right after an opening parenthesis.
    result = _LEADING_IN_GROUP_RE.sub('(', result)
    return result.strip()