Source code for hed.validator.tag_util.string_util

""" Utilities supporting the validation of HED strings. """

from hed.errors.error_reporter import ErrorHandler
from hed.errors.error_types import ValidationErrors


[docs]class StringValidator: """Runs checks on the raw string that depend on multiple characters, e.g. mismatched parentheses""" OPENING_GROUP_CHARACTER = '(' CLOSING_GROUP_CHARACTER = ')' COMMA = ','
[docs] def run_string_validator(self, hed_string_obj): validation_issues = [] validation_issues += self.check_count_tag_group_parentheses(hed_string_obj.get_original_hed_string()) validation_issues += self.check_delimiter_issues_in_hed_string(hed_string_obj.get_original_hed_string()) return validation_issues
[docs] @staticmethod def check_count_tag_group_parentheses(hed_string): """ Report unmatched parentheses. Parameters: hed_string (str): A hed string. Returns: list: A list of validation list. Each issue is a dictionary. """ validation_issues = [] number_open_parentheses = hed_string.count('(') number_closed_parentheses = hed_string.count(')') if number_open_parentheses != number_closed_parentheses: validation_issues += ErrorHandler.format_error(ValidationErrors.PARENTHESES_MISMATCH, opening_parentheses_count=number_open_parentheses, closing_parentheses_count=number_closed_parentheses) return validation_issues
[docs] def check_delimiter_issues_in_hed_string(self, hed_string): """ Report missing commas or commas in value tags. Parameters: hed_string (str): A hed string. Returns: list: A validation issues list. Each issue is a dictionary. """ last_non_empty_valid_character = '' last_non_empty_valid_index = 0 current_tag = '' issues = [] for i, current_character in enumerate(hed_string): current_tag += current_character if not current_character.strip(): continue if self._character_is_delimiter(current_character): if current_tag.strip() == current_character: issues += ErrorHandler.format_error(ValidationErrors.TAG_EMPTY, source_string=hed_string, char_index=i) current_tag = '' continue current_tag = '' elif current_character == self.OPENING_GROUP_CHARACTER: if current_tag.strip() == self.OPENING_GROUP_CHARACTER: current_tag = '' else: issues += ErrorHandler.format_error(ValidationErrors.COMMA_MISSING, tag=current_tag) elif last_non_empty_valid_character == "," and current_character == self.CLOSING_GROUP_CHARACTER: issues += ErrorHandler.format_error(ValidationErrors.TAG_EMPTY, source_string=hed_string, char_index=i) elif self._comma_is_missing_after_closing_parentheses(last_non_empty_valid_character, current_character): issues += ErrorHandler.format_error(ValidationErrors.COMMA_MISSING, tag=current_tag[:-1]) break last_non_empty_valid_character = current_character last_non_empty_valid_index = i if self._character_is_delimiter(last_non_empty_valid_character): issues += ErrorHandler.format_error(ValidationErrors.TAG_EMPTY, char_index=last_non_empty_valid_index, source_string=hed_string) return issues
@staticmethod def _comma_is_missing_after_closing_parentheses(last_non_empty_character, current_character): """ Checks if missing comma after a closing parentheses. Parameters: last_non_empty_character (str): The last non-empty string in the HED string. current_character (str): The current character in the HED string. Returns: bool: True if a comma is missing after a closing parentheses. False, if otherwise. Notes: - This is a helper function for the find_missing_commas_in_hed_string function. """ return last_non_empty_character == StringValidator.CLOSING_GROUP_CHARACTER and \ not (StringValidator._character_is_delimiter(current_character) or current_character == StringValidator.CLOSING_GROUP_CHARACTER) @staticmethod def _character_is_delimiter(character): """ Checks if the character is a delimiter. Parameters: character (str): A string character. Returns: bool: Returns True if the character is a delimiter. False, if otherwise. Notes: - A delimiter is a comma. """ return character == StringValidator.COMMA