"""Utilities used in HED validation/loading using a HED schema."""
from semantic_version import Version
from hed.errors import ErrorHandler, SchemaWarnings
from hed.schema import hed_schema_constants as constants
from hed.errors.exceptions import HedExceptions, HedFileError
from hed.schema.hed_schema_constants import valid_header_attributes
ALLOWED_TAG_CHARS = "-"
ALLOWED_DESC_CHARS = "-_:;,./()+ ^"
[docs]def validate_library_name(library_name):
""" Check the validity of the library name.
Parameters:
library_name (str): Name of the library.
Returns:
bool or str: If not False, string indicates the issue.
"""
for i, character in enumerate(library_name):
if not character.isalpha():
return f"Non alpha character '{character}' at position {i} in '{library_name}'"
if character.isupper():
return f"Non lowercase character '{character}' at position {i} in '{library_name}'"
[docs]def validate_version_string(version_string):
""" Check validity of the version.
Parameters:
version_string (str): A version string.
Returns:
bool or str: If not False, string indicates the issue.
"""
try:
Version(version_string)
except ValueError as e:
return str(e)
return False
header_attribute_validators = {
constants.VERSION_ATTRIBUTE: (validate_version_string, HedExceptions.SCHEMA_VERSION_INVALID),
constants.LIBRARY_ATTRIBUTE: (validate_library_name, HedExceptions.BAD_HED_LIBRARY_NAME)
}
[docs]def validate_present_attributes(attrib_dict, filename):
""" Validate combinations of attributes
Parameters:
attrib_dict (dict): Dictionary of attributes to be evaluated.
filename (str): File name to use in reporting errors.
Returns:
list: List of issues. Each issue is a dictionary.
:raises HedFileError:
- withStandard is found in th header, but a library attribute is not specified
"""
if constants.WITH_STANDARD_ATTRIBUTE in attrib_dict and constants.LIBRARY_ATTRIBUTE not in attrib_dict:
raise HedFileError(HedExceptions.BAD_WITH_STANDARD,
"withStandard header attribute found, but no library attribute is present",
filename)
[docs]def validate_attributes(attrib_dict, filename):
""" Validate attributes in the dictionary.
Parameters:
attrib_dict (dict): Dictionary of attributes to be evaluated.
filename (str): File name to use in reporting errors.
Returns:
list: List of issues. Each issue is a dictionary.
:raises HedFileError:
- Invalid library name
- Version not present
- Invalid combinations of attributes in header
"""
validate_present_attributes(attrib_dict, filename)
for attribute_name, attribute_value in attrib_dict.items():
if attribute_name in header_attribute_validators:
validator, error_code = header_attribute_validators[attribute_name]
had_error = validator(attribute_value)
if had_error:
raise HedFileError(error_code, had_error, filename)
if attribute_name not in valid_header_attributes:
raise HedFileError(HedExceptions.SCHEMA_UNKNOWN_HEADER_ATTRIBUTE,
f"Unknown attribute {attribute_name} found in header line", filename=filename)
if constants.VERSION_ATTRIBUTE not in attrib_dict:
raise HedFileError(HedExceptions.SCHEMA_VERSION_INVALID,
"No version attribute found in header", filename=filename)
# Might move this to a baseclass version if one is ever made for wiki2schema/xml2schema
[docs]def find_rooted_entry(tag_entry, schema, loading_merged):
""" This semi-validates rooted tags, raising an exception on major errors
Parameters:
tag_entry(HedTagEntry): the possibly rooted tag
schema(HedSchema): The schema being loaded
loading_merged(bool): If this schema was already merged before loading
Returns:
rooted_tag(HedTagEntry or None): The base tag entry from the standard schema
Returns None if this tag isn't rooted
:raises HedFileError:
- A rooted attribute is found in a non-paired schema
- A rooted attribute is not a string
- A rooted attribute was found on a non-root node in an unmerged schema.
- A rooted attribute is found on a root node in a merged schema.
- A rooted attribute indicates a tag that doesn't exist in the base schema.
"""
rooted_tag = tag_entry.has_attribute(constants.HedKey.Rooted, return_value=True)
if rooted_tag is not None:
if not schema.with_standard:
raise HedFileError(HedExceptions.ROOTED_TAG_INVALID,
f"Rooted tag attribute found on '{tag_entry.short_tag_name}' in a standard schema.",
schema.filename)
if not isinstance(rooted_tag, str):
raise HedFileError(HedExceptions.ROOTED_TAG_INVALID,
f'Rooted tag \'{tag_entry.short_tag_name}\' is not a string."',
schema.filename)
if tag_entry.parent_name and not loading_merged:
raise HedFileError(HedExceptions.ROOTED_TAG_INVALID,
f'Found rooted tag \'{tag_entry.short_tag_name}\' as a non root node.',
schema.filename)
if not tag_entry.parent_name and loading_merged:
raise HedFileError(HedExceptions.ROOTED_TAG_INVALID,
f'Found rooted tag \'{tag_entry.short_tag_name}\' as a root node in a merged schema.',
schema.filename)
rooted_entry = schema.tags.get(rooted_tag)
if not rooted_entry or rooted_entry.has_attribute(constants.HedKey.InLibrary):
raise HedFileError(HedExceptions.ROOTED_TAG_DOES_NOT_EXIST,
f"Rooted tag '{tag_entry.short_tag_name}' not found in paired standard schema",
schema.filename)
if loading_merged:
return None
return rooted_entry
[docs]def validate_schema_term(hed_term):
""" Check short tag for capitalization and illegal characters.
Parameters:
hed_term (str): A single hed term.
Returns:
list: A list of all formatting issues found in the term. Each issue is a dictionary.
"""
issues_list = []
# Any # terms will have already been validated as the previous entry.
if hed_term == "#":
return issues_list
for i, char in enumerate(hed_term):
if i == 0 and not (char.isdigit() or char.isupper()):
issues_list += ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CAPITALIZATION,
hed_term, char_index=i, problem_char=char)
continue
if char in ALLOWED_TAG_CHARS or char.isalnum():
continue
issues_list += ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_TAG,
hed_term, char_index=i, problem_char=char)
return issues_list
[docs]def validate_schema_description(tag_name, hed_description):
""" Check the description of a single schema term.
Parameters:
tag_name (str): A single hed tag - not validated here, just used for error messages.
hed_description (str): The description string to validate.
Returns:
list: A list of all formatting issues found in the description.
"""
issues_list = []
# Blank description is fine
if not hed_description:
return issues_list
for i, char in enumerate(hed_description):
if char.isalnum():
continue
if char in ALLOWED_DESC_CHARS:
continue
issues_list += ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_DESC,
hed_description, tag_name, char_index=i, problem_char=char)
return issues_list