Source code for hed.schema.schema_compliance

""" Utilities for HED schema checking. """

from hed.errors.error_types import ErrorContext, SchemaErrors, ErrorSeverity, SchemaAttributeErrors, SchemaWarnings
from hed.errors.error_reporter import ErrorHandler, sort_issues
from hed.schema.hed_schema import HedSchema, HedKey, HedSectionKey
from hed.schema import schema_attribute_validators
from hed.schema.schema_validation_util import validate_schema_tag_new, validate_schema_term_new, \
    get_allowed_characters_by_name, get_problem_indexes, validate_schema_description_new
from hed.schema.schema_validation_util_deprecated import validate_schema_tag, validate_schema_description, verify_no_brackets
from functools import partial
from hed.schema import hed_cache
from semantic_version import Version


def check_compliance(hed_schema, check_for_warnings=True, name=None, error_handler=None):
    """ Check for hed3 compliance of a schema object.

    Parameters:
        hed_schema (HedSchema): HedSchema object to check for hed3 compliance.
        check_for_warnings (bool): If True, check for formatting issues like invalid characters, capitalization, etc.
            Only used to build the default ErrorHandler when error_handler is not passed in.
        name (str): If present, will use as filename for context.  Otherwise hed_schema.filename is used.
        error_handler (ErrorHandler or None): Used to report errors.  Uses a default one if none passed in.

    Returns:
        list: A list of all warnings and errors found in the file. Each issue is a dictionary.

    :raises ValueError:
        - Trying to validate a HedSchemaGroup directly
    """
    if not isinstance(hed_schema, HedSchema):
        raise ValueError("To check compliance of a HedGroupSchema, call self.check_compliance on the schema itself.")

    error_handler = error_handler if error_handler else ErrorHandler(check_for_warnings)
    validator = SchemaValidator(hed_schema, error_handler)
    issues_list = []

    if not name:
        name = hed_schema.filename
    error_handler.push_error_context(ErrorContext.FILE_NAME, name)
    try:
        issues_list += validator.check_if_prerelease_version()
        issues_list += validator.check_prologue_epilogue()
        issues_list += validator.check_invalid_chars()
        issues_list += validator.check_attributes()
        issues_list += validator.check_duplicate_names()
    finally:
        # The handler may belong to the caller: always remove the file-name context pushed above,
        # even when one of the checks raises, so it does not leak into later error reports.
        error_handler.pop_error_context()

    return sort_issues(issues_list)
class SchemaValidator:
    """Validator class to wrap some code.  In general, just call check_compliance.

    Each ``check_*`` method returns a list of issue dictionaries.  Which set of rules is applied
    depends on ``hed_schema.schema_83_props`` (stored as ``_new_character_validation``).
    """

    # Known attribute validators ( < 8.3.0).
    # Used by check_attributes when _new_character_validation is falsy.
    attribute_validators_old = {
        HedKey.SuggestedTag: [partial(schema_attribute_validators.item_exists_check,
                                      section_key=HedSectionKey.Tags)],
        HedKey.RelatedTag: [partial(schema_attribute_validators.item_exists_check,
                                    section_key=HedSectionKey.Tags)],
        HedKey.UnitClass: [schema_attribute_validators.tag_is_placeholder_check,
                           partial(schema_attribute_validators.item_exists_check,
                                   section_key=HedSectionKey.UnitClasses)],
        HedKey.ValueClass: [schema_attribute_validators.tag_is_placeholder_check,
                            partial(schema_attribute_validators.item_exists_check,
                                    section_key=HedSectionKey.ValueClasses)],
        # Rooted tag is implicitly verified on loading
        # HedKey.Rooted: [schema_attribute_validators.tag_exists_base_schema_check],
        HedKey.DeprecatedFrom: [schema_attribute_validators.tag_is_deprecated_check],
        HedKey.TakesValue: [schema_attribute_validators.tag_is_placeholder_check],
        HedKey.DefaultUnits: [schema_attribute_validators.unit_exists],
        HedKey.ConversionFactor: [schema_attribute_validators.conversion_factor],
        HedKey.AllowedCharacter: [schema_attribute_validators.allowed_characters_check],
        HedKey.InLibrary: [schema_attribute_validators.in_library_check]
    }

    # Known attribute validators ( > 8.3.0).
    # Does not include range or domain validation, that's added later (see check_attributes).
    attribute_validators = {
        HedKey.SuggestedTag: [],
        HedKey.RelatedTag: [],
        HedKey.UnitClass: [schema_attribute_validators.tag_is_placeholder_check],
        HedKey.ValueClass: [schema_attribute_validators.tag_is_placeholder_check],
        # Rooted tag is implicitly verified on loading
        # HedKey.Rooted: [schema_attribute_validators.tag_exists_base_schema_check],
        HedKey.DeprecatedFrom: [schema_attribute_validators.tag_is_deprecated_check],
        HedKey.TakesValue: [schema_attribute_validators.tag_is_placeholder_check],
        HedKey.DefaultUnits: [],
        HedKey.ConversionFactor: [schema_attribute_validators.conversion_factor],
        HedKey.AllowedCharacter: [schema_attribute_validators.allowed_characters_check],
        HedKey.InLibrary: [schema_attribute_validators.in_library_check]
    }

    def __init__(self, hed_schema, error_handler):
        """ Store the schema and error handler used by every check.

        Parameters:
            hed_schema (HedSchema): The schema to validate.
            error_handler (ErrorHandler): Receives error contexts and filters/annotates issues.
        """
        self.hed_schema = hed_schema
        self.error_handler = error_handler
        # Selects between the old and new validator tables and character checks.
        self._new_character_validation = hed_schema.schema_83_props

    def check_if_prerelease_version(self):
        """ Return warnings for schema versions newer than (or missing from) the known versions.

        The comma separated library names and version numbers of the schema are paired up and each
        pair is compared against ``hed_cache.get_hed_versions``.  If the schema has a
        ``with_standard`` version, that is compared against the known standard versions as well.

        Returns:
            list: A list of issues.  Each issue is a dictionary.
        """
        issues = []
        libraries = self.hed_schema.library.split(",")
        versions = self.hed_schema.version_number.split(",")
        for library, version in zip(libraries, versions):
            all_known_versions = hed_cache.get_hed_versions(library_name=library)
            # `library` is one piece of a split on ",", so it can never contain a comma itself;
            # only the "nothing known" / "newer than the first known version" tests are needed.
            if not all_known_versions or Version(all_known_versions[0]) < Version(version):
                issues += ErrorHandler.format_error(SchemaWarnings.SCHEMA_PRERELEASE_VERSION_USED,
                                                    version, all_known_versions)

        with_standard = self.hed_schema.with_standard
        if with_standard:
            all_known_versions = hed_cache.get_hed_versions()
            if not all_known_versions or Version(all_known_versions[0]) < Version(with_standard):
                issues += ErrorHandler.format_error(SchemaWarnings.SCHEMA_PRERELEASE_VERSION_USED,
                                                    with_standard, all_known_versions)

        self.error_handler.add_context_and_filter(issues)
        return issues

    def check_prologue_epilogue(self):
        """ Return issues for disallowed characters in the prologue and epilogue.

        Only performed when the new character validation is active; otherwise no issues are produced.

        Returns:
            list: A list of issues.  Each issue is a dictionary.
        """
        issues = []
        if self._new_character_validation:
            character_set = get_allowed_characters_by_name(["text", "newline"])
            sections = (
                ("Prologue", self.hed_schema.prologue),
                ("Epilogue", self.hed_schema.epilogue),
            )
            for section_name, text in sections:
                # Both sections are reported with the same error code; section_name tells them apart.
                for _, index in get_problem_indexes(text, character_set):
                    issues += ErrorHandler.format_error(SchemaWarnings.SCHEMA_PROLOGUE_CHARACTER_INVALID,
                                                        char_index=index, source_string=text,
                                                        section_name=section_name)

        self.error_handler.add_context_and_filter(issues)
        return issues

    def check_attributes(self):
        """Returns issues from validating known attributes in all sections

        For every entry in every section this reports each unknown attribute, then runs the
        validators registered for each known attribute.  Issues produced by those validators
        are given warning severity before being passed through the error handler.

        Returns:
            list: A list of issues.  Each issue is a dictionary.
        """
        issues_list = []

        # Extra validators selected by the range attributes found on an attribute's own schema entry.
        # Built once per call rather than once per attribute.
        range_validators = {
            HedKey.TagRange: [partial(schema_attribute_validators.item_exists_check,
                                      section_key=HedSectionKey.Tags)],
            HedKey.NumericRange: [schema_attribute_validators.is_numeric_value],
            HedKey.StringRange: [],  # Unclear what validation should be done here.
            HedKey.UnitClassRange: [partial(schema_attribute_validators.item_exists_check,
                                            section_key=HedSectionKey.UnitClasses)],
            HedKey.UnitRange: [schema_attribute_validators.unit_exists],
            HedKey.ValueClassRange: [partial(schema_attribute_validators.item_exists_check,
                                             section_key=HedSectionKey.ValueClasses)]
        }

        for section_key in HedSectionKey:
            self.error_handler.push_error_context(ErrorContext.SCHEMA_SECTION, str(section_key))
            for tag_entry in self.hed_schema[section_key].values():
                self.error_handler.push_error_context(ErrorContext.SCHEMA_TAG, tag_entry.name)

                if tag_entry._unknown_attributes:
                    for attribute_name in tag_entry._unknown_attributes:
                        issues_list += self.error_handler.format_error_with_context(
                            SchemaAttributeErrors.SCHEMA_ATTRIBUTE_INVALID,
                            attribute_name,
                            source_tag=tag_entry.name)

                for attribute_name in tag_entry.attributes:
                    # The concatenation below always creates a fresh list, so extending it later
                    # never modifies the class level validator tables.  Deprecation is always checked.
                    if self._new_character_validation:
                        validators = self.attribute_validators.get(attribute_name, []) \
                            + [schema_attribute_validators.attribute_is_deprecated]
                        attribute_entry = self.hed_schema.get_tag_entry(attribute_name, HedSectionKey.Attributes)
                        if attribute_entry:
                            for range_attribute in attribute_entry.attributes:
                                validators += range_validators.get(range_attribute, [])
                    else:
                        validators = self.attribute_validators_old.get(attribute_name, []) \
                            + [schema_attribute_validators.attribute_is_deprecated]

                    for validator in validators:
                        self.error_handler.push_error_context(ErrorContext.SCHEMA_ATTRIBUTE, attribute_name)
                        new_issues = validator(self.hed_schema, tag_entry, attribute_name)
                        for issue in new_issues:
                            issue['severity'] = ErrorSeverity.WARNING
                        self.error_handler.add_context_and_filter(new_issues)
                        issues_list += new_issues
                        self.error_handler.pop_error_context()  # attribute

                self.error_handler.pop_error_context()  # tag
            self.error_handler.pop_error_context()  # section
        return issues_list

    def check_duplicate_names(self):
        """Return issues for any duplicate names in all sections.

        A duplicate is reported as SCHEMA_DUPLICATE_FROM_LIBRARY when some of the duplicate entries
        have the inLibrary attribute and others do not, and as SCHEMA_DUPLICATE_NODE otherwise.

        Returns:
            list: A list of issues.  Each issue is a dictionary.
        """
        issues_list = []
        for section_key in HedSectionKey:
            for name, duplicate_entries in self.hed_schema[section_key].duplicate_names.items():
                # Two distinct values means the entries disagree about being in a library.
                in_library_values = {entry.has_attribute(HedKey.InLibrary) for entry in duplicate_entries}
                if len(in_library_values) == 2:
                    error_code = SchemaErrors.SCHEMA_DUPLICATE_FROM_LIBRARY
                else:
                    error_code = SchemaErrors.SCHEMA_DUPLICATE_NODE
                issues_list += self.error_handler.format_error_with_context(
                    error_code, name,
                    duplicate_tag_list=[entry.name for entry in duplicate_entries],
                    section=section_key)
        return issues_list

    def check_invalid_chars(self):
        """Returns issues for bad chars in terms or descriptions.

        Entries that have the deprecatedFrom attribute are skipped.

        Returns:
            list: A list of issues.  Each issue is a dictionary.
        """
        issues_list = []

        # If above 8.3.0 use the character class validation instead
        if self._new_character_validation:
            section_validators = {
                HedSectionKey.Tags: validate_schema_tag_new
            }
            default_validator = validate_schema_term_new
            description_validator = validate_schema_description_new
        else:
            section_validators = {
                HedSectionKey.Tags: validate_schema_tag,
            }
            default_validator = verify_no_brackets
            description_validator = validate_schema_description

        for section_key in HedSectionKey:
            self.error_handler.push_error_context(ErrorContext.SCHEMA_SECTION, str(section_key))
            # Everything but tags just does the generic term check
            validator = section_validators.get(section_key, default_validator)
            for entry in self.hed_schema[section_key].values():
                if entry.has_attribute(HedKey.DeprecatedFrom):
                    # Don't validate deprecated terms and descriptions
                    continue
                self.error_handler.push_error_context(ErrorContext.SCHEMA_TAG, str(entry))
                # Collect into a new list so the lists returned by the validators are left untouched.
                new_issues = []
                new_issues += validator(entry)
                new_issues += description_validator(entry)
                self.error_handler.add_context_and_filter(new_issues)
                issues_list += new_issues
                self.error_handler.pop_error_context()  # Term
            self.error_handler.pop_error_context()  # section
        return issues_list