# Source code for hed.schema.hed_schema_io

""" Utilities for loading and outputting HED schema. """
import os
import json
import functools
from hed.schema.schema_io.xml2schema import SchemaLoaderXML
from hed.schema.schema_io.wiki2schema import SchemaLoaderWiki
from hed.schema import hed_cache

from hed.errors.exceptions import HedFileError, HedExceptions
from hed.schema.schema_io import schema_util
from hed.schema.hed_schema_group import HedSchemaGroup
from hed.schema.schema_validation_util import validate_version_string


# Upper bound (maxsize) of the functools.lru_cache that memoizes _load_schema_version results.
MAX_MEMORY_CACHE = 20


def from_string(schema_string, schema_format=".xml", schema_namespace=None):
    """ Build a schema object from schema text that is already in memory.

    Parameters:
        schema_string (str): The complete XML or mediawiki schema as one string.
        schema_format (str): Format of schema_string; must end with ".xml" or ".mediawiki".
        schema_namespace (str, None): The name_prefix all tags in this schema will accept.

    Returns:
        (HedSchema): The loaded schema.

    :raises HedFileError:
        - If schema_string is empty.
        - If schema_format does not end with a known extension.
        - Other fatal formatting issues reported by the loader.

    Notes:
        - The loader is selected from the suffix of schema_format.

    """
    if not schema_string:
        raise HedFileError(HedExceptions.BAD_PARAMETERS, "Empty string passed to HedSchema.from_string",
                           filename=schema_string)

    # Checked in order; the first suffix that matches picks the loader.
    loaders_by_suffix = (
        (".xml", SchemaLoaderXML),
        (".mediawiki", SchemaLoaderWiki),
    )
    for suffix, loader in loaders_by_suffix:
        if schema_format.endswith(suffix):
            hed_schema = loader.load(schema_as_string=schema_string)
            break
    else:
        # No suffix matched.
        raise HedFileError(HedExceptions.INVALID_EXTENSION, "Unknown schema extension", filename=schema_format)

    if schema_namespace:
        hed_schema.set_schema_prefix(schema_namespace=schema_namespace)
    return hed_schema
def load_schema(hed_path=None, schema_namespace=None):
    """ Read a schema from a local file or from a URL.

    Parameters:
        hed_path (str or None): A filepath or url to open a schema from.
        schema_namespace (str or None): The name_prefix all tags in this schema will accept.

    Returns:
        HedSchema: The loaded schema.

    :raises HedFileError:
        - Empty path passed
        - Unknown extension
        - Any fatal issues when loading the schema.

    """
    if not hed_path:
        raise HedFileError(HedExceptions.FILE_NOT_FOUND, "Empty file path passed to HedSchema.load_file",
                           filename=hed_path)

    if hed_cache._check_if_url(hed_path):
        # Remote schema: fetch the text, then let from_string choose the loader
        # from the (lower-cased) extension of the URL.
        schema_text = schema_util.url_to_string(hed_path)
        extension = os.path.splitext(hed_path.lower())[1]
        hed_schema = from_string(schema_text, schema_format=extension)
    else:
        # Local file: the extension check is case-insensitive.
        lowered_path = hed_path.lower()
        if lowered_path.endswith(".xml"):
            hed_schema = SchemaLoaderXML.load(hed_path)
        elif lowered_path.endswith(".mediawiki"):
            hed_schema = SchemaLoaderWiki.load(hed_path)
        else:
            raise HedFileError(HedExceptions.INVALID_EXTENSION, "Unknown schema extension", filename=hed_path)

    if schema_namespace:
        hed_schema.set_schema_prefix(schema_namespace=schema_namespace)
    return hed_schema
# If this is actually used, we could easily add other versions/update this one
def get_hed_xml_version(xml_file_path):
    """ Return the version recorded in a HED XML schema file.

    Parameters:
        xml_file_path (str): The path to a HED XML file.

    Returns:
        str: The version number of the HED XML file.

    :raises HedFileError:
        - There is an issue loading the schema

    """
    # Constructing the loader on the path exposes the schema whose version is returned.
    return SchemaLoaderXML(xml_file_path).schema.version
@functools.lru_cache(maxsize=MAX_MEMORY_CACHE)
def _load_schema_version(xml_version=None, xml_folder=None):
    """ Load one schema by version string, or the latest when none is given.

    Results are memoized per (xml_version, xml_folder) by the lru_cache decorator.

    Parameters:
        xml_version (str): HED version format string. Expected format: '[schema_namespace:][library_name_]X.Y.Z'.
        xml_folder (str): Path to a folder containing schema.

    Returns:
        HedSchema or HedSchemaGroup: The requested HedSchema object.

    :raises HedFileError:
        - The xml_version is not valid.
        - The specified version cannot be found or loaded
        - Other fatal errors loading the schema (These are unlikely if you are not editing them locally)
        - The prefix is invalid

    """
    schema_namespace, library_name = "", None
    if xml_version:
        # Optional "namespace:" prefix - everything before the first colon.
        if ":" in xml_version:
            schema_namespace, xml_version = xml_version.split(":", 1)
        # Optional "library_" prefix - everything before the last underscore.
        if "_" in xml_version:
            library_name, xml_version = xml_version.rsplit("_", 1)
        elif validate_version_string(xml_version):
            # NOTE(review): a truthy result is treated as "not a plain version number", so the
            # whole text is used as the library name - confirm against validate_version_string.
            library_name, xml_version = xml_version, None

    # Same lookup is needed up to three times below.
    locate_schema_file = functools.partial(hed_cache.get_hed_version_path, xml_version, library_name, xml_folder)

    try:
        schema_path = locate_schema_file()
        if not schema_path:
            hed_cache.cache_local_versions(xml_folder)
            schema_path = locate_schema_file()
        # An empty path makes load_schema raise FILE_NOT_FOUND, which is handled below.
        hed_schema = load_schema(schema_path)
    except HedFileError as err:
        if err.code != HedExceptions.FILE_NOT_FOUND:
            raise err
        # Second attempt after asking hed_cache to cache the xml versions.
        hed_cache.cache_xml_versions(cache_folder=xml_folder)
        schema_path = locate_schema_file()
        if not schema_path:
            raise HedFileError(HedExceptions.FILE_NOT_FOUND,
                               f"HED version '{xml_version}' not found in cache: {hed_cache.get_cache_directory()}",
                               filename=xml_folder)
        hed_schema = load_schema(schema_path)

    if schema_namespace:
        hed_schema.set_schema_prefix(schema_namespace=schema_namespace)
    return hed_schema
def load_schema_version(xml_version=None, xml_folder=None):
    """ Return a HedSchema or HedSchemaGroup extracted from xml_version field.

    Parameters:
        xml_version (str or list or None): List or str specifying which official HED schemas to use.
            An empty string, an empty list or None returns the latest version.
            A json str format is also supported, based on the output of HedSchema.get_formatted_version.
            Basic format: '[schema_namespace:][library_name_]X.Y.Z'.
        xml_folder (str): Path to a folder containing schema.

    Returns:
        HedSchema or HedSchemaGroup: The schema or schema group extracted.
            A list with exactly one entry yields that single schema rather than a group.

    :raises HedFileError:
        - The xml_version is not valid.
        - The xml_version looks like json but cannot be parsed.
        - The specified version cannot be found or loaded
        - Other fatal errors loading the schema (These are unlikely if you are not editing them locally)
        - The prefix is invalid

    """
    # Check if we start and end with a square bracket, or double quote. This might be valid json
    if xml_version and isinstance(xml_version, str) and \
            (xml_version[0], xml_version[-1]) in [('[', ']'), ('"', '"')]:
        try:
            xml_version = json.loads(xml_version)
        except json.decoder.JSONDecodeError as e:
            raise HedFileError(HedExceptions.CANNOT_PARSE_JSON, str(e), xml_version) from e

    if isinstance(xml_version, list):
        if xml_version:
            schemas = [_load_schema_version(xml_version=version, xml_folder=xml_folder)
                       for version in xml_version]
            if len(schemas) == 1:
                return schemas[0]
            return HedSchemaGroup(schemas)
        # An empty list (given directly or decoded from the json text "[]") means "no version
        # specified". It must not reach _load_schema_version as a list: that function is wrapped
        # in lru_cache, which hashes its arguments and would raise TypeError on a list.
        xml_version = None

    return _load_schema_version(xml_version=xml_version, xml_folder=xml_folder)