Source code for hed.schema.schema_io.schema2base

"""Baseclass for mediawiki/xml writers"""
from hed.schema.hed_schema_constants import HedSectionKey, HedKey
from hed.errors.exceptions import HedFileError, HedExceptions


[docs]class Schema2Base:
[docs] def __init__(self): # Placeholder output variable self.output = None self._save_lib = False self._save_base = False self._save_merged = False self._strip_out_in_library = False self._schema = None
[docs] def process_schema(self, hed_schema, save_merged=False): """ Takes a HedSchema object and returns it in the inherited form(mediawiki, xml, etc) Parameters ---------- hed_schema : HedSchema save_merged: bool If True, this will save the schema as a merged schema if it is a "withStandard" schema. If it is not a "withStandard" schema, this setting has no effect. Returns ------- converted_output: Any Varies based on inherited class """ if not hed_schema.can_save(): raise HedFileError(HedExceptions.SCHEMA_LIBRARY_INVALID, "Cannot save a schema merged from multiple library schemas", hed_schema.filename) self._initialize_output() self._save_lib = False self._save_base = False self._strip_out_in_library = True self._schema = hed_schema # This is needed to save attributes in dataframes for now if hed_schema.with_standard: self._save_lib = True if save_merged: self._save_base = True self._strip_out_in_library = False else: # Saving a standard schema or a library schema without a standard schema save_merged = True self._save_lib = True self._save_base = True self._save_merged = save_merged self._output_header(hed_schema.get_save_header_attributes(self._save_merged), hed_schema.prologue) self._output_tags(hed_schema.tags) self._output_units(hed_schema.unit_classes) self._output_section(hed_schema, HedSectionKey.UnitModifiers) self._output_section(hed_schema, HedSectionKey.ValueClasses) self._output_section(hed_schema, HedSectionKey.Attributes) self._output_section(hed_schema, HedSectionKey.Properties) self._output_footer(hed_schema.epilogue) return self.output
def _initialize_output(self): raise NotImplementedError("This needs to be defined in the subclass") def _output_header(self, attributes, prologue): raise NotImplementedError("This needs to be defined in the subclass") def _output_footer(self, epilogue): raise NotImplementedError("This needs to be defined in the subclass") def _start_section(self, key_class): raise NotImplementedError("This needs to be defined in the subclass") def _end_tag_section(self): raise NotImplementedError("This needs to be defined in the subclass") def _write_tag_entry(self, tag_entry, parent=None, level=0): raise NotImplementedError("This needs to be defined in the subclass") def _write_entry(self, entry, parent_node, include_props=True): raise NotImplementedError("This needs to be defined in the subclass") def _output_tags(self, tags): schema_node = self._start_section(HedSectionKey.Tags) # This assumes .all_entries is sorted in a reasonable way for output. level_adj = 0 all_nodes = {} # List of all nodes we've written out. for tag_entry in tags.all_entries: if self._should_skip(tag_entry): continue tag = tag_entry.name level = tag.count("/") # Don't adjust if we're a top level tag(if this is a rooted tag, it will be re-adjusted below) if not tag_entry.parent_name: level_adj = 0 if level == 0: root_tag = self._write_tag_entry(tag_entry, schema_node, level) all_nodes[tag_entry.name] = root_tag else: # Only output the rooted parent nodes if they have a parent(for duplicates that don't) if tag_entry.has_attribute(HedKey.InLibrary) and tag_entry.parent and \ not tag_entry.parent.has_attribute(HedKey.InLibrary) and not self._save_merged: if tag_entry.parent.name not in all_nodes: level_adj = level parent_node = all_nodes.get(tag_entry.parent_name, schema_node) child_node = self._write_tag_entry(tag_entry, parent_node, level - level_adj) all_nodes[tag_entry.name] = child_node self._end_tag_section() def _output_units(self, unit_classes): section_node = self._start_section(HedSectionKey.UnitClasses) for unit_class_entry in unit_classes.values(): has_lib_unit = False if self._should_skip(unit_class_entry): has_lib_unit = any(unit.attributes.get(HedKey.InLibrary) for unit in unit_class_entry.units.values()) if not self._save_lib or not has_lib_unit: continue unit_class_node = self._write_entry(unit_class_entry, section_node, not has_lib_unit) unit_types = unit_class_entry.units for unit_entry in unit_types.values(): if self._should_skip(unit_entry): continue self._write_entry(unit_entry, unit_class_node) def _output_section(self, hed_schema, key_class): parent_node = self._start_section(key_class) for entry in hed_schema[key_class].values(): if self._should_skip(entry): continue self._write_entry(entry, parent_node) def _should_skip(self, entry): has_lib_attr = entry.has_attribute(HedKey.InLibrary) if not self._save_base and not has_lib_attr: return True if not self._save_lib and has_lib_attr: return True return False def _attribute_disallowed(self, attribute): return self._strip_out_in_library and attribute == HedKey.InLibrary def _format_tag_attributes(self, attributes): """ Takes a dictionary of tag attributes and returns a string with the .mediawiki representation Parameters ---------- attributes : {str:str} {attribute_name : attribute_value} Returns ------- str: The formatted string that should be output to the file. """ prop_string = "" final_props = [] for prop, value in attributes.items(): # Never save InLibrary if saving merged. if self._attribute_disallowed(prop): continue if value is True: final_props.append(prop) else: if "," in value: split_values = value.split(",") for split_value in split_values: final_props.append(f"{prop}={split_value}") else: final_props.append(f"{prop}={value}") if final_props: interior = ", ".join(final_props) prop_string = f"{interior}" return prop_string @staticmethod def _get_attribs_string_from_schema(header_attributes, sep=" "): """ Gets the schema attributes and converts it to a string. Parameters ---------- header_attributes : dict Attributes to format attributes from Returns ------- str: A string of the attributes that can be written to a .mediawiki formatted file """ attrib_values = [f"{attr}=\"{value}\"" for attr, value in header_attributes.items()] final_attrib_string = sep.join(attrib_values) return final_attrib_string