Source code for hed.schema.schema_io.text_util

"""Functions for parsing text from dataframes/text formats"""

import re

# Might need separate version again for wiki
# Matches a single name="value" pair.  Group 1 is the name: a non-greedy run of
# characters that are neither spaces nor commas, ending at the first ="
# Group 2 is the text between the double quotes (non-greedy, may be empty).
header_attr_expression = "([^ ,]+?)=\"(.*?)\""
# Compiled once at import time; _parse_header_attributes_line scans with it.
attr_re = re.compile(header_attr_expression)


def _parse_header_attributes_line(version_line):
    """Split a header line into name="value" attributes and leftover text.

    Parameters:
        version_line(str): The header line to scan.

    Returns:
        tuple: (attributes, leftovers)
            attributes(dict): Maps each attribute name to its quoted value.  If a name
                              appears more than once, the last occurrence wins.
            leftovers(list): Whitespace-stripped, non-empty pieces of the line that were
                             not part of any name="value" pair, in order of appearance.
    """
    attributes = {}
    gaps = []
    cursor = 0

    for found in attr_re.finditer(version_line):
        begin, finish = found.span()
        # Text sitting between the previous pair and this one (possibly empty).
        gaps.append(version_line[cursor:begin])
        name, value = found.group(1), found.group(2)
        attributes[name] = value
        cursor = finish

    # Whatever follows the final pair (possibly empty).
    gaps.append(version_line[cursor:])

    # Empty and whitespace-only gaps are dropped here.
    leftovers = [text for text in (gap.strip() for gap in gaps) if text]
    return attributes, leftovers


def _validate_attribute_string(attribute_string):
    """Raises ValueError on bad input"""
    pattern = r'^[A-Za-z]+(=.+)?$'
    match = re.fullmatch(pattern, attribute_string)
    if match:
        return match.group()
    raise ValueError(f'Malformed attribute {attribute_string}.  Valid formatting is: attribute, or attribute="value"')


def parse_attribute_string(attr_string):
    """Parse attributes for a single element into a dict.

    Attributes are separated by commas.  An attribute written without "=" is a
    flag and maps to True.  An attribute written as name=value maps to the value
    string; everything after the first "=" is the value, so values may themselves
    contain "=".  When the same name is given several values they are joined
    with commas in order of appearance.

    Parameters:
        attr_string(str): Formatted attributes (a=b, c=d, etc.)

    Returns:
        attributes(dict): The located attributes.  Can be empty.
                          None is returned for falsy input other than "" (e.g. None).

    :raises ValueError:
        - Very malformed input
        - A value is given for a name that was already used as a flag
    """
    if not attr_string:
        # Keep the long-standing contract: "" -> {}, other falsy input (None) -> None.
        return {} if attr_string == "" else None

    final_attributes = {}
    for attribute in (x.strip() for x in attr_string.split(',')):
        # Raises error on very invalid
        _validate_attribute_string(attribute)

        # Split at the FIRST "=" only, so "a=b=c" keeps the full value "b=c".
        name, separator, value = attribute.partition("=")
        if not separator:
            final_attributes[name] = True
            continue

        if name not in final_attributes:
            final_attributes[name] = value
            continue

        previous = final_attributes[name]
        if not isinstance(previous, str):
            # The name was seen earlier as a bare flag (True); appending a string
            # to it is impossible, so report it as malformed input.
            raise ValueError(f'Malformed attribute {attribute}.  '
                             f'"{name}" was already given without a value')
        final_attributes[name] = previous + "," + value

    return final_attributes