Source code for hed.models.query_service

""" Functions to get and use HED queries. """
import pandas as pd

from hed.models import QueryHandler


def get_query_handlers(queries, query_names=None):
    """ Return a list of query handlers, query names, and issues if any.

    Parameters:
        queries (list or str): A list of query strings. A single query string is treated as a one-element list.
        query_names (list, str, or None): Column names for the results of the queries. A single
            name given as a string is treated as a one-element list. If missing or empty, the
            names query_0, query_1, etc. are generated (one per query).

    Returns:
        list or None - One entry per query, in order: a QueryHandler, or None where the query could not be parsed.
        list or None - str names to assign to results of the queries.
        list - issues if any of the queries could not be parsed or other errors occurred.

    Notes:
        - If queries is empty, the return value is (None, None, [issue]).
        - A length mismatch or duplicated names is reported in the issues list, but the
          queries are still parsed and the names are returned as given.

    """
    if not queries:
        return None, None, ["EmptyQueries: The queries list must not be empty"]
    if isinstance(queries, str):
        queries = [queries]

    if not query_names:
        query_names = [f"query_{index}" for index in range(len(queries))]
    elif isinstance(query_names, str):
        # Mirror the handling of a single query string; otherwise the checks
        # below would count and compare the individual characters of the name.
        query_names = [query_names]

    issues = []
    if len(queries) != len(query_names):
        issues.append(f"QueryNamesLengthBad: The query_names length {len(query_names)} must be empty or equal " +
                      f"to the queries length {len(queries)}.")
    elif len(set(query_names)) != len(query_names):
        issues.append(f"DuplicateQueryNames: The query names {str(query_names)} list has duplicates")

    # Positions of queries that fail to parse stay None so the result remains aligned with queries.
    expression_parsers = [None] * len(queries)
    for index, query in enumerate(queries):
        try:
            expression_parsers[index] = QueryHandler(query)
        except Exception:
            # Deliberately broad: any parsing failure is reported as an issue rather than raised.
            issues.append(f"[BadQuery {index}]: {query} cannot be parsed")

    return expression_parsers, query_names, issues
def search_strings(hed_strings, queries, query_names):
    """ Return a DataFrame of factors based on results of queries.

    Parameters:
        hed_strings (list): A list of HedString objects (empty entries or None entries are 0's).
        queries (list): A list of query strings or QueryHandler objects. Query strings are
            converted to QueryHandler objects before searching; any other entry is used as is
            and must provide a search method.
        query_names (list): A list of column names for results of queries, aligned by position with queries.

    Returns:
        DataFrame: Contains the factor vectors with results of the queries. There is one row per
            entry of hed_strings and one column per query name. A cell is 1 if the query for that
            column returned a truthy search result for that row's entry, and 0 otherwise.

    Notes:
        - Errors raised while converting a query string to a QueryHandler are not caught here.
        - NOTE(review): query_names is not validated in this function; any error for invalid or
          duplicated names would have to come from pandas -- confirm before relying on it.

    """
    df_factors = pd.DataFrame(0, index=range(len(hed_strings)), columns=query_names)

    # Which rows are searchable does not depend on the query, so work it out once.
    searchable = [(row, item) for row, item in enumerate(hed_strings) if item]
    if not searchable:
        return df_factors

    for position, query in enumerate(queries):
        # The documented contract allows plain query strings; compile them on demand.
        handler = QueryHandler(query) if isinstance(query, str) else query
        for row, item in searchable:
            if handler.search(item):
                df_factors.at[row, query_names[position]] = 1
    return df_factors