Source code for hed.models.query_handler

""" Holder for and manipulation of search results. """
import re

from hed.models.query_expressions import Expression, ExpressionAnd, ExpressionWildcardNew, ExpressionOr, \
    ExpressionNegation, ExpressionDescendantGroup, ExpressionExactMatch
from hed.models.query_util import Token


class QueryHandler:
    """Parse a search expression into a form that can be used to search a HED string."""

    def __init__(self, expression_string):
        """Compile a QueryHandler for a particular expression, so it can be used to search HED strings.

        Basic Input Examples:

        'Event' - Finds any strings with Event, or a descendent tag of Event such as Sensory-event.

        'Event and Action' - Find any strings with Event and Action, including descendant tags.

        'Event or Action' - Same as above, but it has either.

        '"Event"' - Finds the Event tag, but not any descendent tags.

        `Def/DefName/*` - Find Def/DefName instances with placeholders, regardless of the value of the placeholder.

        'Eve*' - Find any short tags that begin with Eve*, such as Event, but not Sensory-event.

        '[Event and Action]' - Find a group that contains both Event and Action(at any level).

        '{Event and Action}' - Find a group with Event And Action at the same level.

        '{Event and Action:}' - Find a group with Event And Action at the same level, and nothing else.

        '{Event and Action:Agent}' - Find a group with Event And Action at the same level,
        and optionally an Agent tag.

        Practical Complex Example:

        {(Onset or Offset), (Def or {Def-expand}): ???} - A group with an onset tag,
        a def tag or def-expand group, and an optional wildcard group

        Parameters:
            expression_string(str): The query string.

        Raises:
            ValueError: If the expression string cannot be parsed.
        """
        self.tokens = []
        self.at_token = -1
        # Parsing is done on the case-folded text; the original is kept as given.
        self.tree = self._parse(expression_string.casefold())
        self._org_string = expression_string

    def search(self, hed_string_obj):
        """Returns if a match is found in the given string

        Parameters:
            hed_string_obj (HedString): String to search

        Returns:
            list(SearchResult): Generally you should just treat this as a bool
                                True if a match was found.
        """
        return self.tree.handle_expr(hed_string_obj)

    def __str__(self):
        """Return the string form of the parsed expression tree."""
        return str(self.tree)

    def _get_next_token(self):
        """Advance the cursor by one and return the token it now points at.

        Returns:
            The token at the new cursor position.

        Raises:
            ValueError: If the cursor moves past the last token.
        """
        self.at_token += 1
        if self.at_token >= len(self.tokens):
            raise ValueError("Parse error in get next token")
        return self.tokens[self.at_token]

    def _next_token_is(self, kinds):
        """Consume and return the upcoming token only if its kind is one of kinds.

        Parameters:
            kinds (list): The token kinds that are acceptable at this point.

        Returns:
            The consumed token, or None if there is no upcoming token or its kind
            is not in kinds (the cursor is left untouched in that case).
        """
        upcoming = self.at_token + 1
        if upcoming >= len(self.tokens):
            return None
        if self.tokens[upcoming].kind in kinds:
            return self._get_next_token()
        return None

    def _parse(self, expression_string):
        """Parse the string and build an expression tree.

        Parameters:
            expression_string (str): The (already case-folded) query string.

        Returns:
            The root expression produced by the lowest-precedence rule (or).

        Raises:
            ValueError: If tokens are left over once the top-level expression is complete,
                        or if any nested rule fails.
        """
        self.tokens = self._tokenize(expression_string)
        # Always start from a fresh cursor so parsing does not depend on prior state.
        self.at_token = -1

        expr = self._handle_or_op()

        # Every token must have been consumed; leftovers mean a malformed query.
        if self.at_token + 1 != len(self.tokens):
            raise ValueError("Parse error in search string")

        return expr

    @staticmethod
    def _tokenize(expression_string):
        """Tokenize the expression string into a list.

        Parameters:
            expression_string (str): The query string to split.

        Returns:
            list: One Token per regex match, in order of appearance. Text that matches
                  none of the alternatives (such as whitespace) is skipped by findall.
        """
        grouping_re = r"\[\[|\[|\]\]|\]|}|{|:"
        paren_re = r"\)|\(|~"
        word_re = r"\?+|\band\b|\bor\b|,|[\"_\-a-zA-Z0-9/.^#\*@]+"
        re_string = fr"({grouping_re}|{paren_re}|{word_re})"
        token_re = re.compile(re_string)

        return [Token(text) for text in token_re.findall(expression_string)]

    def _handle_and_op(self):
        """Parse a left-associative chain of negation-level operands joined by `and`.

        Returns:
            The single operand, or nested ExpressionAnd nodes when `and` is present.
        """
        expr = self._handle_negation()
        next_token = self._next_token_is([Token.And])
        while next_token:
            # _next_token_is only hands back And tokens here, so no kind re-check is needed.
            right = self._handle_negation()
            expr = ExpressionAnd(next_token, expr, right)
            next_token = self._next_token_is([Token.And])
        return expr

    def _handle_or_op(self):
        """Parse a left-associative chain of and-level operands joined by `or`.

        Returns:
            The single operand, or nested ExpressionOr nodes when `or` is present.
        """
        expr = self._handle_and_op()
        next_token = self._next_token_is([Token.Or])
        while next_token:
            # _next_token_is only hands back Or tokens here, so no kind re-check is needed.
            right = self._handle_and_op()
            expr = ExpressionOr(next_token, expr, right)
            next_token = self._next_token_is([Token.Or])
        return expr

    def _handle_negation(self):
        """Parse an optional negation token followed by a grouping-level operand.

        Returns:
            An ExpressionNegation wrapping the operand if a negation token is present,
            otherwise the operand itself.

        Raises:
            ValueError: If the negated operand's string form contains a `?` wildcard.
        """
        next_token = self._next_token_is([Token.LogicalNegation])
        # NOTE(review): `token == Token.<Kind>` relies on Token's own equality
        # (defined in query_util, not visible here) comparing against kind constants.
        if next_token == Token.LogicalNegation:
            interior = self._handle_grouping_op()
            if "?" in str(interior):
                raise ValueError("Cannot negate wildcards, or expressions that contain wildcards. "
                                 "Use {required_expression : optional_expression}.")
            return ExpressionNegation(next_token, right=interior)

        return self._handle_grouping_op()

    def _handle_grouping_op(self):
        """Parse a parenthesized, descendant-group or exact-match group, or a single term.

        Returns:
            The expression for the group or term at the cursor.

        Raises:
            ValueError: If a group is not closed, if negation appears in an exact-match
                        group that has an optional portion, or if the tokens run out.
        """
        next_token = self._next_token_is(
            [Token.LogicalGroup, Token.DescendantGroup, Token.ExactMatch])

        if next_token == Token.LogicalGroup:
            # Plain grouping: the interior expression is used as-is.
            expr = self._handle_or_op()
            next_token = self._next_token_is([Token.LogicalGroupEnd])
            if next_token != Token.LogicalGroupEnd:
                raise ValueError("Parse error: Missing closing paren")
        elif next_token == Token.DescendantGroup:
            interior = self._handle_or_op()
            expr = ExpressionDescendantGroup(next_token, right=interior)
            next_token = self._next_token_is([Token.DescendantGroupEnd])
            if next_token != Token.DescendantGroupEnd:
                raise ValueError("Parse error: Missing closing square bracket")
        elif next_token == Token.ExactMatch:
            interior = self._handle_or_op()
            expr = ExpressionExactMatch(next_token, right=interior)
            next_token = self._next_token_is([Token.ExactMatchEnd, Token.ExactMatchOptional])
            if next_token == Token.ExactMatchOptional:
                # We have an optional portion - this needs to now be an exact match
                expr.optional = "none"
                next_token = self._next_token_is([Token.ExactMatchEnd])
                if next_token != Token.ExactMatchEnd:
                    # Something follows the separator: parse it as the optional part.
                    optional_portion = self._handle_or_op()
                    expr.left = optional_portion
                    next_token = self._next_token_is([Token.ExactMatchEnd])
                if "~" in str(expr):
                    raise ValueError("Cannot use negation in exact matching groups,"
                                     " as it's not clear what is being matched.\n"
                                     "{thing and ~(expression)} is allowed.")

            if next_token is None:
                raise ValueError("Parse error: Missing closing curly bracket")
        else:
            # Not a group opener: consume a single term (raises if no tokens remain).
            next_token = self._get_next_token()
            if next_token and next_token.kind == Token.Wildcard:
                expr = ExpressionWildcardNew(next_token)
            elif next_token:
                expr = Expression(next_token)
            else:
                expr = None

        return expr