Commits

Olemis Lang committed e1cdac3

GViz QL: Operator precedence table and productions lookup tree generated from grammar productions

Comments (0)

Files changed (3)

trac-dev/gviz/tracgviz/grammar.py

 
 # Grammar
 # =======
-#
-#  Default start state BOOL_EXPR
-#
+
+GVIZ_START_STATE = 'GVIZ_SQL'
+
+_And        = (Operator.Word.Boolean, 'and')
+_Or         = (Operator.Word.Boolean, 'or')
+_Not        = (Operator.Word.Boolean, 'not')
+_Number     = (Number, Any)
+_Var        = (Name.Variable, Any)
+_Str        = (String, Any)
+_Date       = (Literal.Date, Any)
+_Const      = (Name.Constant, Any)
+_Builtin    = (Name.Builtin, Any)
+_Function   = (Name.Function, Any)
+_Comma      = (Punctuation, ',')
+_OpenP      = (Punctuation, '(')
+_CloseP     = (Punctuation, ')')
+_Sum        = (Operator.Arithmetic, '+')
+_Multiply   = (Operator.Arithmetic, '*')
+_Subtract   = (Operator.Arithmetic, '-')
+_Divide     = (Operator.Arithmetic, '/')
+_BoolOp     = (Operator.Comparison, Any)
+_BoolWordOp = (Operator.Word.Comparison, Any)
+_EndAE      = (EndMarker, 'ARITHMETIC_EXPR')
+_EndBE      = (EndMarker, 'BOOL_EXPR')
+_EndSuite   = (EndMarker, 'SUITE')
+
+GVIZ_PRODUCTIONS = [
+  ['', '', [  ] ],
+  ['', '', [  ] ],
+  ['', '', [  ] ],
+  ['', '', [  ] ],
+  ['', '', [  ] ],
+  ['', '', [  ] ],
+  ['', '', [  ] ],
+  ['', '', [  ] ],
+  ['', '', [  ] ],
+  ['', '', [  ] ],
+  ['', '', [  ] ],
+  ['', '', [  ] ],
+  ['', '', [  ] ],
+  ['', '', [  ] ],
+  ['', '', [  ] ],
+  ['', '', [  ] ],
+  ['', '', [  ] ],
+  ['', '', [  ] ],
+  ['', '', [  ] ],
+  ['', '', [  ] ],
+  ['', '', [  ] ],
+  ['', '', [  ] ],
+  ['', '', [  ] ],
+  ['', '', [  ] ],
+  ['', '', [  ] ],
+  ['', '', [  ] ],
+  ['', '', [  ] ],
+  ['', '', [  ] ],
+  ['', '', [  ] ],
+  ['', '', [  ] ],
+  ['', '', [  ] ],
+  ['', '', [  ] ],
+  ['', '', [  ] ],
+  ['', '', [  ] ],
+  ['', '', [  ] ],
+  ['', '', [  ] ],
+  ['', '', [  ] ],
+  ['', '', [  ] ],
+  ['', '', [  ] ],
+  ['', '', [  ] ],
+  ['', '', [  ] ],
+  ['', '', [  ] ],
+  ['', '', [  ] ],
+  ],
+
 #  1.   VALUE -> Number
 #  2.   VALUE -> Variable
 #  3.   VALUE -> String
 #  22.  OR_EXPR -> OR_EXPR or BOOL_VALUE
 #  22a. OR_EXPR -> BOOL_VALUE
 
-_And        = (Operator.Word.Boolean, 'and')
-_Or         = (Operator.Word.Boolean, 'or')
-_Not        = (Operator.Word.Boolean, 'not')
-_Number     = (Number, Any)
-_Var        = (Name.Variable, Any)
-_Str        = (String, Any)
-_Date       = (Literal.Date, Any)
-_Const      = (Name.Constant, Any)
-_Builtin    = (Name.Builtin, Any)
-_Function   = (Name.Function, Any)
-_Comma      = (Punctuation, ',')
-_OpenP      = (Punctuation, '(')
-_CloseP     = (Punctuation, ')')
-_Sum        = (Operator.Arithmetic, '+')
-_Multiply   = (Operator.Arithmetic, '*')
-_Subtract   = (Operator.Arithmetic, '-')
-_Divide     = (Operator.Arithmetic, '/')
-_BoolOp     = (Operator.Comparison, Any)
-_BoolWordOp = (Operator.Word.Comparison, Any)
-_EndAE      = (EndMarker, 'ARITHMETIC_EXPR')
-_EndBE      = (EndMarker, 'BOOL_EXPR')
-_EndSuite   = (EndMarker, 'SUITE')
-
 GVIZ_GRAMMAR_PRECEDENCE = {
     CloseP : {
         Parser.MorePrecedence : [Multiply, Add, CloseP, EndE],

trac-dev/gviz/tracgviz/testing/test_parsing.py

 #  5. F -> ( E )
 #  6. F -> a
 
+E         = (NonTerminal, 'E')
+T         = (NonTerminal, 'T')
+F         = (NonTerminal, 'F')
 Multiply  = (Operator, '*')
 Add       = (Operator, '+')
 OpenP     = (Punctuation, '(')
 EndE      = (EndMarker, 'E')
 Var       = (Name, Any)
 
+SAMPLE_GRAMMAR = [
+    ('1', 'E', [E, Add, T]),
+    ('2', 'E', [T]),
+    ('3', 'T', [T, Multiply, F]),
+    ('4', 'T', [F]),
+    ('5', 'F', [OpenP, E, CloseP]),
+    ('6', 'F', [Var]),
+  ]
+
 SAMPLE_GRAMMAR_PRECEDENCE = {
     CloseP : {
         Parser.MorePrecedence : [Multiply, Add, CloseP, EndE],
   'Operator precedence grammar: Simple example' : r"""
       >>> from tracgviz.util.parsing import OperatorPrecedenceParser
       >>> p = OperatorPrecedenceParser()
+
+      It's possible to set the productions lookup tree and precedence directly
+
       >>> p.productions_tree = SAMPLE_GRAMMAR_PRODUCTIONS
       >>> p.precedence = SAMPLE_GRAMMAR_PRECEDENCE
       >>> p.start_state = 'E'
       >>> p.parse(stream, store_parse_order)
       >>> ' '.join(parse_order)
       '6 6 1 5 6 3'
+
+      However, most of the time it will be easier to set the productions
+      and let the parser build the productions lookup tree and precedence
+
+      >>> from pprint import pprint
+
+      >>> p.set_productions('E', *SAMPLE_GRAMMAR)
+      >>> for tkn1 in [CloseP, Var, Multiply, Add, OpenP, EndE]:
+      ...   for tkn2 in [OpenP, Var, Multiply, Add, CloseP, EndE]:
+      ...     try:
+      ...       prec = p.precedence[tkn1, tkn2]
+      ...     except KeyError:
+      ...       pass
+      ...     else:
+      ...       print tkn1, \
+      ...           '<' if prec in Parser.LessPrecedence \
+      ...               else ('>' if prec in Parser.MorePrecedence \
+      ...               else ('=' if prec in Parser.SamePrecedence
+      ...               else prec)), tkn2
+      ... 
+      (Token.Punctuation, ')') > (Token.Operator, '*')
+      (Token.Punctuation, ')') > (Token.Operator, '+')
+      (Token.Punctuation, ')') > (Token.Punctuation, ')')
+      (Token.Punctuation, ')') > (Token.Grammar.EndMarker, 'E')
+      (Token.Name, Token.Grammar.Any) > (Token.Operator, '*')
+      (Token.Name, Token.Grammar.Any) > (Token.Operator, '+')
+      (Token.Name, Token.Grammar.Any) > (Token.Punctuation, ')')
+      (Token.Name, Token.Grammar.Any) > (Token.Grammar.EndMarker, 'E')
+      (Token.Operator, '*') < (Token.Punctuation, '(')
+      (Token.Operator, '*') < (Token.Name, Token.Grammar.Any)
+      (Token.Operator, '*') > (Token.Operator, '*')
+      (Token.Operator, '*') > (Token.Operator, '+')
+      (Token.Operator, '*') > (Token.Punctuation, ')')
+      (Token.Operator, '*') > (Token.Grammar.EndMarker, 'E')
+      (Token.Operator, '+') < (Token.Punctuation, '(')
+      (Token.Operator, '+') < (Token.Name, Token.Grammar.Any)
+      (Token.Operator, '+') < (Token.Operator, '*')
+      (Token.Operator, '+') > (Token.Operator, '+')
+      (Token.Operator, '+') > (Token.Punctuation, ')')
+      (Token.Operator, '+') > (Token.Grammar.EndMarker, 'E')
+      (Token.Punctuation, '(') < (Token.Punctuation, '(')
+      (Token.Punctuation, '(') < (Token.Name, Token.Grammar.Any)
+      (Token.Punctuation, '(') < (Token.Operator, '*')
+      (Token.Punctuation, '(') < (Token.Operator, '+')
+      (Token.Punctuation, '(') = (Token.Punctuation, ')')
+      (Token.Grammar.EndMarker, 'E') < (Token.Punctuation, '(')
+      (Token.Grammar.EndMarker, 'E') < (Token.Name, Token.Grammar.Any)
+      (Token.Grammar.EndMarker, 'E') < (Token.Operator, '*')
+      (Token.Grammar.EndMarker, 'E') < (Token.Operator, '+')
+
+      >>> pprint(p.productions_tree)
+      {(Token.Grammar.NonTerminal, Token.Grammar.Any): {(Token.Operator, '*'): {(Token.Grammar.NonTerminal, Token.Grammar.Any): {Token.Grammar.EndMarker: '3'}},
+                                                        (Token.Operator, '+'): {(Token.Grammar.NonTerminal, Token.Grammar.Any): {Token.Grammar.EndMarker: '1'}}},
+       (Token.Name, Token.Grammar.Any): {Token.Grammar.EndMarker: '6'},
+       (Token.Punctuation, ')'): {(Token.Grammar.NonTerminal, Token.Grammar.Any): {(Token.Punctuation, '('): {Token.Grammar.EndMarker: '5'}}}}
+
+      >>> stream = iter(SAMPLE_INPUT_STREAM)
+
+      >>> parse_order = []
+
+      >>> p.parse(stream, store_parse_order)
+      >>> ' '.join(parse_order)
+      '6 6 1 5 6 3'
+
       """,
   }
 

trac-dev/gviz/tracgviz/util/parsing.py

     'NonTerminal'
 __metaclass__ = type
 
-from itertools import ifilter
+from itertools import chain, ifilter
 import logging
 from pygments.token import *
 
   def __init__(self):
     r"""Initialize with empty precendence table and skeletal grammar.
     """
+    self.set_productions()
+
+  def set_productions(self, start_state=None, *productions):
+    r"""Rebuild precedence and production look up tree according to 
+    a new set of grammar productions. All previous state is discarded.
+    """
     self.start_state = None
+    self.nfirst = {}
+    self.nlast = {}
+    self.pre = {}
+    self.post = {}
     self.precedence = {}
     self.productions_tree = {}
 
+    nfirst = {}
+    nlast = {}
+    pre = {}
+    post = {}
+    precedence = {}
+    productions_tree = {}
+
+    if productions:
+
+      def _update(state, nt_rel, t_rel, terminal=None, nt=None):
+        r"""Incremental update of relationship adding termminals
+        """
+        logging.debug('_update: state=%s terminal=%s nt=%s', state, terminal, nt)
+        new_terminals = set([terminal]) if terminal is not None else set()
+        # Add terminal to PRE / POST
+        if nt is not None and nt != state:
+          if nt in nt_rel.setdefault(state, set()):
+            # Dependency loop
+            raise InvalidProduction('Failed to establish precedence (loop)')
+          # Add non-terminal to NFIRST / NLAST
+          nt_rel.setdefault(nt, set()).add(state)
+          # Propagate terminals related to non-terminal as well
+          new_terminals.update(t_rel.setdefault(nt, set()))
+        if new_terminals:
+          # Propagate changes
+          pend = list( nt_rel.setdefault(state, set()) ) + [state] 
+          already = set()
+          while pend:
+            s = pend.pop()
+            if s not in already:
+              t_rel.setdefault(s, set()).update(new_terminals)
+              pend += list( nt_rel.setdefault(s, set()) )
+            already.add(s)
+
+      for prod_id, state, ps in productions:
+        if not ps:
+          raise InvalidProduction(self, 'Empty productions not allowed')
+
+        # Update skeletal production path
+        choices = productions_tree
+        for subtkn, subval in reversed(ps):
+          if len(ps) > 1 or subtkn not in NonTerminal:
+            choices = choices.setdefault(
+                (subtkn, Any if subtkn in NonTerminal else subval), {})
+        else:
+          choices[EndMarker] = prod_id
+
+        # Populate NFIRST, NLAST, PRE, POST
+        subtkn, subval = ps[0]
+        if len(ps) == 1:
+          if subtkn in NonTerminal:
+            if state == subval:
+              raise InvalidProduction(self, 'Infinite recursion state=' + state)
+            else:
+              # PRE and POST for subval should be propagated back to state
+              _update(state, nfirst, pre, nt=subval)
+              _update(state, nlast, post, nt=subval)
+          else:
+            _update(state, nfirst, pre, terminal=(subtkn, subval))
+            _update(state, nlast, post, terminal=(subtkn, subval))
+        else:
+          # Update PRE (NFIRST)
+          if subtkn in NonTerminal:
+            _update(state, nfirst, pre, terminal=ps[1], nt=subval)
+          else:
+            _update(state, nfirst, pre, terminal=(subtkn, subval))
+          # Update POST (NLAST)
+          subtkn, subval = ps[-1]
+          if subtkn in NonTerminal:
+            _update(state, nlast, post, terminal=ps[-2], nt=subval)
+          else:
+            _update(state, nlast, post, terminal=(subtkn, subval))
+
+      # Build precedence table and productions lookup tree
+      for prod_id, state, ps in productions:
+        prev_t = prev_nt = None
+        is_last_nt = False
+        for subtkn, subval in ps:
+          if subtkn in NonTerminal:
+            if is_last_nt:
+              raise InvalidProduction("Consecutive non-terminals in production")
+            else:
+              if prev_t:
+                # Terminals in PRE have less precedence
+                for terminal in pre.setdefault(subval, set()):
+                  key = (prev_t, terminal)
+                  if precedence.get(key) not in (None, self.LessPrecedence):
+                    raise InvalidParserConfiguration(
+                        "Failed to establish precedence")
+                  precedence[key] = self.LessPrecedence
+            prev_nt = subval
+          else:
+            if prev_t:
+              # Adjacent terminals have the same precedence
+              key = (prev_t, (subtkn, subval))
+              if precedence.get(key) not in (None, self.SamePrecedence):
+                raise InvalidParserConfiguration(
+                    "Failed to establish precedence")
+              precedence[key] = self.SamePrecedence
+            if prev_nt:
+              # Terminals in previous non-terminal's POST have more precedence
+              for terminal in post.setdefault(prev_nt, set()):
+                key = (terminal, (subtkn, subval))
+                if precedence.get(key) not in (None, self.MorePrecedence):
+                  raise InvalidParserConfiguration(
+                      "Failed to establish precedence")
+                precedence[key] = self.MorePrecedence
+            prev_t = (subtkn, subval)
+            # Non-terminal has to be adjacent to terminal
+            prev_nt = None
+          is_last_nt = subtkn in NonTerminal
+
+      # Remove end marker from productions tree root , just in case ...
+      try:
+        del productions_tree[EndMarker]
+      except KeyError:
+        pass
+
+    # End-markers precedence
+    for nt, nt_pre in pre.iteritems():
+      for terminal in nt_pre:
+        precedence[((EndMarker, nt), terminal)] = self.LessPrecedence
+    for nt, nt_post in post.iteritems():
+      for terminal in nt_post:
+        precedence[(terminal, (EndMarker, nt))] = self.MorePrecedence
+
+    # Everything ok , assign instance methods now
+    self.start_state = start_state
+    self.nfirst = nfirst
+    self.nlast = nlast
+    self.pre = pre
+    self.post = post
+    self.precedence = precedence
+    self.productions_tree = productions_tree
+
   def parse(self, stream, on_reduce, start_state=None):
     r"""Parse a token `stream` of tokens.
 
   r"""Wrong parser configuration detected.
   """
 
+class InvalidProduction(InvalidParserConfiguration):
+  r"""Grammar production is either malformed or breaks a pre-condition 
+  for using target parser.
+  """
+
 #------------------------------------------------------
 #   Global Testing
 #------------------------------------------------------
 
 from tracgviz.testing.test_parsing import __test__, SAMPLE_GRAMMAR_PRECEDENCE, \
-    SAMPLE_GRAMMAR_PRODUCTIONS, SAMPLE_INPUT_STREAM
+    SAMPLE_GRAMMAR_PRODUCTIONS, SAMPLE_INPUT_STREAM, SAMPLE_GRAMMAR, Parser, \
+    CloseP, Var, Multiply, Add, OpenP, EndE
 
 def test_suite():
   from doctest import ELLIPSIS, NORMALIZE_WHITESPACE, REPORT_UDIFF