Source

trac-gviz / trac-dev / gviz / tracgviz / util / parsing.py

Diff from to

File trac-dev/gviz/tracgviz/util/parsing.py

     'NonTerminal'
 __metaclass__ = type
 
-from itertools import ifilter
+from itertools import chain, ifilter
 import logging
 from pygments.token import *
 
   def __init__(self):
     r"""Initialize with empty precendence table and skeletal grammar.
     """
+    self.set_productions()
+
+  def set_productions(self, start_state=None, *productions):
+    r"""Rebuild precedence and production look up tree according to 
+    a new set of grammar productions. All previous state is discarded.
+    """
     self.start_state = None
+    self.nfirst = {}
+    self.nlast = {}
+    self.pre = {}
+    self.post = {}
     self.precedence = {}
     self.productions_tree = {}
 
+    nfirst = {}
+    nlast = {}
+    pre = {}
+    post = {}
+    precedence = {}
+    productions_tree = {}
+
+    if productions:
+
+      def _update(state, nt_rel, t_rel, terminal=None, nt=None):
+        r"""Incremental update of relationship adding termminals
+        """
+        logging.debug('_update: state=%s terminal=%s nt=%s', state, terminal, nt)
+        new_terminals = set([terminal]) if terminal is not None else set()
+        # Add terminal to PRE / POST
+        if nt is not None and nt != state:
+          if nt in nt_rel.setdefault(state, set()):
+            # Dependency loop
+            raise InvalidProduction('Failed to establish precedence (loop)')
+          # Add non-terminal to NFIRST / NLAST
+          nt_rel.setdefault(nt, set()).add(state)
+          # Propagate terminals related to non-terminal as well
+          new_terminals.update(t_rel.setdefault(nt, set()))
+        if new_terminals:
+          # Propagate changes
+          pend = list( nt_rel.setdefault(state, set()) ) + [state] 
+          already = set()
+          while pend:
+            s = pend.pop()
+            if s not in already:
+              t_rel.setdefault(s, set()).update(new_terminals)
+              pend += list( nt_rel.setdefault(s, set()) )
+            already.add(s)
+
+      for prod_id, state, ps in productions:
+        if not ps:
+          raise InvalidProduction(self, 'Empty productions not allowed')
+
+        # Update skeletal production path
+        choices = productions_tree
+        for subtkn, subval in reversed(ps):
+          if len(ps) > 1 or subtkn not in NonTerminal:
+            choices = choices.setdefault(
+                (subtkn, Any if subtkn in NonTerminal else subval), {})
+        else:
+          choices[EndMarker] = prod_id
+
+        # Populate NFIRST, NLAST, PRE, POST
+        subtkn, subval = ps[0]
+        if len(ps) == 1:
+          if subtkn in NonTerminal:
+            if state == subval:
+              raise InvalidProduction(self, 'Infinite recursion state=' + state)
+            else:
+              # PRE and POST for subval should be propagated back to state
+              _update(state, nfirst, pre, nt=subval)
+              _update(state, nlast, post, nt=subval)
+          else:
+            _update(state, nfirst, pre, terminal=(subtkn, subval))
+            _update(state, nlast, post, terminal=(subtkn, subval))
+        else:
+          # Update PRE (NFIRST)
+          if subtkn in NonTerminal:
+            _update(state, nfirst, pre, terminal=ps[1], nt=subval)
+          else:
+            _update(state, nfirst, pre, terminal=(subtkn, subval))
+          # Update POST (NLAST)
+          subtkn, subval = ps[-1]
+          if subtkn in NonTerminal:
+            _update(state, nlast, post, terminal=ps[-2], nt=subval)
+          else:
+            _update(state, nlast, post, terminal=(subtkn, subval))
+
+      # Build precedence table and productions lookup tree
+      for prod_id, state, ps in productions:
+        prev_t = prev_nt = None
+        is_last_nt = False
+        for subtkn, subval in ps:
+          if subtkn in NonTerminal:
+            if is_last_nt:
+              raise InvalidProduction("Consecutive non-terminals in production")
+            else:
+              if prev_t:
+                # Terminals in PRE have less precedence
+                for terminal in pre.setdefault(subval, set()):
+                  key = (prev_t, terminal)
+                  if precedence.get(key) not in (None, self.LessPrecedence):
+                    raise InvalidParserConfiguration(
+                        "Failed to establish precedence")
+                  precedence[key] = self.LessPrecedence
+            prev_nt = subval
+          else:
+            if prev_t:
+              # Adjacent terminals have the same precedence
+              key = (prev_t, (subtkn, subval))
+              if precedence.get(key) not in (None, self.SamePrecedence):
+                raise InvalidParserConfiguration(
+                    "Failed to establish precedence")
+              precedence[key] = self.SamePrecedence
+            if prev_nt:
+              # Terminals in previous non-terminal's POST have more precedence
+              for terminal in post.setdefault(prev_nt, set()):
+                key = (terminal, (subtkn, subval))
+                if precedence.get(key) not in (None, self.MorePrecedence):
+                  raise InvalidParserConfiguration(
+                      "Failed to establish precedence")
+                precedence[key] = self.MorePrecedence
+            prev_t = (subtkn, subval)
+            # Non-terminal has to be adjacent to terminal
+            prev_nt = None
+          is_last_nt = subtkn in NonTerminal
+
+      # Remove end marker from productions tree root , just in case ...
+      try:
+        del productions_tree[EndMarker]
+      except KeyError:
+        pass
+
+    # End-markers precedence
+    for nt, nt_pre in pre.iteritems():
+      for terminal in nt_pre:
+        precedence[((EndMarker, nt), terminal)] = self.LessPrecedence
+    for nt, nt_post in post.iteritems():
+      for terminal in nt_post:
+        precedence[(terminal, (EndMarker, nt))] = self.MorePrecedence
+
+    # Everything ok , assign instance methods now
+    self.start_state = start_state
+    self.nfirst = nfirst
+    self.nlast = nlast
+    self.pre = pre
+    self.post = post
+    self.precedence = precedence
+    self.productions_tree = productions_tree
+
   def parse(self, stream, on_reduce, start_state=None):
     r"""Parse a token `stream` of tokens.
 
   r"""Wrong parser configuration detected.
   """
 
+class InvalidProduction(InvalidParserConfiguration):
+  r"""Grammar production is either malformed or breaks a pre-condition 
+  for using target parser.
+  """
+
 #------------------------------------------------------
 #   Global Testing
 #------------------------------------------------------
 
 from tracgviz.testing.test_parsing import __test__, SAMPLE_GRAMMAR_PRECEDENCE, \
-    SAMPLE_GRAMMAR_PRODUCTIONS, SAMPLE_INPUT_STREAM
+    SAMPLE_GRAMMAR_PRODUCTIONS, SAMPLE_INPUT_STREAM, SAMPLE_GRAMMAR, Parser, \
+    CloseP, Var, Multiply, Add, OpenP, EndE
 
 def test_suite():
   from doctest import ELLIPSIS, NORMALIZE_WHITESPACE, REPORT_UDIFF