Georg Brandl avatar Georg Brandl committed 03f24d0

Some speedups in pytree.
Add Cython parse.py replacement, yielding a 2x speedup in parsing.

Comments (0)

Files changed (2)

sphinx/pycode/pgen2/parse.pyx

+# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+"""Parser engine for the grammar tables generated by pgen.
+
+The grammar table must be loaded first.
+
+See Parser/parser.c in the Python distribution for additional info on
+how this parsing engine works.
+
+"""
+
+from sphinx.pycode.pytree import Node, Leaf
+
+DEF NAME = 1
+
+class ParseError(Exception):
+    """Exception to signal the parser is stuck."""
+
+    def __init__(self, msg, type, value, context):
+        Exception.__init__(self, "%s: type=%r, value=%r, context=%r" %
+                           (msg, type, value, context))
+        self.msg = msg
+        self.type = type
+        self.value = value
+        self.context = context
+
+
+cdef class Parser:
+    cdef public grammar, stack, rootnode, used_names
+    cdef _grammar_dfas, _grammar_labels, _grammar_keywords, _grammar_tokens
+    cdef _grammar_number2symbol
+
+    def __init__(self, grammar, convert=None):
+        self.grammar = grammar
+        #self.convert = convert or noconvert
+
+        self._grammar_dfas = grammar.dfas
+        self._grammar_labels = grammar.labels
+        self._grammar_keywords = grammar.keywords
+        self._grammar_tokens = grammar.tokens
+        self._grammar_number2symbol = grammar.number2symbol
+
+    def setup(self, start=None):
+        if start is None:
+            start = self.grammar.start
+        # Each stack entry is a tuple: (dfa, state, node).
+        # A node is a tuple: (type, value, context, children),
+        # where children is a list of nodes or None, and context may be None.
+        newnode = (start, None, None, [])
+        stackentry = (self._grammar_dfas[start], 0, newnode)
+        self.stack = [stackentry]
+        self.rootnode = None
+        self.used_names = set() # Aliased to self.rootnode.used_names in pop()
+
+    def addtoken(self, type, value, context):
+        """Add a token; return True iff this is the end of the program."""
+        cdef int ilabel, i, t, state, newstate
+        # Map from token to label
+        ilabel = self.classify(type, value, context)
+        # Loop until the token is shifted; may raise exceptions
+        while True:
+            dfa, state, node = self.stack[-1]
+            states, first = dfa
+            arcs = states[state]
+            # Look for a state with this label
+            for i, newstate in arcs:
+                t, v = self._grammar_labels[i]
+                if ilabel == i:
+                    # Look it up in the list of labels
+                    ## assert t < 256
+                    # Shift a token; we're done with it
+                    self.shift(type, value, newstate, context)
+                    # Pop while we are in an accept-only state
+                    state = newstate
+                    while states[state] == [(0, state)]:
+                        self.pop()
+                        if not self.stack:
+                            # Done parsing!
+                            return True
+                        dfa, state, node = self.stack[-1]
+                        states, first = dfa
+                    # Done with this token
+                    return False
+                elif t >= 256:
+                    # See if it's a symbol and if we're in its first set
+                    itsdfa = self._grammar_dfas[t]
+                    itsstates, itsfirst = itsdfa
+                    if ilabel in itsfirst:
+                        # Push a symbol
+                        self.push(t, itsdfa, newstate, context)
+                        break # To continue the outer while loop
+            else:
+                if (0, state) in arcs:
+                    # An accepting state, pop it and try something else
+                    self.pop()
+                    if not self.stack:
+                        # Done parsing, but another token is input
+                        raise ParseError("too much input",
+                                         type, value, context)
+                else:
+                    # No success finding a transition
+                    raise ParseError("bad input", type, value, context)
+
+    cdef int classify(self, type, value, context):
+        """Turn a token into a label.  (Internal)"""
+        if type == NAME:
+            # Keep a listing of all used names
+            self.used_names.add(value)
+            # Check for reserved words
+            ilabel = self._grammar_keywords.get(value)
+            if ilabel is not None:
+                return ilabel
+        ilabel = self._grammar_tokens.get(type)
+        if ilabel is None:
+            raise ParseError("bad token", type, value, context)
+        return ilabel
+
+    cdef void shift(self, type, value, newstate, context):
+        """Shift a token.  (Internal)"""
+        dfa, state, node = self.stack[-1]
+        newnode = (type, value, context, None)
+        newnode = self.convert(newnode)
+        if newnode is not None:
+            node[-1].append(newnode)
+        self.stack[-1] = (dfa, newstate, node)
+
+    cdef void push(self, type, newdfa, newstate, context):
+        """Push a nonterminal.  (Internal)"""
+        dfa, state, node = self.stack[-1]
+        newnode = (type, None, context, [])
+        self.stack[-1] = (dfa, newstate, node)
+        self.stack.append((newdfa, 0, newnode))
+
+    cdef void pop(self):
+        """Pop a nonterminal.  (Internal)"""
+        popdfa, popstate, popnode = self.stack.pop()
+        newnode = self.convert(popnode)
+        if newnode is not None:
+            if self.stack:
+                dfa, state, node = self.stack[-1]
+                node[-1].append(newnode)
+            else:
+                self.rootnode = newnode
+                self.rootnode.used_names = self.used_names
+
+    cdef convert(self, raw_node):
+        type, value, context, children = raw_node
+        if children or type in self._grammar_number2symbol:
+            # If there's exactly one child, return that child instead of
+            # creating a new node.
+            if len(children) == 1:
+                return children[0]
+            return Node(type, children, context=context)
+        else:
+            return Leaf(type, value, context=context)

sphinx/pycode/pytree.py

     children = ()  # Tuple of subnodes
     was_changed = False
 
-    def __new__(cls, *args, **kwds):
-        """Constructor that prevents Base from being instantiated."""
-        assert cls is not Base, "Cannot instantiate Base"
-        return object.__new__(cls)
-
     def __eq__(self, other):
         """Compares two nodes for equality.
 
 
     """Concrete implementation for interior nodes."""
 
-    def __init__(self, type, children, context=None, prefix=None):
+    def __init__(self, type, children, context=None):
         """Initializer.
 
         Takes a type constant (a symbol number >= 256), a sequence of
 
         As a side effect, the parent pointers of the children are updated.
         """
-        assert type >= 256, type
+        # assert type >= 256, type
         self.type = type
         self.children = list(children)
         for ch in self.children:
-            assert ch.parent is None, repr(ch)
+            # assert ch.parent is None, repr(ch)
             ch.parent = self
-        if prefix is not None:
-            self.set_prefix(prefix)
+        # if prefix is not None:
+        #     self.set_prefix(prefix)
 
     def __repr__(self):
         return "%s(%s, %r)" % (self.__class__.__name__,
     lineno = 0   # Line where this token starts in the input
     column = 0   # Column where this token tarts in the input
 
-    def __init__(self, type, value, context=None, prefix=None):
+    def __init__(self, type, value, context=None):
         """Initializer.
 
         Takes a type constant (a token number < 256), a string value,
         and an optional context keyword argument.
         """
-        assert 0 <= type < 256, type
+        # assert 0 <= type < 256, type
         if context is not None:
             self.prefix, (self.lineno, self.column) = context
         self.type = type
         self.value = value
-        if prefix is not None:
-            self.prefix = prefix
+        # if prefix is not None:
+        #     self.prefix = prefix
 
     def __repr__(self):
         return "%s(%r, %r, %r)" % (self.__class__.__name__,
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.