Commits

Daniel Poelzleithner  committed 3e7f910

initial commit of the antlr plugin

  • Participants

Comments (0)

Files changed (44)

File antlr/_grammars.py

+
+# !!! AUTOGENERATED. DO NOT ALTER !!!
+TYPES = {'ecma3': ('ECMAScript', ('JavaScript', 'javascript'), ('*.js', '*.as'), ('text/x-javascript',)), 'python': ('Python', (), (), ('text/x-python',)), 'c': ('C', (), ('*.c', '*.h'), ('text/x-c',)), 'lua': ('Lua', (), ('*.lua',), ('text/x-lua',))}
+MAPPING = {'ecma3': 'ES3', 'python': 'Python', 'c': 'C', 'lua': 'Lua'}

File antlr/antlr.py

+# -*- coding: utf-8 -*- 
+
+# Copyright (c) 2008 The PIDA Project
+
+#Permission is hereby granted, free of charge, to any person obtaining a copy
+#of this software and associated documentation files (the "Software"), to deal
+#in the Software without restriction, including without limitation the rights
+#to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+#copies of the Software, and to permit persons to whom the Software is
+#furnished to do so, subject to the following conditions:
+
+#The above copyright notice and this permission notice shall be included in
+#all copies or substantial portions of the Software.
+
+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+#IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+#FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+#AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+#LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+#OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+#SOFTWARE.
+
+# stdlib
+
+import sys
+import os
+
+# adjust the import path as antrl3 imports itself absolut :(
+P = os.path.dirname(os.path.abspath(__file__))
+if P not in sys.path:
+    sys.path.append(P)
+
+# core
+from pida.core.service import Service
+from pida.core.events import EventsConfig
+from pida.core.actions import ActionsConfig, TYPE_NORMAL, TYPE_TOGGLE
+from pida.core.options import OptionsConfig
+from pida.core.features import FeaturesConfig
+
+from pida.core.languages import Validator, Outliner
+
+import antlr3
+
+import _grammars
+
+class AntlOutliner(Outliner):
+    def parse(self):
+        print self.document
+        
+
+
+class AntlrFeatures(FeaturesConfig):
+
+    def subscribe_all_foreign(self):
+        self.subscribe_foreign('filemanager', 'file_hidden_check',
+            self.python)
+
+class AntlrOptionsConfig(OptionsConfig):
+
+    def create_options(self):
+        self.create_option(
+            'python_for_executing',
+            _('Python Executable for executing'),
+            str,
+            'python',
+            _('The Python executable when executing a module'),
+        )
+
+
+class AntlrEventsConfig(EventsConfig):
+
+    def subscribe_all_foreign(self):
+        self.subscribe_foreign('buffer', 'document-changed', self.on_document_changed)
+        self.subscribe_foreign('buffer', 'document-saved', self.on_document_changed)
+
+    def on_document_changed(self, document):
+        pass
+
+class AntlrActionsConfig(ActionsConfig):
+    
+    def create_actions(self):
+        self.create_action(
+            'execute_python',
+            TYPE_NORMAL,
+            _('Execute Python Module'),
+            _('Execute the current Python module in a shell'),
+            gtk.STOCK_EXECUTE,
+            self.on_python_execute,
+        )
+
+        self.create_action(
+            'show_python_errors',
+            TYPE_TOGGLE,
+            _('Python Error Viewer'),
+            _('Show the python error browser'),
+            'error',
+            self.on_show_errors,
+        )
+
+        self.create_action(
+            'show_python_source',
+            TYPE_TOGGLE,
+            _('Python Source Viewer'),
+            _('Show the python source browser'),
+            'info',
+            self.on_show_source,
+        )
+
+    def on_python_execute(self, action):
+        self.svc.execute_current_document()
+
+    def on_show_errors(self, action):
+        if action.get_active():
+            self.svc.show_errors()
+        else:
+            self.svc.hide_errors()
+
+    def on_show_source(self, action):
+        if action.get_active():
+            self.svc.show_source()
+        else:
+            self.svc.hide_source()
+
+
+# Service class
+class Antlr(Service):
+    """Service for all things Python""" 
+    
+    #events_config = PythonEventsConfig
+    #actions_config = PythonActionsConfig
+    #options_config = PythonOptionsConfig
+    #features_config = PythonFeatures
+
+    def pre_start(self):
+        """Start the service"""
+        mgr = self.boss.get_service('language').doctypes
+        import _grammars
+        # add new types if not already added
+        mgr.add_map(_grammars.TYPES)
+
+
+# Required Service attribute for service loading
+Service = Antlr
+
+
+
+# vim:set shiftwidth=4 tabstop=4 expandtab textwidth=79:

File antlr/antlr3/__init__.py

+""" @package antlr3
+@brief ANTLR3 runtime package
+
+This module contains all support classes, which are needed to use recognizers
+generated by ANTLR3.
+
+@mainpage
+
+\note Please be warned that the line numbers in the API documentation do not
+match the real locations in the source code of the package. This is an
+unintended artifact of doxygen, which I could only convince to use the
+correct module names by concatenating all files from the package into a single
+module file...
+
+Here is a little overview over the most commonly used classes provided by
+this runtime:
+
+@section recognizers Recognizers
+
+These recognizers are baseclasses for the code which is generated by ANTLR3.
+
+- BaseRecognizer: Base class with common recognizer functionality.
+- Lexer: Base class for lexers.
+- Parser: Base class for parsers.
+- tree.TreeParser: Base class for %tree parser.
+
+@section streams Streams
+
+Each recognizer pulls its input from one of the stream classes below. Streams
+handle stuff like buffering, look-ahead and seeking.
+
+A character stream is usually the first element in the pipeline of a typical
+ANTLR3 application. It is used as the input for a Lexer.
+
+- ANTLRStringStream: Reads from a string objects. The input should be a unicode
+  object, or ANTLR3 will have trouble decoding non-ascii data.
+- ANTLRFileStream: Opens a file and read the contents, with optional character
+  decoding.
+- ANTLRInputStream: Reads the date from a file-like object, with optional
+  character decoding.
+
+A Parser needs a TokenStream as input (which in turn is usually fed by a
+Lexer):
+
+- CommonTokenStream: A basic and most commonly used TokenStream
+  implementation.
+- TokenRewriteStream: A modification of CommonTokenStream that allows the
+  stream to be altered (by the Parser). See the 'tweak' example for a usecase.
+
+And tree.TreeParser finally fetches its input from a tree.TreeNodeStream:
+
+- tree.CommonTreeNodeStream: A basic and most commonly used tree.TreeNodeStream
+  implementation.
+  
+
+@section tokenstrees Tokens and Trees
+
+A Lexer emits Token objects which are usually buffered by a TokenStream. A
+Parser can build a Tree, if the output=AST option has been set in the grammar.
+
+The runtime provides these Token implementations:
+
+- CommonToken: A basic and most commonly used Token implementation.
+- ClassicToken: A Token object as used in ANTLR 2.x, used to %tree
+  construction.
+
+Tree objects are wrapper for Token objects.
+
+- tree.CommonTree: A basic and most commonly used Tree implementation.
+
+A tree.TreeAdaptor is used by the parser to create tree.Tree objects for the
+input Token objects.
+
+- tree.CommonTreeAdaptor: A basic and most commonly used tree.TreeAdaptor
+implementation.
+
+
+@section Exceptions
+
+RecognitionException are generated, when a recognizer encounters incorrect
+or unexpected input.
+
+- RecognitionException
+  - MismatchedRangeException
+  - MismatchedSetException
+    - MismatchedNotSetException
+    .
+  - MismatchedTokenException
+  - MismatchedTreeNodeException
+  - NoViableAltException
+  - EarlyExitException
+  - FailedPredicateException
+  .
+.
+
+A tree.RewriteCardinalityException is raised, when the parsers hits a
+cardinality mismatch during AST construction. Although this is basically a
+bug in your grammar, it can only be detected at runtime.
+
+- tree.RewriteCardinalityException
+  - tree.RewriteEarlyExitException
+  - tree.RewriteEmptyStreamException
+  .
+.
+
+"""
+
+# tree.RewriteRuleElementStream
+# tree.RewriteRuleSubtreeStream
+# tree.RewriteRuleTokenStream
+# CharStream
+# DFA
+# TokenSource
+
+# [The "BSD licence"]
+# Copyright (c) 2005-2008 Terence Parr
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+# 3. The name of the author may not be used to endorse or promote products
+#    derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+__version__ = '3.1'
+
+def version_str_to_tuple(version_str):
+    import re
+    import sys
+
+    if version_str == 'HEAD':
+        return (sys.maxint, sys.maxint, sys.maxint, sys.maxint)
+
+    m = re.match(r'(\d+)\.(\d+)(\.(\d+))?(b(\d+))?', version_str)
+    if m is None:
+        raise ValueError("Bad version string %r" % version_str)
+
+    major = int(m.group(1))
+    minor = int(m.group(2))
+    patch = int(m.group(4) or 0)
+    beta = int(m.group(6) or sys.maxint)
+
+    return (major, minor, patch, beta)
+
+
+runtime_version_str = __version__
+runtime_version = version_str_to_tuple(runtime_version_str)
+
+
+from constants import *
+from dfa import *
+from exceptions import *
+from recognizers import *
+from streams import *
+from tokens import *

File antlr/antlr3/compat.py

+"""Compatibility stuff"""
+
+# begin[licence]
+#
+# [The "BSD licence"]
+# Copyright (c) 2005-2008 Terence Parr
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+# 3. The name of the author may not be used to endorse or promote products
+#    derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# end[licence]
+
+try:
+    set = set
+    frozenset = frozenset
+except NameError:
+    from sets import Set as set, ImmutableSet as frozenset
+
+
+try:
+    reversed = reversed
+except NameError:
+    def reversed(l):
+        l = l[:]
+        l.reverse()
+        return l
+
+

File antlr/antlr3/constants.py

+"""ANTLR3 runtime package"""
+
+# begin[licence]
+#
+# [The "BSD licence"]
+# Copyright (c) 2005-2008 Terence Parr
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+# 3. The name of the author may not be used to endorse or promote products
+#    derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# end[licence]
+
+EOF = -1
+
+## All tokens go to the parser (unless skip() is called in that rule)
+# on a particular "channel".  The parser tunes to a particular channel
+# so that whitespace etc... can go to the parser on a "hidden" channel.
+DEFAULT_CHANNEL = 0
+
+## Anything on different channel than DEFAULT_CHANNEL is not parsed
+# by parser.
+HIDDEN_CHANNEL = 99
+
+# Predefined token types
+EOR_TOKEN_TYPE = 1
+
+##
+# imaginary tree navigation type; traverse "get child" link
+DOWN = 2
+##
+#imaginary tree navigation type; finish with a child list
+UP = 3
+
+MIN_TOKEN_TYPE = UP+1
+	
+INVALID_TOKEN_TYPE = 0
+

File antlr/antlr3/dfa.py

+"""ANTLR3 runtime package"""
+
+# begin[licence]
+#
+# [The "BSD licence"]
+# Copyright (c) 2005-2008 Terence Parr
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+# 3. The name of the author may not be used to endorse or promote products
+#    derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# end[licensc]
+
+from antlr3.constants import EOF
+from antlr3.exceptions import NoViableAltException, BacktrackingFailed
+
+
+class DFA(object):
+    """@brief A DFA implemented as a set of transition tables.
+
+    Any state that has a semantic predicate edge is special; those states
+    are generated with if-then-else structures in a specialStateTransition()
+    which is generated by cyclicDFA template.
+    
+    """
+    
+    def __init__(
+        self,
+        recognizer, decisionNumber,
+        eot, eof, min, max, accept, special, transition
+        ):
+        ## Which recognizer encloses this DFA?  Needed to check backtracking
+        self.recognizer = recognizer
+
+        self.decisionNumber = decisionNumber
+        self.eot = eot
+        self.eof = eof
+        self.min = min
+        self.max = max
+        self.accept = accept
+        self.special = special
+        self.transition = transition
+
+
+    def predict(self, input):
+        """
+        From the input stream, predict what alternative will succeed
+	using this DFA (representing the covering regular approximation
+	to the underlying CFL).  Return an alternative number 1..n.  Throw
+	 an exception upon error.
+	"""
+        mark = input.mark()
+        s = 0 # we always start at s0
+        try:
+            for _ in xrange(50000):
+                #print "***Current state = %d" % s
+                
+                specialState = self.special[s]
+                if specialState >= 0:
+                    #print "is special"
+                    s = self.specialStateTransition(specialState, input)
+                    if s == -1:
+                        self.noViableAlt(s, input)
+                        return 0
+                    input.consume()
+                    continue
+
+                if self.accept[s] >= 1:
+                    #print "accept state for alt %d" % self.accept[s]
+                    return self.accept[s]
+
+                # look for a normal char transition
+                c = input.LA(1)
+
+                #print "LA = %d (%r)" % (c, unichr(c) if c >= 0 else 'EOF')
+                #print "range = %d..%d" % (self.min[s], self.max[s])
+
+                if c >= self.min[s] and c <= self.max[s]:
+                    # move to next state
+                    snext = self.transition[s][c-self.min[s]]
+                    #print "in range, next state = %d" % snext
+                    
+                    if snext < 0:
+                        #print "not a normal transition"
+                        # was in range but not a normal transition
+                        # must check EOT, which is like the else clause.
+                        # eot[s]>=0 indicates that an EOT edge goes to another
+                        # state.
+                        if self.eot[s] >= 0: # EOT Transition to accept state?
+                            #print "EOT trans to accept state %d" % self.eot[s]
+                            
+                            s = self.eot[s]
+                            input.consume()
+                            # TODO: I had this as return accept[eot[s]]
+                            # which assumed here that the EOT edge always
+                            # went to an accept...faster to do this, but
+                            # what about predicated edges coming from EOT
+                            # target?
+                            continue
+
+                        #print "no viable alt"
+                        self.noViableAlt(s, input)
+                        return 0
+
+                    s = snext
+                    input.consume()
+                    continue
+
+                if self.eot[s] >= 0:
+                    #print "EOT to %d" % self.eot[s]
+                    
+                    s = self.eot[s]
+                    input.consume()
+                    continue
+
+                # EOF Transition to accept state?
+                if c == EOF and self.eof[s] >= 0:
+                    #print "EOF Transition to accept state %d" \
+                    #  % self.accept[self.eof[s]]
+                    return self.accept[self.eof[s]]
+
+                # not in range and not EOF/EOT, must be invalid symbol
+                self.noViableAlt(s, input)
+                return 0
+
+            else:
+                raise RuntimeError("DFA bang!")
+            
+        finally:
+            input.rewind(mark)
+
+
+    def noViableAlt(self, s, input):
+        if self.recognizer._state.backtracking > 0:
+            raise BacktrackingFailed
+
+        nvae = NoViableAltException(
+            self.getDescription(),
+            self.decisionNumber,
+            s,
+            input
+            )
+
+        self.error(nvae)
+        raise nvae
+
+
+    def error(self, nvae):
+        """A hook for debugging interface"""
+        pass
+
+
+    def specialStateTransition(self, s, input):
+        return -1
+
+
+    def getDescription(self):
+        return "n/a"
+
+
+##     def specialTransition(self, state, symbol):
+##         return 0
+
+
+    def unpack(cls, string):
+        """@brief Unpack the runlength encoded table data.
+
+        Terence implemented packed table initializers, because Java has a
+        size restriction on .class files and the lookup tables can grow
+        pretty large. The generated JavaLexer.java of the Java.g example
+        would be about 15MB with uncompressed array initializers.
+
+        Python does not have any size restrictions, but the compilation of
+        such large source files seems to be pretty memory hungry. The memory
+        consumption of the python process grew to >1.5GB when importing a
+        15MB lexer, eating all my swap space and I was to impacient to see,
+        if it could finish at all. With packed initializers that are unpacked
+        at import time of the lexer module, everything works like a charm.
+        
+        """
+        
+        ret = []
+        for i in range(len(string) / 2):
+            (n, v) = ord(string[i*2]), ord(string[i*2+1])
+
+            # Is there a bitwise operation to do this?
+            if v == 0xFFFF:
+                v = -1
+
+            ret += [v] * n
+
+        return ret
+    
+    unpack = classmethod(unpack)

File antlr/antlr3/dottreegen.py

+""" @package antlr3.dottreegenerator
+@brief ANTLR3 runtime package, tree module
+
+This module contains all support classes for AST construction and tree parsers.
+
+"""
+
+# begin[licence]
+#
+# [The "BSD licence"]
+# Copyright (c) 2005-2008 Terence Parr
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+# 3. The name of the author may not be used to endorse or promote products
+#    derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# end[licence]
+
+# lot's of docstrings are missing, don't complain for now...
+# pylint: disable-msg=C0111
+
+from antlr3.tree import CommonTreeAdaptor
+import stringtemplate3
+
+class DOTTreeGenerator(object):
+    """
+    A utility class to generate DOT diagrams (graphviz) from
+    arbitrary trees.  You can pass in your own templates and
+    can pass in any kind of tree or use Tree interface method.
+    """
+
+    _treeST = stringtemplate3.StringTemplate(
+        template=(
+        "digraph {\n" +
+        "  ordering=out;\n" +
+        "  ranksep=.4;\n" +
+        "  node [shape=plaintext, fixedsize=true, fontsize=11, fontname=\"Courier\",\n" +
+        "        width=.25, height=.25];\n" +
+        "  edge [arrowsize=.5]\n" +
+        "  $nodes$\n" +
+        "  $edges$\n" +
+        "}\n")
+        )
+
+    _nodeST = stringtemplate3.StringTemplate(
+        template="$name$ [label=\"$text$\"];\n"
+        )
+
+    _edgeST = stringtemplate3.StringTemplate(
+        template="$parent$ -> $child$ // \"$parentText$\" -> \"$childText$\"\n"
+        )
+
+    def __init__(self):
+        ## Track node to number mapping so we can get proper node name back
+        self.nodeToNumberMap = {}
+
+        ## Track node number so we can get unique node names
+        self.nodeNumber = 0
+
+
+    def toDOT(self, tree, adaptor=None, treeST=_treeST, edgeST=_edgeST):
+        if adaptor is None:
+            adaptor = CommonTreeAdaptor()
+
+        treeST = treeST.getInstanceOf()
+
+        self.nodeNumber = 0
+        self.toDOTDefineNodes(tree, adaptor, treeST)
+
+        self.nodeNumber = 0
+        self.toDOTDefineEdges(tree, adaptor, treeST, edgeST)
+        return treeST
+
+
+    def toDOTDefineNodes(self, tree, adaptor, treeST, knownNodes=None):
+        if knownNodes is None:
+            knownNodes = set()
+
+        if tree is None:
+            return
+
+        n = adaptor.getChildCount(tree)
+        if n == 0:
+            # must have already dumped as child from previous
+            # invocation; do nothing
+            return
+
+        # define parent node
+        number = self.getNodeNumber(tree)
+        if number not in knownNodes:
+            parentNodeST = self.getNodeST(adaptor, tree)
+            treeST.setAttribute("nodes", parentNodeST)
+            knownNodes.add(number)
+
+        # for each child, do a "<unique-name> [label=text]" node def
+        for i in range(n):
+            child = adaptor.getChild(tree, i)
+            
+            number = self.getNodeNumber(child)
+            if number not in knownNodes:
+                nodeST = self.getNodeST(adaptor, child)
+                treeST.setAttribute("nodes", nodeST)
+                knownNodes.add(number)
+
+            self.toDOTDefineNodes(child, adaptor, treeST, knownNodes)
+
+
+    def toDOTDefineEdges(self, tree, adaptor, treeST, edgeST):
+        if tree is None:
+            return
+
+        n = adaptor.getChildCount(tree)
+        if n == 0:
+            # must have already dumped as child from previous
+            # invocation; do nothing
+            return
+
+        parentName = "n%d" % self.getNodeNumber(tree)
+
+        # for each child, do a parent -> child edge using unique node names
+        parentText = adaptor.getText(tree)
+        for i in range(n):
+            child = adaptor.getChild(tree, i)
+            childText = adaptor.getText(child)
+            childName = "n%d" % self.getNodeNumber(child)
+            edgeST = edgeST.getInstanceOf()
+            edgeST.setAttribute("parent", parentName)
+            edgeST.setAttribute("child", childName)
+            edgeST.setAttribute("parentText", parentText)
+            edgeST.setAttribute("childText", childText)
+            treeST.setAttribute("edges", edgeST)
+            self.toDOTDefineEdges(child, adaptor, treeST, edgeST)
+
+
+    def getNodeST(self, adaptor, t):
+        text = adaptor.getText(t)
+        nodeST = self._nodeST.getInstanceOf()
+        uniqueName = "n%d" % self.getNodeNumber(t)
+        nodeST.setAttribute("name", uniqueName)
+        if text is not None:
+            text = text.replace('"', r'\\"')
+        nodeST.setAttribute("text", text)
+        return nodeST
+
+
+    def getNodeNumber(self, t):
+        try:
+            return self.nodeToNumberMap[t]
+        except KeyError:
+            self.nodeToNumberMap[t] = self.nodeNumber
+            self.nodeNumber += 1
+            return self.nodeNumber - 1
+
+
+def toDOT(tree, adaptor=None, treeST=DOTTreeGenerator._treeST, edgeST=DOTTreeGenerator._edgeST):
+    """
+    Generate DOT (graphviz) for a whole tree not just a node.
+    For example, 3+4*5 should generate:
+
+    digraph {
+        node [shape=plaintext, fixedsize=true, fontsize=11, fontname="Courier",
+            width=.4, height=.2];
+        edge [arrowsize=.7]
+        "+"->3
+        "+"->"*"
+        "*"->4
+        "*"->5
+    }
+
+    Return the ST not a string in case people want to alter.
+
+    Takes a Tree interface object.
+
+    Example of invokation:
+
+        import antlr3
+        import antlr3.extras
+
+        input = antlr3.ANTLRInputStream(sys.stdin)
+        lex = TLexer(input)
+        tokens = antlr3.CommonTokenStream(lex)
+        parser = TParser(tokens)
+        tree = parser.e().tree
+        print tree.toStringTree()
+        st = antlr3.extras.toDOT(t)
+        print st
+        
+    """
+
+    gen = DOTTreeGenerator()
+    return gen.toDOT(tree, adaptor, treeST, edgeST)

File antlr/antlr3/exceptions.py

+"""ANTLR3 exception hierarchy"""
+
+# begin[licence]
+#
+# [The "BSD licence"]
+# Copyright (c) 2005-2008 Terence Parr
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+# 3. The name of the author may not be used to endorse or promote products
+#    derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# end[licence]
+
+from antlr3.constants import INVALID_TOKEN_TYPE
+
+
+class BacktrackingFailed(Exception):
+    """@brief Raised to signal failed backtrack attempt"""
+
+    pass
+
+
+class RecognitionException(Exception):
+    """@brief The root of the ANTLR exception hierarchy.
+
+    To avoid English-only error messages and to generally make things
+    as flexible as possible, these exceptions are not created with strings,
+    but rather the information necessary to generate an error.  Then
+    the various reporting methods in Parser and Lexer can be overridden
+    to generate a localized error message.  For example, MismatchedToken
+    exceptions are built with the expected token type.
+    So, don't expect getMessage() to return anything.
+
+    Note that as of Java 1.4, you can access the stack trace, which means
+    that you can compute the complete trace of rules from the start symbol.
+    This gives you considerable context information with which to generate
+    useful error messages.
+
+    ANTLR generates code that throws exceptions upon recognition error and
+    also generates code to catch these exceptions in each rule.  If you
+    want to quit upon first error, you can turn off the automatic error
+    handling mechanism using rulecatch action, but you still need to
+    override methods mismatch and recoverFromMismatchSet.
+    
+    In general, the recognition exceptions can track where in a grammar a
+    problem occurred and/or what was the expected input.  While the parser
+    knows its state (such as current input symbol and line info) that
+    state can change before the exception is reported so current token index
+    is computed and stored at exception time.  From this info, you can
+    perhaps print an entire line of input not just a single token, for example.
+    Better to just say the recognizer had a problem and then let the parser
+    figure out a fancy report.
+    
+    """
+
+    def __init__(self, input=None):
+        Exception.__init__(self)
+
+	# What input stream did the error occur in?
+        self.input = None
+
+        # What is index of token/char were we looking at when the error
+        # occurred?
+        self.index = None
+
+	# The current Token when an error occurred.  Since not all streams
+	# can retrieve the ith Token, we have to track the Token object.
+	# For parsers.  Even when it's a tree parser, token might be set.
+        self.token = None
+
+	# If this is a tree parser exception, node is set to the node with
+	# the problem.
+        self.node = None
+
+	# The current char when an error occurred. For lexers.
+        self.c = None
+
+	# Track the line at which the error occurred in case this is
+	# generated from a lexer.  We need to track this since the
+        # unexpected char doesn't carry the line info.
+        self.line = None
+
+        self.charPositionInLine = None
+
+        # If you are parsing a tree node stream, you will encounter som
+        # imaginary nodes w/o line/col info.  We now search backwards looking
+        # for most recent token with line/col info, but notify getErrorHeader()
+        # that info is approximate.
+        self.approximateLineInfo = False
+
+        
+        if input is not None:
+            self.input = input
+            self.index = input.index()
+
+            # late import to avoid cyclic dependencies
+            from antlr3.streams import TokenStream, CharStream
+            from antlr3.tree import TreeNodeStream
+
+            if isinstance(self.input, TokenStream):
+                self.token = self.input.LT(1)
+                self.line = self.token.line
+                self.charPositionInLine = self.token.charPositionInLine
+
+            if isinstance(self.input, TreeNodeStream):
+                self.extractInformationFromTreeNodeStream(self.input)
+
+            else:
+                if isinstance(self.input, CharStream):
+                    self.c = self.input.LT(1)
+                    self.line = self.input.line
+                    self.charPositionInLine = self.input.charPositionInLine
+
+                else:
+                    self.c = self.input.LA(1)
+
+    def extractInformationFromTreeNodeStream(self, nodes):
+        from antlr3.tree import Tree, CommonTree
+        from antlr3.tokens import CommonToken
+        
+        self.node = nodes.LT(1)
+        adaptor = nodes.adaptor
+        payload = adaptor.getToken(self.node)
+        if payload is not None:
+            self.token = payload
+            if payload.line <= 0:
+                # imaginary node; no line/pos info; scan backwards
+                i = -1
+                priorNode = nodes.LT(i)
+                while priorNode is not None:
+                    priorPayload = adaptor.getToken(priorNode)
+                    if priorPayload is not None and priorPayload.line > 0:
+                        # we found the most recent real line / pos info
+                        self.line = priorPayload.line
+                        self.charPositionInLine = priorPayload.charPositionInLine
+                        self.approximateLineInfo = True
+                        break
+                    
+                    i -= 1
+                    priorNode = nodes.LT(i)
+                    
+            else: # node created from real token
+                self.line = payload.line
+                self.charPositionInLine = payload.charPositionInLine
+                
+        elif isinstance(self.node, Tree):
+            self.line = self.node.line
+            self.charPositionInLine = self.node.charPositionInLine
+            if isinstance(self.node, CommonTree):
+                self.token = self.node.token
+
+        else:
+            type = adaptor.getType(self.node)
+            text = adaptor.getText(self.node)
+            self.token = CommonToken(type=type, text=text)
+
+     
+    def getUnexpectedType(self):
+        """Return the token type or char of the unexpected input element"""
+
+        from antlr3.streams import TokenStream
+        from antlr3.tree import TreeNodeStream
+
+        if isinstance(self.input, TokenStream):
+            return self.token.type
+
+        elif isinstance(self.input, TreeNodeStream):
+            adaptor = self.input.treeAdaptor
+            return adaptor.getType(self.node)
+
+        else:
+            return self.c
+
+    unexpectedType = property(getUnexpectedType)
+    
+
+class MismatchedTokenException(RecognitionException):
+    """@brief A mismatched char or Token or tree node."""
+    
+    def __init__(self, expecting, input):
+        RecognitionException.__init__(self, input)
+        self.expecting = expecting
+        
+
+    def __str__(self):
+        #return "MismatchedTokenException("+self.expecting+")"
+        return "MismatchedTokenException(%r!=%r)" % (
+            self.getUnexpectedType(), self.expecting
+            )
+    __repr__ = __str__
+
+
+class UnwantedTokenException(MismatchedTokenException):
+    """An extra token while parsing a TokenStream"""
+
+    def getUnexpectedToken(self):
+        return self.token
+
+
+    def __str__(self):
+        exp = ", expected %s" % self.expecting
+        if self.expecting == INVALID_TOKEN_TYPE:
+            exp = ""
+
+        if self.token is None:
+            return "UnwantedTokenException(found=%s%s)" % (None, exp)
+
+        return "UnwantedTokenException(found=%s%s)" % (self.token.text, exp)
+    __repr__ = __str__
+
+
+class MissingTokenException(MismatchedTokenException):
+    """
+    We were expecting a token but it's not found.  The current token
+    is actually what we wanted next.
+    """
+
+    def __init__(self, expecting, input, inserted):
+        MismatchedTokenException.__init__(self, expecting, input)
+
+        self.inserted = inserted
+
+
+    def getMissingType(self):
+        return self.expecting
+
+
+    def __str__(self):
+        if self.inserted is not None and self.token is not None:
+            return "MissingTokenException(inserted %r at %r)" % (
+                self.inserted, self.token.text)
+
+        if self.token is not None:
+            return "MissingTokenException(at %r)" % self.token.text
+
+        return "MissingTokenException"
+    __repr__ = __str__
+
+
+class MismatchedRangeException(RecognitionException):
+    """@brief The next token does not match a range of expected types."""
+
+    def __init__(self, a, b, input):
+        RecognitionException.__init__(self, input)
+
+        self.a = a
+        self.b = b
+        
+
+    def __str__(self):
+        return "MismatchedRangeException(%r not in [%r..%r])" % (
+            self.getUnexpectedType(), self.a, self.b
+            )
+    __repr__ = __str__
+    
+
+class MismatchedSetException(RecognitionException):
+    """@brief The next token does not match a set of expected types."""
+
+    def __init__(self, expecting, input):
+        RecognitionException.__init__(self, input)
+
+        self.expecting = expecting
+        
+
+    def __str__(self):
+        return "MismatchedSetException(%r not in %r)" % (
+            self.getUnexpectedType(), self.expecting
+            )
+    __repr__ = __str__
+
+
+class MismatchedNotSetException(MismatchedSetException):
+    """@brief Used for remote debugger deserialization"""
+    
+    def __str__(self):
+        return "MismatchedNotSetException(%r!=%r)" % (
+            self.getUnexpectedType(), self.expecting
+            )
+    __repr__ = __str__
+
+
+class NoViableAltException(RecognitionException):
+    """@brief Unable to decide which alternative to choose."""
+
+    def __init__(
+        self, grammarDecisionDescription, decisionNumber, stateNumber, input
+        ):
+        RecognitionException.__init__(self, input)
+
+        self.grammarDecisionDescription = grammarDecisionDescription
+        self.decisionNumber = decisionNumber
+        self.stateNumber = stateNumber
+
+
+    def __str__(self):
+        return "NoViableAltException(%r!=[%r])" % (
+            self.unexpectedType, self.grammarDecisionDescription
+            )
+    __repr__ = __str__
+    
+
+class EarlyExitException(RecognitionException):
+    """@brief The recognizer did not match anything for a (..)+ loop."""
+
+    def __init__(self, decisionNumber, input):
+        RecognitionException.__init__(self, input)
+
+        self.decisionNumber = decisionNumber
+
+
+class FailedPredicateException(RecognitionException):
+    """@brief A semantic predicate failed during validation.
+
+    Validation of predicates
+    occurs when normally parsing the alternative just like matching a token.
+    Disambiguating predicate evaluation occurs when we hoist a predicate into
+    a prediction decision.
+    """
+
+    def __init__(self, input, ruleName, predicateText):
+        RecognitionException.__init__(self, input)
+        
+        self.ruleName = ruleName
+        self.predicateText = predicateText
+
+
+    def __str__(self):
+        return "FailedPredicateException("+self.ruleName+",{"+self.predicateText+"}?)"
+    __repr__ = __str__
+    
+
+class MismatchedTreeNodeException(RecognitionException):
+    """@brief The next tree mode does not match the expected type."""
+
+    def __init__(self, expecting, input):
+        RecognitionException.__init__(self, input)
+        
+        self.expecting = expecting
+
+    def __str__(self):
+        return "MismatchedTreeNodeException(%r!=%r)" % (
+            self.getUnexpectedType(), self.expecting
+            )
+    __repr__ = __str__

File antlr/antlr3/extras.py

+""" @package antlr3.dottreegenerator
+@brief ANTLR3 runtime package, tree module
+
+This module contains all support classes for AST construction and tree parsers.
+
+"""
+
+# begin[licence]
+#
+# [The "BSD licence"]
+# Copyright (c) 2005-2008 Terence Parr
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+# 3. The name of the author may not be used to endorse or promote products
+#    derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# end[licence]
+
+# lot's of docstrings are missing, don't complain for now...
+# pylint: disable-msg=C0111
+
+from treewizard import TreeWizard
+
+try:
+    from antlr3.dottreegen import toDOT
+except ImportError, exc:
+    def toDOT(*args, **kwargs):
+        raise exc

File antlr/antlr3/main.py

+"""ANTLR3 runtime package"""
+
+# begin[licence]
+#
+# [The "BSD licence"]
+# Copyright (c) 2005-2008 Terence Parr
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+# 3. The name of the author may not be used to endorse or promote products
+#    derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# end[licence]
+
+
+import sys
+import optparse
+
+import antlr3
+
+
+class _Main(object):
+    def __init__(self):
+        self.stdin = sys.stdin
+        self.stdout = sys.stdout
+        self.stderr = sys.stderr
+
+        
+    def parseOptions(self, argv):
+        optParser = optparse.OptionParser()
+        optParser.add_option(
+            "--encoding",
+            action="store",
+            type="string",
+            dest="encoding"
+            )
+        optParser.add_option(
+            "--input",
+            action="store",
+            type="string",
+            dest="input"
+            )
+        optParser.add_option(
+            "--interactive", "-i",
+            action="store_true",
+            dest="interactive"
+            )
+        optParser.add_option(
+            "--no-output",
+            action="store_true",
+            dest="no_output"
+            )
+        optParser.add_option(
+            "--profile",
+            action="store_true",
+            dest="profile"
+            )
+        optParser.add_option(
+            "--hotshot",
+            action="store_true",
+            dest="hotshot"
+            )
+
+        self.setupOptions(optParser)
+        
+        return optParser.parse_args(argv[1:])
+
+
+    def setupOptions(self, optParser):
+        pass
+
+
+    def execute(self, argv):
+        options, args = self.parseOptions(argv)
+
+        self.setUp(options)
+        
+        if options.interactive:
+            while True:
+                try:
+                    input = raw_input(">>> ")
+                except (EOFError, KeyboardInterrupt):
+                    self.stdout.write("\nBye.\n")
+                    break
+            
+                inStream = antlr3.ANTLRStringStream(input)
+                self.parseStream(options, inStream)
+            
+        else:
+            if options.input is not None:
+                inStream = antlr3.ANTLRStringStream(options.input)
+
+            elif len(args) == 1 and args[0] != '-':
+                inStream = antlr3.ANTLRFileStream(
+                    args[0], encoding=options.encoding
+                    )
+
+            else:
+                inStream = antlr3.ANTLRInputStream(
+                    self.stdin, encoding=options.encoding
+                    )
+
+            if options.profile:
+                try:
+                    import cProfile as profile
+                except ImportError:
+                    import profile
+
+                profile.runctx(
+                    'self.parseStream(options, inStream)',
+                    globals(),
+                    locals(),
+                    'profile.dat'
+                    )
+
+                import pstats
+                stats = pstats.Stats('profile.dat')
+                stats.strip_dirs()
+                stats.sort_stats('time')
+                stats.print_stats(100)
+
+            elif options.hotshot:
+                import hotshot
+
+                profiler = hotshot.Profile('hotshot.dat')
+                profiler.runctx(
+                    'self.parseStream(options, inStream)',
+                    globals(),
+                    locals()
+                    )
+
+            else:
+                self.parseStream(options, inStream)
+
+
+    def setUp(self, options):
+        pass
+
+    
+    def parseStream(self, options, inStream):
+        raise NotImplementedError
+
+
+    def write(self, options, text):
+        if not options.no_output:
+            self.stdout.write(text)
+
+
+    def writeln(self, options, text):
+        self.write(options, text + '\n')
+
+
+class LexerMain(_Main):
+    def __init__(self, lexerClass):
+        _Main.__init__(self)
+
+        self.lexerClass = lexerClass
+        
+    
+    def parseStream(self, options, inStream):
+        lexer = self.lexerClass(inStream)
+        for token in lexer:
+            self.writeln(options, str(token))
+
+
+class ParserMain(_Main):
+    def __init__(self, lexerClassName, parserClass):
+        _Main.__init__(self)
+
+        self.lexerClassName = lexerClassName
+        self.lexerClass = None
+        self.parserClass = parserClass
+        
+    
+    def setupOptions(self, optParser):
+        optParser.add_option(
+            "--lexer",
+            action="store",
+            type="string",
+            dest="lexerClass",
+            default=self.lexerClassName
+            )
+        optParser.add_option(
+            "--rule",
+            action="store",
+            type="string",
+            dest="parserRule"
+            )
+
+
+    def setUp(self, options):
+        lexerMod = __import__(options.lexerClass)
+        self.lexerClass = getattr(lexerMod, options.lexerClass)
+
+        
+    def parseStream(self, options, inStream):
+        lexer = self.lexerClass(inStream)
+        tokenStream = antlr3.CommonTokenStream(lexer)
+        parser = self.parserClass(tokenStream)
+        result = getattr(parser, options.parserRule)()
+        if result is not None:
+            if hasattr(result, 'tree'):
+                if result.tree is not None:
+                    self.writeln(options, result.tree.toStringTree())
+            else:
+                self.writeln(options, repr(result))
+
+
+class WalkerMain(_Main):
+    def __init__(self, walkerClass):
+        _Main.__init__(self)
+
+        self.lexerClass = None
+        self.parserClass = None
+        self.walkerClass = walkerClass
+        
+    
+    def setupOptions(self, optParser):
+        optParser.add_option(
+            "--lexer",
+            action="store",
+            type="string",
+            dest="lexerClass",
+            default=None
+            )
+        optParser.add_option(
+            "--parser",
+            action="store",
+            type="string",
+            dest="parserClass",
+            default=None
+            )
+        optParser.add_option(
+            "--parser-rule",
+            action="store",
+            type="string",
+            dest="parserRule",
+            default=None
+            )
+        optParser.add_option(
+            "--rule",
+            action="store",
+            type="string",
+            dest="walkerRule"
+            )
+
+
+    def setUp(self, options):
+        lexerMod = __import__(options.lexerClass)
+        self.lexerClass = getattr(lexerMod, options.lexerClass)
+        parserMod = __import__(options.parserClass)
+        self.parserClass = getattr(parserMod, options.parserClass)
+
+        
+    def parseStream(self, options, inStream):
+        lexer = self.lexerClass(inStream)
+        tokenStream = antlr3.CommonTokenStream(lexer)
+        parser = self.parserClass(tokenStream)
+        result = getattr(parser, options.parserRule)()
+        if result is not None:
+            assert hasattr(result, 'tree'), "Parser did not return an AST"
+            nodeStream = antlr3.tree.CommonTreeNodeStream(result.tree)
+            nodeStream.setTokenStream(tokenStream)
+            walker = self.walkerClass(nodeStream)
+            result = getattr(walker, options.walkerRule)()
+            if result is not None:
+                if hasattr(result, 'tree'):
+                    self.writeln(options, result.tree.toStringTree())
+                else:
+                    self.writeln(options, repr(result))
+

File antlr/antlr3/recognizers.py

+"""ANTLR3 runtime package"""
+
+# begin[licence]
+#
+# [The "BSD licence"]
+# Copyright (c) 2005-2008 Terence Parr
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+# 3. The name of the author may not be used to endorse or promote products
+#    derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# end[licence]
+
+import sys
+import inspect
+
+from antlr3 import runtime_version, runtime_version_str
+from antlr3.constants import DEFAULT_CHANNEL, HIDDEN_CHANNEL, EOF, \
+     EOR_TOKEN_TYPE, INVALID_TOKEN_TYPE
+from antlr3.exceptions import RecognitionException, MismatchedTokenException, \
+     MismatchedRangeException, MismatchedTreeNodeException, \
+     NoViableAltException, EarlyExitException, MismatchedSetException, \
+     MismatchedNotSetException, FailedPredicateException, \
+     BacktrackingFailed, UnwantedTokenException, MissingTokenException
+from antlr3.tokens import CommonToken, EOF_TOKEN, SKIP_TOKEN
+from antlr3.compat import set, frozenset, reversed
+
+
+class RecognizerSharedState(object):
+    """
+    The set of fields needed by an abstract recognizer to recognize input
+    and recover from errors etc...  As a separate state object, it can be
+    shared among multiple grammars; e.g., when one grammar imports another.
+
+    These fields are publically visible but the actual state pointer per
+    parser is protected.
+    """
+
+    def __init__(self):
+        # Track the set of token types that can follow any rule invocation.
+        # Stack grows upwards.
+        self.following = []
+
+        # This is true when we see an error and before having successfully
+        # matched a token.  Prevents generation of more than one error message
+        # per error.
+        self.errorRecovery = False
+
+        # The index into the input stream where the last error occurred.
+        # This is used to prevent infinite loops where an error is found
+        # but no token is consumed during recovery...another error is found,
+        # ad naseum.  This is a failsafe mechanism to guarantee that at least
+        # one token/tree node is consumed for two errors.
+        self.lastErrorIndex = -1
+
+        # If 0, no backtracking is going on.  Safe to exec actions etc...
+        # If >0 then it's the level of backtracking.
+        self.backtracking = 0
+
+        # An array[size num rules] of Map<Integer,Integer> that tracks
+        # the stop token index for each rule.  ruleMemo[ruleIndex] is
+        # the memoization table for ruleIndex.  For key ruleStartIndex, you
+        # get back the stop token for associated rule or MEMO_RULE_FAILED.
+        #
+        # This is only used if rule memoization is on (which it is by default).
+        self.ruleMemo = None
+
+        ## Did the recognizer encounter a syntax error?  Track how many.
+        self.syntaxErrors = 0
+
+
+        # LEXER FIELDS (must be in same state object to avoid casting
+        # constantly in generated code and Lexer object) :(
+
+
+	## The goal of all lexer rules/methods is to create a token object.
+        # This is an instance variable as multiple rules may collaborate to
+        # create a single token.  nextToken will return this object after
+        # matching lexer rule(s).  If you subclass to allow multiple token
+        # emissions, then set this to the last token to be matched or
+        # something nonnull so that the auto token emit mechanism will not
+        # emit another token.
+        self.token = None
+
+        ## What character index in the stream did the current token start at?
+        # Needed, for example, to get the text for current token.  Set at
+        # the start of nextToken.
+        self.tokenStartCharIndex = -1
+
+        ## The line on which the first character of the token resides
+        self.tokenStartLine = None
+
+        ## The character position of first character within the line
+        self.tokenStartCharPositionInLine = None
+
+        ## The channel number for the current token
+        self.channel = None
+
+        ## The token type for the current token
+        self.type = None
+
+        ## You can set the text for the current token to override what is in
+        # the input char buffer.  Use setText() or can set this instance var.
+        self.text = None
+        
+
+class BaseRecognizer(object):
+    """
+    @brief Common recognizer functionality.
+    
+    A generic recognizer that can handle recognizers generated from
+    lexer, parser, and tree grammars.  This is all the parsing
+    support code essentially; most of it is error recovery stuff and
+    backtracking.
+    """
+
+    MEMO_RULE_FAILED = -2
+    MEMO_RULE_UNKNOWN = -1
+
+    # copies from Token object for convenience in actions
+    DEFAULT_TOKEN_CHANNEL = DEFAULT_CHANNEL
+
+    # for convenience in actions
+    HIDDEN = HIDDEN_CHANNEL
+
+    # overridden by generated subclasses
+    tokenNames = None
+
+    # The antlr_version attribute has been introduced in 3.1. If it is not
+    # overwritten in the generated recognizer, we assume a default of 3.0.1.
+    antlr_version = (3, 0, 1, 0)
+    antlr_version_str = "3.0.1"
+
+    def __init__(self, state=None):
+        # Input stream of the recognizer. Must be initialized by a subclass.
+        self.input = None
+
+        ## State of a lexer, parser, or tree parser are collected into a state
+        # object so the state can be shared.  This sharing is needed to
+        # have one grammar import others and share same error variables
+        # and other state variables.  It's a kind of explicit multiple
+        # inheritance via delegation of methods and shared state.
+        if state is None:
+            state = RecognizerSharedState()
+        self._state = state
+
+        if self.antlr_version > runtime_version:
+            raise RuntimeError(
+                "ANTLR version mismatch: "
+                "The recognizer has been generated by V%s, but this runtime "
+                "is V%s. Please use the V%s runtime or higher."
+                % (self.antlr_version_str,
+                   runtime_version_str,
+                   self.antlr_version_str))
+        elif (self.antlr_version < (3, 1, 0, 0) and
+              self.antlr_version != runtime_version):
+            # FIXME: make the runtime compatible with 3.0.1 codegen
+            # and remove this block.
+            raise RuntimeError(
+                "ANTLR version mismatch: "
+                "The recognizer has been generated by V%s, but this runtime "
+                "is V%s. Please use the V%s runtime."
+                % (self.antlr_version_str,
+                   runtime_version_str,
+                   self.antlr_version_str))
+
+    # this one only exists to shut up pylint :(
+    def setInput(self, input):
+        self.input = input
+
+        
+    def reset(self):
+        """
+        reset the parser's state; subclasses must rewinds the input stream
+        """
+        
+        # wack everything related to error recovery
+        if self._state is None:
+            # no shared state work to do
+            return
+        
+        self._state.following = []
+        self._state.errorRecovery = False
+        self._state.lastErrorIndex = -1
+        self._state.syntaxErrors = 0
+        # wack everything related to backtracking and memoization
+        self._state.backtracking = 0
+        if self._state.ruleMemo is not None:
+            self._state.ruleMemo = {}
+
+
+    def match(self, input, ttype, follow):
+        """
+        Match current input symbol against ttype.  Attempt
+        single token insertion or deletion error recovery.  If
+        that fails, throw MismatchedTokenException.
+
+        To turn off single token insertion or deletion error
+        recovery, override mismatchRecover() and have it call
+        plain mismatch(), which does not recover.  Then any error
+        in a rule will cause an exception and immediate exit from
+        rule.  Rule would recover by resynchronizing to the set of
+        symbols that can follow rule ref.
+        """
+        
+        matchedSymbol = self.getCurrentInputSymbol(input)
+        if self.input.LA(1) == ttype:
+            self.input.consume()
+            self._state.errorRecovery = False
+            return matchedSymbol
+
+        if self._state.backtracking > 0:
+            # FIXME: need to return matchedSymbol here as well. damn!!
+            raise BacktrackingFailed
+
+        matchedSymbol = self.recoverFromMismatchedToken(input, ttype, follow)
+        return matchedSymbol
+
+
+    def matchAny(self, input):
+        """Match the wildcard: in a symbol"""
+
+        self._state.errorRecovery = False
+        self.input.consume()
+
+
+    def mismatchIsUnwantedToken(self, input, ttype):
+        return input.LA(2) == ttype
+
+
+    def mismatchIsMissingToken(self, input, follow):
+        if follow is None:
+            # we have no information about the follow; we can only consume
+            # a single token and hope for the best
+            return False
+        
+        # compute what can follow this grammar element reference
+        if EOR_TOKEN_TYPE in follow:
+            if len(self._state.following) > 0:
+                # remove EOR if we're not the start symbol
+                follow = follow - set([EOR_TOKEN_TYPE])
+
+            viableTokensFollowingThisRule = self.computeContextSensitiveRuleFOLLOW()
+            follow = follow | viableTokensFollowingThisRule
+
+        # if current token is consistent with what could come after set
+        # then we know we're missing a token; error recovery is free to
+        # "insert" the missing token
+        if input.LA(1) in follow or EOR_TOKEN_TYPE in follow:
+            return True
+
+        return False
+
+
+    def mismatch(self, input, ttype, follow):
+        """
+        Factor out what to do upon token mismatch so tree parsers can behave
+        differently.  Override and call mismatchRecover(input, ttype, follow)
+        to get single token insertion and deletion. Use this to turn of
+        single token insertion and deletion. Override mismatchRecover
+        to call this instead.
+        """
+
+        if self.mismatchIsUnwantedToken(input, ttype):
+            raise UnwantedTokenException(ttype, input)
+
+        elif self.mismatchIsMissingToken(input, follow):
+            raise MissingTokenException(ttype, input, None)
+
+        raise MismatchedTokenException(ttype, input)
+
+
+##     def mismatchRecover(self, input, ttype, follow):
+##         if self.mismatchIsUnwantedToken(input, ttype):
+##             mte = UnwantedTokenException(ttype, input)
+
+##         elif self.mismatchIsMissingToken(input, follow):
+##             mte = MissingTokenException(ttype, input)
+
+##         else:
+##             mte = MismatchedTokenException(ttype, input)
+
+##         self.recoverFromMismatchedToken(input, mte, ttype, follow)
+
+
+    def reportError(self, e):
+        """Report a recognition problem.
+            
+        This method sets errorRecovery to indicate the parser is recovering
+        not parsing.  Once in recovery mode, no errors are generated.
+        To get out of recovery mode, the parser must successfully match
+        a token (after a resync).  So it will go:
+
+        1. error occurs
+        2. enter recovery mode, report error
+        3. consume until token found in resynch set
+        4. try to resume parsing
+        5. next match() will reset errorRecovery mode
+
+        If you override, make sure to update syntaxErrors if you care about
+        that.
+        
+        """
+        
+        # if we've already reported an error and have not matched a token
+        # yet successfully, don't report any errors.
+        if self._state.errorRecovery:
+            return
+
+        self._state.syntaxErrors += 1 # don't count spurious
+        self._state.errorRecovery = True
+
+        self.displayRecognitionError(self.tokenNames, e)
+
+
+    def displayRecognitionError(self, tokenNames, e):
+        hdr = self.getErrorHeader(e)
+        msg = self.getErrorMessage(e, tokenNames)
+        self.emitErrorMessage(hdr+" "+msg)
+
+
+    def getErrorMessage(self, e, tokenNames):
+        """
+        What error message should be generated for the various
+        exception types?
+        
+        Not very object-oriented code, but I like having all error message
+        generation within one method rather than spread among all of the
+        exception classes. This also makes it much easier for the exception
+        handling because the exception classes do not have to have pointers back
+        to this object to access utility routines and so on. Also, changing
+        the message for an exception type would be difficult because you
+        would have to subclassing exception, but then somehow get ANTLR
+        to make those kinds of exception objects instead of the default.
+        This looks weird, but trust me--it makes the most sense in terms
+        of flexibility.
+
+        For grammar debugging, you will want to override this to add
+        more information such as the stack frame with
+        getRuleInvocationStack(e, this.getClass().getName()) and,
+        for no viable alts, the decision description and state etc...
+
+        Override this to change the message generated for one or more
+        exception types.
+        """
+
+        if isinstance(e, UnwantedTokenException):
+            tokenName = "<unknown>"
+            if e.expecting == EOF:
+                tokenName = "EOF"
+
+            else:
+                tokenName = self.tokenNames[e.expecting]
+
+            msg = "extraneous input %s expecting %s" % (
+                self.getTokenErrorDisplay(e.getUnexpectedToken()),
+                tokenName
+                )
+
+        elif isinstance(e, MissingTokenException):
+            tokenName = "<unknown>"
+            if e.expecting == EOF:
+                tokenName = "EOF"
+
+            else:
+                tokenName = self.tokenNames[e.expecting]
+
+            msg = "missing %s at %s" % (
+                tokenName, self.getTokenErrorDisplay(e.token)
+                )
+
+        elif isinstance(e, MismatchedTokenException):
+            tokenName = "<unknown>"
+            if e.expecting == EOF:
+                tokenName = "EOF"
+            else:
+                tokenName = self.tokenNames[e.expecting]
+
+            msg = "mismatched input " \
+                  + self.getTokenErrorDisplay(e.token) \
+                  + " expecting " \
+                  + tokenName
+
+        elif isinstance(e, MismatchedTreeNodeException):
+            tokenName = "<unknown>"
+            if e.expecting == EOF:
+                tokenName = "EOF"
+            else:
+                tokenName = self.tokenNames[e.expecting]
+
+            msg = "mismatched tree node: %s expecting %s" \
+                  % (e.node, tokenName)
+
+        elif isinstance(e, NoViableAltException):
+            msg = "no viable alternative at input " \
+                  + self.getTokenErrorDisplay(e.token)
+
+        elif isinstance(e, EarlyExitException):
+            msg = "required (...)+ loop did not match anything at input " \
+                  + self.getTokenErrorDisplay(e.token)
+
+        elif isinstance(e, MismatchedSetException):
+            msg = "mismatched input " \
+                  + self.getTokenErrorDisplay(e.token) \
+                  + " expecting set " \
+                  + repr(e.expecting)
+
+        elif isinstance(e, MismatchedNotSetException):
+            msg = "mismatched input " \
+                  + self.getTokenErrorDisplay(e.token) \
+                  + " expecting set " \
+                  + repr(e.expecting)
+
+        elif isinstance(e, FailedPredicateException):
+            msg = "rule " \
+                  + e.ruleName \
+                  + " failed predicate: {" \
+                  + e.predicateText \
+                  + "}?"
+
+        else:
+            msg = str(e)
+
+        return msg
+    
+
+    def getNumberOfSyntaxErrors(self):
+        """
+        Get number of recognition errors (lexer, parser, tree parser).  Each
+        recognizer tracks its own number.  So parser and lexer each have
+        separate count.  Does not count the spurious errors found between
+        an error and next valid token match
+
+        See also reportError()
+	"""
+        return self._state.syntaxErrors
+
+
+    def getErrorHeader(self, e):
+        """
+        What is the error header, normally line/character position information?
+        """
+        
+        return "line %d:%d" % (e.line, e.charPositionInLine)
+
+
+    def getTokenErrorDisplay(self, t):
+        """
+        How should a token be displayed in an error message? The default
+        is to display just the text, but during development you might
+        want to have a lot of information spit out.  Override in that case
+        to use t.toString() (which, for CommonToken, dumps everything about
+        the token). This is better than forcing you to override a method in
+        your token objects because you don't have to go modify your lexer
+        so that it creates a new Java type.
+        """
+        
+        s = t.text
+        if s is None:
+            if t.type == EOF:
+                s = "<EOF>"
+            else:
+                s = "<"+t.type+">"
+
+        return repr(s)
+    
+
+    def emitErrorMessage(self, msg):