Commits

Georg Brandl committed b355c4a Merge

merge in pycode branch

  • Parent commits 41fc34a, a548126

Files changed (32)

 .*\.pyc
 .*\.egg
+.*\.so
 build/
 dist/
+sphinx/pycode/Grammar.*pickle
 Sphinx.egg-info/
 doc/_build/
 TAGS
-Copyright (c) 2007-2009 by the respective authors (see AUTHORS file).
+Copyright (c) 2007-2009 by the Sphinx team (see AUTHORS file).
 All rights reserved.
 
 License for Sphinx
 Licenses for incorporated software
 ==================================
 
+The pgen2 package, included in this distribution under the name
+sphinx.pycode.pgen2, is available in the Python 2.6 distribution under
+the PSF license agreement for Python:
+
+----------------------------------------------------------------------
+1. This LICENSE AGREEMENT is between the Python Software Foundation
+   ("PSF"), and the Individual or Organization ("Licensee") accessing
+   and otherwise using Python 2.6 software in source or binary form
+   and its associated documentation.
+
+2. Subject to the terms and conditions of this License Agreement, PSF
+   hereby grants Licensee a nonexclusive, royalty-free, world-wide
+   license to reproduce, analyze, test, perform and/or display
+   publicly, prepare derivative works, distribute, and otherwise use
+   Python 2.6 alone or in any derivative version, provided, however,
+   that PSF's License Agreement and PSF's notice of copyright, i.e.,
+   "Copyright © 2001-2008 Python Software Foundation; All Rights
+   Reserved" are retained in Python 2.6 alone or in any derivative
+   version prepared by Licensee.
+
+3. In the event Licensee prepares a derivative work that is based on
+   or incorporates Python 2.6 or any part thereof, and wants to make
+   the derivative work available to others as provided herein, then
+   Licensee hereby agrees to include in any such work a brief summary
+   of the changes made to Python 2.6.
+
+4. PSF is making Python 2.6 available to Licensee on an "AS IS" basis.
+   PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED.  BY
+   WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND DISCLAIMS ANY
+   REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY
+   PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 2.6 WILL NOT INFRINGE
+   ANY THIRD PARTY RIGHTS.
+
+5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
+   2.6 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS
+   AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON
+   2.6, OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY
+   THEREOF.
+
+6. This License Agreement will automatically terminate upon a material
+   breach of its terms and conditions.
+
+7. Nothing in this License Agreement shall be deemed to create any
+   relationship of agency, partnership, or joint venture between PSF
+   and Licensee.  This License Agreement does not grant permission to
+   use PSF trademarks or trade name in a trademark sense to endorse or
+   promote products or services of Licensee, or any third party.
+
+8. By copying, installing or otherwise using Python 2.6, Licensee
+   agrees to be bound by the terms and conditions of this License
+   Agreement.
+----------------------------------------------------------------------
+
 The smartypants module, included as sphinx.util.smartypants,
 is available under the following license:
 
 
 # General substitutions.
 project = 'Sphinx'
-copyright = '2008, Georg Brandl'
+copyright = '2007-2009, Georg Brandl'
 
 # The default replacements for |version| and |release|, also used in various
 # other places throughout the built documents.

File doc/markup/code.rst

       .. literalinclude:: example.py
          :encoding: latin-1
 
+   The directive also supports including only parts of the file.  If it is a
+   Python module, you can select a class, function or method to include using
+   the ``pyobject`` option::
+
+      .. literalinclude:: example.py
+         :pyobject: Timer.start
+
+   This would only include the code lines belonging to the ``start()`` method in
+   the ``Timer`` class within the file.
+
+   Alternatively, you can specify exactly which lines to include by giving a
+   ``lines`` option::
+
+      .. literalinclude:: example.py
+         :lines: 1,3,5-10,20-
+
+   This includes lines 1, 3, 5 to 10, and 20 through the last line.
+
+   Another way to control which part of the file is included is to use the
+   ``start-after`` and ``end-before`` options (or only one of them).  If
+   ``start-after`` is given as a string option, only lines that follow the first
+   line containing that string are included.  If ``end-before`` is given as a
+   string option, only lines that precede the first line containing that string
+   are included.
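+
+   For example, assuming marker comments in the source file (the marker
+   strings here are arbitrary)::
+
+      .. literalinclude:: example.py
+         :start-after: begin timer example
+         :end-before: end timer example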
+
    .. versionadded:: 0.4.3
       The ``encoding`` option.
+   .. versionadded:: 0.6
+      The ``pyobject``, ``lines``, ``start-after`` and ``end-before`` options.
 
 
 .. rubric:: Footnotes

File sphinx/builders/__init__.py

 
     Builder superclass for all builders.
 
-    :copyright: 2007-2008 by Georg Brandl, Sebastian Wiesner, Horst Gutmann.
+    :copyright: Copyright 2007-2009 by the Sphinx team, see AUTHORS.
     :license: BSD, see LICENSE for details.
 """
 

File sphinx/builders/html.py

     except ImportError:
         json = None
 
-
+#: the filename for the inventory of objects
 INVENTORY_FILENAME = 'objects.inv'
+#: the filename for the "last build" file (for serializing builders)
 LAST_BUILD_FILENAME = 'last_build'
 
 

File sphinx/directives/code.py

 from docutils.parsers.rst import directives
 
 from sphinx import addnodes
+from sphinx.util import parselinenos
 
 
 # ------ highlight directive --------------------------------------------------------
         lineno - state_machine.input_offset - 1)))
     fn = path.normpath(path.join(source_dir, rel_fn))
 
+    if 'pyobject' in options and 'lines' in options:
+        return [state.document.reporter.warning(
+            'Cannot use both "pyobject" and "lines" options', line=lineno)]
+
     encoding = options.get('encoding', env.config.source_encoding)
     try:
         f = codecs.open(fn, 'r', encoding)
-        text = f.read()
+        lines = f.readlines()
         f.close()
     except (IOError, OSError):
-        retnode = state.document.reporter.warning(
-            'Include file %r not found or reading it failed' % arguments[0], line=lineno)
+        return [state.document.reporter.warning(
+            'Include file %r not found or reading it failed' % arguments[0],
+            line=lineno)]
     except UnicodeError:
-        retnode = state.document.reporter.warning(
+        return [state.document.reporter.warning(
             'Encoding %r used for reading included file %r seems to '
             'be wrong, try giving an :encoding: option' %
-            (encoding, arguments[0]))
-    else:
-        retnode = nodes.literal_block(text, text, source=fn)
-        retnode.line = 1
-        if options.get('language', ''):
-            retnode['language'] = options['language']
-        if 'linenos' in options:
-            retnode['linenos'] = True
-        state.document.settings.env.note_dependency(rel_fn)
+            (encoding, arguments[0]))]
+
+    objectname = options.get('pyobject')
+    if objectname is not None:
+        from sphinx.pycode import ModuleAnalyzer
+        analyzer = ModuleAnalyzer.for_file(fn, '')
+        tags = analyzer.find_tags()
+        if objectname not in tags:
+            return [state.document.reporter.warning(
+                'Object named %r not found in include file %r' %
+                (objectname, arguments[0]), line=lineno)]
+        else:
+            lines = lines[tags[objectname][1] - 1 : tags[objectname][2] - 1]
+
+    linespec = options.get('lines')
+    if linespec is not None:
+        try:
+            linelist = parselinenos(linespec, len(lines))
+        except ValueError, err:
+            return [state.document.reporter.warning(str(err), line=lineno)]
+        lines = [lines[i] for i in linelist]
+
+    startafter = options.get('start-after')
+    endbefore = options.get('end-before')
+    if startafter is not None or endbefore is not None:
+        use = not startafter
+        res = []
+        for line in lines:
+            if not use and startafter in line:
+                use = True
+            elif use and endbefore is not None and endbefore in line:
+                use = False
+                break
+            elif use:
+                res.append(line)
+        lines = res
+
+    text = ''.join(lines)
+    retnode = nodes.literal_block(text, text, source=fn)
+    retnode.line = 1
+    if options.get('language', ''):
+        retnode['language'] = options['language']
+    if 'linenos' in options:
+        retnode['linenos'] = True
+    state.document.settings.env.note_dependency(rel_fn)
     return [retnode]
 
 literalinclude_directive.options = {'linenos': directives.flag,
-                                    'language': directives.unchanged,
-                                    'encoding': directives.encoding}
+                                    'language': directives.unchanged_required,
+                                    'encoding': directives.encoding,
+                                    'pyobject': directives.unchanged_required,
+                                    'lines': directives.unchanged_required,
+                                    'start-after': directives.unchanged_required,
+                                    'end-before': directives.unchanged_required,
+                                    }
 literalinclude_directive.content = 0
 literalinclude_directive.arguments = (1, 0, 0)
 directives.register_directive('literalinclude', literalinclude_directive)
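
A minimal sketch of what the pyobject handling above relies on, assuming a
file example.py that defines a Timer class with a start() method:
find_tags() maps dotted names to (type, startline, endline) tuples, and the
directive slices the file's lines with them.

    from sphinx.pycode import ModuleAnalyzer

    analyzer = ModuleAnalyzer.for_file('example.py', '')
    tags = analyzer.find_tags()
    # e.g. {'Timer': ('class', 3, 20), 'Timer.start': ('def', 5, 9)}
    dtype, start, end = tags['Timer.start']
    lines = open('example.py').readlines()[start - 1 : end - 1]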

File sphinx/environment.py

 
     Global creation environment.
 
-<<<<<<< local
-    :copyright: 2007-2009 by Georg Brandl.
-=======
     :copyright: Copyright 2007-2009 by the Sphinx team, see AUTHORS.
->>>>>>> other
     :license: BSD, see LICENSE for details.
 """
 

File sphinx/ext/autodoc.py

 from docutils.parsers.rst import directives
 from docutils.statemachine import ViewList
 
-from sphinx.util import rpartition, nested_parse_with_titles
+from sphinx.util import rpartition, nested_parse_with_titles, force_decode
+from sphinx.pycode import ModuleAnalyzer, PycodeError
+from sphinx.util.docstrings import prepare_docstring
 
 clstypes = (type, ClassType)
 try:
 except NameError:
     base_exception = Exception
 
-_charset_re = re.compile(r'coding[:=]\s*([-\w.]+)')
-_module_charsets = {}
 
 py_ext_sig_re = re.compile(
     r'''^ ([\w.]+::)?            # explicit module name
     return False
 
 
-def prepare_docstring(s):
-    """
-    Convert a docstring into lines of parseable reST.  Return it as a list of
-    lines usable for inserting into a docutils ViewList (used as argument
-    of nested_parse().)  An empty line is added to act as a separator between
-    this docstring and following content.
-    """
-    lines = s.expandtabs().splitlines()
-    # Find minimum indentation of any non-blank lines after first line.
-    margin = sys.maxint
-    for line in lines[1:]:
-        content = len(line.lstrip())
-        if content:
-            indent = len(line) - content
-            margin = min(margin, indent)
-    # Remove indentation.
-    if lines:
-        lines[0] = lines[0].lstrip()
-    if margin < sys.maxint:
-        for i in range(1, len(lines)): lines[i] = lines[i][margin:]
-    # Remove any leading blank lines.
-    while lines and not lines[0]:
-        lines.pop(0)
-    # make sure there is an empty line at the end
-    if lines and lines[-1]:
-        lines.append('')
-    return lines
-
-
-def get_module_charset(module):
-    """Return the charset of the given module (cached in _module_charsets)."""
-    if module in _module_charsets:
-        return _module_charsets[module]
-    try:
-        filename = __import__(module, None, None, ['foo']).__file__
-    except (ImportError, AttributeError):
-        return None
-    if filename[-4:].lower() in ('.pyc', '.pyo'):
-        filename = filename[:-1]
-    for line in [linecache.getline(filename, x) for x in (1, 2)]:
-        match = _charset_re.search(line)
-        if match is not None:
-            charset = match.group(1)
-            break
-    else:
-        charset = 'ascii'
-    _module_charsets[module] = charset
-    return charset
-
-
 class RstGenerator(object):
     def __init__(self, options, document, lineno):
         self.options = options
     def warn(self, msg):
         self.warnings.append(self.reporter.warning(msg, line=self.lineno))
 
-    def get_doc(self, what, name, obj):
-        """Format and yield lines of the docstring(s) for the object."""
+    def get_doc(self, what, obj, encoding=None):
+        """Decode and return lines of the docstring(s) for the object."""
         docstrings = []
+
+        # add the regular docstring if present
         if getattr(obj, '__doc__', None):
             docstrings.append(obj.__doc__)
-        # skip some lines in module docstrings if configured
+
+        # skip some lines in module docstrings if configured (deprecated!)
         if what == 'module' and self.env.config.automodule_skip_lines and docstrings:
             docstrings[0] = '\n'.join(docstrings[0].splitlines()
                                       [self.env.config.automodule_skip_lines:])
+
         # for classes, what the "docstring" is can be controlled via an option
         if what in ('class', 'exception'):
             content = self.env.config.autoclass_content
                         docstrings.append(initdocstring)
             # the default is only the class docstring
 
-        # decode the docstrings using the module's source encoding
-        charset = None
-        module = getattr(obj, '__module__', None)
-        if module is not None:
-            charset = get_module_charset(module)
+        # make sure we have Unicode docstrings, then sanitize and split into lines
+        return [prepare_docstring(force_decode(docstring, encoding))
+                for docstring in docstrings]
 
-        for docstring in docstrings:
-            if isinstance(docstring, str):
-                if charset:
-                    docstring = docstring.decode(charset)
-                else:
-                    try:
-                        # try decoding with utf-8, should only work for real UTF-8
-                        docstring = docstring.decode('utf-8')
-                    except UnicodeError:
-                        # last resort -- can't fail
-                        docstring = docstring.decode('latin1')
-            docstringlines = prepare_docstring(docstring)
+    def process_doc(self, docstrings, what, name, obj):
+        """Let the user process the docstrings."""
+        for docstringlines in docstrings:
             if self.env.app:
                 # let extensions preprocess docstrings
                 self.env.app.emit('autodoc-process-docstring',
                           'for automodule %s' % name)
             return (path or '') + base, [], None, None
 
-        elif what in ('exception', 'function', 'class'):
+        elif what in ('exception', 'function', 'class', 'data'):
             if mod is None:
                 if path:
                     mod = path.rstrip('.')
 
         # now, import the module and get object to document
         try:
-            todoc = module = __import__(mod, None, None, ['foo'])
-            if hasattr(module, '__file__') and module.__file__:
-                modfile = module.__file__
-                if modfile[-4:].lower() in ('.pyc', '.pyo'):
-                    modfile = modfile[:-1]
-                self.filename_set.add(modfile)
-            else:
-                modfile = None  # e.g. for builtin and C modules
+            __import__(mod)
+            todoc = module = sys.modules[mod]
             for part in objpath:
                 todoc = getattr(todoc, part)
         except (ImportError, AttributeError), err:
                       (what, str(fullname), err))
             return
 
+        # try to also get a source code analyzer for attribute docs
+        try:
+            analyzer = ModuleAnalyzer.for_module(mod)
+        except PycodeError, err:
+            # no source file -- e.g. for builtin and C modules
+            analyzer = None
+        else:
+            self.filename_set.add(analyzer.srcname)
+
         # check __module__ of object if wanted (for members not given explicitly)
         if check_module:
             if hasattr(todoc, '__module__'):
                 if todoc.__module__ != mod:
                     return
 
+        # make sure that the result starts with an empty line.  This is
+        # necessary for some situations where another directive preprocesses
+        # reST and no starting newline is present
+        self.result.append(u'', '')
+
         # format the object's signature, if any
         try:
             sig = self.format_signature(what, fullname, todoc, args, retann)
                       (fullname, err))
             sig = ''
 
-        # make sure that the result starts with an empty line.  This is
-        # necessary for some situations where another directive preprocesses
-        # reST and no starting newline is present
-        self.result.append(u'', '')
-
         # now, create the directive header
         if what == 'method':
             directive = get_method_type(todoc)
             self.result.append(indent + u'   :noindex:', '<autodoc>')
         self.result.append(u'', '<autodoc>')
 
+        # add inheritance info, if wanted
         if self.options.show_inheritance and what in ('class', 'exception'):
             if len(todoc.__bases__):
                 bases = [b.__module__ == '__builtin__' and
                          u':class:`%s`' % b.__name__ or
                          u':class:`%s.%s`' % (b.__module__, b.__name__)
                          for b in todoc.__bases__]
-                self.result.append(indent + u'   Bases: %s' % ', '.join(bases),
+                self.result.append(indent + _(u'   Bases: %s') % ', '.join(bases),
                                    '<autodoc>')
                 self.result.append(u'', '<autodoc>')
 
         if what != 'module':
             indent += u'   '
 
-        if modfile:
-            sourcename = '%s:docstring of %s' % (modfile, fullname)
+        # add content from attribute documentation
+        if analyzer:
+            sourcename = '%s:docstring of %s' % (analyzer.srcname, fullname)
+            attr_docs = analyzer.find_attr_docs()
+            if what in ('data', 'attribute'):
+                key = ('.'.join(objpath[:-1]), objpath[-1])
+                if key in attr_docs:
+                    no_docstring = True
+                    docstrings = [attr_docs[key]]
+                    for i, line in enumerate(self.process_doc(docstrings, what,
+                                                              fullname, todoc)):
+                        self.result.append(indent + line, sourcename, i)
         else:
             sourcename = 'docstring of %s' % fullname
+            attr_docs = {}
 
         # add content from docstrings
         if not no_docstring:
-            for i, line in enumerate(self.get_doc(what, fullname, todoc)):
+            encoding = analyzer and analyzer.encoding
+            docstrings = self.get_doc(what, todoc, encoding)
+            for i, line in enumerate(self.process_doc(docstrings, what,
+                                                      fullname, todoc)):
                 self.result.append(indent + line, sourcename, i)
 
-        # add source content, if present
+        # add additional content (e.g. from document), if present
         if add_content:
             for line, src in zip(add_content.data, add_content.items):
                 self.result.append(indent + line, src[0], src[1])
         if objpath:
             self.env.autodoc_current_class = objpath[0]
 
-        # add members, if possible
-        _all = members == ['__all__']
+        # look for members to include
+        want_all_members = members == ['__all__']
         members_check_module = False
-        if _all:
+        if want_all_members:
             # unqualified :members: given
             if what == 'module':
                 if hasattr(todoc, '__all__'):
         else:
             all_members = [(mname, getattr(todoc, mname)) for mname in members]
 
+        # search for members in source code too
+        namespace = '.'.join(objpath)  # will be empty for modules
+
         for (membername, member) in all_members:
-            if _all and membername.startswith('_'):
+            # if isattr is True, the member is documented as an attribute
+            isattr = False
+            # if content is not None, no extra content from docstrings will be added
+            content = None
+
+            if want_all_members and membername.startswith('_'):
                 # ignore members whose name starts with _ by default
                 skip = True
             else:
-                # ignore undocumented members if :undoc-members: is not given
-                doc = getattr(member, '__doc__', None)
-                skip = not self.options.undoc_members and not doc
+                if (namespace, membername) in attr_docs:
+                    # keep documented attributes
+                    skip = False
+                    isattr = True
+                else:
+                    # ignore undocumented members if :undoc-members: is not given
+                    doc = getattr(member, '__doc__', None)
+                    skip = not self.options.undoc_members and not doc
+
             # give the user a chance to decide whether this member should be skipped
             if self.env.app:
                 # let extensions preprocess docstrings
             if skip:
                 continue
 
-            content = None
+            # determine member type
             if what == 'module':
                 if isinstance(member, (FunctionType, BuiltinFunctionType)):
                     memberwhat = 'function'
+                elif isattr:
+                    memberwhat = 'attribute'
                 elif isinstance(member, clstypes):
                     if member.__name__ != membername:
                         # assume it's aliased
                     else:
                         memberwhat = 'class'
                 else:
-                    # XXX: todo -- attribute docs
                     continue
             else:
-                if isinstance(member, clstypes):
+                if inspect.isroutine(member):
+                    memberwhat = 'method'
+                elif isattr:
+                    memberwhat = 'attribute'
+                elif isinstance(member, clstypes):
                     if member.__name__ != membername:
                         # assume it's aliased
                         memberwhat = 'attribute'
                                            source='')
                     else:
                         memberwhat = 'class'
-                elif inspect.isroutine(member):
-                    memberwhat = 'method'
                 elif isdescriptor(member):
                     memberwhat = 'attribute'
                 else:
-                    # XXX: todo -- attribute docs
                     continue
+
             # give explicitly separated module name, so that members of inner classes
             # can be documented
             full_membername = mod + '::' + '.'.join(objpath + [membername])
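
A minimal sketch of the attribute-doc lookup used above, assuming a module
mymod whose Timer class has an interval attribute documented with a "#:"
comment; find_attr_docs() keys its entries by (namespace, attrname) tuples:

    from sphinx.pycode import ModuleAnalyzer

    analyzer = ModuleAnalyzer.for_module('mymod')
    attr_docs = analyzer.find_attr_docs()
    key = ('Timer', 'interval')      # namespace is '' for module-level names
    if key in attr_docs:
        doclines = attr_docs[key]    # list of prepared reST lines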

File sphinx/jinja2glue.py

 
     Glue code for the jinja2 templating engine.
 
-    :copyright: 2008 by Sebastian Wiesner.
+    :copyright: Copyright 2007-2009 by the Sphinx team, see AUTHORS.
     :license: BSD, see LICENSE for details.
 """
 

File sphinx/pycode/Grammar.txt

+# Grammar for Python
+
+# Note:  Changing the grammar specified in this file will most likely
+#        require corresponding changes in the parser module
+#        (../Modules/parsermodule.c).  If you can't make the changes to
+#        that module yourself, please co-ordinate the required changes
+#        with someone who can; ask around on python-dev for help.  Fred
+#        Drake <fdrake@acm.org> will probably be listening there.
+
+# NOTE WELL: You should also follow all the steps listed in PEP 306,
+# "How to Change Python's Grammar"
+
+# Commands for Kees Blom's railroad program
+#diagram:token NAME
+#diagram:token NUMBER
+#diagram:token STRING
+#diagram:token NEWLINE
+#diagram:token ENDMARKER
+#diagram:token INDENT
+#diagram:output\input python.bla
+#diagram:token DEDENT
+#diagram:output\textwidth 20.04cm\oddsidemargin  0.0cm\evensidemargin 0.0cm
+#diagram:rules
+
+# Start symbols for the grammar:
+#	file_input is a module or sequence of commands read from an input file;
+#	single_input is a single interactive statement;
+#	eval_input is the input for the eval() and input() functions.
+# NB: compound_stmt in single_input is followed by extra NEWLINE!
+file_input: (NEWLINE | stmt)* ENDMARKER
+single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
+eval_input: testlist NEWLINE* ENDMARKER
+
+decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
+decorators: decorator+
+decorated: decorators (classdef | funcdef)
+funcdef: 'def' NAME parameters ['->' test] ':' suite
+parameters: '(' [typedargslist] ')'
+typedargslist: ((tfpdef ['=' test] ',')*
+                ('*' [tname] (',' tname ['=' test])* [',' '**' tname] | '**' tname)
+                | tfpdef ['=' test] (',' tfpdef ['=' test])* [','])
+tname: NAME [':' test]
+tfpdef: tname | '(' tfplist ')'
+tfplist: tfpdef (',' tfpdef)* [',']
+varargslist: ((vfpdef ['=' test] ',')*
+              ('*' [vname] (',' vname ['=' test])*  [',' '**' vname] | '**' vname)
+              | vfpdef ['=' test] (',' vfpdef ['=' test])* [','])
+vname: NAME
+vfpdef: vname | '(' vfplist ')'
+vfplist: vfpdef (',' vfpdef)* [',']
+
+stmt: simple_stmt | compound_stmt
+simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
+small_stmt: (expr_stmt | print_stmt  | del_stmt | pass_stmt | flow_stmt |
+             import_stmt | global_stmt | exec_stmt | assert_stmt)
+expr_stmt: testlist (augassign (yield_expr|testlist) |
+                     ('=' (yield_expr|testlist))*)
+augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' |
+            '<<=' | '>>=' | '**=' | '//=')
+# For normal assignments, additional restrictions enforced by the interpreter
+print_stmt: 'print' ( [ test (',' test)* [','] ] |
+                      '>>' test [ (',' test)+ [','] ] )
+del_stmt: 'del' exprlist
+pass_stmt: 'pass'
+flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
+break_stmt: 'break'
+continue_stmt: 'continue'
+return_stmt: 'return' [testlist]
+yield_stmt: yield_expr
+raise_stmt: 'raise' [test ['from' test | ',' test [',' test]]]
+import_stmt: import_name | import_from
+import_name: 'import' dotted_as_names
+import_from: ('from' ('.'* dotted_name | '.'+)
+              'import' ('*' | '(' import_as_names ')' | import_as_names))
+import_as_name: NAME ['as' NAME]
+dotted_as_name: dotted_name ['as' NAME]
+import_as_names: import_as_name (',' import_as_name)* [',']
+dotted_as_names: dotted_as_name (',' dotted_as_name)*
+dotted_name: NAME ('.' NAME)*
+global_stmt: ('global' | 'nonlocal') NAME (',' NAME)*
+exec_stmt: 'exec' expr ['in' test [',' test]]
+assert_stmt: 'assert' test [',' test]
+
+compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated
+if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
+while_stmt: 'while' test ':' suite ['else' ':' suite]
+for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
+try_stmt: ('try' ':' suite
+           ((except_clause ':' suite)+
+	    ['else' ':' suite]
+	    ['finally' ':' suite] |
+	   'finally' ':' suite))
+with_stmt: 'with' test [ with_var ] ':' suite
+with_var: 'as' expr
+# NB compile.c makes sure that the default except clause is last
+except_clause: 'except' [test [(',' | 'as') test]]
+suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
+
+# Backward compatibility cruft to support:
+# [ x for x in lambda: True, lambda: False if x() ]
+# even while also allowing:
+# lambda x: 5 if x else 2
+# (But not a mix of the two)
+testlist_safe: old_test [(',' old_test)+ [',']]
+old_test: or_test | old_lambdef
+old_lambdef: 'lambda' [varargslist] ':' old_test
+
+test: or_test ['if' or_test 'else' test] | lambdef
+or_test: and_test ('or' and_test)*
+and_test: not_test ('and' not_test)*
+not_test: 'not' not_test | comparison
+comparison: expr (comp_op expr)*
+comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
+expr: xor_expr ('|' xor_expr)*
+xor_expr: and_expr ('^' and_expr)*
+and_expr: shift_expr ('&' shift_expr)*
+shift_expr: arith_expr (('<<'|'>>') arith_expr)*
+arith_expr: term (('+'|'-') term)*
+term: factor (('*'|'/'|'%'|'//') factor)*
+factor: ('+'|'-'|'~') factor | power
+power: atom trailer* ['**' factor]
+atom: ('(' [yield_expr|testlist_gexp] ')' |
+       '[' [listmaker] ']' |
+       '{' [dictsetmaker] '}' |
+       '`' testlist1 '`' |
+       NAME | NUMBER | STRING+ | '.' '.' '.')
+listmaker: test ( comp_for | (',' test)* [','] )
+testlist_gexp: test ( comp_for | (',' test)* [','] )
+lambdef: 'lambda' [varargslist] ':' test
+trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
+subscriptlist: subscript (',' subscript)* [',']
+subscript: test | [test] ':' [test] [sliceop]
+sliceop: ':' [test]
+exprlist: expr (',' expr)* [',']
+testlist: test (',' test)* [',']
+dictsetmaker: ( (test ':' test (comp_for | (',' test ':' test)* [','])) |
+                (test (comp_for | (',' test)* [','])) )
+
+classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
+
+arglist: (argument ',')* (argument [',']
+                         |'*' test (',' argument)* [',' '**' test] 
+                         |'**' test)
+argument: test [comp_for] | test '=' test  # Really [keyword '='] test
+
+comp_iter: comp_for | comp_if
+comp_for: 'for' exprlist 'in' testlist_safe [comp_iter]
+comp_if: 'if' old_test [comp_iter]
+
+testlist1: test (',' test)*
+
+# not used in grammar, but may appear in "node" passed from Parser to Compiler
+encoding_decl: NAME
+
+yield_expr: 'yield' [testlist]

File sphinx/pycode/__init__.py

+# -*- coding: utf-8 -*-
+"""
+    sphinx.pycode
+    ~~~~~~~~~~~~~
+
+    Utilities for parsing and analyzing Python code.
+
+    :copyright: Copyright 2007-2009 by the Sphinx team, see AUTHORS.
+    :license: BSD, see LICENSE for details.
+"""
+
+import re
+import sys
+from os import path
+from cStringIO import StringIO
+
+from sphinx.pycode import nodes
+from sphinx.pycode.pgen2 import driver, token, tokenize, parse, literals
+from sphinx.util.docstrings import prepare_docstring, prepare_commentdoc
+
+
+# load the Python grammar
+_grammarfile = path.join(path.dirname(__file__), 'Grammar.txt')
+pygrammar = driver.load_grammar(_grammarfile)
+pydriver = driver.Driver(pygrammar, convert=nodes.convert)
+
+# an object with attributes corresponding to token and symbol names
+class sym: pass
+for k, v in pygrammar.symbol2number.iteritems():
+    setattr(sym, k, v)
+for k, v in token.tok_name.iteritems():
+    setattr(sym, v, k)
+
+# a dict mapping terminal and nonterminal numbers to their names
+number2name = pygrammar.number2symbol.copy()
+number2name.update(token.tok_name)
+
+
+# a regex to recognize coding cookies
+_coding_re = re.compile(r'coding[:=]\s*([-\w.]+)')
+
+_eq = nodes.Leaf(token.EQUAL, '=')
+
+
+class AttrDocVisitor(nodes.NodeVisitor):
+    """
+    Visitor that collects docstrings for attribute assignments on toplevel and
+    in classes.
+
+    The docstrings can either be in special '#:' comments before the assignment
+    or in a docstring after it.
+    """
+    def init(self, scope, encoding):
+        self.scope = scope
+        self.encoding = encoding
+        self.namespace = []
+        self.collected = {}
+
+    def visit_classdef(self, node):
+        self.namespace.append(node[1].value)
+        self.generic_visit(node)
+        self.namespace.pop()
+
+    def visit_expr_stmt(self, node):
+        """Visit an assignment which may have a special comment before it."""
+        if _eq not in node.children:
+            # not an assignment (we don't care for augmented assignments)
+            return
+        pnode = node[0]
+        prefix = pnode.get_prefix()
+        # if the assignment is the first statement on a new indentation
+        # level, its preceding whitespace and comments are not assigned
+        # to that token, but the first INDENT or DEDENT token
+        while not prefix:
+            pnode = pnode.get_prev_leaf()
+            if not pnode or pnode.type not in (token.INDENT, token.DEDENT):
+                break
+            prefix = pnode.get_prefix()
+        prefix = prefix.decode(self.encoding)
+        docstring = prepare_commentdoc(prefix)
+        if docstring:
+            self.add_docstring(node, docstring)
+
+    def visit_simple_stmt(self, node):
+        """Visit a docstring statement which may have an assignment before."""
+        if node[0].type != token.STRING:
+            # not a docstring; but still need to visit children
+            return self.generic_visit(node)
+        prev = node.get_prev_sibling()
+        if not prev:
+            return
+        if prev.type == sym.simple_stmt and \
+               prev[0].type == sym.expr_stmt and _eq in prev[0].children:
+            # need to "eval" the string because it's returned in its original form
+            docstring = literals.evalString(node[0].value, self.encoding)
+            docstring = prepare_docstring(docstring)
+            self.add_docstring(prev[0], docstring)
+
+    def visit_funcdef(self, node):
+        # don't descend into functions -- nothing interesting there
+        return
+
+    def add_docstring(self, node, docstring):
+        # add an item for each assignment target
+        for i in range(0, len(node) - 1, 2):
+            target = node[i]
+            if target.type != token.NAME:
+                # don't care about complex targets
+                continue
+            namespace = '.'.join(self.namespace)
+            if namespace.startswith(self.scope):
+                self.collected[namespace, target.value] = docstring
+
+
+class PycodeError(Exception):
+    def __str__(self):
+        res = self.args[0]
+        if len(self.args) > 1:
+            res += ' (exception was: %r)' % self.args[1]
+        return res
+
+
+class ModuleAnalyzer(object):
+    # cache for analyzer objects -- caches both by module and file name
+    cache = {}
+
+    @classmethod
+    def for_string(cls, string, modname, srcname='<string>'):
+        return cls(StringIO(string), modname, srcname)
+
+    @classmethod
+    def for_file(cls, filename, modname):
+        if ('file', filename) in cls.cache:
+            return cls.cache['file', filename]
+        try:
+            fileobj = open(filename, 'r')
+        except Exception, err:
+            raise PycodeError('error opening %r' % filename, err)
+        obj = cls(fileobj, modname, filename)
+        cls.cache['file', filename] = obj
+        return obj
+
+    @classmethod
+    def for_module(cls, modname):
+        if ('module', modname) in cls.cache:
+            entry = cls.cache['module', modname]
+            if isinstance(entry, PycodeError):
+                raise entry
+            return entry
+
+        try:
+            if modname not in sys.modules:
+                try:
+                    __import__(modname)
+                except ImportError, err:
+                    raise PycodeError('error importing %r' % modname, err)
+            mod = sys.modules[modname]
+            if hasattr(mod, '__loader__'):
+                try:
+                    source = mod.__loader__.get_source(modname)
+                except Exception, err:
+                    raise PycodeError('error getting source for %r' % modname, err)
+                obj = cls.for_string(source, modname)
+                cls.cache['module', modname] = obj
+                return obj
+            filename = getattr(mod, '__file__', None)
+            if filename is None:
+                raise PycodeError('no source found for module %r' % modname)
+            filename = path.normpath(filename)
+            lfilename = filename.lower()
+            if lfilename.endswith('.pyo') or lfilename.endswith('.pyc'):
+                filename = filename[:-1]
+            elif not lfilename.endswith('.py'):
+                raise PycodeError('source is not a .py file: %r' % filename)
+            if not path.isfile(filename):
+                raise PycodeError('source file is not present: %r' % filename)
+            obj = cls.for_file(filename, modname)
+        except PycodeError, err:
+            cls.cache['module', modname] = err
+            raise
+        cls.cache['module', modname] = obj
+        return obj
+
+    def __init__(self, source, modname, srcname):
+        # name of the module
+        self.modname = modname
+        # name of the source file
+        self.srcname = srcname
+        # file-like object yielding source lines
+        self.source = source
+
+        # will be filled by tokenize()
+        self.tokens = None
+        # will be filled by parse()
+        self.parsetree = None
+        # will be filled by find_attr_docs()
+        self.attr_docs = None
+        # will be filled by find_tags()
+        self.tags = None
+
+    def tokenize(self):
+        """Generate tokens from the source."""
+        if self.tokens is not None:
+            return
+        self.tokens = list(tokenize.generate_tokens(self.source.readline))
+        self.source.close()
+
+    def parse(self):
+        """Parse the generated source tokens."""
+        if self.parsetree is not None:
+            return
+        self.tokenize()
+        self.parsetree = pydriver.parse_tokens(self.tokens)
+        # find the source code encoding
+        encoding = sys.getdefaultencoding()
+        comments = self.parsetree.get_prefix()
+        for line in comments.splitlines()[:2]:
+            match = _coding_re.search(line)
+            if match is not None:
+                encoding = match.group(1)
+                break
+        self.encoding = encoding
+
+    def find_attr_docs(self, scope=''):
+        """Find class and module-level attributes and their documentation."""
+        if self.attr_docs is not None:
+            return self.attr_docs
+        self.parse()
+        attr_visitor = AttrDocVisitor(number2name, scope, self.encoding)
+        attr_visitor.visit(self.parsetree)
+        self.attr_docs = attr_visitor.collected
+        return attr_visitor.collected
+
+    def find_tags(self):
+        """Find class, function and method definitions and their location."""
+        if self.tags is not None:
+            return self.tags
+        self.tokenize()
+        result = {}
+        namespace = []
+        stack = []
+        indent = 0
+        defline = False
+        expect_indent = False
+        def tokeniter(ignore=(token.COMMENT, token.NL)):
+            for tokentup in self.tokens:
+                if tokentup[0] not in ignore:
+                    yield tokentup
+        tokeniter = tokeniter()
+        for type, tok, spos, epos, line in tokeniter:
+            if expect_indent:
+                if type != token.INDENT:
+                    # no suite -- one-line definition
+                    assert stack
+                    dtype, fullname, startline, _ = stack.pop()
+                    endline = epos[0]
+                    namespace.pop()
+                    result[fullname] = (dtype, startline, endline)
+                expect_indent = False
+            if tok in ('def', 'class'):
+                name = tokeniter.next()[1]
+                namespace.append(name)
+                fullname = '.'.join(namespace)
+                stack.append((tok, fullname, spos[0], indent))
+                defline = True
+            elif type == token.INDENT:
+                expect_indent = False
+                indent += 1
+            elif type == token.DEDENT:
+                indent -= 1
+                # if the stacklevel is the same as it was before the last
+                # def/class block, this dedent closes that block
+                if stack and indent == stack[-1][3]:
+                    dtype, fullname, startline, _ = stack.pop()
+                    endline = spos[0]
+                    namespace.pop()
+                    result[fullname] = (dtype, startline, endline)
+            elif type == token.NEWLINE:
+                # if this line contained a definition, expect an INDENT to start the
+                # suite; if there is no such INDENT it's a one-line definition
+                if defline:
+                    defline = False
+                    expect_indent = True
+        self.tags = result
+        return result
+
+
+if __name__ == '__main__':
+    import time, pprint
+    x0 = time.time()
+    #ma = ModuleAnalyzer.for_file(__file__.rstrip('c'), 'sphinx.builders.html')
+    ma = ModuleAnalyzer.for_file('sphinx/builders/html.py', 'sphinx.builders.html')
+    ma.tokenize()
+    x1 = time.time()
+    ma.parse()
+    x2 = time.time()
+    #for (ns, name), doc in ma.find_attr_docs().iteritems():
+    #    print '>>', ns, name
+    #    print '\n'.join(doc)
+    pprint.pprint(ma.find_tags())
+    x3 = time.time()
+    #print nodes.nice_repr(ma.parsetree, number2name)
+    print "tokenizing %.4f, parsing %.4f, finding %.4f" % (x1-x0, x2-x1, x3-x2)

File sphinx/pycode/nodes.py

+# -*- coding: utf-8 -*-
+"""
+    sphinx.pycode.nodes
+    ~~~~~~~~~~~~~~~~~~~
+
+    Parse tree node implementations.
+
+    :copyright: Copyright 2007-2009 by the Sphinx team, see AUTHORS.
+    :license: BSD, see LICENSE for details.
+"""
+
+
+class BaseNode(object):
+    """
+    Node superclass for both terminal and nonterminal nodes.
+    """
+
+    def _eq(self, other):
+        raise NotImplementedError
+
+    def __eq__(self, other):
+        if self.__class__ is not other.__class__:
+            return NotImplemented
+        return self._eq(other)
+
+    def __ne__(self, other):
+        if self.__class__ is not other.__class__:
+            return NotImplemented
+        return not self._eq(other)
+
+    def get_prev_sibling(self):
+        """Return previous child in parent's children, or None."""
+        if self.parent is None:
+            return None
+        for i, child in enumerate(self.parent.children):
+            if child is self:
+                if i == 0:
+                    return None
+                return self.parent.children[i-1]
+
+    def get_next_sibling(self):
+        """Return next child in parent's children, or None."""
+        if self.parent is None:
+            return None
+        for i, child in enumerate(self.parent.children):
+            if child is self:
+                try:
+                    return self.parent.children[i+1]
+                except IndexError:
+                    return None
+
+    def get_prev_leaf(self):
+        """Return the leaf node that precedes this node in the parse tree."""
+        def last_child(node):
+            if isinstance(node, Leaf):
+                return node
+            elif not node.children:
+                return None
+            else:
+                return last_child(node.children[-1])
+        if self.parent is None:
+            return None
+        prev = self.get_prev_sibling()
+        if isinstance(prev, Leaf):
+            return prev
+        elif prev is not None:
+            return last_child(prev)
+        return self.parent.get_prev_leaf()
+
+    def get_next_leaf(self):
+        """Return self if leaf, otherwise the leaf node that succeeds this
+        node in the parse tree.
+        """
+        node = self
+        while not isinstance(node, Leaf):
+            assert node.children
+            node = node.children[0]
+        return node
+
+    def get_lineno(self):
+        """Return the line number which generated the invocant node."""
+        return self.get_next_leaf().lineno
+
+    def get_prefix(self):
+        """Return the prefix of the next leaf node."""
+        # only leaves carry a prefix
+        return self.get_next_leaf().prefix
+
+
+class Node(BaseNode):
+    """
+    Node implementation for nonterminals.
+    """
+
+    def __init__(self, type, children, context=None):
+        # type of nonterminals is >= 256
+        # assert type >= 256, type
+        self.type = type
+        self.children = list(children)
+        for ch in self.children:
+            # assert ch.parent is None, repr(ch)
+            ch.parent = self
+
+    def __repr__(self):
+        return '%s(%s, %r)' % (self.__class__.__name__, self.type, self.children)
+
+    def __str__(self):
+        """This reproduces the input source exactly."""
+        return ''.join(map(str, self.children))
+
+    def _eq(self, other):
+        return (self.type, self.children) == (other.type, other.children)
+
+    # support indexing the node directly instead of .children
+
+    def __getitem__(self, index):
+        return self.children[index]
+
+    def __iter__(self):
+        return iter(self.children)
+
+    def __len__(self):
+        return len(self.children)
+
+
+class Leaf(BaseNode):
+    """
+    Node implementation for leaf nodes (terminals).
+    """
+    prefix = ''  # Whitespace and comments preceding this token in the input
+    lineno = 0   # Line where this token starts in the input
+    column = 0   # Column where this token starts in the input
+
+    def __init__(self, type, value, context=None):
+        # type of terminals is below 256
+        # assert 0 <= type < 256, type
+        self.type = type
+        self.value = value
+        if context is not None:
+            self.prefix, (self.lineno, self.column) = context
+
+    def __repr__(self):
+        return '%s(%r, %r, %r)' % (self.__class__.__name__,
+                                   self.type, self.value, self.prefix)
+
+    def __str__(self):
+        """This reproduces the input source exactly."""
+        return self.prefix + str(self.value)
+
+    def _eq(self, other):
+        """Compares two nodes for equality."""
+        return (self.type, self.value) == (other.type, other.value)
+
+
+def convert(grammar, raw_node):
+    """Convert raw node to a Node or Leaf instance."""
+    type, value, context, children = raw_node
+    if children or type in grammar.number2symbol:
+        # If there's exactly one child, return that child instead of
+        # creating a new node.
+        if len(children) == 1:
+            return children[0]
+        return Node(type, children, context=context)
+    else:
+        return Leaf(type, value, context=context)
+
+
+def nice_repr(node, number2name, prefix=False):
+    def _repr(node):
+        if isinstance(node, Leaf):
+            return "%s(%r)" % (number2name[node.type], node.value)
+        else:
+            return "%s(%s)" % (number2name[node.type],
+                               ', '.join(map(_repr, node.children)))
+    def _prepr(node):
+        if isinstance(node, Leaf):
+            return "%s(%r, %r)" % (number2name[node.type], node.prefix, node.value)
+        else:
+            return "%s(%s)" % (number2name[node.type],
+                               ', '.join(map(_prepr, node.children)))
+    return (prefix and _prepr or _repr)(node)
+
+
+class NodeVisitor(object):
+    def __init__(self, number2name, *args):
+        self.number2name = number2name
+        self.init(*args)
+
+    def init(self, *args):
+        pass
+
+    def visit(self, node):
+        """Visit a node."""
+        method = 'visit_' + self.number2name[node.type]
+        visitor = getattr(self, method, self.generic_visit)
+        return visitor(node)
+
+    def generic_visit(self, node):
+        """Called if no explicit visitor function exists for a node."""
+        if isinstance(node, Node):
+            for child in node:
+                self.visit(child)

File sphinx/pycode/pgen2/__init__.py

+# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+"""The pgen2 package."""

File sphinx/pycode/pgen2/driver.py

+# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+# Modifications:
+# Copyright 2006 Google, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+"""Parser driver.
+
+This provides a high-level interface to parse a file into a syntax tree.
+
+"""
+
+__author__ = "Guido van Rossum <guido@python.org>"
+
+__all__ = ["Driver", "load_grammar"]
+
+# Python imports
+import os
+import logging
+import sys
+
+# Pgen imports
+from sphinx.pycode.pgen2 import grammar, parse, token, tokenize, pgen
+
+
+class Driver(object):
+
+    def __init__(self, grammar, convert=None, logger=None):
+        self.grammar = grammar
+        if logger is None:
+            logger = logging.getLogger()
+        self.logger = logger
+        self.convert = convert
+
+    def parse_tokens(self, tokens, debug=False):
+        """Parse a series of tokens and return the syntax tree."""
+        # XXX Move the prefix computation into a wrapper around tokenize.
+        p = parse.Parser(self.grammar, self.convert)
+        p.setup()
+        lineno = 1
+        column = 0
+        type = value = start = end = line_text = None
+        prefix = ""
+        opmap = grammar.opmap
+        for type, value, start, end, line_text in tokens:
+            if start != (lineno, column):
+                assert (lineno, column) <= start, ((lineno, column), start)
+                s_lineno, s_column = start
+                if lineno < s_lineno:
+                    prefix += "\n" * (s_lineno - lineno)
+                    lineno = s_lineno
+                    column = 0
+                if column < s_column:
+                    prefix += line_text[column:s_column]
+                    column = s_column
+            if type in (tokenize.COMMENT, tokenize.NL):
+                prefix += value
+                lineno, column = end
+                if value.endswith("\n"):
+                    lineno += 1
+                    column = 0
+                continue
+            if type == token.OP:
+                type = opmap[value]
+            # if debug:
+            #     self.logger.debug("%s %r (prefix=%r)",
+            #                       token.tok_name[type], value, prefix)
+            if p.addtoken(type, value, (prefix, start)):
+                # if debug:
+                #     self.logger.debug("Stop.")
+                break
+            prefix = ""
+            lineno, column = end
+            if value.endswith("\n"):
+                lineno += 1
+                column = 0
+        else:
+            # We never broke out -- EOF is too soon (how can this happen???)
+            raise parse.ParseError("incomplete input", type, value, line_text)
+        return p.rootnode
+
+    def parse_stream_raw(self, stream, debug=False):
+        """Parse a stream and return the syntax tree."""
+        tokens = tokenize.generate_tokens(stream.readline)
+        return self.parse_tokens(tokens, debug)
+
+    def parse_stream(self, stream, debug=False):
+        """Parse a stream and return the syntax tree."""
+        return self.parse_stream_raw(stream, debug)
+
+    def parse_file(self, filename, debug=False):
+        """Parse a file and return the syntax tree."""
+        stream = open(filename)
+        try:
+            return self.parse_stream(stream, debug)
+        finally:
+            stream.close()
+
+    def parse_string(self, text, debug=False):
+        """Parse a string and return the syntax tree."""
+        tokens = tokenize.generate_tokens(generate_lines(text).next)
+        return self.parse_tokens(tokens, debug)
+
+
+def generate_lines(text):
+    """Generator that behaves like readline without using StringIO."""
+    for line in text.splitlines(True):
+        yield line
+    while True:
+        yield ""
+
+
+def load_grammar(gt="Grammar.txt", gp=None,
+                 save=True, force=False, logger=None):
+    """Load the grammar (maybe from a pickle)."""
+    if logger is None:
+        logger = logging.getLogger()
+    if gp is None:
+        head, tail = os.path.splitext(gt)
+        if tail == ".txt":
+            tail = ""
+        gp = head + tail + ".".join(map(str, sys.version_info)) + ".pickle"
+    if force or not _newer(gp, gt):
+        logger.info("Generating grammar tables from %s", gt)
+        g = pgen.generate_grammar(gt)
+        if save:
+            logger.info("Writing grammar tables to %s", gp)
+            try:
+                g.dump(gp)
+            except IOError, e:
+                logger.info("Writing failed:"+str(e))
+    else:
+        g = grammar.Grammar()
+        g.load(gp)
+    return g
+
+
+def _newer(a, b):
+    """Inquire whether file a was written since file b."""
+    if not os.path.exists(a):
+        return False
+    if not os.path.exists(b):
+        return True
+    return os.path.getmtime(a) >= os.path.getmtime(b)
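
A minimal sketch of driving the parser directly, assuming the Grammar.txt
from this commit is in the working directory; load_grammar() reuses the
pickled tables when they are newer than the grammar file:

    from sphinx.pycode.pgen2 import driver

    g = driver.load_grammar('Grammar.txt')
    d = driver.Driver(g)
    tree = d.parse_string('x = 1\n')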

File sphinx/pycode/pgen2/grammar.py

+# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+"""This module defines the data structures used to represent a grammar.
+
+These are a bit arcane because they are derived from the data
+structures used by Python's 'pgen' parser generator.
+
+There's also a table here mapping operators to their names in the
+token module; the Python tokenize module reports all operators as the
+fallback token code OP, but the parser needs the actual token code.
+
+"""
+
+# Python imports
+import pickle
+
+# Local imports
+from sphinx.pycode.pgen2 import token, tokenize
+
+
+class Grammar(object):
+    """Pgen parsing tables tables conversion class.
+
+    Once initialized, this class supplies the grammar tables for the
+    parsing engine implemented by parse.py.  The parsing engine
+    accesses the instance variables directly.  The class here does not
+    provide initialization of the tables; several subclasses exist to
+    do this (see the conv and pgen modules).
+
+    The load() method reads the tables from a pickle file, which is
+    much faster than the other ways offered by subclasses.  The pickle
+    file is written by calling dump() (after loading the grammar
+    tables using a subclass).  The report() method prints a readable
+    representation of the tables to stdout, for debugging.
+
+    The instance variables are as follows:
+
+    symbol2number -- a dict mapping symbol names to numbers.  Symbol
+                     numbers are always 256 or higher, to distinguish
+                     them from token numbers, which are between 0 and
+                     255 (inclusive).
+
+    number2symbol -- a dict mapping numbers to symbol names;
+                     these two are each other's inverse.
+
+    states        -- a list of DFAs, where each DFA is a list of
+                     states, each state is a list of arcs, and each
+                     arc is a (i, j) pair where i is a label and j is
+                     a state number.  The DFA number is the index into
+                     this list.  (This name is slightly confusing.)
+                     Final states are represented by a special arc of
+                     the form (0, j) where j is its own state number.
+
+    dfas          -- a dict mapping symbol numbers to (DFA, first)
+                     pairs, where DFA is an item from the states list
+                     above, and first is a set of tokens that can
+                     begin this grammar rule (represented by a dict
+                     whose values are always 1).
+
+    labels        -- a list of (x, y) pairs where x is either a token
+                     number or a symbol number, and y is either None
+                     or a string; the strings are keywords.  The label
+                     number is the index in this list; label numbers
+                     are used to mark state transitions (arcs) in the
+                     DFAs.
+
+    start         -- the number of the grammar's start symbol.
+
+    keywords      -- a dict mapping keyword strings to arc labels.
+
+    tokens        -- a dict mapping token numbers to arc labels.
+
+    """
+
+    def __init__(self):
+        self.symbol2number = {}
+        self.number2symbol = {}
+        self.states = []
+        self.dfas = {}
+        self.labels = [(0, "EMPTY")]
+        self.keywords = {}
+        self.tokens = {}
+        self.symbol2label = {}
+        self.start = 256
+
+    def dump(self, filename):
+        """Dump the grammar tables to a pickle file."""
+        f = open(filename, "wb")
+        pickle.dump(self.__dict__, f, 2)
+        f.close()
+
+    def load(self, filename):
+        """Load the grammar tables from a pickle file."""
+        f = open(filename, "rb")
+        d = pickle.load(f)
+        f.close()
+        self.__dict__.update(d)
+
+    def report(self):
+        """Dump the grammar tables to standard output, for debugging."""
+        from pprint import pprint
+        print "s2n"
+        pprint(self.symbol2number)
+        print "n2s"
+        pprint(self.number2symbol)
+        print "states"
+        pprint(self.states)
+        print "dfas"
+        pprint(self.dfas)
+        print "labels"
+        pprint(self.labels)
+        print "start", self.start
+
+
+# Map from operator to number (since tokenize doesn't do this)
+
+opmap_raw = """
+( LPAR
+) RPAR
+[ LSQB
+] RSQB
+: COLON
+, COMMA
+; SEMI
++ PLUS
+- MINUS
+* STAR
+/ SLASH
+| VBAR
+& AMPER
+< LESS
+> GREATER
+= EQUAL
+. DOT
+% PERCENT
+` BACKQUOTE
+{ LBRACE
+} RBRACE
+@ AT
+== EQEQUAL
+!= NOTEQUAL
+<> NOTEQUAL
+<= LESSEQUAL
+>= GREATEREQUAL
+~ TILDE
+^ CIRCUMFLEX
+<< LEFTSHIFT
+>> RIGHTSHIFT
+** DOUBLESTAR
++= PLUSEQUAL
+-= MINEQUAL
+*= STAREQUAL
+/= SLASHEQUAL
+%= PERCENTEQUAL
+&= AMPEREQUAL
+|= VBAREQUAL
+^= CIRCUMFLEXEQUAL
+<<= LEFTSHIFTEQUAL
+>>= RIGHTSHIFTEQUAL
+**= DOUBLESTAREQUAL
+// DOUBLESLASH
+//= DOUBLESLASHEQUAL
+-> RARROW
+"""
+
+opmap = {}
+for line in opmap_raw.splitlines():
+    if line:
+        op, name = line.split()
+        opmap[op] = getattr(token, name)
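
To make the table layout described in the Grammar docstring concrete,
here is a small sketch (assuming the package is importable as
sphinx.pycode.pgen2; the toy one-rule grammar is invented for
illustration) that builds the tables by hand, round-trips them through
dump()/load(), and looks up an operator in opmap:

    import os, tempfile
    from sphinx.pycode.pgen2 import token
    from sphinx.pycode.pgen2.grammar import Grammar, opmap

    g = Grammar()
    # One symbol, number 256, whose DFA accepts a single NAME token.
    g.symbol2number = {"single": 256}
    g.number2symbol = {256: "single"}
    g.labels.append((token.NAME, None))   # becomes label 1
    dfa = [[(1, 1)],   # state 0: on label 1, move to state 1
           [(0, 1)]]   # state 1: arc (0, 1) marks it as final
    g.states = [dfa]
    g.dfas = {256: (dfa, {1: 1})}         # first set = {label 1}
    g.start = 256

    fd, fn = tempfile.mkstemp()
    os.close(fd)
    g.dump(fn)                  # pickles the instance __dict__
    g2 = Grammar()
    g2.load(fn)                 # restores it
    assert g2.symbol2number == g.symbol2number
    os.unlink(fn)

    # tokenize reports "**" as the generic OP; opmap recovers the code
    assert opmap["**"] == token.DOUBLESTAR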

File sphinx/pycode/pgen2/literals.py

+# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+# Extended to handle raw and unicode literals by Georg Brandl.
+
+"""Safely evaluate Python string literals without using eval()."""
+
+import re
+
+simple_escapes = {"a": "\a",
+                  "b": "\b",
+                  "f": "\f",
+                  "n": "\n",
+                  "r": "\r",
+                  "t": "\t",
+                  "v": "\v",
+                  "'": "'",
+                  '"': '"',
+                  "\\": "\\"}
+
+def convert_hex(x, n):
+    if len(x) < n+1:
+        raise ValueError("invalid hex string escape ('\\%s')" % x)
+    try:
+        return int(x[1:], 16)
+    except ValueError:
+        raise ValueError("invalid hex string escape ('\\%s')" % x)
+
+def escape(m):
+    all, tail = m.group(0, 1)
+    assert all.startswith("\\")
+    esc = simple_escapes.get(tail)
+    if esc is not None:
+        return esc
+    elif tail.startswith("x"):
+        return chr(convert_hex(tail, 2))
+    elif tail.startswith('u'):
+        return unichr(convert_hex(tail, 4))
+    elif tail.startswith('U'):
+        return unichr(convert_hex(tail, 8))
+    elif tail.startswith('N'):
+        import unicodedata
+        try:
+            return unicodedata.lookup(tail[1:-1])
+        except KeyError:
+            raise ValueError("undefined character name %r" % tail[1:-1])
+    else:
+        try:
+            return chr(int(tail, 8))
+        except ValueError:
+            raise ValueError("invalid octal string escape ('\\%s')" % tail)
+
+def escaperaw(m):
+    all, tail = m.group(0, 1)
+    if tail.startswith('u'):
+        return unichr(convert_hex(tail, 4))
+    elif tail.startswith('U'):
+        return unichr(convert_hex(tail, 8))
+    else:
+        return all
+
+escape_re = re.compile(r"\\(\'|\"|\\|[abfnrtv]|x.{0,2}|[0-7]{1,3})")
+uni_escape_re = re.compile(r"\\(\'|\"|\\|[abfnrtv]|x.{0,2}|[0-7]{1,3}|"
+                           r"u[0-9a-fA-F]{0,4}|U[0-9a-fA-F]{0,8}|N\{.+?\})")
+
+def evalString(s, encoding=None):
+    regex = escape_re
+    repl = escape
+    if encoding:
+        s = s.decode(encoding)
+    if s.startswith('u') or s.startswith('U'):
+        regex = uni_escape_re
+        s = s[1:]
+    if s.startswith('r') or s.startswith('R'):
+        repl = escaperaw
+        s = s[1:]
+    assert s.startswith("'") or s.startswith('"'), repr(s[:1])
+    q = s[0]
+    if s[:3] == q*3:
+        q = q*3
+    assert s.endswith(q), repr(s[-len(q):])
+    assert len(s) >= 2*len(q)
+    s = s[len(q):-len(q)]
+    return regex.sub(repl, s)
+
+def test():
+    for i in range(256):
+        c = chr(i)
+        s = repr(c)
+        e = evalString(s)
+        if e != c:
+            print i, c, s, e
+
+
+if __name__ == "__main__":
+    test()
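
A few example calls showing what evalString() accepts and returns (a
sketch, assuming the module is importable as
sphinx.pycode.pgen2.literals; the module targets Python 2, so the
u"..." branch yields a unicode object):

    from sphinx.pycode.pgen2.literals import evalString

    assert evalString('"abc"') == 'abc'
    assert evalString(r'"\x41\n"') == 'A\n'    # hex + simple escape
    assert evalString(r'"\101"') == 'A'        # octal escape
    assert evalString('r"\\n"') == '\\n'       # raw: backslash kept
    assert evalString('"""tri"""') == 'tri'    # triple-quoted
    assert evalString(r'u"\u00e9"') == u'\xe9' # unicode escape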

File sphinx/pycode/pgen2/parse.c

+/* Generated by Cython 0.9.8.1 on Thu Jan  1 23:45:38 2009 */
+
+#define PY_SSIZE_T_CLEAN
+#include "Python.h"
+#include "structmember.h"
+#ifndef PY_LONG_LONG
+  #define PY_LONG_LONG LONG_LONG
+#endif
+#ifndef DL_EXPORT
+  #define DL_EXPORT(t) t
+#endif
+#if PY_VERSION_HEX < 0x02040000
+  #define METH_COEXIST 0
+#endif
+#if PY_VERSION_HEX < 0x02050000
+  typedef int Py_ssize_t;
+  #define PY_SSIZE_T_MAX INT_MAX
+  #define PY_SSIZE_T_MIN INT_MIN
+  #define PyInt_FromSsize_t(z) PyInt_FromLong(z)
+  #define PyInt_AsSsize_t(o)   PyInt_AsLong(o)
+  #define PyNumber_Index(o)    PyNumber_Int(o)
+  #define PyIndex_Check(o)     PyNumber_Check(o)
+#endif
+#if PY_VERSION_HEX < 0x02060000
+  #define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt)
+  #define Py_TYPE(ob)   (((PyObject*)(ob))->ob_type)
+  #define Py_SIZE(ob)   (((PyVarObject*)(ob))->ob_size)
+  #define PyVarObject_HEAD_INIT(type, size) \
+          PyObject_HEAD_INIT(type) size,
+  #define PyType_Modified(t)
+
+  typedef struct {
+       void *buf;
+       Py_ssize_t len;
+       int readonly;
+       const char *format;
+       int ndim;
+       Py_ssize_t *shape;
+       Py_ssize_t *strides;
+       Py_ssize_t *suboffsets;
+       Py_ssize_t itemsize;
+       void *internal;
+  } Py_buffer;
+
+  #define PyBUF_SIMPLE 0
+  #define PyBUF_WRITABLE 0x0001
+  #define PyBUF_LOCK 0x0002
+  #define PyBUF_FORMAT 0x0004
+  #define PyBUF_ND 0x0008
+  #define PyBUF_STRIDES (0x0010 | PyBUF_ND)
+  #define PyBUF_C_CONTIGUOUS (0x0020 | PyBUF_STRIDES)
+  #define PyBUF_F_CONTIGUOUS (0x0040 | PyBUF_STRIDES)
+  #define PyBUF_ANY_CONTIGUOUS (0x0080 | PyBUF_STRIDES)
+  #define PyBUF_INDIRECT (0x0100 | PyBUF_STRIDES)
+
+#endif
+#if PY_MAJOR_VERSION < 3
+  #define __Pyx_BUILTIN_MODULE_NAME "__builtin__"
+#else
+  #define __Pyx_BUILTIN_MODULE_NAME "builtins"
+#endif
+#if PY_MAJOR_VERSION >= 3
+  #define Py_TPFLAGS_CHECKTYPES 0
+  #define Py_TPFLAGS_HAVE_INDEX 0
+#endif
+#if PY_MAJOR_VERSION >= 3
+  #define PyBaseString_Type            PyUnicode_Type
+  #define PyString_Type                PyBytes_Type
+  #define PyInt_Type                   PyLong_Type
+  #define PyInt_Check(op)              PyLong_Check(op)
+  #define PyInt_CheckExact(op)         PyLong_CheckExact(op)
+  #define PyInt_FromString             PyLong_FromString
+  #define PyInt_FromUnicode            PyLong_FromUnicode
+  #define PyInt_FromLong               PyLong_FromLong
+  #define PyInt_FromSize_t             PyLong_FromSize_t
+  #define PyInt_FromSsize_t            PyLong_FromSsize_t
+  #define PyInt_AsLong                 PyLong_AsLong
+  #define PyInt_AS_LONG                PyLong_AS_LONG
+  #define PyInt_AsSsize_t              PyLong_AsSsize_t
+  #define PyInt_AsUnsignedLongMask     PyLong_AsUnsignedLongMask
+  #define PyInt_AsUnsignedLongLongMask PyLong_AsUnsignedLongLongMask
+  #define __Pyx_PyNumber_Divide(x,y)         PyNumber_TrueDivide(x,y)
+#else
+  #define __Pyx_PyNumber_Divide(x,y)         PyNumber_Divide(x,y)
+  #define PyBytes_Type                 PyString_Type
+#endif
+#if PY_MAJOR_VERSION >= 3
+  #define PyMethod_New(func, self, klass) PyInstanceMethod_New(func)
+#endif
+#if !defined(WIN32) && !defined(MS_WINDOWS)
+  #ifndef __stdcall
+    #define __stdcall
+  #endif
+  #ifndef __cdecl
+    #define __cdecl
+  #endif
+#else
+  #define _USE_MATH_DEFINES
+#endif
+#ifdef __cplusplus
+#define __PYX_EXTERN_C extern "C"
+#else
+#define __PYX_EXTERN_C extern
+#endif
+#include <math.h>
+#define __PYX_HAVE_API__sphinx__pycode__pgen2__parse
+
+
+#ifdef __GNUC__
+#define INLINE __inline__
+#elif _WIN32
+#define INLINE __inline
+#else
+#define INLINE 
+#endif
+
+typedef struct {PyObject **p; char *s; long n; char is_unicode; char intern; char is_identifier;} __Pyx_StringTabEntry; /*proto*/
+
+
+
+static int __pyx_skip_dispatch = 0;
+
+
+/* Type Conversion Predeclarations */
+
+#if PY_MAJOR_VERSION < 3
+#define __Pyx_PyBytes_FromString PyString_FromString
+#define __Pyx_PyBytes_AsString   PyString_AsString
+#else
+#define __Pyx_PyBytes_FromString PyBytes_FromString
+#define __Pyx_PyBytes_AsString   PyBytes_AsString
+#endif
+
+#define __Pyx_PyBool_FromLong(b) ((b) ? (Py_INCREF(Py_True), Py_True) : (Py_INCREF(Py_False), Py_False))
+static INLINE int __Pyx_PyObject_IsTrue(PyObject* x);
+static INLINE PY_LONG_LONG __pyx_PyInt_AsLongLong(PyObject* x);
+static INLINE unsigned PY_LONG_LONG __pyx_PyInt_AsUnsignedLongLong(PyObject* x);
+static INLINE Py_ssize_t __pyx_PyIndex_AsSsize_t(PyObject* b);
+
+#define __pyx_PyInt_AsLong(x) (PyInt_CheckExact(x) ? PyInt_AS_LONG(x) : PyInt_AsLong(x))
+#define __pyx_PyFloat_AsDouble(x) (PyFloat_CheckExact(x) ? PyFloat_AS_DOUBLE(x) : PyFloat_AsDouble(x))
+
+static INLINE unsigned char __pyx_PyInt_unsigned_char(PyObject* x);
+static INLINE unsigned short __pyx_PyInt_unsigned_short(PyObject* x);
+static INLINE char __pyx_PyInt_char(PyObject* x);
+static INLINE short __pyx_PyInt_short(PyObject* x);
+static INLINE int __pyx_PyInt_int(PyObject* x);
+static INLINE long __pyx_PyInt_long(PyObject* x);
+static INLINE signed char __pyx_PyInt_signed_char(PyObject* x);
+static INLINE signed short __pyx_PyInt_signed_short(PyObject* x);
+static INLINE signed int __pyx_PyInt_signed_int(PyObject* x);
+static INLINE signed long __pyx_PyInt_signed_long(PyObject* x);
+static INLINE long double __pyx_PyInt_long_double(PyObject* x);
+#ifdef __GNUC__
+/* Test for GCC > 2.95 */
+#if __GNUC__ > 2 ||               (__GNUC__ == 2 && (__GNUC_MINOR__ > 95)) 
+#define likely(x)   __builtin_expect(!!(x), 1)
+#define unlikely(x) __builtin_expect(!!(x), 0)
+#else /* __GNUC__ > 2 ... */
+#define likely(x)   (x)
+#define unlikely(x) (x)
+#endif /* __GNUC__ > 2 ... */
+#else /* __GNUC__ */
+#define likely(x)   (x)
+#define unlikely(x) (x)
+#endif /* __GNUC__ */
+    
+static PyObject *__pyx_m;
+static PyObject *__pyx_b;
+static PyObject *__pyx_empty_tuple;
+static int __pyx_lineno;
+static int __pyx_clineno = 0;
+static const char * __pyx_cfilenm= __FILE__;
+static const char *__pyx_filename;
+static const char **__pyx_f;
+
+static INLINE void __Pyx_RaiseArgtupleTooLong(Py_ssize_t num_expected, Py_ssize_t num_found); /*proto*/
+
+static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list); /*proto*/
+
+static PyObject *__Pyx_GetName(PyObject *dict, PyObject *name); /*proto*/
+
+static PyObject *__Pyx_CreateClass(PyObject *bases, PyObject *dict, PyObject *name, char *modname); /*proto*/
+
+static INLINE PyObject *__Pyx_GetItemInt(PyObject *o, Py_ssize_t i, int is_unsigned) {
+    PyObject *r;
+    if (PyList_CheckExact(o) && 0 <= i && i < PyList_GET_SIZE(o)) {
+        r = PyList_GET_ITEM(o, i);
+        Py_INCREF(r);
+    }
+    else if (PyTuple_CheckExact(o) && 0 <= i && i < PyTuple_GET_SIZE(o)) {
+        r = PyTuple_GET_ITEM(o, i);
+        Py_INCREF(r);
+    }
+    else if (Py_TYPE(o)->tp_as_sequence && Py_TYPE(o)->tp_as_sequence->sq_item && (likely(i >= 0) || !is_unsigned))
+        r = PySequence_GetItem(o, i);
+    else {
+        PyObject *j = (likely(i >= 0) || !is_unsigned) ? PyInt_FromLong(i) : PyLong_FromUnsignedLongLong((sizeof(unsigned long long) > sizeof(Py_ssize_t) ? (1ULL << (sizeof(Py_ssize_t)*8)) : 0) + i);
+        if (!j)
+            return 0;
+        r = PyObject_GetItem(o, j);
+        Py_DECREF(j);
+    }
+    return r;
+}
+
+static PyObject *__Pyx_UnpackItem(PyObject *, Py_ssize_t index); /*proto*/
+static int __Pyx_EndUnpack(PyObject *); /*proto*/
+
+static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb); /*proto*/
+
+static INLINE PyObject* __Pyx_PyObject_Append(PyObject* L, PyObject* x) {
+    if (likely(PyList_CheckExact(L))) {
+        if (PyList_Append(L, x) < 0) return NULL;
+        Py_INCREF(Py_None);
+        return Py_None; // this is just to have an accurate signature
+    }
+    else {
+        return PyObject_CallMethod(L, "append", "(O)", x);
+    }
+}
+
+static INLINE int __Pyx_SetItemInt(PyObject *o, Py_ssize_t i, PyObject *v, int is_unsigned) {
+    int r;
+    if (PyList_CheckExact(o) && 0 <= i && i < PyList_GET_SIZE(o)) {
+        Py_DECREF(PyList_GET_ITEM(o, i));
+        Py_INCREF(v);
+        PyList_SET_ITEM(o, i, v);
+        return 1;
+    }
+    else if (Py_TYPE(o)->tp_as_sequence && Py_TYPE(o)->tp_as_sequence->sq_ass_item && (likely(i >= 0) || !is_unsigned))
+        r = PySequence_SetItem(o, i, v);
+    else {
+        PyObject *j = (likely(i >= 0) || !is_unsigned) ? PyInt_FromLong(i) : PyLong_FromUnsignedLongLong((sizeof(unsigned long long) > sizeof(Py_ssize_t) ? (1ULL << (sizeof(Py_ssize_t)*8)) : 0) + i);
+        if (!j)
+            return -1;
+        r = PyObject_SetItem(o, j, v);
+        Py_DECREF(j);
+    }
+    return r;
+}
+
+static void __Pyx_WriteUnraisable(const char *name); /*proto*/
+
+static int __Pyx_SetVtable(PyObject *dict, void *vtable); /*proto*/
+
+static void __Pyx_AddTraceback(const char *funcname); /*proto*/
+
+static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); /*proto*/
+
+/* Type declarations */
+
+/* "/home/gbr/devel/sphinx/sphinx/pycode/pgen2/parse.pyx":31
+ * 
+ * 
+ * cdef class Parser:             # <<<<<<<<<<<<<<
+ *     cdef public grammar, stack, rootnode, used_names
+ *     cdef _grammar_dfas, _grammar_labels, _grammar_keywords, _grammar_tokens
+ */
+
+struct __pyx_obj_6sphinx_6pycode_5pgen2_5parse_Parser {
+  PyObject_HEAD
+  struct __pyx_vtabstruct_6sphinx_6pycode_5pgen2_5parse_Parser *__pyx_vtab;
+  PyObject *grammar;
+  PyObject *stack;
+  PyObject *rootnode;
+  PyObject *used_names;
+  PyObject *_grammar_dfas;
+  PyObject *_grammar_labels;
+  PyObject *_grammar_keywords;
+  PyObject *_grammar_tokens;
+  PyObject *_grammar_number2symbol;
+};
+
+
+struct __pyx_vtabstruct_6sphinx_6pycode_5pgen2_5parse_Parser {
+  int (*classify)(struct __pyx_obj_6sphinx_6pycode_5pgen2_5parse_Parser *, PyObject *, PyObject *, PyObject *);
+  void (*shift)(struct __pyx_obj_6sphinx_6pycode_5pgen2_5parse_Parser *, PyObject *, PyObject *, PyObject *, PyObject *);
+  void (*push)(struct __pyx_obj_6sphinx_6pycode_5pgen2_5parse_Parser *, PyObject *, PyObject *, PyObject *, PyObject *);
+  void (*pop)(struct __pyx_obj_6sphinx_6pycode_5pgen2_5parse_Parser *);
+  PyObject *(*convert)(struct __pyx_obj_6sphinx_6pycode_5pgen2_5parse_Parser *, PyObject *);
+};
+static struct __pyx_vtabstruct_6sphinx_6pycode_5pgen2_5parse_Parser *__pyx_vtabptr_6sphinx_6pycode_5pgen2_5parse_Parser;
+/* Module declarations from sphinx.pycode.pgen2.parse */
+
+static PyTypeObject *__pyx_ptype_6sphinx_6pycode_5pgen2_5parse_Parser = 0;
+
+
+/* Implementation of sphinx.pycode.pgen2.parse */
+static char __pyx_k_2[] = "Exception to signal the parser is stuck.";
+static PyObject *__pyx_int_0;
+static PyObject *__pyx_int_1;
+static char __pyx_k___init__[] = "__init__";
+static PyObject *__pyx_kp___init__;
+static char __pyx_k_setup[] = "setup";
+static PyObject *__pyx_kp_setup;
+static char __pyx_k_addtoken[] = "addtoken";
+static PyObject *__pyx_kp_addtoken;
+static char __pyx_k_1[] = "sphinx.pycode.nodes";
+static PyObject *__pyx_kp_1;
+static char __pyx_k_Node[] = "Node";
+static PyObject *__pyx_kp_Node;
+static char __pyx_k_Leaf[] = "Leaf";
+static PyObject *__pyx_kp_Leaf;
+static char __pyx_k_ParseError[] = "ParseError";
+static PyObject *__pyx_kp_ParseError;
+static char __pyx_k_Exception[] = "Exception";
+static PyObject *__pyx_kp_Exception;
+static char __pyx_k_msg[] = "msg";
+static PyObject *__pyx_kp_msg;
+static char __pyx_k_type[] = "type";
+static PyObject *__pyx_kp_type;
+static char __pyx_k_value[] = "value";
+static PyObject *__pyx_kp_value;
+static char __pyx_k_context[] = "context";
+static PyObject *__pyx_kp_context;
+static char __pyx_k_dfas[] = "dfas";
+static PyObject *__pyx_kp_dfas;