Anonymous committed 3a85889

[svn] Name change, round 4 (rename SVN root folder).


Files changed (96)

+Pygments is written and maintained by Georg Brandl <g.brandl@gmx.net>.
+
+Other major contributors are:
+ - Armin Ronacher <armin.ronacher@active-4.com>
+
+Pygments - Python syntax highlighting package
+Copyright (C) 2006 by the respective authors (see AUTHORS)
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+#
+# Makefile for Pygments
+# ~~~~~~~~~~~~~~~~~~~~~
+#
+# Combines scripts for common tasks.
+#
+# :copyright: 2006 by Georg Brandl.
+# :license: GNU GPL, see LICENSE for more details.
+#
+
+PYTHON ?= python
+
+export PYTHONPATH = $(shell python -c 'print ":".join(line.strip() for line in file("PYTHONPATH"))' 2>/dev/null)
+
+.PHONY: apidocs check clean clean-pyc codetags docs epydoc lexermap \
+	pylint reindent test
+
+apidocs: epydoc
+
+check:
+	@$(PYTHON) scripts/check_sources.py -i apidocs -i pygments/lexers/_mapping.py \
+		   -i docs/build
+
+clean: clean-pyc
+	rm -f codetags.html
+	rm -rf apidocs
+
+clean-pyc:
+	find . -name '*.pyc' -exec rm -f {} +
+	find . -name '*.pyo' -exec rm -f {} +
+	find . -name '*~' -exec rm -f {} +
+
+codetags:
+	@$(PYTHON) scripts/find_codetags.py -i apidocs -i scripts/pylintrc \
+		   -i scripts/find_codetags.py -o codetags.html .
+
+docs: docs/build
+
+docs/build: docs/src/*.txt
+	$(PYTHON) docs/generate.py html docs/build $?
+	touch docs/build
+
+epydoc:
+	@rm -rf apidocs
+	@$(PYTHON) -Wi:default_transform `which epydoc` -o apidocs --css scripts/epydoc.css \
+		   --url http://trac.pocoo.org/pygments --no-frames --docformat restructuredtext \
+		   -v pygments
+	@sed -i -e 's|^<br />||' \
+			-e 's|\s\+$$||' \
+			-e 's|^\s\+</pre>|</pre>|' \
+			-e 's|\(<table class="[^"]*"\) border="1"|\1|' \
+			-e 's|\(<table class="navbar" .*\) width="100%"|\1|' \
+			-e 's|<td width="15%"|<td class="spacer"|' \
+			apidocs/*.html
+	@$(PYTHON) scripts/fix_epydoc_markup.py apidocs
+
+lexermap:
+	cd pygments/lexers; $(PYTHON) _mapping.py
+
+pylint:
+	@pylint --rcfile scripts/pylintrc pygments
+
+reindent:
+	@$(PYTHON) scripts/reindent.py -r -B .
+
+test:
+	@$(PYTHON) tests/run.py
+Todo
+====
+
+- allow multiple token types per regex (done, but awkwardly)
+- allow "overlay" token types (e.g. Diff + X) 
+  - highlight specials: nth line, a word etc.
+  - dhtml: overlays toggleable by javascript
+
+- unit tests
+
+- docstrings?
+
+- lexers:
+    * HTML with special formatting
+    * ocaml
+    * nemerle
+    * scheme/lisp
+    * windows batch files
+    * assembler
+    * objective c
+    * bash
+    * mysql/postgresql/sqlite
+    * tcl
+    * (la)tex
+    * django templates 
+
+- goto label highlighting support for languages that use it
+
+- add a `Punctuation` token type for characters that are not text
+  but also not operators (blocks in ruby etc)
+
+- add support for function name highlighting to c++ lexer
+
+- styles should be able to define the overall background color
+
+- tell the DelphiLexer how to distinguish between Operators and
+  text.
+
+- review perl lexer (numerous bugs)
+
+- moin parser
+
+- add folding? would require more language-aware parsers...
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+    Generate Pygments Documentation
+    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+    Generates a bunch of html files containing the documentation.
+
+    :copyright: 2006 by Armin Ronacher, Georg Brandl.
+    :license: GNU LGPL, see LICENSE for more details.
+"""
+
+import os
+import sys
+from datetime import datetime
+from cgi import escape
+
+from docutils import nodes
+from docutils.parsers.rst import directives
+from docutils.core import publish_parts
+from docutils.writers import html4css1
+
+from jinja import Template, Context, StringLoader
+
+from pygments import highlight
+from pygments.lexers import get_lexer_by_name
+from pygments.formatters import HtmlFormatter
+
+
+PYGMENTS_FORMATTER = HtmlFormatter(style='friendly', cssclass='syntax')
+
+USAGE = '''\
+Usage: %s <mode> <destination> [<source.txt> ...]
+
+Generate either python or html files out of the documentation.
+
+Mode can either be python or html.\
+''' % sys.argv[0]
+
+TEMPLATE = '''\
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"
+   "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+  <title>{{ title }} &mdash; Pygments</title>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <style type="text/css">
+    {{ style }}
+  </style>
+</head>
+<body>
+  <div id="content">
+    <h1 class="heading">Pygments</h1>
+    <h2 class="subheading">{{ title }}</h2>
+    {% if not file_id equals "index" %}
+      <a id="backlink" href="index.html">&laquo; Back To Index</a>
+    {% endif %}
+    {% if toc %}
+      <div class="toc">
+        <h2>Contents</h2>
+        <ul class="contents">
+        {% for item in toc %}
+          <li><a href="{{ item.0 }}">{{ item.1 }}</a></li>
+        {% endfor %}
+        </ul>
+      </div>
+    {% endif %}
+    {{ body }}
+  </div>
+</body>
+<!-- generated on: {{ generation_date }}
+     file id: {{ file_id }} -->
+</html>\
+'''
+
+STYLESHEET = '''\
+body {
+    background-color: #f2f2f2;
+    margin: 0;
+    padding: 0;
+    font-family: 'Georgia', serif;
+    color: #111;
+}
+
+#content {
+    background-color: white;
+    padding: 20px;
+    margin: 20px auto 20px auto;
+    max-width: 800px;
+    border: 4px solid #ddd;
+}
+
+h1 {
+	font-weight: normal;
+	font-size: 40px;
+	color: #09839A;
+}
+
+h2 {
+	font-weight: normal;
+	font-size: 30px;
+	color: #C73F00;
+}
+
+h1.heading {
+    margin: 0 0 30px 0;
+}
+
+h2.subheading {
+    margin: -30px 0 0 45px;
+}
+
+h3 {
+	margin-top: 30px;
+}
+
+table.docutils {
+	border-collapse: collapse;
+	border: 2px solid #aaa;
+	margin: 0.5em 1.5em 0.5em 1.5em;
+}
+
+table.docutils td {
+	padding: 2px;
+	border: 1px solid #ddd;
+}
+
+p, li, dd, dt, blockquote {
+	font-size: 15px;
+	color: #333;
+}
+
+p {
+	line-height: 150%;
+	margin-bottom: 0;
+	margin-top: 10px;
+}
+
+hr {
+	border-top: 1px solid #ccc;
+	border-bottom: 0;
+	border-right: 0;
+	border-left: 0;
+	margin-bottom: 10px;
+	margin-top: 20px;
+}
+
+dl {
+	margin-left: 10px;
+}
+
+li, dt {
+	margin-top: 5px;
+}
+
+dt {
+	font-weight: bold;
+}
+
+th {
+	text-align: left;
+}
+
+a {
+	color: #990000;
+}
+
+a:hover {
+	color: #c73f00;
+}
+
+pre {
+	background-color: #f0f0f0;
+	border-top: 1px solid #ccc;
+	border-bottom: 1px solid #ccc;
+	padding: 5px;
+	font-size: 13px;
+	font-family: Bitstream Vera Sans Mono,monospace;
+}
+
+tt {
+	font-size: 13px;
+	font-family: Bitstream Vera Sans Mono,monospace;
+	color: black;
+	padding: 1px 2px 1px 2px;
+	background-color: #f0f0f0;
+}
+
+cite {
+	/* abusing <cite>, it's generated by ReST for `x` */
+	font-size: 13px;
+	font-family: Bitstream Vera Sans Mono,monospace;
+	font-weight: bold;
+	font-style: normal;
+}
+
+#backlink {
+    float: right;
+    font-size: 11px;
+    color: #888;
+}
+
+div.toc {
+    margin: 0 0 10px 0;
+}
+
+div.toc h2 {
+    font-size: 20px;
+}
+'''
+
+
+def generate_documentation(data, link_style):
+    writer = DocumentationWriter(link_style)
+    parts = publish_parts(
+        data,
+        writer=writer,
+        settings_overrides={
+            'initial_header_level': 3,
+            'field_name_limit': 50,
+        }
+    )
+    return {
+        'title':        parts['title'].encode('utf-8'),
+        'body':         parts['body'].encode('utf-8'),
+        'toc':          parts['toc']
+    }
+
+
+def pygments_directive(name, arguments, options, content, lineno,
+                      content_offset, block_text, state, state_machine):
+    try:
+        lexer = get_lexer_by_name(arguments[0])
+    except ValueError:
+        # no lexer found
+        lexer = get_lexer_by_name('text')
+    parsed = highlight(u'\n'.join(content), lexer, PYGMENTS_FORMATTER)
+    return [nodes.raw('', parsed, format="html")]
+pygments_directive.arguments = (1, 0, 1)
+pygments_directive.content = 1
+directives.register_directive('sourcecode', pygments_directive)
+
+
+class DocumentationWriter(html4css1.Writer):
+
+    def __init__(self, link_style):
+        html4css1.Writer.__init__(self)
+        self.translator_class = create_translator(link_style)
+
+    def translate(self):
+        html4css1.Writer.translate(self)
+        # generate table of contents
+        contents = self.build_contents(self.document)
+        contents_doc = self.document.copy()
+        contents_doc.children = contents
+        contents_visitor = self.translator_class(contents_doc)
+        contents_doc.walkabout(contents_visitor)
+        self.parts['toc'] = self._generated_toc
+
+    def build_contents(self, node, level=0):
+        sections = []
+        i = len(node) - 1
+        while i >= 0 and isinstance(node[i], nodes.section):
+            sections.append(node[i])
+            i -= 1
+        sections.reverse()
+        toc = []
+        for section in sections:
+            try:
+                reference = nodes.reference('', '', refid=section['ids'][0], *section[0])
+            except IndexError:
+                continue
+            ref_id = reference['refid']
+            text = escape(reference.astext().encode('utf-8'))
+            toc.append((ref_id, text))
+
+        self._generated_toc = [('#%s' % href, caption) for href, caption in toc]
+        # no further processing
+        return []
+
+
+def create_translator(link_style):
+    class Translator(html4css1.HTMLTranslator):
+        def visit_reference(self, node):
+            refuri = node.get('refuri')
+            if refuri is not None and '/' not in refuri and refuri.endswith('.txt'):
+                node['refuri'] = link_style(refuri[:-4])
+            html4css1.HTMLTranslator.visit_reference(self, node)
+    return Translator
+
+
+def handle_python(filename, fp, dst):
+    now = datetime.now()
+    title = os.path.basename(filename)[:-4]
+    content = fp.read()
+    def urlize(href):
+        # create links for the pygments webpage
+        if href == 'index.txt':
+            return '/docs/'
+        else:
+            return '/docs/%s/' % href
+    parts = generate_documentation(content, urlize)
+    result = file(os.path.join(dst, title + '.py'), 'w')
+    result.write('# -*- coding: utf-8 -*-\n')
+    result.write('"""\n    Pygments Documentation - %s\n' % title)
+    result.write('    %s\n\n' % ('~' * (24 + len(title))))
+    result.write('    Generated on: %s\n"""\n\n' % now)
+    result.write('import datetime\n')
+    result.write('DATE = %r\n' % now)
+    result.write('TITLE = %r\n' % parts['title'])
+    result.write('TOC = %r\n' % parts['toc'])
+    result.write('BODY = %r\n' % parts['body'])
+    result.close()
+
+
+def handle_html(filename, fp, dst):
+    now = datetime.now()
+    title = os.path.basename(filename)[:-4]
+    content = fp.read()
+    parts = generate_documentation(content, (lambda x: './%s.html' % x))
+    result = file(os.path.join(dst, title + '.html'), 'w')
+    c = Context(parts)
+    c['style'] = STYLESHEET + PYGMENTS_FORMATTER.get_style_defs('.syntax')
+    c['generation_date'] = now
+    c['file_id'] = title
+    t = Template(TEMPLATE, StringLoader())
+    result.write(t.render(c).encode('utf-8'))
+    result.close()
+
+
+def run(handle_file, dst, sources=()):
+    path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'src'))
+    if not sources:
+        sources = [os.path.join(path, fn) for fn in os.listdir(path)]
+    for fn in sources:
+        if not os.path.isfile(fn):
+            continue
+        print 'Processing %s' % fn
+        f = open(fn)
+        try:
+            handle_file(fn, f, dst)
+        finally:
+            f.close()
+
+
+def main(mode, dst='build/', *sources):
+    try:
+        handler = {
+            'html':         handle_html,
+            'python':       handle_python
+        }[mode]
+    except KeyError:
+        print 'Error: unknown mode "%s"' % mode
+        sys.exit(1)
+    run(handler, os.path.realpath(dst), sources)
+
+
+if __name__ == '__main__':
+    if len(sys.argv) == 1:
+        print USAGE
+    else:
+        main(*sys.argv[1:])
+.. -*- mode: rst -*-
+
+=====================
+The full Pygments API
+=====================
+
+This page describes the Pygments API.
+
+High-level API
+==============
+
+Functions from the `pygments` module:
+
+def `lex(code, lexer):`
+    Lex `code` with the `lexer` (must be a `Lexer` instance)
+    and return an iterable of tokens. Currently, this only calls
+    `lexer.get_tokens()`.
+
+def `format(tokens, formatter, outfile=None):`
+    Format a token stream (iterable of tokens) `tokens` with the
+    `formatter` (must be a `Formatter` instance). The result is
+    written to `outfile`, or if that is ``None``, returned as a
+    string.
+
+def `highlight(code, lexer, formatter, outfile=None):`
+    This is the most high-level highlighting function.
+    It combines `lex` and `format` in one function.
+
+
+Functions from `pygments.lexers`:
+
+def `get_lexer_by_name(alias, **options):`
+    Return an instance of a `Lexer` subclass that has `alias` in its
+    aliases list. The lexer is given the `options` at its
+    instantiation.
+
+    Will raise `ValueError` if no lexer with that alias is found.
+
+def `get_lexer_for_filename(fn, **options):`
+    Return a `Lexer` subclass instance that has a filename pattern
+    matching `fn`. The lexer is given the `options` at its
+    instantiation.
+
+    Will raise `ValueError` if no lexer for that filename is found.
+
+
+Functions from `pygments.formatters`:
+
+def `get_formatter_by_name(alias, **options):`
+    Return an instance of a `Formatter` subclass that has `alias` in its
+    aliases list. The formatter is given the `options` at its
+    instantiation.
+
+    Will raise `ValueError` if no formatter with that alias is found.
+
+def `get_formatter_for_filename(fn, **options):`
+    Return a `Formatter` subclass instance that has a filename pattern
+    matching `fn`. The formatter is given the `options` at its
+    instantiation.
+
+    Will raise `ValueError` if no formatter for that filename is found.
+
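+Putting these together, a short usage sketch (the ``python`` and ``html``
+aliases used here are documented on the lexers and formatters pages):
+
+.. sourcecode:: python
+
+    from pygments import highlight
+    from pygments.lexers import get_lexer_by_name
+    from pygments.formatters import get_formatter_by_name
+
+    code = 'print "Hello World"'
+    lexer = get_lexer_by_name('python')
+    formatter = get_formatter_by_name('html')
+    # no outfile is given, so the highlighted code is returned as a string
+    html = highlight(code, lexer, formatter)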
+
+Lexers
+======
+
+A lexer (derived from `pygments.lexer.Lexer`) has the following functions:
+
+def `__init__(self, **options):`
+    The constructor. Takes a \*\*keywords dictionary of options.
+    Every subclass must first process its own options and then call
+    the `Lexer` constructor, since it processes the `stripnl`,
+    `stripall` and `tabsize` options.
+
+    An example looks like this:
+
+    .. sourcecode:: python
+
+        def __init__(self, **options):
+            self.compress = options.get('compress', '')
+            Lexer.__init__(self, **options)
+
+    As these options must all be specifiable as strings (due to the
+    command line usage), there are various utility functions
+    available to help with that, see `Option processing`_.
+
+def `get_tokens(self, text):`
+    This method is the basic interface of a lexer. It is called by
+    the `highlight()` function. It must process the text and return an
+    iterable of ``(tokentype, value)`` pairs from `text`.
+
+    Normally, you don't need to override this method. The default
+    implementation processes the `stripnl`, `stripall` and `tabsize`
+    options and then yields all tokens from `get_tokens_unprocessed()`,
+    with the ``index`` dropped.
+
+def `get_tokens_unprocessed(self, text):`
+    This method should process the text and return an iterable of
+    ``(index, tokentype, value)`` tuples where ``index`` is the starting
+    position of the token within the input text.
+
+    This method must be overridden by subclasses.
+
+For a list of known tokens have a look at the `Tokens`_ page.
+
+The lexer also recognizes the following attributes that are used by the
+builtin lookup mechanism.
+
+`name`
+    Full name for the lexer, in human-readable form.
+
+`aliases`
+    A list of short, unique identifiers that can be used to look up
+    the lexer from a list.
+
+`filenames`
+    A list of `fnmatch` patterns that can be used to find a lexer for
+    a given filename.
+
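+As a minimal sketch, here is a hypothetical lexer (class name, alias and
+filename pattern are made up) that defines these attributes and implements
+`get_tokens_unprocessed()` directly:
+
+.. sourcecode:: python
+
+    from pygments.lexer import Lexer
+    from pygments.token import Text
+
+    class PlainLexer(Lexer):
+        name = 'Plain text example'
+        aliases = ['plain-example']
+        filenames = ['*.example']
+
+        def get_tokens_unprocessed(self, text):
+            # a single Text token covering the whole input, starting at index 0
+            yield 0, Text, text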
+
+.. _Tokens: tokens.txt
+
+
+Formatters
+==========
+
+A formatter (derived from `pygments.formatter.Formatter`) has the following
+functions:
+
+def `__init__(self, **options):`
+    As with lexers, this constructor processes options and then must call
+    the base class `__init__`.
+
+    The `Formatter` class recognizes the options `style`, `full` and
+    `title`. It is up to the formatter class whether it uses them.
+
+def `get_style_defs(self, arg=''):`
+    This method must return statements or declarations suitable to define
+    the current style for subsequent highlighted text (e.g. CSS classes
+    in the `HtmlFormatter`).
+
+    The optional argument `arg` can be used to modify the generation and
+    is formatter dependent (it is standardized because it can be given on
+    the command line).
+
+    This method is called by the ``-S`` `command-line option`_, the `arg`
+    is then given by the ``-a`` option.
+
+def `format(self, tokensource, outfile):`
+    This method must format the tokens from the `tokensource` iterable and
+    write the formatted version to the file object `outfile`.
+
+    Formatter options can control how exactly the tokens are converted.
+
+.. _command-line option: cmdline.txt
+
+
+Option processing
+=================
+
+The `pygments.util` module has some utility functions usable for option
+processing:
+
+class `OptionError`
+    This exception will be raised by all option processing functions if
+    the type of the argument is not correct.
+
+def `get_bool_opt(options, optname, default=None):`
+    Interpret the key `optname` from the dictionary `options`
+    as a boolean and return it. Return `default` if `optname`
+    is not in `options`.
+
+    The valid string values for ``True`` are ``1``, ``yes``,
+    ``true`` and ``on``, the ones for ``False`` are ``0``,
+    ``no``, ``false`` and ``off`` (matched case-insensitively).
+
+def `get_int_opt(options, optname, default=None):`
+    As `get_bool_opt`, but interpret the value as an integer.
+
+def `get_list_opt(options, optname, default=None):`
+    If the key `optname` from the dictionary `options` is a string,
+    split it at whitespace and return it. If it is already a list
+    or a tuple, it is returned as a list.
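+
+A small sketch of how these helpers are typically used in a formatter
+constructor (the formatter class here is made up for illustration):
+
+.. sourcecode:: python
+
+    from pygments.formatter import Formatter
+    from pygments.util import get_bool_opt, get_int_opt
+
+    class MyFormatter(Formatter):
+        def __init__(self, **options):
+            Formatter.__init__(self, **options)
+            # both helpers also accept string values such as "yes" or "42",
+            # as given on the command line via the -O option
+            self.linenos = get_bool_opt(options, 'linenos', False)
+            self.linenostart = get_int_opt(options, 'linenostart', 1)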

docs/src/cmdline.txt

+.. -*- mode: rst -*-
+
+======================
+Command Line Interface
+======================
+
+You can use Pygments from the shell, provided you installed the `pygmentize` script::
+
+    $ pygmentize test.py
+    print "Hello World"
+
+will print the file test.py to standard output, using the Python lexer
+(inferred from the file name extension) and the terminal formatter (because
+you didn't give an explicit formatter name).
+
+If you want HTML output::
+
+    $ pygmentize -f html -l python -o test.html test.py
+
+As you can see, the ``-l`` option explicitly selects a lexer. As seen above, if you
+give an input file name and it has an extension that Pygments recognizes, you can
+omit this option.
+
+The ``-o`` option gives an output file name. If it is not given, output is
+written to stdout.
+
+The ``-f`` option selects a formatter (as with ``-l``, it can also be omitted
+if an output file name is given and has a supported extension).
+If no output file name is given and ``-f`` is omitted, the
+`TerminalFormatter` is used.
+
+The above command could therefore also be given as::
+
+    $ pygmentize -o test.html test.py
+
+Lexer and formatter options can be given using the ``-O`` option::
+
+    $ pygmentize -f html -O style=colorful,linenos=1 -l python test.py
+
+Be sure to enclose the option string in quotes if it contains any special
+shell characters, such as spaces or expansion wildcards like ``*``.
+
+There's a special ``-S`` option for generating style definitions. Usage is
+as follows::
+
+    $ pygmentize -f html -S colorful -a .syntax
+
+generates a CSS style sheet (because you selected the HTML formatter) for
+the "colorful" style, prepending a ``.syntax`` selector to all style rules.
+
+For an explanation of what ``-a`` means for `a particular formatter`_, look at
+the `arg` argument of the formatter's `get_style_defs()` method.
+
+The ``-L`` option lists all lexers and formatters, along with their short
+names and supported file name extensions.
+
+
+.. _a particular formatter: formatters.txt

docs/src/formatterdev.txt

+.. -*- mode: rst -*-
+
+========================
+Write your own formatter
+========================
+
+As well as creating `your own lexer <lexerdevelopment.txt>`_, writing a new
+formatter for Pygments is easy and straightforward.
+
+A formatter is a class that is initialized with some keyword arguments (the
+formatter options) and that must provide a `format()` method.
+Additionally, a formatter should provide a `get_style_defs()` method that
+returns the style definitions from the style in a form usable for the
+formatter's output format.
+
+
+Quickstart
+==========
+
+The most basic formatter shipped with Pygments is the `NullFormatter`. It just
+sends the value of a token to the output stream:
+
+.. sourcecode:: python
+
+    from pygments.formatter import Formatter
+
+    class NullFormatter(Formatter):
+        def format(self, tokensource, outfile):
+            for ttype, value in tokensource:
+                outfile.write(value)
+
+As you can see, the `format()` method is passed two parameters: `tokensource`
+and `outfile`. The first is an iterable of ``(token_type, value)`` tuples,
+the second a file-like object with a `write()` method.
+
+Because this formatter is so basic, it doesn't override the `get_style_defs()`
+method.
+
+
+Styles
+======
+
+Styles aren't instantiated but their metaclass provides some class functions
+so that you can access the style definitions easily.
+
+Styles are iterable and yield tuples in the form ``(ttype, d)`` where `ttype`
+is a token type and `d` is a dict with the following keys:
+
+``'color'``
+    Hexadecimal color value (eg: ``'ff0000'`` for red) or `None` if not
+    defined.
+
+``'bold'``
+    `True` if the value should be bold
+
+``'italic'``
+    `True` if the value should be italic
+
+``'underline'``
+    `True` if the value should be underlined
+
+``'bgcolor'``
+    Hexadecimal color value for the background (eg: ``'eeeeee'`` for light
+    gray) or `None` if not defined.
+
+``'border'``
+    Hexadecimal color value for the border (eg: ``'0000aa'`` for a dark
+    blue) or `None` for no border.
+
+Additional keys might appear in the future; formatters should ignore all keys
+they don't support.
+
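+A small sketch of iterating over a style this way (assuming the default style
+is importable as `pygments.styles.default.DefaultStyle`):
+
+.. sourcecode:: python
+
+    from pygments.styles.default import DefaultStyle
+
+    for ttype, d in DefaultStyle:
+        if d['color']:
+            # d['color'] is a plain hex string such as 'ff0000'
+            print ttype, d['color']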
+
+HTML 3.2 Formatter
+==================
+
+For a more complex example, let's implement an HTML 3.2 formatter. We don't
+use CSS but inline markup (``<u>``, ``<font>``, etc.). Because this isn't good
+style, this formatter isn't in the standard library ;-)
+
+.. sourcecode:: python
+
+    from pygments.formatter import Formatter
+
+    class OldHtmlFormatter(Formatter):
+
+        def __init__(self, **options):
+            Formatter.__init__(self, **options)
+
+            # create a dict of (start, end) tuples that wrap the
+            # value of a token so that we can use it in the format
+            # method later
+            self.styles = {}
+
+            # iterating over the style yields (token, style-dict) pairs
+            # containing the parsed style values (see the Styles section)
+            for token, style in self.style:
+                start = end = ''
+                # the style dict has the keys described in the Styles section;
+                # colors are readily specified in hex: 'RRGGBB'
+                if style['color']:
+                    start += '<font color="#%s">' % style['color']
+                    end += '</font>'
+                if style['bold']:
+                    start += '<b>'
+                    end += '</b>'
+                if style['italic']:
+                    start += '<i>'
+                    end += '</i>'
+                if style['underline']:
+                    start += '<u>'
+                    end += '</u>'
+                self.styles[token] = (start, end)
+
+        def format(self, tokensource, outfile):
+            # lastval is a string we use for caching
+            # because it's possible that a lexer yields a number
+            # of consecutive tokens with the same token type.
+            # to minimize the size of the generated html markup we
+            # try to join the values of same-type tokens here
+            lastval = ''
+            lasttype = None
+
+            # wrap the whole output with <pre>
+            outfile.write('<pre>')
+
+            for ttype, value in tokensource:
+                # if the token type doesn't exist in the stylemap
+                # we try it with the parent of the token type
+                # eg: parent of Token.Literal.String.Double is
+                # Token.Literal.String
+                while ttype not in self.styles:
+                    ttype = ttype.parent
+                if ttype == lasttype:
+                    # the current token type is the same as in the last
+                    # iteration: append the value to the buffer
+                    lastval += value
+                else:
+                    # not the same token as last iteration, but we
+                    # have some data in the buffer. wrap it with the
+                    # defined style and write it to the output file
+                    if lastval:
+                        stylebegin, styleend = self.styles[lasttype]
+                        outfile.write(stylebegin + lastval + styleend)
+                    # set lastval/lasttype to current values
+                    lastval = value
+                    lasttype = ttype
+
+            # if something is left in the buffer, write it to the
+            # output file, then close the opened <pre> tag
+            if lastval:
+                stylebegin, styleend = self.styles[lasttype]
+                outfile.write(stylebegin + lastval + styleend)
+            outfile.write('</pre>\n')
+
+The comments should explain it. Again, this formatter doesn't override the
+`get_style_defs()` method. If we had used CSS classes instead of
+inline HTML markup, we would need to generate the CSS first. That is
+what the `get_style_defs()` method exists for:
+
+
+Generating Style Definitions
+============================
+
+Some formatters, like the `LatexFormatter` and the `HtmlFormatter`, don't
+output inline markup but reference either macros or CSS classes. Because
+the definitions of those are not part of the output, the `get_style_defs()`
+method exists. It is passed one parameter (whether and how it is used
+is up to the formatter) and has to return a string or ``None``.
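+
+A rough sketch of such a method for a hypothetical CSS-based formatter (the
+CSS class naming scheme here is made up; real formatters use their own
+mapping from token types to class names):
+
+.. sourcecode:: python
+
+    def get_style_defs(self, arg=''):
+        # emit one CSS rule per token type that defines a color;
+        # ``arg`` is prepended to each rule as an extra selector
+        lines = []
+        for token, style in self.style:
+            if style['color']:
+                css_class = str(token).lower().replace('.', '-')
+                lines.append('%s .%s { color: #%s; }' % (arg, css_class, style['color']))
+        return '\n'.join(lines)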

docs/src/formatters.txt

+.. -*- mode: rst -*-
+
+====================
+Available formatters
+====================
+
+This page lists all builtin formatters.
+
+Common options
+==============
+
+The `HtmlFormatter` and `LatexFormatter` classes support these options:
+
+`style`
+    The style to use, can be a string or a Style subclass (default:
+    ``'default'``).
+
+`full`
+    Tells the formatter to output a "full" document, i.e. a complete
+    self-contained document (default: ``False``).
+
+`title`
+    If `full` is true, the title that should be used to caption the
+    document (default: ``''``).
+
+`linenos`
+    If set to ``True``, output line numbers (default: ``False``).
+
+`linenostart`
+    The line number for the first line (default: ``1``).
+
+`linenostep`
+    If set to a number n > 1, only every nth line number is printed.
+
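+These options are passed as keyword arguments when the formatter is
+instantiated, for example (a minimal sketch using the `HtmlFormatter`
+described below):
+
+.. sourcecode:: python
+
+    from pygments.formatters import HtmlFormatter
+
+    # a complete HTML document with line numbers, using the "colorful" style
+    formatter = HtmlFormatter(style='colorful', full=True, linenos=True,
+                              title='Example')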
+
+Formatter classes
+=================
+
+All these classes are importable from `pygments.formatters`.
+
+
+`HtmlFormatter`
+---------------
+
+    Formats tokens as HTML 4 ``<span>`` tags within a ``<pre>`` tag, wrapped
+    in a ``<div>`` tag. The ``<div>``'s CSS class can be set by the `cssclass`
+    option.
+
+    If the `linenos` option is given and true, the ``<pre>`` is additionally
+    wrapped inside a ``<table>`` which has one row and two cells: one
+    containing the line numbers and one containing the code. Example:
+
+    .. sourcecode:: html
+
+        <div class="highlight" >
+        <table><tr>
+          <td class="linenos" title="click to toggle"
+            onclick="with (this.firstChild.style)
+                     { display = (display == '') ? 'none' : '' }">
+            <pre>1
+            2</pre>
+          </td>
+          <td class="code">
+            <pre><span class="Ke">def </span><span class="NaFu">foo</span>(bar):
+              <span class="Ke">pass</span>
+            </pre>
+          </td>
+        </tr></table></div>
+
+    (whitespace added to improve clarity). Wrapping can be disabled using the
+    `nowrap` option.
+
+    With the `full` option, a complete HTML 4 document is output, including
+    the style definitions inside a ``<style>`` tag.
+
+    The `get_style_defs(arg='')` method of a `HtmlFormatter` returns a string
+    containing CSS rules for the CSS classes used by the formatter. The
+    argument `arg` can be used to specify additional CSS selectors that
+    are prepended to the classes. A call `fmter.get_style_defs('td .code')`
+    would result in the following CSS classes:
+
+    .. sourcecode:: css
+
+        td .code .kw { font-weight: bold; color: #00FF00 }
+        td .code .cm { color: #999999 }
+        ...
+
+    Additional options accepted by the `HtmlFormatter`:
+
+    `nowrap`
+        If set to ``True``, don't wrap the tokens at all, not even in a ``<pre>``
+        tag. This disables all other options (default: ``False``).
+
+    `noclasses`
+        If set to true, token ``<span>`` tags will not use CSS classes, but
+        inline styles. This is not recommended for larger pieces of code since
+        it increases output size by quite a bit (default: ``False``).
+
+    `classprefix`
+        Since the token types use relatively short class names, they may clash
+        with some of your own class names. In this case you can use the
+        `classprefix` option to give a string to prepend to all Pygments-generated
+        CSS class names for token types.
+        Note that this option also affects the output of `get_style_defs()`.
+
+    `cssclass`
+        CSS class for the wrapping ``<div>`` tag (default: ``'highlight'``).
+
+    `cssstyles`
+        Inline CSS styles for the wrapping ``<div>`` tag (default: ``''``).
+
+    `linenospecial`
+        If set to a number n > 0, every nth line number is given the CSS
+        class ``"special"`` (default: ``0``).
+    
+    :Aliases: ``html``
+    :Filename patterns: ``*.html``, ``*.htm``
+    
+
+`LatexFormatter`
+----------------
+
+    Formats tokens as LaTeX code. This needs the `fancyvrb` and `color`
+    standard packages.
+
+    Without the `full` option, code is formatted as one ``Verbatim``
+    environment, like this:
+
+    .. sourcecode:: latex
+
+        \begin{Verbatim}[commandchars=@\[\]]
+        @Can[def ]@Cax[foo](bar):
+            @Can[pass]
+        \end{Verbatim}
+
+    The command sequences used here (``@Can`` etc.) are generated from the given
+    `style` and can be retrieved using the `get_style_defs` method.
+
+    With the `full` option, a complete LaTeX document is output, including
+    the command definitions in the preamble.
+
+    The `get_style_defs(arg='')` method of a `LatexFormatter` returns a string
+    containing ``\newcommand`` commands defining the commands used inside the
+    ``Verbatim`` environments. If the argument `arg` is true,
+    ``\renewcommand`` is used instead.
+    
+    Additional options accepted by the `LatexFormatter`:
+
+    `docclass`
+        If the `full` option is enabled, this is the document class to use
+        (default: ``'article'``).
+
+    `preamble`
+        If the `full` option is enabled, this can be further preamble commands,
+        e.g. ``\usepackage`` (default: ``''``).
+
+    `verboptions`
+        Additional options given to the Verbatim environment (see the *fancyvrb*
+        docs for possible values) (default: ``''``).
+    
+    :Aliases: ``latex``, ``tex``
+    :Filename pattern: ``*.tex``
+    
+
+`BBCodeFormatter`
+-----------------
+    
+    Formats tokens with BBcodes. These formatting codes are used by many
+    bulletin boards, so you can highlight your sourcecode with pygments before
+    posting it there.
+
+    This formatter has no support for background colors and borders, as there
+    are no common BBcode tags for that.
+
+    Some board systems (e.g. phpBB) don't support colors in their [code] tag,
+    so you can't use the highlighting together with that tag.
+    Text in a [code] tag is usually shown with a monospace font (which this
+    formatter can do with the ``monofont`` option), and spaces (which you
+    need for indentation) are not removed.
+
+    The `BBCodeFormatter` accepts two additional options:
+
+    `codetag`
+        If set to true, put the output into ``[code]`` tags (default:
+        ``false``).
+
+    `monofont`
+        If set to true, add a tag to show the code with a monospace font
+        (default: ``false``).
+
+    :Aliases: ``bbcode``, ``bb``
+    :Filename pattern: None
+
+
+`TerminalFormatter`
+-------------------
+    
+    Formats tokens with ANSI color sequences, for output in a text console.
+    Color sequences are terminated at newlines, so that paging the output
+    works correctly.
+
+    The `get_style_defs()` method doesn't do anything special since there is
+    no support for common styles.
+
+    The TerminalFormatter class supports only these options:
+
+    `bg`
+        Set to ``"light"`` or ``"dark"`` depending on the terminal's background
+        (default: ``"light"``).
+
+    `colorscheme`
+        A dictionary mapping token types to (lightbg, darkbg) color names or
+        ``None`` (default: ``None`` = use builtin colorscheme).
+
+    `debug`
+        If this option is true, output the string "<<ERROR>>" after each error
+        token. This is meant as a help for debugging Pygments (default: ``False``).
+
+    :Aliases: ``terminal``, ``console``
+    :Filename pattern: None
+
+
+`RawTokenFormatter`
+-------------------
+
+    Formats tokens as a raw representation for storing token streams.
+
+    The format is ``tokentype<TAB>repr(tokenstring)\n``. The output can later
+    be converted to a token stream with the `RawTokenLexer`, described in the
+    `lexer list <lexers.txt>`_.
+
+    One option is accepted:
+
+    `compress`
+        If set to ``'gz'`` or ``'bz2'``, compress the output with the given
+        compression algorithm after encoding (default: ``''``).
+
+    :Aliases: ``raw``, ``tokens``
+    :Filename pattern: ``*.raw``
+
+
+`NullFormatter`
+---------------
+
+    Just output all tokens, don't format in any way.
+
+    :Aliases: ``text``, ``null``
+    :Filename pattern: ``*.txt``
+

docs/src/index.txt

+.. -*- mode: rst -*-
+
+========
+Overview
+========
+
+Welcome to the Pygments documentation.
+
+- Starting with Pygments
+
+  - `Installation <installation.txt>`_
+
+  - `Quickstart <quickstart.txt>`_
+
+  - `Command line interface <cmdline.txt>`_
+
+- Essential to know
+
+  - `Builtin lexers <lexers.txt>`_
+
+  - `Builtin formatters <formatters.txt>`_
+
+  - `Styles <styles.txt>`_
+
+- API and more
+
+  - `API documentation <api.txt>`_
+
+  - `Builtin Tokens <tokens.txt>`_
+
+- Hacking for Pygments
+
+  - `Write your own lexer <lexerdevelopment.txt>`_
+
+  - `Write your own formatter <formatterdev.txt>`_
+
+- Hints and Tricks
+
+  - `Using Pygments in ReST documents <rstdirective.txt>`_
+
+
+--------------
+
+If you find bugs or have suggestions for the documentation, please
+look `here`_ for info on how to contact the team.
+
+You can download an offline version of this documentation from the
+`download page`_.
+
+.. _here: http://pygments.pocoo.org/contribute
+.. _download page: http://pygments.pocoo.org/download

docs/src/installation.txt

+.. -*- mode: rst -*-
+
+============
+Installation
+============
+
+Pygments requires at least Python 2.3 to work correctly. Just to clarify:
+there *won't* ever be support for Python versions below 2.3.
+
+
+Install the Release Version
+===========================
+
+1.  download the most recent tarball from the `download page`_
+2.  unpack the tarball
+3.  ``sudo python setup.py install``
+
+Note that the last command will automatically download and install
+`setuptools`_ if you don't already have it installed. This requires a working
+internet connection.
+
+This will install Pygments into your Python installation's site-packages directory.
+
+
+Install via easy_install
+========================
+
+You can also install the most recent Pygments version using `easy_install`_::
+
+    sudo easy_install Pygments
+
+This will install a Pygments egg in your Python installation's site-packages
+directory.
+
+
+Installing the Development Version
+==================================
+
+1.  Install `subversion`_
+2.  ``svn co http://trac.pocoo.org/repos/pygments/trunk pygments``
+3.  ``ln -s `pwd`/pygments/pygments /usr/lib/python2.X/site-packages``
+
+
+.. _download page: http://pygments.pocoo.org/download/
+.. _setuptools: http://peak.telecommunity.com/DevCenter/setuptools
+.. _easy_install: http://peak.telecommunity.com/DevCenter/EasyInstall
+.. _subversion: http://subversion.tigris.org/

docs/src/lexerdevelopment.txt

+.. -*- mode: rst -*-
+
+====================
+Write your own lexer
+====================
+
+If a lexer for your favorite language is missing in the Pygments package, you can
+easily write your own and extend Pygments.
+
+All you need can be found inside the `pygments.lexer` module. As you can read in
+the `API documentation <api.txt>`_, a lexer is a class that is initialized with
+some keyword arguments (the lexer options) and that provides a
+`get_tokens_unprocessed()` method which is given a string or unicode object with
+the data to parse.
+
+The `get_tokens_unprocessed()` method must return an iterator or iterable
+containing tuples in the form ``(index, token, value)``. Normally you don't need
+to implement this method yourself, since there are numerous base lexers you
+can subclass.
+
+
+RegexLexer
+==========
+
+A very powerful (but quite easy to use) lexer is the `RegexLexer`. This lexer
+base class allows you to define lexing rules in terms of *regular expressions*
+for different *states*.
+
+States are groups of regular expressions that are matched against the input
+string at the *current position*. If one of these expressions matches, a
+corresponding action is performed (normally yielding a token with a specific
+type), the current position is set to where the last match ended and the
+matching process continues with the first regex of the current state.
+
+Lexer states are kept in a state stack: each time a new state is entered, the
+new state is pushed onto the stack.  The most basic lexers (like the
+`DiffLexer`) just need one state.
+
+Each state is defined as a list of tuples in the form (`regex`, `action`,
+`new_state`) where the last item is optional.  In the most basic form, `action`
+is a token type (like `Name.Builtin`).  That means: when `regex` matches, a token
+with the matched text and that token type is emitted, and `new_state` is pushed
+onto the state stack.  If the new state is ``'#pop'``, the topmost state is
+popped from the stack instead. (To pop more than one state, use ``'#pop:2'``
+and so on.)
+``'#push'`` is a synonym for pushing the current state on the
+stack.
+
+The following example shows the `DiffLexer` from the builtin lexers. Note that
+it contains some additional attributes `name`, `aliases` and `filenames` which
+aren't required for a lexer. They are used by the builtin lexer lookup
+functions.
+
+.. sourcecode:: python
+
+    from pygments.lexer import RegexLexer
+    from pygments.token import \
+         Text, Comment, Keyword, Name, String, Generic
+
+    class DiffLexer(RegexLexer):
+        name = 'Diff'
+        aliases = ['diff']
+        filenames = ['*.diff']
+
+        tokens = {
+            'root': [
+                (r' .*\n', Text),
+                (r'\+.*\n', Generic.Inserted),
+                (r'-.*\n', Generic.Deleted),
+                (r'@.*\n', Generic.Subheading),
+                (r'Index.*\n', Generic.Heading),
+                (r'=.*\n', Generic.Heading),
+                (r'.*\n', Text),
+            ]
+        }
+
+As you can see, this lexer only uses one state.  When the lexer starts scanning
+the text, it first checks if the current character is a space. If this is true,
+it scans everything up to the newline and returns the matched text as a `Text`
+token.
+
+If this rule doesn't match, it checks if the current char is a plus sign.  And
+so on.
+
+If no rule matches at the current position, the current char is emitted as an
+`Error` token that indicates a parsing error, and the position is increased by
+1.
+
+
+Regex Flags
+===========
+
+You can either define regex flags in the regex (``r'(?x)foo bar'``) or by adding
+a `flags` attribute to your lexer class. If no attribute is defined, it defaults
+to `re.MULTILINE`. For more information about regular expression flags, see the
+`regular expressions`_ help page in the Python documentation.
+
+.. _regular expressions: http://docs.python.org/lib/re-syntax.html
+
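+For example, a minimal sketch of setting the `flags` attribute on a lexer
+class (the lexer itself is made up and not very useful):
+
+.. sourcecode:: python
+
+    import re
+
+    from pygments.lexer import RegexLexer
+    from pygments.token import Text
+
+    class CaseInsensitiveLexer(RegexLexer):
+        # ignore case in all rules and let ``.`` match newlines too
+        flags = re.IGNORECASE | re.DOTALL
+
+        tokens = {
+            'root': [
+                (r'.+', Text),
+            ]
+        }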
+
+Scanning multiple tokens at once
+================================
+
+Here is a more complex lexer that highlights INI files. INI files consist of
+sections, comments and key = value pairs:
+
+.. sourcecode:: python
+
+    from pygments.lexer import RegexLexer, bygroups
+
+    class IniLexer(RegexLexer):
+        name = 'INI'
+        aliases = ['ini', 'cfg']
+        filenames = ['*.ini', '*.cfg']
+
+        tokens = {
+            'root': [
+                (r'\s+', Text),
+                (r';.*?$', Comment),
+                (r'\[.*?\]$', Keyword),
+                (r'(.*?)(\s*)(=)(\s*)(.*?)$',
+                 bygroups(Name.Attribute, Text, Operator, Text, String))
+            ]
+        }
+
+The lexer first looks for whitespace, comments and section names. Later, it
+looks for a line that looks like a key, value pair separated by an ``'='``
+sign, with optional whitespace.
+
+The `bygroups` helper makes sure that each group is yielded with a different
+token type. First the `Name.Attribute` token, then a `Text` token for the
+optional whitespace, after that an `Operator` token for the equals sign. Then a
+`Text` token for the whitespace again. The rest of the line is returned as
+`String`.
+
+Note that for this to work, every part of the match must be inside a capturing
+group (a ``(...)``), and there must not be any nested capturing groups.  If you
+nevertheless need a group, use a non-capturing group defined using this syntax:
+``r'(?:some|words|here)'`` (note the ``?:`` after the beginning parenthesis).
+
+
+Changing states
+===============
+
+Many lexers need multiple states to work as expected. For example, some
+languages allow multiline comments to be nested. Since this is a recursive
+pattern it's impossible to lex just using regular expressions.
+
+Here is the solution:
+
+.. sourcecode:: python
+
+    class ExampleLexer(RegexLexer):
+        name = 'Example Lexer with states'
+
+        tokens = {
+            'root': [
+                (r'[^/]+', Text),
+                (r'/\*', Comment.Multiline, 'comment'),
+                (r'//.*?$', Comment.Singleline),
+                (r'/', Text)
+            ],
+            'comment': [
+                (r'[^*/]', Comment.Multiline),
+                (r'/\*', Comment.Multiline, '#push'),
+                (r'\*/', Comment.Multiline, '#pop'),
+                (r'[*/]', Comment.Multiline)
+            ]
+        }
+
+This lexer starts lexing in the ``'root'`` state. It tries to match as much as
+possible until it finds a slash (``'/'``). If the next character after the slash
+is a star (``'*'``) the `RegexLexer` sends those two characters to the output
+stream marked as `Comment.Multiline` and continues parsing with the rules
+defined in the ``'comment'`` state.
+
+If there wasn't a star after the slash, the `RegexLexer` checks if it's a
+single-line comment (i.e. followed by a second slash). If this also isn't the
+case, it must be a single slash (the separate regex for a single slash must also
+be given, else the slash would be marked as an error token).
+
+Inside the ``'comment'`` state, we do the same thing again. Scan until the lexer
+finds a star or slash. If it's the opening of a multiline comment, push the
+``'comment'`` state on the stack and continue scanning, again in the
+``'comment'`` state.  Else, check if it's the end of the multiline comment. If
+yes, pop one state from the stack.
+
+Note: If you pop from an empty stack you'll get an `IndexError`. (There is an
+easy way to prevent this from happening: don't ``'#pop'`` in the root state).
+
+If the `RegexLexer` encounters a newline that is flagged as an error token, the
+stack is emptied and the lexer continues scanning in the ``'root'`` state. This
+helps produce error-tolerant highlighting for erroneous input, e.g. when a
+single-line string is not closed.
+
+
+Advanced state tricks
+=====================
+
+There are a few more things you can do with states:
+
+- You can push multiple states onto the stack if you give a tuple instead of a
+  simple string as the third item in a rule tuple. For example, if you want to
+  match a comment containing a directive, something like::
+
+      /* <processing directive>    rest of comment */
+
+  you can use this rule:
+
+  .. sourcecode:: python
+
+      tokens = {
+          'root': [
+              (r'/\* <', Comment, ('comment', 'directive')),
+              ...
+          ],
+          'directive': [
+              (r'[^>]*', Comment.Directive),
+              (r'>', Comment, '#pop'),
+          ],
+          'comment': [
+              (r'[^*]+', Comment),
+              (r'\*/', Comment, '#pop'),
+              (r'\*', Comment),
+          ]
+      }
+
+  When this encounters the above sample, first ``'comment'`` and ``'directive'``
+  are pushed onto the stack, then the lexer continues in the directive state
+  until it finds the closing ``>``, then it continues in the comment state until
+  the closing ``*/``. Then, both states are popped from the stack again and
+  lexing continues in the root state.
+
+
+- You can include the rules of a state in the definition of another.  This is
+  done by using `include` from `pygments.lexer`:
+
+  .. sourcecode:: python
+
+      from pygments.lexer import RegexLexer, include
+
+      class ExampleLexer(RegexLexer):
+          tokens = {
+              'comments': [
+                  (r'/\*.*?\*/', Comment),
+                  (r'//.*?\n', Comment),
+              ],
+              'root': [
+                  include('comments'),
+                  (r'(function )(\w+)( {)',
+                   (Keyword, Name, Keyword), 'function'),
+                  (r'.', Text),
+              ],
+              'function': [
+                  (r'[^}/]+', Text),
+                  include('comments'),
+                  (r'/', Text),
+                  (r'}', Keyword, '#pop'),
+              ]
+          }
+
+  This is a hypothetical lexer for a language that consists of functions and
+  comments. Because comments can occur at toplevel and in functions, we need
+  rules for comments in both states. As you can see, the `include` helper saves
+  repeating rules that occur more than once (in this example, the state
+  ``'comments'`` will never be entered by the lexer, as it's only there to be
+  included in ``'root'`` and ``'function'``).
+
+
+- Sometimes, you may want to "combine" a state from existing ones.  This is
+  possible with the `combined` helper from `pygments.lexer`.
+
+  If you, instead of a new state, write ``combined('state1', 'state2')`` as the
+  third item of a rule tuple, a new anonymous state will be formed from state1
+  and state2 and if the rule matches, the lexer will enter this state.
+
+  This is not used very often, but can be helpful in some cases, such as the
+  `PythonLexer`'s string literal processing.
+
+- If you want your lexer to start lexing in a different state you can modify
+  the stack by overloading the `get_tokens_unprocessed` method:
+
+  .. sourcecode:: python
+
+      class MyLexer(RegexLexer):
+          tokens = {...}
+
+          def get_tokens_unprocessed(self, text):
+              stack = ['root', 'otherstate']
+              for item in RegexLexer.get_tokens_unprocessed(self, text, stack):
+                  yield item
+
+  Some lexers like the `PhpLexer` use this to make the leading ``<?php``
+  preprocessor comments optional. Note that you can crash the lexer easily
+  by putting values into the stack that don't exist in the token map. Also
+  removing ``'root'`` from the stack can result in strange errors!
+
+
+Using multiple lexers
+=====================
+
+Using multiple lexers for the same input can be tricky. One of the easiest
+combination techniques is shown here: You can replace the token type entry in a
+rule tuple (the second item) with a lexer class. The matched text will then be
+lexed with that lexer, and the resulting tokens will be yielded.
+
+For example, look at this stripped-down HTML lexer:
+
+.. sourcecode:: python
+
+    from pygments.lexer import RegexLexer, bygroups, using
+
+    class HtmlLexer(RegexLexer):
+        name = 'HTML'
+        aliases = ['html']
+        filenames = ['*.html', '*.htm']
+
+        flags = re.IGNORECASE | re.DOTALL
+        tokens = {
+            'root': [
+                ('[^<&]+', Text),
+                ('&.*?;', Name.Entity),
+                (r'<\s*script\s*', Name.Tag, ('script-content', 'tag')),
+                (r'<\s*[a-zA-Z0-9:]+', Name.Tag, 'tag'),
+                (r'<\s*/\s*[a-zA-Z0-9:]+\s*>', Name.Tag),
+            ],
+            'script-content': [
+                (r'(.+?)(<\s*/\s*script\s*>)',
+                 bygroups(using(JavascriptLexer), Name.Tag),
+                 '#pop'),
+            ]
+        }
+
+Here the content of a ``<script>`` tag is passed to a newly created instance of
+a `JavascriptLexer` and not processed by the `HtmlLexer`. This is done using the
+`using` helper that takes the other lexer class as its parameter.
+
+Note the combination of `bygroups` and `using`. This makes sure that the content
+up to the ``</script>`` end tag is processed by the `JavascriptLexer`, while the
+end tag is yielded as a normal token with the `Name.Tag` type.
+
+As an additional goodie, if the lexer class is replaced by `this` (imported from
+`pygments.lexer`), the "other" lexer will be the current one (because you cannot
+refer to the current class within the code that runs at class definition time).
+
+Also note the ``(r'<\s*script\s*', Name.Tag, ('script-content', 'tag'))`` rule.
+Here, two states are pushed onto the state stack, ``'script-content'`` and
+``'tag'``.  That means that first ``'tag'`` is processed, which will parse
+attributes and the closing ``>``, then the ``'tag'`` state is popped and the
+next state on top of the stack will be ``'script-content'``.
+
+Any keyword arguments passed to ``using()`` are added to the keyword arguments
+used to create the lexer.
+
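+For example, the ``script-content`` rule above could create the nested
+`JavascriptLexer` with ``stripall`` enabled (``stripall`` being one of the
+options all lexers accept); this is just a sketch of the mechanism:
+
+.. sourcecode:: python
+
+    (r'(.+?)(<\s*/\s*script\s*>)',
+     bygroups(using(JavascriptLexer, stripall=True), Name.Tag),
+     '#pop'),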
+
+Delegating Lexer
+================
+
+Another approach for nested lexers is the `DelegatingLexer` which is for
+example used for the template engine lexers. It takes two lexers as
+arguments on initialisation: a `root_lexer` and a `language_lexer`.
+
+The input is processed as follows: First, the whole text is lexed with the
+`language_lexer`. All tokens yielded with a type of ``Other`` are then
+concatenated and given to the `root_lexer`. The language tokens of the
+`language_lexer` are then inserted into the `root_lexer`'s token stream
+at the appropriate positions.
+
+.. sourcecode:: python
+
+    from pygments.lexer import DelegatingLexer
+    from pygments.lexers.web import HtmlLexer, PhpLexer
+
+    class HtmlPhpLexer(DelegatingLexer):
+        def __init__(self, **options):
+            super(HtmlPhpLexer, self).__init__(HtmlLexer, PhpLexer, **options)
+
+This procedure ensures that e.g. HTML with template tags in it is highlighted
+correctly even if the template tags are put into HTML tags or attributes.
+
+If you want to change the needle token ``Other`` to something else, you can
+give the lexer another token type as the third parameter:
+
+.. sourcecode:: python
+
+    DelegatingLexer.__init__(self, MyLexer, OtherLexer, Text, **options)
+
+
+Callbacks
+=========
+
+Sometimes the grammar of a language is so complex that a lexer would be unable
+to parse it just by using regular expressions and stacks.
+
+For this, the `RegexLexer` allows callbacks to be given in rule tuples, instead
+of token types (`bygroups` and `using` are nothing else but preimplemented
+callbacks). The callback must be a function taking two arguments:
+
+* the lexer itself
+* the match object for the last matched rule
+
+The callback must then return an iterable of (or simply yield) ``(index,
+tokentype, value)`` tuples, which are then just passed through by
+`get_tokens_unprocessed()`. The ``index`` here is the position of the token in
+the input string, ``tokentype`` is the normal token type (like `Name.Builtin`),
+and ``value`` the associated part of the input string.
+
+You can see an example here:
+
+.. sourcecode:: python
+
+    class HypotheticLexer(RegexLexer):
+
+        def headline_callback(lexer, match):
+            equal_signs = match.group(1)
+            text = match.group(2)
+            yield match.start(), Generic.Headline, equal_signs + text + equal_signs
+
+        tokens = {
+            'root': [
+                (r'(=+)(.*?)(\1)', headline_callback)
+            ]
+        }
+
+If the regex for the `headline_callback` matches, the function is called with
+the lexer instance and the match object. Note that after the callback is done,
+processing continues normally, that is, after the end of the callback's match.
+The callback has no way to influence the current position.
+
+There are not really any simple examples for lexer callbacks, but you can see
+them in action e.g. in the `compiled.py`_ source code in the `CLexer` and
+`JavaLexer` classes.
+
+.. _compiled.py: http://trac.pocoo.org/repos/pygments/lexers/compiled.py
+
+
+The ExtendedRegexLexer class
+============================
+
+The `RegexLexer`, even with callbacks, unfortunately isn't powerful enough for
+the funky syntax rules of some languages that will go unnamed, such as Ruby.
+
+But fear not; even then you don't have to abandon the regular expression
+approach. For Pygments has a subclass of `RegexLexer`, the `ExtendedRegexLexer`.
+All features known from RegexLexers are available here too, and the tokens are
+specified in exactly the same way, *except* for one detail:
+
+The `get_tokens_unprocessed()` method holds its internal state data not as local
+variables, but in an instance of the `pygments.lexer.LexerContext` class, and
+that instance is passed to callbacks as a third argument. This means that you
+can modify the lexer state in callbacks.
+
+The `LexerContext` class has the following members:
+
+* `text` -- the input text
+* `pos` -- the current starting position that is used for matching regexes
+* `stack` -- a list containing the state stack
+* `end` -- the maximum position to which regexes are matched, this defaults to
+  the length of `text`
+
+Additionally, the `get_tokens_unprocessed()` method can be given a
+`LexerContext` instead of a string and will then process this context instead of
+creating a new one for the string argument.
+
+Note that because you can set the current position to anything in the callback,
+it won't automatically be set by the caller after the callback is finished.
+For example, this is how the hypothetical lexer above would be written with the
+`ExtendedRegexLexer`:
+
+.. sourcecode:: python
+
+    class ExHypotheticLexer(ExtendedRegexLexer):
+
+        def headline_callback(lexer, match, ctx):
+            equal_signs = match.group(1)
+            text = match.group(2)
+            yield match.start(), Generic.Headline, equal_signs + text + equal_signs
+            ctx.pos = match.end()
+
+        tokens = {
+            'root': [
+                (r'(=+)(.*?)(\1)', headline_callback)
+            ]
+        }
+
+This might sound confusing (and it admittedly can be). But it is needed; for a
+real-world example, look at the Ruby lexer in `agile.py`_.
+
+.. _agile.py: http://trac.pocoo.org/repos/pygments/trunk/pygments/lexers/agile.py
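+
+To make the `LexerContext` usage mentioned above a little more concrete, here is
+a sketch of driving the hypothetical lexer with a hand-built context. The exact
+constructor arguments are an assumption, so check `pygments.lexer.LexerContext`
+before relying on them:
+
+.. sourcecode:: python
+
+    from pygments.lexer import LexerContext
+
+    code = '= Hello =\nsome more text\n'
+
+    # assumed signature: LexerContext(text, pos); the stack defaults to ['root']
+    ctx = LexerContext(code, 0)
+    ctx.end = len(code)    # the default anyway; lower it to stop lexing early
+
+    tokens = ExHypotheticLexer().get_tokens_unprocessed(context=ctx)
+    for index, tokentype, value in tokens:
+        print index, tokentype, value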

docs/src/lexers.txt

+.. -*- mode: rst -*-
+
+================
+Available lexers
+================
+
+This page lists all available builtin lexers and the options they take.
+
+Currently, **all lexers** support these options:
+
+`stripnl`
+    Strip leading and trailing newlines from the input (default: ``True``).
+
+`stripall`
+    Strip all leading and trailing whitespace from the input (default:
+    ``False``).
+
+`tabsize`
+    If given and greater than 0, expand tabs in the input (default: ``0``).
+
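+For example, options are simply passed as keyword arguments when a lexer is
+instantiated (a quick sketch using the `PythonLexer` described below):
+
+.. sourcecode:: python
+
+    from pygments.lexers import PythonLexer
+
+    # strip surrounding whitespace and expand tabs to four spaces
+    lexer = PythonLexer(stripall=True, tabsize=4)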
+
+These lexers are builtin and can be imported from
+`pygments.lexers`:
+
+
+Special lexers
+==============
+
+`TextLexer`
+
+    "Null" lexer, doesn't highlight anything.
+
+    :Aliases: ``text``
+    :Filename patterns: ``*.txt``
+
+
+`RawTokenLexer`
+
+    Recreates a token stream formatted with the `RawTokenFormatter`.
+
+    Additional option:
+
+    `compress`
+         If set to ``'gz'`` or ``'bz2'``, decompress the token stream with
+         the given compression algorithm before lexing (default: ``''``).
+
+    :Aliases: ``raw``
+    :Filename patterns: ``*.raw``
+
+
+Agile languages
+===============
+
+`PythonLexer`
+
+    For `Python <http://www.python.org>`_ source code.
+
+    :Aliases: ``python``, ``py``
+    :Filename patterns: ``*.py``, ``*.pyw``
+
+
+`PythonConsoleLexer`
+
+    For Python console output or doctests, such as:
+
+    .. sourcecode:: pycon
+
+        >>> a = 'foo'
+        >>> print a
+        foo
+        >>> 1/0
+        Traceback (most recent call last):
+        ...
+
+    :Aliases: ``pycon``
+    :Filename patterns: None
+
+
+`RubyLexer`
+
+    For `Ruby <http://www.ruby-lang.org>`_ source code.
+
+    :Aliases: ``ruby``, ``rb``
+    :Filename patterns: ``*.rb``
+
+
+`RubyConsoleLexer`
+
+    For Ruby interactive console (**irb**) output like:
+
+    .. sourcecode:: rbcon
+
+        irb(main):001:0> a = 1
+        => 1
+        irb(main):002:0> puts a
+        1
+        => nil
+
+    :Aliases: ``rbcon``, ``irb``
+    :Filename patterns: None
+
+
+`PerlLexer`
+
+    For `Perl <http://www.perl.org>`_ source code.
+
+    :Aliases: ``perl``, ``pl``
+    :Filename patterns: ``*.pl``, ``*.pm``
+
+
+`LuaLexer`
+
+    For `Lua <http://www.lua.org>`_ source code.
+
+    Additional options:
+
+    `func_name_highlighting`
+        If given and ``True``, highlight builtin function names
+        (default: ``True``).
+    `disabled_modules`
+        If given, must be a list of module names whose function names
+        should not be highlighted. By default all modules are highlighted.
+
+        To get a list of allowed modules have a look into the
+        `_luabuiltins` module:
+
+        .. sourcecode:: pycon
+
+            >>> from pygments.lexers._luabuiltins import MODULES
+            >>> MODULES.keys()
+            ['string', 'coroutine', 'modules', 'io', 'basic', ...]
+
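+    A possible way to combine these options (just a sketch):
+
+    .. sourcecode:: python
+
+        from pygments.lexers import LuaLexer
+
+        # keep builtin highlighting, but skip names from the string and io modules
+        lexer = LuaLexer(disabled_modules=['string', 'io'])
+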
+    :Aliases: ``lua``
+    :Filename patterns: ``*.lua``
+
+
+Compiled languages
+==================
+
+`CLexer`
+
+    For C source code with preprocessor directives.
+
+    :Aliases: ``c``
+    :Filename patterns: ``*.c``, ``*.h``
+
+
+`CppLexer`
+
+    For C++ source code with preprocessor directives.
+
+    :Aliases: ``cpp``, ``c++``
+    :Filename patterns: ``*.cpp``, ``*.hpp``, ``*.c++``, ``*.h++``
+
+
+`DelphiLexer`
+
+    For `Delphi <http://www.borland.com/delphi/>`_
+    (Borland Object Pascal) source code.
+
+    :Aliases: ``delphi``, ``pas``, ``pascal``, ``objectpascal``
+    :Filename patterns: ``*.pas``
+
+
+`JavaLexer`
+
+    For `Java <http://www.sun.com/java/>`_ source code.
+
+    :Aliases: ``java``
+    :Filename patterns: ``*.java``
+
+
+.NET languages
+==============
+
+`CSharpLexer`
+
+    For `C# <http://msdn2.microsoft.com/en-us/vcsharp/default.aspx>`_
+    source code.
+
+    :Aliases: ``c#``, ``csharp``
+    :Filename patterns: ``*.cs``
+
+`BooLexer`
+
+    For `Boo <http://boo.codehaus.org/>`_ source code.
+
+    :Aliases: ``boo``
+    :Filename patterns: ``*.boo``
+
+`VbNetLexer`
+
+    For
+    `Visual Basic.NET <http://msdn2.microsoft.com/en-us/vbasic/default.aspx>`_
+    source code.
+
+    :Aliases: ``vbnet``, ``vb.net``
+    :Filename patterns: ``*.vb``, ``*.bas``
+
+
+Web-related languages
+=====================
+
+`JavascriptLexer`
+
+    For JavaScript source code.
+
+    :Aliases: ``js``, ``javascript``
+    :Filename patterns: ``*.js``
+
+
+`CssLexer`
+
+    For CSS (Cascading Style Sheets).
+
+    :Aliases: ``css``
+    :Filename patterns: ``*.css``
+
+
+`HtmlLexer`
+
+    For HTML 4 and XHTML 1 markup. Nested JavaScript and CSS is highlighted
+    by the appropriate lexer.
+
+    :Aliases: ``html``
+    :Filename patterns: ``*.html``, ``*.htm``, ``*.xhtml``
+
+
+`PhpLexer`
+
+    For `PHP <http://www.php.net/>`_ source code.
+    For PHP embedded in HTML, use the `HtmlPhpLexer`.
+
+    Additional options:
+
+    `startinline`
+        If given and ``True``, the lexer starts highlighting with PHP code
+        directly, i.e. no starting ``<?php`` is required.
+    `funcnamehighlighting`
+        If given and ``True``, highlight builtin function names
+        (default: ``True``).
+    `disabledmodules`
+        If given, must be a list of module names whose function names
+        should not be highlighted. By default all modules are highlighted
+        except the special ``'unknown'`` module, which includes functions
+        that are known to PHP but are undocumented.
+
+        To get a list of allowed modules have a look into the
+        `_phpbuiltins` module:
+
+        .. sourcecode:: pycon
+
+            >>> from pygments.lexers._phpbuiltins import MODULES
+            >>> MODULES.keys()
+            ['PHP Options/Info', 'Zip', 'dba', ...]
+
+        In fact, the names of those modules match the module names from
+        the PHP documentation.
+
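+    For example, to highlight a PHP snippet that has no leading ``<?php``
+    (just a sketch):
+
+    .. sourcecode:: python
+
+        from pygments.lexers import PhpLexer
+
+        # treat the input as PHP code right away
+        lexer = PhpLexer(startinline=True)
+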
+    :Aliases: ``php``, ``php3``, ``php4``, ``php5``
+    :Filename patterns: ``*.php``, ``*.php[345]``
+
+
+`XmlLexer`
+
+    Generic lexer for XML (extensible markup language).
+
+    :Aliases: ``xml``
+    :Filename patterns: ``*.xml``
+
+
+Template languages
+==================
+
+`ErbLexer`
+
+    Generic `ERB <http://ruby-doc.org/core/classes/ERB.html>`_ (Ruby Templating)
+    lexer.
+
+    Just highlights Ruby code between the preprocessor directives; other data
+    is left untouched by the lexer.
+
+    All options are also forwarded to the `RubyLexer`.
+
+    :Aliases:   ``erb``
+    :Filename patterns: None
+
+
+`RhtmlLexer`
+
+    Subclass of the `ErbLexer` that highlights the unlexed data with the
+    `HtmlLexer`.
+
+    Nested JavaScript and CSS is highlighted too.
+
+    :Aliases:   ``rhtml``, ``html+erb``, ``html+ruby``
+    :Filename patterns: ``*.rhtml``
+
+
+`XmlErbLexer`
+
+    Subclass of `ErbLexer` which highlights data outside preprocessor
+    directives with the `XmlLexer`.
+
+    :Aliases:   ``xml+erb``, ``xml+ruby``
+    :Filename patterns: None
+
+
+`CssErbLexer`
+
+    Subclass of `ErbLexer` which highlights unlexed data with the `CssLexer`.
+
+    :Aliases:   ``css+erb``, ``css+ruby``
+    :Filename patterns: None
+
+
+`JavascriptErbLexer`
+
+    Subclass of `ErbLexer` which highlights unlexed data with the
+    `JavascriptLexer`.
+
+    :Aliases:   ``js+erb``, ``javascript+erb``, ``js+ruby``, ``javascript+ruby``
+    :Filename patterns: None
+
+
+`HtmlPhpLexer`
+
+    Subclass of `PhpLexer` that highlights unhandled data with the `HtmlLexer`.
+
+    Nested JavaScript and CSS is highlighted too.
+
+    :Aliases:   ``html+php``
+    :Filename patterns:  ``*.phtml``
+
+
+`XmlPhpLexer`
+
+    Subclass of `PhpLexer` that highlights unhandled data with the `XmlLexer`.
+
+    :Aliases:   ``xml+php``
+    :Filename patterns: None
+
+
+`CssPhpLexer`
+
+    Subclass of `PhpLexer` which highlights unmatched data with the `CssLexer`.
+
+    :Aliases:   ``css+php``
+    :Filename patterns: None
+
+
+`JavascriptPhpLexer`
+
+    Subclass of `PhpLexer` which highlights unmatched data with the
+    `JavascriptLexer`.
+
+    :Aliases:   ``js+php``, ``javascript+php``
+    :Filename patterns: None
+
+
+`DjangoLexer`
+
+    Generic `django <http://www.djangoproject.com/documentation/templates/>`_
+    template lexer.
+
+    It just highlights Django code between the preprocessor directives; other
+    data is left untouched by the lexer.
+
+    :Aliases:   ``django``
+    :Filename patterns: None
+
+
+`HtmlDjangoLexer`
+
+    Subclass of the `DjangoLexer` that highlights unlexed data with the
+    `HtmlLexer`.
+
+    Nested JavaScript and CSS is highlighted too.
+
+    :Aliases:   ``html+django``
+    :Filename patterns: None
+
+
+`XmlDjangoLexer`
+
+    Subclass of the `DjangoLexer` that highlights unlexed data with the
+    `XmlLexer`.
+
+    :Aliases:   ``xml+django``
+    :Filename patterns: None
+
+
+`CssDjangoLexer`
+
+    Subclass of the `DjangoLexer` that highlights unlexed data with the
+    `CssLexer`.
+
+    :Aliases:   ``css+django``
+    :Filename patterns: None
+
+
+`JavascriptDjangoLexer`
+
+    Subclass of the `DjangoLexer` that highlights unlexed data with the
+    `JavascriptLexer`.
+
+    :Aliases:   ``javascript+django``
+    :Filename patterns: None