Commits

takanao ENDOH committed 1df77dd

Initial import Erlang OTP Documentation (Sphinx)
sync to git repo rev ab1b8bac58e1155ed7cbaee25d55ac5d55c050dc

Comments (0)

Files changed (8)

+.git
+build
+downloads
+source/doc
+source/erts*
+source/lib
+
+syntax: glob
+*.pyc
+*.swo
+*.swp
+.DS_Store
+# Makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line.
+SPHINXOPTS    =
+SPHINXBUILD   = sphinx-build
+PAPER         =
+BUILDDIR      = build
+DOWNLOADDIR   = downloads
+SOURCEDIR     = source
+FETCH         = python tools/fetch.py
+MAKERST       = tools/makerst.py
+# The download URL and tarball name live in source/conf.py so they are
+# defined in exactly one place; query them from there (':=' so the shell
+# command runs once, not on every expansion).
+SOURCE        := $(shell python source/conf.py SOURCE)
+TARBALL       := $(shell python source/conf.py TARBALL)
+
+# Internal variables.
+PAPEROPT_a4     = -D latex_paper_size=a4
+PAPEROPT_letter = -D latex_paper_size=letter
+ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) $(SOURCEDIR)
+
+# Every command-style target is phony so a stray file with the same name
+# cannot shadow it.
+.PHONY: help clean dlclean rstclean fetch html dirhtml pickle json htmlhelp qthelp devhelp latex latexpdf changes linkcheck doctest
+
+help:
+	@echo "Please use \`make <target>' where <target> is one of"
+	@echo "  fetch     to download and unpack the documentation tarball"
+	@echo "  html      to make standalone HTML files"
+	@echo "  dirhtml   to make HTML files named index.html in directories"
+	@echo "  pickle    to make pickle files"
+	@echo "  json      to make JSON files"
+	@echo "  htmlhelp  to make HTML files and a HTML help project"
+	@echo "  qthelp    to make HTML files and a qthelp project"
+	@echo "  devhelp   to make HTML files and a Devhelp project"
+	@echo "  latex     to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
+	@echo "  latexpdf  to make LaTeX files and run them through pdflatex"
+	@echo "  changes   to make an overview of all changed/added/deprecated items"
+	@echo "  linkcheck to check all external links for integrity"
+	@echo "  doctest   to run all doctests embedded in the documentation (if enabled)"
+	@echo "  clean     to remove everything under $(BUILDDIR)"
+	@echo "  dlclean   to remove the downloaded tarball"
+	@echo "  rstclean  to remove the generated reST sources"
+
+clean:
+	-rm -rf $(BUILDDIR)/*
+
+dlclean:
+	-rm -rf $(DOWNLOADDIR)/*
+
+rstclean:
+	-rm -rf source/doc
+	-rm -rf source/erts*
+	-rm -rf source/lib
+
+# Convenience alias: "make fetch" just ensures the tarball is present.
+fetch: $(DOWNLOADDIR)/$(TARBALL)
+
+# Order-only prerequisite ("|"): the directory must exist, but its
+# timestamp must not retrigger the download.
+$(DOWNLOADDIR)/$(TARBALL): | $(DOWNLOADDIR)
+	$(FETCH) $(SOURCE) $(TARBALL) $(DOWNLOADDIR)
+	@echo
+	@echo "Download finished. The tarball is in $(DOWNLOADDIR)."
+
+$(DOWNLOADDIR):
+	mkdir -p $(DOWNLOADDIR)
+
+# The reST sources are generated from the downloaded tarball, so depend
+# on it as well as on the converter script.
+source/doc/index.rst: $(MAKERST) $(DOWNLOADDIR)/$(TARBALL)
+	python $(MAKERST) $(DOWNLOADDIR) $(SOURCEDIR)
+	@echo
+	@echo "Build finished. The reST files are in $(SOURCEDIR)."
+
+html: $(DOWNLOADDIR)/$(TARBALL) source/doc/index.rst
+	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
+	@echo
+	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
+
+dirhtml:
+	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
+	@echo
+	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
+
+pickle:
+	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
+	@echo
+	@echo "Build finished; now you can process the pickle files."
+
+json:
+	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
+	@echo
+	@echo "Build finished; now you can process the JSON files."
+
+htmlhelp:
+	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
+	@echo
+	@echo "Build finished; now you can run HTML Help Workshop with the" \
+	      ".hhp project file in $(BUILDDIR)/htmlhelp."
+
+qthelp:
+	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
+	@echo
+	@echo "Build finished; now you can run \`qcollectiongenerator' with the" \
+	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
+	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/ErlangOTP.qhcp"
+	@echo "To view the help file:"
+	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/ErlangOTP.qhc"
+
+devhelp:
+	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
+	@echo
+	@echo "Build finished."
+	@echo "To view the help file:"
+	@echo "# mkdir -p $$HOME/.local/share/devhelp/ErlangOTP"
+	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/ErlangOTP"
+	@echo "# devhelp"
+
+latex:
+	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+	@echo
+	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
+	@echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \
+	      "run these through (pdf)latex."
+
+# The "latex" prerequisite already ran sphinx-build, so don't run it a
+# second time here.  $(MAKE) (not literal "make") propagates -j/-n to
+# the sub-make.
+latexpdf: latex
+	@echo "Running LaTeX files through pdflatex..."
+	$(MAKE) -C $(BUILDDIR)/latex all-pdf
+	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
+
+changes:
+	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
+	@echo
+	@echo "The overview file is in $(BUILDDIR)/changes."
+
+linkcheck:
+	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
+	@echo
+	@echo "Link check complete; look for any errors in the above output " \
+	      "or in $(BUILDDIR)/linkcheck/output.txt."
+
+doctest:
+	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
+	@echo "Testing of doctests in the sources finished, look at the " \
+	      "results in $(BUILDDIR)/doctest/output.txt."
+# -*- coding: utf-8 -*-
+#
+# Erlang/OTP documentation build configuration file, created by
+# sphinx-quickstart on Sat Oct 24 09:46:27 2009.
+#
+# This file is execfile()d with the current directory set to its containing dir.
+#
+# Note that not all possible configuration values are present in this
+# autogenerated file.
+#
+# All configuration values have a default; values that are commented out
+# serve to show the default.
+
+import sys, os
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#sys.path.append(os.path.abspath('.'))
+
+# -- General configuration -----------------------------------------------------
+
+# Add any Sphinx extension module names here, as strings. They can be extensions
+# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
+extensions = []
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# The suffix of source filenames.
+source_suffix = '.rst'
+
+# The encoding of source files.
+#source_encoding = 'utf-8-sig'
+
+# The master toctree document.
+master_doc = 'index'
+
+# General information about the project.
+project = u'Erlang/OTP'
+copyright = u'1999-2009 Ericsson AB'
+
+# The version info for the project you're documenting, acts as replacement for
+# |version| and |release|, also used in various other places throughout the
+# built documents.
+#
+# The short X.Y version.
+version = 'R13B'
+# The full version, including alpha/beta/rc tags.
+release = 'R13B02'
+
+SOURCE = "http://dl.getdropbox.com/u/89936/otp_doc_html_R13B02-1.tar.gz"
+TARBALL = "otp_doc_html.tar.gz"
+HTML2RST = "http://bitbucket.org/djerdo/musette/raw/tip/musette/html/html2rest.py"
+
+# The language for content autogenerated by Sphinx. Refer to documentation
+# for a list of supported languages.
+#language = None
+
+# There are two options for replacing |today|: either, you set today to some
+# non-false value, then it is used:
+#today = ''
+# Else, today_fmt is used as the format for a strftime call.
+#today_fmt = '%B %d, %Y'
+
+# List of documents that shouldn't be included in the build.
+#unused_docs = []
+
+# List of directories, relative to source directory, that shouldn't be searched
+# for source files.
+exclude_trees = []
+
+# The reST default role (used for this markup: `text`) to use for all documents.
+#default_role = None
+
+# If true, '()' will be appended to :func: etc. cross-reference text.
+#add_function_parentheses = True
+
+# If true, the current module name will be prepended to all description
+# unit titles (such as .. function::).
+#add_module_names = True
+
+# If true, sectionauthor and moduleauthor directives will be shown in the
+# output. They are ignored by default.
+#show_authors = False
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = 'sphinx'
+
+# A list of ignored prefixes for module index sorting.
+#modindex_common_prefix = []
+
+
+# -- Options for HTML output ---------------------------------------------------
+
+# The theme to use for HTML and HTML Help pages.  Major themes that come with
+# Sphinx are currently 'default' and 'sphinxdoc'.
+html_theme = 'default'
+
+# Theme options are theme-specific and customize the look and feel of a theme
+# further.  For a list of options available for each theme, see the
+# documentation.
+#html_theme_options = {}
+
+# Add any paths that contain custom themes here, relative to this directory.
+#html_theme_path = []
+
+# The name for this set of Sphinx documents.  If None, it defaults to
+# "<project> v<release> documentation".
+#html_title = None
+
+# A shorter title for the navigation bar.  Default is the same as html_title.
+#html_short_title = None
+
+# The name of an image file (relative to this directory) to place at the top
+# of the sidebar.
+#html_logo = None
+
+# The name of an image file (within the static path) to use as favicon of the
+# docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
+# pixels large.
+#html_favicon = None
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
+
+# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
+# using the given strftime format.
+#html_last_updated_fmt = '%b %d, %Y'
+
+# If true, SmartyPants will be used to convert quotes and dashes to
+# typographically correct entities.
+#html_use_smartypants = True
+
+# Custom sidebar templates, maps document names to template names.
+#html_sidebars = {}
+
+# Additional templates that should be rendered to pages, maps page names to
+# template names.
+#html_additional_pages = {}
+
+# If false, no module index is generated.
+#html_use_modindex = True
+
+# If false, no index is generated.
+#html_use_index = True
+
+# If true, the index is split into individual pages for each letter.
+#html_split_index = False
+
+# If true, links to the reST sources are added to the pages.
+#html_show_sourcelink = True
+
+# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
+#html_show_sphinx = True
+
+# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
+#html_show_copyright = True
+
+# If true, an OpenSearch description file will be output, and all pages will
+# contain a <link> tag referring to it.  The value of this option must be the
+# base URL from which the finished HTML is served.
+#html_use_opensearch = ''
+
+# If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml").
+#html_file_suffix = ''
+
+# Output file base name for HTML help builder.
+htmlhelp_basename = 'ErlangOTPdoc'
+
+
+# -- Options for LaTeX output --------------------------------------------------
+
+# The paper size ('letter' or 'a4').
+#latex_paper_size = 'letter'
+
+# The font size ('10pt', '11pt' or '12pt').
+#latex_font_size = '10pt'
+
+# Grouping the document tree into LaTeX files. List of tuples
+# (source start file, target name, title, author, documentclass [howto/manual]).
+latex_documents = [
+  ('index', 'ErlangOTP.tex', u'Erlang/OTP Documentation',
+   u'Ericsson AB', 'manual'),
+]
+
+# The name of an image file (relative to this directory) to place at the top of
+# the title page.
+#latex_logo = None
+
+# For "manual" documents, if this is true, then toplevel headings are parts,
+# not chapters.
+#latex_use_parts = False
+
+# Additional stuff for the LaTeX preamble.
+#latex_preamble = ''
+
+# Documents to append as an appendix to all manuals.
+#latex_appendices = []
+
+# If false, no module index is generated.
+#latex_use_modindex = True
+
+if __name__ == "__main__":
+    import sys
+    print(vars().get(sys.argv[1]))
+.. Erlang/OTP documentation master file, created by
+   sphinx-quickstart on Sat Oct 24 09:46:27 2009.
+   You can adapt this file completely to your liking, but it should at least
+   contain the root `toctree` directive.
+
+Welcome to Erlang/OTP's documentation!
+======================================
+
+Contents:
+
+.. toctree::
+    :maxdepth: 2
+
+    doc/index
+    doc/first
+    doc/glossary
+
+    doc/system_architecture_intro/sys_arch_intro
+    doc/tutorial/introduction
+    doc/tutorial/overview
+    doc/tutorial/example
+    doc/tutorial/c_port
+    doc/tutorial/erl_interface
+    doc/tutorial/c_portdriver
+    doc/tutorial/cnode
+    doc/highlights
+
+Erlang/OTP:
+
+.. toctree::
+    :maxdepth: 2
+
+    doc/installation_guide/install
+    doc/installation_guide/verification
+    doc/system_principles/system_principles
+    doc/system_principles/error_logging
+    doc/system_principles/create_target
+    doc/embedded/embedded_solaris
+    doc/embedded/embedded_nt
+    doc/embedded/vxworks
+
+Erlang Programming:
+
+.. toctree::
+    :maxdepth: 2
+
+    doc/getting_started/intro
+    doc/getting_started/seq_prog
+    doc/getting_started/conc_prog
+    doc/getting_started/robustness
+    doc/getting_started/records_macros
+    doc/reference_manual/introduction
+    doc/reference_manual/data_types
+    doc/reference_manual/patterns
+    doc/reference_manual/modules
+    doc/reference_manual/functions
+    doc/reference_manual/expressions
+    doc/reference_manual/macros
+    doc/reference_manual/records
+    doc/reference_manual/errors
+    doc/reference_manual/processes
+    doc/reference_manual/distributed
+    doc/reference_manual/code_loading
+    doc/reference_manual/ports
+    doc/programming_examples/records
+    doc/programming_examples/funs
+    doc/programming_examples/list_comprehensions
+    doc/programming_examples/bit_syntax
+    doc/efficiency_guide/introduction
+    doc/efficiency_guide/myths
+    doc/efficiency_guide/commoncaveats
+    doc/efficiency_guide/binaryhandling
+    doc/efficiency_guide/listhandling
+    doc/efficiency_guide/functions
+    doc/efficiency_guide/tablesDatabases
+    doc/efficiency_guide/processes
+    doc/efficiency_guide/drivers
+    doc/efficiency_guide/advanced
+    doc/efficiency_guide/profiling
+    doc/incompatible
+
+Working with OTP:
+
+.. toctree::
+    :maxdepth: 2
+
+    doc/design_principles/applications
+    doc/design_principles/included_applications
+    doc/design_principles/distributed_applications
+    doc/design_principles/release_structure
+    doc/design_principles/release_handling
+    doc/design_principles/appup_cookbook
+    doc/design_principles/des_princ
+    doc/design_principles/gen_server_concepts
+    doc/design_principles/fsm
+    doc/design_principles/events
+    doc/design_principles/sup_princ
+    doc/design_principles/spec_proc
+    doc/oam/oam_intro
+    doc/oam/part_term
+
+All:
+
+.. toctree::
+    :maxdepth: 2
+    :glob:
+
+    *
+    */*
+    */*/*
+    */*/*/*
+
+Indices and tables
+==================
+
+* :ref:`genindex`
+* :ref:`modindex`
+* :ref:`search`
+
+#!/usr/bin/env python
+
+import httplib
+import os
+import sys
+import tarfile
+import urlparse
+
+SOURCE = sys.argv[1]
+TARBALL = sys.argv[2]
+DOWNLOADDIR = sys.argv[3]
+
+def main():
+    os.chdir(DOWNLOADDIR)
+    scheme, netloc, path, params, query, fragment = urlparse.urlparse(SOURCE)
+    con = httplib.HTTPConnection(netloc)
+    con.request("GET", path)
+    response = con.getresponse()
+    if response.status != 200:
+        con.close()
+        sys.exit(1)
+    output = open(TARBALL, "w")
+    output.write(response.read())
+    output.close()
+    con.close()
+
+    tar = tarfile.open(TARBALL)
+    tar.extractall()
+    tar.close()
+
+if __name__ == "__main__":
+    main()

tools/html2rest.py

+#-----------------------------------------------------------------------------
+# Copyright (c) 2006-2009  Gerard Flanagan
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+#    The above copyright notice and this permission notice shall be included
+#    in all copies or substantial portions of the Software.
+#
+#-----------------------------------------------------------------------------
+
+
+import sys
+import os
+import re
+import codecs
+from sgmllib import SGMLParser
+from StringIO import StringIO
+from textwrap import TextWrapper
+
+CODEBLOCK = '.. sourcecode:: erlang'
+BLOCKTAGS = ['div', 'blockquote']
+IGNORETAGS = ['title', 'style', 'script']
+UNDERLINES = list('=-~`+;')
+
+# Fredrik Lundh, http://effbot.org/zone/re-sub.html
+def unescape(text):
+    """Replace HTML character/entity references in *text* with the
+    characters they denote; unrecognised references are left as-is."""
+    def fixup(m):
+        text = m.group(0)
+        if text[:2] == "&#":
+            # character reference
+            try:
+                if text[:3].lower() == "&#x":
+                    return unichr(int(text[3:-1], 16))
+                else:
+                    return unichr(int(text[2:-1]))
+            except ValueError:
+                pass
+        else:
+            # named entity
+            import htmlentitydefs
+            try:
+                text = unichr(htmlentitydefs.name2codepoint[text[1:-1]])
+            except KeyError:
+                pass
+        return text # leave as is
+    return re.sub("&#?\w+;", fixup, text)
+
+class LineBuffer(object):
+    """Accumulates output lines, with helpers for text-wrapped writes,
+    raw (verbatim) writes, indenting and left-stripping."""
+
+    def __init__(self):
+        self._lines = []
+        self._wrapper = TextWrapper()
+
+    def __len__(self):
+        return len(self._lines)
+
+    def __getitem__(self, i):
+        return self._lines[i]
+
+    def __setitem__(self, i, value):
+        self._lines[i] = value
+
+    def clear(self):
+        # Empty the buffer in place (slice assignment keeps the list object).
+        self._lines[:] = []
+
+    def read(self):
+        # Join the buffered lines into one newline-separated string.
+        return '\n'.join(self._lines)
+
+    def write(self, s):
+        #normalise whitespace
+        s = ' '.join(s.split())
+        self._lines.extend(self._wrapper.wrap(s))
+
+    def rawwrite(self, s):
+        # Append s verbatim (split on newlines), without re-wrapping.
+        self._lines.extend(s.splitlines())
+
+    def indent(self, numspaces=4, start=0):
+        # Prefix every line from index `start` onward with `numspaces` spaces.
+        linebuf = self._lines
+        n = len(linebuf)
+        if n > start:
+            indent = ' ' * numspaces
+            for i in range(start, n):
+                linebuf[i] = indent + linebuf[i]
+
+    def lstrip(self):
+        # Strip leading whitespace from every buffered line.
+        linebuf = self._lines
+        for i in range(len(linebuf)):
+            linebuf[i] = linebuf[i].lstrip()
+
+class Parser(SGMLParser):
+
+    def __init__(self, writer=sys.stdout):
+        SGMLParser.__init__(self)
+        self.writer = writer
+        self.stringbuffer = StringIO()
+        self.linebuffer = LineBuffer()
+        self.verbatim = False
+        self.lists = []
+        self.ignoredata = False
+        self.inblock = 0
+        self.nobreak = False
+        self.link = None
+
+    def close(self):
+        self.writeline()
+        SGMLParser.close(self)
+
+    def flush(self):
+        if self.linebuffer:
+            if self.inblock > 1:
+                indent = 4 * (self.inblock - 1)
+                self.linebuffer.indent(indent)
+            self.writer.write(unescape(self.linebuffer.read()))
+            self.linebuffer.clear()
+
+    def flush_stringbuffer(self):
+        sbuf = self.stringbuffer.getvalue()
+        if not sbuf:
+            return
+        elif self.linebuffer:
+            self.linebuffer[-1] += sbuf
+        else:
+            self.linebuffer.write(sbuf)
+        self.clear_stringbuffer()
+
+    def clear_stringbuffer(self):
+        #self.stringbuffer.reset()
+        self.stringbuffer.seek(0)
+        self.stringbuffer.truncate()
+
+    def data(self, text):
+        self.stringbuffer.write(text)
+
+    def pending(self):
+        return self.stringbuffer.tell() or self.linebuffer
+
+    def write(self, text=''):
+        self.flush_stringbuffer()
+        self.flush()
+        self.writer.write(unescape(text))
+
+    def writeline(self, text=''):
+        self.write(text + '\n')
+
+    def writestartblock(self, text=''):
+        if self.pending():
+            self.writeline()
+        self.writeline()
+        self.writeline(text)
+
+    def writeendblock(self, text=''):
+        self.writeline(text)
+        self.writeline()
+
+    def writeblock(self, text=''):
+        self.writestartblock(text)
+        self.writeline()
+
+    def handle_data(self, data):
+        if self.ignoredata:
+            return
+        elif self.verbatim:
+            self.data(data)
+        else:
+            self.data(' '.join(data.splitlines()))
+
+    def unknown_starttag(self, tag, attrs):
+        if tag in IGNORETAGS:
+            self.ignoredata = True
+        elif len(tag) == 2 and tag[0] == 'h':
+            self.writestartblock()
+        elif tag == 'br':
+            if self.verbatim:
+                self.data('\n')
+            elif not self.inblock:
+                self.writeline()
+            else:
+                self.data(' ')
+        elif not self.verbatim:
+            self.data(' ')
+
+    def unknown_endtag(self, tag):
+        self.ignoredata = False
+        if tag == 'hr':
+            self.flush_stringbuffer()
+            if self.linebuffer:
+                linebuf = self.linebuffer
+                linebuf.write("--------")
+                self.writeline()
+        elif len(tag) == 2 and tag[0] == 'h':
+            self.flush_stringbuffer()
+            if self.linebuffer:
+                linebuf = self.linebuffer
+                linebuf[-1] = linebuf[-1].strip()
+                char = UNDERLINES[int(tag[1])-1]
+                linebuf.write(char * len(linebuf[-1]))
+                self.writeline()
+        #elif tag in BLOCKTAGS and self.pending():
+        #    if self.lists:
+        #        self.end_li()
+        #    else:
+        #        self.writeline()
+        elif not self.verbatim:
+            self.data(' ')
+
+    def start_a(self, attrs):
+        href = dict(attrs).get('href', None)
+        if not href or href.startswith('#'):
+            return
+        self.data(' `')
+        self.link = href
+
+    def end_a(self):
+        if self.link:
+            self.data(' <%s>`__' % self.link)
+            self.link = None
+
+    def start_pre(self, attrs):
+        if self.lists:
+            self.end_li()
+            self.writeline()
+        #self.inblock += 1
+        self.verbatim = True
+        self.writeblock(CODEBLOCK)
+
+    def end_pre(self):
+        sbuf = self.stringbuffer.getvalue()
+        if sbuf:
+            self.linebuffer.rawwrite(sbuf)
+            self.linebuffer.indent(4)
+        self.clear_stringbuffer()
+        self.writeendblock()
+        #self.inblock -= 1
+        self.verbatim = False
+
+    def start_ul(self, attrs):
+        if self.lists:
+            self.end_li()
+            self.writeline()
+        else:
+            self.writeline()
+        self.lists.append('+ ')
+        self.inblock += 1
+
+    def end_ul(self):
+        self.end_li()
+        self.lists.pop()
+        self.inblock -= 1
+        if self.inblock:
+            self.writeline()
+        else:
+            self.writeendblock()
+
+    def start_ol(self, attrs):
+        if self.lists:
+            self.end_li()
+            self.writeline()
+        else:
+            self.writeline()
+        self.lists.append('#. ')
+        self.inblock += 1
+
+    def end_ol(self):
+        self.end_li()
+        self.lists.pop()
+        self.inblock -= 1
+        if self.inblock:
+            self.writeline()
+        else:
+            self.writeendblock()
+
+    def start_p(self, attrs):
+        if self.verbatim:
+            self.writeline()
+        elif not self.inblock:
+            self.writeline()
+
+    def end_p(self):
+        if self.inblock:
+        #self.flush_stringbuffer()
+            if self.verbatim:
+                self.writeline()
+            else:
+                return
+        else:
+            self.linebuffer.lstrip()
+            self.writeline()
+
+    def start_li(self, attrs):
+        self.writeline()
+        self.data(self.lists[-1])
+    
+    def end_li(self):
+        self.flush_stringbuffer()
+        linebuf = self.linebuffer
+        if linebuf and linebuf[0] and linebuf[0].lstrip()[:2] in ['+ ', '#.']:
+            start=1
+        else:
+            # the start of the <li> has already been written, perhaps because
+            # there was a <pre> block
+            start = 0
+        self.linebuffer.indent(len(self.lists[-1]), start=start)
+        self.write()
+
+    def start_dl(self, attrs):
+        self.writeline()
+        self.inblock += 1
+        self.nobreak = True
+
+    def end_dl(self):
+        self.nobreak = False
+        self.writeline()
+        self.inblock -= 1
+
+    def start_dt(self, attrs):
+        self.data(':')
+
+    def end_dt(self):
+        self.data(':')
+
+    def start_dd(self, attrs):
+        self.data(' ')
+
+    def end_dd(self):
+        self.flush_stringbuffer()
+        self.linebuffer.indent(2, start=1)
+        self.writeline()
+
+    def start_em(self, attrs):
+        self.data(' *')
+
+    def end_em(self):
+        self.data('*')
+
+    def start_b(self, attrs):
+        self.data(' **')
+
+    def end_b(self):
+        self.data('**')
+
+    def start_code(self, attrs):
+        self.data(' `')
+
+    def end_code(self):
+        self.data('`')
+
+    def start_span(self, attrs):
+        pass
+
+    def end_span(self):
+        pass
+
+    def start_body(self, attrs):
+        pass
+
+    def end_body(self):
+        self.end_p()
+try:
+    from BeautifulSoup import BeautifulSoup, NavigableString
+
+    # don't seem to need this anymore - issue fixed in latest BeautifulSoup presumably
+    class ShlurpUpYourShloup(BeautifulSoup):
+        '''preserve whitespace in <pre>'''
+        def endData(self, containerClass=NavigableString):
+            if self.currentData:
+                currentData = ''.join(self.currentData)
+                if not currentData.strip():
+                    if '\n' in currentData:
+                        currentData = '\n'
+                    else:
+                        # just changed the following line
+                        # original: currentData = ' '
+                        currentData = u' ' * len(currentData)
+                self.currentData = []
+                if self.parseOnlyThese and len(self.tagStack) <= 1 and \
+                    (not self.parseOnlyThese.text or \
+                        not self.parseOnlyThese.search(currentData)):
+                    return
+                o = containerClass(currentData)
+                o.setup(self.currentTag, self.previous)
+                if self.previous:
+                    self.previous.next = o
+                self.previous = o
+                self.currentTag.contents.append(o)
+
+except ImportError:
+    def ShlurpUpYourShloup(text, *args, **kw):
+        return text
+
+    BeautifulSoup = ShlurpUpYourSoup
+
+def readsoup(fileobj, convert='html', encoding='utf8'):
+    if hasattr(fileobj, 'read'):
+        text = fileobj.read()
+    else:
+        text = open(fileobj, 'rb').read()
+    #for br in ['<br>', '<br/>', '<br />']:
+    #    text = text.replace(br, '\n')
+    #    text = text.replace(br.upper(), '\n')
+    return str(BeautifulSoup(text, convertEntities=convert,
+                                            fromEncoding=encoding))
+
+def html2rest(html, writer=sys.stdout):
+    # Convert an HTML string to reStructuredText, writing to *writer*.
+    parser = Parser(writer)
+    parser.feed(html)
+    parser.close()
+
+if __name__ == '__main__':
+    # Eg.
+    # python html2rest.py http://sphinx.pocoo.org/intro.html > intro.rst
+    # Input may be a URL, a local file path, or (no argument) stdin.
+    fileobj = None
+    if sys.argv[1:]:
+        arg = sys.argv[1]
+        if arg.startswith('http://'):
+            import urllib
+            fileobj = urllib.urlopen(arg)
+        else:
+            # assumes the local file is UTF-8 encoded — TODO confirm
+            fileobj = codecs.open(arg, 'rb', 'utf8')
+    else:
+        fileobj = sys.stdin
+    if fileobj is not None:
+        try:
+            html2rest(fileobj.read())#readsoup(fileobj))
+        finally:
+            fileobj.close()
+
+

tools/html2rst.py

+"""html2rst: Turn HTML into reStructured text.
+
+based on Aaron Swartz <me@aaronsw.com> html2text V 2.23
+http://www.aaronsw.com/2002/html2text/
+
+| GNU GPL 2.
+| (C) 2004 Aaron Swartz.
+| (C) 2005 Chris Liechti.
+
+Changes to the original:
+
+- generate reST compatible output
+- don't number links, use named references
+- redo wrap function so that it keeps indents in lists
+
+
+There are a number of problems with this approach and implementation, see
+README.txt
+"""
+
+
+# TODO:
+#   Support decoded entities with unifiable.
+#    Relative URL resolution
+
+if not hasattr(__builtins__, 'True'): True, False = 1, 0
+import re, sys, urllib, htmlentitydefs, codecs, StringIO, types
+import sgmllib
+sgmllib.charref = re.compile('&#([xX]?[0-9a-fA-F]+)[^0-9a-fA-F]')
+
+try: from textwrap import wrap
+except: pass
+
+# Use Unicode characters instead of their ascii pseudo-replacements
+UNICODE_SNOB = 0
+
+# Put the links after each paragraph instead of at the end.
+LINKS_EACH_PARAGRAPH = 0
+
+# Wrap long lines at position. 0 for no wrapping. (Requires Python 2.3.)
+BODY_WIDTH = 0
+
+### Entity Nonsense ###
+
+def name2cp(k):
+    # Map an HTML entity name to its Unicode code point.
+    # Raises KeyError for unknown entity names (callers rely on this).
+    if k == 'apos': return ord("'")
+    if hasattr(htmlentitydefs, "name2codepoint"): # requires Python 2.3
+        return htmlentitydefs.name2codepoint[k]
+    else:
+        k = htmlentitydefs.entitydefs[k]
+        if k.startswith("&#") and k.endswith(";"): return int(k[2:-1]) # not in latin-1
+        return ord(codecs.latin_1_decode(k)[0])
+
+unifiable = {'rsquo':"'", 'lsquo':"'", 'rdquo':'"', 'ldquo':'"', 
+'copy':'(C)', 'mdash':'--', 'nbsp':' ', 'rarr':'->', 'larr':'<-', 'middot':'*',
+'ndash':'-', 'oelig':'oe', 'aelig':'ae',
+'agrave':'a', 'aacute':'a', 'acirc':'a', 'atilde':'a', 'auml':'a', 'aring':'a', 
+'egrave':'e', 'eacute':'e', 'ecirc':'e', 'euml':'e', 
+'igrave':'i', 'iacute':'i', 'icirc':'i', 'iuml':'i',
+'ograve':'o', 'oacute':'o', 'ocirc':'o', 'otilde':'o', 'ouml':'o', 
+'ugrave':'u', 'uacute':'u', 'ucirc':'u', 'uuml':'u'}
+
+unifiable_n = {}
+
+for k in unifiable.keys():
+    unifiable_n[name2cp(k)] = unifiable[k]
+
+def charref(name):
+    # Decode a numeric character reference body ("xNN" hex or "NN" decimal).
+    if name[0] in ['x','X']:
+        c = int(name[1:], 16)
+    else:
+        c = int(name)
+    
+    if not UNICODE_SNOB and c in unifiable_n.keys():
+        # Prefer the ascii replacement table unless UNICODE_SNOB is set.
+        return unifiable_n[c]
+    else:
+        return unichr(c)
+
+def entityref(c):
+    # Decode a named entity reference; unknown names pass through as "&name".
+    if not UNICODE_SNOB and c in unifiable.keys():
+        return unifiable[c]
+    else:
+        try: name2cp(c)
+        except KeyError: return "&" + c
+        else: return unichr(name2cp(c))
+
+def replaceEntities(s):
+    # re.sub callback: group(1) is the reference body between "&" and ";".
+    s = s.group(1)
+    if s[0] == "#": 
+        return charref(s[1:])
+    else: return entityref(s)
+
+r_unescape = re.compile(r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));")
+def unescape(s):
+    # Expand every entity/character reference in s.
+    return r_unescape.sub(replaceEntities, s)
+    
+def fixattrs(attrs):
+    # Fix bug in sgmllib.py: attribute values may arrive with entity
+    # references unexpanded, so unescape each value.
+    if not attrs: return attrs
+    newattrs = []
+    for attr in attrs:
+        newattrs.append((attr[0], unescape(attr[1])))
+    return newattrs
+
+### End Entity Nonsense ###
+
+def onlywhite(line):
+    """Return true if the line does only consist of whitespace characters."""
+    for c in line:
+        if c is not ' ' and c is not '    ':
+            return c is ' '
+    return line
+
def optwrap(text):
    """Wrap all paragraphs in *text* to BODY_WIDTH columns.

    Paragraphs starting with ' ', '-' or '*' (indented blocks and list
    items) are passed through unwrapped; runs of blank lines are
    collapsed to at most two newlines.  A false BODY_WIDTH disables
    wrapping entirely.  (NOTE: this definition is shadowed later in the
    file by a second optwrap.)
    """
    if not BODY_WIDTH:
        return text

    assert wrap  # Requires Python 2.3 (textwrap.wrap).
    result = ''
    newlines = 0
    for para in text.split("\n"):
        if len(para) > 0:
            # Fix: compare characters with `!=` instead of `is not`;
            # identity comparison on strings depends on interning.
            if para[0] != ' ' and para[0] != '-' and para[0] != '*':
                for line in wrap(para, BODY_WIDTH):
                    result += line + "\n"
                result += "\n"
                newlines = 2
            else:
                # Keep non-blank indented/list paragraphs verbatim.
                if not onlywhite(para):
                    result += para + "\n"
                    newlines = 1
        else:
            # Blank input line: emit at most two consecutive newlines.
            if newlines < 2:
                result += "\n"
                newlines += 1
    return result
+
# reST title-underline characters indexed by heading level (index 0 is
# unused padding; h1 -> '=', h2 -> '-', ... h5 -> '.').  Only five
# levels are available -- indexing beyond h5 would raise IndexError.
TITLE_UNDERLININGS = "$=-~:."
+
def hn(tag):
    """Return the heading level for tags 'h1'..'h5', else a false value.

    Levels above 5 are rejected because TITLE_UNDERLININGS defines
    underline characters for five levels only; the original accepted
    'h6'..'h9' and then crashed with IndexError when rendering the
    title underline.  Non-heading tags and malformed names ('hx')
    return 0, preserving the original's falsy contract.
    """
    if tag[0] == 'h' and len(tag) == 2:
        try:
            n = int(tag[1])
        except ValueError:
            return 0
        if 1 <= n <= 5:
            return n
    return 0
+
class _html2text(sgmllib.SGMLParser):
    """SGML parser that renders HTML into reStructuredText-flavoured text.

    Output is pushed through ``self.out`` (a writer callable) as parsing
    proceeds; when constructed with ``out=None`` the text is instead
    accumulated in ``self.outtext`` and returned by ``close()``.
    """

    def __init__(self, out=sys.stdout.write):
        sgmllib.SGMLParser.__init__(self)
        
        # out=None selects the buffering sink outtextf; otherwise output
        # is written through the supplied callable immediately.
        if out is None: self.out = self.outtextf
        else: self.out = out
        self.outtext = u''           # accumulated output (buffering mode only)
        self.quiet = 0               # >0 while inside <head>/<style>/<script>
        self.p_p = 0                 # newlines pending before the next output
        self.outcount = 0            # number of o() emissions (link ordering)
        self.start = 1               # true until the first output is emitted
        self.space = 0               # a collapsed space is pending
        self.a = []                  # links waiting to be written as targets
        self.astack = []             # attr dicts of currently open <a> tags
        self.acount = 0              # total links assigned a 'count'
        self.list = []               # stack of open <ol>/<ul> descriptors
        self.blockquote = 0          # blockquote nesting depth
        self.pre = 0                 # inside a <pre> block
        self.startpre = 0            # just entered a <pre> block
        self.lastWasNL = 0           # last emitted character was '\n'
        self.indentation = 0         # current list indentation level
        self.indentation_stack = []
        """required to keep indent after unbalanced <li>s"""
    
    def outtextf(self, s): 
        # Buffering sink: decode byte strings as UTF-8 and append.
        if type(s) is type(''): s = codecs.utf_8_decode(s)[0]
        self.outtext += s
    
    def close(self):
        """Finish parsing, flush pending output, return the buffered text."""
        sgmllib.SGMLParser.close(self)
        
        self.pbr()
        # force='end' flushes remaining link targets and the final newline.
        self.o('', 0, 'end')
        
        return self.outtext
        
    def handle_charref(self, c):
        # Numeric character reference, e.g. &#65; or &#x41;.
        self.o(charref(c))

    def handle_entityref(self, c):
        # Named entity reference, e.g. &amp;.
        self.o(entityref(c))
            
    def unknown_starttag(self, tag, attrs):
        self.handle_tag(tag, attrs, 1)
    
    def unknown_endtag(self, tag):
        self.handle_tag(tag, None, 0)
        
    def previousIndex(self, attrs):
         """ returns the index of certain set of attributes (of a link) in the
             self.a list
 
             If the set of attributes is not found, returns None
         """
         if not attrs.has_key('href'): return None
         
         i = -1
         for a in self.a:
             i += 1
             match = 0
             
             # Links match on href; when either side carries a title,
             # both titles must be present and equal.
             if a.has_key('href') and a['href'] == attrs['href']:
                 if a.has_key('title') or attrs.has_key('title'):
                         if (a.has_key('title') and attrs.has_key('title') and
                            a['title'] == attrs['title']):
                             match = True
                 else:
                     match = True

             if match: return i

    def handle_tag(self, tag, attrs, start):
        """Translate one start (start=1) or end (start=0) tag to output."""
        attrs = fixattrs(attrs)
    
        if hn(tag):
            #~ if start: self.o(hn(tag)*"#" + ' ')
            # Headings: on close, underline the title with the character
            # for this level, sized from the last text chunk emitted.
            # NOTE(review): assumes handle_data ran for the heading text,
            # otherwise self.last_output is stale or unset -- confirm.
            if start:
                self.o('\n')
            else:
                self.o('\n' + TITLE_UNDERLININGS[hn(tag)]*len(self.last_output))
                self.p()

        if tag in ['p', 'div']: self.p()
        
        # Hard line break: two trailing spaces then newline.
        if tag == "br" and start: self.o("  \n")

        if tag == "hr" and start:
            self.p()
            self.o("--------")
            self.p()

        # Suppress all output while inside these elements.
        if tag in ["head", "style", 'script']: 
            if start:
                self.quiet += 1
            else:
                self.quiet -= 1
        
        if tag == "blockquote":
            if start: 
                self.p(); self.o('> ', 0, 1); self.start = 1
                self.blockquote += 1
            else:
                self.blockquote -= 1
                self.p()
        
        # Inline emphasis markers (same marker opens and closes).
        if tag in ['em', 'i', 'u']:
            self.o("*")
        if tag in ['strong', 'b']:
            self.o("**")
        if tag in ['code', 'tt'] and not self.pre:
            self.o('``') #TODO: `` `this` ``
        
        if tag == "a":
            if start:
                # Convert the attr pair list into a dict for lookup.
                attrsD = {}
                for (x, y) in attrs: attrsD[x] = y
                attrs = attrsD
                if attrs.has_key('href'):
                    self.astack.append(attrs)
                    self.o("`")
                else:
                    # Anchor without href: track it but emit nothing.
                    self.astack.append(None)
            else:
                if self.astack:
                    a = self.astack.pop()
                    if a:
                        # Reuse an identical earlier link, else register
                        # this one to be written out later as a target.
                        i = self.previousIndex(a)
                        if i is not None:
                            a = self.a[i]
                        else:
                            self.acount += 1
                            a['count'] = self.acount
                            a['outcount'] = self.outcount
                            a['data'] = self.last_output.replace('\n', ' ')
                            self.a.append(a)
                        self.o("`_")
        
        if tag == "img" and start:
            attrsD = {}
            for (x, y) in attrs: attrsD[x] = y
            attrs = attrsD
            if attrs.has_key('src'):
                #~ attrs['href'] = attrs['src']
                alt = attrs.get('alt', '')
                #~ i = self.previousIndex(attrs)
                #~ if i is not None:
                    #~ attrs = self.a[i]
                #~ else:
                    #~ self.acount += 1
                    #~ attrs['count'] = self.acount
                    #~ attrs['outcount'] = self.outcount
                    #~ attrs['data'] = attrs['src']
                    #~ self.a.append(attrs)
                # Emit a reST image directive, with :alt: when present.
                self.o(".. image:: %s\n" % attrs['src'])
                if alt:
                    self.o("    :alt: %s\n" % alt)
                #~ self.o("["+ repr(attrs['count']) +"]")
        
        if tag in ["ol", "ul"]:
            # Push/pop list state; the indentation stack restores the
            # outer indent even if <li> tags were unbalanced.
            if start:
                self.list.append({'name':tag, 'num':0})
                self.indentation_stack.append(self.indentation)
            else:
                if self.list: self.list.pop()
                self.indentation = self.indentation_stack.pop()
            self.p()
        
        if tag == 'li':
            if start:
                # Re-sync to the enclosing list's indent before the item.
                if self.indentation_stack:
                    self.indentation = self.indentation_stack[-1]
                self.pbr()
                if self.list: li = self.list[-1]
                else: li = {'name':'ul', 'num':0}
                #~ self.o("  "*len(self.list)) #TODO: line up <ol><li>s > 9 correctly.
                # '-   ' bullet for <ul>; left-justified 'N.' for <ol>.
                if li['name'] == "ul": self.o("-   ")
                elif li['name'] == "ol":
                    li['num'] += 1
                    self.o('%-4s' % ('%d.' % li['num']))
                self.start = 1
                self.indentation += 1
            else:
                self.indentation -= 1
                self.pbr()
        
        if tag in ['tr']: self.pbr()
        
        if tag == "pre":
            if start:
                # reST literal block: '::' then indented content.
                self.o('::\n')
                self.startpre = 1
                self.pre = 1
            else:
                self.pre = 0
            #~ self.p()
            
    def pbr(self):
        # Request at least one pending newline (soft break).
        if self.p_p == 0: self.p_p = 1

    # Request a paragraph break (two pending newlines).
    def p(self): self.p_p = 2
    
    def o(self, data, puredata=0, force=0):
        """Emit *data*, applying pending breaks, indents and link targets.

        puredata=1 collapses runs of whitespace in *data*; force makes
        empty data still flush state, and force='end' finalizes output.
        """
        if not self.quiet: 
            if puredata and not self.pre:
                # Collapse whitespace; remember a stripped leading space.
                data = re.sub(r'\s+', ' ', data)
                if data and data[0] == ' ':
                    self.space = 1
                    data = data[1:]
            if not data and not force: return
            
            if self.startpre:
                #self.out(" :") #TODO: not output when already one there
                self.startpre = 0
            
            # Line prefix: blockquote markers plus list indentation;
            # <pre> content gets four more spaces and prefixed newlines.
            bq = (">" * self.blockquote)
            if not (force and data and data[0] == ">") and self.blockquote: bq += " "
            bq += '    '*self.indentation
            
            if self.pre:
                bq += "    "
                data = data.replace("\n", "\n"+bq)
            
            if self.start:
                # Nothing emitted yet: drop pending breaks and spaces.
                self.space = 0
                self.p_p = 0
                self.start = 0

            if force == 'end':
                # It's the end.
                self.p_p = 0
                self.out("\n")
                self.space = 0

            if self.p_p:
                # Emit the pending newlines, each carrying the prefix.
                self.out(('\n'+bq)*self.p_p)
                self.space = 0
                
            if self.space:
                if not self.lastWasNL: self.out(' ')
                self.space = 0

            # Flush stored links as '.. _name: href' targets, either at
            # each paragraph break (LINKS_EACH_PARAGRAPH) or at the end.
            if self.a and ((self.p_p == 2 and LINKS_EACH_PARAGRAPH) or force == "end"):
                if force == "end": self.out("\n")

                newa = []
                for link in self.a:
                    # Only links emitted before this point are flushed.
                    if self.outcount > link['outcount']:
                        self.out(".. _"+link['data']+": " + link['href']) #TODO: base href
                        if link.has_key('title'): self.out(" ("+link['title']+")")
                        self.out("\n")
                    else:
                        newa.append(link)

                if self.a != newa: self.out("\n") # Don't need an extra line when nothing was done.

                self.a = newa

            self.p_p = 0
            self.out(data)
            self.lastWasNL = data and data[-1] == '\n'
            self.outcount += 1

    def handle_data(self, data):
        # Remember the raw chunk: heading underlines and link labels
        # are derived from it in handle_tag.
        self.last_output = data
        self.o(data, 1)
    
    def unknown_decl(self, data): pass
+
def html2text_file(html, out=sys.stdout.write):
    """Convert an HTML string, streaming the result through *out*.

    With out=None the text is buffered and returned by close().
    """
    parser = _html2text(out)
    parser.feed(html)
    parser.feed("")
    return parser.close()
+
def html2text(html):
    """Convert an HTML string to wrapped text."""
    converted = html2text_file(html, None)
    return optwrap(converted)
+
+import textwrap
def optwrap(text):
    """Smart wrapping that keeps indents (shadows the earlier optwrap).

    Each input line is wrapped to 78 columns.  Continuation lines are
    indented to match the line's own leading spaces, plus four extra
    spaces when the line opens a reST construct: a directive ('.. '),
    a numbered list item ('1.  '), or a bullet ('-   ').

    Fixes: the marker regex used '\\d+.' where the unescaped '.' matched
    any character (so e.g. '12x' got list treatment); the unused local
    holding the de-indented line is removed.
    """
    output = []
    marker = re.compile(r'(\.\. )|(\d+\. )|(-   )')
    for line in text.splitlines():
        indent = len(line) - len(line.lstrip(' '))
        joiner = '\n'
        if marker.match(line):
            joiner += '    '
        joiner += ' ' * indent
        output.append(joiner.join(textwrap.wrap(line, width=78 - len(joiner))))
    return '\n'.join(output)
+
if __name__ == "__main__":
    # CLI: read HTML from an http:// URL, from a file named on the
    # command line, or from stdin when no argument is given; print the
    # converted, wrapped text as ASCII (unmappable characters replaced).
    if sys.argv[1:]:
        arg = sys.argv[1]
        if arg.startswith('http://'):
            data = urllib.urlopen(arg).read()
        else:
            # NOTE(review): file handle is never closed explicitly.
            data = open(arg, 'r').read()
    else:
        data = sys.stdin.read()
    #~ html2text_file(data)
    rest = optwrap(html2text_file(data, None))
    sys.stdout.write(rest.encode('ascii', 'replace'))
+#!/usr/bin/env python
+
+import os
+import sys
+import shutil
+
+from lxml import etree
+from lxml import html
+import html2rest
+import html2rst
+
#ROOTDIR = sys.argv[1]
#SOURCEDIR = sys.argv[2]
# Input tree of downloaded HTML docs and output tree for the generated
# reST sources (hard-coded; the argv-based configuration above is
# disabled).
ROOTDIR = "downloads"
SOURCEDIR = "source"
+
def mkdir(path):
    """Create *path* together with any missing parents (``mkdir -p``).

    Replaces a hand-rolled loop that retried ``os.mkdir`` on shorter and
    shorter prefixes: it misspelled its index variable ('postion') and
    could loop forever when ``os.mkdir`` failed for any reason other
    than a missing parent (e.g. permissions).  ``os.makedirs`` does the
    same job correctly.  Like the original, this is a no-op when *path*
    already exists.
    """
    if not os.path.exists(path):
        os.makedirs(path)
+
def main():
    """Walk ROOTDIR for '*/doc' trees and mirror them into SOURCEDIR,
    converting each .html file to .rst and copying everything else."""
    for root, dirs, files in os.walk(ROOTDIR):
        # Only process documentation directories.
        if not root.endswith("/doc"):
            continue
        for srcdir, dirs, files in os.walk(root):
            if os.sep in srcdir:
                # Mirror the path under SOURCEDIR, dropping the leading
                # ROOTDIR component.
                dstdir = os.sep.join([SOURCEDIR] + srcdir.split(os.sep)[1:])
            # NOTE(review): dstdir would be unbound if the first srcdir
            # contained no os.sep; cannot happen here since srcdir ends
            # with "/doc", but fragile.
            if not os.path.exists(dstdir):
                #mkdir(dstdir)
                os.makedirs(dstdir)
            for _file in files:
                # Full source and destination paths for this file.
                src, dst = map(lambda x:os.sep.join([x, _file]), (srcdir, dstdir))
                base = src[len(ROOTDIR)+1:]
                if src.endswith(".html"):
                    #if "/java/" not in src:
                        #continue
                    head, tail = os.path.splitext(dst)
                    dst = "".join([head, ".rst"])
                    try:
                        dst_file = open(dst, "w")
                        # Title from the relative path: '/' and '_' split
                        # it into capitalized words.
                        title = " ".join((s.capitalize() for s in os.path.splitext(base)[0].replace("/","_-_").split("_")))
                        # Write the title with '=' overline and underline.
                        title_line = ["="*len(title)] * 2
                        title_line[1:1] = [title]
                        [dst_file.write("".join((i,"\n"))) for i in title_line]
                        dst_file.write("\n")
                        # Normalize via lxml, then convert to reST.
                        html2rest.html2rest(
                            html.tostring(html.fromstring(open(src).read())),
                            dst_file)
                    except (UnicodeEncodeError, UnicodeDecodeError, IndexError), e:
                        # Report the failing file and keep going.
                        print
                        print src, e
                    else:
                        # Progress dot per successfully converted file.
                        sys.stdout.write(".")
                        sys.stdout.flush()
                    finally:
                        # NOTE(review): if open(dst, "w") itself raised,
                        # dst_file is unbound here -- confirm.
                        dst_file.close()
                else:
                    # Non-HTML assets are copied verbatim.
                    shutil.copyfile(src, dst)
    return
+
if __name__ == "__main__":
    # Entry point: convert every downloaded HTML doc tree to reST.
    main()