Anonymous avatar Anonymous committed 1b554dc

Fix searching and search index creation for incremental builds.

Comments (0)

Files changed (6)

sphinx/__init__.py

     print >>sys.stderr, """\
 usage: %s [options] sourcedir outdir [filenames...]
 options: -b <builder> -- builder to use (one of %s)
-         -a -- write all files; default is to only write new and changed files
-         -d <path> -- path for the cached doctree files (default outdir/.doctrees)
+         -a        -- write all files; default is to only write new and changed files
+         -E        -- don't use a saved environment, always read all files
+         -d <path> -- path for the cached environment and doctree files
+                      (default outdir/.doctrees)
          -O <option[=value]> -- give option to the builder (-O help for list)
          -D <setting=value> -- override a setting in sourcedir/conf.py
-         -N -- do not do colored output
+         -N        -- do not do colored output
 modes:
 * without -a and without filenames, write new and changed files.
 * with -a, write all files.
 
 def main(argv):
     try:
-        opts, args = getopt.getopt(argv[1:], 'ab:d:O:D:N')
+        opts, args = getopt.getopt(argv[1:], 'ab:d:O:D:NE')
         srcdirname = path.abspath(args[0])
         if not path.isdir(srcdirname):
             print >>sys.stderr, 'Error: Cannot find source directory.'
         return 1
 
     builder = all_files = None
-    opt_help = False
+    opt_help = freshenv = False
     options = {}
     confoverrides = {}
     doctreedir = path.join(outdirname, '.doctrees')
             confoverrides[key] = val
         elif opt == '-N':
             nocolor()
+        elif opt == '-E':
+            freshenv = True
 
     if not sys.stdout.isatty() or sys.platform == 'win32':
         # Windows' cmd box doesn't understand ANSI sequences
     builderobj = builderobj(srcdirname, outdirname, doctreedir, options,
                             status_stream=sys.stdout,
                             warning_stream=sys.stderr,
-                            confoverrides=confoverrides)
+                            confoverrides=confoverrides,
+                            freshenv=freshenv)
     if all_files:
         builderobj.build_all()
     elif filenames:

sphinx/builder.py

     Builds target formats from the reST sources.
     """
 
-    option_spec = {
-        'freshenv': 'Don\'t use a pickled environment',
-    }
+    option_spec = {}
 
     def __init__(self, srcdirname, outdirname, doctreedirname,
                  options, confoverrides=None, env=None,
-                 status_stream=None, warning_stream=None):
+                 status_stream=None, warning_stream=None,
+                 freshenv=False):
         self.srcdir = srcdirname
         self.outdir = outdirname
         self.doctreedir = doctreedirname
         if not path.isdir(doctreedirname):
             os.mkdir(doctreedirname)
+        self.freshenv = freshenv
 
         self.options = attrdict(options)
         self.validate_options()
            successfully loaded, False if a new environment had to be created."""
         if self.env:
             return
-        if not self.options.freshenv:
+        if not self.freshenv:
             try:
                 self.msg('trying to load pickled env...', nonl=True)
                 self.env = BuildEnvironment.frompickle(
         self.msg('creating index...')
         self.env.create_index(self)
 
-        self.prepare_writing()
-
         if filenames:
             # add all TOC files that may have changed
             filenames_set = set(filenames)
             # build all
             filenames_set = set(self.env.all_files)
 
+        self.prepare_writing(filenames)
+
         # write target files
         with collect_env_warnings(self):
             self.msg('writing output...')
         self.finish()
         self.msg('done!')
 
    def prepare_writing(self, filenames):
        """Hook called once before output is written; *filenames* is the
        list of source files about to be (re)written.  Concrete builders
        must override this."""
        raise NotImplementedError
 
     def write_file(self, filename, doctree):
     """
     name = 'html'
 
-    option_spec = Builder.option_spec
-    option_spec.update({
-        'nostyle': 'Don\'t copy style and script files',
-        'nosearchindex': 'Don\'t create a JSON search index for offline search',
-    })
-
     copysource = True
 
     def init(self):
             settings_overrides={'output_encoding': 'unicode'}
         )
 
-    def prepare_writing(self):
-        if not self.options.nosearchindex:
-            from .search import IndexBuilder
-            self.indexer = IndexBuilder()
-        else:
-            self.indexer = None
+    def prepare_writing(self, filenames):
+        from .search import IndexBuilder
+        self.indexer = IndexBuilder()
+        self.load_indexer(filenames)
         self.docwriter = HTMLWriter(self.config)
         self.docsettings = OptionParser(
             defaults=self.env.settings,
         )
         self.handle_file('search.rst', searchcontext, 'search')
 
-        if not self.options.nostyle:
-            self.msg('copying style files...')
-            # copy style files
-            styledirname = path.join(path.dirname(__file__), 'style')
-            ensuredir(path.join(self.outdir, 'style'))
-            for filename in os.listdir(styledirname):
-                if not filename.startswith('.'):
-                    shutil.copyfile(path.join(styledirname, filename),
-                                    path.join(self.outdir, 'style', filename))
-            # add pygments style file
-            f = open(path.join(self.outdir, 'style', 'pygments.css'), 'w')
-            if pygments:
-                f.write(get_stylesheet())
-            f.close()
+        # copy style files
+        self.msg('copying style files...')
+        styledirname = path.join(path.dirname(__file__), 'style')
+        ensuredir(path.join(self.outdir, 'style'))
+        for filename in os.listdir(styledirname):
+            if not filename.startswith('.'):
+                shutil.copyfile(path.join(styledirname, filename),
+                                path.join(self.outdir, 'style', filename))
+        # add pygments style file
+        f = open(path.join(self.outdir, 'style', 'pygments.css'), 'w')
+        if pygments:
+            f.write(get_stylesheet())
+        f.close()
 
         # dump the search index
         self.handle_finish()
             if path.getmtime(path.join(self.srcdir, filename)) > targetmtime:
                 yield filename
 
+
+    def load_indexer(self, filenames):
+        try:
+            with open(path.join(self.outdir, 'searchindex.json'), 'r') as f:
+                self.indexer.load(f, 'json')
+        except (IOError, OSError):
+            pass
+        # delete all entries for files that will be rebuilt
+        self.indexer.prune(set(self.env.all_files) - set(filenames))
+
     def index_file(self, filename, doctree, title):
         # only index pages with title
         if self.indexer is not None and title:
                             path.join(self.outdir, context['sourcename']))
 
    def handle_finish(self):
        # Dump the final JSON search index next to the HTML output.
        self.msg('dumping search index...')
        # NOTE(review): this prunes by target URI with the last 5 chars
        # (presumably '.html') stripped, while load_indexer() prunes by
        # source filename (env.all_files).  The two keyings look
        # inconsistent -- confirm what key feed()/index_file actually
        # store before relying on either prune surviving a rebuild.
        self.indexer.prune([self.get_target_uri(fn)[:-5] for fn in self.env.all_files])
        with open(path.join(self.outdir, 'searchindex.json'), 'w') as f:
            self.indexer.dump(f, 'json')
 
 
 class WebHTMLBuilder(StandaloneHTMLBuilder):
     """
     name = 'web'
 
-    # doesn't use the standalone specific options
-    option_spec = Builder.option_spec.copy()
-    option_spec.update({
-        'nostyle': 'Don\'t copy style and script files',
-        'nosearchindex': 'Don\'t create a search index for the online search',
-    })
-
     def init(self):
         # Nothing to do here.
         pass
             return source_filename[:-9] # up to /
         return source_filename[:-4] + '/'
 
+    def load_indexer(self, filenames):
+        try:
+            with open(path.join(self.outdir, 'searchindex.pickle'), 'r') as f:
+                self.indexer.load(f, 'pickle')
+        except (IOError, OSError):
+            pass
+        # delete all entries for files that will be rebuilt
+        self.indexer.prune(set(self.env.all_files) - set(filenames))
+
     def index_file(self, filename, doctree, title):
         # only index pages with title and category
         if self.indexer is not None and title:
         with file(outfilename, 'wb') as fp:
             pickle.dump(self.globalcontext, fp, 2)
 
-        if self.indexer is not None:
-            self.msg('dumping search index...')
-            f = open(path.join(self.outdir, 'searchindex.pickle'), 'w')
+        self.msg('dumping search index...')
+        self.indexer.prune(self.env.all_files)
+        with open(path.join(self.outdir, 'searchindex.pickle'), 'wb') as f:
             self.indexer.dump(f, 'pickle')
-            f.close()
+
         # touch 'last build' file, used by the web application to determine
         # when to reload its environment and clear the cache
         open(path.join(self.outdir, LAST_BUILD_FILENAME), 'w').close()
     """
     name = 'htmlhelp'
 
-    option_spec = Builder.option_spec.copy()
-    option_spec.update({
+    option_spec = {
         'outname': 'Output file base name (default "pydoc")'
-    })
+    }
 
     # don't copy the reST source
     copysource = False
 from collections import defaultdict
 from docutils.nodes import Text, NodeVisitor
 from .util.stemmer import PorterStemmer
-from .util.json import dump_json
+from .util.json import dump_json, load_json
 
 
 word_re = re.compile(r'\w+(?u)')
     passed to the `feed` method.
     """
     formats = {
-        'json':     dump_json,
-        'pickle':   pickle.dumps
+        'json':     (dump_json, load_json),
+        'pickle':   (pickle.dumps, pickle.loads),
     }
 
    def __init__(self):
        """Create an empty search index."""
        self._stemmer = Stemmer()
        # filename -> title
        self._titles = {}
        # stemmed word -> set(filenames)
        self._mapping = {}
        # category -> set(filenames)
        self._categories = {}
+
    def load(self, stream, format):
        """Reconstruct the index from frozen data previously written by
        :meth:`dump`.

        *stream* is a file-like object, *format* one of the keys of
        ``formats`` ('json' or 'pickle'); the second item of the format
        tuple is the deserializer.
        """
        # frozen layout (see freeze()): [filenames, categories, titles, mapping]
        frozen = self.formats[format][1](stream.read())
        index2fn = frozen[0]
        # titles are stored as a list parallel to the filename list
        self._titles = dict(zip(frozen[0], frozen[2]))
        # categories and mapping store file *indices*; translate back
        # to filenames for the in-memory representation
        self._categories = dict((k, set(index2fn[i] for i in v))
                                for (k, v) in frozen[1].iteritems())
        self._mapping = dict((k, set(index2fn[i] for i in v))
                             for (k, v) in frozen[3].iteritems())
 
     def dump(self, stream, format):
-        """Dump the freezed index to a stream."""
-        stream.write(self.formats[format](self.freeze()))
+        """Dump the frozen index to a stream."""
+        stream.write(self.formats[format][0](self.freeze()))
 
    def freeze(self):
        """
        Create a useable data structure. You can pass this output
        to the `SearchFrontend` to search the index.

        Layout: ``[filenames, categories, titles, mapping]`` where
        *categories* and *mapping* refer to files by their position in
        the *filenames* list.
        """
        # keys() and values() pair up element-for-element as long as the
        # dict is not modified between the two calls
        fns, titles = self._titles.keys(), self._titles.values()
        fn2index = dict((f, i) for (i, f) in enumerate(fns))
        return [
            fns,
            # category -> list of file indices
            dict((k, [fn2index[fn] for fn in v])
                 for (k, v) in self._categories.iteritems()),
            titles,
            # stemmed word -> list of file indices
            dict((k, [fn2index[fn] for fn in v])
                 for (k, v) in self._mapping.iteritems()),
        ]
 
    def prune(self, filenames):
        """Remove data for all filenames not in the list."""
        # rebuild the title map keeping only the surviving filenames
        new_titles = {}
        for filename in filenames:
            if filename in self._titles:
                new_titles[filename] = self._titles[filename]
        self._titles = new_titles
        # restrict every word's and category's file set to the survivors;
        # entries whose set becomes empty are left in place
        for wordnames in self._mapping.itervalues():
            wordnames.intersection_update(filenames)
        for catnames in self._categories.itervalues():
            catnames.intersection_update(filenames)
+
     def feed(self, filename, category, title, doctree):
         """Feed a doctree to the index."""
-        file_id = self._filenames.setdefault(filename, len(self._filenames))
-        self._titles[file_id] = title
+        self._titles[filename] = title
+        self._categories.setdefault(category, set()).add(filename)
+
         visitor = WordCollector(doctree)
         doctree.walk(visitor)
-        self._categories.setdefault(category, set()).add(file_id)
         for word in word_re.findall(title) + visitor.found_words:
             self._mapping.setdefault(self._stemmer.stem(word.lower()),
-                                     set()).add(file_id)
+                                     set()).add(filename)
 
 
 class SearchFrontend(object):

sphinx/style/searchtools.js

 }
 
 $(document).ready(function() {
-        Documentation.Search.init();
+        Search.init();
     });

sphinx/templates/search.html

 {% extends "layout.html" %}
 {% set title = 'Search Documentation' %}
-{% block header %}
+{% block head %}
     <script type="text/javascript" src="{{ pathto('style/searchtools.js', 1) }}"></script>
 {% endblock %}
 {% block body %}
       ('tutorial', 'Python Tutorial', true),
       ('library', 'Library Reference', true),
       ('maclib', 'Macintosh Library Modules', false),
+      ('reference', 'Language Reference', false),
       ('extending', 'Extending and Embedding', false),
       ('c-api', 'Python/C API', false),
       ('install', 'Installing Python Modules', true),
       ('distutils', 'Distributing Python Modules', true),
       ('documenting', 'Documenting Python', false),
       ('whatsnew', 'What\'s new in Python?', false),
-      ('reference', 'Language Reference', false)
     ] -%}
       <li><input type="checkbox" name="area" id="area-{{ id }}" value="{{ id
           }}"{% if checked %} checked{% endif %}>

sphinx/util/json.py

 
 import re
 
-ESCAPE = re.compile(r'[\x00-\x19\\"\b\f\n\r\t]')
+# escape \, ", control characters and everything outside ASCII
 ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
 ESCAPE_DICT = {
     '\\': '\\\\',
     '\r': '\\r',
     '\t': '\\t',
 }
-for i in range(0x20):
-    ESCAPE_DICT.setdefault(chr(i), '\\u%04x' % (i,))
 
 
 def encode_basestring_ascii(s):
     elif isinstance(obj, basestring):
         return encode_basestring_ascii(obj)
     raise TypeError(type(obj))
+
+
# matches a complete JSON string literal, including escaped quotes/backslashes
STRING = re.compile(r'("(\\\\|\\"|[^"])*")')

def load_json(s):
    """Parse a JSON document produced by ``dump_json`` back into Python
    objects.

    NOTE(review): implemented by prefixing every string literal with
    ``u`` and eval()ing the result with null/true/false bound -- eval on
    arbitrary input is unsafe, so only use this on trusted, locally
    generated data (the search index written by this package).
    """
    namespace = {'null': None, 'true': True, 'false': False}
    return eval(STRING.sub(r'u\1', s), namespace)
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.