Commits

Georg Brandl committed 2d7cb56

Issue #1067: in search index generation, record words in section titles in their own set

Comments (0)

Files changed (2)

sphinx/environment.py

 
 # This is increased every time an environment attribute is added
 # or changed to properly invalidate pickle files.
-ENV_VERSION = 41
+ENV_VERSION = 42
 
 
 default_substitutions = set([

sphinx/search/__init__.py

     :license: BSD, see LICENSE for details.
 """
 import re
+import itertools
 import cPickle as pickle
 
-from docutils.nodes import comment, Text, NodeVisitor, SkipNode
+from docutils.nodes import comment, title, Text, NodeVisitor, SkipNode
 
 from sphinx.util import jsdump, rpartition
 
             (ord(word[0]) < 256 and (len(word) < 3 or word in self.stopwords or
                                      word.isdigit())))
 
+
 from sphinx.search import en, ja
 
 languages = {
     def __init__(self, document, lang):
         NodeVisitor.__init__(self, document)
         self.found_words = []
+        self.found_title_words = []
         self.lang = lang
 
     def dispatch_visit(self, node):
         if node.__class__ is comment:
             raise SkipNode
-        if node.__class__ is Text:
+        elif node.__class__ is Text:
             self.found_words.extend(self.lang.split(node.astext()))
+        elif node.__class__ is title:
+            self.found_title_words.extend(self.lang.split(node.astext()))
 
 
 class IndexBuilder(object):
         self._titles = {}
         # stemmed word -> set(filenames)
         self._mapping = {}
+        # stemmed words in titles -> set(filenames)
+        self._title_mapping = {}
         # objtype -> index
         self._objtypes = {}
         # objtype index -> (domain, type, objname (localized))
             raise ValueError('old format')
         index2fn = frozen['filenames']
         self._titles = dict(zip(index2fn, frozen['titles']))
-        self._mapping = {}
-        for k, v in frozen['terms'].iteritems():
-            if isinstance(v, int):
-                self._mapping[k] = set([index2fn[v]])
-            else:
-                self._mapping[k] = set(index2fn[i] for i in v)
+
+        def load_terms(mapping):
+            rv = {}
+            for k, v in mapping.iteritems():
+                if isinstance(v, int):
+                    rv[k] = set([index2fn[v]])
+                else:
+                    rv[k] = set(index2fn[i] for i in v)
+            return rv
+
+        self._mapping = load_terms(frozen['terms'])
+        self._title_mapping = load_terms(frozen['titleterms'])
         # no need to load keywords/objtypes
 
     def dump(self, stream, format):
         return rv
 
     def get_terms(self, fn2index):
-        rv = {}
-        for k, v in self._mapping.iteritems():
-            if len(v) == 1:
-                fn, = v
-                if fn in fn2index:
-                    rv[k] = fn2index[fn]
-            else:
-                rv[k] = [fn2index[fn] for fn in v if fn in fn2index]
-        return rv
+        rvs = {}, {}
+        for rv, mapping in zip(rvs, (self._mapping, self._title_mapping)):
+            for k, v in mapping.iteritems():
+                if len(v) == 1:
+                    fn, = v
+                    if fn in fn2index:
+                        rv[k] = fn2index[fn]
+                else:
+                    rv[k] = [fn2index[fn] for fn in v if fn in fn2index]
+        return rvs
 
     def freeze(self):
         """Create a usable data structure for serializing."""
         filenames = self._titles.keys()
         titles = self._titles.values()
         fn2index = dict((f, i) for (i, f) in enumerate(filenames))
-        terms = self.get_terms(fn2index)
+        terms, title_terms = self.get_terms(fn2index)
+
         objects = self.get_objects(fn2index)  # populates _objtypes
         objtypes = dict((v, k[0] + ':' + k[1])
                         for (k, v) in self._objtypes.iteritems())
         objnames = self._objnames
         return dict(filenames=filenames, titles=titles, terms=terms,
-                    objects=objects, objtypes=objtypes, objnames=objnames)
+                    objects=objects, objtypes=objtypes, objnames=objnames,
+                    titleterms=title_terms)
 
     def prune(self, filenames):
         """Remove data for all filenames not in the list."""
         self._titles = new_titles
         for wordnames in self._mapping.itervalues():
             wordnames.intersection_update(filenames)
+        for wordnames in self._title_mapping.itervalues():
+            wordnames.intersection_update(filenames)
 
     def feed(self, filename, title, doctree):
         """Feed a doctree to the index."""
         visitor = WordCollector(doctree, self.lang)
         doctree.walk(visitor)
 
-        def add_term(word, stem=self.lang.stem):
+        stem = self.lang.stem
+        filter =  self.lang.word_filter
+
+        for word in itertools.chain(visitor.found_title_words,
+                                    self.lang.split(title)):
             word = stem(word)
-            if self.lang.word_filter(word):
-                self._mapping.setdefault(word, set()).add(filename)
-
-        for word in self.lang.split(title):
-            add_term(word)
+            if filter(word):
+                self._title_mapping.setdefault(word, set()).add(filename)
 
         for word in visitor.found_words:
-            add_term(word)
+            word = stem(word)
+            if (filename not in self._title_mapping.get(word, ())
+                    and filter(word)):
+                self._mapping.setdefault(word, set()).add(filename)
 
     def context_for_searchtool(self):
         return dict(
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.