Matt Chaput committed de92149

Changed W3 codec to save columns in separate files.
This avoids the common case of a compound file inside a compound file.

Comments (0)

Files changed (2)


     def supports_columns(self):
         return True
+    @classmethod
+    def column_filename(cls, segment, fieldname):
+        ext = "".join((".", fieldname, ".", cls.COLUMN_EXT))
+        return segment.make_filename(ext)
     # Segments and generations
     def new_segment(self, storage, indexname):
         self._indoc = False
+    def _column_filename(self, fieldname):
+        return W3Codec.column_filename(self._segment, fieldname)
     def close(self):
         if self._indoc is not None:
             # Called close without calling finish_doc
         # Finish open columns and close the columns writer
         for writer in self._colwriters.values():
-        colfile = self._create_file(W3Codec.COLUMN_EXT)
-        self._cols.save_as_compound(colfile)
+        self._cols.save_as_files(self._storage, self._column_filename)
         # If vectors were written, close the vector writers
         if self._vpostfile:
         self._segment = segment
         self._doccount = segment.doc_count_all()
-        self._colfile = segment.open_file(storage, W3Codec.COLUMN_EXT)
-        self._cols = compound.CompoundStorage(self._colfile, use_mmap=False)
         self._vpostfile = None
+        self._colfiles = {}
         self._readers = {}
         self._minlengths = {}
         self._maxlengths = {}
     def close(self):
-        self._cols.close()
+        for colfile, _, _ in self._colfiles.values():
+            colfile.close()
         if self._vpostfile:
     # Columns
     def has_column(self, fieldname):
-        return self._cols.file_exists(fieldname)
+        filename = W3Codec.column_filename(self._segment, fieldname)
+        return self._storage.file_exists(filename)
+    def _get_column_file(self, fieldname):
+        filename = W3Codec.column_filename(self._segment, fieldname)
+        length = self._storage.file_length(filename)
+        colfile = self._storage.open_file(filename)
+        return colfile, 0, length
     def column_reader(self, fieldname, column):
-        offset, length = self._cols.range(fieldname)
-        return column.reader(self._colfile, offset, length, self._doccount)
+        if fieldname not in self._colfiles:
+            self._colfiles[fieldname] = self._get_column_file(fieldname)
+        colfile, offset, length = self._colfiles[fieldname]
+        return column.reader(colfile, offset, length, self._doccount)
     # Lengths


     def _segment_pattern(cls, indexname):
-        return re.compile("(%s_[0-9a-z]+)[.][a-z]+" % indexname)
+        return re.compile("(%s_[0-9a-z]+)[.][A-Za-z0-9_.]+" % indexname)
     def _latest_generation(cls, storage, indexname):
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.