Commits

Jason R. Coombs committed d6ce23f

Expanded IndexWriter.add_document to perform the indexing manipulations based on SegmentWriter's implementation

  • Participants
  • Parent commits ba4d416

Comments (0)

Files changed (3)

File tests/test_simple.py

 from __future__ import print_function, unicode_literals
 
+import pymongo
+
 from whoosh import fields
 from whoosh.qparser import QueryParser
 from thwump.index import Index
 
+def setup_module(mod):
+	# for now, drop previous results when running tests
+	pymongo.Connection().thwump.index.drop()
+
 def test_quick_example():
 	"""
 	Exercise the behavior using the quick example given in the Whoosh

File thwump/index.py

 
 class Index(whoosh.index.Index):
 	database_name = 'thwump'
-	collection_name = 'index'
-	def __init__(self, schema=None, connection_factory=pymongo.Connection):
+
+	def __init__(self, name='index', schema=None,
+			connection_factory=pymongo.Connection):
+		self.name = name
 		self.schema = schema
 		self.connection = connection_factory()
-		self.collection = self.connection[self.database_name][
-			self.collection_name]
+		self.collection = self.connection[self.database_name][name]
 
 	def is_empty(self):
 		return not self.doc_count()

File thwump/writing.py

+import bson
 import whoosh.writing
 
 class IndexWriter(whoosh.writing.IndexWriter):
 		return self.index.schema
 
 	def add_document(self, **fields):
-		self.index.collection.insert(fields)
+		field_names = sorted(name for name in fields.keys()
+			if not name.startswith('_') and fields[name] is not None)
+		doc_id = bson.objectid.ObjectId()
+		doc = dict(
+			_id = doc_id,
+		)
+		for field_name in field_names:
+			value = fields[field_name]
+			field = self.schema[field_name]
+
+			if field.indexed:
+				# Get the index details for the field
+				details = field.index(value)
+				if field.scoreable:
+					doc[field_name] = dict(
+						length = sum(freq for tbytes, freq, weight, vbytes
+							in details),
+						)
+				# store the details
+				for text, freq, weight, vector in details:
+					self.index.collection.posts.insert(dict(
+						doc_id = doc_id,
+						field_name=field_name,
+						text = text,
+						weight = weight,
+						vector = vector,
+						), safe=True)
+			if field.separate_spelling():
+				raise NotImplementedError()
+			if field.vector:
+				raise NotImplementedError()
+
+			stored_value = fields.get('_stored_%s' % field_name, value)
+
+			if field.stored:
+				doc_field = doc.setdefault(field_name, {})
+				doc_field['value'] = stored_value
+
+		self.index.collection.insert(doc)
 
 	def add_reader(self, reader):
 		"""
 			self.index.insert(item)
 
 	def delete_document(self, docnum, delete=True):
-		self.index.collection.remove(docnum)
+		if not delete:
+			raise NotImplementedError()
+		doc_id = next(self.index.collection.find().skip(docnum-1))['_id']
+		self.index.collection.remove(doc_id)
+		self.index.collection.posts.remove({'doc_id': doc_id})