Matt Chaput committed 5413eb3

Randomized complex queries. Simplified Whoosh code. Added guards around XODB usage.

Comments (0)

Files changed (1)

         DOCS.append(doc)
 
 
+def generate_complex_query():
+    """Build one randomized query for the complex-search benchmarks.
+
+    Returns a dict mapping the field names "two", "four" and "eight" to a
+    unicode string holding a random integer drawn from 0-1, 0-3 and 0-7
+    respectively.
+    """
+    # Draw the values in a fixed order (two, four, eight) so the sequence of
+    # random numbers consumed per call stays the same.
+    two_text = unicode(randint(0, 1))
+    four_text = unicode(randint(0, 3))
+    eight_text = unicode(randint(0, 7))
+    return {"two": two_text, "four": four_text, "eight": eight_text}
+
+
 class Bench(object):
     def __init__(self, index_dir):
         self.index_dir = index_dir
     def bench_all(self):
         print "Benchmarking: %s" % self.NAME
         t_index = self.bench(self.create_index)
-        print "Indexing takes %.1fs (%.1f/s)" % (t_index, DOC_COUNT/t_index)
+        print "Indexing takes %.1fs (%.1f/s)" % (t_index, DOC_COUNT / t_index)
         t_search = self.bench(self.search)
-        print "Searching takes %.1fs (%.1f/s)" % (t_search, DOC_COUNT/t_search)
+        print "Searching takes %.1fs (%.1f/s)" % (t_search, DOC_COUNT / t_search)
         t_search_complex = self.bench(self.search_complex)
-        print "Complex Searching takes %.1fs (%.1f/s)" % (t_search_complex, COMPLEX_COUNT/t_search_complex)
+        print "Complex Searching takes %.1fs (%.1f/s)" % (t_search_complex, COMPLEX_COUNT / t_search_complex)
         print
         self.remove_index()
 
     import whoosh
     from whoosh.fields import Schema, ID, NUMERIC
     from whoosh.index import open_dir, create_in
-    from whoosh.filedb.multiproc import MultiSegmentWriter
     from whoosh.query import Term, And
 except ImportError:
     whoosh = None
 if whoosh:
     class Whoosh(Bench):
         NAME = 'whoosh %d.%d.%d' % whoosh.__version__
-        USE_MULTIPROCESSING = True
+        LIMITMB = 256
+        PROCESSORS = 2
+        MULTISEGMENT = True
 
         def create_index(self):
             fields = {}
             for field in ['word', 'two', 'four', 'eight', ] + EXTRA_FIELDS:
                 fields[field] = ID(stored=True)
             schema = Schema(**fields)
-            os.mkdir(self.index_dir)
+
+            if not os.path.exists(self.index_dir):
+                os.mkdir(self.index_dir)
             ix = create_in(self.index_dir, schema)
-            if self.USE_MULTIPROCESSING:
-                writer = MultiSegmentWriter(ix, limitmb=128)
-            else:
-                writer = ix.writer(limitmb=256)
-            with writer as writer:
+
+            mb = self.LIMITMB // self.PROCESSORS
+            procs = self.PROCESSORS
+            ms = self.MULTISEGMENT
+            with ix.writer(limitmb=mb, procs=procs, multisegment=ms) as writer:
                 for doc in self.make_docs():
                     writer.add_document(**doc)
             ix.close()
                              Term('four', '2'),
                              Term('eight', '3')])
                 for i in xrange(COMPLEX_COUNT):
+                    query_dict = generate_complex_query()
+                    query = And([Term(fieldname, text) for fieldname, text
+                                 in query_dict.items()]).normalize()
                     results = searcher.search(query, limit=10)
                     for result in results:
                         # make sure to really read the stored fields
 if xapian:
     class Xappy(Bench):
         NAME = 'xappy %s / xapian %d.%d.%d' % (xappy.__version__,
-                                               xapian.major_version(), xapian.minor_version(), xapian.revision(), )
+                                               xapian.major_version(), xapian.minor_version(), xapian.revision(),)
 
         def create_index(self):
             iconn = IndexerConnection(self.index_dir)
 
         def search_complex(self):
             sconn = SearchConnection(self.index_dir)
-            terms = [
-                sconn.query_field('two', '1'),
-                sconn.query_field('four', '2'),
-                sconn.query_field('eight', '3'),
-            ]
-            query = Query(Query.OP_AND, terms)
             for i in xrange(COMPLEX_COUNT):
+                query_dict = generate_complex_query()
+                terms = [sconn.query_field(fieldname, text)
+                         for fieldname, text in query_dict.items()]
+                query = Query(Query.OP_AND, terms)
+
                 results = sconn.search(query, 0, 10)
                 for result in results:
                     # make sure to really read the stored fields
             sconn.close()
 
 
-from xodb import Array, Integer, Schema as XODBSchema, String, open as xodb_open
-
-
 class Benchmark(object):
 
     def __init__(self, word, two, four, eight, **extra_fields):
             setattr(self, field, value)
 
 
-class BenchmarkSchema(XODBSchema):
-    word = String.using(default='en')
-    two = String.using(default='en')
-    four = String.using(default='en')
-    eight = String.using(default='en')
+xodb = None
+if xapian:
+    try:
+        import xodb
+    except ImportError:
+        pass
 
-for field in EXTRA_FIELDS:
-    setattr(BenchmarkSchema, field, String.using(default='en'))
+if xodb:
+    class BenchmarkSchema(xodb.Schema):
+        """xodb schema declaring the four core benchmark fields as strings."""
+        # The base class is exported by the xodb package as ``Schema``;
+        # ``xodb.XODBSchema`` is not a module attribute and would raise
+        # AttributeError as soon as this class statement runs.
+        word = xodb.String.using(default='en')
+        two = xodb.String.using(default='en')
+        four = xodb.String.using(default='en')
+        eight = xodb.String.using(default='en')
 
+    for field in EXTRA_FIELDS:
+        setattr(BenchmarkSchema, field, xodb.String.using(default='en'))
 
-class XODB(Bench):
-    NAME = 'xodb %s / xapian %s' % ('0.4.17', xapian.version_string())
 
-    def create_index(self):
-        db = xodb_open(self.index_dir)
-        db.map(Benchmark, BenchmarkSchema)
+    class XODB(Bench):
+        NAME = 'xodb %s / xapian %s' % ('0.4.17', xapian.version_string())
 
-        for doc in self.make_docs():
-            db.add(Benchmark(**doc))
-        db.flush()
-        db.close()
+        def create_index(self):
+            db = xodb.open(self.index_dir)
+            db.map(Benchmark, BenchmarkSchema)
 
-    def search(self):
-        db = xodb_open(self.index_dir)
-        for word in SHUFFLED_WORDS:
-            results = db.query('word:%s' % word, limit=1)
-            for res in results:
-                dummy = repr(res)
-        db.close()
+            for doc in self.make_docs():
+                db.add(Benchmark(**doc))
+            db.flush()
+            db.close()
 
-    def search_complex(self):
-        db = xodb_open(self.index_dir)
-        query = "two:1 AND four:2 AND eight:3"
-        for i in xrange(COMPLEX_COUNT):
-            results = db.query(query, limit=10)
-            for result in results:
-                # make sure to really read the stored fields
-                dummy = repr(result)
-        db.close()
+        def search(self):
+            db = xodb.open(self.index_dir)
+            for word in SHUFFLED_WORDS:
+                results = db.query('word:%s' % word, limit=1)
+                for res in results:
+                    dummy = repr(res)
+            db.close()
+
+        def search_complex(self):
+            db = xodb.open(self.index_dir)
+            for i in xrange(COMPLEX_COUNT):
+                query_dict = generate_complex_query()
+                query = " AND ".join("%s:%s" % (fieldname, text)
+                                     for fieldname, text in query_dict.items())
+                results = db.query(query, limit=10)
+                for result in results:
+                    # make sure to really read the stored fields
+                    dummy = repr(result)
+            db.close()
 
 
 if __name__ == '__main__':
 
     if xapian:
         Xappy('xapian_ix').bench_all()
-        XODB('xapian_ix').bench_all()
+    if xodb:
+        XODB('xodb_ix').bench_all()
     if whoosh:
         Whoosh('whoosh_ix').bench_all()
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.