Commits

Anonymous committed 23f5ada

[search/xapian][s]: get the build of the xapian search index working and use it in the WUI (rather confusingly at api/search at the moment!).

Comments (0)

Files changed (4)

         else:
             print 'Command not recognized'
 
+
 class Xapian(Command):
     '''Everything related to xapian search index
 
     usage = __doc__
     max_args = None
     min_args = 1
-    import pdw.searchxapian as sx
-
-    def get_index(self):
-        self.index = sx.SearchIndex()
-
 
     def command(self):
         self._load_config()
         cmd = self.args[0]
-        if len(self.args) > 1:
-            scaler = int(self.args[1])
-        else:
-            scaler = 1
+        import pdw.searchxapian as sx
+        self.index = sx.SearchIndex.default_index()
         if cmd == 'create':
             print 'creating database, or using existing'
-            self.get_index()
             print 'indexing all works'
             self.index.index_all_works()
             print 'indexing all people'
             self.index.index_all_persons()
-
-        if cmd == 'all_works':
+        elif cmd == 'all_works':
             print 'indexing all people'
-            self.get_index()
             self.index.index_all_works()
-
-        if cmd == 'all_people':
+        elif cmd == 'all_people':
             print 'indexing all people'
-            self.get_index()
             self.index.index_all_persons()
-
         else:
             print 'Command not recognized'
 

pdw/controllers/api.py

             result = json.dumps({ "error": "not enough information provided. Please check documentation at http://wiki.okfn.org/PublicDomainCalculators/Api "})
             return result
             #return render('api/example')
-        
+
+    def search(self):
+        '''Xapian based search.
+        '''
+        index = searchxapian.SearchIndex.default_index()
+        c.q = request.params.get('q', '')
+        if c.q:
+            c.had_query = True
+            c.result_list, c.size = index.result_list(c.q)
+        return render('api/search')
 
     def search2(self):
-        '''copied from the work controller...
-        it gives back the results in order, but 
-        first books, then persons, then items.
-        it consults the postgres database.'''
-        #raise NotImplementedError('Still working on this')
-        index = searchxapian.SearchIndex()
+        '''Search using the Xapian search index.
+
+        Copied from the work controller. It gives back the results in order,
+        but first books, then persons, then items.
+        '''
+        index = searchxapian.SearchIndex.default_index()
 
         q = request.params.get('q', '')
         c.q = q
 
         return render('api/search')
 
-    def search(self):
-        '''new version of the search, with 
-        mixed results...
-        doesn't make use of the postgres database:
-        gets all its results from the xapian db 
-        (could get outdated!)'''
-        index = searchxapian.SearchIndex()
-
-        q = request.params.get('q', '')
-        c.q = q
-        if q:
-
-            c.had_query = True
-
-            c.result_list, c.size = index.result_list(q)
-        return render('api/search')

pdw/searchxapian.py

     pass
 
 class SearchIndex(object):
-    def __init__(self, index_dir='xapian/'):
+    def __init__(self, index_dir):
         self.index_dir = index_dir
         self.xapiandb = xapian.WritableDatabase(index_dir, xapian.DB_CREATE_OR_OPEN)
         flintlock = os.path.join(self.index_dir, 'flintlock') 
             os.unlink(flintlock) 
         self.stemmer = xapian.Stem('en')
 
-    def refresh(self):
-        self.xapiandb.flush()
-
     @classmethod
     def config_index_dir(self):
         '''Get the search index directory specified in the config.'''
             except InvalidArgumentError:
                 print 'there has been an error with ', person.name, person.id
                 print InvalidArgumentError
-        self.refresh()
+        self.xapiandb.flush()
 
     def index_work_list(self,worklist):
         '''
         count = 0
         for work in worklist:
             if str(count).endswith('000'):
-                self.refresh()
+                self.xapiandb.flush()
 
             count += 1
             #print count
                 work_doc = self.index_work(work)
             except InvalidArgumentError:
                 pass
-        self.refresh()
+        self.xapiandb.flush()
 
     def index_work(self,work=None):
         """Gets a work object (model.Work()) and returns
                          'pd': pd,
                         }
             except:
-                self.refresh()
+                self.xapiandb.flush()
 
 
             out_list.append(match)
         return out_list, results.size()
 
-
-
-
     def index_range(self,start,end=None):
         '''index a number of records from the database.
 
 
         worklist = pdw.model.Session.query(pdw.model.Work)[start:end].all()
         self.index_work_list(worklist)
-        self.refresh()
-        pdw.model.Session.close()
+        self.xapiandb.flush()
+        pdw.model.Session.remove()
 
-    def index_all_works(self,start=0.0,model='Work'):
+    def index_all_works(self, start=0, model='Work'):
         '''
         takes chunks of 500 works  from the database
         and reindexes them, along with their appended 
         for indexing all records starting from the 35000 till the end
         '''
         end = float(pdw.model.Work.query.count())
+        end = 1000.0
         
         stop = start
-        while stop<end: 
-            print stop
-
+        while stop < end: 
+            print('%s (%s%%)') % (stop, stop/end*100)
             if stop+500>end:
                 self.index_range(stop,end)
                 stop = end
             else:
                 self.index_range(stop,stop+500)
                 stop +=500
-
-                print stop/end*100, " %"
-                self.refresh()
-
+                self.xapiandb.flush()
 
     def index_all_persons(self,start=0,end=None):
         ''' with this command we can index all the remaining persons
                 stop +=500
 
                 print stop/end*100, " %"
-                self.refresh()
+                self.xapiandb.flush()
         
 
 
             self.index_person(person)
             if str(index).endswith('00'):
 
-                self.refresh()
-        pdw.model.Session.close()
+                self.xapiandb.flush()
+        pdw.model.Session.remove()
 
 
 
     stats = pdw.cli:Stats
     consolidate = pdw.cli:Consolidate
     analyze = pdw.cli:Analyze
-    pd = pdw.cli:Pd
     search_index = pdw.cli:Xapian
     """,
 )