Source

wdmmg / wdmmg / lib / solrhelp.py

Full commit
'''Helper methods for using solr.

Called solrhelp to avoid conflict with solr package itself.
'''
from solr import SolrConnection
from pylons import config

from wdmmg import model


def solr_connection():
    '''Return the object used to talk to solr, as selected by the config.

    Reads the ``solr.url`` setting. The special value ``'stub'`` yields the
    module-level ``stub`` instance; any other value is passed to
    ``SolrConnection`` together with the optional ``solr.http_user`` and
    ``solr.http_pass`` settings (``None`` when they are not configured).
    '''
    url = config['solr.url']
    if url != 'stub':
        return SolrConnection(
            url,
            http_user=config.get('solr.http_user'),
            http_pass=config.get('solr.http_pass'))
    return stub

# TODO: this should move to wdmmg/tests/stub/solr.py or the like
class _Stub(object):
    '''
    Fakes the API of solrpy, to avoid needing a real instance of SOLR for
    testing.
    '''
    def __init__(self):
        self.records = []
        self.results = []

    def add_many(self, records):
        self.records = self.records + records
    
    def commit(self):
        pass
    
    def delete_query(self, q, **kwargs):
        self.records = []
        pass

    def query(self, q, **kwargs):
        if q == '*' or q == '':
            self.results = self.records
        else:
            # HACK query
            def match(query, rec):
                for v in rec.values():
                    if query in unicode(v): return True
            self.results = [ r for r in self.records if match(q,r) ]
        return self
    
    @property
    def numFound(self):
        return len(self.results)

# Shared module-level instance; solr_connection() returns it when the
# 'solr.url' setting is 'stub'.
stub = _Stub()


def drop_index(dataset_name=None, solr=None):
    '''Issue the delete query ``dataset:<dataset_name>`` and commit.

    :param dataset_name: name placed in the delete query; when falsy the
        ``default_dataset`` config value (fallback ``u'cra'``) is used.
    :param solr: object providing ``delete_query`` and ``commit``; when falsy
        one is obtained from ``solr_connection()``.
    '''
    if not dataset_name:
        dataset_name = unicode(config.get('default_dataset', u'cra'))
    if not solr:
        solr = solr_connection()
    delete_expr = 'dataset:%s' % dataset_name
    solr.delete_query(delete_expr)
    solr.commit()

# Field names treated as "core" for solr. Not referenced by the code visible
# in this part of the module -- presumably used by callers; TODO confirm.
SOLR_CORE_FIELDS = [
    'id',
    'dataset',
    'amount',
    'time',
    'location',
    'from',
    'to',
    'notes',
]


def build_index(dataset_name=None, solr=None):
    '''Send every entry of a dataset to solr, in batches of 500.

    Each entry is converted with ``to_flat_dict()``. A full batch is added
    and committed as soon as it fills up, with progress printed to stdout;
    the remainder (possibly empty) is added and committed at the end.

    :param dataset_name: name of the dataset to index; when falsy the
        ``default_dataset`` config value (fallback ``u'cra'``) is used.
    :param solr: object providing ``add_many`` and ``commit``; when falsy one
        is obtained from ``solr_connection()``.
    :raises AssertionError: if no dataset with that name is found.
    '''
    if not dataset_name:
        dataset_name = unicode(config.get('default_dataset', u'cra'))
    if not solr:
        solr = solr_connection()

    dataset = model.Dataset.find_one({'name': dataset_name})
    assert dataset, "No such dataset: %s" % dataset_name

    # Query by the name as stored on the dataset object itself.
    stored_name = dataset.name
    entries = model.Entry.find({'dataset.name': stored_name})

    batch_size = 500
    written = 0
    pending = []
    for entry in entries:
        pending.append(entry.to_flat_dict())
        if len(pending) != batch_size:
            continue
        # A full batch has accumulated: flush it and start a fresh one.
        # (Single-argument parenthesised print emits the same text as the
        # bare print statement.)
        print('Writing %d records...' % len(pending))
        solr.add_many(pending)
        solr.commit()
        written += batch_size
        print('Done. (Total so far: %s)' % written)
        pending = []

    # Flush whatever is left over and commit once more.
    solr.add_many(pending)
    solr.commit()