Source

wdmmg / wdmmg / lib / loader.py

Full commit
from wdmmg import model
from wdmmg.model import Dataset, Entry, Entity, Classifier


# TODO: Rethink Loader API.
# Currently, caller has to do three jobs for each Key:
# - Retrieve or create it.
# - Wrap it in a ValueCache.
# - Pass it to Loader.
# The flexibility to do something different is not really useful.

class Loader(object):
    '''
    Represents the RAM-resident state of a process that loads an OLAP-like
    data set into the store. The data set is assumed to be too big to fit in
    RAM, so it is streamed in and simultaneously written out to the database.

    The intended usage is something like this:

        my_loader = Loader(u'my_dataset', u'My dataset', **other_attributes)
        for row in fetch_my_data():
            entry = my_loader.create_entry(row.amount, **other_attributes)
            # Optionally do stuff to `entry` here.
        my_loader.compute_aggregates()
    '''

    def __init__(self, dataset_name, label, description=u'', metadata=None,
                 currency=u'gbp'):
        '''
        Constructs a Loader for a new Dataset `dataset_name`. Calling the
        constructor creates the Dataset object and inserts it into the store.

        dataset_name - the unique name of the Dataset. Must be a `unicode`
            instance.

        label - the `label` to use when creating the Dataset.

        description (optional) - the `description` to use when creating the
            Dataset.

        metadata (optional) - a mapping of additional fields merged into the
            Dataset before it is inserted. Skipped when None.

        currency (optional) - the `currency` to use when creating the
            Dataset; also the default currency for entries created through
            `create_entry`.

        Raises AssertionError if `dataset_name` is not `unicode` or if a
        Dataset with that name already exists. (These checks are `assert`
        statements, so they are skipped when Python runs with -O.)
        '''
        assert isinstance(dataset_name, unicode)
        assert not Dataset.find_one({'name': dataset_name}),\
            "Dataset '%s' already loaded" % dataset_name

        # Create dataset.
        dataset = Dataset(name=dataset_name, label=label,
                          currency=currency, description=description)
        if metadata is not None:
            # Merge the caller-supplied metadata. (This previously merged the
            # dataset into itself, which silently dropped `metadata`.)
            dataset.update(metadata)
        dataset.id = Dataset.c.insert(dataset)
        self.dataset = dataset

    def create_entry(self, amount, currency=None, **kwargs):
        '''
        Creates an Entry record belonging to this Loader's Dataset, inserts it
        into the store and returns it.

        amount - the amount spent. Must be a `float`.

        currency (optional) - the currency of `amount`. When None, the
            Dataset's currency is used.

        **kwargs - any additional fields to store on the Entry.

        Raises AssertionError if `amount` is not a `float`.
        '''
        assert isinstance(amount, float)
        if currency is None:
            currency = self.dataset.currency

        entry = Entry(dataset=self.dataset.to_ref_dict(),
                      amount=amount,
                      currency=currency)
        entry.update(kwargs)
        entry.id = Entry.c.insert(entry)
        return entry

    def _upsert_by_name(self, model_cls, name, label, description, extra):
        '''
        Creates or updates the `model_cls` record called `name` and returns
        the record as re-read from the store.

        `extra` is the caller's dictionary of additional fields; the `name`,
        `label` and `description` values are written into it, overriding any
        entries under those keys.
        '''
        record = model_cls.find_one({'name': name})
        extra.update({'name': name,
                      'label': label,
                      'description': description})
        if record is None:
            record = model_cls(**extra)
        else:
            record.update(extra)
        model_cls.c.update({'name': record.name}, record, upsert=True)
        # Re-read so the caller gets the record as it was stored.
        return model_cls.find_one({'name': name})

    def create_entity(self, name, label=u'', description=u'', **kwargs):
        '''
        Creates the Entity called `name`, or updates it if it already exists,
        and returns the stored Entity.

        name - the name used to look the Entity up.

        label (optional) - the `label` to store on the Entity.

        description (optional) - the `description` to store on the Entity.

        **kwargs - any additional fields to store on the Entity.

        Note that `label` and `description` are always written, so calling
        this for an existing Entity without them resets both to u''.
        '''
        return self._upsert_by_name(Entity, name, label, description, kwargs)

    def create_classifier(self, name, label=u'', description=u'', **kwargs):
        '''
        Creates the Classifier called `name`, or updates it if it already
        exists, and returns the stored Classifier.

        name - the name used to look the Classifier up.

        label (optional) - the `label` to store on the Classifier.

        description (optional) - the `description` to store on the
            Classifier.

        **kwargs - any additional fields to store on the Classifier.

        Note that `label` and `description` are always written, so calling
        this for an existing Classifier without them resets both to u''.
        '''
        return self._upsert_by_name(Classifier, name, label, description,
                                    kwargs)

    def compute_aggregates(self):
        '''
        Placeholder: not yet implemented, currently does nothing.
        '''
        # TODO.
        pass