# wdmmg / wdmmg / lib / loader.py
# (source listing attributed to Rufus Pollock, wdmmg project)

from wdmmg import model
from wdmmg.model import Dataset, Entry, Entity

# TODO: Rethink Loader API.
# Currently, caller has to do three jobs for each Key:
# - Retrieve or create it.
# - Wrap it in a ValueCache.
# - Pass it to Loader.
# The flexibility to do something different is not really useful.

class Loader(object):
    """
    Represents the RAM-resident state of a process that loads an OLAP-like
    data set into the store. The data set is assumed to be too big to fit in
    RAM, so it is streamed in and simultaneously written out to the database.
    To avoid running out of RAM, the caller will probably call
    `model.Session.remove()` one or more times during the lifetime of this
    Loader. Therefore, this class cannot retain domain objects between method
    calls. Instead, it records the database `id`s of the objects it needs.

    The intended usage is something like this:

        my_loader = Loader(u'my_dataset')
        for row in fetch_my_data():
            entry = my_loader.create_entry(row.amount, **row.values)
            # Optionally do stuff to `entry` here.

    The caller is responsible for setting up the Keys and EnumerationValues.
    It is recommended that instances of ValueCache be used to retrieve the
    `EnumerationValue.id`s passed to `create_entry()`, as this will
    avoid database traffic.
    """

    def __init__(self, dataset_name, notes=u'', metadata=None,
                 dataset_long_name=None, currency=u'gbp'):
        """
        Constructs a Loader for a new Dataset `dataset_name`. Raises an
        AssertionError if a Dataset already exists with that name. Calling
        the constructor creates (and inserts) the Dataset object.

        dataset_name - the unique name of the Dataset (must be unicode).
        notes (optional) - description text stored on the Dataset.
        metadata (optional) - a dict of additional fields to merge into the
            Dataset document before it is inserted.
        dataset_long_name (optional) - human-readable label; defaults to
            `dataset_name`.
        currency (optional) - currency code for the Dataset, default u'gbp'.
        """
        assert isinstance(dataset_name, unicode)
        # BUG FIX: the original asserted that a Dataset with this name DID
        # exist, contradicting its own failure message; it must not exist yet.
        assert not Dataset.find_one({'name': dataset_name}), \
            "Dataset '%s' already loaded" % dataset_name
        if dataset_long_name is None:
            dataset_long_name = dataset_name
        # Create the dataset document. BUG FIX: the original passed an
        # undefined name `description`; the constructor parameter is `notes`.
        dataset = Dataset(name=dataset_name, label=dataset_long_name,
                          currency=currency, description=notes)
        if metadata is not None:
            # Merge caller-supplied metadata into the document.
            # NOTE(review): assumes Dataset supports dict-style update(),
            # consistent with Dataset(**kwargs) construction — confirm.
            dataset.update(metadata)
        dataset.id = Dataset.c.insert(dataset)
        self.dataset = dataset

    def create_entry(self, amount, currency=None, **kwargs):
        """
        Creates an Entry record attached to this Loader's Dataset.

        amount - the amount spent (must be a float).
        currency (optional) - defaults to the Dataset's currency when None.
        kwargs - further fields to store on the Entry.

        Returns the newly inserted Entry (with its `id` set).
        """
        assert isinstance(amount, float)
        if currency is None:
            currency = self.dataset.currency
        entry = Entry(dataset=self.dataset.to_ref_dict(),
                      amount=amount, currency=currency, **kwargs)
        entry.id = Entry.c.insert(entry)
        return entry

    def create_entity(self, name, label=u'', description=u'', **kwargs):
        """
        Creates or updates the Entity named `name` via an upsert.

        name - unique name of the Entity.
        label (optional) - human-readable label.
        description (optional) - descriptive text.
        kwargs - further fields to store on the Entity.

        Returns the Entity as stored after the upsert.
        """
        entity = Entity.find_one({'name': name})
        # BUG FIX: the original stored an undefined name `notes` here;
        # the parameter is `description`.
        kwargs.update({'name': name,
                       'label': label,
                       'description': description})
        if entity is None:
            entity = Entity(**kwargs)
        else:
            # BUG FIX: the original built `kwargs` but never applied it to an
            # existing entity, so label/description changes were lost.
            entity.update(kwargs)
        Entity.c.update({'name': entity.name}, entity, upsert=True)
        return Entity.find_one({'name': name})

    def compute_aggregates(self):
        """Recompute aggregate totals for the Dataset (not yet implemented)."""
        # TODO: implement aggregate computation.
        pass