# mdhub / mdhub /
#
# Full commit
# -*- coding: utf-8 -*-


import ConfigParser
import time
import sys
import os
import copy
from mdhub import store
from mdhub import importconfig
from mdhub import location
from mdhub import exc
from mdhub import util

def get_store_type(store_conf_path):
    """
    For a path to store configuration file, get the
    kind of store we are dealing with.

    The kind of store is the name of the first section found in the
    configuration file.

    Raises ``exc.MetaDataHubException`` if the file could not be read or
    does not contain any section.
    """
    # Only section names are inspected, so no value interpolation is
    # needed and the plain RawConfigParser is sufficient.
    parser = ConfigParser.RawConfigParser()
    # read() silently skips files it cannot open and returns the list of
    # files it actually parsed - an empty list means nothing was loaded.
    if not parser.read(store_conf_path):
        raise exc.MetaDataHubException(
            "Could not read store configuration: {0}".format(
                store_conf_path))
    sections = parser.sections()
    if not sections:
        raise exc.MetaDataHubException(
            "No store section defined in: {0}".format(store_conf_path))
    return sections[0]

class Processor(object):
    """
    Hi, I'm god. Maybe you should refactor me.

    Reads an import configuration and a store configuration and runs
    every record found at the configured location through all commands
    of the import configuration.
    """

    def __init__(self, import_conf_path, store_conf_path):
        """
        Initialize the god processor. ``import_conf_path`` and
        ``store_conf_path`` are paths to the configuration files,
        not objects.

        Raises ``NotImplementedError`` for the 'rdbms' store type and
        ``exc.MetaDataHubException`` for any other unknown store type.
        """
        self.import_conf_path = os.path.expanduser(import_conf_path)
        # get a handle on a import configuration
        # NOTE(review): the constructor argument was reconstructed from
        # context (the expanded path) - confirm against importconfig.
        self.import_conf = importconfig.ImportConfiguration(
            self.import_conf_path)

        # Just get the store type out of the config
        # and set ``self.store`` accordingly (without import magic)
        self.store_conf_path = os.path.expanduser(store_conf_path)
        self.store_type = get_store_type(self.store_conf_path)
        if self.store_type == 'redis':
            self.store = store.RedisStore(self.store_conf_path)
        elif self.store_type == 'rdbms':
            raise NotImplementedError
        else:
            # Without this ``else`` the error below could never be reached.
            raise exc.MetaDataHubException(
                "No suitable store defined in: {0}".format(
                    self.store_conf_path))
        # TODO: Add more store types here ...

    @staticmethod
    def _per_second(count, elapsed):
        """Return ``count`` per second, or 0.0 if no time has elapsed."""
        # A coarse clock can report identical start and stop times; avoid
        # a ZeroDivisionError in the statistics output in that case.
        if elapsed <= 0:
            return 0.0
        return count / float(elapsed)

    def process(self):
        """
        Given a FINC import configuration (``self.import_conf``) and
        a store (key-value store, RDBMS) configuration
        (``self.store``) process all available records and
        feed them to our store appropriately.

        Timing statistics are written to ``sys.stderr`` when done.
        """
        start = time.time()
        records, commands = 0, 0
        iterator = location.record_iterator(self.import_conf.location)

        for record_iterator in iterator:
            for record in record_iterator:
                # Bootstrap internal representation of the metadata:
                # This dict will be passed along all commands; each command
                # is free to add and modify keys and values.
                bag = util.mdbag()
                bag['import_conf'] = self.import_conf
                # NOTE(review): right-hand side reconstructed as the store
                # created in __init__ - confirm.
                bag['store'] = self.store
                # we store a python object at the moment
                bag['original'] = copy.deepcopy(record)

                for command in self.import_conf.commands:
                    bag = command.execute(bag)
                    commands += 1

                # ``data`` is processed by now - should
                # be handed over to the export side of things
                records += 1

        elapsed = time.time() - start
        # sys.stderr.write emits the same text as ``print >> sys.stderr``
        # and is valid syntax on every Python version.
        sys.stderr.write(
            "Processed {0} records and {1} commands in {2:.4f} seconds".format(
                records, commands, elapsed) + "\n")
        sys.stderr.write("{0:.4f} records/second".format(
            self._per_second(records, elapsed)) + "\n")
        sys.stderr.write("{0:.4f} commands/second".format(
            self._per_second(commands, elapsed)) + "\n")

if __name__ == '__main__':
    processor = Processor(