Commits

Nick Stenning  committed 95fd88e

[load] Remove EPSRC and Israel State Budget loaders -- both now import directly from CKAN via JSON models.

  • Participants
  • Parent commits 256f3e2

Comments (0)

Files changed (2)

File wdmmgext/load/epsrc.py

-import os
-import sqlite3
-
-from pylons import config
-
-from wdmmg.model import Dataset, Entry, Entity
-from wdmmg.lib import times, munge
-from wdmmg.lib.loader import Loader
-
-DATASET_NAME = u'epsrc'
-
-def make_loader():
-    """Returns a loader for this dataset"""
-    loader = Loader(DATASET_NAME, ["name"], u"Engineering and Physical Research Sciences Council",
-        description=u'''Research grants from the EPRSC between 1985 and 2011''', currency="GBP")
-    return loader
-
-def load_entries(loader, connection):
-    '''
-    Load Entries by selecting grants from the given connection joining them up with their
-    departments and organisations and principals.
-    Classifies the entries and calls aggregate afterwards.
-    '''
-
-    def describe_dimension(key, label, description=None):
-        loader.create_dimension(key, label, description=description)
-
-   # Retrieve or create the required Keys.
-    describe_dimension(u'from', 'Grant from', description=u'''\
-The entity that gave the grant.''')
-    describe_dimension(u'to', "Grant to", description=u'''\
-The entity that received the grant.''')
-    describe_dimension(u'time', "Time", description=u'''\
-The accounting period for which the grant was given.''')
-    describe_dimension(u'department', u"The department the grant was given to")
-    describe_dimension(u'organisation', u"The organisation that received the grant")
-
-    epsrc = loader.create_entity(name=u'epsrc',
-        label=u'EPSRC',
-        description=u'''UK Engineering and Physical Sciences Research Council'''
-    )
-
-    cursor = connection.cursor()
-    cols = ('grant_id', 'label', 'amount', 'piid', 'oid', 'depid', 'principal_name',
-            'department_name', 'department_orgid', 'organisation_name')
-    cursor.execute('''SELECT g.id as grant_id, g.title AS label, g.value AS amount,
-            g.principal_investigator_id AS piid, d.organisation_id AS oid,
-            g.department_id AS depid, p.name AS principal_name, d.name AS department_name,
-            d.organisation_id AS dep_orgid, o.name AS organisation_name
-            FROM grants AS g
-            LEFT JOIN people AS p ON p.id = piid
-            LEFT JOIN departments AS d ON d.id = depid
-            LEFT JOIN organisations AS o ON o.id = oid
-    ''')
-
-    entry_id = -1
-
-    dummy_time = times.for_year(2010)
-
-    for row in cursor:
-        entry_id += 1
-        mapping = dict([(k,v) for k,v in zip(cols, row)])
-
-        principal_id = u"%s-%s" % (mapping["piid"], munge.to_name(mapping["principal_name"]))
-
-        additional_entity = {
-            "epsrc_department_id": mapping["depid"],
-            "epsrc_department_name": mapping["department_name"],
-            "epsrc_organisation_id": mapping["oid"],
-            "epsrc_organisation_name": mapping["organisation_name"],
-            "epsrc_principal_id": mapping["piid"]
-        }
-
-        principal = loader.create_entity(principal_id,
-                label=mapping["principal_name"], epsrc_researcher=True, **additional_entity)
-
-        department = loader.create_entity(u"%s-%s" % (mapping["depid"],
-                munge.to_name(mapping["department_name"])),
-            label=mapping["department_name"],
-            epsrc_department=True,
-            epsrc_department_id=mapping["depid"],
-            epsrc_organisation_id=mapping["oid"]
-        )
-
-        organisation = loader.create_entity(u"%s-%s" % (mapping["oid"],
-                munge.to_name(mapping["organisation_name"])),
-            label=mapping["organisation_name"],
-            epsrc_organisation=True,
-            epsrc_organisation_id=mapping["oid"]
-        )
-
-        entry_dict = {
-            'name': u"%s-%s-%s" % (DATASET_NAME, entry_id, munge.to_name(mapping["grant_id"])),
-            'label': mapping["label"],
-            'from': epsrc,
-            'to': principal,
-            'time': dummy_time,
-            'amount': float(mapping["amount"]),
-            "department": department.to_ref_dict(),
-            "department_id": mapping["depid"],
-            "department_name": mapping["department_name"],
-            "organisation": organisation.to_ref_dict(),
-            "organisation_id": mapping["oid"],
-            "organisation_name": mapping["organisation_name"],
-            "principal_id": mapping["piid"]
-        }
-
-        loader.create_entry(**entry_dict)
-
-    aggregate(loader)
-
-def aggregate(loader=None):
-    if loader is None:
-       loader = make_loader()
-
-    loader.flush_aggregates()
-    loader.create_view(Dataset, {'name': DATASET_NAME}, name='default',
-                       label="Grants by Organisation",
-                       dimension="dataset", breakdown="organisation")
-    loader.create_view(Entity, {'epsrc_organisation': True}, name='default',
-                       label="Grants by Department",
-                       dimension="organisation", breakdown="department")
-    loader.create_view(Entity, {'epsrc_department': True}, name='default',
-                       label="Grants by Principal Investigator",
-                       dimension="department", breakdown="to")
-    # loader.create_view(Entity, {'epsrc_researcher': True}, name='default',
-    #                    label="Grants by Project",
-    #                    dimension="to", breakdown="label")
-    #TODO: needs more views on data
-
-    loader.compute_aggregates()
-
-
-def drop():
-    '''
-    Drops from the database all records associated with dataset 'cra'.
-    '''
-    # Delete only the keys we created ourselves.
-    # TODO: Move as many as possible of these into separate data packages.
-    raise NotImplemented
-
-def load():
-    filename = os.path.join(config['getdata_cache'] + '/epsrc/db')
-    connection = sqlite3.connect(filename)
-
-    loader = make_loader()
-    load_entries(loader, connection)

File wdmmgext/load/israel.py

-# Script to import Israeli data
-# Incomplete, needs discussion with RGRP re appropriate keys
-
-import os, sys, csv, re
-from datetime import date
-try: import json
-except ImportError: import simplejson as json
-
-import datapkg
-from pylons import config
-
-from wdmmg.model import Dataset, Entry, Entity, Classifier
-import util
-from wdmmg.lib.loader import Loader
-from wdmmg.lib import times
-import xlrd
-
-DATASET_NAME = u'israel'
-TAXONOMY = u'israel'
-
-
-COLORS = {
-        "001": "#CD531C",
-        "002": "#A5B425",
-        "003": "#CD853F",
-        "004": "#D32645",
-        "005": "#6AAC32",
-        "006": "#14388C",
-        "007": "#3F93E1",
-        "008": "#449256",
-        "009": "#481B79",
-        "010": "#2458A3",
-        "011": "#A0522D",
-        "012": "#0000CD",
-        "013": "#696969",
-        "014": "#00BFFF",
-        "015": "#CA221D",
-        "016": "#42928F",
-        "017": "#EDC92D",
-        "018": "#778899",
-        "019": "#211d79",
-        "020": "#DAA520",
-        "021": "#CD5C5C",
-        "022": "#DB7093"
-        }
-
-
-def load_file(filename):
-    '''
-    Loads a file into a slice with name 'israel'.
-    - filename - name of the file.
-    '''
-
-    # Make a suitably configured Loader (this also creates the Dataset).
-    loader = Loader(DATASET_NAME, ["name"], "Israel budget",
-        description = u'''Israeli budget.''', currency=u'ils')
-
-    def describe_dimension(key, label, description=None):
-        loader.create_dimension(key, label, description=description)
-
-    # The keys used to classify Israeli spending 
-    describe_dimension(u'from', "Paid by", description=u'''\
-The entity that the money was paid from.''')
-    describe_dimension(u'to', "Paid to", description=u'''\
-The entity that the money was paid to.''')
-    describe_dimension(u'time', "Time", description=u'''\
-The accounting period in which the spending happened.''')
-    describe_dimension(u'primary', "Overview grouping", description=u'''\
-An artificial grouping to allow for an easier overview of spending.''')
-    describe_dimension(u'section', "Section", description=u'''\
-Israeli spending code, level 1.''')
-    describe_dimension(u'entity', "Entity", description=u'''\
-Israeli spending code, level 2 (entity).''')
-    describe_dimension(u'programme', "Programme", description=u'''\
-Israeli spending code, level 3 (programme).''')
-    describe_dimension(u'concept', "Concept", description=u'''\
-Israeli spending code, level 4 (concept).''')
-
-    israel_govt = loader.create_entity(name=u'IL', 
-        label = u'Israel',
-        description = u'''Israeli state budget'''
-        )
-    society = loader.create_entity(name=u'il-society', 
-        label = u'Society (Israel)',
-        description = u'''A dummy entity to be the recipient of final government spending'''
-        )
-
-    # Open the Excel workbook
-    book = xlrd.open_workbook(filename=filename)
-    sheet = book.sheet_by_index(0)
-    print "Number of rows: " + str(sheet.nrows)
-    entry_id = 0
-
-    for row in range(5, sheet.nrows): # for each row after the header
-        label = sheet.cell(row,1).value.strip()
-        l0_name = sheet.cell(row,0).value.strip()
-        level0 = loader.create_classifier(
-                taxonomy=TAXONOMY, 
-                name=l0_name,
-                label=label,
-                #label_en=util.translate("iw", "en", label),
-                level=0, color=COLORS.get(l0_name))
-        label = sheet.cell(row,3).value.strip()
-        level1 = loader.create_classifier(
-                taxonomy=TAXONOMY, 
-                name=sheet.cell(row,2).value.strip(),
-                label=label,
-                #label_en=util.translate("iw", "en", label),
-                level=1, parent=level0.to_ref())
-        label = sheet.cell(row,5).value.strip()
-        level2 = loader.create_classifier(
-                taxonomy=TAXONOMY,
-                name=sheet.cell(row,4).value.strip(),
-                label=label,
-                #label_en=util.translate("iw", "en", label)
-                level=2, parent=level1.to_ref())
-        label = sheet.cell(row,7).value.strip()
-        level3 = loader.create_classifier(
-                taxonomy=TAXONOMY,
-                name=sheet.cell(row,6).value.strip(),
-                label=label,
-                #label_en=util.translate("iw", "en", label)
-                level=3, parent=level2.to_ref())
-        label = sheet.cell(row,9).value.strip()
-        level4 = loader.create_classifier(
-                taxonomy=TAXONOMY,
-                name=sheet.cell(row,8).value.strip(),
-                label=label, 
-                #label_en=util.translate("iw", "en", label)
-                level=4, parent=level3.to_ref())
-
-        expenditure_2009 = util.to_float(unicode(sheet.cell(row,10).value))
-        expenditure_2010 = util.to_float(unicode(sheet.cell(row,15).value))
-        
-        for (year, exp) in ((2010, expenditure_2010), 
-                            (2009, expenditure_2009)):
-            entry_id += 1
-            e = {
-                'amount': exp,
-                'name': DATASET_NAME + '-r' + str(entry_id) + "-net" + str(year),
-                'from': israel_govt,
-                'to': society,
-                'time': times.for_year(year)
-                }
-            loader.classify_entry(e, level0, 'primary')
-            loader.classify_entry(e, level1, 'section')
-            loader.classify_entry(e, level2, 'entity')
-            loader.classify_entry(e, level3, 'programme')
-            loader.classify_entry(e, level4, 'concept')
-            loader.create_entry(**e)
-        
-             
-    loader.create_view(Dataset, {'name': DATASET_NAME}, 
-                       name='default', label="Spending by Overview Category",
-                       dimension="dataset", breakdown="primary")
-    loader.create_view(Dataset, {'name': DATASET_NAME}, 
-                       name='section', label="Spending by Section",
-                       dimension="dataset", breakdown="section")
-    loader.create_view(Classifier, {'taxonomy': TAXONOMY, 'level': 0}, 
-                       name='default', label="Spending by Section",
-                       dimension="primary", breakdown="section")
-    loader.create_view(Classifier, {'taxonomy': TAXONOMY, 'level': 1}, 
-                       name='default', label="Spending by Entity",
-                       dimension="section", breakdown="entity")
-    loader.create_view(Classifier, {'taxonomy': TAXONOMY, 'level': 2}, 
-                       name='default', label="Spending by Programme",
-                       dimension="entity", breakdown="programme")
-    loader.create_view(Classifier, {'taxonomy': TAXONOMY, 'level': 3}, 
-                       name='default', label="Spending by Concept",
-                       dimension="programme", breakdown="concept")
-    loader.compute_aggregates()
-
-
-def drop():
-    '''
-    Drops from the database all records associated with slice 'israel'.
-    Not yet complete.
-    '''
-    # Delete only the keys we created ourselves.
-    raise NotImplemented
-        
-def load(filename=None):
-    '''
-    Downloads the Israel data, and loads it into the database with slice name 'israel'
-    Access via the Paste Script comment "paster load israel", see lib/cli.py 
-    '''
-    if not filename:
-        #filename = os.path.join(config['getdata_cache'], 'israel-state-budget/Israel state Budget 2009_2010 290710.xls')
-        filename = os.path.join(config['getdata_cache'], 'israel-state-budget/Israel state Budget 2009_2010 290710_Clustered_Without_State_Revenues.xls')
-	load_file(filename)