Commits

Rufus Pollock  committed dd917fb

[data][s]: convert codes in headings (which become series names) to country names using the iso-3166 2-letter country codes dataset from ckan.

  • Participants
  • Parent commits 7fd1894

Comments (0)

Files changed (5)

File eurostat/data.py

         open(newfp, 'w').write(contents)
         return newfp
 
+    def _iso3166(self):
+        # ckan.net/package/iso-3166-2-digit-country-codes
+        url = 'http://api.scraperwiki.com/api/1.0/datastore/getdata?&name=iso-3166-2-letter-country-codes&limit=300'
+        fp = ourcache.retrieve(url)
+        codes = json.load(open(fp))
+        return dict([ [x['code'], x['name']] for x in codes ])
+
     def extract(self, newfp):
         '''Extract data from tsv file at `newfp`, clean it and save it as json
-        to file with same basename and extension json'''
+        to file with same basename and extension json
+
+        :return: extracted data as `Tabular`.
+        '''
         reader = tabular.CsvReader()
         tab = reader.read(open(newfp), dialect='excel-tab')
         # some data has blank top row!
         alldata = [tab.header] + tab.data
         transposed = zip(*alldata)
         tab.header = transposed[0]
+        # clean header codes (mainly for countries)
+        isocodes = self._iso3166()
+        # of form PCH_Q1_SA,BE
+        def clean_code(code):
+            parts = code.split(',')
+            isocode = parts[1] if len(parts) > 1 else parts[0]
+            return isocodes.get(isocode, isocode)
+        tab.header = map(clean_code, tab.header)
         def parsedate(cell):
             if 'Q' in cell:
                 items = cell.split('Q')
         writer = tabular.JsonWriter()
         jsonfp = newfp.split('.')[0] + '.json'
         writer.write(tab, open(jsonfp, 'w'))
+        return tab
 
     PEEI_LIST = 'peeis.json'
     def peeis(self):

File eurostat/static/cache/teicp000.json

 {
   "header": [
-    "unit,geo\\time", 
-    "I2005,AT", 
-    "I2005,BE", 
-    "I2005,BG", 
-    "I2005,CH", 
-    "I2005,CY", 
-    "I2005,CZ", 
-    "I2005,DE", 
-    "I2005,DK", 
-    "I2005,EA", 
-    "I2005,EA15", 
-    "I2005,EA16", 
-    "I2005,EE", 
-    "I2005,ES", 
-    "I2005,EU", 
-    "I2005,EU25", 
-    "I2005,EU27", 
-    "I2005,FI", 
-    "I2005,FR", 
-    "I2005,GR", 
-    "I2005,HR", 
-    "I2005,HU", 
-    "I2005,IE", 
-    "I2005,IS", 
-    "I2005,IT", 
-    "I2005,LT", 
-    "I2005,LU", 
-    "I2005,LV", 
-    "I2005,MT", 
-    "I2005,NL", 
-    "I2005,NO", 
-    "I2005,PL", 
-    "I2005,PT", 
-    "I2005,RO", 
-    "I2005,SE", 
-    "I2005,SI", 
-    "I2005,SK", 
-    "I2005,TR", 
-    "I2005,UK", 
-    "I2005,US", 
-    "PCH_M1,AT", 
-    "PCH_M1,BE", 
-    "PCH_M1,BG", 
-    "PCH_M1,CH", 
-    "PCH_M1,CY", 
-    "PCH_M1,CZ", 
-    "PCH_M1,DE", 
-    "PCH_M1,DK", 
-    "PCH_M1,EA", 
-    "PCH_M1,EA15", 
-    "PCH_M1,EA16", 
-    "PCH_M1,EE", 
-    "PCH_M1,ES", 
-    "PCH_M1,EU", 
-    "PCH_M1,EU25", 
-    "PCH_M1,EU27", 
-    "PCH_M1,FI", 
-    "PCH_M1,FR", 
-    "PCH_M1,GR", 
-    "PCH_M1,HR", 
-    "PCH_M1,HU", 
-    "PCH_M1,IE", 
-    "PCH_M1,IS", 
-    "PCH_M1,IT", 
-    "PCH_M1,LT", 
-    "PCH_M1,LU", 
-    "PCH_M1,LV", 
-    "PCH_M1,MT", 
-    "PCH_M1,NL", 
-    "PCH_M1,NO", 
-    "PCH_M1,PL", 
-    "PCH_M1,PT", 
-    "PCH_M1,RO", 
-    "PCH_M1,SE", 
-    "PCH_M1,SI", 
-    "PCH_M1,SK", 
-    "PCH_M1,TR", 
-    "PCH_M1,UK", 
-    "PCH_M1,US", 
-    "PCH_M12,AT", 
-    "PCH_M12,BE", 
-    "PCH_M12,BG", 
-    "PCH_M12,CH", 
-    "PCH_M12,CY", 
-    "PCH_M12,CZ", 
-    "PCH_M12,DE", 
-    "PCH_M12,DK", 
-    "PCH_M12,EA", 
-    "PCH_M12,EA15", 
-    "PCH_M12,EA16", 
-    "PCH_M12,EE", 
-    "PCH_M12,ES", 
-    "PCH_M12,EU", 
-    "PCH_M12,EU25", 
-    "PCH_M12,EU27", 
-    "PCH_M12,FI", 
-    "PCH_M12,FR", 
-    "PCH_M12,GR", 
-    "PCH_M12,HR", 
-    "PCH_M12,HU", 
-    "PCH_M12,IE", 
-    "PCH_M12,IS", 
-    "PCH_M12,IT", 
-    "PCH_M12,LT", 
-    "PCH_M12,LU", 
-    "PCH_M12,LV", 
-    "PCH_M12,MT", 
-    "PCH_M12,NL", 
-    "PCH_M12,NO", 
-    "PCH_M12,PL", 
-    "PCH_M12,PT", 
-    "PCH_M12,RO", 
-    "PCH_M12,SE", 
-    "PCH_M12,SI", 
-    "PCH_M12,SK", 
-    "PCH_M12,TR", 
-    "PCH_M12,UK", 
-    "PCH_M12,US", 
-    "PCH_MV12,AT", 
-    "PCH_MV12,BE", 
-    "PCH_MV12,BG", 
-    "PCH_MV12,CH", 
-    "PCH_MV12,CY", 
-    "PCH_MV12,CZ", 
-    "PCH_MV12,DE", 
-    "PCH_MV12,DK", 
-    "PCH_MV12,EA", 
-    "PCH_MV12,EA15", 
-    "PCH_MV12,EA16", 
-    "PCH_MV12,EE", 
-    "PCH_MV12,ES", 
-    "PCH_MV12,EU", 
-    "PCH_MV12,EU25", 
-    "PCH_MV12,EU27", 
-    "PCH_MV12,FI", 
-    "PCH_MV12,FR", 
-    "PCH_MV12,GR", 
-    "PCH_MV12,HR", 
-    "PCH_MV12,HU", 
-    "PCH_MV12,IE", 
-    "PCH_MV12,IS", 
-    "PCH_MV12,IT", 
-    "PCH_MV12,LT", 
-    "PCH_MV12,LU", 
-    "PCH_MV12,LV", 
-    "PCH_MV12,MT", 
-    "PCH_MV12,NL", 
-    "PCH_MV12,NO", 
-    "PCH_MV12,PL", 
-    "PCH_MV12,PT", 
-    "PCH_MV12,RO", 
-    "PCH_MV12,SE", 
-    "PCH_MV12,SI", 
-    "PCH_MV12,SK", 
-    "PCH_MV12,TR", 
-    "PCH_MV12,UK", 
-    "PCH_MV12,US"
+    "geo\\time", 
+    "Austria", 
+    "Belgium", 
+    "Bulgaria", 
+    "Switzerland", 
+    "Cyprus", 
+    "Czech Republic", 
+    "Germany", 
+    "Denmark", 
+    "EA", 
+    "EA15", 
+    "EA16", 
+    "Estonia", 
+    "Spain", 
+    "EU", 
+    "EU25", 
+    "EU27", 
+    "Finland", 
+    "France", 
+    "Greece", 
+    "Croatia", 
+    "Hungary", 
+    "Ireland", 
+    "Iceland", 
+    "Italy", 
+    "Lithuania", 
+    "Luxembourg", 
+    "Latvia", 
+    "Malta", 
+    "Netherlands", 
+    "Norway", 
+    "Poland", 
+    "Portugal", 
+    "Romania", 
+    "Sweden", 
+    "Slovenia", 
+    "Slovakia", 
+    "Turkey", 
+    "UK", 
+    "United States", 
+    "Austria", 
+    "Belgium", 
+    "Bulgaria", 
+    "Switzerland", 
+    "Cyprus", 
+    "Czech Republic", 
+    "Germany", 
+    "Denmark", 
+    "EA", 
+    "EA15", 
+    "EA16", 
+    "Estonia", 
+    "Spain", 
+    "EU", 
+    "EU25", 
+    "EU27", 
+    "Finland", 
+    "France", 
+    "Greece", 
+    "Croatia", 
+    "Hungary", 
+    "Ireland", 
+    "Iceland", 
+    "Italy", 
+    "Lithuania", 
+    "Luxembourg", 
+    "Latvia", 
+    "Malta", 
+    "Netherlands", 
+    "Norway", 
+    "Poland", 
+    "Portugal", 
+    "Romania", 
+    "Sweden", 
+    "Slovenia", 
+    "Slovakia", 
+    "Turkey", 
+    "UK", 
+    "United States", 
+    "Austria", 
+    "Belgium", 
+    "Bulgaria", 
+    "Switzerland", 
+    "Cyprus", 
+    "Czech Republic", 
+    "Germany", 
+    "Denmark", 
+    "EA", 
+    "EA15", 
+    "EA16", 
+    "Estonia", 
+    "Spain", 
+    "EU", 
+    "EU25", 
+    "EU27", 
+    "Finland", 
+    "France", 
+    "Greece", 
+    "Croatia", 
+    "Hungary", 
+    "Ireland", 
+    "Iceland", 
+    "Italy", 
+    "Lithuania", 
+    "Luxembourg", 
+    "Latvia", 
+    "Malta", 
+    "Netherlands", 
+    "Norway", 
+    "Poland", 
+    "Portugal", 
+    "Romania", 
+    "Sweden", 
+    "Slovenia", 
+    "Slovakia", 
+    "Turkey", 
+    "UK", 
+    "United States", 
+    "Austria", 
+    "Belgium", 
+    "Bulgaria", 
+    "Switzerland", 
+    "Cyprus", 
+    "Czech Republic", 
+    "Germany", 
+    "Denmark", 
+    "EA", 
+    "EA15", 
+    "EA16", 
+    "Estonia", 
+    "Spain", 
+    "EU", 
+    "EU25", 
+    "EU27", 
+    "Finland", 
+    "France", 
+    "Greece", 
+    "Croatia", 
+    "Hungary", 
+    "Ireland", 
+    "Iceland", 
+    "Italy", 
+    "Lithuania", 
+    "Luxembourg", 
+    "Latvia", 
+    "Malta", 
+    "Netherlands", 
+    "Norway", 
+    "Poland", 
+    "Portugal", 
+    "Romania", 
+    "Sweden", 
+    "Slovenia", 
+    "Slovakia", 
+    "Turkey", 
+    "UK", 
+    "United States"
   ], 
   "data": [
     [

File eurostat/static/cache/teina011.json

 {
   "header": [
-    "unit,geo\\time", 
-    "PCH_Q1_SA,AT", 
-    "PCH_Q1_SA,BE", 
-    "PCH_Q1_SA,BG", 
-    "PCH_Q1_SA,CH", 
-    "PCH_Q1_SA,CY", 
-    "PCH_Q1_SA,CZ", 
-    "PCH_Q1_SA,DE", 
-    "PCH_Q1_SA,DK", 
-    "PCH_Q1_SA,EA", 
-    "PCH_Q1_SA,EA15", 
-    "PCH_Q1_SA,EA16", 
-    "PCH_Q1_SA,EE", 
-    "PCH_Q1_SA,ES", 
-    "PCH_Q1_SA,EU15", 
-    "PCH_Q1_SA,EU25", 
-    "PCH_Q1_SA,EU27", 
-    "PCH_Q1_SA,FI", 
-    "PCH_Q1_SA,FR", 
-    "PCH_Q1_SA,GR", 
-    "PCH_Q1_SA,HR", 
-    "PCH_Q1_SA,HU", 
-    "PCH_Q1_SA,IE", 
-    "PCH_Q1_SA,IS", 
-    "PCH_Q1_SA,IT", 
-    "PCH_Q1_SA,JP", 
-    "PCH_Q1_SA,LT", 
-    "PCH_Q1_SA,LU", 
-    "PCH_Q1_SA,LV", 
-    "PCH_Q1_SA,MT", 
-    "PCH_Q1_SA,NL", 
-    "PCH_Q1_SA,NO", 
-    "PCH_Q1_SA,PL", 
-    "PCH_Q1_SA,PT", 
-    "PCH_Q1_SA,RO", 
-    "PCH_Q1_SA,SE", 
-    "PCH_Q1_SA,SI", 
-    "PCH_Q1_SA,SK", 
-    "PCH_Q1_SA,UK", 
-    "PCH_Q1_SA,US", 
-    "PCH_Q4_NSA,AT", 
-    "PCH_Q4_NSA,BE", 
-    "PCH_Q4_NSA,BG", 
-    "PCH_Q4_NSA,CH", 
-    "PCH_Q4_NSA,CY", 
-    "PCH_Q4_NSA,CZ", 
-    "PCH_Q4_NSA,DE", 
-    "PCH_Q4_NSA,DK", 
-    "PCH_Q4_NSA,EA", 
-    "PCH_Q4_NSA,EA15", 
-    "PCH_Q4_NSA,EA16", 
-    "PCH_Q4_NSA,EE", 
-    "PCH_Q4_NSA,ES", 
-    "PCH_Q4_NSA,EU15", 
-    "PCH_Q4_NSA,EU25", 
-    "PCH_Q4_NSA,EU27", 
-    "PCH_Q4_NSA,FI", 
-    "PCH_Q4_NSA,FR", 
-    "PCH_Q4_NSA,GR", 
-    "PCH_Q4_NSA,HR", 
-    "PCH_Q4_NSA,HU", 
-    "PCH_Q4_NSA,IE", 
-    "PCH_Q4_NSA,IS", 
-    "PCH_Q4_NSA,IT", 
-    "PCH_Q4_NSA,JP", 
-    "PCH_Q4_NSA,LT", 
-    "PCH_Q4_NSA,LU", 
-    "PCH_Q4_NSA,LV", 
-    "PCH_Q4_NSA,MT", 
-    "PCH_Q4_NSA,NL", 
-    "PCH_Q4_NSA,NO", 
-    "PCH_Q4_NSA,PL", 
-    "PCH_Q4_NSA,PT", 
-    "PCH_Q4_NSA,RO", 
-    "PCH_Q4_NSA,SE", 
-    "PCH_Q4_NSA,SI", 
-    "PCH_Q4_NSA,SK", 
-    "PCH_Q4_NSA,TR", 
-    "PCH_Q4_NSA,UK"
+    "geo\\time", 
+    "Austria", 
+    "Belgium", 
+    "Bulgaria", 
+    "Switzerland", 
+    "Cyprus", 
+    "Czech Republic", 
+    "Germany", 
+    "Denmark", 
+    "EA", 
+    "EA15", 
+    "EA16", 
+    "Estonia", 
+    "Spain", 
+    "EU15", 
+    "EU25", 
+    "EU27", 
+    "Finland", 
+    "France", 
+    "Greece", 
+    "Croatia", 
+    "Hungary", 
+    "Ireland", 
+    "Iceland", 
+    "Italy", 
+    "Japan", 
+    "Lithuania", 
+    "Luxembourg", 
+    "Latvia", 
+    "Malta", 
+    "Netherlands", 
+    "Norway", 
+    "Poland", 
+    "Portugal", 
+    "Romania", 
+    "Sweden", 
+    "Slovenia", 
+    "Slovakia", 
+    "UK", 
+    "United States", 
+    "Austria", 
+    "Belgium", 
+    "Bulgaria", 
+    "Switzerland", 
+    "Cyprus", 
+    "Czech Republic", 
+    "Germany", 
+    "Denmark", 
+    "EA", 
+    "EA15", 
+    "EA16", 
+    "Estonia", 
+    "Spain", 
+    "EU15", 
+    "EU25", 
+    "EU27", 
+    "Finland", 
+    "France", 
+    "Greece", 
+    "Croatia", 
+    "Hungary", 
+    "Ireland", 
+    "Iceland", 
+    "Italy", 
+    "Japan", 
+    "Lithuania", 
+    "Luxembourg", 
+    "Latvia", 
+    "Malta", 
+    "Netherlands", 
+    "Norway", 
+    "Poland", 
+    "Portugal", 
+    "Romania", 
+    "Sweden", 
+    "Slovenia", 
+    "Slovakia", 
+    "Turkey", 
+    "UK"
   ], 
   "data": [
     [

File eurostat/static/cache/teina021.json

 {
   "header": [
-    "unit,geo\\time", 
-    "PCH_Q1_SA,AT", 
-    "PCH_Q1_SA,BE", 
-    "PCH_Q1_SA,BG", 
-    "PCH_Q1_SA,CH", 
-    "PCH_Q1_SA,CY", 
-    "PCH_Q1_SA,CZ", 
-    "PCH_Q1_SA,DE", 
-    "PCH_Q1_SA,DK", 
-    "PCH_Q1_SA,EA", 
-    "PCH_Q1_SA,EA15", 
-    "PCH_Q1_SA,EA16", 
-    "PCH_Q1_SA,EE", 
-    "PCH_Q1_SA,ES", 
-    "PCH_Q1_SA,EU15", 
-    "PCH_Q1_SA,EU25", 
-    "PCH_Q1_SA,EU27", 
-    "PCH_Q1_SA,FI", 
-    "PCH_Q1_SA,FR", 
-    "PCH_Q1_SA,GR", 
-    "PCH_Q1_SA,HR", 
-    "PCH_Q1_SA,HU", 
-    "PCH_Q1_SA,IE", 
-    "PCH_Q1_SA,IS", 
-    "PCH_Q1_SA,IT", 
-    "PCH_Q1_SA,JP", 
-    "PCH_Q1_SA,LT", 
-    "PCH_Q1_SA,LU", 
-    "PCH_Q1_SA,LV", 
-    "PCH_Q1_SA,MT", 
-    "PCH_Q1_SA,NL", 
-    "PCH_Q1_SA,NO", 
-    "PCH_Q1_SA,PL", 
-    "PCH_Q1_SA,PT", 
-    "PCH_Q1_SA,RO", 
-    "PCH_Q1_SA,SE", 
-    "PCH_Q1_SA,SI", 
-    "PCH_Q1_SA,SK", 
-    "PCH_Q1_SA,UK", 
-    "PCH_Q1_SA,US", 
-    "PCH_Q4_NSA,AT", 
-    "PCH_Q4_NSA,BE", 
-    "PCH_Q4_NSA,BG", 
-    "PCH_Q4_NSA,CH", 
-    "PCH_Q4_NSA,CY", 
-    "PCH_Q4_NSA,CZ", 
-    "PCH_Q4_NSA,DE", 
-    "PCH_Q4_NSA,DK", 
-    "PCH_Q4_NSA,EA", 
-    "PCH_Q4_NSA,EA15", 
-    "PCH_Q4_NSA,EA16", 
-    "PCH_Q4_NSA,EE", 
-    "PCH_Q4_NSA,ES", 
-    "PCH_Q4_NSA,EU15", 
-    "PCH_Q4_NSA,EU25", 
-    "PCH_Q4_NSA,EU27", 
-    "PCH_Q4_NSA,FI", 
-    "PCH_Q4_NSA,FR", 
-    "PCH_Q4_NSA,GR", 
-    "PCH_Q4_NSA,HR", 
-    "PCH_Q4_NSA,HU", 
-    "PCH_Q4_NSA,IE", 
-    "PCH_Q4_NSA,IS", 
-    "PCH_Q4_NSA,IT", 
-    "PCH_Q4_NSA,JP", 
-    "PCH_Q4_NSA,LT", 
-    "PCH_Q4_NSA,LU", 
-    "PCH_Q4_NSA,LV", 
-    "PCH_Q4_NSA,MT", 
-    "PCH_Q4_NSA,NL", 
-    "PCH_Q4_NSA,NO", 
-    "PCH_Q4_NSA,PL", 
-    "PCH_Q4_NSA,PT", 
-    "PCH_Q4_NSA,RO", 
-    "PCH_Q4_NSA,SE", 
-    "PCH_Q4_NSA,SI", 
-    "PCH_Q4_NSA,SK", 
-    "PCH_Q4_NSA,TR", 
-    "PCH_Q4_NSA,UK"
+    "geo\\time", 
+    "Austria", 
+    "Belgium", 
+    "Bulgaria", 
+    "Switzerland", 
+    "Cyprus", 
+    "Czech Republic", 
+    "Germany", 
+    "Denmark", 
+    "EA", 
+    "EA15", 
+    "EA16", 
+    "Estonia", 
+    "Spain", 
+    "EU15", 
+    "EU25", 
+    "EU27", 
+    "Finland", 
+    "France", 
+    "Greece", 
+    "Croatia", 
+    "Hungary", 
+    "Ireland", 
+    "Iceland", 
+    "Italy", 
+    "Japan", 
+    "Lithuania", 
+    "Luxembourg", 
+    "Latvia", 
+    "Malta", 
+    "Netherlands", 
+    "Norway", 
+    "Poland", 
+    "Portugal", 
+    "Romania", 
+    "Sweden", 
+    "Slovenia", 
+    "Slovakia", 
+    "UK", 
+    "United States", 
+    "Austria", 
+    "Belgium", 
+    "Bulgaria", 
+    "Switzerland", 
+    "Cyprus", 
+    "Czech Republic", 
+    "Germany", 
+    "Denmark", 
+    "EA", 
+    "EA15", 
+    "EA16", 
+    "Estonia", 
+    "Spain", 
+    "EU15", 
+    "EU25", 
+    "EU27", 
+    "Finland", 
+    "France", 
+    "Greece", 
+    "Croatia", 
+    "Hungary", 
+    "Ireland", 
+    "Iceland", 
+    "Italy", 
+    "Japan", 
+    "Lithuania", 
+    "Luxembourg", 
+    "Latvia", 
+    "Malta", 
+    "Netherlands", 
+    "Norway", 
+    "Poland", 
+    "Portugal", 
+    "Romania", 
+    "Sweden", 
+    "Slovenia", 
+    "Slovakia", 
+    "Turkey", 
+    "UK"
   ], 
   "data": [
     [

File eurostat/test/test_data.py

-from eurostat.data import Data
+from eurostat.data import Data, ourcache
 
 class TestData:
-    def test_peeis(self):
+    def test_01_peeis(self):
         d = Data()
         out = d.peeis()
         assert len(out) == 22
 
-    def test_peeis_download(self):
+    def test_02_peeis_download(self):
         d = Data()
         d.peeis_download()
+    
+    def test_03_extract(self):
+        d = Data()
+        # gdp
+        tabular = d.extract(ourcache.cache_path('teina011.tsv'))
+        assert tabular.header[1] == 'Austria', tabular.header
+        # one with a different heading structure
+        tabular = d.extract(ourcache.cache_path('teibs010.tsv'))
+        assert tabular.header[-1] == 'UK', tabular.header