Commits

Chris Mutel committed bb32871

Major rewrite, with Database and Method objects; abstract serialization classes; method metadata in JSON

Comments (0)

Files changed (15)

 - import_method can't handle SimaPro XML files (where exchanges are numbered sequentially)
-- import_method should skip SimaPro XML files
 - RobustFinder to find processes or flows which aren't correctly specified (import scripts)
-- import_ecospold can't handle SimaPro XML files (where exchanges are numbered sequentially)
-- Manager.copy() should copy databases
+- Manager.copy() should copy databases and methods

brightway2/__init__.py

-from _config import Config
-config = Config()
-
-from _meta import Meta
-meta = Meta()
-
-from _mapping import Mapping
-mapping = Mapping()
-
-from _methods import Methods
-methods = Methods()
-
-from manager import Manager
+# -*- coding: utf-8 -*-
+from _config import config
+from meta import databases, methods, mapping
+from database import Database
+from method import Method
 from query import Is, Isnt, Contains, In, Exchange, iIs, iIsnt, iIs, iContains

brightway2/_config.py

 
 class Config(object):
     def __init__(self, path=None):
-        self.dir = self.get_home_directory(path)
-        self.check_dir()
+        self.reset(path)
 
     def check_dir(self):
         pass
 
+    def reset(self, path=None):
+        """Reset to original configuration. Useful for testing."""
+        self.dir = self.get_home_directory(path)
+        self.check_dir()
+
     def get_home_directory(self, path):
         if path:
             return path
         self.check_dir()
 
     dir = property(_get_dir, _set_dir)
+
+
+config = Config()

brightway2/_mapping.py

-import os
-from . import config
-try:
-    import cPickle as pickle
-except ImportError:
-    import pickle
-
-
-class Mapping(object):
-    _filepath = os.path.join(config.dir, "mapping.pickle")
-
-    def __init__(self):
-        self.reload()
-
-    def reload(self):
-        try:
-            self._data = pickle.load(open(self._filepath, "rb"))
-        except IOError:
-            # Create if not present
-            self._data = {}
-            self.flush()
-
-    def add(self, keys):
-        index = max(self._data.values())
-        for i, key in enumerate(keys):
-            if key not in self._data:
-                self._data[key] = index + i + 1
-        self.flush()
-
-    def delete(self, keys):
-        for key in keys:
-            del self._data[key]
-        self.flush()
-
-    def flush(self):
-        with open(self._filepath, "wb") as f:
-            pickle.dump(self._data, f, protocol=pickle.HIGHEST_PROTOCOL)
-
-    def __getitem__(self, key):
-        return self._data[key]
-
-    def __setitem__(self, key, value):
-        raise NotImplemented
-
-    def __contains__(self, key):
-        return key in self._data

brightway2/_meta.py

-# -*- coding: utf-8 -*
-import os
-import json
-from . import config
-
-
-class Meta(object):
-    _filepath = os.path.join(config.dir, "meta.json")
-
-    def __init__(self):
-        self.reload()
-
-    def reload(self):
-        try:
-            self._data = json.load(open(self._filepath, "r"))
-        except IOError:
-            # Create if not present
-            self._data = {}
-            self.flush()
-
-    def add(self, name, data):
-        self._data[name] = data
-        self.flush()
-
-    def delete(self, name):
-        del self._data[name]
-        self.flush()
-
-    def flush(self):
-        with open(self._filepath, "w") as f:
-            json.dump(self._data, f, indent=2)
-
-    def increment_version(self, database):
-        self._data[database]["version"] += 1
-        self.flush()
-        return self._data[database]["version"]
-
-    @property
-    def databases(self):
-        return self._data
-
-    @property
-    def list(self):
-        return self._data.keys()
-
-    def version(self, database):
-        return self._data[database]["version"]
-
-    def __getitem__(self, key):
-        return self._data[key]
-
-    def __contains__(self, key):
-        return key in self._data

brightway2/_methods.py

-# -*- coding: utf-8 -*
-import os
-import numpy as np
-from . import config, mapping
-from utils import MAX_INT_32
-try:
-    import cPickle as pickle
-except:
-    import pickle
-
-
-class Methods(object):
-    _filepath = os.path.join(config.dir, "methods.pickle")
-
-    def __init__(self):
-        self.reload()
-
-    def reload(self):
-        try:
-            self._data = pickle.load(open(self._filepath, "r"))
-        except IOError:
-            # Create if not present
-            self._data = {}
-            self.flush()
-
-    def add(self, name, data):
-        self._data[name] = data
-        self.flush()
-
-    def delete(self, name):
-        del self._data[name]
-        self.flush()
-
-    def flush(self):
-        with open(self._filepath, "wb") as f:
-            pickle.dump(self._data, f, protocol=pickle.HIGHEST_PROTOCOL)
-
-    @property
-    def list(self):
-        return self._data.keys()
-
-    def __getitem__(self, key):
-        return self._data[key]
-
-    def __contains__(self, key):
-        return key in self._data
-
-    def process(self, name):
-        """Create numpy structured arrays for IA method"""
-        data_filepath = os.path.join(config.dir, "ia", "%s.pickle" % self[
-            name]["abbreviation"])
-        data = pickle.load(open(data_filepath, "rb"))
-        assert data
-        num_cfs = len(data)
-        dtype = [('uncertainty_type', np.uint8),
-            ('flow', np.uint32),
-            ('index', np.uint32),
-            ('amount', np.float32),
-            ('sigma', np.float32),
-            ('minimum', np.float32),
-            ('maximum', np.float32),
-            ('negative', np.bool)]
-        arr = np.zeros((num_cfs, ), dtype=dtype)
-        arr['minimum'] = arr['maximum'] = arr['sigma'] = np.NaN
-        for i, cf in enumerate(data):
-            arr[i] = (
-                0,
-                mapping[cf[0]],
-                MAX_INT_32,
-                cf[1],
-                np.NaN,
-                np.NaN,
-                np.NaN,
-                False
-                )
-        filepath = os.path.join(config.dir, "processed", "%s.pickle" % \
-            self[name]["abbreviation"])
-        with open(filepath, "wb") as f:
-            pickle.dump(arr, f, protocol=pickle.HIGHEST_PROTOCOL)

brightway2/database.py

+# -*- coding: utf-8 -*-
+from . import databases, config, mapping
+from errors import MissingIntermediateData, UnknownObject
+import os
+import numpy as np
+from query import Query
+from utils import natural_sort, MAX_INT_32
+try:
+    import cPickle as pickle
+except ImportError:
+    import pickle
+
+
+class Database(object):
+    """Read, write and process the data of a single named database.
+
+    Metadata is kept in the shared ``databases`` registry. Data is pickled
+    to ``<config.dir>/intermediate/<name>.<version>.pickle``; the numpy
+    array built by ``process`` is pickled to
+    ``<config.dir>/processed/<name>.pickle``.
+    """
+
+    def __init__(self, database, *args, **kwargs):
+        """Store the database name; warn (but continue) if unregistered.
+
+        Extra positional and keyword arguments are accepted and ignored.
+        """
+        self.database = database
+        if self.database not in databases:
+            print "Warning: %s not a currently installed database" % database
+
+    def query(self, *queries):
+        """Return the result of applying ``Query(*queries)`` to
+        ``self.load()``."""
+        return Query(*queries)(self.load())
+
+    def copy(self, name):
+        """Copy this database to ``name``. Not implemented yet.
+
+        Raises:
+            NotImplementedError: always.
+        """
+        # Todo: relabel every key, and every exchange input that points
+        # into this database, from (self.database, code) to (name, code);
+        # then register the copy and write it under the new name.
+        #
+        # NotImplemented is a comparison sentinel, not an exception class,
+        # so raising it would fail with an unrelated TypeError.
+        raise NotImplementedError("Database.copy() is not implemented yet")
+
+    def register(self, format, depends, num_processes):
+        """Add this database to ``databases`` with version 0.
+
+        ``format``, ``depends`` and ``num_processes`` are stored under the
+        keys "from format", "depends" and "number". The database must not
+        be registered already (checked with ``assert``).
+        """
+        assert self.database not in databases
+        databases[self.database] = {
+            "from format": format,
+            "depends": depends,
+            "number": num_processes,
+            "version": 0
+            }
+
+    def deregister(self):
+        """Remove this database's metadata from ``databases``."""
+        del databases[self.database]
+
+    def write(self, data, name=None):
+        """Pickle ``data`` as a new version of the intermediate data.
+
+        The version counter in ``databases`` is incremented first and all
+        keys of ``data`` are added to ``mapping``. ``name`` is accepted
+        but currently unused.
+
+        Raises:
+            UnknownObject: if the database is not registered.
+        """
+        if self.database not in databases:
+            raise UnknownObject("This database is not yet registered")
+        databases.increment_version(self.database)
+        mapping.add(data.keys())
+        filename = "%s.%i.pickle" % (self.database,
+            databases.version(self.database))
+        filepath = os.path.join(config.dir, "intermediate", filename)
+        with open(filepath, "wb") as f:
+            pickle.dump(data, f, protocol=pickle.HIGHEST_PROTOCOL)
+
+    def load(self, version=None):
+        """Unpickle and return the intermediate data.
+
+        With ``version=None`` the last file in ``natural_sort`` order is
+        used (presumably the highest version -- confirm against
+        ``utils.natural_sort``); otherwise exactly that version is loaded.
+
+        Raises:
+            UnknownObject: if the database is not registered.
+            MissingIntermediateData: if no file, or not the requested
+                version, exists for this database.
+        """
+        if self.database not in databases:
+            raise UnknownObject("This database is not yet registered")
+        directory = os.path.join(config.dir, "intermediate")
+        # File names are "<database>.<version>.pickle"; dropping the last
+        # two dot-separated parts leaves the database name.
+        files = filter(lambda x: ".".join(x.split(".")[:-2]) == self.database,
+            os.listdir(directory))
+        if not files:
+            raise MissingIntermediateData("Can't load intermediate data")
+        if version is None:
+            filepath = os.path.join(directory, natural_sort(files)[-1])
+        else:
+            filepath = os.path.join(directory,
+                "%s.%i.pickle" % (self.database, version))
+            if not os.path.exists(filepath):
+                raise MissingIntermediateData("This version not found")
+        # Close the file deterministically instead of leaking the handle.
+        with open(filepath, "rb") as f:
+            return pickle.load(f)
+
+    def process(self, version=None):
+        """Create numpy structured arrays from database
+
+        One row is written per exchange and the array is pickled to the
+        ``processed`` directory. ``version`` is passed on to ``load``.
+        """
+        data = self.load(version)
+        # An empty database cannot be processed; check before using it.
+        assert data
+        num_exchanges = sum([len(obj["exchanges"]) for obj in data.values()])
+        dtype = [('uncertainty_type', np.uint8),
+            ('input', np.uint32),
+            ('output', np.uint32),
+            ('row', np.uint32),
+            ('col', np.uint32),
+            ('technosphere', np.bool),
+            ('amount', np.float32),
+            ('sigma', np.float32),
+            ('minimum', np.float32),
+            ('maximum', np.float32),
+            ('negative', np.bool)]
+        arr = np.zeros((num_exchanges, ), dtype=dtype)
+        arr['minimum'] = arr['maximum'] = arr['sigma'] = np.NaN
+        count = 0
+        for key in data:
+            for exc in data[key]["exchanges"]:
+                arr[count] = (
+                    exc["uncertainty type"],
+                    mapping[exc["input"]],
+                    mapping[key],
+                    # row and col are written as MAX_INT_32 placeholders
+                    # (presumably filled in by a later step -- confirm).
+                    MAX_INT_32,
+                    MAX_INT_32,
+                    exc["technosphere"],
+                    exc["amount"],
+                    exc.get("sigma", np.NaN),
+                    exc.get("minimum", np.NaN),
+                    exc.get("maximum", np.NaN),
+                    # Negative means below zero; comparing against 1
+                    # would also flag positive amounts smaller than one.
+                    exc["amount"] < 0
+                    )
+                count += 1
+
+        filepath = os.path.join(config.dir, "processed", "%s.pickle" % \
+            self.database)
+        with open(filepath, "wb") as f:
+            pickle.dump(arr, f, protocol=pickle.HIGHEST_PROTOCOL)
+
+    def __unicode__(self):
+        """Short human-readable label including the database name."""
+        return u"Brightway2 database %s" % self.database
+
+    def __str__(self):
+        """Same text as ``__unicode__``."""
+        return self.__unicode__()

brightway2/errors.py

 
 class UnknownExchange(StandardError):
     pass
+
+
+class UnknownObject(StandardError):
+    """Raised when a database or method is used before it is registered."""

brightway2/io/import_ecospold.py

 # -*- coding: utf-8 -*
 from __future__ import division
-from brightway2 import Manager, mapping
+from brightway2 import Database, mapping
 from brightway2.logs import get_logger
 from brightway2.errors import UnknownExchange
 from lxml import objectify
 
         data = dict([((name, int(o["code"])), o) for o in data])
 
-        manager = Manager(name)
+        manager = Database(name)
         manager.register("Ecospold 1", depends, len(data))
         manager.write(data)
 
             data["exchanges"] = []
 
             # Write modified biosphere database
-            biosphere = Manager("biosphere")
+            biosphere = Database("biosphere")
             bio_data = biosphere.load()
             bio_data[("biosphere", code)] = data
             biosphere.write(bio_data)

brightway2/io/import_method.py

 # -*- coding: utf-8 -*
-from brightway2 import config, Manager, mapping, methods
+from brightway2 import Database, mapping, Method, methods
 from lxml import objectify
 import os
-import string
-import random
 try:
     import cPickle as pickle
 except:
     import pickle
 
 
-def abbreviate(names, length=8):
-    abbrev = lambda x: x if x[0] in string.digits else x[0].lower()
-    name = " ".join(names).split(" ")[0].lower() + \
-        "".join([abbrev(x) for x in " ".join(names).split(" ")[1:]])
-    random_string = ''.join(random.choice(string.letters + string.digits
-        ) for i in xrange(length))
-    return name + "-" + random_string
-
-
 def import_ia_dir(dirpath):
     for filename in filter(lambda x: x.lower()[-4:] == ".xml",
             os.listdir(dirpath)):
     """
     def __init__(self, filename):
         self.filename = filename
-        self.biosphere_data = Manager("biosphere").load()
+        self.biosphere_data = Database("biosphere").load()
         # Note that this is only used for the first root method found in
         # the file
         root = objectify.parse(open(self.filename)).getroot()
         ref_func = ds.metaInformation.processInformation.referenceFunction
         name = (ref_func.get("category"), ref_func.get("subCategory"),
             ref_func.get("name"))
-        abbreviation = abbreviate(name)
-        print abbreviation, name
-        filepath = os.path.join(config.dir, "ia", "%s.pickle" % abbreviation)
         description = ref_func.get("generalComment") or ""
         unit = ref_func.get("unit") or ""
-        data = []
+        data = {}
         for cf in ds.flowData.iterchildren():
             if ("biosphere", int(cf.get("number"))) not in mapping:
                 # Add new biosphere flow
                 new_flow["type"] = "resource" if resource else "emission"
 
                 # Write modified biosphere database
-                biosphere = Manager("biosphere")
+                biosphere = Database("biosphere")
                 bio_data = biosphere.load()
                 bio_data[("biosphere", code)] = new_flow
                 biosphere.write(bio_data)
                 return ("biosphere", code)
-            data.append((("biosphere", int(cf.get("number"))), float(cf.get(
-                "meanValue"))))
-        methods.add(name, {
-            'abbreviation': abbreviation,
-            'description': description,
-            'unit': unit
-            })
-        pickle.dump(data, open(filepath, "wb"),
-            protocol=pickle.HIGHEST_PROTOCOL)
-        methods.process(name)
+            data[("biosphere", int(cf.get("number")))] = float(
+                cf.get("meanValue"))
+        assert name not in methods
+        method = Method(name)
+        method.register(unit, description, len(data))
+        method.write(data)
+        method.process()

brightway2/manager.py

-# -*- coding: utf-8 -*
-from . import meta, config, mapping
-from errors import MissingIntermediateData
-import os
-import numpy as np
-from query import Query
-from utils import natural_sort, MAX_INT_32
-try:
-    import cPickle as pickle
-except ImportError:
-    import pickle
-
-
-class Manager(object):
-    def __init__(self, database, *args, **kwargs):
-        self.database = database
-        if self.database not in meta:
-            print "Warning: %s not a currently installed database" % database
-
-    def query(self, *queries):
-        return Query(*queries)(self.load())
-
-    def copy(self, name):
-        # TODO
-        self.write_database(self._data, name)
-
-    def register(self, format, depends, num_processes):
-        assert self.database not in meta
-        meta.add(self.database, {
-            "from format": format,
-            "depends": depends,
-            "number": num_processes,
-            "version": 0
-            })
-
-    def deregister(self):
-        meta.delete(self.database)
-
-    def write(self, data):
-        meta.increment_version(self.database)
-        mapping.add(data.keys())
-        filename = "%s.%i.pickle" % (self.database,
-            meta.version(self.database))
-        filepath = os.path.join(config.dir, "intermediate", filename)
-        with open(filepath, "wb") as f:
-            pickle.dump(data, f, protocol=pickle.HIGHEST_PROTOCOL)
-
-    def load(self, version=None):
-        files = filter(lambda x: ".".join(x.split(".")[:-2]) == self.database,
-            os.listdir(os.path.join(config.dir, "intermediate")))
-        if not files:
-            raise MissingIntermediateData("Can't load intermediate data")
-        if version == None:
-            return pickle.load(open(os.path.join(config.dir, "intermediate",
-                natural_sort(files)[-1]), "rb"))
-        else:
-            filepath = os.path.join(config.dir, "intermediate",
-                "%s.%i.pickle" % (self.database, version))
-            if not os.path.exists(filepath):
-                raise MissingIntermediateData("This version not found")
-            return pickle.load(open(filepath, "rb"))
-
-    def process(self, version=None):
-        """Create numpy structured arrays from database"""
-        data = self.load(version)
-        num_exchanges = sum([len(obj["exchanges"]) for obj in data.values()])
-        assert data
-        dtype = [('uncertainty_type', np.uint8),
-            ('input', np.uint32),
-            ('output', np.uint32),
-            ('row', np.uint32),
-            ('col', np.uint32),
-            ('technosphere', np.bool),
-            ('amount', np.float32),
-            ('sigma', np.float32),
-            ('minimum', np.float32),
-            ('maximum', np.float32),
-            ('negative', np.bool)]
-        arr = np.zeros((num_exchanges, ), dtype=dtype)
-        arr['minimum'] = arr['maximum'] = arr['sigma'] = np.NaN
-        count = 0
-        for key in data:
-            for exc in data[key]["exchanges"]:
-                arr[count] = (
-                    exc["uncertainty type"],
-                    mapping[exc["input"]],
-                    mapping[key],
-                    MAX_INT_32,
-                    MAX_INT_32,
-                    exc["technosphere"],
-                    exc["amount"],
-                    exc.get("sigma", np.NaN),
-                    exc.get("minimum", np.NaN),
-                    exc.get("maximum", np.NaN),
-                    exc["amount"] < 1
-                    )
-                count += 1
-
-        filepath = os.path.join(config.dir, "processed", "%s.pickle" % \
-            self.database)
-        with open(filepath, "wb") as f:
-            pickle.dump(arr, f, protocol=pickle.HIGHEST_PROTOCOL)

brightway2/meta.py

+from serialization import SerializedDict, PickledDict
+
+
+class Mapping(PickledDict):
+    """Pickled dictionary that assigns an integer index to each key.
+
+    Entries are created with ``add`` and removed with ``delete``; direct
+    item assignment is disabled.
+    """
+    _filename = "mapping.pickle"
+
+    def add(self, keys):
+        """Give every key in ``keys`` that is not yet mapped a new index.
+
+        New indices lie above the current maximum and are offset by the
+        key's position in ``keys``, so gaps appear when some keys are
+        already mapped. The mapping is flushed to disk afterwards.
+        """
+        # max() raises ValueError on an empty sequence, which is the state
+        # of a freshly created mapping; start from 0 in that case.
+        index = max(self._data.values()) if self._data else 0
+        for i, key in enumerate(keys):
+            if key not in self._data:
+                self._data[key] = index + i + 1
+        self.flush()
+
+    def delete(self, keys):
+        """Remove every key in ``keys`` and flush.
+
+        Raises:
+            KeyError: if one of the keys is not mapped.
+        """
+        for key in keys:
+            del self._data[key]
+        self.flush()
+
+    def __setitem__(self, key, value):
+        """Direct assignment is not supported; use ``add`` instead.
+
+        Raises:
+            NotImplementedError: always.
+        """
+        # NotImplemented is a comparison sentinel, not an exception class;
+        # raising it would fail with an unrelated TypeError.
+        raise NotImplementedError("Use Mapping.add() to create entries")
+
+    def __unicode__(self):
+        """Short human-readable description of the mapping."""
+        return u"Mapping from databases and methods to parameter indices."
+
+
+class Databases(SerializedDict):
+    """JSON-backed registry of database metadata, keyed by database name."""
+    _filename = "databases.json"
+
+    def increment_version(self, database):
+        """Add one to the stored version of ``database``, flush to disk
+        and return the new version number."""
+        metadata = self._data[database]
+        metadata["version"] += 1
+        self.flush()
+        return metadata["version"]
+
+    def version(self, database):
+        """Return the stored version number of ``database``."""
+        metadata = self._data[database]
+        return metadata["version"]
+
+    def __unicode__(self):
+        """Short description including the number of registered entries."""
+        count = len(self._data)
+        return u"Brightway2 databases metadata with %i objects" % count
+
+
+class Methods(SerializedDict):
+    """JSON-backed registry of method metadata, keyed by method name."""
+    _filename = "methods.json"
+
+    def pack(self, data):
+        """Turn the dictionary into a list of (key, value) pairs."""
+        # JSON object keys must be strings, so tuple keys cannot be
+        # written as a dictionary; store the items as a list instead.
+        return list(data.iteritems())
+
+    def unpack(self, data):
+        """Rebuild the dictionary from the stored list of pairs."""
+        # JSON has no tuple type, so keys come back as lists; lists are
+        # not hashable and must be converted to tuples to serve as keys.
+        return dict((tuple(pair[0]), pair[1]) for pair in data)
+
+    def __unicode__(self):
+        """Short description including the number of registered entries."""
+        count = len(self._data)
+        return u"Brightway2 methods metadata with %i objects" % count
+
+
+mapping = Mapping()
+databases = Databases()
+methods = Methods()

brightway2/method.py

+# -*- coding: utf-8 -*-
+from . import config, mapping, methods
+from errors import UnknownObject, MissingIntermediateData
+from utils import MAX_INT_32
+import numpy as np
+import os
+import random
+import string
+try:
+    import cPickle as pickle
+except ImportError:
+    import pickle
+
+
+def abbreviate(names, length=8):
+    """Build a short identifier from ``names`` plus a random suffix.
+
+    The parts of ``names`` are joined and split into words. The first
+    word is kept (lower-cased); every later word contributes its first
+    letter (lower-cased), or the whole word if it starts with a digit.
+    A dash and ``length`` random ASCII letters/digits are appended.
+    """
+    # split() without an argument drops the empty strings that repeated
+    # spaces or empty name parts would otherwise produce; indexing such
+    # an empty word with [0] raises IndexError.
+    words = " ".join(names).split()
+    abbrev = lambda x: x if x[0] in string.digits else x[0].lower()
+    name = (words[0].lower() if words else "") + \
+        "".join([abbrev(x) for x in words[1:]])
+    # ascii_letters, unlike letters, does not depend on the locale, so
+    # the result stays safe to use in a file name.
+    alphabet = string.ascii_letters + string.digits
+    random_string = ''.join(random.choice(alphabet) for i in range(length))
+    return name + "-" + random_string
+
+
+class Method(object):
+    """Read, write and process the data of one impact assessment method.
+
+    ``method`` is the key under which metadata is stored in ``methods``;
+    ``__init__`` joins it with " : " for its warning, so it is expected
+    to be a sequence of strings. Data is pickled to
+    ``<config.dir>/intermediate/<abbreviation>.pickle`` and the processed
+    array to ``<config.dir>/processed/<abbreviation>.pickle``.
+    """
+
+    def __init__(self, method, *args, **kwargs):
+        """Store the method key; warn (but continue) if unregistered.
+
+        Extra positional and keyword arguments are accepted and ignored.
+        """
+        self.method = method
+        if self.method not in methods:
+            print "Warning: %s not a currently installed method" % (
+                " : ".join(method))
+
+    def get_abbreviation(self):
+        """Return the abbreviation stored in this method's metadata.
+
+        Raises:
+            UnknownObject: if the method is not registered.
+        """
+        try:
+            return methods[self.method]["abbreviation"]
+        except KeyError:
+            raise UnknownObject("This method is not yet registered")
+
+    def _filepath(self, directory):
+        """Path of this method's pickle file inside ``directory``."""
+        return os.path.join(config.dir, directory,
+            "%s.pickle" % self.get_abbreviation())
+
+    def copy(self, name):
+        """Register a new method ``name`` with this method's unit,
+        description and data.
+
+        Raises:
+            UnknownObject: if this method is not registered.
+            MissingIntermediateData: if this method has no data file.
+            AssertionError: (from ``register``) if ``name`` already exists.
+        """
+        # Load first, so an unregistered source fails with UnknownObject
+        # before anything is created.
+        data = self.load()
+        metadata = methods[self.method]
+        # Constructing the copy prints the "not installed" warning,
+        # because it is registered only on the next line.
+        duplicate = Method(name)
+        duplicate.register(metadata["unit"], metadata["description"],
+            len(data))
+        duplicate.write(data)
+
+    def register(self, unit, description="", num_cfs=0):
+        """Add this method to ``methods`` with a fresh abbreviation.
+
+        The method must not be registered already (checked with
+        ``assert``).
+        """
+        assert self.method not in methods
+        methods[self.method] = {
+            "abbreviation": abbreviate(self.method),
+            "unit": unit,
+            "description": description,
+            "num_cfs": num_cfs
+            }
+
+    def deregister(self):
+        """Remove this method's metadata from ``methods``."""
+        del methods[self.method]
+
+    def write(self, data):
+        """Add the keys of ``data`` to ``mapping`` and pickle ``data``
+        as the intermediate data.
+
+        Raises:
+            UnknownObject: if the method is not registered.
+        """
+        # Resolve the path first: it raises UnknownObject for an
+        # unregistered method, and mapping must not change in that case.
+        filepath = self._filepath("intermediate")
+        mapping.add(data.keys())
+        with open(filepath, "wb") as f:
+            pickle.dump(data, f, protocol=pickle.HIGHEST_PROTOCOL)
+
+    def load(self):
+        """Unpickle and return the intermediate data.
+
+        Raises:
+            UnknownObject: if the method is not registered.
+            MissingIntermediateData: if the data file cannot be opened.
+        """
+        try:
+            with open(self._filepath("intermediate"), "rb") as f:
+                return pickle.load(f)
+        # On Python 2 open() raises IOError, which is not an OSError.
+        except (IOError, OSError):
+            raise MissingIntermediateData("Can't load intermediate data")
+
+    def process(self):
+        """Create numpy structured arrays for IA method
+
+        One row is written per entry of the intermediate data and the
+        array is pickled to the ``processed`` directory.
+        """
+        with open(self._filepath("intermediate"), "rb") as f:
+            data = pickle.load(f)
+        assert data
+        dtype = [('uncertainty_type', np.uint8),
+            ('flow', np.uint32),
+            ('index', np.uint32),
+            ('amount', np.float32),
+            ('sigma', np.float32),
+            ('minimum', np.float32),
+            ('maximum', np.float32),
+            ('negative', np.bool)]
+        arr = np.zeros((len(data), ), dtype=dtype)
+        arr['minimum'] = arr['maximum'] = arr['sigma'] = np.NaN
+        for i, (key, value) in enumerate(data.iteritems()):
+            arr[i] = (
+                0,
+                mapping[key],
+                # index is written as a MAX_INT_32 placeholder
+                # (presumably filled in by a later step -- confirm).
+                MAX_INT_32,
+                value,
+                np.NaN,
+                np.NaN,
+                np.NaN,
+                False
+                )
+        with open(self._filepath("processed"), "wb") as f:
+            pickle.dump(arr, f, protocol=pickle.HIGHEST_PROTOCOL)

brightway2/serialization.py

+# -*- coding: utf-8 -*-
+import os
+from . import config
+from time import time
+import json
+try:
+    import cPickle as pickle
+except ImportError:
+    import pickle
+
+
+class SerializedDict(object):
+    """Dictionary-like object that is written to a JSON file on change.
+
+    Subclasses must define ``_filename``; the file lives in
+    ``config.dir``. ``pack`` and ``unpack`` are hooks to convert the data
+    to and from a serializable form. ``__str__`` relies on a
+    ``__unicode__`` method supplied by the subclass.
+    """
+
+    def __init__(self):
+        """Resolve the file path and load (or create) the data."""
+        self._filepath = os.path.join(config.dir, self._filename)
+        self.load()
+
+    def load(self):
+        """Read the data from disk; start empty if the file can't be
+        opened."""
+        try:
+            self._data = self.deserialize()
+        except IOError:
+            # Create if not present
+            self._data = {}
+            self.flush()
+
+    def flush(self):
+        """Write the current data to disk."""
+        self.serialize()
+
+    @property
+    def data(self):
+        """The underlying dictionary itself (not a copy)."""
+        return self._data
+
+    @property
+    def list(self):
+        """The keys of the underlying dictionary."""
+        return self._data.keys()
+
+    def __getitem__(self, key):
+        return self._data[key]
+
+    def __setitem__(self, key, value):
+        # Every change is persisted immediately.
+        self._data[key] = value
+        self.flush()
+
+    def __contains__(self, key):
+        return key in self._data
+
+    def __str__(self):
+        return self.__unicode__()
+
+    def __delitem__(self, name):
+        # Every change is persisted immediately.
+        del self._data[name]
+        self.flush()
+
+    def serialize(self, filepath=None):
+        """Dump the packed data as JSON to ``filepath`` (default: the
+        object's own file)."""
+        with open(filepath or self._filepath, "w") as f:
+            json.dump(self.pack(self._data), f, indent=2)
+
+    def deserialize(self):
+        """Read the JSON file and return the unpacked data.
+
+        Raises:
+            IOError: if the file cannot be opened.
+        """
+        # Close the file deterministically instead of leaking the handle.
+        with open(self._filepath, "r") as f:
+            return self.unpack(json.load(f))
+
+    def pack(self, data):
+        """Hook: convert ``data`` before serialization (identity here)."""
+        return data
+
+    def unpack(self, data):
+        """Hook: convert ``data`` after deserialization (identity here)."""
+        return data
+
+    def backup(self):
+        """Write a backup version of the data to backups directory"""
+        # The file name carries the current Unix time in whole seconds.
+        filepath = os.path.join(config.dir, "backups",
+            self._filename + ".%s.backup" % int(time()))
+        self.serialize(filepath)
+
+
+class PickledDict(SerializedDict):
+    """``SerializedDict`` that stores its data as a pickle, not JSON."""
+
+    def serialize(self, filepath=None):
+        """Pickle the packed data to ``filepath`` (default: the object's
+        own file).
+
+        The optional ``filepath`` matches ``SerializedDict.serialize``;
+        the inherited ``backup`` passes it and would otherwise fail with
+        a TypeError.
+        """
+        with open(filepath or self._filepath, "wb") as f:
+            pickle.dump(self.pack(self._data), f,
+                protocol=pickle.HIGHEST_PROTOCOL)
+
+    def deserialize(self):
+        """Read the pickle file and return the unpacked data.
+
+        Raises:
+            IOError: if the file cannot be opened.
+        """
+        # Close the file deterministically instead of leaking the handle.
+        with open(self._filepath, "rb") as f:
+            return self.unpack(pickle.load(f))
   author="Chris Mutel",
   author_email="cmutel@gmail.com",
   license=open('LICENSE.txt').read(),
+  requires=["voluptuous", "nose"],
+  url="https://bitbucket.org/cmutel/brightway2",
   long_description=open('README.txt').read(),
 )