Commits

Chris Mutel committed 0058276

0.9 branch: Better IO

Comments (0)

Files changed (6)

brightway2/io/export_database.py

+# -*- coding: utf-8 -*-
+import os
+import bz2
+from ..serialization import JsonWrapper
+from .. import config, Database, databases
+
+
class DatabaseExport(object):
    """Export a database to the Brightway2 exchange format: bz2-compressed JSON.

    JSON is used instead of pickle for security reasons — unpickling
    untrusted data can execute arbitrary code.

    The serialized structure is:

    .. code-block::

        {
            "meta":
                {
                "name": database_name,
                "depends": [list_of_databases],
                "version": version_number,
                "from format": format,
                "bw2 version": bw2_version
                },
            "data": [
                [[database_name, process_id], process_data],
                ]
        }

    """
    def exporter(self, name):
        """Export a database to Brightway2 format.

        Args:
            *name* (str): Name of database to export.

        Returns:
            Filepath of exported database.

        """
        assert name in databases, "Database %s not found" % name
        meta = databases[name]
        # ``items()``, not ``iteritems()``: the serializer needs a concrete
        # list of [key, value] pairs (keys are (database, id) tuples, which
        # JSON cannot use as object keys), not a dictionary iterator.
        data = Database(name).load().items()
        processed = {
            "meta": {
                "name": name,
                "depends": meta["depends"],
                "version": meta["version"],
                "from format": meta["from format"],
                "bw2 version": config.version
                },
            "data": data
            }
        dirname = config.request_dict("export")
        assert dirname, "No suitable directory for export found"
        filepath = os.path.join(dirname, name + ".json.bz2")
        with bz2.BZ2File(filepath, "w") as f:
            f.write(JsonWrapper.dumps(processed))
        return filepath

brightway2/io/import_database.py

+# -*- coding: utf-8 -*-
+from .. import Database
+from ..logs import get_io_logger
+from ..serialization import JsonWrapper
+from ..validate import db_validator
+import bz2
+import os
+
+
class DatabaseImporter(object):
    """Import one or more databases from the Brightway2 exchange format
    (bz2-compressed JSON).

    Each imported object has a ``meta`` section (database metadata) and a
    ``data`` section (process datasets keyed by (database, id)).
    """
    def importer(self, path):
        """Import databases from the file at ``path``.

        Args:
            *path* (str): Filepath of the file to import.

        Returns:
            Tuple of (number of databases actually changed, log filepath).

        """
        # NOTE(review): assumes the file holds an iterable of
        # {"meta": ..., "data": ...} objects — confirm against the exporter,
        # which appears to write a single such object.
        data = JsonWrapper.loads(bz2.BZ2File(path, "r").read())
        # Do validation of all incoming data before touching any database
        for o in data:
            db_validator(o["data"])
        # Was ``os.get_filename`` — no such function exists; use the basename
        logger, logfile = get_io_logger(os.path.basename(path))
        report_needed = sum([self.update_database(
            o["data"], o["meta"], logger) for o in data])
        return report_needed, logfile

    def update_database(self, data, meta, logger):
        """Create or update a single database.

        Returns ``True`` if an existing database was changed (caller should
        report), ``False`` for a fresh creation or a no-op.
        """
        name = meta["name"]
        try:
            old = Database(name)
            odata = old.load()
        except IOError:
            # Create new database
            database = Database(name)
            database.register(
                format="Brightway2 internal",
                depends=meta["depends"],
                num_processes=len(data))
            database.write(data)
            database.process()
            # No messages about this import
            logger.info(u"Database %s created" % name)
            return False

        # First, see if data will be overwritten
        if self.no_difference(odata, data):
            logger.debug(u"DEBUG: Database %s not changed" % name)
            return False

        # There are changes to be made; log them
        for message in self.get_changes(odata, data):
            logger.info(message)

        # Finally, update data. ``update(data)``, not ``update(**data)``:
        # keyword expansion requires string keys, but dataset keys are
        # (database, id) tuples.
        odata.update(data)
        old.write(odata)
        old.process()
        return True

    def no_difference(self, d, e):
        """Test if ``d`` properly contains ``e``.

        Note that ``d`` can have *more* keys than ``e``."""
        for key in e:
            if key not in d or d[key] != e[key]:
                return False
        return True

    def get_changes(self, old, new):
        """Build human-readable log messages describing how ``new`` differs
        from ``old`` (additions and changed attributes; deletions ignored)."""
        messages = []
        for key in new:
            if key not in old:
                messages.append(u"Process %s (%s) added" % (
                    new[key]["name"], key))
            elif new[key] == old[key]:
                continue
            else:
                attrs = [attr for attr in new[key]
                         if new[key][attr] != old[key].get(attr, None)]
                # A non-equal dataset must differ in at least one attribute
                assert attrs
                messages.append(u"Process %s updated the following:\n\t%s" % (
                    new[key]["name"], u", ".join(attrs)))
        return messages

brightway2/io/robust_import_ecospold.py

+# -*- coding: utf-8 -*-
+from . import EcospoldImporter
+from fuzzywuzzy import process
+from ..query import Query, Filter, Dictionaries
+
+units_map = {}
+geo_map = {}
+
+
class RobustEcospoldImporter(EcospoldImporter):
    """A subclass that assumes that id numbers are meaningless, and exchange inputs must be searched for each time to be matched."""

    def _match_process(self, data, child_dbs, logger):
        # Try to find the dataset in ``child_dbs`` that corresponds to
        # ``data`` without relying on id numbers: filter candidates by
        # location and unit, then fuzzy-match on name.
        #
        # NOTE(review): this method looks unfinished — ``matches`` is never
        # used and nothing is returned (the method yields None), and
        # ``logger`` is unused. Confirm intended behavior before relying on it.

        # First, make sure location and unit match (allow for mapping
        # between systems)
        # geo_map/units_map translate a location/unit to a list of
        # acceptable equivalents; default is the literal value itself.
        possibles = Query(Filter("location",
            geo_map.get(data["location"], [data["location"]]),
            "iin"),
            Filter("unit",
            units_map.get(data["unit"], [data["unit"]]),
            "iin"))(Dictionaries(child_dbs)).result
        # Next, perform fuzzy matching on the name
        names = [x["name"] for x in possibles.values()]
        # Top five fuzzy matches against the candidate names
        matches = process.extract(data["name"], names, limit=5)
+
+
+
+# BIOSPHERE = ("air", "water", "soil", "resource")
+
+
+# class EcospoldImporter(object):
+#     def importer(self, path, name, depends=["biosphere", ]):
+#         data = []
+#         log = get_logger(name)
+#         log.critical(u"Starting import of %s (from %s)" % (name, path))
+#         if os.path.isdir(path):
+#             files = [os.path.join(path, filter(lambda x: x[-4:].lower(
+#                 ) == ".xml", os.listdir(path)))]
+#         else:
+#             files = [path]
+
+#         widgets = ['Files: ', progressbar.Percentage(), ' ',
+#             progressbar.Bar(marker=progressbar.RotatingMarker()), ' ',
+#             progressbar.ETA()]
+#         pbar = progressbar.ProgressBar(widgets=widgets, maxval=len(files)
+#             ).start()
+
+#         for index, filename in enumerate(files):
+#             root = objectify.parse(open(filename)).getroot()
+
+#             if root.tag != '{http://www.EcoInvent.org/EcoSpold01}ecoSpold':
+#                 # Unrecognized file type
+#                 log.critical(u"skipping %s - no ecoSpold element" % filename)
+#                 continue
+
+#             for dataset in root.iterchildren():
+#                 data.append(self._process_dataset(dataset))
+
+#             pbar.update(index)
+
+#         # Hackish
+#         for o in data:
+#             try:
+#                 o["code"] = int(o["code"])
+#             except:
+#                 pass
+
+#         # Fix exchanges
+#         codes = set([o["code"] for o in data])
+#         for ds in data:
+#             for exc in ds["exchanges"]:
+#                 code = exc["code"]
+#                 # Hack - not work with others?
+#                 try:
+#                     code = int(code)
+#                 except:
+#                     pass
+#                 if code in codes:
+#                     exc["input"] = (name, code)
+#                 else:
+#                     exc["input"] = self._find_in_dependent_database(code,
+#                         exc, depends)
+#                 exc["technosphere"] = exc["input"][0] != "biosphere"
+
+#         data = dict([((name, int(o["code"])), o) for o in data])
+
+#         manager = Database(name)
+#         manager.register("Ecospold 1", depends, len(data))
+#         manager.write(data)
+
+#         pbar.finish()
+
+#     def _find_in_dependent_database(self, code, exc, depends):
+#         for db in depends:
+#             if (db, code) in mapping:
+#                 return (db, code)
+
+#         # Add new biosphere flow if needed
+#         if exc["_matching"].get("categories", [None, ])[0] in BIOSPHERE:
+#             data = exc["_matching"]
+
+#             # Emission or resource
+#             resource = data["categories"][0] == "resource"
+#             data["type"] = "resource" if resource else "emission"
+
+#             # Biosphere flows don't have locations or exchanges
+#             del data["location"]
+#             data["exchanges"] = []
+
+#             # Write modified biosphere database
+#             biosphere = Database("biosphere")
+#             bio_data = biosphere.load()
+#             bio_data[("biosphere", code)] = data
+#             biosphere.write(bio_data)
+#             return ("biosphere", code)
+#         raise UnknownExchange(("The exchange %s couldn't be " + \
+#             "matched to this or a depending database") % code)
+
+#     def _process_dataset(self, dataset):
+#         data = {}
+#         ref_func = dataset.metaInformation.processInformation.\
+#             referenceFunction
+
+#         data["name"] = ref_func.get("name")
+#         data["type"] = "process"  # True for all ecospold?
+#         data["categories"] = [ref_func.get("category"), ref_func.get(
+#             "subCategory")]
+#         # Convert ("foo", "unspecified") to ("foo",)
+#         while data["categories"][-1] == "unspecified":
+#             data["categories"] = data["categories"][:-1]
+#         data["location"] = dataset.metaInformation.processInformation.\
+#             geography.get("location")
+#         data["code"] = dataset.get("number")
+#         data["unit"] = ref_func.get("unit")
+#         data["exchanges"] = self._process_exchanges(dataset)
+#         return data
+
+#     def _process_exchanges(self, dataset):
+#         data = []
+#         # Skip definitional exchange - we assume this already
+#         for exc in dataset.flowData.iterchildren():
+#             if exc.get("name") == dataset.metaInformation.processInformation.\
+#                     referenceFunction.get("name") != None and float(
+#                     exc.get("meanValue", 0.)) == 1.0:
+#                 continue
+
+#             this = {
+#                 "code": int(exc.get("number")),
+#                 "_matching": {
+#                     "categories": (exc.get("category"), exc.get("subCategory")),
+#                     "location": exc.get("location"),
+#                     "unit": exc.get("unit"),
+#                     "name": exc.get("name")
+#                     }
+#                 }
+
+#             if exc.get("generalComment"):
+#                 this["pedigree matrix"] = exc.get("generalComment")
+
+#             uncertainty = int(exc.get("uncertaintyType", 0))
+#             mean = exc.get("meanValue")
+#             min_ = exc.get("minValue")
+#             max_ = exc.get("maxValue")
+#             sigma = exc.get("standardDeviation95")
+
+#             if uncertainty == 1:
+#                 # Lognormal
+#                 this.update({
+#                     'uncertainty type': LognormalUncertainty.id,
+#                     'amount': float(mean),
+#                     'sigma': math.log(math.sqrt(float(sigma)))
+#                     })
+#                 if this['sigma'] == 0:
+#                     # Bad ecoinvent data
+#                     this['uncertainty type'] = UndefinedUncertainty.id
+#                     del this["sigma"]
+#             elif uncertainty == 2:
+#                 # Normal
+#                 this.update({
+#                     'uncertainty type': NormalUncertainty.id,
+#                     'amount': float(mean),
+#                     'sigma': float(sigma) / 2
+#                     })
+#             elif uncertainty == 3:
+#                 # Triangular
+#                 this.update({
+#                     'uncertainty type': TriangularUncertainty.id,
+#                     'minimum': float(min_),
+#                     'maximum': float(max_)
+#                     })
+#                 # Sometimes this isn't included (though it SHOULD BE)
+#                 if exc.get("mostLikelyValue"):
+#                     this['amount'] = float(exc.get("mostLikelyValue"))
+#                 else:
+#                     this['amount'] = float(mean)
+#             elif uncertainty == 4:
+#                 # Uniform
+#                 this.update({
+#                     'uncertainty type': UniformUncertainty.id,
+#                     'amount': float(mean),
+#                     'minimum': float(min_),
+#                     'maximum': float(max_)
+#                     })
+#             else:
+#                 # None
+#                 this.update({
+#                     'uncertainty type': UndefinedUncertainty.id,
+#                     'amount': float(mean)
+#                 })
+
+#             data.append(this)
+
+#         # Sort for consistent order to make import comparisons easier
+#         data.sort(key=lambda x: x["input"])
+#         return data
 # built documents.
 #
 # The short X.Y version.
-version = '0.7'
+version = '0.9'
 # The full version, including alpha/beta/rc tags.
-release = '0.7'
+release = '0.9'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
 numpy
 nose
 progressbar
-voluptuous
+voluptuous
+fuzzywuzzy
 
 setup(
   name='brightway2',
-  version="0.7",
+  version="0.9",
   packages=packages,
   author="Chris Mutel",
   author_email="cmutel@gmail.com",
   license=open('LICENSE.txt').read(),
-  requires=["voluptuous", "nose", "progressbar", "numpy", "lxml"],
+  requires=["voluptuous", "nose", "progressbar", "numpy", "lxml", "fuzzywuzzy"],
   url="https://bitbucket.org/cmutel/brightway2",
   long_description=open('README').read(),
 )