Chris Mutel committed 6875e1e

Even more data cleaning for US LCI database

Comments (0)

Files changed (2)

bw2data/io/import_ecospold.py

 
 BIOSPHERE = ("air", "water", "soil", "resource")
 
+widgets = [
+    progressbar.SimpleProgress(sep="/"), " (",
+    progressbar.Percentage(), ') ',
+    progressbar.Bar(marker=progressbar.RotatingMarker()), ' ',
+    progressbar.ETA()
+]
+
 
 class Ecospold1DataExtractor(object):
     @classmethod
                 lambda x: x[-4:].lower() == ".xml", os.listdir(path))]
         else:
             files = [path]
-        widgets = ['Extracting data: ', progressbar.Percentage(), ' ',
-            progressbar.Bar(marker=progressbar.RotatingMarker()), ' ',
-            progressbar.ETA()]
+
+        if not files:
+            raise OSError("Provided path doesn't appear to have any XML files")
+
         pbar = progressbar.ProgressBar(widgets=widgets, maxval=len(files)
             ).start()
 
         ref_func = dataset.metaInformation.processInformation.\
             referenceFunction
         data = {
-            "name": ref_func.get("name"),
+            "name": ref_func.get("name").strip(),
             "type": "process",  # True for all ecospold?
             "categories": [ref_func.get("category"), ref_func.get(
                 "subCategory")],
                 "categories": (exc.get("category"), exc.get("subCategory")),
                 "location": exc.get("location"),
                 "unit": normalize_units(exc.get("unit")),
-                "name": exc.get("name")
+                "name": exc.get("name").strip()
                 }
             }
 
     The class takes no constructor arguments; instead, instantiate it and then import using the ``importer`` method, e.g. ``Ecospold1Importer().importer(filepath, name)``.
 
     """
-    def importer(self, path, name, depends=[config.biosphere]):
+    def importer(self, path, name, depends=[config.biosphere], remapping={}):
         """Import an inventory dataset, or a directory of inventory datasets.
 
         .. image:: images/import-method.png
         self.log, self.logfile = get_io_logger("lci-import")
         self.new_activities = []
         self.new_biosphere = []
+        self.remapping = remapping
 
         data = Ecospold1DataExtractor.extract(path, self.log)
         data = self.allocate_datasets(data)
             warnings.warn("No data found in XML file %s" % path)
             return
 
-        widgets = ['Linking exchanges:', progressbar.Percentage(), ' ',
-            progressbar.Bar(marker=progressbar.RotatingMarker()), ' ',
-            progressbar.ETA()]
         pbar = progressbar.ProgressBar(widgets=widgets, maxval=len(data)
             ).start()
 
 
         """
         coproduct_codes = [exc["code"] for exc in ds["exchanges"] if exc.get(
-            "group", None) == 2]
+            "group", None) in (0, 2)]
         coproducts = dict([(x, copy.deepcopy(ds)) for x in coproduct_codes])
         exchanges = dict([(exc["code"], exc) for exc in ds["exchanges"
             ] if "code" in exc])
         return np.allclose(np.diff(codes), np.ones(np.diff(codes).shape))
 
     def link_exchange(self, exc, ds, data, depends, name):
+        """`name`: Name of database"""
+        if exc["matching"]["name"] in self.remapping:
+            exc["matching"]["name"] = self.remapping[exc["matching"]["name"]]
         # Has to happen before others because US LCI doesn't define categories
         # for product definitions...
         if exc.get("group", None) == 0:
             return exc
         # Hack for US LCI-specific bug - both "Energy recovered"
         # and "Energy, recovered" are present
-        elif exc["matching"]["categories"] == () and \
+        elif exc.get("group", None) == 1 and \
+            exc["matching"]["categories"] == () and \
                 exc["matching"]["name"] == "Recovered energy":
             exc["matching"].update(
                 name="Energy, recovered",

bw2data/io/remapping.py

+# -*- coding: utf-8 -*-
+
+US_LCI = {
+    "Bark, hardwood, average, at forest road, NE-NC": "Bark, hardwood, average, at forest road,  NE-NC",
+    "Bucked log, hardwood, green, at veneer mill, E": "Bucked and debarked log, hardwood, green, at veneer mill, E",
+    "Chlorine, PVC producer average, at plant": "Chlor-alkali electrolysis, average production mix, at plant",  # Best guess?
+    "Compressed natural gas, at plant": "Natural gas, processed, at plant",
+    "Corn stover, production, average,US, 2022": "Corn stover, production, average, US, 2022",
+    "corn wet milling operations, AP-42": "corn wet milling, operations, AP-42",
+    "Electricity, as grid, US, 2008": "Electricity, at Grid, US, 2008",
+    "Electricity, at cogen, natural gas turbine": "Electricity, at cogen, for natural gas turbine",
+    "Electricity, at grid, Eastern US": "Electricity, at grid, Eastern US, 2000",
+    "Electricity, at grid, US": "Electricity, at Grid, US, 2008",
+    "Electricity, at grid, Western US": "Electricity, at grid, Western US, 2000",
+    "Electricity, cogenerated, at plant": "Electricity, at cogen, for natural gas turbine",  # Best guess?
+    "Electricity, onsite boiler, hardwood mill average, SE": "Electricity, onsite boiler, hardwood mill, average, SE",
+    "Electricity, onsite boiler, hardwood mill, average, NE-NC": "Electricity, onsite boiler, hardwood mill average, NE-NC",
+    "Electricity, onsite boiler, softwood mill average, NE-NC": "Electricity, onsite boiler, softwood mill, average, NE-NC",
+    "ethanol, denatured, forest residues, thermochemical": "Ethanol, denatured, forest residues, thermochem",
+    "Fuel wood, hardwood, green, at veneer mill, E": "Wood fuel, hardwood, green, at veneer mill, E",
+    "Head, indirect, heated zones, softwood, plywood veneer drying, AP-42": "indirect heated, heated zones, softwood, plywood veneer drying, AP-42",
+    "Heat, Block conditioning, at veneer mill, E": "Heat, block conditiong, at veneer mill, E",
+    "Heat, Drying veneer, hardwood, at veneer mill,  E": "Heat, drying veneer, hardwood, at veneer mill, E",
+    "Heat, onsite boiler, hardwood mill average, SE": "Heat, onsite boiler, hardwood mill, average, SE",
+    "Heat, onsite boiler, hardwood mill, average, NE-NC": "Heat, onsite boiler, hardwood mill average, NE-NC",
+    "Natural gas, processed, for olefins production, at plant, internal offgas use": "Natural gas, processed, for olefins production, at plant",
+    "Natural gas, processed, for olefins production, at plant, material use": "Natural gas, processed, for olefins production, at plant",
+    "Palm kernel oil, crude, at plant": "Crude palm kernel oil, at plant",
+    "Petroleum refining, for olefins production, at plant, internal offgas use": "Petroleum refining, for olefins production, at plant",
+    "Petroleum refining, for olefins production, at plant, material use": "Petroleum refining, for olefins production, at plant",
+    "Sawn lumber, hardwood, rough, kiln dried, at kiln, NE-NC  ": "Sawn lumber, hardwood, rough, kiln dried, at kiln, NE-NC",
+    "Spring wheat straw, ground and stored": "spring wheat straw, ground and stored, 2022",
+    "Switchgrass, production, average, US, 2022": "Switchgrass, production, US, 2022",
+    "tillage, corn, conservation": "tillage, conversation, corn production",
+    "tillage, corn, intensive": "tillage, intensive, corn production",
+    "tillage, corn, reduced": "tillage, reduce, corn production",
+    "Transport, combination truck": "Transport, combination truck, average fuel mix",
+    "Transport, combination truck, diesel powered ": "Transport, combination truck, diesel powered",
+    "Transport, ocean tanker, average fuel mix": "Transport, ocean freighter, average fuel mix",
+    "Transport, pipeline, unspecified": "Transport, pipeline, unspecified petroleum products",
+    "Transport, single-unit truck": "Transport, single unit truck, diesel powered",  # No average fuel mix - guess for diesel?
+    "Wood, hardwood, generated at mill, combusted in industrial boiler, E": "Wood fuel, E hardwood, generated at mill, combusted in industrial boiler",
+    "Wood, hardwood, purchased, combusted in industrial boiler, E": "Wood fuel, E hardwood, purchased, combusted in industrial boiler, E",
+    "Wood, NE-NC hardwood, generated at lumber mill, combusted in industrial boiler": "Wood fuel, NE-NC hardwood, gen at lumber mill, combusted in industrial boiler",
+    "Wood, NE-NC hardwood, purchased, combusted in industrial boiler": "Wood fuel, NE-NC hardwood, purchased, combusted in industrial boiler",
+    'decomposition, corn, 15.5% moisture basis': "Corn, decomposition, 15.5% moisture",
+}
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.