Commits

Chris Mutel committed 35e2755

0.10.1: bw2-uptodate, Gephi graph shortcuts, better ecospold2 importing, better Simapro importing, more complete units normalization, colored text, more documentation

  • Participants
  • Parent commits b31dad5

Comments (0)

Files changed (25)

File bw2data/__init__.py

 # -*- coding: utf-8 -*
+__version__ = (0, 10, 1)
+
 from _config import config
-from meta import databases, methods, mapping, reset_meta, geomapping
+from meta import databases, methods, mapping, reset_meta, geomapping, \
+    weightings, normalizations
 from serialization import JsonWrapper
 from database import Database
 from method import Method
 import validate
 import io
 
-__version__ = (0, 9, 3, 3)
+from upgrades import check_status
+check_status()

File bw2data/_config.py

         self.reset(path)
         self.cache = {}
 
-    def check_dir(self, dir=None):
+    def check_dir(self, directory=None):
         """Check is directory is a directory and writeable."""
-        return os.path.isdir(self.dir) and os.access(dir or self.dir, os.W_OK)
+        return os.path.isdir(self.dir) and \
+            os.access(directory or self.dir, os.W_OK)
 
     def reset(self, path=None):
         """Reset to original configuration. Useful for testing."""
                 self.dir, "preferences.json")))
         except:
             self.p = {"use_cache": True}
+            self.save_preferences()
 
     def save_preferences(self):
         """Serialize preferences to disk."""

File bw2data/bin/bw2-uptodate.py

+#!/usr/bin/env python
+# encoding: utf-8
+"""Brightway2 updating made simple.
+
+Usage:
+  bw2-uptodate.py
+  bw2-uptodate.py --list
+  bw2-uptodate.py -h | --help
+  bw2-uptodate.py --version
+
+Options:
+  --list        List the updates needed, but don't do anything
+  -h --help     Show this screen.
+  --version     Show version.
+
+"""
+from docopt import docopt
+import sys
+import warnings
+with warnings.catch_warnings():
+    warnings.simplefilter("ignore")
+    from bw2data import config
+    from bw2data.upgrades import *
+    from bw2data.utils import Fore
+
+
+EXPLANATIONS = {
+    "stats_array reformat": Fore.GREEN + "\nstats_array reformat:" + Fore.RESET + """
+    Upgrading to the ``stats_arrays`` package changes the data format of both inventory databases and impact assessment methods.
+    Read more about the stats_arrays data format: """ + Fore.BLUE + \
+        "\n\thttps://stats_arrays.readthedocs.org/en/latest/\n" + Fore.RESET,
+    "0.10 units restandardization": Fore.GREEN + "0.10 units restandardization:" + Fore.RESET + """
+    Brightway2 tries to normalize units so that they are consistent from machine to machine, and person to person. For example, ``m2a`` is changed to ``square meter-year``. This update adds more data normalizations, and needs to update links across databases.""",
+}
+
+class Updater(object):
+    def needed(self):
+        try:
+            import stats_arrays
+        except ImportError:
+            warnings.warn(STATS_ARRAY_WARNING)
+            sys.exit(0)
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            updates_needed = check_status()
+        return updates_needed
+
+    def list(self):
+        updates_needed = self.needed()
+        if not updates_needed:
+            print Fore.GREEN + "\n*** Brightway2 is up to date! ***\n" + \
+                Fore.RESET
+        else:
+            print Fore.RED + "\n*** Updates found ***" + Fore.RESET
+            for update in updates_needed:
+                print EXPLANATIONS[update]
+            print Fore.RED + "\n*** Action needed ***" + Fore.RESET + \
+                "\nPlease run " + Fore.BLUE + "bw2-uptodate.py\n" + Fore.RESET
+
+    def update(self, confirm=True):
+        updates_needed = self.needed()
+
+        if updates_needed:
+            print Fore.GREEN + "\nThe following upgrades will be applied:\n"
+            for update in updates_needed:
+                print EXPLANATIONS[update]
+            if confirm:
+                confirmation = raw_input("\nType '" + Fore.MAGENTA  + "y" + \
+                    Fore.RESET + "'to confirm, " + Fore.RED + "anything else" + \
+                    Fore.RESET + " to cancel: "
+                )
+                if confirmation.strip() != 'y':
+                    print Fore.MAGENTA + "\n*** Upgrade canceled ***\n" + \
+                        Fore.RESET
+                    sys.exit(0)
+
+            if "stats_array reformat" in updates_needed:
+                convert_from_stats_toolkit()
+                config.p["upgrades"]["stats_array reformat"] = True
+            if "0.10 units restandardization" in updates_needed:
+                units_renormalize()
+                config.p["upgrades"]["0.10 units restandardization"] = True
+            config.save_preferences()
+        else:
+            print Fore.GREEN + "\n*** Brightway2 is up to date! ***\n" + \
+                Fore.RESET
+
+
+if __name__ == "__main__":
+    config.create_basic_directories()
+    args = docopt(__doc__, version='Brightway2 up to date 0.1')
+    updater = Updater()
+    if args['--list']:
+        updater.list()
+    else:
+        updater.update()

File bw2data/database.py

     def copy(self, name):
         """Make a copy of the database.
 
-        Internal links within the database will be updated to match the new database name.
+        Internal links within the database will be updated to match the new database name, i.e. ``("old name", "some id")`` will be converted to ``("new name", "some id")`` for all exchanges.
 
         Args:
-            * *name* (str): Name of the new database.
+            * *name* (str): Name of the new database. Must not already exist.
 
         """
         assert name not in databases, ValueError("This database exists")
         return u"Brightway2 database %s" % self.database
 
     def __str__(self):
-        return self.__unicode__()
+        return unicode(self).encode('utf-8')

File bw2data/ia_data_store.py

         return u"%s: %s" % (self.label.title(), u"-".join(self.name))
 
     def __str__(self):
-        return self.__unicode__()
+        return unicode(self).encode('utf-8')
 
     def get_abbreviation(self):
         """Abbreviate a method identifier (a tuple of long strings) for a filename. Random characters are added because some methods have similar names which would overlap when abbreviated."""

File bw2data/io/__init__.py

 from .bw2package import BW2PackageExporter, BW2PackageImporter, \
     download_biosphere, download_methods
-from .export_gexf import DatabaseToGEXF
-from .import_ecospold import Ecospold1Importer as EcospoldImporter
+from .export_gexf import DatabaseToGEXF, DatabaseSelectionToGEXF, keyword_to_gephi_graph
+from .import_ecospold import Ecospold1Importer
 from .import_method import EcospoldImpactAssessmentImporter
 # from import_ecospold2 import Ecospold2Importer
 from .import_simapro import SimaProImporter

File bw2data/io/export_gexf.py

 # -*- coding: utf-8 -*
-from .. import config, Database
+from .. import config, Database, Filter
 from lxml.builder import ElementMaker
 from lxml.etree import tostring
 import datetime
 
 
 class DatabaseToGEXF(object):
+    """Export a Gephi graph for a database.
+
+    Call ``.export()`` to export the file after class instantiation.
+
+    Args:
+        * *database* (str): Database name.
+        * *include_descendants* (bool): Include databases which are linked from ``database``.
+
+    .. warning:: ``include_descendants`` is not yet implemented.
+
+    """
     def __init__(self, database, include_descendants=False):
         self.database = database
         self.descendants = include_descendants
             self.data)])
 
     def export(self):
+        """Export the Gephi XML file. Returns the filepath of the created file."""
         E = ElementMaker(namespace="http://www.gexf.net/1.2draft",
             nsmap={None: "http://www.gexf.net/1.2draft"})
         meta = E.meta(E.creator("Brightway2"), E.description(self.database),
         return self.filepath
 
     def get_data(self, E):
+        """Get Gephi nodes and edges."""
         count = itertools.count()
         nodes = []
         edges = []
 
 
 class DatabaseSelectionToGEXF(DatabaseToGEXF):
+    """Export a Gephi graph for a selection of activities from a database.
+
+    Also includes all inputs for the filtered activities.
+
+    Args:
+        * *database* (str): Database name.
+        * *keys* (str): The activity keys to export.
+
+    """
     def __init__(self, database, keys):
         self.database = database
         self.filepath = os.path.join(config.request_dir("output"),
         self.data = {key: value for key, value in unfiltered_data.iteritems() if key in keys}
         self.id_mapping = dict([(key, str(i)) for i, key in enumerate(
             self.data)])
+
+
+def keyword_to_gephi_graph(database, keyword):
+    """Export a Gephi graph for a database for all activities which include the string ``keyword``, all all inputs for the filtered activities.
+
+    Args:
+        * *database* (str): Database name.
+        * *keyword* (str): Keyword to search for.
+
+    Returns:
+        The filepath of the exported file.
+
+    """
+    query = Database(database).query(Filter("name", "in", keyword))
+    return DatabaseSelectionToGEXF(database, set(query.keys())).export()

File bw2data/io/import_ecospold.py

         max_ = floatish(exc.get("maxValue"))
         sigma = floatish(exc.get("standardDeviation95"))
 
+        if uncertainty == 1 and sigma in (0, 1):
+            # Bad data
+            uncertainty = 0
+
         if uncertainty == 1:
             # Lognormal
             data.update({
                 'scale': math.log(math.sqrt(float(sigma))),
                 'negative': mean < 0,
             })
-            if data['scale'] == 0 or np.isnan(data['scale']):
-                # Bad ecoinvent data
+            if np.isnan(data['scale']):
+                # Bad data
                 data['uncertainty type'] = UndefinedUncertainty.id
                 data['loc'] = data['amount']
                 del data["scale"]

File bw2data/io/import_ecospold2.py

 # -*- coding: utf-8 -*
 from __future__ import division
 from .. import Database, databases, mapping
-# from ..logs import get_io_logger
+from ..logs import get_io_logger
 from ..units import normalize_units
 from lxml import objectify, etree
 from stats_arrays.distributions import *
 import os
+import pprint
 import progressbar
 import warnings
 
         self.name = name
 
     def importer(self):
+        self.log, self.logfile = get_io_logger("es3-import")
         # Note: Creates biosphere3 database
         activities, biosphere, technosphere = Ecospold2DataExtractor().extract(
             self.datapath,
             self.metadatapath
         )
-        self.file = open("exchange-weirdness.txt", "w")
         self.create_biosphere3_database(biosphere)
         self.create_database(biosphere, technosphere, activities)
 
         if "biosphere3" in databases:
             del databases["biosphere3"]
 
+        print "Writing new biosphere database"
         with warnings.catch_warnings():
             warnings.simplefilter("ignore")
             db = Database("biosphere3")
             db.process()
 
     def create_database(self, biosphere, technosphere, activities):
+        print "Processing database"
         for elem in activities:
             elem["unit"] = ""
             elem["type"] = "product"
                     exc['type'] = 'technosphere'
                     exc['input'] = (self.name, exc['activity'])
                 if exc['input'][1] is None:
+                    # This exchange wasn't linked correctly by ecoinvent
+                    # It is missing the "activityLinkId" attribute
+                    # See http://www.ecoinvent.org/database/ecoinvent-version-3/reports-of-changes/known-data-issues/
+                    # We ignore it for now, but add attributes to log it later
                     exc['input'] = None
+                    exc['activity filename'] = elem['filename']
+                    exc['activity name'] = elem['name']
                     continue
-                    # self.file.write("Activity name: %s\n" % elem['name'])
-                    # self.file.write('Flow name: %s\n' % exc['name'])
-                    # self.file.write('Filename: %s\n' % elem['filename'])
-                    # self.file.write('XML:\n%s\n' % exc['xml'])
 
         # Drop "missing" exchanges
         for elem in activities:
+            for exc in [x for x in elem["exchanges"] if not x['input']]:
+                self.log.warning(u"Dropped missing exchange: %s" % \
+                    pprint.pformat(exc, indent=2))
             elem["exchanges"] = [x for x in elem["exchanges"] if x['input']]
 
         data = dict([((self.name, elem['id']), elem) for elem in activities])
         if self.name in databases:
             del databases[self.name]
 
+        print "Writing new database"
         with warnings.catch_warnings():
             warnings.simplefilter("ignore")
             db = Database(self.name)
             db.register("Ecospold2", ["biosphere3"], len(data))
             db.write(data)
 
-            # Purge weird exchanges without valid activities
+            # Purge any exchanges without valid activities
+            rewrite = False
             for value in data.values():
+                for exc in [x for x in value['exchanges'] \
+                        if x['input'] not in mapping]:
+                    rewrite = True
+                    self.log.critical(u"Purging unlinked exchange:\n%s" % \
+                        pprint.pformat(exc, indent=2))
                 value['exchanges'] = [x for x in value['exchanges'] if
                                       x['input'] in mapping]
 
-            # Rewrite with correct data
-            db.write(data)
+            if rewrite:
+                # Rewrite with correct data
+                db.write(data)
             db.process()
 
 

File bw2data/io/import_simapro.py

     return [name, geo]
 
 
-def is_number(x):
-    try:
-        float(x)
-        return True
-    except:
-        return False
-
 INTRODUCTION = """Starting SimaPro import:
 \tFilepath: %s
 \tDelimiter: %s
 \tDefault geo: %s
 """
 
-SIMAPRO_BIOSPHERE = set(["Resources", "Emissions to air", "Emissions to water", "Emissions to soil"])
+SIMAPRO_BIOSPHERE = {
+    "Emissions to air": "air",
+    "Resources": "resource",
+    "Emissions to water": "water",
+    "Emissions to soil": "soil",
+}
+
+SIMAPRO_BIO_SUBCATEGORIES = {
+    "high. pop.": u'high population density',
+    "low. pop.": u'low population density',
+    "low. pop., long-term": u'low population density, long-term',
+    "stratosphere + troposphere": u'lower stratosphere + upper troposphere',
+    "groundwater": u'ground-',
+    "groundwater, long-term": u'ground-, long-term',
+}
 
 
 class SimaProImporter(object):
         * Links to background databases like ecoinvent can be included
 
     However, the SimaPro importer has the following limitations:
-        * Multioutput datasets are not supported
-        * Linking against datasets other than ecoinvent is not tested (most are not available otherwise)
+        * Multioutput datasets are not supported.
+        * Uncertainty data is not imported.
+        * Social and economic flows are ignored.
+        * Linking against datasets other than ecoinvent is not tested (most are not available otherwise).
         * Modifying an existing database is not supported; it can only be overwritten completely.
-        * Uncertainty data is not imported.
-        * Biosphere flows are not imported.
         * Not all SimaPro unit changes from ecoinvent are included (no comprehensive list seems to be available)
         * SimaPro unit conversions will cause problems matching to background databases (e.g. if you specify an import in megajoules, and the ecoinvent process is defined in kWh, they won't match)
 
     Args:
         * ``filepath``: Filepath for file to import.
         * ``delimiter`` (str, default=tab character): Delimiter character for CSV file.
-        * ``depends`` (list, default= ``['ecoinvent 2.2']`` ): List of databases referenced by datasets in this file.
+        * ``depends`` (list, default= ``['ecoinvent 2.2']`` ): List of databases referenced by datasets in this file. The database *biosphere* is always matched against.
         * ``overwrite`` (bool, default=False): Overwrite existing database.
         * ``name`` (str, default=None): Name of the database to import. If not specified, the SimaPro project name will be used.
         * ``default_geo`` (str, default= ``GLO`` ): Default location for datasets with no location specified.
             elif len(line) == 1:
                 label = line[0]
             elif label in SIMAPRO_BIOSPHERE:
-                continue
+                categories = [
+                    SIMAPRO_BIOSPHERE[label],
+                    SIMAPRO_BIO_SUBCATEGORIES.get(line[1], line[1])
+                ]
+                exchanges.append({
+                    'name': line[0],
+                    'categories': filter(lambda x: x, categories),
+                    'amount': float(line[2]),
+                    'loc': float(line[2]),
+                    'uncertainty type': 0,
+                    'unit': normalize_units(line[3]),
+                    'uncertainty': line[4],
+                    'biosphere': True,
+                })
             else:
                 # Try to interpret as ecoinvent
                 name, geo = detoxify(line[0], self.log)
                 exchanges.append({
                     'name': name,
                     'amount': float(line[1]),
+                    'loc': float(line[1]),
+                    'uncertainty type': 0,
                     'comment': label,
                     'unit': normalize_units(line[2]),
                     'uncertainty': line[3],
         line = dataset[self.get_exchanges_index(dataset) + 1]
         return {
             'amount': float(line[1]),
+            'loc': float(line[1]),
             'input': (self.db_name, data['code']),
             'uncertainty type': 0,
             'type': 'production'
 
         Need to be able to match against both ``(name, unit, geo)`` and ``(name, unit)``.
 
+        Also loads the *biosphere* database.
+
         Global variables:
             * ``self.background``: dict
+            * ``self.biosphere``: dict
 
         """
         background_data = {}
                             value['location'])] = key
             self.background[(value['name'].lower(), value['unit'])] = key
 
+        self.biosphere = Database("biosphere").load()
+
     def link_exchanges(self, dataset):
         """Link all exchanges in a given dataset"""
         dataset['exchanges'] = [
         This method looks first in the foreground, then the background; if an exchange isn't found an error is raised."""
         if exc.get('type', None) == 'production':
             return exc
+        elif exc.get('biosphere', False):
+            try:
+                code = ('biosphere', activity_hash(exc))
+                assert code in self.biosphere
+                exc['input'] = code
+                exc['type'] = 'biosphere'
+                exc['uncertainty type'] = 0
+                del exc['biosphere']
+                return exc
+            except:
+                raise MissingExchange("Can't find biosphere flow\n%s" % \
+                    pprint.pformat(exc, indent=4))
         elif (exc["name"], exc["unit"]) in self.foreground:
             exc["input"] = self.foreground[(exc["name"], exc["unit"])]
             found = True

File bw2data/query.py

         self.result = result
 
     def __str__(self):
-        return u"Query result with %i entries" % len(self.result)
+        return "Query result with %i entries" % len(self.result)
 
     def __repr__(self):
         if len(self.result) > 20:

File bw2data/serialization.py

         return key in self.data
 
     def __str__(self):
-        return self.__unicode__()
+        return unicode(self).encode('utf-8')
 
     def __delitem__(self, name):
         del self.data[name]

File bw2data/tests/__init__.py

 # -*- coding: utf-8 -*-
 from .base import BW2DataTest
+from .array import ArrayProxyTest, ListArrayProxyTest
+from .config import ConfigTest
 from .database import DatabaseTest
 from .geo import GeoTest
+from .simapro import SimaProImportTest
+from .sparse import SparseMatrixProxyTest
 from .utils import UtilsTest
-from .array import ArrayProxyTest, ListArrayProxyTest
-from .sparse import SparseMatrixProxyTest
-from .simapro import SimaProImportTest

File bw2data/tests/config.py

+# -*- coding: utf-8 -*-
+import os
+from . import BW2DataTest
+from .. import config
+import json
+
+
+class ConfigTest(BW2DataTest):
+    def test_request_directory_not_writable(self):
+        dirpath = config.request_dir("untouchable")
+        os.chmod(dirpath, 000)
+        self.assertFalse(config.request_dir("untouchable"))
+        os.chmod(dirpath, 776)
+
+    def test_request_directory(self):
+        self.assertTrue(config.request_dir("wow"))
+        self.assertTrue(config.request_dir(u"привет"))
+
+    def test_basic_preferences(self):
+        preferences = {
+            "use_cache": True,
+        }
+        config.load_preferences()
+        self.assertEqual(preferences, config.p)
+
+    def test_save_preferences(self):
+        config.load_preferences()
+        config.p['saved'] = "yep"
+        config.save_preferences()
+        self.assertEqual(config.p['saved'], "yep")
+        config.load_preferences()
+        self.assertEqual(config.p['saved'], "yep")
+        data = json.load(open(os.path.join(config.dir, "preferences.json")))
+        self.assertEqual(data['saved'], "yep")

File bw2data/tests/database.py

         d = Database("food")
         with self.assertRaises(UnknownObject):
             d.write(food)
+
+    def test_repr(self):
+        d = Database("food")
+        self.assertTrue(isinstance(str(d), str))
+        self.assertTrue(isinstance(unicode(d), unicode))

File bw2data/tests/simapro.py

 
 
 class SimaProImportTest(BW2DataTest):
+    def extra_setup(self):
+        # SimaPro importer always wants biosphere database
+        database = Database("biosphere")
+        database.register(
+            format="Test data",
+            depends=[],
+            num_processes=0
+        )
+        database.write({})
+
     def filepath(self, name):
         return os.path.join(SP_FIXTURES_DIR, name + '.txt')
 
             "code": u'6524377b64855cc3daf13bd1bcfe0385',
             "exchanges": [{
                 'amount': 1.0,
+                'loc': 1.0,
                 'input': ('W00t', u'6524377b64855cc3daf13bd1bcfe0385'),
                 'type': 'production',
                 'uncertainty type': 0}],
         data = Database("W00t").load().values()[0]
         self.assertEqual(data['exchanges'], [{
             'amount': 1.0,
+            'loc': 1.0,
             'input': ('W00t', u'6524377b64855cc3daf13bd1bcfe0385'),
             'type': 'production',
             'uncertainty type': 0

File bw2data/units.py

 UNITS_NORMALIZATION = {
+    "ha": u"hectare",
+    "kbq": u"kilo Becquerel",
+    "kg": u"kilogram",
+    "km": u"kilometer",
+    "kwh": u"kilowatt hour",
+    "m2": u"square meter",
+    "m2a": u"square meter-year",
+    "m3": u"cubic meter",
+    "ma": u"meter-year",
     "mj": u"megajoule",
-    "kg": u"kilogram",
-    "m3": u"cubic meter",
-    "m2": u"square meter",
-    'm3a': u"cubic meter-year",
-    "m2a": u"square meter-year",
+    "nm3": u"cubic meter",
+    "pkm": u"person kilometer",
+    "tkm": u"ton kilometer",
+    "vkm": u"vehicle kilometer",
     # SimaPro units to convert, ranging from sensible to bizarre
-    "personkm": u"pkm",  # SimaPro changes this but doesn't change tkm!?
+    "m3a": u"cubic meter-year",
+    "personkm": u"person kilometer",
     "p": u"unit",
-    "my": u"ma",  # SimaPro is much better (meter-year)
+    "my": u"meter-year",
 }
 
 normalize_units = lambda x: UNITS_NORMALIZATION.get(x.lower(), x)
+
+
+

File bw2data/upgrades.py

+# -*- coding: utf-8 -*-
+from . import Database, databases, Method, methods, config
+from .utils import activity_hash, Fore
+from .units import normalize_units
+import numpy as np
+import progressbar
+import sys
+import warnings
+
+STATS_ARRAY_WARNING = "\n\nIt looks like you need to upgrade to the ``" + \
+    Fore.GREEN + "stats_arrays" + Fore.RESET + \
+    "`` package. This is a new statistical toolkit that replaces the deprecated ``" \
+    + Fore.RED + "bw_stats_toolkit" + Fore.RESET + "``. Read more at """ + \
+    Fore.BLUE + "https://bitbucket.org/cmutel/stats_arrays/." + Fore.RESET + \
+    "\n\nTo do this, use `pip` (or whatever package manager you prefer) to install `stats_arrays`, e.g.:\n\n\t" \
+    + Fore.MAGENTA + "pip install stats_arrays" + Fore.RESET + \
+    "\n\nThen run the following program on the command line:\n\n\t" + \
+    Fore.MAGENTA + "bw2-uptodate.py\n" + Fore.RESET
+
+UPTODATE_WARNING = Fore.RED + "\n\nYour data needs to be updated." + Fore.RESET \
+    + " Please run the following program on the command line:\n\n\t" + \
+    Fore.BLUE + "bw2-uptodate.py\n" + Fore.RESET
+
+
+def check_status():
+    """Check if updates need to be applied.
+
+    Returns:
+        List of needed updates (strings), if any.
+
+    """
+    try:
+        import stats_arrays
+    except ImportError:
+        warnings.warn(STATS_ARRAY_WARNING)
+        try:
+            # ipython won't let us leave the shell...
+            __IPYTHON__
+        except:
+            sys.exit(0)
+    updates = []
+    if not databases.list:
+        # First time setup - no upgrades needed
+        # Setup function will populate config.p
+        return []
+    if "upgrades" not in config.p:
+        config.p['upgrades'] = {}
+        config.save_preferences()
+    if not config.p['upgrades'].get("stats_array reformat", False):
+        updates.append("stats_array reformat")
+    if not config.p['upgrades'].get('0.10 units restandardization', False):
+        updates.append('0.10 units restandardization')
+    if updates:
+        warnings.warn(UPTODATE_WARNING)
+    return updates
+
+
+def convert_from_stats_toolkit():
+    """Convert all databases from ``bw_stats_toolkit`` to ``stats_arrays`` (https://bitbucket.org/cmutel/stats_arrays/)."""
+    import stats_arrays as sa
+    assert sa, "Must have `stats_arrays` package for this function"
+
+    def update_exchange(exc):
+        if exc.get('uncertainty type', None) is None:
+            return exc
+        elif 'loc' in exc:
+            # Already updated
+            return exc
+        if 'sigma' in exc:
+            exc['scale'] = exc['sigma']
+            del exc['sigma']
+        exc['loc'] = exc['amount']
+        if exc['uncertainty type'] == sa.LognormalUncertainty.id:
+            exc['negative'] = exc['amount'] < 0
+            exc['loc'] = np.log(np.abs(exc['amount']))
+        return exc
+
+    print "Starting inventory conversion"
+    for database in databases:
+        print "Working on %s" % database
+        db = Database(database)
+        print "\t... loading ..."
+        data = db.load()
+        print "\t... converting ..."
+        new_data = {}
+
+        for index, (key, value) in enumerate(data.iteritems()):
+            if 'exchanges' in value:
+                value['exchanges'] = [update_exchange(exchange
+                    ) for exchange in value['exchanges']]
+            new_data[key] = value
+
+        print "\t... writing ..."
+        db.write(new_data)
+        db.process()
+    print "Inventory conversion finished\nStarting IA conversion"
+
+    widgets = ['IA methods: ', progressbar.Percentage(), ' ',
+               progressbar.Bar(marker=progressbar.RotatingMarker()), ' ',
+               progressbar.ETA()]
+    pbar = progressbar.ProgressBar(widgets=widgets, maxval=len(methods.list)
+                                   ).start()
+
+    for index, name in enumerate(methods):
+        method = Method(name)
+        method.process()
+        pbar.update(index)
+    pbar.finish()
+    print "Conversion finished"
+
+
+def units_renormalize():
+    """Renormalize some units, making many activity datasets with hash ids change."""
+    db_versions = {name: databases[name]['version'] for name in databases.list}
+
+    try:
+        mapping = {}
+
+        print "Updating inventory databases.\nFirst pass: Checking process IDs"
+
+        widgets = [
+            'Databases: ',
+            progressbar.Percentage(),
+            ' ',
+            progressbar.Bar(marker=progressbar.RotatingMarker()),
+            ' ',
+            progressbar.ETA()
+        ]
+        pbar = progressbar.ProgressBar(
+            widgets=widgets,
+            maxval=len(databases.list)
+        ).start()
+
+        for index, database in enumerate(databases.list):
+            db = Database(database)
+            db_data = db.load()
+            for key, ds in db_data.iteritems():
+                old_hash = (database, activity_hash(ds))
+                ds['unit'] = normalize_units(ds['unit'])
+                if key[1] != old_hash:
+                    continue
+                new_hash = (database, activity_hash(ds))
+                if new_hash != old_hash:
+                    mapping[old_hash] = new_hash
+
+            for key, ds in db_data.iteritems():
+                if key in mapping:
+                    db_data[mapping[key]] = db_data[key]
+                    del db_data[key]
+
+            db.write(db_data)
+            pbar.update(index)
+
+        pbar.finish()
+
+        print "Second pass: Fixing links..."
+
+        widgets = [
+            'Databases: ',
+            progressbar.Percentage(),
+            ' ',
+            progressbar.Bar(marker=progressbar.RotatingMarker()),
+            ' ',
+            progressbar.ETA()
+        ]
+        pbar = progressbar.ProgressBar(
+            widgets=widgets,
+            maxval=len(databases.list)
+        ).start()
+
+        for index, database in enumerate(databases.list):
+            db = Database(database)
+            db_data = db.load()
+            for ds in db_data.values():
+                for exc in ds['exchanges']:
+                    if tuple(exc['input']) in mapping:
+                        exc['input'] = mapping[tuple(exc['input'])]
+
+            db.write(db_data)
+            db.process()
+            pbar.update(index)
+
+        pbar.finish()
+
+        print "Updating IA methods"
+
+        widgets = [
+            'Methods: ',
+            progressbar.Percentage(),
+            ' ',
+            progressbar.Bar(marker=progressbar.RotatingMarker()),
+            ' ',
+            progressbar.ETA()
+        ]
+        pbar = progressbar.ProgressBar(
+            widgets=widgets,
+            maxval=len(methods.list)
+        ).start()
+
+        for index, method in enumerate(methods.list):
+            m = Method(method)
+            m_data = m.load()
+            for row in m_data:
+                if row[0] in mapping:
+                    row[0] = mapping[row[0]]
+
+            m.write(m_data)
+            m.process()
+            pbar.update(index)
+
+        pbar.finish()
+
+    except:
+        print "Oops, something went wrong. Reverting all changes..."
+        for database in databases.list:
+            Database(database).revert(db_versions[database])
+        raise

File bw2data/utils.py

 from . import config, reset_meta
 import codecs
 import hashlib
-import numpy as np
 import os
-import progressbar
 import random
 import re
 import requests
     import cStringIO as StringIO
 except ImportError:
     import StringIO
-try:
-    import stats_arrays as sa
-except ImportError:
-    import warnings
-    WARNING_TEXT = """
-
-It looks like you need to upgrade to the ``stats_arrays`` package. This is a new statistical toolkit that replaces the deprecated ``bw_stats_toolkit``. Read more at (https://bitbucket.org/cmutel/stats_arrays/).
-
-To do this, use `pip` (or whatever package manager you prefer) to install `stats_arrays`, e.g.:
-
-    pip install stats_arrays
-
-Then, enter a Python interpreter, and run the following:
-
-    from bw2data.utils import convert_from_stats_toolkit
-    convert_from_stats_toolkit()
-    """
-    warnings.warn(WARNING_TEXT)
-    sa = None
 
 # Maximum value for unsigned integer stored in 4 bytes
 MAX_INT_32 = 4294967295
 
+# Type of technosphere/biosphere exchanges used in processed Databases
 TYPE_DICTIONARY = {
     "production": 0,
     "technosphere": 1,
 
 DOWNLOAD_URL = "http://brightwaylca.org/data/"
 
if config._windows:
    # Windows consoles don't interpret ANSI escape sequences, and colorama
    # is unreliable for this case -- see
    # http://stackoverflow.com/questions/9848889/colorama-for-python-not-returning-colored-print-lines-on-windows
    # Every color attribute therefore degrades gracefully to an empty
    # string, so callers can interpolate ``Fore.RED`` etc. unconditionally.
    class _Fore(object):
        def __getattr__(self, name):
            return ""
    Fore = _Fore()
else:
    # ANSI SGR foreground color escape sequences (codes 30-37, plus 39 to
    # reset the foreground to the terminal default).
    class Fore(object):
        BLACK = '\x1b[30m'
        RED = '\x1b[31m'
        GREEN = '\x1b[32m'
        YELLOW = '\x1b[33m'
        BLUE = '\x1b[34m'
        MAGENTA = '\x1b[35m'
        CYAN = '\x1b[36m'
        WHITE = '\x1b[37m'
        RESET = '\x1b[39m'
+
 
 def natural_sort(l):
     """Sort the given list in the way that humans expect"""
         return obj
 
 
def random_string(length=8):
    """Generate a random string of letters and numbers.

    Args:
        * *length* (int): Length of string, default is 8

    Returns:
        A string

    """
    # Alphanumeric pool: ASCII letters (both cases) plus digits.
    characters = string.letters + string.digits
    return "".join([random.choice(characters) for _ in xrange(length)])
 
 
 def combine_methods(name, *ms):
+    """Combine LCIA methods by adding duplicate characterization factors.
+
+    Args:
+        * *ms* (one or more method ids): The method ids, e.g. ``("my method", "wow")``. Not the actual Method classes.
+
+    Returns:
+        The new Method
+
+    """
     from . import Method, methods
     data = {}
     units = set([methods[tuple(x)]["unit"] for x in ms])
     method.register(**meta)
     method.write(data)
     method.process()
+    return method
 
 
 def database_hash(data):
+    """Hash a Database.
+
+    Data is recursively sorted so that the hashes are consistent. Useful for exchanging data and making sure background databases are the same across computers.
+
+    Args:
+        * *data* (dict): The Database data.
+
+    Returns:
+        A MD5 hash string, hex-encoded.
+
+    """
     return hashlib.md5(unicode(recursively_sort(data))).hexdigest()
 
 
 def activity_hash(data):
+    """Hash an activity dataset.
+
+    Used to import data formats like ecospold 1 (ecoinvent v1-2) and SimaPro, where no unique attributes for datasets are given. This is clearly an imperfect and brittle solution, but there is no other obvious approach at this time.
+
+    Uses the following, in order:
+        * *name* Lower case
+        * *categories* In string form, joined together with ``""``.
+        * *unit* Lower case, default is ``""``.
+        * *location* Lower case, default is ``""``.
+
+    Args:
+        * *data* (list): The activity dataset data.
+
+    Returns:
+        A MD5 hash string, hex-encoded.
+
+    """
     string = (data["name"].lower() + \
         u"".join(data["categories"]) + \
         (data.get("unit", u"") or u"").lower() + \
 
 
 def download_file(filename):
+    """Download a file from ``DOWNLOAD_URL`` and write it to disk in ``downloads`` directory.
+
+    Streams download to reduce memory usage.
+
+    Args:
+        * *filename* (str): The filename to download.
+
+    Returns:
+        The path of the created file.
+
+    """
+
     dirpath = config.request_dir("downloads")
     filepath = os.path.join(dirpath, filename)
     download = requests.get(DOWNLOAD_URL + filename, stream=True).raw
     from io import download_biosphere, download_methods
     config.create_basic_directories()
     config.is_temp_dir = False
+    config.load_preferences()
+    config.p["upgrades"] = {
+        "stats_array reformat": True,
+        "0.10 units restandardization": True,
+    }
+    config.save_preferences()
     download_biosphere()
     download_methods()
 
     zf.close()
     memory_obj.seek(0)
     return memory_obj
-
-
-def convert_from_stats_toolkit():
-    """Convert all databases from ``bw_stats_toolkit`` to ``stats_arrays`` (https://bitbucket.org/cmutel/stats_arrays/)."""
-    def update_exchange(exc):
-        if exc.get('uncertainty type', None) is None:
-            return exc
-        if 'sigma' in exc:
-            exc['scale'] = exc['sigma']
-            del exc['sigma']
-        exc['loc'] = exc['amount']
-        if exc['uncertainty type'] == sa.LognormalUncertainty.id:
-            exc['negative'] = exc['amount'] < 0
-            exc['loc'] = np.log(np.abs(exc['amount']))
-        return exc
-
-    assert sa, "Must have `stats_arrays` package for this function"
-    from bw2data import Database, databases, Method, methods
-    print "Starting inventory conversion"
-    for database in databases:
-        print "Working on %s" % database
-        db = Database(database)
-        print "\t... loading ..."
-        data = db.load()
-        print "\t... converting ..."
-        new_data = {}
-
-        for index, (key, value) in enumerate(data.iteritems()):
-            if 'exchanges' in value:
-                value['exchanges'] = [update_exchange(exchange
-                    ) for exchange in value['exchanges']]
-            new_data[key] = value
-
-        print "\t... writing ..."
-        db.write(new_data)
-        db.process()
-    print "Inventory conversion finished\nStarting IA conversion"
-
-    widgets = ['IA methods: ', progressbar.Percentage(), ' ',
-               progressbar.Bar(marker=progressbar.RotatingMarker()), ' ',
-               progressbar.ETA()]
-    pbar = progressbar.ProgressBar(widgets=widgets, maxval=len(methods.list)
-                                   ).start()
-
-    for index, name in enumerate(methods):
-        method = Method(name)
-        method.process()
-        pbar.update(index)
-    pbar.finish()
-    print "Conversion finished"
-
-
-def keyword_to_gephi_graph(database, keyword):
-    """Export a Gephi graph from a keyword in activity names"""
-    from . import Database, Filter
-    from bw2data.io.export_gexf import DatabaseSelectionToGEXF
-    query = Database(database).query(Filter("name", "in", keyword))
-    return DatabaseSelectionToGEXF(database, set(query.keys())).export()

File docs/conf.py

 # built documents.
 #
 # The short X.Y version.
-version = '0.9'
+version = '0.10'
 # The full version, including alpha/beta/rc tags.
-release = '0.9.3'
+release = '0.10.1'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
 BW2Package
 ==========
 
+Brightway2 has its own data format for efficient saving, loading, and transfer. Read more at the `Brightway2 documentation <http://brightway2.readthedocs.org/en/latest/key-concepts.html#data-interchange>`_.
+
+.. note:: Both **imports** and **exports** are supported.
+
+
 .. autoclass:: bw2data.io.BW2PackageImporter
     :members:
 
 .. autoclass:: bw2data.io.BW2PackageExporter
     :members:
 
-Ecospold
-========
+Ecospold1
+=========
+
+Ecospold version 1 is the data format of ecoinvent versions 1 and 2, and the US LCI. It is an XML data format with reasonable defaults.
+
+.. note:: only **imports** are supported.
 
 .. autoclass:: bw2data.io.EcospoldImporter
     :members:
 SimaPro
 =======
 
+Import a `SimaPro <http://www.pre-sustainability.com/simapro-lca-software>`_ text file.
+
+.. note:: only **imports** are supported.
+
 .. autoclass:: bw2data.io.SimaProImporter
     :members:
+
+Gephi
+=====
+
+`Gephi <http://gephi.org/>`_ is an open-source graph visualization and analysis program.
+
+.. note:: only **exports** are supported.
+
+.. autoclass:: bw2data.io.DatabaseToGEXF
+    :members:
+
+.. autoclass:: bw2data.io.DatabaseSelectionToGEXF
+    :members:
+
+.. autofunction:: bw2data.io.keyword_to_gephi_graph

File docs/technical.rst

    database
    method
    io
+   utils

File docs/utils.rst

+Utilities
+=========
+
+.. autofunction:: bw2data.utils.activity_hash
+
+.. autofunction:: bw2data.utils.combine_methods
+
+.. autofunction:: bw2data.utils.database_hash
+
+.. autofunction:: bw2data.utils.download_file

File requirements.txt

 requests>=1.1.0
 scipy
 voluptuous
-brightway2>=0.9.1
+brightway2>=0.10
 stats_arrays
 
 setup(
     name='bw2data',
-    version="0.9.3.3",
+    version="0.10.1",
     packages=packages,
     author="Chris Mutel",
     author_email="cmutel@gmail.com",
     license=open('LICENSE.txt').read(),
     install_requires=[
         "brightway2",
+        "docopt",
         "lxml",
         "numpy",
         "progressbar",
     ],
     url="https://bitbucket.org/cmutel/brightway2-data",
     long_description=open('README.rst').read(),
+    scripts=["bw2data/bin/bw2-uptodate.py"],
     classifiers=[
         'Development Status :: 4 - Beta',
         'Intended Audience :: End Users/Desktop',