Commits

Chris Mutel  committed 7a4fd2d

Initial commit

  • Participants

Comments (0)

Files changed (22)

+syntax:glob
+dist
+*.pyc
+*~
+MANIFEST
+*.sublime*
+Copyright (c) 2012, Chris Mutel and ETH Zürich
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+
+Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+Neither the name of ETH Zürich nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+include *.txt
+include bw2calc/*.py
+include bw2calc/io/*.py
+include bw2calc/proxies/*.py
+Brightway2 is a framework for life cycle assessment calculations.
+- import_method can't handle SimaPro XML files (where exchanges are numbered sequentially)
+- import_method should skip SimaPro XML files
+- RobustFinder to find processes or flows which aren't correctly specified (import scripts)
+- import_ecospold can't handle SimaPro XML files (where exchanges are numbered sequentially)
+- Manager.copy() should copy databases

File brightway2/__init__.py

# Package initialization: build the shared singletons in dependency order.
# Config must exist first, because each data store derives its on-disk
# path from ``config.dir`` at class-definition time.
from _config import Config
config = Config()

from _meta import Meta
meta = Meta()

from _mapping import Mapping
mapping = Mapping()

from _methods import Methods
methods = Methods()

from manager import Manager
# Bug fix: ``iIs`` was listed twice in the original import; the duplicate
# is dropped. NOTE(review): the second occurrence may have been intended
# as the case-insensitive counterpart of ``In`` — confirm against query.py.
from query import Is, Isnt, Contains, In, Exchange, iIs, iIsnt, iContains

File brightway2/_config.py

+# -*- coding: utf-8 -*
+import os
+
+
class Config(object):
    """Locate and manage the Brightway2 data directory.

    Resolution order: an explicit ``path`` argument, then the
    ``BRIGHTWAY2-VAR`` environment variable, then ``~/brightway2``.
    """
    def __init__(self, path=None):
        self.dir = self.get_home_directory(path)
        self.check_dir()

    def check_dir(self):
        # Placeholder for future validation of the data directory.
        pass

    def get_home_directory(self, path):
        """Return the data directory, using the resolution order above.

        Unix/Mac: export BRIGHTWAY2-VAR=/brightway2/directory
        Windows:  set BRIGHTWAY2-VAR=\\brightway2\\directory
        """
        if path:
            return path
        return os.getenv("BRIGHTWAY2-VAR") or os.path.expanduser("~/brightway2")

    def _get_dir(self):
        return self._dir

    def _set_dir(self, new_dir):
        # Re-validate whenever the directory is reassigned.
        self._dir = new_dir
        self.check_dir()

    dir = property(_get_dir, _set_dir)

File brightway2/_mapping.py

+import os
+from . import config
+try:
+    import cPickle as pickle
+except ImportError:
+    import pickle
+
+
class Mapping(object):
    """Persistent map from database keys to sequential integer ids.

    The ids index rows/columns in the processed numpy arrays.  Stored as a
    pickle file in the Brightway2 data directory.
    """
    _filepath = os.path.join(config.dir, "mapping.pickle")

    def __init__(self):
        self.reload()

    def reload(self):
        """Load the pickled mapping, creating an empty one if absent."""
        try:
            # Context manager closes the handle (the original leaked it).
            with open(self._filepath, "rb") as f:
                self._data = pickle.load(f)
        except IOError:
            # Create if not present
            self._data = {}
            self.flush()

    def add(self, keys):
        """Assign new sequential ids to any ``keys`` not already mapped."""
        # Bug fix: ``max()`` raised ValueError on a freshly created (empty)
        # mapping; start numbering from 0 in that case.
        index = max(self._data.values()) if self._data else 0
        for i, key in enumerate(keys):
            if key not in self._data:
                self._data[key] = index + i + 1
        self.flush()

    def delete(self, keys):
        """Remove ``keys`` from the mapping and persist the change."""
        for key in keys:
            del self._data[key]
        self.flush()

    def flush(self):
        """Write the mapping to disk."""
        with open(self._filepath, "wb") as f:
            pickle.dump(self._data, f, protocol=pickle.HIGHEST_PROTOCOL)

    def __getitem__(self, key):
        return self._data[key]

    def __setitem__(self, key, value):
        # Bug fix: the original raised ``NotImplemented`` (a value, not an
        # exception), which produced a TypeError. Ids are append-only by
        # design; use add()/delete() instead.
        raise NotImplementedError

    def __contains__(self, key):
        return key in self._data

File brightway2/_meta.py

+# -*- coding: utf-8 -*
+import os
+import json
+from . import config
+
+
class Meta(object):
    """Metadata store for installed databases (format, depends, version).

    Stored as human-readable JSON in the Brightway2 data directory.
    """
    _filepath = os.path.join(config.dir, "meta.json")

    def __init__(self):
        self.reload()

    def reload(self):
        """Load metadata from disk, creating an empty store if absent."""
        try:
            # Context manager closes the handle (the original left it to
            # the garbage collector).
            with open(self._filepath, "r") as f:
                self._data = json.load(f)
        except IOError:
            # Create if not present
            self._data = {}
            self.flush()

    def add(self, name, data):
        """Register metadata ``data`` for database ``name`` and persist."""
        self._data[name] = data
        self.flush()

    def delete(self, name):
        """Remove database ``name`` from the store and persist."""
        del self._data[name]
        self.flush()

    def flush(self):
        """Write the metadata to disk as indented JSON."""
        with open(self._filepath, "w") as f:
            json.dump(self._data, f, indent=2)

    def increment_version(self, database):
        """Bump and return the stored version number of ``database``."""
        self._data[database]["version"] += 1
        self.flush()
        return self._data[database]["version"]

    @property
    def databases(self):
        return self._data

    @property
    def list(self):
        return self._data.keys()

    def version(self, database):
        return self._data[database]["version"]

    def __getitem__(self, key):
        return self._data[key]

    def __contains__(self, key):
        return key in self._data

File brightway2/_methods.py

+# -*- coding: utf-8 -*
+import os
+import numpy as np
+from . import config, mapping
+from utils import MAX_INT_32
+try:
+    import cPickle as pickle
+except:
+    import pickle
+
+
class Methods(object):
    """Metadata store for impact assessment methods, keyed by method name."""
    _filepath = os.path.join(config.dir, "methods.pickle")

    def __init__(self):
        self.reload()

    def reload(self):
        """Load the pickled metadata, creating an empty store if absent."""
        try:
            # Bug fix: pickles must be opened in binary mode ("rb"); the
            # original used text mode "r", which corrupts reads on Windows.
            with open(self._filepath, "rb") as f:
                self._data = pickle.load(f)
        except IOError:
            # Create if not present
            self._data = {}
            self.flush()

    def add(self, name, data):
        """Register metadata ``data`` for method ``name`` and persist."""
        self._data[name] = data
        self.flush()

    def delete(self, name):
        """Remove method ``name`` from the store and persist."""
        del self._data[name]
        self.flush()

    def flush(self):
        """Write the metadata to disk."""
        with open(self._filepath, "wb") as f:
            pickle.dump(self._data, f, protocol=pickle.HIGHEST_PROTOCOL)

    @property
    def list(self):
        return self._data.keys()

    def __getitem__(self, key):
        return self._data[key]

    def __contains__(self, key):
        return key in self._data

    def process(self, name):
        """Create numpy structured arrays for IA method"""
        data_filepath = os.path.join(config.dir, "ia", "%s.pickle" % self[
            name]["abbreviation"])
        with open(data_filepath, "rb") as f:
            data = pickle.load(f)
        assert data
        num_cfs = len(data)
        dtype = [('uncertainty_type', np.uint8),
            ('flow', np.uint32),
            ('index', np.uint32),
            ('amount', np.float32),
            ('sigma', np.float32),
            ('minimum', np.float32),
            ('maximum', np.float32),
            ('negative', bool)]  # np.bool was just an alias of bool
        arr = np.zeros((num_cfs, ), dtype=dtype)
        arr['minimum'] = arr['maximum'] = arr['sigma'] = np.NaN
        for i, cf in enumerate(data):
            # Each cf is (flow key, amount); no uncertainty data is stored.
            arr[i] = (
                0,
                mapping[cf[0]],
                MAX_INT_32,  # placeholder index, filled in at calculation time
                cf[1],
                np.NaN,
                np.NaN,
                np.NaN,
                False
                )
        filepath = os.path.join(config.dir, "processed", "%s.pickle" % \
            self[name]["abbreviation"])
        with open(filepath, "wb") as f:
            pickle.dump(arr, f, protocol=pickle.HIGHEST_PROTOCOL)

File brightway2/errors.py

class MissingIntermediateData(StandardError):
    """Raised when no intermediate data file exists for a database/version."""
    pass


class UnknownExchange(StandardError):
    """Raised when an exchange can't be matched in this or a dependent database."""
    pass

File brightway2/io/__init__.py

+from import_ecospold import EcospoldImporter
+from import_method import EcospoldImpactAssessmentImporter, import_ia_dir

File brightway2/io/import_ecospold.py

+# -*- coding: utf-8 -*
+from __future__ import division
+from brightway2 import Manager, mapping
+from brightway2.logs import get_logger
+from brightway2.errors import UnknownExchange
+from lxml import objectify
+import math
+import os
+from stats_toolkit.distributions import *
+try:
+    import progressbar
+except ImportError:
+    progressbar = None
+
+BIOSPHERE = ("air", "water", "soil", "resource")
+
+
class EcospoldImporter(object):
    """Import a directory of ecospold 1 XML files as a Brightway2 database."""

    def import_directory(self, path, name, depends=["biosphere", ]):
        """Import all ``.xml`` files in ``path`` as database ``name``.

        ``depends`` lists databases searched for exchange inputs not
        defined in the imported files. NOTE(review): mutable default
        argument — it is only read here, but confirm before mutating it.
        """
        data = []
        log = get_logger(name)
        log.critical(u"Starting import of %s (from %s)" % (name, path))
        files = filter(lambda x: x[-4:].lower() == ".xml", os.listdir(path))

        if progressbar:
            # Optional progress display; skipped if progressbar isn't installed.
            widgets = ['Files: ', progressbar.Percentage(), ' ',
                progressbar.Bar(marker=progressbar.RotatingMarker()), ' ',
                progressbar.ETA()]
            pbar = progressbar.ProgressBar(widgets=widgets, maxval=len(files)
                ).start()

        for index, filename in enumerate(files):
            root = objectify.parse(open(os.path.join(path, filename))
                ).getroot()

            if root.tag != '{http://www.EcoInvent.org/EcoSpold01}ecoSpold':
                # Unrecognized file type
                log.critical(u"skipping %s - no ecoSpold element" % filename)
                continue

            for dataset in root.iterchildren():
                data.append(self._process_dataset(dataset))

            if progressbar:
                pbar.update(index)

        if progressbar:
            pbar.finish()

        # Hackish
        # Normalize codes to ints where possible so the set lookup below
        # matches exchange codes parsed the same way.
        for o in data:
            try:
                o["code"] = int(o["code"])
            except:
                pass

        # Fix exchanges
        # Link each exchange either to a dataset in this import or to a
        # dependent database (possibly creating new biosphere flows).
        codes = set([o["code"] for o in data])
        for ds in data:
            for exc in ds["exchanges"]:
                code = exc["code"]
                # Hack - not work with others?
                try:
                    code = int(code)
                except:
                    pass
                if code in codes:
                    exc["input"] = (name, code)
                else:
                    exc["input"] = self._find_in_dependent_database(code,
                        exc, depends)
                # Anything not from the biosphere database is technosphere.
                exc["technosphere"] = exc["input"][0] != "biosphere"

        data = dict([((name, int(o["code"])), o) for o in data])

        manager = Manager(name)
        manager.register("Ecospold 1", depends, len(data))
        manager.write(data)

    def _find_in_dependent_database(self, code, exc, depends):
        """Resolve an exchange ``code`` against dependent databases.

        Returns a (database, code) tuple. If no match is found but the
        exchange looks like a biosphere flow, a new biosphere flow is
        created; otherwise UnknownExchange is raised.
        """
        for db in depends:
            if (db, code) in mapping:
                return (db, code)

        # Add new biosphere flow if needed
        if exc["_matching"].get("categories", [None, ])[0] in BIOSPHERE:
            data = exc["_matching"]

            # Emission or resource
            resource = data["categories"][0] == "resource"
            data["type"] = "resource" if resource else "emission"

            # Biosphere flows don't have locations or exchanges
            del data["location"]
            data["exchanges"] = []

            # Write modified biosphere database
            biosphere = Manager("biosphere")
            bio_data = biosphere.load()
            bio_data[("biosphere", code)] = data
            biosphere.write(bio_data)
            return ("biosphere", code)
        raise UnknownExchange(("The exchange %s couldn't be " + \
            "matched to this or a depending database") % code)

    def _process_dataset(self, dataset):
        """Extract one dataset element into a plain dict (no exchanges linked yet)."""
        data = {}
        ref_func = dataset.metaInformation.processInformation.\
            referenceFunction

        data["name"] = ref_func.get("name")
        data["type"] = "process"  # True for all ecospold?
        data["categories"] = [ref_func.get("category"), ref_func.get(
            "subCategory")]
        # Convert ("foo", "unspecified") to ("foo",)
        while data["categories"][-1] == "unspecified":
            data["categories"] = data["categories"][:-1]
        data["location"] = dataset.metaInformation.processInformation.\
            geography.get("location")
        data["code"] = dataset.get("number")
        data["unit"] = ref_func.get("unit")
        data["exchanges"] = self._process_exchanges(dataset)
        return data

    def _process_exchanges(self, dataset):
        """Extract the exchanges of a dataset, mapping ecospold uncertainty
        codes (1=lognormal, 2=normal, 3=triangular, 4=uniform) to
        stats_toolkit distribution ids."""
        data = []
        # Skip definitional exchange - we assume this already
        for exc in dataset.flowData.iterchildren():
            if exc.get("name") == dataset.metaInformation.processInformation.\
                    referenceFunction.get("name") != None and float(
                    exc.get("meanValue", 0.)) == 1.0:
                continue

            this = {
                "pedigree matrix": exc.get("generalComment"),
                "code": int(exc.get("number")),
                # _matching holds raw attributes used later to resolve the
                # exchange against dependent databases.
                "_matching": {
                    "categories": (exc.get("category"), exc.get("subCategory")),
                    "location": exc.get("location"),
                    "unit": exc.get("unit"),
                    "name": exc.get("name")
                    }
                }

            uncertainty = int(exc.get("uncertaintyType", 0))
            mean = exc.get("meanValue")
            min_ = exc.get("minValue")
            max_ = exc.get("maxValue")
            sigma = exc.get("standardDeviation95")

            if uncertainty == 1:
                # Lognormal
                this.update({
                    'uncertainty type': LognormalUncertainty.id,
                    'amount': float(mean),
                    'sigma': math.log(math.sqrt(float(sigma)))
                    })
                if this['sigma'] == 0:
                    # Bad ecoinvent data
                    this['uncertainty type'] = UndefinedUncertainty.id
                    del this["sigma"]
            elif uncertainty == 2:
                # Normal
                # standardDeviation95 spans ~2 sigma, hence the halving.
                this.update({
                    'uncertainty type': NormalUncertainty.id,
                    'amount': float(mean),
                    'sigma': float(sigma) / 2
                    })
            elif uncertainty == 3:
                # Triangular
                this.update({
                    'uncertainty type': TriangularUncertainty.id,
                    'minimum': float(min_),
                    'maximum': float(max_)
                    })
                # Sometimes this isn't included (though it SHOULD BE)
                if exc.get("mostLikelyValue"):
                    this['amount'] = float(exc.get("mostLikelyValue"))
                else:
                    this['amount'] = float(mean)
            elif uncertainty == 4:
                # Uniform
                this.update({
                    'uncertainty type': UniformUncertainty.id,
                    'amount': float(mean),
                    'minimum': float(min_),
                    'maximum': float(max_)
                    })
            else:
                # None
                this.update({
                    'uncertainty type': UndefinedUncertainty.id,
                    'amount': float(mean)
                })

            data.append(this)

        return data

File brightway2/io/import_method.py

+# -*- coding: utf-8 -*
+from brightway2 import config, Manager, mapping, methods
+from lxml import objectify
+import os
+import string
+import random
+try:
+    import cPickle as pickle
+except:
+    import pickle
+
+
def abbreviate(names, length=8):
    """Create a filename-safe abbreviation for an IA method name.

    ``names`` is a sequence of name parts. The abbreviation is the first
    word in full, the initial of every following word (words starting
    with a digit, e.g. "100a", are kept whole), plus a "-" and a random
    suffix of ``length`` alphanumeric characters to avoid collisions.
    """
    def abbrev(word):
        # Keep numeric words (like "100a") intact; otherwise take the initial.
        return word if word[0] in string.digits else word[0].lower()

    words = " ".join(names).split(" ")
    stem = words[0].lower() + "".join([abbrev(x) for x in words[1:]])
    # Bug fix: ``string.letters`` is locale-dependent (and gone in Python
    # 3); ``string.ascii_letters`` is stable. ``range`` replaces ``xrange``
    # with identical results here.
    random_string = ''.join(random.choice(string.ascii_letters +
        string.digits) for i in range(length))
    return stem + "-" + random_string
+
+
+def import_ia_dir(dirpath):
+    for filename in filter(lambda x: x.lower()[-4:] == ".xml",
+            os.listdir(dirpath)):
+        filepath = os.path.join(dirpath, filename)
+        print "Working on %s" % filepath
+        EcospoldImpactAssessmentImporter(filepath)
+
+
+class EcospoldImpactAssessmentImporter(object):
+    """
+Import impact assessment methods and weightings from ecospold XML format.
+    """
+    def __init__(self, filename):
+        self.filename = filename
+        self.biosphere_data = Manager("biosphere").load()
+        # Note that this is only used for the first root method found in
+        # the file
+        root = objectify.parse(open(self.filename)).getroot()
+        for dataset in root.iterchildren():
+            self.add_method(dataset)
+
+    def add_method(self, ds):
+        ref_func = ds.metaInformation.processInformation.referenceFunction
+        name = (ref_func.get("category"), ref_func.get("subCategory"),
+            ref_func.get("name"))
+        abbreviation = abbreviate(name)
+        print abbreviation, name
+        filepath = os.path.join(config.dir, "ia", "%s.pickle" % abbreviation)
+        description = ref_func.get("generalComment") or ""
+        unit = ref_func.get("unit") or ""
+        data = []
+        for cf in ds.flowData.iterchildren():
+            if ("biosphere", int(cf.get("number"))) not in mapping:
+                # Add new biosphere flow
+                code = int(cf.get("number"))
+                new_flow = {
+                    "name": cf.get("name"),
+                    "categories": (cf.get("category"),
+                        cf.get("subCategory") or "unspecified"),
+                    "code": code,
+                    "unit": cf.get("unit"),
+                    "exchanges": []
+                }
+
+                # Convert ("foo", "unspecified") to ("foo",)
+                while new_flow["categories"][-1] == "unspecified":
+                    new_flow["categories"] = new_flow["categories"][:-1]
+
+                # Emission or resource
+                resource = new_flow["categories"][0] == "resource"
+                new_flow["type"] = "resource" if resource else "emission"
+
+                # Write modified biosphere database
+                biosphere = Manager("biosphere")
+                bio_data = biosphere.load()
+                bio_data[("biosphere", code)] = new_flow
+                biosphere.write(bio_data)
+                return ("biosphere", code)
+            data.append((("biosphere", int(cf.get("number"))), float(cf.get(
+                "meanValue"))))
+        methods.add(name, {
+            'abbreviation': abbreviation,
+            'description': description,
+            'unit': unit
+            })
+        pickle.dump(data, open(filepath, "wb"),
+            protocol=pickle.HIGHEST_PROTOCOL)
+        methods.process(name)

File brightway2/logs.py

+# -*- coding: utf-8 -*
+from . import config
+import datetime
+import logging
+from logging.handlers import RotatingFileHandler
+import os
+
+
def get_logger(name, add_datetime=True, level=logging.INFO):
    """Return a logger writing to a rotating file in the Brightway2 logs dir.

    ``add_datetime`` appends a timestamp to the log filename (the original
    accepted but ignored this flag). ``level`` sets the logger threshold.
    """
    now = datetime.datetime.now()
    if add_datetime:
        filename = "%s-%s.log" % (name, now.strftime("%d-%B-%Y-%I-%M%p"))
    else:
        filename = "%s.log" % name
    handler = RotatingFileHandler(os.path.join(config.dir, 'logs', filename),
        maxBytes=50000, encoding='utf-8', backupCount=5)
    formatter = logging.Formatter(
        "%(asctime)s %(levelname)s %(lineno)d %(message)s")
    # Bug fix: the original called logging.getLogger("name") — the literal
    # string — so every caller shared one logger and handlers accumulated
    # across calls.
    logger = logging.getLogger(name)
    logger.setLevel(level)
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    return logger

File brightway2/manager.py

+# -*- coding: utf-8 -*
+from . import meta, config, mapping
+from errors import MissingIntermediateData
+import os
+import numpy as np
+from query import Query
+from utils import natural_sort, MAX_INT_32
+try:
+    import cPickle as pickle
+except ImportError:
+    import pickle
+
+
+class Manager(object):
+    def __init__(self, database, *args, **kwargs):
+        self.database = database
+        if self.database not in meta:
+            print "Warning: %s not a currently installed database" % database
+
+    def query(self, *queries):
+        return Query(*queries)(self.load())
+
+    def copy(self, name):
+        # TODO
+        self.write_database(self._data, name)
+
+    def register(self, format, depends, num_processes):
+        assert self.database not in meta
+        meta.add(self.database, {
+            "from format": format,
+            "depends": depends,
+            "number": num_processes,
+            "version": 0
+            })
+
+    def deregister(self):
+        meta.delete(self.database)
+
+    def write(self, data):
+        meta.increment_version(self.database)
+        mapping.add(data.keys())
+        filename = "%s.%i.pickle" % (self.database,
+            meta.version(self.database))
+        filepath = os.path.join(config.dir, "intermediate", filename)
+        with open(filepath, "wb") as f:
+            pickle.dump(data, f, protocol=pickle.HIGHEST_PROTOCOL)
+
+    def load(self, version=None):
+        files = filter(lambda x: ".".join(x.split(".")[:-2]) == self.database,
+            os.listdir(os.path.join(config.dir, "intermediate")))
+        if not files:
+            raise MissingIntermediateData("Can't load intermediate data")
+        if version == None:
+            return pickle.load(open(os.path.join(config.dir, "intermediate",
+                natural_sort(files)[-1]), "rb"))
+        else:
+            filepath = os.path.join(config.dir, "intermediate",
+                "%s.%i.pickle" % (self.database, version))
+            if not os.path.exists(filepath):
+                raise MissingIntermediateData("This version not found")
+            return pickle.load(open(filepath, "rb"))
+
+    def process(self, version=None):
+        """Create numpy structured arrays from database"""
+        data = self.load(version)
+        num_exchanges = sum([len(obj["exchanges"]) for obj in data.values()])
+        assert data
+        dtype = [('uncertainty_type', np.uint8),
+            ('input', np.uint32),
+            ('output', np.uint32),
+            ('row', np.uint32),
+            ('col', np.uint32),
+            ('technosphere', np.bool),
+            ('amount', np.float32),
+            ('sigma', np.float32),
+            ('minimum', np.float32),
+            ('maximum', np.float32),
+            ('negative', np.bool)]
+        arr = np.zeros((num_exchanges, ), dtype=dtype)
+        arr['minimum'] = arr['maximum'] = arr['sigma'] = np.NaN
+        count = 0
+        for key in data:
+            for exc in data[key]["exchanges"]:
+                arr[count] = (
+                    exc["uncertainty type"],
+                    mapping[exc["input"]],
+                    mapping[key],
+                    MAX_INT_32,
+                    MAX_INT_32,
+                    exc["technosphere"],
+                    exc["amount"],
+                    exc.get("sigma", np.NaN),
+                    exc.get("minimum", np.NaN),
+                    exc.get("maximum", np.NaN),
+                    exc["amount"] < 1
+                    )
+                count += 1
+
+        filepath = os.path.join(config.dir, "processed", "%s.pickle" % \
+            self.database)
+        with open(filepath, "wb") as f:
+            pickle.dump(arr, f, protocol=pickle.HIGHEST_PROTOCOL)

File brightway2/proxies/__init__.py

+# -*- coding: utf-8 -*
+from array import ArrayProxy, OneDimensionalArrayProxy, ListArrayProxy
+from sparse import CompressedSparseMatrixProxy, SparseMatrixProxy

File brightway2/proxies/array.py

+# -*- coding: utf-8 -*
+import numpy as np
+import unittest
+
+
+class DifferentIndexTypesError(StandardError):
+    pass
+
+
+class InconsistentSlicingError(StandardError):
+    pass
+
+
class ArrayProxy(object):
    """
Provides a dictionary-based interface from database row ids to array indices.

ArrayProxy provides a matrix or array whose indices are translated through a
lookup dictionary. Slices are not generally supported.
    """
    def __init__(self, data, row_dict, col_dict=None):
        # data: the wrapped array/matrix. row_dict/col_dict map external
        # ids to positional indices. col_dict is optional; without it,
        # column lookups raise AttributeError.
        super(ArrayProxy, self).__init__()
        self.data = data
        self.row_dict = row_dict
        if col_dict:
            self.col_dict = col_dict

    def translate(self, obj, type="row"):
        """Translate an external id (or tuple/list of ids) into array indices.

        Accepts objects with an ``id`` attribute, the full slice ``[:]``,
        or list/tuple of ids. Raises KeyError for unknown ids and
        NotImplementedError for partial slices.
        """
        if hasattr(obj, "id"):
            # Allow model-like objects; their database id is the key.
            obj = obj.id
        if isinstance(obj, slice):
            if obj == slice(None):
                # A full slice passes through untranslated.
                return obj
            else:
                # Could support for slicing using Django's SortedDict
                # (http://code.djangoproject.com/browser/django/trunk/django/utils/datastructures.py)
                # or other sorted dictionary implementations, e.g.
                # http://code.activestate.com/recipes/496761/ or
                # http://www.voidspace.org.uk/downloads/odict.py
                # However, this is not desperately needed...
                raise NotImplementedError("Slices are not supported, " + \
                    "because their meaning is unclear when translated into" + \
                    " array indices. If you are confident, you can call " + \
                    "slices directly on self.data.")
        elif isinstance(obj, (list, tuple)):
            # Translate element-wise; always returns a tuple.
            return tuple([self.translate(_obj, type) for _obj in obj])
        else:  # No longer test for integer keys; subclasses can use strings
            try:
                if type == "row":
                    return self.row_dict[obj]
                elif type == "col":
                    return self.col_dict[obj]
            except KeyError:
                # Re-raise with a message naming the offending key.
                raise KeyError("Provided object %s is not a valid %s key" \
                    % (obj, type))

    def __getitem__(self, *args):
        # Expects a (row_id, col_id) pair.
        assert len(args) == 1
        x = self.translate(args[0][0], "row")
        y = self.translate(args[0][1], "col")
        return self.data.__getitem__((x, y),)

    def __setitem__(self, *args):
        # args is ((row_id, col_id), value).
        assert len(args) == 2
        x = self.translate(args[0][0], "row")
        y = self.translate(args[0][1], "col")
        return self.data.__setitem__((x, y), args[1])

    def __repr__(self):
        return '<%s for %s>' % (self.__class__.__name__, self.data.__repr__(
            )[1:-1])

    def __str__(self):
        return self.data.__str__()

    # Numpy functions
    # The following delegate directly to the wrapped data object.
    def any(self, *args, **kwargs):
        return self.data.any(*args, **kwargs)

    def all(self, *args, **kwargs):
        return self.data.all(*args, **kwargs)

    def sum(self, *args, **kwargs):
        return self.data.sum(*args, **kwargs)

    def min(self, *args, **kwargs):
        # This should work for both ndarrays and sparse matrices
        try:
            m = self.data.min(*args, **kwargs)
        except AttributeError:
            m = self.data.data.min(*args, **kwargs)
        # Unwrap nested single-element arrays down to a scalar.
        while isinstance(m, np.ndarray):
            m = m[0]
        return m

    def max(self, *args, **kwargs):
        try:
            m = self.data.max(*args, **kwargs)
        except AttributeError:
            m = self.data.data.max(*args, **kwargs)
        while isinstance(m, np.ndarray):
            m = m[0]
        return m

    def cumsum(self, *args, **kwargs):
        return self.data.cumsum(*args, **kwargs)

    def mean(self, *args, **kwargs):
        return self.data.mean(*args, **kwargs)

    def row(self, id):
        # Direct (untranslated) lookup of a row index by id.
        return self.row_dict[id]

    def col(self, id):
        return self.col_dict[id]

    @property
    def shape(self):
        return self.data.shape

    @property
    def row_dict_rev(self):
        """Build only upon demand"""
        # Reverse mapping: positional index -> external id, cached.
        if not hasattr(self, "_row_dict_rev"):
            self._row_dict_rev = dict(zip(self.row_dict.values(),
                self.row_dict.keys()))
        return self._row_dict_rev

    @property
    def col_dict_rev(self):
        if not hasattr(self, "_col_dict_rev"):
            self._col_dict_rev = dict(zip(self.col_dict.values(),
                self.col_dict.keys()))
        return self._col_dict_rev
+
+
class OneDimensionalArrayProxy(ArrayProxy):
    """
A special case of ArrayProxy limited to one-dimensional arrays.

Used for supply and demand arrays in LCA calculations.
    """
    def __init__(self, data, row_dict):
        # Reject anything that isn't a 1-d array up front.
        if len(data.shape) != 1:
            raise AttributeError("Must only be used for one-dimensional array")
        super(OneDimensionalArrayProxy, self).__init__(data, row_dict)

    def __getitem__(self, *args):
        assert len(args) == 1
        index = self.translate(args[0], "row")
        return self.data.__getitem__((index,),)

    def __setitem__(self, *args):
        assert len(args) == 2
        index = self.translate(args[0], "row")
        self.data.__setitem__((index,), args[1])
+
+
class ListArrayProxy(object):
    """
An interface to a list of objects that translates lookups from foo[bar,baz] to
foo.indices.index(bar)[baz]. If baz is a slice, returns a generator.

Takes list_, the list of objects, and optionally index_objs, which is an
iterable of objects used as indices to list_. index_objs must all be of the
same type.
    """

    __slots__ = ["indices", "list", "index_type"]

    def __init__(self, list_, index_objs=None):
        self.list = list_
        if index_objs:
            self.check_indice_types(index_objs)
            if len(index_objs) != len(list_):
                raise ValueError("index_objs must have same length as list_")
            self.indices = list(index_objs)
        else:
            self.indices = None

    def check_indice_types(self, objs):
        # All index objects must share one type; mixed types would make
        # lookups ambiguous.
        l = [type(x) for x in objs]
        if not len(set(l)) == 1:
            raise DifferentIndexTypesError

    def sum(self):
        """Sum of all elements of all contained objects."""
        return sum([sum(obj) for obj in self.list])

    def __unicode__(self):
        if self.indices:
            return "<ListArrayProxy for %s>" % self.indices[0]
        else:
            return "<ListArrayProxy for unknown objects (id %s)>" % id(self)

    def translate_slice(self, sl):
        """Possible translate slice arguments into self.indices terms"""
        if sl.start in self.indices:
            start = self.indices.index(sl.start)
            start_translated = True
        else:
            start = sl.start
            start_translated = False
        if sl.stop in self.indices:
            stop = self.indices.index(sl.stop)
            stop_translated = True
        else:
            stop = sl.stop
            stop_translated = False
        # Mixing one translated and one raw endpoint is ambiguous (unless
        # the raw endpoint is simply omitted, i.e. None).
        if start_translated != stop_translated and start != None and stop != \
                None:
            raise InconsistentSlicingError(
                "Only one slice element could be found in the indices")
        return slice(start, stop, sl.step)

    def __getitem__(self, args):
        if args == None:
            raise SyntaxError

        # First element addresses the outer list; the rest (if any) is
        # forwarded to the selected object(s).
        if isinstance(args, tuple):
            list_pos = args[0]
        else:
            list_pos = args

        if self.indices and isinstance(list_pos, slice):
            list_pos = self.translate_slice(list_pos)
        elif self.indices and list_pos in self.indices:
            list_pos = self.indices.index(list_pos)

        if not isinstance(args, tuple):
            return self.list[list_pos]
        elif isinstance(list_pos, slice):
            # Lazy: one sub-lookup per selected object.
            return (obj.__getitem__(*args[1:]) for obj in self.list[list_pos])
        else:
            return self.list[list_pos].__getitem__(*args[1:])

    def __setitem__(self, *args):
        # Bug fix: the original signature took no item/value arguments, so
        # assignment raised TypeError (wrong arity) instead of the intended
        # NotImplementedError.
        raise NotImplementedError

    def __iter__(self):
        return iter(self.list)

    def __len__(self):
        return len(self.list)

    def __repr__(self):
        return self.__unicode__()
+
+
class ArrayProxyTest(unittest.TestCase):
    """Tests for ArrayProxy and OneDimensionalArrayProxy."""

    def build_proxy(self):
        """Return a 3x3 ArrayProxy with a[x, y] == x + 3 * y, row ids
        10-12 mapped to rows 0-2 and column ids 20-22 to columns 0-2.

        (The same fixture was previously duplicated in every test.)
        """
        a = np.zeros((3, 3))
        for x in range(3):
            for y in range(3):
                a[x, y] = x + 3 * y
        return ArrayProxy(a, {10: 0, 11: 1, 12: 2}, {20: 0, 21: 1, 22: 2})

    def test_array_proxy(self):
        a = self.build_proxy()
        self.assertEqual(a[10, 20], 0)
        self.assertTrue(np.allclose(a[11, (20, 21)], np.array((1, 4))))

    def test_array_proxy_object(self):
        # Objects exposing an ``id`` attribute can be used as keys.
        class Dummy(object):
            def __init__(self, id):
                self.id = id

        a = self.build_proxy()
        obj = Dummy(10)
        self.assertEqual(a[obj, 20], 0)

    def test_array_proxy_key_error(self):
        a = self.build_proxy()
        # Row/column ids swapped: neither is present in its mapping dict.
        self.assertRaises(KeyError, a.__getitem__, (20, 10))

        class Vanilla(object):
            pass
        v = Vanilla()
        self.assertRaises(KeyError, a.__getitem__, (v, 20))

    def test_array_proxy_slices(self):
        a = self.build_proxy()
        # Bounded slices are unsupported; full slices work.
        self.assertRaises(NotImplementedError, a.__getitem__,
            (slice(None, 11, None), 20))
        # Note: np slices don't preserve shape!
        self.assertTrue(np.allclose(a[:, 21], np.array((3, 4, 5,))))
        self.assertTrue(np.allclose(a[11, :], np.array((1, 4, 7))))

    def test_reverse_dict(self):
        a = self.build_proxy()
        # Reverse dictionaries are built lazily on first property access.
        self.assertFalse(hasattr(a, "_row_dict_rev"))
        self.assertFalse(hasattr(a, "_col_dict_rev"))
        self.assertTrue(a.row_dict_rev)
        self.assertTrue(a.col_dict_rev)
        # assertEqual, not the deprecated assertEquals alias
        self.assertEqual(a.row_dict_rev[0], 10)
        self.assertEqual(a.col_dict_rev[0], 20)

    def test_one_dimensional_proxy(self):
        b = np.zeros((3,))
        for x in range(3):
            b[x] = x + 3
        b = OneDimensionalArrayProxy(b, {10: 0, 11: 1, 12: 2})
        self.assertEqual(b[11], 4)
        self.assertTrue(np.allclose(b[:], np.array((3, 4, 5))))
        b[11] = 13
        self.assertEqual(b[11], 13)
        self.assertTrue(np.allclose(b[:], np.array((3, 13, 5))))
        b = np.zeros((3, 3))
        # Wrong dimensionality and wrong argument count both fail.
        self.assertRaises(AttributeError, OneDimensionalArrayProxy, b, {})
        self.assertRaises(TypeError, OneDimensionalArrayProxy, b, {}, {})
+
+
class ListArrayProxyTest(unittest.TestCase):
    # NOTE(review): these tests assume Python 2, where range() returns a
    # plain list that compares equal (==) to other lists.
    def test_list_array_proxy_without_indices(self):
        l = ListArrayProxy((range(10), range(10)))
        self.assertEqual(l[0], range(10))
        self.assertEqual(list(l[:, 1]), [1, 1])
        self.assertEqual(list(l[:, :]), [range(10), range(10)])
        self.assertEqual(list(l[:, 4:6]), [range(10)[4:6], range(10)[4:6]])

    def test_objects_as_indices(self):
        # Arbitrary objects can stand in for positional indices.
        class A(object):
            pass
        m = A()
        n = A()
        l = ListArrayProxy((range(5), range(5, 10)), [m, n])
        self.assertEqual(l[n], range(5, 10))
        self.assertEqual(l.sum(), sum(range(10)))
        self.assertEqual(l[n, :2], [5, 6])

    def test_mismatched_index_length(self):
        # The index list must match the wrapped list's length.
        self.assertRaises(ValueError, ListArrayProxy, range(5), range(4))

    def test_list_array_proxy_with_indices(self):
        # All index objects must share a single type.
        class A(object):
            pass

        class B(object):
            pass
        m = A()
        o = B()
        self.assertRaises(DifferentIndexTypesError, ListArrayProxy, ((), ()),
            [m, o])

    def test_slices_with_indices(self):
        # Slice endpoints given as index objects are translated to
        # positions; mixing translated and untranslated endpoints raises.
        l = ListArrayProxy((range(3), range(5), range(7)),
            (3, 5, 7))
        self.assertEqual(l[:], (range(3), range(5), range(7)))
        self.assertRaises(InconsistentSlicingError, l.__getitem__, slice(3, 4))
        self.assertFalse(l[:3])
        self.assertEqual(l[:5], (range(3),))
        self.assertEqual([x for x in l[::2]], [range(3), range(7)])
        self.assertEqual([x for x in l[5:]], [range(5), range(7)])

File brightway2/proxies/sparse.py

+# -*- coding: utf-8 -*-
+import numpy as np
+import scipy.sparse
+import unittest
+from . import ArrayProxy
+
+
class SparseMatrixProxy(ArrayProxy):
    """
Provides a dictionary-based interface from database row ids to array indices 
for sparse matrices. Does not assume a certain sparsity structure.
    """
    def __init__(self, data, row_dict, col_dict, *args, **kwargs):
        # Mappings from database ids to row/column array indices.
        self.row_dict = row_dict
        self.col_dict = col_dict
        self.data = data
        try:
            self.format = self.data.getformat()
        except AttributeError:
            raise TypeError("Must pass a Scipy sparse matrix")

    def __getitem__(self, *args):
        """Look up one element by ``(row id, column id)``.

        Ids are mapped to positions by the inherited ``translate``
        method (presumably defined on ArrayProxy -- confirm).
        """
        assert len(args) == 1
        x = self.translate(args[0][0], "row")
        y = self.translate(args[0][1], "col")

        # Parenthesized raise form is valid in both Python 2 and 3,
        # unlike the old ``raise Error, msg`` statement syntax.
        if x == slice(None) or y == slice(None):
            raise NotImplementedError("SparseMatrixProxy doesn't support "
                "slices; use CompressedSparseMatrixProxy for slices")
        return self.data[x, y]

    def __setitem__(self, *args):
        """Assign one element by ``(row id, column id)``."""
        assert len(args) == 2
        x = self.translate(args[0][0], "row")
        y = self.translate(args[0][1], "col")

        if x == slice(None) or y == slice(None):
            raise NotImplementedError("SparseMatrixProxy doesn't support "
                "assignment by slice")
        self.data[x, y] = args[1]

    def get_row_as_dict(self, row):
        raise NotImplementedError("Use CompressedSparseMatrixProxy for "
            "dictionary matrix slices")

    def get_col_as_dict(self, col):
        raise NotImplementedError("Use CompressedSparseMatrixProxy for "
            "dictionary matrix slices")

    def toarray(self):
        """Return a dense numpy ndarray copy of the data."""
        return self.data.toarray()

    def todense(self):
        """Return a dense numpy matrix copy of the data."""
        return self.data.todense()

    @property
    def nnz(self):
        """Number of stored (non-zero) entries."""
        # Used to use getnzmax, but that seems to be missing from newest scipy
        return self.data.getnnz()
+
+
class CompressedSparseMatrixProxy(SparseMatrixProxy):
    """
Subclass of SparseMatrixProxy for CSR (compressed sparse row) or CSC
(compressed sparse column) matrices.

This class will create, on demand, the complementary matrix type (e.g. CSC for 
CSR) for efficient slicing and multiple assignment. If a matrix is changed, a 
*dirty* flag is set that tells the class to re-create the complementary matrix 
before accessing its information. All updates to complementary matrices are 
lazy.
    """

    __slots__ = ['row_dict', 'col_dict', 'format', 'dirty', '_csr', '_csc']

    def __init__(self, data, row_dict, col_dict, *args, **kwargs):
        self.row_dict = row_dict
        self.col_dict = col_dict
        # dirty == True: the complementary matrix is absent or stale and
        # must be (re)built before it can be used for slicing.
        self.dirty = True
        self.format = data.getformat()
        if self.format == "csc":
            self._csc = data
        else:
            # Anything that isn't already CSC is normalized to CSR.
            self._csr = data.tocsr()
            self.format = "csr"

    def __getitem__(self, *args):
        """Element access by ids; single-dimension slices are supported."""
        assert len(args) == 1
        x = self.translate(args[0][0], "row")
        y = self.translate(args[0][1], "col")

        if isinstance(x, slice) and isinstance(y, slice):
            raise NotImplementedError(
                "Convert to dense matrix to do slices on multiple dimensions")
        elif isinstance(x, slice):
            # Column slicing needs CSC; rebuild lazily only when stale.
            if self.dirty and self.format == "csr":
                self._csc = self._csr.tocsc()
                # Fix: clear the flag, otherwise every access reconverts.
                self.dirty = False
            return self._csc[x, y]
        elif isinstance(y, slice):
            # Row slicing needs CSR; rebuild lazily only when stale.
            if self.dirty and self.format == "csc":
                self._csr = self._csc.tocsr()
                self.dirty = False
            return self._csr[x, y]
        else:
            return self.data[x, y]

    def _get_data(self):
        # The primary matrix is whichever format was chosen at init time.
        if self.format == "csr":
            return self._csr
        else:
            return self._csc

    def _set_data(self, data):
        # Replacing the primary matrix invalidates the complement.
        self.dirty = True
        if self.format == "csr":
            self._csr = data
        else:
            self._csc = data

    data = property(_get_data, _set_data)

    def __setitem__(self, *args):
        # Set dirty flag to indicate that matrix has changed, and need to
        # recompute the partner matrix if is accessed
        self.dirty = True
        # TODO: Warn if setting slicing on compressed sparse matrix?
        super(CompressedSparseMatrixProxy, self).__setitem__(*args)

    def get_row_as_dict(self, row):
        """Return ``{column id: value}`` for the non-zeros of ``row``.

        The ``indices`` of a row slice are *column* positions, so they
        are translated back through ``col_dict_rev`` (using
        ``row_dict_rev`` here is only correct for square matrices whose
        row and column dictionaries coincide).
        """
        obj = self.__getitem__((row, slice(None)),)
        return dict((self.col_dict_rev[obj.indices[index]], obj.data[index])
            for index in xrange(obj.indices.shape[0]))

    def get_col_as_dict(self, col):
        """Return ``{row id: value}`` for the non-zeros of ``col``."""
        obj = self.__getitem__((slice(None), col),)
        return dict((self.row_dict_rev[obj.indices[index]], obj.data[index])
            for index in xrange(obj.indices.shape[0]))
+
+
class SparseMatrixProxyTest(unittest.TestCase):
    def test_sparse_matrix_proxy(self):
        """Id-based get/set works; slicing raises NotImplementedError."""
        mat = scipy.sparse.lil_matrix((3,3))
        for x in range(3):
            for y in range(3):
                if x == y: continue
                mat[x,y] = x+3*y
        mat = SparseMatrixProxy(mat, {10: 0, 11: 1, 12: 2}, {20: 0, 21: 1, 
            22: 2})
        self.assertEqual(mat[10, 20], 0)
        self.assertEqual(mat[11, 22], 7)
        self.assertEqual(mat.nnz, 6)
        mat[11,21] = 3
        self.assertEqual(mat[11,21], 3)
        self.assertRaises(NotImplementedError, mat.__getitem__, (slice(None), 
            21))
        self.assertRaises(NotImplementedError, mat.__setitem__, (slice(None), 
            21), 1)

    def test_compressed_sparse_matrix_proxy(self):
        """Compressed proxy additionally supports row and column slices."""
        c = scipy.sparse.lil_matrix((3,3))
        for x in range(3):
            for y in range(3):
                if x == y: continue
                c[x,y] = x+3*y
        c = CompressedSparseMatrixProxy(c, {10: 0, 11: 1, 12: 2}, 
            {20: 0, 21: 1, 22: 2})
        self.assertEqual(c[10, 20], 0)
        self.assertEqual(c[11, 22], 7)
        self.assertEqual(c.nnz, 6)
        self.assertTrue(isinstance(c.data, scipy.sparse.csr.csr_matrix))
        self.assertTrue(np.allclose(c[:,21].todense(), np.array(((3,),(
            0,),(5,)))))
        self.assertTrue(np.allclose(c[11,:].todense(), np.array((1,0,7))))
        c[11,21] = 3
        # unittest assertion instead of bare assert (stripped under -O)
        self.assertEqual(c[11, 21], 3)

File brightway2/query.py

+# -*- coding: utf-8 -*-
+import collections
+
+
class Result(object):
    """Wraps the dictionary produced by a query, offering item access,
    containment and length checks, and in-place sorting."""
    def __init__(self, result):
        self.result = result

    def __str__(self):
        count = len(self.result)
        return u"Query result with %i entries" % count

    def __repr__(self):
        return repr(self.result)

    def __getitem__(self, key):
        return self.result[key]

    def __contains__(self, key):
        return key in self.result

    def sort(self, field):
        """Re-order the entries in place by each value's ``field`` key."""
        ordered = sorted(self.result.iteritems(),
            key=lambda pair: pair[1].get(field, None))
        self.result = collections.OrderedDict(ordered)

    def __len__(self):
        return len(self.result)
+
+
class Query(object):
    """A composable chain of filter callables applied to a data dict."""
    def __init__(self, *queries):
        self.queries = list(queries)

    def add(self, query):
        """Append another filter callable to the chain."""
        self.queries.append(query)

    def __call__(self, data):
        filtered = data
        for query in self.queries:
            filtered = query(filtered)
        return Result(filtered)
+
+
class Filter(object):
    """Base class for single-field filters.

    Subclasses implement ``filter(obj)``, returning True when ``obj``
    passes. Calling the instance filters a whole ``{key: dataset}``
    dictionary, keeping the entries whose values pass.
    """
    def __init__(self, field, value):
        self.field = field
        self.value = value

    def __call__(self, data):
        """Should return a filtered dictionary, same form as before"""
        return dict((k, v) for k, v in data.iteritems() if self.filter(v))

    def filter(self, o):
        # Fix: ``raise NotImplemented`` is a bug -- NotImplemented is a
        # sentinel value, not an exception class, so raising it fails
        # with a TypeError instead of signalling "abstract method".
        raise NotImplementedError
+
+
+# class Category(object):
+#     def __init__(self, *args):
+#         self.filters = args
+
+#     def __call__(self, data):
+#         return dict([
+#             (k, v) for k, v in data.iteritems() if \
+#                 all([
+#                     any([f.filter(x) for x in v["categories"]]) \
+#                     for f in self.filters
+#                 ])
+#             ])
+
+
class Exchange(object):
    """Filter datasets by their exchanges: a dataset passes when at least
    one exchange satisfies every provided filter."""
    def __init__(self, *args):
        self.filters = args

    def __call__(self, data):
        """All filters should pass for at least one exchange"""
        def _dataset_passes(ds):
            return any(
                all(f.filter(exc) for f in self.filters)
                for exc in ds["exchanges"])
        return dict((k, v) for k, v in data.iteritems()
            if _dataset_passes(v))
+
+
class In(Filter):
    """Passes when ``value`` is a member of the (list-valued) field."""
    def filter(self, o):
        candidates = o.get(self.field, [])
        return self.value in candidates
+
+
class Is(Filter):
    """Passes when the field equals ``value`` exactly."""
    def filter(self, o):
        actual = o.get(self.field, "")
        return actual == self.value
+
+
class Isnt(Filter):
    """Passes when the field differs from ``value``."""
    def filter(self, o):
        actual = o.get(self.field, None)
        return actual != self.value
+
+
class Contains(Filter):
    """Passes when ``value`` is contained in the field (substring or
    membership test).

    The lookup default is an empty string so that a missing field simply
    fails the filter; a ``None`` default made ``self.value in None``
    raise ``TypeError``.
    """
    def filter(self, o):
        return self.value in o.get(self.field, "")
+
+
class iFilter(Filter):
    """Base class for case-insensitive filters; lowercases ``value``."""
    def __init__(self, field, value):
        Filter.__init__(self, field, value.lower())
+
+
class iIs(iFilter):
    """Case-insensitive equality on the field."""
    def filter(self, o):
        actual = o.get(self.field, "").lower()
        return actual == self.value
+
+
class iIsnt(iFilter):
    """Case-insensitive inequality on the field."""
    def filter(self, o):
        actual = o.get(self.field, "").lower()
        return actual != self.value
+
+
class iIn(iFilter):
    """Case-insensitive membership in a list-valued field."""
    def filter(self, o):
        lowered = [item.lower() for item in o.get(self.field, [])]
        return self.value in lowered
+
+
class iContains(iFilter):
    """Case-insensitive substring test on the field.

    Fix: the previous ``(... or None)`` expression turned an empty or
    missing field value into ``None``, so the ``in`` test raised
    ``TypeError``; an empty string now simply fails the filter.
    """
    def filter(self, o):
        return self.value in o.get(self.field, '').lower()

File brightway2/utils.py

+import re
+
# Maximum value for an unsigned integer stored in 4 bytes (2 ** 32 - 1)
MAX_INT_32 = 4294967295
+
+
def natural_sort(l):
    """Sort the given list in the way that humans expect.

    Splits each string into alternating text and integer runs so that,
    e.g., "item2" sorts before "item10".
    """
    # http://nedbatchelder.com/blog/200712/human_sorting.html#comments
    # Named inner functions instead of assigned lambdas (PEP 8 E731).
    def convert(text):
        # Digit runs compare numerically, everything else as text.
        return int(text) if text.isdigit() else text

    def alphanum_key(key):
        return [convert(c) for c in re.split('([0-9]+)', key)]

    return sorted(l, key=alphanum_key)
from distutils.core import setup
import os

# Discover all sub-packages of brightway2 (directories with __init__.py).
packages = []
root_dir = os.path.dirname(__file__)
if root_dir:
    os.chdir(root_dir)

for dirpath, dirnames, filenames in os.walk('brightway2'):
    # Prune dirnames that start with '.' so os.walk skips their subtrees
    # (the comment promised this, but it was never implemented).
    dirnames[:] = [d for d in dirnames if not d.startswith('.')]
    if '__init__.py' in filenames:
        pkg = dirpath.replace(os.path.sep, '.')
        if os.path.altsep:
            pkg = pkg.replace(os.path.altsep, '.')
        packages.append(pkg)

# Use context managers so the file handles are closed deterministically.
with open('LICENSE.txt') as f:
    license_text = f.read()
with open('README.txt') as f:
    long_description = f.read()

setup(
  name='brightway2',
  version="0.1",
  packages=packages,
  author="Chris Mutel",
  author_email="cmutel@gmail.com",
  license=license_text,
  long_description=long_description,
)