Commits

Chris Mutel committed afbeaca

Add copy method to Database and Method

  • Participants
  • Parent commits cafd81b

Comments (0)

Files changed (2)

brightway2/database.py

 
     Databases are automatically versioned.
 
-    The Database class never holds intermediate data, but it can load or write intermediate data. The only attribute is *database*, which is the name of the database being managed."""
+    The Database class never holds intermediate data, but it can load or write intermediate data. The only attribute is *database*, which is the name of the database being managed.
+
+    Instantiation does not load any data. If this database is not yet registered in the metadata store, a warning is written to ``stdout``.
+
+    Args:
+        *database* (str): Name of the database to manage.
+
+    """
     def __init__(self, database):
         """Instantiate a Database object.
 
         Does not load any data. If this database is not yet registered in the metadata store, a warning is written to **stdout**.
 
-        Args:
-            *database* (str): Name of the database to manage.
 
         """
         self.database = database
             *name* (str): Name of the new database.
 
         """
-        # Todo: register copied method
-        raise NotImplemented
-        assert name not in databases, ValueError("This database exists")
         def relabel_exchanges(obj, keys):
             for e in obj['exchanges']:
                 if e["input"] in data:
                     e["input"] = (name, e["input"][1])
             return obj
+
+        assert name not in databases, ValueError("This database exists")
         data = self.load()
         data = dict([((name, k[1]), relabel_exchanges(v)) for k, v in data.iteritems()])
-        self.write(data, name)
+        new_database = Database(name)
+        new_database.register(
+            format="Brightway2 copy",
+            depends=databases[self.database]["depends"],
+            num_processes=len(data))
+        new_database.write(data)
 
     def register(self, format, depends, num_processes):
         """Register a database with the metadata store.
         Args:
             *format* (str): Format that the database was converted from, e.g. "Ecospold"
             *depends* (list): Names of the databases that this database references, e.g. "biosphere"
-            num_processes (int): Number of processes in this database.
+            *num_processes* (int): Number of processes in this database.
 
         """
         assert self.database not in databases
         del databases[self.database]
 
     def validate(self, data):
-        """Validate data (that is presumably not yet written).
+        """Validate data. Must be called manually.
 
         Args:
             *data* (dict): The data, in its processed form.
         db_validator(data)
         return True
 
-    def write(self, data, name=None):
-        """"""
+    def write(self, data):
+        """Serialize data to disk.
+
+        Args:
+            *data* (dict): Inventory data
+
+        """
         if self.database not in databases:
             raise UnknownObject("This database is not yet registered")
         databases.increment_version(self.database)
             return pickle.load(open(filepath, "rb"))
 
     def process(self, version=None):
-        """Create numpy structured arrays from database"""
+        """Process intermediate data from a Python dictionary to a `NumPy <http://numpy.scipy.org/>`_ `Structured <http://docs.scipy.org/doc/numpy/reference/arrays.classes.html#record-arrays-numpy-rec>`_ `Array <http://docs.scipy.org/doc/numpy/user/basics.rec.html>`_. A structured array (also called a record array) is a heterogeneous array, where each column has a different label and data type. These structured arrays act as a standard data format for LCA and Monte Carlo calculations, and are the native data format for the Stats Arrays package.
+
+        Processed arrays are saved in the ``processed`` directory.
+
+        Args:
+            *version* (int, optional): The version of the database to process
+
+        """
         data = self.load(version)
         num_exchanges = sum([len(obj["exchanges"]) for obj in data.values()])
         assert data

brightway2/method.py

 # -*- coding: utf-8 -*
 from . import config, mapping, methods
+from copy import copy
 from errors import UnknownObject, MissingIntermediateData
 from utils import MAX_INT_32
 import numpy as np
 
 
 class Method(object):
-    """A manager for a method. This class can register or deregister databases, write intermediate data, process data to parameter arrays, query, validate, and copy databases.
+    """A manager for a method. This class can register or deregister methods, write intermediate data, process data to parameter arrays, validate, and copy methods.
 
-    Databases are automatically versioned.
+    The Method class never holds intermediate data, but it can load or write intermediate data. The only attribute is *method*, which is the name of the method being managed.
 
-    The Database class never holds intermediate data, but it can load or write intermediate data. The only attribute is *database*, which is the name of the database being managed."""
+    Instantiation does not load any data. If this method is not yet registered in the metadata store, a warning is written to ``stdout``.
+
+    Methods are hierarchically structured, and this structure is preserved in the method name. It is a tuple of strings, like ``('ecological scarcity 2006', 'total', 'natural resources')``.
+
+    Args:
+        *method* (tuple): Name of the method to manage. Must be a tuple of strings.
+
+    """
     def __init__(self, method, *args, **kwargs):
         self.method = method
         if self.method not in methods:
                 " : ".join(method))
 
     def get_abbreviation(self):
+        """Abbreviate a method identifier (a tuple of long strings) for a filename. Random characters are added because some methods have similar names which would overlap when abbreviated."""
         try:
             return methods[self.method]["abbreviation"]
         except KeyError:
             raise UnknownObject("This method is not yet registered")
 
-    def copy(self, name):
-        # Todo: This doesn't work yet
-        self.write(self.load(), name)
+    def copy(self, name=None):
+        """Make a copy of the method.
+
+        Args:
+            *name* (tuple, optional): Name of the new method.
+
+        """
+        name = name or self.method[:-1] + ("Copy of " + self.method[-1],)
+        new_method = Method(name)
+        metadata = copy(methods[self.method])
+        del metadata["abbreviation"]
+        new_method.register(**metadata)
+        new_method.write(self.load())
 
     def register(self, unit, description="", num_cfs=0):
+        """Register a method with the metadata store.
+
+        Methods must be registered before data can be written.
+
+        Args:
+            *unit* (str): Unit for impact assessment CFs
+            *description* (str): Description
+            *num_cfs* (int): Number of characterization factors
+
+        """
         assert self.method not in methods
         methods[self.method] = {
             "abbreviation": abbreviate(self.method),
             }
 
     def deregister(self):
+        """Remove a method from the metadata store. Does not delete any files."""
         del methods[self.method]
 
     def validate(self, data):
+        """Validate data. Must be called manually.
+
+        Args:
+            *data* (dict): The data, in its processed form.
+
+        """
         ia_validator(data)
         return True
 
     def write(self, data):
+        """Serialize data to disk.
+
+        Args:
+            *data* (dict): Impact assessment data (characterization factors)
+
+        """
         if self.method not in methods:
             raise UnknownObject("This database is not yet registered")
         mapping.add(data.keys())
             pickle.dump(data, f, protocol=pickle.HIGHEST_PROTOCOL)
 
     def load(self):
+        """Load the intermediate data for this method.
+
+        Returns:
+            The intermediate data, a dictionary.
+
+        """
         try:
             return pickle.load(open(os.path.join(config.dir, "intermediate",
                 "%s.pickle" % self.get_abbreviation()), "rb"))
             raise MissingIntermediateData("Can't load intermediate data")
 
     def process(self):
-        """Create numpy structured arrays for IA method"""
+        """Process intermediate data from a Python dictionary to a `NumPy <http://numpy.scipy.org/>`_ `Structured <http://docs.scipy.org/doc/numpy/reference/arrays.classes.html#record-arrays-numpy-rec>`_ `Array <http://docs.scipy.org/doc/numpy/user/basics.rec.html>`_. A structured array (also called a record array) is a heterogeneous array, where each column has a different label and data type. These structured arrays act as a standard data format for LCA and Monte Carlo calculations, and are the native data format for the Stats Arrays package.
+
+        Processed arrays are saved in the ``processed`` directory.
+
+        Although it is not standard to provide uncertainty distributions for impact assessment methods, the structured array includes uncertainty fields.
+
+        """
         data = pickle.load(open(os.path.join(config.dir, "intermediate",
             "%s.pickle" % self.get_abbreviation()), "rb"))
         assert data