Commits

Chris Mutel committed df40f5d

Matrix builder now filters param arrays to exclude unmapped rows and columns

Comments (0)

Files changed (2)

     def build(cls, dirpath, names, data_label,
               row_id_label, row_index_label,
               col_id_label=None, col_index_label=None,
-              row_dict=None, col_dict=None, one_d=False):
+              row_dict=None, col_dict=None, one_d=False, drop_missing=True):
         """
 Build a sparse matrix from NumPy structured array(s).
 
             # Eliminate references to row data which isn't used;
             # Unused data remains MAX_INT_32 values because it isn't mapped
             # by ``add_matrix_indices``.
-            array = array[np.where(array[row_index_label] != MAX_INT_32)]
+            if drop_missing:
+                array = array[np.where(array[row_index_label] != MAX_INT_32)]
             matrix = cls.build_diagonal_matrix(array, row_dict, row_index_label, data_label)
         else:
             if not col_dict:
                 col_dict = cls.build_dictionary(array[col_id_label])
             cls.add_matrix_indices(array[col_id_label],
                                    array[col_index_label], col_dict)
+            if drop_missing:
+                array = array[np.where(array[row_index_label] != MAX_INT_32)]
+                array = array[np.where(array[col_index_label] != MAX_INT_32)]
             matrix = cls.build_matrix(
                 array, row_dict, col_dict, row_index_label, col_index_label,
                 data_label)

bw2calc/tests/matrices.py

 from .. import *
 from bw2data import config
 from bw2data.tests import BW2DataTest
+from bw2data.utils import MAX_INT_32
 import numpy as np
 import os
 try:
             MatrixBuilder.load(config.dir, ["a", "b"])
         ))
 
+    def test_build_one_d(self):
+        dtype = [
+            ('a', np.uint32),
+            ('row', np.uint32),
+            ('values', np.float32),
+        ]
+        array = np.array([
+            (1, MAX_INT_32, 99),
+            (2, MAX_INT_32, 100),
+            ], dtype=dtype
+        )
+        row_dict = {1: 0, 2: 1}
+        with open(os.path.join(
+                config.dir,
+                "processed",
+                "sour.pickle"), "wb") as f:
+            pickle.dump(array, f, protocol=pickle.HIGHEST_PROTOCOL)
+        matrix = MatrixBuilder.build(config.dir, ["sour"], "values", "a",
+            "row", row_dict=row_dict, one_d=True)[3]
+        self.assertTrue(np.allclose(
+            matrix.todense(),
+            np.array(((99, 0), (0, 100)))
+        ))
+
+    def test_build_one_d_drop_missing(self):
+        dtype = [
+            ('a', np.uint32),
+            ('row', np.uint32),
+            ('values', np.float32),
+        ]
+        array = np.array([
+            (1, MAX_INT_32, 99),
+            (2, MAX_INT_32, 99),
+            (3, MAX_INT_32, 99),
+            ], dtype=dtype
+        )
+        row_dict = {1: 0, 2: 1}
+        with open(os.path.join(
+                config.dir,
+                "processed",
+                "ghost.pickle"), "wb") as f:
+            pickle.dump(array, f, protocol=pickle.HIGHEST_PROTOCOL)
+        values = MatrixBuilder.build(config.dir, ["ghost"], "values", "a",
+            "row", row_dict=row_dict, one_d=True)[0]
+        self.assertEqual(values.shape, (2,))
+
+    def test_one_d_missing_in_row_dict_raise_valueerror(self):
+        dtype = [
+            ('a', np.uint32),
+            ('row', np.uint32),
+            ('values', np.float32),
+        ]
+        array = np.array([
+            (1, MAX_INT_32, 99),
+            (2, MAX_INT_32, 99),
+            ], dtype=dtype
+        )
+        row_dict = {1: 0}
+        with open(os.path.join(
+                config.dir,
+                "processed",
+                "ghost.pickle"), "wb") as f:
+            pickle.dump(array, f, protocol=pickle.HIGHEST_PROTOCOL)
+        with self.assertRaises(ValueError):
+            MatrixBuilder.build(config.dir, ["ghost"], "values", "a",
+                "row", row_dict=row_dict, one_d=True, drop_missing=False)
+
+    def test_build_drop_missing(self):
+        dtype = [
+            ('a', np.uint32),
+            ('b', np.uint32),
+            ('row', np.uint32),
+            ('col', np.uint32),
+            ('values', np.float32),
+        ]
+        array = np.array([
+            (1, 2, MAX_INT_32, MAX_INT_32, 99),
+            (3, 4, MAX_INT_32, MAX_INT_32, 99),
+            (3, 2, MAX_INT_32, MAX_INT_32, 99),
+            (5, 6, MAX_INT_32, MAX_INT_32, 99),
+            ], dtype=dtype
+        )
+        row_dict = {1: 0, 3: 1}
+        col_dict = {2: 0, 6: 1}
+        with open(os.path.join(
+                config.dir,
+                "processed",
+                "boo.pickle"), "wb") as f:
+            pickle.dump(array, f, protocol=pickle.HIGHEST_PROTOCOL)
+        values = MatrixBuilder.build(config.dir, ["boo"], "values", "a", "row",
+            "b", "col", row_dict, col_dict)[0]
+        self.assertEqual(values.shape, (2,))
+
+    def test_missing_in_row_dict_raise_valueerror(self):
+        dtype = [
+            ('a', np.uint32),
+            ('b', np.uint32),
+            ('row', np.uint32),
+            ('col', np.uint32),
+            ('values', np.float32),
+        ]
+        array = np.array([
+            (1, 2, MAX_INT_32, MAX_INT_32, 99),
+            (1, 4, MAX_INT_32, MAX_INT_32, 99),
+            ], dtype=dtype
+        )
+        row_dict = {1: 0}
+        col_dict = {2: 0}
+        with open(os.path.join(
+                config.dir,
+                "processed",
+                "whoah.pickle"), "wb") as f:
+            pickle.dump(array, f, protocol=pickle.HIGHEST_PROTOCOL)
+        with self.assertRaises(ValueError):
+            MatrixBuilder.build(config.dir, ["whoah"], "values", "a",
+                "row", "b", "col", row_dict, col_dict, drop_missing=False)
+
     def test_add_matrix_indices(self):
         a = np.arange(10)
         b = np.zeros(10)