Commits

Konstantin Lopuhin  committed 42acc0c

like numpy-dot-optimization but more general - unpack only the last index

  • Participants
  • Parent commits 3f01d4d
  • Branches numpy-dot-optimization-3

Comments (0)

Files changed (2)

File pypy/module/micronumpy/iter.py

 
 class MultiDimViewIterator(ConcreteArrayIterator):
     def __init__(self, array, start, strides, backstrides, shape):
-        self.indexes = [0] * len(shape)
         self.array = array
         self.shape = shape
         self.offset = start
         self.strides = strides
         self.backstrides = backstrides
         self.size = array.size
+        self._indexes = [0] * (len(shape) - 1)
+        self.outer_index = 0
+        self.outer_shape = shape[-1]
+        self.outer_stride = strides[-1]
+        self.outer_backstride = backstrides[-1]
 
     @jit.unroll_safe
     def next(self):
         offset = self.offset
-        for i in range(self.shapelen - 1, -1, -1):
-            if self.indexes[i] < self.shape[i] - 1:
-                self.indexes[i] += 1
+        if self.outer_index < self.outer_shape - 1:
+            self.outer_index += 1
+            self.offset += self.outer_stride
+            return
+        else:
+            self.outer_index = 0
+            offset -= self.outer_backstride
+        for i in range(self.shapelen - 2, -1, -1):
+            if self._indexes[i] < self.shape[i] - 1:
+                self._indexes[i] += 1
                 offset += self.strides[i]
                 break
             else:
-                self.indexes[i] = 0
+                self._indexes[i] = 0
                 offset -= self.backstrides[i]
         else:
             self._done = True
 
     @jit.unroll_safe
     def next_skip_x(self, step):
-        for i in range(len(self.shape) - 1, -1, -1):
-            if self.indexes[i] < self.shape[i] - step:
-                self.indexes[i] += step
+        if self.outer_index < self.outer_shape - step:
+            self.outer_index += step
+            self.offset += self.outer_stride * step
+            return
+        else:
+            remaining_step = (self.outer_index + step) // self.outer_shape
+            this_i_step = step - remaining_step * self.outer_shape
+            self.offset += self.outer_stride * this_i_step
+            self.outer_index += this_i_step
+            step = remaining_step
+        for i in range(len(self.shape) - 2, -1, -1):
+            if self._indexes[i] < self.shape[i] - step:
+                self._indexes[i] += step
                 self.offset += self.strides[i] * step
                 break
             else:
-                remaining_step = (self.indexes[i] + step) // self.shape[i]
+                remaining_step = (self._indexes[i] + step) // self.shape[i]
                 this_i_step = step - remaining_step * self.shape[i]
                 self.offset += self.strides[i] * this_i_step
-                self.indexes[i] = self.indexes[i] +  this_i_step
+                self._indexes[i] += this_i_step
                 step = remaining_step
         else:
             self._done = True
         self.offset %= self.size
 
     def get_index(self, d):
-        return self.indexes[d]
+        if d == self.shapelen - 1:
+            return self.outer_index
+        else:
+            return self._indexes[d]
+
+    def get_indexes(self):
+        return self._indexes + [self.outer_index]
 
 class AxisIterator(base.BaseArrayIterator):
     def __init__(self, array, shape, dim, cumulative):

File pypy/module/micronumpy/test/test_iter.py

         i.next()
         assert i.offset == 3
         assert not i.done()
-        assert i.indexes == [0,3]
+        assert i.get_indexes() == [0,3]
         #cause a dimension overflow
         i.next()
         i.next()
         assert i.offset == 5
-        assert i.indexes == [1,0]
+        assert i.get_indexes() == [1,0]
 
         #Now what happens if the array is transposed? strides[-1] != 1
         # therefore layout is non-contiguous
         i.next()
         assert i.offset == 9
         assert not i.done()
-        assert i.indexes == [0,3]
+        assert i.get_indexes() == [0,3]
         #cause a dimension overflow
         i.next()
         i.next()
         assert i.offset == 1
-        assert i.indexes == [1,0]
+        assert i.get_indexes() == [1,0]
 
     def test_C_viewiterator_step(self):
         #iteration in C order with #contiguous layout => strides[-1] is 1
         i.next_skip_x(2)
         assert i.offset == 6
         assert not i.done()
-        assert i.indexes == [1,1]
+        assert i.get_indexes() == [1,1]
         #And for some big skips
         i.next_skip_x(5)
         assert i.offset == 11
-        assert i.indexes == [2,1]
+        assert i.get_indexes() == [2,1]
         i.next_skip_x(5)
         # Note: the offset does not overflow but recycles,
         # this is good for broadcast
         assert i.offset == 1
-        assert i.indexes == [0,1]
+        assert i.get_indexes() == [0,1]
         assert i.done()
 
         #Now what happens if the array is transposed? strides[-1] != 1
         i.next_skip_x(2)
         i.next_skip_x(2)
         assert i.offset == 4
-        assert i.indexes == [1,1]
+        assert i.get_indexes() == [1,1]
         assert not i.done()
         i.next_skip_x(5)
         assert i.offset == 5
-        assert i.indexes == [2,1]
+        assert i.get_indexes() == [2,1]
         assert not i.done()
         i.next_skip_x(5)
-        assert i.indexes == [0,1]
+        assert i.get_indexes() == [0,1]
         assert i.offset == 3
         assert i.done()