Commits

Anonymous committed dbe8412

Another fix for pow(), disable _k_lopsided (has less than 1% gain), fix _x_divrem crash.

There is one remaining known issue (with eq), may lake a _normalize somewhere

Comments (0)

Files changed (2)

pypy/rlib/rbigint.py

 
     @jit.elidable
     def eq(self, other):
+        # This code is temp only. Just to raise some more specific asserts
+        # For a bug.
+        # One of the values sent to eq have not gone through normalize.
+        # Etc Aga x * p2 != x << n from test_long.py
+        if self.sign == 0 and other.sign == 0:
+            return True
+        assert not (self.numdigits() == 1 and self._digits[0] == NULLDIGIT)
+        assert not (other.numdigits() == 1 and other._digits[0] == NULLDIGIT)
+        
+        
         if (self.sign != other.sign or
             self.numdigits() != other.numdigits()):
             return False
                 
             if asize <= i:
                 result = _x_mul(a, b)
-            elif 2 * asize <= bsize:
-                result = _k_lopsided_mul(a, b)
+                """elif 2 * asize <= bsize:
+                    result = _k_lopsided_mul(a, b)"""
             else:
                 result = _k_mul(a, b)
         else:
             # XXX failed to implement
             raise ValueError("bigint pow() too negative")
         
+        size_b = b.numdigits()
+        
         if c is not None:
             if c.sign == 0:
                 raise ValueError("pow() 3rd argument cannot be 0")
             return ONERBIGINT
         elif a.sign == 0:
             return NULLRBIGINT
-            
-        size_b = b.numdigits()
-        
-        if size_b == 1:
+        elif size_b == 1:
             if b._digits[0] == NULLDIGIT:
                 return ONERBIGINT if a.sign == 1 else ONENEGATIVERBIGINT
             elif b._digits[0] == ONEDIGIT:
         return z
 
     def neg(self):
-        return rbigint(self._digits[:], -self.sign, self.size)
+        return rbigint(self._digits, -self.sign, self.size)
 
     def abs(self):
         if self.sign != -1:
             return self
-        return rbigint(self._digits[:], abs(self.sign), self.size)
+        return rbigint(self._digits, 1, self.size)
 
     def invert(self): #Implement ~x as -(x + 1)
         if self.sign == 0:
     size_n = n.numdigits()
     size_lo = min(size_n, size)
 
-    lo = rbigint(n._digits[:size_lo], 1)
-    hi = rbigint(n._digits[size_lo:], 1)
+    # We use "or" her to avoid having a check where list can be empty in _normalize.
+    lo = rbigint(n._digits[:size_lo] or [NULLDIGIT], 1)
+    hi = rbigint(n._digits[size_lo:n.size] or [NULLDIGIT], 1)
     lo._normalize()
     hi._normalize()
     return hi, lo
     # Split a & b into hi & lo pieces.
     shift = bsize >> 1
     ah, al = _kmul_split(a, shift)
-    assert ah.sign == 1    # the split isn't degenerate
+    if ah.sign == 0:
+        # This may happen now that _k_lopsided_mul ain't catching it.
+        return _x_mul(a, b)
+    #assert ah.sign == 1    # the split isn't degenerate
 
     if a is b:
         bh = ah
 
     # 2. t1 <- ah*bh, and copy into high digits of result.
     t1 = ah.mul(bh)
+
     assert t1.sign >= 0
     assert 2*shift + t1.numdigits() <= ret.numdigits()
     ret._digits[2*shift : 2*shift + t1.numdigits()] = t1._digits
 """
 
 def _k_lopsided_mul(a, b):
+    # Not in use anymore, only account for like 1% performance. Perhaps if we
+    # Got rid of the extra list allocation this would be more effective.
     """
     b has at least twice the digits of a, and a is big enough that Karatsuba
     would pay off *if* the inputs had balanced sizes.  View b as a sequence
     wm2 = w.widedigit(abs(size_w-2))
     j = size_v
     k = size_a - 1
+    carry = _widen_digit(0)
     while k >= 0:
-        assert j >= 2
+        assert j > 1
         if j >= size_v:
             vj = 0
         else:
             vj = v.widedigit(j)
-            
-        carry = 0
-        vj1 = v.widedigit(abs(j-1))
         
         if vj == wm1:
             q = MASK
-            r = 0
         else:
-            vv = ((vj << SHIFT) | vj1)
-            q = vv // wm1
-            r = _widen_digit(vv) - wm1 * q
-        
-        vj2 = v.widedigit(abs(j-2))
-        while wm2 * q > ((r << SHIFT) | vj2):
+            q = ((vj << SHIFT) + v.widedigit(abs(j-1))) // wm1
+
+        while (wm2 * q >
+                ((
+                    (vj << SHIFT)
+                    + v.widedigit(abs(j-1))
+                    - q * wm1
+                                ) << SHIFT)
+                + v.widedigit(abs(j-2))):
             q -= 1
-            r += wm1
-            if r > MASK:
-                break
         i = 0
         while i < size_w and i+k < size_v:
             z = w.widedigit(i) * q
                 i += 1
         j -= 1
         k -= 1
+        carry = 0
 
     a._normalize()
     _inplace_divrem1(v, v, d, size_v)

pypy/translator/goal/targetbigintbenchmark.py

         Sum:  142.686547
         
         Pypy with improvements:
-        mod by 2:  0.003079
-        mod by 10000:  3.148599
-        mod by 1024 (power of two):  0.009572
-        Div huge number by 2**128: 2.202237
-        rshift: 2.240624
-        lshift: 1.405393
-        Floordiv by 2: 1.562338
-        Floordiv by 3 (not power of two): 4.197440
-        2**500000: 0.033737
-        (2**N)**5000000 (power of two): 0.046997
-        10000 ** BIGNUM % 100 1.321710
-        i = i * i: 3.929341
-        n**10000 (not power of two): 6.215907
-        Power of two ** power of two: 0.014209
-        v = v * power of two 3.506702
-        v = v * v 6.253210
-        v = v + v 2.772122
-        Sum:  38.863216
+        mod by 2:  0.005841
+        mod by 10000:  3.134566
+        mod by 1024 (power of two):  0.009598
+        Div huge number by 2**128: 2.117672
+        rshift: 2.216447
+        lshift: 1.318227
+        Floordiv by 2: 1.518645
+        Floordiv by 3 (not power of two): 4.349879
+        2**500000: 0.033484
+        (2**N)**5000000 (power of two): 0.052457
+        10000 ** BIGNUM % 100 1.323458
+        i = i * i: 3.964939
+        n**10000 (not power of two): 6.313849
+        Power of two ** power of two: 0.013127
+        v = v * power of two 3.537295
+        v = v * v 6.310657
+        v = v + v 2.765472
+        Sum:  38.985613
 
         With SUPPORT_INT128 set to False
         mod by 2:  0.004103