Commits

Stian Andreassen committed 8a78c6b Merge

Merge improve-rbigint. This branch improves the performance of most long operations, using 64-bit digit storage and __int128 for wide digits on systems where it is available.
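
For context, the rbigint.py hunk below selects SHIFT = 63 (64-bit digits with __int128 wide digits) when the platform supports it, and otherwise falls back to the classic 31-bit digits. The following is only a minimal pure-Python sketch of that selection: SUPPORT_INT128 and the SHIFT values are taken from the diff, while mask_digit() merely stands in for the rffi/lltype-based UDIGIT_MASK machinery and is not the actual rbigint code.

    # Sketch only: mirrors the SHIFT / MASK selection in pypy/rlib/rbigint.py.
    SUPPORT_INT128 = True   # assumption: the C compiler provides __int128

    if SUPPORT_INT128:
        SHIFT = 63          # one digit per 64-bit word, __int128 wide digits
    else:
        SHIFT = 31          # classic 31-bit digits, 64-bit wide digits

    BASE = 1 << SHIFT
    MASK = BASE - 1

    def mask_digit(x):
        # Keep only the low SHIFT bits of a digit, as UDIGIT_MASK would.
        return x & MASK

    print(SHIFT, hex(MASK))
    print(mask_digit((1 << SHIFT) + 5))   # -> 5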

It adds special cases for power-of-two mod, division, and multiplication, improves pow (see pypy/translator/goal/targetbigintbenchmark.py for some runs on my system), marks operations as elidable, and makes various other tweaks.
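
The power-of-two special case is visible in the floordiv hunk further down: when the divisor is a single positive digit and that digit is a power of two, the division is replaced by a right shift (via ptwotable). Below is a minimal pure-Python sketch of the idea for non-negative operands; floordiv_by_small is a hypothetical helper, not part of rbigint, and bit_length() approximates the ptwotable lookup.

    def floordiv_by_small(value, digit):
        # Floor-divide a non-negative int by a positive single-digit divisor.
        assert value >= 0 and digit > 0
        if digit == 1:
            return value                              # dividing by one: copy
        if digit & (digit - 1) == 0:                  # power of two?
            return value >> (digit.bit_length() - 1)  # shift instead of divide
        return value // digit                         # general slow path

    assert floordiv_by_small(1000, 8) == 125
    assert floordiv_by_small(1000, 3) == 333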

Overall, it makes long operations run faster than CPython, provided the script doesn't rely heavily on division.

  • Parent commits 161f9ca, b627feb


Files changed (5)

File pypy/module/sys/system.py

 
 def get_long_info(space):
     #assert rbigint.SHIFT == 31
-    bits_per_digit = 31 #rbigint.SHIFT
-    sizeof_digit = rffi.sizeof(rffi.ULONG)
+    bits_per_digit = rbigint.SHIFT
+    sizeof_digit = rffi.sizeof(rbigint.STORE_TYPE)
     info_w = [
         space.wrap(bits_per_digit),
         space.wrap(sizeof_digit),

File pypy/rlib/rarithmetic.py

         n -= 2*LONG_TEST
     return int(n)
 
-if LONG_BIT >= 64:
-    def longlongmask(n):
-        assert isinstance(n, (int, long))
-        return int(n)
-else:
-    def longlongmask(n):
-        """
-        NOT_RPYTHON
-        """
-        assert isinstance(n, (int, long))
-        n = long(n)
-        n &= LONGLONG_MASK
-        if n >= LONGLONG_TEST:
-            n -= 2*LONGLONG_TEST
-        return r_longlong(n)
+def longlongmask(n):
+    """
+    NOT_RPYTHON
+    """
+    assert isinstance(n, (int, long))
+    n = long(n)
+    n &= LONGLONG_MASK
+    if n >= LONGLONG_TEST:
+        n -= 2*LONGLONG_TEST
+    return r_longlong(n)
 
 def longlonglongmask(n):
     # Assume longlonglong doesn't overflow. This is perfectly fine for rbigint.

File pypy/rlib/rbigint.py

     SHIFT = 63
     BASE = long(1 << SHIFT)
     UDIGIT_TYPE = r_ulonglong
-    UDIGIT_MASK = longlongmask
+    if LONG_BIT >= 64:
+        UDIGIT_MASK = intmask
+    else:
+        UDIGIT_MASK = longlongmask
     LONG_TYPE = rffi.__INT128
     if LONG_BIT > SHIFT:
         STORE_TYPE = lltype.Signed
     LONG_TYPE = rffi.LONGLONG
 
 MASK = BASE - 1
-FLOAT_MULTIPLIER = float(1 << LONG_BIT) # Because it works.
+FLOAT_MULTIPLIER = float(1 << SHIFT)
 
 # Debugging digit array access.
 #
     udigit._always_inline_ = True
 
     def setdigit(self, x, val):
-        val = val & MASK
+        val = _mask_digit(val)
         assert val >= 0
         self._digits[x] = _store_digit(val)
     setdigit._annspecialcase_ = 'specialize:argtype(2)'
                 
             if asize <= i:
                 result = _x_mul(a, b)
-            elif 2 * asize <= bsize:
-                result = _k_lopsided_mul(a, b)
+                """elif 2 * asize <= bsize:
+                    result = _k_lopsided_mul(a, b)"""
             else:
                 result = _k_mul(a, b)
         else:
 
     @jit.elidable
     def floordiv(self, other):
-        if other.numdigits() == 1 and other.sign == 1:
+        if self.sign == 1 and other.numdigits() == 1 and other.sign == 1:
             digit = other.digit(0)
             if digit == 1:
-                return rbigint(self._digits[:], other.sign * self.sign, self.size)
+                return rbigint(self._digits[:], 1, self.size)
             elif digit and digit & (digit - 1) == 0:
                 return self.rshift(ptwotable[digit])
             
         if mod.sign * other.sign == -1:
             if div.sign == 0:
                 return ONENEGATIVERBIGINT
-            if div.sign == 1:
-                _v_isub(div, 0, div.numdigits(), ONERBIGINT, 1)
-            else:
-                _v_iadd(div, 0, div.numdigits(), ONERBIGINT, 1)
+            div = div.sub(ONERBIGINT)
+            
         return div
 
     def div(self, other):
             mod = mod.add(w)
             if div.sign == 0:
                 return ONENEGATIVERBIGINT, mod
-            if div.sign == 1:
-                _v_isub(div, 0, div.numdigits(), ONERBIGINT, 1)
-            else:
-                _v_iadd(div, 0, div.numdigits(), ONERBIGINT, 1)
+            div = div.sub(ONERBIGINT)
         return div, mod
 
     @jit.elidable
             # XXX failed to implement
             raise ValueError("bigint pow() too negative")
         
-        if b.sign == 0:
-            return ONERBIGINT
-        elif a.sign == 0:
-            return NULLRBIGINT
-        
         size_b = b.numdigits()
         
         if c is not None:
             #     return 0
             if c.numdigits() == 1 and c._digits[0] == ONEDIGIT:
                 return NULLRBIGINT
-
+   
             # if base < 0:
             #     base = base % modulus
             # Having the base positive just makes things easier.
             if a.sign < 0:
                 a = a.mod(c)
-                
             
+        elif b.sign == 0:
+            return ONERBIGINT
+        elif a.sign == 0:
+            return NULLRBIGINT
         elif size_b == 1:
             if b._digits[0] == NULLDIGIT:
                 return ONERBIGINT if a.sign == 1 else ONENEGATIVERBIGINT
         return z
 
     def neg(self):
-        return rbigint(self._digits[:], -self.sign, self.size)
+        return rbigint(self._digits, -self.sign, self.size)
 
     def abs(self):
         if self.sign != -1:
             return self
-        return rbigint(self._digits[:], abs(self.sign), self.size)
+        return rbigint(self._digits, 1, self.size)
 
     def invert(self): #Implement ~x as -(x + 1)
         if self.sign == 0:
         ret = self.add(ONERBIGINT)
         ret.sign = -ret.sign
         return ret
-
-    def inplace_invert(self): # Used by rshift and bitwise to prevent a double allocation.
-        if self.sign == 0:
-            return ONENEGATIVERBIGINT
-        if self.sign == 1:
-            _v_iadd(self, 0, self.numdigits(), ONERBIGINT, 1)
-        else:
-             _v_isub(self, 0, self.numdigits(), ONERBIGINT, 1)
-        self.sign = -self.sign
-        return self
         
     @jit.elidable    
     def lshift(self, int_other):
         remshift  = int_other - wordshift * SHIFT
 
         if not remshift:
+            # So we can avoid problems with eq, AND avoid the need for normalize.
+            if self.sign == 0:
+                return self
             return rbigint([NULLDIGIT] * wordshift + self._digits, self.sign, self.size + wordshift)
         
         oldsize = self.numdigits()
         if self.sign == -1 and not dont_invert:
             a1 = self.invert()
             a2 = a1.rshift(int_other)
-            return a2.inplace_invert()
+            return a2.invert()
 
         wordshift = int_other // SHIFT
         newsize = self.numdigits() - wordshift
 
     def _normalize(self):
         i = self.numdigits()
-        # i is always >= 1
+
         while i > 1 and self._digits[i - 1] == NULLDIGIT:
             i -= 1
         assert i > 0
         if self.numdigits() == 1 and self._digits[0] == NULLDIGIT:
             self.sign = 0
             self._digits = [NULLDIGIT]
-            
+
     _normalize._always_inline_ = True
     
     @jit.elidable
         return bits
 
     def __repr__(self):
-        return "<rbigint digits=%s, sign=%s, %s>" % (self._digits,
-                                                     self.sign, self.str())
+        return "<rbigint digits=%s, sign=%s, size=%d, len=%d, %s>" % (self._digits,
+                                            self.sign, self.size, len(self._digits),
+                                            self.str())
 
 ONERBIGINT = rbigint([ONEDIGIT], 1, 1)
 ONENEGATIVERBIGINT = rbigint([ONEDIGIT], -1, 1)
     size_n = n.numdigits()
     size_lo = min(size_n, size)
 
-    lo = rbigint(n._digits[:size_lo], 1)
-    hi = rbigint(n._digits[size_lo:], 1)
+    # We use "or" here so we don't have to handle an empty digit list in _normalize.
+    lo = rbigint(n._digits[:size_lo] or [NULLDIGIT], 1)
+    hi = rbigint(n._digits[size_lo:n.size] or [NULLDIGIT], 1)
     lo._normalize()
     hi._normalize()
     return hi, lo
     # Split a & b into hi & lo pieces.
     shift = bsize >> 1
     ah, al = _kmul_split(a, shift)
-    assert ah.sign == 1    # the split isn't degenerate
+    if ah.sign == 0:
+        # This may happen now that _k_lopsided_mul no longer catches it.
+        return _x_mul(a, b)
+    #assert ah.sign == 1    # the split isn't degenerate
 
     if a is b:
         bh = ah
 
     # 2. t1 <- ah*bh, and copy into high digits of result.
     t1 = ah.mul(bh)
+
     assert t1.sign >= 0
     assert 2*shift + t1.numdigits() <= ret.numdigits()
     ret._digits[2*shift : 2*shift + t1.numdigits()] = t1._digits
 """
 
 def _k_lopsided_mul(a, b):
+    # Not in use anymore; it only accounted for about 1% of performance. Perhaps
+    # if we got rid of the extra list allocation it would be more effective.
     """
     b has at least twice the digits of a, and a is big enough that Karatsuba
     would pay off *if* the inputs had balanced sizes.  View b as a sequence
     wm2 = w.widedigit(abs(size_w-2))
     j = size_v
     k = size_a - 1
+    carry = _widen_digit(0)
     while k >= 0:
-        assert j >= 2
+        assert j > 1
         if j >= size_v:
             vj = 0
         else:
             vj = v.widedigit(j)
-            
-        carry = 0
-        vj1 = v.widedigit(abs(j-1))
         
         if vj == wm1:
             q = MASK
-            r = 0
         else:
-            vv = ((vj << SHIFT) | vj1)
-            q = vv // wm1
-            r = _widen_digit(vv) - wm1 * q
-        
-        vj2 = v.widedigit(abs(j-2))
-        while wm2 * q > ((r << SHIFT) | vj2):
+            q = ((vj << SHIFT) + v.widedigit(abs(j-1))) // wm1
+
+        while (wm2 * q >
+                ((
+                    (vj << SHIFT)
+                    + v.widedigit(abs(j-1))
+                    - q * wm1
+                                ) << SHIFT)
+                + v.widedigit(abs(j-2))):
             q -= 1
-            r += wm1
-            if r > MASK:
-                break
         i = 0
         while i < size_w and i+k < size_v:
             z = w.widedigit(i) * q
                 i += 1
         j -= 1
         k -= 1
+        carry = 0
 
     a._normalize()
     _inplace_divrem1(v, v, d, size_v)
     if negz == 0:
         return z
     
-    return z.inplace_invert()
+    return z.invert()
 _bitwise._annspecialcase_ = "specialize:arg(1)"
 
 

File pypy/rlib/test/test_rbigint.py

                     res2 = getattr(operator, mod)(x, y)
                     assert res1 == res2
 
+    def test_mul_eq_shift(self):
+        p2 = rbigint.fromlong(1).lshift(63)
+        f1 = rbigint.fromlong(0).lshift(63)
+        f2 = rbigint.fromlong(0).mul(p2)
+        assert f1.eq(f2)
+            
     def test_tostring(self):
         z = rbigint.fromlong(0)
         assert z.str() == '0'

File pypy/translator/goal/targetbigintbenchmark.py

         Sum:  142.686547
         
         Pypy with improvements:
-        mod by 2:  0.003079
-        mod by 10000:  3.148599
-        mod by 1024 (power of two):  0.009572
-        Div huge number by 2**128: 2.202237
-        rshift: 2.240624
-        lshift: 1.405393
-        Floordiv by 2: 1.562338
-        Floordiv by 3 (not power of two): 4.197440
-        2**500000: 0.033737
-        (2**N)**5000000 (power of two): 0.046997
-        10000 ** BIGNUM % 100 1.321710
-        i = i * i: 3.929341
-        n**10000 (not power of two): 6.215907
-        Power of two ** power of two: 0.014209
-        v = v * power of two 3.506702
-        v = v * v 6.253210
-        v = v + v 2.772122
-        Sum:  38.863216
+        mod by 2:  0.006321
+        mod by 10000:  3.143117
+        mod by 1024 (power of two):  0.009611
+        Div huge number by 2**128: 2.138351
+        rshift: 2.247337
+        lshift: 1.334369
+        Floordiv by 2: 1.555604
+        Floordiv by 3 (not power of two): 4.275014
+        2**500000: 0.033836
+        (2**N)**5000000 (power of two): 0.049600
+        10000 ** BIGNUM % 100 1.326477
+        i = i * i: 3.924958
+        n**10000 (not power of two): 6.335759
+        Power of two ** power of two: 0.013380
+        v = v * power of two 3.497662
+        v = v * v 6.359251
+        v = v + v 2.785971
+        Sum:  39.036619
 
         With SUPPORT_INT128 set to False
         mod by 2:  0.004103