Anonymous avatar Anonymous committed e377b17

Attempt to improve division by porting Cpythons new algorithm, it cuts division time by 30%. And also improve divrem1 by just casting the value, a small speed increase when // 3.

Note: The CPython algorithm currently doesn't pass tests and need to be looked at

Comments (0)

Files changed (3)

pypy/rlib/rbigint.py

         hop.exception_cannot_occur()
 
 class rbigint(object):
+    _immutable_ = True
+    _immutable_fields_ = ["_digits"]
     """This is a reimplementation of longs using a list of digits."""
 
     def __init__(self, digits=[NULLDIGIT], sign=0, size=0):
         if other.sign == 0:
             return self
         if self.sign == 0:
-            return rbigint(other._digits[:], -other.sign, other.size)
+            return rbigint(other._digits[:other.size], -other.sign, other.size)
         if self.sign == other.sign:
             result = _x_sub(self, other)
         else:
             if a._digits[0] == NULLDIGIT:
                 return NULLRBIGINT
             elif a._digits[0] == ONEDIGIT:
-                return rbigint(b._digits[:], a.sign * b.sign, b.size)
+                return rbigint(b._digits[:b.size], a.sign * b.sign, b.size)
             elif bsize == 1:
                 res = b.widedigit(0) * a.widedigit(0)
                 carry = res >> SHIFT
         if self.sign == 1 and other.numdigits() == 1 and other.sign == 1:
             digit = other.digit(0)
             if digit == 1:
-                return rbigint(self._digits[:], 1, self.size)
+                return rbigint(self._digits[:self.size], 1, self.size)
             elif digit and digit & (digit - 1) == 0:
                 return self.rshift(ptwotable[digit])
             
             if digit == 1:
                 return NULLRBIGINT
             elif digit == 2:
-                modm = self.digit(0) % digit
+                modm = self.digit(0) & 1
                 if modm:
                     return ONENEGATIVERBIGINT if other.sign == -1 else ONERBIGINT
                 return NULLRBIGINT
     size -= 1
     
     while size >= 0:
-        rem = (rem << SHIFT) + pin.widedigit(size)
+        rem = (rem << SHIFT) | pin.widedigit(size)
         hi = rem // n
         pout.setdigit(size, hi)
         rem -= hi * n
         size -= 1
-    return rem & MASK
+    return rffi.cast(lltype.Signed, rem)
 
 def _divrem1(a, n):
     """
         * result in z[0:m], and return the d bits shifted out of the bottom.
     """
     
-    carry = 0
+    carry = _widen_digit(0)
     acc = _widen_digit(0)
     mask = (1 << d) - 1
     
     assert 0 <= d and d < SHIFT
     for i in range(m-1, 0, -1):
-        acc = carry << SHIFT | a.digit(i)
+        acc = (carry << SHIFT) | a.widedigit(i)
         carry = acc & mask
         z.setdigit(i, acc >> d)
         
 
 def _x_divrem(v1, w1):
     """ Unsigned bigint division with remainder -- the algorithm """
-
+    size_v = v1.numdigits()
     size_w = w1.numdigits()
-    d = (UDIGIT_TYPE(MASK)+1) // (w1.udigit(abs(size_w-1)) + 1)
-    assert d <= MASK    # because the first digit of w1 is not zero
-    d = UDIGIT_MASK(d)
-    v = _muladd1(v1, d)
-    w = _muladd1(w1, d)
-    size_v = v.numdigits()
-    size_w = w.numdigits()
-    assert size_w > 1 # (Assert checks by div()
+    assert size_v >= size_w and size_w > 1
+    
+    v = rbigint([NULLDIGIT] * (size_v + 1), 1, size_v + 1)
+    w = rbigint([NULLDIGIT] * size_w, 1, size_w)
+    
+    """/normalize: shift w1 left so that its top digit is >= PyLong_BASE/2.
+        shift v1 left by the same amount. Results go into w and v. """
+        
+    d = SHIFT - bits_in_digit(w1.digit(size_w-1))
+    carry = _v_lshift(w, w1, size_w, d)
+    assert carry == 0
+    carry = _v_lshift(v, v1, size_v, d)
+    if carry != 0 or v.digit(abs(size_v-1)) >= w.digit(abs(size_w-1)):
+        v.setdigit(size_v, carry)
+        size_v += 1
+        
+    """ Now v->ob_digit[size_v-1] < w->ob_digit[size_w-1], so quotient has
+        at most (and usually exactly) k = size_v - size_w digits. """
         
     size_a = size_v - size_w + 1
-    assert size_a > 0
     a = rbigint([NULLDIGIT] * size_a, 1, size_a)
-
+    
     wm1 = w.widedigit(abs(size_w-1))
     wm2 = w.widedigit(abs(size_w-2))
+
     j = size_v
     k = size_a - 1
-    carry = _widen_digit(0)
+    assert k > 0
     while k >= 0:
-        assert j > 1
+        assert j >= 0
+        """ inner loop: divide vk[0:size_w+1] by w0[0:size_w], giving
+            single-digit quotient q, remainder in vk[0:size_w]. """
+            
+        # estimate quotient digit q; may overestimate by 1 (rare)
         if j >= size_v:
-            vj = 0
+            vtop = 0
         else:
-            vj = v.widedigit(j)
+            vtop = v.widedigit(j)
+        assert vtop <= wm1
+        vv = (vtop << SHIFT | v.widedigit(abs(j-1)))
+        q = vv / wm1
+        r = vv - (wm1 * q)
+        while wm2 * q > (r << SHIFT | v.widedigit(abs(j-2))):
+            q -= 1
+            r += wm1
+            if r > MASK:
+                break
+                
+        assert q < MASK
         
-        if vj == wm1:
-            q = MASK
-        else:
-            q = ((vj << SHIFT) + v.widedigit(abs(j-1))) // wm1
-
-        while (wm2 * q >
-                ((
-                    (vj << SHIFT)
-                    + v.widedigit(abs(j-1))
-                    - q * wm1
-                                ) << SHIFT)
-                + v.widedigit(abs(j-2))):
-            q -= 1
+        # subtract q*w0[0:size_w] from vk[0:size_w+1]
+        zhi = 0
         i = 0
-        while i < size_w and i+k < size_v:
-            z = w.widedigit(i) * q
-            zz = z >> SHIFT
-            carry += v.widedigit(i+k) - z + (zz << SHIFT)
-            v.setdigit(i+k, carry)
-            carry >>= SHIFT
-            carry -= zz
+        while i < size_w:
+            z = v.widedigit(k+i) + zhi - q * w.widedigit(i)
+            v.setdigit(k+i, z)
+            zhi = z >> SHIFT
             i += 1
-
-        if i+k < size_v:
-            carry += v.widedigit(i+k)
-            v.setdigit(i+k, 0)
-
-        if carry == 0:
-            a.setdigit(k, q)
-            assert not q >> SHIFT
-        else:
-            assert carry == -1
-            q -= 1
-            a.setdigit(k, q)
-            assert not q >> SHIFT
-
-            carry = 0
+        
+        # add w back if q was too large (this branch taken rarely)
+        assert vtop+zhi == -1 or vtop + zhi == 0
+        if vtop + zhi < 0:
+            carry = _widen_digit(0)
             i = 0
-            while i < size_w and i+k < size_v:
-                carry += v.udigit(i+k) + w.udigit(i)
-                v.setdigit(i+k, carry)
+            while i < size_w:
+                carry += v.widedigit(k+i) + w.widedigit(i)
+                v.setdigit(k+i, carry)
                 carry >>= SHIFT
                 i += 1
+            q -= 1
+            
+        # store quotient digit
+        a.setdigit(k, q)
+        k -= 1
         j -= 1
-        k -= 1
-        carry = 0
-
+        
+        
+    
+    carry = _v_rshift(w, v, size_w, d)
+    assert carry == 0
+    
     a._normalize()
-    _inplace_divrem1(v, v, d, size_v)
-    v._normalize()
-    return a, v
+    w._normalize()
+    
+    return a, w
         
 def _divrem(a, b):
     """ Long division with remainder, top-level routine """

pypy/rlib/test/test_rbigint.py

             y = long(randint(1, 1 << 60))
             y <<= 60
             y += randint(1, 1 << 60)
+            if y > x:
+                x <<= 100
+                
             f1 = rbigint.fromlong(x)
             f2 = rbigint.fromlong(y)
             div, rem = lobj._x_divrem(f1, f2)
             assert div.tolong() == _div
             assert rem.tolong() == _rem
 
+    def test__x_divrem2(self):
+        Rx = 1 << 130
+        Rx2 = 1 << 150
+        Ry = 1 << 127
+        Ry2 = 1<< 130
+        for i in range(10):
+            x = long(randint(Rx, Rx2))
+            y = long(randint(Ry, Ry2))
+            f1 = rbigint.fromlong(x)
+            f2 = rbigint.fromlong(y)
+            div, rem = lobj._x_divrem(f1, f2)
+            _div, _rem = divmod(x, y)
+            assert div.tolong() == _div
+            assert rem.tolong() == _rem
+            
     def test_divmod(self):
         x = 12345678901234567890L
         for i in range(100):

pypy/translator/goal/targetbigintbenchmark.py

 
 import os, sys
 from time import time
-from pypy.rlib.rbigint import rbigint, _k_mul, _tc_mul
+from pypy.rlib.rbigint import rbigint
 
 # __________  Entry point  __________
 
         Sum:  142.686547
         
         Pypy with improvements:
-        mod by 2:  0.006321
-        mod by 10000:  3.143117
-        mod by 1024 (power of two):  0.009611
-        Div huge number by 2**128: 2.138351
-        rshift: 2.247337
-        lshift: 1.334369
-        Floordiv by 2: 1.555604
-        Floordiv by 3 (not power of two): 4.275014
-        2**500000: 0.033836
-        (2**N)**5000000 (power of two): 0.049600
-        10000 ** BIGNUM % 100 1.326477
-        i = i * i: 3.924958
-        n**10000 (not power of two): 6.335759
-        Power of two ** power of two: 0.013380
-        v = v * power of two 3.497662
-        v = v * v 6.359251
-        v = v + v 2.785971
-        Sum:  39.036619
+        mod by 2:  0.004325
+        mod by 10000:  3.152204
+        mod by 1024 (power of two):  0.009776
+        Div huge number by 2**128: 1.393527
+        rshift: 2.222866
+        lshift: 1.360271
+        Floordiv by 2: 1.499409
+        Floordiv by 3 (not power of two): 4.027997
+        2**500000: 0.033296
+        (2**N)**5000000 (power of two): 0.045644
+        10000 ** BIGNUM % 100 1.217218
+        i = i * i: 3.962458
+        n**10000 (not power of two): 6.343562
+        Power of two ** power of two: 0.013249
+        v = v * power of two 3.536149
+        v = v * v 6.299587
+        v = v + v 2.767121
+        Sum:  37.888659
 
         With SUPPORT_INT128 set to False
         mod by 2:  0.004103
     """
     sumTime = 0.0
     
-    
-    """t = time()
-    by = rbigint.fromint(2**62).lshift(1030000)
-    for n in xrange(5000):
-        by2 = by.lshift(63)
-        _tc_mul(by, by2)
-        by = by2
-        
-
-    _time = time() - t
-    sumTime += _time
-    print "Toom-cook effectivity _Tcmul 1030000-1035000 digits:", _time
-    
-    t = time()
-    by = rbigint.fromint(2**62).lshift(1030000)
-    for n in xrange(5000):
-        by2 = by.lshift(63)
-        _k_mul(by, by2)
-        by = by2
-        
-
-    _time = time() - t
-    sumTime += _time
-    print "Toom-cook effectivity _kMul 1030000-1035000 digits:", _time"""
-    
-    
     V2 = rbigint.fromint(2)
     num = rbigint.pow(rbigint.fromint(100000000), rbigint.fromint(1024))
     t = time()
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.