Commits

Armin Rigo committed a9ccee5

Speed up a little bit str.split('char'). Unsure why CPython is so much faster
at doing this on a big string (benchmark: a 60MB string with 3 million times
the separator character).

Comments (0)

Files changed (1)

pypy/objspace/std/stringobject.py

 
     res_w = []
     start = 0
-    while maxsplit != 0:
-        next = value.find(by, start)
-        if next < 0:
-            break
-        res_w.append(sliced(space, value, start, next, w_self))
-        start = next + bylen
-        maxsplit -= 1   # NB. if it's already < 0, it stays < 0
+    if bylen == 1 and maxsplit < 0:
+        # fast path: uses str.rfind(character) and str.count(character)
+        by = by[0]    # annotator hack: string -> char
+        count = value.count(by)
+        res_w = [None] * (count + 1)
+        end = len(value)
+        while count >= 0:
+            assert end >= 0
+            prev = value.rfind(by, 0, end)
+            start = prev + 1
+            assert start >= 0
+            res_w[count] = sliced(space, value, start, end, w_self)
+            count -= 1
+            end = prev
+    else:
+        while maxsplit != 0:
+            next = value.find(by, start)
+            if next < 0:
+                break
+            res_w.append(sliced(space, value, start, next, w_self))
+            start = next + bylen
+            maxsplit -= 1   # NB. if it's already < 0, it stays < 0
+        res_w.append(sliced(space, value, start, len(value), w_self))
 
-    res_w.append(sliced(space, value, start, len(value), w_self))
     return space.newlist(res_w)
 
 def str_rsplit__String_None_ANY(space, w_self, w_none, w_maxsplit=-1):