Commits

Aaron Iles  committed 8effffc Merge

Merged in aliles/pypy/default (pull request #57)

  • Participants
  • Parent commits 65a2411, 434d177
  • Branches bytearray-refactor

Comments (0)

Files changed (7)

File pypy/objspace/std/abstractstring.py

+from pypy.objspace.std.model import W_Object
+from pypy.rlib.objectmodel import specialize
+
+
+class Mixin_BaseStringMethods(object):
+    __slots__ = ()
+
+    def isalnum(w_self, space):
+        return w_self._all_true(space, w_self._isalnum)
+
+    def isalpha(w_self, space):
+        return w_self._all_true(space, w_self._isalpha)
+
+    def isdigit(w_self, space):
+        return w_self._all_true(space, w_self._isdigit)
+
+    def islower(w_self, space):
+        return w_self._none_false_one_true(space,
+                w_self._islower, w_self._isupper)
+
+    def isspace(w_self, space):
+        return w_self._all_true(space, w_self._isspace)
+
+    def isupper(w_self, space):
+        return w_self._none_false_one_true(space,
+                w_self._isupper, w_self._islower)
+
+    def istitle(w_self, space):
+        return w_self._title(space)
+
+    def lower(w_self, space):
+        return w_self._transform(space, w_self._lower)
+
+    def swapcase(w_self, space):
+        return w_self._transform(space, w_self._swapcase)
+
+    def upper(w_self, space):
+        return w_self._transform(space, w_self._upper)
+
+
+class AbstractCharIterator(object):
+
+    def __init__(self, sequence):
+        self.sequence = sequence
+        self.pos = 0
+
+    def __len__(self):
+        return len(self.sequence)
+
+    def __iter__(self):
+        return self
+
+    def next(self):
+        ch = self.nextchar()
+        if ch is None:
+            raise StopIteration
+        return ch
+
+    # XXX deprecate nextchar() method
+    def nextchar(self):
+        if self.pos >= len(self):
+            return None
+        idx = self.pos
+        self.pos += 1
+        return self.sequence[idx]
+
+
+class W_AbstractBaseStringObject(W_Object):
+    __slots__ = ()
+
+    def __repr__(w_self):
+        """ representation for debugging purposes """
+        return "%s(%r)" % (w_self.__class__.__name__, w_self.raw_value())
+
+    def builder(w_self, space, size=0):
+        raise NotImplemented, "method not implemented"
+
+    def construct(w_self, space, data):
+        raise NotImplemented, "method not implemented"
+
+    def immutable_unique_id(w_self, space):
+        if w_self.user_overridden_class:
+            return None
+        return space.wrap(compute_unique_id(w_self.unwrap(space)))
+
+    def is_w(self, space, w_other):
+        if not isinstance(w_other, W_AbstractBaseStringObject):
+            return False
+        if self is w_other:
+            return True
+        if self.user_overridden_class or w_other.user_overridden_class:
+            return False
+        return self.unwrap(space) is w_other.unwrap(space)
+
+    def iterator(w_self, space):
+        return AbstractCharIterator(w_self.unwrap(space))
+
+    def length(w_self, space):
+        return len(w_self.unwrap(space))
+
+    def raw_value(w_self):
+        raise NotImplemented, "method not implemented"
+
+    def str_w(w_self, space):
+        raise NotImplemented, "method not implemented"
+
+    def unicode_w(w_self, space):
+        raise NotImplemented, "method not implemented"
+
+    def unwrap(w_self, space):
+        raise NotImplemented, "method not implemented"
+
+    @specialize.arg(2)
+    def _all_true(w_self, space, func):
+        """Test all elements of a list with func for True.
+        Returns True only if all elements test True."""
+        size = w_self.length(space)
+        it = w_self.iterator(space)
+        if size == 0:
+            return space.w_False
+        if size == 1:
+            return space.newbool(func(it.nextchar()))
+        # not all it objects will support iterator protocol, eg rope
+        for pos in range(size):
+            ch = it.nextchar()
+            if not func(ch):
+                return space.w_False
+        return space.w_True
+
+    @specialize.arg(2, 3)
+    def _none_false_one_true(w_self, space, pred, inverse):
+        """Test all elements against predicate and inverse.
+        Returns True only if all elements fail inverse and at least one
+        element passes predicate."""
+        v = w_self.unwrap(space)
+        if len(v) == 1:
+            c = v[0]
+            return space.newbool(pred(c))
+        status = False
+        for idx in range(len(v)):
+            if inverse(v[idx]):
+                return space.w_False
+            elif not status and pred(v[idx]):
+                status = True
+        return space.newbool(status)
+
+    def _title(w_self, space):
+        input = w_self.unwrap(space)
+        cased = False
+        previous_is_cased = False
+
+        for pos in range(0, len(input)):
+            ch = input[pos]
+            if w_self._isupper(ch):
+                if previous_is_cased:
+                    return space.w_False
+                previous_is_cased = True
+                cased = True
+            elif w_self._islower(ch):
+                if not previous_is_cased:
+                    return space.w_False
+                cased = True
+            else:
+                previous_is_cased = False
+
+        return space.newbool(cased)
+
+    @specialize.arg(2)
+    def _transform(w_self, space, func):
+        sz = w_self.length(space)
+        it = w_self.iterator(space)
+        bd = w_self.builder(space, sz)
+        for pos in range(sz):
+            ch = it.nextchar()
+            bd.append(func(ch))
+        return w_self.construct(space, bd.build())

File pypy/objspace/std/bytearrayobject.py

 from pypy.interpreter import gateway
 from pypy.interpreter.argument import Signature
 from pypy.interpreter.buffer import RWBuffer
+from pypy.objspace.std.abstractstring import \
+        W_AbstractBaseStringObject, Mixin_BaseStringMethods
 from pypy.objspace.std.bytearraytype import (
     makebytearraydata_w, getbytevalue,
     new_bytearray
 )
-from pypy.tool.sourcetools import func_with_new_name
 
 
-class W_BytearrayObject(W_Object):
+class Mixin_BytearrayMethods(Mixin_BaseStringMethods):
+    __slots__ = ()
+
+
+class W_AbstractBytearrayObject(stringobject.W_AbstractStringObject):
+    __slots__ = ()
+
+
+class W_BytearrayObject(W_AbstractBytearrayObject, Mixin_BytearrayMethods):
     from pypy.objspace.std.bytearraytype import bytearray_typedef as typedef
 
     def __init__(w_self, data):
         w_self.data = data
 
-    def __repr__(w_self):
-        """ representation for debugging purposes """
-        return "%s(%s)" % (w_self.__class__.__name__, ''.join(w_self.data))
+    def builder(w_self, space, size=0):
+        return StringBuilder(size)
+
+    def construct(w_self, space, data):
+        return W_BytearrayObject(data)
+
+    def raw_value(w_self):
+        return w_self.data
+
+    def str_w(w_self, space):
+        return w_self.data
+
+    def unicode_w(w_self, space):
+        # XXX should this use the default encoding?
+        from pypy.objspace.std.unicodetype import plain_str2unicode
+        return plain_str2unicode(space, w_self.data)
+
 
 registerimplementation(W_BytearrayObject)
 
 def str__Bytearray(space, w_bytearray):
     return space.wrap(''.join(w_bytearray.data))
 
+def str_isalnum__Bytearray(space, w_self):
+    return w_self.isalnum(space)
+
+def str_isalpha__Bytearray(space, w_self):
+    return w_self.isalpha(space)
+
+def str_isdigit__Bytearray(space, w_self):
+    return w_self.isdigit(space)
+
+def str_islower__Bytearray(space, w_self):
+    return w_self.islower(space)
+
+def str_isspace__Bytearray(space, w_self):
+    return w_self.isspace(space)
+
+def str_istitle__Bytearray(space, w_self):
+    return w_self.istitle(space)
+
+def str_isupper__Bytearray(space, w_self):
+    return w_self.isupper(space)
+
+def str_lower__Bytearray(space, w_self):
+    return w_self.lower(space)
+
+def str_upper__Bytearray(space, w_self):
+    return w_self.upper(space)
+
+def str_swapcase__Bytearray(space, w_self):
+    return w_self.swapcase(space)
+
 def str_count__Bytearray_Int_ANY_ANY(space, w_bytearray, w_char, w_start, w_stop):
     char = w_char.intval
     bytearray = w_bytearray.data
     w_str = str__Bytearray(space, w_bytearray)
     return stringobject.str_decode__String_ANY_ANY(space, w_str, w_encoding, w_errors)
 
-def str_islower__Bytearray(space, w_bytearray):
-    w_str = str__Bytearray(space, w_bytearray)
-    return stringobject.str_islower__String(space, w_str)
-
-def str_isupper__Bytearray(space, w_bytearray):
-    w_str = str__Bytearray(space, w_bytearray)
-    return stringobject.str_isupper__String(space, w_str)
-
-def str_isalpha__Bytearray(space, w_bytearray):
-    w_str = str__Bytearray(space, w_bytearray)
-    return stringobject.str_isalpha__String(space, w_str)
-
-def str_isalnum__Bytearray(space, w_bytearray):
-    w_str = str__Bytearray(space, w_bytearray)
-    return stringobject.str_isalnum__String(space, w_str)
-
-def str_isdigit__Bytearray(space, w_bytearray):
-    w_str = str__Bytearray(space, w_bytearray)
-    return stringobject.str_isdigit__String(space, w_str)
-
-def str_istitle__Bytearray(space, w_bytearray):
-    w_str = str__Bytearray(space, w_bytearray)
-    return stringobject.str_istitle__String(space, w_str)
-
-def str_isspace__Bytearray(space, w_bytearray):
-    w_str = str__Bytearray(space, w_bytearray)
-    return stringobject.str_isspace__String(space, w_str)
-
 def bytearray_insert__Bytearray_Int_ANY(space, w_bytearray, w_idx, w_other):
     where = space.int_w(w_idx)
     length = len(w_bytearray.data)
                                                          w_str2, w_max)
     return String2Bytearray(space, w_res)
 
-def str_upper__Bytearray(space, w_bytearray):
-    w_str = str__Bytearray(space, w_bytearray)
-    w_res = stringobject.str_upper__String(space, w_str)
-    return String2Bytearray(space, w_res)
-
-def str_lower__Bytearray(space, w_bytearray):
-    w_str = str__Bytearray(space, w_bytearray)
-    w_res = stringobject.str_lower__String(space, w_str)
-    return String2Bytearray(space, w_res)
-
 def str_title__Bytearray(space, w_bytearray):
     w_str = str__Bytearray(space, w_bytearray)
     w_res = stringobject.str_title__String(space, w_str)
     return String2Bytearray(space, w_res)
 
-def str_swapcase__Bytearray(space, w_bytearray):
-    w_str = str__Bytearray(space, w_bytearray)
-    w_res = stringobject.str_swapcase__String(space, w_str)
-    return String2Bytearray(space, w_res)
-
 def str_capitalize__Bytearray(space, w_bytearray):
     w_str = str__Bytearray(space, w_bytearray)
     w_res = stringobject.str_capitalize__String(space, w_str)

File pypy/objspace/std/ropeobject.py

 from pypy.objspace.std.stringobject import (
     mod__String_ANY as mod__Rope_ANY,
     str_format__String as str_format__Rope,
-    _upper, _lower, DEFAULT_NOOP_TABLE)
+    DEFAULT_NOOP_TABLE)
 
-class W_RopeObject(stringobject.W_AbstractStringObject):
+
+class RopeBuilder(object):
+    """Mimic sufficent StringBuilder API for over simple character arrays"""
+
+    def __init__(self, size=0):
+        self.data = [' '] * size
+        self.pos = 0
+
+    def append(self, ch):
+        self.data[self.pos] = ch
+        self.pos += 1
+
+    def build(self):
+        return rope.rope_from_charlist(self.data)
+
+
+class W_RopeObject(stringobject.W_AbstractStringObject,
+        stringobject.Mixin_StringMethods):
     from pypy.objspace.std.stringtype import str_typedef as typedef
     _immutable_fields_ = ['_node']
 
             assert node.is_bytestring()
         w_self._node = node
 
-    def __repr__(w_self):
-        """ representation for debugging purposes """
-        return "%s(%r)" % (w_self.__class__.__name__, w_self._node)
+    def builder(w_self, space, size=0):
+        return RopeBuilder(size)
 
-    def unwrap(w_self, space):
+    def construct(w_self, space, data):
+        return W_RopeObject(data)
+
+    def iterator(w_self, space):
+        return rope.ItemIterator(w_self._node)
+
+    def length(w_self, space):
+        return w_self._node.length()
+
+    def raw_value(w_self):
+        return w_self._node
+
+    def str_w(w_self, space):
         return w_self._node.flatten_string()
-    str_w = unwrap
+
+    def unicode_w(w_self, space):
+        # XXX should this use the default encoding?
+        from pypy.objspace.std.unicodetype import plain_str2unicode
+        return plain_str2unicode(space, w_self._node.flatten_string())
 
     def create_if_subclassed(w_self):
         if type(w_self) is W_RopeObject:
             return w_self
         return W_RopeObject(w_self._node)
 
-    def unicode_w(w_self, space):
-        # XXX should this use the default encoding?
-        from pypy.objspace.std.unicodetype import plain_str2unicode
-        return plain_str2unicode(space, w_self._node.flatten_string())
-
 W_RopeObject.EMPTY = W_RopeObject(rope.LiteralStringNode.EMPTY)
 W_RopeObject.PREBUILT = [W_RopeObject(rope.LiteralStringNode.PREBUILT[i])
                              for i in range(256)]
 
 registerimplementation(W_RopeIterObject)
 
-def _is_generic(space, w_self, fun):
-    l = w_self._node.length()
-    if l == 0:
-        return space.w_False
-    iter = rope.ItemIterator(w_self._node)
-    for i in range(l):
-        if not fun(iter.nextchar()):
-            return space.w_False
-    return space.w_True
-_is_generic._annspecialcase_ = "specialize:arg(2)"
-
-_isspace = lambda c: c.isspace()
-_isdigit = lambda c: c.isdigit()
-_isalpha = lambda c: c.isalpha()
-_isalnum = lambda c: c.isalnum()
-
 def str_isspace__Rope(space, w_self):
-    return _is_generic(space, w_self, _isspace)
+    return w_self.isspace(space)
 
 def str_isdigit__Rope(space, w_self):
-    return _is_generic(space, w_self, _isdigit)
+    return w_self.isdigit(space)
 
 def str_isalpha__Rope(space, w_self):
-    return _is_generic(space, w_self, _isalpha)
+    return w_self.isalpha(space)
 
 def str_isalnum__Rope(space, w_self):
-    return _is_generic(space, w_self, _isalnum)
+    return w_self.isalnum(space)
 
 def str_isupper__Rope(space, w_self):
     """Return True if all cased characters in S are uppercase and there is
 at least one cased character in S, False otherwise."""
-    l = w_self._node.length()
-
-    if l == 0:
-        return space.w_False
-    cased = False
-    iter = rope.ItemIterator(w_self._node)
-    for idx in range(l):
-        c = iter.nextchar()
-        if c.islower():
-            return space.w_False
-        elif not cased and c.isupper():
-            cased = True
-    return space.newbool(cased)
+    return w_self.isupper(space)
 
 def str_islower__Rope(space, w_self):
     """Return True if all cased characters in S are lowercase and there is
 at least one cased character in S, False otherwise."""
-    l = w_self._node.length()
-
-    if l == 0:
-        return space.w_False
-    cased = False
-    iter = rope.ItemIterator(w_self._node)
-    for idx in range(l):
-        c = iter.nextchar()
-        if c.isupper():
-            return space.w_False
-        elif not cased and c.islower():
-            cased = True
-    return space.newbool(cased)
+    return w_self.islower(space)
 
 def str_istitle__Rope(space, w_self):
     """Return True if S is a titlecased string and there is at least one
 character in S, i.e. uppercase characters may only follow uncased
 characters and lowercase characters only cased ones. Return False
 otherwise."""
-    cased = False
-    previous_is_cased = False
+    return w_self.istitle(space)
 
-    iter = rope.ItemIterator(w_self._node)
-    for pos in range(0, w_self._node.length()):
-        ch = iter.nextchar()
-        if ch.isupper():
-            if previous_is_cased:
-                return space.w_False
-            previous_is_cased = True
-            cased = True
-        elif ch.islower():
-            if not previous_is_cased:
-                return space.w_False
-            cased = True
-        else:
-            previous_is_cased = False
+def str_lower__Rope(space, w_self):
+    return w_self.lower(space)
 
-    return space.newbool(cased)
-
-def _local_transform(node, transform):
-    l = node.length()
-    res = [' '] * l
-    iter = rope.ItemIterator(node)
-    for i in range(l):
-        ch = iter.nextchar()
-        res[i] = transform(ch)
-
-    return W_RopeObject(rope.rope_from_charlist(res))
-_local_transform._annspecialcase_ = "specialize:arg(1)"
+def str_swapcase__Rope(space, w_self):
+    return w_self.swapcase(space)
 
 def str_upper__Rope(space, w_self):
-    return _local_transform(w_self._node, _upper)
-
-def str_lower__Rope(space, w_self):
-    return _local_transform(w_self._node, _lower)
-
-def _swapcase(ch):
-    if ch.isupper():
-        o = ord(ch) + 32
-        return chr(o)
-    elif ch.islower():
-        o = ord(ch) - 32
-        return chr(o)
-    else:
-        return ch
-
-def str_swapcase__Rope(space, w_self):
-    return _local_transform(w_self._node, _swapcase)
-
+    return w_self.upper(space)
 
 def str_capitalize__Rope(space, w_self):
     node = w_self._node
     for pos in range(0, length):
         ch = iter.nextchar()
         if not prev_letter.isalpha():
-            buffer[pos] = _upper(ch)
+            buffer[pos] = w_self._upper(ch)
         else:
-            buffer[pos] = _lower(ch)
+            buffer[pos] = w_self._lower(ch)
 
         prev_letter = buffer[pos]
 

File pypy/objspace/std/ropeunicodeobject.py

 from pypy.interpreter.error import OperationError, operationerrfmt
 from pypy.interpreter import gateway
 from pypy.objspace.std.stringobject import W_StringObject
-from pypy.objspace.std.unicodeobject import _normalize_index
 from pypy.objspace.std.ropeobject import W_RopeObject
 from pypy.objspace.std.noneobject import W_NoneObject
 from pypy.rlib import rope
 from pypy.objspace.std.tupleobject import W_TupleObject
 from pypy.rlib.rarithmetic import intmask, ovfcheck
 from pypy.module.unicodedata import unicodedb
-from pypy.tool.sourcetools import func_with_new_name
 
 from pypy.objspace.std.formatting import mod_format
 
     return encode_object(space, w_unistr, encoding, errors)
 
 
+# XXX create shared base class with RopeBuilder
+class RopeUnicodeBuilder(object):
+    """Mimic sufficent StringBuilder API for over simple character arrays"""
+
+    def __init__(self, size=0):
+        self.data = [u' '] * size
+        self.pos = 0
+
+    def append(self, ch):
+        self.data[self.pos] = ch
+        self.pos += 1
+
+    def build(self):
+        return rope.rope_from_unicharlist(self.data)
+
+
 class W_RopeUnicodeObject(unicodeobject.W_AbstractUnicodeObject):
     from pypy.objspace.std.unicodetype import unicode_typedef as typedef
     _immutable_fields_ = ['_node']
     def __init__(w_self, node):
         w_self._node = node
 
-    def __repr__(w_self):
-        """ representation for debugging purposes """
-        return "%s(%r)" % (w_self.__class__.__name__, w_self._node)
+    def builder(w_self, space, size=0):
+        return RopeUnicodeBuilder(size)
 
-    def unwrap(w_self, space):
-        # for testing
-        return w_self._node.flatten_unicode()
+    def construct(w_self, space, data):
+        return W_RopeUnicodeObject(data)
+
+    def iterator(w_self, space):
+        return rope.ItemIterator(w_self._node)
+
+    def length(w_self, space):
+        return w_self._node.length()
+
+    def raw_value(w_self):
+        return w_self._node
 
     def str_w(w_self, space):
         return space.str_w(space.str(w_self))
 
+    def unicode_w(self, space):
+        return self._node.flatten_unicode()
+
     def create_if_subclassed(w_self):
         if type(w_self) is W_RopeUnicodeObject:
             return w_self
         return W_RopeUnicodeObject(w_self._node)
 
-    def unicode_w(self, space):
-        return self._node.flatten_unicode()
-
 W_RopeUnicodeObject.EMPTY = W_RopeUnicodeObject(rope.LiteralStringNode.EMPTY)
 
 registerimplementation(W_RopeUnicodeObject)
 
-def _isspace(uchar_ord):
-    return unicodedb.isspace(uchar_ord)
-
 def ropeunicode_w(space, w_str):
     if isinstance(w_str, W_RopeUnicodeObject):
         return w_str._node
 def mul__ANY_RopeUnicode(space, w_times, w_uni):
     return mul__RopeUnicode_ANY(space, w_uni, w_times)
 
+def unicode_isspace__RopeUnicode(space, w_self):
+    return w_self.isspace(space)
 
-def make_generic(funcname):
-    def func(space, w_self):
-        node = w_self._node
-        if node.length() == 0:
-            return space.w_False
-        iter = rope.ItemIterator(node)
-        for idx in range(node.length()):
-            if not getattr(unicodedb, funcname)(iter.nextint()):
-                return space.w_False
-        return space.w_True
-    return func_with_new_name(func, "unicode_%s__RopeUnicode" % (funcname, ))
+def unicode_isalpha__RopeUnicode(space, w_self):
+    return w_self.isalpha(space)
 
-unicode_isspace__RopeUnicode = make_generic("isspace")
-unicode_isalpha__RopeUnicode = make_generic("isalpha")
-unicode_isalnum__RopeUnicode = make_generic("isalnum")
-unicode_isdecimal__RopeUnicode = make_generic("isdecimal")
-unicode_isdigit__RopeUnicode = make_generic("isdigit")
-unicode_isnumeric__RopeUnicode = make_generic("isnumeric")
+def unicode_isalnum__RopeUnicode(space, w_self):
+    return w_self.isalnum(space)
 
-def unicode_islower__RopeUnicode(space, w_unicode):
-    cased = False
-    iter = rope.ItemIterator(w_unicode._node)
-    while 1:
-        try:
-            ch = iter.nextint()
-        except StopIteration:
-            return space.newbool(cased)
-        if (unicodedb.isupper(ch) or
-            unicodedb.istitle(ch)):
-            return space.w_False
-        if not cased and unicodedb.islower(ch):
-            cased = True
+def unicode_isdecimal__RopeUnicode(space, w_self):
+    return w_self.isdecimal(space)
 
-def unicode_isupper__RopeUnicode(space, w_unicode):
-    cased = False
-    iter = rope.ItemIterator(w_unicode._node)
-    while 1:
-        try:
-            ch = iter.nextint()
-        except StopIteration:
-            return space.newbool(cased)
-        if (unicodedb.islower(ch) or
-            unicodedb.istitle(ch)):
-            return space.w_False
-        if not cased and unicodedb.isupper(ch):
-            cased = True
+def unicode_isdigit__RopeUnicode(space, w_self):
+    return w_self.isdigit(space)
 
-def unicode_istitle__RopeUnicode(space, w_unicode):
-    cased = False
-    previous_is_cased = False
-    iter = rope.ItemIterator(w_unicode._node)
-    while 1:
-        try:
-            ch = iter.nextint()
-        except StopIteration:
-            return space.newbool(cased)
-        if (unicodedb.isupper(ch) or
-            unicodedb.istitle(ch)):
-            if previous_is_cased:
-                return space.w_False
-            previous_is_cased = cased = True
-        elif unicodedb.islower(ch):
-            if not previous_is_cased:
-                return space.w_False
-            previous_is_cased = cased = True
-        else:
-            previous_is_cased = False
+def unicode_isnumeric__RopeUnicode(space, w_self):
+    return w_self.isnumeric(space)
 
+def unicode_islower__RopeUnicode(space, w_self):
+    return w_self.islower(space)
+
+def unicode_isupper__RopeUnicode(space, w_self):
+    return w_self.isupper(space)
+
+def unicode_istitle__RopeUnicode(space, w_self):
+    return w_self.istitle(space)
 
 def _contains(i, uni):
     return unichr(i) in uni
 
 def unicode_strip__RopeUnicode_None(space, w_self, w_chars):
-    return W_RopeUnicodeObject(rope.strip(w_self._node, True, True, _isspace))
+    return W_RopeUnicodeObject(rope.strip(w_self._node, True, True, unicodedb.isspace))
 def unicode_strip__RopeUnicode_RopeUnicode(space, w_self, w_chars):
     return W_RopeUnicodeObject(rope.strip(w_self._node, True, True, _contains,
                                w_chars._node.flatten_unicode()))
                              unicode_from_string(space, w_chars))
 
 def unicode_lstrip__RopeUnicode_None(space, w_self, w_chars):
-    return W_RopeUnicodeObject(rope.strip(w_self._node, True, False, _isspace))
+    return W_RopeUnicodeObject(rope.strip(w_self._node, True, False, unicodedb.isspace))
 def unicode_lstrip__RopeUnicode_RopeUnicode(space, w_self, w_chars):
     return W_RopeUnicodeObject(rope.strip(w_self._node, True, False, _contains,
                                w_chars._node.flatten_unicode()))
                              unicode_from_string(space, w_chars))
 
 def unicode_rstrip__RopeUnicode_None(space, w_self, w_chars):
-    return W_RopeUnicodeObject(rope.strip(w_self._node, False, True, _isspace))
+    return W_RopeUnicodeObject(rope.strip(w_self._node, False, True, unicodedb.isspace))
 def unicode_rstrip__RopeUnicode_RopeUnicode(space, w_self, w_chars):
     return W_RopeUnicodeObject(rope.strip(w_self._node, False, True, _contains,
                                w_chars._node.flatten_unicode()))
         previous_is_cased = unicodedb.iscased(unichar)
     return W_RopeUnicodeObject(rope.rope_from_unicharlist(result))
 
+def unicode_lower__RopeUnicode(space, w_self):
+    return w_self.lower(space)
 
-def _local_transform(node, transform):
-    l = node.length()
-    res = [u' '] * l
-    iter = rope.ItemIterator(node)
-    for i in range(l):
-        ch = iter.nextint()
-        res[i] = transform(ch)
-
-    return W_RopeUnicodeObject(rope.rope_from_unicharlist(res))
-_local_transform._annspecialcase_ = "specialize:arg(1)"
-
-def _tolower(ordch):
-    return unichr(unicodedb.tolower(ordch))
-def unicode_lower__RopeUnicode(space, w_self):
-    return _local_transform(w_self._node, _tolower)
-
-def _toupper(ordch):
-    return unichr(unicodedb.toupper(ordch))
 def unicode_upper__RopeUnicode(space, w_self):
-    return _local_transform(w_self._node, _toupper)
-
-def _swapcase(ordch):
-    if unicodedb.islower(ordch):
-        return unichr(unicodedb.toupper(ordch))
-    elif unicodedb.isupper(ordch):
-        return unichr(unicodedb.tolower(ordch))
-    else:
-        return unichr(ordch)
+    return w_self.upper(space)
 
 def unicode_swapcase__RopeUnicode(space, w_self):
-    return _local_transform(w_self._node, _swapcase)
+    return w_self.swapcase(space)
 
 def _convert_idx_params(space, w_self, w_start, w_end):
     self = w_self._node
     selfnode = w_self._node
     maxsplit = space.int_w(w_maxsplit)
     res_w = [W_RopeUnicodeObject(node)
-                for node in rope.split_chars(selfnode, maxsplit, _isspace)]
+                for node in rope.split_chars(selfnode, maxsplit, unicodedb.isspace)]
     return space.newlist(res_w)
 
 def unicode_split__RopeUnicode_RopeUnicode_ANY(space, w_self, w_delim, w_maxsplit):
     selfnode = w_self._node
     maxsplit = space.int_w(w_maxsplit)
     res_w = [W_RopeUnicodeObject(node)
-                for node in rope.rsplit_chars(selfnode, maxsplit, _isspace)]
+                for node in rope.rsplit_chars(selfnode, maxsplit, unicodedb.isspace)]
     return space.newlist(res_w)
 
 

File pypy/objspace/std/stringobject.py

 from pypy.rlib.rstring import StringBuilder, split
 from pypy.interpreter.buffer import StringBuffer
 
+from pypy.objspace.std.abstractstring import \
+        W_AbstractBaseStringObject, Mixin_BaseStringMethods
+from pypy.objspace.std.formatting import mod_format
 from pypy.objspace.std.stringtype import sliced, wrapstr, wrapchar, \
      stringendswith, stringstartswith, joined2
 
-from pypy.objspace.std.formatting import mod_format
 
-class W_AbstractStringObject(W_Object):
+class Mixin_StringMethods(Mixin_BaseStringMethods):
     __slots__ = ()
 
-    def is_w(self, space, w_other):
-        if not isinstance(w_other, W_AbstractStringObject):
-            return False
-        if self is w_other:
-            return True
-        if self.user_overridden_class or w_other.user_overridden_class:
-            return False
-        return space.str_w(self) is space.str_w(w_other)
 
-    def immutable_unique_id(self, space):
-        if self.user_overridden_class:
-            return None
-        return space.wrap(compute_unique_id(space.str_w(self)))
+class W_AbstractStringObject(W_AbstractBaseStringObject):
+    __slots__ = ()
 
+    def unwrap(w_self, space):
+        return w_self.str_w(space)
 
-class W_StringObject(W_AbstractStringObject):
+    def _isalnum(self, ch):
+        return ch.isalnum()
+
+    def _isalpha(self, ch):
+        return ch.isalpha()
+
+    def _isdigit(self, ch):
+        return ch.isdigit()
+
+    def _islower(self, ch):
+        return ch.islower()
+
+    def _isspace(self, ch):
+        return ch.isspace()
+
+    def _isupper(self, ch):
+        return ch.isupper()
+
+    def _lower(self, ch):
+        if ch.isupper():
+            o = ord(ch) + 32
+            return chr(o)
+        else:
+            return ch
+
+    def _upper(self, ch):
+        if ch.islower():
+            o = ord(ch) - 32
+            return chr(o)
+        else:
+            return ch
+
+    def _swapcase(self, ch):
+        if ch.isupper():
+            o = ord(ch) + 32
+            return chr(o)
+        elif ch.islower():
+            o = ord(ch) - 32
+            return chr(o)
+        else:
+            return ch
+
+
+class W_StringObject(W_AbstractStringObject, Mixin_StringMethods):
     from pypy.objspace.std.stringtype import str_typedef as typedef
     _immutable_fields_ = ['_value']
 
     def __init__(w_self, str):
         w_self._value = str
 
-    def __repr__(w_self):
-        """ representation for debugging purposes """
-        return "%s(%r)" % (w_self.__class__.__name__, w_self._value)
+    def builder(w_self, space, size=0):
+        return StringBuilder(size)
 
-    def unwrap(w_self, space):
+    def construct(w_self, space, data):
+        return W_StringObject(data)
+
+    def raw_value(w_self):
         return w_self._value
 
     def str_w(w_self, space):
             return space.unicode_w(unicode_from_string(space, w_self))
         return space.unicode_w(decode_object(space, w_self, encoding, errors))
 
+
 registerimplementation(W_StringObject)
 
 W_StringObject.EMPTY = W_StringObject('')
 W_StringObject.PREBUILT = [W_StringObject(chr(i)) for i in range(256)]
 del i
 
-@specialize.arg(2)
-def _is_generic(space, w_self, fun):
-    v = w_self._value
-    if len(v) == 0:
-        return space.w_False
-    if len(v) == 1:
-        c = v[0]
-        return space.newbool(fun(c))
-    else:
-        return _is_generic_loop(space, v, fun)
+def str_isalnum__String(space, w_self):
+    return w_self.isalnum(space)
 
-@specialize.arg(2)
-def _is_generic_loop(space, v, fun):
-    for idx in range(len(v)):
-        if not fun(v[idx]):
-            return space.w_False
-    return space.w_True
+def str_isalpha__String(space, w_self):
+    return w_self.isalpha(space)
 
-def _upper(ch):
-    if ch.islower():
-        o = ord(ch) - 32
-        return chr(o)
-    else:
-        return ch
-
-def _lower(ch):
-    if ch.isupper():
-        o = ord(ch) + 32
-        return chr(o)
-    else:
-        return ch
-
-_isspace = lambda c: c.isspace()
-_isdigit = lambda c: c.isdigit()
-_isalpha = lambda c: c.isalpha()
-_isalnum = lambda c: c.isalnum()
+def str_isdigit__String(space, w_self):
+    return w_self.isdigit(space)
 
 def str_isspace__String(space, w_self):
-    return _is_generic(space, w_self, _isspace)
+    return w_self.isspace(space)
 
-def str_isdigit__String(space, w_self):
-    return _is_generic(space, w_self, _isdigit)
-
-def str_isalpha__String(space, w_self):
-    return _is_generic(space, w_self, _isalpha)
-
-def str_isalnum__String(space, w_self):
-    return _is_generic(space, w_self, _isalnum)
+def str_islower__String(space, w_self):
+    """Return True if all cased characters in S are lowercase and there is
+at least one cased character in S, False otherwise."""
+    return w_self.islower(space)
 
 def str_isupper__String(space, w_self):
     """Return True if all cased characters in S are uppercase and there is
 at least one cased character in S, False otherwise."""
-    v = w_self._value
-    if len(v) == 1:
-        c = v[0]
-        return space.newbool(c.isupper())
-    cased = False
-    for idx in range(len(v)):
-        if v[idx].islower():
-            return space.w_False
-        elif not cased and v[idx].isupper():
-            cased = True
-    return space.newbool(cased)
-
-def str_islower__String(space, w_self):
-    """Return True if all cased characters in S are lowercase and there is
-at least one cased character in S, False otherwise."""
-    v = w_self._value
-    if len(v) == 1:
-        c = v[0]
-        return space.newbool(c.islower())
-    cased = False
-    for idx in range(len(v)):
-        if v[idx].isupper():
-            return space.w_False
-        elif not cased and v[idx].islower():
-            cased = True
-    return space.newbool(cased)
+    return w_self.isupper(space)
 
 def str_istitle__String(space, w_self):
     """Return True if S is a titlecased string and there is at least one
 character in S, i.e. uppercase characters may only follow uncased
 characters and lowercase characters only cased ones. Return False
 otherwise."""
-    input = w_self._value
-    cased = False
-    previous_is_cased = False
+    return w_self.istitle(space)
 
-    for pos in range(0, len(input)):
-        ch = input[pos]
-        if ch.isupper():
-            if previous_is_cased:
-                return space.w_False
-            previous_is_cased = True
-            cased = True
-        elif ch.islower():
-            if not previous_is_cased:
-                return space.w_False
-            cased = True
-        else:
-            previous_is_cased = False
+def str_lower__String(space, w_self):
+    """Delegate str.lower() to w_self.lower(space)."""
+    return w_self.lower(space)
 
-    return space.newbool(cased)
+def str_swapcase__String(space, w_self):
+    """Delegate str.swapcase() to w_self.swapcase(space)."""
+    return w_self.swapcase(space)
 
 def str_upper__String(space, w_self):
+    """Delegate str.upper() to w_self.upper(space)."""
-    self = w_self._value
-    return space.wrap(self.upper())
-
-def str_lower__String(space, w_self):
-    self = w_self._value
-    return space.wrap(self.lower())
-
-def str_swapcase__String(space, w_self):
-    self = w_self._value
-    builder = StringBuilder(len(self))
-    for i in range(len(self)):
-        ch = self[i]
-        if ch.isupper():
-            o = ord(ch) + 32
-            builder.append(chr(o))
-        elif ch.islower():
-            o = ord(ch) - 32
-            builder.append(chr(o))
-        else:
-            builder.append(ch)
-
-    return space.wrap(builder.build())
-
+    return w_self.upper(space)
 
 def str_capitalize__String(space, w_self):
     input = w_self._value
     for pos in range(len(input)):
         ch = input[pos]
         if not prev_letter.isalpha():
-            ch = _upper(ch)
+            ch = w_self._upper(ch)
             builder.append(ch)
         else:
-            ch = _lower(ch)
+            ch = w_self._lower(ch)
             builder.append(ch)
 
         prev_letter = ch

File pypy/objspace/std/test/test_unicodeobject.py

         assert u"Brown Fox".title() == u"Brown Fox"
         assert u"bro!wn fox".title() == u"Bro!Wn Fox"
 
+    def test_isalnum(self):
+        assert u"".isalnum() == False
+        assert u"!Bro12345w&&&&n Fox".isalnum() == False
+        assert u"125 Brown Foxes".isalnum() == False
+        assert u"125BrownFoxes".isalnum() == True
+
+    def test_isalpha(self):
+        # Negative cases: empty string, punctuation/digits mixed in,
+        # embedded space, digits only.
+        assert u"".isalpha() == False
+        assert u"!Bro12345w&&&&nFox".isalpha() == False
+        assert u"Brown Foxes".isalpha() == False
+        assert u"125".isalpha() == False
+        # Positive case: without it an isalpha() that always answers False
+        # would still pass this test.
+        assert u"BrownFoxes".isalpha() == True
+
+    def test_isdigit(self):
+        assert u"".isdigit() == False
+        assert u"!Bro12345w&&&&nFox".isdigit() == False
+        assert u"Brown Foxes".isdigit() == False
+        assert u"125".isdigit() == True
+
+    def test_isdecimal(self):
+        assert u"".isdecimal() == False
+        assert u"42!".isdecimal() == False
+        assert u"\t1337".isdecimal() == False
+        assert u"6".isdecimal() == True
+
+    def test_isspace(self):
+        assert u"".isspace() == False
+        assert u"!Bro12345w&&&&nFox".isspace() == False
+        assert u" ".isspace() ==  True
+        assert u"\t\t\b\b\n".isspace() == False
+        assert u"\t\t".isspace() == True
+        assert u"\t\t\r\r\n".isspace() == True
+
+    def test_islower(self):
+        assert u"".islower() == False
+        assert u" ".islower() ==  False
+        assert u"\t\t\b\b\n".islower() == False
+        assert u"b".islower() == True
+        assert u"bbb".islower() == True
+        assert u"!bbb".islower() == True
+        assert u"BBB".islower() == False
+        assert u"bbbBBB".islower() == False
+
+    def test_isupper(self):
+        assert u"".isupper() == False
+        assert u" ".isupper() ==  False
+        assert u"\t\t\b\b\n".isupper() == False
+        assert u"B".isupper() == True
+        assert u"BBB".isupper() == True
+        assert u"!BBB".isupper() == True
+        assert u"bbb".isupper() == False
+        assert u"BBBbbb".isupper() == False
+
     def test_istitle(self):
         assert u"".istitle() == False
         assert u"!".istitle() == False
         assert (u'\u019b\u1d00\u1d86\u0221\u1fb7'.capitalize() ==
                 u'\u019b\u1d00\u1d86\u0221\u1fb7')
 
+    def test_lower(self):
+        assert u"ABC".lower() == u"abc"
+
+    def test_upper(self):
+        assert u"abc".upper() == u"ABC"
+
     def test_rjust(self):
         s = u"abc"
         assert s.rjust(2) == s

File pypy/objspace/std/unicodeobject.py

 from pypy.rlib.rstring import UnicodeBuilder
 from pypy.rlib.runicode import unicode_encode_unicode_escape
 from pypy.module.unicodedata import unicodedb
-from pypy.tool.sourcetools import func_with_new_name
 
+from pypy.objspace.std.abstractstring import \
+        W_AbstractBaseStringObject, Mixin_BaseStringMethods
 from pypy.objspace.std.formatting import mod_format
 from pypy.objspace.std.stringtype import stringstartswith, stringendswith
 
-class W_AbstractUnicodeObject(W_Object):
+
+class Mixin_UnicodeMethods(Mixin_BaseStringMethods):
+    """Unicode-only additions to the shared string method mixin.
+
+    Each public method hands a per-character predicate of w_self to
+    w_self._all_true(); the predicates are expected on the concrete class.
+    """
+    # Was misspelled "__slows__", which silently created an unused class
+    # attribute instead of declaring empty slots like
+    # Mixin_BaseStringMethods does.
+    __slots__ = ()
+
+    def isdecimal(w_self, space):
+        """Apply w_self._isdecimal to the string via _all_true()."""
+        return w_self._all_true(space, w_self._isdecimal)
+
+    def isnumeric(w_self, space):
+        """Apply w_self._isnumeric to the string via _all_true().
+
+        unicode_isnumeric__Unicode calls w_self.isnumeric(space), so the
+        method has to exist alongside isdecimal().
+        """
+        return w_self._all_true(space, w_self._isnumeric)
+
+    def _isnumeric(w_self, ch):
+        # Kept next to isnumeric() so the pair is self-contained; uses the
+        # same unicodedb predicate the former make_generic("isnumeric") did.
+        return unicodedb.isnumeric(ord(ch))
+
+
+class W_AbstractUnicodeObject(W_AbstractBaseStringObject, Mixin_UnicodeMethods):
+    """Abstract unicode object.
+
+    Supplies the per-character predicates and case conversions, all backed
+    by unicodedb, that the inherited mixin methods (isalnum, isdecimal,
+    lower, ...) look up on w_self.
+    """
     __slots__ = ()
 
-    def is_w(self, space, w_other):
-        if not isinstance(w_other, W_AbstractUnicodeObject):
-            return False
-        if self is w_other:
-            return True
-        if self.user_overridden_class or w_other.user_overridden_class:
-            return False
-        return space.unicode_w(self) is space.unicode_w(w_other)
+    def unwrap(w_self, space):
+        """Return w_self.unicode_w(space)."""
+        return w_self.unicode_w(space)
 
-    def immutable_unique_id(self, space):
-        if self.user_overridden_class:
-            return None
-        return space.wrap(compute_unique_id(space.unicode_w(self)))
+    # Per-character helpers: each takes a single unicode character `ch` and
+    # queries unicodedb with its code point (ord(ch)).
+    def _isalnum(self, ch):
+        return unicodedb.isalnum(ord(ch))
+
+    def _isalpha(self, ch):
+        return unicodedb.isalpha(ord(ch))
+
+    def _isdigit(self, ch):
+        return unicodedb.isdigit(ord(ch))
+
+    def _isdecimal(self, ch):
+        return unicodedb.isdecimal(ord(ch))
+
+    # NOTE(review): the removed unicode_islower__Unicode and
+    # unicode_isupper__Unicode also returned False for characters where
+    # unicodedb.istitle() is true; _islower/_isupper do not consult
+    # istitle() -- confirm titlecase characters are still handled.
+    def _islower(self, ch):
+        return unicodedb.islower(ord(ch))
+
+    def _isspace(self, ch):
+        return unicodedb.isspace(ord(ch))
+
+    def _isupper(self, ch):
+        return unicodedb.isupper(ord(ch))
+
+    # Case conversions return a one-character unicode string built with
+    # unichr() from the code point unicodedb maps to.
+    def _lower(self, ch):
+        return unichr(unicodedb.tolower(ord(ch)))
+
+    def _upper(self, ch):
+        return unichr(unicodedb.toupper(ord(ch)))
+
+    def _swapcase(self, ch):
+        """Upper-case a lowercase ch, lower-case an uppercase ch, and
+        return any other character unchanged."""
+        num = ord(ch)
+        if unicodedb.islower(num):
+            return unichr(unicodedb.toupper(num))
+        elif unicodedb.isupper(num):
+            return unichr(unicodedb.tolower(num))
+        else:
+            return ch
 
 
 class W_UnicodeObject(W_AbstractUnicodeObject):
         assert isinstance(unistr, unicode)
         w_self._value = unistr
 
-    def __repr__(w_self):
-        """ representation for debugging purposes """
-        return "%s(%r)" % (w_self.__class__.__name__, w_self._value)
+    def builder(w_self, space, size=0):
+        """Return a new UnicodeBuilder created with `size`; `space` is not
+        used here."""
+        return UnicodeBuilder(size)
 
-    def unwrap(w_self, space):
-        # for testing
+    def construct(w_self, space, data):
+        """Return a new W_UnicodeObject wrapping `data`; `space` is not
+        used here."""
+        return W_UnicodeObject(data)
+
+    def raw_value(w_self):
+        """Return the stored value (w_self._value) as is."""
         return w_self._value
 
+    def str_w(self, space):
+        """Return space.str_w() applied to space.str(self)."""
+        return space.str_w(space.str(self))
+
+    def unicode_w(self, space):
+        """Return the stored value (self._value); `space` is not used
+        here."""
+        return self._value
+
     def create_if_subclassed(w_self):
+        """Return w_self when it is exactly a W_UnicodeObject; otherwise
+        return a new W_UnicodeObject built from w_self._value."""
         if type(w_self) is W_UnicodeObject:
             return w_self
         return W_UnicodeObject(w_self._value)
 
-    def str_w(self, space):
-        return space.str_w(space.str(self))
-
-    def unicode_w(self, space):
-        return self._value
 
 W_UnicodeObject.EMPTY = W_UnicodeObject(u'')
 
         raise operationerrfmt(space.w_TypeError,
                               "expected unicode, got '%s'",
                               space.type(w_unistr).getname(space))
+    # XXX remove direct use of _value
     unistr = w_unistr._value
     result = ['\0'] * len(unistr)
     digits = [ '0', '1', '2', '3', '4',
 def mul__ANY_Unicode(space, w_times, w_uni):
     return mul__Unicode_ANY(space, w_uni, w_times)
 
-def _isspace(uchar):
-    return unicodedb.isspace(ord(uchar))
+def unicode_isspace__Unicode(space, w_self):
+    """Delegate unicode.isspace() to w_self.isspace(space)."""
+    return w_self.isspace(space)
 
-def make_generic(funcname):
-    def func(space, w_self):
-        v = w_self._value
-        if len(v) == 0:
-            return space.w_False
-        for idx in range(len(v)):
-            if not getattr(unicodedb, funcname)(ord(v[idx])):
-                return space.w_False
-        return space.w_True
-    return func_with_new_name(func, "unicode_%s__Unicode" % (funcname, ))
+def unicode_isalpha__Unicode(space, w_self):
+    """Delegate unicode.isalpha() to w_self.isalpha(space)."""
+    return w_self.isalpha(space)
 
-unicode_isspace__Unicode = make_generic("isspace")
-unicode_isalpha__Unicode = make_generic("isalpha")
-unicode_isalnum__Unicode = make_generic("isalnum")
-unicode_isdecimal__Unicode = make_generic("isdecimal")
-unicode_isdigit__Unicode = make_generic("isdigit")
-unicode_isnumeric__Unicode = make_generic("isnumeric")
+def unicode_isalnum__Unicode(space, w_self):
+    """Delegate unicode.isalnum() to w_self.isalnum(space)."""
+    return w_self.isalnum(space)
 
-def unicode_islower__Unicode(space, w_unicode):
-    cased = False
-    for uchar in w_unicode._value:
-        if (unicodedb.isupper(ord(uchar)) or
-            unicodedb.istitle(ord(uchar))):
-            return space.w_False
-        if not cased and unicodedb.islower(ord(uchar)):
-            cased = True
-    return space.newbool(cased)
+def unicode_isdecimal__Unicode(space, w_self):
+    """Delegate unicode.isdecimal() to w_self.isdecimal(space)."""
+    return w_self.isdecimal(space)
 
-def unicode_isupper__Unicode(space, w_unicode):
-    cased = False
-    for uchar in w_unicode._value:
-        if (unicodedb.islower(ord(uchar)) or
-            unicodedb.istitle(ord(uchar))):
-            return space.w_False
-        if not cased and unicodedb.isupper(ord(uchar)):
-            cased = True
-    return space.newbool(cased)
+def unicode_isdigit__Unicode(space, w_self):
+    """Delegate unicode.isdigit() to w_self.isdigit(space)."""
+    return w_self.isdigit(space)
 
-def unicode_istitle__Unicode(space, w_unicode):
-    cased = False
-    previous_is_cased = False
-    for uchar in w_unicode._value:
-        if (unicodedb.isupper(ord(uchar)) or
-            unicodedb.istitle(ord(uchar))):
-            if previous_is_cased:
-                return space.w_False
-            previous_is_cased = cased = True
-        elif unicodedb.islower(ord(uchar)):
-            if not previous_is_cased:
-                return space.w_False
-            previous_is_cased = cased = True
-        else:
-            previous_is_cased = False
-    return space.newbool(cased)
+def unicode_isnumeric__Unicode(space, w_self):
+    """Delegate unicode.isnumeric() to w_self.isnumeric(space)."""
+    # NOTE(review): relies on w_self providing isnumeric(); confirm the
+    # unicode object classes define it.
+    return w_self.isnumeric(space)
+
+def unicode_islower__Unicode(space, w_self):
+    """Delegate unicode.islower() to w_self.islower(space)."""
+    return w_self.islower(space)
+
+def unicode_isupper__Unicode(space, w_self):
+    """Delegate unicode.isupper() to w_self.isupper(space)."""
+    return w_self.isupper(space)
+
+def unicode_istitle__Unicode(space, w_self):
+    """Delegate unicode.istitle() to w_self.istitle(space)."""
+    return w_self.istitle(space)
 
 def _strip(space, w_self, w_chars, left, right):
     "internal function called by str_xstrip methods"
     rpos = len(u_self)
 
     if left:
-        while lpos < rpos and _isspace(u_self[lpos]):
+        while lpos < rpos and w_self._isspace(u_self[lpos]):
            lpos += 1
 
     if right:
-        while rpos > lpos and _isspace(u_self[rpos - 1]):
+        while rpos > lpos and w_self._isspace(u_self[rpos - 1]):
            rpos -= 1
 
     assert rpos >= 0
     return W_UnicodeObject(builder.build())
 
 def unicode_lower__Unicode(space, w_self):
+    """Delegate unicode.lower() to w_self.lower(space)."""
-    input = w_self._value
-    builder = UnicodeBuilder(len(input))
-    for i in range(len(input)):
-        builder.append(unichr(unicodedb.tolower(ord(input[i]))))
-    return W_UnicodeObject(builder.build())
+    return w_self.lower(space)
 
 def unicode_upper__Unicode(space, w_self):
+    """Delegate unicode.upper() to w_self.upper(space)."""
-    input = w_self._value
-    builder = UnicodeBuilder(len(input))
-    for i in range(len(input)):
-        builder.append(unichr(unicodedb.toupper(ord(input[i]))))
-    return W_UnicodeObject(builder.build())
+    return w_self.upper(space)
 
 def unicode_swapcase__Unicode(space, w_self):
+    """Delegate unicode.swapcase() to w_self.swapcase(space)."""
-    input = w_self._value
-    builder = UnicodeBuilder(len(input))
-    for i in range(len(input)):
-        unichar = ord(input[i])
-        if unicodedb.islower(unichar):
-            builder.append(unichr(unicodedb.toupper(unichar)))
-        elif unicodedb.isupper(unichar):
-            builder.append(unichr(unicodedb.tolower(unichar)))
-        else:
-            builder.append(input[i])
-    return W_UnicodeObject(builder.build())
-
-def _normalize_index(length, index):
-    if index < 0:
-        index += length
-        if index < 0:
-            index = 0
-    elif index > length:
-        index = length
-    return index
+    return w_self.swapcase(space)
 
 @specialize.arg(4)
 def _convert_idx_params(space, w_self, w_start, w_end, upper_bound=False):
     while True:
         # find the beginning of the next word
         while i < length:
-            if not _isspace(value[i]):
+            if not w_self._isspace(value[i]):
                 break   # found
             i += 1
         else:
             j = length   # take all the rest of the string
         else:
             j = i + 1
-            while j < length and not _isspace(value[j]):
+            while j < length and not w_self._isspace(value[j]):
                 j += 1
             maxsplit -= 1   # NB. if it's already < 0, it stays < 0
 
     while True:
         # starting from the end, find the end of the next word
         while i >= 0:
-            if not _isspace(value[i]):
+            if not w_self._isspace(value[i]):
                 break   # found
             i -= 1
         else:
             j = -1   # take all the rest of the string
         else:
             j = i - 1
-            while j >= 0 and not _isspace(value[j]):
+            while j >= 0 and not w_self._isspace(value[j]):
                 j -= 1
             maxsplit -= 1   # NB. if it's already < 0, it stays < 0