Commits

Amaury Forgeot d'Arc committed 2693bfc

Remove mixed operations between str and unicode

Comments (0)

Files changed (7)

pypy/objspace/descroperation.py

             # Note that space.is_w() is potentially not happy if one of them
             # is None (e.g. with the thunk space)...
             if w_left_src is not w_right_src:    # XXX
-                # -- cpython bug compatibility: see objspace/std/test/
-                # -- test_unicodeobject.test_str_unicode_concat_overrides.
-                # -- The following handles "unicode + string subclass" by
-                # -- pretending that the unicode is a superclass of the
-                # -- string, thus giving priority to the string subclass'
-                # -- __radd__() method.  The case "string + unicode subclass"
-                # -- is handled directly by add__String_Unicode().
-                if symbol == '+' and space.is_w(w_typ1, space.w_unicode):
-                    w_typ1 = space.w_basestring
                 # -- end of bug compatibility
                 if space.is_true(space.issubtype(w_typ2, w_typ1)):
                     if (w_left_src and w_right_src and

pypy/objspace/std/bytearrayobject.py

 def eq__Bytearray_String(space, w_bytearray, w_other):
     return space.eq(str__Bytearray(space, w_bytearray), w_other)
 
-def eq__Bytearray_Unicode(space, w_bytearray, w_other):
-    return space.w_False
-
-def eq__Unicode_Bytearray(space, w_other, w_bytearray):
-    return space.w_False
-
 def ne__Bytearray_String(space, w_bytearray, w_other):
     return space.ne(str__Bytearray(space, w_bytearray), w_other)
 
-def ne__Bytearray_Unicode(space, w_bytearray, w_other):
-    return space.w_True
-
-def ne__Unicode_Bytearray(space, w_other, w_bytearray):
-    return space.w_True
-
 def _min(a, b):
     if a < b:
         return a

pypy/objspace/std/ropeunicodeobject.py

 def wrapunicode(space, uni):
     return W_RopeUnicodeObject(rope.rope_from_unicode(uni))
 
-def unicode_from_string(space, w_str):
-    from pypy.objspace.std.unicodetype import getdefaultencoding
-    assert isinstance(w_str, W_RopeObject)
-    encoding = getdefaultencoding(space)
-    w_retval = decode_string(space, w_str, encoding, "strict")
-    if not space.isinstance_w(w_retval, space.w_unicode):
-        raise operationerrfmt(
-            space.w_TypeError,
-            "decoder did not return an unicode object (type '%s')",
-            space.type(w_retval).getname(space))
-    assert isinstance(w_retval, W_RopeUnicodeObject)
-    return w_retval
-
 def decode_string(space, w_str, encoding, errors):
     from pypy.objspace.std.unicodetype import decode_object
     if errors is None or errors == "strict":
 def ropeunicode_w(space, w_str):
     if isinstance(w_str, W_RopeUnicodeObject):
         return w_str._node
-    if isinstance(w_str, W_RopeObject):
-        return unicode_from_string(space, w_str)._node
     return rope.LiteralUnicodeNode(space.unicode_w(w_str))
 
 
                 raise OperationError(space.w_UnicodeEncodeError, space.newtuple([w_encoding, w_unistr, w_start, w_end, w_reason]))
     return ''.join(result)
 
-# string-to-unicode delegation
-def delegate_Rope2RopeUnicode(space, w_rope):
-    w_uni = unicode_from_string(space, w_rope)
-    assert isinstance(w_uni, W_RopeUnicodeObject) # help the annotator!
-    return w_uni
-
 def str__RopeUnicode(space, w_uni):
     return space.call_method(w_uni, 'encode')
 
 def eq__RopeUnicode_RopeUnicode(space, w_str1, w_str2):
     return space.newbool(_eq(w_str1, w_str2))
 
-def eq__RopeUnicode_Rope(space, w_runi, w_rope):
-    from pypy.objspace.std.unicodeobject import _unicode_string_comparison
-    return _unicode_string_comparison(space, w_runi, w_rope,
-                    False,  unicode_from_string)
-
 def ne__RopeUnicode_RopeUnicode(space, w_str1, w_str2):
     return space.newbool(not _eq(w_str1, w_str2))
 
-def ne__RopeUnicode_Rope(space, w_runi, w_rope):
-    from pypy.objspace.std.unicodeobject import _unicode_string_comparison
-    return _unicode_string_comparison(space, w_runi, w_rope,
-                    True, unicode_from_string)
-
 def gt__RopeUnicode_RopeUnicode(space, w_str1, w_str2):
     n1 = w_str1._node
     n2 = w_str2._node
         raise OperationError(space.w_OverflowError,
                              space.wrap("string too long"))
 
-def add__Rope_RopeUnicode(space, w_left, w_right):
-    return space.add(unicode_from_string(space, w_left) , w_right)
-
-def add__RopeUnicode_Rope(space, w_left, w_right):
-    return space.add(w_left, unicode_from_string(space, w_right))
-
 def contains__RopeUnicode_RopeUnicode(space, w_container, w_item):
     item = w_item._node
     container = w_container._node
     return space.newbool(rope.find(container, item) != -1)
 
-def contains__Rope_RopeUnicode(space, w_container, w_item):
-    return space.contains(unicode_from_string(space, w_container), w_item )
-
 def unicode_join__RopeUnicode_ANY(space, w_self, w_list):
     l_w = space.listview(w_list)
     delim = w_self._node
         if isinstance(w_item, W_RopeUnicodeObject):
             # shortcut for performance
             item = w_item._node
-        elif space.isinstance_w(w_item, space.w_str):
-            item = unicode_from_string(space, w_item)._node
         else:
-            msg = 'sequence item %d: expected string or Unicode'
+            msg = 'sequence item %d: expected string'
             raise operationerrfmt(space.w_TypeError, msg, i)
         values_list.append(item)
     try:
     return W_RopeUnicodeObject(rope.strip(w_self._node, True, True, _contains,
                                w_chars._node.flatten_unicode()))
 
-def unicode_strip__RopeUnicode_Rope(space, w_self, w_chars):
-    return space.call_method(w_self, 'strip',
-                             unicode_from_string(space, w_chars))
-
 def unicode_lstrip__RopeUnicode_None(space, w_self, w_chars):
     return W_RopeUnicodeObject(rope.strip(w_self._node, True, False, _isspace))
 def unicode_lstrip__RopeUnicode_RopeUnicode(space, w_self, w_chars):
     return W_RopeUnicodeObject(rope.strip(w_self._node, True, False, _contains,
                                w_chars._node.flatten_unicode()))
-def unicode_lstrip__RopeUnicode_Rope(space, w_self, w_chars):
-    return space.call_method(w_self, 'lstrip',
-                             unicode_from_string(space, w_chars))
 
 def unicode_rstrip__RopeUnicode_None(space, w_self, w_chars):
     return W_RopeUnicodeObject(rope.strip(w_self._node, False, True, _isspace))
 def unicode_rstrip__RopeUnicode_RopeUnicode(space, w_self, w_chars):
     return W_RopeUnicodeObject(rope.strip(w_self._node, False, True, _contains,
                                w_chars._node.flatten_unicode()))
-def unicode_rstrip__RopeUnicode_Rope(space, w_self, w_chars):
-    return space.call_method(w_self, 'rstrip',
-                             unicode_from_string(space, w_chars))
 
 def unicode_capitalize__RopeUnicode(space, w_self):
     input = w_self._node
 
 from pypy.objspace.std import unicodetype
 register_all(vars(), unicodetype)
-
-# str.strip(unicode) needs to convert self to unicode and call unicode.strip we
-# use the following magic to register strip_string_unicode as a String
-# multimethod.
-
-# XXX couldn't string and unicode _share_ the multimethods that make up their
-# methods?
-
-class str_methods:
-    from pypy.objspace.std import stringtype
-    W_RopeUnicodeObject = W_RopeUnicodeObject
-    from pypy.objspace.std.ropeobject import W_RopeObject
-    def str_strip__Rope_RopeUnicode(space, w_self, w_chars):
-        return space.call_method(unicode_from_string(space, w_self),
-                                 'strip', w_chars)
-    def str_lstrip__Rope_RopeUnicode(space, w_self, w_chars):
-        return space.call_method(unicode_from_string(space, w_self),
-                                 'lstrip', w_chars)
-    def str_rstrip__Rope_RopeUnicode(space, w_self, w_chars):
-        return space.call_method(unicode_from_string(space, w_self),
-                                 'rstrip', w_chars)
-    def str_count__Rope_RopeUnicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
-        return space.call_method(unicode_from_string(space, w_self),
-                                 'count', w_substr, w_start, w_end)
-    def str_find__Rope_RopeUnicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
-        return space.call_method(unicode_from_string(space, w_self),
-                                 'find', w_substr, w_start, w_end)
-    def str_rfind__Rope_RopeUnicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
-        return space.call_method(unicode_from_string(space, w_self),
-                                 'rfind', w_substr, w_start, w_end)
-    def str_index__Rope_RopeUnicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
-        return space.call_method(unicode_from_string(space, w_self),
-                                 'index', w_substr, w_start, w_end)
-    def str_rindex__Rope_RopeUnicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
-        return space.call_method(unicode_from_string(space, w_self),
-                                 'rindex', w_substr, w_start, w_end)
-    def str_replace__Rope_RopeUnicode_RopeUnicode_ANY(space, w_self, w_old, w_new, w_maxsplit):
-        return space.call_method(unicode_from_string(space, w_self),
-                                 'replace', w_old, w_new, w_maxsplit)
-    def str_split__Rope_RopeUnicode_ANY(space, w_self, w_delim, w_maxsplit):
-        return space.call_method(unicode_from_string(space, w_self),
-                                 'split', w_delim, w_maxsplit)
-    def str_rsplit__Rope_RopeUnicode_ANY(space, w_self, w_delim, w_maxsplit):
-        return space.call_method(unicode_from_string(space, w_self),
-                                 'rsplit', w_delim, w_maxsplit)
-    register_all(vars(), stringtype)

pypy/objspace/std/stringobject.py

 
 def str_decode__String_ANY_ANY(space, w_string, w_encoding=None, w_errors=None):
     from pypy.objspace.std.unicodetype import _get_encoding_and_errors, \
-        unicode_from_string, decode_object
+        decode_object
     encoding, errors = _get_encoding_and_errors(space, w_encoding, w_errors)
-    if encoding is None and errors is None:
-        return unicode_from_string(space, w_string)
     return decode_object(space, w_string, encoding, errors)
 
 def str_encode__String_ANY_ANY(space, w_string, w_encoding=None, w_errors=None):

pypy/objspace/std/test/test_unicodeobject.py

 
 class AppTestUnicodeStringStdOnly:
     def test_compares(self):
+        assert type('a') != type(b'a')
         assert 'a' != b'a'
         assert b'a' != 'a'
         assert not ('a' == 5)

pypy/objspace/std/unicodeobject.py

 from pypy.interpreter import gateway
 from pypy.interpreter.error import OperationError, operationerrfmt
 from pypy.objspace.std.stringobject import W_StringObject, make_rsplit_with_delim
-from pypy.objspace.std.ropeobject import W_RopeObject
 from pypy.objspace.std.noneobject import W_NoneObject
 from pypy.objspace.std.sliceobject import W_SliceObject, normalize_simple_slice
 from pypy.objspace.std import slicetype, newformat
                 raise OperationError(space.w_UnicodeEncodeError, space.newtuple([w_encoding, w_unistr, w_start, w_end, w_reason]))
     return ''.join(result)
 
-# checks if should trigger an unicode warning
-def _unicode_string_comparison(space, w_uni, w_str, inverse, uni_from_str):
-    try:
-        w_uni2 = uni_from_str(space, w_str)
-    except OperationError, e:
-        if e.match(space, space.w_UnicodeDecodeError):
-            if inverse:
-                msg = "Unicode unequal comparison failed to convert both "  \
-                      "arguments to Unicode - interpreting them as being unequal"
-            else :
-                msg = "Unicode equal comparison failed to convert both "    \
-                      "arguments to Unicode - interpreting them as being unequal"
-            space.warn(msg, space.w_UnicodeWarning)
-            return space.newbool(inverse)
-        raise
-    result = space.eq(w_uni, w_uni2)
-    if inverse:
-        return space.not_(result)
-    return result
-
 def str__Unicode(space, w_uni):
     from pypy.objspace.std.unicodetype import encode_object
     return encode_object(space, w_uni, None, None)
 def eq__Unicode_Unicode(space, w_left, w_right):
     return space.newbool(w_left._value == w_right._value)
 
-def eq__Unicode_String(space, w_uni, w_str):
-    from pypy.objspace.std.unicodetype import unicode_from_string
-    return _unicode_string_comparison(space, w_uni, w_str,
-                    False, unicode_from_string)
-
-eq__Unicode_Rope = eq__Unicode_String
-
-def ne__Unicode_String(space, w_uni, w_str):
-    from pypy.objspace.std.unicodetype import unicode_from_string
-    return _unicode_string_comparison(space, w_uni, w_str,
-                    True, unicode_from_string)
-
-ne__Unicode_Rope = ne__Unicode_String
-
 def lt__Unicode_Unicode(space, w_left, w_right):
     left = w_left._value
     right = w_right._value
 def add__Unicode_Unicode(space, w_left, w_right):
     return W_UnicodeObject(w_left._value + w_right._value)
 
-def add__String_Unicode(space, w_left, w_right):
-    # this function is needed to make 'abc'.__add__(u'def') return
-    # u'abcdef' instead of NotImplemented.  This is what occurs on
-    # top of CPython.
-    from pypy.objspace.std.unicodetype import unicode_from_string
-    # XXX fragile implementation detail: for "string + unicode subclass",
-    # if the unicode subclass overrides __radd__(), then it will be
-    # called (see test_str_unicode_concat_overrides).  This occurs as a
-    # result of the following call to space.add() in which the first
-    # argument is a unicode and the second argument a subclass of unicode
-    # (and thus the usual logic about calling __radd__() first applies).
-    return space.add(unicode_from_string(space, w_left) , w_right)
-
-add__Rope_Unicode = add__String_Unicode
-
-def add__Unicode_String(space, w_left, w_right):
-    # this function is needed to make 'abc'.__radd__(u'def') return
-    # u'defabc', although it's completely unclear if that's necessary
-    # given that CPython doesn't even have a method str.__radd__().
-    from pypy.objspace.std.unicodetype import unicode_from_string
-    return space.add(w_left, unicode_from_string(space, w_right))
-    # Note about "unicode + string subclass": look for
-    # "cpython bug compatibility" in descroperation.py
-
-add__Unicode_Rope = add__Unicode_String
-
-def contains__String_Unicode(space, w_container, w_item):
-    from pypy.objspace.std.unicodetype import unicode_from_string
-    return space.contains(unicode_from_string(space, w_container), w_item )
-contains__Rope_Unicode = contains__String_Unicode
-
-
 def contains__Unicode_Unicode(space, w_container, w_item):
     item = w_item._value
     container = w_container._value
     return _strip_none(space, w_self, 1, 1)
 def unicode_strip__Unicode_Unicode(space, w_self, w_chars):
     return _strip(space, w_self, w_chars, 1, 1)
-def unicode_strip__Unicode_String(space, w_self, w_chars):
-    from pypy.objspace.std.unicodetype import unicode_from_string
-    return space.call_method(w_self, 'strip',
-                             unicode_from_string(space, w_chars))
-unicode_strip__Unicode_Rope = unicode_strip__Unicode_String
 
 def unicode_lstrip__Unicode_None(space, w_self, w_chars):
     return _strip_none(space, w_self, 1, 0)
 def unicode_lstrip__Unicode_Unicode(space, w_self, w_chars):
     return _strip(space, w_self, w_chars, 1, 0)
-def unicode_lstrip__Unicode_String(space, w_self, w_chars):
-    from pypy.objspace.std.unicodetype import unicode_from_string
-    return space.call_method(w_self, 'lstrip',
-                             unicode_from_string(space, w_chars))
-
-unicode_lstrip__Unicode_Rope = unicode_lstrip__Unicode_String
 
 def unicode_rstrip__Unicode_None(space, w_self, w_chars):
     return _strip_none(space, w_self, 0, 1)
 def unicode_rstrip__Unicode_Unicode(space, w_self, w_chars):
     return _strip(space, w_self, w_chars, 0, 1)
-def unicode_rstrip__Unicode_String(space, w_self, w_chars):
-    from pypy.objspace.std.unicodetype import unicode_from_string
-    return space.call_method(w_self, 'rstrip',
-                             unicode_from_string(space, w_chars))
-
-unicode_rstrip__Unicode_Rope = unicode_rstrip__Unicode_String
 
 def unicode_capitalize__Unicode(space, w_self):
     input = w_self._value
 from pypy.objspace.std import unicodetype
 register_all(vars(), unicodetype)
 
-# str.strip(unicode) needs to convert self to unicode and call unicode.strip we
-# use the following magic to register strip_string_unicode as a String
-# multimethod.
-
-# XXX couldn't string and unicode _share_ the multimethods that make up their
-# methods?
-
-class str_methods:
-    from pypy.objspace.std import stringtype
-    W_UnicodeObject = W_UnicodeObject
-    from pypy.objspace.std.stringobject import W_StringObject
-    from pypy.objspace.std.ropeobject import W_RopeObject
-    def str_strip__String_Unicode(space, w_self, w_chars):
-        from pypy.objspace.std.unicodetype import unicode_from_string
-        return space.call_method(unicode_from_string(space, w_self),
-                                 'strip', w_chars)
-    str_strip__Rope_Unicode = str_strip__String_Unicode
-    def str_lstrip__String_Unicode(space, w_self, w_chars):
-        from pypy.objspace.std.unicodetype import unicode_from_string
-        return space.call_method(unicode_from_string(space, w_self),
-                                 'lstrip', w_chars)
-    str_lstrip__Rope_Unicode = str_lstrip__String_Unicode
-    def str_rstrip__String_Unicode(space, w_self, w_chars):
-        from pypy.objspace.std.unicodetype import unicode_from_string
-        return space.call_method(unicode_from_string(space, w_self),
-                                 'rstrip', w_chars)
-    str_rstrip__Rope_Unicode = str_rstrip__String_Unicode
-    def str_count__String_Unicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
-        from pypy.objspace.std.unicodetype import unicode_from_string
-        return space.call_method(unicode_from_string(space, w_self),
-                                 'count', w_substr, w_start, w_end)
-    str_count__Rope_Unicode_ANY_ANY = str_count__String_Unicode_ANY_ANY
-    def str_find__String_Unicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
-        from pypy.objspace.std.unicodetype import unicode_from_string
-        return space.call_method(unicode_from_string(space, w_self),
-                                 'find', w_substr, w_start, w_end)
-    str_find__Rope_Unicode_ANY_ANY = str_find__String_Unicode_ANY_ANY
-    def str_rfind__String_Unicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
-        from pypy.objspace.std.unicodetype import unicode_from_string
-        return space.call_method(unicode_from_string(space, w_self),
-                                 'rfind', w_substr, w_start, w_end)
-    str_rfind__Rope_Unicode_ANY_ANY = str_rfind__String_Unicode_ANY_ANY
-    def str_index__String_Unicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
-        from pypy.objspace.std.unicodetype import unicode_from_string
-        return space.call_method(unicode_from_string(space, w_self),
-                                 'index', w_substr, w_start, w_end)
-    str_index__Rope_Unicode_ANY_ANY = str_index__String_Unicode_ANY_ANY
-    def str_rindex__String_Unicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
-        from pypy.objspace.std.unicodetype import unicode_from_string
-        return space.call_method(unicode_from_string(space, w_self),
-                                 'rindex', w_substr, w_start, w_end)
-    str_rindex__Rope_Unicode_ANY_ANY = str_rindex__String_Unicode_ANY_ANY
-    def str_replace__String_Unicode_Unicode_ANY(space, w_self, w_old, w_new, w_maxsplit):
-        from pypy.objspace.std.unicodetype import unicode_from_string
-        return space.call_method(unicode_from_string(space, w_self),
-                                 'replace', w_old, w_new, w_maxsplit)
-    str_replace__Rope_Unicode_Unicode_ANY = str_replace__String_Unicode_Unicode_ANY
-    def str_split__String_Unicode_ANY(space, w_self, w_delim, w_maxsplit):
-        from pypy.objspace.std.unicodetype import unicode_from_string
-        return space.call_method(unicode_from_string(space, w_self),
-                                 'split', w_delim, w_maxsplit)
-    str_split__Rope_Unicode_ANY = str_split__String_Unicode_ANY
-    def str_rsplit__String_Unicode_ANY(space, w_self, w_delim, w_maxsplit):
-        from pypy.objspace.std.unicodetype import unicode_from_string
-        return space.call_method(unicode_from_string(space, w_self),
-                                 'rsplit', w_delim, w_maxsplit)
-    str_rsplit__Rope_Unicode_ANY = str_rsplit__String_Unicode_ANY
-    register_all(vars(), stringtype)

pypy/objspace/std/unicodetype.py

             return w_res
     return unicode_from_encoded_object(space, w_res, None, "strict")
 
-def unicode_from_string(space, w_str):
-    # this is a performance and bootstrapping hack
-    if space.config.objspace.std.withropeunicode:
-        from pypy.objspace.std.ropeunicodeobject import unicode_from_string
-        return unicode_from_string(space, w_str)
-    encoding = getdefaultencoding(space)
-    from pypy.objspace.std.unicodeobject import W_UnicodeObject
-    if encoding != 'ascii':
-        return unicode_from_encoded_object(space, w_str, encoding, "strict")
-    s = space.str_w(w_str)
-    try:
-        return W_UnicodeObject(s.decode("ascii"))
-    except UnicodeDecodeError:
-        # raising UnicodeDecodeError is messy, "please crash for me"
-        return unicode_from_encoded_object(space, w_str, "ascii", "strict")
-
 def unicode_decode__unitypedef_ANY_ANY(space, w_unicode, w_encoding=None,
                                        w_errors=None):
     return space.call_method(space.str(w_unicode), 'decode',