Antonio Cuni  committed 991b466

Store both the unicode string and the UTF-8-encoded bytes for unicode objects, to avoid redoing the encoding every time we call identifier_w() on them. In the future, we might optimize W_UnicodeObject by storing *only* the UTF-8-encoded bytes, but for now it is easier to keep both. This change also fixes the methodcache tests, because they rely on the identity of the unwrapped string returned by str_w().

  • Participants
  • Parent commits c511d7b
  • Branches py3k

Comments (0)

Files changed (2)

File pypy/interpreter/

         raise OperationError(space.w_TypeError,
                              typed_unwrap_error_msg(space, "string", self))
+    def identifier_w(self, space):
+        raise OperationError(space.w_TypeError,
+                             typed_unwrap_error_msg(space, "string", self))
     def int_w(self, space):
         raise OperationError(space.w_TypeError,
                              typed_unwrap_error_msg(space, "integer", self))
         return self.str_w(w_obj)
     def str_w(self, w_obj):
+        """
+        if w_obj is unicode, call identifier_w() (i.e., return the UTF-8
+        encoded string). Else, call bytes_w().
+        Maybe we should kill str_w completely and manually substitute it with
+        identifier_w/bytes_w at all call sites?
+        """
         if self.isinstance_w(w_obj, self.w_unicode):
-            try:
-                return self.unicode_w(w_obj).encode('ascii')
-            except UnicodeEncodeError:
-                w_bytes = self.call_method(w_obj, 'encode', self.wrap('utf-8'))
-                return self.bytes_w(w_bytes)
+            return w_obj.identifier_w(self)
             return w_obj.bytes_w(self)
         variables, methods, functions, classes etc.). In py3k, identifiers
         are unicode strings and are unwrapped as UTF-8 encoded byte strings.
-        return self.unicode_w(w_obj).encode('utf-8')
+        return w_obj.identifier_w(self)
     def bool_w(self, w_obj):
         # Unwraps a bool, also accepting an int for compatibility.

File pypy/objspace/std/

 class W_UnicodeObject(W_AbstractUnicodeObject):
     from pypy.objspace.std.unicodetype import unicode_typedef as typedef
-    _immutable_fields_ = ['_value']
+    _immutable_fields_ = ['_value', '_utf8']
     def __init__(w_self, unistr):
         assert isinstance(unistr, unicode)
         w_self._value = unistr
+        w_self._utf8 = unistr.encode('utf-8')
     def __repr__(w_self):
         """ representation for debugging purposes """
     def unicode_w(self, space):
         return self._value
+    def identifier_w(self, space):
+        return self._utf8
 W_UnicodeObject.EMPTY = W_UnicodeObject(u'')