Commits

Maciej Fijalkowski committed f07d9c4 Merge

Merged in dripton/pypy (pull request #35)

Comments (0)

Files changed (2)

pypy/objspace/std/stringobject.py

         return w_self._value
 
     def unicode_w(w_self, space):
-        # XXX should this use the default encoding?
-        from pypy.objspace.std.unicodetype import plain_str2unicode
-        return plain_str2unicode(space, w_self._value)
+        # Return this string unwrapped as unicode, decoded according to the
+        # result of the interpreter-level sys.getdefaultencoding().
+        # All helpers come from the same module, so import them once.
+        from pypy.objspace.std.unicodetype import _get_encoding_and_errors, \
+            unicode_from_string, decode_object
+        w_defaultencoding = space.call_function(space.sys.get(
+                                                'getdefaultencoding'))
+        # No explicit 'errors' argument is supplied, hence space.w_None.
+        encoding, errors = _get_encoding_and_errors(space, w_defaultencoding,
+                                                    space.w_None)
+        if encoding is None and errors is None:
+            # The helper reported neither an encoding nor an error mode:
+            # use the plain string-to-unicode conversion.
+            return space.unicode_w(unicode_from_string(space, w_self))
+        return space.unicode_w(decode_object(space, w_self, encoding, errors))
 
 registerimplementation(W_StringObject)
 

pypy/objspace/std/test/test_stringobject.py

         raises(TypeError, ''.join, [1])
         raises(TypeError, ''.join, [[1]])
 
+    def test_unicode_join_str_arg_ascii(self):
+        # Joining a byte string that holds non-ASCII bytes with a unicode
+        # separator must raise UnicodeDecodeError (the test name indicates
+        # the default encoding is expected to be ASCII here).
+        non_ascii_parts = ['\xc3\xa1']
+        raises(UnicodeDecodeError, u''.join, non_ascii_parts)
+
+    def test_unicode_join_str_arg_utf8(self):
+        # Check that u''.join() decodes a UTF-8 encoded byte string to the
+        # expected unicode character once the default encoding is utf-8.
+        # Need default encoding utf-8, but sys.setdefaultencoding
+        # is removed after startup.
+        import sys
+        old_encoding = sys.getdefaultencoding()
+        # Duplicate the logic of test.test_support.CleanImport because it
+        # won't import.
+        # Snapshot sys.modules so the entries deleted below can be restored.
+        self.original_modules = sys.modules.copy()
+        try:
+            # NOTE(review): temp_sys is bound before 'sys' is dropped from
+            # sys.modules, so it presumably refers to the same module object
+            # as `sys` -- confirm setdefaultencoding is available on it.
+            import sys as temp_sys
+            module_name = 'sys'
+            if module_name in sys.modules:
+                module = sys.modules[module_name]
+                # It is possible that module_name is just an alias for
+                # another module (e.g. stub for modules renamed in 3.x).
+                # In that case, we also need to delete the real module to
+                # clear the import cache.
+                if module.__name__ != module_name:
+                    del sys.modules[module.__name__]
+                del sys.modules[module_name]
+            temp_sys.setdefaultencoding('utf-8')
+            assert u''.join(['\xc3\xa1']) == u'\xe1'
+        finally:
+            # Always restore the previous default encoding and put back the
+            # module entries that were deleted above.
+            temp_sys.setdefaultencoding(old_encoding)
+            sys.modules.update(self.original_modules)
+
     def test_unicode_join_endcase(self):
         # This class inserts a Unicode object into its argument's natural
         # iteration, in the 3rd position.