Commits

Amaury Forgeot d'Arc committed 10e2fbb

Fix tests around utf8 encoding

Comments (0)

Files changed (3)

pypy/objspace/std/ropeunicodeobject.py

             if result is not None:
                 return W_RopeObject(result)
         elif encoding == "utf-8":
-            result = rope.unicode_encode_utf8(node)
+            result = rope.unicode_encode_utf8(node, allow_surrogates=True)
             if result is not None:
                 return W_RopeObject(result)
     return encode_object(space, w_unistr, encoding, errors)

pypy/rlib/rope.py

     if rope.is_bytestring():
         return rope
 
-def unicode_encode_utf8(rope):
+def unicode_encode_utf8(rope, allow_surrogates=False):
     from pypy.rlib.runicode import unicode_encode_utf_8
     if rope.is_ascii():
         return rope
                                 unicode_encode_utf8(rope.right))
     elif isinstance(rope, LiteralUnicodeNode):
         return LiteralStringNode(
-            unicode_encode_utf_8(rope.u, len(rope.u), "strict"))
+            unicode_encode_utf_8(rope.u, len(rope.u), "strict",
+                                 allow_surrogates=allow_surrogates))
     elif isinstance(rope, LiteralStringNode):
         return LiteralStringNode(_str_encode_utf_8(rope.s))
 

pypy/rpython/rstr.py

         from pypy.rpython.annlowlevel import hlstr
         value = hlstr(llvalue)
         assert value is not None
-        univalue, _ = self.rstr_decode_utf_8(value, len(value), 'strict',
-                                             False, self.ll_raise_unicode_exception_decode)
+        univalue, _ = self.rstr_decode_utf_8(
+            value, len(value), 'strict', final=False,
+            errorhandler=self.ll_raise_unicode_exception_decode,
+            allow_surrogates=False)
         return self.ll.llunicode(univalue)
 
     def ll_raise_unicode_exception_decode(self, errors, encoding, msg, s,
         self.runicode_encode_utf_8 = None
 
     def ensure_ll_encode_utf8(self):
-        from pypy.rlib.runicode import unicode_encode_utf_8
-        self.runicode_encode_utf_8 = func_with_new_name(unicode_encode_utf_8,
-                                                        'runicode_encode_utf_8')
+        from pypy.rlib.runicode import unicode_encode_utf_8_impl
+        self.runicode_encode_utf_8 = func_with_new_name(
+            unicode_encode_utf_8_impl, 'runicode_encode_utf_8')
 
     def rtype_method_upper(self, hop):
         raise TypeError("Cannot do toupper on unicode string")
         from pypy.rpython.annlowlevel import hlunicode
         s = hlunicode(ll_s)
         assert s is not None
-        bytes = self.runicode_encode_utf_8(s, len(s), 'strict')
+        bytes = self.runicode_encode_utf_8(
+            s, len(s), 'strict',
+            errorhandler=self.ll_raise_unicode_exception_decode,
+            allow_surrogates=False)
         return self.ll.llstr(bytes)
 
+    def ll_raise_unicode_exception_encode(self, errors, encoding, msg, u,
+                                          startingpos, endingpos):
+        raise UnicodeEncodeError(encoding, u, startingpos, endingpos, msg)
+    
 class __extend__(annmodel.SomeString):
     def rtyper_makerepr(self, rtyper):
         return rtyper.type_system.rstr.string_repr
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.