Philip Jenvey avatar Philip Jenvey committed 6e0ce8d

add the surrogatepass error handler

Comments (0)

Files changed (2)

pypy/module/_codecs/interp_codecs.py

         raise operationerrfmt(space.w_TypeError,
             "don't know how to handle %s in error callback", typename)
 
+def surrogatepass_errors(space, w_exc):
+    """Codec error callback registered under the name 'surrogatepass'.
+
+    For a UnicodeEncodeError, every character in [start, end) of the
+    exception's 'object' must be a surrogate (U+D800..U+DFFF); each one is
+    written out as its three-byte UTF-8 form and the result is returned as
+    a (bytes, end) tuple.
+
+    For a UnicodeDecodeError, the three bytes at 'start' are decoded as a
+    single surrogate and returned as a (unicode, start + 3) tuple.
+
+    The original exception is re-raised when the input is not a surrogate
+    (or, when decoding, when fewer than three bytes are available).  Any
+    other exception type results in a TypeError.
+    """
+    check_exception(space, w_exc)
+    if space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
+        obj = space.realunicode_w(space.getattr(w_exc, space.wrap('object')))
+        start = space.int_w(space.getattr(w_exc, space.wrap('start')))
+        w_end = space.getattr(w_exc, space.wrap('end'))
+        end = space.int_w(w_end)
+        res = ''
+        pos = start
+        while pos < end:
+            ch = ord(obj[pos])
+            pos += 1
+            if ch < 0xd800 or ch > 0xdfff:
+                # Not a surrogate, fail with original exception
+                raise OperationError(space.type(w_exc), w_exc)
+            # Three-byte UTF-8 layout: 1110xxxx 10xxxxxx 10xxxxxx
+            res += chr(0xe0 | (ch >> 12))
+            res += chr(0x80 | ((ch >> 6) & 0x3f))
+            # The last byte carries the low six bits (mask, not shift)
+            res += chr(0x80 | (ch & 0x3f))
+        return space.newtuple([space.wrapbytes(res), w_end])
+    elif space.isinstance_w(w_exc, space.w_UnicodeDecodeError):
+        start = space.int_w(space.getattr(w_exc, space.wrap('start')))
+        obj = space.bytes_w(space.getattr(w_exc, space.wrap('object')))
+        ch = 0
+        # Try decoding a single surrogate character. If there are more,
+        # let the codec call us again.  Only look at the bytes when a full
+        # three-byte sequence is actually present in the input.
+        if start >= 0 and len(obj) - start >= 3:
+            ch0 = ord(obj[start + 0])
+            ch1 = ord(obj[start + 1])
+            ch2 = ord(obj[start + 2])
+            # All three conditions must hold: a 1110xxxx lead byte
+            # followed by two 10xxxxxx continuation bytes
+            if (ch0 & 0xf0 == 0xe0 and
+                ch1 & 0xc0 == 0x80 and
+                ch2 & 0xc0 == 0x80):
+                # it's a three-byte code
+                ch = (((ch0 & 0x0f) << 12) + ((ch1 & 0x3f) << 6) +
+                      (ch2 & 0x3f))
+                if ch < 0xd800 or ch > 0xdfff:
+                    # it's not a surrogate - fail
+                    ch = 0
+        if ch == 0:
+            raise OperationError(space.type(w_exc), w_exc)
+        return space.newtuple([space.wrap(unichr(ch)), space.wrap(start + 3)])
+    else:
+        typename = space.type(w_exc).getname(space)
+        raise operationerrfmt(space.w_TypeError,
+            "don't know how to handle %s in error callback", typename)
+
 def surrogateescape_errors(space, w_exc):
     check_exception(space, w_exc)
     if space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
     "NOT_RPYTHON"
     state = space.fromcache(CodecState)
     for error in ("strict", "ignore", "replace", "xmlcharrefreplace",
-                  "backslashreplace", "surrogateescape"):
+                  "backslashreplace", "surrogateescape", "surrogatepass"):
         name = error + "_errors"
         state.codec_error_registry[error] = space.wrap(interp2app(globals()[name]))
 

pypy/module/_codecs/test/test_codecs.py

         assert b'a\x80b'.decode('utf-8', 'surrogateescape') == 'a\udc80b'
         assert 'a\udc80b'.encode('utf-8', 'surrogateescape') == b'a\x80b'
 
+    def test_surrogatepass_handler(self):
+        # The 'surrogatepass' handler must be registered, and a lone
+        # surrogate must map to and from its three-byte UTF-8 form.
+        from _codecs import lookup_error
+        handler = lookup_error("surrogatepass")
+        assert handler
+        text = "abc\ud800def"
+        raw = b"abc\xed\xa0\x80def"
+        assert text.encode("utf-8", "surrogatepass") == raw
+        assert raw.decode("utf-8", "surrogatepass") == text
+
     def test_badhandler(self):
         import codecs
         results = ( 42, "foo", (1,2,3), ("foo", 1, 3), ("foo", None), ("foo",), ("foo", 1, 3), ("foo", None), ("foo",) )
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.