Commits

Antonio Cuni committed aad4c9d

do the same with str_decode_utf_8

  • Participants
  • Parent commits f06c2ef

Comments (0)

Files changed (4)

File pypy/rpython/lltypesystem/rstr.py

     CACHE = CONST_STR_CACHE
 
     def __init__(self, *args):
-        from pypy.rlib.runicode import str_decode_utf_8
         AbstractStringRepr.__init__(self, *args)
         self.ll = LLHelpers
         self.malloc = mallocstr
-        self.rstr_decode_utf_8 = func_with_new_name(str_decode_utf_8, 'rstr_decode_utf_8')
 
     def ll_decode_latin1(self, value):
         lgt = len(value.chars)
             s.chars[i] = cast_primitive(UniChar, value.chars[i])
         return s
 
-    def ll_decode_utf8(self, llvalue):
-        from pypy.rpython.annlowlevel import hlstr, llunicode
-        value = hlstr(llvalue)
-        assert value is not None
-        univalue, _ = self.rstr_decode_utf_8(value, len(value), 'strict')
-        return llunicode(univalue)
-
 class UnicodeRepr(BaseLLStringRepr, AbstractUnicodeRepr):
     lowleveltype = Ptr(UNICODE)
     basetype = basestring
     CACHE = CONST_UNICODE_CACHE
 
     def __init__(self, *args):
-        from pypy.rlib.runicode import unicode_encode_utf_8
         AbstractUnicodeRepr.__init__(self, *args)
         self.ll = LLHelpers
         self.malloc = mallocunicode
 
 
 class LLHelpers(AbstractLLHelpers):
-    from pypy.rpython.annlowlevel import llstr
+    from pypy.rpython.annlowlevel import llstr, llunicode
 
     @jit.elidable
     def ll_str_mul(s, times):

File pypy/rpython/ootypesystem/rstr.py

             sb.ll_append_char(cast_primitive(UniChar, c))
         return sb.ll_build()
 
-    def ll_decode_utf8(self, llvalue):
-        from pypy.rpython.annlowlevel import hlstr, oounicode
-        from pypy.rlib.runicode import str_decode_utf_8
-        value = hlstr(llvalue)
-        assert value is not None
-        univalue, _ = str_decode_utf_8(value, len(value), 'strict')
-        return oounicode(univalue)
-
 
 class UnicodeRepr(BaseOOStringRepr, AbstractUnicodeRepr):
     lowleveltype = ootype.Unicode
 
 class LLHelpers(AbstractLLHelpers):
 
-    from pypy.rpython.annlowlevel import oostr as llstr
+    from pypy.rpython.annlowlevel import oostr as llstr, oounicode as llunicode
 
     def ll_chr2str(ch):
         return ootype.oostring(ch, -1)

File pypy/rpython/rstr.py

 from pypy.tool.sourcetools import func_with_new_name
 from pypy.annotation import model as annmodel
 from pypy.rlib import jit
+from pypy.rlib.nonconst import NonConstant
 from pypy.rpython.error import TyperError
 from pypy.rpython.rmodel import IntegerRepr, IteratorRepr
 from pypy.rpython.rmodel import inputconst, Repr
      cast_primitive, typeOf
 
 class AbstractStringRepr(Repr):
-    pass
+
+    def __init__(self, *args):
+        from pypy.rlib.runicode import str_decode_utf_8, raise_unicode_exception_decode
+        Repr.__init__(self, *args)
+        self.rstr_decode_utf_8 = func_with_new_name(str_decode_utf_8,
+                                                    'rstr_decode_utf_8')
+        self.rraise_unicode_exception_decode = func_with_new_name(
+            raise_unicode_exception_decode, 'rraise_unicode_exception_decode')
+        
+    @jit.elidable
+    def ll_decode_utf8(self, llvalue):
+        from pypy.rpython.annlowlevel import hlstr
+        value = hlstr(llvalue)
+        assert value is not None
+        univalue, _ = self.rstr_decode_utf_8(value, len(value), 'strict')
+        return self.ll.llunicode(univalue)
 
 class AbstractCharRepr(AbstractStringRepr):
     pass

File pypy/rpython/test/test_runicode.py

 
     def test_utf_8_decoding_annotation(self):
         from pypy.rlib.runicode import str_decode_utf_8
+        def errorhandler(errors, encoding, msg, s,
+                         startingpos, endingpos):
+            raise UnicodeDecodeError(encoding, s, startingpos, endingpos, msg)
+        
         strings = [u'àèì'.encode('utf-8'), u'ìòéà'.encode('utf-8')]
         def f(n):
             x = strings[n]
+            if n:
+                errors = 'strict'
+            else:
+                errors = 'foo'
             # the annotation of y is SomeUnicodeString(can_be_None=False)
-            y, _ = str_decode_utf_8(x, len(x), 'strict')
+            y, _ = str_decode_utf_8(x, len(x), errors, errorhandler)
             return x.decode('utf-8') + y
 
         assert self.ll_to_string(self.interpret(f, [1])) == f(1)