Commits

Maciej Fijalkowski committed 84c03a2 Merge

Merge a very primitive version of rpython-bytearray. It will continue to gain
new features and JIT support, but since it's not used yet, it's good enough for
now (I'm not closing the branch, though).

Comments (0)

Files changed (14)

pypy/annotation/binaryop.py

 from pypy.annotation.model import SomeUnicodeCodePoint, SomeUnicodeString
 from pypy.annotation.model import SomeTuple, SomeImpossibleValue, s_ImpossibleValue
 from pypy.annotation.model import SomeInstance, SomeBuiltin, SomeIterator
-from pypy.annotation.model import SomePBC, SomeFloat, s_None
+from pypy.annotation.model import SomePBC, SomeFloat, s_None, SomeByteArray
 from pypy.annotation.model import SomeExternalObject, SomeWeakRef
 from pypy.annotation.model import SomeAddress, SomeTypedAddressAccess
 from pypy.annotation.model import SomeSingleFloat, SomeLongFloat, SomeType
             result.const = str1.const + str2.const
         return result
 
+class __extend__(pairtype(SomeByteArray, SomeByteArray)):
+    def union((b1, b2)):
+        can_be_None = b1.can_be_None or b2.can_be_None
+        return SomeByteArray(can_be_None=can_be_None)
+
+    def add((b1, b2)):
+        result = SomeByteArray()
+        if b1.is_immutable_constant() and b2.is_immutable_constant():
+            result.const = b1.const + b2.const
+        return result
+
+class __extend__(pairtype(SomeByteArray, SomeInteger)):
+    def getitem((s_b, s_i)):
+        return SomeInteger()
+
+    def setitem((s_b, s_i), s_i2):
+        assert isinstance(s_i2, SomeInteger)
+
+class __extend__(pairtype(SomeString, SomeByteArray),
+                 pairtype(SomeByteArray, SomeString),
+                 pairtype(SomeChar, SomeByteArray),
+                 pairtype(SomeByteArray, SomeChar)):
+    def add((b1, b2)):
+        result = SomeByteArray()
+        if b1.is_immutable_constant() and b2.is_immutable_constant():
+            result.const = b1.const + b2.const
+        return result
+
 class __extend__(pairtype(SomeChar, SomeChar)):
 
     def union((chr1, chr2)):

pypy/annotation/bookkeeper.py

      SomeUnicodeCodePoint, SomeOOStaticMeth, s_None, s_ImpossibleValue, \
      SomeLLADTMeth, SomeBool, SomeTuple, SomeOOClass, SomeImpossibleValue, \
      SomeUnicodeString, SomeList, SomeObject, HarmlesslyBlocked, \
-     SomeWeakRef, lltype_to_annotation, SomeType
+     SomeWeakRef, lltype_to_annotation, SomeType, SomeByteArray
 from pypy.annotation.classdef import InstanceSource, ClassDef
 from pypy.annotation.listdef import ListDef, ListItem
 from pypy.annotation.dictdef import DictDef
                 result = SomeUnicodeCodePoint()
             else:
                 result = SomeUnicodeString()
+        elif tp is bytearray:
+            result = SomeByteArray()
         elif tp is tuple:
             result = SomeTuple(items = [self.immutablevalue(e, need_const) for e in x])
         elif tp is float:

pypy/annotation/builtin.py

 from pypy.annotation.model import SomeFloat, unionof, SomeUnicodeString
 from pypy.annotation.model import SomePBC, SomeInstance, SomeDict, SomeList
 from pypy.annotation.model import SomeWeakRef, SomeIterator
-from pypy.annotation.model import SomeOOObject
+from pypy.annotation.model import SomeOOObject, SomeByteArray
 from pypy.annotation.model import annotation_to_lltype, lltype_to_annotation, ll_to_annotation
 from pypy.annotation.model import add_knowntypedata
 from pypy.annotation.model import s_ImpossibleValue
 def builtin_unicode(s_unicode):
     return constpropagate(unicode, [s_unicode], SomeUnicodeString())
 
+def builtin_bytearray(s_str):
+    return constpropagate(bytearray, [s_str], SomeByteArray())
+
 def our_issubclass(cls1, cls2):
     """ we're going to try to be less silly in the face of old-style classes"""
     from pypy.annotation.classdef import ClassDef
                 s = SomeInteger(nonneg=True, knowntype=s.knowntype)
         return s
 
-def builtin_apply(*stuff):
-    getbookkeeper().warning("ignoring apply%r" % (stuff,))
-    return SomeObject()
-
-##def builtin_slice(*args):
-##    bk = getbookkeeper()
-##    if len(args) == 1:
-##        return SomeSlice(
-##            bk.immutablevalue(None), args[0], bk.immutablevalue(None))
-##    elif len(args) == 2:
-##        return SomeSlice(
-##            args[0], args[1], bk.immutablevalue(None))
-##    elif len(args) == 3:
-##        return SomeSlice(
-##            args[0], args[1], args[2])
-##    else:
-##        raise Exception, "bogus call to slice()"
-
 
 def OSError_init(s_self, *args):
     pass

pypy/annotation/model.py

     "Stands for an object which is known to be an unicode string"
     knowntype = unicode
 
+class SomeByteArray(SomeStringOrUnicode):
+    knowntype = bytearray
+
 class SomeChar(SomeString):
     "Stands for an object known to be a string of length 1."
     can_be_None = False

pypy/annotation/test/test_annrpython.py

         a = self.RPythonAnnotator()
         a.build_types(f, []) # assert did not explode
 
+    def test_bytearray(self):
+        def f():
+            return bytearray("xyz")
+
+        a = self.RPythonAnnotator()
+        assert isinstance(a.build_types(f, []), annmodel.SomeByteArray)
+
+    def test_bytearray_add(self):
+        def f(a):
+            return a + bytearray("xyz")
+
+        a = self.RPythonAnnotator()
+        assert isinstance(a.build_types(f, [annmodel.SomeByteArray()]),
+                          annmodel.SomeByteArray)
+        a = self.RPythonAnnotator()
+        assert isinstance(a.build_types(f, [str]),
+                          annmodel.SomeByteArray)
+        a = self.RPythonAnnotator()
+        assert isinstance(a.build_types(f, [annmodel.SomeChar()]),
+                          annmodel.SomeByteArray)
+
+    def test_bytearray_setitem_getitem(self):
+        def f(b, i, c):
+            b[i] = c
+            return b[i + 1]
+
+        a = self.RPythonAnnotator()
+        assert isinstance(a.build_types(f, [annmodel.SomeByteArray(),
+                                            int, int]),
+                          annmodel.SomeInteger)
+
 def g(n):
     return [0,1,2,n]
 

pypy/jit/metainterp/test/test_string.py

             return result
         res = self.meta_interp(main, [9])
         assert res == main(9)
+
+    def test_bytearray(self):
+        py.test.skip("implement it")
+        def f(i):
+            b = bytearray("abc")
+            b[1] = i
+            return b[1]
+
+        res = self.interp_operations(f, [13])
+        assert res == 13

pypy/rpython/lltypesystem/rbytearray.py

+
+from pypy.rpython.rbytearray import AbstractByteArrayRepr
+from pypy.rpython.lltypesystem import lltype, rstr
+from pypy.rlib.debug import ll_assert
+
+BYTEARRAY = lltype.GcForwardReference()
+
+def mallocbytearray(size):
+    return lltype.malloc(BYTEARRAY, size)
+
+copy_bytearray_contents = rstr._new_copy_contents_fun(BYTEARRAY, BYTEARRAY,
+                                                      lltype.Char,
+                                                      'bytearray')
+copy_bytearray_contents_from_str = rstr._new_copy_contents_fun(rstr.STR,
+                                                               BYTEARRAY,
+                                                               lltype.Char,
+                                                               'bytearray_from_str')
+
+BYTEARRAY.become(lltype.GcStruct('rpy_bytearray',
+                 ('chars', lltype.Array(lltype.Char)), adtmeths={
+    'malloc' : lltype.staticAdtMethod(mallocbytearray),
+    'copy_contents' : lltype.staticAdtMethod(copy_bytearray_contents),
+    'copy_contents_from_str': lltype.staticAdtMethod(
+                                         copy_bytearray_contents_from_str),
+    'length': rstr.LLHelpers.ll_length,
+}))
+
+class LLHelpers(rstr.LLHelpers):
+    @classmethod
+    def ll_strsetitem(cls, s, i, item):
+        if i < 0:
+            i += s.length()
+        cls.ll_strsetitem_nonneg(s, i, item)
+
+    def ll_strsetitem_nonneg(s, i, item):
+        chars = s.chars
+        ll_assert(i >= 0, "negative str getitem index")
+        ll_assert(i < len(chars), "str getitem index out of bound")
+        chars[i] = chr(item)
+
+    def ll_stritem_nonneg(s, i):
+        return ord(rstr.LLHelpers.ll_stritem_nonneg(s, i))
+
+class ByteArrayRepr(AbstractByteArrayRepr):
+    lowleveltype = lltype.Ptr(BYTEARRAY)
+
+    def __init__(self, *args):
+        AbstractByteArrayRepr.__init__(self, *args)
+        self.ll = LLHelpers
+        self.repr = self
+
+    def convert_const(self, value):
+        if value is None:
+            return lltype.nullptr(BYTEARRAY)
+        p = lltype.malloc(BYTEARRAY, len(value))
+        for i, c in enumerate(value):
+            p.chars[i] = chr(c)
+        return p
+
+bytearray_repr = ByteArrayRepr()
+
+def hlbytearray(ll_b):
+    b = bytearray()
+    for i in range(ll_b.length()):
+        b.append(ll_b.chars[i])
+    return b

pypy/rpython/lltypesystem/rstr.py

 from pypy.rpython.lltypesystem.lltype import \
      GcStruct, Signed, Array, Char, UniChar, Ptr, malloc, \
      Bool, Void, GcArray, nullptr, cast_primitive, typeOf,\
-     staticAdtMethod, GcForwardReference
+     staticAdtMethod, GcForwardReference, malloc
 from pypy.rpython.rmodel import Repr
 from pypy.rpython.lltypesystem import llmemory
 from pypy.tool.sourcetools import func_with_new_name
 def emptyunicodefun():
     return emptyunicode
 
-def _new_copy_contents_fun(TP, CHAR_TP, name):
-    def _str_ofs(item):
-        return (llmemory.offsetof(TP, 'chars') +
-                llmemory.itemoffsetof(TP.chars, 0) +
+def _new_copy_contents_fun(SRC_TP, DST_TP, CHAR_TP, name):
+    def _str_ofs_src(item):
+        return (llmemory.offsetof(SRC_TP, 'chars') +
+                llmemory.itemoffsetof(SRC_TP.chars, 0) +
+                llmemory.sizeof(CHAR_TP) * item)
+
+    def _str_ofs_dst(item):
+        return (llmemory.offsetof(DST_TP, 'chars') +
+                llmemory.itemoffsetof(DST_TP.chars, 0) +
                 llmemory.sizeof(CHAR_TP) * item)
 
     @jit.oopspec('stroruni.copy_contents(src, dst, srcstart, dststart, length)')
         # because it might move the strings.  The keepalive_until_here()
         # are obscurely essential to make sure that the strings stay alive
         # longer than the raw_memcopy().
+        assert typeOf(src).TO == SRC_TP
+        assert typeOf(dst).TO == DST_TP
         assert srcstart >= 0
         assert dststart >= 0
         assert length >= 0
-        src = llmemory.cast_ptr_to_adr(src) + _str_ofs(srcstart)
-        dst = llmemory.cast_ptr_to_adr(dst) + _str_ofs(dststart)
+        src = llmemory.cast_ptr_to_adr(src) + _str_ofs_src(srcstart)
+        dst = llmemory.cast_ptr_to_adr(dst) + _str_ofs_dst(dststart)
         llmemory.raw_memcopy(src, dst, llmemory.sizeof(CHAR_TP) * length)
         keepalive_until_here(src)
         keepalive_until_here(dst)
     copy_string_contents._always_inline_ = True
     return func_with_new_name(copy_string_contents, 'copy_%s_contents' % name)
 
-copy_string_contents = _new_copy_contents_fun(STR, Char, 'string')
-copy_unicode_contents = _new_copy_contents_fun(UNICODE, UniChar, 'unicode')
+copy_string_contents = _new_copy_contents_fun(STR, STR, Char, 'string')
+copy_unicode_contents = _new_copy_contents_fun(UNICODE, UNICODE, UniChar,
+                                               'unicode')
 
 CONST_STR_CACHE = WeakValueDictionary()
 CONST_UNICODE_CACHE = WeakValueDictionary()
             s.chars[i] = cast_primitive(UniChar, str.chars[i])
         return s
 
+    def ll_str2bytearray(str):
+        from pypy.rpython.lltypesystem.rbytearray import BYTEARRAY
+        
+        lgt = len(str.chars)
+        b = malloc(BYTEARRAY, lgt)
+        for i in range(lgt):
+            b.chars[i] = str.chars[i]
+        return b
+
     @jit.elidable
     def ll_strhash(s):
         # unlike CPython, there is no reason to avoid to return -1
             s.hash = x
         return x
 
+    def ll_length(s):
+        return len(s.chars)
+
     def ll_strfasthash(s):
         return s.hash     # assumes that the hash is already computed
 
     @jit.elidable
     def ll_strconcat(s1, s2):
-        len1 = len(s1.chars)
-        len2 = len(s2.chars)
+        len1 = s1.length()
+        len2 = s2.length()
         # a single '+' like this is allowed to overflow: it gets
         # a negative result, and the gc will complain
-        newstr = s1.malloc(len1 + len2)
-        s1.copy_contents(s1, newstr, 0, 0, len1)
-        s1.copy_contents(s2, newstr, 0, len1, len2)
+        # the type checks below matter when one operand is a STR and the
+        # other is a BYTEARRAY
+        if typeOf(s1) == Ptr(STR):
+            newstr = s2.malloc(len1 + len2)
+            newstr.copy_contents_from_str(s1, newstr, 0, 0, len1)
+        else:
+            newstr = s1.malloc(len1 + len2)            
+            newstr.copy_contents(s1, newstr, 0, 0, len1)
+        if typeOf(s2) == Ptr(STR):
+            newstr.copy_contents_from_str(s2, newstr, 0, len1, len2)
+        else:
+            newstr.copy_contents(s2, newstr, 0, len1, len2)
         return newstr
     ll_strconcat.oopspec = 'stroruni.concat(s1, s2)'
 
                     adtmeths={'malloc' : staticAdtMethod(mallocstr),
                               'empty'  : staticAdtMethod(emptystrfun),
                               'copy_contents' : staticAdtMethod(copy_string_contents),
-                              'gethash': LLHelpers.ll_strhash}))
+                              'copy_contents_from_str' : staticAdtMethod(copy_string_contents),
+                              'gethash': LLHelpers.ll_strhash,
+                              'length': LLHelpers.ll_length}))
 UNICODE.become(GcStruct('rpy_unicode', ('hash', Signed),
                         ('chars', Array(UniChar, hints={'immutable': True})),
                         adtmeths={'malloc' : staticAdtMethod(mallocunicode),
                                   'empty'  : staticAdtMethod(emptyunicodefun),
                                   'copy_contents' : staticAdtMethod(copy_unicode_contents),
-                                  'gethash': LLHelpers.ll_strhash}
+                                  'copy_contents_from_str' : staticAdtMethod(copy_unicode_contents),
+                                  'gethash': LLHelpers.ll_strhash,
+                                  'length': LLHelpers.ll_length}
                         ))
 
 

pypy/rpython/rbuiltin.py

 def rtype_builtin_unicode(hop):
     return hop.args_r[0].rtype_unicode(hop)
 
+def rtype_builtin_bytearray(hop):
+    return hop.args_r[0].rtype_bytearray(hop)
+
 def rtype_builtin_list(hop):
     return hop.args_r[0].rtype_bltn_list(hop)
 

pypy/rpython/rbytearray.py

+
+from pypy.annotation import model as annmodel
+from pypy.tool.pairtype import pairtype
+from pypy.rpython.rstr import AbstractStringRepr
+from pypy.rpython.rmodel import IntegerRepr
+from pypy.rpython.lltypesystem import lltype
+
+class AbstractByteArrayRepr(AbstractStringRepr):
+    pass
+
+class __extend__(pairtype(AbstractByteArrayRepr, AbstractByteArrayRepr)):
+    def rtype_add((r_b1, r_b2), hop):
+        if hop.s_result.is_constant():
+            return hop.inputconst(r_b1, hop.s_result.const)
+        v_b1, v_b2 = hop.inputargs(r_b1, r_b2)
+        return hop.gendirectcall(r_b1.ll.ll_strconcat, v_b1, v_b2)
+
+class __extend__(pairtype(AbstractByteArrayRepr, AbstractStringRepr)):
+    def rtype_add((r_b1, r_s2), hop):
+        str_repr = r_s2.repr
+        if hop.s_result.is_constant():
+            return hop.inputconst(r_b1, hop.s_result.const)
+        v_b1, v_str2 = hop.inputargs(r_b1, str_repr)
+        return hop.gendirectcall(r_b1.ll.ll_strconcat, v_b1, v_str2)
+
+class __extend__(pairtype(AbstractStringRepr, AbstractByteArrayRepr)):
+    def rtype_add((r_s1, r_b2), hop):
+        str_repr = r_s1.repr
+        if hop.s_result.is_constant():
+            return hop.inputconst(r_b2, hop.s_result.const)
+        v_str1, v_b2 = hop.inputargs(str_repr, r_b2)
+        return hop.gendirectcall(r_b2.ll.ll_strconcat, v_str1, v_b2)
+
+class __extend__(pairtype(AbstractByteArrayRepr, IntegerRepr)):
+    def rtype_setitem((r_b, r_int), hop, checkidx=False):
+        bytearray_repr = r_b.repr
+        v_str, v_index, v_item = hop.inputargs(bytearray_repr, lltype.Signed,
+                                               lltype.Signed)
+        if checkidx:
+            if hop.args_s[1].nonneg:
+                llfn = r_b.ll.ll_strsetitem_nonneg_checked
+            else:
+                llfn = r_b.ll.ll_strsetitem_checked
+        else:
+            if hop.args_s[1].nonneg:
+                llfn = r_b.ll.ll_strsetitem_nonneg
+            else:
+                llfn = r_b.ll.ll_strsetitem
+        if checkidx:
+            hop.exception_is_here()
+        else:
+            hop.exception_cannot_occur()
+        return hop.gendirectcall(llfn, v_str, v_index, v_item)
+
+class __extend__(annmodel.SomeByteArray):
+    def rtyper_makekey(self):
+        return self.__class__,
+
+    def rtyper_makerepr(self, rtyper):
+        return rtyper.type_system.rbytearray.bytearray_repr

pypy/rpython/rstr.py

         hop.exception_is_here()
         return hop.gendirectcall(self.ll.ll_str2unicode, v_str)
 
+    def rtype_bytearray(self, hop):
+        if hop.args_s[0].is_constant():
+            # conversion errors occur during annotation, so they cannot occur any more:
+            hop.exception_cannot_occur()
+            return hop.inputconst(hop.r_result, hop.s_result.const)
+        hop.exception_is_here()
+        return hop.gendirectcall(self.ll.ll_str2bytearray,
+                                 hop.inputarg(hop.args_r[0].repr, 0))
+
     def rtype_method_decode(self, hop):
         if not hop.args_s[1].is_constant():
             raise TyperError("encoding must be a constant")

pypy/rpython/rtyper.py

 # and the rtyper_chooserepr() methods
 from pypy.rpython import rint, rbool, rfloat
 from pypy.rpython import rrange
-from pypy.rpython import rstr, rdict, rlist
+from pypy.rpython import rstr, rdict, rlist, rbytearray
 from pypy.rpython import rclass, rbuiltin, rpbc
 from pypy.rpython import rexternalobj
 from pypy.rpython import rptr

pypy/rpython/test/test_rbytearray.py

+
+from pypy.rpython.test.tool import BaseRtypingTest, LLRtypeMixin
+from pypy.rpython.lltypesystem.rbytearray import hlbytearray
+from pypy.rpython.annlowlevel import llstr, hlstr
+
+class TestByteArray(BaseRtypingTest, LLRtypeMixin):
+    def test_bytearray_creation(self):
+        def f(x):
+            if x:
+                b = bytearray(str(x))
+            else:
+                b = bytearray("def")
+            return b
+        ll_res = self.interpret(f, [0])
+        assert hlbytearray(ll_res) == "def"
+        ll_res = self.interpret(f, [1])
+        assert hlbytearray(ll_res) == "1"
+
+    def test_addition(self):
+        def f(x):
+            return bytearray("a") + hlstr(x)
+
+        ll_res = self.interpret(f, [llstr("def")])
+        assert hlbytearray(ll_res) == "adef"
+
+        def f2(x):
+            return hlstr(x) + bytearray("a")
+
+        ll_res = self.interpret(f2, [llstr("def")])
+        assert hlbytearray(ll_res) == "defa"
+
+        def f3(x):
+            return bytearray(hlstr(x)) + bytearray("a")
+
+        ll_res = self.interpret(f3, [llstr("def")])
+        assert hlbytearray(ll_res) == "defa"
+
+    def test_getitem_setitem(self):
+        def f(s, i, c):
+            b = bytearray(hlstr(s))
+            b[i] = c
+            return b[i] + b[i + 1] * 255
+
+        ll_res = self.interpret(f, [llstr("abc"), 1, ord('d')])
+        assert ll_res == ord('d') + ord('c') * 255

pypy/rpython/typesystem.py

                 return None
         if name in ('rclass', 'rpbc', 'rbuiltin', 'rtuple', 'rlist',
                     'rslice', 'rdict', 'rrange', 'rstr', 'rgeneric',
-                    'll_str', 'rbuilder', 'rvirtualizable2',
+                    'll_str', 'rbuilder', 'rvirtualizable2', 'rbytearray',
                     'exceptiondata'):
             mod = load(name)
             if mod is not None: