Commits

mattip  committed 7349482 Merge

merge default into branch

  • Participants
  • Parent commits 87ac4d9, 06cef90
  • Branches win32-fixes3

Comments (0)

Files changed (36)

File pypy/doc/whatsnew-head.rst

 .. branch: improve-docs-2
 Improve documents and straighten out links
 
+.. branch: fast-newarray
+Inline the fast path of newarray in the assembler.
+Disabled on ARM until we fix issues.
+
+
 .. branches we don't care about
 .. branch: autoreds
 .. branch: reflex-support
 
 .. branch: release-2.0-beta2
 .. branch: unbreak-freebsd
+
+.. branch: virtualref-virtualizable

File pypy/module/test_lib_pypy/test_ctypes_config_cache.py

 
 
 def test_syslog():
+    try:
+        import lib_pypy.syslog
+    except ImportError:
+        py.test.skip('no syslog on this platform')
     d = run('syslog.ctc.py', '_syslog_cache.py')
     assert 'LOG_NOTICE' in d
 

File pypy/module/test_lib_pypy/test_os_wait.py

 # Generates the resource cache
-from __future__ import absolute_import
-from lib_pypy.ctypes_config_cache import rebuild
-rebuild.rebuild_one('resource.ctc.py')
+#from __future__ import absolute_import
+#from lib_pypy.ctypes_config_cache import rebuild
+#rebuild.rebuild_one('resource.ctc.py')
 
 import os
 

File pypy/module/test_lib_pypy/test_resource.py

 from __future__ import absolute_import
+import py
+try:
+    from lib_pypy import resource
+except ImportError:
+    py.test.skip('no resource module available')
+
 from lib_pypy.ctypes_config_cache import rebuild
 rebuild.rebuild_one('resource.ctc.py')
 
-from lib_pypy import resource
 
 def test_resource():
     x = resource.getrusage(resource.RUSAGE_SELF)

File pypy/module/test_lib_pypy/test_site_extra.py

 def test_preimported_modules():
     lst = ['__builtin__', '_codecs', '_warnings', 'codecs', 'encodings',
            'exceptions', 'signal', 'sys', 'zipimport']
-    g = os.popen("'%s' -c 'import sys; print sorted(sys.modules)'" %
+    g = os.popen('"%s" -c "import sys; print sorted(sys.modules)"' %
                  (sys.executable,))
     real_data = g.read()
     g.close()

File pypy/module/test_lib_pypy/test_syslog.py

 from __future__ import absolute_import
+import py
+try:
+    from lib_pypy import syslog
+except ImportError:
+    py.test.skip('no syslog on this platform')
+
 # XXX very minimal test
 
 from lib_pypy.ctypes_config_cache import rebuild
 rebuild.rebuild_one('syslog.ctc.py')
 
-from lib_pypy import syslog
-
 
 def test_syslog():
     assert hasattr(syslog, 'LOG_ALERT')

File rpython/jit/backend/arm/assembler.py

     operations as regalloc_operations,
     operations_with_guard as regalloc_operations_with_guard)
 from rpython.jit.backend.llsupport import jitframe
-from rpython.jit.backend.llsupport.assembler import DEBUG_COUNTER, debug_bridge
+from rpython.jit.backend.llsupport.assembler import DEBUG_COUNTER, debug_bridge, BaseAssembler
 from rpython.jit.backend.llsupport.asmmemmgr import MachineDataBlockWrapper
 from rpython.jit.backend.model import CompiledLoopToken
 from rpython.jit.codewriter.effectinfo import EffectInfo
 from rpython.rlib.rarithmetic import r_uint
 from rpython.rtyper.annlowlevel import llhelper, cast_instance_to_gcref
 from rpython.rtyper.lltypesystem import lltype, rffi
-
+from rpython.jit.backend.arm.detect import detect_hardfloat
 
 class AssemblerARM(ResOpAssembler):
 
         self.loop_run_counters = []
         self.gcrootmap_retaddr_forced = 0
 
+    def setup_once(self):
+        BaseAssembler.setup_once(self)
+        self.hf_abi = detect_hardfloat()
+
     def setup(self, looptoken):
         assert self.memcpy_addr != 0, 'setup_once() not called?'
         if we_are_translated():
         mc.CMP_ri(r.r0.value, 0)
         mc.B(self.propagate_exception_path, c=c.EQ)
         #
-        self._reload_frame_if_necessary(mc, align_stack=True)
+        self._reload_frame_if_necessary(mc)
         self._pop_all_regs_from_jitframe(mc, [r.r0, r.r1], self.cpu.supports_floats)
         #
         nursery_free_adr = self.cpu.gc_ll_descr.get_nursery_free_addr()
         rawstart = mc.materialize(self.cpu.asmmemmgr, [])
         self.malloc_slowpath = rawstart
 
-    def _reload_frame_if_necessary(self, mc, align_stack=False, can_collect=0):
+    def _reload_frame_if_necessary(self, mc):
         gcrootmap = self.cpu.gc_ll_descr.gcrootmap
         if gcrootmap and gcrootmap.is_shadow_stack:
             rst = gcrootmap.get_root_stack_top_addr()
             # frame never uses card marking, so we enforce this is not
             # an array
             self._write_barrier_fastpath(mc, wbdescr, [r.fp], array=False,
-                                         is_frame=True)#, align_stack=align_stack)
+                                         is_frame=True)
 
     def propagate_memoryerror_if_r0_is_null(self):
         # see ../x86/assembler.py:propagate_memoryerror_if_eax_is_null

File rpython/jit/backend/arm/detect.py

 from rpython.translator.tool.cbuild import ExternalCompilationInfo
-from rpython.rtyper.lltypesystem import lltype, rffi
+from rpython.rlib.clibffi import FFI_DEFAULT_ABI, FFI_SYSV, FFI_VFP
 from rpython.rtyper.tool import rffi_platform
 from rpython.translator.platform import CompilationError
 
     """])
 
 def detect_hardfloat():
-    # http://gcc.gnu.org/ml/gcc-patches/2010-10/msg02419.html
-    if rffi_platform.getdefined('__ARM_PCS_VFP', ''):
-       return rffi_platform.getconstantinteger('__ARM_PCS_VFP', '')
-    return False
+    return FFI_DEFAULT_ABI == FFI_VFP
 
 def detect_float():
     """Check for hardware float support

File rpython/jit/backend/arm/opassembler.py

 from rpython.jit.backend.arm import registers as r
 from rpython.jit.backend.arm import shift
 from rpython.jit.backend.arm.arch import WORD, DOUBLE_WORD, JITFRAME_FIXED_SIZE
-
 from rpython.jit.backend.arm.helper.assembler import (gen_emit_op_by_helper_call,
                                                 gen_emit_op_unary_cmp,
                                                 gen_emit_guard_unary_cmp,
         return cond
 
     def _emit_call(self, adr, arglocs, fcond=c.AL, resloc=None,
-                                            result_info=(-1, -1),
-                                            can_collect=1,
-                                            reload_frame=False):
-        if self.cpu.hf_abi:
+                    result_info=(-1, -1),
+                    # whether to worry about a CALL that can collect; this
+                    # is always true except in call_release_gil
+                    can_collect=True):
+        if self.hf_abi:
             stack_args, adr = self._setup_call_hf(adr, arglocs, fcond,
                                             resloc, result_info)
         else:
                                             resloc, result_info)
 
         if can_collect:
+            # we push *now* the gcmap, describing the status of GC registers
+            # after the rearrangements done just above, ignoring the return
+            # value in r0, if necessary
             noregs = self.cpu.gc_ll_descr.is_shadow_stack()
             gcmap = self._regalloc.get_gcmap([r.r0], noregs=noregs)
             self.push_gcmap(self.mc, gcmap, store=True)
 
         # ensure the result is wellformed and stored in the correct location
         if resloc is not None:
-            if resloc.is_vfp_reg() and not self.cpu.hf_abi:
+            if resloc.is_vfp_reg() and not self.hf_abi:
                 # move result to the allocated register
                 self.mov_to_vfp_loc(r.r0, r.r1, resloc)
             elif resloc.is_reg() and result_info != (-1, -1):
                 self._ensure_result_bit_extension(resloc, result_info[0],
                                                           result_info[1])
         if can_collect:
-            self._reload_frame_if_necessary(self.mc, can_collect=can_collect)
+            self._reload_frame_if_necessary(self.mc)
             self.pop_gcmap(self.mc)
-        elif reload_frame:
-            self._reload_frame_if_necessary(self.mc)
         return fcond
 
     def _restore_sp(self, stack_args, fcond):
     def emit_guard_call_release_gil(self, op, guard_op, arglocs, regalloc,
                                                                     fcond):
 
+        self._store_force_index(guard_op)
         # first, close the stack in the sense of the asmgcc GC root tracker
         gcrootmap = self.cpu.gc_ll_descr.gcrootmap
         numargs = op.numargs()
         resloc = arglocs[0]
 
         if gcrootmap:
-            noregs = self.cpu.gc_ll_descr.is_shadow_stack()
-            assert noregs
-            gcmap = self._regalloc.get_gcmap([r.r0], noregs=noregs)
+            # we put the gcmap now into the frame before releasing the GIL,
+            # and pop it below after reacquiring the GIL.  The assumption
+            # is that this gcmap describes correctly the situation at any
+            # point in-between: all values containing GC pointers should
+            # be safely saved out of registers by now, and will not be
+            # manipulated by any of the following CALLs.
+            gcmap = self._regalloc.get_gcmap(noregs=True)
             self.push_gcmap(self.mc, gcmap, store=True)
             self.call_release_gil(gcrootmap, arglocs, regalloc, fcond)
         # do the call
-        self._store_force_index(guard_op)
-        #
         descr = op.getdescr()
         size = descr.get_result_size()
         signed = descr.is_result_signed()
         #
         self._emit_call(adr, callargs, fcond,
                                     resloc, (size, signed),
-                                    can_collect=0)
+                                    can_collect=False)
         # then reopen the stack
         if gcrootmap:
             self.call_reacquire_gil(gcrootmap, resloc, regalloc, fcond)
+            self.pop_gcmap(self.mc)     # remove the gcmap saved above
 
         self._emit_guard_may_force(guard_op, arglocs[numargs+1:], numargs)
         return fcond
         # call the reopenstack() function (also reacquiring the GIL)
         with saved_registers(self.mc, regs_to_save, vfp_regs_to_save):
             self._emit_call(imm(self.reacqgil_addr), [], fcond,
-                    can_collect=False, reload_frame=True)
+                    can_collect=False)
 
     def _store_force_index(self, guard_op):
         faildescr = guard_op.getdescr()
             )
         self._alignment_check()
         return fcond
-    emit_op_call_malloc_nursery_varsize_small = emit_op_call_malloc_nursery
+    emit_op_call_malloc_nursery_varsize_frame = emit_op_call_malloc_nursery
 
 
     def _alignment_check(self):

File rpython/jit/backend/arm/regalloc.py

         self.possibly_free_var(t)
         return [imm(size)]
 
-    def prepare_op_call_malloc_nursery_varsize_small(self, op, fcond):
+    def prepare_op_call_malloc_nursery_varsize_frame(self, op, fcond):
         size_box = op.getarg(0)
         assert isinstance(size_box, BoxInt)
 

File rpython/jit/backend/arm/runner.py

 from rpython.jit.backend.llsupport.llmodel import AbstractLLCPU
 from rpython.rlib.jit_hooks import LOOP_RUN_CONTAINER
 from rpython.rtyper.lltypesystem import lltype, llmemory
+from rpython.jit.backend.arm.detect import detect_hardfloat
 
 jitframe.STATICSIZE = JITFRAME_FIXED_SIZE
 
     supports_floats = True
     supports_longlong = False # XXX requires an implementation of
                               # read_timestamp that works in user mode
-    supports_singlefloats = True
+    supports_singlefloats = not detect_hardfloat()
 
     from rpython.jit.backend.arm.arch import JITFRAME_FIXED_SIZE
     all_reg_indexes = range(len(all_regs))
 
 
 class CPU_ARM(AbstractARMCPU):
-    """ARM v7 uses softfp ABI, requires vfp"""
+    """ARM v7"""
     backend_name = "armv7"
 
-
-class CPU_ARMHF(AbstractARMCPU):
-    """ARM v7 uses hardfp ABI, requires vfp"""
-    hf_abi = True
-    backend_name = "armv7hf"
-    supports_floats = True
-    supports_singlefloats = False
-
-
-class CPU_ARMv6HF(AbstractARMCPU):
+class CPU_ARMv6(AbstractARMCPU):
     """ ARM v6, uses hardfp ABI, requires vfp"""
-    hf_abi = True
     arch_version = 6
-    backend_name = "armv6hf"
-    supports_floats = True
-    supports_singlefloats = False
+    backend_name = "armv6"

File rpython/jit/backend/arm/test/conftest.py

                     dest="run_translation_tests",
                     help="run tests that translate code")
 
-def pytest_runtest_setup(item):
+def pytest_collect_directory(path, parent):
     if not cpu.startswith('arm'):
         py.test.skip("ARM(v7) tests skipped: cpu is %r" % (cpu,))
+pytest_collect_file = pytest_collect_directory

File rpython/jit/backend/detect_cpu.py

                 model = 'x86-without-sse2'
     if model.startswith('arm'):
         from rpython.jit.backend.arm.detect import detect_hardfloat, detect_float
-        if detect_hardfloat():
-            model += 'hf'
         assert detect_float(), 'the JIT-compiler requires a vfp unit'
     return model
 
         return "rpython.jit.backend.x86.runner", "CPU_X86_64"
     elif backend_name == 'cli':
         return "rpython.jit.backend.cli.runner", "CliCPU"
-    elif backend_name == 'armv6hf':
-        return "rpython.jit.backend.arm.runner", "CPU_ARMv6HF"
-    elif backend_name == 'armv7':
+    elif backend_name.startswith('armv6'):
+        return "rpython.jit.backend.arm.runner", "CPU_ARMv6"
+    elif backend_name.startswith('armv7'):
         return "rpython.jit.backend.arm.runner", "CPU_ARM"
-    elif backend_name == 'armv7hf':
-        return "rpython.jit.backend.arm.runner", "CPU_ARMHF"
     else:
         raise ProcessorAutodetectError, (
             "we have no JIT backend for this cpu: '%s'" % backend_name)

File rpython/jit/backend/llsupport/assembler.py

         # the address of the function called by 'new'
         gc_ll_descr = self.cpu.gc_ll_descr
         gc_ll_descr.initialize()
+        if hasattr(gc_ll_descr, 'minimal_size_in_nursery'):
+            self.gc_minimal_size_in_nursery = gc_ll_descr.minimal_size_in_nursery
+        else:
+            self.gc_minimal_size_in_nursery = 0
+        if hasattr(gc_ll_descr, 'gcheaderbuilder'):
+            self.gc_size_of_header = gc_ll_descr.gcheaderbuilder.size_gc_header
+        else:
+            self.gc_size_of_header = WORD # for tests
         self.memcpy_addr = self.cpu.cast_ptr_to_int(memcpy_fn)
         self._build_failure_recovery(False, withfloats=False)
         self._build_failure_recovery(True, withfloats=False)
             self._build_wb_slowpath(True, withfloats=True)
         self._build_propagate_exception_path()
         if gc_ll_descr.get_malloc_slowpath_addr is not None:
-            self._build_malloc_slowpath()
+            # generate a few slowpaths for various cases
+            self.malloc_slowpath = self._build_malloc_slowpath(kind='fixed')
+            self.malloc_slowpath_varsize = self._build_malloc_slowpath(
+                kind='var')
+        if hasattr(gc_ll_descr, 'malloc_str'):
+            self.malloc_slowpath_str = self._build_malloc_slowpath(kind='str')
+        else:
+            self.malloc_slowpath_str = None
+        if hasattr(gc_ll_descr, 'malloc_unicode'):
+            self.malloc_slowpath_unicode = self._build_malloc_slowpath(
+                kind='unicode')
+        else:
+            self.malloc_slowpath_unicode = None
+
         self._build_stack_check_slowpath()
         if gc_ll_descr.gcrootmap:
             self._build_release_gil(gc_ll_descr.gcrootmap)

File rpython/jit/backend/llsupport/gc.py

             return llop1.do_malloc_fixedsize_clear(llmemory.GCREF,
                                                    type_id, size,
                                                    False, False, False)
+
         self.generate_function('malloc_nursery', malloc_nursery_slowpath,
                                [lltype.Signed])
 
     def get_malloc_slowpath_addr(self):
         return self.get_malloc_fn_addr('malloc_nursery')
 
+    def get_malloc_slowpath_array_addr(self):
+        return self.get_malloc_fn_addr('malloc_array')
+    
 # ____________________________________________________________
 
 def get_ll_description(gcdescr, translator=None, rtyper=None):

File rpython/jit/backend/llsupport/llmodel.py

 class AbstractLLCPU(AbstractCPU):
     from rpython.jit.metainterp.typesystem import llhelper as ts
 
+    can_inline_varsize_malloc = False
+
     def __init__(self, rtyper, stats, opts, translate_support_code=False,
                  gcdescr=None):
         assert type(opts) is not bool
         def pos_exc_value():
             addr = llop.get_exc_value_addr(llmemory.Address)
             return heaptracker.adr2int(addr)
-        
+
         from rpython.rlib import rstack
-        
+
         STACK_CHECK_SLOWPATH = lltype.Ptr(lltype.FuncType([lltype.Signed],
                                                           lltype.Void))
         def insert_stack_check():

File rpython/jit/backend/llsupport/rewrite.py

 from rpython.jit.backend.llsupport.descr import SizeDescr, ArrayDescr
 from rpython.jit.metainterp.history import JitCellToken
 
+FLAG_ARRAY = 0
+FLAG_STR = 1
+FLAG_UNICODE = 2
+
 class GcRewriterAssembler(object):
-    # This class performs the following rewrites on the list of operations:
-    #
-    # - Remove the DEBUG_MERGE_POINTs.
-    #
-    # - Turn all NEW_xxx to either a CALL_MALLOC_GC, or a CALL_MALLOC_NURSERY
-    #   followed by SETFIELDs in order to initialize their GC fields.  The
-    #   two advantages of CALL_MALLOC_NURSERY is that it inlines the common
-    #   path, and we need only one such operation to allocate several blocks
-    #   of memory at once.
-    #
-    # - Add COND_CALLs to the write barrier before SETFIELD_GC and
-    #   SETARRAYITEM_GC operations.
+    """ This class performs the following rewrites on the list of operations:
+
+     - Remove the DEBUG_MERGE_POINTs.
+
+     - Turn all NEW_xxx to either a CALL_MALLOC_GC, or a CALL_MALLOC_NURSERY
+       followed by SETFIELDs in order to initialize their GC fields.  The
+       two advantages of CALL_MALLOC_NURSERY are that it inlines the common
+       path, and we need only one such operation to allocate several blocks
+       of memory at once.
+
+     - Add COND_CALLs to the write barrier before SETFIELD_GC and
+       SETARRAYITEM_GC operations.
+
+    recent_mallocs contains a dictionary of variable -> None. If a variable
+    is in the dictionary, next setfields can be called without a write barrier,
+    because the variable got allocated after the last potentially collecting
+    resop
+    """
 
     _previous_size = -1
     _op_malloc_nursery = None
         self.cpu = cpu
         self.newops = []
         self.known_lengths = {}
-        self.recent_mallocs = {}     # set of variables
+        self.recent_mallocs = {}
 
     def rewrite(self, operations):
         # we can only remember one malloc since the next malloc can possibly
             assert isinstance(descr, ArrayDescr)
             self.handle_new_array(descr, op)
         elif opnum == rop.NEWSTR:
-            self.handle_new_array(self.gc_ll_descr.str_descr, op)
+            self.handle_new_array(self.gc_ll_descr.str_descr, op,
+                                  kind=FLAG_STR)
         elif opnum == rop.NEWUNICODE:
-            self.handle_new_array(self.gc_ll_descr.unicode_descr, op)
+            self.handle_new_array(self.gc_ll_descr.unicode_descr, op,
+                                  kind=FLAG_UNICODE)
         else:
             raise NotImplementedError(op.getopname())
 
         else:
             self.gen_malloc_fixedsize(size, descr.tid, op.result)
 
-    def handle_new_array(self, arraydescr, op):
+    def handle_new_array(self, arraydescr, op, kind=FLAG_ARRAY):
         v_length = op.getarg(0)
         total_size = -1
         if isinstance(v_length, ConstInt):
                 pass    # total_size is still -1
         elif arraydescr.itemsize == 0:
             total_size = arraydescr.basesize
+        elif (self.gc_ll_descr.can_use_nursery_malloc(1) and
+              self.gen_malloc_nursery_varsize(arraydescr.itemsize,
+              v_length, op.result, arraydescr, kind=kind)):
+            # note that we cannot initialize tid here, because the array
+            # might end up being allocated by malloc_external or some
+            # stuff that initializes GC header fields differently
+            self.gen_initialize_len(op.result, v_length, arraydescr.lendescr)
+            return
         if (total_size >= 0 and
                 self.gen_malloc_nursery(total_size, op.result)):
             self.gen_initialize_tid(op.result, arraydescr.tid)
                                size_box,
                                descr=descrs.jfi_frame_size)
             self.newops.append(op0)
-            self.gen_malloc_nursery_varsize(size_box, frame, is_small=True)
+            self.gen_malloc_nursery_varsize_frame(size_box, frame)
             self.gen_initialize_tid(frame, descrs.arraydescr.tid)
             length_box = history.BoxInt()
             op1 = ResOperation(rop.GETFIELD_GC, [history.ConstInt(frame_info)],
         self._gen_call_malloc_gc([ConstInt(addr), v_num_elem], v_result,
                                  self.gc_ll_descr.malloc_unicode_descr)
 
-    def gen_malloc_nursery_varsize(self, sizebox, v_result, is_small=False):
-        """ Generate CALL_MALLOC_NURSERY_VARSIZE_SMALL
+    def gen_malloc_nursery_varsize(self, itemsize, v_length, v_result,
+                                   arraydescr, kind=FLAG_ARRAY):
+        """ itemsize is an int, v_length and v_result are boxes
         """
-        assert is_small
+        if not self.cpu.can_inline_varsize_malloc:
+            return False # temporary, kill when ARM supports it
+        gc_descr = self.gc_ll_descr
+        if (kind == FLAG_ARRAY and
+            (arraydescr.basesize != gc_descr.standard_array_basesize or
+             arraydescr.lendescr.offset != gc_descr.standard_array_length_ofs)):
+            return False
         self.emitting_an_operation_that_can_collect()
-        op = ResOperation(rop.CALL_MALLOC_NURSERY_VARSIZE_SMALL,
+        op = ResOperation(rop.CALL_MALLOC_NURSERY_VARSIZE,
+                          [ConstInt(kind), ConstInt(itemsize), v_length],
+                          v_result, descr=arraydescr)
+        self.newops.append(op)
+        self.recent_mallocs[v_result] = None
+        return True
+
+    def gen_malloc_nursery_varsize_frame(self, sizebox, v_result):
+        """ Generate CALL_MALLOC_NURSERY_VARSIZE_FRAME
+        """
+        self.emitting_an_operation_that_can_collect()
+        op = ResOperation(rop.CALL_MALLOC_NURSERY_VARSIZE_FRAME,
                           [sizebox],
                           v_result)
 

File rpython/jit/backend/llsupport/test/test_gc_integration.py

     return r[r.find('1'):]
 
 class TestRegallocGcIntegration(BaseTestRegalloc):
-    
+
     cpu = CPU(None, None)
     cpu.gc_ll_descr = GcLLDescr_boehm(None, None, None)
     cpu.setup_once()
-    
+
     S = lltype.GcForwardReference()
     S.become(lltype.GcStruct('S', ('field', lltype.Ptr(S)),
                              ('int', lltype.Signed)))
     gcrootmap = None
     passes_frame = True
     write_barrier_descr = None
+    max_size_of_young_obj = 50
 
     def __init__(self, callback):
         GcLLDescription.__init__(self, None)
                                [lltype.Signed, jitframe.JITFRAMEPTR],
                                lltype.Signed)
 
+        def malloc_array(itemsize, tid, num_elem):
+            self.calls.append((itemsize, tid, num_elem))
+            return 13
+
+        self.malloc_slowpath_array_fnptr = llhelper_args(malloc_array,
+                                                         [lltype.Signed] * 3,
+                                                         lltype.Signed)
+
+        def malloc_str(size):
+            self.calls.append(('str', size))
+            return 13
+        self.generate_function('malloc_str', malloc_str, [lltype.Signed],
+                               lltype.Signed)
+
     def get_nursery_free_addr(self):
         return rffi.cast(lltype.Signed, self.addrs)
 
     def get_malloc_slowpath_addr(self):
         return self.get_malloc_fn_addr('malloc_nursery')
 
+    def get_malloc_slowpath_array_addr(self):
+        return self.malloc_slowpath_array_fnptr
+
     def check_nothing_in_nursery(self):
         # CALL_MALLOC_NURSERY should not write anything in the nursery
         for i in range(64):
         # slowpath never called
         assert gc_ll_descr.calls == []
 
-    def test_malloc_nursery_varsize_small(self):
+    def test_malloc_nursery_varsize_frame(self):
         self.cpu = self.getcpu(None)
         ops = '''
         [i0, i1, i2]
-        p0 = call_malloc_nursery_varsize_small(i0)
-        p1 = call_malloc_nursery_varsize_small(i1)
-        p2 = call_malloc_nursery_varsize_small(i2)
+        p0 = call_malloc_nursery_varsize_frame(i0)
+        p1 = call_malloc_nursery_varsize_frame(i1)
+        p2 = call_malloc_nursery_varsize_frame(i2)
         guard_true(i0) [p0, p1, p2]
-        ''' 
+        '''
         self.interpret(ops, [16, 32, 16])
         # check the returned pointers
         gc_ll_descr = self.cpu.gc_ll_descr
         # slowpath never called
         assert gc_ll_descr.calls == []
 
+    def test_malloc_nursery_varsize(self):
+        self.cpu = self.getcpu(None)
+        A = lltype.GcArray(lltype.Signed)
+        arraydescr = self.cpu.arraydescrof(A)
+        arraydescr.tid = 15
+        ops = '''
+        [i0, i1, i2]
+        p0 = call_malloc_nursery_varsize(0, 8, i0, descr=arraydescr)
+        p1 = call_malloc_nursery_varsize(0, 5, i1, descr=arraydescr)
+        guard_false(i0) [p0, p1]
+        '''
+        self.interpret(ops, [1, 2, 3],
+                       namespace={'arraydescr': arraydescr})
+        # check the returned pointers
+        gc_ll_descr = self.cpu.gc_ll_descr
+        nurs_adr = rffi.cast(lltype.Signed, gc_ll_descr.nursery)
+        ref = lambda n: self.cpu.get_ref_value(self.deadframe, n)
+        assert rffi.cast(lltype.Signed, ref(0)) == nurs_adr + 0
+        assert rffi.cast(lltype.Signed, ref(1)) == nurs_adr + 2*WORD + 8*1
+        # check the nursery content and state
+        assert gc_ll_descr.nursery[0] == chr(15)
+        assert gc_ll_descr.nursery[2 * WORD + 8] == chr(15)
+        assert gc_ll_descr.addrs[0] == nurs_adr + (((4 * WORD + 8*1 + 5*2) + (WORD - 1)) & ~(WORD - 1))
+        # slowpath never called
+        assert gc_ll_descr.calls == []
+
+    def test_malloc_nursery_varsize_slowpath(self):
+        self.cpu = self.getcpu(None)
+        ops = """
+        [i0, i1, i2]
+        p0 = call_malloc_nursery_varsize(0, 8, i0, descr=arraydescr)
+        p1 = call_malloc_nursery_varsize(0, 5, i1, descr=arraydescr)
+        p3 = call_malloc_nursery_varsize(0, 5, i2, descr=arraydescr)
+        # overflow
+        p4 = call_malloc_nursery_varsize(0, 5, i2, descr=arraydescr)
+        # we didn't collect, so still overflow
+        p5 = call_malloc_nursery_varsize(1, 5, i2, descr=strdescr)
+        guard_false(i0) [p0, p1, p3, p4]
+        """
+        A = lltype.GcArray(lltype.Signed)
+        arraydescr = self.cpu.arraydescrof(A)
+        arraydescr.tid = 15
+        self.interpret(ops, [10, 3, 3],
+                       namespace={'arraydescr': arraydescr,
+                                  'strdescr': arraydescr})
+        # check the returned pointers
+        gc_ll_descr = self.cpu.gc_ll_descr
+        assert gc_ll_descr.calls == [(8, 15, 10), (5, 15, 3), ('str', 3)]
+        # one fit, one was too large, one was not fitting
+
     def test_malloc_slowpath(self):
         def check(frame):
             expected_size = 1
                 s = bin(x[0]).count('1') + bin(x[1]).count('1')
                 assert s == 16
             # all but two registers + some stuff on stack
-        
+
         self.cpu = self.getcpu(check)
         S1 = lltype.GcStruct('S1')
         S2 = lltype.GcStruct('S2', ('s0', lltype.Ptr(S1)),
 
 class MockShadowStackRootMap(object):
     is_shadow_stack = True
-    
+
     def __init__(self):
         TP = rffi.CArray(lltype.Signed)
         self.stack = lltype.malloc(TP, 10, flavor='raw')
         self.stack_addr[0] = rffi.cast(lltype.Signed, self.stack)
 
     def __del__(self):
-        lltype.free(self.stack_addr, flavor='raw')        
+        lltype.free(self.stack_addr, flavor='raw')
         lltype.free(self.stack, flavor='raw')
 
     def register_asm_addr(self, start, mark):
 class WriteBarrierDescr(AbstractDescr):
     jit_wb_cards_set = 0
     jit_wb_if_flag_singlebyte = 1
-    
+
     def __init__(self, gc_ll_descr):
         def write_barrier(frame):
             gc_ll_descr.write_barrier_on_frame_called = frame
         self.malloc_slowpath_fnptr = llhelper_args(malloc_slowpath,
                                                    [lltype.Signed],
                                                    lltype.Signed)
+
+        def malloc_array(itemsize, tid, num_elem):
+            import pdb
+            pdb.set_trace()
+
+        self.malloc_slowpath_array_fnptr = llhelper_args(malloc_array,
+                                                         [lltype.Signed] * 3,
+                                                         lltype.Signed)
+
         self.all_nurseries = []
 
     def init_nursery(self, nursery_size=None):
     def get_malloc_slowpath_addr(self):
         return self.malloc_slowpath_fnptr
 
+    def get_malloc_slowpath_array_addr(self):
+        return self.malloc_slowpath_array_fnptr
+
     def get_nursery_free_addr(self):
         return self.nursery_addr
 
         for nursery in self.all_nurseries:
             lltype.free(nursery, flavor='raw', track_allocation=False)
         lltype.free(self.nursery_ptrs, flavor='raw')
-    
+
 def unpack_gcmap(frame):
     res = []
     val = 0
     def test_shadowstack_call(self):
         cpu = self.cpu
         cpu.gc_ll_descr.init_nursery(100)
-        cpu.setup_once() 
+        cpu.setup_once()
         S = self.S
         frames = []
-        
+
         def check(i):
             assert cpu.gc_ll_descr.gcrootmap.stack[0] == i
             frame = rffi.cast(JITFRAMEPTR, i)
 
     def test_call_may_force_gcmap(self):
         cpu = self.cpu
-        
+
         def f(frame, arg, x):
             assert not arg
             assert frame.jf_gcmap[0] & 31 == 0
         pdying = getarrayitem_gc(p0, 0, descr=arraydescr)
         px = call_may_force(ConstClass(fptr), pf, pdying, i0, descr=calldescr)
         guard_not_forced(descr=faildescr) [p1, p2, p3, px]
-        finish(px, descr=finishdescr)
+        finish(px, descr=finaldescr)
         """, namespace={'fptr': fptr, 'calldescr': calldescr,
                         'arraydescr': cpu.arraydescrof(A),
                         'faildescr': BasicFailDescr(1),
         cpu.compile_loop(loop.inputargs, loop.operations, token)
         frame = lltype.cast_opaque_ptr(JITFRAMEPTR,
                                        cpu.execute_token(token, 1, a))
-        
+
         assert getmap(frame).count('1') == 4
 
     def test_call_gcmap_no_guard(self):
         cpu = self.cpu
-        
+
         def f(frame, arg, x):
             assert not arg
             assert frame.jf_gcmap[0] & 31 == 0
         pdying = getarrayitem_gc(p0, 0, descr=arraydescr)
         px = call(ConstClass(fptr), pf, pdying, i0, descr=calldescr)
         guard_false(i0, descr=faildescr) [p1, p2, p3, px]
-        finish(px, descr=finishdescr)
+        finish(px, descr=finaldescr)
         """, namespace={'fptr': fptr, 'calldescr': calldescr,
                         'arraydescr': cpu.arraydescrof(A),
                         'faildescr': BasicFailDescr(1),

File rpython/jit/backend/llsupport/test/test_regalloc_integration.py

                      type_system=self.type_system,
                      boxkinds=boxkinds)
 
-    def interpret(self, ops, args, run=True):
-        loop = self.parse(ops)
+    def interpret(self, ops, args, run=True, namespace=None):
+        loop = self.parse(ops, namespace=namespace)
         self.loop = loop
         looptoken = JitCellToken()
         self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken)

File rpython/jit/backend/llsupport/test/test_rewrite.py

                                  ('t', lltype.Signed))
         tdescr = get_size_descr(self.gc_ll_descr, T)
         tdescr.tid = 5678
-        get_field_descr(self.gc_ll_descr, T, 'z')
+        tzdescr = get_field_descr(self.gc_ll_descr, T, 'z')
         #
         A = lltype.GcArray(lltype.Signed)
         adescr = get_array_descr(self.gc_ll_descr, A)
         signedframedescr = self.cpu.signedframedescr
         floatframedescr = self.cpu.floatframedescr
         casmdescr.compiled_loop_token = clt
+        tzdescr = None # noone cares
         #
         namespace.update(locals())
         #
 
 class BaseFakeCPU(object):
     JITFRAME_FIXED_SIZE = 0
-    
+
+    can_inline_varsize_malloc = True
+
     def __init__(self):
         self.tracker = FakeTracker()
         self._cache = {}
 
     def unpack_arraydescr_size(self, d):
         return 0, d.itemsize, 0
-    
+
     def arraydescrof(self, ARRAY):
         try:
             return self._cache[ARRAY]
             r = ArrayDescr(1, 2, FieldDescr('len', 0, 0, 0), 0)
             self._cache[ARRAY] = r
             return r
-        
+
     def fielddescrof(self, STRUCT, fname):
         key = (STRUCT, fname)
         try:
             jump(i0)
         """, """
             [i0]
-            p0 = call_malloc_gc(ConstClass(malloc_array), 1,  \
-                                %(bdescr.tid)d, i0,           \
-                                descr=malloc_array_descr)
+            p0 = call_malloc_nursery_varsize(0, 1, i0, descr=bdescr)
+            setfield_gc(p0, i0, descr=blendescr)
             jump(i0)
         """)
 
+    def test_rewrite_new_string(self):
+        self.check_rewrite("""
+        [i0]
+        p0 = newstr(i0)
+        jump(i0)
+        """, """
+        [i0]
+        p0 = call_malloc_nursery_varsize(1, 1, i0, descr=strdescr)
+        setfield_gc(p0, i0, descr=strlendescr)
+        jump(i0)
+        """)
+
     def test_rewrite_assembler_nonstandard_array(self):
         # a non-standard array is a bit hard to get; e.g. GcArray(Float)
         # is like that on Win32, but not on Linux.  Build one manually...
             p1 = int_add(p0, %(strdescr.basesize + 16 * strdescr.itemsize)d)
             setfield_gc(p1, %(unicodedescr.tid)d, descr=tiddescr)
             setfield_gc(p1, 10, descr=unicodelendescr)
-            p2 = call_malloc_gc(ConstClass(malloc_unicode), i2, \
-                                descr=malloc_unicode_descr)
-            p3 = call_malloc_gc(ConstClass(malloc_str), i2, \
-                                descr=malloc_str_descr)
+            p2 = call_malloc_nursery_varsize(2, 4, i2, \
+                                descr=unicodedescr)
+            setfield_gc(p2, i2, descr=unicodelendescr)
+            p3 = call_malloc_nursery_varsize(1, 1, i2, \
+                                descr=strdescr)
+            setfield_gc(p3, i2, descr=strlendescr)
             jump()
         """)
 
             [i0]
             p0 = call_malloc_nursery(%(tdescr.size)d)
             setfield_gc(p0, 5678, descr=tiddescr)
-            p1 = call_malloc_gc(ConstClass(malloc_str), i0, \
-                                descr=malloc_str_descr)
+            p1 = call_malloc_nursery_varsize(1, 1, i0, \
+                                descr=strdescr)
+            setfield_gc(p1, i0, descr=strlendescr)
             cond_call_gc_wb(p0, p1, descr=wbdescr)
             setfield_raw(p0, p1, descr=tzdescr)
             jump()
         """, """
         [i0, f0]
         i1 = getfield_gc(ConstClass(frame_info), descr=jfi_frame_size)
-        p1 = call_malloc_nursery_varsize_small(i1)
+        p1 = call_malloc_nursery_varsize_frame(i1)
         setfield_gc(p1, 0, descr=tiddescr)
         i2 = getfield_gc(ConstClass(frame_info), descr=jfi_frame_depth)
         setfield_gc(p1, i2, descr=framelendescr)

File rpython/jit/backend/test/runner_test.py

 
     def test_assembler_call_propagate_exc(self):
         from rpython.jit.backend.llsupport.llmodel import AbstractLLCPU
-        
+
         if not isinstance(self.cpu, AbstractLLCPU):
             py.test.skip("llgraph can't fake exceptions well enough, give up")
 
         ops = """
         [i0]
         i1 = int_force_ge_zero(i0)    # but forced to be in a register
-        finish(i1, descr=1)
+        finish(i1, descr=descr)
         """
+        descr = BasicFinalDescr()
         loop = parse(ops, self.cpu, namespace=locals())
-        descr = loop.operations[-1].getdescr()
         looptoken = JitCellToken()
         self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
         for inp, outp in [(2,2), (-3, 0)]:
             py.test.skip("pointless test on non-asm")
         from rpython.jit.backend.tool.viewcode import machine_code_dump
         import ctypes
+        targettoken = TargetToken()
         ops = """
         [i2]
         i0 = same_as(i2)    # but forced to be in a register
-        label(i0, descr=1)
+        label(i0, descr=targettoken)
         i1 = int_add(i0, i0)
-        guard_true(i1, descr=faildesr) [i1]
-        jump(i1, descr=1)
+        guard_true(i1, descr=faildescr) [i1]
+        jump(i1, descr=targettoken)
         """
         faildescr = BasicFailDescr(2)
         loop = parse(ops, self.cpu, namespace=locals())
-        faildescr = loop.operations[-2].getdescr()
-        jumpdescr = loop.operations[-1].getdescr()
         bridge_ops = """
         [i0]
-        jump(i0, descr=jumpdescr)
+        jump(i0, descr=targettoken)
         """
         bridge = parse(bridge_ops, self.cpu, namespace=locals())
         looptoken = JitCellToken()

File rpython/jit/backend/x86/assembler.py

 import sys
 import os
 
-from rpython.jit.backend.llsupport import symbolic, jitframe
+from rpython.jit.backend.llsupport import symbolic, jitframe, rewrite
 from rpython.jit.backend.llsupport.assembler import (GuardToken, BaseAssembler,
                                                 DEBUG_COUNTER, debug_bridge)
 from rpython.jit.backend.llsupport.asmmemmgr import MachineDataBlockWrapper
         self.float_const_neg_addr = 0
         self.float_const_abs_addr = 0
         self.malloc_slowpath = 0
+        self.malloc_slowpath_varsize = 0
         self.wb_slowpath = [0, 0, 0, 0, 0]
         self.setup_failure_recovery()
         self.datablockwrapper = None
         mc.RET()
         self._frame_realloc_slowpath = mc.materialize(self.cpu.asmmemmgr, [])
 
-    def _build_malloc_slowpath(self):
-        """ While arriving on slowpath, we have a gcpattern on stack,
-        nursery_head in eax and the size in edi - eax
+    def _build_malloc_slowpath(self, kind):
+        """ While arriving on slowpath, we have a gcpattern on stack 0.
+        The arguments are passed in eax and edi, as follows:
+
+        kind == 'fixed': nursery_head in eax and the size in edi - eax.
+
+        kind == 'str/unicode': length of the string to allocate in edi.
+
+        kind == 'var': length to allocate in edi, tid in eax,
+                       and itemsize in the stack 1 (position esp+WORD).
+
+        This function must preserve all registers apart from eax and edi.
         """
+        assert kind in ['fixed', 'str', 'unicode', 'var']
         mc = codebuf.MachineCodeBlockWrapper()
         self._push_all_regs_to_frame(mc, [eax, edi], self.cpu.supports_floats)
+        # store the gc pattern
         ofs = self.cpu.get_ofs_of_frame_field('jf_gcmap')
-        # store the gc pattern
         mc.MOV_rs(ecx.value, WORD)
         mc.MOV_br(ofs, ecx.value)
-        addr = self.cpu.gc_ll_descr.get_malloc_slowpath_addr()
-        mc.SUB_rr(edi.value, eax.value)       # compute the size we want
-        # the arg is already in edi
-        mc.SUB_ri(esp.value, 16 - WORD)
-        if IS_X86_32:
-            mc.MOV_sr(0, edi.value)
-            if hasattr(self.cpu.gc_ll_descr, 'passes_frame'):
-                mc.MOV_sr(WORD, ebp.value)
-        elif hasattr(self.cpu.gc_ll_descr, 'passes_frame'):
-            # for tests only
-            mc.MOV_rr(esi.value, ebp.value)
+        #
+        if kind == 'fixed':
+            addr = self.cpu.gc_ll_descr.get_malloc_slowpath_addr()
+        elif kind == 'str':
+            addr = self.cpu.gc_ll_descr.get_malloc_fn_addr('malloc_str')
+        elif kind == 'unicode':
+            addr = self.cpu.gc_ll_descr.get_malloc_fn_addr('malloc_unicode')
+        else:
+            addr = self.cpu.gc_ll_descr.get_malloc_slowpath_array_addr()
+        mc.SUB_ri(esp.value, 16 - WORD)  # restore 16-byte alignment
+        # magically, the above is enough on X86_32 to reserve 3 stack places
+        if kind == 'fixed':
+            mc.SUB_rr(edi.value, eax.value) # compute the size we want
+            # the arg is already in edi
+            if IS_X86_32:
+                mc.MOV_sr(0, edi.value)
+                if hasattr(self.cpu.gc_ll_descr, 'passes_frame'):
+                    mc.MOV_sr(WORD, ebp.value)
+            elif hasattr(self.cpu.gc_ll_descr, 'passes_frame'):
+                # for tests only
+                mc.MOV_rr(esi.value, ebp.value)
+        elif kind == 'str' or kind == 'unicode':
+            if IS_X86_32:
+                # stack layout: [---][---][---][ret].. with 3 free stack places
+                mc.MOV_sr(0, edi.value)     # store the length
+            else:
+                pass                        # length already in edi
+        else:
+            if IS_X86_32:
+                # stack layout: [---][---][---][ret][gcmap][itemsize]...
+                mc.MOV_sr(WORD * 2, edi.value)  # store the length
+                mc.MOV_sr(WORD * 1, eax.value)  # store the tid
+                mc.MOV_rs(edi.value, WORD * 5)  # load the itemsize
+                mc.MOV_sr(WORD * 0, edi.value)  # store the itemsize
+            else:
+                # stack layout: [---][ret][gcmap][itemsize]...
+                mc.MOV_rr(edx.value, edi.value) # length
+                mc.MOV_rr(esi.value, eax.value) # tid
+                mc.MOV_rs(edi.value, WORD * 3)  # load the itemsize
         self.set_extra_stack_depth(mc, 16)
         mc.CALL(imm(addr))
         mc.ADD_ri(esp.value, 16 - WORD)
         mc.JMP(imm(self.propagate_exception_path))
         #
         rawstart = mc.materialize(self.cpu.asmmemmgr, [])
-        self.malloc_slowpath = rawstart
+        return rawstart
 
     def _build_propagate_exception_path(self):
         if not self.cpu.propagate_exception_descr:
         self.mc.overwrite(jmp_adr-1, chr(offset))
         self.mc.MOV(heap(nursery_free_adr), edi)
 
-    def malloc_cond_varsize_small(self, nursery_free_adr, nursery_top_adr,
+    def malloc_cond_varsize_frame(self, nursery_free_adr, nursery_top_adr,
                                   sizeloc, gcmap):
-        self.mc.MOV(edi, heap(nursery_free_adr))
-        self.mc.MOV(eax, edi)
-        self.mc.ADD(edi, sizeloc)
+        if sizeloc is eax:
+            self.mc.MOV(edi, sizeloc)
+            sizeloc = edi
+        self.mc.MOV(eax, heap(nursery_free_adr))
+        if sizeloc is edi:
+            self.mc.ADD_rr(edi.value, eax.value)
+        else:
+            self.mc.LEA_ra(edi.value, (eax.value, sizeloc.value, 0, 0))
         self.mc.CMP(edi, heap(nursery_top_adr))
         self.mc.J_il8(rx86.Conditions['NA'], 0) # patched later
         jmp_adr = self.mc.get_relative_pos()
         self.mc.overwrite(jmp_adr-1, chr(offset))
         self.mc.MOV(heap(nursery_free_adr), edi)
 
+    def malloc_cond_varsize(self, kind, nursery_free_adr, nursery_top_adr,
+                            lengthloc, itemsize, maxlength, gcmap,
+                            arraydescr):
+        from rpython.jit.backend.llsupport.descr import ArrayDescr
+        assert isinstance(arraydescr, ArrayDescr)
+
+        # lengthloc is the length of the array, which we must not modify!
+        assert lengthloc is not eax and lengthloc is not edi
+        if isinstance(lengthloc, RegLoc):
+            varsizeloc = lengthloc
+        else:
+            self.mc.MOV(edi, lengthloc)
+            varsizeloc = edi
+
+        self.mc.CMP(varsizeloc, imm(maxlength))
+        self.mc.J_il8(rx86.Conditions['A'], 0) # patched later
+        jmp_adr0 = self.mc.get_relative_pos()
+
+        self.mc.MOV(eax, heap(nursery_free_adr))
+        shift = size2shift(itemsize)
+        if shift < 0:
+            self.mc.IMUL_rri(edi.value, varsizeloc.value, itemsize)
+            varsizeloc = edi
+            shift = 0
+        # now varsizeloc is a register != eax.  The size of
+        # the variable part of the array is (varsizeloc << shift)
+        assert arraydescr.basesize >= self.gc_minimal_size_in_nursery
+        constsize = arraydescr.basesize + self.gc_size_of_header
+        force_realignment = (itemsize % WORD) != 0
+        if force_realignment:
+            constsize += WORD - 1
+        self.mc.LEA_ra(edi.value, (eax.value, varsizeloc.value, shift,
+                                   constsize))
+        if force_realignment:
+            self.mc.AND_ri(edi.value, ~(WORD - 1))
+        # now edi contains the total size in bytes, rounded up to a multiple
+        # of WORD, plus nursery_free_adr
+        self.mc.CMP(edi, heap(nursery_top_adr))
+        self.mc.J_il8(rx86.Conditions['NA'], 0) # patched later
+        jmp_adr1 = self.mc.get_relative_pos()
+        #
+        offset = self.mc.get_relative_pos() - jmp_adr0
+        assert 0 < offset <= 127
+        self.mc.overwrite(jmp_adr0-1, chr(offset))
+        # save the gcmap
+        self.push_gcmap(self.mc, gcmap, mov=True)   # mov into RawEspLoc(0)
+        if kind == rewrite.FLAG_ARRAY:
+            self.mc.MOV_si(WORD, itemsize)
+            self.mc.MOV(edi, lengthloc)
+            self.mc.MOV_ri(eax.value, arraydescr.tid)
+            addr = self.malloc_slowpath_varsize
+        else:
+            if kind == rewrite.FLAG_STR:
+                addr = self.malloc_slowpath_str
+            else:
+                assert kind == rewrite.FLAG_UNICODE
+                addr = self.malloc_slowpath_unicode
+            self.mc.MOV(edi, lengthloc)
+        self.mc.CALL(imm(addr))
+        self.mc.JMP_l8(0)      # jump to done, patched later
+        jmp_location = self.mc.get_relative_pos()
+        #
+        offset = self.mc.get_relative_pos() - jmp_adr1
+        assert 0 < offset <= 127
+        self.mc.overwrite(jmp_adr1-1, chr(offset))
+        # write down the tid, but not if it's the result of the CALL
+        self.mc.MOV(mem(eax, 0), imm(arraydescr.tid))
+        # while we're at it, this line is not needed if we've done the CALL
+        self.mc.MOV(heap(nursery_free_adr), edi)
+        #
+        offset = self.mc.get_relative_pos() - jmp_location
+        assert 0 < offset <= 127
+        self.mc.overwrite(jmp_location - 1, chr(offset))
+
     def force_token(self, reg):
         # XXX kill me
         assert isinstance(reg, RegLoc)
     os.write(2, '[x86/asm] %s\n' % msg)
     raise NotImplementedError(msg)
 
+def size2shift(size):
+    "Return a result 0..3 such that (1<<result) == size, or -1 if impossible"
+    if size == 1: return 0
+    if size == 2: return 1
+    if size == 4: return 2
+    if size == 8: return 3
+    return -1
+
 class BridgeAlreadyCompiled(Exception):
     pass

File rpython/jit/backend/x86/regalloc.py

 class X86_64_RegisterManager(X86RegisterManager):
     # r11 omitted because it's used as scratch
     all_regs = [ecx, eax, edx, ebx, esi, edi, r8, r9, r10, r12, r13, r14, r15]
-    
+
     no_lower_byte_regs = []
     save_around_call_regs = [eax, ecx, edx, esi, edi, r8, r9, r10]
 
     def __init__(self, base_ofs):
         FrameManager.__init__(self)
         self.base_ofs = base_ofs
-    
+
     def frame_pos(self, i, box_type):
         return FrameLoc(i, get_ebp_ofs(self.base_ofs, i), box_type)
 
         # looking at the result
         self.rm.force_allocate_reg(op.result, selected_reg=eax)
         #
-        # We need edx as a temporary, but otherwise don't save any more
+        # We need edi as a temporary, but otherwise don't save any more
         # register.  See comments in _build_malloc_slowpath().
         tmp_box = TempBox()
         self.rm.force_allocate_reg(tmp_box, selected_reg=edi)
             gc_ll_descr.get_nursery_top_addr(),
             size, gcmap)
 
-    def consider_call_malloc_nursery_varsize_small(self, op):
+    def consider_call_malloc_nursery_varsize_frame(self, op):
         size_box = op.getarg(0)
         assert isinstance(size_box, BoxInt) # we cannot have a const here!
-        # looking at the result
+        # sizeloc must be in a register, but we can free it now
+        # (we take care explicitly of conflicts with eax or edi)
+        sizeloc = self.rm.make_sure_var_in_reg(size_box)
+        self.rm.possibly_free_var(size_box)
+        # the result will be in eax
         self.rm.force_allocate_reg(op.result, selected_reg=eax)
-        #
-        # We need edx as a temporary, but otherwise don't save any more
-        # register.  See comments in _build_malloc_slowpath().
+        # we need edi as a temporary
         tmp_box = TempBox()
         self.rm.force_allocate_reg(tmp_box, selected_reg=edi)
-        sizeloc = self.rm.make_sure_var_in_reg(size_box, [op.result, tmp_box])
         gcmap = self.get_gcmap([eax, edi]) # allocate the gcmap *before*
         self.rm.possibly_free_var(tmp_box)
         #
         gc_ll_descr = self.assembler.cpu.gc_ll_descr
-        self.assembler.malloc_cond_varsize_small(
+        self.assembler.malloc_cond_varsize_frame(
             gc_ll_descr.get_nursery_free_addr(),
             gc_ll_descr.get_nursery_top_addr(),
             sizeloc, gcmap)
 
+    def consider_call_malloc_nursery_varsize(self, op):
+        gc_ll_descr = self.assembler.cpu.gc_ll_descr
+        if not hasattr(gc_ll_descr, 'max_size_of_young_obj'):
+            raise Exception("unreachable code")
+            # for boehm, this function should never be called
+        arraydescr = op.getdescr()
+        length_box = op.getarg(2)
+        assert isinstance(length_box, BoxInt) # we cannot have a const here!
+        # the result will be in eax
+        self.rm.force_allocate_reg(op.result, selected_reg=eax)
+        # we need edi as a temporary
+        tmp_box = TempBox()
+        self.rm.force_allocate_reg(tmp_box, selected_reg=edi)
+        gcmap = self.get_gcmap([eax, edi]) # allocate the gcmap *before*
+        self.rm.possibly_free_var(tmp_box)
+        # length_box always survives: it's typically also present in the
+        # next operation that will copy it inside the new array.  It's
+        # fine to load it from the stack too, as long as it's != eax, edi.
+        lengthloc = self.rm.loc(length_box)
+        self.rm.possibly_free_var(length_box)
+        #
+        itemsize = op.getarg(1).getint()
+        maxlength = (gc_ll_descr.max_size_of_young_obj - WORD * 2) / itemsize
+        self.assembler.malloc_cond_varsize(
+            op.getarg(0).getint(),
+            gc_ll_descr.get_nursery_free_addr(),
+            gc_ll_descr.get_nursery_top_addr(),
+            lengthloc, itemsize, maxlength, gcmap, arraydescr)
+
     def get_gcmap(self, forbidden_regs=[], noregs=False):
         frame_depth = self.fm.get_frame_depth()
         gcmap = allocate_gcmap(self.assembler, frame_depth, JITFRAME_FIXED_SIZE)
         #jump_op = self.final_jump_op
         #if jump_op is not None and jump_op.getdescr() is descr:
         #    self._compute_hint_frame_locations_from_descr(descr)
-        
+
 
     def consider_keepalive(self, op):
         pass

File rpython/jit/backend/x86/runner.py

     with_threads = False
     frame_reg = regloc.ebp
 
+    can_inline_varsize_malloc = True
+
     from rpython.jit.backend.x86.arch import JITFRAME_FIXED_SIZE
     all_reg_indexes = gpr_reg_mgr_cls.all_reg_indexes
     gen_regs = gpr_reg_mgr_cls.all_regs

File rpython/jit/metainterp/executor.py

                          rop.QUASIIMMUT_FIELD,
                          rop.CALL_MALLOC_GC,
                          rop.CALL_MALLOC_NURSERY,
-                         rop.CALL_MALLOC_NURSERY_VARSIZE_SMALL,
+                         rop.CALL_MALLOC_NURSERY_VARSIZE,
+                         rop.CALL_MALLOC_NURSERY_VARSIZE_FRAME,
                          rop.LABEL,
                          ):      # list of opcodes never executed by pyjitpl
                 continue

File rpython/jit/metainterp/resoperation.py

     'CALL_PURE/*d',             # removed before it's passed to the backend
     'CALL_MALLOC_GC/*d',      # like CALL, but NULL => propagate MemoryError
     'CALL_MALLOC_NURSERY/1',  # nursery malloc, const number of bytes, zeroed
-    'CALL_MALLOC_NURSERY_VARSIZE_SMALL/1',
+    'CALL_MALLOC_NURSERY_VARSIZE/3d',
+    'CALL_MALLOC_NURSERY_VARSIZE_FRAME/1',
     # nursery malloc, non-const number of bytes, zeroed
     # note that the number of bytes must be well known to be small enough
     # to fulfill allocating in the nursery rules (and no card markings)

File rpython/jit/metainterp/test/test_logger.py

         [p0]
         setfield_gc(p0, 3, descr=somedescr)
         '''
-        Descr()
+        somedescr = Descr()
         self.reparse(inp, namespace=locals())
 
     def test_guard(self):

File rpython/jit/tool/oparser.py

                 tt = self.model.TargetToken(token)
                 self._consts[poss_descr] = tt
                 return tt
+            else:
+                raise
 
     def box_for_var(self, elem):
         try:

File rpython/memory/gctransform/framework.py

 
     def __init__(self, translator):
         from rpython.memory.gc.base import choose_gc_from_config
-        from rpython.memory.gc.base import ARRAY_TYPEID_MAP
-        from rpython.memory.gc import inspector
 
         super(BaseFrameworkGCTransformer, self).__init__(translator,
                                                          inline=True)
 
         classdef = bk.getuniqueclassdef(GCClass)
         s_gc = annmodel.SomeInstance(classdef)
+
+        self._declare_functions(GCClass, getfn, s_gc, s_typeid16)
+
+        # thread support
+        if translator.config.translation.continuation:
+            root_walker.stacklet_support = True
+            root_walker.need_stacklet_support(self, getfn)
+        if translator.config.translation.thread:
+            root_walker.need_thread_support(self, getfn)
+
+        self.layoutbuilder.encode_type_shapes_now()
+
+        annhelper.finish()   # at this point, annotate all mix-level helpers
+        annhelper.backend_optimize()
+
+        self.collect_analyzer = CollectAnalyzer(self.translator)
+        self.collect_analyzer.analyze_all()
+
+        s_gc = self.translator.annotator.bookkeeper.valueoftype(GCClass)
+        r_gc = self.translator.rtyper.getrepr(s_gc)
+        self.c_const_gc = rmodel.inputconst(r_gc, self.gcdata.gc)
+        s_gc_data = self.translator.annotator.bookkeeper.valueoftype(
+            gctypelayout.GCData)
+        r_gc_data = self.translator.rtyper.getrepr(s_gc_data)
+        self.c_const_gcdata = rmodel.inputconst(r_gc_data, self.gcdata)
+        self.malloc_zero_filled = GCClass.malloc_zero_filled
+
+        HDR = self.HDR = self.gcdata.gc.gcheaderbuilder.HDR
+
+        size_gc_header = self.gcdata.gc.gcheaderbuilder.size_gc_header
+        vtableinfo = (HDR, size_gc_header, self.gcdata.gc.typeid_is_in_field)
+        self.c_vtableinfo = rmodel.inputconst(lltype.Void, vtableinfo)
+        tig = self.layoutbuilder.type_info_group._as_ptr()
+        self.c_type_info_group = rmodel.inputconst(lltype.typeOf(tig), tig)
+        sko = llmemory.sizeof(gcdata.TYPE_INFO)
+        self.c_vtinfo_skip_offset = rmodel.inputconst(lltype.typeOf(sko), sko)
+
+
+    def _declare_functions(self, GCClass, getfn, s_gc, s_typeid16):
+        from rpython.memory.gc.base import ARRAY_TYPEID_MAP
+        from rpython.memory.gc import inspector
+
         s_gcref = annmodel.SomePtr(llmemory.GCREF)
+        gcdata = self.gcdata
+        translator = self.translator
 
         malloc_fixedsize_clear_meth = GCClass.malloc_fixedsize_clear.im_func
         self.malloc_fixedsize_clear_ptr = getfn(
                                                    [annmodel.SomeAddress()],
                                                    annmodel.s_None)
 
-        # thread support
-        if translator.config.translation.continuation:
-            root_walker.stacklet_support = True
-            root_walker.need_stacklet_support(self, getfn)
-        if translator.config.translation.thread:
-            root_walker.need_thread_support(self, getfn)
-
-        self.layoutbuilder.encode_type_shapes_now()
-
-        annhelper.finish()   # at this point, annotate all mix-level helpers
-        annhelper.backend_optimize()
-
-        self.collect_analyzer = CollectAnalyzer(self.translator)
-        self.collect_analyzer.analyze_all()
-
-        s_gc = self.translator.annotator.bookkeeper.valueoftype(GCClass)
-        r_gc = self.translator.rtyper.getrepr(s_gc)
-        self.c_const_gc = rmodel.inputconst(r_gc, self.gcdata.gc)
-        s_gc_data = self.translator.annotator.bookkeeper.valueoftype(
-            gctypelayout.GCData)
-        r_gc_data = self.translator.rtyper.getrepr(s_gc_data)
-        self.c_const_gcdata = rmodel.inputconst(r_gc_data, self.gcdata)
-        self.malloc_zero_filled = GCClass.malloc_zero_filled
-
-        HDR = self.HDR = self.gcdata.gc.gcheaderbuilder.HDR
-
-        size_gc_header = self.gcdata.gc.gcheaderbuilder.size_gc_header
-        vtableinfo = (HDR, size_gc_header, self.gcdata.gc.typeid_is_in_field)
-        self.c_vtableinfo = rmodel.inputconst(lltype.Void, vtableinfo)
-        tig = self.layoutbuilder.type_info_group._as_ptr()
-        self.c_type_info_group = rmodel.inputconst(lltype.typeOf(tig), tig)
-        sko = llmemory.sizeof(gcdata.TYPE_INFO)
-        self.c_vtinfo_skip_offset = rmodel.inputconst(lltype.typeOf(sko), sko)
 
     def consider_constant(self, TYPE, value):
         self.layoutbuilder.consider_constant(TYPE, value, self.gcdata.gc)

File rpython/rlib/clibffi.py

 
 from rpython.rtyper.tool import rffi_platform
 from rpython.rtyper.lltypesystem import lltype, rffi
+from rpython.rtyper.tool import rffi_platform
 from rpython.rlib.unroll import unrolling_iterable
 from rpython.rlib.rarithmetic import intmask, is_emulated_long
 from rpython.rlib.objectmodel import we_are_translated
 from rpython.translator.tool.cbuild import ExternalCompilationInfo
 from rpython.translator.platform import platform
 from rpython.conftest import cdir
+from platform import machine
 import py
 import os
 import sys
 _LITTLE_ENDIAN = sys.byteorder == 'little'
 _BIG_ENDIAN = sys.byteorder == 'big'
 
+_ARM = rffi_platform.getdefined('__arm__', '')
+
 if _WIN32:
     from rpython.rlib import rwin32
 
     if _WIN32 and not _WIN64:
         FFI_STDCALL = rffi_platform.ConstantInteger('FFI_STDCALL')
 
+    if _ARM:
+        FFI_SYSV = rffi_platform.ConstantInteger('FFI_SYSV')
+        FFI_VFP = rffi_platform.ConstantInteger('FFI_VFP')
+
     FFI_TYPE_STRUCT = rffi_platform.ConstantInteger('FFI_TYPE_STRUCT')
 
     size_t = rffi_platform.SimpleType("size_t", rffi.ULONG)
 FFI_DEFAULT_ABI = cConfig.FFI_DEFAULT_ABI
 if _WIN32 and not _WIN64:
     FFI_STDCALL = cConfig.FFI_STDCALL
+if _ARM:
+    FFI_SYSV = cConfig.FFI_SYSV
+    FFI_VFP = cConfig.FFI_VFP
 FFI_TYPE_STRUCT = cConfig.FFI_TYPE_STRUCT
 FFI_CIFP = lltype.Ptr(cConfig.ffi_cif)
 

File rpython/tool/logparser.py

         time = int(int(match.group(1), 16))
         time_decrase = time_decrase or time < lasttime
         lasttime = time
-        record(match.group(2), time=int(match.group(1), 16))
+        try:
+            record(match.group(2), time=int(match.group(1), 16))
+        except:
+            print "Line", i
+            raise
     if verbose:
         sys.stderr.write('loaded\n')
     if performance_log and time_decrase:

File rpython/translator/c/database.py

 
         self.instrument_ncounter = 0
 
-    def gettypedefnode(self, T, varlength=1):
-        if varlength <= 1:
-            varlength = 1   # it's C after all
+    def gettypedefnode(self, T, varlength=None):
+        if varlength is None:
             key = T
         else:
             key = T, varlength
             self.pendingsetupnodes.append(node)
         return node
 
-    def gettype(self, T, varlength=1, who_asks=None, argnames=[]):
+    def gettype(self, T, varlength=None, who_asks=None, argnames=[]):
         if isinstance(T, Primitive) or T == GCREF:
             return PrimitiveType[T]
         elif isinstance(T, Typedef):

File rpython/translator/c/node.py

     typetag = 'struct'
     extra_union_for_varlength = True
 
-    def __init__(self, db, STRUCT, varlength=1):
+    def __init__(self, db, STRUCT, varlength=None):
         NodeWithDependencies.__init__(self, db)
         self.STRUCT = STRUCT
         self.LLTYPE = STRUCT
         self.varlength = varlength
-        if varlength == 1:
+        if varlength is None:
             basename = STRUCT._name
             with_number = True
         else:
         self.fields = []
         db = self.db
         STRUCT = self.STRUCT
-        if self.varlength != 1:
+        if self.varlength is not None:
             self.normalizedtypename = db.gettype(STRUCT, who_asks=self)
         if needs_gcheader(self.STRUCT):
             HDR = db.gcpolicy.struct_gcheader_definition(self)
                 rtti = getRuntimeTypeInfo(STRUCT)
             except ValueError:
                 pass
-        if self.varlength == 1:
+        if self.varlength is None:
             self.db.gcpolicy.struct_setup(self, rtti)
         return self.gcinfo
     gcinfo = defaultproperty(computegcinfo)
             if typename == PrimitiveType[Void]:
                 line = '/* %s */' % line
             else:
+                if is_empty and typename.endswith('[RPY_VARLENGTH]'):
+                    yield '\tRPY_DUMMY_VARLENGTH'
                 is_empty = False
             yield '\t' + line
         if is_empty:
             yield '\t' + 'char _dummy; /* this struct is empty */'
         yield '};'
-        if self.varlength != 1:
+        if self.varlength is not None:
             assert self.typetag == 'struct'
             yield 'union %su {' % self.name
             yield '  struct %s a;' % self.name
 
     def debug_offsets(self):
         # generate number exprs giving the offset of the elements in the struct
-        assert self.varlength == 1
+        assert self.varlength is None
         for name in self.fieldnames:
             FIELD_T = self.c_struct_field_type(name)
             if FIELD_T is Void:
                     yield 'offsetof(%s %s, %s)' % (self.typetag,
                                                    self.name, cname)
 
+def deflength(varlength):
+    if varlength is None:
+        return 'RPY_VARLENGTH'
+    elif varlength == 0:
+        return 'RPY_LENGTH0'
+    else:
+        return varlength
 
 class ArrayDefNode(NodeWithDependencies):
     typetag = 'struct'
     extra_union_for_varlength = True
 
-    def __init__(self, db, ARRAY, varlength=1):
+    def __init__(self, db, ARRAY, varlength=None):
         NodeWithDependencies.__init__(self, db)
         self.ARRAY = ARRAY
         self.LLTYPE = ARRAY
         self.gcfields = []
         self.varlength = varlength
-        if varlength == 1:
+        if varlength is None:
             basename = 'array'
             with_number = True
         else:
         db = self.db
         ARRAY = self.ARRAY
         self.gcinfo    # force it to be computed
-        if self.varlength != 1:
+        if self.varlength is not None:
             self.normalizedtypename = db.gettype(ARRAY, who_asks=self)
         if needs_gcheader(ARRAY):
             HDR = db.gcpolicy.array_gcheader_definition(self)
     def computegcinfo(self):
         # let the gcpolicy do its own setup
         self.gcinfo = None   # unless overwritten below
-        if self.varlength == 1:
+        if self.varlength is None:
             self.db.gcpolicy.array_setup(self)
         return self.gcinfo
     gcinfo = defaultproperty(computegcinfo)
             yield '\t' + cdecl(typename, fname) + ';'
         if not self.ARRAY._hints.get('nolength', False):
             yield '\tlong length;'
-        line = '%s;' % cdecl(self.itemtypename, 'items[%d]'% self.varlength)
+        line = '%s;' % cdecl(self.itemtypename,
+                             'items[%s]' % deflength(self.varlength))
         if self.ARRAY.OF is Void:    # strange
             line = '/* array of void */'
             if self.ARRAY._hints.get('nolength', False):
                 line = 'char _dummy; ' + line
         yield '\t' + line
         yield '};'
-        if self.varlength != 1:
+        if self.varlength is not None:
             yield 'union %su {' % self.name
             yield '  struct %s a;' % self.name
             yield '  %s;' % cdecl(self.normalizedtypename, 'b')
             yield '};'
 
     def visitor_lines(self, prefix, on_item):
-        assert self.varlength == 1
+        assert self.varlength is None
         ARRAY = self.ARRAY
         # we need a unique name for this C variable, or at least one that does
         # not collide with the expression in 'prefix'
 
     def debug_offsets(self):
         # generate three offsets for debugging inspection
-        assert self.varlength == 1
+        assert self.varlength is None
         if not self.ARRAY._hints.get('nolength', False):
             yield 'offsetof(struct %s, length)' % (self.name,)
         else:
     forward_decl = None
     extra_union_for_varlength = False
 
-    def __init__(self, db, ARRAY, varlength=1):
+    def __init__(self, db, ARRAY, varlength=None):
         NodeWithDependencies.__init__(self, db)
         self.ARRAY = ARRAY
         self.LLTYPE = ARRAY
         # There is no such thing as an array of voids:
         # we use an array of chars instead; only the pointer can be void*.
         self.itemtypename = db.gettype(contained_type, who_asks=self)
-        self.fulltypename = self.itemtypename.replace('@', '(@)[%d]' %
-                                                      (self.varlength,))
+        self.fulltypename = self.itemtypename.replace('@', '(@)[%s]' %
+                                                      deflength(varlength))
         if ARRAY._hints.get("render_as_void"):
             self.fullptrtypename = 'void *@'
         else:
         Node.__init__(self, db)
         self.obj = obj
         self.typename = db.gettype(T)  #, who_asks=self)
-        self.implementationtypename = db.gettype(T, varlength=self.getlength())
+        self.implementationtypename = db.gettype(
+            T, varlength=self.getvarlength())
         parent, parentindex = parentlink(obj)
         if obj in exports.EXPORTS_obj2name:
             self.name = exports.EXPORTS_obj2name[obj]
     def startupcode(self):
         return []
 
-    def getlength(self):
-        return 1
+    def getvarlength(self):
+        return None
 
 assert not USESLOTS or '__dict__' not in dir(ContainerNode)
 
         for name in T._names:
             yield getattr(self.obj, name)
 
-    def getlength(self):
+    def getvarlength(self):
         T = self.getTYPE()
         if T._arrayfld is None:
-            return 1
+            return None
         else:
             array = getattr(self.obj, T._arrayfld)
             return len(array.items)
     def enum_dependencies(self):
         return self.obj.items
 
-    def getlength(self):
+    def getvarlength(self):
         return len(self.obj.items)
 
     def initializationexpr(self, decoration=''):
         for i in range(self.obj.getlength()):
             yield self.obj.getitem(i)
 
-    def getlength(self):
-        return 1    # not variable-sized!
+    def getvarlength(self):
+        return None    # not variable-sized!
 
     def initializationexpr(self, decoration=''):
         T = self.getTYPE()

File rpython/translator/c/src/g_prerequisite.h

 
 #ifdef __GNUC__       /* other platforms too, probably */
 typedef _Bool bool_t;
+# define RPY_VARLENGTH   /* nothing: [RPY_VARLENGTH] => [] */
+# define RPY_LENGTH0     0       /* array decl [0] are ok  */
+# define RPY_DUMMY_VARLENGTH     char _dummy[0];
 #else
 typedef unsigned char bool_t;
+# define RPY_VARLENGTH   1       /* [RPY_VARLENGTH] => [1] */
+# define RPY_LENGTH0     1       /* array decl [0] are bad */
+# define RPY_DUMMY_VARLENGTH     /* nothing */
 #endif
 
 

File rpython/translator/c/src/support.h

 /************************************************************/
  /***  C header subsection: support functions              ***/
 
-/* a temporary(?) workaround for GCC 4.8.  See:
-    http://stackoverflow.com/questions/16016627/
-*/
-#ifdef __GNUC__
-# if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)
-#  pragma GCC optimize("no-aggressive-loop-optimizations")
-# endif
-#endif
-
-
 #define RUNNING_ON_LLINTERP	0
 #define OP_JIT_RECORD_KNOWN_CLASS(i, c, r)  /* nothing */
 

File rpython/translator/c/test/test_lltyped.py