Commits

Armin Rigo committed afec696

Merge branch/virtual-forcing so far, introducing proper forcing of
virtualizables, followed by a guard failure when we eventually return to
the assembler. This also changes the front-end so that it always aborts
the trace after the virtualizable has been forced.

The changes are not very ootype-friendly. The CLI tests are disabled
for now.

Fix the write analyzer to give the correct answer even when calling
external functions that may call back.

svn merge -r69626:69792 svn+ssh://codespeak.net/svn/pypy/branch/virtual-forcing .
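For context, the trace-level pattern introduced by this merge, as exercised by
the new tests in pypy/jit/backend/test/runner_test.py below, is roughly the
following (a sketch only; the boxes, funcbox, calldescr and faildescr are
placeholders borrowed from the test code):

    tok = BoxInt()
    ops = [
        ResOperation(rop.FORCE_TOKEN, [], tok),          # token identifying the current frame
        ResOperation(rop.CALL_MAY_FORCE, [funcbox, tok, i1], None,
                     descr=calldescr),                   # residual call during which forcing may happen
        ResOperation(rop.GUARD_NOT_FORCED, [], None, descr=faildescr),
        ResOperation(rop.FINISH, [i0], None, descr=BasicFailDescr(0)),
    ]
    ops[2].fail_args = [i1, i0]

If the callee invokes cpu.force(token), the GUARD_NOT_FORCED guard fails when
the call returns, and the front-end then aborts the trace.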

Comments (0)

Files changed (51)

pypy/annotation/description.py

             graph.name = alt_name
         return graph
 
+    def getgraphs(self):
+        return self._cache.values()
+
     def getuniquegraph(self):
         if len(self._cache) != 1:
             raise NoStandardGraph(self)

pypy/jit/backend/cli/test/conftest.py

+import py
+
+class Directory(py.test.collect.Directory):
+    def collect(self):
+        py.test.skip("CLI backend tests skipped for now")

pypy/jit/backend/llgraph/llimpl.py

 _TO_OPAQUE = {}
 
 def _to_opaque(value):
-    return lltype.opaqueptr(_TO_OPAQUE[value.__class__], 'opaque',
-                            externalobj=value)
+    try:
+        return value._the_opaque_pointer
+    except AttributeError:
+        op = lltype.opaqueptr(_TO_OPAQUE[value.__class__], 'opaque',
+                              externalobj=value)
+        value._the_opaque_pointer = op
+        return op
 
 def from_opaque_string(s):
     if isinstance(s, str):
     'unicodesetitem'  : (('ref', 'int', 'int'), 'int'),
     'cast_ptr_to_int' : (('ref',), 'int'),
     'debug_merge_point': (('ref',), None),
+    'force_token'     : ((), 'int'),
+    'call_may_force'  : (('int', 'varargs'), 'intorptr'),
+    'guard_not_forced': ((), None)
     #'getitem'         : (('void', 'ref', 'int'), 'int'),
     #'setitem'         : (('void', 'ref', 'int', 'int'), None),
     #'newlist'         : (('void', 'varargs'), 'ref'),
     def __init__(self, memocast):
         self.verbose = False
         self.memocast = memocast
+        self.opindex = 1
+        self._forced = False
+        self._may_force = -1
 
     def getenv(self, v):
         if isinstance(v, Constant):
         else:
             return self.env[v]
 
+    def _populate_fail_args(self, op, skip=None):
+        fail_args = []
+        if op.fail_args:
+            for fail_arg in op.fail_args:
+                if fail_arg is None:
+                    fail_args.append(None)
+                elif fail_arg is skip:
+                    fail_args.append(fail_arg.concretetype._defl())
+                else:
+                    fail_args.append(self.getenv(fail_arg))
+        self.fail_args = fail_args
+        self.fail_index = op.fail_index
+
     def execute(self):
         """Execute all operations in a loop,
         possibly following to other loops as well.
         operations = self.loop.operations
         opindex = 0
         while True:
+            self.opindex = opindex
             op = operations[opindex]
             args = [self.getenv(v) for v in op.args]
             if not op.is_final():
                         opindex = 0
                         continue
                     else:
-                        fail_args = []
-                        if op.fail_args:
-                            for fail_arg in op.fail_args:
-                                if fail_arg is None:
-                                    fail_args.append(None)
-                                else:
-                                    fail_args.append(self.getenv(fail_arg))
+                        self._populate_fail_args(op)
                         # a non-patched guard
                         if self.verbose:
                             log.trace('failed: %s' % (
                                 ', '.join(map(str, fail_args)),))
-                        self.fail_args = fail_args
                         return op.fail_index
                 #verbose = self.verbose
                 assert (result is None) == (op.result is None)
             if op.opnum == rop.JUMP:
                 assert len(op.jump_target.inputargs) == len(args)
                 self.env = dict(zip(op.jump_target.inputargs, args))
-                operations = op.jump_target.operations
+                self.loop = op.jump_target
+                operations = self.loop.operations
                 opindex = 0
                 _stats.exec_jumps += 1
             elif op.opnum == rop.FINISH:
         try:
             res = ophandler(self, descr, *values)
         finally:
-            if verbose:
+            if 0:     # if verbose:
                 argtypes, restype = TYPES[opname]
                 if res is None:
                     resdata = ''
                 else:
                     resdata = '-> ' + repr1(res, restype, self.memocast)
                 # fish the types
-                #log.cpu('\t%s %s %s' % (opname, repr_list(values, argtypes,
-                #                                          self.memocast),
-                #                        resdata))
+                log.cpu('\t%s %s %s' % (opname, repr_list(values, argtypes,
+                                                          self.memocast),
+                                        resdata))
         return res
 
     def as_int(self, x):
     def op_uint_xor(self, descr, arg1, arg2):
         return arg1 ^ arg2
 
+    def op_force_token(self, descr):
+        opaque_frame = _to_opaque(self)
+        return llmemory.cast_ptr_to_adr(opaque_frame)
+
+    def op_call_may_force(self, calldescr, func, *args):
+        assert not self._forced
+        self._may_force = self.opindex
+        try:
+            return self.op_call(calldescr, func, *args)
+        finally:
+            self._may_force = -1
+
+    def op_guard_not_forced(self, descr):
+        forced = self._forced
+        self._forced = False
+        if forced:
+            raise GuardFailed
+
 
 class OOFrame(Frame):
 
     return lltype.cast_opaque_ptr(llmemory.GCREF,
                                   _get_error(ZeroDivisionError).args[1])
 
+def force(opaque_frame):
+    frame = _from_opaque(opaque_frame)
+    assert not frame._forced
+    frame._forced = True
+    assert frame._may_force >= 0
+    call_op = frame.loop.operations[frame._may_force]
+    guard_op = frame.loop.operations[frame._may_force+1]
+    assert call_op.opnum == rop.CALL_MAY_FORCE
+    frame._populate_fail_args(guard_op, skip=call_op.result)
+    return frame.fail_index
+
+def get_forced_token_frame(force_token):
+    opaque_frame = llmemory.cast_adr_to_ptr(force_token,
+                                            lltype.Ptr(_TO_OPAQUE[Frame]))
+    return opaque_frame
+
+def get_frame_forced_token(opaque_frame):
+    return llmemory.cast_ptr_to_adr(opaque_frame)
+
 class MemoCast(object):
     def __init__(self):
         self.addresses = [llmemory.NULL]
 setannotation(get_overflow_error_value, annmodel.SomePtr(llmemory.GCREF))
 setannotation(get_zero_division_error, annmodel.SomeAddress())
 setannotation(get_zero_division_error_value, annmodel.SomePtr(llmemory.GCREF))
+setannotation(force, annmodel.SomeInteger())
+setannotation(get_forced_token_frame, s_Frame)
+setannotation(get_frame_forced_token, annmodel.SomeAddress())
 
 setannotation(new_memo_cast, s_MemoCast)
 setannotation(cast_adr_to_int, annmodel.SomeInteger())

pypy/jit/backend/llgraph/runner.py

 
 import sys
 from pypy.rlib.unroll import unrolling_iterable
+from pypy.rlib.objectmodel import we_are_translated
 from pypy.rpython.lltypesystem import lltype, llmemory, rclass
 from pypy.rpython.ootypesystem import ootype
 from pypy.rpython.llinterp import LLInterpreter
 
 
 class Descr(history.AbstractDescr):
-    name = None
-    ofs = -1
-    typeinfo = '?'
-    
-    def __init__(self, ofs, typeinfo='?', extrainfo=None):
+
+    def __init__(self, ofs, typeinfo, extrainfo=None, name=None):
         self.ofs = ofs
         self.typeinfo = typeinfo
         self.extrainfo = extrainfo
+        self.name = name
 
     def get_extra_info(self):
         return self.extrainfo
 
-    def __hash__(self):
-        return hash((self.ofs, self.typeinfo))
-
-    def __eq__(self, other):
-        if not isinstance(other, Descr):
-            return NotImplemented
-        return self.ofs == other.ofs and self.typeinfo == other.typeinfo
-
-    def __ne__(self, other):
-        if not isinstance(other, Descr):
-            return NotImplemented
-        return self.ofs != other.ofs or self.typeinfo != other.typeinfo
-
     def sort_key(self):
         return self.ofs
 
         raise TypeError("cannot use comparison on Descrs")
 
     def __repr__(self):
+        args = [repr(self.ofs), repr(self.typeinfo)]
         if self.name is not None:
-            return '<Descr %r, %r, %r>' % (self.ofs, self.typeinfo, self.name)
-        return '<Descr %r, %r>' % (self.ofs, self.typeinfo)
+            args.append(repr(self.name))
+        if self.extrainfo is not None:
+            args.append('E')
+        return '<Descr %r>' % (', '.join(args),)
 
 
 history.TreeLoop._compiled_version = lltype.nullptr(llimpl.COMPILEDLOOP.TO)
         llimpl._stats = self.stats
         llimpl._llinterp = LLInterpreter(self.rtyper)
         self._future_values = []
+        self._descrs = {}
 
     def _freeze_(self):
         assert self.translate_support_code
         return False
 
+    def getdescr(self, ofs, typeinfo='?', extrainfo=None, name=None):
+        key = (ofs, typeinfo, extrainfo, name)
+        try:
+            return self._descrs[key]
+        except KeyError:
+            descr = Descr(ofs, typeinfo, extrainfo, name)
+            self._descrs[key] = descr
+            return descr
+
     def set_class_sizes(self, class_sizes):
         self.class_sizes = class_sizes
         for vtable, size in class_sizes.items():
     def get_latest_value_float(self, index):
         return llimpl.frame_float_getvalue(self.latest_frame, index)
 
+    def get_latest_force_token(self):
+        token = llimpl.get_frame_forced_token(self.latest_frame)
+        return self.cast_adr_to_int(token)
+
     # ----------
 
     def get_exception(self):
         return (self.cast_adr_to_int(llimpl.get_zero_division_error()),
                 llimpl.get_zero_division_error_value())
 
-    @staticmethod
-    def sizeof(S):
+    def sizeof(self, S):
         assert not isinstance(S, lltype.Ptr)
-        return Descr(symbolic.get_size(S))
-
-    @staticmethod
-    def numof(S):
-        return 4
-
-    ##addresssuffix = '4'
+        return self.getdescr(symbolic.get_size(S))
 
     def cast_adr_to_int(self, adr):
         return llimpl.cast_adr_to_int(self.memo_cast, adr)
         BaseCPU.__init__(self, *args, **kwds)
         self.fielddescrof_vtable = self.fielddescrof(rclass.OBJECT, 'typeptr')
         
-    @staticmethod
-    def fielddescrof(S, fieldname):
+    def fielddescrof(self, S, fieldname):
         ofs, size = symbolic.get_field_token(S, fieldname)
         token = history.getkind(getattr(S, fieldname))
-        res = Descr(ofs, token[0])
-        res.name = fieldname
-        return res
+        return self.getdescr(ofs, token[0], name=fieldname)
 
-    @staticmethod
-    def calldescrof(FUNC, ARGS, RESULT, extrainfo=None):
+    def calldescrof(self, FUNC, ARGS, RESULT, extrainfo=None):
         token = history.getkind(RESULT)
-        return Descr(0, token[0], extrainfo=extrainfo)
+        return self.getdescr(0, token[0], extrainfo=extrainfo)
 
     def get_exception(self):
         return self.cast_adr_to_int(llimpl.get_exception())
     def get_exc_value(self):
         return llimpl.get_exc_value()
 
-    @staticmethod
-    def arraydescrof(A):
+    def arraydescrof(self, A):
         assert isinstance(A, lltype.GcArray)
         assert A.OF != lltype.Void
         size = symbolic.get_size(A)
         token = history.getkind(A.OF)
-        return Descr(size, token[0])
+        return self.getdescr(size, token[0])
 
     # ---------- the backend-dependent operations ----------
 
         return history.BoxInt(llimpl.cast_to_int(ptrbox.getref_base(),
                                                         self.memo_cast))
 
+    def force(self, force_token):
+        token = self.cast_int_to_adr(force_token)
+        frame = llimpl.get_forced_token_frame(token)
+        fail_index = llimpl.force(frame)
+        self.latest_frame = frame
+        return self.get_fail_descr_from_number(fail_index)
+
+
 class OOtypeCPU(BaseCPU):
     is_oo = True
     ts = oohelper

pypy/jit/backend/llgraph/test/test_llgraph.py

 from pypy.jit.backend.test.runner_test import LLtypeBackendTest
 
 class TestLLTypeLLGraph(LLtypeBackendTest):
+    # for individual tests see:
+    # ====> ../../test/runner_test.py
+    
     from pypy.jit.backend.llgraph.runner import LLtypeCPU as cpu_type
 
     def setup_method(self, _):

pypy/jit/backend/llsupport/regalloc.py

             self.assembler.regalloc_mov(reg, to)
         # otherwise it's clean
 
-    def before_call(self, force_store=[]):
+    def before_call(self, force_store=[], save_all_regs=False):
         """ Spill registers before a call, as described by
         'self.save_around_call_regs'.  Registers are not spilled if
         they don't survive past the current operation, unless they
                 del self.reg_bindings[v]
                 self.free_regs.append(reg)
                 continue
-            if reg not in self.save_around_call_regs:
-                # we don't need to
+            if not save_all_regs and reg not in self.save_around_call_regs:
+                # we don't have to
                 continue
             self._sync_var(v)
             del self.reg_bindings[v]
         """ Adjust registers according to the result of the call,
         which is in variable v.
         """
-        if v is not None:
-            self._check_type(v)
-            r = self.call_result_location(v)
-            self.reg_bindings[v] = r
-            self.free_regs = [fr for fr in self.free_regs if fr is not r]
-    
+        self._check_type(v)
+        r = self.call_result_location(v)
+        self.reg_bindings[v] = r
+        self.free_regs = [fr for fr in self.free_regs if fr is not r]
+        return r
+
     # abstract methods, override
 
     def convert_to_imm(self, c):

pypy/jit/backend/llsupport/test/test_regalloc.py

         assert len(rm.reg_bindings) == 3
         rm._check_invariants()
 
+    def test_call_support_save_all_regs(self):
+        class XRegisterManager(RegisterManager):
+            save_around_call_regs = [r1, r2]
+
+            def call_result_location(self, v):
+                return r1
+
+        sm = TStackManager()
+        asm = MockAsm()
+        boxes, longevity = boxes_and_longevity(5)
+        rm = XRegisterManager(longevity, stack_manager=sm,
+                              assembler=asm)
+        for b in boxes[:-1]:
+            rm.force_allocate_reg(b)
+        rm.before_call(save_all_regs=True)
+        assert len(rm.reg_bindings) == 0
+        assert sm.stack_depth == 4
+        assert len(asm.moves) == 4
+        rm._check_invariants()
+        rm.after_call(boxes[-1])
+        assert len(rm.reg_bindings) == 1
+        rm._check_invariants()
+        
+
     def test_different_stack_width(self):
         class XRegisterManager(RegisterManager):
             reg_width = 2

pypy/jit/backend/model.py

         or from 'args' if it was a FINISH).  Returns a ptr or an obj."""
         raise NotImplementedError
 
+    def get_latest_force_token(self):
+        """After a GUARD_NOT_FORCED fails, this function returns the
+        same FORCE_TOKEN result as the one in the just-failed loop."""
+        raise NotImplementedError
+
     def get_exception(self):
         raise NotImplementedError
 
     def do_cast_ptr_to_int(self, ptrbox):
         raise NotImplementedError
 
+    def do_force_token(self):
+        # this should not be implemented at all by the backends
+        raise NotImplementedError
+
+    def do_call_may_force(self, args, calldescr):
+        return self.do_call(args, calldescr)
+
+    def force(self, force_token):
+        raise NotImplementedError
+
     # ootype specific operations
     # --------------------------
 

pypy/jit/backend/test/runner_test.py

             else:
                 assert record == []
 
+    def test_force_operations_returning_void(self):
+        values = []
+        def maybe_force(token, flag):
+            if flag:
+                descr = self.cpu.force(token)
+                values.append(descr)
+                values.append(self.cpu.get_latest_value_int(0))
+                values.append(self.cpu.get_latest_value_int(1))
+
+        FUNC = self.FuncType([lltype.Signed, lltype.Signed], lltype.Void)
+        func_ptr = llhelper(lltype.Ptr(FUNC), maybe_force)
+        funcbox = self.get_funcbox(self.cpu, func_ptr).constbox()
+        calldescr = self.cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
+        cpu = self.cpu
+        i0 = BoxInt()
+        i1 = BoxInt()
+        tok = BoxInt()
+        faildescr = BasicFailDescr(1)
+        ops = [
+        ResOperation(rop.FORCE_TOKEN, [], tok),
+        ResOperation(rop.CALL_MAY_FORCE, [funcbox, tok, i1], None,
+                     descr=calldescr),
+        ResOperation(rop.GUARD_NOT_FORCED, [], None, descr=faildescr),
+        ResOperation(rop.FINISH, [i0], None, descr=BasicFailDescr(0))
+        ]
+        ops[2].fail_args = [i1, i0]
+        looptoken = LoopToken()
+        self.cpu.compile_loop([i0, i1], ops, looptoken)
+        self.cpu.set_future_value_int(0, 20)
+        self.cpu.set_future_value_int(1, 0)
+        fail = self.cpu.execute_token(looptoken)
+        assert fail.identifier == 0
+        assert self.cpu.get_latest_value_int(0) == 20
+        assert values == []
+
+        self.cpu.set_future_value_int(0, 10)
+        self.cpu.set_future_value_int(1, 1)
+        fail = self.cpu.execute_token(looptoken)
+        assert fail.identifier == 1
+        assert self.cpu.get_latest_value_int(0) == 1
+        assert self.cpu.get_latest_value_int(1) == 10
+        assert values == [faildescr, 1, 10]
+
+    def test_force_operations_returning_int(self):
+        values = []
+        def maybe_force(token, flag):
+            if flag:
+               self.cpu.force(token)
+               values.append(self.cpu.get_latest_value_int(0))
+               values.append(self.cpu.get_latest_value_int(2))
+            return 42
+
+        FUNC = self.FuncType([lltype.Signed, lltype.Signed], lltype.Signed)
+        func_ptr = llhelper(lltype.Ptr(FUNC), maybe_force)
+        funcbox = self.get_funcbox(self.cpu, func_ptr).constbox()
+        calldescr = self.cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
+        cpu = self.cpu
+        i0 = BoxInt()
+        i1 = BoxInt()
+        i2 = BoxInt()
+        tok = BoxInt()
+        faildescr = BasicFailDescr(1)
+        ops = [
+        ResOperation(rop.FORCE_TOKEN, [], tok),
+        ResOperation(rop.CALL_MAY_FORCE, [funcbox, tok, i1], i2,
+                     descr=calldescr),
+        ResOperation(rop.GUARD_NOT_FORCED, [], None, descr=faildescr),
+        ResOperation(rop.FINISH, [i2], None, descr=BasicFailDescr(0))
+        ]
+        ops[2].fail_args = [i1, i2, i0]
+        looptoken = LoopToken()
+        self.cpu.compile_loop([i0, i1], ops, looptoken)
+        self.cpu.set_future_value_int(0, 20)
+        self.cpu.set_future_value_int(1, 0)
+        fail = self.cpu.execute_token(looptoken)
+        assert fail.identifier == 0
+        assert self.cpu.get_latest_value_int(0) == 42
+        assert values == []
+
+        self.cpu.set_future_value_int(0, 10)
+        self.cpu.set_future_value_int(1, 1)
+        fail = self.cpu.execute_token(looptoken)
+        assert fail.identifier == 1
+        assert self.cpu.get_latest_value_int(0) == 1
+        assert self.cpu.get_latest_value_int(1) == 42
+        assert self.cpu.get_latest_value_int(2) == 10
+        assert values == [1, 10]
+
+    def test_force_operations_returning_float(self):
+        values = []
+        def maybe_force(token, flag):
+            if flag:
+               self.cpu.force(token)
+               values.append(self.cpu.get_latest_value_int(0))
+               values.append(self.cpu.get_latest_value_int(2))
+            return 42.5
+
+        FUNC = self.FuncType([lltype.Signed, lltype.Signed], lltype.Float)
+        func_ptr = llhelper(lltype.Ptr(FUNC), maybe_force)
+        funcbox = self.get_funcbox(self.cpu, func_ptr).constbox()
+        calldescr = self.cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
+        cpu = self.cpu
+        i0 = BoxInt()
+        i1 = BoxInt()
+        f2 = BoxFloat()
+        tok = BoxInt()
+        faildescr = BasicFailDescr(1)
+        ops = [
+        ResOperation(rop.FORCE_TOKEN, [], tok),
+        ResOperation(rop.CALL_MAY_FORCE, [funcbox, tok, i1], f2,
+                     descr=calldescr),
+        ResOperation(rop.GUARD_NOT_FORCED, [], None, descr=faildescr),
+        ResOperation(rop.FINISH, [f2], None, descr=BasicFailDescr(0))
+        ]
+        ops[2].fail_args = [i1, f2, i0]
+        looptoken = LoopToken()
+        self.cpu.compile_loop([i0, i1], ops, looptoken)
+        self.cpu.set_future_value_int(0, 20)
+        self.cpu.set_future_value_int(1, 0)
+        fail = self.cpu.execute_token(looptoken)
+        assert fail.identifier == 0
+        assert self.cpu.get_latest_value_float(0) == 42.5
+        assert values == []
+
+        self.cpu.set_future_value_int(0, 10)
+        self.cpu.set_future_value_int(1, 1)
+        fail = self.cpu.execute_token(looptoken)
+        assert fail.identifier == 1
+        assert self.cpu.get_latest_value_int(0) == 1
+        assert self.cpu.get_latest_value_float(1) == 42.5
+        assert self.cpu.get_latest_value_int(2) == 10
+        assert values == [1, 10]
+
     # pure do_ / descr features
 
     def test_do_operations(self):

pypy/jit/backend/x86/assembler.py

 from pypy.rpython.annlowlevel import llhelper
 from pypy.tool.uid import fixid
 from pypy.jit.backend.x86.regalloc import RegAlloc, WORD, lower_byte,\
-     X86RegisterManager, X86XMMRegisterManager, get_ebp_ofs
+     X86RegisterManager, X86XMMRegisterManager, get_ebp_ofs, FRAME_FIXED_SIZE,\
+     FORCE_INDEX_OFS
 from pypy.rlib.objectmodel import we_are_translated, specialize
 from pypy.jit.backend.x86 import codebuf
 from pypy.jit.backend.x86.ri386 import *
 # our calling convention - we pass first 6 args in registers
 # and the rest stays on the stack
 
-RET_BP = 5 # ret ip + bp + bx + esi + edi = 5 words
-
 if sys.platform == 'darwin':
     # darwin requires the stack to be 16 bytes aligned on calls
     CALL_ALIGN = 4
         self.fail_boxes_int = NonmovableGrowableArraySigned()
         self.fail_boxes_ptr = NonmovableGrowableArrayGCREF()
         self.fail_boxes_float = NonmovableGrowableArrayFloat()
+        self.fail_ebp = 0
         self.setup_failure_recovery()
 
     def leave_jitted_hook(self):
         # patch stack adjustment LEA
         # possibly align, e.g. for Mac OS X        
         mc = codebuf.InMemoryCodeBuilder(adr_lea, adr_lea + 4)
-        mc.write(packimm32(-(stack_depth + RET_BP - 2) * WORD))
+        # Compute the correct offset for the instruction LEA ESP, [EBP-4*words].
+        # Given that [EBP] is where we saved EBP, i.e. in the last word
+        # of our fixed frame, then the 'words' value is:
+        words = (FRAME_FIXED_SIZE - 1) + stack_depth
+        mc.write(packimm32(-WORD * words))
         mc.done()
 
     def _assemble_bootstrap_code(self, inputargs, arglocs):
         self.mc.PUSH(ebx)
         self.mc.PUSH(esi)
         self.mc.PUSH(edi)
-        # NB. exactly 4 pushes above; if this changes, fix stack_pos().
-        # You must also keep _get_callshape() in sync.
+        # NB. the shape of the frame is hard-coded in get_basic_shape() too.
+        # Also, make sure this is consistent with FRAME_FIXED_SIZE.
         adr_stackadjust = self._patchable_stackadjust()
         tmp = X86RegisterManager.all_regs[0]
         xmmtmp = X86XMMRegisterManager.all_regs[0]
 
     regalloc_mov = mov # legacy interface
 
-    def regalloc_fstp(self, loc):
-        self.mc.FSTP(loc)
-
     def regalloc_push(self, loc):
         if isinstance(loc, XMMREG):
             self.mc.SUB(esp, imm(2*WORD))
     def implement_guard_recovery(self, guard_opnum, faildescr, failargs,
                                                                fail_locs):
         exc = (guard_opnum == rop.GUARD_EXCEPTION or
-               guard_opnum == rop.GUARD_NO_EXCEPTION)
+               guard_opnum == rop.GUARD_NO_EXCEPTION or
+               guard_opnum == rop.GUARD_NOT_FORCED)
         return self.generate_quick_failure(faildescr, failargs, fail_locs, exc)
 
     def generate_quick_failure(self, faildescr, failargs, fail_locs, exc):
             arglocs.append(loc)
         return arglocs[:]
 
+    def grab_frame_values(self, bytecode, frame_addr, allregisters):
+        # no malloc allowed here!!
+        self.fail_ebp = allregisters[16 + ebp.op]
+        num = 0
+        value_hi = 0
+        while 1:
+            # decode the next instruction from the bytecode
+            code = rffi.cast(lltype.Signed, bytecode[0])
+            bytecode = rffi.ptradd(bytecode, 1)
+            if code >= 4*self.DESCR_FROMSTACK:
+                if code > 0x7F:
+                    shift = 7
+                    code &= 0x7F
+                    while True:
+                        nextcode = rffi.cast(lltype.Signed, bytecode[0])
+                        bytecode = rffi.ptradd(bytecode, 1)
+                        code |= (nextcode & 0x7F) << shift
+                        shift += 7
+                        if nextcode <= 0x7F:
+                            break
+                # load the value from the stack
+                kind = code & 3
+                code = (code >> 2) - self.DESCR_FROMSTACK
+                stackloc = frame_addr + get_ebp_ofs(code)
+                value = rffi.cast(rffi.LONGP, stackloc)[0]
+                if kind == self.DESCR_FLOAT:
+                    value_hi = value
+                    value = rffi.cast(rffi.LONGP, stackloc - 4)[0]
+            else:
+                # 'code' identifies a register: load its value
+                kind = code & 3
+                if kind == self.DESCR_SPECIAL:
+                    if code == self.DESCR_HOLE:
+                        num += 1
+                        continue
+                    assert code == self.DESCR_STOP
+                    break
+                code >>= 2
+                if kind == self.DESCR_FLOAT:
+                    value = allregisters[2*code]
+                    value_hi = allregisters[2*code + 1]
+                else:
+                    value = allregisters[16 + code]
+
+            # store the loaded value into fail_boxes_<type>
+            if kind == self.DESCR_INT:
+                tgt = self.fail_boxes_int.get_addr_for_num(num)
+            elif kind == self.DESCR_REF:
+                tgt = self.fail_boxes_ptr.get_addr_for_num(num)
+            elif kind == self.DESCR_FLOAT:
+                tgt = self.fail_boxes_float.get_addr_for_num(num)
+                rffi.cast(rffi.LONGP, tgt)[1] = value_hi
+            else:
+                assert 0, "bogus kind"
+            rffi.cast(rffi.LONGP, tgt)[0] = value
+            num += 1
+        #
+        if not we_are_translated():
+            assert bytecode[4] == 0xCC
+        fail_index = rffi.cast(rffi.LONGP, bytecode)[0]
+        return fail_index
+
     def setup_failure_recovery(self):
 
         def failure_recovery_func(registers):
-            # no malloc allowed here!!
             # 'registers' is a pointer to a structure containing the
             # original value of the registers, optionally the original
             # value of XMM registers, and finally a reference to the
             # recovery bytecode.  See _build_failure_recovery() for details.
             stack_at_ebp = registers[ebp.op]
             bytecode = rffi.cast(rffi.UCHARP, registers[8])
-            num = 0
-            value_hi = 0
-            while 1:
-                # decode the next instruction from the bytecode
-                code = rffi.cast(lltype.Signed, bytecode[0])
-                bytecode = rffi.ptradd(bytecode, 1)
-                if code >= 4*self.DESCR_FROMSTACK:
-                    if code > 0x7F:
-                        shift = 7
-                        code &= 0x7F
-                        while True:
-                            nextcode = rffi.cast(lltype.Signed, bytecode[0])
-                            bytecode = rffi.ptradd(bytecode, 1)
-                            code |= (nextcode & 0x7F) << shift
-                            shift += 7
-                            if nextcode <= 0x7F:
-                                break
-                    # load the value from the stack
-                    kind = code & 3
-                    code = (code >> 2) - self.DESCR_FROMSTACK
-                    stackloc = stack_at_ebp + get_ebp_ofs(code)
-                    value = rffi.cast(rffi.LONGP, stackloc)[0]
-                    if kind == self.DESCR_FLOAT:
-                        value_hi = value
-                        value = rffi.cast(rffi.LONGP, stackloc - 4)[0]
-                else:
-                    # 'code' identifies a register: load its value
-                    kind = code & 3
-                    if kind == self.DESCR_SPECIAL:
-                        if code == self.DESCR_HOLE:
-                            num += 1
-                            continue
-                        assert code == self.DESCR_STOP
-                        break
-                    code >>= 2
-                    if kind == self.DESCR_FLOAT:
-                        xmmregisters = rffi.ptradd(registers, -16)
-                        value = xmmregisters[2*code]
-                        value_hi = xmmregisters[2*code + 1]
-                    else:
-                        value = registers[code]
-
-                # store the loaded value into fail_boxes_<type>
-                if kind == self.DESCR_INT:
-                    tgt = self.fail_boxes_int.get_addr_for_num(num)
-                elif kind == self.DESCR_REF:
-                    tgt = self.fail_boxes_ptr.get_addr_for_num(num)
-                elif kind == self.DESCR_FLOAT:
-                    tgt = self.fail_boxes_float.get_addr_for_num(num)
-                    rffi.cast(rffi.LONGP, tgt)[1] = value_hi
-                else:
-                    assert 0, "bogus kind"
-                rffi.cast(rffi.LONGP, tgt)[0] = value
-                num += 1
-            #
-            if not we_are_translated():
-                assert bytecode[4] == 0xCC
-            fail_index = rffi.cast(rffi.LONGP, bytecode)[0]
-            return fail_index
+            allregisters = rffi.ptradd(registers, -16)
+            return self.grab_frame_values(bytecode, stack_at_ebp, allregisters)
 
         self.failure_recovery_func = failure_recovery_func
         self.failure_recovery_code = [0, 0, 0, 0]
         # now we return from the complete frame, which starts from
         # _assemble_bootstrap_code().  The LEA below throws away most
         # of the frame, including all the PUSHes that we did just above.
-        mc.LEA(esp, addr_add(ebp, imm((-RET_BP + 2) * WORD)))
-        mc.POP(edi)
-        mc.POP(esi)
-        mc.POP(ebx)
-        mc.POP(ebp)
+        mc.LEA(esp, addr_add(ebp, imm(-3 * WORD)))
+        mc.POP(edi)    # [ebp-12]
+        mc.POP(esi)    # [ebp-8]
+        mc.POP(ebx)    # [ebp-4]
+        mc.POP(ebp)    # [ebp]
         mc.RET()
         self.mc2.done()
         self.failure_recovery_code[exc + 2 * withfloats] = recovery_addr
         addr = self.cpu.get_on_leave_jitted_int(save_exception=exc)
         mc.CALL(rel32(addr))
 
-        # don't break the following code sequence!
+        # don't break the following code sequence!   xxx no reason any more?
         mc = mc._mc
-        mc.LEA(esp, addr_add(ebp, imm((-RET_BP + 2) * WORD)))
+        mc.LEA(esp, addr_add(ebp, imm(-3 * WORD)))
         mc.MOV(eax, imm(fail_index))
-        mc.POP(edi)
-        mc.POP(esi)
-        mc.POP(ebx)
-        mc.POP(ebp)
+        mc.POP(edi)    # [ebp-12]
+        mc.POP(esi)    # [ebp-8]
+        mc.POP(ebx)    # [ebp-4]
+        mc.POP(ebp)    # [ebp]
         mc.RET()
 
     @specialize.arg(2)
         self.mc.CALL(x)
         self.mark_gc_roots()
         self.mc.ADD(esp, imm(extra_on_stack))
-        if size == 1:
+        if isinstance(resloc, MODRM64):
+            self.mc.FSTP(resloc)
+        elif size == 1:
             self.mc.AND(eax, imm(0xff))
         elif size == 2:
             self.mc.AND(eax, imm(0xffff))
 
     genop_call_pure = genop_call
+    
+    def genop_guard_call_may_force(self, op, guard_op, addr,
+                                   arglocs, result_loc):
+        faildescr = guard_op.descr
+        fail_index = self.cpu.get_fail_descr_number(faildescr)
+        self.mc.MOV(mem(ebp, FORCE_INDEX_OFS), imm(fail_index))
+        self.genop_call(op, arglocs, result_loc)
+        self.mc.CMP(mem(ebp, FORCE_INDEX_OFS), imm(0))
+        return self.implement_guard(addr, self.mc.JL)
 
     def genop_discard_cond_call_gc_wb(self, op, arglocs):
         # use 'mc._mc' directly instead of 'mc', to avoid
         assert 0 < offset <= 127
         mc.overwrite(jz_location-1, [chr(offset)])
 
+    def genop_force_token(self, op, arglocs, resloc):
+        self.mc.LEA(resloc, mem(ebp, FORCE_INDEX_OFS))
+
     def not_implemented_op_discard(self, op, arglocs):
         msg = "not implemented operation: %s" % op.getopname()
         print msg

pypy/jit/backend/x86/regalloc.py

      TempBox
 
 WORD = 4
+FRAME_FIXED_SIZE = 5     # ebp + ebx + esi + edi + force_index = 5 words
+FORCE_INDEX_OFS = -4*WORD
 
 width_of_type = {
     INT : 1,
         
     def after_call(self, v):
         # the result is stored in st0, but we don't have this around,
-        # so we move it to some stack location
-        if v is not None:
-            loc = self.stack_manager.loc(v, 2)
-            self.assembler.regalloc_fstp(loc)
+        # so genop_call will move it to some stack location immediately
+        # after the call
+        return self.stack_manager.loc(v, 2)
 
 class X86StackManager(StackManager):
 
         self.assembler.regalloc_perform_with_guard(op, guard_op, faillocs,
                                                    arglocs, result_loc,
                                                    self.sm.stack_depth)
-        self.rm.possibly_free_var(op.result)
+        if op.result is not None:
+            self.possibly_free_var(op.result)
         self.possibly_free_vars(guard_op.fail_args)
 
     def perform_guard(self, guard_op, arglocs, result_loc):
             self.assembler.dump('%s(%s)' % (op, arglocs))
         self.assembler.regalloc_perform_discard(op, arglocs)
 
-    def can_optimize_cmp_op(self, op, i, operations):
+    def can_merge_with_next_guard(self, op, i, operations):
+        if op.opnum == rop.CALL_MAY_FORCE:
+            assert operations[i + 1].opnum == rop.GUARD_NOT_FORCED
+            return True
         if not op.is_comparison():
             return False
         if (operations[i + 1].opnum != rop.GUARD_TRUE and
                 i += 1
                 self.possibly_free_vars(op.args)
                 continue
-            if self.can_optimize_cmp_op(op, i, operations):
+            if self.can_merge_with_next_guard(op, i, operations):
                 oplist[op.opnum](self, op, operations[i + 1])
                 i += 1
             else:
         self.Perform(op, [loc0], loc1)
         self.rm.possibly_free_var(op.args[0])
 
-    def _call(self, op, arglocs, force_store=[]):
-        self.rm.before_call(force_store)
-        self.xrm.before_call(force_store)
-        self.Perform(op, arglocs, eax)
+    def _call(self, op, arglocs, force_store=[], guard_not_forced_op=None):
+        save_all_regs = guard_not_forced_op is not None
+        self.rm.before_call(force_store, save_all_regs=save_all_regs)
+        self.xrm.before_call(force_store, save_all_regs=save_all_regs)
         if op.result is not None:
             if op.result.type == FLOAT:
-                self.xrm.after_call(op.result)
+                resloc = self.xrm.after_call(op.result)
             else:
-                self.rm.after_call(op.result)
+                resloc = self.rm.after_call(op.result)
+        else:
+            resloc = None
+        if guard_not_forced_op is not None:
+            self.perform_with_guard(op, guard_not_forced_op, arglocs, resloc)
+        else:
+            self.Perform(op, arglocs, resloc)
 
-    def consider_call(self, op, ignored):
+    def _consider_call(self, op, guard_not_forced_op=None):
         calldescr = op.descr
         assert isinstance(calldescr, BaseCallDescr)
         assert len(calldescr.arg_classes) == len(op.args) - 1
         size = calldescr.get_result_size(self.translate_support_code)
-        self._call(op, [imm(size)] + [self.loc(arg) for arg in op.args])
+        self._call(op, [imm(size)] + [self.loc(arg) for arg in op.args],
+                   guard_not_forced_op=guard_not_forced_op)
 
+    def consider_call(self, op, ignored):
+        self._consider_call(op)
     consider_call_pure = consider_call
 
+    def consider_call_may_force(self, op, guard_op):
+        assert guard_op is not None
+        self._consider_call(op, guard_op)
+
     def consider_cond_call_gc_wb(self, op, ignored):
         assert op.result is None
         arglocs = [self.loc(arg) for arg in op.args]
                     assert reg is eax     # ok to ignore this one
         return gcrootmap.compress_callshape(shape)
 
+    def consider_force_token(self, op, ignored):
+        loc = self.rm.force_allocate_reg(op.result)
+        self.Perform(op, [], loc)
+
     def not_implemented_op(self, op, ignored):
         msg = "[regalloc] Not implemented operation: %s" % op.getopname()
         print msg
 
 def get_ebp_ofs(position):
     # Argument is a stack position (0, 1, 2...).
-    # Returns (ebp-16), (ebp-20), (ebp-24)...
-    # This depends on the fact that our function prologue contains
-    # exactly 4 PUSHes.
-    return -WORD * (4 + position)
+    # Returns (ebp-20), (ebp-24), (ebp-28)...
+    # i.e. the n'th word beyond the fixed frame size.
+    return -WORD * (FRAME_FIXED_SIZE + position)
 
 def lower_byte(reg):
     # argh, kill, use lowest8bits instead

pypy/jit/backend/x86/runner.py

 from pypy.rlib.objectmodel import we_are_translated
 from pypy.jit.metainterp import history
 from pypy.jit.backend.x86.assembler import Assembler386
+from pypy.jit.backend.x86.regalloc import FORCE_INDEX_OFS
 from pypy.jit.backend.llsupport.llmodel import AbstractLLCPU
 
-
 class CPU386(AbstractLLCPU):
     debug = True
     supports_floats = True
             llmemory.GCREF.TO))
         return ptrvalue
 
+    def get_latest_force_token(self):
+        return self.assembler.fail_ebp + FORCE_INDEX_OFS
+
     def execute_token(self, executable_token):
         addr = executable_token._x86_bootstrap_code
         func = rffi.cast(lltype.Ptr(self.BOOTSTRAP_TP), addr)
         adr = llmemory.cast_ptr_to_adr(x)
         return CPU386.cast_adr_to_int(adr)
 
+    all_null_registers = lltype.malloc(rffi.LONGP.TO, 24,
+                                       flavor='raw', zero=True)
+
+    def force(self, addr_of_force_index):
+        TP = rffi.CArrayPtr(lltype.Signed)
+        fail_index = rffi.cast(TP, addr_of_force_index)[0]
+        assert fail_index >= 0, "already forced!"
+        faildescr = self.get_fail_descr_from_number(fail_index)
+        rffi.cast(TP, addr_of_force_index)[0] = -1
+        bytecode = rffi.cast(rffi.UCHARP,
+                             faildescr._x86_failure_recovery_bytecode)
+        # start of "no gc operation!" block
+        fail_index_2 = self.assembler.grab_frame_values(
+            bytecode,
+            addr_of_force_index - FORCE_INDEX_OFS,
+            self.all_null_registers)
+        self.assembler.leave_jitted_hook()
+        # end of "no gc operation!" block
+        assert fail_index == fail_index_2
+        return faildescr
+
 
 class CPU386_NO_SSE2(CPU386):
     supports_floats = False

pypy/jit/backend/x86/test/test_gc_integration.py

 from pypy.jit.backend.llsupport.descr import GcCache
 from pypy.jit.backend.llsupport.gc import GcLLDescription
 from pypy.jit.backend.x86.runner import CPU
-from pypy.jit.backend.x86.regalloc import RegAlloc, WORD
+from pypy.jit.backend.x86.regalloc import RegAlloc, WORD, FRAME_FIXED_SIZE
 from pypy.jit.metainterp.test.oparser import parse
 from pypy.rpython.lltypesystem import lltype, llmemory, rffi
 from pypy.rpython.annlowlevel import llhelper
         #
         mark = regalloc.get_mark_gc_roots(cpu.gc_ll_descr.gcrootmap)
         assert mark[0] == 'compressed'
-        expected = ['ebx', 'esi', 'edi', -16, -20, -24]
+        base = -WORD * FRAME_FIXED_SIZE
+        expected = ['ebx', 'esi', 'edi', base, base-4, base-8]
         assert dict.fromkeys(mark[1:]) == dict.fromkeys(expected)
 
 class TestRegallocGcIntegration(BaseTestRegalloc):

pypy/jit/metainterp/codewriter.py

 from pypy.translator.backendopt.writeanalyze import WriteAnalyzer
 from pypy.jit.metainterp.typesystem import deref, arrayItem, fieldType
 from pypy.jit.metainterp.effectinfo import effectinfo_from_writeanalyze
+from pypy.jit.metainterp.effectinfo import VirtualizableAnalyzer
 
 import py, sys
 from pypy.tool.ansi_print import ansi_log
         self.metainterp_sd = metainterp_sd
         self.cpu = metainterp_sd.cpu
         self.portal_runner_ptr = portal_runner_ptr
-        self.raise_analyzer = RaiseAnalyzer(self.rtyper.annotator.translator)
-        self.write_analyzer = WriteAnalyzer(self.rtyper.annotator.translator)
+        translator = self.rtyper.annotator.translator
+        self.raise_analyzer = RaiseAnalyzer(translator)
+        self.write_analyzer = WriteAnalyzer(translator)
+        self.virtualizable_analyzer = VirtualizableAnalyzer(translator)
 
     def make_portal_bytecode(self, graph):
         log.info("making JitCodes...")
         # ok
         if consider_effects_of is not None:
             effectinfo = effectinfo_from_writeanalyze(
-                    self.write_analyzer.analyze(consider_effects_of), self.cpu)
+                    self.write_analyzer.analyze(consider_effects_of),
+                    self.cpu,
+                    self.virtualizable_analyzer.analyze(consider_effects_of))
             calldescr = self.cpu.calldescrof(FUNC, tuple(NON_VOID_ARGS), RESULT, effectinfo)
         else:
             calldescr = self.cpu.calldescrof(FUNC, tuple(NON_VOID_ARGS), RESULT)
         if op.opname == "direct_call":
             func = getattr(get_funcobj(op.args[0].value), '_callable', None)
             pure = getattr(func, "_pure_function_", False)
+            all_promoted_args = getattr(func,
+                               "_pure_function_with_all_promoted_args_", False)
+            if pure and not all_promoted_args:
+                effectinfo = calldescr.get_extra_info()
+                assert (effectinfo is not None and
+                        not effectinfo.promotes_virtualizables)
         try:
             canraise = self.codewriter.raise_analyzer.can_raise(op)
         except lltype.DelayedPointer:
             canraise = True  # if we need to look into the delayed ptr that is
                              # the portal, then it's certainly going to raise
         if pure:
+            # XXX check what to do about exceptions (also MemoryError?)
             self.emit('residual_call_pure')
         elif canraise:
             self.emit('residual_call')
     def handle_regular_indirect_call(self, op):
         self.codewriter.register_indirect_call_targets(op)
         args = op.args[1:-1]
-        calldescr, non_void_args = self.codewriter.getcalldescr(op.args[0],
-                                                                args,
-                                                                op.result)
+        calldescr, non_void_args = self.codewriter.getcalldescr(
+            op.args[0], args, op.result, consider_effects_of=op)
         self.minimize_variables()
         self.emit('indirect_call')
         self.emit(self.get_position(calldescr))

pypy/jit/metainterp/compile.py

             if box:
                 fail_arg_types[i] = box.type
         self.fail_arg_types = fail_arg_types
+        # XXX ^^^ kill this attribute
 
     def handle_fail(self, metainterp_sd):
         from pypy.jit.metainterp.pyjitpl import MetaInterp
         send_bridge_to_backend(metainterp.staticdata, self, inputargs,
                                new_loop.operations)
 
+
+class ResumeGuardForcedDescr(ResumeGuardDescr):
+
+    def handle_fail(self, metainterp_sd):
+        from pypy.jit.metainterp.pyjitpl import MetaInterp
+        metainterp = MetaInterp(metainterp_sd)
+        token = metainterp_sd.cpu.get_latest_force_token()
+        metainterp._already_allocated_resume_virtuals = self.fetch_data(token)
+        self.counter = -2     # never compile
+        return metainterp.handle_guard_failure(self)
+
+    def force_virtualizable(self, vinfo, virtualizable, force_token):
+        from pypy.jit.metainterp.pyjitpl import MetaInterp
+        from pypy.jit.metainterp.resume import force_from_resumedata
+        metainterp = MetaInterp(self.metainterp_sd)
+        metainterp.history = None    # blackholing
+        liveboxes = metainterp.load_values_from_failure(self)
+        virtualizable_boxes, data = force_from_resumedata(metainterp,
+                                                          liveboxes, self)
+        vinfo.write_boxes(virtualizable, virtualizable_boxes)
+        self.save_data(force_token, data)
+
+    def save_data(self, key, value):
+        globaldata = self.metainterp_sd.globaldata
+        assert key not in globaldata.resume_virtuals
+        globaldata.resume_virtuals[key] = value
+
+    def fetch_data(self, key):
+        globaldata = self.metainterp_sd.globaldata
+        assert key in globaldata.resume_virtuals
+        data = globaldata.resume_virtuals[key]
+        del globaldata.resume_virtuals[key]
+        return data
+
+
 class ResumeFromInterpDescr(ResumeDescr):
     def __init__(self, original_greenkey, redkey):
         ResumeDescr.__init__(self, original_greenkey)

pypy/jit/metainterp/doc/jitpl5.txt

 matches the real data -- but this is delicate because of the
 non-escaping flag.
 
-Instead, this is done by doing tracing from the start of the loop again.
-At the end, we don't do perfect specialization (for now), but simply
-check that the already-computed specialization still applies, and then
-jump to the already-compiled loop.  (If it does not match, for now we
-just cancel everything.)
-
-If the loop is not only executed but *entered* often enough, then after
-this tracing, we generate a second copy of the loop (a "bridge") that
-starts with all variables unspecialized, and ends with a jump to the
-real loop.  From this point on, we can just jump directly to the bridge
+Instead, this is done by "entry bridges": we do tracing from
+the start of the loop again, and at the end, we try to compile
+the recorded trace as a "bridge" that comes from the
+interpreter (i.e. with no virtuals at all) and goes to the old
+loop.  Later on, we can just jump directly to the entry bridge
 from the JUMP_ABSOLUTE bytecode.
 
 
 take the set of live values and put them back into boxes, and proceed
 with tracing for the rest of the loop.
 
-For now, we just check at the end of the loop that it matches the
-already-computed specialization.  If not, we cancel creating the
-compiled version of it (and mark the guard so that future failures
-always fall back to interpretation).  To do this, when we created the
-original loop, at every guard, we needed to record the set of live
-values (mostly in which register or stack location they are) as well as
-an "escaped-so-far" flag for each pointer.
+At the end of the loop, we check that it matches an already-computed
+specialization.  If not, we go on tracing.  This might unroll the loop
+once.  (Note that there is a global limit on the length of the recorded
+trace, to avoid tracing forever.)

pypy/jit/metainterp/doc/linking.txt

-==============================================================
-Linking between the interpreter, the JIT, and the machine code
-==============================================================
-
-In a pypy-c that contains a generated JIT, the execution of app-level
-code initially uses the normal interpreter, but invokes the JIT if the
-JUMP_ABSOLUTE bytecode is seen often enough (it's the bytecode that
-closes an app-level loop).  This part of the code of pypy-c looks like
-this (in pseudo-C code coming from RPython)::
-
-    void dispatch_JUMP_ABSOLUTE(Frame *frame, int arg)
-    {
-        frame->next_op = arg;
-        can_enter_jit(frame);
-    }
-
-    void can_enter_jit(Frame *frame)
-    {
-        if (!position_seen_often_enough(frame->f_code, frame->next_op))
-            return;
-        machine_code_ptr = lookup_machine_code_for(frame->f_code,
-                                                   frame->next_op);
-        machine_code_ptr(frame);
-        /* at this point, the machine code has updated the frame
-           so that it points to the next bytecode to execute */
-    }
-
-The jit_compile() function comes from RPython sources written in the
-JIT support code (warmspot.py).  It does tracing and generally ends up
-compiling an extra loop to machine code.
-
-Then jit_compile() itself needs to transfer execution to the newly
-compiled loop.  Instead of calling the loop, jit_compile() returns a
-small data structure (a continuation) that points to the loop and
-contains values for the input arguments.  A pointer to this data arrives
-in REG3 in the guard recovery code, which contains the necessary
-assembler to set up the real registers and actually jump to the loop.
-
-The reason to organize the control flow transfers in this way is because
-when the jit_compile() function executes, the frame from the machine
-code is always still available -- it is below in the stack.  The machine
-code didn't do a RET before it CALLed the JIT.  This has two advantages.
-First, it lets the JIT read values directly out of the old frame, to
-find the values that were current when the guard failed.  Second, it
-tells the JIT where the *next* machine code frame will be: it will be at
-the same position as the old frame (because the machine code and the
-guard recovery code just jump to each other).  This is useful to
-implement virtualizables: a virtualizable object living in the heap
-(e.g. a PyFrame instance in pypy-c) needs to contain a pointer to the
-machine code frame's base pointer, and the cleanest approach is to let
-the JIT write this pointer into the virtualizable heap object just
-before it lets the guard recovery code transfer control to the machine
-code.

pypy/jit/metainterp/doc/loop.txt

 
     .       VirtualSpec(cls, name1=spec1, ...)
                     |
-         VirtualizableSpec(cls, name1=spec1, ...)
-                    |
-              FixedClassSpec(cls)
-                    |
                  NotSpec
 
-For (a simplified) example, ``VirtualizableSpec(PyFrame, x =
-VirtualSpec(W_IntObject, value = NotSpec))`` describes the virtualizable
-frame for a loop in which the only used variable is ``x``, which is a
-virtual ``W_IntObject``.
-
-The intersection rules are:
-
-* the intersection of two ``VirtualSpec`` of the same ``cls`` is a
-  further ``VirtualSpec``, and we proceed with the intersection of
-  each field.
-
-* the intersection of two ``VirtualizableSpec`` of the same ``cls`` is
-  like the previous case, except that some names may be omitted
-  completely from a given ``VirtualizableSpec``; in the case a name is
-  present in only one of the ``VirtualizableSpec``, we just keep it
-  unmodified in the intersection.
-
-* in other cases, the result is ``FixedClassSpec`` if the two specnodes
-  have the same class, or ``NotSpec`` if any is a ``NotSpec`` or if the
-  two classes differ.
+For example, ``VirtualSpec(W_IntObject, value = NotSpec))`` describes a
+variable which is a virtual ``W_IntObject``, containing a value that is
+a real integer.
 
 
 Overall Approach

pypy/jit/metainterp/doc/matching_rules.txt

-
-How does perfect specialization match nodes
-============================================
-
-Categories of spec nodes:
-
-NotSpecNode - nothing special
-
-FixedClassSpecNode - specnode with no know fields, matches the same class
-  specnode, less general than NotSpecNode.
-
-VirtualSpecNode - a virtual. matches only specnode that uses exactly
-  the same fields in the identical manner.
-
-VirtualListSpecNode - same as virtual
-
-VirtualizableSpecNode - virtualizable. This one is a little bit more complex:
-  for matching, VirtualizableSpecNode matches NotSpecNode (it was not used
-  at all, hence no guard_nonvirtualized) or VirtualizableSpecNode with a
-  common subset of fields.
-
-Say v0 (Virtualizable) has fields a, b and v1 to match has fields b, c
-means that b need to have the exact same shape, but a and c can stay
-whatever they are.

pypy/jit/metainterp/doc/virtualizables.txt

-Simplified virtualizables
-=========================
-
-Let's start with some definitions:
-
-* Virtuals are objects which are known not to escape from jit code, hence
-  they're not allocated at all and their fields are stored in registers and or
-  on the stack.
-
-* Virtualizables are objects that are known to escape (for example the frame
-  object), but they're stored anyway on the stack with a way to access and
-  modify from outside the jit code. So the jit knows where they're and have
-  a way to reconstruct them if necessary.
-
-A couple of observations, in terms of a python interpreter:
-
-Usually we pass a virtualizable around everywhere (this is a frame
-object) which is stored on a framestack and allocated before each next
-call to portal (portal is a bytecode dispatch loop). Almost everything
-is stored on top of this virtualizable. There is a valuestack and locals
-which usually are most commonly accessed fields.
-
-A typical loop, for example for adding integers (the best benchmark ever)
-will look like this:
-
-for a code:
-   
-   while i < 10000:
-       i += 1
-
-v1 = getfield_gc(frame, "locals")
-v2 = getarrayitem_gc(v1, 0) # or some other element
-v3 = getfield_gc(frame, "valuestack")
-setarrayitem_gc(v3, 0, v2)
-setarrayitem_gc(v3, 1, Constant(1))
-v4 = getarrayitem_gc(v3, 0)
-v5 = getarrayitem_gc(v3, 1)
-i0 = getfield_gc(v4, "intval")
-i1 = getfield_gc(v5, "intval")
-v3 = new_with_vtable(W_IntObject)
-i2 = int_add(i0, i1)
-setfield_gc(v3, "intval", i2)
-.... store into valuestack, load and store in locals
-
-clearly, what we really want is:
-
-i1 = int_add(i0, 1)
-
-In order to achieve this, we need:
-
-* Make sure that frame is not used
-
-* Make sure that things on the frame are virtual, so they don't get
-  allocated until needed.
-
-So the real loop will pass around virtualizable and intval of local variable i.
-We can achieve that by unpacking W_IntObject read from locals before the loop
-and carefully rebuilding this for each guard failure, by a small bit of
-assembler code.

pypy/jit/metainterp/effectinfo.py

 from pypy.rpython.lltypesystem.rclass import OBJECT
 from pypy.rpython.lltypesystem import lltype
 from pypy.rpython.ootypesystem import ootype
+from pypy.translator.backendopt.graphanalyze import BoolGraphAnalyzer
 
 class EffectInfo(object):
     _cache = {}
 
-    def __new__(cls, write_descrs_fields, write_descrs_arrays):
-        key = frozenset(write_descrs_fields), frozenset(write_descrs_arrays)
+    def __new__(cls, write_descrs_fields, write_descrs_arrays,
+                promotes_virtualizables=False):
+        key = (frozenset(write_descrs_fields), frozenset(write_descrs_arrays),
+               promotes_virtualizables)
         if key in cls._cache:
             return cls._cache[key]
         result = object.__new__(cls)
         result.write_descrs_fields = write_descrs_fields
         result.write_descrs_arrays = write_descrs_arrays
+        result.promotes_virtualizables = promotes_virtualizables
         cls._cache[key] = result
         return result
 
-def effectinfo_from_writeanalyze(effects, cpu):
+def effectinfo_from_writeanalyze(effects, cpu, promotes_virtualizables=False):
     from pypy.translator.backendopt.writeanalyze import top_set
     if effects is top_set:
         return None
             write_descrs_arrays.append(descr)
         else:
             assert 0
-    return EffectInfo(write_descrs_fields, write_descrs_arrays)
+    return EffectInfo(write_descrs_fields, write_descrs_arrays,
+                      promotes_virtualizables)
 
 def consider_struct(TYPE, fieldname):
     if fieldType(TYPE, fieldname) is lltype.Void:
         return False
     return True
 
-
 def consider_array(ARRAY):
     if arrayItem(ARRAY) is lltype.Void:
         return False
     if not isinstance(ARRAY, lltype.GcArray): # can be a non-GC-array
         return False
     return True
+
+# ____________________________________________________________
+
+class VirtualizableAnalyzer(BoolGraphAnalyzer):
+    def analyze_simple_operation(self, op):
+        return op.opname == 'promote_virtualizable'
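The analyzer is meant to be driven from the codewriter when a call descriptor is built for a residual call, next to the existing write analysis. That call site is not part of this diff, so the following is only a sketch under that assumption; WriteAnalyzer is the existing analyzer from pypy.translator.backendopt.writeanalyze.

    from pypy.translator.backendopt.writeanalyze import WriteAnalyzer

    def build_effectinfo(op, translator, cpu):
        # sketch: combine the write analysis with the new virtualizable check
        effects = WriteAnalyzer(translator).analyze(op)
        promotes = VirtualizableAnalyzer(translator).analyze(op)
        return effectinfo_from_writeanalyze(effects, cpu, promotes)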

pypy/jit/metainterp/pyjitpl.py

                 varargs = [jitcode.cfnptr] + varargs
                 res = self.execute_varargs(rop.CALL, varargs,
                                              descr=jitcode.calldescr, exc=True)
+                self.metainterp.load_fields_from_virtualizable()
             else:
                 # for oosends (ootype only): calldescr is a MethDescr
                 res = self.execute_varargs(rop.OOSEND, varargs,
 
     @arguments("descr", "varargs")
     def opimpl_residual_call(self, calldescr, varargs):
-        return self.execute_varargs(rop.CALL, varargs, descr=calldescr, exc=True)
+        return self.do_residual_call(varargs, descr=calldescr, exc=True)
 
     @arguments("varargs")
     def opimpl_recursion_leave_prep(self, varargs):
             greenkey = varargs[1:num_green_args + 1]
             if warmrunnerstate.can_inline_callable(greenkey):
                 return self.perform_call(portal_code, varargs[1:], greenkey)
-        return self.execute_varargs(rop.CALL, varargs, descr=calldescr, exc=True)
+        return self.do_residual_call(varargs, descr=calldescr, exc=True)
 
     @arguments("descr", "varargs")
     def opimpl_residual_call_noexception(self, calldescr, varargs):
-        self.execute_varargs(rop.CALL, varargs, descr=calldescr, exc=False)
+        self.do_residual_call(varargs, descr=calldescr, exc=False)
 
     @arguments("descr", "varargs")
     def opimpl_residual_call_pure(self, calldescr, varargs):
             return self.perform_call(jitcode, varargs)
         else:
             # but we should not follow calls to that graph
-            return self.execute_varargs(rop.CALL, [box] + varargs,
-                                        descr=calldescr, exc=True)
+            return self.do_residual_call([box] + varargs,
+                                         descr=calldescr, exc=True)
 
     @arguments("orgpc", "methdescr", "varargs")
     def opimpl_oosend(self, pc, methdescr, varargs):
         if isinstance(box, Const):    # no need for a guard
             return
         metainterp = self.metainterp
-        metainterp_sd = metainterp.staticdata
         if metainterp.is_blackholing():
             return
         saved_pc = self.pc
             moreargs = [box] + extraargs
         else:
             moreargs = list(extraargs)
+        metainterp_sd = metainterp.staticdata
         original_greenkey = metainterp.resumekey.original_greenkey
-        resumedescr = compile.ResumeGuardDescr(metainterp_sd, original_greenkey)
+        if opnum == rop.GUARD_NOT_FORCED:
+            resumedescr = compile.ResumeGuardForcedDescr(metainterp_sd,
+                                                         original_greenkey)
+        else:
+            resumedescr = compile.ResumeGuardDescr(metainterp_sd,
+                                                   original_greenkey)
         guard_op = metainterp.history.record(opnum, moreargs, None,
                                              descr=resumedescr)       
         virtualizable_boxes = None
             return self.metainterp.handle_exception()
         return False
 
+    def do_residual_call(self, argboxes, descr, exc):
+        effectinfo = descr.get_extra_info()
+        if effectinfo is None or effectinfo.promotes_virtualizables:
+            # residual calls require attention to keep virtualizables in-sync
+            self.metainterp.vable_before_residual_call()
+            # xxx do something about code duplication
+            resbox = self.metainterp.execute_and_record_varargs(
+                rop.CALL_MAY_FORCE, argboxes, descr=descr)
+            self.metainterp.vable_after_residual_call()
+            if resbox is not None:
+                self.make_result_box(resbox)
+            self.generate_guard(self.pc, rop.GUARD_NOT_FORCED, None, [])
+            if exc:
+                return self.metainterp.handle_exception()
+            return False
+        else:
+            return self.execute_varargs(rop.CALL, argboxes, descr, exc)
+
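Concretely, when effectinfo is missing or reports promotes_virtualizables, the operations recorded around such a call end up looking roughly like this (schematic; the force_token/setfield_gc pair comes from vable_before_residual_call further down):

    i0 = force_token()
    setfield_gc(p_frame, i0, descr=vable_token_descr)
    r1 = call_may_force(..., descr=calldescr)
    guard_not_forced()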
 # ____________________________________________________________
 
 class MetaInterpStaticData(object):
         self.indirectcall_dict = None
         self.addr2name = None
         self.loopnumbering = 0
+        self.resume_virtuals = {}
         #
         state = staticdata.state
         if state is not None:
 
 class MetaInterp(object):
     in_recursion = 0
+    _already_allocated_resume_virtuals = None
+
     def __init__(self, staticdata):
         self.staticdata = staticdata
         self.cpu = staticdata.cpu
     @specialize.arg(1)
     def execute_and_record(self, opnum, descr, *argboxes):
         history.check_descr(descr)
-        assert opnum != rop.CALL and opnum != rop.OOSEND
+        assert (opnum != rop.CALL and opnum != rop.CALL_MAY_FORCE
+                and opnum != rop.OOSEND)
         # execute the operation
         profiler = self.staticdata.profiler
         profiler.count_ops(opnum)
     @specialize.arg(1)
     def execute_and_record_varargs(self, opnum, argboxes, descr=None):
         history.check_descr(descr)
-        # residual calls require attention to keep virtualizables in-sync.
-        # CALL_PURE doesn't need it because so far 'promote_virtualizable'
-        # as an operation is enough to make the called function non-pure.
-        require_attention = (opnum == rop.CALL or opnum == rop.OOSEND)
-        if require_attention and not self.is_blackholing():
-            self.before_residual_call()
         # execute the operation
         profiler = self.staticdata.profiler
         profiler.count_ops(opnum)
         if self.is_blackholing():
             profiler.count_ops(opnum, BLACKHOLED_OPS)
         else:
-            if require_attention:
-                require_attention = self.after_residual_call()
             # check if the operation can be constant-folded away
             argboxes = list(argboxes)
             if rop._ALWAYS_PURE_FIRST <= opnum <= rop._ALWAYS_PURE_LAST:
                 resbox = self._record_helper_pure_varargs(opnum, resbox, descr, argboxes)
             else:
                 resbox = self._record_helper_nonpure_varargs(opnum, resbox, descr, argboxes)
-        # if we are blackholing require_attention has the initial meaning
-        if require_attention:
-            self.after_generate_residual_call()
         return resbox
 
     def _record_helper_pure(self, opnum, resbox, descr, *argboxes): 
             self.framestack[-1].follow_jump()
         elif opnum == rop.GUARD_FALSE:     # a goto_if_not that stops jumping
             self.framestack[-1].dont_follow_jump()
-        elif opnum == rop.GUARD_NO_EXCEPTION or opnum == rop.GUARD_EXCEPTION:
+        elif (opnum == rop.GUARD_NO_EXCEPTION or opnum == rop.GUARD_EXCEPTION
+              or opnum == rop.GUARD_NOT_FORCED):
             self.handle_exception()
         elif opnum == rop.GUARD_NO_OVERFLOW:   # an overflow now detected
             self.raise_overflow_error()
         vinfo = self.staticdata.virtualizable_info
         virtualizable_box = self.virtualizable_boxes[-1]
         virtualizable = vinfo.unwrap_virtualizable_box(virtualizable_box)
-        vinfo.clear_vable_rti(virtualizable)
+        vinfo.clear_vable_token(virtualizable)
 
-    def before_residual_call(self):
+    def vable_before_residual_call(self):
+        if self.is_blackholing():
+            return
         vinfo = self.staticdata.virtualizable_info
         if vinfo is not None:
             virtualizable_box = self.virtualizable_boxes[-1]
             virtualizable = vinfo.unwrap_virtualizable_box(virtualizable_box)
             vinfo.tracing_before_residual_call(virtualizable)
+            #
+            force_token_box = history.BoxInt()
+            self.history.record(rop.FORCE_TOKEN, [], force_token_box)
+            self.history.record(rop.SETFIELD_GC, [virtualizable_box,
+                                                  force_token_box],
+                                None, descr=vinfo.vable_token_descr)
 
-    def after_residual_call(self):
-        vinfo = self.staticdata.virtualizable_info
-        if vinfo is not None:
-            virtualizable_box = self.virtualizable_boxes[-1]
-            virtualizable = vinfo.unwrap_virtualizable_box(virtualizable_box)
-            if vinfo.tracing_after_residual_call(virtualizable):
-                # This is after the residual call is done, but before it
-                # is actually generated.  We first generate a store-
-                # everything-back, *without actually performing it now*
-                # as it contains the old values (before the call)!
-                self.gen_store_back_in_virtualizable_no_perform()
-                return True    # must call after_generate_residual_call()
-        # otherwise, don't call after_generate_residual_call()
-        return False
-
-    def after_generate_residual_call(self):
-        # Called after generating a residual call, and only if
-        # after_residual_call() returned True, i.e. if code in the residual
-        # call causes the virtualizable to escape.  Reload the modified
-        # fields of the virtualizable.
-        self.gen_load_fields_from_virtualizable()
+    def vable_after_residual_call(self):
+        if self.is_blackholing():
+            vable_escapes = True
+        else:
+            vable_escapes = False
+            vinfo = self.staticdata.virtualizable_info
+            if vinfo is not None:
+                virtualizable_box = self.virtualizable_boxes[-1]
+                virtualizable = vinfo.unwrap_virtualizable_box(virtualizable_box)
+                if vinfo.tracing_after_residual_call(virtualizable):
+                    # We just did the residual call, and it shows that the
+                    # virtualizable escapes.
+                    self.switch_to_blackhole()
+                    vable_escapes = True
+        if vable_escapes:
+            self.load_fields_from_virtualizable()
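The decision implemented by the two hooks above can be condensed as follows; this is an illustrative restatement, not a real MetaInterp method.

    def must_reload_virtualizable_boxes(blackholing, vinfo, callee_forced_it):
        # after a residual call: do the local boxes need refreshing from the
        # real virtualizable object?
        if blackholing:
            return True    # boxes may be stale while blackholing: always reload
        if vinfo is not None and callee_forced_it:
            return True    # escape detected: switch to blackhole, then reload
        return False       # still in sync, keep tracing normally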
 
     def handle_exception(self):
         etype = self.cpu.get_exception()
             # is and stays NULL.
             virtualizable_box = self.virtualizable_boxes[-1]
             virtualizable = vinfo.unwrap_virtualizable_box(virtualizable_box)
-            assert not virtualizable.vable_rti
+            assert not virtualizable.vable_token
             self.synchronize_virtualizable()
 
     def check_synchronized_virtualizable(self):
         virtualizable = vinfo.unwrap_virtualizable_box(virtualizable_box)
         vinfo.write_boxes(virtualizable, self.virtualizable_boxes)
 
-    def gen_load_fields_from_virtualizable(self):
+    def load_fields_from_virtualizable(self):
+        # Force a reload of the virtualizable fields into the local
+        # boxes (called only in escaping cases)
+        assert self.is_blackholing()
         vinfo = self.staticdata.virtualizable_info
         if vinfo is not None:
-            vbox = self.virtualizable_boxes[-1]
-            for i in range(vinfo.num_static_extra_boxes):
-                descr = vinfo.static_field_descrs[i]
-                fieldbox = self.execute_and_record(rop.GETFIELD_GC, descr,
-                                                   vbox)
-                self.virtualizable_boxes[i] = fieldbox
-            i = vinfo.num_static_extra_boxes
-            virtualizable = vinfo.unwrap_virtualizable_box(vbox)
-            for k in range(vinfo.num_arrays):
-                descr = vinfo.array_field_descrs[k]
-                abox = self.execute_and_record(rop.GETFIELD_GC, descr, vbox)
-                descr = vinfo.array_descrs[k]
-                for j in range(vinfo.get_array_length(virtualizable, k)):
-                    itembox = self.execute_and_record(rop.GETARRAYITEM_GC,
-                                                      descr, abox, ConstInt(j))
-                    self.virtualizable_boxes[i] = itembox
-                    i += 1
-            assert i + 1 == len(self.virtualizable_boxes)
+            virtualizable_box = self.virtualizable_boxes[-1]
+            virtualizable = vinfo.unwrap_virtualizable_box(virtualizable_box)
+            self.virtualizable_boxes = vinfo.read_boxes(self.cpu,
+                                                        virtualizable)
+            self.virtualizable_boxes.append(virtualizable_box)
 
     def gen_store_back_in_virtualizable(self):
         vinfo = self.staticdata.virtualizable_info
                                             abox, ConstInt(j), itembox)
             assert i + 1 == len(self.virtualizable_boxes)
 
-    def gen_store_back_in_virtualizable_no_perform(self):
-        vinfo = self.staticdata.virtualizable_info
-        # xxx only write back the fields really modified
-        vbox = self.virtualizable_boxes[-1]
-        for i in range(vinfo.num_static_extra_boxes):
-            fieldbox = self.virtualizable_boxes[i]
-            self.history.record(rop.SETFIELD_GC, [vbox, fieldbox], None,
-                                descr=vinfo.static_field_descrs[i])
-        i = vinfo.num_static_extra_boxes
-        virtualizable = vinfo.unwrap_virtualizable_box(vbox)
-        for k in range(vinfo.num_arrays):
-            abox = vinfo.BoxArray()
-            self.history.record(rop.GETFIELD_GC, [vbox], abox,
-                                descr=vinfo.array_field_descrs[k])
-            for j in range(vinfo.get_array_length(virtualizable, k)):
-                itembox = self.virtualizable_boxes[i]
-                i += 1
-                self.history.record(rop.SETARRAYITEM_GC,
-                                    [abox, ConstInt(j), itembox],
-                                    None,
-                                    descr=vinfo.array_descrs[k])
-        assert i + 1 == len(self.virtualizable_boxes)
-
     def replace_box(self, oldbox, newbox):
         for frame in self.framestack:
             boxes = frame.env

pypy/jit/metainterp/resoperation.py

     'GUARD_EXCEPTION',
     'GUARD_NO_OVERFLOW',
     'GUARD_OVERFLOW',
+    'GUARD_NOT_FORCED',
     '_GUARD_LAST', # ----- end of guard operations -----
 
     '_NOSIDEEFFECT_FIRST', # ----- start of no_side_effect operations -----
     'COND_CALL_GC_MALLOC',  # [a, b, if_(a<=b)_result, if_(a>b)_call, args...]
                             #        => result          (for mallocs)
     'DEBUG_MERGE_POINT/1',      # debugging only
+    'FORCE_TOKEN/0',
 
     '_CANRAISE_FIRST', # ----- start of can_raise operations -----
     'CALL',
+    'CALL_MAY_FORCE',
     'OOSEND',                     # ootype operation
     '_CANRAISE_LAST', # ----- end of can_raise operations -----
 

pypy/jit/metainterp/resume.py

     metainterp.framestack.reverse()
     return virtualizable_boxes
 
+def force_from_resumedata(metainterp, newboxes, storage):
+    resumereader = ResumeDataReader(storage, newboxes, metainterp)
+    return resumereader.consume_boxes(), resumereader.virtuals
+
 
 class ResumeDataReader(object):
     virtuals = None
 
     def _prepare_virtuals(self, metainterp, virtuals):
         if virtuals:
+            v = metainterp._already_allocated_resume_virtuals
+            if v is not None:
+                self.virtuals = v
+                return
             self.virtuals = [None] * len(virtuals)
             for i in range(len(virtuals)):
                 vinfo = virtuals[i]
             for i in range(len(virtuals)):
                 vinfo = virtuals[i]
                 if vinfo is not None:
-                    vinfo.setfields(metainterp, self.virtuals[i], self._decode_box)
+                    vinfo.setfields(metainterp, self.virtuals[i],
+                                    self._decode_box)
 
     def consume_boxes(self):
         numb = self.cur_numb
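A rough sketch of the intended caller of force_from_resumedata, which in the real branch lives in the ResumeGuardForcedDescr machinery; the function name and arguments here are hypothetical.

    def rebuild_state_after_force(metainterp, storage, newboxes, forced_virtuals):
        # reuse the virtuals that were already materialized when the callee
        # forced the virtualizable, instead of allocating fresh ones
        metainterp._already_allocated_resume_virtuals = forced_virtuals
        boxes, virtuals = force_from_resumedata(metainterp, newboxes, storage)
        return boxes, virtuals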

pypy/jit/metainterp/test/test_basic.py

         assert get_stats().enter_count <= count
     def check_jumps(self, maxcount):
         assert get_stats().exec_jumps <= maxcount
-    def check_aborted_count(self, maxcount):
-        assert get_stats().aborted_count == maxcount
+    def check_aborted_count(self, count):
+        assert get_stats().aborted_count == count
 
     def meta_interp(self, *args, **kwds):
         kwds['CPUClass'] = self.CPUClass
     type_system = 'ootype'
     CPUClass = runner.OOtypeCPU
 
+    def setup_class(cls):
+        py.test.skip("ootype tests skipped for now")
+
     @staticmethod
     def Ptr(T):
         return T

pypy/jit/metainterp/test/test_codewriter.py

             supports_floats = False
             def fielddescrof(self, STRUCT, fieldname):
                 return ('fielddescr', STRUCT, fieldname)
-            def calldescrof(self, FUNC, NON_VOID_ARGS, RESULT, stuff=None):
-                return ('calldescr', FUNC, NON_VOID_ARGS, RESULT)
+            def calldescrof(self, FUNC, NON_VOID_ARGS, RESULT, effectinfo=None):
+                return ('calldescr', FUNC, NON_VOID_ARGS, RESULT, effectinfo)
             def typedescrof(self, CLASS):
                 return ('typedescr', CLASS)
             def methdescrof(self, CLASS, methname):
         cw._start(self.metainterp_sd, None)        
         jitcode = cw.make_one_bytecode((graphs[0], None), False)
         assert len(self.metainterp_sd.indirectcalls) == 1
-        names = [jitcode.name for (fnaddress, jitcode)
+        names = [jitcode1.name for (fnaddress, jitcode1)
                                in self.metainterp_sd.indirectcalls]
         assert dict.fromkeys(names) == {'g': None}
+        calldescrs = [calldescr for calldescr in jitcode.constants
+                                if isinstance(calldescr, tuple) and
+                                   calldescr[0] == 'calldescr']
+        assert len(calldescrs) == 1
+        assert calldescrs[0][4] is not None
+        assert not calldescrs[0][4].write_descrs_fields
+        assert not calldescrs[0][4].write_descrs_arrays
+        assert not calldescrs[0][4].promotes_virtualizables
 
     def test_oosend_look_inside_only_one(self):
         class A:
         assert cw.list_of_addr2name[0][1].endswith('.A1')
         assert cw.list_of_addr2name[1][1] == 'A1.g'
 
+    def test_promote_virtualizable_effectinfo(self):
+        class Frame(object):
+            _virtualizable2_ = ['x']
+            
+            def __init__(self, x, y):
+                self.x = x
+                self.y = y
+
+        def g1(f):
+            f.x += 1
+
+        def g2(f):
+            return f.x
+
+        def h(f):
+            f.y -= 1
+
+        def f(n):
+            f_inst = Frame(n+1, n+2)
+            g1(f_inst)
+            r = g2(f_inst)
+            h(f_inst)
+            return r
+
+        graphs = self.make_graphs(f, [5])
+        cw = CodeWriter(self.rtyper)
+        cw.candidate_graphs = [graphs[0]]
+        cw._start(self.metainterp_sd, None)
+        jitcode = cw.make_one_bytecode((graphs[0], None), False)
+        calldescrs = [calldescr for calldescr in jitcode.constants
+                                if isinstance(calldescr, tuple) and
+                                   calldescr[0] == 'calldescr']
+        assert len(calldescrs) == 4    # for __init__, g1, g2, h.
+        effectinfo_g1 = calldescrs[1][4]
+        effectinfo_g2 = calldescrs[2][4]
+        effectinfo_h  = calldescrs[3][4]
+        assert effectinfo_g1.promotes_virtualizables
+        assert effectinfo_g2.promotes_virtualizables
+        assert not effectinfo_h.promotes_virtualizables
+
+
 class ImmutableFieldsTests: