Commits

Hakan Ardo  committed b58d7aa Merge

hg merge jit-short-preamble: Produce proper guards when inlining the short preamble that will resume at right before the jit_merge_point and thus be traced into a single jump instruction.

  • Participants
  • Parent commits df5eb9c, 56f9663

Comments (0)

Files changed (21)

File pypy/jit/codewriter/effectinfo.py

 
     # the 'extraeffect' field is one of the following values:
     EF_PURE                            = 0 #pure function (and cannot raise)
-    EF_CANNOT_RAISE                    = 1 #a function which cannot raise
-    EF_CAN_RAISE                       = 2 #normal function (can raise)
-    EF_LOOPINVARIANT                   = 3 #special: call it only once per loop
+    EF_LOOPINVARIANT                   = 1 #special: call it only once per loop
+    EF_CANNOT_RAISE                    = 2 #a function which cannot raise
+    EF_CAN_RAISE                       = 3 #normal function (can raise)
     EF_FORCES_VIRTUAL_OR_VIRTUALIZABLE = 4 #can raise and force virtualizables
 
     # the 'oopspecindex' field is one of the following values:
             return cls._cache[key]
         result = object.__new__(cls)
         result.readonly_descrs_fields = readonly_descrs_fields
-        result.write_descrs_fields = write_descrs_fields
-        result.write_descrs_arrays = write_descrs_arrays
+        if extraeffect == EffectInfo.EF_LOOPINVARIANT or \
+           extraeffect == EffectInfo.EF_PURE:            
+            result.write_descrs_fields = []
+            result.write_descrs_arrays = []
+        else:
+            result.write_descrs_fields = write_descrs_fields
+            result.write_descrs_arrays = write_descrs_arrays
         result.extraeffect = extraeffect
         result.oopspecindex = oopspecindex
         cls._cache[key] = result

File pypy/jit/codewriter/jtransform.py

         op1 = SpaceOperation('jit_merge_point', args, None)
         op2 = SpaceOperation('-live-', [], None)
         # ^^^ we need a -live- for the case of do_recursive_call()
-        return ops + [op1, op2]
+        op3 = SpaceOperation('-live-', [], None)
+        # and one for inlined short preambles
+        return ops + [op3, op1, op2]
 
     def handle_jit_marker__loop_header(self, op, jitdriver):
         jd = self.callcontrol.jitdriver_sd_from_jitdriver(jitdriver)

File pypy/jit/codewriter/test/test_flatten.py

         self.encoding_test(f, [4, 5], """
             -live- %i0, %i1
             int_guard_value %i0
+            -live- %i0, %i1
             jit_merge_point $27, I[%i0], R[], F[], I[%i1], R[], F[]
             -live-
             loop_header $27

File pypy/jit/codewriter/test/test_jtransform.py

     tr = Transformer()
     tr.portal_jd = jd
     oplist = tr.rewrite_operation(op)
-    assert len(oplist) == 6
+    assert len(oplist) == 7
     assert oplist[0].opname == '-live-'
     assert oplist[1].opname == 'int_guard_value'
     assert oplist[1].args   == [v1]
     assert oplist[2].opname == '-live-'
     assert oplist[3].opname == 'int_guard_value'
     assert oplist[3].args   == [v2]
-    assert oplist[4].opname == 'jit_merge_point'
-    assert oplist[4].args[0].value == 42
-    assert list(oplist[4].args[1]) == [v1, v2]
-    assert list(oplist[4].args[4]) == [v3, v4]
-    assert oplist[5].opname == '-live-'
+    assert oplist[4].opname == '-live-'
+    assert oplist[5].opname == 'jit_merge_point'
+    assert oplist[5].args[0].value == 42
+    assert list(oplist[5].args[1]) == [v1, v2]
+    assert list(oplist[5].args[4]) == [v3, v4]
+    assert oplist[6].opname == '-live-'
 
 def test_getfield_gc():
     S = lltype.GcStruct('S', ('x', lltype.Char))

File pypy/jit/metainterp/blackhole.py

 from pypy.jit.codewriter.jitcode import JitCode, SwitchDictDescr
 from pypy.jit.codewriter import heaptracker
 from pypy.jit.metainterp.jitexc import JitException, get_llexception, reraise
-
+from pypy.jit.metainterp.compile import ResumeAtPositionDescr
 
 def arguments(*argtypes, **kwds):
     resulttype = kwds.pop('returns', None)
             assert kind == 'v'
         return lltype.nullptr(rclass.OBJECTPTR.TO)
 
-    def _prepare_resume_from_failure(self, opnum):
+    def _prepare_resume_from_failure(self, opnum, dont_change_position=False):
         from pypy.jit.metainterp.resoperation import rop
         #
         if opnum == rop.GUARD_TRUE:
             # Produced directly by some goto_if_not_xxx() opcode that did not
             # jump, but which must now jump.  The pc is just after the opcode.
-            self.position = self.jitcode.follow_jump(self.position)
+            if not dont_change_position:
+                self.position = self.jitcode.follow_jump(self.position)
         #
         elif opnum == rop.GUARD_FALSE:
             # Produced directly by some goto_if_not_xxx() opcode that jumped,
         jitdriver_sd,
         resumedescr,
         all_virtuals)
+    if isinstance(resumedescr, ResumeAtPositionDescr):
+        dont_change_position = True
+    else:
+        dont_change_position = False
+
     current_exc = blackholeinterp._prepare_resume_from_failure(
-        resumedescr.guard_opnum)
+        resumedescr.guard_opnum, dont_change_position)
+        
     try:
         _run_forever(blackholeinterp, current_exc)
     finally:

File pypy/jit/metainterp/compile.py

 
 # ____________________________________________________________
 
-def compile_new_loop(metainterp, old_loop_tokens, greenkey, start,
-                     full_preamble_needed=True):
+def compile_new_loop(metainterp, old_loop_tokens, greenkey, start, start_resumedescr):
     """Try to compile a new loop by closing the current history back
     to the first operation.
     """
+    full_preamble_needed=True
     history = metainterp.history
     loop = create_empty_loop(metainterp)
     loop.inputargs = history.inputargs
     loop.preamble = create_empty_loop(metainterp, 'Preamble ')
     loop.preamble.inputargs = loop.inputargs
     loop.preamble.token = make_loop_token(len(loop.inputargs), jitdriver_sd)
+    loop.preamble.start_resumedescr = start_resumedescr
 
     try:
         old_loop_token = jitdriver_sd.warmstate.optimize_loop(
         self.copy_all_attrbutes_into(res)
         return res
 
+class ResumeAtPositionDescr(ResumeGuardDescr):
+    #def __init__(self, position):
+    #    self.position = position
+        
+    def _clone_if_mutable(self):
+        res = ResumeAtPositionDescr()
+        self.copy_all_attrbutes_into(res)
+        return res
+
+    def not_handle_fail(self, metainterp_sd, jitdriver_sd):
+        if True or self.must_compile(metainterp_sd, jitdriver_sd):
+            return self._trace_and_compile_from_bridge(metainterp_sd,
+                                                       jitdriver_sd)
+        else:
+            raise NotImplementedError
+            # FIXME: patch blackhole._prepare_resume_from_failure(self, opnum)
+            from pypy.jit.metainterp.blackhole import resume_in_blackhole
+            resume_in_blackhole(metainterp_sd, jitdriver_sd, self)
+            assert 0, "unreachable"
+
 class ResumeGuardForcedDescr(ResumeGuardDescr):
 
     def __init__(self, metainterp_sd, jitdriver_sd):
     new_loop.operations = [op.clone() for op in metainterp.history.operations]
     metainterp_sd = metainterp.staticdata
     state = metainterp.jitdriver_sd.warmstate
+    if isinstance(resumekey, ResumeAtPositionDescr):
+        inline_short_preamble = False
+    else:
+        inline_short_preamble = True
     try:
         target_loop_token = state.optimize_bridge(metainterp_sd,
                                                   old_loop_tokens,
-                                                  new_loop)
+                                                  new_loop, inline_short_preamble)
     except InvalidLoop:
         # XXX I am fairly convinced that optimize_bridge cannot actually raise
         # InvalidLoop
         # know exactly what we must do (ResumeGuardDescr/ResumeFromInterpDescr)
         prepare_last_operation(new_loop, target_loop_token)
         resumekey.compile_and_attach(metainterp, new_loop)
-        compile_known_target_bridges(metainterp, new_loop)
+        #compile_known_target_bridges(metainterp, new_loop)
         record_loop_or_bridge(metainterp_sd, new_loop)
     return target_loop_token
 

File pypy/jit/metainterp/history.py

         cpu.set_future_value_int(j, self.value)
 
     def same_constant(self, other):
-        assert isinstance(other, Const)
-        return self.value == other.getint()
+        if isinstance(other, Const):
+            return self.value == other.getint()
+        return False
 
     def nonnull(self):
         return self.value != 0
         cpu.set_future_value_float(j, self.getfloat())
 
     def same_constant(self, other):
-        assert isinstance(other, ConstFloat)
-        return self.value == other.value
+        if isinstance(other, ConstFloat):
+            return self.value == other.value
+        return False
 
     def nonnull(self):
         return self.value != 0.0
         cpu.set_future_value_ref(j, self.value)
 
     def same_constant(self, other):
-        assert isinstance(other, ConstPtr)
-        return self.value == other.value
+        if isinstance(other, ConstPtr):
+            return self.value == other.value
+        return False
 
     def nonnull(self):
         return bool(self.value)
 ##        return self.value
 
     def same_constant(self, other):
-        assert isinstance(other, ConstObj)
-        return self.value == other.value
+        if isinstance(other, ConstObj):
+            return self.value == other.value
+        return False
 
     def nonnull(self):
         return bool(self.value)

File pypy/jit/metainterp/nounroll_optimize.py

     optimize_loop_1(metainterp_sd, loop, False)
     return None
 
-def optimize_bridge(metainterp_sd, old_loop_tokens, bridge):
+def optimize_bridge(metainterp_sd, old_loop_tokens, bridge, inline_short_preamble):
     debug_start("jit-optimize")
     try:
         return _optimize_bridge(metainterp_sd, old_loop_tokens, bridge)

File pypy/jit/metainterp/optimize.py

 
 # ____________________________________________________________
 
-def optimize_bridge(metainterp_sd, old_loop_tokens, bridge):
+def optimize_bridge(metainterp_sd, old_loop_tokens, bridge, inline_short_preamble=True):
     debug_start("jit-optimize")
     try:
-        return _optimize_bridge(metainterp_sd, old_loop_tokens, bridge)
+        return _optimize_bridge(metainterp_sd, old_loop_tokens, bridge, inline_short_preamble)
     finally:
         debug_stop("jit-optimize")
 
-def _optimize_bridge(metainterp_sd, old_loop_tokens, bridge):
+def _optimize_bridge(metainterp_sd, old_loop_tokens, bridge, inline_short_preamble):
     cpu = metainterp_sd.cpu
     metainterp_sd.logger_noopt.log_loop(bridge.inputargs, bridge.operations)
     if old_loop_tokens:
         old_loop_token = old_loop_tokens[0]
         bridge.operations[-1].setdescr(old_loop_token)   # patch jump target
-        optimize_bridge_1(metainterp_sd, bridge)
+        optimize_bridge_1(metainterp_sd, bridge, inline_short_preamble)
         return old_loop_tokens[0]
         #return bridge.operations[-1].getdescr()
     return None

File pypy/jit/metainterp/optimizeopt/__init__.py

 from pypy.jit.metainterp.optimizeopt.string import OptString
 from pypy.jit.metainterp.optimizeopt.unroll import optimize_unroll, OptInlineShortPreamble
 
-def optimize_loop_1(metainterp_sd, loop, unroll=True):
+def optimize_loop_1(metainterp_sd, loop, unroll=True, inline_short_preamble=True):
     """Optimize loop.operations to remove internal overheadish operations. 
     """
     opt_str = OptString()
-    optimizations = [OptInlineShortPreamble(),
-                     OptIntBounds(),
+    optimizations = [OptIntBounds(),
                      OptRewrite(),
                      OptVirtualize(),
                      opt_str,
                      OptHeap(),
                     ]
+    if inline_short_preamble:
+        optimizations = [OptInlineShortPreamble()] +  optimizations
+        
     if metainterp_sd.jit_ffi:
         from pypy.jit.metainterp.optimizeopt.fficall import OptFfiCall
         optimizations = optimizations + [
         optimizer = Optimizer(metainterp_sd, loop, optimizations)
         optimizer.propagate_all_forward()
 
-def optimize_bridge_1(metainterp_sd, bridge):
+def optimize_bridge_1(metainterp_sd, bridge, inline_short_preamble=True):
     """The same, but for a bridge. """
-    optimize_loop_1(metainterp_sd, bridge, False)
+    optimize_loop_1(metainterp_sd, bridge, False, inline_short_preamble)

File pypy/jit/metainterp/optimizeopt/heap.py

     def __init__(self):
         # cached fields:  {descr: {OptValue_instance: OptValue_fieldvalue}}
         self.cached_fields = {}
+        self.known_heap_fields = {}
         # cached array items:  {descr: CachedArrayItems}
         self.cached_arrayitems = {}
         # lazily written setfields (at most one per descr):  {descr: op}
                 newd[value.get_reconstructed(optimizer, valuemap)] = \
                                        fieldvalue.get_reconstructed(optimizer, valuemap)
             
+        for descr, d in self.known_heap_fields.items():
+            newd = {}
+            new.known_heap_fields[descr] = newd
+            for value, fieldvalue in d.items():
+                newd[value.get_reconstructed(optimizer, valuemap)] = \
+                                       fieldvalue.get_reconstructed(optimizer, valuemap)
+            
         new.cached_arrayitems = {}
         for descr, d in self.cached_arrayitems.items():
             newd = {}
                 if cache.var_index_item:
                     newcache.var_index_item = \
                           cache.var_index_item.get_reconstructed(optimizer, valuemap)
-                if newcache.var_index_indexvalue:
+                if cache.var_index_indexvalue:
                     newcache.var_index_indexvalue = \
                           cache.var_index_indexvalue.get_reconstructed(optimizer, valuemap)
                 for index, fieldvalue in cache.fixed_index_items.items():
                     newcache.fixed_index_items[index] = \
                            fieldvalue.get_reconstructed(optimizer, valuemap)
+
         return new
 
     def clean_caches(self):
         self.cached_fields.clear()
+        self.known_heap_fields.clear()
         self.cached_arrayitems.clear()
 
     def cache_field_value(self, descr, value, fieldvalue, write=False):
                     self.force_lazy_setfield(fielddescr)
                     try:
                         del self.cached_fields[fielddescr]
+                        del self.known_heap_fields[fielddescr]
                     except KeyError:
                         pass
                 for arraydescr in effectinfo.write_descrs_arrays:
         except KeyError:
             return
         del self.lazy_setfields[descr]
-        ###self.optimizer._emit_operation(op)
+        value = self.getvalue(op.getarg(0))
+        fieldvalue = self.getvalue(op.getarg(1))
+        try:
+            heapvalue = self.known_heap_fields[op.getdescr()][value]
+            if fieldvalue is heapvalue:
+                return
+        except KeyError:
+            pass
         self.next_optimization.propagate_forward(op)
-        #
+
         # hackish: reverse the order of the last two operations if it makes
         # sense to avoid a situation like "int_eq/setfield_gc/guard_true",
         # which the backend (at least the x86 backend) does not handle well.
         # default case: produce the operation
         value.ensure_nonnull()
         ###self.optimizer.optimize_default(op)
-        self.emit_operation(op) # FIXME: These might need constant propagation?
+        self.emit_operation(op)
         # then remember the result of reading the field
         fieldvalue = self.getvalue(op.result)
         self.cache_field_value(op.getdescr(), value, fieldvalue)
+        # keep track of what's on the heap
+        d = self.known_heap_fields.setdefault(op.getdescr(), {})
+        d[value] = fieldvalue
 
     def optimize_SETFIELD_GC(self, op):
         value = self.getvalue(op.getarg(0))
         fieldvalue = self.getvalue(op.getarg(1))
-        self.force_lazy_setfield_if_necessary(op, value, write=True)
-        self.lazy_setfields[op.getdescr()] = op
-        # remember the result of future reads of the field
-        self.cache_field_value(op.getdescr(), value, fieldvalue, write=True)
+        cached_fieldvalue = self.read_cached_field(op.getdescr(), value)
+        if fieldvalue is not cached_fieldvalue:
+            self.force_lazy_setfield_if_necessary(op, value, write=True)
+            self.lazy_setfields[op.getdescr()] = op
+            # remember the result of future reads of the field
+            self.cache_field_value(op.getdescr(), value, fieldvalue, write=True)
 
     def optimize_GETARRAYITEM_GC(self, op):
         value = self.getvalue(op.getarg(0))
             self.make_equal_to(op.result, fieldvalue)
             return
         ###self.optimizer.optimize_default(op)
-        self.emit_operation(op) # FIXME: These might need constant propagation?
+        self.emit_operation(op)
         fieldvalue = self.getvalue(op.result)
         self.cache_arrayitem_value(op.getdescr(), value, indexvalue, fieldvalue)
 

File pypy/jit/metainterp/optimizeopt/intbounds.py

             r = self.getvalue(op.result)
             r.intbound.intersect(resbound)
             self.emit_operation(self.nextop)
+            if self.nextop.getopnum() == rop.GUARD_NO_OVERFLOW:
+                # Synthesize the non overflowing op for optimize_default to reuse
+                self.pure(rop.INT_ADD, op.getarglist()[:], op.result)
+                
 
     def optimize_INT_SUB_OVF(self, op):
         v1 = self.getvalue(op.getarg(0))
             r = self.getvalue(op.result)
             r.intbound.intersect(resbound)
             self.emit_operation(self.nextop)
+            if self.nextop.getopnum() == rop.GUARD_NO_OVERFLOW:
+                # Synthesize the non overflowing op for optimize_default to reuse
+                self.pure(rop.INT_SUB, op.getarglist()[:], op.result)
             
     def optimize_INT_MUL_OVF(self, op):
         v1 = self.getvalue(op.getarg(0))
             r = self.getvalue(op.result)
             r.intbound.intersect(resbound)
             self.emit_operation(self.nextop)
+            if self.nextop.getopnum() == rop.GUARD_NO_OVERFLOW:
+                # Synthesize the non overflowing op for optimize_default to reuse
+                self.pure(rop.INT_MUL, op.getarglist()[:], op.result)
+            
 
     def optimize_INT_LT(self, op):
         v1 = self.getvalue(op.getarg(0))

File pypy/jit/metainterp/optimizeopt/unroll.py

 from pypy.rlib.debug import debug_start, debug_stop, debug_print
 from pypy.jit.metainterp.optimizeutil import InvalidLoop, RetraceLoop
 from pypy.jit.metainterp.jitexc import JitException
+from pypy.jit.metainterp.history import make_hashable_int
+from pypy.jit.codewriter.effectinfo import EffectInfo
 
 # Assumptions
 # ===========
            self.argmap[inputargs[i]] = jump_args[i]
         self.snapshot_map = {None: None}
 
-    def inline_op(self, newop, ignore_result=False, clone=True):
+    def inline_op(self, newop, ignore_result=False, clone=True,
+                  ignore_failargs=False):
         if clone:
             newop = newop.clone()
         args = newop.getarglist()
 
         if newop.is_guard():
             args = newop.getfailargs()
-            if args:
+            if args and not ignore_failargs:
                 newop.setfailargs([self.inline_arg(a) for a in args])
+            else:
+                newop.setfailargs([])
 
         if newop.result and not ignore_result:
             old_result = newop.result
         if jumpop:
             assert jumpop.getdescr() is loop.token
             loop.preamble.operations = self.optimizer.newoperations
-
             self.optimizer = self.optimizer.reconstruct_for_next_iteration()
 
             jump_args = jumpop.getarglist()
 
             loop.operations = self.optimizer.newoperations
 
+            start_resumedescr = loop.preamble.start_resumedescr.clone_if_mutable()
+            assert isinstance(start_resumedescr, ResumeGuardDescr)
+            snapshot = start_resumedescr.rd_snapshot
+            while snapshot is not None:
+                snapshot_args = snapshot.boxes 
+                new_snapshot_args = []
+                for a in snapshot_args:
+                    if not isinstance(a, Const):
+                        a = loop.preamble.inputargs[jump_args.index(a)]
+                    new_snapshot_args.append(a)
+                snapshot.boxes = new_snapshot_args
+                snapshot = snapshot.prev
+
             short = self.create_short_preamble(loop.preamble, loop)
             if short:
                 if False:
                     # FIXME: This should save some memory but requires
                     # a lot of tests to be fixed...
                     loop.preamble.operations = short[:]
-                    
+
                 # Turn guards into conditional jumps to the preamble
                 for i in range(len(short)):
                     op = short[i]
                     if op.is_guard():
                         op = op.clone()
-                        op.setfailargs(loop.preamble.inputargs)
-                        op.setjumptarget(loop.preamble.token)
+                        #op.setfailargs(loop.preamble.inputargs)
+                        #op.setjumptarget(loop.preamble.token)
+                        op.setfailargs(None)
+                        op.setdescr(start_resumedescr.clone_if_mutable())
                         short[i] = op
 
                 short_loop = TreeLoop('short preamble')
                 short_loop.inputargs = loop.preamble.inputargs[:]
                 short_loop.operations = short
 
-                assert isinstance(loop.preamble.token, LoopToken)
-                if loop.preamble.token.short_preamble:
-                    loop.preamble.token.short_preamble.append(short_loop)
-                else:
-                    loop.preamble.token.short_preamble = [short_loop]
-
                 # Clone ops and boxes to get private versions and 
                 newargs = [a.clonebox() for a in short_loop.inputargs]
                 inliner = Inliner(short_loop.inputargs, newargs)
                 ops = [inliner.inline_op(op) for op in short_loop.operations]
                 short_loop.operations = ops
 
+                assert isinstance(loop.preamble.token, LoopToken)
+                if loop.preamble.token.short_preamble:
+                    loop.preamble.token.short_preamble.append(short_loop)
+                else:
+                    loop.preamble.token.short_preamble = [short_loop]
+
                 # Forget the values to allow them to be freed
                 for box in short_loop.inputargs:
                     box.forget_value()
                     if op.result:
                         op.result.forget_value()
                 
-                if False:
-                    boxmap = {}
-                    for i in range(len(short_loop.inputargs)):
-                        box = short_loop.inputargs[i]
-                        newbox = box.clonebox()
-                        boxmap[box] = newbox
-                        newbox.forget_value()
-                        short_loop.inputargs[i] = newbox
-                    for i in range(len(short)):
-                        oldop = short[i]
-                        op = oldop.clone()
-                        args = []
-                        for a in op.getarglist():
-                            if not isinstance(a, Const):
-                                a = boxmap[a]
-                            args.append(a)
-                        op.initarglist(args)
-                        if op.is_guard():
-                            args = []
-                            for a in op.getfailargs():
-                                if not isinstance(a, Const):
-                                    a = boxmap[a]
-                                args.append(a)
-                            op.setfailargs(args)
-                        box = op.result
-                        if box:
-                            newbox = box.clonebox()
-                            boxmap[box] = newbox
-                            newbox.forget_value()
-                            op.result = newbox
-                        short[i] = op
-                
 
     def inline(self, loop_operations, loop_args, jump_args):
         self.inliner = inliner = Inliner(loop_args, jump_args)
             box1, box2 = args1[i], args2[i]
             val1 = self.optimizer.getvalue(box1)
             val2 = self.optimizer.getvalue(box2)
-            if val1 is not val2:
+            if val1.is_constant() and val2.is_constant():
+                if not val1.box.same_constant(val2.box):
+                    return False
+            elif val1 is not val2:
                 return False
 
         if not op1.is_guard():
         loop_ops = loop.operations
 
         boxmap = BoxMap()
-        state = ExeState()
+        state = ExeState(self.optimizer)
         short_preamble = []
         loop_i = preamble_i = 0
         while preamble_i < len(preamble_ops):
 
             op = preamble_ops[preamble_i]
             try:
-                newop = self.inliner.inline_op(op, True)
+                newop = self.inliner.inline_op(op, ignore_result=True,
+                                               ignore_failargs=True)
             except KeyError:
                 debug_print("create_short_preamble failed due to",
                             "new boxes created during optimization.",
                             "op:", op.getopnum(),
-                            "at position: ", preamble_i)
+                            "at preamble position: ", preamble_i,
+                            "loop position: ", loop_i)
                 return None
                 
             if self.sameop(newop, loop_ops[loop_i]) \
                     debug_print("create_short_preamble failed due to",
                                 "impossible link of "
                                 "op:", op.getopnum(),
-                                "at position: ", preamble_i)
+                                "at preamble position: ", preamble_i,
+                                "loop position: ", loop_i)
                     return None
                 loop_i += 1
             else:
                 if not state.safe_to_move(op):
                     debug_print("create_short_preamble failed due to",
                                 "unsafe op:", op.getopnum(),
-                                "at position: ", preamble_i)
+                                "at preamble position: ", preamble_i,
+                                "loop position: ", loop_i)
                     return None
                 short_preamble.append(op)
                 
         return short_preamble
 
 class ExeState(object):
-    def __init__(self):
+    def __init__(self, optimizer):
+        self.optimizer = optimizer
         self.heap_dirty = False
         self.unsafe_getitem = {}
-
+        self.unsafe_getarrayitem = {}
+        self.unsafe_getarrayitem_indexes = {}
+        
     # Make sure it is safe to move the instrucions in short_preamble
     # to the top making short_preamble followed by loop equvivalent
     # to preamble
     def safe_to_move(self, op):
         opnum = op.getopnum()
+        descr = op.getdescr()
         if op.is_always_pure() or op.is_foldable_guard():
             return True
         elif opnum == rop.JUMP:
               opnum == rop.GETFIELD_RAW):
             if self.heap_dirty:
                 return False
-            descr = op.getdescr()
             if descr in self.unsafe_getitem:
                 return False
             return True
+        elif (opnum == rop.GETARRAYITEM_GC or
+              opnum == rop.GETARRAYITEM_RAW):
+            if self.heap_dirty:
+                return False
+            if descr in self.unsafe_getarrayitem:
+                return False
+            index = op.getarg(1)
+            if isinstance(index, Const):
+                d = self.unsafe_getarrayitem_indexes.get(descr, None)
+                if d is not None:
+                    if index.getint() in d:
+                        return False
+            else:
+                if descr in self.unsafe_getarrayitem_indexes:
+                    return False
+            return True
+        elif opnum == rop.CALL:
+            effectinfo = descr.get_extra_info()
+            if effectinfo is not None:
+                if effectinfo.extraeffect == EffectInfo.EF_LOOPINVARIANT or \
+                   effectinfo.extraeffect == EffectInfo.EF_PURE:
+                    return True
         return False
     
     def update(self, op):
             op.is_guard()): 
             return
         opnum = op.getopnum()
+        descr = op.getdescr()
         if (opnum == rop.DEBUG_MERGE_POINT):
             return
         if (opnum == rop.SETFIELD_GC or
             opnum == rop.SETFIELD_RAW):
-            descr = op.getdescr()
             self.unsafe_getitem[descr] = True
             return
+        if (opnum == rop.SETARRAYITEM_GC or
+            opnum == rop.SETARRAYITEM_RAW):
+            index = op.getarg(1)
+            if isinstance(index, Const):                
+                d = self.unsafe_getarrayitem_indexes.get(descr, None)
+                if d is None:
+                    d = self.unsafe_getarrayitem_indexes[descr] = {}
+                d[index.getint()] = True
+            else:
+                self.unsafe_getarrayitem[descr] = True
+            return
+        if opnum == rop.CALL:
+            effectinfo = descr.get_extra_info()
+            if effectinfo is not None:
+                for fielddescr in effectinfo.write_descrs_fields:
+                    self.unsafe_getitem[fielddescr] = True
+                for arraydescr in effectinfo.write_descrs_arrays:
+                    self.unsafe_getarrayitem[arraydescr] = True
+                return
+        debug_print("heap dirty due to op ", opnum)
         self.heap_dirty = True
 
 class ImpossibleLink(JitException):
             assert isinstance(descr, LoopToken)
             # FIXME: Use a tree, similar to the tree formed by the full
             # preamble and it's bridges, instead of a list to save time and
-            # memory  
+            # memory. This should also allow better behaviour in
+            # situations that the is_emittable() chain currently cant
+            # handle and the inlining fails unexpectedly belwo.
             short = descr.short_preamble
             if short:
-                for sh in short:                    
+                for sh in short:
                     if self.inline(sh.operations, sh.inputargs,
                                    op.getarglist(), dryrun=True):
-                        self.inline(sh.operations, sh.inputargs,
-                                   op.getarglist())
+                        try:
+                            self.inline(sh.operations, sh.inputargs,
+                                        op.getarglist())
+                        except InvalidLoop:
+                            debug_print("Inlining failed unexpectedly",
+                                        "jumping to preamble instead")
+                            self.emit_operation(op)
                         return
                     
                 raise RetraceLoop
             newop = inliner.inline_op(op)
             
             if not dryrun:
-                # FIXME: Emit a proper guard instead to move these
-                # forceings into the the small bridge back to the preamble
-                if newop.is_guard():
-                    failargs = newop.getfailargs()
-                    for i in range(len(failargs)):
-                        box = failargs[i]
-                        if box in self.optimizer.values:
-                            value = self.optimizer.values[box]
-                            if value.is_constant():
-                                newbox = box.clonebox()
-                                op = ResOperation(rop.SAME_AS,
-                                                  [value.force_box()], newbox)
-                                self.optimizer.newoperations.append(op)
-                                box = newbox
-                            else:
-                                box = value.force_box()
-                        failargs[i] = box
-                    newop.setfailargs(failargs)
-                                
-                
                 self.emit_operation(newop)
             else:
                 if not self.is_emittable(newop):

File pypy/jit/metainterp/pyjitpl.py

 from pypy.jit.metainterp.jitexc import JitException, get_llexception
 from pypy.rlib.rarithmetic import intmask
 from pypy.rlib.objectmodel import specialize
-from pypy.jit.codewriter.jitcode import JitCode, SwitchDictDescr
+from pypy.jit.codewriter.jitcode import JitCode, SwitchDictDescr, MissingLiveness
 from pypy.jit.codewriter import heaptracker
 from pypy.jit.metainterp.optimizeutil import RetraceLoop
 
         self.generate_guard(rop.GUARD_CLASS, box, [clsbox], resumepc=orgpc)
         return clsbox
 
-    @arguments("int")
-    def opimpl_loop_header(self, jdindex):
+    @arguments("int", "orgpc")
+    def opimpl_loop_header(self, jdindex, orgpc):
         self.metainterp.seen_loop_header_for_jdindex = jdindex
 
     def verify_green_args(self, jitdriver_sd, varargs):
     @arguments("orgpc", "int", "boxes3", "jitcode_position", "boxes3")
     def opimpl_jit_merge_point(self, orgpc, jdindex, greenboxes,
                                jcposition, redboxes):
+        resumedescr = compile.ResumeAtPositionDescr()
+        self.capture_resumedata(resumedescr, orgpc)
+        
         any_operation = len(self.metainterp.history.operations) > 0
         jitdriver_sd = self.metainterp.staticdata.jitdrivers_sd[jdindex]
         self.verify_green_args(jitdriver_sd, greenboxes)
         # xxx we may disable the following line in some context later
         self.debug_merge_point(jitdriver_sd, self.metainterp.in_recursion,
                                greenboxes)
+
         if self.metainterp.seen_loop_header_for_jdindex < 0:
             if not jitdriver_sd.no_loop_header or not any_operation:
                 return
             "found a loop_header for a JitDriver that does not match "
             "the following jit_merge_point's")
         self.metainterp.seen_loop_header_for_jdindex = -1
+        
         #
         if not self.metainterp.in_recursion:
             assert jitdriver_sd is self.metainterp.jitdriver_sd
             # much less expensive to blackhole out of.
             saved_pc = self.pc
             self.pc = orgpc
-            self.metainterp.reached_loop_header(greenboxes, redboxes)
+            self.metainterp.reached_loop_header(greenboxes, redboxes, resumedescr)
             self.pc = saved_pc
             # no exception, which means that the jit_merge_point did not
             # close the loop.  We have to put the possibly-modified list
             resumedescr = compile.ResumeGuardDescr()
         guard_op = metainterp.history.record(opnum, moreargs, None,
                                              descr=resumedescr)
+        self.capture_resumedata(resumedescr, resumepc)
+        self.metainterp.staticdata.profiler.count_ops(opnum, GUARDS)
+        # count
+        metainterp.attach_debug_info(guard_op)
+        return guard_op
+
+    def capture_resumedata(self, resumedescr, resumepc=-1):
+        metainterp = self.metainterp
         virtualizable_boxes = None
         if (metainterp.jitdriver_sd.virtualizable_info is not None or
             metainterp.jitdriver_sd.greenfield_info is not None):
             virtualizable_boxes = metainterp.virtualizable_boxes
         saved_pc = self.pc
-        if resumepc >= 0:
-            self.pc = resumepc
-        resume.capture_resumedata(metainterp.framestack, virtualizable_boxes,
-                                  metainterp.virtualref_boxes, resumedescr)
-        self.pc = saved_pc
-        self.metainterp.staticdata.profiler.count_ops(opnum, GUARDS)
-        # count
-        metainterp.attach_debug_info(guard_op)
-        return guard_op
+        try:
+            if resumepc >= 0:
+                self.pc = resumepc
+            resume.capture_resumedata(metainterp.framestack, virtualizable_boxes,
+                                      metainterp.virtualref_boxes, resumedescr)
+        finally:
+            self.pc = saved_pc
 
     def implement_guard_value(self, orgpc, box):
         """Promote the given Box into a Const.  Note: be careful, it's a
                     self.execute_varargs(rop.CALL, allboxes, descr, False))
             elif effect == effectinfo.EF_LOOPINVARIANT:
                 return self.execute_varargs(rop.CALL_LOOPINVARIANT, allboxes,
-                                            descr, True)
+                                            descr, False)
             else:
                 return self.execute_varargs(rop.CALL, allboxes, descr, True)
 
         self.current_merge_points = []
         self.resumekey = key
         self.seen_loop_header_for_jdindex = -1
-        try:
-            self.prepare_resume_from_failure(key.guard_opnum)
+        if isinstance(key, compile.ResumeAtPositionDescr):
+            self.seen_loop_header_for_jdindex = self.jitdriver_sd.index
+            dont_change_position = True
+        else:
+            dont_change_position = False
+        try:            
+            self.prepare_resume_from_failure(key.guard_opnum, dont_change_position)
             if self.resumekey_original_loop_token is None:   # very rare case
                 raise SwitchToBlackhole(ABORT_BRIDGE)
             self.interpret()
             else:
                 duplicates[box] = None
 
-    def reached_loop_header(self, greenboxes, redboxes):
+    def reached_loop_header(self, greenboxes, redboxes, resumedescr):
         duplicates = {}
         self.remove_consts_and_duplicates(redboxes, len(redboxes),
                                           duplicates)
                     bridge_arg_boxes = self.retracing_loop_from.live_arg_boxes
                     self.compile_bridge_and_loop(original_boxes, \
                                                  live_arg_boxes, start,
-                                                 bridge_arg_boxes)
+                                                 bridge_arg_boxes, resumedescr)
                 else:
-                    self.compile(original_boxes, live_arg_boxes, start)
+                    self.compile(original_boxes, live_arg_boxes, start, resumedescr)
                 # creation of the loop was cancelled!
                 #self.staticdata.log('cancelled, tracing more...')
                 self.staticdata.log('cancelled, stopping tracing')
         history.set_future_values(self.cpu, residual_args)
         return loop_token
 
-    def prepare_resume_from_failure(self, opnum):
+    def prepare_resume_from_failure(self, opnum, dont_change_position=False):
         frame = self.framestack[-1]
         if opnum == rop.GUARD_TRUE:     # a goto_if_not that jumps only now
-            frame.pc = frame.jitcode.follow_jump(frame.pc)
+            if not dont_change_position:
+                frame.pc = frame.jitcode.follow_jump(frame.pc)
         elif opnum == rop.GUARD_FALSE:     # a goto_if_not that stops jumping
             pass
         elif opnum == rop.GUARD_VALUE or opnum == rop.GUARD_CLASS:
         cell = self.jitdriver_sd.warmstate.jit_cell_at_key(greenkey)
         cell.set_compiled_merge_points(looptokens)
 
-    def compile(self, original_boxes, live_arg_boxes, start):
+    def compile(self, original_boxes, live_arg_boxes, start, start_resumedescr):
         num_green_args = self.jitdriver_sd.num_green_args
         self.history.inputargs = original_boxes[num_green_args:]
         greenkey = original_boxes[:num_green_args]
         old_loop_tokens = self.get_compiled_merge_points(greenkey)
         self.history.record(rop.JUMP, live_arg_boxes[num_green_args:], None)
         loop_token = compile.compile_new_loop(self, old_loop_tokens,
-                                              greenkey, start)
+                                              greenkey, start, start_resumedescr)
         if loop_token is not None: # raise if it *worked* correctly
             self.set_compiled_merge_points(greenkey, old_loop_tokens)
             raise GenerateMergePoint(live_arg_boxes, loop_token)
             self.history.operations.pop()     # remove the JUMP
 
     def compile_bridge_and_loop(self, original_boxes, live_arg_boxes, start,
-                                bridge_arg_boxes):
+                                bridge_arg_boxes, start_resumedescr):
         num_green_args = self.jitdriver_sd.num_green_args
         original_inputargs = self.history.inputargs
         greenkey = original_boxes[:num_green_args]
         self.history.inputargs = original_boxes[num_green_args:]
         greenkey = original_boxes[:num_green_args]
         self.history.record(rop.JUMP, live_arg_boxes[num_green_args:], None)
-        loop_token = compile.compile_new_loop(self, [], greenkey, start)
+        loop_token = compile.compile_new_loop(self, [], greenkey, start, start_resumedescr)
         self.history.operations.pop()     # remove the JUMP
         if loop_token is None:
             return

File pypy/jit/metainterp/simple_optimize.py

             jumpop.setdescr(loop.token)
         return None
 
-def optimize_bridge(metainterp_sd, old_loops, loop):
+def optimize_bridge(metainterp_sd, old_loops, loop, inline_short_preamble):
     optimize_loop(metainterp_sd, [], loop)
     jumpop = loop.operations[-1]
     if jumpop.getopnum() == rop.JUMP:

File pypy/jit/metainterp/test/test_basic.py

                           'int_add': 1, 'int_mul': 1, 'int_sub': 2,
                           'int_gt': 2, 'jump': 2})
 
+    def test_multiple_specialied_versions_array(self):
+        myjitdriver = JitDriver(greens = [], reds = ['idx', 'y', 'x', 'res',
+                                                     'array'])
+        class Base:
+            def __init__(self, val):
+                self.val = val
+        class A(Base):
+            def binop(self, other):
+                return A(self.val + other.val)
+        class B(Base):
+            def binop(self, other):
+                return B(self.val - other.val)
+        def f(x, y):
+            res = x
+            array = [1, 2, 3]
+            array[1] = 7
+            idx = 0
+            while y > 0:
+                myjitdriver.can_enter_jit(idx=idx, y=y, x=x, res=res,
+                                          array=array)
+                myjitdriver.jit_merge_point(idx=idx, y=y, x=x, res=res,
+                                            array=array)
+                res = res.binop(x)
+                res.val += array[idx] + array[1]
+                if y < 7:
+                    idx = 2
+                y -= 1
+            return res
+        def g(x, y):
+            a1 = f(A(x), y)
+            a2 = f(A(x), y)
+            b1 = f(B(x), y)
+            b2 = f(B(x), y)
+            assert a1.val == a2.val
+            assert b1.val == b2.val
+            return a1.val + b1.val
+        res = self.meta_interp(g, [6, 14])
+        assert res == g(6, 14)
+        self.check_loop_count(8)
+        self.check_loops(getarrayitem_gc=16, everywhere=True)
+
     def test_multiple_specialied_versions_bridge(self):
         myjitdriver = JitDriver(greens = [], reds = ['y', 'x', 'z', 'res'])
         class Base:
         def g(x, y):
             a1 = f(A(x), y, A(x))
             a2 = f(A(x), y, A(x))
+            assert a1.val == a2.val
             b1 = f(B(x), y, B(x))
             b2 = f(B(x), y, B(x))
+            assert b1.val == b2.val
             c1 = f(B(x), y, A(x))
             c2 = f(B(x), y, A(x))
+            assert c1.val == c2.val
             d1 = f(A(x), y, B(x))
             d2 = f(A(x), y, B(x))
-            assert a1.val == a2.val
-            assert b1.val == b2.val
-            assert c1.val == c2.val
             assert d1.val == d2.val
             return a1.val + b1.val + c1.val + d1.val
         res = self.meta_interp(g, [3, 14])
         assert res == g(3, 14)
 
+    def test_failing_inlined_guard(self):
+        myjitdriver = JitDriver(greens = [], reds = ['y', 'x', 'z', 'res'])
+        class Base:
+            def __init__(self, val):
+                self.val = val
+            def getval(self):
+                return self.val
+        class A(Base):
+            def binop(self, other):
+                return A(self.getval() + other.getval())
+        class B(Base):
+            def binop(self, other):
+                return B(self.getval() * other.getval())
+        def f(x, y, z):
+            res = x
+            while y > 0:
+                myjitdriver.can_enter_jit(y=y, x=x, z=z, res=res)
+                myjitdriver.jit_merge_point(y=y, x=x, z=z, res=res)
+                res = res.binop(x)
+                y -= 1
+                if y < 8:
+                    x = z
+            return res
+        def g(x, y):
+            c1 = f(A(x), y, B(x))
+            c2 = f(A(x), y, B(x))
+            assert c1.val == c2.val
+            return c1.val
+        res = self.meta_interp(g, [3, 16])
+        assert res == g(3, 16)
+
+    def test_inlined_guard_in_short_preamble(self):
+        myjitdriver = JitDriver(greens = [], reds = ['y', 'x', 'z', 'res'])
+        class A:
+            def __init__(self, val):
+                self.val = val
+            def getval(self):
+                return self.val
+            def binop(self, other):
+                return A(self.getval() + other.getval())
+        def f(x, y, z):
+            res = x
+            while y > 0:
+                myjitdriver.can_enter_jit(y=y, x=x, z=z, res=res)
+                myjitdriver.jit_merge_point(y=y, x=x, z=z, res=res)
+                res = res.binop(x)
+                y -= 1
+                if y < 7:
+                    x = z
+            return res
+        def g(x, y):
+            a1 = f(A(x), y, A(x))
+            a2 = f(A(x), y, A(x))
+            assert a1.val == a2.val
+            return a1.val
+        res = self.meta_interp(g, [3, 14])
+        assert res == g(3, 14)
+
     def test_specialied_bridge(self):
         myjitdriver = JitDriver(greens = [], reds = ['y', 'x', 'res'])
         class A:
         self.check_loops(guard_true=4, guard_class=0, int_add=2, int_mul=2,
                          guard_false=2)
 
+    def test_dont_trace_every_iteration(self):
+        myjitdriver = JitDriver(greens = [], reds = ['a', 'b', 'i', 'sa'])
+        
+        def main(a, b):
+            i = sa = 0
+            #while i < 200:
+            while i < 200:
+                myjitdriver.can_enter_jit(a=a, b=b, i=i, sa=sa)
+                myjitdriver.jit_merge_point(a=a, b=b, i=i, sa=sa)
+                if a > 0: pass
+                if b < 2: pass
+                sa += a % b
+                i += 1
+            return sa
+        def g():
+            return main(10, 20) + main(-10, -20)
+        res = self.meta_interp(g, [])
+        assert res == g()
+        self.check_enter_count(3)
+
     def test_current_trace_length(self):
         myjitdriver = JitDriver(greens = ['g'], reds = ['x'])
         @dont_look_inside

File pypy/jit/metainterp/test/test_compile.py

     metainterp.history.inputargs = loop.inputargs[:]
     #
     loop_tokens = []
-    loop_token = compile_new_loop(metainterp, loop_tokens, [], 0)
+    loop_token = compile_new_loop(metainterp, loop_tokens, [], 0, None)
     assert loop_tokens == [loop_token]
     assert loop_token.number == 1
     assert staticdata.globaldata.loopnumbering == 2
     metainterp.history.operations = loop.operations[:]
     metainterp.history.inputargs = loop.inputargs[:]
     #
-    loop_token_2 = compile_new_loop(metainterp, loop_tokens, [], 0)
+    loop_token_2 = compile_new_loop(metainterp, loop_tokens, [], 0, None)
     assert loop_token_2 is loop_token
     assert loop_tokens == [loop_token]
     assert len(cpu.seen) == 0

File pypy/jit/metainterp/test/test_optimizebasic.py

         guard_false(i2) []
         i3 = int_add(i1, 1)
         i331 = force_token()
-        setfield_gc(p0, i1, descr=valuedescr)
         jump(p0, i22)
         """
         self.optimize_loop(ops, expected)
         guard_no_overflow() []
         i2 = int_gt(i1, 1)
         guard_true(i2) []
-        i3 = int_sub(1, i0)
         jump(i0)
         """
         self.optimize_loop(ops, expected)

File pypy/jit/metainterp/test/test_optimizeopt.py

             metainterp_sd.virtualref_info = self.vrefinfo
         if hasattr(self, 'callinfocollection'):
             metainterp_sd.callinfocollection = self.callinfocollection
+        class FakeDescr(compile.ResumeGuardDescr):
+            class rd_snapshot:
+                class prev:
+                    prev = None
+                    boxes = []
+                boxes = []
+            def clone_if_mutable(self):
+                return self
+        loop.preamble.start_resumedescr = FakeDescr()
         optimize_loop_1(metainterp_sd, loop)
         #
 
         guard_nonnull(p0) []
         jump(p0)
         """
-        expected = """
+        preamble = """
         [p0]
         setfield_gc(p0, 5, descr=valuedescr)
         jump(p0)
         """
-        self.optimize_loop(ops, expected)
+        expected = """
+        [p0]
+        jump(p0)
+        """
+        self.optimize_loop(ops, expected, preamble)
 
     def test_const_guard_value(self):
         ops = """
 
     def test_call_assembler_invalidates_caches(self):
         ops = '''
-        [p1, i1]
+        [p1, i1, i4]
         setfield_gc(p1, i1, descr=valuedescr)
         i3 = call_assembler(i1, descr=asmdescr)
         setfield_gc(p1, i3, descr=valuedescr)
-        jump(p1, i3)
+        jump(p1, i4, i3)
         '''
-        self.optimize_loop(ops, ops)
+        self.optimize_loop(ops, ops, ops)
+
+    def test_call_assembler_invalidates_heap_knowledge(self):
+        ops = '''
+        [p1, i1, i4]
+        setfield_gc(p1, i1, descr=valuedescr)
+        i3 = call_assembler(i1, descr=asmdescr)
+        setfield_gc(p1, i1, descr=valuedescr)
+        jump(p1, i4, i3)
+        '''
+        self.optimize_loop(ops, ops, ops)
 
     def test_call_pure_invalidates_caches(self):
         # CALL_PURE should still force the setfield_gc() to occur before it
         ops = '''
-        [p1, i1]
+        [p1, i1, i4]
         setfield_gc(p1, i1, descr=valuedescr)
         i3 = call_pure(42, p1, descr=plaincalldescr)
         setfield_gc(p1, i3, descr=valuedescr)
-        jump(p1, i3)
+        jump(p1, i4, i3)
         '''
         expected = '''
-        [p1, i1]
+        [p1, i1, i4]
         setfield_gc(p1, i1, descr=valuedescr)
         i3 = call(p1, descr=plaincalldescr)
         setfield_gc(p1, i3, descr=valuedescr)
-        jump(p1, i3)
+        jump(p1, i4, i3)
         '''
-        self.optimize_loop(ops, expected)
+        self.optimize_loop(ops, expected, expected)
+
+    def test_call_pure_invalidates_heap_knowledge(self):
+        # CALL_PURE should still force the setfield_gc() to occur before it
+        ops = '''
+        [p1, i1, i4]
+        setfield_gc(p1, i1, descr=valuedescr)
+        i3 = call_pure(42, p1, descr=plaincalldescr)
+        setfield_gc(p1, i1, descr=valuedescr)
+        jump(p1, i4, i3)
+        '''
+        expected = '''
+        [p1, i1, i4]
+        setfield_gc(p1, i1, descr=valuedescr)
+        i3 = call(p1, descr=plaincalldescr)
+        setfield_gc(p1, i1, descr=valuedescr)
+        jump(p1, i4, i3)
+        '''
+        self.optimize_loop(ops, expected, expected)
 
     def test_call_pure_constant_folding(self):
         # CALL_PURE is not marked as is_always_pure(), because it is wrong
         """
         expected = """
         [i0]
-        i2 = int_add(i0, 10)
         jump(i0)
         """
         self.optimize_loop(ops, expected, preamble)
         jump(p0, i22)
         """
         expected = """
-        [p0, i22, i1]
+        [p0, i22]
         i331 = force_token()
-        setfield_gc(p0, i1, descr=valuedescr)
-        jump(p0, i22, i1)
+        jump(p0, i22)
         """
         self.optimize_loop(ops, expected)
 
         guard_no_overflow() []
         i2 = int_gt(i1, 1)
         guard_true(i2) []
-        i3 = int_sub(1, i0)
         jump(i0)
         """
         expected = """
         guard_true(i4) []
         i5 = int_gt(i3, 2)
         guard_true(i5) []
-        jump(i0, i1, i22)
-        """
-        expected = """
-        [i0, i1, i22]
-        i3 = int_mul(i22, i1)
-        i4 = int_lt(i3, 10)
-        guard_true(i4) []
-        i5 = int_gt(i3, 2)
-        guard_true(i5) []
-        jump(i0, i1, i22)
+        jump(i0, i1)
+        """
+        expected = """
+        [i0, i1]
+        jump(i0, i1)
         """
         self.optimize_loop(ops, expected, preamble)
 
         guard_true(i4) []
         i5 = int_ge(i3, 2)
         guard_true(i5) []
-        jump(i0, i1, i2)
-        """
-        expected = """
-        [i0, i1, i2]
-        i3 = int_sub(i2, i1)
-        i4 = int_le(i3, 10)
-        guard_true(i4) []
-        i5 = int_ge(i3, 2)
-        guard_true(i5) []
-        jump(i0, i1, i2)
+        jump(i0, i1)
+        """
+        expected = """
+        [i0, i1]
+        jump(i0, i1)
         """
         self.optimize_loop(ops, expected, preamble)
 
+    def test_invariant_ovf(self):
+        ops = """
+        [i0, i1, i10, i11, i12]
+        i2 = int_add_ovf(i0, i1)
+        guard_no_overflow() []
+        i3 = int_sub_ovf(i0, i1)
+        guard_no_overflow() []
+        i4 = int_mul_ovf(i0, i1)
+        guard_no_overflow() []
+        i24 = int_mul_ovf(i10, i11)
+        guard_no_overflow() []
+        i23 = int_sub_ovf(i10, i11)
+        guard_no_overflow() []
+        i22 = int_add_ovf(i10, i11)
+        guard_no_overflow() []
+        jump(i0, i1, i2, i3, i4)
+        """
+        expected = """
+        [i0, i1, i10, i11, i12]
+        i24 = int_mul_ovf(i10, i11)
+        guard_no_overflow() []
+        i23 = int_sub_ovf(i10, i11)
+        guard_no_overflow() []
+        i22 = int_add_ovf(i10, i11)
+        guard_no_overflow() []
+        jump(i0, i1, i10, i11, i12)
+        """
+        self.optimize_loop(ops, expected, ops)
+
     def test_value_proven_to_be_constant_after_two_iterations(self):
         class FakeDescr(AbstractDescr):
             def __init__(self, name):
         """
         self.optimize_loop(ops, expected)
 
+    def test_let_getfield_kill_setfields(self):
+        ops = """
+        [p0]
+        p1 = getfield_gc(p0, descr=valuedescr)
+        setfield_gc(p0, p1, descr=valuedescr)
+        setfield_gc(p0, p1, descr=valuedescr)
+        setfield_gc(p0, p0, descr=valuedescr)        
+        jump(p0)
+        """
+        preamble = """
+        [p0]
+        p1 = getfield_gc(p0, descr=valuedescr)
+        setfield_gc(p0, p0, descr=valuedescr)                
+        jump(p0)
+        """
+        expected = """
+        [p0]
+        jump(p0)
+        """
+        self.optimize_loop(ops, expected, preamble)
+
+    def test_let_getfield_kill_chained_setfields(self):
+        ops = """
+        [p0]
+        p1 = getfield_gc(p0, descr=valuedescr)
+        setfield_gc(p0, p0, descr=valuedescr)        
+        setfield_gc(p0, p1, descr=valuedescr)
+        setfield_gc(p0, p1, descr=valuedescr)
+        jump(p0)
+        """
+        preamble = """
+        [p0]
+        p1 = getfield_gc(p0, descr=valuedescr)
+        jump(p0)
+        """
+        expected = """
+        [p0]
+        jump(p0)
+        """
+        self.optimize_loop(ops, expected, preamble)
+
     def test_inputargs_added_by_forcing_jumpargs(self):
         # FXIME: Can this occur?
         ops = """
         # more generally, supporting non-constant but virtual cases is
         # not obvious, because of the exception UnicodeDecodeError that
         # can be raised by ll_str2unicode()
+        
 
 
 

File pypy/jit/tl/pypyjit_demo.py

     import pypyjit
     pypyjit.set_param(threshold=3, inlining=True)
 
-    def main():
-        i=a=0
-        while i<10:
-            i+=1
-            a+=1
-        return a
+    def sqrt(y, n=10000):
+        x = y / 2
+        while n > 0:
+            #assert y > 0 and x > 0
+            if y > 0 and x > 0: pass
+            n -= 1
+            x = (x + y/x) / 2
+        return x
 
-    print main()
+    print sqrt(1234, 4)
     
 except Exception, e:
     print "Exception: ", type(e)

File pypy/module/pypyjit/test/test_pypy_c.py

         _, self.sliced_entrybridge, _ = \
                                     self.parse_rawloops(self.rawentrybridges)
 
+        from pypy.jit.tool.jitoutput import parse_prof
+        summaries  = logparser.extract_category(log, 'jit-summary')
+        if len(summaries) > 0:
+            self.jit_summary = parse_prof(summaries[-1])
+        else:
+            self.jit_summary = None
+        
+
     def parse_rawloops(self, rawloops):
         from pypy.jit.tool.oparser import parse
         loops = [parse(part, no_namespace=True) for part in rawloops]
                          count_debug_merge_point=False)
         
     def test_mod(self):
-        py.test.skip('Results are correct, but traces 1902 times (on trunk too).')
         avalues = ('a', 'b', 7, -42, 8)
         bvalues = ['b'] + range(-10, 0) + range(1,10)
         code = ''
 %s
                 i += 1
             return sa
-        ''' % code, 0, ([a1, b1], 2000 * res1),
+        ''' % code, 150, ([a1, b1], 2000 * res1),
                          ([a2, b2], 2000 * res2),
                          ([a3, b3], 2000 * res3),
                          count_debug_merge_point=False)
         
+    def test_dont_trace_every_iteration(self):
+        self.run_source('''
+        def main(a, b):
+            i = sa = 0
+            while i < 200:
+                if a > 0: pass
+                if 1 < b < 2: pass
+                sa += a % b
+                i += 1
+            return sa
+        ''', 11,  ([10, 20], 200 * (10 % 20)),
+                 ([-10, -20], 200 * (-10 % -20)),
+                        count_debug_merge_point=False)
+        assert self.jit_summary.tracing_no == 2
     def test_id_compare_optimization(self):
         # XXX: lower the instruction count, 35 is the old value.
         self.run_source("""