Commits

Maciej Fijalkowski committed cd6fe2b

(arigo, fijal, alex lurking) Implement asmgcc for this branch

Comments (0)

Files changed (6)

rpython/jit/backend/llsupport/assembler.py

         self._build_wb_slowpath(True)
         self._build_wb_slowpath(False, for_frame=True)
         # only one of those
-        self._build_stack_check_failure()
+        self.build_frame_realloc_slowpath()
         if self.cpu.supports_floats:
             self._build_failure_recovery(False, withfloats=True)
             self._build_failure_recovery(True, withfloats=True)

rpython/jit/backend/llsupport/gc.py

         descrs = JitFrameDescrs()
         descrs.arraydescr = cpu.arraydescrof(jitframe.JITFRAME)
         for name in ['jf_descr', 'jf_guard_exc', 'jf_force_descr',
-                     'jf_frame_info', 'jf_gcmap']:
+                     'jf_frame_info', 'jf_gcmap', 'jf_extra_stack_depth']:
             setattr(descrs, name, cpu.fielddescrof(jitframe.JITFRAME, name))
         descrs.jfi_frame_size = cpu.fielddescrof(jitframe.JITFRAMEINFO,
                                                   'jfi_frame_size')
         translator = self.translator
         self.layoutbuilder = framework.TransformerLayoutBuilder(translator)
         self.layoutbuilder.delay_encoding()
-        # XXX this can probably die horrible death
         translator._jit2gc = {'layoutbuilder': self.layoutbuilder}
 
     def _setup_gcclass(self):
 
     def _setup_tid(self):
         self.fielddescr_tid = get_field_descr(self, self.GCClass.HDR, 'tid')
+        frame_tid = self.layoutbuilder.get_type_id(jitframe.JITFRAME)
+        self.translator._jit2gc['frame_tid'] = frame_tid
 
     def _setup_write_barrier(self):
         self.WB_FUNCPTR = lltype.Ptr(lltype.FuncType(

rpython/jit/backend/llsupport/jitframe.py

     ('jf_force_descr', llmemory.GCREF),
     # a map of GC pointers
     ('jf_gcmap', lltype.Ptr(GCMAP)),
+    # how much we decrease stack pointer. Used around calls and malloc slowpath
+    ('jf_extra_stack_depth', lltype.Signed),
     # For the front-end: a GCREF for the savedata
     ('jf_savedata', llmemory.GCREF),
     # For GUARD_(NO)_EXCEPTION and GUARD_NOT_FORCED: the exception we
 LENGTHOFS = llmemory.arraylengthoffset(JITFRAME.jf_frame)
 SIGN_SIZE = llmemory.sizeof(lltype.Signed)
 UNSIGN_SIZE = llmemory.sizeof(lltype.Unsigned)
+STACK_DEPTH_OFS = getofs('jf_extra_stack_depth')
 
 def jitframe_trace(obj_addr, prev):
     if prev == llmemory.NULL:

rpython/jit/backend/x86/assembler.py

         self.float_const_neg_addr = float_constants
         self.float_const_abs_addr = float_constants + 16
 
-    def _build_stack_check_failure(self):
+    def build_frame_realloc_slowpath(self):
         mc = codebuf.MachineCodeBlockWrapper()
         self._push_all_regs_to_frame(mc, [], self.cpu.supports_floats)
         # this is the gcmap stored by push_gcmap(mov=True) in _check_stack_frame
             mc.MOV_sr(0, ebp.value)
         # align
 
+        extra_ofs = self.cpu.get_ofs_of_frame_field('jf_extra_stack_depth')
+        mc.MOV_bi(extra_ofs, align * WORD)
         self._store_and_reset_exception(mc, None, ebx, ecx)
 
         mc.CALL(imm(self.cpu.realloc_frame))
         self._restore_exception(mc, None, ebx, ecx)
         mc.ADD_ri(esp.value, (align - 1) * WORD)
         mc.MOV_rr(ebp.value, eax.value)
+        mc.MOV_bi(extra_ofs, 0)
 
 
         gcrootmap = self.cpu.gc_ll_descr.gcrootmap
         mc.MOV_bi(gcmap_ofs, 0)
         self._pop_all_regs_from_frame(mc, [], self.cpu.supports_floats)
         mc.RET()
-        self._stack_check_failure = mc.materialize(self.cpu.asmmemmgr, [])
+        self._frame_realloc_slowpath = mc.materialize(self.cpu.asmmemmgr, [])
 
     def _build_malloc_slowpath(self):
         """ While arriving on slowpath, we have a gcpattern on stack,
         mc.MOV_rs(ecx.value, WORD)
         mc.MOV_br(ofs, ecx.value)
         addr = self.cpu.gc_ll_descr.get_malloc_slowpath_addr()
-        # XXX investigate if we need to save callee-saved registers
-        #     on the frame
         mc.SUB_rr(edi.value, eax.value)       # compute the size we want
         # the arg is already in edi
         mc.SUB_ri(esp.value, 16 - WORD)
         elif hasattr(self.cpu.gc_ll_descr, 'passes_frame'):
             # for tests only
             mc.MOV_rr(esi.value, ebp.value)
+        extra_ofs = self.cpu.get_ofs_of_frame_field('jf_extra_stack_depth')
+        mc.MOV_bi(extra_ofs, 16)
         mc.CALL(imm(addr))
         mc.ADD_ri(esp.value, 16 - WORD)
         mc.TEST_rr(eax.value, eax.value)
         #
         nursery_free_adr = self.cpu.gc_ll_descr.get_nursery_free_addr()
         self._reload_frame_if_necessary(mc)
+        mc.MOV_bi(extra_ofs, 0)
         self._pop_all_regs_from_frame(mc, [eax, edi], self.cpu.supports_floats)
         mc.MOV(edi, heap(nursery_free_adr))   # load this in EDI
         # clear the gc pattern
             mc.MOV_si(WORD, expected_size)            
         ofs2 = mc.get_relative_pos() - 4
         self.push_gcmap(mc, gcmap, mov=True)
-        mc.CALL(imm(self._stack_check_failure))
+        mc.CALL(imm(self._frame_realloc_slowpath))
         # patch the JG above
         offset = mc.get_relative_pos() - jg_location
         assert 0 < offset <= 127
         return rst
 
     def _call_header_shadowstack(self, gcrootmap):
-        # we don't *really* have to do it, since we have the frame
-        # being referenced by the caller. However, we still do it
-        # to provide a place where we can read the frame from, in case
-        # we need to reload it after a collection
         rst = self._load_shadowstack_top_in_ebx(self.mc, gcrootmap)
         self.mc.MOV_mr((ebx.value, 0), ebp.value)      # MOV [ebx], ebp
         self.mc.ADD_ri(ebx.value, WORD)
             stack_depth = align_stack_words(stack_depth)
             align = (stack_depth - PASS_ON_MY_FRAME)
             self.mc.SUB_ri(esp.value, align * WORD)
+            if can_collect:
+                ofs = self.cpu.get_ofs_of_frame_field('jf_extra_stack_depth')
+                self.mc.MOV_bi(ofs, align * WORD)
         else:
             align = 0
         p = 0
         self.mc.CALL(x)
         if can_collect:
             self._reload_frame_if_necessary(self.mc)
-        if can_collect:
+            if align:
+                ofs = self.cpu.get_ofs_of_frame_field('jf_extra_stack_depth')
+                self.mc.MOV_bi(ofs, 0)
             self.pop_gcmap(self.mc)
         #
         if callconv != FFI_DEFAULT_ABI:
         if stack_depth > PASS_ON_MY_FRAME:
             stack_depth = align_stack_words(stack_depth)
             align = (stack_depth - PASS_ON_MY_FRAME)
+            if can_collect:
+                ofs = self.cpu.get_ofs_of_frame_field('jf_extra_stack_depth')
+                self.mc.MOV_bi(ofs, align * WORD)
             self.mc.SUB_ri(esp.value, align * WORD)
         for i in range(start, len(arglocs)):
             loc = arglocs[i]
         self.mc.CALL(x)
         if can_collect:
             self._reload_frame_if_necessary(self.mc)
+            if align:
+                ofs = self.cpu.get_ofs_of_frame_field('jf_extra_stack_depth')
+                self.mc.MOV_bi(ofs, 0)
         if align:
             self.mc.ADD_ri(esp.value, align * WORD)
         if can_collect:

rpython/jit/backend/x86/regalloc.py

         #  - at least the non-callee-saved registers
         #
         #  - for shadowstack, we assume that any call can collect, and we
-        #    save also the callee-saved registers that contain GC pointers.
+        #    save also the callee-saved registers that contain GC pointers
+        #    XXX for asmgcc too for now.
         #
         #  - for CALL_MAY_FORCE or CALL_ASSEMBLER, we have to save all regs
         #    anyway, in case we need to do cpu.force().  The issue is that
         self.xrm.before_call(force_store, save_all_regs=save_all_regs)
         if not save_all_regs:
             gcrootmap = self.assembler.cpu.gc_ll_descr.gcrootmap
-            if gcrootmap and gcrootmap.is_shadow_stack:
+            # we save all the registers for shadowstack and asmgcc for now
+            if gcrootmap: # and gcrootmap.is_shadow_stack:
                 save_all_regs = 2
         self.rm.before_call(force_store, save_all_regs=save_all_regs)
         if op.result is not None:

rpython/rtyper/memory/gctransform/asmgcroot.py

             self.walk_stack_from()
         self._asm_callback = _asm_callback
         self._shape_decompressor = ShapeDecompressor()
-        if hasattr(gctransformer.translator, '_jit2gc'):
+        self._with_jit = hasattr(gctransformer.translator, '_jit2gc')
+        if self._with_jit:
             jit2gc = gctransformer.translator._jit2gc
-            self._extra_gcmapstart  = jit2gc['gcmapstart']
-            self._extra_gcmapend    = jit2gc['gcmapend']
-            self._extra_mark_sorted = jit2gc['gcmarksorted']
-        else:
-            self._extra_gcmapstart  = lambda: llmemory.NULL
-            self._extra_gcmapend    = lambda: llmemory.NULL
-            self._extra_mark_sorted = lambda: True
+            self.frame_tid = jit2gc['frame_tid']
 
     def need_stacklet_support(self, gctransformer, getfn):
         # stacklet support: BIG HACK for rlib.rstacklet
         # try to locate the caller function based on retaddr.
         # set up self._shape_decompressor.
         #
-        self.locate_caller_based_on_retaddr(retaddr)
+        ebp_in_caller = callee.regs_stored_at[INDEX_OF_EBP].address[0]
+        self.locate_caller_based_on_retaddr(retaddr, ebp_in_caller)
         #
         # found!  Enumerate the GC roots in the caller frame
         #
         collect_stack_root = self.gcdata._gc_collect_stack_root
-        ebp_in_caller = callee.regs_stored_at[INDEX_OF_EBP].address[0]
         gc = self.gc
         while True:
             location = self._shape_decompressor.next()
         # of the entry point, stop walking"
         return caller.frame_address != llmemory.NULL
 
-    def locate_caller_based_on_retaddr(self, retaddr):
+    def locate_caller_based_on_retaddr(self, retaddr, ebp_in_caller):
+        # Set up self._shape_decompressor for the caller identified by
+        # 'retaddr'.  The lookup proceeds in three steps:
+        #   1. search the static gcmap table;
+        #   2. if that fails and the table has not been sorted yet, sort
+        #      it and search once more;
+        #   3. if that fails too and a JIT is present, treat the frame as
+        #      JIT-generated: 'ebp_in_caller' must then point to a JITFRAME
+        #      (checked via its type id), from which the extra stack depth
+        #      is read.
+        # If none of the steps succeeds, a fatal error is reported.
         gcmapstart = llop.gc_asmgcroot_static(llmemory.Address, 0)
         gcmapend   = llop.gc_asmgcroot_static(llmemory.Address, 1)
         item = search_in_gcmap(gcmapstart, gcmapend, retaddr)
         if item:
             self._shape_decompressor.setpos(item.signed[1])
             return
-        gcmapstart2 = self._extra_gcmapstart()
-        gcmapend2   = self._extra_gcmapend()
-        if gcmapstart2 != gcmapend2:
-            # we have a non-empty JIT-produced table to look in
-            item = search_in_gcmap2(gcmapstart2, gcmapend2, retaddr)
+
+        if not self._shape_decompressor.sorted:
+            # the item may not have been found because the main array was
+            # not sorted yet.  Sort it and try again.
+            win32_follow_gcmap_jmp(gcmapstart, gcmapend)
+            sort_gcmap(gcmapstart, gcmapend)
+            self._shape_decompressor.sorted = True
+            item = search_in_gcmap(gcmapstart, gcmapend, retaddr)
             if item:
-                self._shape_decompressor.setaddr(item)
+                self._shape_decompressor.setpos(item.signed[1])
                 return
-            # maybe the JIT-produced table is not sorted?
-            was_already_sorted = self._extra_mark_sorted()
-            if not was_already_sorted:
-                sort_gcmap(gcmapstart2, gcmapend2)
-                item = search_in_gcmap2(gcmapstart2, gcmapend2, retaddr)
-                if item:
-                    self._shape_decompressor.setaddr(item)
-                    return
-            # there is a rare risk that the array contains *two* entries
-            # with the same key, one of which is dead (null value), and we
-            # found the dead one above.  Solve this case by replacing all
-            # dead keys with nulls, sorting again, and then trying again.
-            replace_dead_entries_with_nulls(gcmapstart2, gcmapend2)
-            sort_gcmap(gcmapstart2, gcmapend2)
-            item = search_in_gcmap2(gcmapstart2, gcmapend2, retaddr)
-            if item:
-                self._shape_decompressor.setaddr(item)
-                return
-        # the item may have been not found because the main array was
-        # not sorted.  Sort it and try again.
-        win32_follow_gcmap_jmp(gcmapstart, gcmapend)
-        sort_gcmap(gcmapstart, gcmapend)
-        item = search_in_gcmap(gcmapstart, gcmapend, retaddr)
-        if item:
-            self._shape_decompressor.setpos(item.signed[1])
+
+        if self._with_jit:
+            # item not found.  We assume that it's a JIT-generated
+            # location -- but we check for consistency that ebp points
+            # to a JITFRAME object.
+            from rpython.jit.backend.llsupport.jitframe import STACK_DEPTH_OFS
+            
+            tid = self.gc.get_type_id(ebp_in_caller)
+            ll_assert(rffi.cast(lltype.Signed, tid) ==
+                      rffi.cast(lltype.Signed, self.frame_tid),
+                      "found a stack frame that does not belong "
+                      "anywhere I know, bug in asmgcc")
+            # fish the depth out of the jf_extra_stack_depth field of the
+            # frame and divide it by the size of a Signed (presumably the
+            # field holds a byte count and the decompressor wants words --
+            # TODO confirm against the backend that writes it)
+            extra_stack_depth = (ebp_in_caller + STACK_DEPTH_OFS).signed[0]
+            extra_stack_depth //= rffi.sizeof(lltype.Signed)
+            self._shape_decompressor.setjitframe(extra_stack_depth)
             return
         llop.debug_fatalerror(lltype.Void, "cannot find gc roots!")
 
 class ShapeDecompressor:
+    # Produces, one value per call to next(), the sequence of location
+    # codes describing a caller frame.  It works in one of two modes:
+    #   - "outside the jit": after setpos(), values are decoded from the
+    #     compressed static call-shape table;
+    #   - "in the jit": after setjitframe(), values are synthesized from
+    #     the fixed layout of a JIT frame plus its extra stack depth.
     _alloc_flavor_ = "raw"
 
+    # flipped to True by locate_caller_based_on_retaddr() once the static
+    # gcmap has been sorted, so that the sort is attempted only once
+    sorted = False
+
     def setpos(self, pos):
+        # Point the decoder at offset 'pos' inside the static call-shape
+        # table.  A negative 'pos' is stored complemented.
         if pos < 0:
             pos = ~ pos     # can ignore this "range" marker here
         gccallshapes = llop.gc_asmgcroot_static(llmemory.Address, 2)
         self.addr = gccallshapes + pos
 
-    def setaddr(self, addr):
-        self.addr = addr
+    def setjitframe(self, extra_stack_depth):
+        # Switch to "in the jit" mode: a NULL 'addr' is what next() tests
+        # to select that mode.  'jit_index' counts the values handed out
+        # so far; 'extra_stack_depth' is added to the fixed frame offsets.
+        self.addr = llmemory.NULL
+        self.jit_index = 0
+        self.extra_stack_depth = extra_stack_depth
 
     def next(self):
-        value = 0
+        # Return the next location code of the current frame description.
         addr = self.addr
-        while True:
-            b = ord(addr.char[0])
-            addr += 1
-            value += b
-            if b < 0x80:
-                break
-            value = (value - 0x80) << 7
-        self.addr = addr
-        return value
+        if addr:
+            # case "outside the jit": decode one variable-length number.
+            # A byte below 0x80 terminates it; any other byte has 0x80
+            # removed and the running value is shifted left by 7 bits.
+            value = 0
+            while True:
+                b = ord(addr.char[0])
+                addr += 1
+                value += b
+                if b < 0x80:
+                    break
+                value = (value - 0x80) << 7
+            self.addr = addr
+            return value
+        else:
+            # case "in the jit"
+            from rpython.jit.backend.x86.arch import FRAME_FIXED_SIZE
+            from rpython.jit.backend.x86.arch import PASS_ON_MY_FRAME
+            index = self.jit_index
+            self.jit_index = index + 1
+            if index == 0:
+                # the jitframe is an object in EBP
+                return LOC_REG | ((INDEX_OF_EBP + 1) << 2)
+            if index == 1:
+                return 0
+            # the remaining returned values should be:
+            #      saved %rbp
+            #      saved %r15           or on 32bit:
+            #      saved %r14             saved %ebp
+            #      saved %r13             saved %edi
+            #      saved %r12             saved %esi
+            #      saved %rbx             saved %ebx
+            #      return addr            return addr
+            #
+            # 'stack_depth' is needed by both the 64-bit and the 32-bit
+            # branch below, so it must be computed before the test (it
+            # used to be assigned only in the 64-bit branch, leaving it
+            # undefined on 32-bit).
+            stack_depth = PASS_ON_MY_FRAME + self.extra_stack_depth
+            if IS_64_BITS:
+                if index == 2:   # rbp
+                    return LOC_ESP_PLUS | (stack_depth << 2)
+                if index == 3:   # r15
+                    return LOC_ESP_PLUS | ((stack_depth + 5) << 2)
+                if index == 4:   # r14
+                    return LOC_ESP_PLUS | ((stack_depth + 4) << 2)
+                if index == 5:   # r13
+                    return LOC_ESP_PLUS | ((stack_depth + 3) << 2)
+                if index == 6:   # r12
+                    return LOC_ESP_PLUS | ((stack_depth + 2) << 2)
+                if index == 7:   # rbx
+                    return LOC_ESP_PLUS | ((stack_depth + 1) << 2)
+                if index == 8:   # return addr
+                    return (LOC_ESP_PLUS |
+                        ((FRAME_FIXED_SIZE + self.extra_stack_depth) << 2))
+            else:
+                if index == 2:   # ebp
+                    return LOC_ESP_PLUS | (stack_depth << 2)
+                if index == 3:   # edi
+                    return LOC_ESP_PLUS | ((stack_depth + 3) << 2)
+                if index == 4:   # esi
+                    return LOC_ESP_PLUS | ((stack_depth + 2) << 2)
+                if index == 5:   # ebx
+                    return LOC_ESP_PLUS | ((stack_depth + 1) << 2)
+                if index == 6:   # return addr
+                    return (LOC_ESP_PLUS |
+                        ((FRAME_FIXED_SIZE + self.extra_stack_depth) << 2))
+            # asking for more values than the frame layout provides
+            llop.debug_fatalerror(lltype.Void, "asmgcroot: invalid index")
+            return 0   # annotator fix
 
 # ____________________________________________________________