Commits

David Schneider  committed fd3bb70

in progress: support frame remapping for floats and int/ref

  • Participants
  • Parent commits 9a5b9a0
  • Branches arm-backed-float

Comments (0)

Files changed (5)

File pypy/jit/backend/arm/assembler.py

             if group == self.INT_TYPE:
                 self.fail_boxes_int.setitem(fail_index, value)
             elif group == self.REF_TYPE:
-                self.fail_boxes_ptr.setitem(fail_index, rffi.cast(llmemory.GCREF, value))
+                tgt = self.fail_boxes_ptr.get_addr_for_num(fail_index)
+                rffi.cast(rffi.LONGP, tgt)[0] = value
+                #self.fail_boxes_ptr.setitem(fail_index, value)# rffi.cast(llmemory.GCREF, value))
             elif group == self.FLOAT_TYPE:
                 self.fail_boxes_float.setitem(fail_index, value)
             else:
         self.mc.SUB_ri(r.sp.value, r.sp.value,  WORD)
         self.mc.MOV_rr(r.fp.value, r.sp.value)
 
-    def gen_bootstrap_code(self, inputargs, regalloc, looptoken):
-        for i in range(len(inputargs)):
-            loc = inputargs[i]
-            reg = regalloc.force_allocate_reg(loc)
-            if loc.type != FLOAT:
-                if loc.type == REF:
-                    addr = self.fail_boxes_ptr.get_addr_for_num(i)
-                elif loc.type == INT:
-                    addr = self.fail_boxes_int.get_addr_for_num(i)
-                else:
-                    assert 0
-                self.mc.gen_load_int(reg.value, addr)
-                self.mc.LDR_ri(reg.value, reg.value)
-            elif loc.type == FLOAT:
-                addr = self.fail_boxes_float.get_addr_for_num(i)
-                self.mc.gen_load_int(r.ip.value, addr)
-                self.mc.VLDR(reg.value, r.ip.value)
+    def gen_bootstrap_code(self, nonfloatlocs, floatlocs, inputargs):
+        for i in range(len(nonfloatlocs)):
+            loc = nonfloatlocs[i]
+            if loc is None:
+                continue
+            arg = inputargs[i]
+            assert arg.type != FLOAT
+            if arg.type == REF:
+                addr = self.fail_boxes_ptr.get_addr_for_num(i)
+            elif arg.type == INT:
+                addr = self.fail_boxes_int.get_addr_for_num(i)
             else:
                 assert 0
-            regalloc.possibly_free_var(loc)
-        arglocs = [regalloc.loc(arg) for arg in inputargs]
-        looptoken._arm_arglocs = arglocs
-        return arglocs
+            if loc.is_reg():
+                reg = loc
+            else:
+                reg = r.ip
+            self.mc.gen_load_int(reg.value, addr)
+            self.mc.LDR_ri(reg.value, reg.value)
+            if loc.is_stack():
+                self.mov_loc_loc(r.ip, loc)
+        for i in range(len(floatlocs)):
+            loc = floatlocs[i]
+            if loc is None:
+                continue
+            arg = inputargs[i]
+            assert arg.type == FLOAT
+            addr = self.fail_boxes_float.get_addr_for_num(i)
+            self.mc.gen_load_int(r.ip.value, addr)
+            if loc.is_vfp_reg():
+                self.mc.VLDR(loc.value, r.ip.value)
+            else:
+                tmpreg = r.d0
+                with saved_registers(self.mc, [], [tmpreg]):
+                    self.mc.VLDR(tmpreg.value, r.ip.value)
+                    self.mov_loc_loc(tmpreg, loc)
 
-    def gen_direct_bootstrap_code(self, arglocs, loop_head, looptoken):
+    def _count_reg_args(self, args):
+        # Return how many leading entries of 'args' fit into a budget of
+        # 4 words.  At most the first 4 entries are examined; an entry whose
+        # type is FLOAT counts as 2 words, any other entry as 1 word.
+        # NOTE(review): presumably the 4 words correspond to the core
+        # registers used for passing call arguments -- confirm.
+        reg_args = 0
+        words = 0
+        for x in range(min(len(args), 4)):
+            if args[x].type == FLOAT:
+                words += 2
+            else:
+                words += 1
+            reg_args += 1
+            if words > 4:
+                # entry x pushed the total past 4 words, so it does not fit:
+                # only the x entries before it are counted
+                reg_args = x
+                break
+        return reg_args
+
+    def gen_direct_bootstrap_code(self, loop_head, looptoken, inputargs):
         self.gen_func_prolog()
-        #import pdb; pdb.set_trace()
-        reg_args = self._count_reg_args(arglocs)
+        nonfloatlocs, floatlocs = looptoken._arm_arglocs
 
-        stack_locs = len(arglocs) - reg_args
+        reg_args = self._count_reg_args(inputargs)
+
+        stack_locs = len(inputargs) - reg_args
         selected_reg = 0
         for i in range(reg_args):
-            loc = arglocs[i]
+            arg = inputargs[i]
+            if arg.type == FLOAT:
+                loc = floatlocs[i]
+            else:
+                loc = nonfloatlocs[i]
             self.mov_loc_loc(r.all_regs[selected_reg], loc)
-            if arglocs[i].type == FLOAT:
+            if inputargs[i].type == FLOAT:
                 selected_reg += 2
             else:
                 selected_reg += 1
         stack_position = len(r.callee_saved_registers)*WORD + \
                             len(r.callee_saved_vfp_registers)*2*WORD + \
                             WORD # for the FAIL INDEX
-        for i in range(reg_args, len(arglocs)):
-            loc = arglocs[i]
+        for i in range(reg_args, len(inputargs)):
+            arg = inputargs[i]
+            if arg.type == FLOAT:
+                loc = floatlocs[i]
+            else:
+                loc = nonfloatlocs[i]
             if loc.is_reg():
                 self.mc.LDR_ri(loc.value, r.fp.value, stack_position)
             elif loc.is_vfp_reg():
         self.align()
         self.gen_func_prolog()
         sp_patch_location = self._prepare_sp_patch_position()
-        arglocs = self.gen_bootstrap_code(inputargs, regalloc, looptoken)
-        #for x in range(5):
-        #    self.mc.NOP()
+        nonfloatlocs, floatlocs = regalloc.prepare_loop(inputargs, operations, looptoken)
+        self.gen_bootstrap_code(nonfloatlocs, floatlocs, inputargs)
+        looptoken._arm_arglocs = [nonfloatlocs, floatlocs]
         loop_head = self.mc.currpos()
 
         looptoken._arm_loop_code = loop_head
         self.align()
 
         direct_bootstrap_code = self.mc.currpos()
-        self.gen_direct_bootstrap_code(arglocs, loop_head, looptoken)
+        self.gen_direct_bootstrap_code(loop_head, looptoken, inputargs)
 
         loop_start = self.materialize_loop(looptoken)
         looptoken._arm_bootstrap_code = loop_start
         # manager
         if frame_depth == 1:
             return
-        n = (frame_depth-1)*WORD
+        n = (frame_depth)*WORD
         self._adjust_sp(n, cb, base_reg=r.fp)
 
     def _adjust_sp(self, n, cb=None, fcond=c.AL, base_reg=r.sp):

File pypy/jit/backend/arm/jump.py

     srccount = {}    # maps dst_locations to how many times the same
                      # location appears in src_locations
     for dst in dst_locations:
-        srccount[dst.as_key()] = 0
+        key = dst.as_key()
+        assert key not in srccount, "duplicate value in dst_locations!"
+        srccount[key] = 0
     for i in range(len(dst_locations)):
         src = src_locations[i]
         if src.is_imm():
         assembler.regalloc_mov(src, tmpreg)
         src = tmpreg
     assembler.regalloc_mov(src, dst)
+
+def remap_frame_layout_mixed(assembler,
+                             src_locations1, dst_locations1, tmpreg1,
+                             src_locations2, dst_locations2, tmpreg2):
+    # Remap two groups of locations one after the other with
+    # remap_frame_layout(): first src_locations1 -> dst_locations1 (using
+    # tmpreg1), then src_locations2 -> dst_locations2 (using tmpreg2).
+    # Stack sources of the second group that the first remapping would
+    # overwrite are pushed beforehand and popped into their destination at
+    # the very end.
+    #
+    # find and push the stack locations from src_locations2 that
+    # are going to be overwritten by dst_locations1
+    from pypy.jit.backend.arm.arch import WORD
+    extrapushes = []
+    # dst_keys is used as a set of the keys of all group-1 destinations
+    dst_keys = {}
+    for loc in dst_locations1:
+        dst_keys[loc.as_key()] = None
+    # the "red" (reduced) lists collect the group-2 pairs that are not
+    # handled by the push/pop mechanism
+    src_locations2red = []
+    dst_locations2red = []
+    for i in range(len(src_locations2)):
+        loc    = src_locations2[i]
+        dstloc = dst_locations2[i]
+        if loc.is_stack():
+            key = loc.as_key()
+            # a location wider than one WORD is also checked against the
+            # key of the following word
+            # NOTE(review): assumes as_key() of adjacent stack words
+            # differs by exactly WORD -- confirm against locations.py
+            if (key in dst_keys or (loc.width > WORD and
+                                    (key + WORD) in dst_keys)):
+                assembler.regalloc_push(loc)
+                extrapushes.append(dstloc)
+                continue
+        src_locations2red.append(loc)
+        dst_locations2red.append(dstloc)
+    src_locations2 = src_locations2red
+    dst_locations2 = dst_locations2red
+    #
+    # remap the integer and pointer registers and stack locations
+    remap_frame_layout(assembler, src_locations1, dst_locations1, tmpreg1)
+    #
+    # remap the vfp registers and stack locations
+    remap_frame_layout(assembler, src_locations2, dst_locations2, tmpreg2)
+    #
+    # finally, pop the extra stack locations that were pushed above; they
+    # are popped in reverse order of the pushes, directly into their
+    # destination locations
+    while len(extrapushes) > 0:
+        loc = extrapushes.pop()
+        assembler.regalloc_pop(loc)

File pypy/jit/backend/arm/opassembler.py

     def emit_op_jump(self, op, arglocs, regalloc, fcond):
         descr = op.getdescr()
         assert isinstance(descr, LoopToken)
-        destlocs = descr._arm_arglocs
         assert fcond == c.AL
 
-        remap_frame_layout(self, arglocs, destlocs, r.ip)
         if descr._arm_bootstrap_code == 0:
             self.mc.B_offs(descr._arm_loop_code, fcond)
         else:
             self._ensure_result_bit_extension(loc, size, signed)
         return cond
 
-    def _count_reg_args(self, args):
-        reg_args = 0
-        words = 0
-        for x in range(min(len(args), 4)):
-            if args[x].type == FLOAT:
-                words += 2
-            else:
-                words += 1
-            reg_args += 1
-            if words > 4:
-                reg_args = x
-                break
-        return reg_args
     # XXX improve this interface
     # emit_op_call_may_force
     # XXX improve freeing of stuff here
     def _emit_call(self, adr, args, regalloc, fcond=c.AL, result=None):
         n_args = len(args)
+        #XXX replace with _count_reg_args
         reg_args = 0
         words = 0
         for x in range(min(n_args, 4)):
 
         descr = op.getdescr()
         assert isinstance(descr, LoopToken)
-        assert op.numargs() == len(descr._arm_arglocs)
+        # XXX check this
+        assert op.numargs() == len(descr._arm_arglocs[0])
         resbox = TempInt()
         self._emit_call(descr._arm_direct_bootstrap_code, op.getarglist(),
                                 regalloc, fcond, result=resbox)

File pypy/jit/backend/arm/regalloc.py

 from pypy.jit.backend.llsupport.regalloc import FrameManager, \
-        RegisterManager, compute_vars_longevity, TempBox
+        RegisterManager, compute_vars_longevity, TempBox, compute_loop_consts
 from pypy.jit.backend.arm import registers as r
 from pypy.jit.backend.arm import locations
 from pypy.jit.backend.arm.locations import imm
                                                     prepare_cmp_op,
                                                     prepare_float_op,
                                                     _check_imm_arg)
+from pypy.jit.backend.arm.jump import remap_frame_layout_mixed
 from pypy.jit.codewriter import longlong
 from pypy.jit.metainterp.history import (Const, ConstInt, ConstFloat, ConstPtr,
                                         Box, BoxInt, BoxPtr, AbstractFailDescr,
     def __init__(self):
         FrameManager.__init__(self)
         self.frame_depth = 1
+    @staticmethod
+    def frame_pos(loc, type):
+        num_words = ARMFrameManager.frame_size(type)
+        return locations.StackLocation(loc, num_words=num_words, type=type)
 
     @staticmethod
-    def frame_pos(loc, type):
-        if type == INT or type == REF:
-            num_words = 1
-        else:
+    def frame_size(type):
+        num_words = 1
+        if type == FLOAT:
             num_words = 2
-        return locations.StackLocation(loc, num_words=num_words, type=type)
+        return num_words
 
 def void(self, op, fcond):
     return []
             assert isinstance(value, ConstFloat)
             return self.vfprm.convert_to_imm(value)
 
+    def prepare_loop(self, inputargs, operations, looptoken):
+        # Determine a location for every input argument of a loop and
+        # return the pair (nonfloatlocs, floatlocs).  Both lists have
+        # len(inputargs) entries: index i of floatlocs is filled when
+        # inputargs[i] has type FLOAT, otherwise index i of nonfloatlocs is
+        # filled; the entry in the other list stays None.
+        loop_consts = compute_loop_consts(inputargs, operations[-1], looptoken)
+        floatlocs = [None] * len(inputargs)
+        nonfloatlocs = [None] * len(inputargs)
+        for i in range(len(inputargs)):
+            arg = inputargs[i]
+            assert not isinstance(arg, Const)
+            # NOTE(review): 'reg' is assigned but never read in this method
+            reg = None
+            loc = inputargs[i]
+            # only try to get a register for arguments that are not loop
+            # constants and whose longevity entry has a second field > -1
+            # NOTE(review): presumably that field is the last-use position
+            # of the box -- confirm against compute_vars_longevity
+            if arg not in loop_consts and self.longevity[arg][1] > -1:
+                reg = self.try_allocate_reg(loc)
+
+            # 'loc' is rebound here to whatever location self.loc() reports
+            # for the argument
+            loc = self.loc(arg)
+            if arg.type == FLOAT:
+                floatlocs[i] = loc
+            else:
+                nonfloatlocs[i] = loc
+        self.possibly_free_vars(list(inputargs))
+        
+        return nonfloatlocs, floatlocs
+
     def update_bindings(self, locs, frame_depth, inputargs):
         used = {}
         i = 0
 
 
     def prepare_op_jump(self, op, fcond):
+        assembler = self.assembler
         descr = op.getdescr()
         assert isinstance(descr, LoopToken)
-        locs = [self.loc(op.getarg(i)) for i in range(op.numargs())]
-        return locs
+        nonfloatlocs, floatlocs = descr._arm_arglocs
 
+        # get temporary locs
+        tmploc = r.ip
+        box = TempFloat()
+        # compute 'vfptmploc' to be all_regs[0] by spilling what is there
+        vfptmp = self.vfprm.all_regs[0]
+        vfptmploc = self.vfprm.force_allocate_reg(box, selected_reg=vfptmp)
+
+        # Part about non-floats
+        # XXX we don't need a copy, we only need the original list
+        src_locations1 = [self.loc(op.getarg(i)) for i in range(op.numargs())
+                         if op.getarg(i).type != FLOAT]
+        assert tmploc not in nonfloatlocs
+        dst_locations1 = [loc for loc in nonfloatlocs if loc is not None]
+        # Part about floats
+        src_locations2 = [self.loc(op.getarg(i)) for i in range(op.numargs())
+                         if op.getarg(i).type == FLOAT]
+        dst_locations2 = [loc for loc in floatlocs if loc is not None]
+        remap_frame_layout_mixed(self.assembler,
+                                 src_locations1, dst_locations1, tmploc,
+                                 src_locations2, dst_locations2, vfptmploc)
+        self.possibly_free_var(box)
+        return []
 
     def prepare_op_setfield_gc(self, op, fcond):
         boxes = list(op.getarglist())

File pypy/jit/backend/llsupport/regalloc.py

 
 from pypy.jit.metainterp.history import Const, Box, REF
 from pypy.rlib.objectmodel import we_are_translated
+from pypy.jit.metainterp.resoperation import rop
 
 class TempBox(Box):
     def __init__(self):