Commits

Maciej Fijalkowski  committed f33339b

implement spilling. A bit of fun with alignment

  • Participants
  • Parent commits 51f0700
  • Branches backend-vector-ops

Comments (0)

Files changed (7)

File pypy/jit/backend/llsupport/regalloc.py

         self.used = []      # list of bools
         self.hint_frame_locations = {}
 
-    frame_depth = property(lambda:xxx, lambda:xxx)   # XXX kill me
-
     def get_frame_depth(self):
         return len(self.used)
 
         return self.get_new_loc(box)
 
     def get_new_loc(self, box):
-        size = self.frame_size(box.type)
+        size = self.frame_size(box)
         # frame_depth is rounded up to a multiple of 'size', assuming
         # that 'size' is a power of two.  The reason for doing so is to
         # avoid obscure issues in jump.py with stack locations that try
             self.used.append(False)
         #
         index = self.get_frame_depth()
-        newloc = self.frame_pos(index, box.type)
+        newloc = self.frame_pos(index, box)
         for i in range(size):
             self.used.append(True)
         #
         index = self.get_loc_index(loc)
         if index < 0:
             return
-        endindex = index + self.frame_size(box.type)
+        endindex = index + self.frame_size(box)
         while len(self.used) < endindex:
             self.used.append(False)
         while index < endindex:
             return    # already gone
         del self.bindings[box]
         #
-        size = self.frame_size(box.type)
+        size = self.frame_size(box)
         baseindex = self.get_loc_index(loc)
         if baseindex < 0:
             return
         index = self.get_loc_index(loc)
         if index < 0:
             return False
-        size = self.frame_size(box.type)
+        size = self.frame_size(box)
         for i in range(size):
             while (index + i) >= len(self.used):
                 self.used.append(False)
 
     # abstract methods that need to be overwritten for specific assemblers
     @staticmethod
-    def frame_pos(loc, type):
+    def frame_pos(loc, box):
         raise NotImplementedError("Purely abstract")
     @staticmethod
-    def frame_size(type):
+    def frame_size(box):
         return 1
     @staticmethod
     def get_loc_index(loc):
         del self.reg_bindings[v_to_spill]
         if self.frame_manager.get(v_to_spill) is None:
             newloc = self.frame_manager.loc(v_to_spill)
-            self.assembler.regalloc_mov(loc, newloc)
+            self.assembler.regalloc_mov(v_to_spill, loc, newloc)
         return loc
 
     def _pick_variable_to_spill(self, v, forbidden_vars, selected_reg=None,
         immloc = self.convert_to_imm(v)
         if selected_reg:
             if selected_reg in self.free_regs:
-                self.assembler.regalloc_mov(immloc, selected_reg)
+                self.assembler.regalloc_mov(v, immloc, selected_reg)
                 return selected_reg
             loc = self._spill_var(v, forbidden_vars, selected_reg)
             self.free_regs.append(loc)
-            self.assembler.regalloc_mov(immloc, loc)
+            self.assembler.regalloc_mov(v, immloc, loc)
             return loc
         return immloc
 
         loc = self.force_allocate_reg(v, forbidden_vars, selected_reg,
                                       need_lower_byte=need_lower_byte)
         if prev_loc is not loc:
-            self.assembler.regalloc_mov(prev_loc, loc)
+            self.assembler.regalloc_mov(v, prev_loc, loc)
         return loc
 
     def _reallocate_from_to(self, from_v, to_v):
         if self.free_regs:
             loc = self.free_regs.pop()
             self.reg_bindings[v] = loc
-            self.assembler.regalloc_mov(prev_loc, loc)
+            self.assembler.regalloc_mov(v, prev_loc, loc)
         else:
             loc = self.frame_manager.loc(v)
-            self.assembler.regalloc_mov(prev_loc, loc)
+            self.assembler.regalloc_mov(v, prev_loc, loc)
 
     def force_result_in_reg(self, result_v, v, forbidden_vars=[]):
         """ Make sure that result is in the same register as v.
                 loc = self.free_regs.pop()
             else:
                 loc = self._spill_var(v, forbidden_vars, None)
-            self.assembler.regalloc_mov(self.convert_to_imm(v), loc)
+            self.assembler.regalloc_mov(v, self.convert_to_imm(v), loc)
             self.reg_bindings[result_v] = loc
             return loc
         if v not in self.reg_bindings:
             prev_loc = self.frame_manager.loc(v)
             loc = self.force_allocate_reg(v, forbidden_vars)
-            self.assembler.regalloc_mov(prev_loc, loc)
+            self.assembler.regalloc_mov(v, prev_loc, loc)
         assert v in self.reg_bindings
         if self.longevity[v][1] > self.position:
             # we need to find a new place for variable v and
         if not self.frame_manager.get(v):
             reg = self.reg_bindings[v]
             to = self.frame_manager.loc(v)
-            self.assembler.regalloc_mov(reg, to)
+            self.assembler.regalloc_mov(v, reg, to)
         # otherwise it's clean
 
     def before_call(self, force_store=[], save_all_regs=0):

File pypy/jit/backend/test/runner_test.py

         assert a[0] == 26
         assert a[1] == 30
         lltype.free(a, flavor='raw')
+        
 
 class OOtypeBackendTest(BaseBackendTest):
 

File pypy/jit/backend/x86/assembler.py

 import sys, os
 from pypy.jit.backend.llsupport import symbolic
 from pypy.jit.backend.llsupport.asmmemmgr import MachineDataBlockWrapper
-from pypy.jit.metainterp.history import Const, Box, BoxInt, ConstInt
+from pypy.jit.metainterp.history import Const, Box, BoxInt, ConstInt, BoxVector
 from pypy.jit.metainterp.history import AbstractFailDescr, INT, REF, FLOAT
 from pypy.jit.metainterp.history import JitCellToken
 from pypy.rpython.lltypesystem import lltype, rffi, rstr, llmemory
 
     # ------------------------------------------------------------
 
-    def mov(self, from_loc, to_loc):
-        if (isinstance(from_loc, RegLoc) and from_loc.is_xmm) or (isinstance(to_loc, RegLoc) and to_loc.is_xmm):
+    def mov(self, box, from_loc, to_loc):
+        if isinstance(box, BoxVector):
+            self.mc.MOVDQU(to_loc, from_loc)
+        elif (isinstance(from_loc, RegLoc) and from_loc.is_xmm) or (isinstance(to_loc, RegLoc) and to_loc.is_xmm):
             self.mc.MOVSD(to_loc, from_loc)
         else:
             assert to_loc is not ebp
         self.mc.MOVZX8(resloc, rl)
 
     def genop_same_as(self, op, arglocs, resloc):
-        self.mov(arglocs[0], resloc)
+        self.mov(op.getarg(0), arglocs[0], resloc)
     genop_cast_ptr_to_int = genop_same_as
     genop_cast_int_to_ptr = genop_same_as
 

File pypy/jit/backend/x86/regalloc.py

 import os
 from pypy.jit.metainterp.history import (Box, Const, ConstInt, ConstPtr,
                                          ResOperation, BoxPtr, ConstFloat,
-                                         BoxFloat, INT, REF, FLOAT,
+                                         BoxFloat, INT, REF, FLOAT, BoxVector,
                                          TargetToken, JitCellToken)
 from pypy.jit.backend.x86.regloc import *
 from pypy.rpython.lltypesystem import lltype, rffi, rstr
 
 class X86FrameManager(FrameManager):
     @staticmethod
-    def frame_pos(i, box_type):
-        if IS_X86_32 and box_type == FLOAT:
-            return StackLoc(i, get_ebp_ofs(i+1), box_type)
-        else:
-            return StackLoc(i, get_ebp_ofs(i), box_type)
+    def frame_pos(i, box):
+        assert isinstance(box, Box)
+        if isinstance(box, BoxVector):
+            if IS_X86_32:
+                return StackLoc(i, get_ebp_ofs(i + 3), box.type)
+            return StackLoc(i, get_ebp_ofs(i + 1), box.type)            
+        if IS_X86_32 and box.type == FLOAT:
+            return StackLoc(i, get_ebp_ofs(i+1), box.type)
+        return StackLoc(i, get_ebp_ofs(i), box.type)
     @staticmethod
-    def frame_size(box_type):
-        if IS_X86_32 and box_type == FLOAT:
+    def frame_size(box):
+        assert isinstance(box, Box)
+        if isinstance(box, BoxVector):
+            if IS_X86_32:
+                return 4
             return 2
-        else:
-            return 1
+        if IS_X86_32 and box.type == FLOAT:
+            return 2
+        return 1
     @staticmethod
     def get_loc_index(loc):
         assert isinstance(loc, StackLoc)
         self.assembler.regalloc_perform_math(op, arglocs, result_loc)
 
     def locs_for_fail(self, guard_op):
-        return [self.loc(v) for v in guard_op.getfailargs()]
+        failargs = guard_op.getfailargs()
+        for arg in failargs:
+            assert not isinstance(arg, BoxVector)
+        return [self.loc(v) for v in failargs]
 
     def get_current_depth(self):
         # return (self.fm.frame_depth, self.param_depth), but trying to share
         self.xrm.possibly_free_vars_for_op(op)
 
     consider_float_add = _consider_float_op
-    consider_float_vector_add = _consider_float_op
     consider_float_sub = _consider_float_op
     consider_float_mul = _consider_float_op
     consider_float_truediv = _consider_float_op
 
+    def _consider_float_vector_op(self, op):
+        loc1 = self.xrm.make_sure_var_in_reg(op.getarg(1))
+        args = op.getarglist()
+        loc0 = self.xrm.force_result_in_reg(op.result, op.getarg(0), args)
+        self.Perform(op, [loc0, loc1], loc0)
+        self.xrm.possibly_free_vars_for_op(op)
+
+    consider_float_vector_add = _consider_float_vector_op
+
     def _consider_float_cmp(self, op, guard_op):
         vx = op.getarg(0)
         vy = op.getarg(1)
             scale = self._get_unicode_item_scale()
             if not (isinstance(length_loc, ImmedLoc) or
                     isinstance(length_loc, RegLoc)):
-                self.assembler.mov(length_loc, bytes_loc)
+                self.assembler.mov(args[4], length_loc, bytes_loc)
                 length_loc = bytes_loc
             self.assembler.load_effective_addr(length_loc, 0, scale, bytes_loc)
             length_box = bytes_box
         # Build the four lists
         for i in range(op.numargs()):
             box = op.getarg(i)
+            assert not isinstance(box, BoxVector)
             src_loc = self.loc(box)
             dst_loc = arglocs[i]
             if box.type != FLOAT:

File pypy/jit/backend/x86/regloc.py

     MOVSD = _binaryop('MOVSD')
     MOVAPD = _binaryop('MOVAPD')
     MOVDQA = _binaryop('MOVDQA')
+    MOVDQU = _binaryop('MOVDQU')
     ADDSD = _binaryop('ADDSD')
     ADDPD = _binaryop('ADDPD')
     SUBSD = _binaryop('SUBSD')

File pypy/jit/backend/x86/rx86.py

                    regtype='XMM')
 define_modrm_modes('MOVDQA_*x', ['\x66', rex_nw, '\x0F\x7F', register(2, 8)],
                    regtype='XMM')
+define_modrm_modes('MOVDQU_x*', ['\xF3', rex_nw, '\x0F\x6F', register(1, 8)],
+                   regtype='XMM')
+define_modrm_modes('MOVDQU_*x', ['\xF3', rex_nw, '\x0F\x7F', register(2, 8)],
+                   regtype='XMM')
 
 define_modrm_modes('SQRTSD_x*', ['\xF2', rex_nw, '\x0F\x51', register(1,8)], regtype='XMM')
 

File pypy/jit/backend/x86/test/test_runner.py

             assert self.cpu.get_latest_value_int(3) == 42
 
 
+    def test_vector_spill(self):
+        A = lltype.Array(lltype.Float, hints={'nolength': True,
+                                               'memory_position_alignment': 16})
+        descr0 = self.cpu.arraydescrof(A)
+        looptoken = JitCellToken()
+        ops = parse("""
+        [p0, p1]
+        vec0 = getarrayitem_vector_raw(p0, 0, descr=descr0)
+        vec1 = getarrayitem_vector_raw(p1, 2, descr=descr0)
+        vec2 = getarrayitem_vector_raw(p1, 4, descr=descr0)
+        vec3 = getarrayitem_vector_raw(p1, 6, descr=descr0)
+        vec4 = getarrayitem_vector_raw(p1, 8, descr=descr0)
+        vec5 = getarrayitem_vector_raw(p1, 10, descr=descr0)
+        vec6 = getarrayitem_vector_raw(p1, 12, descr=descr0)
+        vec7 = getarrayitem_vector_raw(p1, 14, descr=descr0)
+        vec8 = getarrayitem_vector_raw(p1, 16, descr=descr0)
+        vec9 = getarrayitem_vector_raw(p1, 18, descr=descr0)
+        vec10 = getarrayitem_vector_raw(p1, 20, descr=descr0)
+        vec11 = getarrayitem_vector_raw(p1, 22, descr=descr0)
+        vec12 = getarrayitem_vector_raw(p1, 24, descr=descr0)
+        vec13 = getarrayitem_vector_raw(p1, 26, descr=descr0)
+        vec14 = getarrayitem_vector_raw(p1, 28, descr=descr0)
+        vec15 = getarrayitem_vector_raw(p1, 30, descr=descr0)
+        vec16 = float_vector_add(vec0, vec1)
+        vec17 = float_vector_add(vec16, vec2)
+        vec18 = float_vector_add(vec17, vec3)
+        vec19 = float_vector_add(vec18, vec4)
+        vec20 = float_vector_add(vec19, vec5)
+        vec21 = float_vector_add(vec20, vec6)
+        vec22 = float_vector_add(vec21, vec7)
+        vec23 = float_vector_add(vec22, vec8)
+        vec24 = float_vector_add(vec23, vec9)
+        vec25 = float_vector_add(vec24, vec10)
+        vec26 = float_vector_add(vec25, vec11)
+        vec27 = float_vector_add(vec26, vec12)
+        vec28 = float_vector_add(vec27, vec13)
+        vec29 = float_vector_add(vec28, vec14)
+        vec30 = float_vector_add(vec29, vec15)
+        setarrayitem_vector_raw(p0, 0, vec30, descr=descr0)
+        finish()
+        """, namespace=locals())
+        self.cpu.compile_loop(ops.inputargs, ops.operations, looptoken)
+        a = lltype.malloc(A, 32, flavor='raw')
+        assert rffi.cast(lltype.Signed, a) % 16 == 0
+        for i in range(32):
+            a[i] = float(i)
+        self.cpu.execute_token(looptoken, a, a)
+        assert a[0] == 16 * 15
+        assert a[1] == 16 * 16
+        lltype.free(a, flavor='raw')
+
+
 class TestDebuggingAssembler(object):
     def setup_method(self, meth):
         self.cpu = CPU(rtyper=None, stats=FakeStats())