Commits

Maciej Fijalkowski committed b60d7a3

Good. First go at vectorized operations - support reading, writing
and adding doubles in the x86 backend. No spilling so far.

Comments (0)

Files changed (12)

pypy/jit/backend/model.py

     # longlongs are supported by the JIT, but stored as doubles.
     # Boxes and Consts are BoxFloats and ConstFloats.
     supports_singlefloats = False
+    supports_vector_ops = False
+    # SSE and similar
 
     done_with_this_frame_void_v = -1
     done_with_this_frame_int_v = -1

pypy/jit/backend/test/runner_test.py

         assert fail.identifier == 42
 
     def test_vector_ops(self):
-        ops = """
-        [p0]
-        guard_array_aligned(p0) []
-        """
+        if not self.cpu.supports_vector_ops:
+            py.test.skip("unsupported vector ops")
+        
+        A = lltype.Array(lltype.Float, hints={'nolength': True,
+                                               'memory_position_alignment': 16})
+        descr0 = self.cpu.arraydescrof(A)
+        looptoken = JitCellToken()
+        ops = parse("""
+        [p0, p1]
+        vec0 = getarrayitem_vector_raw(p0, 0, descr=descr0)
+        vec1 = getarrayitem_vector_raw(p1, 0, descr=descr0)
+        vec2 = float_vector_add(vec0, vec1)
+        setarrayitem_vector_raw(p0, 0, vec2, descr=descr0)
+        finish()
+        """, namespace=locals())
+        self.cpu.compile_loop(ops.inputargs, ops.operations, looptoken)
+        a = lltype.malloc(A, 10, flavor='raw')
+        a[0] = 13.0
+        a[1] = 15.0
+        self.cpu.execute_token(looptoken, a, a)
+        assert a[0] == 26
+        assert a[1] == 30
+        lltype.free(a, flavor='raw')
 
 class OOtypeBackendTest(BaseBackendTest):
 

pypy/jit/backend/x86/assembler.py

 # darwin requires the stack to be 16 bytes aligned on calls. Same for gcc 4.5.0,
 # better safe than sorry
 CALL_ALIGN = 16 // WORD
+FLOAT_VECTOR_SIZE = 1 # multiply by 2
 
 def align_stack_words(words):
     return (words + CALL_ALIGN - 1) & ~(CALL_ALIGN-1)
     genop_int_rshift = _binaryop("SAR")
     genop_uint_rshift = _binaryop("SHR")
     genop_float_add = _binaryop("ADDSD", True)
+    genop_float_vector_add = _binaryop("ADDPD", True)
     genop_float_sub = _binaryop('SUBSD')
     genop_float_mul = _binaryop('MULSD', True)
     genop_float_truediv = _binaryop('DIVSD')
     genop_getarrayitem_gc_pure = genop_getarrayitem_gc
     genop_getarrayitem_raw = genop_getarrayitem_gc
 
+    def genop_getarrayitem_vector_raw(self, op, arglocs, resloc):
+        base_loc, ofs_loc, size_loc, _, sign_loc = arglocs
+        assert isinstance(size_loc, ImmedLoc)
+        scale = _get_scale(size_loc.value)
+        src_addr = addr_add(base_loc, ofs_loc, 0, scale)
+        self.mc.MOVDQA(resloc, src_addr)
+
     def _get_interiorfield_addr(self, temp_loc, index_loc, itemsize_loc,
                                 base_loc, ofs_loc):
         assert isinstance(itemsize_loc, ImmedLoc)
         dest_addr = AddressLoc(base_loc, ofs_loc, scale, baseofs.value)
         self.save_into_mem(dest_addr, value_loc, size_loc)
 
+    def genop_discard_setarrayitem_vector_raw(self, op, arglocs):
+        base_loc, ofs_loc, value_loc, size_loc, _ = arglocs
+        assert isinstance(size_loc, ImmedLoc)
+        scale = _get_scale(size_loc.value)
+        dest_addr = AddressLoc(base_loc, ofs_loc, scale, 0)
+        self.mc.MOVDQA(dest_addr, value_loc)
+
     def genop_discard_strsetitem(self, op, arglocs):
         base_loc, ofs_loc, val_loc = arglocs
         basesize, itemsize, ofs_length = symbolic.get_array_token(rstr.STR,

pypy/jit/backend/x86/regalloc.py

 import os
 from pypy.jit.metainterp.history import (Box, Const, ConstInt, ConstPtr,
                                          ResOperation, BoxPtr, ConstFloat,
-                                         BoxFloat, INT, REF, FLOAT,
+                                         BoxFloat, INT, REF, FLOAT, VECTOR,
                                          TargetToken, JitCellToken)
 from pypy.jit.backend.x86.regloc import *
 from pypy.rpython.lltypesystem import lltype, rffi, rstr
 
 class X86XMMRegisterManager(RegisterManager):
 
-    box_types = [FLOAT]
+    box_types = [FLOAT, VECTOR]
     all_regs = [xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7]
     # we never need lower byte I hope
     save_around_call_regs = all_regs
         return pass_on_stack
 
     def possibly_free_var(self, var):
-        if var.type == FLOAT:
+        if var.type in self.xrm.box_types:
             self.xrm.possibly_free_var(var)
         else:
             self.rm.possibly_free_var(var)
 
     def make_sure_var_in_reg(self, var, forbidden_vars=[],
                              selected_reg=None, need_lower_byte=False):
-        if var.type == FLOAT:
+        if var.type in self.xrm.box_types:
             if isinstance(var, ConstFloat):
                 return FloatImmedLoc(var.getfloatstorage())
             return self.xrm.make_sure_var_in_reg(var, forbidden_vars,
 
     def force_allocate_reg(self, var, forbidden_vars=[], selected_reg=None,
                            need_lower_byte=False):
-        if var.type == FLOAT:
+        if var.type in self.xrm.box_types:
             return self.xrm.force_allocate_reg(var, forbidden_vars,
                                                selected_reg, need_lower_byte)
         else:
                                               selected_reg, need_lower_byte)
 
     def force_spill_var(self, var):
-        if var.type == FLOAT:
+        if var.type in self.xrm.box_types:
             return self.xrm.force_spill_var(var)
         else:
             return self.rm.force_spill_var(var)
     def loc(self, v):
         if v is None: # xxx kludgy
             return None
-        if v.type == FLOAT:
+        if v.type in self.xrm.box_types:
             return self.xrm.loc(v)
         return self.rm.loc(v)
 
         self.xrm.possibly_free_vars_for_op(op)
 
     consider_float_add = _consider_float_op
+    consider_float_vector_add = _consider_float_op
     consider_float_sub = _consider_float_op
     consider_float_mul = _consider_float_op
     consider_float_truediv = _consider_float_op
                                  imm(itemsize), imm(ofs)])
 
     consider_setarrayitem_raw = consider_setarrayitem_gc
+    consider_setarrayitem_vector_raw = consider_setarrayitem_gc
 
     def consider_getfield_gc(self, op):
         ofs_loc, size_loc, sign = self._unpack_fielddescr(op.getdescr())
                           sign_loc], result_loc)
 
     consider_getarrayitem_raw = consider_getarrayitem_gc
+    consider_getarrayitem_vector_raw = consider_getarrayitem_gc
     consider_getarrayitem_gc_pure = consider_getarrayitem_gc
 
     def consider_getinteriorfield_gc(self, op):

pypy/jit/backend/x86/regloc.py

 
     MOVSD = _binaryop('MOVSD')
     MOVAPD = _binaryop('MOVAPD')
+    MOVDQA = _binaryop('MOVDQA')
     ADDSD = _binaryop('ADDSD')
     ADDPD = _binaryop('ADDPD')
     SUBSD = _binaryop('SUBSD')

pypy/jit/backend/x86/runner.py

     debug = True
     supports_floats = True
     supports_singlefloats = True
+    supports_vector_ops = True
 
     dont_keepalive_stuff = False # for tests
     with_threads = False

pypy/jit/backend/x86/rx86.py

 define_modrm_modes('MOVSX16_r*', [rex_w, '\x0F\xBF', register(1, 8)])
 define_modrm_modes('MOVSX32_r*', [rex_w, '\x63', register(1, 8)])
 
-define_modrm_modes('MOVSD_x*', ['\xF2', rex_nw, '\x0F\x10', register(1,8)], regtype='XMM')
-define_modrm_modes('MOVSD_*x', ['\xF2', rex_nw, '\x0F\x11', register(2,8)], regtype='XMM')
+define_modrm_modes('MOVSD_x*', ['\xF2', rex_nw, '\x0F\x10', register(1,8)],
+                   regtype='XMM')
+define_modrm_modes('MOVSD_*x', ['\xF2', rex_nw, '\x0F\x11', register(2,8)],
+                   regtype='XMM')
 define_modrm_modes('MOVAPD_x*', ['\x66', rex_nw, '\x0F\x28', register(1,8)],
                    regtype='XMM')
 define_modrm_modes('MOVAPD_*x', ['\x66', rex_nw, '\x0F\x29', register(2,8)],
                    regtype='XMM')
+define_modrm_modes('MOVDQA_x*', ['\x66', rex_nw, '\x0F\x6F', register(1, 8)],
+                   regtype='XMM')
+define_modrm_modes('MOVDQA_*x', ['\x66', rex_nw, '\x0F\x7F', register(2, 8)],
+                   regtype='XMM')
 
 define_modrm_modes('SQRTSD_x*', ['\xF2', rex_nw, '\x0F\x51', register(1,8)], regtype='XMM')
 

pypy/jit/metainterp/executor.py

 # ____________________________________________________________
 
 
+IGNORED = ['FLOAT_VECTOR_ADD', 'GETARRAYITEM_VECTOR_RAW',
+           'SETARRAYITEM_VECTOR_RAW']
+
 def _make_execute_list():
     if 0:     # enable this to trace calls to do_xxx
         def wrap(fn):
                          rop.LABEL,
                          ):      # list of opcodes never executed by pyjitpl
                 continue
-            raise AssertionError("missing %r" % (key,))
+            if not key in IGNORED:
+                raise AssertionError("missing %r" % (key,))
     return execute_by_num_args
 
 def make_execute_function_with_boxes(name, func):

pypy/jit/metainterp/history.py

     def repr_rpython(self):
         return repr_rpython(self, 'bi')
 
-class BoxFloatVector(Box):
+class BoxVector(Box):
     type = VECTOR
 
-    def __init__(self, floats):
-        self.floats = floats
+    def __init__(self):
+        pass
 
-class BoxIntVector(Box):
-    type = VECTOR
-
-    def __init__(self, ints):
-        self.ints = ints
+    def _getrepr_(self):
+        return ''
 
 class BoxFloat(Box):
     type = FLOAT

pypy/jit/metainterp/resoperation.py

 
     'SETARRAYITEM_GC/3d',
     'SETARRAYITEM_RAW/3d',
-    'SETARRAYITEM_VECTOR_RAW/2d',
+    'SETARRAYITEM_VECTOR_RAW/3d',
     'SETINTERIORFIELD_GC/3d',
     'SETINTERIORFIELD_RAW/3d',
     'SETFIELD_GC/2d',

pypy/jit/tool/oparser.py

         elif elem.startswith('f'):
             box = self.model.BoxFloat()
             _box_counter_more_than(self.model, elem[1:])
+        elif elem.startswith('vec'):
+            box = self.model.BoxVector()
+            _box_counter_more_than(self.model, elem[3:])
         elif elem.startswith('p'):
             # pointer
             ts = getattr(self.cpu, 'ts', self.model.llhelper)

pypy/jit/tool/oparser_model.py

 def get_real_model():
     class LoopModel(object):
         from pypy.jit.metainterp.history import TreeLoop, JitCellToken
-        from pypy.jit.metainterp.history import Box, BoxInt, BoxFloat
+        from pypy.jit.metainterp.history import Box, BoxInt, BoxFloat, BoxVector
         from pypy.jit.metainterp.history import ConstInt, ConstObj, ConstPtr, ConstFloat
         from pypy.jit.metainterp.history import BasicFailDescr, TargetToken
         from pypy.jit.metainterp.typesystem import llhelper
         class BoxRef(Box):
             type = 'p'
 
+        class BoxVector(Box):
+            type = 'e'
+
         class Const(object):
             def __init__(self, value=None):
                 self.value = value
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.