Commits

Hakan Ardo committed 9aea519 Merge

hg merge

Comments (0)

Files changed (3)

pypy/jit/backend/x86/regloc.py

 
 class AssemblerLocation(object):
     # XXX: Is adding "width" here correct?
-    __slots__ = ('value', 'width')
+    _attrs_ = ('value', 'width', '_location_code')
     _immutable_ = True
     def _getregkey(self):
         return self.value
     def is_memory_reference(self):
         return self.location_code() in ('b', 's', 'j', 'a', 'm')
 
+    def location_code(self):
+        return self._location_code
+
     def value_r(self): return self.value
     def value_b(self): return self.value
     def value_s(self): return self.value
 
 class StackLoc(AssemblerLocation):
     _immutable_ = True
+    _location_code = 'b'
+
     def __init__(self, position, ebp_offset, num_words, type):
         assert ebp_offset < 0   # so no confusion with RegLoc.value
         self.position = position
     def __repr__(self):
         return '%d(%%ebp)' % (self.value,)
 
-    def location_code(self):
-        return 'b'
-
     def assembler(self):
         return repr(self)
 
         self.is_xmm = is_xmm
         if self.is_xmm:
             self.width = 8
+            self._location_code = 'x'
         else:
             self.width = WORD
+            self._location_code = 'r'
     def __repr__(self):
         if self.is_xmm:
             return rx86.R.xmmnames[self.value]
         assert not self.is_xmm
         return RegLoc(rx86.high_byte(self.value), False)
 
-    def location_code(self):
-        if self.is_xmm:
-            return 'x'
-        else:
-            return 'r'
-
     def assembler(self):
         return '%' + repr(self)
 
 class ImmedLoc(AssemblerLocation):
     _immutable_ = True
     width = WORD
+    _location_code = 'i'
+
     def __init__(self, value):
         from pypy.rpython.lltypesystem import rffi, lltype
         # force as a real int
         self.value = rffi.cast(lltype.Signed, value)
 
-    def location_code(self):
-        return 'i'
-
     def getint(self):
         return self.value
 
         info = getattr(self, attr, '?')
         return '<AddressLoc %r: %s>' % (self._location_code, info)
 
-    def location_code(self):
-        return self._location_code
-
     def value_a(self):
         return self.loc_a
 
     # we want a width of 8  (... I think.  Check this!)
     _immutable_ = True
     width = 8
+    _location_code = 'j'
 
     def __init__(self, address):
         self.value = address
     def __repr__(self):
         return '<ConstFloatLoc @%s>' % (self.value,)
 
-    def location_code(self):
-        return 'j'
-
 if IS_X86_32:
     class FloatImmedLoc(AssemblerLocation):
         # This stands for an immediate float.  It cannot be directly used in
         # instead; see below.
         _immutable_ = True
         width = 8
+        _location_code = '#'     # don't use me
 
         def __init__(self, floatstorage):
             self.aslonglong = floatstorage
             floatvalue = longlong.getrealfloat(self.aslonglong)
             return '<FloatImmedLoc(%s)>' % (floatvalue,)
 
-        def location_code(self):
-            raise NotImplementedError
-
 if IS_X86_64:
     def FloatImmedLoc(floatstorage):
         from pypy.rlib.longlong2float import float2longlong
     else:
         raise AssertionError(methname + " undefined")
 
+def _missing_binary_insn(name, code1, code2):
+    raise AssertionError(name + "_" + code1 + code2 + " missing")
+_missing_binary_insn._dont_inline_ = True
+
+
 class LocationCodeBuilder(object):
     _mixin_ = True
 
             else:
                 # For this case, we should not need the scratch register more than here.
                 self._load_scratch(val2)
+                if name == 'MOV' and loc1 is X86_64_SCRATCH_REG:
+                    return     # don't need a dummy "MOV r11, r11"
                 INSN(self, loc1, X86_64_SCRATCH_REG)
 
         def invoke(self, codes, val1, val2):
             _rx86_getattr(self, methname)(val1, val2)
         invoke._annspecialcase_ = 'specialize:arg(1)'
 
+        def has_implementation_for(loc1, loc2):
+            # A memo function that returns True if there is any NAME_xy that could match.
+            # If it returns False we know the whole subcase can be omitted from translated
+            # code.  Without this hack, the size of most _binaryop INSN functions ends up
+            # quite large in C code.
+            if loc1 == '?':
+                return any([has_implementation_for(loc1, loc2)
+                            for loc1 in unrolling_location_codes])
+            methname = name + "_" + loc1 + loc2
+            if not hasattr(rx86.AbstractX86CodeBuilder, methname):
+                return False
+            # any NAME_j should have a NAME_m as a fallback, too.  Check it
+            if loc1 == 'j': assert has_implementation_for('m', loc2), methname
+            if loc2 == 'j': assert has_implementation_for(loc1, 'm'), methname
+            return True
+        has_implementation_for._annspecialcase_ = 'specialize:memo'
+
         def INSN(self, loc1, loc2):
             code1 = loc1.location_code()
             code2 = loc2.location_code()
                 assert code2 not in ('j', 'i')
 
             for possible_code2 in unrolling_location_codes:
+                if not has_implementation_for('?', possible_code2):
+                    continue
                 if code2 == possible_code2:
                     val2 = getattr(loc2, "value_" + possible_code2)()
                     #
                     #
                     # Regular case
                     for possible_code1 in unrolling_location_codes:
+                        if not has_implementation_for(possible_code1,
+                                                      possible_code2):
+                            continue
                         if code1 == possible_code1:
                             val1 = getattr(loc1, "value_" + possible_code1)()
                             # More faking out of certain operations for x86_64
-                            if possible_code1 == 'j' and not rx86.fits_in_32bits(val1):
+                            fits32 = rx86.fits_in_32bits
+                            if possible_code1 == 'j' and not fits32(val1):
                                 val1 = self._addr_as_reg_offset(val1)
                                 invoke(self, "m" + possible_code2, val1, val2)
-                            elif possible_code2 == 'j' and not rx86.fits_in_32bits(val2):
+                                return
+                            if possible_code2 == 'j' and not fits32(val2):
                                 val2 = self._addr_as_reg_offset(val2)
                                 invoke(self, possible_code1 + "m", val1, val2)
-                            elif possible_code1 == 'm' and not rx86.fits_in_32bits(val1[1]):
+                                return
+                            if possible_code1 == 'm' and not fits32(val1[1]):
                                 val1 = self._fix_static_offset_64_m(val1)
-                                invoke(self, "a" + possible_code2, val1, val2)
-                            elif possible_code2 == 'm' and not rx86.fits_in_32bits(val2[1]):
+                            if possible_code2 == 'm' and not fits32(val2[1]):
                                 val2 = self._fix_static_offset_64_m(val2)
-                                invoke(self, possible_code1 + "a", val1, val2)
-                            else:
-                                if possible_code1 == 'a' and not rx86.fits_in_32bits(val1[3]):
-                                    val1 = self._fix_static_offset_64_a(val1)
-                                if possible_code2 == 'a' and not rx86.fits_in_32bits(val2[3]):
-                                    val2 = self._fix_static_offset_64_a(val2)
-                                invoke(self, possible_code1 + possible_code2, val1, val2)
+                            if possible_code1 == 'a' and not fits32(val1[3]):
+                                val1 = self._fix_static_offset_64_a(val1)
+                            if possible_code2 == 'a' and not fits32(val2[3]):
+                                val2 = self._fix_static_offset_64_a(val2)
+                            invoke(self, possible_code1 + possible_code2, val1, val2)
                             return
+            _missing_binary_insn(name, code1, code2)
 
         return func_with_new_name(INSN, "INSN_" + name)
 
     def _fix_static_offset_64_m(self, (basereg, static_offset)):
         # For cases where an AddressLoc has the location_code 'm', but
         # where the static offset does not fit in 32-bits.  We have to fall
-        # back to the X86_64_SCRATCH_REG.  Note that this returns a location
-        # encoded as mode 'a'.  These are all possibly rare cases; don't try
+        # back to the X86_64_SCRATCH_REG.  Returns a new location encoded
+        # as mode 'm' too.  These are all possibly rare cases; don't try
         # to reuse a past value of the scratch register at all.
         self._scratch_register_known = False
         self.MOV_ri(X86_64_SCRATCH_REG.value, static_offset)
-        return (basereg, X86_64_SCRATCH_REG.value, 0, 0)
+        self.LEA_ra(X86_64_SCRATCH_REG.value,
+                    (basereg, X86_64_SCRATCH_REG.value, 0, 0))
+        return (X86_64_SCRATCH_REG.value, 0)
 
     def _fix_static_offset_64_a(self, (basereg, scalereg,
                                        scale, static_offset)):

pypy/jit/backend/x86/rx86.py

     assert insnname_template.count('*') == 1
     add_insn('x', register(2), '\xC0')
     add_insn('j', abs_, immediate(2))
+    add_insn('m', mem_reg_plus_const(2))
 
 define_pxmm_insn('PADDQ_x*',     '\xD4')
 define_pxmm_insn('PSUBQ_x*',     '\xFB')

pypy/jit/backend/x86/test/test_regloc.py

         expected_instructions = (
                 # mov r11, 0xFEDCBA9876543210
                 '\x49\xBB\x10\x32\x54\x76\x98\xBA\xDC\xFE'
-                # mov rcx, [rdx+r11]
-                '\x4A\x8B\x0C\x1A'
+                # lea r11, [rdx+r11]
+                '\x4E\x8D\x1C\x1A'
+                # mov rcx, [r11]
+                '\x49\x8B\x0B'
         )
         assert cb.getvalue() == expected_instructions
 
 
     # ------------------------------------------------------------
 
+    def test_MOV_64bit_constant_into_r11(self):
+        base_constant = 0xFEDCBA9876543210
+        cb = LocationCodeBuilder64()
+        cb.MOV(r11, imm(base_constant))
+
+        expected_instructions = (
+                # mov r11, 0xFEDCBA9876543210
+                '\x49\xBB\x10\x32\x54\x76\x98\xBA\xDC\xFE'
+        )
+        assert cb.getvalue() == expected_instructions
+
+    def test_MOV_64bit_address_into_r11(self):
+        base_addr = 0xFEDCBA9876543210
+        cb = LocationCodeBuilder64()
+        cb.MOV(r11, heap(base_addr))
+
+        expected_instructions = (
+                # mov r11, 0xFEDCBA9876543210
+                '\x49\xBB\x10\x32\x54\x76\x98\xBA\xDC\xFE' +
+                # mov r11, [r11]
+                '\x4D\x8B\x1B'
+        )
+        assert cb.getvalue() == expected_instructions
+
     def test_MOV_immed32_into_64bit_address_1(self):
         immed = -0x01234567
         base_addr = 0xFEDCBA9876543210
         expected_instructions = (
                 # mov r11, 0xFEDCBA9876543210
                 '\x49\xBB\x10\x32\x54\x76\x98\xBA\xDC\xFE'
-                # mov [rdx+r11], -0x01234567
-                '\x4A\xC7\x04\x1A\x99\xBA\xDC\xFE'
+                # lea r11, [rdx+r11]
+                '\x4E\x8D\x1C\x1A'
+                # mov [r11], -0x01234567
+                '\x49\xC7\x03\x99\xBA\xDC\xFE'
         )
         assert cb.getvalue() == expected_instructions
 
                 '\x48\xBA\xEF\xCD\xAB\x89\x67\x45\x23\x01'
                 # mov r11, 0xFEDCBA9876543210
                 '\x49\xBB\x10\x32\x54\x76\x98\xBA\xDC\xFE'
-                # mov [rax+r11], rdx
-                '\x4A\x89\x14\x18'
+                # lea r11, [rax+r11]
+                '\x4E\x8D\x1C\x18'
+                # mov [r11], rdx
+                '\x49\x89\x13'
                 # pop rdx
                 '\x5A'
         )