Commits

Carl Friedrich Bolz committed f0418b9 Merge

merge default

Files changed (270)

.hgignore

 syntax: regexp
 ^testresult$
 ^site-packages$
+^site-packages/.*$
 ^bin$
 ^pypy/bin/pypy-c
 ^pypy/module/cpyext/src/.+\.o$
 ^pypy/translator/goal/.+\.dll$
 ^pypy/translator/goal/target.+-c$
 ^pypy/_cache$
-^site-packages/.+\.egg$
-^site-packages/.+\.pth$
 ^pypy/doc/statistic/.+\.html$
 ^pypy/doc/statistic/.+\.eps$
 ^pypy/doc/statistic/.+\.pdf$

LICENSE

     Impara, Germany
     Change Maker, Sweden 
 
+The PyPy Logo as used by http://speed.pypy.org and others was created
+by Samuel Reis and is distributed under the terms of the Creative Commons
+Share-Alike License.
  
-License for 'lib-python/2.5.2' and 'lib-python/2.5.2-modified'
+License for 'lib-python/2.7.0' and 'lib-python/2.7.0-modified'
 ============================================================== 
 
 Except when otherwise stated (look for LICENSE files or
 copyright/license information at the beginning of each file) the files
-in the 'lib-python/2.5.2' and 'lib-python/2.5.2-modified' directories
+in the 'lib-python/2.7.0' and 'lib-python/2.7.0-modified' directories
 are all copyrighted by the Python Software Foundation and licensed under
 the Python Software License of which you can find a copy here:
 http://www.python.org/doc/Copyright.html 
 License for 'pypy/module/unicodedata/'
 ======================================
 
 The following files are from the website of The Unicode Consortium
-at http://www.unicode.org/. For the terms of use of these files, see
-http://www.unicode.org/terms_of_use.html
+at http://www.unicode.org/.  For the terms of use of these files, see
+http://www.unicode.org/terms_of_use.html.  Some of these files are instead
+derived from files from the above website, and the same terms of use apply.
 
-    CompositionExclusions-3.2.0.txt
-    CompositionExclusions-4.1.0.txt
-    CompositionExclusions-5.0.0.txt
-    EastAsianWidth-3.2.0.txt
-    EastAsianWidth-4.1.0.txt
-    EastAsianWidth-5.0.0.txt
-    UnicodeData-3.2.0.txt
-    UnicodeData-4.1.0.txt
-    UnicodeData-5.0.0.txt
-    
-The following files are derived from files from the above website. The same
-terms of use apply.
-    UnihanNumeric-3.2.0.txt
-    UnihanNumeric-4.1.0.txt
-    UnihanNumeric-5.0.0.txt
+    CompositionExclusions-*.txt
+    EastAsianWidth-*.txt
+    LineBreak-*.txt
+    UnicodeData-*.txt
+    UnihanNumeric-*.txt

_pytest/resultlog.py

         elif report.failed:
             longrepr = str(report.longrepr)
         elif report.skipped:
-            longrepr = str(report.longrepr[2])
+            longrepr = str(report.longrepr)
         self.log_outcome(report, code, longrepr)
 
     def pytest_collectreport(self, report):
 TODO list for 2.7.0
 ===================
 
 You can find the results of the most recent buildbot run at:
-http://buildbot.pypy.org/summary?branch=fast-forward
+http://buildbot.pypy.org/
 
 
 Probably easy tasks
 -------------------
 Medium tasks
 ------------
 
-- Ast objects should be picklable, see in pypy/module/_ast/test/test_ast.py:
-  test_pickle()
-
 - socket module has a couple of changes (including AF_TIPC packet range)
 
-- (test_lib2to3) When a "for" loop runs a generator function, if the loop is
-  exited before the end, the "finally" clause of the generator is not called
-  until the next gc collection.  In our case, in lib2to3/pytree.py,
-  WildcardPattern.match_seq() does not exhaust the generate_matches() generator,
-  and stderr is not restored.
-
-
 Longer tasks
 ------------
 

lib_pypy/_ctypes/builtin.py

 
 import _rawffi, sys
-import threading
+try:
+    from thread import _local as local
+except ImportError:
+    local = object    # no threads
 
 class ConvMode:
     encoding = 'ascii'
     arg = cobj._get_buffer_value()
     return _rawffi.wcharp2rawunicode(arg, lgt)
 
-class ErrorObject(threading.local):
+class ErrorObject(local):
     def __init__(self):
         self.errno = 0
         self.winerror = 0
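
The replacement above swaps the `threading` module for the lower-level `thread._local`, falling back to a plain `object` base class on builds without thread support. Roughly this pattern in isolation (an illustrative Python 2 sketch; `ErrorState` is a hypothetical stand-in for `ErrorObject`):

    try:
        from thread import _local as local   # C-level thread-local base class
    except ImportError:
        local = object        # no threads: one shared instance is fine

    class ErrorState(local):
        def __init__(self):
            self.errno = 0

    state = ErrorState()
    state.errno = 42          # with real threads, each thread sees its own value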

lib_pypy/_pypy_wait.py

 from ctypes.util import find_library
 from resource import _struct_rusage, struct_rusage
 
+__all__ = ["wait3", "wait4"]
+
 libc = CDLL(find_library("c"))
 c_wait3 = libc.wait3
 
 c_wait3.argtypes = [POINTER(c_int), c_int, POINTER(_struct_rusage)]
 
+c_wait4 = libc.wait4
+
+c_wait4.argtypes = [c_int, POINTER(c_int), c_int, POINTER(_struct_rusage)]
+
+def create_struct_rusage(c_struct):
+    return struct_rusage((
+        float(c_struct.ru_utime),
+        float(c_struct.ru_stime),
+        c_struct.ru_maxrss,
+        c_struct.ru_ixrss,
+        c_struct.ru_idrss,
+        c_struct.ru_isrss,
+        c_struct.ru_minflt,
+        c_struct.ru_majflt,
+        c_struct.ru_nswap,
+        c_struct.ru_inblock,
+        c_struct.ru_oublock,
+        c_struct.ru_msgsnd,
+        c_struct.ru_msgrcv,
+        c_struct.ru_nsignals,
+        c_struct.ru_nvcsw,
+        c_struct.ru_nivcsw))
+
 def wait3(options):
     status = c_int()
     _rusage = _struct_rusage()
     pid = c_wait3(byref(status), c_int(options), byref(_rusage))
 
-    rusage = struct_rusage((
-        float(_rusage.ru_utime),
-        float(_rusage.ru_stime),
-        _rusage.ru_maxrss,
-        _rusage.ru_ixrss,
-        _rusage.ru_idrss,
-        _rusage.ru_isrss,
-        _rusage.ru_minflt,
-        _rusage.ru_majflt,
-        _rusage.ru_nswap,
-        _rusage.ru_inblock,
-        _rusage.ru_oublock,
-        _rusage.ru_msgsnd,
-        _rusage.ru_msgrcv,
-        _rusage.ru_nsignals,
-        _rusage.ru_nvcsw,
-        _rusage.ru_nivcsw))
+    rusage = create_struct_rusage(_rusage)
 
     return pid, status.value, rusage
 
-__all__ = ["wait3"]
+def wait4(pid, options):
+    status = c_int()
+    _rusage = _struct_rusage()
+    pid = c_wait4(c_int(pid), byref(status), c_int(options), byref(_rusage))
+
+    rusage = create_struct_rusage(_rusage)
+
+    return pid, status.value, rusage

lib_pypy/pypy_test/test_os_wait.py

+# Generates the resource cache
+from __future__ import absolute_import
+from lib_pypy.ctypes_config_cache import rebuild
+rebuild.rebuild_one('resource.ctc.py')
+
+import os
+
+from lib_pypy._pypy_wait import wait3, wait4
+
+if hasattr(os, 'wait3'):
+    def test_os_wait3():
+        exit_status = 0x33
+
+        if not hasattr(os, "fork"):
+            skip("Need fork() to test wait3()")
+
+        child = os.fork()
+        if child == 0: # in child
+            os._exit(exit_status)
+        else:
+            pid, status, rusage = wait3(0)
+            assert child == pid
+            assert os.WIFEXITED(status)
+            assert os.WEXITSTATUS(status) == exit_status
+            assert isinstance(rusage.ru_utime, float)
+            assert isinstance(rusage.ru_maxrss, int)
+
+if hasattr(os, 'wait4'):
+    def test_os_wait4():
+        exit_status = 0x33
+
+        if not hasattr(os, "fork"):
+            skip("Need fork() to test wait4()")
+
+        child = os.fork()
+        if child == 0: # in child
+            os._exit(exit_status)
+        else:
+            pid, status, rusage = wait4(child, 0)
+            assert child == pid
+            assert os.WIFEXITED(status)
+            assert os.WEXITSTATUS(status) == exit_status
+            assert isinstance(rusage.ru_utime, float)
+            assert isinstance(rusage.ru_maxrss, int)

lib_pypy/pypy_test/test_os_wait3.py

-import os
-
-if hasattr(os, 'wait3'):
-    def test_os_wait3():
-        exit_status = 0x33
-
-        if not hasattr(os, "fork"):
-            skip("Need fork() to test wait3()")
-
-        child = os.fork()
-        if child == 0: # in child
-            os._exit(exit_status)
-        else:
-            pid, status, rusage = os.wait3(0)
-            assert child == pid
-            assert os.WIFEXITED(status)
-            assert os.WEXITSTATUS(status) == exit_status
-            assert isinstance(rusage.ru_utime, float)
-            assert isinstance(rusage.ru_maxrss, int)

pypy/annotation/description.py

         except AttributeError:
             return False
 
+    def warn_missing_attribute(self, attr):
+        # only warn for missing attribute names whose name doesn't start
+        # with '$', to silence the warnings about '$memofield_xxx'.
+        return not self.has_attribute(attr) and not attr.startswith('$')
+
     def read_attribute(self, attr):
         try:
             return self.attrcache[attr]

pypy/annotation/model.py

     except TypeError:
         s = None    # unhashable T, e.g. a Ptr(GcForwardReference())
     if s is None:
+        if isinstance(T, lltype.Typedef):
+            return lltype_to_annotation(T.OF)
         if isinstance(T, lltype.Number):
             return SomeInteger(knowntype=T._type)
         if isinstance(T, (ootype.Instance, ootype.BuiltinType)):

pypy/config/pypyoption.py

                and not p.basename.startswith('test')]
 
 essential_modules = dict.fromkeys(
-    ["exceptions", "_file", "sys", "__builtin__", "posix", "signal"]
+    ["exceptions", "_file", "sys", "__builtin__", "posix"]
 )
 
 default_modules = essential_modules.copy()
 translation_modules = default_modules.copy()
 translation_modules.update(dict.fromkeys(
     ["fcntl", "rctime", "select", "signal", "_rawffi", "zlib",
-     "struct", "md5", "cStringIO", "array"]))
+     "struct", "_md5", "cStringIO", "array"]))
 
 working_oo_modules = default_modules.copy()
 working_oo_modules.update(dict.fromkeys(

pypy/config/translationoption.py

     }
 
 def final_check_config(config):
-    pass
+    # XXX: this should be a real config option, but it is hard to refactor it;
+    # instead, we "just" patch it from here
+    from pypy.rlib import rfloat
+    if config.translation.type_system == 'ootype':
+        rfloat.USE_SHORT_FLOAT_REPR = False
 
 def set_opt_level(config, level):
     """Apply optimization suggestions on the 'config'.

pypy/doc/conf.py

 # built documents.
 #
 # The short X.Y version.
-version = '1.4.1'
+version = '1.5'
 # The full version, including alpha/beta/rc tags.
-release = '1.4.1'
+release = '1.5-alpha'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.

pypy/doc/getting-started-python.rst

 
    possibly replacing ``--opt=jit`` with another `optimization level`_
    of your choice like ``--opt=2`` if you do not want the included JIT
-   compiler.  As of March 2011, Intel **32-bit** environment needs ``4GB``.
+   compiler.  As of March 2011, Intel 32-bit environment needs **at
+   least** 2GB, and 64-bit needs 4GB.
 
 .. _`optimization level`: config/opt.html
 

pypy/interpreter/astcompiler/test/test_astbuilder.py

             ("{x for x in z}", "set comprehension"),
             ("{x : x for x in z}", "dict comprehension"),
             ("'str'", "literal"),
+            ("u'str'", "literal"),
+            ("b'bytes'", "literal"),
             ("()", "()"),
             ("23", "literal"),
             ("{}", "literal"),

pypy/interpreter/baseobjspace.py

         pass
     def _freeze_(self):
         return True
+    def __enter__(self):
+        pass
+    def __exit__(self, *args):
+        pass
+
 dummy_lock = DummyLock()
 
 ## Table describing the regular part of the interface of object spaces,
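
Giving `DummyLock` empty `__enter__`/`__exit__` methods lets `dummy_lock` stand in for a real lock in a `with` statement, so call sites can be written once whether or not threading is available. A hedged sketch of the intended usage (hypothetical call site, not from this commit):

    def bump(lock, counter):
        # 'lock' may be a real threading.Lock() or the no-op dummy_lock;
        # the body is identical in both cases.
        with lock:
            counter[0] += 1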

pypy/interpreter/eval.py

     """A frame is an environment supporting the execution of a code object.
     Abstract base class."""
 
-    def __init__(self, space, w_globals=None, numlocals=-1):
+    def __init__(self, space, w_globals=None):
         self.space      = space
         self.w_globals  = w_globals  # wrapped dict of globals
         self.w_locals   = None       # wrapped dict of locals
-        if numlocals < 0:  # compute the minimal size based on arguments
-            numlocals = len(self.getcode().getvarnames())
-        self.numlocals = numlocals
 
     def run(self):
         "Abstract method to override. Runs the frame"
         where the order is according to self.getcode().signature()."""
         raise TypeError, "abstract"
 
+    def getfastscopelength(self):
+        "Abstract. Get the expected number of locals."
+        raise TypeError, "abstract"
+
     def fast2locals(self):
         # Copy values from self.fastlocals_w to self.w_locals
         if self.w_locals is None:
         # Copy values from self.w_locals to self.fastlocals_w
         assert self.w_locals is not None
         varnames = self.getcode().getvarnames()
+        numlocals = self.getfastscopelength()
 
-        new_fastlocals_w = [None]*self.numlocals
-        
-        for i in range(min(len(varnames), self.numlocals)):
+        new_fastlocals_w = [None] * numlocals
+
+        for i in range(min(len(varnames), numlocals)):
             w_name = self.space.wrap(varnames[i])
             try:
                 w_value = self.space.getitem(self.w_locals, w_name)

pypy/interpreter/executioncontext.py

             return
         code = frame.pycode
         if frame.instr_lb <= frame.last_instr < frame.instr_ub:
-            if frame.last_instr <= frame.instr_prev:
+            if frame.last_instr < frame.instr_prev_plus_one:
                 # We jumped backwards in the same line.
                 executioncontext._trace(frame, 'line', self.space.w_None)
         else:
                 frame.f_lineno = line
                 executioncontext._trace(frame, 'line', self.space.w_None)
 
-        frame.instr_prev = frame.last_instr
+        frame.instr_prev_plus_one = frame.last_instr + 1
         self.space.frame_trace_action.fire()     # continue tracing
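
The rename is not purely cosmetic: the field now stores `last_instr + 1`, so its neutral initial value can be 0 instead of the old -1 sentinel (see the matching defaults in pypy/interpreter/pyframe.py further down), and the comparison tightens from `<=` to `<`. A quick check that the two encodings agree (illustrative):

    for last_instr in (0, 5, 10):
        for prev in (-1, 5, 10):             # old instr_prev values
            prev_plus_one = prev + 1         # new instr_prev_plus_one encoding
            assert (last_instr <= prev) == (last_instr < prev_plus_one)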

pypy/interpreter/function.py

     return func.code
 
 class Defaults(object):
-    _immutable_fields_ = ["items[*]"]
+    _immutable_fields_ = ["items[*]", "promote"]
 
-    def __init__(self, items):
+    def __init__(self, items, promote=False):
         self.items = items
+        self.promote = promote
 
     def getitems(self):
-        return jit.hint(self, promote=True).items
+        # an idea - we want to promote only items that we know won't change
+        # too often. this is the case for builtin functions and functions
+        # with known constant defaults. Otherwise we don't want to promote
+        # this so lambda a=a won't create a new trace each time it's
+        # encountered
+        if self.promote:
+            return jit.hint(self, promote=True).items
+        return self.items
 
     def getitem(self, idx):
         return self.getitems()[idx]
     can_change_code = True
 
     def __init__(self, space, code, w_globals=None, defs_w=[], closure=None,
-                 forcename=None):
+                 forcename=None, promote_defs=False):
         self.space = space
         self.name = forcename or code.co_name
         self.w_doc = None   # lazily read from code.getdocstring()
         self.code = code       # Code instance
         self.w_func_globals = w_globals  # the globals dictionary
         self.closure   = closure    # normally, list of Cell instances or None
-        self.defs = Defaults(defs_w)     # wrapper around list of w_default's
+        self.defs = Defaults(defs_w, promote=promote_defs)
+        # wrapper around list of w_default's
         self.w_func_dict = None # filled out below if needed
         self.w_module = None
 
     def __init__(self, func):
         assert isinstance(func, Function)
         Function.__init__(self, func.space, func.code, func.w_func_globals,
-                          func.defs.getitems(), func.closure, func.name)
+                          func.defs.getitems(), func.closure, func.name,
+                          promote_defs=True)
         self.w_doc = func.w_doc
         self.w_func_dict = func.w_func_dict
         self.w_module = func.w_module
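
The comment in `getitems()` above describes a JIT tradeoff: promoting turns the defaults into a trace-time constant, which pays off for builtins whose defaults never change, but would force a new trace per function object in code like the following (an illustrative sketch of the failure mode, not JIT code):

    def make_adder(a):
        # Every call builds a fresh function object with its own defaults.
        # If the JIT promoted those defaults, each adder would specialize
        # the trace to its own Defaults instance, retracing every time.
        return lambda x, a=a: x + a

    adders = [make_adder(i) for i in range(100)]
    assert [f(1) for f in adders[:3]] == [1, 2, 3]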

pypy/interpreter/nestedscope.py

             freevars = [self.space.interp_w(Cell, cell)
                         for cell in self.space.fixedview(w_freevarstuple)]
         else:
-            nfreevars = len(codeobj.co_freevars)
-            freevars = [self.space.interp_w(Cell, self.popvalue())
-                        for i in range(nfreevars)]
-            freevars.reverse()
-        defaultarguments = [self.popvalue() for i in range(numdefaults)]
-        defaultarguments.reverse()
+            n = len(codeobj.co_freevars)
+            freevars = [None] * n
+            while True:
+                n -= 1
+                if n < 0:
+                    break
+                freevars[n] = self.space.interp_w(Cell, self.popvalue())
+        defaultarguments = self.popvalues(numdefaults)
         fn = function.Function(self.space, codeobj, self.w_globals,
                                defaultarguments, freevars)
         self.pushvalue(self.space.wrap(fn))
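
The new loop fills `freevars` from the last slot backwards as values are popped, avoiding the build-then-reverse pass; `popvalues(numdefaults)` presumably pops that many values while preserving their order, doing the same job for the defaults. A quick equivalence check of the fill-backwards idiom (illustrative):

    stack = [10, 20, 30, 40]
    n = 3
    filled = [None] * n
    while True:
        n -= 1
        if n < 0:
            break
        filled[n] = stack.pop()      # pops 40, 30, 20 into slots 2, 1, 0
    assert filled == [20, 30, 40]    # same result as pop-three-then-reverse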

pypy/interpreter/pyframe.py

     w_f_trace                = None
     # For tracing
     instr_lb                 = 0
-    instr_ub                 = -1
-    instr_prev               = -1
+    instr_ub                 = 0
+    instr_prev_plus_one      = 0
     is_being_profiled        = False
 
     def __init__(self, space, code, w_globals, closure):
         self = hint(self, access_directly=True, fresh_virtualizable=True)
         assert isinstance(code, pycode.PyCode)
         self.pycode = code
-        eval.Frame.__init__(self, space, w_globals, code.co_nlocals)
+        eval.Frame.__init__(self, space, w_globals)
         self.valuestack_w = [None] * code.co_stacksize
         self.valuestackdepth = 0
         self.lastblock = None
         # regular functions always have CO_OPTIMIZED and CO_NEWLOCALS.
         # class bodies only have CO_NEWLOCALS.
         self.initialize_frame_scopes(closure, code)
-        self.fastlocals_w = [None]*self.numlocals
+        self.fastlocals_w = [None] * code.co_nlocals
         make_sure_not_resized(self.fastlocals_w)
         self.f_lineno = code.co_firstlineno
 
 
             w(self.instr_lb), #do we need these three (that are for tracing)
             w(self.instr_ub),
-            w(self.instr_prev),
+            w(self.instr_prev_plus_one),
             w_cells,
             ]
 
         args_w = space.unpackiterable(w_args)
         w_f_back, w_builtin, w_pycode, w_valuestack, w_blockstack, w_exc_value, w_tb,\
             w_globals, w_last_instr, w_finished, w_f_lineno, w_fastlocals, w_f_locals, \
-            w_f_trace, w_instr_lb, w_instr_ub, w_instr_prev, w_cells = args_w
+            w_f_trace, w_instr_lb, w_instr_ub, w_instr_prev_plus_one, w_cells = args_w
 
         new_frame = self
         pycode = space.interp_w(PyCode, w_pycode)
 
         new_frame.instr_lb = space.int_w(w_instr_lb)   #the three for tracing
         new_frame.instr_ub = space.int_w(w_instr_ub)
-        new_frame.instr_prev = space.int_w(w_instr_prev)
+        new_frame.instr_prev_plus_one = space.int_w(w_instr_prev_plus_one)
 
         self._setcellvars(cellvars)
         # XXX what if the frame is in another thread??
         """Initialize cellvars from self.fastlocals_w
         This is overridden in nestedscope.py"""
         pass
-    
+
+    def getfastscopelength(self):
+        return self.pycode.co_nlocals
+
     def getclosure(self):
         return None
 

pypy/interpreter/pyopcode.py

                 # raised after the exception handler block was popped.
                 try:
                     trace = self.w_f_trace
-                    self.w_f_trace = None
+                    if trace is not None:
+                        self.w_f_trace = None
                     try:
                         ec.bytecode_trace_after_exception(self)
                     finally:
-                        self.w_f_trace = trace
+                        if trace is not None:
+                            self.w_f_trace = trace
                 except OperationError, e:
                     operr = e
             pytraceback.record_application_traceback(
 
         # add a softspace unless we just printed a string which ends in a '\t'
         # or '\n' -- or more generally any whitespace character but ' '
-        if isinstance(x, str) and x and x[-1].isspace() and x[-1]!=' ':
-            return
-        # XXX add unicode handling
+        if isinstance(x, (str, unicode)) and x:
+            lastchar = x[-1]
+            if lastchar.isspace() and lastchar != ' ':
+                return
         file_softspace(stream, True)
     print_item_to._annspecialcase_ = "specialize:argtype(0)"
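
The rule being extended to unicode here is Python 2's softspace: `print x,` records a pending space, except when `x` is a string whose last character is whitespace other than a plain space. For example:

    print "foo\t",     # ends in '\t': no softspace is recorded
    print "bar"        # output so far: "foo\tbar\n"
    print u"trick",    # ends in 'k': softspace is recorded
    print u"baz"       # output: "trick baz\n", with the inserted space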
 

pypy/interpreter/test/test_eval.py

             
             def __init__(self, space, code, numlocals):
                 self.code = code
-                Frame.__init__(self, space, numlocals=numlocals)
+                Frame.__init__(self, space)
+                self.numlocals = numlocals
                 self.fastlocals_w = [None] * self.numlocals
 
             def getcode(self):
 
             def getfastscope(self):
                 return self.fastlocals_w
-        
+
+            def getfastscopelength(self):
+                return self.numlocals
+
         self.f = ConcreteFastscopeFrame(self.space, code, numlocals=5)
         
 

pypy/interpreter/test/test_interpreter.py

         sys.stdout = out = Out()
         try:
             raises(UnicodeError, "print unichr(0xa2)")
+            assert out.data == []
             out.encoding = "cp424"
             print unichr(0xa2)
             assert out.data == [unichr(0xa2).encode("cp424"), "\n"]
+            del out.data[:]
+            del out.encoding
+            print u"foo\t", u"bar\n", u"trick", u"baz\n"  # softspace handling
+            assert out.data == ["foo\t", "bar\n", "trick", " ", "baz\n", "\n"]
         finally:
             sys.stdout = save
 

pypy/jit/backend/cli/test/test_basic.py

 import py
 from pypy.jit.backend.cli.runner import CliCPU
-from pypy.jit.metainterp.test import test_basic
+from pypy.jit.metainterp.test import support, test_ajit
 
-class CliJitMixin(test_basic.OOJitMixin):
+class CliJitMixin(support.OOJitMixin):
     CPUClass = CliCPU
     def setup_class(cls):
         from pypy.translator.cli.support import PythonNet
         PythonNet.System     # possibly raises Skip
 
-class TestBasic(CliJitMixin, test_basic.TestOOtype):
+class TestBasic(CliJitMixin, test_ajit.TestOOtype):
     # for the individual tests see
     # ====> ../../../metainterp/test/test_basic.py
 

pypy/jit/backend/llgraph/llimpl.py

File contents unchanged.

pypy/jit/backend/llgraph/runner.py

 class Descr(history.AbstractDescr):
 
     def __init__(self, ofs, typeinfo, extrainfo=None, name=None,
-                 arg_types=None):
+                 arg_types=None, count_fields_if_immut=-1):
         self.ofs = ofs
         self.typeinfo = typeinfo
         self.extrainfo = extrainfo
         self.name = name
         self.arg_types = arg_types
+        self.count_fields_if_immut = count_fields_if_immut
 
     def get_arg_types(self):
         return self.arg_types
     def as_vtable_size_descr(self):
         return self
 
+    def count_fields_if_immutable(self):
+        return self.count_fields_if_immut
+
     def __lt__(self, other):
         raise TypeError("cannot use comparison on Descrs")
     def __le__(self, other):
         return False
 
     def getdescr(self, ofs, typeinfo='?', extrainfo=None, name=None,
-                 arg_types=None):
-        key = (ofs, typeinfo, extrainfo, name, arg_types)
+                 arg_types=None, count_fields_if_immut=-1):
+        key = (ofs, typeinfo, extrainfo, name, arg_types,
+               count_fields_if_immut)
         try:
             return self._descrs[key]
         except KeyError:
-            descr = Descr(ofs, typeinfo, extrainfo, name, arg_types)
+            descr = Descr(ofs, typeinfo, extrainfo, name, arg_types,
+                          count_fields_if_immut)
             self._descrs[key] = descr
             return descr
 
 
     def sizeof(self, S):
         assert not isinstance(S, lltype.Ptr)
-        return self.getdescr(symbolic.get_size(S))
+        count = heaptracker.count_fields_if_immutable(S)
+        return self.getdescr(symbolic.get_size(S), count_fields_if_immut=count)
 
 
 class LLtypeCPU(BaseCPU):

pypy/jit/backend/llsupport/descr.py

 
 class SizeDescr(AbstractDescr):
     size = 0      # help translation
+    is_immutable = False
 
-    def __init__(self, size):
+    def __init__(self, size, count_fields_if_immut=-1):
         self.size = size
+        self.count_fields_if_immut = count_fields_if_immut
+
+    def count_fields_if_immutable(self):
+        return self.count_fields_if_immut
 
     def repr_of_descr(self):
         return '<SizeDescr %s>' % self.size
         return cache[STRUCT]
     except KeyError:
         size = symbolic.get_size(STRUCT, gccache.translate_support_code)
+        count_fields_if_immut = heaptracker.count_fields_if_immutable(STRUCT)
         if heaptracker.has_gcstruct_a_vtable(STRUCT):
-            sizedescr = SizeDescrWithVTable(size)
+            sizedescr = SizeDescrWithVTable(size, count_fields_if_immut)
         else:
-            sizedescr = SizeDescr(size)
+            sizedescr = SizeDescr(size, count_fields_if_immut)
         gccache.init_size_descr(STRUCT, sizedescr)
         cache[STRUCT] = sizedescr
         return sizedescr
 
-
 # ____________________________________________________________
 # FieldDescrs
 

pypy/jit/backend/llsupport/gc.py

+import os
 from pypy.rlib import rgc
 from pypy.rlib.objectmodel import we_are_translated
 from pypy.rlib.debug import fatalerror
+from pypy.rlib.rarithmetic import ovfcheck
 from pypy.rpython.lltypesystem import lltype, llmemory, rffi, rclass, rstr
 from pypy.rpython.lltypesystem import llgroup
 from pypy.rpython.lltypesystem.lloperation import llop
 
 class GcLLDescription(GcCache):
     minimal_size_in_nursery = 0
+    get_malloc_slowpath_addr = None
+
     def __init__(self, gcdescr, translator=None, rtyper=None):
         GcCache.__init__(self, translator is not None, rtyper)
         self.gcdescr = gcdescr
         pass
     def can_inline_malloc(self, descr):
         return False
+    def can_inline_malloc_varsize(self, descr, num_elem):
+        return False
     def has_write_barrier_class(self):
         return None
     def freeing_block(self, start, stop):
         return addr_ref
 
 
-class GcRootMap_asmgcc:
+class GcRootMap_asmgcc(object):
     """Handles locating the stack roots in the assembler.
     This is the class supporting --gcrootfinder=asmgcc.
     """
+    is_shadow_stack = False
+
     LOC_REG       = 0
     LOC_ESP_PLUS  = 1
     LOC_EBP_PLUS  = 2
     GCMAP_ARRAY = rffi.CArray(lltype.Signed)
     CALLSHAPE_ARRAY_PTR = rffi.CArrayPtr(rffi.UCHAR)
 
-    def __init__(self):
+    def __init__(self, gcdescr=None):
         # '_gcmap' is an array of length '_gcmap_maxlength' of addresses.
         # '_gcmap_curlength' tells how full the array really is.
         # The addresses are actually grouped in pairs:
         self._gcmap_deadentries = 0
         self._gcmap_sorted = True
 
+    def add_jit2gc_hooks(self, jit2gc):
+        jit2gc.update({
+            'gcmapstart': lambda: self.gcmapstart(),
+            'gcmapend': lambda: self.gcmapend(),
+            'gcmarksorted': lambda: self.gcmarksorted(),
+            })
+
     def initialize(self):
         # hack hack hack.  Remove these lines and see MissingRTypeAttribute
         # when the rtyper tries to annotate these methods only when GC-ing...
             number >>= 7
         shape.append(chr(number | flag))
 
-    def add_ebp_offset(self, shape, offset):
+    def add_frame_offset(self, shape, offset):
         assert (offset & 3) == 0
         if offset >= 0:
             num = self.LOC_EBP_PLUS | offset
         return rawaddr
 
 
+class GcRootMap_shadowstack(object):
+    """Handles locating the stack roots in the assembler.
+    This is the class supporting --gcrootfinder=shadowstack.
+    """
+    is_shadow_stack = True
+    MARKER = 8
+
+    # The "shadowstack" is a portable way in which the GC finds the
+    # roots that live in the stack.  Normally it is just a list of
+    # pointers to GC objects.  The pointers may be moved around by a GC
+    # collection.  But with the JIT, an entry can also be MARKER, in
+    # which case the next entry points to an assembler stack frame.
+    # During a residual CALL from the assembler (which may indirectly
+    # call the GC), we use the force_index stored in the assembler
+    # stack frame to identify the call: we can go from the force_index
+    # to a list of where the GC pointers are in the frame (this is the
+    # purpose of the present class).
+    #
+    # Note that across CALL_MAY_FORCE or CALL_ASSEMBLER, we can also go
+    # from the force_index to a ResumeGuardForcedDescr instance, which
+    # is used if the virtualizable or the virtualrefs need to be forced
+    # (see pypy.jit.backend.model).  The force_index number in the stack
+    # frame is initially set to a non-negative value x, but it is
+    # occasionally turned into (~x) in case of forcing.
+
+    INTARRAYPTR = rffi.CArrayPtr(rffi.INT)
+    CALLSHAPES_ARRAY = rffi.CArray(INTARRAYPTR)
+
+    def __init__(self, gcdescr):
+        self._callshapes = lltype.nullptr(self.CALLSHAPES_ARRAY)
+        self._callshapes_maxlength = 0
+        self.force_index_ofs = gcdescr.force_index_ofs
+
+    def add_jit2gc_hooks(self, jit2gc):
+        #
+        def collect_jit_stack_root(callback, gc, addr):
+            if addr.signed[0] != GcRootMap_shadowstack.MARKER:
+                # common case
+                if gc.points_to_valid_gc_object(addr):
+                    callback(gc, addr)
+                return WORD
+            else:
+                # case of a MARKER followed by an assembler stack frame
+                follow_stack_frame_of_assembler(callback, gc, addr)
+                return 2 * WORD
+        #
+        def follow_stack_frame_of_assembler(callback, gc, addr):
+            frame_addr = addr.signed[1]
+            addr = llmemory.cast_int_to_adr(frame_addr + self.force_index_ofs)
+            force_index = addr.signed[0]
+            if force_index < 0:
+                force_index = ~force_index
+            callshape = self._callshapes[force_index]
+            n = 0
+            while True:
+                offset = rffi.cast(lltype.Signed, callshape[n])
+                if offset == 0:
+                    break
+                addr = llmemory.cast_int_to_adr(frame_addr + offset)
+                if gc.points_to_valid_gc_object(addr):
+                    callback(gc, addr)
+                n += 1
+        #
+        jit2gc.update({
+            'rootstackhook': collect_jit_stack_root,
+            })
+
+    def initialize(self):
+        pass
+
+    def get_basic_shape(self, is_64_bit=False):
+        return []
+
+    def add_frame_offset(self, shape, offset):
+        assert offset != 0
+        shape.append(offset)
+
+    def add_callee_save_reg(self, shape, register):
+        msg = "GC pointer in %s was not spilled" % register
+        os.write(2, '[llsupport/gc] %s\n' % msg)
+        raise AssertionError(msg)
+
+    def compress_callshape(self, shape, datablockwrapper):
+        length = len(shape)
+        SZINT = rffi.sizeof(rffi.INT)
+        rawaddr = datablockwrapper.malloc_aligned((length + 1) * SZINT, SZINT)
+        p = rffi.cast(self.INTARRAYPTR, rawaddr)
+        for i in range(length):
+            p[i] = rffi.cast(rffi.INT, shape[i])
+        p[length] = rffi.cast(rffi.INT, 0)
+        return p
+
+    def write_callshape(self, p, force_index):
+        if force_index >= self._callshapes_maxlength:
+            self._enlarge_callshape_list(force_index + 1)
+        self._callshapes[force_index] = p
+
+    def _enlarge_callshape_list(self, minsize):
+        newlength = 250 + (self._callshapes_maxlength // 3) * 4
+        if newlength < minsize:
+            newlength = minsize
+        newarray = lltype.malloc(self.CALLSHAPES_ARRAY, newlength,
+                                 flavor='raw', track_allocation=False)
+        if self._callshapes:
+            i = self._callshapes_maxlength - 1
+            while i >= 0:
+                newarray[i] = self._callshapes[i]
+                i -= 1
+            lltype.free(self._callshapes, flavor='raw', track_allocation=False)
+        self._callshapes = newarray
+        self._callshapes_maxlength = newlength
+
+    def freeing_block(self, start, stop):
+        pass     # nothing needed here
+
+    def get_root_stack_top_addr(self):
+        rst_addr = llop.gc_adr_of_root_stack_top(llmemory.Address)
+        return rffi.cast(lltype.Signed, rst_addr)
+
+
 class WriteBarrierDescr(AbstractDescr):
     def __init__(self, gc_ll_descr):
         self.llop1 = gc_ll_descr.llop1
         except KeyError:
             raise NotImplementedError("--gcrootfinder=%s not implemented"
                                       " with the JIT" % (name,))
-        gcrootmap = cls()
+        gcrootmap = cls(gcdescr)
         self.gcrootmap = gcrootmap
         self.gcrefs = GcRefList()
         self.single_gcref_descr = GcPtrFieldDescr('', 0)
         # where it can be fished and reused by the FrameworkGCTransformer
         self.layoutbuilder = framework.TransformerLayoutBuilder(translator)
         self.layoutbuilder.delay_encoding()
-        self.translator._jit2gc = {
-            'layoutbuilder': self.layoutbuilder,
-            'gcmapstart': lambda: gcrootmap.gcmapstart(),
-            'gcmapend': lambda: gcrootmap.gcmapend(),
-            'gcmarksorted': lambda: gcrootmap.gcmarksorted(),
-            }
+        self.translator._jit2gc = {'layoutbuilder': self.layoutbuilder}
+        gcrootmap.add_jit2gc_hooks(self.translator._jit2gc)
+
         self.GCClass = self.layoutbuilder.GCClass
         self.moving_gc = self.GCClass.moving_gc
         self.HDRPTR = lltype.Ptr(self.GCClass.HDR)
         self.max_size_of_young_obj = self.GCClass.JIT_max_size_of_young_obj()
         self.minimal_size_in_nursery=self.GCClass.JIT_minimal_size_in_nursery()
 
+        # for the fast path of mallocs, the following must be true, at least
+        assert self.GCClass.inline_simple_malloc
+        assert self.GCClass.inline_simple_malloc_varsize
+
         # make a malloc function, with two arguments
         def malloc_basic(size, tid):
             type_id = llop.extract_ushort(llgroup.HALFWORD, tid)
             x3 = x0 * 0.3
             for_test_only.x = x0 + x1 + x2 + x3
         #
-        def malloc_fixedsize_slowpath(size):
+        def malloc_slowpath(size):
             if self.DEBUG:
                 random_usage_of_xmm_registers()
             assert size >= self.minimal_size_in_nursery
             try:
+                # NB. although we call do_malloc_fixedsize_clear() here,
+                # it's a bit of a hack because we set tid to 0 and may
+                # also use it to allocate varsized objects.  The tid
+                # and possibly the length are both set afterward.
                 gcref = llop1.do_malloc_fixedsize_clear(llmemory.GCREF,
                                             0, size, True, False, False)
             except MemoryError:
                 fatalerror("out of memory (from JITted code)")
                 return 0
             return rffi.cast(lltype.Signed, gcref)
-        self.malloc_fixedsize_slowpath = malloc_fixedsize_slowpath
-        self.MALLOC_FIXEDSIZE_SLOWPATH = lltype.FuncType([lltype.Signed],
-                                                         lltype.Signed)
+        self.malloc_slowpath = malloc_slowpath
+        self.MALLOC_SLOWPATH = lltype.FuncType([lltype.Signed], lltype.Signed)
 
     def get_nursery_free_addr(self):
         nurs_addr = llop.gc_adr_of_nursery_free(llmemory.Address)
         nurs_top_addr = llop.gc_adr_of_nursery_top(llmemory.Address)
         return rffi.cast(lltype.Signed, nurs_top_addr)
 
-    def get_malloc_fixedsize_slowpath_addr(self):
-        fptr = llhelper(lltype.Ptr(self.MALLOC_FIXEDSIZE_SLOWPATH),
-                        self.malloc_fixedsize_slowpath)
+    def get_malloc_slowpath_addr(self):
+        fptr = llhelper(lltype.Ptr(self.MALLOC_SLOWPATH), self.malloc_slowpath)
         return rffi.cast(lltype.Signed, fptr)
 
     def initialize(self):
             return True
         return False
 
+    def can_inline_malloc_varsize(self, arraydescr, num_elem):
+        assert isinstance(arraydescr, BaseArrayDescr)
+        basesize = arraydescr.get_base_size(self.translate_support_code)
+        itemsize = arraydescr.get_item_size(self.translate_support_code)
+        try:
+            size = ovfcheck(basesize + ovfcheck(itemsize * num_elem))
+            return size < self.max_size_of_young_obj
+        except OverflowError:
+            return False
+
     def has_write_barrier_class(self):
         return WriteBarrierDescr
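
To make the MARKER protocol above concrete: a shadowstack entry is normally just a GC pointer, but the value MARKER announces that the next word holds the address of an assembler frame, whose roots are then located via its force_index. A hedged, list-based sketch of the walk performed by `collect_jit_stack_root` (illustrative only; `roots_of_frame` stands in for the force_index-to-callshape lookup):

    MARKER = 8

    def walk_shadowstack(stack, visit, roots_of_frame):
        i = 0
        while i < len(stack):
            if stack[i] != MARKER:
                visit(stack[i])          # ordinary root: advance one word
                i += 1
            else:
                for addr in roots_of_frame(stack[i + 1]):
                    visit(addr)          # roots inside the assembler frame
                i += 2                   # skip MARKER and the frame address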
 

pypy/jit/backend/llsupport/regalloc.py

 
-from pypy.jit.metainterp.history import Const, Box
+from pypy.jit.metainterp.history import Const, Box, REF
 from pypy.rlib.objectmodel import we_are_translated
 
 class TempBox(Box):
             self.assembler.regalloc_mov(reg, to)
         # otherwise it's clean
 
-    def before_call(self, force_store=[], save_all_regs=False):
+    def before_call(self, force_store=[], save_all_regs=0):
         """ Spill registers before a call, as described by
         'self.save_around_call_regs'.  Registers are not spilled if
         they don't survive past the current operation, unless they
-        are listed in 'force_store'.
+        are listed in 'force_store'.  'save_all_regs' can be 0 (default),
+        1 (save all), or 2 (save default+PTRs).
         """
         for v, reg in self.reg_bindings.items():
             if v not in force_store and self.longevity[v][1] <= self.position:
                 del self.reg_bindings[v]
                 self.free_regs.append(reg)
                 continue
-            if not save_all_regs and reg not in self.save_around_call_regs:
-                # we don't have to
-                continue
+            if save_all_regs != 1 and reg not in self.save_around_call_regs:
+                if save_all_regs == 0:
+                    continue    # we don't have to
+                if v.type != REF:
+                    continue    # only save GC pointers
             self._sync_var(v)
             del self.reg_bindings[v]
             self.free_regs.append(reg)
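
The tri-state `save_all_regs` replaces the old boolean: 0 spills only the caller-saved registers, 1 spills everything, and 2 additionally spills any register holding a GC pointer (REF) even if it is normally callee-saved. The decision restated as a standalone predicate (a sketch, not the PyPy code; 'REF' is an illustrative stand-in for the imported constant):

    def must_spill(save_all_regs, reg, var_type, save_around_call_regs):
        if save_all_regs == 1:
            return True                  # mode 1: save everything
        if reg in save_around_call_regs:
            return True                  # caller-saved: always spilled
        # callee-saved register: only mode 2 spills it, and only for REFs
        return save_all_regs == 2 and var_type == 'REF'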

pypy/jit/backend/llsupport/test/test_descr.py

     descr_t = get_size_descr(c0, T)
     assert descr_s.size == symbolic.get_size(S, False)
     assert descr_t.size == symbolic.get_size(T, False)
+    assert descr_s.count_fields_if_immutable() == -1
+    assert descr_t.count_fields_if_immutable() == -1
     assert descr_s == get_size_descr(c0, S)
     assert descr_s != get_size_descr(c1, S)
     #
     descr_s = get_size_descr(c1, S)
     assert isinstance(descr_s.size, Symbolic)
+    assert descr_s.count_fields_if_immutable() == -1
 
+def test_get_size_descr_immut():
+    S = lltype.GcStruct('S', hints={'immutable': True})
+    T = lltype.GcStruct('T', ('parent', S),
+                        ('x', lltype.Char),
+                        hints={'immutable': True})
+    U = lltype.GcStruct('U', ('parent', T),
+                        ('u', lltype.Ptr(T)),
+                        ('v', lltype.Signed),
+                        hints={'immutable': True})
+    V = lltype.GcStruct('V', ('parent', U),
+                        ('miss1', lltype.Void),
+                        ('miss2', lltype.Void),
+                        hints={'immutable': True})
+    for STRUCT, expected in [(S, 0), (T, 1), (U, 3), (V, 3)]:
+        for translated in [False, True]:
+            c0 = GcCache(translated)
+            descr_s = get_size_descr(c0, STRUCT)
+            assert descr_s.count_fields_if_immutable() == expected
 
 def test_get_field_descr():
     U = lltype.Struct('U')
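
The expected counts in test_get_size_descr_immut follow from summing the non-Void fields along the inheritance chain: S adds none, T adds 'x', U adds 'u' and 'v', and V adds only Void fields, which do not count. A plain-Python restatement of that rule (a sketch; the real logic lives in heaptracker.count_fields_if_immutable):

    def count_fields(fields_by_level):
        # fields_by_level: one list of field-type names per struct, parent first
        return sum(1 for fields in fields_by_level
                     for ftype in fields if ftype != 'Void')

    assert count_fields([[]]) == 0                                  # S
    assert count_fields([[], ['Char']]) == 1                        # T
    assert count_fields([[], ['Char'], ['Ptr', 'Signed']]) == 3     # U
    assert count_fields([[], ['Char'], ['Ptr', 'Signed'],
                         ['Void', 'Void']]) == 3                    # V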

pypy/jit/backend/llsupport/test/test_gc.py

 from pypy.jit.tool.oparser import parse
 from pypy.rpython.lltypesystem.rclass import OBJECT, OBJECT_VTABLE
 from pypy.jit.metainterp.test.test_optimizeopt import equaloplists
-from pypy.rpython.memory.gctransform import asmgcroot
 
 def test_boehm():
     gc_ll_descr = GcLLDescr_boehm(None, None, None)
         num2a = ((-num2|3) >> 7) | 128
         num2b = (-num2|3) & 127
         shape = gcrootmap.get_basic_shape()
-        gcrootmap.add_ebp_offset(shape, num1)
-        gcrootmap.add_ebp_offset(shape, num2)
+        gcrootmap.add_frame_offset(shape, num1)
+        gcrootmap.add_frame_offset(shape, num2)
         assert shape == map(chr, [6, 7, 11, 15, 2, 0, num1a, num2b, num2a])
         gcrootmap.add_callee_save_reg(shape, 1)
         assert shape == map(chr, [6, 7, 11, 15, 2, 0, num1a, num2b, num2a,
             gc.asmgcroot = saved
 
 
+class TestGcRootMapShadowStack:
+    class FakeGcDescr:
+        force_index_ofs = 92
+
+    def test_make_shapes(self):
+        gcrootmap = GcRootMap_shadowstack(self.FakeGcDescr())
+        shape = gcrootmap.get_basic_shape()
+        gcrootmap.add_frame_offset(shape, 16)
+        gcrootmap.add_frame_offset(shape, -24)
+        assert shape == [16, -24]
+
+    def test_compress_callshape(self):
+        class FakeDataBlockWrapper:
+            def malloc_aligned(self, size, alignment):
+                assert alignment == 4    # even on 64-bits
+                assert size == 12        # 4*3, even on 64-bits
+                return rffi.cast(lltype.Signed, p)
+        datablockwrapper = FakeDataBlockWrapper()
+        p = lltype.malloc(rffi.CArray(rffi.INT), 3, immortal=True)
+        gcrootmap = GcRootMap_shadowstack(self.FakeGcDescr())
+        shape = [16, -24]
+        gcrootmap.compress_callshape(shape, datablockwrapper)
+        assert rffi.cast(lltype.Signed, p[0]) == 16
+        assert rffi.cast(lltype.Signed, p[1]) == -24
+        assert rffi.cast(lltype.Signed, p[2]) == 0
+
+
 class FakeLLOp(object):
     def __init__(self):
         self.record = []

pypy/jit/backend/model.py

         self.fail_descr_list = []
         self.fail_descr_free_list = []
 
+    def reserve_some_free_fail_descr_number(self):
+        lst = self.fail_descr_list
+        if len(self.fail_descr_free_list) > 0:
+            n = self.fail_descr_free_list.pop()
+            assert lst[n] is None
+        else:
+            n = len(lst)
+            lst.append(None)
+        return n
+
     def get_fail_descr_number(self, descr):
         assert isinstance(descr, history.AbstractFailDescr)
         n = descr.index
         if n < 0:
-            lst = self.fail_descr_list
-            if len(self.fail_descr_free_list) > 0:
-                n = self.fail_descr_free_list.pop()
-                assert lst[n] is None
-                lst[n] = descr
-            else:
-                n = len(lst)
-                lst.append(descr)
+            n = self.reserve_some_free_fail_descr_number()
+            self.fail_descr_list[n] = descr
             descr.index = n
         return n
 
     def record_faildescr_index(self, n):
         self.faildescr_indices.append(n)
 
+    def reserve_and_record_some_faildescr_index(self):
+        # like record_faildescr_index(), but invent and return a new,
+        # unused faildescr index
+        n = self.cpu.reserve_some_free_fail_descr_number()
+        self.record_faildescr_index(n)
+        return n
+
     def compiling_a_bridge(self):
         self.cpu.total_compiled_bridges += 1
         self.bridges_count += 1
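
The refactoring separates "reserve a slot number" from "store the descr in it", so a caller like `reserve_and_record_some_faildescr_index()` above can claim an index before the object exists. The underlying free-list pattern in isolation (a simplified sketch, not the PyPy classes):

    class SlotTable(object):
        def __init__(self):
            self.slots = []      # grows as needed; holes are reused
            self.free = []       # indices whose contents were released

        def reserve(self):
            if self.free:
                n = self.free.pop()
                assert self.slots[n] is None
            else:
                n = len(self.slots)
                self.slots.append(None)
            return n             # the caller fills self.slots[n] later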

pypy/jit/backend/test/test_random.py

 def test_random_function(BuilderClass=OperationBuilder):
     r = Random()
     cpu = get_cpu()
+    cpu.setup_once()
     if pytest.config.option.repeat == -1:
         while 1:
             check_random_function(cpu, BuilderClass, r)

pypy/jit/backend/x86/arch.py

 # Constants that depend on whether we are on 32-bit or 64-bit
 
+# The frame size gives the standard fixed part at the start of
+# every assembler frame: the saved value of some registers,
+# one word for the force_index, and some extra space used only
+# during a malloc that needs to go via its slow path.
+
 import sys
 if sys.maxint == (2**31 - 1):
     WORD = 4
-    # ebp + ebx + esi + edi + force_index = 5 words
-    FRAME_FIXED_SIZE = 5
+    # ebp + ebx + esi + edi + 4 extra words + force_index = 9 words
+    FRAME_FIXED_SIZE = 9
+    FORCE_INDEX_OFS = -8*WORD
+    MY_COPY_OF_REGS = -7*WORD
     IS_X86_32 = True
     IS_X86_64 = False
 else:
     WORD = 8
-    # rbp + rbx + r12 + r13 + r14 + r15 + force_index = 7 words
-    FRAME_FIXED_SIZE = 7
+    # rbp + rbx + r12 + r13 + r14 + r15 + 11 extra words + force_index = 18
+    FRAME_FIXED_SIZE = 18
+    FORCE_INDEX_OFS = -17*WORD
+    MY_COPY_OF_REGS = -16*WORD
     IS_X86_32 = False
     IS_X86_64 = True
 
-FORCE_INDEX_OFS = -(FRAME_FIXED_SIZE-1)*WORD
+# The extra space has room for almost all registers, apart from eax and edx
+# which are used in the malloc itself.  They are:
+#   ecx, ebx, esi, edi               [32 and 64 bits]
+#   r8, r9, r10, r12, r13, r14, r15    [64 bits only]
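
The formula deleted above still holds for the new explicit constants: the force_index remains the deepest word of the fixed frame on both word sizes. A quick consistency check (illustrative):

    for WORD, FRAME_FIXED_SIZE, FORCE_INDEX_OFS in [(4, 9, -8 * 4),
                                                    (8, 18, -17 * 8)]:
        # the force_index occupies the last word of the fixed frame
        assert FORCE_INDEX_OFS == -(FRAME_FIXED_SIZE - 1) * WORD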

pypy/jit/backend/x86/assembler.py

 from pypy.rpython.lltypesystem.lloperation import llop
 from pypy.rpython.annlowlevel import llhelper
 from pypy.jit.backend.model import CompiledLoopToken
-from pypy.jit.backend.x86.regalloc import (RegAlloc, X86RegisterManager,
-                                           X86XMMRegisterManager, get_ebp_ofs,
-                                           _get_scale)
+from pypy.jit.backend.x86.regalloc import (RegAlloc, get_ebp_ofs,
+                                           _get_scale, gpr_reg_mgr_cls)
 
 from pypy.jit.backend.x86.arch import (FRAME_FIXED_SIZE, FORCE_INDEX_OFS, WORD,
                                        IS_X86_32, IS_X86_64)
         self.loop_run_counters = []
         self.float_const_neg_addr = 0
         self.float_const_abs_addr = 0
-        self.malloc_fixedsize_slowpath1 = 0
-        self.malloc_fixedsize_slowpath2 = 0
+        self.malloc_slowpath1 = 0
+        self.malloc_slowpath2 = 0
         self.memcpy_addr = 0
         self.setup_failure_recovery()
         self._debug = False
             self._build_failure_recovery(True, withfloats=True)
             support.ensure_sse2_floats()
             self._build_float_constants()
-        if hasattr(gc_ll_descr, 'get_malloc_fixedsize_slowpath_addr'):
-            self._build_malloc_fixedsize_slowpath()
+        if gc_ll_descr.get_malloc_slowpath_addr is not None:
+            self._build_malloc_slowpath()
         self._build_stack_check_slowpath()
         debug_start('jit-backend-counts')
         self.set_debug(have_debug_prints())
 
     def setup(self, looptoken):
         assert self.memcpy_addr != 0, "setup_once() not called?"
+        self.current_clt = looptoken.compiled_loop_token
         self.pending_guard_tokens = []
         self.mc = codebuf.MachineCodeBlockWrapper()
         if self.datablockwrapper is None:
         self.mc = None
         self.looppos = -1
         self.currently_compiling_loop = None
+        self.current_clt = None
 
     def finish_once(self):
         if self._debug:
         self.float_const_neg_addr = float_constants
         self.float_const_abs_addr = float_constants + 16
 
-    def _build_malloc_fixedsize_slowpath(self):
+    def _build_malloc_slowpath(self):
+        # With asmgcc, we need two helpers, so that we can write two CALL
+        # instructions in assembler, with a mark_gc_roots in between.
+        # With shadowstack, this is not needed, so we produce a single helper.
+        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
+        #
         # ---------- first helper for the slow path of malloc ----------
         mc = codebuf.MachineCodeBlockWrapper()
         if self.cpu.supports_floats:          # save the XMM registers in
             for i in range(self.cpu.NUM_REGS):# the *caller* frame, from esp+8
                 mc.MOVSD_sx((WORD*2)+8*i, i)
         mc.SUB_rr(edx.value, eax.value)       # compute the size we want
-        if IS_X86_32:
-            mc.MOV_sr(WORD, edx.value)        # save it as the new argument
-        elif IS_X86_64:
-            # rdi can be clobbered: its content was forced to the stack
-            # by _fastpath_malloc(), like all other save_around_call_regs.
-            mc.MOV_rr(edi.value, edx.value)
-
-        addr = self.cpu.gc_ll_descr.get_malloc_fixedsize_slowpath_addr()
-        mc.JMP(imm(addr))                    # tail call to the real malloc
-        rawstart = mc.materialize(self.cpu.asmmemmgr, [])
-        self.malloc_fixedsize_slowpath1 = rawstart
-        # ---------- second helper for the slow path of malloc ----------
-        mc = codebuf.MachineCodeBlockWrapper()
+        addr = self.cpu.gc_ll_descr.get_malloc_slowpath_addr()
+        #
+        if gcrootmap is not None and gcrootmap.is_shadow_stack:
+            # ---- shadowstack ----
+            for reg, ofs in gpr_reg_mgr_cls.REGLOC_TO_COPY_AREA_OFS.items():
+                mc.MOV_br(ofs, reg.value)
+            mc.SUB_ri(esp.value, 16 - WORD)      # stack alignment of 16 bytes
+            if IS_X86_32:
+                mc.MOV_sr(0, edx.value)          # push argument
+            elif IS_X86_64:
+                mc.MOV_rr(edi.value, edx.value)
+            mc.CALL(imm(addr))
+            mc.ADD_ri(esp.value, 16 - WORD)
+            for reg, ofs in gpr_reg_mgr_cls.REGLOC_TO_COPY_AREA_OFS.items():
+                mc.MOV_rb(reg.value, ofs)
+        else:
+            # ---- asmgcc ----
+            if IS_X86_32:
+                mc.MOV_sr(WORD, edx.value)       # save it as the new argument
+            elif IS_X86_64:
+                # rdi can be clobbered: its content was forced to the stack
+                # by _fastpath_malloc(), like all other save_around_call_regs.
+                mc.MOV_rr(edi.value, edx.value)
+            mc.JMP(imm(addr))                    # tail call to the real malloc
+            rawstart = mc.materialize(self.cpu.asmmemmgr, [])
+            self.malloc_slowpath1 = rawstart
+            # ---------- second helper for the slow path of malloc ----------
+            mc = codebuf.MachineCodeBlockWrapper()
+        #
         if self.cpu.supports_floats:          # restore the XMM registers
             for i in range(self.cpu.NUM_REGS):# from where they were saved
                 mc.MOVSD_xs(i, (WORD*2)+8*i)
         mc.MOV(edx, heap(nursery_free_adr))   # load this in EDX
         mc.RET()
         rawstart = mc.materialize(self.cpu.asmmemmgr, [])
-        self.malloc_fixedsize_slowpath2 = rawstart
+        self.malloc_slowpath2 = rawstart
 
     def _build_stack_check_slowpath(self):
         _, _, slowpathaddr = self.cpu.insert_stack_check()
     def _get_offset_of_ebp_from_esp(self, allocated_depth):
         # Given that [EBP] is where we saved EBP, i.e. in the last word
         # of our fixed frame, then the 'words' value is:
-        words = (self.cpu.FRAME_FIXED_SIZE - 1) + allocated_depth
+        words = (FRAME_FIXED_SIZE - 1) + allocated_depth
         # align, e.g. for Mac OS X
         aligned_words = align_stack_words(words+2)-2 # 2 = EIP+EBP
         return -WORD * aligned_words
         for regloc in self.cpu.CALLEE_SAVE_REGISTERS:
             self.mc.PUSH_r(regloc.value)
 
+        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
+        if gcrootmap and gcrootmap.is_shadow_stack:
+            self._call_header_shadowstack(gcrootmap)
+
     def _call_header_with_stack_check(self):
         if self.stack_check_slowpath == 0:
             pass                # no stack check (e.g. not translated)
     def _call_footer(self):
         self.mc.LEA_rb(esp.value, -len(self.cpu.CALLEE_SAVE_REGISTERS) * WORD)
 
+        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
+        if gcrootmap and gcrootmap.is_shadow_stack:
+            self._call_footer_shadowstack(gcrootmap)
+
         for i in range(len(self.cpu.CALLEE_SAVE_REGISTERS)-1, -1, -1):
             self.mc.POP_r(self.cpu.CALLEE_SAVE_REGISTERS[i].value)
 
         self.mc.POP_r(ebp.value)
         self.mc.RET()
 
+    def _call_header_shadowstack(self, gcrootmap):
+        # we need to put two words into the shadowstack: the MARKER
+        # and the address of the frame (ebp, actually)
+        rst = gcrootmap.get_root_stack_top_addr()
+        assert rx86.fits_in_32bits(rst)
+        self.mc.MOV_rj(eax.value, rst)                # MOV eax, [rootstacktop]
+        self.mc.LEA_rm(edx.value, (eax.value, 2*WORD))  # LEA edx, [eax+2*WORD]
+        self.mc.MOV_mi((eax.value, 0), gcrootmap.MARKER)    # MOV [eax], MARKER
+        self.mc.MOV_mr((eax.value, WORD), ebp.value)      # MOV [eax+WORD], ebp
+        self.mc.MOV_jr(rst, edx.value)                # MOV [rootstacktop], edx
+
+    def _call_footer_shadowstack(self, gcrootmap):
+        rst = gcrootmap.get_root_stack_top_addr()
+        assert rx86.fits_in_32bits(rst)
+        self.mc.SUB_ji8(rst, 2*WORD)       # SUB [rootstacktop], 2*WORD
+
     def _assemble_bootstrap_direct_call(self, arglocs, jmppos, stackdepth):
         if IS_X86_64:
             return self._assemble_bootstrap_direct_call_64(arglocs, jmppos, stackdepth)
         nonfloatlocs, floatlocs = arglocs
         self._call_header()
         stackadjustpos = self._patchable_stackadjust()
-        tmp = X86RegisterManager.all_regs[0]
-        xmmtmp = X86XMMRegisterManager.all_regs[0]
+        tmp = eax
+        xmmtmp = xmm0
         self.mc.begin_reuse_scratch_register()
         for i in range(len(nonfloatlocs)):
             loc = nonfloatlocs[i]
                     self.implement_guard(guard_token, checkfalsecond)
         return genop_cmp_guard_float
 
-    def _emit_call(self, x, arglocs, start=0, tmp=eax):
+    def _emit_call(self, force_index, x, arglocs, start=0, tmp=eax):
         if IS_X86_64:
-            return self._emit_call_64(x, arglocs, start)
+            return self._emit_call_64(force_index, x, arglocs, start)
 
         p = 0
         n = len(arglocs)
         self._regalloc.reserve_param(p//WORD)
         # x is a location
         self.mc.CALL(x)
-        self.mark_gc_roots()
+        self.mark_gc_roots(force_index)
 
-    def _emit_call_64(self, x, arglocs, start=0):
+    def _emit_call_64(self, force_index, x, arglocs, start):
         src_locs = []
         dst_locs = []
         xmm_src_locs = []
 
         self._regalloc.reserve_param(len(pass_on_stack))
         self.mc.CALL(x)
-        self.mark_gc_roots()
+        self.mark_gc_roots(force_index)
 
     def call(self, addr, args, res):
-        self._emit_call(imm(addr), args)
+        force_index = self.write_new_force_index()
+        self._emit_call(force_index, imm(addr), args)
         assert res is eax
 
+    def write_new_force_index(self):
+        # for shadowstack only: get a new, unused force_index number and
+        # write it to FORCE_INDEX_OFS.  Used to record the call shape
+        # (i.e. where the GC pointers are in the stack) around a CALL
+        # instruction that doesn't already have a force_index.
+        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
+        if gcrootmap and gcrootmap.is_shadow_stack:
+            clt = self.current_clt
+            force_index = clt.reserve_and_record_some_faildescr_index()
+            self.mc.MOV_bi(FORCE_INDEX_OFS, force_index)
+            return force_index
+        else:
+            return 0
+
     genop_int_neg = _unaryop("NEG")
     genop_int_invert = _unaryop("NOT")
     genop_int_add = _binaryop("ADD", True)
             assert isinstance(loc_vtable, ImmedLoc)
             self.mc.MOV(mem(loc, self.cpu.vtable_offset), loc_vtable)
 
+    def set_new_array_length(self, loc, ofs_length, loc_num_elem):
+        assert isinstance(loc, RegLoc)
+        assert isinstance(loc_num_elem, ImmedLoc)
+        self.mc.MOV(mem(loc, ofs_length), loc_num_elem)
+
     # XXX genop_new is abused for all varsized mallocs with Boehm, for now
     # (instead of genop_new_array, genop_newstr, genop_newunicode)
     def genop_new(self, op, arglocs, result_loc):
         self.pending_guard_tokens.append(guard_token)
 
     def genop_call(self, op, arglocs, resloc):
+        force_index = self.write_new_force_index()
+        self._genop_call(op, arglocs, resloc, force_index)
+
+    def _genop_call(self, op, arglocs, resloc, force_index):
         sizeloc = arglocs[0]
         assert isinstance(sizeloc, ImmedLoc)
         size = sizeloc.value
             tmp = ecx
         else:
             tmp = eax
-        
-        self._emit_call(x, arglocs, 3, tmp=tmp)
+
+        self._emit_call(force_index, x, arglocs, 3, tmp=tmp)
 
         if IS_X86_32 and isinstance(resloc, StackLoc) and resloc.width == 8:
             # a float or a long long return
         faildescr = guard_op.getdescr()
         fail_index = self.cpu.get_fail_descr_number(faildescr)
         self.mc.MOV_bi(FORCE_INDEX_OFS, fail_index)
-        self.genop_call(op, arglocs, result_loc)
+        self._genop_call(op, arglocs, result_loc, fail_index)
         self.mc.CMP_bi(FORCE_INDEX_OFS, 0)
         self.implement_guard(guard_token, 'L')
 
         assert len(arglocs) - 2 == len(descr._x86_arglocs[0])
         #
         # Write a call to the direct_bootstrap_code of the target assembler
-        self._emit_call(imm(descr._x86_direct_bootstrap_code), arglocs, 2,
-                        tmp=eax)
+        self._emit_call(fail_index, imm(descr._x86_direct_bootstrap_code),
+                        arglocs, 2, tmp=eax)
         if op.result is None:
             assert result_loc is None
             value = self.cpu.done_with_this_frame_void_v
         jd = descr.outermost_jitdriver_sd
         assert jd is not None
         asm_helper_adr = self.cpu.cast_adr_to_int(jd.assembler_helper_adr)
-        self._emit_call(imm(asm_helper_adr), [eax, arglocs[1]], 0,
+        self._emit_call(fail_index, imm(asm_helper_adr), [eax, arglocs[1]], 0,
                         tmp=ecx)
         if IS_X86_32 and isinstance(result_loc, StackLoc) and result_loc.type == FLOAT:
             self.mc.FSTP_b(result_loc.value)
             # load the return value from fail_boxes_xxx[0]
             kind = op.result.type
             if kind == FLOAT:
-                xmmtmp = X86XMMRegisterManager.all_regs[0]
+                xmmtmp = xmm0
                 adr = self.fail_boxes_float.get_addr_for_num(0)
                 self.mc.MOVSD(xmmtmp, heap(adr))
                 self.mc.MOVSD(result_loc, xmmtmp)
         not_implemented("not implemented operation (guard): %s" %
                         op.getopname())
 
-    def mark_gc_roots(self):
+    def mark_gc_roots(self, force_index, use_copy_area=False):
+        if force_index < 0:
+            return     # not needed
         gcrootmap = self.cpu.gc_ll_descr.gcrootmap
         if gcrootmap:
-            mark = self._regalloc.get_mark_gc_roots(gcrootmap)
-            self.mc.insert_gcroot_marker(mark)
+            mark = self._regalloc.get_mark_gc_roots(gcrootmap, use_copy_area)
+            if gcrootmap.is_shadow_stack:
+                gcrootmap.write_callshape(mark, force_index)
+            else:
+                self.mc.insert_gcroot_marker(mark)
 
     def target_arglocs(self, loop_token):
         return loop_token._x86_arglocs
         else:
             self.mc.JMP(imm(loop_token._x86_loop_code))
 
-    def malloc_cond_fixedsize(self, nursery_free_adr, nursery_top_adr,
-                              size, tid):
+    def malloc_cond(self, nursery_free_adr, nursery_top_adr, size, tid):
         size = max(size, self.cpu.gc_ll_descr.minimal_size_in_nursery)
         self.mc.MOV(eax, heap(nursery_free_adr))
         self.mc.LEA_rm(edx.value, (eax.value, size))
         self.mc.J_il8(rx86.Conditions['NA'], 0) # patched later
         jmp_adr = self.mc.get_relative_pos()
 
-        # See comments in _build_malloc_fixedsize_slowpath for the
+        # See comments in _build_malloc_slowpath for the
         # details of the two helper functions that we are calling below.
         # First, we need to call two of them and not just one because we
         # need to have a mark_gc_roots() in between.  Then the calling
         # result in EAX; slowpath_addr2 additionally returns in EDX a
         # copy of heap(nursery_free_adr), so that the final MOV below is
         # a no-op.
-        slowpath_addr1 = self.malloc_fixedsize_slowpath1
+
         # reserve room for the argument to the real malloc and the
         # 8 saved XMM regs
         self._regalloc.reserve_param(1+16)
-        self.mc.CALL(imm(slowpath_addr1))
-        self.mark_gc_roots()
-        slowpath_addr2 = self.malloc_fixedsize_slowpath2
+
+        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
+        shadow_stack = (gcrootmap is not None and gcrootmap.is_shadow_stack)
+        if not shadow_stack:
+            # there are two helpers to call only with asmgcc
+            slowpath_addr1 = self.malloc_slowpath1
+            self.mc.CALL(imm(slowpath_addr1))
+        self.mark_gc_roots(self.write_new_force_index(),
+                           use_copy_area=shadow_stack)