Commits

Armin Rigo committed 8f606f5 Merge

hg merge r15-for-exception

  • Participants
  • Parent commits a25d76f, 523c9a2
  • Branches shadowstack-perf

Comments (0)

Files changed (21)

File pypy/jit/backend/llsupport/llmodel.py

         self.pos_exc_value = pos_exc_value
         self.save_exception = save_exception
         self.insert_stack_check = lambda: (0, 0, 0)
+        self.special_register = None
 
 
     def _setup_exception_handling_translated(self):
+        from pypy.rlib import register
+        from pypy.rlib.register import register_number
 
         def pos_exception():
             addr = llop.get_exception_addr(llmemory.Address)
             return heaptracker.adr2int(addr)
 
         def save_exception():
+            if register_number is not None:
+                register.store_into_reg(register.nonnull)
             addr = llop.get_exception_addr(llmemory.Address)
             addr.address[0] = llmemory.NULL
             addr = llop.get_exc_value_addr(llmemory.Address)
         self.pos_exc_value = pos_exc_value
         self.save_exception = save_exception
         self.insert_stack_check = insert_stack_check
+        self.special_register = register_number
+        self.special_register_nonnull = llmemory.cast_adr_to_int(
+                                                          register.nonnull)
 
     def _setup_on_leave_jitted_untranslated(self):
         # assume we don't need a backend leave in this case

File pypy/jit/backend/x86/arch.py

 # during a malloc that needs to go via its slow path.
 
 import sys
+from pypy.rlib.register import register_number as special_register
+
 if sys.maxint == (2**31 - 1):
     WORD = 4
     # ebp + ebx + esi + edi + 4 extra words + force_index = 9 words
     MY_COPY_OF_REGS = -7*WORD
     IS_X86_32 = True
     IS_X86_64 = False
+    assert special_register is None
 else:
     WORD = 8
-    # rbp + rbx + r12 + r13 + r14 + r15 + 11 extra words + force_index = 18
-    FRAME_FIXED_SIZE = 18
-    FORCE_INDEX_OFS = -17*WORD
-    MY_COPY_OF_REGS = -16*WORD
+    if special_register is not None:
+        assert special_register == 15
+        # rbp + rbx + r12 + r13 + r14 + 10 extra words + force_index = 16
+        FRAME_FIXED_SIZE = 16
+        FORCE_INDEX_OFS = -15*WORD
+        MY_COPY_OF_REGS = -14*WORD
+    else:
+        # rbp + rbx + r12 + r13 + r14 + r15 + 11 extra words + force_index = 18
+        FRAME_FIXED_SIZE = 18
+        FORCE_INDEX_OFS = -17*WORD
+        MY_COPY_OF_REGS = -16*WORD
     IS_X86_32 = False
     IS_X86_64 = True
 
 # The extra space has room for almost all registers, apart from eax and edx
 # which are used in the malloc itself.  They are:
 #   ecx, ebx, esi, edi               [32 and 64 bits]
-#   r8, r9, r10, r12, r13, r14, r15    [64 bits only]
+#   r8, r9, r10, r12, r13, r14, and r15 unless it is the special register   [64 bits only]
 #
 # Note that with asmgcc, the locations corresponding to callee-save registers
 # are never used.

File pypy/jit/backend/x86/assembler.py

         # esp is now aligned to a multiple of 16 again
         mc.CALL(imm(slowpathaddr))
         #
-        mc.MOV(eax, heap(self.cpu.pos_exception()))
-        mc.TEST_rr(eax.value, eax.value)
-        mc.J_il8(rx86.Conditions['NZ'], 0)
+        if self.cpu.special_register is None:
+            mc.MOV(eax, heap(self.cpu.pos_exception()))
+            mc.TEST_rr(eax.value, eax.value)
+            mc.J_il8(rx86.Conditions['NZ'], 0)
+        else:
+            rnum = self.cpu.special_register
+            mc.TEST_rr(rnum, rnum)
+            mc.J_il8(rx86.Conditions['Z'], 0)
         jnz_location = mc.get_relative_pos()
         #
         if IS_X86_32:
         mc.overwrite(jnz_location-1, chr(offset))
         # clear the exception from the global position
         mc.MOV(eax, heap(self.cpu.pos_exc_value()))
-        mc.MOV(heap(self.cpu.pos_exception()), imm0)
-        mc.MOV(heap(self.cpu.pos_exc_value()), imm0)
+        self.clear_current_exception(mc)
         # save the current exception instance into fail_boxes_ptr[0]
         adr = self.fail_boxes_ptr.get_addr_for_num(0)
         mc.MOV(heap(adr), eax)
         rawstart = mc.materialize(self.cpu.asmmemmgr, [])
         self.stack_check_slowpath = rawstart
 
+    def clear_current_exception(self, mc):
+        if self.cpu.special_register is not None:
+            mc.MOV_ri(self.cpu.special_register,
+                      self.cpu.special_register_nonnull)
+        mc.MOV(heap(self.cpu.pos_exception()), imm0)
+        mc.MOV(heap(self.cpu.pos_exc_value()), imm0)
+
     @staticmethod
     def _release_gil_asmgcc(css):
         # similar to trackgcroot.py:pypy_asm_stackwalk, first part
 
     def genop_guard_guard_no_exception(self, ign_1, guard_op, guard_token,
                                        locs, ign_2):
-        self.mc.CMP(heap(self.cpu.pos_exception()), imm0)
-        self.implement_guard(guard_token, 'NZ')
+        if self.cpu.special_register is None:
+            self.mc.CMP(heap(self.cpu.pos_exception()), imm0)
+            self.implement_guard(guard_token, 'NZ')
+        else:
+            rnum = self.cpu.special_register
+            self.mc.TEST_rr(rnum, rnum)
+            self.implement_guard(guard_token, 'Z')
 
     def genop_guard_guard_not_invalidated(self, ign_1, guard_op, guard_token,
                                      locs, ign_2):
     def genop_guard_guard_exception(self, ign_1, guard_op, guard_token,
                                     locs, resloc):
         loc = locs[0]
-        loc1 = locs[1]
-        self.mc.MOV(loc1, heap(self.cpu.pos_exception()))
-        self.mc.CMP(loc1, loc)
+        self.mc.CMP(heap(self.cpu.pos_exception()), loc)
         self.implement_guard(guard_token, 'NE')
         if resloc is not None:
             self.mc.MOV(resloc, heap(self.cpu.pos_exc_value()))
-        self.mc.MOV(heap(self.cpu.pos_exception()), imm0)
-        self.mc.MOV(heap(self.cpu.pos_exc_value()), imm0)
+        self.clear_current_exception(self.mc)
 
     def _gen_guard_overflow(self, guard_op, guard_token):
         guard_opnum = guard_op.getopnum()

File pypy/jit/backend/x86/regalloc.py

 from pypy.jit.backend.llsupport.descr import BaseCallDescr, BaseSizeDescr
 from pypy.jit.backend.llsupport.regalloc import FrameManager, RegisterManager,\
      TempBox
-from pypy.jit.backend.x86.arch import WORD, FRAME_FIXED_SIZE
+from pypy.jit.backend.x86.arch import WORD, FRAME_FIXED_SIZE, special_register
 from pypy.jit.backend.x86.arch import IS_X86_32, IS_X86_64, MY_COPY_OF_REGS
 from pypy.rlib.rarithmetic import r_longlong, r_uint
 
             not_implemented("convert_to_imm: got a %s" % c)
 
 class X86_64_RegisterManager(X86RegisterManager):
-    # r11 omitted because it's used as scratch
-    all_regs = [eax, ecx, edx, ebx, esi, edi, r8, r9, r10, r12, r13, r14, r15]
+    # r11 omitted because it's used as scratch; r15 is omitted if used
+    # as a special register
+    all_regs = [eax, ecx, edx, ebx, esi, edi, r8, r9, r10, r12, r13, r14]
+    if special_register is None:
+        all_regs.append(r15)
     no_lower_byte_regs = []
     save_around_call_regs = [eax, ecx, edx, esi, edi, r8, r9, r10]
 
         r12: MY_COPY_OF_REGS + 7 * WORD,
         r13: MY_COPY_OF_REGS + 8 * WORD,
         r14: MY_COPY_OF_REGS + 9 * WORD,
-        r15: MY_COPY_OF_REGS + 10 * WORD,
     }
+    if special_register is None:
+        REGLOC_TO_COPY_AREA_OFS[r15] = MY_COPY_OF_REGS + 10 * WORD
 
 class X86XMMRegisterManager(RegisterManager):
 
 
     def consider_guard_exception(self, op):
         loc = self.rm.make_sure_var_in_reg(op.getarg(0))
-        box = TempBox()
-        args = op.getarglist()
-        loc1 = self.rm.force_allocate_reg(box, args)
         if op.result in self.longevity:
             # this means, is it ever used
-            resloc = self.rm.force_allocate_reg(op.result, args + [box])
+            resloc = self.rm.force_allocate_reg(op.result)
         else:
             resloc = None
-        self.perform_guard(op, [loc, loc1], resloc)
+        self.perform_guard(op, [loc], resloc)
         self.rm.possibly_free_vars_for_op(op)
-        self.rm.possibly_free_var(box)
 
     consider_guard_no_overflow = consider_guard_no_exception
     consider_guard_overflow    = consider_guard_no_exception

File pypy/jit/backend/x86/runner.py

 from pypy.rlib.objectmodel import we_are_translated
 from pypy.jit.metainterp import history, compile
 from pypy.jit.backend.x86.assembler import Assembler386
-from pypy.jit.backend.x86.arch import FORCE_INDEX_OFS
+from pypy.jit.backend.x86.arch import FORCE_INDEX_OFS, special_register
 from pypy.jit.backend.x86.profagent import ProfileAgent
 from pypy.jit.backend.llsupport.llmodel import AbstractLLCPU
 from pypy.jit.backend.x86 import regloc
     backend_name = 'x86_64'
     WORD = 8
     NUM_REGS = 16
-    CALLEE_SAVE_REGISTERS = [regloc.ebx, regloc.r12, regloc.r13, regloc.r14, regloc.r15]
+    CALLEE_SAVE_REGISTERS = [regloc.ebx, regloc.r12, regloc.r13, regloc.r14]
+    if special_register is None:
+        CALLEE_SAVE_REGISTERS.append(regloc.r15)
 
     def __init__(self, *args, **kwargs):
         assert sys.maxint == (2**63 - 1)

File pypy/module/cpyext/api.py

 
     @specialize.ll()
     def wrapper(*args):
+        from pypy.rpython.lltypesystem import llmemory
         from pypy.module.cpyext.pyobject import make_ref, from_ref
         from pypy.module.cpyext.pyobject import Reference
         # we hope that malloc removal removes the newtuple() that is
         # inserted exactly here by the varargs specializer
-        llop.gc_stack_bottom(lltype.Void)   # marker for trackgcroot.py
         rffi.stackcounter.stacks_counter += 1
+        saved = llop.gc_stack_bottom(llmemory.Address)   # for trackgcroot.py
         retval = fatal_value
         boxed_args = ()
         try:
             else:
                 print str(e)
                 pypy_debug_catch_fatal_exception()
+        llop.gc_stack_bottom_stop(lltype.Void, saved)
         rffi.stackcounter.stacks_counter -= 1
         return retval
     callable._always_inline_ = True

File pypy/rlib/register.py

+from pypy.translator.tool.cbuild import ExternalCompilationInfo
+from pypy.rpython.tool import rffi_platform
+
+# On platforms with enough hardware registers and with gcc, we can
+# (ab)use gcc to globally assign a register to a single global void*
+# variable.  We use it with a double meaning:
+#
+# - when it is NULL upon return from a function, it means that an
+#   exception occurred.  It allows the caller to quickly check for
+#   exceptions.
+#
+# - in other cases, with --gcrootfinder=shadowstack, it points to
+#   the top of the shadow stack.
+
+
+# For now, only for x86-64.  Tries to use the register r15.
+eci = ExternalCompilationInfo(
+    post_include_bits=[
+        'register void *pypy_r15 asm("r15");\n'
+        '#define PYPY_GET_SPECIAL_REG() pypy_r15\n'
+        '#define PYPY_SPECIAL_REG_NONNULL() (pypy_r15 != NULL)\n'
+        '#define PYPY_SET_SPECIAL_REG(x) (pypy_r15 = x)\n'
+        ],
+    )
+
+_test_eci = eci.merge(ExternalCompilationInfo(
+    post_include_bits=["""
+            void f(void) {
+                pypy_r15 = &f;
+            }
+    """]))
+
+try:
+    rffi_platform.verify_eci(_test_eci)
+    register_number = 15      # r15
+except rffi_platform.CompilationError:
+    eci = None
+    register_number = None
+else:
+
+    from pypy.rpython.lltypesystem import lltype, llmemory, rffi
+
+    # Use addr = load_from_reg() to read an Address from the special
+    # register and store_into_reg(addr) to write one into it.  When running
+    # on top of Python, the behavior is emulated.
+
+    _value_reg = None
+
+    def _pypy_get_special_reg():
+        assert _value_reg is not None
+        return _value_reg
+
+    def _pypy_special_reg_nonnull():
+        assert _value_reg is not None
+        return bool(_value_reg)
+
+    def _pypy_set_special_reg(addr):
+        global _value_reg
+        _value_reg = addr
+
+    load_from_reg = rffi.llexternal('PYPY_GET_SPECIAL_REG', [],
+                                    llmemory.Address,
+                                    _callable=_pypy_get_special_reg,
+                                    compilation_info=eci,
+                                    _nowrapper=True)
+
+    reg_is_nonnull = rffi.llexternal('PYPY_SPECIAL_REG_NONNULL', [],
+                                     lltype.Bool,
+                                     _callable=_pypy_special_reg_nonnull,
+                                     compilation_info=eci,
+                                     _nowrapper=True)
+
+    store_into_reg = rffi.llexternal('PYPY_SET_SPECIAL_REG',
+                                     [llmemory.Address],
+                                     lltype.Void,
+                                     _callable=_pypy_set_special_reg,
+                                     compilation_info=eci,
+                                     _nowrapper=True)
+
+    # xxx temporary
+    nonnull = llmemory.cast_int_to_adr(-1)

File pypy/rlib/test/test_register.py

+import py
+from pypy.rlib import register
+from pypy.rpython.lltypesystem import lltype, llmemory, rffi
+from pypy.translator.c.test.test_standalone import StandaloneTests
+
+
+def test_register():
+    #
+    from pypy.jit.backend.detect_cpu import autodetect
+    if autodetect() == 'x86_64':
+        assert register.eci is not None
+        assert register.register_number == 15        # r15
+    else:
+        assert register.eci is None
+        assert register.register_number is None
+
+
+class TestLoadStore(object):
+    def setup_class(cls):
+        if register.register_number is None:
+            py.test.skip("rlib/register not supported on this platform")
+
+    def test_direct(self):
+        a = rffi.cast(llmemory.Address, 27)
+        register.store_into_reg(a)
+        b = register.load_from_reg()
+        assert lltype.typeOf(b) == llmemory.Address
+        assert rffi.cast(lltype.Signed, b) == 27
+
+    def test_llinterp(self):
+        from pypy.rpython.test.test_llinterp import interpret
+        def f(n):
+            a = rffi.cast(llmemory.Address, n)
+            register.store_into_reg(a)
+            b = register.load_from_reg()
+            return rffi.cast(lltype.Signed, b)
+        res = interpret(f, [41])
+        assert res == 41
+
+
+class TestLoadStoreCompiled(StandaloneTests):
+    def setup_class(cls):
+        if register.register_number is None:
+            py.test.skip("rlib/register not supported on this platform")
+
+    def test_compiled(self):
+        def f(argv):
+            a = rffi.cast(llmemory.Address, 43)
+            register.store_into_reg(a)
+            b = register.load_from_reg()
+            print rffi.cast(lltype.Signed, b)
+            return 0
+        t, cbuilder = self.compile(f)
+        data = cbuilder.cmdexec('')
+        assert data.startswith('43\n')

File pypy/rpython/llinterp.py

     def op_gc_asmgcroot_static(self, index):
         raise NotImplementedError("gc_asmgcroot_static")
 
-    def op_gc_stack_bottom(self):
-        pass       # marker for trackgcroot.py
-
     def op_gc_get_type_info_group(self):
         raise NotImplementedError("gc_get_type_info_group")
 

File pypy/rpython/lltypesystem/lloperation.py

     # see translator/c/src/mem.h for the valid indices
     'gc_asmgcroot_static':  LLOp(sideeffects=False),
     'gc_stack_bottom':      LLOp(canrun=True),
+    'gc_stack_bottom_stop': LLOp(canrun=True),
 
     # NOTE NOTE NOTE! don't forget *** canunwindgc=True *** for anything that
     # can go through a stack unwind, in particular anything that mallocs!

File pypy/rpython/lltypesystem/opimpl.py

     return debug.have_debug_prints()
 
 def op_gc_stack_bottom():
-    pass       # marker for trackgcroot.py
+    return llmemory.NULL       # marker for trackgcroot.py
+
+def op_gc_stack_bottom_stop(saved):
+    pass                       # for rlib/register.py
 
 def op_jit_force_virtualizable(*args):
     pass

File pypy/rpython/lltypesystem/rffi.py

     """ Function creating wrappers for callbacks. Note that this is
     cheating as we assume constant callbacks and we just memoize wrappers
     """
-    from pypy.rpython.lltypesystem import lltype
+    from pypy.rpython.lltypesystem import lltype, llmemory
     from pypy.rpython.lltypesystem.lloperation import llop
     if hasattr(callable, '_errorcode_'):
         errorcode = callable._errorcode_
     args = ', '.join(['a%d' % i for i in range(len(TP.TO.ARGS))])
     source = py.code.Source(r"""
         def wrapper(%s):    # no *args - no GIL for mallocing the tuple
-            llop.gc_stack_bottom(lltype.Void)   # marker for trackgcroot.py
             if aroundstate is not None:
                 after = aroundstate.after
                 if after:
                     after()
             # from now on we hold the GIL
             stackcounter.stacks_counter += 1
+            # marker for trackgcroot.py and for rlib/register.py:
+            # initialize the value of the special register
+            saved = llop.gc_stack_bottom(llmemory.Address)
             try:
                 result = callable(%s)
             except Exception, e:
                     import traceback
                     traceback.print_exc()
                 result = errorcode
+            llop.gc_stack_bottom_stop(lltype.Void, saved)
             stackcounter.stacks_counter -= 1
             if aroundstate is not None:
                 before = aroundstate.before

File pypy/translator/c/database.py

         if translator is None or translator.rtyper is None:
             self.exctransformer = None
         else:
-            self.exctransformer = translator.getexceptiontransformer()
+            self.exctransformer = translator.getexceptiontransformer(
+                standalone=standalone)
         if translator is not None:
             self.gctransformer = self.gcpolicy.transformerclass(translator)
         self.completed = False

File pypy/translator/c/gc.py

     def OP_GC_ASSUME_YOUNG_POINTERS(self, funcgen, op):
         return ''
 
-    def OP_GC_STACK_BOTTOM(self, funcgen, op):
-        return ''
-
 
 class RefcountingInfo:
     static_deallocator = None
     def GC_KEEPALIVE(self, funcgen, v):
         return 'pypy_asm_keepalive(%s);' % funcgen.expr(v)
 
-    def OP_GC_STACK_BOTTOM(self, funcgen, op):
-        return 'pypy_asm_stack_bottom();'
-
 
 name_to_gcpolicy = {
     'boehm': BoehmGcPolicy,

File pypy/translator/c/gcc/test/test_asmgcroot.py

 from pypy.annotation.listdef import s_list_of_strings
 from pypy import conftest
 from pypy.translator.tool.cbuild import ExternalCompilationInfo
-from pypy.rpython.lltypesystem import lltype, rffi
+from pypy.rpython.lltypesystem import lltype, llmemory, rffi
 from pypy.rlib.entrypoint import entrypoint, secondary_entrypoints
 from pypy.rpython.lltypesystem.lloperation import llop
 
         
         @entrypoint("x42", [lltype.Signed, lltype.Signed], c_name='callback')
         def mycallback(a, b):
-            llop.gc_stack_bottom(lltype.Void)
             rffi.stackcounter.stacks_counter += 1
+            saved = llop.gc_stack_bottom(llmemory.Address)
             gc.collect()
+            llop.gc_stack_bottom_stop(lltype.Void, saved)
             rffi.stackcounter.stacks_counter -= 1
             return a + b
 

File pypy/translator/c/gcc/trackgcroot.py

                 else:
                     regindex = self.CALLEE_SAVE_REGISTERS.index(tag)
                     shape[1 + regindex] = loc
-            if LOC_NOWHERE in shape and not self.is_stack_bottom:
+            #
+            if self.special_register is None:
+                shape_wo_specialreg = shape
+            else:
+                tag = self.special_register
+                regindex = self.CALLEE_SAVE_REGISTERS.index(tag)
+                shape_wo_specialreg = shape[:]
+                del shape_wo_specialreg[1 + regindex]
+            if LOC_NOWHERE in shape_wo_specialreg and not self.is_stack_bottom:
                 reg = self.CALLEE_SAVE_REGISTERS[shape.index(LOC_NOWHERE) - 1]
                 raise AssertionError("cannot track where register %s is saved"
                                      % (reg,))
     def process_function(self, lines, filename):
         tracker = self.FunctionGcRootTracker(
             lines, filetag=getidentifier(filename))
+        tracker.special_register = special_register
         if self.verbose == 1:
             sys.stderr.write('.')
         elif self.verbose > 1:
 
 class GcRootTracker(object):
 
-    def __init__(self, verbose=0, shuffle=False, format='elf'):
+    def __init__(self, verbose=0, shuffle=False, format='elf',
+                 special_register=None):
         self.verbose = verbose
         self.shuffle = shuffle     # to debug the sorting logic in asmgcroot.py
         self.format = format
+        self.special_register = special_register
         self.gcmaptable = []
 
     def dump_raw_table(self, output):
     verbose = 0
     shuffle = False
     output_raw_table = False
+    special_register = None
     if sys.platform == 'darwin':
         if sys.maxint > 2147483647:
             format = 'darwin64'
         elif sys.argv[1].startswith('-f'):
             format = sys.argv[1][2:]
             del sys.argv[1]
+        elif sys.argv[1].startswith('-%'):
+            special_register = sys.argv[1][1:]
+            del sys.argv[1]
         elif sys.argv[1].startswith('-'):
             print >> sys.stderr, "unrecognized option:", sys.argv[1]
             sys.exit(1)
         else:
             break
-    tracker = GcRootTracker(verbose=verbose, shuffle=shuffle, format=format)
+    tracker = GcRootTracker(verbose=verbose, shuffle=shuffle, format=format,
+                            special_register=special_register)
     for fn in sys.argv[1:]:
         f = open(fn, 'r')
         firstline = f.readline()

File pypy/translator/c/genc.py

         for rule in rules:
             mk.rule(*rule)
 
+        from pypy.rlib.register import register_number
+        if register_number is None:
+            extra_trackgcroot_arg = ''
+        else:
+            extra_trackgcroot_arg = '-%%r%d' % register_number
+
         if self.config.translation.gcrootfinder == 'asmgcc':
             trackgcfiles = [cfile[:-2] for cfile in mk.cfiles]
             if self.translator.platform.name == 'msvc':
                         'cmd /c $(MASM) /nologo /Cx /Cp /Zm /coff /Fo$@ /c $< $(INCLUDEDIRS)')
                 mk.rule('.c.gcmap', '',
                         ['$(CC) /nologo $(ASM_CFLAGS) /c /FAs /Fa$*.s $< $(INCLUDEDIRS)',
-                         'cmd /c ' + python + '$(PYPYDIR)/translator/c/gcc/trackgcroot.py -fmsvc -t $*.s > $@']
+                         'cmd /c ' + python + '$(PYPYDIR)/translator/c/gcc/trackgcroot.py -fmsvc -t %s $*.s > $@' % extra_trackgcroot_arg]
                         )
                 mk.rule('gcmaptable.c', '$(GCMAPFILES)',
                         'cmd /c ' + python + '$(PYPYDIR)/translator/c/gcc/trackgcroot.py -fmsvc $(GCMAPFILES) > $@')
                 mk.rule('%.lbl.s %.gcmap', '%.s',
                         [python +
                              '$(PYPYDIR)/translator/c/gcc/trackgcroot.py '
-                             '-t $< > $*.gctmp',
+                             '-t %s $< > $*.gctmp' % extra_trackgcroot_arg,
                          'mv $*.gctmp $*.gcmap'])
                 mk.rule('gcmaptable.s', '$(GCMAPFILES)',
                         [python +

File pypy/translator/c/src/main.h

     char *errmsg;
     int i, exitcode;
     RPyListOfString *list;
+    void *saved;
 
-    pypy_asm_stack_bottom();
+    OP_GC_STACK_BOTTOM(saved);
     instrument_setup();
 
     if (sizeof(void*) != SIZEOF_LONG) {
         pypy_debug_catch_fatal_exception();
     }
 
+    OP_GC_STACK_BOTTOM_STOP(saved, /*nothing*/);
     return exitcode;
 
  memory_out:
     fprintf(stderr, "Fatal error during initialization: %s\n", errmsg);
 #endif
     abort();
-    return 1;
+    return 1;   /* not actually reachable */
 }
 
 int PYPY_MAIN_FUNCTION(int argc, char *argv[])

File pypy/translator/c/src/mem.h

 #endif
 
 
+#ifdef PYPY_GET_SPECIAL_REG      /* pypy/rlib/register.py */
+#  define OP_GC_STACK_BOTTOM(r)        pypy_asm_stack_bottom();        \
+                                       r = PYPY_GET_SPECIAL_REG();     \
+                                       PYPY_SET_SPECIAL_REG((void*)-1)
+#  define OP_GC_STACK_BOTTOM_STOP(v,r) PYPY_SET_SPECIAL_REG(v)
+#else
+#  define OP_GC_STACK_BOTTOM(r)         pypy_asm_stack_bottom()
+#  define OP_GC_STACK_BOTTOM_STOP(v,r)  /* nothing */
+#endif
+
+
 /* used by pypy.rlib.rstack, but also by asmgcc */
 #define OP_STACK_CURRENT(r)  r = (long)&r
 

File pypy/translator/exceptiontransform.py

 from pypy.rlib.rarithmetic import r_singlefloat
 from pypy.rlib.debug import ll_assert
 from pypy.rlib.rstackovf import _StackOverflow
+from pypy.rlib import register
 from pypy.annotation import model as annmodel
 from pypy.rpython.annlowlevel import MixLevelHelperAnnotator
 from pypy.tool.sourcetools import func_with_new_name
 
 class BaseExceptionTransformer(object):
 
-    def __init__(self, translator):
+    def __init__(self, translator, standalone):
         self.translator = translator
+        self.standalone = standalone
         self.raise_analyzer = canraise.RaiseAnalyzer(translator)
         edata = translator.rtyper.getexceptiondata()
         self.lltype_of_exception_value = edata.lltype_of_exception_value
             assertion_error_ll_exc_type)
         self.c_n_i_error_ll_exc_type = constant_value(n_i_error_ll_exc_type)
 
+        use_special_reg = standalone and register.register_number is not None
+        self.use_special_reg = use_special_reg
+        if use_special_reg:
+            self.c_nonnull_specialregister = constant_value(register.nonnull)
+            self.c_load_from_reg = constant_value(register.load_from_reg)
+            self.c_reg_is_nonnull = constant_value(register.reg_is_nonnull)
+            self.c_store_into_reg = constant_value(register.store_into_reg)
+
         def rpyexc_occured():
-            exc_type = exc_data.exc_type
-            return bool(exc_type)
+            if use_special_reg:
+                # an exception occurred iff the special register is 0
+                return register.load_from_reg() == llmemory.NULL
+            else:
+                exc_type = exc_data.exc_type
+                return bool(exc_type)
 
         def rpyexc_fetch_type():
             return exc_data.exc_type
             return exc_data.exc_value
 
         def rpyexc_clear():
+            if use_special_reg:
+                register.store_into_reg(register.nonnull)
             exc_data.exc_type = null_type
             exc_data.exc_value = null_value
 
             exc_data.exc_type = etype
             exc_data.exc_value = evalue
             lloperation.llop.debug_start_traceback(lltype.Void, etype)
+            if use_special_reg:
+                register.store_into_reg(llmemory.NULL)
 
         def rpyexc_reraise(etype, evalue):
             exc_data.exc_type = etype
             exc_data.exc_value = evalue
             lloperation.llop.debug_reraise_traceback(lltype.Void, etype)
+            if use_special_reg:
+                register.store_into_reg(llmemory.NULL)
 
         def rpyexc_fetch_exception():
             evalue = rpyexc_fetch_value()
             if evalue:
                 exc_data.exc_type = rclass.ll_inst_type(evalue)
                 exc_data.exc_value = evalue
+                if use_special_reg:
+                    register.store_into_reg(llmemory.NULL)
 
         def rpyexc_raise_stack_overflow():
             rpyexc_raise(stackovf_ll_exc_type, stackovf_ll_exc)
         #
         self.gen_setfield('exc_value', self.c_null_evalue, llops)
         self.gen_setfield('exc_type',  self.c_null_etype,  llops)
+        if self.use_special_reg:
+            self.gen_setspecialregister(self.c_nonnull_specialregister, llops)
         excblock.operations[:] = llops
         newgraph.exceptblock.inputargs[0].concretetype = self.lltype_of_exception_type
         newgraph.exceptblock.inputargs[1].concretetype = self.lltype_of_exception_value
         if alloc_shortcut:
             T = spaceop.result.concretetype
             var_no_exc = self.gen_nonnull(spaceop.result, llops)
+        elif self.use_special_reg:
+            var_no_exc = self.gen_specialreg_no_exc(llops)
         else:
             v_exc_type = self.gen_getfield('exc_type', llops)
             var_no_exc = self.gen_isnull(v_exc_type, llops)
     def gen_nonnull(self, v, llops):
         return llops.genop('ptr_nonzero', [v], lltype.Bool)
 
+    def gen_getspecialregister(self, llops):
+        return llops.genop('direct_call', [self.c_load_from_reg],
+                           resulttype = llmemory.Address)
+
+    def gen_specialreg_no_exc(self, llops):
+        return llops.genop('direct_call', [self.c_reg_is_nonnull],
+                           resulttype = lltype.Bool)
+
+    def gen_setspecialregister(self, v, llops):
+        llops.genop('direct_call', [self.c_store_into_reg, v])
+
     def same_obj(self, ptr1, ptr2):
         return ptr1._same_obj(ptr2)
 
     def build_extra_funcs(self):
         pass
 
-def ExceptionTransformer(translator):
+def ExceptionTransformer(translator, standalone):
     type_system = translator.rtyper.type_system.name
     if type_system == 'lltypesystem':
-        return LLTypeExceptionTransformer(translator)
+        return LLTypeExceptionTransformer(translator, standalone)
     else:
         assert type_system == 'ootypesystem'
-        return OOTypeExceptionTransformer(translator)
+        return OOTypeExceptionTransformer(translator, standalone)

File pypy/translator/translator.py

                                    type_system = type_system)
         return self.rtyper
 
-    def getexceptiontransformer(self):
+    def getexceptiontransformer(self, standalone):
         if self.rtyper is None:
             raise ValueError("no rtyper")
         if self.exceptiontransformer is not None:
+            assert self.exceptiontransformer.standalone == standalone
             return self.exceptiontransformer
         from pypy.translator.exceptiontransform import ExceptionTransformer
-        self.exceptiontransformer = ExceptionTransformer(self)
+        self.exceptiontransformer = ExceptionTransformer(self, standalone)
         return self.exceptiontransformer
 
     def checkgraphs(self):