Commits

Armin Rigo  committed c7608f8

Rewrite the stack overflow detection logic. The goal is to get a simple
check done in rlib/rstack.py, followed by a call to the slow path if it
fails; the check should be simple enough to be easily inlinable by the JIT.

  • Participants
  • Parent commits 649f3ba
  • Branches jit-stackcheck

Comments (0)

Files changed (3)

File pypy/rlib/rstack.py

 import inspect
 
 from pypy.rlib.objectmodel import we_are_translated
+from pypy.rlib.rarithmetic import r_uint
 from pypy.rpython.extregistry import ExtRegistryEntry
-from pypy.rpython.lltypesystem import rffi, lltype
+from pypy.rpython.lltypesystem import lltype, rffi
+from pypy.rpython.lltypesystem.lloperation import llop
 from pypy.rpython.controllerentry import Controller, SomeControlledInstance
 from pypy.translator.tool.cbuild import ExternalCompilationInfo
 
 def stack_unwind():
     if we_are_translated():
-        from pypy.rpython.lltypesystem.lloperation import llop
         return llop.stack_unwind(lltype.Void)
     raise RuntimeError("cannot unwind stack in non-translated versions")
 
 
 def stack_capture():
     if we_are_translated():
-        from pypy.rpython.lltypesystem.lloperation import llop
         ptr = llop.stack_capture(OPAQUE_STATE_HEADER_PTR)
         return frame_stack_top_controller.box(ptr)
     raise RuntimeError("cannot unwind stack in non-translated versions")
 
 def stack_frames_depth():
     if we_are_translated():
-        from pypy.rpython.lltypesystem.lloperation import llop
         return llop.stack_frames_depth(lltype.Signed)
     else:
         return len(inspect.stack())
 
+# ____________________________________________________________
+
 compilation_info = ExternalCompilationInfo(includes=['src/stack.h'])
 
-stack_too_big = rffi.llexternal('LL_stack_too_big', [], rffi.INT,
-                                compilation_info=compilation_info,
-                                _nowrapper=True,
-                                _callable=lambda: _zero,
-                                sandboxsafe=True)
-_zero = rffi.cast(rffi.INT, 0)
+def llexternal(name, args, res):
+    return rffi.llexternal(name, args, res, compilation_info=compilation_info,
+                           sandboxsafe=True, _nowrapper=True)
+
+_stack_get_start = llexternal('LL_stack_get_start', [], lltype.Signed)
+_stack_get_length = llexternal('LL_stack_get_length', [], lltype.Signed)
+_stack_too_big_slowpath = llexternal('LL_stack_too_big_slowpath',
+                                     [lltype.Signed], lltype.Char)
+# the following is used by the JIT
+_stack_get_start_adr = llexternal('LL_stack_get_start_adr', [], lltype.Signed)
+
 
 def stack_check():
-    if rffi.cast(lltype.Signed, stack_too_big()):
+    if not we_are_translated():
+        return
+    #
+    # Load the "current" stack position, or at least some address that
+    # points close to the current stack head
+    current = llop.stack_current(lltype.Signed)
+    #
+    # Load these variables from C code
+    start = _stack_get_start()
+    length = _stack_get_length()
+    #
+    # Common case: if 'current' is within [start:start+length], everything
+    # is fine
+    ofs = r_uint(current - start)
+    if ofs < r_uint(length):
+        return
+    #
+    # Else call the slow path
+    if ord(_stack_too_big_slowpath(current)):
+        #
+        # Now we are sure that the stack is really too big.  Note that the
         # stack_unwind implementation differs depending on whether stackless
         # is enabled: if it is, it unwinds the stack; otherwise it simply
         # raises a RuntimeError.

File pypy/rpython/lltypesystem/lloperation.py

     'get_stack_depth_limit':LLOp(sideeffects=False),
     'set_stack_depth_limit':LLOp(),
 
+    'stack_current':        LLOp(sideeffects=False),
+
     # __________ misc operations __________
 
     'keepalive':            LLOp(),

File pypy/translator/c/src/stack.h

  * It is needed to have RPyThreadStaticTLS, too. */
 #include "thread.h"
 
+extern char *_LLstacktoobig_stack_start;
+
 void LL_stack_unwind(void);
-int LL_stack_too_big_slowpath(void);
+char LL_stack_too_big_slowpath(long);    /* returns 0 (ok) or 1 (too big) */
 
-extern volatile char *_LLstacktoobig_stack_base_pointer;
-extern long _LLstacktoobig_stack_min;
-extern long _LLstacktoobig_stack_max;
+/* some macros referenced from pypy.rlib.rstack */
+#define OP_STACK_CURRENT(r)  r = (long)&r
+#define LL_stack_get_start() ((long)_LLstacktoobig_stack_start)
+#define LL_stack_get_length() MAX_STACK_SIZE
+#define LL_stack_get_start_adr() ((long)&_LLstacktoobig_stack_start)  /* JIT */
 
-static int LL_stack_too_big(void)
-{
-	/* The fast path of stack_too_big, called extremely often.
-	   Making it static makes an *inlinable* copy of this small
-	   function's implementation in each compilation unit. */
-	char local;
-	long diff = &local - _LLstacktoobig_stack_base_pointer;
-	/* common case: we are still in the same thread as last time
-	   we checked, and still in the allowed part of the stack */
-	return ((diff < _LLstacktoobig_stack_min ||
-		 diff > _LLstacktoobig_stack_max)
-		/* if not, call the slow path */
-		&& LL_stack_too_big_slowpath());
-}
 
 #ifdef __GNUC__
 #  define PYPY_INHIBIT_TAIL_CALL()   asm("/* inhibit_tail_call */")
 		return &local - parent;
 }
 
-volatile char *_LLstacktoobig_stack_base_pointer = NULL;
-long _LLstacktoobig_stack_min = 0;
-long _LLstacktoobig_stack_max = 0;
-RPyThreadStaticTLS _LLstacktoobig_stack_base_pointer_key;
+char *_LLstacktoobig_stack_start = NULL;
+int stack_direction = 0;
+RPyThreadStaticTLS start_tls_key;
 
-int LL_stack_too_big_slowpath(void)
+char LL_stack_too_big_slowpath(long current)
 {
-	char local;
 	long diff;
-	char *baseptr;
-	/* Check that the stack is less than MAX_STACK_SIZE bytes bigger
-	   than the value recorded in stack_base_pointer.  The base
-	   pointer is updated to the current value if it is still NULL
-	   or if we later find a &local that is below it.  The real
-	   stack base pointer is stored in thread-local storage, but we
-	   try to minimize its overhead by keeping a local copy in
-	   stack_pointer_pointer. */
+	char *baseptr, *curptr = (char*)current;
 
-	if (_LLstacktoobig_stack_min == _LLstacktoobig_stack_max /* == 0 */) {
+	/* The stack_start variable is updated to match the current value
+	   if it is still NULL or if we later find a 'curptr' position
+	   that underflows it.  The real stack_start pointer is stored in
+	   thread-local storage, but we try to minimize its overhead by
+	   keeping a local copy in _LLstacktoobig_stack_start. */
+
+	if (stack_direction == 0) {
 		/* not initialized */
 		/* XXX We assume that initialization is performed early,
 		   when there is still only one thread running.  This
 		   allows us to ignore race conditions here */
-		char *errmsg = RPyThreadStaticTLS_Create(
-			&_LLstacktoobig_stack_base_pointer_key);
+		char *errmsg = RPyThreadStaticTLS_Create(&start_tls_key);
 		if (errmsg) {
 			/* XXX should we exit the process? */
 			fprintf(stderr, "Internal PyPy error: %s\n", errmsg);
 			return 1;
 		}
 		if (_LL_stack_growing_direction(NULL) > 0)
-			_LLstacktoobig_stack_max = MAX_STACK_SIZE;
+			stack_direction = +1;
 		else
-			_LLstacktoobig_stack_min = -MAX_STACK_SIZE;
+			stack_direction = -1;
 	}
 
-	baseptr = (char *) RPyThreadStaticTLS_Get(
-			_LLstacktoobig_stack_base_pointer_key);
+	baseptr = (char *) RPyThreadStaticTLS_Get(start_tls_key);
 	if (baseptr != NULL) {
-		diff = &local - baseptr;
-		if (_LLstacktoobig_stack_min <= diff &&
-		    diff <= _LLstacktoobig_stack_max) {
-			/* within bounds */
-			_LLstacktoobig_stack_base_pointer = baseptr;
+		diff = curptr - baseptr;
+		if (((unsigned long)diff) < (unsigned long)MAX_STACK_SIZE) {
+			/* within bounds, probably just had a thread switch */
+			_LLstacktoobig_stack_start = baseptr;
 			return 0;
 		}
 
-		if ((_LLstacktoobig_stack_min == 0 && diff < 0) ||
-		    (_LLstacktoobig_stack_max == 0 && diff > 0)) {
-			/* we underflowed the stack, which means that
-			   the initial estimation of the stack base must
-			   be revised (see below) */
+		if (stack_direction > 0) {
+			if (diff < 0 && diff > -MAX_STACK_SIZE)
+				;           /* stack underflow */
+			else
+				return 1;   /* stack overflow (probably) */
 		}
 		else {
-			return 1;   /* stack overflow */
+			if (diff >= MAX_STACK_SIZE && diff < 2*MAX_STACK_SIZE)
+				;           /* stack underflow */
+			else
+				return 1;   /* stack overflow (probably) */
 		}
+		/* else we underflowed the stack, which means that
+		   the initial estimation of the stack base must
+		   be revised */
 	}
 
 	/* update the stack base pointer to the current value */
-	baseptr = &local;
-	RPyThreadStaticTLS_Set(_LLstacktoobig_stack_base_pointer_key, baseptr);
-	_LLstacktoobig_stack_base_pointer = baseptr;
+	if (stack_direction > 0) {
+		/* the valid range is [curptr:curptr+MAX_STACK_SIZE] */
+		baseptr = curptr;
+	}
+	else {
+		/* the valid range is [curptr-MAX_STACK_SIZE+1:curptr+1] */
+		baseptr = curptr - MAX_STACK_SIZE + 1;
+	}
+	RPyThreadStaticTLS_Set(start_tls_key, baseptr);
+	_LLstacktoobig_stack_start = baseptr;
 	return 0;
 }