aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Paul Eggert, 2015-07-16 00:48:40 -0700
committer: Paul Eggert, 2015-07-16 07:36:47 -0700
commit: a5522abbca2235771384949dfa87c8efc68831b2 (patch)
tree: 1e7f8ffd9a3ae3290672b8e6694696a37b0426fd
parent: bd8b5ac77250d9fe0634d58a30a3bf6d2497725a (diff)
download: emacs-a5522abbca2235771384949dfa87c8efc68831b2.tar.gz
emacs-a5522abbca2235771384949dfa87c8efc68831b2.zip
Better heuristic for C stack overflow
Improve the heuristic for distinguishing stack overflows from
other SIGSEGV causes (Bug#21004). Corinna Vinschen explained that
the getrlimit method wasn't portable to Cygwin; see:
https://www.cygwin.com/ml/cygwin/2015-07/msg00092.html
Corinna suggested pthread_getattr_np but this also has problems.
Instead, replace the low-level system stuff with a simple
heuristic based on known good stack addresses.
* src/eval.c, src/lisp.h (near_C_stack_top): New function.
* src/sysdep.c: Don't include <sys/resource.h>.
(stack_direction): Remove. All uses removed.
(stack_overflow): New function.
(handle_sigsegv): Use it instead of incorrect getrlimit heuristic.
Make SEGV fatal in non-main threads.
-rw-r--r-- src/eval.c 6
-rw-r--r-- src/lisp.h 1
-rw-r--r-- src/sysdep.c 93
3 files changed, 63 insertions, 37 deletions
diff --git a/src/eval.c b/src/eval.c
index 4f7f42f1ebe..9bdcf4bed17 100644
--- a/src/eval.c
+++ b/src/eval.c
@@ -200,6 +200,12 @@ backtrace_next (union specbinding *pdl)
200 return pdl; 200 return pdl;
201} 201}
202 202
203/* Return a pointer to somewhere near the top of the C stack. */
204void *
205near_C_stack_top (void)
206{
207 return backtrace_args (backtrace_top ());
208}
203 209
204void 210void
205init_eval_once (void) 211init_eval_once (void)
diff --git a/src/lisp.h b/src/lisp.h
index c3289c9d700..341603f311f 100644
--- a/src/lisp.h
+++ b/src/lisp.h
@@ -4029,6 +4029,7 @@ extern _Noreturn void verror (const char *, va_list)
4029 ATTRIBUTE_FORMAT_PRINTF (1, 0); 4029 ATTRIBUTE_FORMAT_PRINTF (1, 0);
4030extern void un_autoload (Lisp_Object); 4030extern void un_autoload (Lisp_Object);
4031extern Lisp_Object call_debugger (Lisp_Object arg); 4031extern Lisp_Object call_debugger (Lisp_Object arg);
4032extern void *near_C_stack_top (void);
4032extern void init_eval_once (void); 4033extern void init_eval_once (void);
4033extern Lisp_Object safe_call (ptrdiff_t, Lisp_Object, ...); 4034extern Lisp_Object safe_call (ptrdiff_t, Lisp_Object, ...);
4034extern Lisp_Object safe_call1 (Lisp_Object, Lisp_Object); 4035extern Lisp_Object safe_call1 (Lisp_Object, Lisp_Object);
diff --git a/src/sysdep.c b/src/sysdep.c
index 91036f07c58..30a55f11409 100644
--- a/src/sysdep.c
+++ b/src/sysdep.c
@@ -79,9 +79,6 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
79#include "msdos.h" 79#include "msdos.h"
80#endif 80#endif
81 81
82#ifdef HAVE_SYS_RESOURCE_H
83#include <sys/resource.h>
84#endif
85#include <sys/param.h> 82#include <sys/param.h>
86#include <sys/file.h> 83#include <sys/file.h>
87#include <fcntl.h> 84#include <fcntl.h>
@@ -1625,14 +1622,58 @@ handle_arith_signal (int sig)
1625 1622
1626#ifdef HAVE_STACK_OVERFLOW_HANDLING 1623#ifdef HAVE_STACK_OVERFLOW_HANDLING
1627 1624
1628/* -1 if stack grows down as expected on most OS/ABI variants, 1 otherwise. */
1629
1630static int stack_direction;
1631
1632/* Alternate stack used by SIGSEGV handler below. */ 1625/* Alternate stack used by SIGSEGV handler below. */
1633 1626
1634static unsigned char sigsegv_stack[SIGSTKSZ]; 1627static unsigned char sigsegv_stack[SIGSTKSZ];
1635 1628
1629
1630/* Return true if SIGINFO indicates a stack overflow. */
1631
1632static bool
1633stack_overflow (siginfo_t *siginfo)
1634{
1635 /* In theory, a more-accurate heuristic can be obtained by using
1636 GNU/Linux pthread_getattr_np along with POSIX pthread_attr_getstack
1637 and pthread_attr_getguardsize to find the location and size of the
1638 guard area. In practice, though, these functions are so hard to
1639 use reliably that they're not worth bothering with. E.g., see:
1640 https://sourceware.org/bugzilla/show_bug.cgi?id=16291
1641 Other operating systems also have problems, e.g., Solaris's
1642 stack_violation function is tailor-made for this problem, but it
1643 doesn't work on Solaris 11.2 x86-64 with a 32-bit executable.
1644
1645 GNU libsigsegv is overkill for Emacs; otherwise it might be a
1646 candidate here. */
1647
1648 if (!siginfo)
1649 return false;
1650
1651 /* The faulting address. */
1652 char *addr = siginfo->si_addr;
1653 if (!addr)
1654 return false;
1655
1656 /* The known top and bottom of the stack. The actual stack may
1657 extend a bit beyond these boundaries. */
1658 char *bot = stack_bottom;
1659 char *top = near_C_stack_top ();
1660
1661 /* Log base 2 of the stack heuristic ratio. This ratio is the size
1662 of the known stack divided by the size of the guard area past the
1663 end of the stack top. The heuristic is that a bad address is
1664 considered to be a stack overflow if it occurs within
1665 stacksize>>LG_STACK_HEURISTIC bytes above the top of the known
1666 stack. This heuristic is not exactly correct but it's good
1667 enough in practice. */
1668 enum { LG_STACK_HEURISTIC = 8 };
1669
1670 if (bot < top)
1671 return 0 <= addr - top && addr - top < (top - bot) >> LG_STACK_HEURISTIC;
1672 else
1673 return 0 <= top - addr && top - addr < (bot - top) >> LG_STACK_HEURISTIC;
1674}
1675
1676
1636/* Attempt to recover from SIGSEGV caused by C stack overflow. */ 1677/* Attempt to recover from SIGSEGV caused by C stack overflow. */
1637 1678
1638static void 1679static void
@@ -1640,35 +1681,15 @@ handle_sigsegv (int sig, siginfo_t *siginfo, void *arg)
1640{ 1681{
1641 /* Hard GC error may lead to stack overflow caused by 1682 /* Hard GC error may lead to stack overflow caused by
1642 too nested calls to mark_object. No way to survive. */ 1683 too nested calls to mark_object. No way to survive. */
1643 if (!gc_in_progress) 1684 bool fatal = gc_in_progress;
1644 {
1645 struct rlimit rlim;
1646 1685
1647 if (!getrlimit (RLIMIT_STACK, &rlim)) 1686#ifdef FORWARD_SIGNAL_TO_MAIN_THREAD
1648 { 1687 if (!fatal && !pthread_equal (pthread_self (), main_thread))
1649 /* STACK_DANGER_ZONE has to be bigger than 16K on Cygwin, for 1688 fatal = true;
1650 reasons explained in 1689#endif
1651 https://www.cygwin.com/ml/cygwin/2015-06/msg00381.html. */ 1690
1652#ifdef CYGWIN 1691 if (!fatal && stack_overflow (siginfo))
1653 enum { STACK_DANGER_ZONE = 32 * 1024 }; 1692 siglongjmp (return_to_command_loop, 1);
1654#else
1655 enum { STACK_DANGER_ZONE = 16 * 1024 };
1656#endif
1657 char *beg, *end, *addr;
1658
1659 beg = stack_bottom;
1660 end = stack_bottom + stack_direction * rlim.rlim_cur;
1661 if (beg > end)
1662 addr = beg, beg = end, end = addr;
1663 addr = (char *) siginfo->si_addr;
1664 /* If we're somewhere on stack and too close to
1665 one of its boundaries, most likely this is it. */
1666 if (beg < addr && addr < end
1667 && (addr - beg < STACK_DANGER_ZONE
1668 || end - addr < STACK_DANGER_ZONE))
1669 siglongjmp (return_to_command_loop, 1);
1670 }
1671 }
1672 1693
1673 /* Otherwise we can't do anything with this. */ 1694 /* Otherwise we can't do anything with this. */
1674 deliver_fatal_thread_signal (sig); 1695 deliver_fatal_thread_signal (sig);
@@ -1683,8 +1704,6 @@ init_sigsegv (void)
1683 struct sigaction sa; 1704 struct sigaction sa;
1684 stack_t ss; 1705 stack_t ss;
1685 1706
1686 stack_direction = ((char *) &ss < stack_bottom) ? -1 : 1;
1687
1688 ss.ss_sp = sigsegv_stack; 1707 ss.ss_sp = sigsegv_stack;
1689 ss.ss_size = sizeof (sigsegv_stack); 1708 ss.ss_size = sizeof (sigsegv_stack);
1690 ss.ss_flags = 0; 1709 ss.ss_flags = 0;