aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNoam Postavsky2017-01-01 14:09:13 -0500
committerNoam Postavsky2017-01-08 18:45:52 -0500
commit13c6f1d185d301aad2f6d756c148acb2edd0889f (patch)
treeac1df03118e312742b8a9fdf83698ad6b25eb869
parent9a19f26cd796c7321f659a8dbea5296b0eeea51d (diff)
downloademacs-13c6f1d185d301aad2f6d756c148acb2edd0889f.tar.gz
emacs-13c6f1d185d301aad2f6d756c148acb2edd0889f.zip
Use expanded stack during regex matches
While the stack is increased in main() to allow the regex stack allocation to use alloca, we also need to modify regex.c to actually take advantage of the increased stack, and not limit stack allocations to SAFE_ALLOCA bytes. * src/regex.c (MATCH_MAY_ALLOCATE): Remove obsolete comment about allocations in signal handlers which no longer happen and correct description about when and why MATCH_MAY_ALLOCATE should be defined. (emacs_re_safe_alloca): New variable. (REGEX_USE_SAFE_ALLOCA): Use it as the limit of stack allocation instead of MAX_ALLOCA. (emacs_re_max_failures): Rename from `re_max_failures' to avoid confusion with glibc's `re_max_failures'. * src/emacs.c (main): Increase the amount of fixed 'extra' bytes we add to the stack. Instead of changing emacs_re_max_failures based on the new stack size, just change emacs_re_safe_alloca; emacs_re_max_failures remains constant regardless, since if we run out of stack space SAFE_ALLOCA will fall back to heap allocation. Co-authored-by: Eli Zaretskii <eliz@gnu.org>
-rw-r--r--src/emacs.c22
-rw-r--r--src/regex.c73
-rw-r--r--src/regex.h7
3 files changed, 62 insertions, 40 deletions
diff --git a/src/emacs.c b/src/emacs.c
index ae29e9ad29b..28b395c4fb4 100644
--- a/src/emacs.c
+++ b/src/emacs.c
@@ -831,14 +831,16 @@ main (int argc, char **argv)
831 rlim_t lim = rlim.rlim_cur; 831 rlim_t lim = rlim.rlim_cur;
832 832
833 /* Approximate the amount regex.c needs per unit of 833 /* Approximate the amount regex.c needs per unit of
834 re_max_failures, then add 33% to cover the size of the 834 emacs_re_max_failures, then add 33% to cover the size of the
835 smaller stacks that regex.c successively allocates and 835 smaller stacks that regex.c successively allocates and
836 discards on its way to the maximum. */ 836 discards on its way to the maximum. */
837 int ratio = 20 * sizeof (char *); 837 int min_ratio = 20 * sizeof (char *);
838 ratio += ratio / 3; 838 int ratio = min_ratio + min_ratio / 3;
839 839
840 /* Extra space to cover what we're likely to use for other reasons. */ 840 /* Extra space to cover what we're likely to use for other
841 int extra = 200000; 841 reasons. For example, a typical GC might take 30K stack
842 frames. */
843 int extra = (30 * 1000) * 50;
842 844
843 bool try_to_grow_stack = true; 845 bool try_to_grow_stack = true;
844#ifndef CANNOT_DUMP 846#ifndef CANNOT_DUMP
@@ -847,7 +849,7 @@ main (int argc, char **argv)
847 849
848 if (try_to_grow_stack) 850 if (try_to_grow_stack)
849 { 851 {
850 rlim_t newlim = re_max_failures * ratio + extra; 852 rlim_t newlim = emacs_re_max_failures * ratio + extra;
851 853
852 /* Round the new limit to a page boundary; this is needed 854 /* Round the new limit to a page boundary; this is needed
853 for Darwin kernel 15.4.0 (see Bug#23622) and perhaps 855 for Darwin kernel 15.4.0 (see Bug#23622) and perhaps
@@ -869,9 +871,11 @@ main (int argc, char **argv)
869 lim = newlim; 871 lim = newlim;
870 } 872 }
871 } 873 }
872 874 /* If the stack is big enough, let regex.c use more of it before
873 /* Don't let regex.c overflow the stack. */ 875 falling back to heap allocation. */
874 re_max_failures = lim < extra ? 0 : min (lim - extra, SIZE_MAX) / ratio; 876 emacs_re_safe_alloca = max
877 (min (lim - extra, SIZE_MAX) * (min_ratio / ratio),
878 MAX_ALLOCA);
875 } 879 }
876#endif /* HAVE_SETRLIMIT and RLIMIT_STACK and not CYGWIN */ 880#endif /* HAVE_SETRLIMIT and RLIMIT_STACK and not CYGWIN */
877 881
diff --git a/src/regex.c b/src/regex.c
index 8aa54331fc7..db3f0c16a2d 100644
--- a/src/regex.c
+++ b/src/regex.c
@@ -431,9 +431,12 @@ init_syntax_once (void)
431 431
432/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we 432/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we
433 use `alloca' instead of `malloc'. This is because using malloc in 433 use `alloca' instead of `malloc'. This is because using malloc in
434 re_search* or re_match* could cause memory leaks when C-g is used in 434 re_search* or re_match* could cause memory leaks when C-g is used
435 Emacs; also, malloc is slower and causes storage fragmentation. On 435 in Emacs (note that SAFE_ALLOCA could also call malloc, but does so
436 the other hand, malloc is more portable, and easier to debug. 436 via `record_xmalloc' which uses `unwind_protect' to ensure the
437 memory is freed even in case of non-local exits); also, malloc is
438 slower and causes storage fragmentation. On the other hand, malloc
439 is more portable, and easier to debug.
437 440
438 Because we sometimes use alloca, some routines have to be macros, 441 Because we sometimes use alloca, some routines have to be macros,
439 not functions -- `alloca'-allocated space disappears at the end of the 442 not functions -- `alloca'-allocated space disappears at the end of the
@@ -448,7 +451,13 @@ init_syntax_once (void)
448#else /* not REGEX_MALLOC */ 451#else /* not REGEX_MALLOC */
449 452
450# ifdef emacs 453# ifdef emacs
451# define REGEX_USE_SAFE_ALLOCA USE_SAFE_ALLOCA 454/* This may be adjusted in main(), if the stack is successfully grown. */
455ptrdiff_t emacs_re_safe_alloca = MAX_ALLOCA;
456/* Like USE_SAFE_ALLOCA, but use emacs_re_safe_alloca. */
457# define REGEX_USE_SAFE_ALLOCA \
458 ptrdiff_t sa_avail = emacs_re_safe_alloca; \
459 ptrdiff_t sa_count = SPECPDL_INDEX (); bool sa_must_free = false
460
452# define REGEX_SAFE_FREE() SAFE_FREE () 461# define REGEX_SAFE_FREE() SAFE_FREE ()
453# define REGEX_ALLOCATE SAFE_ALLOCA 462# define REGEX_ALLOCATE SAFE_ALLOCA
454# else 463# else
@@ -1196,24 +1205,28 @@ static const char *re_error_msgid[] =
1196 gettext_noop ("Range striding over charsets") /* REG_ERANGEX */ 1205 gettext_noop ("Range striding over charsets") /* REG_ERANGEX */
1197 }; 1206 };
1198 1207
1199/* Avoiding alloca during matching, to placate r_alloc. */ 1208/* Whether to allocate memory during matching. */
1200 1209
1201/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the 1210/* Define MATCH_MAY_ALLOCATE to allow the searching and matching
1202 searching and matching functions should not call alloca. On some 1211 functions to allocate memory for the failure stack and registers.
1203 systems, alloca is implemented in terms of malloc, and if we're 1212 Normally should be defined, because otherwise searching and
1204 using the relocating allocator routines, then malloc could cause a 1213 matching routines will have much smaller memory resources at their
1205 relocation, which might (if the strings being searched are in the 1214 disposal, and therefore might fail to handle complex regexps.
1206 ralloc heap) shift the data out from underneath the regexp 1215 Therefore undefine MATCH_MAY_ALLOCATE only in the following
1207 routines. 1216 exceptional situations:
1208 1217
1209 Here's another reason to avoid allocation: Emacs 1218 . When running on a system where memory is at premium.
1210 processes input from X in a signal handler; processing X input may 1219 . When running on a system where memory is at a premium.
1211 call malloc; if input arrives while a matching routine is calling 1220 its implementation, or its being unavailable, or due to a
1212 malloc, then we're scrod. But Emacs can't just block input while 1221 very small stack size. This requires to define REGEX_MALLOC
1213 calling matching routines; then we don't notice interrupts when 1222 very small stack size. This requires defining REGEX_MALLOC
1214 they come in. So, Emacs blocks input around all regexp calls 1223 leaks if search is interrupted by a signal. (For these
1215 except the matching calls, which it leaves unprotected, in the 1224 reasons, defining REGEX_MALLOC when building Emacs
1216 faith that they will not malloc. */ 1225 automatically undefines MATCH_MAY_ALLOCATE, but outside
1226 Emacs you may not care about memory leaks.) If you want to
1227 prevent the memory leaks, undefine MATCH_MAY_ALLOCATE.
1228 . When code that calls the searching and matching functions
1229 cannot allow memory allocation, for whatever reasons. */
1217 1230
1218/* Normally, this is fine. */ 1231/* Normally, this is fine. */
1219#define MATCH_MAY_ALLOCATE 1232#define MATCH_MAY_ALLOCATE
@@ -1250,9 +1263,9 @@ static const char *re_error_msgid[] =
1250 whose default stack limit is 2mb. In order for a larger 1263 whose default stack limit is 2mb. In order for a larger
1251 value to work reliably, you have to try to make it accord 1264 value to work reliably, you have to try to make it accord
1252 with the process stack limit. */ 1265 with the process stack limit. */
1253size_t re_max_failures = 40000; 1266size_t emacs_re_max_failures = 40000;
1254# else 1267# else
1255size_t re_max_failures = 4000; 1268size_t emacs_re_max_failures = 4000;
1256# endif 1269# endif
1257 1270
1258union fail_stack_elt 1271union fail_stack_elt
@@ -1305,7 +1318,7 @@ typedef struct
1305 1318
1306 1319
1307/* Double the size of FAIL_STACK, up to a limit 1320/* Double the size of FAIL_STACK, up to a limit
1308 which allows approximately `re_max_failures' items. 1321 which allows approximately `emacs_re_max_failures' items.
1309 1322
1310 Return 1 if succeeds, and 0 if either ran out of memory 1323 Return 1 if succeeds, and 0 if either ran out of memory
1311 allocating space for it or it was already too large. 1324 allocating space for it or it was already too large.
@@ -1320,19 +1333,19 @@ typedef struct
1320#define FAIL_STACK_GROWTH_FACTOR 4 1333#define FAIL_STACK_GROWTH_FACTOR 4
1321 1334
1322#define GROW_FAIL_STACK(fail_stack) \ 1335#define GROW_FAIL_STACK(fail_stack) \
1323 (((fail_stack).size >= re_max_failures * TYPICAL_FAILURE_SIZE) \ 1336 (((fail_stack).size >= emacs_re_max_failures * TYPICAL_FAILURE_SIZE) \
1324 ? 0 \ 1337 ? 0 \
1325 : ((fail_stack).stack \ 1338 : ((fail_stack).stack \
1326 = REGEX_REALLOCATE_STACK ((fail_stack).stack, \ 1339 = REGEX_REALLOCATE_STACK ((fail_stack).stack, \
1327 (fail_stack).size * sizeof (fail_stack_elt_t), \ 1340 (fail_stack).size * sizeof (fail_stack_elt_t), \
1328 min (re_max_failures * TYPICAL_FAILURE_SIZE, \ 1341 min (emacs_re_max_failures * TYPICAL_FAILURE_SIZE, \
1329 ((fail_stack).size * FAIL_STACK_GROWTH_FACTOR)) \ 1342 ((fail_stack).size * FAIL_STACK_GROWTH_FACTOR)) \
1330 * sizeof (fail_stack_elt_t)), \ 1343 * sizeof (fail_stack_elt_t)), \
1331 \ 1344 \
1332 (fail_stack).stack == NULL \ 1345 (fail_stack).stack == NULL \
1333 ? 0 \ 1346 ? 0 \
1334 : ((fail_stack).size \ 1347 : ((fail_stack).size \
1335 = (min (re_max_failures * TYPICAL_FAILURE_SIZE, \ 1348 = (min (emacs_re_max_failures * TYPICAL_FAILURE_SIZE, \
1336 ((fail_stack).size * FAIL_STACK_GROWTH_FACTOR))), \ 1349 ((fail_stack).size * FAIL_STACK_GROWTH_FACTOR))), \
1337 1))) 1350 1)))
1338 1351
@@ -3641,9 +3654,9 @@ regex_compile (const_re_char *pattern, size_t size,
3641 { 3654 {
3642 int num_regs = bufp->re_nsub + 1; 3655 int num_regs = bufp->re_nsub + 1;
3643 3656
3644 if (fail_stack.size < re_max_failures * TYPICAL_FAILURE_SIZE) 3657 if (fail_stack.size < emacs_re_max_failures * TYPICAL_FAILURE_SIZE)
3645 { 3658 {
3646 fail_stack.size = re_max_failures * TYPICAL_FAILURE_SIZE; 3659 fail_stack.size = emacs_re_max_failures * TYPICAL_FAILURE_SIZE;
3647 falk_stack.stack = realloc (fail_stack.stack, 3660 falk_stack.stack = realloc (fail_stack.stack,
3648 fail_stack.size * sizeof *falk_stack.stack); 3661 fail_stack.size * sizeof *falk_stack.stack);
3649 } 3662 }
diff --git a/src/regex.h b/src/regex.h
index 34c9929f93d..1d439de259c 100644
--- a/src/regex.h
+++ b/src/regex.h
@@ -186,7 +186,12 @@ typedef unsigned long reg_syntax_t;
186#endif 186#endif
187 187
188/* Roughly the maximum number of failure points on the stack. */ 188/* Roughly the maximum number of failure points on the stack. */
189extern size_t re_max_failures; 189extern size_t emacs_re_max_failures;
190
191#ifdef emacs
192/* Amount of memory that we can safely stack allocate. */
193extern ptrdiff_t emacs_re_safe_alloca;
194#endif
190 195
191 196
192/* Define combinations of the above bits for the standard possibilities. 197/* Define combinations of the above bits for the standard possibilities.