about summary refs log tree commit diff stats
path: root/src/regex.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/regex.c')
-rw-r--r-- src/regex.c 96
1 files changed, 55 insertions, 41 deletions
diff --git a/src/regex.c b/src/regex.c
index f1686cf700c..db3f0c16a2d 100644
--- a/src/regex.c
+++ b/src/regex.c
@@ -2,7 +2,7 @@
2 0.12. (Implements POSIX draft P1003.2/D11.2, except for some of the 2 0.12. (Implements POSIX draft P1003.2/D11.2, except for some of the
3 internationalization features.) 3 internationalization features.)
4 4
5 Copyright (C) 1993-2016 Free Software Foundation, Inc. 5 Copyright (C) 1993-2017 Free Software Foundation, Inc.
6 6
7 This program is free software; you can redistribute it and/or modify 7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by 8 it under the terms of the GNU General Public License as published by
@@ -310,11 +310,12 @@ enum syntaxcode { Swhitespace = 0, Sword = 1, Ssymbol = 2 };
310 || ((c) >= 'a' && (c) <= 'f') \ 310 || ((c) >= 'a' && (c) <= 'f') \
311 || ((c) >= 'A' && (c) <= 'F')) 311 || ((c) >= 'A' && (c) <= 'F'))
312 312
313/* This is only used for single-byte characters. */
314# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
315
316/* The rest must handle multibyte characters. */ 313/* The rest must handle multibyte characters. */
317 314
315# define ISBLANK(c) (IS_REAL_ASCII (c) \
316 ? ((c) == ' ' || (c) == '\t') \
317 : blankp (c))
318
318# define ISGRAPH(c) (SINGLE_BYTE_CHAR_P (c) \ 319# define ISGRAPH(c) (SINGLE_BYTE_CHAR_P (c) \
319 ? (c) > ' ' && !((c) >= 0177 && (c) <= 0240) \ 320 ? (c) > ' ' && !((c) >= 0177 && (c) <= 0240) \
320 : graphicp (c)) 321 : graphicp (c))
@@ -430,9 +431,12 @@ init_syntax_once (void)
430 431
431/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we 432/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we
432 use `alloca' instead of `malloc'. This is because using malloc in 433 use `alloca' instead of `malloc'. This is because using malloc in
433 re_search* or re_match* could cause memory leaks when C-g is used in 434 re_search* or re_match* could cause memory leaks when C-g is used
434 Emacs; also, malloc is slower and causes storage fragmentation. On 435 in Emacs (note that SAFE_ALLOCA could also call malloc, but does so
435 the other hand, malloc is more portable, and easier to debug. 436 via `record_xmalloc' which uses `unwind_protect' to ensure the
437 memory is freed even in case of non-local exits); also, malloc is
438 slower and causes storage fragmentation. On the other hand, malloc
439 is more portable, and easier to debug.
436 440
437 Because we sometimes use alloca, some routines have to be macros, 441 Because we sometimes use alloca, some routines have to be macros,
438 not functions -- `alloca'-allocated space disappears at the end of the 442 not functions -- `alloca'-allocated space disappears at the end of the
@@ -447,7 +451,13 @@ init_syntax_once (void)
447#else /* not REGEX_MALLOC */ 451#else /* not REGEX_MALLOC */
448 452
449# ifdef emacs 453# ifdef emacs
450# define REGEX_USE_SAFE_ALLOCA USE_SAFE_ALLOCA 454/* This may be adjusted in main(), if the stack is successfully grown. */
455ptrdiff_t emacs_re_safe_alloca = MAX_ALLOCA;
456/* Like USE_SAFE_ALLOCA, but use emacs_re_safe_alloca. */
457# define REGEX_USE_SAFE_ALLOCA \
458 ptrdiff_t sa_avail = emacs_re_safe_alloca; \
459 ptrdiff_t sa_count = SPECPDL_INDEX (); bool sa_must_free = false
460
451# define REGEX_SAFE_FREE() SAFE_FREE () 461# define REGEX_SAFE_FREE() SAFE_FREE ()
452# define REGEX_ALLOCATE SAFE_ALLOCA 462# define REGEX_ALLOCATE SAFE_ALLOCA
453# else 463# else
@@ -1195,24 +1205,28 @@ static const char *re_error_msgid[] =
1195 gettext_noop ("Range striding over charsets") /* REG_ERANGEX */ 1205 gettext_noop ("Range striding over charsets") /* REG_ERANGEX */
1196 }; 1206 };
1197 1207
1198/* Avoiding alloca during matching, to placate r_alloc. */ 1208/* Whether to allocate memory during matching. */
1199 1209
1200/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the 1210/* Define MATCH_MAY_ALLOCATE to allow the searching and matching
1201 searching and matching functions should not call alloca. On some 1211 functions to allocate memory for the failure stack and registers.
1202 systems, alloca is implemented in terms of malloc, and if we're 1212 Normally should be defined, because otherwise searching and
1203 using the relocating allocator routines, then malloc could cause a 1213 matching routines will have much smaller memory resources at their
1204 relocation, which might (if the strings being searched are in the 1214 disposal, and therefore might fail to handle complex regexps.
1205 ralloc heap) shift the data out from underneath the regexp 1215 Therefore undefine MATCH_MAY_ALLOCATE only in the following
1206 routines. 1216 exceptional situations:
1207 1217
1208 Here's another reason to avoid allocation: Emacs 1218 . When running on a system where memory is at a premium.
1209 processes input from X in a signal handler; processing X input may 1219 . When alloca cannot be used at all, perhaps due to bugs in
1210 call malloc; if input arrives while a matching routine is calling 1220 its implementation, or its being unavailable, or due to a
1211 malloc, then we're scrod. But Emacs can't just block input while 1221 very small stack size. This requires defining REGEX_MALLOC
1212 calling matching routines; then we don't notice interrupts when 1222 to use malloc instead, which in turn could lead to memory
1213 they come in. So, Emacs blocks input around all regexp calls 1223 leaks if search is interrupted by a signal. (For these
1214 except the matching calls, which it leaves unprotected, in the 1224 reasons, defining REGEX_MALLOC when building Emacs
1215 faith that they will not malloc. */ 1225 automatically undefines MATCH_MAY_ALLOCATE, but outside
1226 Emacs you may not care about memory leaks.) If you want to
1227 prevent the memory leaks, undefine MATCH_MAY_ALLOCATE.
1228 . When code that calls the searching and matching functions
1229 cannot allow memory allocation, for whatever reasons. */
1216 1230
1217/* Normally, this is fine. */ 1231/* Normally, this is fine. */
1218#define MATCH_MAY_ALLOCATE 1232#define MATCH_MAY_ALLOCATE
@@ -1249,9 +1263,9 @@ static const char *re_error_msgid[] =
1249 whose default stack limit is 2mb. In order for a larger 1263 whose default stack limit is 2mb. In order for a larger
1250 value to work reliably, you have to try to make it accord 1264 value to work reliably, you have to try to make it accord
1251 with the process stack limit. */ 1265 with the process stack limit. */
1252size_t re_max_failures = 40000; 1266size_t emacs_re_max_failures = 40000;
1253# else 1267# else
1254size_t re_max_failures = 4000; 1268size_t emacs_re_max_failures = 4000;
1255# endif 1269# endif
1256 1270
1257union fail_stack_elt 1271union fail_stack_elt
@@ -1304,7 +1318,7 @@ typedef struct
1304 1318
1305 1319
1306/* Double the size of FAIL_STACK, up to a limit 1320/* Double the size of FAIL_STACK, up to a limit
1307 which allows approximately `re_max_failures' items. 1321 which allows approximately `emacs_re_max_failures' items.
1308 1322
1309 Return 1 if succeeds, and 0 if either ran out of memory 1323 Return 1 if succeeds, and 0 if either ran out of memory
1310 allocating space for it or it was already too large. 1324 allocating space for it or it was already too large.
@@ -1319,23 +1333,20 @@ typedef struct
1319#define FAIL_STACK_GROWTH_FACTOR 4 1333#define FAIL_STACK_GROWTH_FACTOR 4
1320 1334
1321#define GROW_FAIL_STACK(fail_stack) \ 1335#define GROW_FAIL_STACK(fail_stack) \
1322 (((fail_stack).size * sizeof (fail_stack_elt_t) \ 1336 (((fail_stack).size >= emacs_re_max_failures * TYPICAL_FAILURE_SIZE) \
1323 >= re_max_failures * TYPICAL_FAILURE_SIZE) \
1324 ? 0 \ 1337 ? 0 \
1325 : ((fail_stack).stack \ 1338 : ((fail_stack).stack \
1326 = REGEX_REALLOCATE_STACK ((fail_stack).stack, \ 1339 = REGEX_REALLOCATE_STACK ((fail_stack).stack, \
1327 (fail_stack).size * sizeof (fail_stack_elt_t), \ 1340 (fail_stack).size * sizeof (fail_stack_elt_t), \
1328 min (re_max_failures * TYPICAL_FAILURE_SIZE, \ 1341 min (emacs_re_max_failures * TYPICAL_FAILURE_SIZE, \
1329 ((fail_stack).size * sizeof (fail_stack_elt_t) \ 1342 ((fail_stack).size * FAIL_STACK_GROWTH_FACTOR)) \
1330 * FAIL_STACK_GROWTH_FACTOR))), \ 1343 * sizeof (fail_stack_elt_t)), \
1331 \ 1344 \
1332 (fail_stack).stack == NULL \ 1345 (fail_stack).stack == NULL \
1333 ? 0 \ 1346 ? 0 \
1334 : ((fail_stack).size \ 1347 : ((fail_stack).size \
1335 = (min (re_max_failures * TYPICAL_FAILURE_SIZE, \ 1348 = (min (emacs_re_max_failures * TYPICAL_FAILURE_SIZE, \
1336 ((fail_stack).size * sizeof (fail_stack_elt_t) \ 1349 ((fail_stack).size * FAIL_STACK_GROWTH_FACTOR))), \
1337 * FAIL_STACK_GROWTH_FACTOR)) \
1338 / sizeof (fail_stack_elt_t)), \
1339 1))) 1350 1)))
1340 1351
1341 1352
@@ -1790,6 +1801,7 @@ struct range_table_work_area
1790#define BIT_ALNUM 0x80 1801#define BIT_ALNUM 0x80
1791#define BIT_GRAPH 0x100 1802#define BIT_GRAPH 0x100
1792#define BIT_PRINT 0x200 1803#define BIT_PRINT 0x200
1804#define BIT_BLANK 0x400
1793 1805
1794 1806
1795/* Set the bit for character C in a list. */ 1807/* Set the bit for character C in a list. */
@@ -2066,8 +2078,9 @@ re_wctype_to_bit (re_wctype_t cc)
2066 case RECC_SPACE: return BIT_SPACE; 2078 case RECC_SPACE: return BIT_SPACE;
2067 case RECC_GRAPH: return BIT_GRAPH; 2079 case RECC_GRAPH: return BIT_GRAPH;
2068 case RECC_PRINT: return BIT_PRINT; 2080 case RECC_PRINT: return BIT_PRINT;
2081 case RECC_BLANK: return BIT_BLANK;
2069 case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL: 2082 case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL:
2070 case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0; 2083 case RECC_UNIBYTE: case RECC_ERROR: return 0;
2071 default: 2084 default:
2072 abort (); 2085 abort ();
2073 } 2086 }
@@ -3641,9 +3654,9 @@ regex_compile (const_re_char *pattern, size_t size,
3641 { 3654 {
3642 int num_regs = bufp->re_nsub + 1; 3655 int num_regs = bufp->re_nsub + 1;
3643 3656
3644 if (fail_stack.size < re_max_failures * TYPICAL_FAILURE_SIZE) 3657 if (fail_stack.size < emacs_re_max_failures * TYPICAL_FAILURE_SIZE)
3645 { 3658 {
3646 fail_stack.size = re_max_failures * TYPICAL_FAILURE_SIZE; 3659 fail_stack.size = emacs_re_max_failures * TYPICAL_FAILURE_SIZE;
3647 fail_stack.stack = realloc (fail_stack.stack, 3660 fail_stack.stack = realloc (fail_stack.stack,
3648 fail_stack.size * sizeof *fail_stack.stack); 3661 fail_stack.size * sizeof *fail_stack.stack);
3649 } 3662 }
@@ -4658,6 +4671,7 @@ execute_charset (const_re_char **pp, unsigned c, unsigned corig, bool unibyte)
4658 (class_bits & BIT_ALNUM && ISALNUM (c)) || 4671 (class_bits & BIT_ALNUM && ISALNUM (c)) ||
4659 (class_bits & BIT_ALPHA && ISALPHA (c)) || 4672 (class_bits & BIT_ALPHA && ISALPHA (c)) ||
4660 (class_bits & BIT_SPACE && ISSPACE (c)) || 4673 (class_bits & BIT_SPACE && ISSPACE (c)) ||
4674 (class_bits & BIT_BLANK && ISBLANK (c)) ||
4661 (class_bits & BIT_WORD && ISWORD (c)) || 4675 (class_bits & BIT_WORD && ISWORD (c)) ||
4662 ((class_bits & BIT_UPPER) && 4676 ((class_bits & BIT_UPPER) &&
4663 (ISUPPER (c) || (corig != c && 4677 (ISUPPER (c) || (corig != c &&