diff options
Diffstat (limited to 'src/regex.c')
| -rw-r--r-- | src/regex.c | 96 |
1 files changed, 55 insertions, 41 deletions
diff --git a/src/regex.c b/src/regex.c index f1686cf700c..db3f0c16a2d 100644 --- a/src/regex.c +++ b/src/regex.c | |||
| @@ -2,7 +2,7 @@ | |||
| 2 | 0.12. (Implements POSIX draft P1003.2/D11.2, except for some of the | 2 | 0.12. (Implements POSIX draft P1003.2/D11.2, except for some of the |
| 3 | internationalization features.) | 3 | internationalization features.) |
| 4 | 4 | ||
| 5 | Copyright (C) 1993-2016 Free Software Foundation, Inc. | 5 | Copyright (C) 1993-2017 Free Software Foundation, Inc. |
| 6 | 6 | ||
| 7 | This program is free software; you can redistribute it and/or modify | 7 | This program is free software; you can redistribute it and/or modify |
| 8 | it under the terms of the GNU General Public License as published by | 8 | it under the terms of the GNU General Public License as published by |
| @@ -310,11 +310,12 @@ enum syntaxcode { Swhitespace = 0, Sword = 1, Ssymbol = 2 }; | |||
| 310 | || ((c) >= 'a' && (c) <= 'f') \ | 310 | || ((c) >= 'a' && (c) <= 'f') \ |
| 311 | || ((c) >= 'A' && (c) <= 'F')) | 311 | || ((c) >= 'A' && (c) <= 'F')) |
| 312 | 312 | ||
| 313 | /* This is only used for single-byte characters. */ | ||
| 314 | # define ISBLANK(c) ((c) == ' ' || (c) == '\t') | ||
| 315 | |||
| 316 | /* The rest must handle multibyte characters. */ | 313 | /* The rest must handle multibyte characters. */ |
| 317 | 314 | ||
| 315 | # define ISBLANK(c) (IS_REAL_ASCII (c) \ | ||
| 316 | ? ((c) == ' ' || (c) == '\t') \ | ||
| 317 | : blankp (c)) | ||
| 318 | |||
| 318 | # define ISGRAPH(c) (SINGLE_BYTE_CHAR_P (c) \ | 319 | # define ISGRAPH(c) (SINGLE_BYTE_CHAR_P (c) \ |
| 319 | ? (c) > ' ' && !((c) >= 0177 && (c) <= 0240) \ | 320 | ? (c) > ' ' && !((c) >= 0177 && (c) <= 0240) \ |
| 320 | : graphicp (c)) | 321 | : graphicp (c)) |
| @@ -430,9 +431,12 @@ init_syntax_once (void) | |||
| 430 | 431 | ||
| 431 | /* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we | 432 | /* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we |
| 432 | use `alloca' instead of `malloc'. This is because using malloc in | 433 | use `alloca' instead of `malloc'. This is because using malloc in |
| 433 | re_search* or re_match* could cause memory leaks when C-g is used in | 434 | re_search* or re_match* could cause memory leaks when C-g is used |
| 434 | Emacs; also, malloc is slower and causes storage fragmentation. On | 435 | in Emacs (note that SAFE_ALLOCA could also call malloc, but does so |
| 435 | the other hand, malloc is more portable, and easier to debug. | 436 | via `record_xmalloc' which uses `unwind_protect' to ensure the |
| 437 | memory is freed even in case of non-local exits); also, malloc is | ||
| 438 | slower and causes storage fragmentation. On the other hand, malloc | ||
| 439 | is more portable, and easier to debug. | ||
| 436 | 440 | ||
| 437 | Because we sometimes use alloca, some routines have to be macros, | 441 | Because we sometimes use alloca, some routines have to be macros, |
| 438 | not functions -- `alloca'-allocated space disappears at the end of the | 442 | not functions -- `alloca'-allocated space disappears at the end of the |
| @@ -447,7 +451,13 @@ init_syntax_once (void) | |||
| 447 | #else /* not REGEX_MALLOC */ | 451 | #else /* not REGEX_MALLOC */ |
| 448 | 452 | ||
| 449 | # ifdef emacs | 453 | # ifdef emacs |
| 450 | # define REGEX_USE_SAFE_ALLOCA USE_SAFE_ALLOCA | 454 | /* This may be adjusted in main(), if the stack is successfully grown. */ |
| 455 | ptrdiff_t emacs_re_safe_alloca = MAX_ALLOCA; | ||
| 456 | /* Like USE_SAFE_ALLOCA, but use emacs_re_safe_alloca. */ | ||
| 457 | # define REGEX_USE_SAFE_ALLOCA \ | ||
| 458 | ptrdiff_t sa_avail = emacs_re_safe_alloca; \ | ||
| 459 | ptrdiff_t sa_count = SPECPDL_INDEX (); bool sa_must_free = false | ||
| 460 | |||
| 451 | # define REGEX_SAFE_FREE() SAFE_FREE () | 461 | # define REGEX_SAFE_FREE() SAFE_FREE () |
| 452 | # define REGEX_ALLOCATE SAFE_ALLOCA | 462 | # define REGEX_ALLOCATE SAFE_ALLOCA |
| 453 | # else | 463 | # else |
| @@ -1195,24 +1205,28 @@ static const char *re_error_msgid[] = | |||
| 1195 | gettext_noop ("Range striding over charsets") /* REG_ERANGEX */ | 1205 | gettext_noop ("Range striding over charsets") /* REG_ERANGEX */ |
| 1196 | }; | 1206 | }; |
| 1197 | 1207 | ||
| 1198 | /* Avoiding alloca during matching, to placate r_alloc. */ | 1208 | /* Whether to allocate memory during matching. */ |
| 1199 | 1209 | ||
| 1200 | /* Define MATCH_MAY_ALLOCATE unless we need to make sure that the | 1210 | /* Define MATCH_MAY_ALLOCATE to allow the searching and matching |
| 1201 | searching and matching functions should not call alloca. On some | 1211 | functions to allocate memory for the failure stack and registers. |
| 1202 | systems, alloca is implemented in terms of malloc, and if we're | 1212 | Normally should be defined, because otherwise searching and |
| 1203 | using the relocating allocator routines, then malloc could cause a | 1213 | matching routines will have much smaller memory resources at their |
| 1204 | relocation, which might (if the strings being searched are in the | 1214 | disposal, and therefore might fail to handle complex regexps. |
| 1205 | ralloc heap) shift the data out from underneath the regexp | 1215 | Therefore undefine MATCH_MAY_ALLOCATE only in the following |
| 1206 | routines. | 1216 | exceptional situations: |
| 1207 | 1217 | ||
| 1208 | Here's another reason to avoid allocation: Emacs | 1218 | . When running on a system where memory is at a premium. |
| 1209 | processes input from X in a signal handler; processing X input may | 1219 | . When alloca cannot be used at all, perhaps due to bugs in |
| 1210 | call malloc; if input arrives while a matching routine is calling | 1220 | its implementation, or its being unavailable, or due to a |
| 1211 | malloc, then we're scrod. But Emacs can't just block input while | 1221 | very small stack size. This requires defining REGEX_MALLOC |
| 1212 | calling matching routines; then we don't notice interrupts when | 1222 | to use malloc instead, which in turn could lead to memory |
| 1213 | they come in. So, Emacs blocks input around all regexp calls | 1223 | leaks if search is interrupted by a signal. (For these |
| 1214 | except the matching calls, which it leaves unprotected, in the | 1224 | reasons, defining REGEX_MALLOC when building Emacs |
| 1215 | faith that they will not malloc. */ | 1225 | automatically undefines MATCH_MAY_ALLOCATE, but outside |
| 1226 | Emacs you may not care about memory leaks.) If you want to | ||
| 1227 | prevent the memory leaks, undefine MATCH_MAY_ALLOCATE. | ||
| 1228 | . When code that calls the searching and matching functions | ||
| 1229 | cannot allow memory allocation, for whatever reasons. */ | ||
| 1216 | 1230 | ||
| 1217 | /* Normally, this is fine. */ | 1231 | /* Normally, this is fine. */ |
| 1218 | #define MATCH_MAY_ALLOCATE | 1232 | #define MATCH_MAY_ALLOCATE |
| @@ -1249,9 +1263,9 @@ static const char *re_error_msgid[] = | |||
| 1249 | whose default stack limit is 2mb. In order for a larger | 1263 | whose default stack limit is 2mb. In order for a larger |
| 1250 | value to work reliably, you have to try to make it accord | 1264 | value to work reliably, you have to try to make it accord |
| 1251 | with the process stack limit. */ | 1265 | with the process stack limit. */ |
| 1252 | size_t re_max_failures = 40000; | 1266 | size_t emacs_re_max_failures = 40000; |
| 1253 | # else | 1267 | # else |
| 1254 | size_t re_max_failures = 4000; | 1268 | size_t emacs_re_max_failures = 4000; |
| 1255 | # endif | 1269 | # endif |
| 1256 | 1270 | ||
| 1257 | union fail_stack_elt | 1271 | union fail_stack_elt |
| @@ -1304,7 +1318,7 @@ typedef struct | |||
| 1304 | 1318 | ||
| 1305 | 1319 | ||
| 1306 | /* Double the size of FAIL_STACK, up to a limit | 1320 | /* Double the size of FAIL_STACK, up to a limit |
| 1307 | which allows approximately `re_max_failures' items. | 1321 | which allows approximately `emacs_re_max_failures' items. |
| 1308 | 1322 | ||
| 1309 | Return 1 if succeeds, and 0 if either ran out of memory | 1323 | Return 1 if succeeds, and 0 if either ran out of memory |
| 1310 | allocating space for it or it was already too large. | 1324 | allocating space for it or it was already too large. |
| @@ -1319,23 +1333,20 @@ typedef struct | |||
| 1319 | #define FAIL_STACK_GROWTH_FACTOR 4 | 1333 | #define FAIL_STACK_GROWTH_FACTOR 4 |
| 1320 | 1334 | ||
| 1321 | #define GROW_FAIL_STACK(fail_stack) \ | 1335 | #define GROW_FAIL_STACK(fail_stack) \ |
| 1322 | (((fail_stack).size * sizeof (fail_stack_elt_t) \ | 1336 | (((fail_stack).size >= emacs_re_max_failures * TYPICAL_FAILURE_SIZE) \ |
| 1323 | >= re_max_failures * TYPICAL_FAILURE_SIZE) \ | ||
| 1324 | ? 0 \ | 1337 | ? 0 \ |
| 1325 | : ((fail_stack).stack \ | 1338 | : ((fail_stack).stack \ |
| 1326 | = REGEX_REALLOCATE_STACK ((fail_stack).stack, \ | 1339 | = REGEX_REALLOCATE_STACK ((fail_stack).stack, \ |
| 1327 | (fail_stack).size * sizeof (fail_stack_elt_t), \ | 1340 | (fail_stack).size * sizeof (fail_stack_elt_t), \ |
| 1328 | min (re_max_failures * TYPICAL_FAILURE_SIZE, \ | 1341 | min (emacs_re_max_failures * TYPICAL_FAILURE_SIZE, \ |
| 1329 | ((fail_stack).size * sizeof (fail_stack_elt_t) \ | 1342 | ((fail_stack).size * FAIL_STACK_GROWTH_FACTOR)) \ |
| 1330 | * FAIL_STACK_GROWTH_FACTOR))), \ | 1343 | * sizeof (fail_stack_elt_t)), \ |
| 1331 | \ | 1344 | \ |
| 1332 | (fail_stack).stack == NULL \ | 1345 | (fail_stack).stack == NULL \ |
| 1333 | ? 0 \ | 1346 | ? 0 \ |
| 1334 | : ((fail_stack).size \ | 1347 | : ((fail_stack).size \ |
| 1335 | = (min (re_max_failures * TYPICAL_FAILURE_SIZE, \ | 1348 | = (min (emacs_re_max_failures * TYPICAL_FAILURE_SIZE, \ |
| 1336 | ((fail_stack).size * sizeof (fail_stack_elt_t) \ | 1349 | ((fail_stack).size * FAIL_STACK_GROWTH_FACTOR))), \ |
| 1337 | * FAIL_STACK_GROWTH_FACTOR)) \ | ||
| 1338 | / sizeof (fail_stack_elt_t)), \ | ||
| 1339 | 1))) | 1350 | 1))) |
| 1340 | 1351 | ||
| 1341 | 1352 | ||
| @@ -1790,6 +1801,7 @@ struct range_table_work_area | |||
| 1790 | #define BIT_ALNUM 0x80 | 1801 | #define BIT_ALNUM 0x80 |
| 1791 | #define BIT_GRAPH 0x100 | 1802 | #define BIT_GRAPH 0x100 |
| 1792 | #define BIT_PRINT 0x200 | 1803 | #define BIT_PRINT 0x200 |
| 1804 | #define BIT_BLANK 0x400 | ||
| 1793 | 1805 | ||
| 1794 | 1806 | ||
| 1795 | /* Set the bit for character C in a list. */ | 1807 | /* Set the bit for character C in a list. */ |
| @@ -2066,8 +2078,9 @@ re_wctype_to_bit (re_wctype_t cc) | |||
| 2066 | case RECC_SPACE: return BIT_SPACE; | 2078 | case RECC_SPACE: return BIT_SPACE; |
| 2067 | case RECC_GRAPH: return BIT_GRAPH; | 2079 | case RECC_GRAPH: return BIT_GRAPH; |
| 2068 | case RECC_PRINT: return BIT_PRINT; | 2080 | case RECC_PRINT: return BIT_PRINT; |
| 2081 | case RECC_BLANK: return BIT_BLANK; | ||
| 2069 | case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL: | 2082 | case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL: |
| 2070 | case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0; | 2083 | case RECC_UNIBYTE: case RECC_ERROR: return 0; |
| 2071 | default: | 2084 | default: |
| 2072 | abort (); | 2085 | abort (); |
| 2073 | } | 2086 | } |
| @@ -3641,9 +3654,9 @@ regex_compile (const_re_char *pattern, size_t size, | |||
| 3641 | { | 3654 | { |
| 3642 | int num_regs = bufp->re_nsub + 1; | 3655 | int num_regs = bufp->re_nsub + 1; |
| 3643 | 3656 | ||
| 3644 | if (fail_stack.size < re_max_failures * TYPICAL_FAILURE_SIZE) | 3657 | if (fail_stack.size < emacs_re_max_failures * TYPICAL_FAILURE_SIZE) |
| 3645 | { | 3658 | { |
| 3646 | fail_stack.size = re_max_failures * TYPICAL_FAILURE_SIZE; | 3659 | fail_stack.size = emacs_re_max_failures * TYPICAL_FAILURE_SIZE; |
| 3647 | falk_stack.stack = realloc (fail_stack.stack, | 3660 | falk_stack.stack = realloc (fail_stack.stack, |
| 3648 | fail_stack.size * sizeof *falk_stack.stack); | 3661 | fail_stack.size * sizeof *falk_stack.stack); |
| 3649 | } | 3662 | } |
| @@ -4658,6 +4671,7 @@ execute_charset (const_re_char **pp, unsigned c, unsigned corig, bool unibyte) | |||
| 4658 | (class_bits & BIT_ALNUM && ISALNUM (c)) || | 4671 | (class_bits & BIT_ALNUM && ISALNUM (c)) || |
| 4659 | (class_bits & BIT_ALPHA && ISALPHA (c)) || | 4672 | (class_bits & BIT_ALPHA && ISALPHA (c)) || |
| 4660 | (class_bits & BIT_SPACE && ISSPACE (c)) || | 4673 | (class_bits & BIT_SPACE && ISSPACE (c)) || |
| 4674 | (class_bits & BIT_BLANK && ISBLANK (c)) || | ||
| 4661 | (class_bits & BIT_WORD && ISWORD (c)) || | 4675 | (class_bits & BIT_WORD && ISWORD (c)) || |
| 4662 | ((class_bits & BIT_UPPER) && | 4676 | ((class_bits & BIT_UPPER) && |
| 4663 | (ISUPPER (c) || (corig != c && | 4677 | (ISUPPER (c) || (corig != c && |