diff options
| author | Paul Eggert | 2018-08-05 18:41:21 -0700 |
|---|---|---|
| committer | Paul Eggert | 2018-08-05 19:36:10 -0700 |
| commit | 9c022a488bd462b85895ef84313fe84c5bc2bb4d (patch) | |
| tree | c0e2d8ac4912cb4651b6c36cafed765b278ea2a4 | |
| parent | e097826f8972c78577d1d5a14389ec8e888be1b7 (diff) | |
| download | emacs-9c022a488bd462b85895ef84313fe84c5bc2bb4d.tar.gz emacs-9c022a488bd462b85895ef84313fe84c5bc2bb4d.zip | |
Spruce up some regex-emacs comments
* src/regex-emacs.c, src/regex-emacs.h: Update comments.
| -rw-r--r-- | src/regex-emacs.c | 442 | ||||
| -rw-r--r-- | src/regex-emacs.h | 56 |
2 files changed, 243 insertions, 255 deletions
diff --git a/src/regex-emacs.c b/src/regex-emacs.c
index b944fe0c5a7..d19838a876e 100644
--- a/src/regex-emacs.c
+++ b/src/regex-emacs.c
| @@ -1,6 +1,4 @@ | |||
| 1 | /* Extended regular expression matching and search library, version | 1 | /* Emacs regular expression matching and search |
| 2 | 0.12. (Implements POSIX draft P1003.2/D11.2, except for some of the | ||
| 3 | internationalization features.) | ||
| 4 | 2 | ||
| 5 | Copyright (C) 1993-2018 Free Software Foundation, Inc. | 3 | Copyright (C) 1993-2018 Free Software Foundation, Inc. |
| 6 | 4 | ||
| @@ -19,7 +17,6 @@ | |||
| 19 | 17 | ||
| 20 | /* TODO: | 18 | /* TODO: |
| 21 | - structure the opcode space into opcode+flag. | 19 | - structure the opcode space into opcode+flag. |
| 22 | - merge with glibc's regex.[ch]. | ||
| 23 | - replace (succeed_n + jump_n + set_number_at) with something that doesn't | 20 | - replace (succeed_n + jump_n + set_number_at) with something that doesn't |
| 24 | need to modify the compiled regexp so that re_search can be reentrant. | 21 | need to modify the compiled regexp so that re_search can be reentrant. |
| 25 | - get rid of on_failure_jump_smart by doing the optimization in re_comp | 22 | - get rid of on_failure_jump_smart by doing the optimization in re_comp |
| @@ -28,34 +25,30 @@ | |||
| 28 | 25 | ||
| 29 | #include <config.h> | 26 | #include <config.h> |
| 30 | 27 | ||
| 31 | /* Get the interface, including the syntax bits. */ | ||
| 32 | #include "regex-emacs.h" | 28 | #include "regex-emacs.h" |
| 33 | 29 | ||
| 34 | #include <stdlib.h> | 30 | #include <stdlib.h> |
| 35 | 31 | ||
| 36 | #include "character.h" | 32 | #include "character.h" |
| 37 | #include "buffer.h" | 33 | #include "buffer.h" |
| 38 | |||
| 39 | #include "syntax.h" | 34 | #include "syntax.h" |
| 40 | #include "category.h" | 35 | #include "category.h" |
| 41 | 36 | ||
| 42 | /* Maximum number of duplicates an interval can allow. Some systems | 37 | /* Maximum number of duplicates an interval can allow. Some systems |
| 43 | define this in other header files, but we want our | 38 | define this in other header files, but we want our value, so remove |
| 44 | value, so remove any previous define. */ | 39 | any previous define. Repeat counts are stored in opcodes as 2-byte |
| 40 | unsigned integers. */ | ||
| 45 | #ifdef RE_DUP_MAX | 41 | #ifdef RE_DUP_MAX |
| 46 | # undef RE_DUP_MAX | 42 | # undef RE_DUP_MAX |
| 47 | #endif | 43 | #endif |
| 48 | /* Repeat counts are stored in opcodes as 2 byte integers. This was | ||
| 49 | previously limited to 7fff because the parsing code uses signed | ||
| 50 | ints. But Emacs only runs on 32 bit platforms anyway. */ | ||
| 51 | #define RE_DUP_MAX (0xffff) | 44 | #define RE_DUP_MAX (0xffff) |
| 52 | 45 | ||
| 53 | /* Make syntax table lookup grant data in gl_state. */ | 46 | /* Make syntax table lookup grant data in gl_state. */ |
| 54 | #define SYNTAX(c) syntax_property (c, 1) | 47 | #define SYNTAX(c) syntax_property (c, 1) |
| 55 | 48 | ||
| 56 | /* Converts the pointer to the char to BEG-based offset from the start. */ | 49 | /* Convert the pointer to the char to BEG-based offset from the start. */ |
| 57 | #define PTR_TO_OFFSET(d) POS_AS_IN_BUFFER (POINTER_TO_OFFSET (d)) | 50 | #define PTR_TO_OFFSET(d) POS_AS_IN_BUFFER (POINTER_TO_OFFSET (d)) |
| 58 | /* Strings are 0-indexed, buffers are 1-indexed; we pun on the boolean | 51 | /* Strings are 0-indexed, buffers are 1-indexed; pun on the boolean |
| 59 | result to get the right base index. */ | 52 | result to get the right base index. */ |
| 60 | #define POS_AS_IN_BUFFER(p) \ | 53 | #define POS_AS_IN_BUFFER(p) \ |
| 61 | ((p) + (NILP (gl_state.object) || BUFFERP (gl_state.object))) | 54 | ((p) + (NILP (gl_state.object) || BUFFERP (gl_state.object))) |
| @@ -63,9 +56,9 @@ | |||
| 63 | #define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte) | 56 | #define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte) |
| 64 | #define RE_TARGET_MULTIBYTE_P(bufp) ((bufp)->target_multibyte) | 57 | #define RE_TARGET_MULTIBYTE_P(bufp) ((bufp)->target_multibyte) |
| 65 | #define RE_STRING_CHAR(p, multibyte) \ | 58 | #define RE_STRING_CHAR(p, multibyte) \ |
| 66 | (multibyte ? (STRING_CHAR (p)) : (*(p))) | 59 | (multibyte ? STRING_CHAR (p) : *(p)) |
| 67 | #define RE_STRING_CHAR_AND_LENGTH(p, len, multibyte) \ | 60 | #define RE_STRING_CHAR_AND_LENGTH(p, len, multibyte) \ |
| 68 | (multibyte ? (STRING_CHAR_AND_LENGTH (p, len)) : ((len) = 1, *(p))) | 61 | (multibyte ? STRING_CHAR_AND_LENGTH (p, len) : ((len) = 1, *(p))) |
| 69 | 62 | ||
| 70 | #define RE_CHAR_TO_MULTIBYTE(c) UNIBYTE_TO_CHAR (c) | 63 | #define RE_CHAR_TO_MULTIBYTE(c) UNIBYTE_TO_CHAR (c) |
| 71 | 64 | ||
| @@ -79,8 +72,9 @@ | |||
| 79 | if (target_multibyte) \ | 72 | if (target_multibyte) \ |
| 80 | { \ | 73 | { \ |
| 81 | re_char *dtemp = (p) == (str2) ? (end1) : (p); \ | 74 | re_char *dtemp = (p) == (str2) ? (end1) : (p); \ |
| 82 | re_char *dlimit = ((p) > (str2) && (p) <= (end2)) ? (str2) : (str1); \ | 75 | re_char *dlimit = (p) > (str2) && (p) <= (end2) ? (str2) : (str1); \ |
| 83 | while (dtemp-- > dlimit && !CHAR_HEAD_P (*dtemp)); \ | 76 | while (dtemp-- > dlimit && !CHAR_HEAD_P (*dtemp)) \ |
| 77 | continue; \ | ||
| 84 | c = STRING_CHAR (dtemp); \ | 78 | c = STRING_CHAR (dtemp); \ |
| 85 | } \ | 79 | } \ |
| 86 | else \ | 80 | else \ |
| @@ -88,7 +82,7 @@ | |||
| 88 | (c = ((p) == (str2) ? (end1) : (p))[-1]); \ | 82 | (c = ((p) == (str2) ? (end1) : (p))[-1]); \ |
| 89 | (c) = RE_CHAR_TO_MULTIBYTE (c); \ | 83 | (c) = RE_CHAR_TO_MULTIBYTE (c); \ |
| 90 | } \ | 84 | } \ |
| 91 | } while (0) | 85 | } while (false) |
| 92 | 86 | ||
| 93 | /* Set C a (possibly converted to multibyte) character at P, and set | 87 | /* Set C a (possibly converted to multibyte) character at P, and set |
| 94 | LEN to the byte length of that character. */ | 88 | LEN to the byte length of that character. */ |
| @@ -102,11 +96,8 @@ | |||
| 102 | len = 1; \ | 96 | len = 1; \ |
| 103 | (c) = RE_CHAR_TO_MULTIBYTE (c); \ | 97 | (c) = RE_CHAR_TO_MULTIBYTE (c); \ |
| 104 | } \ | 98 | } \ |
| 105 | } while (0) | 99 | } while (false) |
| 106 | 100 | ||
| 107 | /* isalpha etc. are used for the character classes. */ | ||
| 108 | #include <ctype.h> | ||
| 109 | |||
| 110 | /* 1 if C is an ASCII character. */ | 101 | /* 1 if C is an ASCII character. */ |
| 111 | #define IS_REAL_ASCII(c) ((c) < 0200) | 102 | #define IS_REAL_ASCII(c) ((c) < 0200) |
| 112 | 103 | ||
| @@ -165,13 +156,13 @@ | |||
| 165 | /* Use alloca instead of malloc. This is because using malloc in | 156 | /* Use alloca instead of malloc. This is because using malloc in |
| 166 | re_search* or re_match* could cause memory leaks when C-g is used | 157 | re_search* or re_match* could cause memory leaks when C-g is used |
| 167 | in Emacs (note that SAFE_ALLOCA could also call malloc, but does so | 158 | in Emacs (note that SAFE_ALLOCA could also call malloc, but does so |
| 168 | via `record_xmalloc' which uses `unwind_protect' to ensure the | 159 | via 'record_xmalloc' which uses 'unwind_protect' to ensure the |
| 169 | memory is freed even in case of non-local exits); also, malloc is | 160 | memory is freed even in case of non-local exits); also, malloc is |
| 170 | slower and causes storage fragmentation. On the other hand, malloc | 161 | slower and causes storage fragmentation. On the other hand, malloc |
| 171 | is more portable, and easier to debug. | 162 | is more portable, and easier to debug. |
| 172 | 163 | ||
| 173 | Because we sometimes use alloca, some routines have to be macros, | 164 | Because we sometimes use alloca, some routines have to be macros, |
| 174 | not functions -- `alloca'-allocated space disappears at the end of the | 165 | not functions -- 'alloca'-allocated space disappears at the end of the |
| 175 | function it is called in. */ | 166 | function it is called in. */ |
| 176 | 167 | ||
| 177 | /* This may be adjusted in main(), if the stack is successfully grown. */ | 168 | /* This may be adjusted in main(), if the stack is successfully grown. */ |
| @@ -180,13 +171,13 @@ ptrdiff_t emacs_re_safe_alloca = MAX_ALLOCA; | |||
| 180 | #define REGEX_USE_SAFE_ALLOCA \ | 171 | #define REGEX_USE_SAFE_ALLOCA \ |
| 181 | USE_SAFE_ALLOCA; sa_avail = emacs_re_safe_alloca | 172 | USE_SAFE_ALLOCA; sa_avail = emacs_re_safe_alloca |
| 182 | 173 | ||
| 183 | /* Assumes a `char *destination' variable. */ | 174 | /* Assumes a 'char *destination' variable. */ |
| 184 | #define REGEX_REALLOCATE(source, osize, nsize) \ | 175 | #define REGEX_REALLOCATE(source, osize, nsize) \ |
| 185 | (destination = SAFE_ALLOCA (nsize), \ | 176 | (destination = SAFE_ALLOCA (nsize), \ |
| 186 | memcpy (destination, source, osize)) | 177 | memcpy (destination, source, osize)) |
| 187 | 178 | ||
| 188 | /* True if `size1' is non-NULL and PTR is pointing anywhere inside | 179 | /* True if 'size1' is non-NULL and PTR is pointing anywhere inside |
| 189 | `string1' or just past its end. This works if PTR is NULL, which is | 180 | 'string1' or just past its end. This works if PTR is NULL, which is |
| 190 | a good thing. */ | 181 | a good thing. */ |
| 191 | #define FIRST_STRING_P(ptr) \ | 182 | #define FIRST_STRING_P(ptr) \ |
| 192 | (size1 && string1 <= (ptr) && (ptr) <= string1 + size1) | 183 | (size1 && string1 <= (ptr) && (ptr) <= string1 + size1) |
| @@ -254,7 +245,7 @@ typedef enum | |||
| 254 | 245 | ||
| 255 | /* Stop remembering the text that is matched and store it in a | 246 | /* Stop remembering the text that is matched and store it in a |
| 256 | memory register. Followed by one byte with the register | 247 | memory register. Followed by one byte with the register |
| 257 | number, in the range 0 to one less than `re_nsub' in the | 248 | number, in the range 0 to one less than 're_nsub' in the |
| 258 | pattern buffer. */ | 249 | pattern buffer. */ |
| 259 | stop_memory, | 250 | stop_memory, |
| 260 | 251 | ||
| @@ -285,23 +276,23 @@ typedef enum | |||
| 285 | current string position when executed. */ | 276 | current string position when executed. */ |
| 286 | on_failure_keep_string_jump, | 277 | on_failure_keep_string_jump, |
| 287 | 278 | ||
| 288 | /* Just like `on_failure_jump', except that it checks that we | 279 | /* Just like 'on_failure_jump', except that it checks that we |
| 289 | don't get stuck in an infinite loop (matching an empty string | 280 | don't get stuck in an infinite loop (matching an empty string |
| 290 | indefinitely). */ | 281 | indefinitely). */ |
| 291 | on_failure_jump_loop, | 282 | on_failure_jump_loop, |
| 292 | 283 | ||
| 293 | /* Just like `on_failure_jump_loop', except that it checks for | 284 | /* Just like 'on_failure_jump_loop', except that it checks for |
| 294 | a different kind of loop (the kind that shows up with non-greedy | 285 | a different kind of loop (the kind that shows up with non-greedy |
| 295 | operators). This operation has to be immediately preceded | 286 | operators). This operation has to be immediately preceded |
| 296 | by a `no_op'. */ | 287 | by a 'no_op'. */ |
| 297 | on_failure_jump_nastyloop, | 288 | on_failure_jump_nastyloop, |
| 298 | 289 | ||
| 299 | /* A smart `on_failure_jump' used for greedy * and + operators. | 290 | /* A smart 'on_failure_jump' used for greedy * and + operators. |
| 300 | It analyzes the loop before which it is put and if the | 291 | It analyzes the loop before which it is put and if the |
| 301 | loop does not require backtracking, it changes itself to | 292 | loop does not require backtracking, it changes itself to |
| 302 | `on_failure_keep_string_jump' and short-circuits the loop, | 293 | 'on_failure_keep_string_jump' and short-circuits the loop, |
| 303 | else it just defaults to changing itself into `on_failure_jump'. | 294 | else it just defaults to changing itself into 'on_failure_jump'. |
| 304 | It assumes that it is pointing to just past a `jump'. */ | 295 | It assumes that it is pointing to just past a 'jump'. */ |
| 305 | on_failure_jump_smart, | 296 | on_failure_jump_smart, |
| 306 | 297 | ||
| 307 | /* Followed by two-byte relative address and two-byte number n. | 298 | /* Followed by two-byte relative address and two-byte number n. |
| @@ -356,7 +347,7 @@ typedef enum | |||
| 356 | do { \ | 347 | do { \ |
| 357 | (destination)[0] = (number) & 0377; \ | 348 | (destination)[0] = (number) & 0377; \ |
| 358 | (destination)[1] = (number) >> 8; \ | 349 | (destination)[1] = (number) >> 8; \ |
| 359 | } while (0) | 350 | } while (false) |
| 360 | 351 | ||
| 361 | /* Same as STORE_NUMBER, except increment DESTINATION to | 352 | /* Same as STORE_NUMBER, except increment DESTINATION to |
| 362 | the byte after where the number is stored. Therefore, DESTINATION | 353 | the byte after where the number is stored. Therefore, DESTINATION |
| @@ -366,7 +357,7 @@ typedef enum | |||
| 366 | do { \ | 357 | do { \ |
| 367 | STORE_NUMBER (destination, number); \ | 358 | STORE_NUMBER (destination, number); \ |
| 368 | (destination) += 2; \ | 359 | (destination) += 2; \ |
| 369 | } while (0) | 360 | } while (false) |
| 370 | 361 | ||
| 371 | /* Put into DESTINATION a number stored in two contiguous bytes starting | 362 | /* Put into DESTINATION a number stored in two contiguous bytes starting |
| 372 | at SOURCE. */ | 363 | at SOURCE. */ |
| @@ -405,7 +396,7 @@ extract_number_and_incr (re_char **source) | |||
| 405 | (destination)[1] = ((character) >> 8) & 0377; \ | 396 | (destination)[1] = ((character) >> 8) & 0377; \ |
| 406 | (destination)[2] = (character) >> 16; \ | 397 | (destination)[2] = (character) >> 16; \ |
| 407 | (destination) += 3; \ | 398 | (destination) += 3; \ |
| 408 | } while (0) | 399 | } while (false) |
| 409 | 400 | ||
| 410 | /* Put into DESTINATION a character stored in three contiguous bytes | 401 | /* Put into DESTINATION a character stored in three contiguous bytes |
| 411 | starting at SOURCE. */ | 402 | starting at SOURCE. */ |
| @@ -415,7 +406,7 @@ extract_number_and_incr (re_char **source) | |||
| 415 | (destination) = ((source)[0] \ | 406 | (destination) = ((source)[0] \ |
| 416 | | ((source)[1] << 8) \ | 407 | | ((source)[1] << 8) \ |
| 417 | | ((source)[2] << 16)); \ | 408 | | ((source)[2] << 16)); \ |
| 418 | } while (0) | 409 | } while (false) |
| 419 | 410 | ||
| 420 | 411 | ||
| 421 | /* Macros for charset. */ | 412 | /* Macros for charset. */ |
| @@ -429,7 +420,7 @@ extract_number_and_incr (re_char **source) | |||
| 429 | 420 | ||
| 430 | /* Return the address of range table of charset P. But not the start | 421 | /* Return the address of range table of charset P. But not the start |
| 431 | of table itself, but the before where the number of ranges is | 422 | of table itself, but the before where the number of ranges is |
| 432 | stored. `2 +' means to skip re_opcode_t and size of bitmap, | 423 | stored. '2 +' means to skip re_opcode_t and size of bitmap, |
| 433 | and the 2 bytes of flags at the start of the range table. */ | 424 | and the 2 bytes of flags at the start of the range table. */ |
| 434 | #define CHARSET_RANGE_TABLE(p) (&(p)[4 + CHARSET_BITMAP_SIZE (p)]) | 425 | #define CHARSET_RANGE_TABLE(p) (&(p)[4 + CHARSET_BITMAP_SIZE (p)]) |
| 435 | 426 | ||
| @@ -439,8 +430,8 @@ extract_number_and_incr (re_char **source) | |||
| 439 | + (p)[3 + CHARSET_BITMAP_SIZE (p)] * 0x100) | 430 | + (p)[3 + CHARSET_BITMAP_SIZE (p)] * 0x100) |
| 440 | 431 | ||
| 441 | /* Return the address of end of RANGE_TABLE. COUNT is number of | 432 | /* Return the address of end of RANGE_TABLE. COUNT is number of |
| 442 | ranges (which is a pair of (start, end)) in the RANGE_TABLE. `* 2' | 433 | ranges (which is a pair of (start, end)) in the RANGE_TABLE. '* 2' |
| 443 | is start of range and end of range. `* 3' is size of each start | 434 | is start of range and end of range. '* 3' is size of each start |
| 444 | and end. */ | 435 | and end. */ |
| 445 | #define CHARSET_RANGE_TABLE_END(range_table, count) \ | 436 | #define CHARSET_RANGE_TABLE_END(range_table, count) \ |
| 446 | ((range_table) + (count) * 2 * 3) | 437 | ((range_table) + (count) * 2 * 3) |
| @@ -450,7 +441,7 @@ extract_number_and_incr (re_char **source) | |||
| 450 | 441 | ||
| 451 | #ifdef REGEX_EMACS_DEBUG | 442 | #ifdef REGEX_EMACS_DEBUG |
| 452 | 443 | ||
| 453 | /* We use standard I/O for debugging. */ | 444 | /* Use standard I/O for debugging. */ |
| 454 | # include <stdio.h> | 445 | # include <stdio.h> |
| 455 | 446 | ||
| 456 | static int regex_emacs_debug = -100000; | 447 | static int regex_emacs_debug = -100000; |
| @@ -859,7 +850,7 @@ enum { REGS_UNALLOCATED, REGS_REALLOCATE, REGS_FIXED }; | |||
| 859 | 850 | ||
| 860 | /* If 'regs_allocated' is REGS_UNALLOCATED in the pattern buffer, | 851 | /* If 'regs_allocated' is REGS_UNALLOCATED in the pattern buffer, |
| 861 | 're_match_2' returns information about at least this many registers | 852 | 're_match_2' returns information about at least this many registers |
| 862 | the first time a `regs' structure is passed. */ | 853 | the first time a 'regs' structure is passed. */ |
| 863 | enum { RE_NREGS = 30 }; | 854 | enum { RE_NREGS = 30 }; |
| 864 | 855 | ||
| 865 | /* The searching and matching functions allocate memory for the | 856 | /* The searching and matching functions allocate memory for the |
| @@ -878,7 +869,7 @@ enum { RE_NREGS = 30 }; | |||
| 878 | #define INIT_FAILURE_ALLOC 20 | 869 | #define INIT_FAILURE_ALLOC 20 |
| 879 | 870 | ||
| 880 | /* Roughly the maximum number of failure points on the stack. Would be | 871 | /* Roughly the maximum number of failure points on the stack. Would be |
| 881 | exactly that if always used TYPICAL_FAILURE_SIZE items each time we failed. | 872 | exactly that if failure always used TYPICAL_FAILURE_SIZE items. |
| 882 | This is a variable only so users of regex can assign to it; we never | 873 | This is a variable only so users of regex can assign to it; we never |
| 883 | change it ourselves. We always multiply it by TYPICAL_FAILURE_SIZE | 874 | change it ourselves. We always multiply it by TYPICAL_FAILURE_SIZE |
| 884 | before using it, so it should probably be a byte-count instead. */ | 875 | before using it, so it should probably be a byte-count instead. */ |
| @@ -891,7 +882,7 @@ size_t emacs_re_max_failures = 40000; | |||
| 891 | union fail_stack_elt | 882 | union fail_stack_elt |
| 892 | { | 883 | { |
| 893 | re_char *pointer; | 884 | re_char *pointer; |
| 894 | /* This should be the biggest `int' that's no bigger than a pointer. */ | 885 | /* This should be the biggest 'int' that's no bigger than a pointer. */ |
| 895 | long integer; | 886 | long integer; |
| 896 | }; | 887 | }; |
| 897 | 888 | ||
| @@ -918,19 +909,18 @@ typedef struct | |||
| 918 | fail_stack.size = INIT_FAILURE_ALLOC; \ | 909 | fail_stack.size = INIT_FAILURE_ALLOC; \ |
| 919 | fail_stack.avail = 0; \ | 910 | fail_stack.avail = 0; \ |
| 920 | fail_stack.frame = 0; \ | 911 | fail_stack.frame = 0; \ |
| 921 | } while (0) | 912 | } while (false) |
| 922 | 913 | ||
| 923 | 914 | ||
| 924 | /* Double the size of FAIL_STACK, up to a limit | 915 | /* Double the size of FAIL_STACK, up to a limit |
| 925 | which allows approximately `emacs_re_max_failures' items. | 916 | which allows approximately 'emacs_re_max_failures' items. |
| 926 | 917 | ||
| 927 | Return 1 if succeeds, and 0 if either ran out of memory | 918 | Return 1 if succeeds, and 0 if either ran out of memory |
| 928 | allocating space for it or it was already too large. | 919 | allocating space for it or it was already too large. |
| 929 | 920 | ||
| 930 | REGEX_REALLOCATE requires `destination' be declared. */ | 921 | REGEX_REALLOCATE requires 'destination' be declared. */ |
| 931 | 922 | ||
| 932 | /* Factor to increase the failure stack size by | 923 | /* Factor to increase the failure stack size by. |
| 933 | when we increase it. | ||
| 934 | This used to be 2, but 2 was too wasteful | 924 | This used to be 2, but 2 was too wasteful |
| 935 | because the old discarded stacks added up to as much space | 925 | because the old discarded stacks added up to as much space |
| 936 | were as ultimate, maximum-size stack. */ | 926 | were as ultimate, maximum-size stack. */ |
| @@ -952,19 +942,19 @@ typedef struct | |||
| 952 | 942 | ||
| 953 | 943 | ||
| 954 | /* Push a pointer value onto the failure stack. | 944 | /* Push a pointer value onto the failure stack. |
| 955 | Assumes the variable `fail_stack'. Probably should only | 945 | Assumes the variable 'fail_stack'. Probably should only |
| 956 | be called from within `PUSH_FAILURE_POINT'. */ | 946 | be called from within 'PUSH_FAILURE_POINT'. */ |
| 957 | #define PUSH_FAILURE_POINTER(item) \ | 947 | #define PUSH_FAILURE_POINTER(item) \ |
| 958 | fail_stack.stack[fail_stack.avail++].pointer = (item) | 948 | fail_stack.stack[fail_stack.avail++].pointer = (item) |
| 959 | 949 | ||
| 960 | /* This pushes an integer-valued item onto the failure stack. | 950 | /* This pushes an integer-valued item onto the failure stack. |
| 961 | Assumes the variable `fail_stack'. Probably should only | 951 | Assumes the variable 'fail_stack'. Probably should only |
| 962 | be called from within `PUSH_FAILURE_POINT'. */ | 952 | be called from within 'PUSH_FAILURE_POINT'. */ |
| 963 | #define PUSH_FAILURE_INT(item) \ | 953 | #define PUSH_FAILURE_INT(item) \ |
| 964 | fail_stack.stack[fail_stack.avail++].integer = (item) | 954 | fail_stack.stack[fail_stack.avail++].integer = (item) |
| 965 | 955 | ||
| 966 | /* These POP... operations complement the PUSH... operations. | 956 | /* These POP... operations complement the PUSH... operations. |
| 967 | All assume that `fail_stack' is nonempty. */ | 957 | All assume that 'fail_stack' is nonempty. */ |
| 968 | #define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer | 958 | #define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer |
| 969 | #define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer | 959 | #define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer |
| 970 | 960 | ||
| @@ -997,7 +987,7 @@ do { \ | |||
| 997 | PUSH_FAILURE_POINTER (regstart[n]); \ | 987 | PUSH_FAILURE_POINTER (regstart[n]); \ |
| 998 | PUSH_FAILURE_POINTER (regend[n]); \ | 988 | PUSH_FAILURE_POINTER (regend[n]); \ |
| 999 | PUSH_FAILURE_INT (n); \ | 989 | PUSH_FAILURE_INT (n); \ |
| 1000 | } while (0) | 990 | } while (false) |
| 1001 | 991 | ||
| 1002 | /* Change the counter's value to VAL, but make sure that it will | 992 | /* Change the counter's value to VAL, but make sure that it will |
| 1003 | be reset when backtracking. */ | 993 | be reset when backtracking. */ |
| @@ -1012,7 +1002,7 @@ do { \ | |||
| 1012 | PUSH_FAILURE_POINTER (ptr); \ | 1002 | PUSH_FAILURE_POINTER (ptr); \ |
| 1013 | PUSH_FAILURE_INT (-1); \ | 1003 | PUSH_FAILURE_INT (-1); \ |
| 1014 | STORE_NUMBER (ptr, val); \ | 1004 | STORE_NUMBER (ptr, val); \ |
| 1015 | } while (0) | 1005 | } while (false) |
| 1016 | 1006 | ||
| 1017 | /* Pop a saved register off the stack. */ | 1007 | /* Pop a saved register off the stack. */ |
| 1018 | #define POP_FAILURE_REG_OR_COUNT() \ | 1008 | #define POP_FAILURE_REG_OR_COUNT() \ |
| @@ -1034,7 +1024,7 @@ do { \ | |||
| 1034 | DEBUG_PRINT (" Pop reg %ld (spanning %p -> %p)\n", \ | 1024 | DEBUG_PRINT (" Pop reg %ld (spanning %p -> %p)\n", \ |
| 1035 | pfreg, regstart[pfreg], regend[pfreg]); \ | 1025 | pfreg, regstart[pfreg], regend[pfreg]); \ |
| 1036 | } \ | 1026 | } \ |
| 1037 | } while (0) | 1027 | } while (false) |
| 1038 | 1028 | ||
| 1039 | /* Check that we are not stuck in an infinite loop. */ | 1029 | /* Check that we are not stuck in an infinite loop. */ |
| 1040 | #define CHECK_INFINITE_LOOP(pat_cur, string_place) \ | 1030 | #define CHECK_INFINITE_LOOP(pat_cur, string_place) \ |
| @@ -1056,23 +1046,20 @@ do { \ | |||
| 1056 | failure = NEXT_FAILURE_HANDLE(failure); \ | 1046 | failure = NEXT_FAILURE_HANDLE(failure); \ |
| 1057 | } \ | 1047 | } \ |
| 1058 | DEBUG_PRINT (" Other string: %p\n", FAILURE_STR (failure)); \ | 1048 | DEBUG_PRINT (" Other string: %p\n", FAILURE_STR (failure)); \ |
| 1059 | } while (0) | 1049 | } while (false) |
| 1060 | 1050 | ||
| 1061 | /* Push the information about the state we will need | 1051 | /* Push the information about the state we will need |
| 1062 | if we ever fail back to it. | 1052 | if we ever fail back to it. |
| 1063 | 1053 | ||
| 1064 | Requires variables fail_stack, regstart, regend and | 1054 | Requires variables fail_stack, regstart, regend and |
| 1065 | num_regs be declared. GROW_FAIL_STACK requires `destination' be | 1055 | num_regs be declared. GROW_FAIL_STACK requires 'destination' be |
| 1066 | declared. | 1056 | declared. |
| 1067 | 1057 | ||
| 1068 | Does `return FAILURE_CODE' if runs out of memory. */ | 1058 | Does 'return FAILURE_CODE' if runs out of memory. */ |
| 1069 | 1059 | ||
| 1070 | #define PUSH_FAILURE_POINT(pattern, string_place) \ | 1060 | #define PUSH_FAILURE_POINT(pattern, string_place) \ |
| 1071 | do { \ | 1061 | do { \ |
| 1072 | char *destination; \ | 1062 | char *destination; \ |
| 1073 | /* Must be int, so when we don't save any registers, the arithmetic \ | ||
| 1074 | of 0 + -1 isn't done as unsigned. */ \ | ||
| 1075 | \ | ||
| 1076 | DEBUG_STATEMENT (nfailure_points_pushed++); \ | 1063 | DEBUG_STATEMENT (nfailure_points_pushed++); \ |
| 1077 | DEBUG_PRINT ("\nPUSH_FAILURE_POINT:\n"); \ | 1064 | DEBUG_PRINT ("\nPUSH_FAILURE_POINT:\n"); \ |
| 1078 | DEBUG_PRINT (" Before push, next avail: %zu\n", (fail_stack).avail); \ | 1065 | DEBUG_PRINT (" Before push, next avail: %zu\n", (fail_stack).avail); \ |
| @@ -1096,7 +1083,7 @@ do { \ | |||
| 1096 | \ | 1083 | \ |
| 1097 | /* Close the frame by moving the frame pointer past it. */ \ | 1084 | /* Close the frame by moving the frame pointer past it. */ \ |
| 1098 | fail_stack.frame = fail_stack.avail; \ | 1085 | fail_stack.frame = fail_stack.avail; \ |
| 1099 | } while (0) | 1086 | } while (false) |
| 1100 | 1087 | ||
| 1101 | /* Estimate the size of data pushed by a typical failure stack entry. | 1088 | /* Estimate the size of data pushed by a typical failure stack entry. |
| 1102 | An estimate is all we need, because all we use this for | 1089 | An estimate is all we need, because all we use this for |
| @@ -1108,15 +1095,15 @@ do { \ | |||
| 1108 | #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail) | 1095 | #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail) |
| 1109 | 1096 | ||
| 1110 | 1097 | ||
| 1111 | /* Pops what PUSH_FAIL_STACK pushes. | 1098 | /* Pop what PUSH_FAIL_STACK pushes. |
| 1112 | 1099 | ||
| 1113 | We restore into the parameters, all of which should be lvalues: | 1100 | Restore into the parameters, all of which should be lvalues: |
| 1114 | STR -- the saved data position. | 1101 | STR -- the saved data position. |
| 1115 | PAT -- the saved pattern position. | 1102 | PAT -- the saved pattern position. |
| 1116 | REGSTART, REGEND -- arrays of string positions. | 1103 | REGSTART, REGEND -- arrays of string positions. |
| 1117 | 1104 | ||
| 1118 | Also assumes the variables `fail_stack' and (if debugging), `bufp', | 1105 | Also assume the variables FAIL_STACK and (if debugging) BUFP, PEND, |
| 1119 | `pend', `string1', `size1', `string2', and `size2'. */ | 1106 | STRING1, SIZE1, STRING2, and SIZE2. */ |
| 1120 | 1107 | ||
| 1121 | #define POP_FAILURE_POINT(str, pat) \ | 1108 | #define POP_FAILURE_POINT(str, pat) \ |
| 1122 | do { \ | 1109 | do { \ |
| @@ -1150,7 +1137,7 @@ do { \ | |||
| 1150 | eassert (fail_stack.frame <= fail_stack.avail); \ | 1137 | eassert (fail_stack.frame <= fail_stack.avail); \ |
| 1151 | \ | 1138 | \ |
| 1152 | DEBUG_STATEMENT (nfailure_points_popped++); \ | 1139 | DEBUG_STATEMENT (nfailure_points_popped++); \ |
| 1153 | } while (0) /* POP_FAILURE_POINT */ | 1140 | } while (false) /* POP_FAILURE_POINT */ |
| 1154 | 1141 | ||
| 1155 | 1142 | ||
| 1156 | 1143 | ||
| @@ -1183,28 +1170,28 @@ static int analyze_first (re_char *p, re_char *pend, | |||
| 1183 | if (p == pend) return REG_EEND; \ | 1170 | if (p == pend) return REG_EEND; \ |
| 1184 | c = RE_STRING_CHAR_AND_LENGTH (p, len, multibyte); \ | 1171 | c = RE_STRING_CHAR_AND_LENGTH (p, len, multibyte); \ |
| 1185 | p += len; \ | 1172 | p += len; \ |
| 1186 | } while (0) | 1173 | } while (false) |
| 1187 | 1174 | ||
| 1188 | 1175 | ||
| 1189 | #define RE_TRANSLATE(TBL, C) char_table_translate (TBL, C) | 1176 | #define RE_TRANSLATE(TBL, C) char_table_translate (TBL, C) |
| 1190 | #define TRANSLATE(d) (!NILP (translate) ? RE_TRANSLATE (translate, d) : (d)) | 1177 | #define TRANSLATE(d) (!NILP (translate) ? RE_TRANSLATE (translate, d) : (d)) |
| 1191 | 1178 | ||
| 1192 | /* Macros for outputting the compiled pattern into `buffer'. */ | 1179 | /* Macros for outputting the compiled pattern into 'buffer'. */ |
| 1193 | 1180 | ||
| 1194 | /* If the buffer isn't allocated when it comes in, use this. */ | 1181 | /* If the buffer isn't allocated when it comes in, use this. */ |
| 1195 | #define INIT_BUF_SIZE 32 | 1182 | #define INIT_BUF_SIZE 32 |
| 1196 | 1183 | ||
| 1197 | /* Make sure we have at least N more bytes of space in buffer. */ | 1184 | /* Ensure at least N more bytes of space in buffer. */ |
| 1198 | #define GET_BUFFER_SPACE(n) \ | 1185 | #define GET_BUFFER_SPACE(n) \ |
| 1199 | while ((size_t) (b - bufp->buffer + (n)) > bufp->allocated) \ | 1186 | while ((size_t) (b - bufp->buffer + (n)) > bufp->allocated) \ |
| 1200 | EXTEND_BUFFER () | 1187 | EXTEND_BUFFER () |
| 1201 | 1188 | ||
| 1202 | /* Make sure we have one more byte of buffer space and then add C to it. */ | 1189 | /* Ensure one more byte of buffer space and then add C to it. */ |
| 1203 | #define BUF_PUSH(c) \ | 1190 | #define BUF_PUSH(c) \ |
| 1204 | do { \ | 1191 | do { \ |
| 1205 | GET_BUFFER_SPACE (1); \ | 1192 | GET_BUFFER_SPACE (1); \ |
| 1206 | *b++ = (unsigned char) (c); \ | 1193 | *b++ = (unsigned char) (c); \ |
| 1207 | } while (0) | 1194 | } while (false) |
| 1208 | 1195 | ||
| 1209 | 1196 | ||
| 1210 | /* Ensure we have two more bytes of buffer space and then append C1 and C2. */ | 1197 | /* Ensure we have two more bytes of buffer space and then append C1 and C2. */ |
| @@ -1213,10 +1200,10 @@ static int analyze_first (re_char *p, re_char *pend, | |||
| 1213 | GET_BUFFER_SPACE (2); \ | 1200 | GET_BUFFER_SPACE (2); \ |
| 1214 | *b++ = (unsigned char) (c1); \ | 1201 | *b++ = (unsigned char) (c1); \ |
| 1215 | *b++ = (unsigned char) (c2); \ | 1202 | *b++ = (unsigned char) (c2); \ |
| 1216 | } while (0) | 1203 | } while (false) |
| 1217 | 1204 | ||
| 1218 | 1205 | ||
| 1219 | /* Store a jump with opcode OP at LOC to location TO. We store a | 1206 | /* Store a jump with opcode OP at LOC to location TO. Store a |
| 1220 | relative address offset by the three bytes the jump itself occupies. */ | 1207 | relative address offset by the three bytes the jump itself occupies. */ |
| 1221 | #define STORE_JUMP(op, loc, to) \ | 1208 | #define STORE_JUMP(op, loc, to) \ |
| 1222 | store_op1 (op, loc, (to) - (loc) - 3) | 1209 | store_op1 (op, loc, (to) - (loc) - 3) |
| @@ -1225,11 +1212,11 @@ static int analyze_first (re_char *p, re_char *pend, | |||
| 1225 | #define STORE_JUMP2(op, loc, to, arg) \ | 1212 | #define STORE_JUMP2(op, loc, to, arg) \ |
| 1226 | store_op2 (op, loc, (to) - (loc) - 3, arg) | 1213 | store_op2 (op, loc, (to) - (loc) - 3, arg) |
| 1227 | 1214 | ||
| 1228 | /* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */ | 1215 | /* Like 'STORE_JUMP', but for inserting. Assume B is the buffer end. */ |
| 1229 | #define INSERT_JUMP(op, loc, to) \ | 1216 | #define INSERT_JUMP(op, loc, to) \ |
| 1230 | insert_op1 (op, loc, (to) - (loc) - 3, b) | 1217 | insert_op1 (op, loc, (to) - (loc) - 3, b) |
| 1231 | 1218 | ||
| 1232 | /* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */ | 1219 | /* Like 'STORE_JUMP2', but for inserting. Assume B is the buffer end. */ |
| 1233 | #define INSERT_JUMP2(op, loc, to, arg) \ | 1220 | #define INSERT_JUMP2(op, loc, to, arg) \ |
| 1234 | insert_op2 (op, loc, (to) - (loc) - 3, arg, b) | 1221 | insert_op2 (op, loc, (to) - (loc) - 3, arg, b) |
| 1235 | 1222 | ||
| @@ -1237,7 +1224,7 @@ static int analyze_first (re_char *p, re_char *pend, | |||
| 1237 | /* This is not an arbitrary limit: the arguments which represent offsets | 1224 | /* This is not an arbitrary limit: the arguments which represent offsets |
| 1238 | into the pattern are two bytes long. So if 2^15 bytes turns out to | 1225 | into the pattern are two bytes long. So if 2^15 bytes turns out to |
| 1239 | be too small, many things would have to change. */ | 1226 | be too small, many things would have to change. */ |
| 1240 | # define MAX_BUF_SIZE (1L << 15) | 1227 | # define MAX_BUF_SIZE (1 << 15) |
| 1241 | 1228 | ||
| 1242 | /* Extend the buffer by twice its current size via realloc and | 1229 | /* Extend the buffer by twice its current size via realloc and |
| 1243 | reset the pointers that pointed into the old block to point to the | 1230 | reset the pointers that pointed into the old block to point to the |
| @@ -1267,7 +1254,7 @@ static int analyze_first (re_char *p, re_char *pend, | |||
| 1267 | if (fixup_alt_jump_set) fixup_alt_jump = new_buffer + fixup_alt_jump_off; \ | 1254 | if (fixup_alt_jump_set) fixup_alt_jump = new_buffer + fixup_alt_jump_off; \ |
| 1268 | if (laststart_set) laststart = new_buffer + laststart_off; \ | 1255 | if (laststart_set) laststart = new_buffer + laststart_off; \ |
| 1269 | if (pending_exact_set) pending_exact = new_buffer + pending_exact_off; \ | 1256 | if (pending_exact_set) pending_exact = new_buffer + pending_exact_off; \ |
| 1270 | } while (0) | 1257 | } while (false) |
| 1271 | 1258 | ||
| 1272 | 1259 | ||
| 1273 | /* Since we have one byte reserved for the register number argument to | 1260 | /* Since we have one byte reserved for the register number argument to |
| @@ -1275,7 +1262,7 @@ static int analyze_first (re_char *p, re_char *pend, | |||
| 1275 | things about is what fits in that byte. */ | 1262 | things about is what fits in that byte. */ |
| 1276 | #define MAX_REGNUM 255 | 1263 | #define MAX_REGNUM 255 |
| 1277 | 1264 | ||
| 1278 | /* But patterns can have more than `MAX_REGNUM' registers. We just | 1265 | /* But patterns can have more than 'MAX_REGNUM' registers. Just |
| 1279 | ignore the excess. */ | 1266 | ignore the excess. */ |
| 1280 | typedef int regnum_t; | 1267 | typedef int regnum_t; |
| 1281 | 1268 | ||
| @@ -1284,7 +1271,6 @@ typedef int regnum_t; | |||
| 1284 | 1271 | ||
| 1285 | /* Since offsets can go either forwards or backwards, this type needs to | 1272 | /* Since offsets can go either forwards or backwards, this type needs to |
| 1286 | be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */ | 1273 | be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */ |
| 1287 | /* int may be not enough when sizeof(int) == 2. */ | ||
| 1288 | typedef long pattern_offset_t; | 1274 | typedef long pattern_offset_t; |
| 1289 | 1275 | ||
| 1290 | typedef struct | 1276 | typedef struct |
| @@ -1334,7 +1320,7 @@ struct range_table_work_area | |||
| 1334 | if ((work_area).table == 0) \ | 1320 | if ((work_area).table == 0) \ |
| 1335 | return (REG_ESPACE); \ | 1321 | return (REG_ESPACE); \ |
| 1336 | } \ | 1322 | } \ |
| 1337 | } while (0) | 1323 | } while (false) |
| 1338 | 1324 | ||
| 1339 | #define SET_RANGE_TABLE_WORK_AREA_BIT(work_area, bit) \ | 1325 | #define SET_RANGE_TABLE_WORK_AREA_BIT(work_area, bit) \ |
| 1340 | (work_area).bits |= (bit) | 1326 | (work_area).bits |= (bit) |
| @@ -1345,16 +1331,17 @@ struct range_table_work_area | |||
| 1345 | EXTEND_RANGE_TABLE ((work_area), 2); \ | 1331 | EXTEND_RANGE_TABLE ((work_area), 2); \ |
| 1346 | (work_area).table[(work_area).used++] = (range_start); \ | 1332 | (work_area).table[(work_area).used++] = (range_start); \ |
| 1347 | (work_area).table[(work_area).used++] = (range_end); \ | 1333 | (work_area).table[(work_area).used++] = (range_end); \ |
| 1348 | } while (0) | 1334 | } while (false) |
| 1349 | 1335 | ||
| 1350 | /* Free allocated memory for WORK_AREA. */ | 1336 | /* Free allocated memory for WORK_AREA. */ |
| 1351 | #define FREE_RANGE_TABLE_WORK_AREA(work_area) \ | 1337 | #define FREE_RANGE_TABLE_WORK_AREA(work_area) \ |
| 1352 | do { \ | 1338 | do { \ |
| 1353 | if ((work_area).table) \ | 1339 | if ((work_area).table) \ |
| 1354 | xfree ((work_area).table); \ | 1340 | xfree ((work_area).table); \ |
| 1355 | } while (0) | 1341 | } while (false) |
| 1356 | 1342 | ||
| 1357 | #define CLEAR_RANGE_TABLE_WORK_USED(work_area) ((work_area).used = 0, (work_area).bits = 0) | 1343 | #define CLEAR_RANGE_TABLE_WORK_USED(work_area) \ |
| 1344 | ((work_area).used = 0, (work_area).bits = 0) | ||
| 1358 | #define RANGE_TABLE_WORK_USED(work_area) ((work_area).used) | 1345 | #define RANGE_TABLE_WORK_USED(work_area) ((work_area).used) |
| 1359 | #define RANGE_TABLE_WORK_BITS(work_area) ((work_area).bits) | 1346 | #define RANGE_TABLE_WORK_BITS(work_area) ((work_area).bits) |
| 1360 | #define RANGE_TABLE_WORK_ELT(work_area, i) ((work_area).table[i]) | 1347 | #define RANGE_TABLE_WORK_ELT(work_area, i) ((work_area).table[i]) |
| @@ -1405,7 +1392,7 @@ struct range_table_work_area | |||
| 1405 | } \ | 1392 | } \ |
| 1406 | SET_LIST_BIT (C1); \ | 1393 | SET_LIST_BIT (C1); \ |
| 1407 | } \ | 1394 | } \ |
| 1408 | } while (0) | 1395 | } while (false) |
| 1409 | 1396 | ||
| 1410 | 1397 | ||
| 1411 | /* Both FROM and TO are unibyte characters (0x80..0xFF). */ | 1398 | /* Both FROM and TO are unibyte characters (0x80..0xFF). */ |
| @@ -1445,7 +1432,7 @@ struct range_table_work_area | |||
| 1445 | SET_RANGE_TABLE_WORK_AREA ((work_area), C2, C2); \ | 1432 | SET_RANGE_TABLE_WORK_AREA ((work_area), C2, C2); \ |
| 1446 | } \ | 1433 | } \ |
| 1447 | } \ | 1434 | } \ |
| 1448 | } while (0) | 1435 | } while (false) |
| 1449 | 1436 | ||
| 1450 | 1437 | ||
| 1451 | /* Both FROM and TO are multibyte characters. */ | 1438 | /* Both FROM and TO are multibyte characters. */ |
| @@ -1480,7 +1467,7 @@ struct range_table_work_area | |||
| 1480 | if (I < USED) \ | 1467 | if (I < USED) \ |
| 1481 | SET_RANGE_TABLE_WORK_AREA ((work_area), C1, C1); \ | 1468 | SET_RANGE_TABLE_WORK_AREA ((work_area), C1, C1); \ |
| 1482 | } \ | 1469 | } \ |
| 1483 | } while (0) | 1470 | } while (false) |
| 1484 | 1471 | ||
| 1485 | /* Get the next unsigned number in the uncompiled pattern. */ | 1472 | /* Get the next unsigned number in the uncompiled pattern. */ |
| 1486 | #define GET_INTERVAL_COUNT(num) \ | 1473 | #define GET_INTERVAL_COUNT(num) \ |
| @@ -1502,7 +1489,7 @@ struct range_table_work_area | |||
| 1502 | PATFETCH (c); \ | 1489 | PATFETCH (c); \ |
| 1503 | } \ | 1490 | } \ |
| 1504 | } \ | 1491 | } \ |
| 1505 | } while (0) | 1492 | } while (false) |
| 1506 | 1493 | ||
| 1507 | /* Parse a character class, i.e. string such as "[:name:]". *strp | 1494 | /* Parse a character class, i.e. string such as "[:name:]". *strp |
| 1508 | points to the string to be parsed and limit is length, in bytes, of | 1495 | points to the string to be parsed and limit is length, in bytes, of |
| @@ -1662,34 +1649,17 @@ extend_range_table_work_area (struct range_table_work_area *work_area) | |||
| 1662 | work_area->table = xrealloc (work_area->table, work_area->allocated); | 1649 | work_area->table = xrealloc (work_area->table, work_area->allocated); |
| 1663 | } | 1650 | } |
| 1664 | 1651 | ||
| 1665 | static bool group_in_compile_stack (compile_stack_type, regnum_t); | 1652 | /* regex_compile and helpers. */ |
| 1666 | |||
| 1667 | /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX. | ||
| 1668 | Returns one of error codes defined in `regex-emacs.h', or zero for success. | ||
| 1669 | |||
| 1670 | If WHITESPACE_REGEXP is given, it is used instead of a space | ||
| 1671 | character in PATTERN. | ||
| 1672 | |||
| 1673 | Assumes the `allocated' (and perhaps `buffer') and `translate' | ||
| 1674 | fields are set in BUFP on entry. | ||
| 1675 | |||
| 1676 | If it succeeds, results are put in BUFP (if it returns an error, the | ||
| 1677 | contents of BUFP are undefined): | ||
| 1678 | `buffer' is the compiled pattern; | ||
| 1679 | `syntax' is set to SYNTAX; | ||
| 1680 | `used' is set to the length of the compiled pattern; | ||
| 1681 | `fastmap_accurate' is zero; | ||
| 1682 | `re_nsub' is the number of subexpressions in PATTERN; | ||
| 1683 | 1653 | ||
| 1684 | The `fastmap' field is neither examined nor set. */ | 1654 | static bool group_in_compile_stack (compile_stack_type, regnum_t); |
| 1685 | 1655 | ||
| 1686 | /* Insert the `jump' from the end of last alternative to "here". | 1656 | /* Insert the 'jump' from the end of last alternative to "here". |
| 1687 | The space for the jump has already been allocated. */ | 1657 | The space for the jump has already been allocated. */ |
| 1688 | #define FIXUP_ALT_JUMP() \ | 1658 | #define FIXUP_ALT_JUMP() \ |
| 1689 | do { \ | 1659 | do { \ |
| 1690 | if (fixup_alt_jump) \ | 1660 | if (fixup_alt_jump) \ |
| 1691 | STORE_JUMP (jump, fixup_alt_jump, b); \ | 1661 | STORE_JUMP (jump, fixup_alt_jump, b); \ |
| 1692 | } while (0) | 1662 | } while (false) |
| 1693 | 1663 | ||
| 1694 | 1664 | ||
| 1695 | /* Return, freeing storage we allocated. */ | 1665 | /* Return, freeing storage we allocated. */ |
| @@ -1698,7 +1668,26 @@ do { \ | |||
| 1698 | FREE_RANGE_TABLE_WORK_AREA (range_table_work); \ | 1668 | FREE_RANGE_TABLE_WORK_AREA (range_table_work); \ |
| 1699 | xfree (compile_stack.stack); \ | 1669 | xfree (compile_stack.stack); \ |
| 1700 | return value; \ | 1670 | return value; \ |
| 1701 | } while (0) | 1671 | } while (false) |
| 1672 | |||
| 1673 | /* Compile PATTERN (of length SIZE) according to SYNTAX. | ||
| 1674 | Return a nonzero error code on failure, or zero for success. | ||
| 1675 | |||
| 1676 | If WHITESPACE_REGEXP is given, use it instead of a space | ||
| 1677 | character in PATTERN. | ||
| 1678 | |||
| 1679 | Assume the 'allocated' (and perhaps 'buffer') and 'translate' | ||
| 1680 | fields are set in BUFP on entry. | ||
| 1681 | |||
| 1682 | If successful, put results in *BUFP (otherwise the | ||
| 1683 | contents of *BUFP are undefined): | ||
| 1684 | 'buffer' is the compiled pattern; | ||
| 1685 | 'syntax' is set to SYNTAX; | ||
| 1686 | 'used' is set to the length of the compiled pattern; | ||
| 1687 | 'fastmap_accurate' is zero; | ||
| 1688 | 're_nsub' is the number of subexpressions in PATTERN; | ||
| 1689 | |||
| 1690 | The 'fastmap' field is neither examined nor set. */ | ||
| 1702 | 1691 | ||
| 1703 | static reg_errcode_t | 1692 | static reg_errcode_t |
| 1704 | regex_compile (re_char *pattern, size_t size, | 1693 | regex_compile (re_char *pattern, size_t size, |
| @@ -1706,7 +1695,7 @@ regex_compile (re_char *pattern, size_t size, | |||
| 1706 | const char *whitespace_regexp, | 1695 | const char *whitespace_regexp, |
| 1707 | struct re_pattern_buffer *bufp) | 1696 | struct re_pattern_buffer *bufp) |
| 1708 | { | 1697 | { |
| 1709 | /* We fetch characters from PATTERN here. */ | 1698 | /* Fetch characters from PATTERN here. */ |
| 1710 | int c, c1; | 1699 | int c, c1; |
| 1711 | 1700 | ||
| 1712 | /* Points to the end of the buffer, where we should append. */ | 1701 | /* Points to the end of the buffer, where we should append. */ |
| @@ -1722,10 +1711,10 @@ regex_compile (re_char *pattern, size_t size, | |||
| 1722 | /* How to translate the characters in the pattern. */ | 1711 | /* How to translate the characters in the pattern. */ |
| 1723 | Lisp_Object translate = bufp->translate; | 1712 | Lisp_Object translate = bufp->translate; |
| 1724 | 1713 | ||
| 1725 | /* Address of the count-byte of the most recently inserted `exactn' | 1714 | /* Address of the count-byte of the most recently inserted 'exactn' |
| 1726 | command. This makes it possible to tell if a new exact-match | 1715 | command. This makes it possible to tell if a new exact-match |
| 1727 | character can be added to that command or if the character requires | 1716 | character can be added to that command or if the character requires |
| 1728 | a new `exactn' command. */ | 1717 | a new 'exactn' command. */ |
| 1729 | unsigned char *pending_exact = 0; | 1718 | unsigned char *pending_exact = 0; |
| 1730 | 1719 | ||
| 1731 | /* Address of start of the most recently finished expression. | 1720 | /* Address of start of the most recently finished expression. |
| @@ -1741,7 +1730,7 @@ regex_compile (re_char *pattern, size_t size, | |||
| 1741 | re_char *beg_interval; | 1730 | re_char *beg_interval; |
| 1742 | 1731 | ||
| 1743 | /* Address of the place where a forward jump should go to the end of | 1732 | /* Address of the place where a forward jump should go to the end of |
| 1744 | the containing expression. Each alternative of an `or' -- except the | 1733 | the containing expression. Each alternative of an 'or' -- except the |
| 1745 | last -- ends with a forward jump of this sort. */ | 1734 | last -- ends with a forward jump of this sort. */ |
| 1746 | unsigned char *fixup_alt_jump = 0; | 1735 | unsigned char *fixup_alt_jump = 0; |
| 1747 | 1736 | ||
| @@ -1785,7 +1774,7 @@ regex_compile (re_char *pattern, size_t size, | |||
| 1785 | bufp->fastmap_accurate = 0; | 1774 | bufp->fastmap_accurate = 0; |
| 1786 | bufp->used_syntax = 0; | 1775 | bufp->used_syntax = 0; |
| 1787 | 1776 | ||
| 1788 | /* Set `used' to zero, so that if we return an error, the pattern | 1777 | /* Set 'used' to zero, so that if we return an error, the pattern |
| 1789 | printer (for debugging) will think there's no pattern. We reset it | 1778 | printer (for debugging) will think there's no pattern. We reset it |
| 1790 | at the end. */ | 1779 | at the end. */ |
| 1791 | bufp->used = 0; | 1780 | bufp->used = 0; |
| @@ -1892,8 +1881,8 @@ regex_compile (re_char *pattern, size_t size, | |||
| 1892 | 1881 | ||
| 1893 | /* If there is a sequence of repetition chars, collapse it | 1882 | /* If there is a sequence of repetition chars, collapse it |
| 1894 | down to just one (the right one). We can't combine | 1883 | down to just one (the right one). We can't combine |
| 1895 | interval operators with these because of, e.g., `a{2}*', | 1884 | interval operators with these because of, e.g., 'a{2}*', |
| 1896 | which should only match an even number of `a's. */ | 1885 | which should only match an even number of 'a's. */ |
| 1897 | 1886 | ||
| 1898 | for (;;) | 1887 | for (;;) |
| 1899 | { | 1888 | { |
| @@ -2025,8 +2014,8 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2025 | 2014 | ||
| 2026 | laststart = b; | 2015 | laststart = b; |
| 2027 | 2016 | ||
| 2028 | /* We test `*p == '^' twice, instead of using an if | 2017 | /* Test '*p == '^' twice, instead of using an if |
| 2029 | statement, so we only need one BUF_PUSH. */ | 2018 | statement, so we need only one BUF_PUSH. */ |
| 2030 | BUF_PUSH (*p == '^' ? charset_not : charset); | 2019 | BUF_PUSH (*p == '^' ? charset_not : charset); |
| 2031 | if (*p == '^') | 2020 | if (*p == '^') |
| 2032 | p++; | 2021 | p++; |
| @@ -2104,7 +2093,7 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2104 | PATFETCH (c); | 2093 | PATFETCH (c); |
| 2105 | 2094 | ||
| 2106 | /* Could be the end of the bracket expression. If it's | 2095 | /* Could be the end of the bracket expression. If it's |
| 2107 | not (i.e., when the bracket expression is `[]' so | 2096 | not (i.e., when the bracket expression is '[]' so |
| 2108 | far), the ']' character bit gets set way below. */ | 2097 | far), the ']' character bit gets set way below. */ |
| 2109 | if (c == ']' && p2 != p1) | 2098 | if (c == ']' && p2 != p1) |
| 2110 | break; | 2099 | break; |
| @@ -2112,7 +2101,7 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2112 | if (p < pend && p[0] == '-' && p[1] != ']') | 2101 | if (p < pend && p[0] == '-' && p[1] != ']') |
| 2113 | { | 2102 | { |
| 2114 | 2103 | ||
| 2115 | /* Discard the `-'. */ | 2104 | /* Discard the '-'. */ |
| 2116 | PATFETCH (c1); | 2105 | PATFETCH (c1); |
| 2117 | 2106 | ||
| 2118 | /* Fetch the character which ends the range. */ | 2107 | /* Fetch the character which ends the range. */ |
| @@ -2294,12 +2283,12 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2294 | FREE_STACK_RETURN (REG_ERPAREN); | 2283 | FREE_STACK_RETURN (REG_ERPAREN); |
| 2295 | 2284 | ||
| 2296 | /* Since we just checked for an empty stack above, this | 2285 | /* Since we just checked for an empty stack above, this |
| 2297 | ``can't happen''. */ | 2286 | "can't happen". */ |
| 2298 | eassert (compile_stack.avail != 0); | 2287 | eassert (compile_stack.avail != 0); |
| 2299 | { | 2288 | { |
| 2300 | /* We don't just want to restore into `regnum', because | 2289 | /* We don't just want to restore into 'regnum', because |
| 2301 | later groups should continue to be numbered higher, | 2290 | later groups should continue to be numbered higher, |
| 2302 | as in `(ab)c(de)' -- the second group is #2. */ | 2291 | as in '(ab)c(de)' -- the second group is #2. */ |
| 2303 | regnum_t regnum; | 2292 | regnum_t regnum; |
| 2304 | 2293 | ||
| 2305 | compile_stack.avail--; | 2294 | compile_stack.avail--; |
| @@ -2323,7 +2312,7 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2323 | break; | 2312 | break; |
| 2324 | 2313 | ||
| 2325 | 2314 | ||
| 2326 | case '|': /* `\|'. */ | 2315 | case '|': /* '\|'. */ |
| 2327 | /* Insert before the previous alternative a jump which | 2316 | /* Insert before the previous alternative a jump which |
| 2328 | jumps to this alternative if the former fails. */ | 2317 | jumps to this alternative if the former fails. */ |
| 2329 | GET_BUFFER_SPACE (3); | 2318 | GET_BUFFER_SPACE (3); |
| @@ -2340,12 +2329,12 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2340 | _____ _____ | 2329 | _____ _____ |
| 2341 | | | | | | 2330 | | | | | |
| 2342 | | v | v | 2331 | | v | v |
| 2343 | a | b | c | 2332 | A | B | C |
| 2344 | 2333 | ||
| 2345 | If we are at `b', then fixup_alt_jump right now points to a | 2334 | If we are at B, then fixup_alt_jump right now points to a |
| 2346 | three-byte space after `a'. We'll put in the jump, set | 2335 | three-byte space after A. We'll put in the jump, set |
| 2347 | fixup_alt_jump to right after `b', and leave behind three | 2336 | fixup_alt_jump to right after B, and leave behind three |
| 2348 | bytes which we'll fill in when we get to after `c'. */ | 2337 | bytes which we'll fill in when we get to after C. */ |
| 2349 | 2338 | ||
| 2350 | FIXUP_ALT_JUMP (); | 2339 | FIXUP_ALT_JUMP (); |
| 2351 | 2340 | ||
| @@ -2373,7 +2362,7 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2373 | if (c == ',') | 2362 | if (c == ',') |
| 2374 | GET_INTERVAL_COUNT (upper_bound); | 2363 | GET_INTERVAL_COUNT (upper_bound); |
| 2375 | else | 2364 | else |
| 2376 | /* Interval such as `{1}' => match exactly once. */ | 2365 | /* Interval such as '{1}' => match exactly once. */ |
| 2377 | upper_bound = lower_bound; | 2366 | upper_bound = lower_bound; |
| 2378 | 2367 | ||
| 2379 | if (lower_bound < 0 | 2368 | if (lower_bound < 0 |
| @@ -2406,8 +2395,8 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2406 | succeed_n <after jump addr> <succeed_n count> | 2395 | succeed_n <after jump addr> <succeed_n count> |
| 2407 | <body of loop> | 2396 | <body of loop> |
| 2408 | jump_n <succeed_n addr> <jump count> | 2397 | jump_n <succeed_n addr> <jump count> |
| 2409 | (The upper bound and `jump_n' are omitted if | 2398 | (The upper bound and 'jump_n' are omitted if |
| 2410 | `upper_bound' is 1, though.) */ | 2399 | 'upper_bound' is 1, though.) */ |
| 2411 | else | 2400 | else |
| 2412 | { /* If the upper bound is > 1, we need to insert | 2401 | { /* If the upper bound is > 1, we need to insert |
| 2413 | more at the end of the loop. */ | 2402 | more at the end of the loop. */ |
| @@ -2427,21 +2416,22 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2427 | } | 2416 | } |
| 2428 | else | 2417 | else |
| 2429 | { | 2418 | { |
| 2430 | /* Initialize lower bound of the `succeed_n', even | 2419 | /* Initialize lower bound of the 'succeed_n', even |
| 2431 | though it will be set during matching by its | 2420 | though it will be set during matching by its |
| 2432 | attendant `set_number_at' (inserted next), | 2421 | attendant 'set_number_at' (inserted next), |
| 2433 | because `re_compile_fastmap' needs to know. | 2422 | because 're_compile_fastmap' needs to know. |
| 2434 | Jump to the `jump_n' we might insert below. */ | 2423 | Jump to the 'jump_n' we might insert below. */ |
| 2435 | INSERT_JUMP2 (succeed_n, laststart, | 2424 | INSERT_JUMP2 (succeed_n, laststart, |
| 2436 | b + 5 + nbytes, | 2425 | b + 5 + nbytes, |
| 2437 | lower_bound); | 2426 | lower_bound); |
| 2438 | b += 5; | 2427 | b += 5; |
| 2439 | 2428 | ||
| 2440 | /* Code to initialize the lower bound. Insert | 2429 | /* Code to initialize the lower bound. Insert |
| 2441 | before the `succeed_n'. The `5' is the last two | 2430 | before the 'succeed_n'. The '5' is the last two |
| 2442 | bytes of this `set_number_at', plus 3 bytes of | 2431 | bytes of this 'set_number_at', plus 3 bytes of |
| 2443 | the following `succeed_n'. */ | 2432 | the following 'succeed_n'. */ |
| 2444 | insert_op2 (set_number_at, laststart, 5, lower_bound, b); | 2433 | insert_op2 (set_number_at, laststart, 5, |
| 2434 | lower_bound, b); | ||
| 2445 | b += 5; | 2435 | b += 5; |
| 2446 | startoffset += 5; | 2436 | startoffset += 5; |
| 2447 | } | 2437 | } |
| @@ -2455,28 +2445,28 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2455 | } | 2445 | } |
| 2456 | else if (upper_bound > 1) | 2446 | else if (upper_bound > 1) |
| 2457 | { /* More than one repetition is allowed, so | 2447 | { /* More than one repetition is allowed, so |
| 2458 | append a backward jump to the `succeed_n' | 2448 | append a backward jump to the 'succeed_n' |
| 2459 | that starts this interval. | 2449 | that starts this interval. |
| 2460 | 2450 | ||
| 2461 | When we've reached this during matching, | 2451 | When we've reached this during matching, |
| 2462 | we'll have matched the interval once, so | 2452 | we'll have matched the interval once, so |
| 2463 | jump back only `upper_bound - 1' times. */ | 2453 | jump back only 'upper_bound - 1' times. */ |
| 2464 | STORE_JUMP2 (jump_n, b, laststart + startoffset, | 2454 | STORE_JUMP2 (jump_n, b, laststart + startoffset, |
| 2465 | upper_bound - 1); | 2455 | upper_bound - 1); |
| 2466 | b += 5; | 2456 | b += 5; |
| 2467 | 2457 | ||
| 2468 | /* The location we want to set is the second | 2458 | /* The location we want to set is the second |
| 2469 | parameter of the `jump_n'; that is `b-2' as | 2459 | parameter of the 'jump_n'; that is 'b-2' as |
| 2470 | an absolute address. `laststart' will be | 2460 | an absolute address. 'laststart' will be |
| 2471 | the `set_number_at' we're about to insert; | 2461 | the 'set_number_at' we're about to insert; |
| 2472 | `laststart+3' the number to set, the source | 2462 | 'laststart+3' the number to set, the source |
| 2473 | for the relative address. But we are | 2463 | for the relative address. But we are |
| 2474 | inserting into the middle of the pattern -- | 2464 | inserting into the middle of the pattern -- |
| 2475 | so everything is getting moved up by 5. | 2465 | so everything is getting moved up by 5. |
| 2476 | Conclusion: (b - 2) - (laststart + 3) + 5, | 2466 | Conclusion: (b - 2) - (laststart + 3) + 5, |
| 2477 | i.e., b - laststart. | 2467 | i.e., b - laststart. |
| 2478 | 2468 | ||
| 2479 | We insert this at the beginning of the loop | 2469 | Insert this at the beginning of the loop |
| 2480 | so that if we fail during matching, we'll | 2470 | so that if we fail during matching, we'll |
| 2481 | reinitialize the bounds. */ | 2471 | reinitialize the bounds. */ |
| 2482 | insert_op2 (set_number_at, laststart, b - laststart, | 2472 | insert_op2 (set_number_at, laststart, b - laststart, |
| @@ -2601,7 +2591,7 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2601 | 2591 | ||
| 2602 | 2592 | ||
| 2603 | default: | 2593 | default: |
| 2604 | /* Expects the character in `c'. */ | 2594 | /* Expects the character in C. */ |
| 2605 | normal_char: | 2595 | normal_char: |
| 2606 | /* If no exactn currently being built. */ | 2596 | /* If no exactn currently being built. */ |
| 2607 | if (!pending_exact | 2597 | if (!pending_exact |
| @@ -2609,7 +2599,7 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2609 | /* If last exactn not at current position. */ | 2599 | /* If last exactn not at current position. */ |
| 2610 | || pending_exact + *pending_exact + 1 != b | 2600 | || pending_exact + *pending_exact + 1 != b |
| 2611 | 2601 | ||
| 2612 | /* We have only one byte following the exactn for the count. */ | 2602 | /* Only one byte follows the exactn for the count. */ |
| 2613 | || *pending_exact >= (1 << BYTEWIDTH) - MAX_MULTIBYTE_LENGTH | 2603 | || *pending_exact >= (1 << BYTEWIDTH) - MAX_MULTIBYTE_LENGTH |
| 2614 | 2604 | ||
| 2615 | /* If followed by a repetition operator. */ | 2605 | /* If followed by a repetition operator. */ |
| @@ -2668,7 +2658,7 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2668 | if (!posix_backtracking) | 2658 | if (!posix_backtracking) |
| 2669 | BUF_PUSH (succeed); | 2659 | BUF_PUSH (succeed); |
| 2670 | 2660 | ||
| 2671 | /* We have succeeded; set the length of the buffer. */ | 2661 | /* Success; set the length of the buffer. */ |
| 2672 | bufp->used = b - bufp->buffer; | 2662 | bufp->used = b - bufp->buffer; |
| 2673 | 2663 | ||
| 2674 | #ifdef REGEX_EMACS_DEBUG | 2664 | #ifdef REGEX_EMACS_DEBUG |
| @@ -2685,7 +2675,7 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2685 | 2675 | ||
| 2686 | } /* regex_compile */ | 2676 | } /* regex_compile */ |
| 2687 | 2677 | ||
| 2688 | /* Subroutines for `regex_compile'. */ | 2678 | /* Subroutines for 'regex_compile'. */ |
| 2689 | 2679 | ||
| 2690 | /* Store OP at LOC followed by two-byte integer parameter ARG. */ | 2680 | /* Store OP at LOC followed by two-byte integer parameter ARG. */ |
| 2691 | 2681 | ||
| @@ -2697,7 +2687,7 @@ store_op1 (re_opcode_t op, unsigned char *loc, int arg) | |||
| 2697 | } | 2687 | } |
| 2698 | 2688 | ||
| 2699 | 2689 | ||
| 2700 | /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */ | 2690 | /* Like 'store_op1', but for two two-byte parameters ARG1 and ARG2. */ |
| 2701 | 2691 | ||
| 2702 | static void | 2692 | static void |
| 2703 | store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2) | 2693 | store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2) |
| @@ -2724,10 +2714,11 @@ insert_op1 (re_opcode_t op, unsigned char *loc, int arg, unsigned char *end) | |||
| 2724 | } | 2714 | } |
| 2725 | 2715 | ||
| 2726 | 2716 | ||
| 2727 | /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */ | 2717 | /* Like 'insert_op1', but for two two-byte parameters ARG1 and ARG2. */ |
| 2728 | 2718 | ||
| 2729 | static void | 2719 | static void |
| 2730 | insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, unsigned char *end) | 2720 | insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, |
| 2721 | unsigned char *end) | ||
| 2731 | { | 2722 | { |
| 2732 | register unsigned char *pfrom = end; | 2723 | register unsigned char *pfrom = end; |
| 2733 | register unsigned char *pto = end + 5; | 2724 | register unsigned char *pto = end + 5; |
| @@ -2740,7 +2731,7 @@ insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, unsigned cha | |||
| 2740 | 2731 | ||
| 2741 | 2732 | ||
| 2742 | /* P points to just after a ^ in PATTERN. Return true if that ^ comes | 2733 | /* P points to just after a ^ in PATTERN. Return true if that ^ comes |
| 2743 | after an alternative or a begin-subexpression. We assume there is at | 2734 | after an alternative or a begin-subexpression. Assume there is at |
| 2744 | least one character before the ^. */ | 2735 | least one character before the ^. */ |
| 2745 | 2736 | ||
| 2746 | static bool | 2737 | static bool |
| @@ -2776,8 +2767,8 @@ at_begline_loc_p (re_char *pattern, re_char *p) | |||
| 2776 | } | 2767 | } |
| 2777 | 2768 | ||
| 2778 | 2769 | ||
| 2779 | /* The dual of at_begline_loc_p. This one is for $. We assume there is | 2770 | /* The dual of at_begline_loc_p. This one is for $. Assume there is |
| 2780 | at least one character after the $, i.e., `P < PEND'. */ | 2771 | at least one character after the $, i.e., 'P < PEND'. */ |
| 2781 | 2772 | ||
| 2782 | static bool | 2773 | static bool |
| 2783 | at_endline_loc_p (re_char *p, re_char *pend) | 2774 | at_endline_loc_p (re_char *p, re_char *pend) |
| @@ -2832,22 +2823,22 @@ analyze_first (re_char *p, re_char *pend, char *fastmap, | |||
| 2832 | starts by only containing a pointer to the first operation. | 2823 | starts by only containing a pointer to the first operation. |
| 2833 | - If the opcode we're looking at is a match against some set of | 2824 | - If the opcode we're looking at is a match against some set of |
| 2834 | chars, then we add those chars to the fastmap and go on to the | 2825 | chars, then we add those chars to the fastmap and go on to the |
| 2835 | next work element from the worklist (done via `break'). | 2826 | next work element from the worklist (done via 'break'). |
| 2836 | - If the opcode is a control operator on the other hand, we either | 2827 | - If the opcode is a control operator on the other hand, we either |
| 2837 | ignore it (if it's meaningless at this point, such as `start_memory') | 2828 | ignore it (if it's meaningless at this point, such as 'start_memory') |
| 2838 | or execute it (if it's a jump). If the jump has several destinations | 2829 | or execute it (if it's a jump). If the jump has several destinations |
| 2839 | (i.e. `on_failure_jump'), then we push the other destination onto the | 2830 | (i.e. 'on_failure_jump'), then we push the other destination onto the |
| 2840 | worklist. | 2831 | worklist. |
| 2841 | We guarantee termination by ignoring backward jumps (more or less), | 2832 | We guarantee termination by ignoring backward jumps (more or less), |
| 2842 | so that `p' is monotonically increasing. More to the point, we | 2833 | so that P is monotonically increasing. More to the point, we |
| 2843 | never set `p' (or push) anything `<= p1'. */ | 2834 | never set P (or push) anything '<= p1'. */ |
| 2844 | 2835 | ||
| 2845 | while (p < pend) | 2836 | while (p < pend) |
| 2846 | { | 2837 | { |
| 2847 | /* `p1' is used as a marker of how far back a `on_failure_jump' | 2838 | /* P1 is used as a marker of how far back a 'on_failure_jump' |
| 2848 | can go without being ignored. It is normally equal to `p' | 2839 | can go without being ignored. It is normally equal to P |
| 2849 | (which prevents any backward `on_failure_jump') except right | 2840 | (which prevents any backward 'on_failure_jump') except right |
| 2850 | after a plain `jump', to allow patterns such as: | 2841 | after a plain 'jump', to allow patterns such as: |
| 2851 | 0: jump 10 | 2842 | 0: jump 10 |
| 2852 | 3..9: <body> | 2843 | 3..9: <body> |
| 2853 | 10: on_failure_jump 3 | 2844 | 10: on_failure_jump 3 |
| @@ -2869,7 +2860,7 @@ analyze_first (re_char *p, re_char *pend, char *fastmap, | |||
| 2869 | 2860 | ||
| 2870 | 2861 | ||
| 2871 | /* Following are the cases which match a character. These end | 2862 | /* Following are the cases which match a character. These end |
| 2872 | with `break'. */ | 2863 | with 'break'. */ |
| 2873 | 2864 | ||
| 2874 | case exactn: | 2865 | case exactn: |
| 2875 | if (fastmap) | 2866 | if (fastmap) |
| @@ -2943,7 +2934,7 @@ analyze_first (re_char *p, re_char *pend, char *fastmap, | |||
| 2943 | int c, count; | 2934 | int c, count; |
| 2944 | unsigned char lc1, lc2; | 2935 | unsigned char lc1, lc2; |
| 2945 | 2936 | ||
| 2946 | /* Make P point to the range table. `+ 2' is to skip flag | 2937 | /* Make P point to the range table. '+ 2' is to skip flag |
| 2947 | bits for a character class. */ | 2938 | bits for a character class. */ |
| 2948 | p += CHARSET_BITMAP_SIZE (&p[-2]) + 2; | 2939 | p += CHARSET_BITMAP_SIZE (&p[-2]) + 2; |
| 2949 | 2940 | ||
| @@ -2991,7 +2982,7 @@ analyze_first (re_char *p, re_char *pend, char *fastmap, | |||
| 2991 | break; | 2982 | break; |
| 2992 | 2983 | ||
| 2993 | /* All cases after this match the empty string. These end with | 2984 | /* All cases after this match the empty string. These end with |
| 2994 | `continue'. */ | 2985 | 'continue'. */ |
| 2995 | 2986 | ||
| 2996 | case at_dot: | 2987 | case at_dot: |
| 2997 | case no_op: | 2988 | case no_op: |
| @@ -3012,7 +3003,7 @@ analyze_first (re_char *p, re_char *pend, char *fastmap, | |||
| 3012 | EXTRACT_NUMBER_AND_INCR (j, p); | 3003 | EXTRACT_NUMBER_AND_INCR (j, p); |
| 3013 | if (j < 0) | 3004 | if (j < 0) |
| 3014 | /* Backward jumps can only go back to code that we've already | 3005 | /* Backward jumps can only go back to code that we've already |
| 3015 | visited. `re_compile' should make sure this is true. */ | 3006 | visited. 're_compile' should make sure this is true. */ |
| 3016 | break; | 3007 | break; |
| 3017 | p += j; | 3008 | p += j; |
| 3018 | switch (*p) | 3009 | switch (*p) |
| @@ -3027,7 +3018,7 @@ analyze_first (re_char *p, re_char *pend, char *fastmap, | |||
| 3027 | default: | 3018 | default: |
| 3028 | continue; | 3019 | continue; |
| 3029 | }; | 3020 | }; |
| 3030 | /* Keep `p1' to allow the `on_failure_jump' we are jumping to | 3021 | /* Keep P1 to allow the 'on_failure_jump' we are jumping to |
| 3031 | to jump back to "just after here". */ | 3022 | to jump back to "just after here". */ |
| 3032 | FALLTHROUGH; | 3023 | FALLTHROUGH; |
| 3033 | case on_failure_jump: | 3024 | case on_failure_jump: |
| @@ -3094,8 +3085,8 @@ analyze_first (re_char *p, re_char *pend, char *fastmap, | |||
| 3094 | 3085 | ||
| 3095 | } /* analyze_first */ | 3086 | } /* analyze_first */ |
| 3096 | 3087 | ||
| 3097 | /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in | 3088 | /* Compute a fastmap for the compiled pattern in BUFP. |
| 3098 | BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible | 3089 | A fastmap records which of the (1 << BYTEWIDTH) possible |
| 3099 | characters can start a string that matches the pattern. This fastmap | 3090 | characters can start a string that matches the pattern. This fastmap |
| 3100 | is used by re_search to skip quickly over impossible starting points. | 3091 | is used by re_search to skip quickly over impossible starting points. |
| 3101 | 3092 | ||
| @@ -3106,10 +3097,8 @@ analyze_first (re_char *p, re_char *pend, char *fastmap, | |||
| 3106 | The caller must supply the address of a (1 << BYTEWIDTH)-byte data | 3097 | The caller must supply the address of a (1 << BYTEWIDTH)-byte data |
| 3107 | area as BUFP->fastmap. | 3098 | area as BUFP->fastmap. |
| 3108 | 3099 | ||
| 3109 | We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in | 3100 | Set the 'fastmap', 'fastmap_accurate', and 'can_be_null' fields in |
| 3110 | the pattern buffer. | 3101 | the pattern buffer. */ |
| 3111 | |||
| 3112 | Returns 0 if we succeed, -2 if an internal error. */ | ||
| 3113 | 3102 | ||
| 3114 | static void | 3103 | static void |
| 3115 | re_compile_fastmap (struct re_pattern_buffer *bufp) | 3104 | re_compile_fastmap (struct re_pattern_buffer *bufp) |
| @@ -3197,13 +3186,14 @@ re_search (struct re_pattern_buffer *bufp, const char *string, size_t size, | |||
| 3197 | Do not consider matching one past the index STOP in the virtual | 3186 | Do not consider matching one past the index STOP in the virtual |
| 3198 | concatenation of STRING1 and STRING2. | 3187 | concatenation of STRING1 and STRING2. |
| 3199 | 3188 | ||
| 3200 | We return either the position in the strings at which the match was | 3189 | Return either the position in the strings at which the match was |
| 3201 | found, -1 if no match, or -2 if error (such as failure | 3190 | found, -1 if no match, or -2 if error (such as failure |
| 3202 | stack overflow). */ | 3191 | stack overflow). */ |
| 3203 | 3192 | ||
| 3204 | ptrdiff_t | 3193 | ptrdiff_t |
| 3205 | re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1, | 3194 | re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1, |
| 3206 | const char *str2, size_t size2, ptrdiff_t startpos, ptrdiff_t range, | 3195 | const char *str2, size_t size2, |
| 3196 | ptrdiff_t startpos, ptrdiff_t range, | ||
| 3207 | struct re_registers *regs, ptrdiff_t stop) | 3197 | struct re_registers *regs, ptrdiff_t stop) |
| 3208 | { | 3198 | { |
| 3209 | ptrdiff_t val; | 3199 | ptrdiff_t val; |
| @@ -3267,7 +3257,7 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1, | |||
| 3267 | { | 3257 | { |
| 3268 | /* If the pattern is anchored, | 3258 | /* If the pattern is anchored, |
| 3269 | skip quickly past places we cannot match. | 3259 | skip quickly past places we cannot match. |
| 3270 | We don't bother to treat startpos == 0 specially | 3260 | Don't bother to treat startpos == 0 specially |
| 3271 | because that case doesn't repeat. */ | 3261 | because that case doesn't repeat. */ |
| 3272 | if (anchored_start && startpos > 0) | 3262 | if (anchored_start && startpos > 0) |
| 3273 | { | 3263 | { |
| @@ -3295,7 +3285,7 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1, | |||
| 3295 | if (startpos < size1 && startpos + range >= size1) | 3285 | if (startpos < size1 && startpos + range >= size1) |
| 3296 | lim = range - (size1 - startpos); | 3286 | lim = range - (size1 - startpos); |
| 3297 | 3287 | ||
| 3298 | /* Written out as an if-else to avoid testing `translate' | 3288 | /* Written out as an if-else to avoid testing 'translate' |
| 3299 | inside the loop. */ | 3289 | inside the loop. */ |
| 3300 | if (!NILP (translate)) | 3290 | if (!NILP (translate)) |
| 3301 | { | 3291 | { |
| @@ -3440,8 +3430,8 @@ static int bcmp_translate (re_char *s1, re_char *s2, | |||
| 3440 | Lisp_Object translate, | 3430 | Lisp_Object translate, |
| 3441 | const int multibyte); | 3431 | const int multibyte); |
| 3442 | 3432 | ||
| 3443 | /* This converts PTR, a pointer into one of the search strings `string1' | 3433 | /* This converts PTR, a pointer into one of the search strings 'string1' |
| 3444 | and `string2' into an offset from the beginning of that string. */ | 3434 | and 'string2' into an offset from the beginning of that string. */ |
| 3445 | #define POINTER_TO_OFFSET(ptr) \ | 3435 | #define POINTER_TO_OFFSET(ptr) \ |
| 3446 | (FIRST_STRING_P (ptr) \ | 3436 | (FIRST_STRING_P (ptr) \ |
| 3447 | ? (ptr) - string1 \ | 3437 | ? (ptr) - string1 \ |
| @@ -3465,7 +3455,7 @@ static int bcmp_translate (re_char *s1, re_char *s2, | |||
| 3465 | /* Call before fetching a char with *d if you already checked other limits. | 3455 | /* Call before fetching a char with *d if you already checked other limits. |
| 3466 | This is meant for use in lookahead operations like wordend, etc., | 3456 | This is meant for use in lookahead operations like wordend, etc., |
| 3467 | where we might need to look at parts of the string that might be | 3457 | where we might need to look at parts of the string that might be |
| 3468 | outside of the LIMITs (i.e. past `stop'). */ | 3458 | outside of the LIMITs (i.e. past 'stop'). */ |
| 3469 | #define PREFETCH_NOLIMIT() \ | 3459 | #define PREFETCH_NOLIMIT() \ |
| 3470 | if (d == end1) \ | 3460 | if (d == end1) \ |
| 3471 | { \ | 3461 | { \ |
| @@ -3474,7 +3464,7 @@ static int bcmp_translate (re_char *s1, re_char *s2, | |||
| 3474 | } \ | 3464 | } \ |
| 3475 | 3465 | ||
| 3476 | /* Test if at very beginning or at very end of the virtual concatenation | 3466 | /* Test if at very beginning or at very end of the virtual concatenation |
| 3477 | of `string1' and `string2'. If only one string, it's `string2'. */ | 3467 | of STRING1 and STRING2. If only one string, it's STRING2. */ |
| 3478 | #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2) | 3468 | #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2) |
| 3479 | #define AT_STRINGS_END(d) ((d) == end2) | 3469 | #define AT_STRINGS_END(d) ((d) == end2) |
| 3480 | 3470 | ||
| @@ -3599,7 +3589,7 @@ execute_charset (re_char **pp, unsigned c, unsigned corig, bool unibyte) | |||
| 3599 | 3589 | ||
| 3600 | if (unibyte && c < (1 << BYTEWIDTH)) | 3590 | if (unibyte && c < (1 << BYTEWIDTH)) |
| 3601 | { /* Lookup bitmap. */ | 3591 | { /* Lookup bitmap. */ |
| 3602 | /* Cast to `unsigned' instead of `unsigned char' in | 3592 | /* Cast to 'unsigned' instead of 'unsigned char' in |
| 3603 | case the bit list is a full 32 bytes long. */ | 3593 | case the bit list is a full 32 bytes long. */ |
| 3604 | if (c < (unsigned) (CHARSET_BITMAP_SIZE (p) * BYTEWIDTH) | 3594 | if (c < (unsigned) (CHARSET_BITMAP_SIZE (p) * BYTEWIDTH) |
| 3605 | && p[2 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) | 3595 | && p[2 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) |
| @@ -3700,7 +3690,7 @@ mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1, | |||
| 3700 | else if ((re_opcode_t) *p1 == charset | 3690 | else if ((re_opcode_t) *p1 == charset |
| 3701 | || (re_opcode_t) *p1 == charset_not) | 3691 | || (re_opcode_t) *p1 == charset_not) |
| 3702 | { | 3692 | { |
| 3703 | if (!execute_charset (&p1, c, c, !multibyte || IS_REAL_ASCII (c))) | 3693 | if (!execute_charset (&p1, c, c, !multibyte || ASCII_CHAR_P (c))) |
| 3704 | { | 3694 | { |
| 3705 | DEBUG_PRINT (" No match => fast loop.\n"); | 3695 | DEBUG_PRINT (" No match => fast loop.\n"); |
| 3706 | return 1; | 3696 | return 1; |
| @@ -3727,10 +3717,10 @@ mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1, | |||
| 3727 | else if (!multibyte || !CHARSET_RANGE_TABLE_EXISTS_P (p2)) | 3717 | else if (!multibyte || !CHARSET_RANGE_TABLE_EXISTS_P (p2)) |
| 3728 | { | 3718 | { |
| 3729 | /* Now, we are sure that P2 has no range table. | 3719 | /* Now, we are sure that P2 has no range table. |
| 3730 | So, for the size of bitmap in P2, `p2[1]' is | 3720 | So, for the size of bitmap in P2, 'p2[1]' is |
| 3731 | enough. But P1 may have range table, so the | 3721 | enough. But P1 may have range table, so the |
| 3732 | size of bitmap table of P1 is extracted by | 3722 | size of bitmap table of P1 is extracted by |
| 3733 | using macro `CHARSET_BITMAP_SIZE'. | 3723 | using macro 'CHARSET_BITMAP_SIZE'. |
| 3734 | 3724 | ||
| 3735 | In a multibyte case, we know that all the characters | 3725 | In a multibyte case, we know that all the characters |
| 3736 | listed in P2 are ASCII. In a unibyte case, P1 has only a | 3726 | listed in P2 are ASCII. In a unibyte case, P1 has only a |
| @@ -3934,11 +3924,11 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 3934 | unsigned best_regs_set = false; | 3924 | unsigned best_regs_set = false; |
| 3935 | re_char **best_regstart UNINIT, **best_regend UNINIT; | 3925 | re_char **best_regstart UNINIT, **best_regend UNINIT; |
| 3936 | 3926 | ||
| 3937 | /* Logically, this is `best_regend[0]'. But we don't want to have to | 3927 | /* Logically, this is 'best_regend[0]'. But we don't want to have to |
| 3938 | allocate space for that if we're not allocating space for anything | 3928 | allocate space for that if we're not allocating space for anything |
| 3939 | else (see below). Also, we never need info about register 0 for | 3929 | else (see below). Also, we never need info about register 0 for |
| 3940 | any of the other register vectors, and it seems rather a kludge to | 3930 | any of the other register vectors, and it seems rather a kludge to |
| 3941 | treat `best_regend' differently than the rest. So we keep track of | 3931 | treat 'best_regend' differently than the rest. So we keep track of |
| 3942 | the end of the best match so far in a separate variable. We | 3932 | the end of the best match so far in a separate variable. We |
| 3943 | initialize this to NULL so that when we backtrack the first time | 3933 | initialize this to NULL so that when we backtrack the first time |
| 3944 | and need to test it, it's not garbage. */ | 3934 | and need to test it, it's not garbage. */ |
| @@ -3981,8 +3971,8 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 3981 | for (reg = 1; reg < num_regs; reg++) | 3971 | for (reg = 1; reg < num_regs; reg++) |
| 3982 | regstart[reg] = regend[reg] = NULL; | 3972 | regstart[reg] = regend[reg] = NULL; |
| 3983 | 3973 | ||
| 3984 | /* We move `string1' into `string2' if the latter's empty -- but not if | 3974 | /* We move 'string1' into 'string2' if the latter's empty -- but not if |
| 3985 | `string1' is null. */ | 3975 | 'string1' is null. */ |
| 3986 | if (size2 == 0 && string1 != NULL) | 3976 | if (size2 == 0 && string1 != NULL) |
| 3987 | { | 3977 | { |
| 3988 | string2 = string1; | 3978 | string2 = string1; |
| @@ -3993,12 +3983,12 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 3993 | end1 = string1 + size1; | 3983 | end1 = string1 + size1; |
| 3994 | end2 = string2 + size2; | 3984 | end2 = string2 + size2; |
| 3995 | 3985 | ||
| 3996 | /* `p' scans through the pattern as `d' scans through the data. | 3986 | /* P scans through the pattern as D scans through the data. |
| 3997 | `dend' is the end of the input string that `d' points within. `d' | 3987 | DEND is the end of the input string that D points within. |
| 3998 | is advanced into the following input string whenever necessary, but | 3988 | Advance D into the following input string whenever necessary, but |
| 3999 | this happens before fetching; therefore, at the beginning of the | 3989 | this happens before fetching; therefore, at the beginning of the |
| 4000 | loop, `d' can be pointing at the end of a string, but it cannot | 3990 | loop, D can be pointing at the end of a string, but it cannot |
| 4001 | equal `string2'. */ | 3991 | equal STRING2. */ |
| 4002 | if (pos >= size1) | 3992 | if (pos >= size1) |
| 4003 | { | 3993 | { |
| 4004 | /* Only match within string2. */ | 3994 | /* Only match within string2. */ |
| @@ -4015,7 +4005,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 4015 | /* BEWARE! | 4005 | /* BEWARE! |
| 4016 | When we reach end_match_1, PREFETCH normally switches to string2. | 4006 | When we reach end_match_1, PREFETCH normally switches to string2. |
| 4017 | But in the present case, this means that just doing a PREFETCH | 4007 | But in the present case, this means that just doing a PREFETCH |
| 4018 | makes us jump from `stop' to `gap' within the string. | 4008 | makes us jump from 'stop' to 'gap' within the string. |
| 4019 | What we really want here is for the search to stop as | 4009 | What we really want here is for the search to stop as |
| 4020 | soon as we hit end_match_1. That's why we set end_match_2 | 4010 | soon as we hit end_match_1. That's why we set end_match_2 |
| 4021 | to end_match_1 (since PREFETCH fails as soon as we hit | 4011 | to end_match_1 (since PREFETCH fails as soon as we hit |
| @@ -4023,8 +4013,8 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 4023 | end_match_2 = end_match_1; | 4013 | end_match_2 = end_match_1; |
| 4024 | } | 4014 | } |
| 4025 | else | 4015 | else |
| 4026 | { /* It's important to use this code when stop == size so that | 4016 | { /* It's important to use this code when STOP == SIZE so that |
| 4027 | moving `d' from end1 to string2 will not prevent the d == dend | 4017 | moving D from end1 to string2 will not prevent the D == DEND |
| 4028 | check from catching the end of string. */ | 4018 | check from catching the end of string. */ |
| 4029 | end_match_1 = end1; | 4019 | end_match_1 = end1; |
| 4030 | end_match_2 = string2 + stop - size1; | 4020 | end_match_2 = string2 + stop - size1; |
| @@ -4100,10 +4090,10 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 4100 | else if (best_regs_set && !best_match_p) | 4090 | else if (best_regs_set && !best_match_p) |
| 4101 | { | 4091 | { |
| 4102 | restore_best_regs: | 4092 | restore_best_regs: |
| 4103 | /* Restore best match. It may happen that `dend == | 4093 | /* Restore best match. It may happen that 'dend == |
| 4104 | end_match_1' while the restored d is in string2. | 4094 | end_match_1' while the restored d is in string2. |
| 4105 | For example, the pattern `x.*y.*z' against the | 4095 | For example, the pattern 'x.*y.*z' against the |
| 4106 | strings `x-' and `y-z-', if the two strings are | 4096 | strings 'x-' and 'y-z-', if the two strings are |
| 4107 | not consecutive in memory. */ | 4097 | not consecutive in memory. */ |
| 4108 | DEBUG_PRINT ("Restoring best registers.\n"); | 4098 | DEBUG_PRINT ("Restoring best registers.\n"); |
| 4109 | 4099 | ||
| @@ -4128,7 +4118,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 4128 | /* Have the register data arrays been allocated? */ | 4118 | /* Have the register data arrays been allocated? */ |
| 4129 | if (bufp->regs_allocated == REGS_UNALLOCATED) | 4119 | if (bufp->regs_allocated == REGS_UNALLOCATED) |
| 4130 | { /* No. So allocate them with malloc. We need one | 4120 | { /* No. So allocate them with malloc. We need one |
| 4131 | extra element beyond `num_regs' for the `-1' marker | 4121 | extra element beyond 'num_regs' for the '-1' marker |
| 4132 | GNU code uses. */ | 4122 | GNU code uses. */ |
| 4133 | regs->num_regs = max (RE_NREGS, num_regs + 1); | 4123 | regs->num_regs = max (RE_NREGS, num_regs + 1); |
| 4134 | regs->start = TALLOC (regs->num_regs, ptrdiff_t); | 4124 | regs->start = TALLOC (regs->num_regs, ptrdiff_t); |
| @@ -4149,7 +4139,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 4149 | else | 4139 | else |
| 4150 | eassert (bufp->regs_allocated == REGS_FIXED); | 4140 | eassert (bufp->regs_allocated == REGS_FIXED); |
| 4151 | 4141 | ||
| 4152 | /* Convert the pointer data in `regstart' and `regend' to | 4142 | /* Convert the pointer data in 'regstart' and 'regend' to |
| 4153 | indices. Register zero has to be set differently, | 4143 | indices. Register zero has to be set differently, |
| 4154 | since we haven't kept track of any info for it. */ | 4144 | since we haven't kept track of any info for it. */ |
| 4155 | if (regs->num_regs > 0) | 4145 | if (regs->num_regs > 0) |
| @@ -4158,7 +4148,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 4158 | regs->end[0] = POINTER_TO_OFFSET (d); | 4148 | regs->end[0] = POINTER_TO_OFFSET (d); |
| 4159 | } | 4149 | } |
| 4160 | 4150 | ||
| 4161 | /* Go through the first `min (num_regs, regs->num_regs)' | 4151 | /* Go through the first 'min (num_regs, regs->num_regs)' |
| 4162 | registers, since that is all we initialized. */ | 4152 | registers, since that is all we initialized. */ |
| 4163 | for (reg = 1; reg < min (num_regs, regs->num_regs); reg++) | 4153 | for (reg = 1; reg < min (num_regs, regs->num_regs); reg++) |
| 4164 | { | 4154 | { |
| @@ -4216,7 +4206,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 4216 | /* Remember the start point to rollback upon failure. */ | 4206 | /* Remember the start point to rollback upon failure. */ |
| 4217 | dfail = d; | 4207 | dfail = d; |
| 4218 | 4208 | ||
| 4219 | /* The cost of testing `translate' is comparatively small. */ | 4209 | /* The cost of testing 'translate' is comparatively small. */ |
| 4220 | if (target_multibyte) | 4210 | if (target_multibyte) |
| 4221 | do | 4211 | do |
| 4222 | { | 4212 | { |
| @@ -4405,7 +4395,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 4405 | break; | 4395 | break; |
| 4406 | 4396 | ||
| 4407 | 4397 | ||
| 4408 | /* \<digit> has been turned into a `duplicate' command which is | 4398 | /* \<digit> has been turned into a 'duplicate' command which is |
| 4409 | followed by the numeric value of <digit> as the register number. */ | 4399 | followed by the numeric value of <digit> as the register number. */ |
| 4410 | case duplicate: | 4400 | case duplicate: |
| 4411 | { | 4401 | { |
| @@ -4520,21 +4510,21 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 4520 | goto fail; | 4510 | goto fail; |
| 4521 | 4511 | ||
| 4522 | 4512 | ||
| 4523 | /* on_failure_keep_string_jump is used to optimize `.*\n'. It | 4513 | /* on_failure_keep_string_jump is used to optimize '.*\n'. It |
| 4524 | pushes NULL as the value for the string on the stack. Then | 4514 | pushes NULL as the value for the string on the stack. Then |
| 4525 | `POP_FAILURE_POINT' will keep the current value for the | 4515 | 'POP_FAILURE_POINT' will keep the current value for the |
| 4526 | string, instead of restoring it. To see why, consider | 4516 | string, instead of restoring it. To see why, consider |
| 4527 | matching `foo\nbar' against `.*\n'. The .* matches the foo; | 4517 | matching 'foo\nbar' against '.*\n'. The .* matches the foo; |
| 4528 | then the . fails against the \n. But the next thing we want | 4518 | then the . fails against the \n. But the next thing we want |
| 4529 | to do is match the \n against the \n; if we restored the | 4519 | to do is match the \n against the \n; if we restored the |
| 4530 | string value, we would be back at the foo. | 4520 | string value, we would be back at the foo. |
| 4531 | 4521 | ||
| 4532 | Because this is used only in specific cases, we don't need to | 4522 | Because this is used only in specific cases, we don't need to |
| 4533 | check all the things that `on_failure_jump' does, to make | 4523 | check all the things that 'on_failure_jump' does, to make |
| 4534 | sure the right things get saved on the stack. Hence we don't | 4524 | sure the right things get saved on the stack. Hence we don't |
| 4535 | share its code. The only reason to push anything on the | 4525 | share its code. The only reason to push anything on the |
| 4536 | stack at all is that otherwise we would have to change | 4526 | stack at all is that otherwise we would have to change |
| 4537 | `anychar's code to do something besides goto fail in this | 4527 | 'anychar's code to do something besides goto fail in this |
| 4538 | case; that seems worse than this. */ | 4528 | case; that seems worse than this. */ |
| 4539 | case on_failure_keep_string_jump: | 4529 | case on_failure_keep_string_jump: |
| 4540 | EXTRACT_NUMBER_AND_INCR (mcnt, p); | 4530 | EXTRACT_NUMBER_AND_INCR (mcnt, p); |
| @@ -4588,7 +4578,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 4588 | CHECK_INFINITE_LOOP (p - 3, d); | 4578 | CHECK_INFINITE_LOOP (p - 3, d); |
| 4589 | if (cycle) | 4579 | if (cycle) |
| 4590 | /* If there's a cycle, get out of the loop, as if the matching | 4580 | /* If there's a cycle, get out of the loop, as if the matching |
| 4591 | had failed. We used to just `goto fail' here, but that was | 4581 | had failed. We used to just 'goto fail' here, but that was |
| 4592 | aborting the search a bit too early: we want to keep the | 4582 | aborting the search a bit too early: we want to keep the |
| 4593 | empty-loop-match and keep matching after the loop. | 4583 | empty-loop-match and keep matching after the loop. |
| 4594 | We want (x?)*y\1z to match both xxyz and xxyxz. */ | 4584 | We want (x?)*y\1z to match both xxyz and xxyxz. */ |
| @@ -4623,7 +4613,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 4623 | Compare the beginning of the repeat with what in the | 4613 | Compare the beginning of the repeat with what in the |
| 4624 | pattern follows its end. If we can establish that there | 4614 | pattern follows its end. If we can establish that there |
| 4625 | is nothing that they would both match, i.e., that we | 4615 | is nothing that they would both match, i.e., that we |
| 4626 | would have to backtrack because of (as in, e.g., `a*a') | 4616 | would have to backtrack because of (as in, e.g., 'a*a') |
| 4627 | then we can use a non-backtracking loop based on | 4617 | then we can use a non-backtracking loop based on |
| 4628 | on_failure_keep_string_jump instead of on_failure_jump. */ | 4618 | on_failure_keep_string_jump instead of on_failure_jump. */ |
| 4629 | case on_failure_jump_smart: | 4619 | case on_failure_jump_smart: |
| @@ -4648,14 +4638,14 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 4648 | DEBUG_STATEMENT (regex_emacs_debug += 2); | 4638 | DEBUG_STATEMENT (regex_emacs_debug += 2); |
| 4649 | if (mutually_exclusive_p (bufp, p1, p2)) | 4639 | if (mutually_exclusive_p (bufp, p1, p2)) |
| 4650 | { | 4640 | { |
| 4651 | /* Use a fast `on_failure_keep_string_jump' loop. */ | 4641 | /* Use a fast 'on_failure_keep_string_jump' loop. */ |
| 4652 | DEBUG_PRINT (" smart exclusive => fast loop.\n"); | 4642 | DEBUG_PRINT (" smart exclusive => fast loop.\n"); |
| 4653 | *p3 = (unsigned char) on_failure_keep_string_jump; | 4643 | *p3 = (unsigned char) on_failure_keep_string_jump; |
| 4654 | STORE_NUMBER (p2 - 2, mcnt + 3); | 4644 | STORE_NUMBER (p2 - 2, mcnt + 3); |
| 4655 | } | 4645 | } |
| 4656 | else | 4646 | else |
| 4657 | { | 4647 | { |
| 4658 | /* Default to a safe `on_failure_jump' loop. */ | 4648 | /* Default to a safe 'on_failure_jump' loop. */ |
| 4659 | DEBUG_PRINT (" smart default => slow loop.\n"); | 4649 | DEBUG_PRINT (" smart default => slow loop.\n"); |
| 4660 | *p3 = (unsigned char) on_failure_jump; | 4650 | *p3 = (unsigned char) on_failure_jump; |
| 4661 | } | 4651 | } |
| @@ -4675,7 +4665,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 4675 | 4665 | ||
| 4676 | 4666 | ||
| 4677 | /* Have to succeed matching what follows at least n times. | 4667 | /* Have to succeed matching what follows at least n times. |
| 4678 | After that, handle like `on_failure_jump'. */ | 4668 | After that, handle like 'on_failure_jump'. */ |
| 4679 | case succeed_n: | 4669 | case succeed_n: |
| 4680 | /* Signedness doesn't matter since we only compare MCNT to 0. */ | 4670 | /* Signedness doesn't matter since we only compare MCNT to 0. */ |
| 4681 | EXTRACT_NUMBER (mcnt, p + 2); | 4671 | EXTRACT_NUMBER (mcnt, p + 2); |
| @@ -5054,7 +5044,7 @@ bcmp_translate (re_char *s1, re_char *s2, ptrdiff_t len, | |||
| 5054 | re_char *p2_end = s2 + len; | 5044 | re_char *p2_end = s2 + len; |
| 5055 | 5045 | ||
| 5056 | /* FIXME: Checking both p1 and p2 presumes that the two strings might have | 5046 | /* FIXME: Checking both p1 and p2 presumes that the two strings might have |
| 5057 | different lengths, but relying on a single `len' would break this. -sm */ | 5047 | different lengths, but relying on a single LEN would break this. -sm */ |
| 5058 | while (p1 < p1_end && p2 < p2_end) | 5048 | while (p1 < p1_end && p2 < p2_end) |
| 5059 | { | 5049 | { |
| 5060 | int p1_charlen, p2_charlen; | 5050 | int p1_charlen, p2_charlen; |
| @@ -5082,7 +5072,7 @@ bcmp_translate (re_char *s1, re_char *s2, ptrdiff_t len, | |||
| 5082 | compiles PATTERN (of length SIZE) and puts the result in BUFP. | 5072 | compiles PATTERN (of length SIZE) and puts the result in BUFP. |
| 5083 | Returns 0 if the pattern was valid, otherwise an error string. | 5073 | Returns 0 if the pattern was valid, otherwise an error string. |
| 5084 | 5074 | ||
| 5085 | Assumes the `allocated' (and perhaps `buffer') and `translate' fields | 5075 | Assumes the 'allocated' (and perhaps 'buffer') and 'translate' fields |
| 5086 | are set in BUFP on entry. | 5076 | are set in BUFP on entry. |
| 5087 | 5077 | ||
| 5088 | We call regex_compile to do the actual compilation. */ | 5078 | We call regex_compile to do the actual compilation. */ |
diff --git a/src/regex-emacs.h b/src/regex-emacs.h index b6dd26b2f4d..a849cbea054 100644 --- a/src/regex-emacs.h +++ b/src/regex-emacs.h | |||
| @@ -1,5 +1,4 @@ | |||
| 1 | /* Definitions for data structures and routines for the regular | 1 | /* Emacs regular expression API |
| 2 | expression library, version 0.12. | ||
| 3 | 2 | ||
| 4 | Copyright (C) 1985, 1989-1993, 1995, 2000-2018 Free Software | 3 | Copyright (C) 1985, 1989-1993, 1995, 2000-2018 Free Software |
| 5 | Foundation, Inc. | 4 | Foundation, Inc. |
| @@ -22,8 +21,7 @@ | |||
| 22 | 21 | ||
| 23 | #include <stddef.h> | 22 | #include <stddef.h> |
| 24 | 23 | ||
| 25 | /* This is the structure we store register match data in. See | 24 | /* This is the structure we store register match data in. |
| 26 | regex.texinfo for a full description of what registers match. | ||
| 27 | Declare this before including lisp.h, since lisp.h (via thread.h) | 25 | Declare this before including lisp.h, since lisp.h (via thread.h) |
| 28 | uses struct re_registers. */ | 26 | uses struct re_registers. */ |
| 29 | struct re_registers | 27 | struct re_registers |
| @@ -35,12 +33,12 @@ struct re_registers | |||
| 35 | 33 | ||
| 36 | #include "lisp.h" | 34 | #include "lisp.h" |
| 37 | 35 | ||
| 38 | /* In Emacs, this is the string or buffer in which we are matching. | 36 | /* The string or buffer being matched. |
| 39 | It is used for looking up syntax properties. | 37 | It is used for looking up syntax properties. |
| 40 | 38 | ||
| 41 | If the value is a Lisp string object, we are matching text in that | 39 | If the value is a Lisp string object, match text in that string; if |
| 42 | string; if it's nil, we are matching text in the current buffer; if | 40 | it's nil, match text in the current buffer; if it's t, match text |
| 43 | it's t, we are matching text in a C string. | 41 | in a C string. |
| 44 | 42 | ||
| 45 | This value is effectively another parameter to re_search_2 and | 43 | This value is effectively another parameter to re_search_2 and |
| 46 | re_match_2. No calls into Lisp or thread switches are allowed | 44 | re_match_2. No calls into Lisp or thread switches are allowed |
| @@ -58,25 +56,25 @@ extern size_t emacs_re_max_failures; | |||
| 58 | extern ptrdiff_t emacs_re_safe_alloca; | 56 | extern ptrdiff_t emacs_re_safe_alloca; |
| 59 | 57 | ||
| 60 | /* This data structure represents a compiled pattern. Before calling | 58 | /* This data structure represents a compiled pattern. Before calling |
| 61 | the pattern compiler, the fields `buffer', `allocated', `fastmap', | 59 | the pattern compiler, the fields 'buffer', 'allocated', 'fastmap', |
| 62 | and `translate' can be set. After the pattern has been | 60 | and 'translate' can be set. After the pattern has been |
| 63 | compiled, the `re_nsub' field is available. All other fields are | 61 | compiled, the 're_nsub' field is available. All other fields are |
| 64 | private to the regex routines. */ | 62 | private to the regex routines. */ |
| 65 | 63 | ||
| 66 | struct re_pattern_buffer | 64 | struct re_pattern_buffer |
| 67 | { | 65 | { |
| 68 | /* Space that holds the compiled pattern. It is declared as | 66 | /* Space that holds the compiled pattern. It is declared as |
| 69 | `unsigned char *' because its elements are | 67 | 'unsigned char *' because its elements are |
| 70 | sometimes used as array indexes. */ | 68 | sometimes used as array indexes. */ |
| 71 | unsigned char *buffer; | 69 | unsigned char *buffer; |
| 72 | 70 | ||
| 73 | /* Number of bytes to which `buffer' points. */ | 71 | /* Number of bytes to which 'buffer' points. */ |
| 74 | size_t allocated; | 72 | size_t allocated; |
| 75 | 73 | ||
| 76 | /* Number of bytes actually used in `buffer'. */ | 74 | /* Number of bytes actually used in 'buffer'. */ |
| 77 | size_t used; | 75 | size_t used; |
| 78 | 76 | ||
| 79 | /* Charset of unibyte characters at compiling time. */ | 77 | /* Charset of unibyte characters at compiling time. */ |
| 80 | int charset_unibyte; | 78 | int charset_unibyte; |
| 81 | 79 | ||
| 82 | /* Pointer to a fastmap, if any, otherwise zero. re_search uses | 80 | /* Pointer to a fastmap, if any, otherwise zero. re_search uses |
| @@ -86,31 +84,31 @@ struct re_pattern_buffer | |||
| 86 | 84 | ||
| 87 | /* Either a translate table to apply to all characters before | 85 | /* Either a translate table to apply to all characters before |
| 88 | comparing them, or zero for no translation. The translation | 86 | comparing them, or zero for no translation. The translation |
| 89 | is applied to a pattern when it is compiled and to a string | 87 | applies to a pattern when it is compiled and to a string |
| 90 | when it is matched. */ | 88 | when it is matched. */ |
| 91 | Lisp_Object translate; | 89 | Lisp_Object translate; |
| 92 | 90 | ||
| 93 | /* Number of subexpressions found by the compiler. */ | 91 | /* Number of subexpressions found by the compiler. */ |
| 94 | size_t re_nsub; | 92 | size_t re_nsub; |
| 95 | 93 | ||
| 96 | /* Zero if this pattern cannot match the empty string, one else. | 94 | /* True if and only if this pattern can match the empty string. |
| 97 | Well, in truth it's used only in `re_search_2', to see | 95 | Well, in truth it's used only in 're_search_2', to see |
| 98 | whether or not we should use the fastmap, so we don't set | 96 | whether or not we should use the fastmap, so we don't set |
| 99 | this absolutely perfectly; see `re_compile_fastmap'. */ | 97 | this absolutely perfectly; see 're_compile_fastmap'. */ |
| 100 | unsigned can_be_null : 1; | 98 | unsigned can_be_null : 1; |
| 101 | 99 | ||
| 102 | /* If REGS_UNALLOCATED, allocate space in the `regs' structure | 100 | /* If REGS_UNALLOCATED, allocate space in the 'regs' structure |
| 103 | for `max (RE_NREGS, re_nsub + 1)' groups. | 101 | for 'max (RE_NREGS, re_nsub + 1)' groups. |
| 104 | If REGS_REALLOCATE, reallocate space if necessary. | 102 | If REGS_REALLOCATE, reallocate space if necessary. |
| 105 | If REGS_FIXED, use what's there. */ | 103 | If REGS_FIXED, use what's there. */ |
| 106 | unsigned regs_allocated : 2; | 104 | unsigned regs_allocated : 2; |
| 107 | 105 | ||
| 108 | /* Set to zero when `regex_compile' compiles a pattern; set to one | 106 | /* Set to false when 'regex_compile' compiles a pattern; set to true |
| 109 | by `re_compile_fastmap' if it updates the fastmap. */ | 107 | by 're_compile_fastmap' if it updates the fastmap. */ |
| 110 | unsigned fastmap_accurate : 1; | 108 | unsigned fastmap_accurate : 1; |
| 111 | 109 | ||
| 112 | /* If true, the compilation of the pattern had to look up the syntax table, | 110 | /* If true, the compilation of the pattern had to look up the syntax table, |
| 113 | so the compiled pattern is only valid for the current syntax table. */ | 111 | so the compiled pattern is valid for the current syntax table only. */ |
| 114 | unsigned used_syntax : 1; | 112 | unsigned used_syntax : 1; |
| 115 | 113 | ||
| 116 | /* If true, multi-byte form in the regexp pattern should be | 114 | /* If true, multi-byte form in the regexp pattern should be |
| @@ -125,7 +123,7 @@ struct re_pattern_buffer | |||
| 125 | /* Declarations for routines. */ | 123 | /* Declarations for routines. */ |
| 126 | 124 | ||
| 127 | /* Compile the regular expression PATTERN, with length LENGTH | 125 | /* Compile the regular expression PATTERN, with length LENGTH |
| 128 | and syntax given by the global `re_syntax_options', into the buffer | 126 | and syntax given by the global 're_syntax_options', into the buffer |
| 129 | BUFFER. Return NULL if successful, and an error string if not. */ | 127 | BUFFER. Return NULL if successful, and an error string if not. */ |
| 130 | extern const char *re_compile_pattern (const char *pattern, size_t length, | 128 | extern const char *re_compile_pattern (const char *pattern, size_t length, |
| 131 | bool posix_backtracking, | 129 | bool posix_backtracking, |
| @@ -137,14 +135,14 @@ extern const char *re_compile_pattern (const char *pattern, size_t length, | |||
| 137 | compiled into BUFFER. Start searching at position START, for RANGE | 135 | compiled into BUFFER. Start searching at position START, for RANGE |
| 138 | characters. Return the starting position of the match, -1 for no | 136 | characters. Return the starting position of the match, -1 for no |
| 139 | match, or -2 for an internal error. Also return register | 137 | match, or -2 for an internal error. Also return register |
| 140 | information in REGS (if REGS is nonzero). */ | 138 | information in REGS (if REGS is non-null). */ |
| 141 | extern ptrdiff_t re_search (struct re_pattern_buffer *buffer, | 139 | extern ptrdiff_t re_search (struct re_pattern_buffer *buffer, |
| 142 | const char *string, size_t length, | 140 | const char *string, size_t length, |
| 143 | ptrdiff_t start, ptrdiff_t range, | 141 | ptrdiff_t start, ptrdiff_t range, |
| 144 | struct re_registers *regs); | 142 | struct re_registers *regs); |
| 145 | 143 | ||
| 146 | 144 | ||
| 147 | /* Like `re_search', but search in the concatenation of STRING1 and | 145 | /* Like 're_search', but search in the concatenation of STRING1 and |
| 148 | STRING2. Also, stop searching at index START + STOP. */ | 146 | STRING2. Also, stop searching at index START + STOP. */ |
| 149 | extern ptrdiff_t re_search_2 (struct re_pattern_buffer *buffer, | 147 | extern ptrdiff_t re_search_2 (struct re_pattern_buffer *buffer, |
| 150 | const char *string1, size_t length1, | 148 | const char *string1, size_t length1, |
| @@ -166,7 +164,7 @@ extern ptrdiff_t re_match_2 (struct re_pattern_buffer *buffer, | |||
| 166 | /* Set REGS to hold NUM_REGS registers, storing them in STARTS and | 164 | /* Set REGS to hold NUM_REGS registers, storing them in STARTS and |
| 167 | ENDS. Subsequent matches using BUFFER and REGS will use this memory | 165 | ENDS. Subsequent matches using BUFFER and REGS will use this memory |
| 168 | for recording register information. STARTS and ENDS must be | 166 | for recording register information. STARTS and ENDS must be |
| 169 | allocated with malloc, and must each be at least `NUM_REGS * sizeof | 167 | allocated with malloc, and must each be at least 'NUM_REGS * sizeof |
| 170 | (ptrdiff_t)' bytes long. | 168 | (ptrdiff_t)' bytes long. |
| 171 | 169 | ||
| 172 | If NUM_REGS == 0, then subsequent matches should allocate their own | 170 | If NUM_REGS == 0, then subsequent matches should allocate their own |
| @@ -196,4 +194,4 @@ extern bool re_iswctype (int ch, re_wctype_t cc); | |||
| 196 | extern re_wctype_t re_wctype_parse (const unsigned char **strp, | 194 | extern re_wctype_t re_wctype_parse (const unsigned char **strp, |
| 197 | unsigned limit); | 195 | unsigned limit); |
| 198 | 196 | ||
| 199 | #endif /* regex-emacs.h */ | 197 | #endif /* EMACS_REGEX_H */ |