diff options
| author | Stefan Monnier | 2000-03-08 23:25:41 +0000 |
|---|---|---|
| committer | Stefan Monnier | 2000-03-08 23:25:41 +0000 |
| commit | 505bde11b0fb6f539db1ef0713f19c3c2d86064a (patch) | |
| tree | 2e6f73eb8373fd90ff8d93860acd15daf32ef038 /src | |
| parent | dd52d9705c258145292bef9fc490035abc05e51f (diff) | |
| download | emacs-505bde11b0fb6f539db1ef0713f19c3c2d86064a.tar.gz emacs-505bde11b0fb6f539db1ef0713f19c3c2d86064a.zip | |
This is a big redesign of failure-stack and register handling, prompted
by bugs revealed when trying to add shy-groups. Overall, what happened
is that loops are now structured a little differently, groups can be
shy and the code is a little simpler.
(enum re_opcode_t): Remove jump_past_alt, maybe_pop_jump,
push_dummy_failure and dumy_failure_jump.
Add on_failure_jump_(exclusive, loop and smart).
Also fix the comment for (start|stop)_memory since they now only take
one argument (the second has becomes unnecessary).
(print_partial_compiled_pattern): Adjust for changes in re_opcode_t.
(print_compiled_pattern): Use %ld to printf long ints and flush to make
debugging a little easier.
(union fail_stack_elt): Make the integer unsigned.
(struct fail_stack_type): Add a `frame' element.
(INIT_FAIL_STACK): Init `frame' as well.
(POP_PATTERN_OP): New macro for re_compile_fastmap.
(DEBUG_PUSH, DEBUG_POP): Remove.
(NUM_REG_ITEMS): Remove.
(NUM_NONREG_ITEMS): Adjust.
(FAILURE_PAT, FAILURE_STR, NEXT_FAILURE_HANDLE, TOP_FAILURE_HANDLE):
New macros for the cycle detection.
(ENSURE_FAIL_STACK): New macro for PUSH_FAILURE_(REG|POINT).
(PUSH_FAILURE_REG, POP_FAILURE_REG, CHECK_INFINITE_LOOP): New macros.
(PUSH_FAILURE_POINT): Don't push registers any more.
The pattern address pushed is not the destination of the jump
but the source of it instead.
(NUM_FAILURE_ITEMS): Remove.
(POP_FAILURE_POINT): Adapt to the new stack structure (i.e. pop
registers before the actual failure point).
Don't hardcode any meaning for str==NULL anymore.
(union register_info_type, REG_MATCH_NULL_STRING_P, IS_ACTIVE)
(MATCHED_SOMETHING, EVER_MATCHED_SOMETHING, SET_REGS_MATCHED): Remove.
(REG_UNSET_VALUE): Use NULL (why not?).
(compile_range): Remove declaration since it doesn't exist.
(struct compile_stack_elt_t): Remove inner_group_offset.
(old_reg(start|end), reg_info, reg_dummy, reg_info_dummy): Remove.
(regex_grow_registers): Remove dead code.
(FIXUP_ALT_JUMP): New macro.
(regex_compile): Add shy-groups
Change loops to use on_failure_jump_smart&jump instead of
on_failure_jump&maybe_pop_jump.
Change + loops to eliminate the initial (dummy_failure_)jump.
Remove c1_base (looks like unused variable to me).
Use `jump' instead of `jump_past_alt' and don't bother with
push_dummy_failure in alternatives since it is now unnecessary.
Use FIXUP_ALT_JUMP.
Eliminate a useless `#ifdef emacs' for (re)allocating the stack.
(re_compile_fastmap): Remove dead variables i and num_regs.
Exit from loop when bufp->can_be_null rather than jumping to `done'.
Avoid jumping backwards so as to ensure termination.
Use PATTERN_STACK_EMPTY and POP_PATTERN_OP.
Improved handling of backreferences.
Remove dead code in handling of `anychar'.
(skip_noops, mutually_exclusive_p): New functions taken from the
handling of `maybe_pop_jump' in re_match_2_internal.
Slightly improve mutually_exclusive_p to handle ".+\n".
((lowest|highest)_active_reg, NO_(LOWEST|HIGHEST)_ACTIVE_REG)
Remove.
(re_match_2_internal): Use %p instead of 0x%x when printf'ing ptrs.
Don't SET_REGS_MATCHED anymore. Remove many dead variables.
Push register (in `start_memory') on the stack rather than storing it
in old_reg(start|end).
Remove the cycle detection from `stop_memory', replaced by the use
of on_failure_jump_loop for greedy loops.
Add code for the new on_failure_jump_<foo>.
Remove ad-hoc code in `on_failure_jump' to push more registers
in the case of a loop.
Take out code from `maybe_pop_jump' into separate functions and
adapt it to the semantics of `on_failure_jump_smart'.
Remove jump_past_alt, dummy_failure_jump and push_dummy_failure.
Remove dummy_failure handling and handling of `failures to jump
to on_failure_jump' (this last one was already dead code, it seems).
((group|alt|common_op)_match_null_string_p): Remove.
Diffstat (limited to 'src')
| -rw-r--r-- | src/regex.c | 1842 |
1 files changed, 623 insertions, 1219 deletions
diff --git a/src/regex.c b/src/regex.c index 09f56a1dffc..809a7d24219 100644 --- a/src/regex.c +++ b/src/regex.c | |||
| @@ -2,7 +2,7 @@ | |||
| 2 | 0.12. (Implements POSIX draft P10003.2/D11.2, except for | 2 | 0.12. (Implements POSIX draft P10003.2/D11.2, except for |
| 3 | internationalization features.) | 3 | internationalization features.) |
| 4 | 4 | ||
| 5 | Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999 Free Software Foundation, Inc. | 5 | Copyright (C) 1993,94,95,96,97,98,2000 Free Software Foundation, Inc. |
| 6 | 6 | ||
| 7 | This program is free software; you can redistribute it and/or modify | 7 | This program is free software; you can redistribute it and/or modify |
| 8 | it under the terms of the GNU General Public License as published by | 8 | it under the terms of the GNU General Public License as published by |
| @@ -19,6 +19,14 @@ | |||
| 19 | Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, | 19 | Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, |
| 20 | USA. */ | 20 | USA. */ |
| 21 | 21 | ||
| 22 | /* TODO: | ||
| 23 | - detect nasty infinite loops like "\\(\\)+?ab" when matching "ac". | ||
| 24 | - use `keep_string' more often than just .*\n. | ||
| 25 | - structure the opcode space into opcode+flag. | ||
| 26 | - merge with glic's regex.[ch] | ||
| 27 | |||
| 28 | That's it for now -sm */ | ||
| 29 | |||
| 22 | /* AIX requires this to be the first thing in the file. */ | 30 | /* AIX requires this to be the first thing in the file. */ |
| 23 | #if defined (_AIX) && !defined (REGEX_MALLOC) | 31 | #if defined (_AIX) && !defined (REGEX_MALLOC) |
| 24 | #pragma alloca | 32 | #pragma alloca |
| @@ -473,19 +481,13 @@ typedef enum | |||
| 473 | /* Start remembering the text that is matched, for storing in a | 481 | /* Start remembering the text that is matched, for storing in a |
| 474 | register. Followed by one byte with the register number, in | 482 | register. Followed by one byte with the register number, in |
| 475 | the range 0 to one less than the pattern buffer's re_nsub | 483 | the range 0 to one less than the pattern buffer's re_nsub |
| 476 | field. Then followed by one byte with the number of groups | 484 | field. */ |
| 477 | inner to this one. (This last has to be part of the | ||
| 478 | start_memory only because we need it in the on_failure_jump | ||
| 479 | of re_match_2.) */ | ||
| 480 | start_memory, | 485 | start_memory, |
| 481 | 486 | ||
| 482 | /* Stop remembering the text that is matched and store it in a | 487 | /* Stop remembering the text that is matched and store it in a |
| 483 | memory register. Followed by one byte with the register | 488 | memory register. Followed by one byte with the register |
| 484 | number, in the range 0 to one less than `re_nsub' in the | 489 | number, in the range 0 to one less than `re_nsub' in the |
| 485 | pattern buffer, and one byte with the number of inner groups, | 490 | pattern buffer. */ |
| 486 | just like `start_memory'. (We need the number of inner | ||
| 487 | groups here because we don't have any easy way of finding the | ||
| 488 | corresponding start_memory when we're at a stop_memory.) */ | ||
| 489 | stop_memory, | 491 | stop_memory, |
| 490 | 492 | ||
| 491 | /* Match a duplicate of something remembered. Followed by one | 493 | /* Match a duplicate of something remembered. Followed by one |
| @@ -508,9 +510,6 @@ typedef enum | |||
| 508 | /* Followed by two byte relative address to which to jump. */ | 510 | /* Followed by two byte relative address to which to jump. */ |
| 509 | jump, | 511 | jump, |
| 510 | 512 | ||
| 511 | /* Same as jump, but marks the end of an alternative. */ | ||
| 512 | jump_past_alt, | ||
| 513 | |||
| 514 | /* Followed by two-byte relative address of place to resume at | 513 | /* Followed by two-byte relative address of place to resume at |
| 515 | in case of failure. */ | 514 | in case of failure. */ |
| 516 | on_failure_jump, | 515 | on_failure_jump, |
| @@ -519,29 +518,24 @@ typedef enum | |||
| 519 | current string position when executed. */ | 518 | current string position when executed. */ |
| 520 | on_failure_keep_string_jump, | 519 | on_failure_keep_string_jump, |
| 521 | 520 | ||
| 522 | /* Throw away latest failure point and then jump to following | 521 | /* Like `on_failure_jump', except that it assumes that the |
| 523 | two-byte relative address. */ | 522 | pattern following it is mutually exclusive with the pattern |
| 524 | pop_failure_jump, | 523 | at the destination of the jump: if one matches something, |
| 525 | 524 | the other won't match at all. | |
| 526 | /* Change to pop_failure_jump if know won't have to backtrack to | 525 | Always used via `on_failure_jump_smart'. */ |
| 527 | match; otherwise change to jump. This is used to jump | 526 | on_failure_jump_exclusive, |
| 528 | back to the beginning of a repeat. If what follows this jump | 527 | |
| 529 | clearly won't match what the repeat does, such that we can be | 528 | /* Just like `on_failure_jump', except that it checks that we |
| 530 | sure that there is no use backtracking out of repetitions | 529 | don't get stuck in an infinite loop (matching an empty string |
| 531 | already matched, then we change it to a pop_failure_jump. | 530 | indefinitely). */ |
| 532 | Followed by two-byte address. */ | 531 | on_failure_jump_loop, |
| 533 | maybe_pop_jump, | 532 | |
| 534 | 533 | /* A smart `on_failure_jump' used for greedy * and + operators. | |
| 535 | /* Jump to following two-byte address, and push a dummy failure | 534 | It analyses the loop before which it is put and if the |
| 536 | point. This failure point will be thrown away if an attempt | 535 | loop does not require backtracking, it changes itself to |
| 537 | is made to use it for a failure. A `+' construct makes this | 536 | `on_failure_jump_exclusive', else it just defaults to |
| 538 | before the first repeat. Also used as an intermediary kind | 537 | changing itself into `on_failure_jump_loop'. */ |
| 539 | of jump when compiling an alternative. */ | 538 | on_failure_jump_smart, |
| 540 | dummy_failure_jump, | ||
| 541 | |||
| 542 | /* Push a dummy failure point and continue. Used at the end of | ||
| 543 | alternatives. */ | ||
| 544 | push_dummy_failure, | ||
| 545 | 539 | ||
| 546 | /* Followed by two-byte relative address and two-byte number n. | 540 | /* Followed by two-byte relative address and two-byte number n. |
| 547 | After matching N times, jump to the address upon failure. */ | 541 | After matching N times, jump to the address upon failure. */ |
| @@ -855,13 +849,11 @@ print_partial_compiled_pattern (start, end) | |||
| 855 | break; | 849 | break; |
| 856 | 850 | ||
| 857 | case start_memory: | 851 | case start_memory: |
| 858 | mcnt = *p++; | 852 | printf ("/start_memory/%d", *p++); |
| 859 | printf ("/start_memory/%d/%d", mcnt, *p++); | ||
| 860 | break; | 853 | break; |
| 861 | 854 | ||
| 862 | case stop_memory: | 855 | case stop_memory: |
| 863 | mcnt = *p++; | 856 | printf ("/stop_memory/%d", *p++); |
| 864 | printf ("/stop_memory/%d/%d", mcnt, *p++); | ||
| 865 | break; | 857 | break; |
| 866 | 858 | ||
| 867 | case duplicate: | 859 | case duplicate: |
| @@ -944,28 +936,19 @@ print_partial_compiled_pattern (start, end) | |||
| 944 | printf ("/on_failure_keep_string_jump to %d", p + mcnt - start); | 936 | printf ("/on_failure_keep_string_jump to %d", p + mcnt - start); |
| 945 | break; | 937 | break; |
| 946 | 938 | ||
| 947 | case dummy_failure_jump: | 939 | case on_failure_jump_exclusive: |
| 948 | extract_number_and_incr (&mcnt, &p); | ||
| 949 | printf ("/dummy_failure_jump to %d", p + mcnt - start); | ||
| 950 | break; | ||
| 951 | |||
| 952 | case push_dummy_failure: | ||
| 953 | printf ("/push_dummy_failure"); | ||
| 954 | break; | ||
| 955 | |||
| 956 | case maybe_pop_jump: | ||
| 957 | extract_number_and_incr (&mcnt, &p); | 940 | extract_number_and_incr (&mcnt, &p); |
| 958 | printf ("/maybe_pop_jump to %d", p + mcnt - start); | 941 | printf ("/on_failure_jump_exclusive to %d", p + mcnt - start); |
| 959 | break; | 942 | break; |
| 960 | 943 | ||
| 961 | case pop_failure_jump: | 944 | case on_failure_jump_loop: |
| 962 | extract_number_and_incr (&mcnt, &p); | 945 | extract_number_and_incr (&mcnt, &p); |
| 963 | printf ("/pop_failure_jump to %d", p + mcnt - start); | 946 | printf ("/on_failure_jump_loop to %d", p + mcnt - start); |
| 964 | break; | 947 | break; |
| 965 | 948 | ||
| 966 | case jump_past_alt: | 949 | case on_failure_jump_smart: |
| 967 | extract_number_and_incr (&mcnt, &p); | 950 | extract_number_and_incr (&mcnt, &p); |
| 968 | printf ("/jump_past_alt to %d", p + mcnt - start); | 951 | printf ("/on_failure_jump_smart to %d", p + mcnt - start); |
| 969 | break; | 952 | break; |
| 970 | 953 | ||
| 971 | case jump: | 954 | case jump: |
| @@ -1066,7 +1049,7 @@ print_compiled_pattern (bufp) | |||
| 1066 | unsigned char *buffer = bufp->buffer; | 1049 | unsigned char *buffer = bufp->buffer; |
| 1067 | 1050 | ||
| 1068 | print_partial_compiled_pattern (buffer, buffer + bufp->used); | 1051 | print_partial_compiled_pattern (buffer, buffer + bufp->used); |
| 1069 | printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated); | 1052 | printf ("%ld bytes used/%ld bytes allocated.\n", bufp->used, bufp->allocated); |
| 1070 | 1053 | ||
| 1071 | if (bufp->fastmap_accurate && bufp->fastmap) | 1054 | if (bufp->fastmap_accurate && bufp->fastmap) |
| 1072 | { | 1055 | { |
| @@ -1082,6 +1065,7 @@ print_compiled_pattern (bufp) | |||
| 1082 | printf ("not_bol: %d\t", bufp->not_bol); | 1065 | printf ("not_bol: %d\t", bufp->not_bol); |
| 1083 | printf ("not_eol: %d\t", bufp->not_eol); | 1066 | printf ("not_eol: %d\t", bufp->not_eol); |
| 1084 | printf ("syntax: %d\n", bufp->syntax); | 1067 | printf ("syntax: %d\n", bufp->syntax); |
| 1068 | fflush (stdout); | ||
| 1085 | /* Perhaps we should print the translate table? */ | 1069 | /* Perhaps we should print the translate table? */ |
| 1086 | } | 1070 | } |
| 1087 | 1071 | ||
| @@ -1246,7 +1230,7 @@ int re_max_failures = 4000; | |||
| 1246 | union fail_stack_elt | 1230 | union fail_stack_elt |
| 1247 | { | 1231 | { |
| 1248 | unsigned char *pointer; | 1232 | unsigned char *pointer; |
| 1249 | int integer; | 1233 | unsigned int integer; |
| 1250 | }; | 1234 | }; |
| 1251 | 1235 | ||
| 1252 | typedef union fail_stack_elt fail_stack_elt_t; | 1236 | typedef union fail_stack_elt fail_stack_elt_t; |
| @@ -1255,11 +1239,12 @@ typedef struct | |||
| 1255 | { | 1239 | { |
| 1256 | fail_stack_elt_t *stack; | 1240 | fail_stack_elt_t *stack; |
| 1257 | unsigned size; | 1241 | unsigned size; |
| 1258 | unsigned avail; /* Offset of next open position. */ | 1242 | unsigned avail; /* Offset of next open position. */ |
| 1243 | unsigned frame; /* Offset of the cur constructed frame. */ | ||
| 1259 | } fail_stack_type; | 1244 | } fail_stack_type; |
| 1260 | 1245 | ||
| 1261 | #define FAIL_STACK_EMPTY() (fail_stack.avail == 0) | 1246 | #define PATTERN_STACK_EMPTY() (fail_stack.avail == 0) |
| 1262 | #define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0) | 1247 | #define FAIL_STACK_EMPTY() (fail_stack.frame == 0) |
| 1263 | #define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size) | 1248 | #define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size) |
| 1264 | 1249 | ||
| 1265 | 1250 | ||
| @@ -1278,6 +1263,7 @@ typedef struct | |||
| 1278 | \ | 1263 | \ |
| 1279 | fail_stack.size = INIT_FAILURE_ALLOC; \ | 1264 | fail_stack.size = INIT_FAILURE_ALLOC; \ |
| 1280 | fail_stack.avail = 0; \ | 1265 | fail_stack.avail = 0; \ |
| 1266 | fail_stack.frame = 0; \ | ||
| 1281 | } while (0) | 1267 | } while (0) |
| 1282 | 1268 | ||
| 1283 | #define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack) | 1269 | #define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack) |
| @@ -1285,6 +1271,7 @@ typedef struct | |||
| 1285 | #define INIT_FAIL_STACK() \ | 1271 | #define INIT_FAIL_STACK() \ |
| 1286 | do { \ | 1272 | do { \ |
| 1287 | fail_stack.avail = 0; \ | 1273 | fail_stack.avail = 0; \ |
| 1274 | fail_stack.frame = 0; \ | ||
| 1288 | } while (0) | 1275 | } while (0) |
| 1289 | 1276 | ||
| 1290 | #define RESET_FAIL_STACK() | 1277 | #define RESET_FAIL_STACK() |
| @@ -1337,6 +1324,7 @@ typedef struct | |||
| 1337 | ? 0 \ | 1324 | ? 0 \ |
| 1338 | : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER, \ | 1325 | : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER, \ |
| 1339 | 1)) | 1326 | 1)) |
| 1327 | #define POP_PATTERN_OP() POP_FAILURE_POINTER () | ||
| 1340 | 1328 | ||
| 1341 | /* Push a pointer value onto the failure stack. | 1329 | /* Push a pointer value onto the failure stack. |
| 1342 | Assumes the variable `fail_stack'. Probably should only | 1330 | Assumes the variable `fail_stack'. Probably should only |
| @@ -1362,110 +1350,105 @@ typedef struct | |||
| 1362 | #define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer | 1350 | #define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer |
| 1363 | #define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail] | 1351 | #define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail] |
| 1364 | 1352 | ||
| 1365 | /* Used to omit pushing failure point id's when we're not debugging. */ | 1353 | /* Individual items aside from the registers. */ |
| 1366 | #ifdef DEBUG | 1354 | #define NUM_NONREG_ITEMS 3 |
| 1367 | #define DEBUG_PUSH PUSH_FAILURE_INT | 1355 | |
| 1368 | #define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT () | 1356 | /* Used to examine the stack (to detect infinite loops). */ |
| 1369 | #else | 1357 | #define FAILURE_PAT(h) fail_stack.stack[(h) - 1].pointer |
| 1370 | #define DEBUG_PUSH(item) | 1358 | #define FAILURE_STR(h) ((char*)fail_stack.stack[(h) - 2].pointer) |
| 1371 | #define DEBUG_POP(item_addr) | 1359 | #define NEXT_FAILURE_HANDLE(h) fail_stack.stack[(h) - 3].integer |
| 1372 | #endif | 1360 | #define TOP_FAILURE_HANDLE() fail_stack.frame |
| 1373 | 1361 | ||
| 1374 | 1362 | ||
| 1363 | #define ENSURE_FAIL_STACK(space) \ | ||
| 1364 | while (REMAINING_AVAIL_SLOTS <= space) { \ | ||
| 1365 | if (!GROW_FAIL_STACK (fail_stack)) \ | ||
| 1366 | return -2; \ | ||
| 1367 | DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", (fail_stack).size);\ | ||
| 1368 | DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\ | ||
| 1369 | } | ||
| 1370 | |||
| 1371 | /* Push register NUM onto the stack. */ | ||
| 1372 | #define PUSH_FAILURE_REG(num) \ | ||
| 1373 | do { \ | ||
| 1374 | char *destination; \ | ||
| 1375 | ENSURE_FAIL_STACK(3); \ | ||
| 1376 | DEBUG_PRINT4 (" Push reg %d (spanning %p -> %p)\n", \ | ||
| 1377 | num, regstart[num], regend[num]); \ | ||
| 1378 | PUSH_FAILURE_POINTER (regstart[num]); \ | ||
| 1379 | PUSH_FAILURE_POINTER (regend[num]); \ | ||
| 1380 | PUSH_FAILURE_INT (num); \ | ||
| 1381 | } while (0) | ||
| 1382 | |||
| 1383 | /* Pop a saved register off the stack. */ | ||
| 1384 | #define POP_FAILURE_REG() \ | ||
| 1385 | do { \ | ||
| 1386 | int reg = POP_FAILURE_INT (); \ | ||
| 1387 | regend[reg] = POP_FAILURE_POINTER (); \ | ||
| 1388 | regstart[reg] = POP_FAILURE_POINTER (); \ | ||
| 1389 | DEBUG_PRINT4 (" Pop reg %d (spanning %p -> %p)\n", \ | ||
| 1390 | reg, regstart[reg], regend[reg]); \ | ||
| 1391 | } while (0) | ||
| 1392 | |||
| 1393 | /* Check that we are not stuck in an infinite loop. */ | ||
| 1394 | #define CHECK_INFINITE_LOOP(pat_cur, string_place) \ | ||
| 1395 | do { \ | ||
| 1396 | int failure = TOP_FAILURE_HANDLE(); \ | ||
| 1397 | /* Check for infinite matching loops */ \ | ||
| 1398 | while (failure > 0 && \ | ||
| 1399 | (FAILURE_STR (failure) == string_place \ | ||
| 1400 | || FAILURE_STR (failure) == NULL)) \ | ||
| 1401 | { \ | ||
| 1402 | assert (FAILURE_PAT (failure) >= bufp->buffer \ | ||
| 1403 | && FAILURE_PAT (failure) <= bufp->buffer + bufp->used);\ | ||
| 1404 | if (FAILURE_PAT (failure) == pat_cur) \ | ||
| 1405 | goto fail; \ | ||
| 1406 | DEBUG_PRINT2 (" Other pattern: %p\n", FAILURE_PAT (failure));\ | ||
| 1407 | failure = NEXT_FAILURE_HANDLE(failure); \ | ||
| 1408 | } \ | ||
| 1409 | DEBUG_PRINT2 (" Other string: %p\n", FAILURE_STR (failure)); \ | ||
| 1410 | } while (0) | ||
| 1411 | |||
| 1375 | /* Push the information about the state we will need | 1412 | /* Push the information about the state we will need |
| 1376 | if we ever fail back to it. | 1413 | if we ever fail back to it. |
| 1377 | 1414 | ||
| 1378 | Requires variables fail_stack, regstart, regend, reg_info, and | 1415 | Requires variables fail_stack, regstart, regend and |
| 1379 | num_regs be declared. GROW_FAIL_STACK requires `destination' be | 1416 | num_regs be declared. GROW_FAIL_STACK requires `destination' be |
| 1380 | declared. | 1417 | declared. |
| 1381 | 1418 | ||
| 1382 | Does `return FAILURE_CODE' if runs out of memory. */ | 1419 | Does `return FAILURE_CODE' if runs out of memory. */ |
| 1383 | 1420 | ||
| 1384 | #define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \ | 1421 | #define PUSH_FAILURE_POINT(pattern, string_place) \ |
| 1385 | do { \ | 1422 | do { \ |
| 1386 | char *destination; \ | 1423 | char *destination; \ |
| 1387 | /* Must be int, so when we don't save any registers, the arithmetic \ | 1424 | /* Must be int, so when we don't save any registers, the arithmetic \ |
| 1388 | of 0 + -1 isn't done as unsigned. */ \ | 1425 | of 0 + -1 isn't done as unsigned. */ \ |
| 1389 | int this_reg; \ | 1426 | \ |
| 1390 | \ | 1427 | DEBUG_STATEMENT (failure_id++); \ |
| 1391 | DEBUG_STATEMENT (failure_id++); \ | 1428 | DEBUG_STATEMENT (nfailure_points_pushed++); \ |
| 1392 | DEBUG_STATEMENT (nfailure_points_pushed++); \ | 1429 | DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \ |
| 1393 | DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \ | 1430 | DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail); \ |
| 1394 | DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\ | 1431 | DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\ |
| 1395 | DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\ | 1432 | \ |
| 1396 | \ | 1433 | ENSURE_FAIL_STACK (NUM_NONREG_ITEMS); \ |
| 1397 | DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \ | 1434 | \ |
| 1398 | DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \ | 1435 | DEBUG_PRINT1 ("\n"); \ |
| 1399 | \ | 1436 | \ |
| 1400 | /* Ensure we have enough space allocated for what we will push. */ \ | 1437 | DEBUG_PRINT2 (" Push frame index: %d\n", fail_stack.frame); \ |
| 1401 | while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \ | 1438 | PUSH_FAILURE_INT (fail_stack.frame); \ |
| 1402 | { \ | 1439 | \ |
| 1403 | if (!GROW_FAIL_STACK (fail_stack)) \ | 1440 | DEBUG_PRINT2 (" Push string %p: `", string_place); \ |
| 1404 | return failure_code; \ | 1441 | DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, size2);\ |
| 1405 | \ | 1442 | DEBUG_PRINT1 ("'\n"); \ |
| 1406 | DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \ | 1443 | PUSH_FAILURE_POINTER (string_place); \ |
| 1407 | (fail_stack).size); \ | 1444 | \ |
| 1408 | DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\ | 1445 | DEBUG_PRINT2 (" Push pattern %p: ", pattern); \ |
| 1409 | } \ | 1446 | DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern, pend); \ |
| 1410 | \ | 1447 | PUSH_FAILURE_POINTER (pattern); \ |
| 1411 | /* Push the info, starting with the registers. */ \ | 1448 | \ |
| 1412 | DEBUG_PRINT1 ("\n"); \ | 1449 | /* Close the frame by moving the frame pointer past it. */ \ |
| 1413 | \ | 1450 | fail_stack.frame = fail_stack.avail; \ |
| 1414 | if (1) \ | 1451 | } while (0) |
| 1415 | for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \ | ||
| 1416 | this_reg++) \ | ||
| 1417 | { \ | ||
| 1418 | DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg); \ | ||
| 1419 | DEBUG_STATEMENT (num_regs_pushed++); \ | ||
| 1420 | \ | ||
| 1421 | DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \ | ||
| 1422 | PUSH_FAILURE_POINTER (regstart[this_reg]); \ | ||
| 1423 | \ | ||
| 1424 | DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \ | ||
| 1425 | PUSH_FAILURE_POINTER (regend[this_reg]); \ | ||
| 1426 | \ | ||
| 1427 | DEBUG_PRINT2 (" info: 0x%x\n ", reg_info[this_reg]); \ | ||
| 1428 | DEBUG_PRINT2 (" match_null=%d", \ | ||
| 1429 | REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \ | ||
| 1430 | DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \ | ||
| 1431 | DEBUG_PRINT2 (" matched_something=%d", \ | ||
| 1432 | MATCHED_SOMETHING (reg_info[this_reg])); \ | ||
| 1433 | DEBUG_PRINT2 (" ever_matched=%d", \ | ||
| 1434 | EVER_MATCHED_SOMETHING (reg_info[this_reg])); \ | ||
| 1435 | DEBUG_PRINT1 ("\n"); \ | ||
| 1436 | PUSH_FAILURE_ELT (reg_info[this_reg].word); \ | ||
| 1437 | } \ | ||
| 1438 | \ | ||
| 1439 | DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg);\ | ||
| 1440 | PUSH_FAILURE_INT (lowest_active_reg); \ | ||
| 1441 | \ | ||
| 1442 | DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg);\ | ||
| 1443 | PUSH_FAILURE_INT (highest_active_reg); \ | ||
| 1444 | \ | ||
| 1445 | DEBUG_PRINT2 (" Pushing pattern 0x%x: ", pattern_place); \ | ||
| 1446 | DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \ | ||
| 1447 | PUSH_FAILURE_POINTER (pattern_place); \ | ||
| 1448 | \ | ||
| 1449 | DEBUG_PRINT2 (" Pushing string 0x%x: `", string_place); \ | ||
| 1450 | DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \ | ||
| 1451 | size2); \ | ||
| 1452 | DEBUG_PRINT1 ("'\n"); \ | ||
| 1453 | PUSH_FAILURE_POINTER (string_place); \ | ||
| 1454 | \ | ||
| 1455 | DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \ | ||
| 1456 | DEBUG_PUSH (failure_id); \ | ||
| 1457 | } while (0) | ||
| 1458 | |||
| 1459 | /* This is the number of items that are pushed and popped on the stack | ||
| 1460 | for each register. */ | ||
| 1461 | #define NUM_REG_ITEMS 3 | ||
| 1462 | |||
| 1463 | /* Individual items aside from the registers. */ | ||
| 1464 | #ifdef DEBUG | ||
| 1465 | #define NUM_NONREG_ITEMS 5 /* Includes failure point id. */ | ||
| 1466 | #else | ||
| 1467 | #define NUM_NONREG_ITEMS 4 | ||
| 1468 | #endif | ||
| 1469 | 1452 | ||
| 1470 | /* Estimate the size of data pushed by a typical failure stack entry. | 1453 | /* Estimate the size of data pushed by a typical failure stack entry. |
| 1471 | An estimate is all we need, because all we use this for | 1454 | An estimate is all we need, because all we use this for |
| @@ -1473,14 +1456,6 @@ typedef struct | |||
| 1473 | 1456 | ||
| 1474 | #define TYPICAL_FAILURE_SIZE 20 | 1457 | #define TYPICAL_FAILURE_SIZE 20 |
| 1475 | 1458 | ||
| 1476 | /* This is how many items we actually use for a failure point. | ||
| 1477 | It depends on the regexp. */ | ||
| 1478 | #define NUM_FAILURE_ITEMS \ | ||
| 1479 | (((0 \ | ||
| 1480 | ? 0 : highest_active_reg - lowest_active_reg + 1) \ | ||
| 1481 | * NUM_REG_ITEMS) \ | ||
| 1482 | + NUM_NONREG_ITEMS) | ||
| 1483 | |||
| 1484 | /* How many items can still be added to the stack without overflowing it. */ | 1459 | /* How many items can still be added to the stack without overflowing it. */ |
| 1485 | #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail) | 1460 | #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail) |
| 1486 | 1461 | ||
| @@ -1490,19 +1465,13 @@ typedef struct | |||
| 1490 | We restore into the parameters, all of which should be lvalues: | 1465 | We restore into the parameters, all of which should be lvalues: |
| 1491 | STR -- the saved data position. | 1466 | STR -- the saved data position. |
| 1492 | PAT -- the saved pattern position. | 1467 | PAT -- the saved pattern position. |
| 1493 | LOW_REG, HIGH_REG -- the highest and lowest active registers. | ||
| 1494 | REGSTART, REGEND -- arrays of string positions. | 1468 | REGSTART, REGEND -- arrays of string positions. |
| 1495 | REG_INFO -- array of information about each subexpression. | ||
| 1496 | 1469 | ||
| 1497 | Also assumes the variables `fail_stack' and (if debugging), `bufp', | 1470 | Also assumes the variables `fail_stack' and (if debugging), `bufp', |
| 1498 | `pend', `string1', `size1', `string2', and `size2'. */ | 1471 | `pend', `string1', `size1', `string2', and `size2'. */ |
| 1499 | 1472 | ||
| 1500 | #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\ | 1473 | #define POP_FAILURE_POINT(str, pat) \ |
| 1501 | { \ | 1474 | do { \ |
| 1502 | DEBUG_STATEMENT (fail_stack_elt_t failure_id;) \ | ||
| 1503 | int this_reg; \ | ||
| 1504 | const unsigned char *string_temp; \ | ||
| 1505 | \ | ||
| 1506 | assert (!FAIL_STACK_EMPTY ()); \ | 1475 | assert (!FAIL_STACK_EMPTY ()); \ |
| 1507 | \ | 1476 | \ |
| 1508 | /* Remove failure points and point to how many regs pushed. */ \ | 1477 | /* Remove failure points and point to how many regs pushed. */ \ |
| @@ -1510,119 +1479,35 @@ typedef struct | |||
| 1510 | DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \ | 1479 | DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \ |
| 1511 | DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \ | 1480 | DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \ |
| 1512 | \ | 1481 | \ |
| 1513 | assert (fail_stack.avail >= NUM_NONREG_ITEMS); \ | 1482 | /* Pop the saved registers. */ \ |
| 1483 | while (fail_stack.frame < fail_stack.avail) \ | ||
| 1484 | POP_FAILURE_REG (); \ | ||
| 1514 | \ | 1485 | \ |
| 1515 | DEBUG_POP (&failure_id.integer); \ | 1486 | pat = (unsigned char *) POP_FAILURE_POINTER (); \ |
| 1516 | DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id.integer); \ | 1487 | DEBUG_PRINT2 (" Popping pattern %p: ", pat); \ |
| 1488 | DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \ | ||
| 1517 | \ | 1489 | \ |
| 1518 | /* If the saved string location is NULL, it came from an \ | 1490 | /* If the saved string location is NULL, it came from an \ |
| 1519 | on_failure_keep_string_jump opcode, and we want to throw away the \ | 1491 | on_failure_keep_string_jump opcode, and we want to throw away the \ |
| 1520 | saved NULL, thus retaining our current position in the string. */ \ | 1492 | saved NULL, thus retaining our current position in the string. */ \ |
| 1521 | string_temp = POP_FAILURE_POINTER (); \ | 1493 | str = (char *) POP_FAILURE_POINTER (); \ |
| 1522 | if (string_temp != NULL) \ | 1494 | DEBUG_PRINT2 (" Popping string %p: `", str); \ |
| 1523 | str = (const char *) string_temp; \ | ||
| 1524 | \ | ||
| 1525 | DEBUG_PRINT2 (" Popping string 0x%x: `", str); \ | ||
| 1526 | DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \ | 1495 | DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \ |
| 1527 | DEBUG_PRINT1 ("'\n"); \ | 1496 | DEBUG_PRINT1 ("'\n"); \ |
| 1528 | \ | 1497 | \ |
| 1529 | pat = (unsigned char *) POP_FAILURE_POINTER (); \ | 1498 | fail_stack.frame = POP_FAILURE_INT (); \ |
| 1530 | DEBUG_PRINT2 (" Popping pattern 0x%x: ", pat); \ | 1499 | DEBUG_PRINT2 (" Popping frame index: %d\n", fail_stack.frame); \ |
| 1531 | DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \ | ||
| 1532 | \ | ||
| 1533 | /* Restore register info. */ \ | ||
| 1534 | high_reg = (unsigned) POP_FAILURE_INT (); \ | ||
| 1535 | DEBUG_PRINT2 (" Popping high active reg: %d\n", high_reg); \ | ||
| 1536 | \ | ||
| 1537 | low_reg = (unsigned) POP_FAILURE_INT (); \ | ||
| 1538 | DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg); \ | ||
| 1539 | \ | ||
| 1540 | if (1) \ | ||
| 1541 | for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \ | ||
| 1542 | { \ | ||
| 1543 | DEBUG_PRINT2 (" Popping reg: %d\n", this_reg); \ | ||
| 1544 | \ | ||
| 1545 | reg_info[this_reg].word = POP_FAILURE_ELT (); \ | ||
| 1546 | DEBUG_PRINT2 (" info: 0x%x\n", reg_info[this_reg]); \ | ||
| 1547 | \ | ||
| 1548 | regend[this_reg] = (const char *) POP_FAILURE_POINTER (); \ | ||
| 1549 | DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \ | ||
| 1550 | \ | 1500 | \ |
| 1551 | regstart[this_reg] = (const char *) POP_FAILURE_POINTER (); \ | 1501 | assert (fail_stack.avail >= 0); \ |
| 1552 | DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \ | 1502 | assert (fail_stack.frame <= fail_stack.avail); \ |
| 1553 | } \ | ||
| 1554 | else \ | ||
| 1555 | { \ | ||
| 1556 | for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \ | ||
| 1557 | { \ | ||
| 1558 | reg_info[this_reg].word.integer = 0; \ | ||
| 1559 | regend[this_reg] = 0; \ | ||
| 1560 | regstart[this_reg] = 0; \ | ||
| 1561 | } \ | ||
| 1562 | highest_active_reg = high_reg; \ | ||
| 1563 | } \ | ||
| 1564 | \ | 1503 | \ |
| 1565 | set_regs_matched_done = 0; \ | ||
| 1566 | DEBUG_STATEMENT (nfailure_points_popped++); \ | 1504 | DEBUG_STATEMENT (nfailure_points_popped++); \ |
| 1567 | } /* POP_FAILURE_POINT */ | 1505 | } while (0) /* POP_FAILURE_POINT */ |
| 1568 | 1506 | ||
| 1569 | 1507 | ||
| 1570 | 1508 | ||
| 1571 | /* Structure for per-register (a.k.a. per-group) information. | ||
| 1572 | Other register information, such as the | ||
| 1573 | starting and ending positions (which are addresses), and the list of | ||
| 1574 | inner groups (which is a bits list) are maintained in separate | ||
| 1575 | variables. | ||
| 1576 | |||
| 1577 | We are making a (strictly speaking) nonportable assumption here: that | ||
| 1578 | the compiler will pack our bit fields into something that fits into | ||
| 1579 | the type of `word', i.e., is something that fits into one item on the | ||
| 1580 | failure stack. */ | ||
| 1581 | |||
| 1582 | typedef union | ||
| 1583 | { | ||
| 1584 | fail_stack_elt_t word; | ||
| 1585 | struct | ||
| 1586 | { | ||
| 1587 | /* This field is one if this group can match the empty string, | ||
| 1588 | zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */ | ||
| 1589 | #define MATCH_NULL_UNSET_VALUE 3 | ||
| 1590 | unsigned match_null_string_p : 2; | ||
| 1591 | unsigned is_active : 1; | ||
| 1592 | unsigned matched_something : 1; | ||
| 1593 | unsigned ever_matched_something : 1; | ||
| 1594 | } bits; | ||
| 1595 | } register_info_type; | ||
| 1596 | |||
| 1597 | #define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p) | ||
| 1598 | #define IS_ACTIVE(R) ((R).bits.is_active) | ||
| 1599 | #define MATCHED_SOMETHING(R) ((R).bits.matched_something) | ||
| 1600 | #define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something) | ||
| 1601 | |||
| 1602 | |||
| 1603 | /* Call this when have matched a real character; it sets `matched' flags | ||
| 1604 | for the subexpressions which we are currently inside. Also records | ||
| 1605 | that those subexprs have matched. */ | ||
| 1606 | #define SET_REGS_MATCHED() \ | ||
| 1607 | do \ | ||
| 1608 | { \ | ||
| 1609 | if (!set_regs_matched_done) \ | ||
| 1610 | { \ | ||
| 1611 | unsigned r; \ | ||
| 1612 | set_regs_matched_done = 1; \ | ||
| 1613 | for (r = lowest_active_reg; r <= highest_active_reg; r++) \ | ||
| 1614 | { \ | ||
| 1615 | MATCHED_SOMETHING (reg_info[r]) \ | ||
| 1616 | = EVER_MATCHED_SOMETHING (reg_info[r]) \ | ||
| 1617 | = 1; \ | ||
| 1618 | } \ | ||
| 1619 | } \ | ||
| 1620 | } \ | ||
| 1621 | while (0) | ||
| 1622 | |||
| 1623 | /* Registers are set to a sentinel when they haven't yet matched. */ | 1509 | /* Registers are set to a sentinel when they haven't yet matched. */ |
| 1624 | static char reg_unset_dummy; | 1510 | #define REG_UNSET_VALUE NULL |
| 1625 | #define REG_UNSET_VALUE (®_unset_dummy) | ||
| 1626 | #define REG_UNSET(e) ((e) == REG_UNSET_VALUE) | 1511 | #define REG_UNSET(e) ((e) == REG_UNSET_VALUE) |
| 1627 | 1512 | ||
| 1628 | /* Subroutine declarations and macros for regex_compile. */ | 1513 | /* Subroutine declarations and macros for regex_compile. */ |
| @@ -1631,7 +1516,6 @@ static void store_op1 (), store_op2 (); | |||
| 1631 | static void insert_op1 (), insert_op2 (); | 1516 | static void insert_op1 (), insert_op2 (); |
| 1632 | static boolean at_begline_loc_p (), at_endline_loc_p (); | 1517 | static boolean at_begline_loc_p (), at_endline_loc_p (); |
| 1633 | static boolean group_in_compile_stack (); | 1518 | static boolean group_in_compile_stack (); |
| 1634 | static reg_errcode_t compile_range (); | ||
| 1635 | 1519 | ||
| 1636 | /* Fetch the next character in the uncompiled pattern---translating it | 1520 | /* Fetch the next character in the uncompiled pattern---translating it |
| 1637 | if necessary. Also cast from a signed character in the constant | 1521 | if necessary. Also cast from a signed character in the constant |
| @@ -1778,7 +1662,6 @@ typedef struct | |||
| 1778 | { | 1662 | { |
| 1779 | pattern_offset_t begalt_offset; | 1663 | pattern_offset_t begalt_offset; |
| 1780 | pattern_offset_t fixup_alt_jump; | 1664 | pattern_offset_t fixup_alt_jump; |
| 1781 | pattern_offset_t inner_group_offset; | ||
| 1782 | pattern_offset_t laststart_offset; | 1665 | pattern_offset_t laststart_offset; |
| 1783 | regnum_t regnum; | 1666 | regnum_t regnum; |
| 1784 | } compile_stack_elt_t; | 1667 | } compile_stack_elt_t; |
| @@ -1920,11 +1803,7 @@ static fail_stack_type fail_stack; | |||
| 1920 | static int regs_allocated_size; | 1803 | static int regs_allocated_size; |
| 1921 | 1804 | ||
| 1922 | static const char ** regstart, ** regend; | 1805 | static const char ** regstart, ** regend; |
| 1923 | static const char ** old_regstart, ** old_regend; | ||
| 1924 | static const char **best_regstart, **best_regend; | 1806 | static const char **best_regstart, **best_regend; |
| 1925 | static register_info_type *reg_info; | ||
| 1926 | static const char **reg_dummy; | ||
| 1927 | static register_info_type *reg_info_dummy; | ||
| 1928 | 1807 | ||
| 1929 | /* Make the register vectors big enough for NUM_REGS registers, | 1808 | /* Make the register vectors big enough for NUM_REGS registers, |
| 1930 | but don't make them smaller. */ | 1809 | but don't make them smaller. */ |
| @@ -1937,13 +1816,8 @@ regex_grow_registers (num_regs) | |||
| 1937 | { | 1816 | { |
| 1938 | RETALLOC_IF (regstart, num_regs, const char *); | 1817 | RETALLOC_IF (regstart, num_regs, const char *); |
| 1939 | RETALLOC_IF (regend, num_regs, const char *); | 1818 | RETALLOC_IF (regend, num_regs, const char *); |
| 1940 | RETALLOC_IF (old_regstart, num_regs, const char *); | ||
| 1941 | RETALLOC_IF (old_regend, num_regs, const char *); | ||
| 1942 | RETALLOC_IF (best_regstart, num_regs, const char *); | 1819 | RETALLOC_IF (best_regstart, num_regs, const char *); |
| 1943 | RETALLOC_IF (best_regend, num_regs, const char *); | 1820 | RETALLOC_IF (best_regend, num_regs, const char *); |
| 1944 | RETALLOC_IF (reg_info, num_regs, register_info_type); | ||
| 1945 | RETALLOC_IF (reg_dummy, num_regs, const char *); | ||
| 1946 | RETALLOC_IF (reg_info_dummy, num_regs, register_info_type); | ||
| 1947 | 1821 | ||
| 1948 | regs_allocated_size = num_regs; | 1822 | regs_allocated_size = num_regs; |
| 1949 | } | 1823 | } |
| @@ -1969,6 +1843,15 @@ regex_grow_registers (num_regs) | |||
| 1969 | The `fastmap' and `newline_anchor' fields are neither | 1843 | The `fastmap' and `newline_anchor' fields are neither |
| 1970 | examined nor set. */ | 1844 | examined nor set. */ |
| 1971 | 1845 | ||
| 1846 | /* Insert the `jump' from the end of last alternative to "here". | ||
| 1847 | The space for the jump has already been allocated. */ | ||
| 1848 | #define FIXUP_ALT_JUMP() \ | ||
| 1849 | do { \ | ||
| 1850 | if (fixup_alt_jump) \ | ||
| 1851 | STORE_JUMP (jump, fixup_alt_jump, b); \ | ||
| 1852 | } while (0) | ||
| 1853 | |||
| 1854 | |||
| 1972 | /* Return, freeing storage we allocated. */ | 1855 | /* Return, freeing storage we allocated. */ |
| 1973 | #define FREE_STACK_RETURN(value) \ | 1856 | #define FREE_STACK_RETURN(value) \ |
| 1974 | do { \ | 1857 | do { \ |
| @@ -2042,6 +1925,7 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2042 | struct range_table_work_area range_table_work; | 1925 | struct range_table_work_area range_table_work; |
| 2043 | 1926 | ||
| 2044 | #ifdef DEBUG | 1927 | #ifdef DEBUG |
| 1928 | /* debug = 1; */ | ||
| 2045 | DEBUG_PRINT1 ("\nCompiling pattern: "); | 1929 | DEBUG_PRINT1 ("\nCompiling pattern: "); |
| 2046 | if (debug) | 1930 | if (debug) |
| 2047 | { | 1931 | { |
| @@ -2258,9 +2142,8 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2258 | keep_string_p = true; | 2142 | keep_string_p = true; |
| 2259 | } | 2143 | } |
| 2260 | else | 2144 | else |
| 2261 | /* Anything else. */ | 2145 | STORE_JUMP (jump, b, laststart - 3); |
| 2262 | STORE_JUMP (maybe_pop_jump, b, laststart - 3); | 2146 | |
| 2263 | |||
| 2264 | /* We've added more stuff to the buffer. */ | 2147 | /* We've added more stuff to the buffer. */ |
| 2265 | b += 3; | 2148 | b += 3; |
| 2266 | } | 2149 | } |
| @@ -2268,31 +2151,29 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2268 | /* On failure, jump from laststart to b + 3, which will be the | 2151 | /* On failure, jump from laststart to b + 3, which will be the |
| 2269 | end of the buffer after this jump is inserted. */ | 2152 | end of the buffer after this jump is inserted. */ |
| 2270 | GET_BUFFER_SPACE (3); | 2153 | GET_BUFFER_SPACE (3); |
| 2271 | INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump | ||
| 2272 | : on_failure_jump, | ||
| 2273 | laststart, b + 3); | ||
| 2274 | pending_exact = 0; | ||
| 2275 | b += 3; | ||
| 2276 | |||
| 2277 | if (!zero_times_ok) | 2154 | if (!zero_times_ok) |
| 2278 | { | 2155 | { |
| 2279 | /* At least one repetition is required, so insert a | 2156 | assert (many_times_ok); |
| 2280 | `dummy_failure_jump' before the initial | 2157 | INSERT_JUMP (on_failure_jump_smart, b - 3, b + 3); |
| 2281 | `on_failure_jump' instruction of the loop. This | 2158 | pending_exact = 0; |
| 2282 | effects a skip over that instruction the first time | 2159 | b += 3; |
| 2283 | we hit that loop. */ | 2160 | } |
| 2284 | GET_BUFFER_SPACE (3); | 2161 | else |
| 2285 | INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6); | 2162 | { |
| 2163 | INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump | ||
| 2164 | : !many_times_ok ? | ||
| 2165 | on_failure_jump : on_failure_jump_smart, | ||
| 2166 | laststart, b + 3); | ||
| 2167 | pending_exact = 0; | ||
| 2286 | b += 3; | 2168 | b += 3; |
| 2287 | } | 2169 | } |
| 2288 | |||
| 2289 | } | 2170 | } |
| 2290 | else /* not greedy */ | 2171 | else /* not greedy */ |
| 2291 | { /* I wish the greedy and non-greedy cases could be merged. */ | 2172 | { /* I wish the greedy and non-greedy cases could be merged. */ |
| 2292 | 2173 | ||
| 2293 | if (many_times_ok) | 2174 | if (many_times_ok) |
| 2294 | { | 2175 | { |
| 2295 | /* The greedy multiple match looks like a repeat..until: | 2176 | /* The non-greedy multiple match looks like a repeat..until: |
| 2296 | we only need a conditional jump at the end of the loop */ | 2177 | we only need a conditional jump at the end of the loop */ |
| 2297 | GET_BUFFER_SPACE (3); | 2178 | GET_BUFFER_SPACE (3); |
| 2298 | STORE_JUMP (on_failure_jump, b, laststart); | 2179 | STORE_JUMP (on_failure_jump, b, laststart); |
| @@ -2556,7 +2437,9 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2556 | Split that into two ranges,, | 2437 | Split that into two ranges,, |
| 2557 | the low one ending at 0237, and the high one | 2438 | the low one ending at 0237, and the high one |
| 2558 | starting at ...040. */ | 2439 | starting at ...040. */ |
| 2559 | int c1_base = (c1 & ~0177) | 040; | 2440 | /* Unless I'm missing something, |
| 2441 | this line is useless. -sm | ||
| 2442 | int c1_base = (c1 & ~0177) | 040; */ | ||
| 2560 | SET_RANGE_TABLE_WORK_AREA (range_table_work, c, c1); | 2443 | SET_RANGE_TABLE_WORK_AREA (range_table_work, c, c1); |
| 2561 | c1 = 0237; | 2444 | c1 = 0237; |
| 2562 | } | 2445 | } |
| @@ -2678,8 +2561,31 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2678 | goto normal_backslash; | 2561 | goto normal_backslash; |
| 2679 | 2562 | ||
| 2680 | handle_open: | 2563 | handle_open: |
| 2681 | bufp->re_nsub++; | 2564 | { |
| 2682 | regnum++; | 2565 | int shy = 0; |
| 2566 | if (p+1 < pend) | ||
| 2567 | { | ||
| 2568 | /* Look for a special (?...) construct */ | ||
| 2569 | PATFETCH (c); | ||
| 2570 | if ((syntax & RE_SHY_GROUPS) && c == '?') | ||
| 2571 | { | ||
| 2572 | PATFETCH (c); | ||
| 2573 | switch (c) | ||
| 2574 | { | ||
| 2575 | case ':': shy = 1; break; | ||
| 2576 | default: | ||
| 2577 | /* Only (?:...) is supported right now. */ | ||
| 2578 | FREE_STACK_RETURN (REG_BADPAT); | ||
| 2579 | } | ||
| 2580 | } | ||
| 2581 | else PATUNFETCH; | ||
| 2582 | } | ||
| 2583 | |||
| 2584 | if (!shy) | ||
| 2585 | { | ||
| 2586 | bufp->re_nsub++; | ||
| 2587 | regnum++; | ||
| 2588 | } | ||
| 2683 | 2589 | ||
| 2684 | if (COMPILE_STACK_FULL) | 2590 | if (COMPILE_STACK_FULL) |
| 2685 | { | 2591 | { |
| @@ -2698,17 +2604,13 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2698 | COMPILE_STACK_TOP.fixup_alt_jump | 2604 | COMPILE_STACK_TOP.fixup_alt_jump |
| 2699 | = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; | 2605 | = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; |
| 2700 | COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer; | 2606 | COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer; |
| 2701 | COMPILE_STACK_TOP.regnum = regnum; | 2607 | COMPILE_STACK_TOP.regnum = shy ? -regnum : regnum; |
| 2702 | 2608 | ||
| 2703 | /* We will eventually replace the 0 with the number of | 2609 | /* Do not push a |
| 2704 | groups inner to this one. But do not push a | ||
| 2705 | start_memory for groups beyond the last one we can | 2610 | start_memory for groups beyond the last one we can |
| 2706 | represent in the compiled pattern. */ | 2611 | represent in the compiled pattern. */ |
| 2707 | if (regnum <= MAX_REGNUM) | 2612 | if (regnum <= MAX_REGNUM && !shy) |
| 2708 | { | 2613 | BUF_PUSH_2 (start_memory, regnum); |
| 2709 | COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2; | ||
| 2710 | BUF_PUSH_3 (start_memory, regnum, 0); | ||
| 2711 | } | ||
| 2712 | 2614 | ||
| 2713 | compile_stack.avail++; | 2615 | compile_stack.avail++; |
| 2714 | 2616 | ||
| @@ -2720,36 +2622,30 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2720 | clear pending_exact explicitly. */ | 2622 | clear pending_exact explicitly. */ |
| 2721 | pending_exact = 0; | 2623 | pending_exact = 0; |
| 2722 | break; | 2624 | break; |
| 2723 | 2625 | } | |
| 2724 | 2626 | ||
| 2725 | case ')': | 2627 | case ')': |
| 2726 | if (syntax & RE_NO_BK_PARENS) goto normal_backslash; | 2628 | if (syntax & RE_NO_BK_PARENS) goto normal_backslash; |
| 2727 | 2629 | ||
| 2728 | if (COMPILE_STACK_EMPTY) | 2630 | if (COMPILE_STACK_EMPTY) |
| 2729 | if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) | 2631 | { |
| 2730 | goto normal_backslash; | 2632 | if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) |
| 2731 | else | 2633 | goto normal_backslash; |
| 2732 | FREE_STACK_RETURN (REG_ERPAREN); | 2634 | else |
| 2635 | FREE_STACK_RETURN (REG_ERPAREN); | ||
| 2636 | } | ||
| 2733 | 2637 | ||
| 2734 | handle_close: | 2638 | handle_close: |
| 2735 | if (fixup_alt_jump) | 2639 | FIXUP_ALT_JUMP (); |
| 2736 | { /* Push a dummy failure point at the end of the | ||
| 2737 | alternative for a possible future | ||
| 2738 | `pop_failure_jump' to pop. See comments at | ||
| 2739 | `push_dummy_failure' in `re_match_2'. */ | ||
| 2740 | BUF_PUSH (push_dummy_failure); | ||
| 2741 | |||
| 2742 | /* We allocated space for this jump when we assigned | ||
| 2743 | to `fixup_alt_jump', in the `handle_alt' case below. */ | ||
| 2744 | STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1); | ||
| 2745 | } | ||
| 2746 | 2640 | ||
| 2747 | /* See similar code for backslashed left paren above. */ | 2641 | /* See similar code for backslashed left paren above. */ |
| 2748 | if (COMPILE_STACK_EMPTY) | 2642 | if (COMPILE_STACK_EMPTY) |
| 2749 | if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) | 2643 | { |
| 2750 | goto normal_char; | 2644 | if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) |
| 2751 | else | 2645 | goto normal_char; |
| 2752 | FREE_STACK_RETURN (REG_ERPAREN); | 2646 | else |
| 2647 | FREE_STACK_RETURN (REG_ERPAREN); | ||
| 2648 | } | ||
| 2753 | 2649 | ||
| 2754 | /* Since we just checked for an empty stack above, this | 2650 | /* Since we just checked for an empty stack above, this |
| 2755 | ``can't happen''. */ | 2651 | ``can't happen''. */ |
| @@ -2775,15 +2671,8 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2775 | 2671 | ||
| 2776 | /* We're at the end of the group, so now we know how many | 2672 | /* We're at the end of the group, so now we know how many |
| 2777 | groups were inside this one. */ | 2673 | groups were inside this one. */ |
| 2778 | if (this_group_regnum <= MAX_REGNUM) | 2674 | if (this_group_regnum <= MAX_REGNUM && this_group_regnum > 0) |
| 2779 | { | 2675 | BUF_PUSH_2 (stop_memory, this_group_regnum); |
| 2780 | unsigned char *inner_group_loc | ||
| 2781 | = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset; | ||
| 2782 | |||
| 2783 | *inner_group_loc = regnum - this_group_regnum; | ||
| 2784 | BUF_PUSH_3 (stop_memory, this_group_regnum, | ||
| 2785 | regnum - this_group_regnum); | ||
| 2786 | } | ||
| 2787 | } | 2676 | } |
| 2788 | break; | 2677 | break; |
| 2789 | 2678 | ||
| @@ -2818,8 +2707,7 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2818 | fixup_alt_jump to right after `b', and leave behind three | 2707 | fixup_alt_jump to right after `b', and leave behind three |
| 2819 | bytes which we'll fill in when we get to after `c'. */ | 2708 | bytes which we'll fill in when we get to after `c'. */ |
| 2820 | 2709 | ||
| 2821 | if (fixup_alt_jump) | 2710 | FIXUP_ALT_JUMP (); |
| 2822 | STORE_JUMP (jump_past_alt, fixup_alt_jump, b); | ||
| 2823 | 2711 | ||
| 2824 | /* Mark and leave space for a jump after this alternative, | 2712 | /* Mark and leave space for a jump after this alternative, |
| 2825 | to be filled in later either by next alternative or | 2713 | to be filled in later either by next alternative or |
| @@ -3173,8 +3061,7 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 3173 | 3061 | ||
| 3174 | /* Through the pattern now. */ | 3062 | /* Through the pattern now. */ |
| 3175 | 3063 | ||
| 3176 | if (fixup_alt_jump) | 3064 | FIXUP_ALT_JUMP (); |
| 3177 | STORE_JUMP (jump_past_alt, fixup_alt_jump, b); | ||
| 3178 | 3065 | ||
| 3179 | if (!COMPILE_STACK_EMPTY) | 3066 | if (!COMPILE_STACK_EMPTY) |
| 3180 | FREE_STACK_RETURN (REG_EPAREN); | 3067 | FREE_STACK_RETURN (REG_EPAREN); |
| @@ -3192,8 +3079,10 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 3192 | #ifdef DEBUG | 3079 | #ifdef DEBUG |
| 3193 | if (debug) | 3080 | if (debug) |
| 3194 | { | 3081 | { |
| 3082 | re_compile_fastmap (bufp); | ||
| 3195 | DEBUG_PRINT1 ("\nCompiled pattern: \n"); | 3083 | DEBUG_PRINT1 ("\nCompiled pattern: \n"); |
| 3196 | print_compiled_pattern (bufp); | 3084 | print_compiled_pattern (bufp); |
| 3085 | /* debug = 0; */ | ||
| 3197 | } | 3086 | } |
| 3198 | #endif /* DEBUG */ | 3087 | #endif /* DEBUG */ |
| 3199 | 3088 | ||
| @@ -3208,17 +3097,6 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 3208 | { | 3097 | { |
| 3209 | fail_stack.size = re_max_failures * TYPICAL_FAILURE_SIZE; | 3098 | fail_stack.size = re_max_failures * TYPICAL_FAILURE_SIZE; |
| 3210 | 3099 | ||
| 3211 | #ifdef emacs | ||
| 3212 | if (! fail_stack.stack) | ||
| 3213 | fail_stack.stack | ||
| 3214 | = (fail_stack_elt_t *) xmalloc (fail_stack.size | ||
| 3215 | * sizeof (fail_stack_elt_t)); | ||
| 3216 | else | ||
| 3217 | fail_stack.stack | ||
| 3218 | = (fail_stack_elt_t *) xrealloc (fail_stack.stack, | ||
| 3219 | (fail_stack.size | ||
| 3220 | * sizeof (fail_stack_elt_t))); | ||
| 3221 | #else /* not emacs */ | ||
| 3222 | if (! fail_stack.stack) | 3100 | if (! fail_stack.stack) |
| 3223 | fail_stack.stack | 3101 | fail_stack.stack |
| 3224 | = (fail_stack_elt_t *) malloc (fail_stack.size | 3102 | = (fail_stack_elt_t *) malloc (fail_stack.size |
| @@ -3228,7 +3106,6 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 3228 | = (fail_stack_elt_t *) realloc (fail_stack.stack, | 3106 | = (fail_stack_elt_t *) realloc (fail_stack.stack, |
| 3229 | (fail_stack.size | 3107 | (fail_stack.size |
| 3230 | * sizeof (fail_stack_elt_t))); | 3108 | * sizeof (fail_stack_elt_t))); |
| 3231 | #endif /* not emacs */ | ||
| 3232 | } | 3109 | } |
| 3233 | 3110 | ||
| 3234 | regex_grow_registers (num_regs); | 3111 | regex_grow_registers (num_regs); |
| @@ -3388,15 +3265,13 @@ int | |||
| 3388 | re_compile_fastmap (bufp) | 3265 | re_compile_fastmap (bufp) |
| 3389 | struct re_pattern_buffer *bufp; | 3266 | struct re_pattern_buffer *bufp; |
| 3390 | { | 3267 | { |
| 3391 | int i, j, k; | 3268 | int j, k; |
| 3392 | #ifdef MATCH_MAY_ALLOCATE | 3269 | #ifdef MATCH_MAY_ALLOCATE |
| 3393 | fail_stack_type fail_stack; | 3270 | fail_stack_type fail_stack; |
| 3394 | #endif | 3271 | #endif |
| 3395 | #ifndef REGEX_MALLOC | 3272 | #ifndef REGEX_MALLOC |
| 3396 | char *destination; | 3273 | char *destination; |
| 3397 | #endif | 3274 | #endif |
| 3398 | /* We don't push any register information onto the failure stack. */ | ||
| 3399 | unsigned num_regs = 0; | ||
| 3400 | 3275 | ||
| 3401 | register char *fastmap = bufp->fastmap; | 3276 | register char *fastmap = bufp->fastmap; |
| 3402 | unsigned char *pattern = bufp->buffer; | 3277 | unsigned char *pattern = bufp->buffer; |
| @@ -3431,19 +3306,45 @@ re_compile_fastmap (bufp) | |||
| 3431 | bufp->fastmap_accurate = 1; /* It will be when we're done. */ | 3306 | bufp->fastmap_accurate = 1; /* It will be when we're done. */ |
| 3432 | bufp->can_be_null = 0; | 3307 | bufp->can_be_null = 0; |
| 3433 | 3308 | ||
| 3434 | while (1) | 3309 | /* The loop below works as follows: |
| 3310 | - It has a working-list kept in the PATTERN_STACK and which basically | ||
| 3311 | starts by only containing a pointer to the first operation. | ||
| 3312 | - If the opcode we're looking at is a match against some set of | ||
| 3313 | chars, then we add those chars to the fastmap and go on to the | ||
| 3314 | next work element from the worklist (done via `break'). | ||
| 3315 | - If the opcode is a control operator on the other hand, we either | ||
| 3316 | ignore it (if it's meaningless at this point, such as `start_memory') | ||
| 3317 | or execute it (if it's a jump). If the jump has several destinations | ||
| 3318 | (i.e. `on_failure_jump'), then we push the other destination onto the | ||
| 3319 | worklist. | ||
| 3320 | We guarantee termination by ignoring backward jumps (more or less), | ||
| 3321 | so that `p' is monotonically increasing. More to the point, we | ||
| 3322 | never set `p' (or push) anything `<= p1'. */ | ||
| 3323 | |||
| 3324 | /* If can_be_null is set, then the fastmap will not be used anyway. */ | ||
| 3325 | while (!bufp->can_be_null) | ||
| 3435 | { | 3326 | { |
| 3327 | /* `p1' is used as a marker of how far back a `on_failure_jump' | ||
| 3328 | can go without being ignored. It is normally equal to `p' | ||
| 3329 | (which prevents any backward `on_failure_jump') except right | ||
| 3330 | after a plain `jump', to allow patterns such as: | ||
| 3331 | 0: jump 10 | ||
| 3332 | 3..9: <body> | ||
| 3333 | 10: on_failure_jump 3 | ||
| 3334 | as used for the *? operator. */ | ||
| 3335 | unsigned char *p1 = p; | ||
| 3336 | |||
| 3436 | if (p == pend || *p == succeed) | 3337 | if (p == pend || *p == succeed) |
| 3437 | { | 3338 | { |
| 3438 | /* We have reached the (effective) end of pattern. */ | 3339 | /* We have reached the (effective) end of pattern. */ |
| 3439 | if (!FAIL_STACK_EMPTY ()) | 3340 | if (!PATTERN_STACK_EMPTY ()) |
| 3440 | { | 3341 | { |
| 3441 | bufp->can_be_null |= path_can_be_null; | 3342 | bufp->can_be_null |= path_can_be_null; |
| 3442 | 3343 | ||
| 3443 | /* Reset for next path. */ | 3344 | /* Reset for next path. */ |
| 3444 | path_can_be_null = true; | 3345 | path_can_be_null = true; |
| 3445 | 3346 | ||
| 3446 | p = fail_stack.stack[--fail_stack.avail].pointer; | 3347 | p = POP_PATTERN_OP (); |
| 3447 | 3348 | ||
| 3448 | continue; | 3349 | continue; |
| 3449 | } | 3350 | } |
| @@ -3457,14 +3358,13 @@ re_compile_fastmap (bufp) | |||
| 3457 | switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++)) | 3358 | switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++)) |
| 3458 | { | 3359 | { |
| 3459 | 3360 | ||
| 3460 | /* I guess the idea here is to simply not bother with a fastmap | ||
| 3461 | if a backreference is used, since it's too hard to figure out | ||
| 3462 | the fastmap for the corresponding group. Setting | ||
| 3463 | `can_be_null' stops `re_search_2' from using the fastmap, so | ||
| 3464 | that is all we do. */ | ||
| 3465 | case duplicate: | 3361 | case duplicate: |
| 3466 | bufp->can_be_null = 1; | 3362 | /* If the first character has to match a backreference, that means |
| 3467 | goto done; | 3363 | that the group was empty (since it already matched). Since this |
| 3364 | is the only case that interests us here, we can assume that the | ||
| 3365 | backreference must match the empty string. */ | ||
| 3366 | p++; | ||
| 3367 | continue; | ||
| 3468 | 3368 | ||
| 3469 | 3369 | ||
| 3470 | /* Following are the cases which match a character. These end | 3370 | /* Following are the cases which match a character. These end |
| @@ -3534,9 +3434,8 @@ re_compile_fastmap (bufp) | |||
| 3534 | multibyte character in the range table. */ | 3434 | multibyte character in the range table. */ |
| 3535 | int c, count; | 3435 | int c, count; |
| 3536 | 3436 | ||
| 3537 | /* Make P points the range table. `+ 2' is to skip flag | 3437 | /* Make P points the range table. */ |
| 3538 | bits for a character class. */ | 3438 | p += CHARSET_BITMAP_SIZE (&p[-2]); |
| 3539 | p += CHARSET_BITMAP_SIZE (&p[-2]) + 2; | ||
| 3540 | 3439 | ||
| 3541 | /* Extract the number of ranges in range table into COUNT. */ | 3440 | /* Extract the number of ranges in range table into COUNT. */ |
| 3542 | EXTRACT_NUMBER_AND_INCR (count, p); | 3441 | EXTRACT_NUMBER_AND_INCR (count, p); |
| @@ -3630,11 +3529,6 @@ re_compile_fastmap (bufp) | |||
| 3630 | if (!(bufp->syntax & RE_DOT_NEWLINE)) | 3529 | if (!(bufp->syntax & RE_DOT_NEWLINE)) |
| 3631 | fastmap['\n'] = fastmap_newline; | 3530 | fastmap['\n'] = fastmap_newline; |
| 3632 | 3531 | ||
| 3633 | /* Return if we have already set `can_be_null'; if we have, | ||
| 3634 | then the fastmap is irrelevant. Something's wrong here. */ | ||
| 3635 | else if (bufp->can_be_null) | ||
| 3636 | goto done; | ||
| 3637 | |||
| 3638 | /* Otherwise, have to check alternative paths. */ | 3532 | /* Otherwise, have to check alternative paths. */ |
| 3639 | break; | 3533 | break; |
| 3640 | } | 3534 | } |
| @@ -3649,7 +3543,7 @@ re_compile_fastmap (bufp) | |||
| 3649 | /* This match depends on text properties. These end with | 3543 | /* This match depends on text properties. These end with |
| 3650 | aborting optimizations. */ | 3544 | aborting optimizations. */ |
| 3651 | bufp->can_be_null = 1; | 3545 | bufp->can_be_null = 1; |
| 3652 | goto done; | 3546 | continue; |
| 3653 | #if 0 | 3547 | #if 0 |
| 3654 | k = *p++; | 3548 | k = *p++; |
| 3655 | simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH); | 3549 | simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH); |
| @@ -3727,44 +3621,38 @@ re_compile_fastmap (bufp) | |||
| 3727 | case wordbeg: | 3621 | case wordbeg: |
| 3728 | case wordend: | 3622 | case wordend: |
| 3729 | #endif | 3623 | #endif |
| 3730 | case push_dummy_failure: | ||
| 3731 | continue; | 3624 | continue; |
| 3732 | 3625 | ||
| 3733 | 3626 | ||
| 3734 | case jump_n: | 3627 | case jump_n: |
| 3735 | case pop_failure_jump: | ||
| 3736 | case maybe_pop_jump: | ||
| 3737 | case jump: | 3628 | case jump: |
| 3738 | case jump_past_alt: | ||
| 3739 | case dummy_failure_jump: | ||
| 3740 | EXTRACT_NUMBER_AND_INCR (j, p); | ||
| 3741 | p += j; | ||
| 3742 | if (j > 0) | ||
| 3743 | continue; | ||
| 3744 | |||
| 3745 | /* Jump backward implies we just went through the body of a | ||
| 3746 | loop and matched nothing. Opcode jumped to should be | ||
| 3747 | `on_failure_jump' or `succeed_n'. Just treat it like an | ||
| 3748 | ordinary jump. For a * loop, it has pushed its failure | ||
| 3749 | point already; if so, discard that as redundant. */ | ||
| 3750 | if ((re_opcode_t) *p != on_failure_jump | ||
| 3751 | && (re_opcode_t) *p != succeed_n) | ||
| 3752 | continue; | ||
| 3753 | |||
| 3754 | p++; | ||
| 3755 | EXTRACT_NUMBER_AND_INCR (j, p); | 3629 | EXTRACT_NUMBER_AND_INCR (j, p); |
| 3630 | if (j < 0) | ||
| 3631 | /* Backward jumps can only go back to code that we've already | ||
| 3632 | visited. `re_compile' should make sure this is true. */ | ||
| 3633 | break; | ||
| 3756 | p += j; | 3634 | p += j; |
| 3757 | 3635 | switch (SWITCH_ENUM_CAST ((re_opcode_t) *p)) | |
| 3758 | /* If what's on the stack is where we are now, pop it. */ | 3636 | { |
| 3759 | if (!FAIL_STACK_EMPTY () | 3637 | case on_failure_jump: |
| 3760 | && fail_stack.stack[fail_stack.avail - 1].pointer == p) | 3638 | case on_failure_keep_string_jump: |
| 3761 | fail_stack.avail--; | 3639 | case on_failure_jump_exclusive: |
| 3762 | 3640 | case on_failure_jump_loop: | |
| 3763 | continue; | 3641 | case on_failure_jump_smart: |
| 3764 | 3642 | p++; | |
| 3643 | break; | ||
| 3644 | default: | ||
| 3645 | continue; | ||
| 3646 | }; | ||
| 3647 | /* Keep `p1' to allow the `on_failure_jump' we are jumping to | ||
| 3648 | to jump back to "just after here". */ | ||
| 3649 | /* Fallthrough */ | ||
| 3765 | 3650 | ||
| 3766 | case on_failure_jump: | 3651 | case on_failure_jump: |
| 3767 | case on_failure_keep_string_jump: | 3652 | case on_failure_keep_string_jump: |
| 3653 | case on_failure_jump_exclusive: | ||
| 3654 | case on_failure_jump_loop: | ||
| 3655 | case on_failure_jump_smart: | ||
| 3768 | handle_on_failure_jump: | 3656 | handle_on_failure_jump: |
| 3769 | EXTRACT_NUMBER_AND_INCR (j, p); | 3657 | EXTRACT_NUMBER_AND_INCR (j, p); |
| 3770 | 3658 | ||
| @@ -3775,7 +3663,10 @@ re_compile_fastmap (bufp) | |||
| 3775 | to push such a point since we obviously won't find any more | 3663 | to push such a point since we obviously won't find any more |
| 3776 | fastmap entries beyond `pend'. Such a pattern can match | 3664 | fastmap entries beyond `pend'. Such a pattern can match |
| 3777 | the null string, though. */ | 3665 | the null string, though. */ |
| 3778 | if (p + j < pend) | 3666 | if (p + j <= p1) |
| 3667 | /* Backward jump to be ignored. */ | ||
| 3668 | ; | ||
| 3669 | else if (p + j < pend) | ||
| 3779 | { | 3670 | { |
| 3780 | if (!PUSH_PATTERN_OP (p + j, fail_stack)) | 3671 | if (!PUSH_PATTERN_OP (p + j, fail_stack)) |
| 3781 | { | 3672 | { |
| @@ -3817,7 +3708,7 @@ re_compile_fastmap (bufp) | |||
| 3817 | 3708 | ||
| 3818 | case start_memory: | 3709 | case start_memory: |
| 3819 | case stop_memory: | 3710 | case stop_memory: |
| 3820 | p += 2; | 3711 | p += 1; |
| 3821 | continue; | 3712 | continue; |
| 3822 | 3713 | ||
| 3823 | 3714 | ||
| @@ -3838,8 +3729,6 @@ re_compile_fastmap (bufp) | |||
| 3838 | /* Set `can_be_null' for the last path (also the first path, if the | 3729 | /* Set `can_be_null' for the last path (also the first path, if the |
| 3839 | pattern is empty). */ | 3730 | pattern is empty). */ |
| 3840 | bufp->can_be_null |= path_can_be_null; | 3731 | bufp->can_be_null |= path_can_be_null; |
| 3841 | |||
| 3842 | done: | ||
| 3843 | RESET_FAIL_STACK (); | 3732 | RESET_FAIL_STACK (); |
| 3844 | return 0; | 3733 | return 0; |
| 3845 | } /* re_compile_fastmap */ | 3734 | } /* re_compile_fastmap */ |
| @@ -4167,9 +4056,6 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) | |||
| 4167 | /* Declarations and macros for re_match_2. */ | 4056 | /* Declarations and macros for re_match_2. */ |
| 4168 | 4057 | ||
| 4169 | static int bcmp_translate (); | 4058 | static int bcmp_translate (); |
| 4170 | static boolean alt_match_null_string_p (), | ||
| 4171 | common_op_match_null_string_p (), | ||
| 4172 | group_match_null_string_p (); | ||
| 4173 | 4059 | ||
| 4174 | /* This converts PTR, a pointer into one of the search strings `string1' | 4060 | /* This converts PTR, a pointer into one of the search strings `string1' |
| 4175 | and `string2' into an offset from the beginning of that string. */ | 4061 | and `string2' into an offset from the beginning of that string. */ |
| @@ -4237,27 +4123,208 @@ static boolean alt_match_null_string_p (), | |||
| 4237 | REGEX_FREE_STACK (fail_stack.stack); \ | 4123 | REGEX_FREE_STACK (fail_stack.stack); \ |
| 4238 | FREE_VAR (regstart); \ | 4124 | FREE_VAR (regstart); \ |
| 4239 | FREE_VAR (regend); \ | 4125 | FREE_VAR (regend); \ |
| 4240 | FREE_VAR (old_regstart); \ | ||
| 4241 | FREE_VAR (old_regend); \ | ||
| 4242 | FREE_VAR (best_regstart); \ | 4126 | FREE_VAR (best_regstart); \ |
| 4243 | FREE_VAR (best_regend); \ | 4127 | FREE_VAR (best_regend); \ |
| 4244 | FREE_VAR (reg_info); \ | ||
| 4245 | FREE_VAR (reg_dummy); \ | ||
| 4246 | FREE_VAR (reg_info_dummy); \ | ||
| 4247 | } while (0) | 4128 | } while (0) |
| 4248 | #else | 4129 | #else |
| 4249 | #define FREE_VARIABLES() ((void)0) /* Do nothing! But inhibit gcc warning. */ | 4130 | #define FREE_VARIABLES() ((void)0) /* Do nothing! But inhibit gcc warning. */ |
| 4250 | #endif /* not MATCH_MAY_ALLOCATE */ | 4131 | #endif /* not MATCH_MAY_ALLOCATE */ |
| 4251 | 4132 | ||
| 4252 | /* These values must meet several constraints. They must not be valid | 4133 | |
| 4253 | register values; since we have a limit of 255 registers (because | 4134 | /* Optimization routines. */ |
| 4254 | we use only one byte in the pattern for the register number), we can | 4135 | |
| 4255 | use numbers larger than 255. They must differ by 1, because of | 4136 | /* Jump over non-matching operations. */ |
| 4256 | NUM_FAILURE_ITEMS above. And the value for the lowest register must | 4137 | static unsigned char * |
| 4257 | be larger than the value for the highest register, so we do not try | 4138 | skip_noops (p, pend, memory) |
| 4258 | to actually save any registers when none are active. */ | 4139 | unsigned char *p, *pend; |
| 4259 | #define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH) | 4140 | int memory; |
| 4260 | #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1) | 4141 | { |
| 4142 | int mcnt; | ||
| 4143 | while (p < pend) | ||
| 4144 | { | ||
| 4145 | switch (SWITCH_ENUM_CAST ((re_opcode_t) *p)) | ||
| 4146 | { | ||
| 4147 | case start_memory: | ||
| 4148 | if (!memory) | ||
| 4149 | return p; | ||
| 4150 | case stop_memory: | ||
| 4151 | p += 2; break; | ||
| 4152 | case no_op: | ||
| 4153 | p += 1; break; | ||
| 4154 | case jump: | ||
| 4155 | p += 1; | ||
| 4156 | EXTRACT_NUMBER_AND_INCR (mcnt, p); | ||
| 4157 | p += mcnt; | ||
| 4158 | break; | ||
| 4159 | default: | ||
| 4160 | return p; | ||
| 4161 | } | ||
| 4162 | } | ||
| 4163 | assert (p == pend); | ||
| 4164 | return p; | ||
| 4165 | } | ||
| 4166 | |||
| 4167 | /* Non-zero if "p1 matches something" implies "p2 fails". */ | ||
| 4168 | static int | ||
| 4169 | mutually_exclusive_p (bufp, p1, p2) | ||
| 4170 | struct re_pattern_buffer *bufp; | ||
| 4171 | unsigned char *p1, *p2; | ||
| 4172 | { | ||
| 4173 | int multibyte = bufp->multibyte; | ||
| 4174 | unsigned char *pend = bufp->buffer + bufp->used; | ||
| 4175 | |||
| 4176 | assert (p1 >= bufp->buffer && p1 <= pend | ||
| 4177 | && p2 >= bufp->buffer && p2 <= pend); | ||
| 4178 | |||
| 4179 | /* Skip over open/close-group commands. | ||
| 4180 | If what follows this loop is a ...+ construct, | ||
| 4181 | look at what begins its body, since we will have to | ||
| 4182 | match at least one of that. */ | ||
| 4183 | p2 = skip_noops (p2, pend, 1); | ||
| 4184 | /* The same skip can be done for p1, except that skipping over | ||
| 4185 | start_memory is not a good idea (if there's a group inside | ||
| 4186 | the loop delimited by on_failure_jump_exclusive, then it | ||
| 4187 | can't optimize the push away (it still works properly, but | ||
| 4188 | slightly slower rather than faster)). */ | ||
| 4189 | p1 = skip_noops (p1, pend, 0); | ||
| 4190 | |||
| 4191 | /* If we're at the end of the pattern, we can change. */ | ||
| 4192 | if (p2 == pend) | ||
| 4193 | { | ||
| 4194 | switch (SWITCH_ENUM_CAST ((re_opcode_t) *p1)) | ||
| 4195 | { | ||
| 4196 | case anychar: | ||
| 4197 | case charset_not: | ||
| 4198 | case charset: | ||
| 4199 | case exactn: | ||
| 4200 | DEBUG_PRINT1 (" End of pattern: fast loop.\n"); | ||
| 4201 | return 1; | ||
| 4202 | default: | ||
| 4203 | return 0; | ||
| 4204 | } | ||
| 4205 | } | ||
| 4206 | |||
| 4207 | else if ((re_opcode_t) *p2 == exactn | ||
| 4208 | || (bufp->newline_anchor && (re_opcode_t) *p2 == endline)) | ||
| 4209 | { | ||
| 4210 | register unsigned int c | ||
| 4211 | = *p2 == (unsigned char) endline ? '\n' : p2[2]; | ||
| 4212 | |||
| 4213 | if ((re_opcode_t) *p1 == exactn) | ||
| 4214 | { | ||
| 4215 | if (!(multibyte /* && (c != '\n') */ | ||
| 4216 | && BASE_LEADING_CODE_P (c)) | ||
| 4217 | ? c != p1[2] | ||
| 4218 | : (STRING_CHAR (&p2[2], pend - &p2[2]) | ||
| 4219 | != STRING_CHAR (&p1[2], pend - &p1[2]))) | ||
| 4220 | { | ||
| 4221 | DEBUG_PRINT3 (" '%c' != '%c' => fast loop.\n", c, p1[2]); | ||
| 4222 | return 1; | ||
| 4223 | } | ||
| 4224 | } | ||
| 4225 | |||
| 4226 | else if ((re_opcode_t) *p1 == charset | ||
| 4227 | || (re_opcode_t) *p1 == charset_not) | ||
| 4228 | { | ||
| 4229 | int not = (re_opcode_t) *p1 == charset_not; | ||
| 4230 | |||
| 4231 | if (multibyte /* && (c != '\n') */ | ||
| 4232 | && BASE_LEADING_CODE_P (c)) | ||
| 4233 | c = STRING_CHAR (&p2[2], pend - &p2[2]); | ||
| 4234 | |||
| 4235 | /* Test if C is listed in charset (or charset_not) | ||
| 4236 | at `p1'. */ | ||
| 4237 | if (SINGLE_BYTE_CHAR_P (c)) | ||
| 4238 | { | ||
| 4239 | if (c < CHARSET_BITMAP_SIZE (p1) * BYTEWIDTH | ||
| 4240 | && p1[2 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) | ||
| 4241 | not = !not; | ||
| 4242 | } | ||
| 4243 | else if (CHARSET_RANGE_TABLE_EXISTS_P (p1)) | ||
| 4244 | CHARSET_LOOKUP_RANGE_TABLE (not, c, p1); | ||
| 4245 | |||
| 4246 | /* `not' is equal to 1 if c would match, which means | ||
| 4247 | that we can't change to pop_failure_jump. */ | ||
| 4248 | if (!not) | ||
| 4249 | { | ||
| 4250 | DEBUG_PRINT1 (" No match => fast loop.\n"); | ||
| 4251 | return 1; | ||
| 4252 | } | ||
| 4253 | } | ||
| 4254 | else if ((re_opcode_t) *p1 == anychar | ||
| 4255 | && c == '\n') | ||
| 4256 | { | ||
| 4257 | DEBUG_PRINT1 (" . != \\n => fast loop.\n"); | ||
| 4258 | return 1; | ||
| 4259 | } | ||
| 4260 | } | ||
| 4261 | else if ((re_opcode_t) *p2 == charset | ||
| 4262 | || (re_opcode_t) *p2 == charset_not) | ||
| 4263 | { | ||
| 4264 | if ((re_opcode_t) *p1 == exactn) | ||
| 4265 | /* Reuse the code above. */ | ||
| 4266 | return mutually_exclusive_p (bufp, p2, p1); | ||
| 4267 | |||
| 4268 | |||
| 4269 | /* It is hard to list up all the character in charset | ||
| 4270 | P2 if it includes multibyte character. Give up in | ||
| 4271 | such case. */ | ||
| 4272 | else if (!multibyte || !CHARSET_RANGE_TABLE_EXISTS_P (p2)) | ||
| 4273 | { | ||
| 4274 | /* Now, we are sure that P2 has no range table. | ||
| 4275 | So, for the size of bitmap in P2, `p2[1]' is | ||
| 4276 | enough. But P1 may have range table, so the | ||
| 4277 | size of bitmap table of P1 is extracted by | ||
| 4278 | using macro `CHARSET_BITMAP_SIZE'. | ||
| 4279 | |||
| 4280 | Since we know that all the character listed in | ||
| 4281 | P2 is ASCII, it is enough to test only bitmap | ||
| 4282 | table of P1. */ | ||
| 4283 | |||
| 4284 | if (*p1 == *p2) | ||
| 4285 | { | ||
| 4286 | int idx; | ||
| 4287 | /* We win if the charset inside the loop | ||
| 4288 | has no overlap with the one after the loop. */ | ||
| 4289 | for (idx = 0; | ||
| 4290 | (idx < (int) p2[1] | ||
| 4291 | && idx < CHARSET_BITMAP_SIZE (p1)); | ||
| 4292 | idx++) | ||
| 4293 | if ((p2[2 + idx] & p1[2 + idx]) != 0) | ||
| 4294 | break; | ||
| 4295 | |||
| 4296 | if (idx == p2[1] | ||
| 4297 | || idx == CHARSET_BITMAP_SIZE (p1)) | ||
| 4298 | { | ||
| 4299 | DEBUG_PRINT1 (" No match => fast loop.\n"); | ||
| 4300 | return 1; | ||
| 4301 | } | ||
| 4302 | } | ||
| 4303 | else if ((re_opcode_t) *p1 == charset | ||
| 4304 | || (re_opcode_t) *p1 == charset_not) | ||
| 4305 | { | ||
| 4306 | int idx; | ||
| 4307 | /* We win if the charset_not inside the loop lists | ||
| 4308 | every character listed in the charset after. */ | ||
| 4309 | for (idx = 0; idx < (int) p2[1]; idx++) | ||
| 4310 | if (! (p2[2 + idx] == 0 | ||
| 4311 | || (idx < CHARSET_BITMAP_SIZE (p1) | ||
| 4312 | && ((p2[2 + idx] & ~ p1[2 + idx]) == 0)))) | ||
| 4313 | break; | ||
| 4314 | |||
| 4315 | if (idx == p2[1]) | ||
| 4316 | { | ||
| 4317 | DEBUG_PRINT1 (" No match => fast loop.\n"); | ||
| 4318 | return 1; | ||
| 4319 | } | ||
| 4320 | } | ||
| 4321 | } | ||
| 4322 | } | ||
| 4323 | |||
| 4324 | /* Safe default. */ | ||
| 4325 | return 0; | ||
| 4326 | } | ||
| 4327 | |||
| 4261 | 4328 | ||
| 4262 | /* Matching routines. */ | 4329 | /* Matching routines. */ |
| 4263 | 4330 | ||
| @@ -4351,10 +4418,6 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4351 | unsigned char *p = bufp->buffer; | 4418 | unsigned char *p = bufp->buffer; |
| 4352 | register unsigned char *pend = p + bufp->used; | 4419 | register unsigned char *pend = p + bufp->used; |
| 4353 | 4420 | ||
| 4354 | /* Mark the opcode just after a start_memory, so we can test for an | ||
| 4355 | empty subpattern when we get to the stop_memory. */ | ||
| 4356 | unsigned char *just_past_start_mem = 0; | ||
| 4357 | |||
| 4358 | /* We use this to map every character in the string. */ | 4421 | /* We use this to map every character in the string. */ |
| 4359 | RE_TRANSLATE_TYPE translate = bufp->translate; | 4422 | RE_TRANSLATE_TYPE translate = bufp->translate; |
| 4360 | 4423 | ||
| @@ -4363,13 +4426,11 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4363 | 4426 | ||
| 4364 | /* Failure point stack. Each place that can handle a failure further | 4427 | /* Failure point stack. Each place that can handle a failure further |
| 4365 | down the line pushes a failure point on this stack. It consists of | 4428 | down the line pushes a failure point on this stack. It consists of |
| 4366 | restart, regend, and reg_info for all registers corresponding to | 4429 | regstart, and regend for all registers corresponding to |
| 4367 | the subexpressions we're currently inside, plus the number of such | 4430 | the subexpressions we're currently inside, plus the number of such |
| 4368 | registers, and, finally, two char *'s. The first char * is where | 4431 | registers, and, finally, two char *'s. The first char * is where |
| 4369 | to resume scanning the pattern; the second one is where to resume | 4432 | to resume scanning the pattern; the second one is where to resume |
| 4370 | scanning the strings. If the latter is zero, the failure point is | 4433 | scanning the strings. */ |
| 4371 | a ``dummy''; if a failure happens and the failure point is a dummy, | ||
| 4372 | it gets discarded and the next next one is tried. */ | ||
| 4373 | #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ | 4434 | #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ |
| 4374 | fail_stack_type fail_stack; | 4435 | fail_stack_type fail_stack; |
| 4375 | #endif | 4436 | #endif |
| @@ -4387,10 +4448,6 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4387 | an element for register zero. */ | 4448 | an element for register zero. */ |
| 4388 | unsigned num_regs = bufp->re_nsub + 1; | 4449 | unsigned num_regs = bufp->re_nsub + 1; |
| 4389 | 4450 | ||
| 4390 | /* The currently active registers. */ | ||
| 4391 | unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG; | ||
| 4392 | unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG; | ||
| 4393 | |||
| 4394 | /* Information on the contents of registers. These are pointers into | 4451 | /* Information on the contents of registers. These are pointers into |
| 4395 | the input strings; they record just what was matched (on this | 4452 | the input strings; they record just what was matched (on this |
| 4396 | attempt) by a subexpression part of the pattern, that is, the | 4453 | attempt) by a subexpression part of the pattern, that is, the |
| @@ -4402,25 +4459,6 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4402 | const char **regstart, **regend; | 4459 | const char **regstart, **regend; |
| 4403 | #endif | 4460 | #endif |
| 4404 | 4461 | ||
| 4405 | /* If a group that's operated upon by a repetition operator fails to | ||
| 4406 | match anything, then the register for its start will need to be | ||
| 4407 | restored because it will have been set to wherever in the string we | ||
| 4408 | are when we last see its open-group operator. Similarly for a | ||
| 4409 | register's end. */ | ||
| 4410 | #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ | ||
| 4411 | const char **old_regstart, **old_regend; | ||
| 4412 | #endif | ||
| 4413 | |||
| 4414 | /* The is_active field of reg_info helps us keep track of which (possibly | ||
| 4415 | nested) subexpressions we are currently in. The matched_something | ||
| 4416 | field of reg_info[reg_num] helps us tell whether or not we have | ||
| 4417 | matched any of the pattern so far this time through the reg_num-th | ||
| 4418 | subexpression. These two fields get reset each time through any | ||
| 4419 | loop their register is in. */ | ||
| 4420 | #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ | ||
| 4421 | register_info_type *reg_info; | ||
| 4422 | #endif | ||
| 4423 | |||
| 4424 | /* The following record the register info as found in the above | 4462 | /* The following record the register info as found in the above |
| 4425 | variables when we find a match better than any we've seen before. | 4463 | variables when we find a match better than any we've seen before. |
| 4426 | This happens as we backtrack through the failure points, which in | 4464 | This happens as we backtrack through the failure points, which in |
| @@ -4440,15 +4478,6 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4440 | and need to test it, it's not garbage. */ | 4478 | and need to test it, it's not garbage. */ |
| 4441 | const char *match_end = NULL; | 4479 | const char *match_end = NULL; |
| 4442 | 4480 | ||
| 4443 | /* This helps SET_REGS_MATCHED avoid doing redundant work. */ | ||
| 4444 | int set_regs_matched_done = 0; | ||
| 4445 | |||
| 4446 | /* Used when we pop values we don't care about. */ | ||
| 4447 | #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ | ||
| 4448 | const char **reg_dummy; | ||
| 4449 | register_info_type *reg_info_dummy; | ||
| 4450 | #endif | ||
| 4451 | |||
| 4452 | #ifdef DEBUG | 4481 | #ifdef DEBUG |
| 4453 | /* Counts the total number of registers pushed. */ | 4482 | /* Counts the total number of registers pushed. */ |
| 4454 | unsigned num_regs_pushed = 0; | 4483 | unsigned num_regs_pushed = 0; |
| @@ -4468,16 +4497,10 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4468 | { | 4497 | { |
| 4469 | regstart = REGEX_TALLOC (num_regs, const char *); | 4498 | regstart = REGEX_TALLOC (num_regs, const char *); |
| 4470 | regend = REGEX_TALLOC (num_regs, const char *); | 4499 | regend = REGEX_TALLOC (num_regs, const char *); |
| 4471 | old_regstart = REGEX_TALLOC (num_regs, const char *); | ||
| 4472 | old_regend = REGEX_TALLOC (num_regs, const char *); | ||
| 4473 | best_regstart = REGEX_TALLOC (num_regs, const char *); | 4500 | best_regstart = REGEX_TALLOC (num_regs, const char *); |
| 4474 | best_regend = REGEX_TALLOC (num_regs, const char *); | 4501 | best_regend = REGEX_TALLOC (num_regs, const char *); |
| 4475 | reg_info = REGEX_TALLOC (num_regs, register_info_type); | ||
| 4476 | reg_dummy = REGEX_TALLOC (num_regs, const char *); | ||
| 4477 | reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type); | ||
| 4478 | 4502 | ||
| 4479 | if (!(regstart && regend && old_regstart && old_regend && reg_info | 4503 | if (!(regstart && regend && best_regstart && best_regend)) |
| 4480 | && best_regstart && best_regend && reg_dummy && reg_info_dummy)) | ||
| 4481 | { | 4504 | { |
| 4482 | FREE_VARIABLES (); | 4505 | FREE_VARIABLES (); |
| 4483 | return -2; | 4506 | return -2; |
| @@ -4487,9 +4510,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4487 | { | 4510 | { |
| 4488 | /* We must initialize all our variables to NULL, so that | 4511 | /* We must initialize all our variables to NULL, so that |
| 4489 | `FREE_VARIABLES' doesn't try to free them. */ | 4512 | `FREE_VARIABLES' doesn't try to free them. */ |
| 4490 | regstart = regend = old_regstart = old_regend = best_regstart | 4513 | regstart = regend = best_regstart = best_regend = NULL; |
| 4491 | = best_regend = reg_dummy = NULL; | ||
| 4492 | reg_info = reg_info_dummy = (register_info_type *) NULL; | ||
| 4493 | } | 4514 | } |
| 4494 | #endif /* MATCH_MAY_ALLOCATE */ | 4515 | #endif /* MATCH_MAY_ALLOCATE */ |
| 4495 | 4516 | ||
| @@ -4505,13 +4526,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4505 | register information struct. */ | 4526 | register information struct. */ |
| 4506 | for (mcnt = 1; mcnt < num_regs; mcnt++) | 4527 | for (mcnt = 1; mcnt < num_regs; mcnt++) |
| 4507 | { | 4528 | { |
| 4508 | regstart[mcnt] = regend[mcnt] | 4529 | regstart[mcnt] = regend[mcnt] = REG_UNSET_VALUE; |
| 4509 | = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE; | ||
| 4510 | |||
| 4511 | REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE; | ||
| 4512 | IS_ACTIVE (reg_info[mcnt]) = 0; | ||
| 4513 | MATCHED_SOMETHING (reg_info[mcnt]) = 0; | ||
| 4514 | EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0; | ||
| 4515 | } | 4530 | } |
| 4516 | 4531 | ||
| 4517 | /* We move `string1' into `string2' if the latter's empty -- but not if | 4532 | /* We move `string1' into `string2' if the latter's empty -- but not if |
| @@ -4566,7 +4581,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4566 | fails at this starting point in the input data. */ | 4581 | fails at this starting point in the input data. */ |
| 4567 | for (;;) | 4582 | for (;;) |
| 4568 | { | 4583 | { |
| 4569 | DEBUG_PRINT2 ("\n0x%x: ", p); | 4584 | DEBUG_PRINT2 ("\n%p: ", p); |
| 4570 | 4585 | ||
| 4571 | if (p == pend) | 4586 | if (p == pend) |
| 4572 | { /* End of pattern means we might have succeeded. */ | 4587 | { /* End of pattern means we might have succeeded. */ |
| @@ -4796,7 +4811,6 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4796 | } | 4811 | } |
| 4797 | while (--mcnt); | 4812 | while (--mcnt); |
| 4798 | } | 4813 | } |
| 4799 | SET_REGS_MATCHED (); | ||
| 4800 | break; | 4814 | break; |
| 4801 | 4815 | ||
| 4802 | 4816 | ||
| @@ -4828,7 +4842,6 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4828 | && buf_ch == '\000')) | 4842 | && buf_ch == '\000')) |
| 4829 | goto fail; | 4843 | goto fail; |
| 4830 | 4844 | ||
| 4831 | SET_REGS_MATCHED (); | ||
| 4832 | DEBUG_PRINT2 (" Matched `%d'.\n", *d); | 4845 | DEBUG_PRINT2 (" Matched `%d'.\n", *d); |
| 4833 | d += buf_charlen; | 4846 | d += buf_charlen; |
| 4834 | } | 4847 | } |
| @@ -4913,195 +4926,55 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4913 | 4926 | ||
| 4914 | if (!not) goto fail; | 4927 | if (!not) goto fail; |
| 4915 | 4928 | ||
| 4916 | SET_REGS_MATCHED (); | ||
| 4917 | d += len; | 4929 | d += len; |
| 4918 | break; | 4930 | break; |
| 4919 | } | 4931 | } |
| 4920 | 4932 | ||
| 4921 | 4933 | ||
| 4922 | /* The beginning of a group is represented by start_memory. | 4934 | /* The beginning of a group is represented by start_memory. |
| 4923 | The arguments are the register number in the next byte, and the | 4935 | The argument is the register number. The text |
| 4924 | number of groups inner to this one in the next. The text | ||
| 4925 | matched within the group is recorded (in the internal | 4936 | matched within the group is recorded (in the internal |
| 4926 | registers data structure) under the register number. */ | 4937 | registers data structure) under the register number. */ |
| 4927 | case start_memory: | 4938 | case start_memory: |
| 4928 | DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]); | 4939 | DEBUG_PRINT2 ("EXECUTING start_memory %d:\n", *p); |
| 4929 | 4940 | ||
| 4930 | /* Find out if this group can match the empty string. */ | 4941 | /* In case we need to undo this operation (via backtracking). */ |
| 4931 | p1 = p; /* To send to group_match_null_string_p. */ | 4942 | PUSH_FAILURE_REG ((unsigned int)*p); |
| 4932 | |||
| 4933 | if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE) | ||
| 4934 | REG_MATCH_NULL_STRING_P (reg_info[*p]) | ||
| 4935 | = group_match_null_string_p (&p1, pend, reg_info); | ||
| 4936 | |||
| 4937 | /* Save the position in the string where we were the last time | ||
| 4938 | we were at this open-group operator in case the group is | ||
| 4939 | operated upon by a repetition operator, e.g., with `(a*)*b' | ||
| 4940 | against `ab'; then we want to ignore where we are now in | ||
| 4941 | the string in case this attempt to match fails. */ | ||
| 4942 | old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) | ||
| 4943 | ? REG_UNSET (regstart[*p]) ? d : regstart[*p] | ||
| 4944 | : regstart[*p]; | ||
| 4945 | DEBUG_PRINT2 (" old_regstart: %d\n", | ||
| 4946 | POINTER_TO_OFFSET (old_regstart[*p])); | ||
| 4947 | 4943 | ||
| 4948 | regstart[*p] = d; | 4944 | regstart[*p] = d; |
| 4945 | regend[*p] = REG_UNSET_VALUE; /* probably unnecessary. -sm */ | ||
| 4949 | DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p])); | 4946 | DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p])); |
| 4950 | 4947 | ||
| 4951 | IS_ACTIVE (reg_info[*p]) = 1; | ||
| 4952 | MATCHED_SOMETHING (reg_info[*p]) = 0; | ||
| 4953 | |||
| 4954 | /* Clear this whenever we change the register activity status. */ | ||
| 4955 | set_regs_matched_done = 0; | ||
| 4956 | |||
| 4957 | /* This is the new highest active register. */ | ||
| 4958 | highest_active_reg = *p; | ||
| 4959 | |||
| 4960 | /* If nothing was active before, this is the new lowest active | ||
| 4961 | register. */ | ||
| 4962 | if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) | ||
| 4963 | lowest_active_reg = *p; | ||
| 4964 | |||
| 4965 | /* Move past the register number and inner group count. */ | 4948 | /* Move past the register number and inner group count. */ |
| 4966 | p += 2; | 4949 | p += 1; |
| 4967 | just_past_start_mem = p; | ||
| 4968 | |||
| 4969 | break; | 4950 | break; |
| 4970 | 4951 | ||
| 4971 | 4952 | ||
| 4972 | /* The stop_memory opcode represents the end of a group. Its | 4953 | /* The stop_memory opcode represents the end of a group. Its |
| 4973 | arguments are the same as start_memory's: the register | 4954 | argument is the same as start_memory's: the register number. */ |
| 4974 | number, and the number of inner groups. */ | ||
| 4975 | case stop_memory: | 4955 | case stop_memory: |
| 4976 | DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]); | 4956 | DEBUG_PRINT2 ("EXECUTING stop_memory %d:\n", *p); |
| 4977 | 4957 | ||
| 4978 | /* We need to save the string position the last time we were at | 4958 | assert (!REG_UNSET (regstart[*p])); |
| 4979 | this close-group operator in case the group is operated | 4959 | /* Strictly speaking, there should be code such as: |
| 4980 | upon by a repetition operator, e.g., with `((a*)*(b*)*)*' | 4960 | |
| 4981 | against `aba'; then we want to ignore where we are now in | 4961 | assert (REG_UNSET (regend[*p])); |
| 4982 | the string in case this attempt to match fails. */ | 4962 | PUSH_FAILURE_REGSTOP ((unsigned int)*p); |
| 4983 | old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) | 4963 | |
| 4984 | ? REG_UNSET (regend[*p]) ? d : regend[*p] | 4964 | But the only info to be pushed is regend[*p] and it is known to |
| 4985 | : regend[*p]; | 4965 | be UNSET, so there really isn't anything to push. |
| 4986 | DEBUG_PRINT2 (" old_regend: %d\n", | 4966 | Not pushing anything, on the other hand deprives us from the |
| 4987 | POINTER_TO_OFFSET (old_regend[*p])); | 4967 | guarantee that regend[*p] is UNSET since undoing this operation |
| 4968 | will not reset its value properly. This is not important since | ||
| 4969 | the value will only be read on the next start_memory or at | ||
| 4970 | the very end and both events can only happen if this stop_memory | ||
| 4971 | is *not* undone. */ | ||
| 4988 | 4972 | ||
| 4989 | regend[*p] = d; | 4973 | regend[*p] = d; |
| 4990 | DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p])); | 4974 | DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p])); |
| 4991 | 4975 | ||
| 4992 | /* This register isn't active anymore. */ | ||
| 4993 | IS_ACTIVE (reg_info[*p]) = 0; | ||
| 4994 | |||
| 4995 | /* Clear this whenever we change the register activity status. */ | ||
| 4996 | set_regs_matched_done = 0; | ||
| 4997 | |||
| 4998 | /* If this was the only register active, nothing is active | ||
| 4999 | anymore. */ | ||
| 5000 | if (lowest_active_reg == highest_active_reg) | ||
| 5001 | { | ||
| 5002 | lowest_active_reg = NO_LOWEST_ACTIVE_REG; | ||
| 5003 | highest_active_reg = NO_HIGHEST_ACTIVE_REG; | ||
| 5004 | } | ||
| 5005 | else | ||
| 5006 | { /* We must scan for the new highest active register, since | ||
| 5007 | it isn't necessarily one less than now: consider | ||
| 5008 | (a(b)c(d(e)f)g). When group 3 ends, after the f), the | ||
| 5009 | new highest active register is 1. */ | ||
| 5010 | unsigned char r = *p - 1; | ||
| 5011 | while (r > 0 && !IS_ACTIVE (reg_info[r])) | ||
| 5012 | r--; | ||
| 5013 | |||
| 5014 | /* If we end up at register zero, that means that we saved | ||
| 5015 | the registers as the result of an `on_failure_jump', not | ||
| 5016 | a `start_memory', and we jumped to past the innermost | ||
| 5017 | `stop_memory'. For example, in ((.)*) we save | ||
| 5018 | registers 1 and 2 as a result of the *, but when we pop | ||
| 5019 | back to the second ), we are at the stop_memory 1. | ||
| 5020 | Thus, nothing is active. */ | ||
| 5021 | if (r == 0) | ||
| 5022 | { | ||
| 5023 | lowest_active_reg = NO_LOWEST_ACTIVE_REG; | ||
| 5024 | highest_active_reg = NO_HIGHEST_ACTIVE_REG; | ||
| 5025 | } | ||
| 5026 | else | ||
| 5027 | highest_active_reg = r; | ||
| 5028 | } | ||
| 5029 | |||
| 5030 | /* If just failed to match something this time around with a | ||
| 5031 | group that's operated on by a repetition operator, try to | ||
| 5032 | force exit from the ``loop'', and restore the register | ||
| 5033 | information for this group that we had before trying this | ||
| 5034 | last match. */ | ||
| 5035 | if ((!MATCHED_SOMETHING (reg_info[*p]) | ||
| 5036 | || just_past_start_mem == p - 1) | ||
| 5037 | && (p + 2) < pend) | ||
| 5038 | { | ||
| 5039 | boolean is_a_jump_n = false; | ||
| 5040 | |||
| 5041 | p1 = p + 2; | ||
| 5042 | mcnt = 0; | ||
| 5043 | switch ((re_opcode_t) *p1++) | ||
| 5044 | { | ||
| 5045 | case jump_n: | ||
| 5046 | is_a_jump_n = true; | ||
| 5047 | case pop_failure_jump: | ||
| 5048 | case maybe_pop_jump: | ||
| 5049 | case jump: | ||
| 5050 | case dummy_failure_jump: | ||
| 5051 | EXTRACT_NUMBER_AND_INCR (mcnt, p1); | ||
| 5052 | if (is_a_jump_n) | ||
| 5053 | p1 += 2; | ||
| 5054 | break; | ||
| 5055 | |||
| 5056 | default: | ||
| 5057 | /* do nothing */ ; | ||
| 5058 | } | ||
| 5059 | p1 += mcnt; | ||
| 5060 | |||
| 5061 | /* If the next operation is a jump backwards in the pattern | ||
| 5062 | to an on_failure_jump right before the start_memory | ||
| 5063 | corresponding to this stop_memory, exit from the loop | ||
| 5064 | by forcing a failure after pushing on the stack the | ||
| 5065 | on_failure_jump's jump in the pattern, and d. */ | ||
| 5066 | if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump | ||
| 5067 | && (re_opcode_t) p1[3] == start_memory && p1[4] == *p) | ||
| 5068 | { | ||
| 5069 | /* If this group ever matched anything, then restore | ||
| 5070 | what its registers were before trying this last | ||
| 5071 | failed match, e.g., with `(a*)*b' against `ab' for | ||
| 5072 | regstart[1], and, e.g., with `((a*)*(b*)*)*' | ||
| 5073 | against `aba' for regend[3]. | ||
| 5074 | |||
| 5075 | Also restore the registers for inner groups for, | ||
| 5076 | e.g., `((a*)(b*))*' against `aba' (register 3 would | ||
| 5077 | otherwise get trashed). */ | ||
| 5078 | |||
| 5079 | if (EVER_MATCHED_SOMETHING (reg_info[*p])) | ||
| 5080 | { | ||
| 5081 | unsigned r; | ||
| 5082 | |||
| 5083 | EVER_MATCHED_SOMETHING (reg_info[*p]) = 0; | ||
| 5084 | |||
| 5085 | /* Restore this and inner groups' (if any) registers. */ | ||
| 5086 | for (r = *p; r < *p + *(p + 1); r++) | ||
| 5087 | { | ||
| 5088 | regstart[r] = old_regstart[r]; | ||
| 5089 | |||
| 5090 | /* xx why this test? */ | ||
| 5091 | if (old_regend[r] >= regstart[r]) | ||
| 5092 | regend[r] = old_regend[r]; | ||
| 5093 | } | ||
| 5094 | } | ||
| 5095 | p1++; | ||
| 5096 | EXTRACT_NUMBER_AND_INCR (mcnt, p1); | ||
| 5097 | PUSH_FAILURE_POINT (p1 + mcnt, d, -2); | ||
| 5098 | |||
| 5099 | goto fail; | ||
| 5100 | } | ||
| 5101 | } | ||
| 5102 | |||
| 5103 | /* Move past the register number and the inner group count. */ | 4976 | /* Move past the register number and the inner group count. */ |
| 5104 | p += 2; | 4977 | p += 1; |
| 5105 | break; | 4978 | break; |
| 5106 | 4979 | ||
| 5107 | 4980 | ||
| @@ -5162,9 +5035,6 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5162 | : bcmp (d, d2, mcnt)) | 5035 | : bcmp (d, d2, mcnt)) |
| 5163 | goto fail; | 5036 | goto fail; |
| 5164 | d += mcnt, d2 += mcnt; | 5037 | d += mcnt, d2 += mcnt; |
| 5165 | |||
| 5166 | /* Do this because we've match some characters. */ | ||
| 5167 | SET_REGS_MATCHED (); | ||
| 5168 | } | 5038 | } |
| 5169 | } | 5039 | } |
| 5170 | break; | 5040 | break; |
| @@ -5224,7 +5094,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5224 | 5094 | ||
| 5225 | /* on_failure_keep_string_jump is used to optimize `.*\n'. It | 5095 | /* on_failure_keep_string_jump is used to optimize `.*\n'. It |
| 5226 | pushes NULL as the value for the string on the stack. Then | 5096 | pushes NULL as the value for the string on the stack. Then |
| 5227 | `pop_failure_point' will keep the current value for the | 5097 | `POP_FAILURE_POINT' will keep the current value for the |
| 5228 | string, instead of restoring it. To see why, consider | 5098 | string, instead of restoring it. To see why, consider |
| 5229 | matching `foo\nbar' against `.*\n'. The .* matches the foo; | 5099 | matching `foo\nbar' against `.*\n'. The .* matches the foo; |
| 5230 | then the . fails against the \n. But the next thing we want | 5100 | then the . fails against the \n. But the next thing we want |
| @@ -5239,12 +5109,47 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5239 | `anychar's code to do something besides goto fail in this | 5109 | `anychar's code to do something besides goto fail in this |
| 5240 | case; that seems worse than this. */ | 5110 | case; that seems worse than this. */ |
| 5241 | case on_failure_keep_string_jump: | 5111 | case on_failure_keep_string_jump: |
| 5242 | DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump"); | 5112 | EXTRACT_NUMBER_AND_INCR (mcnt, p); |
| 5113 | DEBUG_PRINT3 ("EXECUTING on_failure_keep_string_jump %d (to %p):\n", | ||
| 5114 | mcnt, p + mcnt); | ||
| 5115 | |||
| 5116 | PUSH_FAILURE_POINT (p - 3, NULL); | ||
| 5117 | break; | ||
| 5243 | 5118 | ||
| 5119 | case on_failure_jump_exclusive: | ||
| 5244 | EXTRACT_NUMBER_AND_INCR (mcnt, p); | 5120 | EXTRACT_NUMBER_AND_INCR (mcnt, p); |
| 5245 | DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt); | 5121 | DEBUG_PRINT3 ("EXECUTING on_failure_jump_exclusive %d (to %p):\n", |
| 5122 | mcnt, p + mcnt); | ||
| 5246 | 5123 | ||
| 5247 | PUSH_FAILURE_POINT (p + mcnt, NULL, -2); | 5124 | if (! FAIL_STACK_EMPTY () |
| 5125 | && FAILURE_PAT (TOP_FAILURE_HANDLE ()) == (p - 3) | ||
| 5126 | && fail_stack.avail == fail_stack.frame) | ||
| 5127 | { | ||
| 5128 | /* We are trying to push failure F2 onto the stack but there | ||
| 5129 | is already a failure F1 pushed from the same instruction. | ||
| 5130 | Between F1 and now, something has matched (else this is an | ||
| 5131 | improper use of on_failure_jump_exclusive), so that we know | ||
| 5132 | that the fail-destination of F1 cannot match, hence we can | ||
| 5133 | pop F1 before pushing F2. Instead of doing this pop/push, | ||
| 5134 | we manually turn F1 into F2. | ||
| 5135 | `fail_stack.avail == fail_stack.frame' makes sure | ||
| 5136 | that popping F1 doesn't involve registers, else | ||
| 5137 | this optimization cannot be done so trivially. */ | ||
| 5138 | assert (FAILURE_STR (TOP_FAILURE_HANDLE ()) != d); | ||
| 5139 | FAILURE_STR (TOP_FAILURE_HANDLE ()) = d; | ||
| 5140 | } | ||
| 5141 | else | ||
| 5142 | PUSH_FAILURE_POINT (p - 3, d); | ||
| 5143 | break; | ||
| 5144 | |||
| 5145 | case on_failure_jump_loop: | ||
| 5146 | on_failure: | ||
| 5147 | EXTRACT_NUMBER_AND_INCR (mcnt, p); | ||
| 5148 | DEBUG_PRINT3 ("EXECUTING on_failure_jump_loop %d (to %p):\n", | ||
| 5149 | mcnt, p + mcnt); | ||
| 5150 | |||
| 5151 | CHECK_INFINITE_LOOP (p - 3, d); | ||
| 5152 | PUSH_FAILURE_POINT (p - 3, d); | ||
| 5248 | break; | 5153 | break; |
| 5249 | 5154 | ||
| 5250 | 5155 | ||
| @@ -5261,272 +5166,55 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5261 | the repetition text and either the following jump or | 5166 | the repetition text and either the following jump or |
| 5262 | pop_failure_jump back to this on_failure_jump. */ | 5167 | pop_failure_jump back to this on_failure_jump. */ |
| 5263 | case on_failure_jump: | 5168 | case on_failure_jump: |
| 5264 | on_failure: | ||
| 5265 | DEBUG_PRINT1 ("EXECUTING on_failure_jump"); | ||
| 5266 | 5169 | ||
| 5267 | #if defined (WINDOWSNT) && defined (emacs) | 5170 | #if defined (WINDOWSNT) && defined (emacs) |
| 5268 | QUIT; | 5171 | QUIT; |
| 5269 | #endif | 5172 | #endif |
| 5270 | 5173 | ||
| 5271 | EXTRACT_NUMBER_AND_INCR (mcnt, p); | 5174 | EXTRACT_NUMBER_AND_INCR (mcnt, p); |
| 5272 | DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt); | 5175 | DEBUG_PRINT3 ("EXECUTING on_failure_jump %d (to %p):\n", |
| 5273 | 5176 | mcnt, p + mcnt); | |
| 5274 | /* If this on_failure_jump comes right before a group (i.e., | ||
| 5275 | the original * applied to a group), save the information | ||
| 5276 | for that group and all inner ones, so that if we fail back | ||
| 5277 | to this point, the group's information will be correct. | ||
| 5278 | For example, in \(a*\)*\1, we need the preceding group, | ||
| 5279 | and in \(zz\(a*\)b*\)\2, we need the inner group. */ | ||
| 5280 | |||
| 5281 | /* We can't use `p' to check ahead because we push | ||
| 5282 | a failure point to `p + mcnt' after we do this. */ | ||
| 5283 | p1 = p; | ||
| 5284 | |||
| 5285 | /* We need to skip no_op's before we look for the | ||
| 5286 | start_memory in case this on_failure_jump is happening as | ||
| 5287 | the result of a completed succeed_n, as in \(a\)\{1,3\}b\1 | ||
| 5288 | against aba. */ | ||
| 5289 | while (p1 < pend && (re_opcode_t) *p1 == no_op) | ||
| 5290 | p1++; | ||
| 5291 | |||
| 5292 | if (p1 < pend && (re_opcode_t) *p1 == start_memory) | ||
| 5293 | { | ||
| 5294 | /* We have a new highest active register now. This will | ||
| 5295 | get reset at the start_memory we are about to get to, | ||
| 5296 | but we will have saved all the registers relevant to | ||
| 5297 | this repetition op, as described above. */ | ||
| 5298 | highest_active_reg = *(p1 + 1) + *(p1 + 2); | ||
| 5299 | if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) | ||
| 5300 | lowest_active_reg = *(p1 + 1); | ||
| 5301 | } | ||
| 5302 | 5177 | ||
| 5303 | DEBUG_PRINT1 (":\n"); | 5178 | PUSH_FAILURE_POINT (p -3, d); |
| 5304 | PUSH_FAILURE_POINT (p + mcnt, d, -2); | ||
| 5305 | break; | 5179 | break; |
| 5306 | 5180 | ||
| 5307 | 5181 | /* This operation is used for greedy * and +. | |
| 5308 | /* A smart repeat ends with `maybe_pop_jump'. | 5182 | Compare the beginning of the repeat with what in the |
| 5309 | We change it to either `pop_failure_jump' or `jump'. */ | 5183 | pattern follows its end. If we can establish that there |
| 5310 | case maybe_pop_jump: | 5184 | is nothing that they would both match, i.e., that we |
| 5185 | would have to backtrack because of (as in, e.g., `a*a') | ||
| 5186 | then we can use a non-backtracking loop based on | ||
| 5187 | on_failure_jump_exclusive instead of on_failure_jump_loop. */ | ||
| 5188 | case on_failure_jump_smart: | ||
| 5311 | #if defined (WINDOWSNT) && defined (emacs) | 5189 | #if defined (WINDOWSNT) && defined (emacs) |
| 5312 | QUIT; | 5190 | QUIT; |
| 5313 | #endif | 5191 | #endif |
| 5314 | EXTRACT_NUMBER_AND_INCR (mcnt, p); | 5192 | EXTRACT_NUMBER_AND_INCR (mcnt, p); |
| 5315 | DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt); | 5193 | DEBUG_PRINT3 ("EXECUTING on_failure_jump_smart %d (to %p).\n", |
| 5194 | mcnt, p + mcnt); | ||
| 5316 | { | 5195 | { |
| 5317 | register unsigned char *p2 = p; | 5196 | unsigned char *p1 = p; /* Next operation. */ |
| 5318 | 5197 | unsigned char *p2 = p + mcnt; /* Destination of the jump. */ | |
| 5319 | /* Compare the beginning of the repeat with what in the | ||
| 5320 | pattern follows its end. If we can establish that there | ||
| 5321 | is nothing that they would both match, i.e., that we | ||
| 5322 | would have to backtrack because of (as in, e.g., `a*a') | ||
| 5323 | then we can change to pop_failure_jump, because we'll | ||
| 5324 | never have to backtrack. | ||
| 5325 | |||
| 5326 | This is not true in the case of alternatives: in | ||
| 5327 | `(a|ab)*' we do need to backtrack to the `ab' alternative | ||
| 5328 | (e.g., if the string was `ab'). But instead of trying to | ||
| 5329 | detect that here, the alternative has put on a dummy | ||
| 5330 | failure point which is what we will end up popping. */ | ||
| 5331 | |||
| 5332 | /* Skip over open/close-group commands. | ||
| 5333 | If what follows this loop is a ...+ construct, | ||
| 5334 | look at what begins its body, since we will have to | ||
| 5335 | match at least one of that. */ | ||
| 5336 | while (1) | ||
| 5337 | { | ||
| 5338 | if (p2 + 2 < pend | ||
| 5339 | && ((re_opcode_t) *p2 == stop_memory | ||
| 5340 | || (re_opcode_t) *p2 == start_memory)) | ||
| 5341 | p2 += 3; | ||
| 5342 | else if (p2 + 6 < pend | ||
| 5343 | && (re_opcode_t) *p2 == dummy_failure_jump) | ||
| 5344 | p2 += 6; | ||
| 5345 | else | ||
| 5346 | break; | ||
| 5347 | } | ||
| 5348 | 5198 | ||
| 5349 | p1 = p + mcnt; | 5199 | p -= 3; /* Reset so that we will re-execute the |
| 5350 | /* p1[0] ... p1[2] are the `on_failure_jump' corresponding | 5200 | instruction once it's been changed. */ |
| 5351 | to the `maybe_finalize_jump' of this case. Examine what | ||
| 5352 | follows. */ | ||
| 5353 | 5201 | ||
| 5354 | /* If we're at the end of the pattern, we can change. */ | 5202 | /* DEBUG_STATEMENT (debug = 1); */ |
| 5355 | if (p2 == pend) | 5203 | if (mutually_exclusive_p (bufp, p1, p2)) |
| 5356 | { | 5204 | { |
| 5357 | /* Consider what happens when matching ":\(.*\)" | 5205 | /* Use a fast `on_failure_keep_string_jump' loop. */ |
| 5358 | against ":/". I don't really understand this code | 5206 | *p = (unsigned char) on_failure_jump_exclusive; |
| 5359 | yet. */ | 5207 | /* STORE_NUMBER (p2 - 2, mcnt + 3); */ |
| 5360 | p[-3] = (unsigned char) pop_failure_jump; | ||
| 5361 | DEBUG_PRINT1 | ||
| 5362 | (" End of pattern: change to `pop_failure_jump'.\n"); | ||
| 5363 | } | 5208 | } |
| 5364 | 5209 | else | |
| 5365 | else if ((re_opcode_t) *p2 == exactn | ||
| 5366 | || (bufp->newline_anchor && (re_opcode_t) *p2 == endline)) | ||
| 5367 | { | 5210 | { |
| 5368 | register unsigned int c | 5211 | /* Default to a safe `on_failure_jump' loop. */ |
| 5369 | = *p2 == (unsigned char) endline ? '\n' : p2[2]; | 5212 | DEBUG_PRINT1 (" smart default => slow loop.\n"); |
| 5370 | 5213 | *p = (unsigned char) on_failure_jump_loop; | |
| 5371 | if ((re_opcode_t) p1[3] == exactn) | ||
| 5372 | { | ||
| 5373 | if (!(multibyte /* && (c != '\n') */ | ||
| 5374 | && BASE_LEADING_CODE_P (c)) | ||
| 5375 | ? c != p1[5] | ||
| 5376 | : (STRING_CHAR (&p2[2], pend - &p2[2]) | ||
| 5377 | != STRING_CHAR (&p1[5], pend - &p1[5]))) | ||
| 5378 | { | ||
| 5379 | p[-3] = (unsigned char) pop_failure_jump; | ||
| 5380 | DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", | ||
| 5381 | c, p1[5]); | ||
| 5382 | } | ||
| 5383 | } | ||
| 5384 | |||
| 5385 | else if ((re_opcode_t) p1[3] == charset | ||
| 5386 | || (re_opcode_t) p1[3] == charset_not) | ||
| 5387 | { | ||
| 5388 | int not = (re_opcode_t) p1[3] == charset_not; | ||
| 5389 | |||
| 5390 | if (multibyte /* && (c != '\n') */ | ||
| 5391 | && BASE_LEADING_CODE_P (c)) | ||
| 5392 | c = STRING_CHAR (&p2[2], pend - &p2[2]); | ||
| 5393 | |||
| 5394 | /* Test if C is listed in charset (or charset_not) | ||
| 5395 | at `&p1[3]'. */ | ||
| 5396 | if (SINGLE_BYTE_CHAR_P (c)) | ||
| 5397 | { | ||
| 5398 | if (c < CHARSET_BITMAP_SIZE (&p1[3]) * BYTEWIDTH | ||
| 5399 | && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) | ||
| 5400 | not = !not; | ||
| 5401 | } | ||
| 5402 | else if (CHARSET_RANGE_TABLE_EXISTS_P (&p1[3])) | ||
| 5403 | CHARSET_LOOKUP_RANGE_TABLE (not, c, &p1[3]); | ||
| 5404 | |||
| 5405 | /* `not' is equal to 1 if c would match, which means | ||
| 5406 | that we can't change to pop_failure_jump. */ | ||
| 5407 | if (!not) | ||
| 5408 | { | ||
| 5409 | p[-3] = (unsigned char) pop_failure_jump; | ||
| 5410 | DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); | ||
| 5411 | } | ||
| 5412 | } | ||
| 5413 | } | ||
| 5414 | else if ((re_opcode_t) *p2 == charset) | ||
| 5415 | { | ||
| 5416 | if ((re_opcode_t) p1[3] == exactn) | ||
| 5417 | { | ||
| 5418 | register unsigned int c = p1[5]; | ||
| 5419 | int not = 0; | ||
| 5420 | |||
| 5421 | if (multibyte && BASE_LEADING_CODE_P (c)) | ||
| 5422 | c = STRING_CHAR (&p1[5], pend - &p1[5]); | ||
| 5423 | |||
| 5424 | /* Test if C is listed in charset at `p2'. */ | ||
| 5425 | if (SINGLE_BYTE_CHAR_P (c)) | ||
| 5426 | { | ||
| 5427 | if (c < CHARSET_BITMAP_SIZE (p2) * BYTEWIDTH | ||
| 5428 | && (p2[2 + c / BYTEWIDTH] | ||
| 5429 | & (1 << (c % BYTEWIDTH)))) | ||
| 5430 | not = !not; | ||
| 5431 | } | ||
| 5432 | else if (CHARSET_RANGE_TABLE_EXISTS_P (p2)) | ||
| 5433 | CHARSET_LOOKUP_RANGE_TABLE (not, c, p2); | ||
| 5434 | |||
| 5435 | if (!not) | ||
| 5436 | { | ||
| 5437 | p[-3] = (unsigned char) pop_failure_jump; | ||
| 5438 | DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); | ||
| 5439 | } | ||
| 5440 | } | ||
| 5441 | |||
| 5442 | /* It is hard to list up all the character in charset | ||
| 5443 | P2 if it includes multibyte character. Give up in | ||
| 5444 | such case. */ | ||
| 5445 | else if (!multibyte || !CHARSET_RANGE_TABLE_EXISTS_P (p2)) | ||
| 5446 | { | ||
| 5447 | /* Now, we are sure that P2 has no range table. | ||
| 5448 | So, for the size of bitmap in P2, `p2[1]' is | ||
| 5449 | enough. But P1 may have range table, so the | ||
| 5450 | size of bitmap table of P1 is extracted by | ||
| 5451 | using macro `CHARSET_BITMAP_SIZE'. | ||
| 5452 | |||
| 5453 | Since we know that all the character listed in | ||
| 5454 | P2 is ASCII, it is enough to test only bitmap | ||
| 5455 | table of P1. */ | ||
| 5456 | |||
| 5457 | if ((re_opcode_t) p1[3] == charset_not) | ||
| 5458 | { | ||
| 5459 | int idx; | ||
| 5460 | /* We win if the charset_not inside the loop lists | ||
| 5461 | every character listed in the charset after. */ | ||
| 5462 | for (idx = 0; idx < (int) p2[1]; idx++) | ||
| 5463 | if (! (p2[2 + idx] == 0 | ||
| 5464 | || (idx < CHARSET_BITMAP_SIZE (&p1[3]) | ||
| 5465 | && ((p2[2 + idx] & ~ p1[5 + idx]) == 0)))) | ||
| 5466 | break; | ||
| 5467 | |||
| 5468 | if (idx == p2[1]) | ||
| 5469 | { | ||
| 5470 | p[-3] = (unsigned char) pop_failure_jump; | ||
| 5471 | DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); | ||
| 5472 | } | ||
| 5473 | } | ||
| 5474 | else if ((re_opcode_t) p1[3] == charset) | ||
| 5475 | { | ||
| 5476 | int idx; | ||
| 5477 | /* We win if the charset inside the loop | ||
| 5478 | has no overlap with the one after the loop. */ | ||
| 5479 | for (idx = 0; | ||
| 5480 | (idx < (int) p2[1] | ||
| 5481 | && idx < CHARSET_BITMAP_SIZE (&p1[3])); | ||
| 5482 | idx++) | ||
| 5483 | if ((p2[2 + idx] & p1[5 + idx]) != 0) | ||
| 5484 | break; | ||
| 5485 | |||
| 5486 | if (idx == p2[1] | ||
| 5487 | || idx == CHARSET_BITMAP_SIZE (&p1[3])) | ||
| 5488 | { | ||
| 5489 | p[-3] = (unsigned char) pop_failure_jump; | ||
| 5490 | DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); | ||
| 5491 | } | ||
| 5492 | } | ||
| 5493 | } | 5214 | } |
| 5215 | /* DEBUG_STATEMENT (debug = 0); */ | ||
| 5494 | } | 5216 | } |
| 5495 | } | 5217 | break; |
| 5496 | p -= 2; /* Point at relative address again. */ | ||
| 5497 | if ((re_opcode_t) p[-1] != pop_failure_jump) | ||
| 5498 | { | ||
| 5499 | p[-1] = (unsigned char) jump; | ||
| 5500 | DEBUG_PRINT1 (" Match => jump.\n"); | ||
| 5501 | goto unconditional_jump; | ||
| 5502 | } | ||
| 5503 | /* Note fall through. */ | ||
| 5504 | |||
| 5505 | |||
| 5506 | /* The end of a simple repeat has a pop_failure_jump back to | ||
| 5507 | its matching on_failure_jump, where the latter will push a | ||
| 5508 | failure point. The pop_failure_jump takes off failure | ||
| 5509 | points put on by this pop_failure_jump's matching | ||
| 5510 | on_failure_jump; we got through the pattern to here from the | ||
| 5511 | matching on_failure_jump, so didn't fail. */ | ||
| 5512 | case pop_failure_jump: | ||
| 5513 | { | ||
| 5514 | /* We need to pass separate storage for the lowest and | ||
| 5515 | highest registers, even though we don't care about the | ||
| 5516 | actual values. Otherwise, we will restore only one | ||
| 5517 | register from the stack, since lowest will == highest in | ||
| 5518 | `pop_failure_point'. */ | ||
| 5519 | unsigned dummy_low_reg, dummy_high_reg; | ||
| 5520 | unsigned char *pdummy; | ||
| 5521 | const char *sdummy; | ||
| 5522 | |||
| 5523 | DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n"); | ||
| 5524 | POP_FAILURE_POINT (sdummy, pdummy, | ||
| 5525 | dummy_low_reg, dummy_high_reg, | ||
| 5526 | reg_dummy, reg_dummy, reg_info_dummy); | ||
| 5527 | } | ||
| 5528 | /* Note fall through. */ | ||
| 5529 | |||
| 5530 | 5218 | ||
| 5531 | /* Unconditionally jump (without popping any failure points). */ | 5219 | /* Unconditionally jump (without popping any failure points). */ |
| 5532 | case jump: | 5220 | case jump: |
| @@ -5537,42 +5225,10 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5537 | EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */ | 5225 | EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */ |
| 5538 | DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt); | 5226 | DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt); |
| 5539 | p += mcnt; /* Do the jump. */ | 5227 | p += mcnt; /* Do the jump. */ |
| 5540 | DEBUG_PRINT2 ("(to 0x%x).\n", p); | 5228 | DEBUG_PRINT2 ("(to %p).\n", p); |
| 5541 | break; | 5229 | break; |
| 5542 | 5230 | ||
| 5543 | 5231 | ||
| 5544 | /* We need this opcode so we can detect where alternatives end | ||
| 5545 | in `group_match_null_string_p' et al. */ | ||
| 5546 | case jump_past_alt: | ||
| 5547 | DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n"); | ||
| 5548 | goto unconditional_jump; | ||
| 5549 | |||
| 5550 | |||
| 5551 | /* Normally, the on_failure_jump pushes a failure point, which | ||
| 5552 | then gets popped at pop_failure_jump. We will end up at | ||
| 5553 | pop_failure_jump, also, and with a pattern of, say, `a+', we | ||
| 5554 | are skipping over the on_failure_jump, so we have to push | ||
| 5555 | something meaningless for pop_failure_jump to pop. */ | ||
| 5556 | case dummy_failure_jump: | ||
| 5557 | DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n"); | ||
| 5558 | /* It doesn't matter what we push for the string here. What | ||
| 5559 | the code at `fail' tests is the value for the pattern. */ | ||
| 5560 | PUSH_FAILURE_POINT (0, 0, -2); | ||
| 5561 | goto unconditional_jump; | ||
| 5562 | |||
| 5563 | |||
| 5564 | /* At the end of an alternative, we need to push a dummy failure | ||
| 5565 | point in case we are followed by a `pop_failure_jump', because | ||
| 5566 | we don't want the failure point for the alternative to be | ||
| 5567 | popped. For example, matching `(a|ab)*' against `aab' | ||
| 5568 | requires that we match the `ab' alternative. */ | ||
| 5569 | case push_dummy_failure: | ||
| 5570 | DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n"); | ||
| 5571 | /* See comments just above at `dummy_failure_jump' about the | ||
| 5572 | two zeroes. */ | ||
| 5573 | PUSH_FAILURE_POINT (0, 0, -2); | ||
| 5574 | break; | ||
| 5575 | |||
| 5576 | /* Have to succeed matching what follows at least n times. | 5232 | /* Have to succeed matching what follows at least n times. |
| 5577 | After that, handle like `on_failure_jump'. */ | 5233 | After that, handle like `on_failure_jump'. */ |
| 5578 | case succeed_n: | 5234 | case succeed_n: |
| @@ -5586,11 +5242,11 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5586 | mcnt--; | 5242 | mcnt--; |
| 5587 | p += 2; | 5243 | p += 2; |
| 5588 | STORE_NUMBER_AND_INCR (p, mcnt); | 5244 | STORE_NUMBER_AND_INCR (p, mcnt); |
| 5589 | DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p, mcnt); | 5245 | DEBUG_PRINT3 (" Setting %p to %d.\n", p, mcnt); |
| 5590 | } | 5246 | } |
| 5591 | else if (mcnt == 0) | 5247 | else if (mcnt == 0) |
| 5592 | { | 5248 | { |
| 5593 | DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n", p+2); | 5249 | DEBUG_PRINT2 (" Setting two bytes from %p to no_op.\n", p+2); |
| 5594 | p[2] = (unsigned char) no_op; | 5250 | p[2] = (unsigned char) no_op; |
| 5595 | p[3] = (unsigned char) no_op; | 5251 | p[3] = (unsigned char) no_op; |
| 5596 | goto on_failure; | 5252 | goto on_failure; |
| @@ -5620,7 +5276,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5620 | EXTRACT_NUMBER_AND_INCR (mcnt, p); | 5276 | EXTRACT_NUMBER_AND_INCR (mcnt, p); |
| 5621 | p1 = p + mcnt; | 5277 | p1 = p + mcnt; |
| 5622 | EXTRACT_NUMBER_AND_INCR (mcnt, p); | 5278 | EXTRACT_NUMBER_AND_INCR (mcnt, p); |
| 5623 | DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt); | 5279 | DEBUG_PRINT3 (" Setting %p to %d.\n", p1, mcnt); |
| 5624 | STORE_NUMBER (p1, mcnt); | 5280 | STORE_NUMBER (p1, mcnt); |
| 5625 | break; | 5281 | break; |
| 5626 | } | 5282 | } |
| @@ -5837,7 +5493,6 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5837 | goto fail; | 5493 | goto fail; |
| 5838 | d += len; | 5494 | d += len; |
| 5839 | } | 5495 | } |
| 5840 | SET_REGS_MATCHED (); | ||
| 5841 | break; | 5496 | break; |
| 5842 | 5497 | ||
| 5843 | case notsyntaxspec: | 5498 | case notsyntaxspec: |
| @@ -5868,7 +5523,6 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5868 | goto fail; | 5523 | goto fail; |
| 5869 | d += len; | 5524 | d += len; |
| 5870 | } | 5525 | } |
| 5871 | SET_REGS_MATCHED (); | ||
| 5872 | break; | 5526 | break; |
| 5873 | 5527 | ||
| 5874 | case categoryspec: | 5528 | case categoryspec: |
| @@ -5887,7 +5541,6 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5887 | goto fail; | 5541 | goto fail; |
| 5888 | d += len; | 5542 | d += len; |
| 5889 | } | 5543 | } |
| 5890 | SET_REGS_MATCHED (); | ||
| 5891 | break; | 5544 | break; |
| 5892 | 5545 | ||
| 5893 | case notcategoryspec: | 5546 | case notcategoryspec: |
| @@ -5906,7 +5559,6 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5906 | goto fail; | 5559 | goto fail; |
| 5907 | d += len; | 5560 | d += len; |
| 5908 | } | 5561 | } |
| 5909 | SET_REGS_MATCHED (); | ||
| 5910 | break; | 5562 | break; |
| 5911 | 5563 | ||
| 5912 | #else /* not emacs */ | 5564 | #else /* not emacs */ |
| @@ -5915,7 +5567,6 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5915 | PREFETCH (); | 5567 | PREFETCH (); |
| 5916 | if (!WORDCHAR_P (d)) | 5568 | if (!WORDCHAR_P (d)) |
| 5917 | goto fail; | 5569 | goto fail; |
| 5918 | SET_REGS_MATCHED (); | ||
| 5919 | d++; | 5570 | d++; |
| 5920 | break; | 5571 | break; |
| 5921 | 5572 | ||
| @@ -5924,7 +5575,6 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5924 | PREFETCH (); | 5575 | PREFETCH (); |
| 5925 | if (WORDCHAR_P (d)) | 5576 | if (WORDCHAR_P (d)) |
| 5926 | goto fail; | 5577 | goto fail; |
| 5927 | SET_REGS_MATCHED (); | ||
| 5928 | d++; | 5578 | d++; |
| 5929 | break; | 5579 | break; |
| 5930 | #endif /* not emacs */ | 5580 | #endif /* not emacs */ |
| @@ -5941,44 +5591,40 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5941 | QUIT; | 5591 | QUIT; |
| 5942 | #endif | 5592 | #endif |
| 5943 | if (!FAIL_STACK_EMPTY ()) | 5593 | if (!FAIL_STACK_EMPTY ()) |
| 5944 | { /* A restart point is known. Restore to that state. */ | 5594 | { |
| 5595 | char *str; | ||
| 5596 | unsigned char *pat; | ||
| 5597 | /* A restart point is known. Restore to that state. */ | ||
| 5945 | DEBUG_PRINT1 ("\nFAIL:\n"); | 5598 | DEBUG_PRINT1 ("\nFAIL:\n"); |
| 5946 | POP_FAILURE_POINT (d, p, | 5599 | POP_FAILURE_POINT (str, pat); |
| 5947 | lowest_active_reg, highest_active_reg, | 5600 | switch (SWITCH_ENUM_CAST ((re_opcode_t) *pat++)) |
| 5948 | regstart, regend, reg_info); | 5601 | { |
| 5602 | case on_failure_keep_string_jump: | ||
| 5603 | assert (str == NULL); | ||
| 5604 | goto continue_failure_jump; | ||
| 5605 | |||
| 5606 | case on_failure_jump_exclusive: | ||
| 5607 | /* If something has matched, the alternative will not match, | ||
| 5608 | so we might as well keep popping right away. */ | ||
| 5609 | if (0 /* d != str && d != string2 */) /* Don't bother. -sm */ | ||
| 5610 | /* (d == string2 && str == end1) => (d =~ str) */ | ||
| 5611 | goto fail; | ||
| 5612 | /* Fallthrough */ | ||
| 5613 | |||
| 5614 | case on_failure_jump_loop: | ||
| 5615 | case on_failure_jump: | ||
| 5616 | case succeed_n: | ||
| 5617 | d = str; | ||
| 5618 | continue_failure_jump: | ||
| 5619 | EXTRACT_NUMBER_AND_INCR (mcnt, pat); | ||
| 5620 | p = pat + mcnt; | ||
| 5621 | break; | ||
| 5949 | 5622 | ||
| 5950 | /* If this failure point is a dummy, try the next one. */ | 5623 | default: |
| 5951 | if (!p) | 5624 | abort(); |
| 5952 | goto fail; | 5625 | } |
| 5953 | 5626 | ||
| 5954 | /* If we failed to the end of the pattern, don't examine *p. */ | 5627 | assert (p >= bufp->buffer && p <= pend); |
| 5955 | assert (p <= pend); | ||
| 5956 | if (p < pend) | ||
| 5957 | { | ||
| 5958 | boolean is_a_jump_n = false; | ||
| 5959 | |||
| 5960 | /* If failed to a backwards jump that's part of a repetition | ||
| 5961 | loop, need to pop this failure point and use the next one. */ | ||
| 5962 | switch ((re_opcode_t) *p) | ||
| 5963 | { | ||
| 5964 | case jump_n: | ||
| 5965 | is_a_jump_n = true; | ||
| 5966 | case maybe_pop_jump: | ||
| 5967 | case pop_failure_jump: | ||
| 5968 | case jump: | ||
| 5969 | p1 = p + 1; | ||
| 5970 | EXTRACT_NUMBER_AND_INCR (mcnt, p1); | ||
| 5971 | p1 += mcnt; | ||
| 5972 | |||
| 5973 | if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n) | ||
| 5974 | || (!is_a_jump_n | ||
| 5975 | && (re_opcode_t) *p1 == on_failure_jump)) | ||
| 5976 | goto fail; | ||
| 5977 | break; | ||
| 5978 | default: | ||
| 5979 | /* do nothing */ ; | ||
| 5980 | } | ||
| 5981 | } | ||
| 5982 | 5628 | ||
| 5983 | if (d >= string1 && d <= end1) | 5629 | if (d >= string1 && d <= end1) |
| 5984 | dend = end_match_1; | 5630 | dend = end_match_1; |
| @@ -5997,248 +5643,6 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5997 | 5643 | ||
| 5998 | /* Subroutine definitions for re_match_2. */ | 5644 | /* Subroutine definitions for re_match_2. */ |
| 5999 | 5645 | ||
| 6000 | |||
| 6001 | /* We are passed P pointing to a register number after a start_memory. | ||
| 6002 | |||
| 6003 | Return true if the pattern up to the corresponding stop_memory can | ||
| 6004 | match the empty string, and false otherwise. | ||
| 6005 | |||
| 6006 | If we find the matching stop_memory, sets P to point to one past its number. | ||
| 6007 | Otherwise, sets P to an undefined byte less than or equal to END. | ||
| 6008 | |||
| 6009 | We don't handle duplicates properly (yet). */ | ||
| 6010 | |||
| 6011 | static boolean | ||
| 6012 | group_match_null_string_p (p, end, reg_info) | ||
| 6013 | unsigned char **p, *end; | ||
| 6014 | register_info_type *reg_info; | ||
| 6015 | { | ||
| 6016 | int mcnt; | ||
| 6017 | /* Point to after the args to the start_memory. */ | ||
| 6018 | unsigned char *p1 = *p + 2; | ||
| 6019 | |||
| 6020 | while (p1 < end) | ||
| 6021 | { | ||
| 6022 | /* Skip over opcodes that can match nothing, and return true or | ||
| 6023 | false, as appropriate, when we get to one that can't, or to the | ||
| 6024 | matching stop_memory. */ | ||
| 6025 | |||
| 6026 | switch ((re_opcode_t) *p1) | ||
| 6027 | { | ||
| 6028 | /* Could be either a loop or a series of alternatives. */ | ||
| 6029 | case on_failure_jump: | ||
| 6030 | p1++; | ||
| 6031 | EXTRACT_NUMBER_AND_INCR (mcnt, p1); | ||
| 6032 | |||
| 6033 | /* If the next operation is not a jump backwards in the | ||
| 6034 | pattern. */ | ||
| 6035 | |||
| 6036 | if (mcnt >= 0) | ||
| 6037 | { | ||
| 6038 | /* Go through the on_failure_jumps of the alternatives, | ||
| 6039 | seeing if any of the alternatives cannot match nothing. | ||
| 6040 | The last alternative starts with only a jump, | ||
| 6041 | whereas the rest start with on_failure_jump and end | ||
| 6042 | with a jump, e.g., here is the pattern for `a|b|c': | ||
| 6043 | |||
| 6044 | /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6 | ||
| 6045 | /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3 | ||
| 6046 | /exactn/1/c | ||
| 6047 | |||
| 6048 | So, we have to first go through the first (n-1) | ||
| 6049 | alternatives and then deal with the last one separately. */ | ||
| 6050 | |||
| 6051 | |||
| 6052 | /* Deal with the first (n-1) alternatives, which start | ||
| 6053 | with an on_failure_jump (see above) that jumps to right | ||
| 6054 | past a jump_past_alt. */ | ||
| 6055 | |||
| 6056 | while ((re_opcode_t) p1[mcnt-3] == jump_past_alt) | ||
| 6057 | { | ||
| 6058 | /* `mcnt' holds how many bytes long the alternative | ||
| 6059 | is, including the ending `jump_past_alt' and | ||
| 6060 | its number. */ | ||
| 6061 | |||
| 6062 | if (!alt_match_null_string_p (p1, p1 + mcnt - 3, | ||
| 6063 | reg_info)) | ||
| 6064 | return false; | ||
| 6065 | |||
| 6066 | /* Move to right after this alternative, including the | ||
| 6067 | jump_past_alt. */ | ||
| 6068 | p1 += mcnt; | ||
| 6069 | |||
| 6070 | /* Break if it's the beginning of an n-th alternative | ||
| 6071 | that doesn't begin with an on_failure_jump. */ | ||
| 6072 | if ((re_opcode_t) *p1 != on_failure_jump) | ||
| 6073 | break; | ||
| 6074 | |||
| 6075 | /* Still have to check that it's not an n-th | ||
| 6076 | alternative that starts with an on_failure_jump. */ | ||
| 6077 | p1++; | ||
| 6078 | EXTRACT_NUMBER_AND_INCR (mcnt, p1); | ||
| 6079 | if ((re_opcode_t) p1[mcnt-3] != jump_past_alt) | ||
| 6080 | { | ||
| 6081 | /* Get to the beginning of the n-th alternative. */ | ||
| 6082 | p1 -= 3; | ||
| 6083 | break; | ||
| 6084 | } | ||
| 6085 | } | ||
| 6086 | |||
| 6087 | /* Deal with the last alternative: go back and get number | ||
| 6088 | of the `jump_past_alt' just before it. `mcnt' contains | ||
| 6089 | the length of the alternative. */ | ||
| 6090 | EXTRACT_NUMBER (mcnt, p1 - 2); | ||
| 6091 | |||
| 6092 | if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info)) | ||
| 6093 | return false; | ||
| 6094 | |||
| 6095 | p1 += mcnt; /* Get past the n-th alternative. */ | ||
| 6096 | } /* if mcnt > 0 */ | ||
| 6097 | break; | ||
| 6098 | |||
| 6099 | |||
| 6100 | case stop_memory: | ||
| 6101 | assert (p1[1] == **p); | ||
| 6102 | *p = p1 + 2; | ||
| 6103 | return true; | ||
| 6104 | |||
| 6105 | |||
| 6106 | default: | ||
| 6107 | if (!common_op_match_null_string_p (&p1, end, reg_info)) | ||
| 6108 | return false; | ||
| 6109 | } | ||
| 6110 | } /* while p1 < end */ | ||
| 6111 | |||
| 6112 | return false; | ||
| 6113 | } /* group_match_null_string_p */ | ||
| 6114 | |||
| 6115 | |||
| 6116 | /* Similar to group_match_null_string_p, but doesn't deal with alternatives: | ||
| 6117 | It expects P to be the first byte of a single alternative and END one | ||
| 6118 | byte past the last. The alternative can contain groups. */ | ||
| 6119 | |||
| 6120 | static boolean | ||
| 6121 | alt_match_null_string_p (p, end, reg_info) | ||
| 6122 | unsigned char *p, *end; | ||
| 6123 | register_info_type *reg_info; | ||
| 6124 | { | ||
| 6125 | int mcnt; | ||
| 6126 | unsigned char *p1 = p; | ||
| 6127 | |||
| 6128 | while (p1 < end) | ||
| 6129 | { | ||
| 6130 | /* Skip over opcodes that can match nothing, and break when we get | ||
| 6131 | to one that can't. */ | ||
| 6132 | |||
| 6133 | switch ((re_opcode_t) *p1) | ||
| 6134 | { | ||
| 6135 | /* It's a loop. */ | ||
| 6136 | case on_failure_jump: | ||
| 6137 | p1++; | ||
| 6138 | EXTRACT_NUMBER_AND_INCR (mcnt, p1); | ||
| 6139 | p1 += mcnt; | ||
| 6140 | break; | ||
| 6141 | |||
| 6142 | default: | ||
| 6143 | if (!common_op_match_null_string_p (&p1, end, reg_info)) | ||
| 6144 | return false; | ||
| 6145 | } | ||
| 6146 | } /* while p1 < end */ | ||
| 6147 | |||
| 6148 | return true; | ||
| 6149 | } /* alt_match_null_string_p */ | ||
| 6150 | |||
| 6151 | |||
| 6152 | /* Deals with the ops common to group_match_null_string_p and | ||
| 6153 | alt_match_null_string_p. | ||
| 6154 | |||
| 6155 | Sets P to one after the op and its arguments, if any. */ | ||
| 6156 | |||
| 6157 | static boolean | ||
| 6158 | common_op_match_null_string_p (p, end, reg_info) | ||
| 6159 | unsigned char **p, *end; | ||
| 6160 | register_info_type *reg_info; | ||
| 6161 | { | ||
| 6162 | int mcnt; | ||
| 6163 | boolean ret; | ||
| 6164 | int reg_no; | ||
| 6165 | unsigned char *p1 = *p; | ||
| 6166 | |||
| 6167 | switch ((re_opcode_t) *p1++) | ||
| 6168 | { | ||
| 6169 | case no_op: | ||
| 6170 | case begline: | ||
| 6171 | case endline: | ||
| 6172 | case begbuf: | ||
| 6173 | case endbuf: | ||
| 6174 | case wordbeg: | ||
| 6175 | case wordend: | ||
| 6176 | case wordbound: | ||
| 6177 | case notwordbound: | ||
| 6178 | #ifdef emacs | ||
| 6179 | case before_dot: | ||
| 6180 | case at_dot: | ||
| 6181 | case after_dot: | ||
| 6182 | #endif | ||
| 6183 | break; | ||
| 6184 | |||
| 6185 | case start_memory: | ||
| 6186 | reg_no = *p1; | ||
| 6187 | assert (reg_no > 0 && reg_no <= MAX_REGNUM); | ||
| 6188 | ret = group_match_null_string_p (&p1, end, reg_info); | ||
| 6189 | |||
| 6190 | /* Have to set this here in case we're checking a group which | ||
| 6191 | contains a group and a back reference to it. */ | ||
| 6192 | |||
| 6193 | if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE) | ||
| 6194 | REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret; | ||
| 6195 | |||
| 6196 | if (!ret) | ||
| 6197 | return false; | ||
| 6198 | break; | ||
| 6199 | |||
| 6200 | /* If this is an optimized succeed_n for zero times, make the jump. */ | ||
| 6201 | case jump: | ||
| 6202 | EXTRACT_NUMBER_AND_INCR (mcnt, p1); | ||
| 6203 | if (mcnt >= 0) | ||
| 6204 | p1 += mcnt; | ||
| 6205 | else | ||
| 6206 | return false; | ||
| 6207 | break; | ||
| 6208 | |||
| 6209 | case succeed_n: | ||
| 6210 | /* Get to the number of times to succeed. */ | ||
| 6211 | p1 += 2; | ||
| 6212 | EXTRACT_NUMBER_AND_INCR (mcnt, p1); | ||
| 6213 | |||
| 6214 | if (mcnt == 0) | ||
| 6215 | { | ||
| 6216 | p1 -= 4; | ||
| 6217 | EXTRACT_NUMBER_AND_INCR (mcnt, p1); | ||
| 6218 | p1 += mcnt; | ||
| 6219 | } | ||
| 6220 | else | ||
| 6221 | return false; | ||
| 6222 | break; | ||
| 6223 | |||
| 6224 | case duplicate: | ||
| 6225 | if (!REG_MATCH_NULL_STRING_P (reg_info[*p1])) | ||
| 6226 | return false; | ||
| 6227 | break; | ||
| 6228 | |||
| 6229 | case set_number_at: | ||
| 6230 | p1 += 4; | ||
| 6231 | |||
| 6232 | default: | ||
| 6233 | /* All other opcodes mean we cannot match the empty string. */ | ||
| 6234 | return false; | ||
| 6235 | } | ||
| 6236 | |||
| 6237 | *p = p1; | ||
| 6238 | return true; | ||
| 6239 | } /* common_op_match_null_string_p */ | ||
| 6240 | |||
| 6241 | |||
| 6242 | /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN | 5646 | /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN |
| 6243 | bytes; nonzero otherwise. */ | 5647 | bytes; nonzero otherwise. */ |
| 6244 | 5648 | ||