aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorStefan Monnier2000-03-08 23:25:41 +0000
committerStefan Monnier2000-03-08 23:25:41 +0000
commit505bde11b0fb6f539db1ef0713f19c3c2d86064a (patch)
tree2e6f73eb8373fd90ff8d93860acd15daf32ef038 /src
parentdd52d9705c258145292bef9fc490035abc05e51f (diff)
downloademacs-505bde11b0fb6f539db1ef0713f19c3c2d86064a.tar.gz
emacs-505bde11b0fb6f539db1ef0713f19c3c2d86064a.zip
This is a big redesign of failure-stack and register handling, prompted
by bugs revealed when trying to add shy-groups. Overall, what happened is that loops are now structured a little differently, groups can be shy and the code is a little simpler. (enum re_opcode_t): Remove jump_past_alt, maybe_pop_jump, push_dummy_failure and dumy_failure_jump. Add on_failure_jump_(exclusive, loop and smart). Also fix the comment for (start|stop)_memory since they now only take one argument (the second has becomes unnecessary). (print_partial_compiled_pattern): Adjust for changes in re_opcode_t. (print_compiled_pattern): Use %ld to printf long ints and flush to make debugging a little easier. (union fail_stack_elt): Make the integer unsigned. (struct fail_stack_type): Add a `frame' element. (INIT_FAIL_STACK): Init `frame' as well. (POP_PATTERN_OP): New macro for re_compile_fastmap. (DEBUG_PUSH, DEBUG_POP): Remove. (NUM_REG_ITEMS): Remove. (NUM_NONREG_ITEMS): Adjust. (FAILURE_PAT, FAILURE_STR, NEXT_FAILURE_HANDLE, TOP_FAILURE_HANDLE): New macros for the cycle detection. (ENSURE_FAIL_STACK): New macro for PUSH_FAILURE_(REG|POINT). (PUSH_FAILURE_REG, POP_FAILURE_REG, CHECK_INFINITE_LOOP): New macros. (PUSH_FAILURE_POINT): Don't push registers any more. The pattern address pushed is not the destination of the jump but the source of it instead. (NUM_FAILURE_ITEMS): Remove. (POP_FAILURE_POINT): Adapt to the new stack structure (i.e. pop registers before the actual failure point). Don't hardcode any meaning for str==NULL anymore. (union register_info_type, REG_MATCH_NULL_STRING_P, IS_ACTIVE) (MATCHED_SOMETHING, EVER_MATCHED_SOMETHING, SET_REGS_MATCHED): Remove. (REG_UNSET_VALUE): Use NULL (why not?). (compile_range): Remove declaration since it doesn't exist. (struct compile_stack_elt_t): Remove inner_group_offset. (old_reg(start|end), reg_info, reg_dummy, reg_info_dummy): Remove. (regex_grow_registers): Remove dead code. (FIXUP_ALT_JUMP): New macro. (regex_compile): Add shy-groups Change loops to use on_failure_jump_smart&jump instead of on_failure_jump&maybe_pop_jump. Change + loops to eliminate the initial (dummy_failure_)jump. Remove c1_base (looks like unused variable to me). Use `jump' instead of `jump_past_alt' and don't bother with push_dummy_failure in alternatives since it is now unnecessary. Use FIXUP_ALT_JUMP. Eliminate a useless `#ifdef emacs' for (re)allocating the stack. (re_compile_fastmap): Remove dead variables i and num_regs. Exit from loop when bufp->can_be_null rather than jumping to `done'. Avoid jumping backwards so as to ensure termination. Use PATTERN_STACK_EMPTY and POP_PATTERN_OP. Improved handling of backreferences. Remove dead code in handling of `anychar'. (skip_noops, mutually_exclusive_p): New functions taken from the handling of `maybe_pop_jump' in re_match_2_internal. Slightly improve mutually_exclusive_p to handle ".+\n". ((lowest|highest)_active_reg, NO_(LOWEST|HIGHEST)_ACTIVE_REG) Remove. (re_match_2_internal): Use %p instead of 0x%x when printf'ing ptrs. Don't SET_REGS_MATCHED anymore. Remove many dead variables. Push register (in `start_memory') on the stack rather than storing it in old_reg(start|end). Remove the cycle detection from `stop_memory', replaced by the use of on_failure_jump_loop for greedy loops. Add code for the new on_failure_jump_<foo>. Remove ad-hoc code in `on_failure_jump' to push more registers in the case of a loop. Take out code from `maybe_pop_jump' into separate functions and adapt it to the semantics of `on_failure_jump_smart'. Remove jump_past_alt, dummy_failure_jump and push_dummy_failure. Remove dummy_failure handling and handling of `failures to jump to on_failure_jump' (this last one was already dead code, it seems). ((group|alt|common_op)_match_null_string_p): Remove.
Diffstat (limited to 'src')
-rw-r--r--src/regex.c1842
1 files changed, 623 insertions, 1219 deletions
diff --git a/src/regex.c b/src/regex.c
index 09f56a1dffc..809a7d24219 100644
--- a/src/regex.c
+++ b/src/regex.c
@@ -2,7 +2,7 @@
2 0.12. (Implements POSIX draft P10003.2/D11.2, except for 2 0.12. (Implements POSIX draft P10003.2/D11.2, except for
3 internationalization features.) 3 internationalization features.)
4 4
5 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999 Free Software Foundation, Inc. 5 Copyright (C) 1993,94,95,96,97,98,2000 Free Software Foundation, Inc.
6 6
7 This program is free software; you can redistribute it and/or modify 7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by 8 it under the terms of the GNU General Public License as published by
@@ -19,6 +19,14 @@
19 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, 19 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
20 USA. */ 20 USA. */
21 21
22/* TODO:
23 - detect nasty infinite loops like "\\(\\)+?ab" when matching "ac".
24 - use `keep_string' more often than just .*\n.
25 - structure the opcode space into opcode+flag.
26 - merge with glic's regex.[ch]
27
28 That's it for now -sm */
29
22/* AIX requires this to be the first thing in the file. */ 30/* AIX requires this to be the first thing in the file. */
23#if defined (_AIX) && !defined (REGEX_MALLOC) 31#if defined (_AIX) && !defined (REGEX_MALLOC)
24 #pragma alloca 32 #pragma alloca
@@ -473,19 +481,13 @@ typedef enum
473 /* Start remembering the text that is matched, for storing in a 481 /* Start remembering the text that is matched, for storing in a
474 register. Followed by one byte with the register number, in 482 register. Followed by one byte with the register number, in
475 the range 0 to one less than the pattern buffer's re_nsub 483 the range 0 to one less than the pattern buffer's re_nsub
476 field. Then followed by one byte with the number of groups 484 field. */
477 inner to this one. (This last has to be part of the
478 start_memory only because we need it in the on_failure_jump
479 of re_match_2.) */
480 start_memory, 485 start_memory,
481 486
482 /* Stop remembering the text that is matched and store it in a 487 /* Stop remembering the text that is matched and store it in a
483 memory register. Followed by one byte with the register 488 memory register. Followed by one byte with the register
484 number, in the range 0 to one less than `re_nsub' in the 489 number, in the range 0 to one less than `re_nsub' in the
485 pattern buffer, and one byte with the number of inner groups, 490 pattern buffer. */
486 just like `start_memory'. (We need the number of inner
487 groups here because we don't have any easy way of finding the
488 corresponding start_memory when we're at a stop_memory.) */
489 stop_memory, 491 stop_memory,
490 492
491 /* Match a duplicate of something remembered. Followed by one 493 /* Match a duplicate of something remembered. Followed by one
@@ -508,9 +510,6 @@ typedef enum
508 /* Followed by two byte relative address to which to jump. */ 510 /* Followed by two byte relative address to which to jump. */
509 jump, 511 jump,
510 512
511 /* Same as jump, but marks the end of an alternative. */
512 jump_past_alt,
513
514 /* Followed by two-byte relative address of place to resume at 513 /* Followed by two-byte relative address of place to resume at
515 in case of failure. */ 514 in case of failure. */
516 on_failure_jump, 515 on_failure_jump,
@@ -519,29 +518,24 @@ typedef enum
519 current string position when executed. */ 518 current string position when executed. */
520 on_failure_keep_string_jump, 519 on_failure_keep_string_jump,
521 520
522 /* Throw away latest failure point and then jump to following 521 /* Like `on_failure_jump', except that it assumes that the
523 two-byte relative address. */ 522 pattern following it is mutually exclusive with the pattern
524 pop_failure_jump, 523 at the destination of the jump: if one matches something,
525 524 the other won't match at all.
526 /* Change to pop_failure_jump if know won't have to backtrack to 525 Always used via `on_failure_jump_smart'. */
527 match; otherwise change to jump. This is used to jump 526 on_failure_jump_exclusive,
528 back to the beginning of a repeat. If what follows this jump 527
529 clearly won't match what the repeat does, such that we can be 528 /* Just like `on_failure_jump', except that it checks that we
530 sure that there is no use backtracking out of repetitions 529 don't get stuck in an infinite loop (matching an empty string
531 already matched, then we change it to a pop_failure_jump. 530 indefinitely). */
532 Followed by two-byte address. */ 531 on_failure_jump_loop,
533 maybe_pop_jump, 532
534 533 /* A smart `on_failure_jump' used for greedy * and + operators.
535 /* Jump to following two-byte address, and push a dummy failure 534 It analyses the loop before which it is put and if the
536 point. This failure point will be thrown away if an attempt 535 loop does not require backtracking, it changes itself to
537 is made to use it for a failure. A `+' construct makes this 536 `on_failure_jump_exclusive', else it just defaults to
538 before the first repeat. Also used as an intermediary kind 537 changing itself into `on_failure_jump_loop'. */
539 of jump when compiling an alternative. */ 538 on_failure_jump_smart,
540 dummy_failure_jump,
541
542 /* Push a dummy failure point and continue. Used at the end of
543 alternatives. */
544 push_dummy_failure,
545 539
546 /* Followed by two-byte relative address and two-byte number n. 540 /* Followed by two-byte relative address and two-byte number n.
547 After matching N times, jump to the address upon failure. */ 541 After matching N times, jump to the address upon failure. */
@@ -855,13 +849,11 @@ print_partial_compiled_pattern (start, end)
855 break; 849 break;
856 850
857 case start_memory: 851 case start_memory:
858 mcnt = *p++; 852 printf ("/start_memory/%d", *p++);
859 printf ("/start_memory/%d/%d", mcnt, *p++);
860 break; 853 break;
861 854
862 case stop_memory: 855 case stop_memory:
863 mcnt = *p++; 856 printf ("/stop_memory/%d", *p++);
864 printf ("/stop_memory/%d/%d", mcnt, *p++);
865 break; 857 break;
866 858
867 case duplicate: 859 case duplicate:
@@ -944,28 +936,19 @@ print_partial_compiled_pattern (start, end)
944 printf ("/on_failure_keep_string_jump to %d", p + mcnt - start); 936 printf ("/on_failure_keep_string_jump to %d", p + mcnt - start);
945 break; 937 break;
946 938
947 case dummy_failure_jump: 939 case on_failure_jump_exclusive:
948 extract_number_and_incr (&mcnt, &p);
949 printf ("/dummy_failure_jump to %d", p + mcnt - start);
950 break;
951
952 case push_dummy_failure:
953 printf ("/push_dummy_failure");
954 break;
955
956 case maybe_pop_jump:
957 extract_number_and_incr (&mcnt, &p); 940 extract_number_and_incr (&mcnt, &p);
958 printf ("/maybe_pop_jump to %d", p + mcnt - start); 941 printf ("/on_failure_jump_exclusive to %d", p + mcnt - start);
959 break; 942 break;
960 943
961 case pop_failure_jump: 944 case on_failure_jump_loop:
962 extract_number_and_incr (&mcnt, &p); 945 extract_number_and_incr (&mcnt, &p);
963 printf ("/pop_failure_jump to %d", p + mcnt - start); 946 printf ("/on_failure_jump_loop to %d", p + mcnt - start);
964 break; 947 break;
965 948
966 case jump_past_alt: 949 case on_failure_jump_smart:
967 extract_number_and_incr (&mcnt, &p); 950 extract_number_and_incr (&mcnt, &p);
968 printf ("/jump_past_alt to %d", p + mcnt - start); 951 printf ("/on_failure_jump_smart to %d", p + mcnt - start);
969 break; 952 break;
970 953
971 case jump: 954 case jump:
@@ -1066,7 +1049,7 @@ print_compiled_pattern (bufp)
1066 unsigned char *buffer = bufp->buffer; 1049 unsigned char *buffer = bufp->buffer;
1067 1050
1068 print_partial_compiled_pattern (buffer, buffer + bufp->used); 1051 print_partial_compiled_pattern (buffer, buffer + bufp->used);
1069 printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated); 1052 printf ("%ld bytes used/%ld bytes allocated.\n", bufp->used, bufp->allocated);
1070 1053
1071 if (bufp->fastmap_accurate && bufp->fastmap) 1054 if (bufp->fastmap_accurate && bufp->fastmap)
1072 { 1055 {
@@ -1082,6 +1065,7 @@ print_compiled_pattern (bufp)
1082 printf ("not_bol: %d\t", bufp->not_bol); 1065 printf ("not_bol: %d\t", bufp->not_bol);
1083 printf ("not_eol: %d\t", bufp->not_eol); 1066 printf ("not_eol: %d\t", bufp->not_eol);
1084 printf ("syntax: %d\n", bufp->syntax); 1067 printf ("syntax: %d\n", bufp->syntax);
1068 fflush (stdout);
1085 /* Perhaps we should print the translate table? */ 1069 /* Perhaps we should print the translate table? */
1086} 1070}
1087 1071
@@ -1246,7 +1230,7 @@ int re_max_failures = 4000;
1246union fail_stack_elt 1230union fail_stack_elt
1247{ 1231{
1248 unsigned char *pointer; 1232 unsigned char *pointer;
1249 int integer; 1233 unsigned int integer;
1250}; 1234};
1251 1235
1252typedef union fail_stack_elt fail_stack_elt_t; 1236typedef union fail_stack_elt fail_stack_elt_t;
@@ -1255,11 +1239,12 @@ typedef struct
1255{ 1239{
1256 fail_stack_elt_t *stack; 1240 fail_stack_elt_t *stack;
1257 unsigned size; 1241 unsigned size;
1258 unsigned avail; /* Offset of next open position. */ 1242 unsigned avail; /* Offset of next open position. */
1243 unsigned frame; /* Offset of the cur constructed frame. */
1259} fail_stack_type; 1244} fail_stack_type;
1260 1245
1261#define FAIL_STACK_EMPTY() (fail_stack.avail == 0) 1246#define PATTERN_STACK_EMPTY() (fail_stack.avail == 0)
1262#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0) 1247#define FAIL_STACK_EMPTY() (fail_stack.frame == 0)
1263#define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size) 1248#define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size)
1264 1249
1265 1250
@@ -1278,6 +1263,7 @@ typedef struct
1278 \ 1263 \
1279 fail_stack.size = INIT_FAILURE_ALLOC; \ 1264 fail_stack.size = INIT_FAILURE_ALLOC; \
1280 fail_stack.avail = 0; \ 1265 fail_stack.avail = 0; \
1266 fail_stack.frame = 0; \
1281 } while (0) 1267 } while (0)
1282 1268
1283#define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack) 1269#define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack)
@@ -1285,6 +1271,7 @@ typedef struct
1285#define INIT_FAIL_STACK() \ 1271#define INIT_FAIL_STACK() \
1286 do { \ 1272 do { \
1287 fail_stack.avail = 0; \ 1273 fail_stack.avail = 0; \
1274 fail_stack.frame = 0; \
1288 } while (0) 1275 } while (0)
1289 1276
1290#define RESET_FAIL_STACK() 1277#define RESET_FAIL_STACK()
@@ -1337,6 +1324,7 @@ typedef struct
1337 ? 0 \ 1324 ? 0 \
1338 : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER, \ 1325 : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER, \
1339 1)) 1326 1))
1327#define POP_PATTERN_OP() POP_FAILURE_POINTER ()
1340 1328
1341/* Push a pointer value onto the failure stack. 1329/* Push a pointer value onto the failure stack.
1342 Assumes the variable `fail_stack'. Probably should only 1330 Assumes the variable `fail_stack'. Probably should only
@@ -1362,110 +1350,105 @@ typedef struct
1362#define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer 1350#define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
1363#define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail] 1351#define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
1364 1352
1365/* Used to omit pushing failure point id's when we're not debugging. */ 1353/* Individual items aside from the registers. */
1366#ifdef DEBUG 1354#define NUM_NONREG_ITEMS 3
1367#define DEBUG_PUSH PUSH_FAILURE_INT 1355
1368#define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT () 1356/* Used to examine the stack (to detect infinite loops). */
1369#else 1357#define FAILURE_PAT(h) fail_stack.stack[(h) - 1].pointer
1370#define DEBUG_PUSH(item) 1358#define FAILURE_STR(h) ((char*)fail_stack.stack[(h) - 2].pointer)
1371#define DEBUG_POP(item_addr) 1359#define NEXT_FAILURE_HANDLE(h) fail_stack.stack[(h) - 3].integer
1372#endif 1360#define TOP_FAILURE_HANDLE() fail_stack.frame
1373 1361
1374 1362
1363#define ENSURE_FAIL_STACK(space) \
1364while (REMAINING_AVAIL_SLOTS <= space) { \
1365 if (!GROW_FAIL_STACK (fail_stack)) \
1366 return -2; \
1367 DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", (fail_stack).size);\
1368 DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\
1369}
1370
1371/* Push register NUM onto the stack. */
1372#define PUSH_FAILURE_REG(num) \
1373do { \
1374 char *destination; \
1375 ENSURE_FAIL_STACK(3); \
1376 DEBUG_PRINT4 (" Push reg %d (spanning %p -> %p)\n", \
1377 num, regstart[num], regend[num]); \
1378 PUSH_FAILURE_POINTER (regstart[num]); \
1379 PUSH_FAILURE_POINTER (regend[num]); \
1380 PUSH_FAILURE_INT (num); \
1381} while (0)
1382
1383/* Pop a saved register off the stack. */
1384#define POP_FAILURE_REG() \
1385do { \
1386 int reg = POP_FAILURE_INT (); \
1387 regend[reg] = POP_FAILURE_POINTER (); \
1388 regstart[reg] = POP_FAILURE_POINTER (); \
1389 DEBUG_PRINT4 (" Pop reg %d (spanning %p -> %p)\n", \
1390 reg, regstart[reg], regend[reg]); \
1391} while (0)
1392
1393/* Check that we are not stuck in an infinite loop. */
1394#define CHECK_INFINITE_LOOP(pat_cur, string_place) \
1395do { \
1396 int failure = TOP_FAILURE_HANDLE(); \
1397 /* Check for infinite matching loops */ \
1398 while (failure > 0 && \
1399 (FAILURE_STR (failure) == string_place \
1400 || FAILURE_STR (failure) == NULL)) \
1401 { \
1402 assert (FAILURE_PAT (failure) >= bufp->buffer \
1403 && FAILURE_PAT (failure) <= bufp->buffer + bufp->used);\
1404 if (FAILURE_PAT (failure) == pat_cur) \
1405 goto fail; \
1406 DEBUG_PRINT2 (" Other pattern: %p\n", FAILURE_PAT (failure));\
1407 failure = NEXT_FAILURE_HANDLE(failure); \
1408 } \
1409 DEBUG_PRINT2 (" Other string: %p\n", FAILURE_STR (failure)); \
1410} while (0)
1411
1375/* Push the information about the state we will need 1412/* Push the information about the state we will need
1376 if we ever fail back to it. 1413 if we ever fail back to it.
1377 1414
1378 Requires variables fail_stack, regstart, regend, reg_info, and 1415 Requires variables fail_stack, regstart, regend and
1379 num_regs be declared. GROW_FAIL_STACK requires `destination' be 1416 num_regs be declared. GROW_FAIL_STACK requires `destination' be
1380 declared. 1417 declared.
1381 1418
1382 Does `return FAILURE_CODE' if runs out of memory. */ 1419 Does `return FAILURE_CODE' if runs out of memory. */
1383 1420
1384#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \ 1421#define PUSH_FAILURE_POINT(pattern, string_place) \
1385 do { \ 1422do { \
1386 char *destination; \ 1423 char *destination; \
1387 /* Must be int, so when we don't save any registers, the arithmetic \ 1424 /* Must be int, so when we don't save any registers, the arithmetic \
1388 of 0 + -1 isn't done as unsigned. */ \ 1425 of 0 + -1 isn't done as unsigned. */ \
1389 int this_reg; \ 1426 \
1390 \ 1427 DEBUG_STATEMENT (failure_id++); \
1391 DEBUG_STATEMENT (failure_id++); \ 1428 DEBUG_STATEMENT (nfailure_points_pushed++); \
1392 DEBUG_STATEMENT (nfailure_points_pushed++); \ 1429 DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
1393 DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \ 1430 DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail); \
1394 DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\ 1431 DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\
1395 DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\ 1432 \
1396 \ 1433 ENSURE_FAIL_STACK (NUM_NONREG_ITEMS); \
1397 DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \ 1434 \
1398 DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \ 1435 DEBUG_PRINT1 ("\n"); \
1399 \ 1436 \
1400 /* Ensure we have enough space allocated for what we will push. */ \ 1437 DEBUG_PRINT2 (" Push frame index: %d\n", fail_stack.frame); \
1401 while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \ 1438 PUSH_FAILURE_INT (fail_stack.frame); \
1402 { \ 1439 \
1403 if (!GROW_FAIL_STACK (fail_stack)) \ 1440 DEBUG_PRINT2 (" Push string %p: `", string_place); \
1404 return failure_code; \ 1441 DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, size2);\
1405 \ 1442 DEBUG_PRINT1 ("'\n"); \
1406 DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \ 1443 PUSH_FAILURE_POINTER (string_place); \
1407 (fail_stack).size); \ 1444 \
1408 DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\ 1445 DEBUG_PRINT2 (" Push pattern %p: ", pattern); \
1409 } \ 1446 DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern, pend); \
1410 \ 1447 PUSH_FAILURE_POINTER (pattern); \
1411 /* Push the info, starting with the registers. */ \ 1448 \
1412 DEBUG_PRINT1 ("\n"); \ 1449 /* Close the frame by moving the frame pointer past it. */ \
1413 \ 1450 fail_stack.frame = fail_stack.avail; \
1414 if (1) \ 1451} while (0)
1415 for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
1416 this_reg++) \
1417 { \
1418 DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg); \
1419 DEBUG_STATEMENT (num_regs_pushed++); \
1420 \
1421 DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
1422 PUSH_FAILURE_POINTER (regstart[this_reg]); \
1423 \
1424 DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
1425 PUSH_FAILURE_POINTER (regend[this_reg]); \
1426 \
1427 DEBUG_PRINT2 (" info: 0x%x\n ", reg_info[this_reg]); \
1428 DEBUG_PRINT2 (" match_null=%d", \
1429 REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \
1430 DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \
1431 DEBUG_PRINT2 (" matched_something=%d", \
1432 MATCHED_SOMETHING (reg_info[this_reg])); \
1433 DEBUG_PRINT2 (" ever_matched=%d", \
1434 EVER_MATCHED_SOMETHING (reg_info[this_reg])); \
1435 DEBUG_PRINT1 ("\n"); \
1436 PUSH_FAILURE_ELT (reg_info[this_reg].word); \
1437 } \
1438 \
1439 DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg);\
1440 PUSH_FAILURE_INT (lowest_active_reg); \
1441 \
1442 DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg);\
1443 PUSH_FAILURE_INT (highest_active_reg); \
1444 \
1445 DEBUG_PRINT2 (" Pushing pattern 0x%x: ", pattern_place); \
1446 DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \
1447 PUSH_FAILURE_POINTER (pattern_place); \
1448 \
1449 DEBUG_PRINT2 (" Pushing string 0x%x: `", string_place); \
1450 DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \
1451 size2); \
1452 DEBUG_PRINT1 ("'\n"); \
1453 PUSH_FAILURE_POINTER (string_place); \
1454 \
1455 DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \
1456 DEBUG_PUSH (failure_id); \
1457 } while (0)
1458
1459/* This is the number of items that are pushed and popped on the stack
1460 for each register. */
1461#define NUM_REG_ITEMS 3
1462
1463/* Individual items aside from the registers. */
1464#ifdef DEBUG
1465#define NUM_NONREG_ITEMS 5 /* Includes failure point id. */
1466#else
1467#define NUM_NONREG_ITEMS 4
1468#endif
1469 1452
1470/* Estimate the size of data pushed by a typical failure stack entry. 1453/* Estimate the size of data pushed by a typical failure stack entry.
1471 An estimate is all we need, because all we use this for 1454 An estimate is all we need, because all we use this for
@@ -1473,14 +1456,6 @@ typedef struct
1473 1456
1474#define TYPICAL_FAILURE_SIZE 20 1457#define TYPICAL_FAILURE_SIZE 20
1475 1458
1476/* This is how many items we actually use for a failure point.
1477 It depends on the regexp. */
1478#define NUM_FAILURE_ITEMS \
1479 (((0 \
1480 ? 0 : highest_active_reg - lowest_active_reg + 1) \
1481 * NUM_REG_ITEMS) \
1482 + NUM_NONREG_ITEMS)
1483
1484/* How many items can still be added to the stack without overflowing it. */ 1459/* How many items can still be added to the stack without overflowing it. */
1485#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail) 1460#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
1486 1461
@@ -1490,19 +1465,13 @@ typedef struct
1490 We restore into the parameters, all of which should be lvalues: 1465 We restore into the parameters, all of which should be lvalues:
1491 STR -- the saved data position. 1466 STR -- the saved data position.
1492 PAT -- the saved pattern position. 1467 PAT -- the saved pattern position.
1493 LOW_REG, HIGH_REG -- the highest and lowest active registers.
1494 REGSTART, REGEND -- arrays of string positions. 1468 REGSTART, REGEND -- arrays of string positions.
1495 REG_INFO -- array of information about each subexpression.
1496 1469
1497 Also assumes the variables `fail_stack' and (if debugging), `bufp', 1470 Also assumes the variables `fail_stack' and (if debugging), `bufp',
1498 `pend', `string1', `size1', `string2', and `size2'. */ 1471 `pend', `string1', `size1', `string2', and `size2'. */
1499 1472
1500#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\ 1473#define POP_FAILURE_POINT(str, pat) \
1501{ \ 1474do { \
1502 DEBUG_STATEMENT (fail_stack_elt_t failure_id;) \
1503 int this_reg; \
1504 const unsigned char *string_temp; \
1505 \
1506 assert (!FAIL_STACK_EMPTY ()); \ 1475 assert (!FAIL_STACK_EMPTY ()); \
1507 \ 1476 \
1508 /* Remove failure points and point to how many regs pushed. */ \ 1477 /* Remove failure points and point to how many regs pushed. */ \
@@ -1510,119 +1479,35 @@ typedef struct
1510 DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \ 1479 DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \
1511 DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \ 1480 DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \
1512 \ 1481 \
1513 assert (fail_stack.avail >= NUM_NONREG_ITEMS); \ 1482 /* Pop the saved registers. */ \
1483 while (fail_stack.frame < fail_stack.avail) \
1484 POP_FAILURE_REG (); \
1514 \ 1485 \
1515 DEBUG_POP (&failure_id.integer); \ 1486 pat = (unsigned char *) POP_FAILURE_POINTER (); \
1516 DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id.integer); \ 1487 DEBUG_PRINT2 (" Popping pattern %p: ", pat); \
1488 DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \
1517 \ 1489 \
1518 /* If the saved string location is NULL, it came from an \ 1490 /* If the saved string location is NULL, it came from an \
1519 on_failure_keep_string_jump opcode, and we want to throw away the \ 1491 on_failure_keep_string_jump opcode, and we want to throw away the \
1520 saved NULL, thus retaining our current position in the string. */ \ 1492 saved NULL, thus retaining our current position in the string. */ \
1521 string_temp = POP_FAILURE_POINTER (); \ 1493 str = (char *) POP_FAILURE_POINTER (); \
1522 if (string_temp != NULL) \ 1494 DEBUG_PRINT2 (" Popping string %p: `", str); \
1523 str = (const char *) string_temp; \
1524 \
1525 DEBUG_PRINT2 (" Popping string 0x%x: `", str); \
1526 DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \ 1495 DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
1527 DEBUG_PRINT1 ("'\n"); \ 1496 DEBUG_PRINT1 ("'\n"); \
1528 \ 1497 \
1529 pat = (unsigned char *) POP_FAILURE_POINTER (); \ 1498 fail_stack.frame = POP_FAILURE_INT (); \
1530 DEBUG_PRINT2 (" Popping pattern 0x%x: ", pat); \ 1499 DEBUG_PRINT2 (" Popping frame index: %d\n", fail_stack.frame); \
1531 DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \
1532 \
1533 /* Restore register info. */ \
1534 high_reg = (unsigned) POP_FAILURE_INT (); \
1535 DEBUG_PRINT2 (" Popping high active reg: %d\n", high_reg); \
1536 \
1537 low_reg = (unsigned) POP_FAILURE_INT (); \
1538 DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg); \
1539 \
1540 if (1) \
1541 for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \
1542 { \
1543 DEBUG_PRINT2 (" Popping reg: %d\n", this_reg); \
1544 \
1545 reg_info[this_reg].word = POP_FAILURE_ELT (); \
1546 DEBUG_PRINT2 (" info: 0x%x\n", reg_info[this_reg]); \
1547 \
1548 regend[this_reg] = (const char *) POP_FAILURE_POINTER (); \
1549 DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
1550 \ 1500 \
1551 regstart[this_reg] = (const char *) POP_FAILURE_POINTER (); \ 1501 assert (fail_stack.avail >= 0); \
1552 DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \ 1502 assert (fail_stack.frame <= fail_stack.avail); \
1553 } \
1554 else \
1555 { \
1556 for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
1557 { \
1558 reg_info[this_reg].word.integer = 0; \
1559 regend[this_reg] = 0; \
1560 regstart[this_reg] = 0; \
1561 } \
1562 highest_active_reg = high_reg; \
1563 } \
1564 \ 1503 \
1565 set_regs_matched_done = 0; \
1566 DEBUG_STATEMENT (nfailure_points_popped++); \ 1504 DEBUG_STATEMENT (nfailure_points_popped++); \
1567} /* POP_FAILURE_POINT */ 1505} while (0) /* POP_FAILURE_POINT */
1568 1506
1569 1507
1570 1508
1571/* Structure for per-register (a.k.a. per-group) information.
1572 Other register information, such as the
1573 starting and ending positions (which are addresses), and the list of
1574 inner groups (which is a bits list) are maintained in separate
1575 variables.
1576
1577 We are making a (strictly speaking) nonportable assumption here: that
1578 the compiler will pack our bit fields into something that fits into
1579 the type of `word', i.e., is something that fits into one item on the
1580 failure stack. */
1581
1582typedef union
1583{
1584 fail_stack_elt_t word;
1585 struct
1586 {
1587 /* This field is one if this group can match the empty string,
1588 zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */
1589#define MATCH_NULL_UNSET_VALUE 3
1590 unsigned match_null_string_p : 2;
1591 unsigned is_active : 1;
1592 unsigned matched_something : 1;
1593 unsigned ever_matched_something : 1;
1594 } bits;
1595} register_info_type;
1596
1597#define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p)
1598#define IS_ACTIVE(R) ((R).bits.is_active)
1599#define MATCHED_SOMETHING(R) ((R).bits.matched_something)
1600#define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something)
1601
1602
1603/* Call this when have matched a real character; it sets `matched' flags
1604 for the subexpressions which we are currently inside. Also records
1605 that those subexprs have matched. */
1606#define SET_REGS_MATCHED() \
1607 do \
1608 { \
1609 if (!set_regs_matched_done) \
1610 { \
1611 unsigned r; \
1612 set_regs_matched_done = 1; \
1613 for (r = lowest_active_reg; r <= highest_active_reg; r++) \
1614 { \
1615 MATCHED_SOMETHING (reg_info[r]) \
1616 = EVER_MATCHED_SOMETHING (reg_info[r]) \
1617 = 1; \
1618 } \
1619 } \
1620 } \
1621 while (0)
1622
1623/* Registers are set to a sentinel when they haven't yet matched. */ 1509/* Registers are set to a sentinel when they haven't yet matched. */
1624static char reg_unset_dummy; 1510#define REG_UNSET_VALUE NULL
1625#define REG_UNSET_VALUE (&reg_unset_dummy)
1626#define REG_UNSET(e) ((e) == REG_UNSET_VALUE) 1511#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
1627 1512
1628/* Subroutine declarations and macros for regex_compile. */ 1513/* Subroutine declarations and macros for regex_compile. */
@@ -1631,7 +1516,6 @@ static void store_op1 (), store_op2 ();
1631static void insert_op1 (), insert_op2 (); 1516static void insert_op1 (), insert_op2 ();
1632static boolean at_begline_loc_p (), at_endline_loc_p (); 1517static boolean at_begline_loc_p (), at_endline_loc_p ();
1633static boolean group_in_compile_stack (); 1518static boolean group_in_compile_stack ();
1634static reg_errcode_t compile_range ();
1635 1519
1636/* Fetch the next character in the uncompiled pattern---translating it 1520/* Fetch the next character in the uncompiled pattern---translating it
1637 if necessary. Also cast from a signed character in the constant 1521 if necessary. Also cast from a signed character in the constant
@@ -1778,7 +1662,6 @@ typedef struct
1778{ 1662{
1779 pattern_offset_t begalt_offset; 1663 pattern_offset_t begalt_offset;
1780 pattern_offset_t fixup_alt_jump; 1664 pattern_offset_t fixup_alt_jump;
1781 pattern_offset_t inner_group_offset;
1782 pattern_offset_t laststart_offset; 1665 pattern_offset_t laststart_offset;
1783 regnum_t regnum; 1666 regnum_t regnum;
1784} compile_stack_elt_t; 1667} compile_stack_elt_t;
@@ -1920,11 +1803,7 @@ static fail_stack_type fail_stack;
1920static int regs_allocated_size; 1803static int regs_allocated_size;
1921 1804
1922static const char ** regstart, ** regend; 1805static const char ** regstart, ** regend;
1923static const char ** old_regstart, ** old_regend;
1924static const char **best_regstart, **best_regend; 1806static const char **best_regstart, **best_regend;
1925static register_info_type *reg_info;
1926static const char **reg_dummy;
1927static register_info_type *reg_info_dummy;
1928 1807
1929/* Make the register vectors big enough for NUM_REGS registers, 1808/* Make the register vectors big enough for NUM_REGS registers,
1930 but don't make them smaller. */ 1809 but don't make them smaller. */
@@ -1937,13 +1816,8 @@ regex_grow_registers (num_regs)
1937 { 1816 {
1938 RETALLOC_IF (regstart, num_regs, const char *); 1817 RETALLOC_IF (regstart, num_regs, const char *);
1939 RETALLOC_IF (regend, num_regs, const char *); 1818 RETALLOC_IF (regend, num_regs, const char *);
1940 RETALLOC_IF (old_regstart, num_regs, const char *);
1941 RETALLOC_IF (old_regend, num_regs, const char *);
1942 RETALLOC_IF (best_regstart, num_regs, const char *); 1819 RETALLOC_IF (best_regstart, num_regs, const char *);
1943 RETALLOC_IF (best_regend, num_regs, const char *); 1820 RETALLOC_IF (best_regend, num_regs, const char *);
1944 RETALLOC_IF (reg_info, num_regs, register_info_type);
1945 RETALLOC_IF (reg_dummy, num_regs, const char *);
1946 RETALLOC_IF (reg_info_dummy, num_regs, register_info_type);
1947 1821
1948 regs_allocated_size = num_regs; 1822 regs_allocated_size = num_regs;
1949 } 1823 }
@@ -1969,6 +1843,15 @@ regex_grow_registers (num_regs)
1969 The `fastmap' and `newline_anchor' fields are neither 1843 The `fastmap' and `newline_anchor' fields are neither
1970 examined nor set. */ 1844 examined nor set. */
1971 1845
1846/* Insert the `jump' from the end of last alternative to "here".
1847 The space for the jump has already been allocated. */
1848#define FIXUP_ALT_JUMP() \
1849do { \
1850 if (fixup_alt_jump) \
1851 STORE_JUMP (jump, fixup_alt_jump, b); \
1852} while (0)
1853
1854
1972/* Return, freeing storage we allocated. */ 1855/* Return, freeing storage we allocated. */
1973#define FREE_STACK_RETURN(value) \ 1856#define FREE_STACK_RETURN(value) \
1974 do { \ 1857 do { \
@@ -2042,6 +1925,7 @@ regex_compile (pattern, size, syntax, bufp)
2042 struct range_table_work_area range_table_work; 1925 struct range_table_work_area range_table_work;
2043 1926
2044#ifdef DEBUG 1927#ifdef DEBUG
1928 /* debug = 1; */
2045 DEBUG_PRINT1 ("\nCompiling pattern: "); 1929 DEBUG_PRINT1 ("\nCompiling pattern: ");
2046 if (debug) 1930 if (debug)
2047 { 1931 {
@@ -2258,9 +2142,8 @@ regex_compile (pattern, size, syntax, bufp)
2258 keep_string_p = true; 2142 keep_string_p = true;
2259 } 2143 }
2260 else 2144 else
2261 /* Anything else. */ 2145 STORE_JUMP (jump, b, laststart - 3);
2262 STORE_JUMP (maybe_pop_jump, b, laststart - 3); 2146
2263
2264 /* We've added more stuff to the buffer. */ 2147 /* We've added more stuff to the buffer. */
2265 b += 3; 2148 b += 3;
2266 } 2149 }
@@ -2268,31 +2151,29 @@ regex_compile (pattern, size, syntax, bufp)
2268 /* On failure, jump from laststart to b + 3, which will be the 2151 /* On failure, jump from laststart to b + 3, which will be the
2269 end of the buffer after this jump is inserted. */ 2152 end of the buffer after this jump is inserted. */
2270 GET_BUFFER_SPACE (3); 2153 GET_BUFFER_SPACE (3);
2271 INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
2272 : on_failure_jump,
2273 laststart, b + 3);
2274 pending_exact = 0;
2275 b += 3;
2276
2277 if (!zero_times_ok) 2154 if (!zero_times_ok)
2278 { 2155 {
2279 /* At least one repetition is required, so insert a 2156 assert (many_times_ok);
2280 `dummy_failure_jump' before the initial 2157 INSERT_JUMP (on_failure_jump_smart, b - 3, b + 3);
2281 `on_failure_jump' instruction of the loop. This 2158 pending_exact = 0;
2282 effects a skip over that instruction the first time 2159 b += 3;
2283 we hit that loop. */ 2160 }
2284 GET_BUFFER_SPACE (3); 2161 else
2285 INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6); 2162 {
2163 INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
2164 : !many_times_ok ?
2165 on_failure_jump : on_failure_jump_smart,
2166 laststart, b + 3);
2167 pending_exact = 0;
2286 b += 3; 2168 b += 3;
2287 } 2169 }
2288
2289 } 2170 }
2290 else /* not greedy */ 2171 else /* not greedy */
2291 { /* I wish the greedy and non-greedy cases could be merged. */ 2172 { /* I wish the greedy and non-greedy cases could be merged. */
2292 2173
2293 if (many_times_ok) 2174 if (many_times_ok)
2294 { 2175 {
2295 /* The greedy multiple match looks like a repeat..until: 2176 /* The non-greedy multiple match looks like a repeat..until:
2296 we only need a conditional jump at the end of the loop */ 2177 we only need a conditional jump at the end of the loop */
2297 GET_BUFFER_SPACE (3); 2178 GET_BUFFER_SPACE (3);
2298 STORE_JUMP (on_failure_jump, b, laststart); 2179 STORE_JUMP (on_failure_jump, b, laststart);
@@ -2556,7 +2437,9 @@ regex_compile (pattern, size, syntax, bufp)
2556 Split that into two ranges,, 2437 Split that into two ranges,,
2557 the low one ending at 0237, and the high one 2438 the low one ending at 0237, and the high one
2558 starting at ...040. */ 2439 starting at ...040. */
2559 int c1_base = (c1 & ~0177) | 040; 2440 /* Unless I'm missing something,
2441 this line is useless. -sm
2442 int c1_base = (c1 & ~0177) | 040; */
2560 SET_RANGE_TABLE_WORK_AREA (range_table_work, c, c1); 2443 SET_RANGE_TABLE_WORK_AREA (range_table_work, c, c1);
2561 c1 = 0237; 2444 c1 = 0237;
2562 } 2445 }
@@ -2678,8 +2561,31 @@ regex_compile (pattern, size, syntax, bufp)
2678 goto normal_backslash; 2561 goto normal_backslash;
2679 2562
2680 handle_open: 2563 handle_open:
2681 bufp->re_nsub++; 2564 {
2682 regnum++; 2565 int shy = 0;
2566 if (p+1 < pend)
2567 {
2568 /* Look for a special (?...) construct */
2569 PATFETCH (c);
2570 if ((syntax & RE_SHY_GROUPS) && c == '?')
2571 {
2572 PATFETCH (c);
2573 switch (c)
2574 {
2575 case ':': shy = 1; break;
2576 default:
2577 /* Only (?:...) is supported right now. */
2578 FREE_STACK_RETURN (REG_BADPAT);
2579 }
2580 }
2581 else PATUNFETCH;
2582 }
2583
2584 if (!shy)
2585 {
2586 bufp->re_nsub++;
2587 regnum++;
2588 }
2683 2589
2684 if (COMPILE_STACK_FULL) 2590 if (COMPILE_STACK_FULL)
2685 { 2591 {
@@ -2698,17 +2604,13 @@ regex_compile (pattern, size, syntax, bufp)
2698 COMPILE_STACK_TOP.fixup_alt_jump 2604 COMPILE_STACK_TOP.fixup_alt_jump
2699 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; 2605 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
2700 COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer; 2606 COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
2701 COMPILE_STACK_TOP.regnum = regnum; 2607 COMPILE_STACK_TOP.regnum = shy ? -regnum : regnum;
2702 2608
2703 /* We will eventually replace the 0 with the number of 2609 /* Do not push a
2704 groups inner to this one. But do not push a
2705 start_memory for groups beyond the last one we can 2610 start_memory for groups beyond the last one we can
2706 represent in the compiled pattern. */ 2611 represent in the compiled pattern. */
2707 if (regnum <= MAX_REGNUM) 2612 if (regnum <= MAX_REGNUM && !shy)
2708 { 2613 BUF_PUSH_2 (start_memory, regnum);
2709 COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
2710 BUF_PUSH_3 (start_memory, regnum, 0);
2711 }
2712 2614
2713 compile_stack.avail++; 2615 compile_stack.avail++;
2714 2616
@@ -2720,36 +2622,30 @@ regex_compile (pattern, size, syntax, bufp)
2720 clear pending_exact explicitly. */ 2622 clear pending_exact explicitly. */
2721 pending_exact = 0; 2623 pending_exact = 0;
2722 break; 2624 break;
2723 2625 }
2724 2626
2725 case ')': 2627 case ')':
2726 if (syntax & RE_NO_BK_PARENS) goto normal_backslash; 2628 if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
2727 2629
2728 if (COMPILE_STACK_EMPTY) 2630 if (COMPILE_STACK_EMPTY)
2729 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) 2631 {
2730 goto normal_backslash; 2632 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2731 else 2633 goto normal_backslash;
2732 FREE_STACK_RETURN (REG_ERPAREN); 2634 else
2635 FREE_STACK_RETURN (REG_ERPAREN);
2636 }
2733 2637
2734 handle_close: 2638 handle_close:
2735 if (fixup_alt_jump) 2639 FIXUP_ALT_JUMP ();
2736 { /* Push a dummy failure point at the end of the
2737 alternative for a possible future
2738 `pop_failure_jump' to pop. See comments at
2739 `push_dummy_failure' in `re_match_2'. */
2740 BUF_PUSH (push_dummy_failure);
2741
2742 /* We allocated space for this jump when we assigned
2743 to `fixup_alt_jump', in the `handle_alt' case below. */
2744 STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
2745 }
2746 2640
2747 /* See similar code for backslashed left paren above. */ 2641 /* See similar code for backslashed left paren above. */
2748 if (COMPILE_STACK_EMPTY) 2642 if (COMPILE_STACK_EMPTY)
2749 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) 2643 {
2750 goto normal_char; 2644 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2751 else 2645 goto normal_char;
2752 FREE_STACK_RETURN (REG_ERPAREN); 2646 else
2647 FREE_STACK_RETURN (REG_ERPAREN);
2648 }
2753 2649
2754 /* Since we just checked for an empty stack above, this 2650 /* Since we just checked for an empty stack above, this
2755 ``can't happen''. */ 2651 ``can't happen''. */
@@ -2775,15 +2671,8 @@ regex_compile (pattern, size, syntax, bufp)
2775 2671
2776 /* We're at the end of the group, so now we know how many 2672 /* We're at the end of the group, so now we know how many
2777 groups were inside this one. */ 2673 groups were inside this one. */
2778 if (this_group_regnum <= MAX_REGNUM) 2674 if (this_group_regnum <= MAX_REGNUM && this_group_regnum > 0)
2779 { 2675 BUF_PUSH_2 (stop_memory, this_group_regnum);
2780 unsigned char *inner_group_loc
2781 = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
2782
2783 *inner_group_loc = regnum - this_group_regnum;
2784 BUF_PUSH_3 (stop_memory, this_group_regnum,
2785 regnum - this_group_regnum);
2786 }
2787 } 2676 }
2788 break; 2677 break;
2789 2678
@@ -2818,8 +2707,7 @@ regex_compile (pattern, size, syntax, bufp)
2818 fixup_alt_jump to right after `b', and leave behind three 2707 fixup_alt_jump to right after `b', and leave behind three
2819 bytes which we'll fill in when we get to after `c'. */ 2708 bytes which we'll fill in when we get to after `c'. */
2820 2709
2821 if (fixup_alt_jump) 2710 FIXUP_ALT_JUMP ();
2822 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2823 2711
2824 /* Mark and leave space for a jump after this alternative, 2712 /* Mark and leave space for a jump after this alternative,
2825 to be filled in later either by next alternative or 2713 to be filled in later either by next alternative or
@@ -3173,8 +3061,7 @@ regex_compile (pattern, size, syntax, bufp)
3173 3061
3174 /* Through the pattern now. */ 3062 /* Through the pattern now. */
3175 3063
3176 if (fixup_alt_jump) 3064 FIXUP_ALT_JUMP ();
3177 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
3178 3065
3179 if (!COMPILE_STACK_EMPTY) 3066 if (!COMPILE_STACK_EMPTY)
3180 FREE_STACK_RETURN (REG_EPAREN); 3067 FREE_STACK_RETURN (REG_EPAREN);
@@ -3192,8 +3079,10 @@ regex_compile (pattern, size, syntax, bufp)
3192#ifdef DEBUG 3079#ifdef DEBUG
3193 if (debug) 3080 if (debug)
3194 { 3081 {
3082 re_compile_fastmap (bufp);
3195 DEBUG_PRINT1 ("\nCompiled pattern: \n"); 3083 DEBUG_PRINT1 ("\nCompiled pattern: \n");
3196 print_compiled_pattern (bufp); 3084 print_compiled_pattern (bufp);
3085 /* debug = 0; */
3197 } 3086 }
3198#endif /* DEBUG */ 3087#endif /* DEBUG */
3199 3088
@@ -3208,17 +3097,6 @@ regex_compile (pattern, size, syntax, bufp)
3208 { 3097 {
3209 fail_stack.size = re_max_failures * TYPICAL_FAILURE_SIZE; 3098 fail_stack.size = re_max_failures * TYPICAL_FAILURE_SIZE;
3210 3099
3211#ifdef emacs
3212 if (! fail_stack.stack)
3213 fail_stack.stack
3214 = (fail_stack_elt_t *) xmalloc (fail_stack.size
3215 * sizeof (fail_stack_elt_t));
3216 else
3217 fail_stack.stack
3218 = (fail_stack_elt_t *) xrealloc (fail_stack.stack,
3219 (fail_stack.size
3220 * sizeof (fail_stack_elt_t)));
3221#else /* not emacs */
3222 if (! fail_stack.stack) 3100 if (! fail_stack.stack)
3223 fail_stack.stack 3101 fail_stack.stack
3224 = (fail_stack_elt_t *) malloc (fail_stack.size 3102 = (fail_stack_elt_t *) malloc (fail_stack.size
@@ -3228,7 +3106,6 @@ regex_compile (pattern, size, syntax, bufp)
3228 = (fail_stack_elt_t *) realloc (fail_stack.stack, 3106 = (fail_stack_elt_t *) realloc (fail_stack.stack,
3229 (fail_stack.size 3107 (fail_stack.size
3230 * sizeof (fail_stack_elt_t))); 3108 * sizeof (fail_stack_elt_t)));
3231#endif /* not emacs */
3232 } 3109 }
3233 3110
3234 regex_grow_registers (num_regs); 3111 regex_grow_registers (num_regs);
@@ -3388,15 +3265,13 @@ int
3388re_compile_fastmap (bufp) 3265re_compile_fastmap (bufp)
3389 struct re_pattern_buffer *bufp; 3266 struct re_pattern_buffer *bufp;
3390{ 3267{
3391 int i, j, k; 3268 int j, k;
3392#ifdef MATCH_MAY_ALLOCATE 3269#ifdef MATCH_MAY_ALLOCATE
3393 fail_stack_type fail_stack; 3270 fail_stack_type fail_stack;
3394#endif 3271#endif
3395#ifndef REGEX_MALLOC 3272#ifndef REGEX_MALLOC
3396 char *destination; 3273 char *destination;
3397#endif 3274#endif
3398 /* We don't push any register information onto the failure stack. */
3399 unsigned num_regs = 0;
3400 3275
3401 register char *fastmap = bufp->fastmap; 3276 register char *fastmap = bufp->fastmap;
3402 unsigned char *pattern = bufp->buffer; 3277 unsigned char *pattern = bufp->buffer;
@@ -3431,19 +3306,45 @@ re_compile_fastmap (bufp)
3431 bufp->fastmap_accurate = 1; /* It will be when we're done. */ 3306 bufp->fastmap_accurate = 1; /* It will be when we're done. */
3432 bufp->can_be_null = 0; 3307 bufp->can_be_null = 0;
3433 3308
3434 while (1) 3309 /* The loop below works as follows:
3310 - It has a working-list kept in the PATTERN_STACK and which basically
3311 starts by only containing a pointer to the first operation.
3312 - If the opcode we're looking at is a match against some set of
3313 chars, then we add those chars to the fastmap and go on to the
3314 next work element from the worklist (done via `break').
3315 - If the opcode is a control operator on the other hand, we either
3316 ignore it (if it's meaningless at this point, such as `start_memory')
3317 or execute it (if it's a jump). If the jump has several destinations
3318 (i.e. `on_failure_jump'), then we push the other destination onto the
3319 worklist.
3320 We guarantee termination by ignoring backward jumps (more or less),
3321 so that `p' is monotonically increasing. More to the point, we
3322 never set `p' (or push) anything `<= p1'. */
3323
3324 /* If can_be_null is set, then the fastmap will not be used anyway. */
3325 while (!bufp->can_be_null)
3435 { 3326 {
3327 /* `p1' is used as a marker of how far back a `on_failure_jump'
3328 can go without being ignored. It is normally equal to `p'
3329 (which prevents any backward `on_failure_jump') except right
3330 after a plain `jump', to allow patterns such as:
3331 0: jump 10
3332 3..9: <body>
3333 10: on_failure_jump 3
3334 as used for the *? operator. */
3335 unsigned char *p1 = p;
3336
3436 if (p == pend || *p == succeed) 3337 if (p == pend || *p == succeed)
3437 { 3338 {
3438 /* We have reached the (effective) end of pattern. */ 3339 /* We have reached the (effective) end of pattern. */
3439 if (!FAIL_STACK_EMPTY ()) 3340 if (!PATTERN_STACK_EMPTY ())
3440 { 3341 {
3441 bufp->can_be_null |= path_can_be_null; 3342 bufp->can_be_null |= path_can_be_null;
3442 3343
3443 /* Reset for next path. */ 3344 /* Reset for next path. */
3444 path_can_be_null = true; 3345 path_can_be_null = true;
3445 3346
3446 p = fail_stack.stack[--fail_stack.avail].pointer; 3347 p = POP_PATTERN_OP ();
3447 3348
3448 continue; 3349 continue;
3449 } 3350 }
@@ -3457,14 +3358,13 @@ re_compile_fastmap (bufp)
3457 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++)) 3358 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
3458 { 3359 {
3459 3360
3460 /* I guess the idea here is to simply not bother with a fastmap
3461 if a backreference is used, since it's too hard to figure out
3462 the fastmap for the corresponding group. Setting
3463 `can_be_null' stops `re_search_2' from using the fastmap, so
3464 that is all we do. */
3465 case duplicate: 3361 case duplicate:
3466 bufp->can_be_null = 1; 3362 /* If the first character has to match a backreference, that means
3467 goto done; 3363 that the group was empty (since it already matched). Since this
3364 is the only case that interests us here, we can assume that the
3365 backreference must match the empty string. */
3366 p++;
3367 continue;
3468 3368
3469 3369
3470 /* Following are the cases which match a character. These end 3370 /* Following are the cases which match a character. These end
@@ -3534,9 +3434,8 @@ re_compile_fastmap (bufp)
3534 multibyte character in the range table. */ 3434 multibyte character in the range table. */
3535 int c, count; 3435 int c, count;
3536 3436
3537 /* Make P points the range table. `+ 2' is to skip flag 3437 /* Make P points the range table. */
3538 bits for a character class. */ 3438 p += CHARSET_BITMAP_SIZE (&p[-2]);
3539 p += CHARSET_BITMAP_SIZE (&p[-2]) + 2;
3540 3439
3541 /* Extract the number of ranges in range table into COUNT. */ 3440 /* Extract the number of ranges in range table into COUNT. */
3542 EXTRACT_NUMBER_AND_INCR (count, p); 3441 EXTRACT_NUMBER_AND_INCR (count, p);
@@ -3630,11 +3529,6 @@ re_compile_fastmap (bufp)
3630 if (!(bufp->syntax & RE_DOT_NEWLINE)) 3529 if (!(bufp->syntax & RE_DOT_NEWLINE))
3631 fastmap['\n'] = fastmap_newline; 3530 fastmap['\n'] = fastmap_newline;
3632 3531
3633 /* Return if we have already set `can_be_null'; if we have,
3634 then the fastmap is irrelevant. Something's wrong here. */
3635 else if (bufp->can_be_null)
3636 goto done;
3637
3638 /* Otherwise, have to check alternative paths. */ 3532 /* Otherwise, have to check alternative paths. */
3639 break; 3533 break;
3640 } 3534 }
@@ -3649,7 +3543,7 @@ re_compile_fastmap (bufp)
3649 /* This match depends on text properties. These end with 3543 /* This match depends on text properties. These end with
3650 aborting optimizations. */ 3544 aborting optimizations. */
3651 bufp->can_be_null = 1; 3545 bufp->can_be_null = 1;
3652 goto done; 3546 continue;
3653#if 0 3547#if 0
3654 k = *p++; 3548 k = *p++;
3655 simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH); 3549 simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH);
@@ -3727,44 +3621,38 @@ re_compile_fastmap (bufp)
3727 case wordbeg: 3621 case wordbeg:
3728 case wordend: 3622 case wordend:
3729#endif 3623#endif
3730 case push_dummy_failure:
3731 continue; 3624 continue;
3732 3625
3733 3626
3734 case jump_n: 3627 case jump_n:
3735 case pop_failure_jump:
3736 case maybe_pop_jump:
3737 case jump: 3628 case jump:
3738 case jump_past_alt:
3739 case dummy_failure_jump:
3740 EXTRACT_NUMBER_AND_INCR (j, p);
3741 p += j;
3742 if (j > 0)
3743 continue;
3744
3745 /* Jump backward implies we just went through the body of a
3746 loop and matched nothing. Opcode jumped to should be
3747 `on_failure_jump' or `succeed_n'. Just treat it like an
3748 ordinary jump. For a * loop, it has pushed its failure
3749 point already; if so, discard that as redundant. */
3750 if ((re_opcode_t) *p != on_failure_jump
3751 && (re_opcode_t) *p != succeed_n)
3752 continue;
3753
3754 p++;
3755 EXTRACT_NUMBER_AND_INCR (j, p); 3629 EXTRACT_NUMBER_AND_INCR (j, p);
3630 if (j < 0)
3631 /* Backward jumps can only go back to code that we've already
3632 visited. `re_compile' should make sure this is true. */
3633 break;
3756 p += j; 3634 p += j;
3757 3635 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p))
3758 /* If what's on the stack is where we are now, pop it. */ 3636 {
3759 if (!FAIL_STACK_EMPTY () 3637 case on_failure_jump:
3760 && fail_stack.stack[fail_stack.avail - 1].pointer == p) 3638 case on_failure_keep_string_jump:
3761 fail_stack.avail--; 3639 case on_failure_jump_exclusive:
3762 3640 case on_failure_jump_loop:
3763 continue; 3641 case on_failure_jump_smart:
3764 3642 p++;
3643 break;
3644 default:
3645 continue;
3646 };
3647 /* Keep `p1' to allow the `on_failure_jump' we are jumping to
3648 to jump back to "just after here". */
3649 /* Fallthrough */
3765 3650
3766 case on_failure_jump: 3651 case on_failure_jump:
3767 case on_failure_keep_string_jump: 3652 case on_failure_keep_string_jump:
3653 case on_failure_jump_exclusive:
3654 case on_failure_jump_loop:
3655 case on_failure_jump_smart:
3768 handle_on_failure_jump: 3656 handle_on_failure_jump:
3769 EXTRACT_NUMBER_AND_INCR (j, p); 3657 EXTRACT_NUMBER_AND_INCR (j, p);
3770 3658
@@ -3775,7 +3663,10 @@ re_compile_fastmap (bufp)
3775 to push such a point since we obviously won't find any more 3663 to push such a point since we obviously won't find any more
3776 fastmap entries beyond `pend'. Such a pattern can match 3664 fastmap entries beyond `pend'. Such a pattern can match
3777 the null string, though. */ 3665 the null string, though. */
3778 if (p + j < pend) 3666 if (p + j <= p1)
3667 /* Backward jump to be ignored. */
3668 ;
3669 else if (p + j < pend)
3779 { 3670 {
3780 if (!PUSH_PATTERN_OP (p + j, fail_stack)) 3671 if (!PUSH_PATTERN_OP (p + j, fail_stack))
3781 { 3672 {
@@ -3817,7 +3708,7 @@ re_compile_fastmap (bufp)
3817 3708
3818 case start_memory: 3709 case start_memory:
3819 case stop_memory: 3710 case stop_memory:
3820 p += 2; 3711 p += 1;
3821 continue; 3712 continue;
3822 3713
3823 3714
@@ -3838,8 +3729,6 @@ re_compile_fastmap (bufp)
3838 /* Set `can_be_null' for the last path (also the first path, if the 3729 /* Set `can_be_null' for the last path (also the first path, if the
3839 pattern is empty). */ 3730 pattern is empty). */
3840 bufp->can_be_null |= path_can_be_null; 3731 bufp->can_be_null |= path_can_be_null;
3841
3842 done:
3843 RESET_FAIL_STACK (); 3732 RESET_FAIL_STACK ();
3844 return 0; 3733 return 0;
3845} /* re_compile_fastmap */ 3734} /* re_compile_fastmap */
@@ -4167,9 +4056,6 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
4167/* Declarations and macros for re_match_2. */ 4056/* Declarations and macros for re_match_2. */
4168 4057
4169static int bcmp_translate (); 4058static int bcmp_translate ();
4170static boolean alt_match_null_string_p (),
4171 common_op_match_null_string_p (),
4172 group_match_null_string_p ();
4173 4059
4174/* This converts PTR, a pointer into one of the search strings `string1' 4060/* This converts PTR, a pointer into one of the search strings `string1'
4175 and `string2' into an offset from the beginning of that string. */ 4061 and `string2' into an offset from the beginning of that string. */
@@ -4237,27 +4123,208 @@ static boolean alt_match_null_string_p (),
4237 REGEX_FREE_STACK (fail_stack.stack); \ 4123 REGEX_FREE_STACK (fail_stack.stack); \
4238 FREE_VAR (regstart); \ 4124 FREE_VAR (regstart); \
4239 FREE_VAR (regend); \ 4125 FREE_VAR (regend); \
4240 FREE_VAR (old_regstart); \
4241 FREE_VAR (old_regend); \
4242 FREE_VAR (best_regstart); \ 4126 FREE_VAR (best_regstart); \
4243 FREE_VAR (best_regend); \ 4127 FREE_VAR (best_regend); \
4244 FREE_VAR (reg_info); \
4245 FREE_VAR (reg_dummy); \
4246 FREE_VAR (reg_info_dummy); \
4247 } while (0) 4128 } while (0)
4248#else 4129#else
4249#define FREE_VARIABLES() ((void)0) /* Do nothing! But inhibit gcc warning. */ 4130#define FREE_VARIABLES() ((void)0) /* Do nothing! But inhibit gcc warning. */
4250#endif /* not MATCH_MAY_ALLOCATE */ 4131#endif /* not MATCH_MAY_ALLOCATE */
4251 4132
4252/* These values must meet several constraints. They must not be valid 4133
4253 register values; since we have a limit of 255 registers (because 4134/* Optimization routines. */
4254 we use only one byte in the pattern for the register number), we can 4135
4255 use numbers larger than 255. They must differ by 1, because of 4136/* Jump over non-matching operations. */
4256 NUM_FAILURE_ITEMS above. And the value for the lowest register must 4137static unsigned char *
4257 be larger than the value for the highest register, so we do not try 4138skip_noops (p, pend, memory)
4258 to actually save any registers when none are active. */ 4139 unsigned char *p, *pend;
4259#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH) 4140 int memory;
4260#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1) 4141{
4142 int mcnt;
4143 while (p < pend)
4144 {
4145 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p))
4146 {
4147 case start_memory:
4148 if (!memory)
4149 return p;
4150 case stop_memory:
4151 p += 2; break;
4152 case no_op:
4153 p += 1; break;
4154 case jump:
4155 p += 1;
4156 EXTRACT_NUMBER_AND_INCR (mcnt, p);
4157 p += mcnt;
4158 break;
4159 default:
4160 return p;
4161 }
4162 }
4163 assert (p == pend);
4164 return p;
4165}
4166
4167/* Non-zero if "p1 matches something" implies "p2 fails". */
4168static int
4169mutually_exclusive_p (bufp, p1, p2)
4170 struct re_pattern_buffer *bufp;
4171 unsigned char *p1, *p2;
4172{
4173 int multibyte = bufp->multibyte;
4174 unsigned char *pend = bufp->buffer + bufp->used;
4175
4176 assert (p1 >= bufp->buffer && p1 <= pend
4177 && p2 >= bufp->buffer && p2 <= pend);
4178
4179 /* Skip over open/close-group commands.
4180 If what follows this loop is a ...+ construct,
4181 look at what begins its body, since we will have to
4182 match at least one of that. */
4183 p2 = skip_noops (p2, pend, 1);
4184 /* The same skip can be done for p1, except that skipping over
4185 start_memory is not a good idea (if there's a group inside
4186 the loop delimited by on_failure_jump_exclusive, then it
4187 can't optimize the push away (it still works properly, but
4188 slightly slower rather than faster)). */
4189 p1 = skip_noops (p1, pend, 0);
4190
4191 /* If we're at the end of the pattern, we can change. */
4192 if (p2 == pend)
4193 {
4194 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p1))
4195 {
4196 case anychar:
4197 case charset_not:
4198 case charset:
4199 case exactn:
4200 DEBUG_PRINT1 (" End of pattern: fast loop.\n");
4201 return 1;
4202 default:
4203 return 0;
4204 }
4205 }
4206
4207 else if ((re_opcode_t) *p2 == exactn
4208 || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
4209 {
4210 register unsigned int c
4211 = *p2 == (unsigned char) endline ? '\n' : p2[2];
4212
4213 if ((re_opcode_t) *p1 == exactn)
4214 {
4215 if (!(multibyte /* && (c != '\n') */
4216 && BASE_LEADING_CODE_P (c))
4217 ? c != p1[2]
4218 : (STRING_CHAR (&p2[2], pend - &p2[2])
4219 != STRING_CHAR (&p1[2], pend - &p1[2])))
4220 {
4221 DEBUG_PRINT3 (" '%c' != '%c' => fast loop.\n", c, p1[2]);
4222 return 1;
4223 }
4224 }
4225
4226 else if ((re_opcode_t) *p1 == charset
4227 || (re_opcode_t) *p1 == charset_not)
4228 {
4229 int not = (re_opcode_t) *p1 == charset_not;
4230
4231 if (multibyte /* && (c != '\n') */
4232 && BASE_LEADING_CODE_P (c))
4233 c = STRING_CHAR (&p2[2], pend - &p2[2]);
4234
4235 /* Test if C is listed in charset (or charset_not)
4236 at `p1'. */
4237 if (SINGLE_BYTE_CHAR_P (c))
4238 {
4239 if (c < CHARSET_BITMAP_SIZE (p1) * BYTEWIDTH
4240 && p1[2 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4241 not = !not;
4242 }
4243 else if (CHARSET_RANGE_TABLE_EXISTS_P (p1))
4244 CHARSET_LOOKUP_RANGE_TABLE (not, c, p1);
4245
4246 /* `not' is equal to 1 if c would match, which means
4247 that we can't change to pop_failure_jump. */
4248 if (!not)
4249 {
4250 DEBUG_PRINT1 (" No match => fast loop.\n");
4251 return 1;
4252 }
4253 }
4254 else if ((re_opcode_t) *p1 == anychar
4255 && c == '\n')
4256 {
4257 DEBUG_PRINT1 (" . != \\n => fast loop.\n");
4258 return 1;
4259 }
4260 }
4261 else if ((re_opcode_t) *p2 == charset
4262 || (re_opcode_t) *p2 == charset_not)
4263 {
4264 if ((re_opcode_t) *p1 == exactn)
4265 /* Reuse the code above. */
4266 return mutually_exclusive_p (bufp, p2, p1);
4267
4268
4269 /* It is hard to list up all the character in charset
4270 P2 if it includes multibyte character. Give up in
4271 such case. */
4272 else if (!multibyte || !CHARSET_RANGE_TABLE_EXISTS_P (p2))
4273 {
4274 /* Now, we are sure that P2 has no range table.
4275 So, for the size of bitmap in P2, `p2[1]' is
4276 enough. But P1 may have range table, so the
4277 size of bitmap table of P1 is extracted by
4278 using macro `CHARSET_BITMAP_SIZE'.
4279
4280 Since we know that all the character listed in
4281 P2 is ASCII, it is enough to test only bitmap
4282 table of P1. */
4283
4284 if (*p1 == *p2)
4285 {
4286 int idx;
4287 /* We win if the charset inside the loop
4288 has no overlap with the one after the loop. */
4289 for (idx = 0;
4290 (idx < (int) p2[1]
4291 && idx < CHARSET_BITMAP_SIZE (p1));
4292 idx++)
4293 if ((p2[2 + idx] & p1[2 + idx]) != 0)
4294 break;
4295
4296 if (idx == p2[1]
4297 || idx == CHARSET_BITMAP_SIZE (p1))
4298 {
4299 DEBUG_PRINT1 (" No match => fast loop.\n");
4300 return 1;
4301 }
4302 }
4303 else if ((re_opcode_t) *p1 == charset
4304 || (re_opcode_t) *p1 == charset_not)
4305 {
4306 int idx;
4307 /* We win if the charset_not inside the loop lists
4308 every character listed in the charset after. */
4309 for (idx = 0; idx < (int) p2[1]; idx++)
4310 if (! (p2[2 + idx] == 0
4311 || (idx < CHARSET_BITMAP_SIZE (p1)
4312 && ((p2[2 + idx] & ~ p1[2 + idx]) == 0))))
4313 break;
4314
4315 if (idx == p2[1])
4316 {
4317 DEBUG_PRINT1 (" No match => fast loop.\n");
4318 return 1;
4319 }
4320 }
4321 }
4322 }
4323
4324 /* Safe default. */
4325 return 0;
4326}
4327
4261 4328
4262/* Matching routines. */ 4329/* Matching routines. */
4263 4330
@@ -4351,10 +4418,6 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4351 unsigned char *p = bufp->buffer; 4418 unsigned char *p = bufp->buffer;
4352 register unsigned char *pend = p + bufp->used; 4419 register unsigned char *pend = p + bufp->used;
4353 4420
4354 /* Mark the opcode just after a start_memory, so we can test for an
4355 empty subpattern when we get to the stop_memory. */
4356 unsigned char *just_past_start_mem = 0;
4357
4358 /* We use this to map every character in the string. */ 4421 /* We use this to map every character in the string. */
4359 RE_TRANSLATE_TYPE translate = bufp->translate; 4422 RE_TRANSLATE_TYPE translate = bufp->translate;
4360 4423
@@ -4363,13 +4426,11 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4363 4426
4364 /* Failure point stack. Each place that can handle a failure further 4427 /* Failure point stack. Each place that can handle a failure further
4365 down the line pushes a failure point on this stack. It consists of 4428 down the line pushes a failure point on this stack. It consists of
4366 restart, regend, and reg_info for all registers corresponding to 4429 regstart, and regend for all registers corresponding to
4367 the subexpressions we're currently inside, plus the number of such 4430 the subexpressions we're currently inside, plus the number of such
4368 registers, and, finally, two char *'s. The first char * is where 4431 registers, and, finally, two char *'s. The first char * is where
4369 to resume scanning the pattern; the second one is where to resume 4432 to resume scanning the pattern; the second one is where to resume
4370 scanning the strings. If the latter is zero, the failure point is 4433 scanning the strings. */
4371 a ``dummy''; if a failure happens and the failure point is a dummy,
4372 it gets discarded and the next next one is tried. */
4373#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ 4434#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */
4374 fail_stack_type fail_stack; 4435 fail_stack_type fail_stack;
4375#endif 4436#endif
@@ -4387,10 +4448,6 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4387 an element for register zero. */ 4448 an element for register zero. */
4388 unsigned num_regs = bufp->re_nsub + 1; 4449 unsigned num_regs = bufp->re_nsub + 1;
4389 4450
4390 /* The currently active registers. */
4391 unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4392 unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4393
4394 /* Information on the contents of registers. These are pointers into 4451 /* Information on the contents of registers. These are pointers into
4395 the input strings; they record just what was matched (on this 4452 the input strings; they record just what was matched (on this
4396 attempt) by a subexpression part of the pattern, that is, the 4453 attempt) by a subexpression part of the pattern, that is, the
@@ -4402,25 +4459,6 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4402 const char **regstart, **regend; 4459 const char **regstart, **regend;
4403#endif 4460#endif
4404 4461
4405 /* If a group that's operated upon by a repetition operator fails to
4406 match anything, then the register for its start will need to be
4407 restored because it will have been set to wherever in the string we
4408 are when we last see its open-group operator. Similarly for a
4409 register's end. */
4410#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
4411 const char **old_regstart, **old_regend;
4412#endif
4413
4414 /* The is_active field of reg_info helps us keep track of which (possibly
4415 nested) subexpressions we are currently in. The matched_something
4416 field of reg_info[reg_num] helps us tell whether or not we have
4417 matched any of the pattern so far this time through the reg_num-th
4418 subexpression. These two fields get reset each time through any
4419 loop their register is in. */
4420#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */
4421 register_info_type *reg_info;
4422#endif
4423
4424 /* The following record the register info as found in the above 4462 /* The following record the register info as found in the above
4425 variables when we find a match better than any we've seen before. 4463 variables when we find a match better than any we've seen before.
4426 This happens as we backtrack through the failure points, which in 4464 This happens as we backtrack through the failure points, which in
@@ -4440,15 +4478,6 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4440 and need to test it, it's not garbage. */ 4478 and need to test it, it's not garbage. */
4441 const char *match_end = NULL; 4479 const char *match_end = NULL;
4442 4480
4443 /* This helps SET_REGS_MATCHED avoid doing redundant work. */
4444 int set_regs_matched_done = 0;
4445
4446 /* Used when we pop values we don't care about. */
4447#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
4448 const char **reg_dummy;
4449 register_info_type *reg_info_dummy;
4450#endif
4451
4452#ifdef DEBUG 4481#ifdef DEBUG
4453 /* Counts the total number of registers pushed. */ 4482 /* Counts the total number of registers pushed. */
4454 unsigned num_regs_pushed = 0; 4483 unsigned num_regs_pushed = 0;
@@ -4468,16 +4497,10 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4468 { 4497 {
4469 regstart = REGEX_TALLOC (num_regs, const char *); 4498 regstart = REGEX_TALLOC (num_regs, const char *);
4470 regend = REGEX_TALLOC (num_regs, const char *); 4499 regend = REGEX_TALLOC (num_regs, const char *);
4471 old_regstart = REGEX_TALLOC (num_regs, const char *);
4472 old_regend = REGEX_TALLOC (num_regs, const char *);
4473 best_regstart = REGEX_TALLOC (num_regs, const char *); 4500 best_regstart = REGEX_TALLOC (num_regs, const char *);
4474 best_regend = REGEX_TALLOC (num_regs, const char *); 4501 best_regend = REGEX_TALLOC (num_regs, const char *);
4475 reg_info = REGEX_TALLOC (num_regs, register_info_type);
4476 reg_dummy = REGEX_TALLOC (num_regs, const char *);
4477 reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
4478 4502
4479 if (!(regstart && regend && old_regstart && old_regend && reg_info 4503 if (!(regstart && regend && best_regstart && best_regend))
4480 && best_regstart && best_regend && reg_dummy && reg_info_dummy))
4481 { 4504 {
4482 FREE_VARIABLES (); 4505 FREE_VARIABLES ();
4483 return -2; 4506 return -2;
@@ -4487,9 +4510,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4487 { 4510 {
4488 /* We must initialize all our variables to NULL, so that 4511 /* We must initialize all our variables to NULL, so that
4489 `FREE_VARIABLES' doesn't try to free them. */ 4512 `FREE_VARIABLES' doesn't try to free them. */
4490 regstart = regend = old_regstart = old_regend = best_regstart 4513 regstart = regend = best_regstart = best_regend = NULL;
4491 = best_regend = reg_dummy = NULL;
4492 reg_info = reg_info_dummy = (register_info_type *) NULL;
4493 } 4514 }
4494#endif /* MATCH_MAY_ALLOCATE */ 4515#endif /* MATCH_MAY_ALLOCATE */
4495 4516
@@ -4505,13 +4526,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4505 register information struct. */ 4526 register information struct. */
4506 for (mcnt = 1; mcnt < num_regs; mcnt++) 4527 for (mcnt = 1; mcnt < num_regs; mcnt++)
4507 { 4528 {
4508 regstart[mcnt] = regend[mcnt] 4529 regstart[mcnt] = regend[mcnt] = REG_UNSET_VALUE;
4509 = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
4510
4511 REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
4512 IS_ACTIVE (reg_info[mcnt]) = 0;
4513 MATCHED_SOMETHING (reg_info[mcnt]) = 0;
4514 EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
4515 } 4530 }
4516 4531
4517 /* We move `string1' into `string2' if the latter's empty -- but not if 4532 /* We move `string1' into `string2' if the latter's empty -- but not if
@@ -4566,7 +4581,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4566 fails at this starting point in the input data. */ 4581 fails at this starting point in the input data. */
4567 for (;;) 4582 for (;;)
4568 { 4583 {
4569 DEBUG_PRINT2 ("\n0x%x: ", p); 4584 DEBUG_PRINT2 ("\n%p: ", p);
4570 4585
4571 if (p == pend) 4586 if (p == pend)
4572 { /* End of pattern means we might have succeeded. */ 4587 { /* End of pattern means we might have succeeded. */
@@ -4796,7 +4811,6 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4796 } 4811 }
4797 while (--mcnt); 4812 while (--mcnt);
4798 } 4813 }
4799 SET_REGS_MATCHED ();
4800 break; 4814 break;
4801 4815
4802 4816
@@ -4828,7 +4842,6 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4828 && buf_ch == '\000')) 4842 && buf_ch == '\000'))
4829 goto fail; 4843 goto fail;
4830 4844
4831 SET_REGS_MATCHED ();
4832 DEBUG_PRINT2 (" Matched `%d'.\n", *d); 4845 DEBUG_PRINT2 (" Matched `%d'.\n", *d);
4833 d += buf_charlen; 4846 d += buf_charlen;
4834 } 4847 }
@@ -4913,195 +4926,55 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4913 4926
4914 if (!not) goto fail; 4927 if (!not) goto fail;
4915 4928
4916 SET_REGS_MATCHED ();
4917 d += len; 4929 d += len;
4918 break; 4930 break;
4919 } 4931 }
4920 4932
4921 4933
4922 /* The beginning of a group is represented by start_memory. 4934 /* The beginning of a group is represented by start_memory.
4923 The arguments are the register number in the next byte, and the 4935 The argument is the register number. The text
4924 number of groups inner to this one in the next. The text
4925 matched within the group is recorded (in the internal 4936 matched within the group is recorded (in the internal
4926 registers data structure) under the register number. */ 4937 registers data structure) under the register number. */
4927 case start_memory: 4938 case start_memory:
4928 DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]); 4939 DEBUG_PRINT2 ("EXECUTING start_memory %d:\n", *p);
4929 4940
4930 /* Find out if this group can match the empty string. */ 4941 /* In case we need to undo this operation (via backtracking). */
4931 p1 = p; /* To send to group_match_null_string_p. */ 4942 PUSH_FAILURE_REG ((unsigned int)*p);
4932
4933 if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
4934 REG_MATCH_NULL_STRING_P (reg_info[*p])
4935 = group_match_null_string_p (&p1, pend, reg_info);
4936
4937 /* Save the position in the string where we were the last time
4938 we were at this open-group operator in case the group is
4939 operated upon by a repetition operator, e.g., with `(a*)*b'
4940 against `ab'; then we want to ignore where we are now in
4941 the string in case this attempt to match fails. */
4942 old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
4943 ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
4944 : regstart[*p];
4945 DEBUG_PRINT2 (" old_regstart: %d\n",
4946 POINTER_TO_OFFSET (old_regstart[*p]));
4947 4943
4948 regstart[*p] = d; 4944 regstart[*p] = d;
4945 regend[*p] = REG_UNSET_VALUE; /* probably unnecessary. -sm */
4949 DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p])); 4946 DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
4950 4947
4951 IS_ACTIVE (reg_info[*p]) = 1;
4952 MATCHED_SOMETHING (reg_info[*p]) = 0;
4953
4954 /* Clear this whenever we change the register activity status. */
4955 set_regs_matched_done = 0;
4956
4957 /* This is the new highest active register. */
4958 highest_active_reg = *p;
4959
4960 /* If nothing was active before, this is the new lowest active
4961 register. */
4962 if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
4963 lowest_active_reg = *p;
4964
4965 /* Move past the register number and inner group count. */ 4948 /* Move past the register number and inner group count. */
4966 p += 2; 4949 p += 1;
4967 just_past_start_mem = p;
4968
4969 break; 4950 break;
4970 4951
4971 4952
4972 /* The stop_memory opcode represents the end of a group. Its 4953 /* The stop_memory opcode represents the end of a group. Its
4973 arguments are the same as start_memory's: the register 4954 argument is the same as start_memory's: the register number. */
4974 number, and the number of inner groups. */
4975 case stop_memory: 4955 case stop_memory:
4976 DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]); 4956 DEBUG_PRINT2 ("EXECUTING stop_memory %d:\n", *p);
4977 4957
4978 /* We need to save the string position the last time we were at 4958 assert (!REG_UNSET (regstart[*p]));
4979 this close-group operator in case the group is operated 4959 /* Strictly speaking, there should be code such as:
4980 upon by a repetition operator, e.g., with `((a*)*(b*)*)*' 4960
4981 against `aba'; then we want to ignore where we are now in 4961 assert (REG_UNSET (regend[*p]));
4982 the string in case this attempt to match fails. */ 4962 PUSH_FAILURE_REGSTOP ((unsigned int)*p);
4983 old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) 4963
4984 ? REG_UNSET (regend[*p]) ? d : regend[*p] 4964 But the only info to be pushed is regend[*p] and it is known to
4985 : regend[*p]; 4965 be UNSET, so there really isn't anything to push.
4986 DEBUG_PRINT2 (" old_regend: %d\n", 4966 Not pushing anything, on the other hand deprives us from the
4987 POINTER_TO_OFFSET (old_regend[*p])); 4967 guarantee that regend[*p] is UNSET since undoing this operation
4968 will not reset its value properly. This is not important since
4969 the value will only be read on the next start_memory or at
4970 the very end and both events can only happen if this stop_memory
4971 is *not* undone. */
4988 4972
4989 regend[*p] = d; 4973 regend[*p] = d;
4990 DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p])); 4974 DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
4991 4975
4992 /* This register isn't active anymore. */
4993 IS_ACTIVE (reg_info[*p]) = 0;
4994
4995 /* Clear this whenever we change the register activity status. */
4996 set_regs_matched_done = 0;
4997
4998 /* If this was the only register active, nothing is active
4999 anymore. */
5000 if (lowest_active_reg == highest_active_reg)
5001 {
5002 lowest_active_reg = NO_LOWEST_ACTIVE_REG;
5003 highest_active_reg = NO_HIGHEST_ACTIVE_REG;
5004 }
5005 else
5006 { /* We must scan for the new highest active register, since
5007 it isn't necessarily one less than now: consider
5008 (a(b)c(d(e)f)g). When group 3 ends, after the f), the
5009 new highest active register is 1. */
5010 unsigned char r = *p - 1;
5011 while (r > 0 && !IS_ACTIVE (reg_info[r]))
5012 r--;
5013
5014 /* If we end up at register zero, that means that we saved
5015 the registers as the result of an `on_failure_jump', not
5016 a `start_memory', and we jumped to past the innermost
5017 `stop_memory'. For example, in ((.)*) we save
5018 registers 1 and 2 as a result of the *, but when we pop
5019 back to the second ), we are at the stop_memory 1.
5020 Thus, nothing is active. */
5021 if (r == 0)
5022 {
5023 lowest_active_reg = NO_LOWEST_ACTIVE_REG;
5024 highest_active_reg = NO_HIGHEST_ACTIVE_REG;
5025 }
5026 else
5027 highest_active_reg = r;
5028 }
5029
5030 /* If just failed to match something this time around with a
5031 group that's operated on by a repetition operator, try to
5032 force exit from the ``loop'', and restore the register
5033 information for this group that we had before trying this
5034 last match. */
5035 if ((!MATCHED_SOMETHING (reg_info[*p])
5036 || just_past_start_mem == p - 1)
5037 && (p + 2) < pend)
5038 {
5039 boolean is_a_jump_n = false;
5040
5041 p1 = p + 2;
5042 mcnt = 0;
5043 switch ((re_opcode_t) *p1++)
5044 {
5045 case jump_n:
5046 is_a_jump_n = true;
5047 case pop_failure_jump:
5048 case maybe_pop_jump:
5049 case jump:
5050 case dummy_failure_jump:
5051 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5052 if (is_a_jump_n)
5053 p1 += 2;
5054 break;
5055
5056 default:
5057 /* do nothing */ ;
5058 }
5059 p1 += mcnt;
5060
5061 /* If the next operation is a jump backwards in the pattern
5062 to an on_failure_jump right before the start_memory
5063 corresponding to this stop_memory, exit from the loop
5064 by forcing a failure after pushing on the stack the
5065 on_failure_jump's jump in the pattern, and d. */
5066 if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
5067 && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
5068 {
5069 /* If this group ever matched anything, then restore
5070 what its registers were before trying this last
5071 failed match, e.g., with `(a*)*b' against `ab' for
5072 regstart[1], and, e.g., with `((a*)*(b*)*)*'
5073 against `aba' for regend[3].
5074
5075 Also restore the registers for inner groups for,
5076 e.g., `((a*)(b*))*' against `aba' (register 3 would
5077 otherwise get trashed). */
5078
5079 if (EVER_MATCHED_SOMETHING (reg_info[*p]))
5080 {
5081 unsigned r;
5082
5083 EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
5084
5085 /* Restore this and inner groups' (if any) registers. */
5086 for (r = *p; r < *p + *(p + 1); r++)
5087 {
5088 regstart[r] = old_regstart[r];
5089
5090 /* xx why this test? */
5091 if (old_regend[r] >= regstart[r])
5092 regend[r] = old_regend[r];
5093 }
5094 }
5095 p1++;
5096 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5097 PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
5098
5099 goto fail;
5100 }
5101 }
5102
5103 /* Move past the register number and the inner group count. */ 4976 /* Move past the register number and the inner group count. */
5104 p += 2; 4977 p += 1;
5105 break; 4978 break;
5106 4979
5107 4980
@@ -5162,9 +5035,6 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5162 : bcmp (d, d2, mcnt)) 5035 : bcmp (d, d2, mcnt))
5163 goto fail; 5036 goto fail;
5164 d += mcnt, d2 += mcnt; 5037 d += mcnt, d2 += mcnt;
5165
5166 /* Do this because we've match some characters. */
5167 SET_REGS_MATCHED ();
5168 } 5038 }
5169 } 5039 }
5170 break; 5040 break;
@@ -5224,7 +5094,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5224 5094
5225 /* on_failure_keep_string_jump is used to optimize `.*\n'. It 5095 /* on_failure_keep_string_jump is used to optimize `.*\n'. It
5226 pushes NULL as the value for the string on the stack. Then 5096 pushes NULL as the value for the string on the stack. Then
5227 `pop_failure_point' will keep the current value for the 5097 `POP_FAILURE_POINT' will keep the current value for the
5228 string, instead of restoring it. To see why, consider 5098 string, instead of restoring it. To see why, consider
5229 matching `foo\nbar' against `.*\n'. The .* matches the foo; 5099 matching `foo\nbar' against `.*\n'. The .* matches the foo;
5230 then the . fails against the \n. But the next thing we want 5100 then the . fails against the \n. But the next thing we want
@@ -5239,12 +5109,47 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5239 `anychar's code to do something besides goto fail in this 5109 `anychar's code to do something besides goto fail in this
5240 case; that seems worse than this. */ 5110 case; that seems worse than this. */
5241 case on_failure_keep_string_jump: 5111 case on_failure_keep_string_jump:
5242 DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump"); 5112 EXTRACT_NUMBER_AND_INCR (mcnt, p);
5113 DEBUG_PRINT3 ("EXECUTING on_failure_keep_string_jump %d (to %p):\n",
5114 mcnt, p + mcnt);
5115
5116 PUSH_FAILURE_POINT (p - 3, NULL);
5117 break;
5243 5118
5119 case on_failure_jump_exclusive:
5244 EXTRACT_NUMBER_AND_INCR (mcnt, p); 5120 EXTRACT_NUMBER_AND_INCR (mcnt, p);
5245 DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt); 5121 DEBUG_PRINT3 ("EXECUTING on_failure_jump_exclusive %d (to %p):\n",
5122 mcnt, p + mcnt);
5246 5123
5247 PUSH_FAILURE_POINT (p + mcnt, NULL, -2); 5124 if (! FAIL_STACK_EMPTY ()
5125 && FAILURE_PAT (TOP_FAILURE_HANDLE ()) == (p - 3)
5126 && fail_stack.avail == fail_stack.frame)
5127 {
5128 /* We are trying to push failure F2 onto the stack but there
5129 is already a failure F1 pushed from the same instruction.
5130 Between F1 and now, something has matched (else this is an
5131 improper use of on_failure_jump_exclusive), so that we know
5132 that the fail-destination of F1 cannot match, hence we can
5133 pop F1 before pushing F2. Instead of doing this pop/push,
5134 we manually turn F1 into F2.
5135 `fail_stack.avail == fail_stack.frame' makes sure
5136 that popping F1 doesn't involve registers, else
5137 this optimization cannot be done so trivially. */
5138 assert (FAILURE_STR (TOP_FAILURE_HANDLE ()) != d);
5139 FAILURE_STR (TOP_FAILURE_HANDLE ()) = d;
5140 }
5141 else
5142 PUSH_FAILURE_POINT (p - 3, d);
5143 break;
5144
5145 case on_failure_jump_loop:
5146 on_failure:
5147 EXTRACT_NUMBER_AND_INCR (mcnt, p);
5148 DEBUG_PRINT3 ("EXECUTING on_failure_jump_loop %d (to %p):\n",
5149 mcnt, p + mcnt);
5150
5151 CHECK_INFINITE_LOOP (p - 3, d);
5152 PUSH_FAILURE_POINT (p - 3, d);
5248 break; 5153 break;
5249 5154
5250 5155
@@ -5261,272 +5166,55 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5261 the repetition text and either the following jump or 5166 the repetition text and either the following jump or
5262 pop_failure_jump back to this on_failure_jump. */ 5167 pop_failure_jump back to this on_failure_jump. */
5263 case on_failure_jump: 5168 case on_failure_jump:
5264 on_failure:
5265 DEBUG_PRINT1 ("EXECUTING on_failure_jump");
5266 5169
5267#if defined (WINDOWSNT) && defined (emacs) 5170#if defined (WINDOWSNT) && defined (emacs)
5268 QUIT; 5171 QUIT;
5269#endif 5172#endif
5270 5173
5271 EXTRACT_NUMBER_AND_INCR (mcnt, p); 5174 EXTRACT_NUMBER_AND_INCR (mcnt, p);
5272 DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt); 5175 DEBUG_PRINT3 ("EXECUTING on_failure_jump %d (to %p):\n",
5273 5176 mcnt, p + mcnt);
5274 /* If this on_failure_jump comes right before a group (i.e.,
5275 the original * applied to a group), save the information
5276 for that group and all inner ones, so that if we fail back
5277 to this point, the group's information will be correct.
5278 For example, in \(a*\)*\1, we need the preceding group,
5279 and in \(zz\(a*\)b*\)\2, we need the inner group. */
5280
5281 /* We can't use `p' to check ahead because we push
5282 a failure point to `p + mcnt' after we do this. */
5283 p1 = p;
5284
5285 /* We need to skip no_op's before we look for the
5286 start_memory in case this on_failure_jump is happening as
5287 the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
5288 against aba. */
5289 while (p1 < pend && (re_opcode_t) *p1 == no_op)
5290 p1++;
5291
5292 if (p1 < pend && (re_opcode_t) *p1 == start_memory)
5293 {
5294 /* We have a new highest active register now. This will
5295 get reset at the start_memory we are about to get to,
5296 but we will have saved all the registers relevant to
5297 this repetition op, as described above. */
5298 highest_active_reg = *(p1 + 1) + *(p1 + 2);
5299 if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
5300 lowest_active_reg = *(p1 + 1);
5301 }
5302 5177
5303 DEBUG_PRINT1 (":\n"); 5178 PUSH_FAILURE_POINT (p -3, d);
5304 PUSH_FAILURE_POINT (p + mcnt, d, -2);
5305 break; 5179 break;
5306 5180
5307 5181 /* This operation is used for greedy * and +.
5308 /* A smart repeat ends with `maybe_pop_jump'. 5182 Compare the beginning of the repeat with what in the
5309 We change it to either `pop_failure_jump' or `jump'. */ 5183 pattern follows its end. If we can establish that there
5310 case maybe_pop_jump: 5184 is nothing that they would both match, i.e., that we
5185 would have to backtrack because of (as in, e.g., `a*a')
5186 then we can use a non-backtracking loop based on
5187 on_failure_jump_exclusive instead of on_failure_jump_loop. */
5188 case on_failure_jump_smart:
5311#if defined (WINDOWSNT) && defined (emacs) 5189#if defined (WINDOWSNT) && defined (emacs)
5312 QUIT; 5190 QUIT;
5313#endif 5191#endif
5314 EXTRACT_NUMBER_AND_INCR (mcnt, p); 5192 EXTRACT_NUMBER_AND_INCR (mcnt, p);
5315 DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt); 5193 DEBUG_PRINT3 ("EXECUTING on_failure_jump_smart %d (to %p).\n",
5194 mcnt, p + mcnt);
5316 { 5195 {
5317 register unsigned char *p2 = p; 5196 unsigned char *p1 = p; /* Next operation. */
5318 5197 unsigned char *p2 = p + mcnt; /* Destination of the jump. */
5319 /* Compare the beginning of the repeat with what in the
5320 pattern follows its end. If we can establish that there
5321 is nothing that they would both match, i.e., that we
5322 would have to backtrack because of (as in, e.g., `a*a')
5323 then we can change to pop_failure_jump, because we'll
5324 never have to backtrack.
5325
5326 This is not true in the case of alternatives: in
5327 `(a|ab)*' we do need to backtrack to the `ab' alternative
5328 (e.g., if the string was `ab'). But instead of trying to
5329 detect that here, the alternative has put on a dummy
5330 failure point which is what we will end up popping. */
5331
5332 /* Skip over open/close-group commands.
5333 If what follows this loop is a ...+ construct,
5334 look at what begins its body, since we will have to
5335 match at least one of that. */
5336 while (1)
5337 {
5338 if (p2 + 2 < pend
5339 && ((re_opcode_t) *p2 == stop_memory
5340 || (re_opcode_t) *p2 == start_memory))
5341 p2 += 3;
5342 else if (p2 + 6 < pend
5343 && (re_opcode_t) *p2 == dummy_failure_jump)
5344 p2 += 6;
5345 else
5346 break;
5347 }
5348 5198
5349 p1 = p + mcnt; 5199 p -= 3; /* Reset so that we will re-execute the
5350 /* p1[0] ... p1[2] are the `on_failure_jump' corresponding 5200 instruction once it's been changed. */
5351 to the `maybe_finalize_jump' of this case. Examine what
5352 follows. */
5353 5201
5354 /* If we're at the end of the pattern, we can change. */ 5202 /* DEBUG_STATEMENT (debug = 1); */
5355 if (p2 == pend) 5203 if (mutually_exclusive_p (bufp, p1, p2))
5356 { 5204 {
5357 /* Consider what happens when matching ":\(.*\)" 5205 /* Use a fast `on_failure_keep_string_jump' loop. */
5358 against ":/". I don't really understand this code 5206 *p = (unsigned char) on_failure_jump_exclusive;
5359 yet. */ 5207 /* STORE_NUMBER (p2 - 2, mcnt + 3); */
5360 p[-3] = (unsigned char) pop_failure_jump;
5361 DEBUG_PRINT1
5362 (" End of pattern: change to `pop_failure_jump'.\n");
5363 } 5208 }
5364 5209 else
5365 else if ((re_opcode_t) *p2 == exactn
5366 || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
5367 { 5210 {
5368 register unsigned int c 5211 /* Default to a safe `on_failure_jump' loop. */
5369 = *p2 == (unsigned char) endline ? '\n' : p2[2]; 5212 DEBUG_PRINT1 (" smart default => slow loop.\n");
5370 5213 *p = (unsigned char) on_failure_jump_loop;
5371 if ((re_opcode_t) p1[3] == exactn)
5372 {
5373 if (!(multibyte /* && (c != '\n') */
5374 && BASE_LEADING_CODE_P (c))
5375 ? c != p1[5]
5376 : (STRING_CHAR (&p2[2], pend - &p2[2])
5377 != STRING_CHAR (&p1[5], pend - &p1[5])))
5378 {
5379 p[-3] = (unsigned char) pop_failure_jump;
5380 DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
5381 c, p1[5]);
5382 }
5383 }
5384
5385 else if ((re_opcode_t) p1[3] == charset
5386 || (re_opcode_t) p1[3] == charset_not)
5387 {
5388 int not = (re_opcode_t) p1[3] == charset_not;
5389
5390 if (multibyte /* && (c != '\n') */
5391 && BASE_LEADING_CODE_P (c))
5392 c = STRING_CHAR (&p2[2], pend - &p2[2]);
5393
5394 /* Test if C is listed in charset (or charset_not)
5395 at `&p1[3]'. */
5396 if (SINGLE_BYTE_CHAR_P (c))
5397 {
5398 if (c < CHARSET_BITMAP_SIZE (&p1[3]) * BYTEWIDTH
5399 && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
5400 not = !not;
5401 }
5402 else if (CHARSET_RANGE_TABLE_EXISTS_P (&p1[3]))
5403 CHARSET_LOOKUP_RANGE_TABLE (not, c, &p1[3]);
5404
5405 /* `not' is equal to 1 if c would match, which means
5406 that we can't change to pop_failure_jump. */
5407 if (!not)
5408 {
5409 p[-3] = (unsigned char) pop_failure_jump;
5410 DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
5411 }
5412 }
5413 }
5414 else if ((re_opcode_t) *p2 == charset)
5415 {
5416 if ((re_opcode_t) p1[3] == exactn)
5417 {
5418 register unsigned int c = p1[5];
5419 int not = 0;
5420
5421 if (multibyte && BASE_LEADING_CODE_P (c))
5422 c = STRING_CHAR (&p1[5], pend - &p1[5]);
5423
5424 /* Test if C is listed in charset at `p2'. */
5425 if (SINGLE_BYTE_CHAR_P (c))
5426 {
5427 if (c < CHARSET_BITMAP_SIZE (p2) * BYTEWIDTH
5428 && (p2[2 + c / BYTEWIDTH]
5429 & (1 << (c % BYTEWIDTH))))
5430 not = !not;
5431 }
5432 else if (CHARSET_RANGE_TABLE_EXISTS_P (p2))
5433 CHARSET_LOOKUP_RANGE_TABLE (not, c, p2);
5434
5435 if (!not)
5436 {
5437 p[-3] = (unsigned char) pop_failure_jump;
5438 DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
5439 }
5440 }
5441
5442 /* It is hard to list up all the character in charset
5443 P2 if it includes multibyte character. Give up in
5444 such case. */
5445 else if (!multibyte || !CHARSET_RANGE_TABLE_EXISTS_P (p2))
5446 {
5447 /* Now, we are sure that P2 has no range table.
5448 So, for the size of bitmap in P2, `p2[1]' is
5449 enough. But P1 may have range table, so the
5450 size of bitmap table of P1 is extracted by
5451 using macro `CHARSET_BITMAP_SIZE'.
5452
5453 Since we know that all the character listed in
5454 P2 is ASCII, it is enough to test only bitmap
5455 table of P1. */
5456
5457 if ((re_opcode_t) p1[3] == charset_not)
5458 {
5459 int idx;
5460 /* We win if the charset_not inside the loop lists
5461 every character listed in the charset after. */
5462 for (idx = 0; idx < (int) p2[1]; idx++)
5463 if (! (p2[2 + idx] == 0
5464 || (idx < CHARSET_BITMAP_SIZE (&p1[3])
5465 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
5466 break;
5467
5468 if (idx == p2[1])
5469 {
5470 p[-3] = (unsigned char) pop_failure_jump;
5471 DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
5472 }
5473 }
5474 else if ((re_opcode_t) p1[3] == charset)
5475 {
5476 int idx;
5477 /* We win if the charset inside the loop
5478 has no overlap with the one after the loop. */
5479 for (idx = 0;
5480 (idx < (int) p2[1]
5481 && idx < CHARSET_BITMAP_SIZE (&p1[3]));
5482 idx++)
5483 if ((p2[2 + idx] & p1[5 + idx]) != 0)
5484 break;
5485
5486 if (idx == p2[1]
5487 || idx == CHARSET_BITMAP_SIZE (&p1[3]))
5488 {
5489 p[-3] = (unsigned char) pop_failure_jump;
5490 DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
5491 }
5492 }
5493 } 5214 }
5215 /* DEBUG_STATEMENT (debug = 0); */
5494 } 5216 }
5495 } 5217 break;
5496 p -= 2; /* Point at relative address again. */
5497 if ((re_opcode_t) p[-1] != pop_failure_jump)
5498 {
5499 p[-1] = (unsigned char) jump;
5500 DEBUG_PRINT1 (" Match => jump.\n");
5501 goto unconditional_jump;
5502 }
5503 /* Note fall through. */
5504
5505
5506 /* The end of a simple repeat has a pop_failure_jump back to
5507 its matching on_failure_jump, where the latter will push a
5508 failure point. The pop_failure_jump takes off failure
5509 points put on by this pop_failure_jump's matching
5510 on_failure_jump; we got through the pattern to here from the
5511 matching on_failure_jump, so didn't fail. */
5512 case pop_failure_jump:
5513 {
5514 /* We need to pass separate storage for the lowest and
5515 highest registers, even though we don't care about the
5516 actual values. Otherwise, we will restore only one
5517 register from the stack, since lowest will == highest in
5518 `pop_failure_point'. */
5519 unsigned dummy_low_reg, dummy_high_reg;
5520 unsigned char *pdummy;
5521 const char *sdummy;
5522
5523 DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
5524 POP_FAILURE_POINT (sdummy, pdummy,
5525 dummy_low_reg, dummy_high_reg,
5526 reg_dummy, reg_dummy, reg_info_dummy);
5527 }
5528 /* Note fall through. */
5529
5530 5218
5531 /* Unconditionally jump (without popping any failure points). */ 5219 /* Unconditionally jump (without popping any failure points). */
5532 case jump: 5220 case jump:
@@ -5537,42 +5225,10 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5537 EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */ 5225 EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */
5538 DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt); 5226 DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
5539 p += mcnt; /* Do the jump. */ 5227 p += mcnt; /* Do the jump. */
5540 DEBUG_PRINT2 ("(to 0x%x).\n", p); 5228 DEBUG_PRINT2 ("(to %p).\n", p);
5541 break; 5229 break;
5542 5230
5543 5231
5544 /* We need this opcode so we can detect where alternatives end
5545 in `group_match_null_string_p' et al. */
5546 case jump_past_alt:
5547 DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
5548 goto unconditional_jump;
5549
5550
5551 /* Normally, the on_failure_jump pushes a failure point, which
5552 then gets popped at pop_failure_jump. We will end up at
5553 pop_failure_jump, also, and with a pattern of, say, `a+', we
5554 are skipping over the on_failure_jump, so we have to push
5555 something meaningless for pop_failure_jump to pop. */
5556 case dummy_failure_jump:
5557 DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
5558 /* It doesn't matter what we push for the string here. What
5559 the code at `fail' tests is the value for the pattern. */
5560 PUSH_FAILURE_POINT (0, 0, -2);
5561 goto unconditional_jump;
5562
5563
5564 /* At the end of an alternative, we need to push a dummy failure
5565 point in case we are followed by a `pop_failure_jump', because
5566 we don't want the failure point for the alternative to be
5567 popped. For example, matching `(a|ab)*' against `aab'
5568 requires that we match the `ab' alternative. */
5569 case push_dummy_failure:
5570 DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
5571 /* See comments just above at `dummy_failure_jump' about the
5572 two zeroes. */
5573 PUSH_FAILURE_POINT (0, 0, -2);
5574 break;
5575
5576 /* Have to succeed matching what follows at least n times. 5232 /* Have to succeed matching what follows at least n times.
5577 After that, handle like `on_failure_jump'. */ 5233 After that, handle like `on_failure_jump'. */
5578 case succeed_n: 5234 case succeed_n:
@@ -5586,11 +5242,11 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5586 mcnt--; 5242 mcnt--;
5587 p += 2; 5243 p += 2;
5588 STORE_NUMBER_AND_INCR (p, mcnt); 5244 STORE_NUMBER_AND_INCR (p, mcnt);
5589 DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p, mcnt); 5245 DEBUG_PRINT3 (" Setting %p to %d.\n", p, mcnt);
5590 } 5246 }
5591 else if (mcnt == 0) 5247 else if (mcnt == 0)
5592 { 5248 {
5593 DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n", p+2); 5249 DEBUG_PRINT2 (" Setting two bytes from %p to no_op.\n", p+2);
5594 p[2] = (unsigned char) no_op; 5250 p[2] = (unsigned char) no_op;
5595 p[3] = (unsigned char) no_op; 5251 p[3] = (unsigned char) no_op;
5596 goto on_failure; 5252 goto on_failure;
@@ -5620,7 +5276,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5620 EXTRACT_NUMBER_AND_INCR (mcnt, p); 5276 EXTRACT_NUMBER_AND_INCR (mcnt, p);
5621 p1 = p + mcnt; 5277 p1 = p + mcnt;
5622 EXTRACT_NUMBER_AND_INCR (mcnt, p); 5278 EXTRACT_NUMBER_AND_INCR (mcnt, p);
5623 DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt); 5279 DEBUG_PRINT3 (" Setting %p to %d.\n", p1, mcnt);
5624 STORE_NUMBER (p1, mcnt); 5280 STORE_NUMBER (p1, mcnt);
5625 break; 5281 break;
5626 } 5282 }
@@ -5837,7 +5493,6 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5837 goto fail; 5493 goto fail;
5838 d += len; 5494 d += len;
5839 } 5495 }
5840 SET_REGS_MATCHED ();
5841 break; 5496 break;
5842 5497
5843 case notsyntaxspec: 5498 case notsyntaxspec:
@@ -5868,7 +5523,6 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5868 goto fail; 5523 goto fail;
5869 d += len; 5524 d += len;
5870 } 5525 }
5871 SET_REGS_MATCHED ();
5872 break; 5526 break;
5873 5527
5874 case categoryspec: 5528 case categoryspec:
@@ -5887,7 +5541,6 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5887 goto fail; 5541 goto fail;
5888 d += len; 5542 d += len;
5889 } 5543 }
5890 SET_REGS_MATCHED ();
5891 break; 5544 break;
5892 5545
5893 case notcategoryspec: 5546 case notcategoryspec:
@@ -5906,7 +5559,6 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5906 goto fail; 5559 goto fail;
5907 d += len; 5560 d += len;
5908 } 5561 }
5909 SET_REGS_MATCHED ();
5910 break; 5562 break;
5911 5563
5912#else /* not emacs */ 5564#else /* not emacs */
@@ -5915,7 +5567,6 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5915 PREFETCH (); 5567 PREFETCH ();
5916 if (!WORDCHAR_P (d)) 5568 if (!WORDCHAR_P (d))
5917 goto fail; 5569 goto fail;
5918 SET_REGS_MATCHED ();
5919 d++; 5570 d++;
5920 break; 5571 break;
5921 5572
@@ -5924,7 +5575,6 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5924 PREFETCH (); 5575 PREFETCH ();
5925 if (WORDCHAR_P (d)) 5576 if (WORDCHAR_P (d))
5926 goto fail; 5577 goto fail;
5927 SET_REGS_MATCHED ();
5928 d++; 5578 d++;
5929 break; 5579 break;
5930#endif /* not emacs */ 5580#endif /* not emacs */
@@ -5941,44 +5591,40 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5941 QUIT; 5591 QUIT;
5942#endif 5592#endif
5943 if (!FAIL_STACK_EMPTY ()) 5593 if (!FAIL_STACK_EMPTY ())
5944 { /* A restart point is known. Restore to that state. */ 5594 {
5595 char *str;
5596 unsigned char *pat;
5597 /* A restart point is known. Restore to that state. */
5945 DEBUG_PRINT1 ("\nFAIL:\n"); 5598 DEBUG_PRINT1 ("\nFAIL:\n");
5946 POP_FAILURE_POINT (d, p, 5599 POP_FAILURE_POINT (str, pat);
5947 lowest_active_reg, highest_active_reg, 5600 switch (SWITCH_ENUM_CAST ((re_opcode_t) *pat++))
5948 regstart, regend, reg_info); 5601 {
5602 case on_failure_keep_string_jump:
5603 assert (str == NULL);
5604 goto continue_failure_jump;
5605
5606 case on_failure_jump_exclusive:
5607 /* If something has matched, the alternative will not match,
5608 so we might as well keep popping right away. */
5609 if (0 /* d != str && d != string2 */) /* Don't bother. -sm */
5610 /* (d == string2 && str == end1) => (d =~ str) */
5611 goto fail;
5612 /* Fallthrough */
5613
5614 case on_failure_jump_loop:
5615 case on_failure_jump:
5616 case succeed_n:
5617 d = str;
5618 continue_failure_jump:
5619 EXTRACT_NUMBER_AND_INCR (mcnt, pat);
5620 p = pat + mcnt;
5621 break;
5949 5622
5950 /* If this failure point is a dummy, try the next one. */ 5623 default:
5951 if (!p) 5624 abort();
5952 goto fail; 5625 }
5953 5626
5954 /* If we failed to the end of the pattern, don't examine *p. */ 5627 assert (p >= bufp->buffer && p <= pend);
5955 assert (p <= pend);
5956 if (p < pend)
5957 {
5958 boolean is_a_jump_n = false;
5959
5960 /* If failed to a backwards jump that's part of a repetition
5961 loop, need to pop this failure point and use the next one. */
5962 switch ((re_opcode_t) *p)
5963 {
5964 case jump_n:
5965 is_a_jump_n = true;
5966 case maybe_pop_jump:
5967 case pop_failure_jump:
5968 case jump:
5969 p1 = p + 1;
5970 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5971 p1 += mcnt;
5972
5973 if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
5974 || (!is_a_jump_n
5975 && (re_opcode_t) *p1 == on_failure_jump))
5976 goto fail;
5977 break;
5978 default:
5979 /* do nothing */ ;
5980 }
5981 }
5982 5628
5983 if (d >= string1 && d <= end1) 5629 if (d >= string1 && d <= end1)
5984 dend = end_match_1; 5630 dend = end_match_1;
@@ -5997,248 +5643,6 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5997 5643
5998/* Subroutine definitions for re_match_2. */ 5644/* Subroutine definitions for re_match_2. */
5999 5645
6000
6001/* We are passed P pointing to a register number after a start_memory.
6002
6003 Return true if the pattern up to the corresponding stop_memory can
6004 match the empty string, and false otherwise.
6005
6006 If we find the matching stop_memory, sets P to point to one past its number.
6007 Otherwise, sets P to an undefined byte less than or equal to END.
6008
6009 We don't handle duplicates properly (yet). */
6010
6011static boolean
6012group_match_null_string_p (p, end, reg_info)
6013 unsigned char **p, *end;
6014 register_info_type *reg_info;
6015{
6016 int mcnt;
6017 /* Point to after the args to the start_memory. */
6018 unsigned char *p1 = *p + 2;
6019
6020 while (p1 < end)
6021 {
6022 /* Skip over opcodes that can match nothing, and return true or
6023 false, as appropriate, when we get to one that can't, or to the
6024 matching stop_memory. */
6025
6026 switch ((re_opcode_t) *p1)
6027 {
6028 /* Could be either a loop or a series of alternatives. */
6029 case on_failure_jump:
6030 p1++;
6031 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
6032
6033 /* If the next operation is not a jump backwards in the
6034 pattern. */
6035
6036 if (mcnt >= 0)
6037 {
6038 /* Go through the on_failure_jumps of the alternatives,
6039 seeing if any of the alternatives cannot match nothing.
6040 The last alternative starts with only a jump,
6041 whereas the rest start with on_failure_jump and end
6042 with a jump, e.g., here is the pattern for `a|b|c':
6043
6044 /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
6045 /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
6046 /exactn/1/c
6047
6048 So, we have to first go through the first (n-1)
6049 alternatives and then deal with the last one separately. */
6050
6051
6052 /* Deal with the first (n-1) alternatives, which start
6053 with an on_failure_jump (see above) that jumps to right
6054 past a jump_past_alt. */
6055
6056 while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
6057 {
6058 /* `mcnt' holds how many bytes long the alternative
6059 is, including the ending `jump_past_alt' and
6060 its number. */
6061
6062 if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
6063 reg_info))
6064 return false;
6065
6066 /* Move to right after this alternative, including the
6067 jump_past_alt. */
6068 p1 += mcnt;
6069
6070 /* Break if it's the beginning of an n-th alternative
6071 that doesn't begin with an on_failure_jump. */
6072 if ((re_opcode_t) *p1 != on_failure_jump)
6073 break;
6074
6075 /* Still have to check that it's not an n-th
6076 alternative that starts with an on_failure_jump. */
6077 p1++;
6078 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
6079 if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
6080 {
6081 /* Get to the beginning of the n-th alternative. */
6082 p1 -= 3;
6083 break;
6084 }
6085 }
6086
6087 /* Deal with the last alternative: go back and get number
6088 of the `jump_past_alt' just before it. `mcnt' contains
6089 the length of the alternative. */
6090 EXTRACT_NUMBER (mcnt, p1 - 2);
6091
6092 if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
6093 return false;
6094
6095 p1 += mcnt; /* Get past the n-th alternative. */
6096 } /* if mcnt > 0 */
6097 break;
6098
6099
6100 case stop_memory:
6101 assert (p1[1] == **p);
6102 *p = p1 + 2;
6103 return true;
6104
6105
6106 default:
6107 if (!common_op_match_null_string_p (&p1, end, reg_info))
6108 return false;
6109 }
6110 } /* while p1 < end */
6111
6112 return false;
6113} /* group_match_null_string_p */
6114
6115
6116/* Similar to group_match_null_string_p, but doesn't deal with alternatives:
6117 It expects P to be the first byte of a single alternative and END one
6118 byte past the last. The alternative can contain groups. */
6119
6120static boolean
6121alt_match_null_string_p (p, end, reg_info)
6122 unsigned char *p, *end;
6123 register_info_type *reg_info;
6124{
6125 int mcnt;
6126 unsigned char *p1 = p;
6127
6128 while (p1 < end)
6129 {
6130 /* Skip over opcodes that can match nothing, and break when we get
6131 to one that can't. */
6132
6133 switch ((re_opcode_t) *p1)
6134 {
6135 /* It's a loop. */
6136 case on_failure_jump:
6137 p1++;
6138 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
6139 p1 += mcnt;
6140 break;
6141
6142 default:
6143 if (!common_op_match_null_string_p (&p1, end, reg_info))
6144 return false;
6145 }
6146 } /* while p1 < end */
6147
6148 return true;
6149} /* alt_match_null_string_p */
6150
6151
6152/* Deals with the ops common to group_match_null_string_p and
6153 alt_match_null_string_p.
6154
6155 Sets P to one after the op and its arguments, if any. */
6156
6157static boolean
6158common_op_match_null_string_p (p, end, reg_info)
6159 unsigned char **p, *end;
6160 register_info_type *reg_info;
6161{
6162 int mcnt;
6163 boolean ret;
6164 int reg_no;
6165 unsigned char *p1 = *p;
6166
6167 switch ((re_opcode_t) *p1++)
6168 {
6169 case no_op:
6170 case begline:
6171 case endline:
6172 case begbuf:
6173 case endbuf:
6174 case wordbeg:
6175 case wordend:
6176 case wordbound:
6177 case notwordbound:
6178#ifdef emacs
6179 case before_dot:
6180 case at_dot:
6181 case after_dot:
6182#endif
6183 break;
6184
6185 case start_memory:
6186 reg_no = *p1;
6187 assert (reg_no > 0 && reg_no <= MAX_REGNUM);
6188 ret = group_match_null_string_p (&p1, end, reg_info);
6189
6190 /* Have to set this here in case we're checking a group which
6191 contains a group and a back reference to it. */
6192
6193 if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
6194 REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
6195
6196 if (!ret)
6197 return false;
6198 break;
6199
6200 /* If this is an optimized succeed_n for zero times, make the jump. */
6201 case jump:
6202 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
6203 if (mcnt >= 0)
6204 p1 += mcnt;
6205 else
6206 return false;
6207 break;
6208
6209 case succeed_n:
6210 /* Get to the number of times to succeed. */
6211 p1 += 2;
6212 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
6213
6214 if (mcnt == 0)
6215 {
6216 p1 -= 4;
6217 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
6218 p1 += mcnt;
6219 }
6220 else
6221 return false;
6222 break;
6223
6224 case duplicate:
6225 if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
6226 return false;
6227 break;
6228
6229 case set_number_at:
6230 p1 += 4;
6231
6232 default:
6233 /* All other opcodes mean we cannot match the empty string. */
6234 return false;
6235 }
6236
6237 *p = p1;
6238 return true;
6239} /* common_op_match_null_string_p */
6240
6241
6242/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN 5646/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
6243 bytes; nonzero otherwise. */ 5647 bytes; nonzero otherwise. */
6244 5648