diff options
| author | Karl Berry | 1992-11-21 01:51:33 +0000 |
|---|---|---|
| committer | Karl Berry | 1992-11-21 01:51:33 +0000 |
| commit | 9114e2792fe477299d6d7f8856c616fe4ce31d21 (patch) | |
| tree | 0289e6410211f571f96229fc8321d9e83b1abef1 /src | |
| parent | 9549c46d0b7b76c104c152e78feb33029c0e562a (diff) | |
| download | emacs-9114e2792fe477299d6d7f8856c616fe4ce31d21.tar.gz emacs-9114e2792fe477299d6d7f8856c616fe4ce31d21.zip | |
*** empty log message ***
Diffstat (limited to 'src')
| -rw-r--r-- | src/regex.c | 166 | ||||
| -rw-r--r-- | src/regex.h | 28 |
2 files changed, 115 insertions, 79 deletions
diff --git a/src/regex.c b/src/regex.c index 71aa4cc87e1..112e0d6d7ad 100644 --- a/src/regex.c +++ b/src/regex.c | |||
| @@ -47,9 +47,15 @@ | |||
| 47 | `BSTRING', as far as I know, and neither of them use this code. */ | 47 | `BSTRING', as far as I know, and neither of them use this code. */ |
| 48 | #if USG || STDC_HEADERS | 48 | #if USG || STDC_HEADERS |
| 49 | #include <string.h> | 49 | #include <string.h> |
| 50 | #ifndef bcmp | ||
| 50 | #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n)) | 51 | #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n)) |
| 52 | #endif | ||
| 53 | #ifndef bcopy | ||
| 51 | #define bcopy(s, d, n) memcpy ((d), (s), (n)) | 54 | #define bcopy(s, d, n) memcpy ((d), (s), (n)) |
| 55 | #endif | ||
| 56 | #ifndef bzero | ||
| 52 | #define bzero(s, n) memset ((s), 0, (n)) | 57 | #define bzero(s, n) memset ((s), 0, (n)) |
| 58 | #endif | ||
| 53 | #else | 59 | #else |
| 54 | #include <strings.h> | 60 | #include <strings.h> |
| 55 | #endif | 61 | #endif |
| @@ -135,12 +141,8 @@ init_syntax_once () | |||
| 135 | (Per Bothner suggested the basic approach.) */ | 141 | (Per Bothner suggested the basic approach.) */ |
| 136 | #undef SIGN_EXTEND_CHAR | 142 | #undef SIGN_EXTEND_CHAR |
| 137 | #if __STDC__ | 143 | #if __STDC__ |
| 138 | #ifndef VMS | ||
| 139 | #define SIGN_EXTEND_CHAR(c) ((signed char) (c)) | 144 | #define SIGN_EXTEND_CHAR(c) ((signed char) (c)) |
| 140 | #else /* On VMS, VAXC doesn't recognize `signed' before `char' */ | 145 | #else /* not __STDC__ */ |
| 141 | #define SIGN_EXTEND_CHAR(c) ((char) (c)) | ||
| 142 | #endif /* VMS */ | ||
| 143 | #else | ||
| 144 | /* As in Harbison and Steele. */ | 146 | /* As in Harbison and Steele. */ |
| 145 | #define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128) | 147 | #define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128) |
| 146 | #endif | 148 | #endif |
| @@ -447,6 +449,7 @@ static int debug = 0; | |||
| 447 | #define DEBUG_PRINT1(x) if (debug) printf (x) | 449 | #define DEBUG_PRINT1(x) if (debug) printf (x) |
| 448 | #define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2) | 450 | #define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2) |
| 449 | #define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3) | 451 | #define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3) |
| 452 | #define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4) | ||
| 450 | #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \ | 453 | #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \ |
| 451 | if (debug) print_partial_compiled_pattern (s, e) | 454 | if (debug) print_partial_compiled_pattern (s, e) |
| 452 | #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \ | 455 | #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \ |
| @@ -760,6 +763,7 @@ print_double_string (where, string1, size1, string2, size2) | |||
| 760 | #define DEBUG_PRINT1(x) | 763 | #define DEBUG_PRINT1(x) |
| 761 | #define DEBUG_PRINT2(x1, x2) | 764 | #define DEBUG_PRINT2(x1, x2) |
| 762 | #define DEBUG_PRINT3(x1, x2, x3) | 765 | #define DEBUG_PRINT3(x1, x2, x3) |
| 766 | #define DEBUG_PRINT4(x1, x2, x3, x4) | ||
| 763 | #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) | 767 | #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) |
| 764 | #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) | 768 | #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) |
| 765 | 769 | ||
| @@ -1025,9 +1029,9 @@ typedef struct | |||
| 1025 | `buffer' is the compiled pattern; | 1029 | `buffer' is the compiled pattern; |
| 1026 | `syntax' is set to SYNTAX; | 1030 | `syntax' is set to SYNTAX; |
| 1027 | `used' is set to the length of the compiled pattern; | 1031 | `used' is set to the length of the compiled pattern; |
| 1028 | `fastmap_accurate' is set to zero; | 1032 | `fastmap_accurate' is zero; |
| 1029 | `re_nsub' is set to the number of groups in PATTERN; | 1033 | `re_nsub' is the number of subexpressions in PATTERN; |
| 1030 | `not_bol' and `not_eol' are set to zero. | 1034 | `not_bol' and `not_eol' are zero; |
| 1031 | 1035 | ||
| 1032 | The `fastmap' and `newline_anchor' fields are neither | 1036 | The `fastmap' and `newline_anchor' fields are neither |
| 1033 | examined nor set. */ | 1037 | examined nor set. */ |
| @@ -1676,10 +1680,10 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 1676 | | v | v | 1680 | | v | v |
| 1677 | a | b | c | 1681 | a | b | c |
| 1678 | 1682 | ||
| 1679 | If we are at `b,' then fixup_alt_jump right now points to a | 1683 | If we are at `b', then fixup_alt_jump right now points to a |
| 1680 | three-byte space after `a.' We'll put in the jump, set | 1684 | three-byte space after `a'. We'll put in the jump, set |
| 1681 | fixup_alt_jump to right after `b,' and leave behind three | 1685 | fixup_alt_jump to right after `b', and leave behind three |
| 1682 | bytes which we'll fill in when we get to after `c.' */ | 1686 | bytes which we'll fill in when we get to after `c'. */ |
| 1683 | 1687 | ||
| 1684 | if (fixup_alt_jump) | 1688 | if (fixup_alt_jump) |
| 1685 | STORE_JUMP (jump_past_alt, fixup_alt_jump, b); | 1689 | STORE_JUMP (jump_past_alt, fixup_alt_jump, b); |
| @@ -2320,6 +2324,7 @@ typedef struct | |||
| 2320 | int this_reg; \ | 2324 | int this_reg; \ |
| 2321 | \ | 2325 | \ |
| 2322 | DEBUG_STATEMENT (failure_id++); \ | 2326 | DEBUG_STATEMENT (failure_id++); \ |
| 2327 | DEBUG_STATEMENT (nfailure_points_pushed++); \ | ||
| 2323 | DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \ | 2328 | DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \ |
| 2324 | DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\ | 2329 | DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\ |
| 2325 | DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\ | 2330 | DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\ |
| @@ -2473,6 +2478,8 @@ typedef struct | |||
| 2473 | regstart[this_reg] = (const char *) POP_FAILURE_ITEM (); \ | 2478 | regstart[this_reg] = (const char *) POP_FAILURE_ITEM (); \ |
| 2474 | DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \ | 2479 | DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \ |
| 2475 | } \ | 2480 | } \ |
| 2481 | \ | ||
| 2482 | DEBUG_STATEMENT (nfailure_points_popped++); \ | ||
| 2476 | } /* POP_FAILURE_POINT */ | 2483 | } /* POP_FAILURE_POINT */ |
| 2477 | 2484 | ||
| 2478 | /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in | 2485 | /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in |
| @@ -2860,15 +2867,9 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) | |||
| 2860 | else if (endpos > total_size) | 2867 | else if (endpos > total_size) |
| 2861 | range = total_size - startpos; | 2868 | range = total_size - startpos; |
| 2862 | 2869 | ||
| 2863 | /* Update the fastmap now if not correct already. */ | ||
| 2864 | if (fastmap && !bufp->fastmap_accurate) | ||
| 2865 | if (re_compile_fastmap (bufp) == -2) | ||
| 2866 | return -2; | ||
| 2867 | |||
| 2868 | /* If the search isn't to be a backwards one, don't waste time in a | 2870 | /* If the search isn't to be a backwards one, don't waste time in a |
| 2869 | long search for a pattern that says it is anchored. */ | 2871 | search for a pattern that must be anchored. */ |
| 2870 | if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf | 2872 | if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0) |
| 2871 | && range > 0) | ||
| 2872 | { | 2873 | { |
| 2873 | if (startpos > 0) | 2874 | if (startpos > 0) |
| 2874 | return -1; | 2875 | return -1; |
| @@ -2876,6 +2877,12 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) | |||
| 2876 | range = 1; | 2877 | range = 1; |
| 2877 | } | 2878 | } |
| 2878 | 2879 | ||
| 2880 | /* Update the fastmap now if not correct already. */ | ||
| 2881 | if (fastmap && !bufp->fastmap_accurate) | ||
| 2882 | if (re_compile_fastmap (bufp) == -2) | ||
| 2883 | return -2; | ||
| 2884 | |||
| 2885 | /* Loop through the string, looking for a place to start matching. */ | ||
| 2879 | for (;;) | 2886 | for (;;) |
| 2880 | { | 2887 | { |
| 2881 | /* If a fastmap is supplied, skip quickly over characters that | 2888 | /* If a fastmap is supplied, skip quickly over characters that |
| @@ -2913,7 +2920,7 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) | |||
| 2913 | ? string2[startpos - size1] | 2920 | ? string2[startpos - size1] |
| 2914 | : string1[startpos]); | 2921 | : string1[startpos]); |
| 2915 | 2922 | ||
| 2916 | if (!fastmap[TRANSLATE (c)]) | 2923 | if (!fastmap[(unsigned char) TRANSLATE (c)]) |
| 2917 | goto advance; | 2924 | goto advance; |
| 2918 | } | 2925 | } |
| 2919 | } | 2926 | } |
| @@ -2987,12 +2994,9 @@ typedef union | |||
| 2987 | #define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something) | 2994 | #define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something) |
| 2988 | 2995 | ||
| 2989 | 2996 | ||
| 2990 | /* Call this when have matched something; it sets `matched' flags for the | 2997 | /* Call this when we have matched a real character; it sets `matched' flags |
| 2991 | registers corresponding to the group of which we currently are inside. | 2998 | for the subexpressions which we are currently inside. Also records |
| 2992 | Also records whether this group ever matched something. We only care | 2999 | that those subexprs have matched. */ |
| 2993 | about this information at `stop_memory', and then only about the | ||
| 2994 | previous time through the loop (if the group is starred or whatever). | ||
| 2995 | So it is ok to clear all the nonactive registers here. */ | ||
| 2996 | #define SET_REGS_MATCHED() \ | 3000 | #define SET_REGS_MATCHED() \ |
| 2997 | do \ | 3001 | do \ |
| 2998 | { \ | 3002 | { \ |
| @@ -3037,24 +3041,24 @@ typedef union | |||
| 3037 | 3041 | ||
| 3038 | /* Test if at very beginning or at very end of the virtual concatenation | 3042 | /* Test if at very beginning or at very end of the virtual concatenation |
| 3039 | of `string1' and `string2'. If only one string, it's `string2'. */ | 3043 | of `string1' and `string2'. If only one string, it's `string2'. */ |
| 3040 | #define AT_STRINGS_BEG() (d == (size1 ? string1 : string2) || !size2) | 3044 | #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2) |
| 3041 | #define AT_STRINGS_END() (d == end2) | 3045 | #define AT_STRINGS_END(d) ((d) == end2) |
| 3042 | 3046 | ||
| 3043 | 3047 | ||
| 3044 | /* Test if D points to a character which is word-constituent. We have | 3048 | /* Test if D points to a character which is word-constituent. We have |
| 3045 | two special cases to check for: if past the end of string1, look at | 3049 | two special cases to check for: if past the end of string1, look at |
| 3046 | the first character in string2; and if before the beginning of | 3050 | the first character in string2; and if before the beginning of |
| 3047 | string2, look at the last character in string1. | 3051 | string2, look at the last character in string1. */ |
| 3048 | 3052 | #define WORDCHAR_P(d) \ | |
| 3049 | Assumes `string1' exists, so use in conjunction with AT_STRINGS_BEG (). */ | ||
| 3050 | #define LETTER_P(d) \ | ||
| 3051 | (SYNTAX ((d) == end1 ? *string2 \ | 3053 | (SYNTAX ((d) == end1 ? *string2 \ |
| 3052 | : (d) == string2 - 1 ? *(end1 - 1) : *(d)) == Sword) | 3054 | : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \ |
| 3055 | == Sword) | ||
| 3053 | 3056 | ||
| 3054 | /* Test if the character before D and the one at D differ with respect | 3057 | /* Test if the character before D and the one at D differ with respect |
| 3055 | to being word-constituent. */ | 3058 | to being word-constituent. */ |
| 3056 | #define AT_WORD_BOUNDARY(d) \ | 3059 | #define AT_WORD_BOUNDARY(d) \ |
| 3057 | (AT_STRINGS_BEG () || AT_STRINGS_END () || LETTER_P (d - 1) != LETTER_P (d)) | 3060 | (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \ |
| 3061 | || WORDCHAR_P (d - 1) != WORDCHAR_P (d)) | ||
| 3058 | 3062 | ||
| 3059 | 3063 | ||
| 3060 | /* Free everything we malloc. */ | 3064 | /* Free everything we malloc. */ |
| @@ -3161,6 +3165,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 3161 | fail_stack_type fail_stack; | 3165 | fail_stack_type fail_stack; |
| 3162 | #ifdef DEBUG | 3166 | #ifdef DEBUG |
| 3163 | static unsigned failure_id = 0; | 3167 | static unsigned failure_id = 0; |
| 3168 | unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0; | ||
| 3164 | #endif | 3169 | #endif |
| 3165 | 3170 | ||
| 3166 | /* We fill all the registers internally, independent of what we | 3171 | /* We fill all the registers internally, independent of what we |
| @@ -3254,8 +3259,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 3254 | else | 3259 | else |
| 3255 | { | 3260 | { |
| 3256 | /* We must initialize all our variables to NULL, so that | 3261 | /* We must initialize all our variables to NULL, so that |
| 3257 | `FREE_VARIABLES' doesn't try to free them. Too bad this isn't | 3262 | `FREE_VARIABLES' doesn't try to free them. */ |
| 3258 | Lisp, so we could have a list of variables. As it is, */ | ||
| 3259 | regstart = regend = old_regstart = old_regend = best_regstart | 3263 | regstart = regend = old_regstart = old_regend = best_regstart |
| 3260 | = best_regend = reg_dummy = NULL; | 3264 | = best_regend = reg_dummy = NULL; |
| 3261 | reg_info = reg_info_dummy = (register_info_type *) NULL; | 3265 | reg_info = reg_info_dummy = (register_info_type *) NULL; |
| @@ -3339,8 +3343,10 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 3339 | 3343 | ||
| 3340 | if (p == pend) | 3344 | if (p == pend) |
| 3341 | { /* End of pattern means we might have succeeded. */ | 3345 | { /* End of pattern means we might have succeeded. */ |
| 3342 | DEBUG_PRINT1 ("End of pattern: "); | 3346 | DEBUG_PRINT1 ("end of pattern ... "); |
| 3343 | /* If not end of string, try backtracking. Otherwise done. */ | 3347 | |
| 3348 | /* If we haven't matched the entire string, and we want the | ||
| 3349 | longest match, try backtracking. */ | ||
| 3344 | if (d != end_match_2) | 3350 | if (d != end_match_2) |
| 3345 | { | 3351 | { |
| 3346 | DEBUG_PRINT1 ("backtracking.\n"); | 3352 | DEBUG_PRINT1 ("backtracking.\n"); |
| @@ -3378,6 +3384,8 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 3378 | For example, the pattern `x.*y.*z' against the | 3384 | For example, the pattern `x.*y.*z' against the |
| 3379 | strings `x-' and `y-z-', if the two strings are | 3385 | strings `x-' and `y-z-', if the two strings are |
| 3380 | not consecutive in memory. */ | 3386 | not consecutive in memory. */ |
| 3387 | DEBUG_PRINT1 ("Restoring best registers.\n"); | ||
| 3388 | |||
| 3381 | d = match_end; | 3389 | d = match_end; |
| 3382 | dend = ((d >= string1 && d <= end1) | 3390 | dend = ((d >= string1 && d <= end1) |
| 3383 | ? end_match_1 : end_match_2); | 3391 | ? end_match_1 : end_match_2); |
| @@ -3390,7 +3398,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 3390 | } | 3398 | } |
| 3391 | } /* d != end_match_2 */ | 3399 | } /* d != end_match_2 */ |
| 3392 | 3400 | ||
| 3393 | DEBUG_PRINT1 ("\nAccepting match.\n"); | 3401 | DEBUG_PRINT1 ("Accepting match.\n"); |
| 3394 | 3402 | ||
| 3395 | /* If caller wants register contents data back, do it. */ | 3403 | /* If caller wants register contents data back, do it. */ |
| 3396 | if (regs && !bufp->no_sub) | 3404 | if (regs && !bufp->no_sub) |
| @@ -3456,7 +3464,10 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 3456 | } /* regs && !bufp->no_sub */ | 3464 | } /* regs && !bufp->no_sub */ |
| 3457 | 3465 | ||
| 3458 | FREE_VARIABLES (); | 3466 | FREE_VARIABLES (); |
| 3459 | DEBUG_PRINT2 ("%d registers pushed.\n", num_regs_pushed); | 3467 | DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", |
| 3468 | nfailure_points_pushed, nfailure_points_popped, | ||
| 3469 | nfailure_points_pushed - nfailure_points_popped); | ||
| 3470 | DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed); | ||
| 3460 | 3471 | ||
| 3461 | mcnt = d - pos - (MATCHING_IN_FIRST_STRING | 3472 | mcnt = d - pos - (MATCHING_IN_FIRST_STRING |
| 3462 | ? string1 | 3473 | ? string1 |
| @@ -3658,7 +3669,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 3658 | 3669 | ||
| 3659 | /* If just failed to match something this time around with a | 3670 | /* If just failed to match something this time around with a |
| 3660 | group that's operated on by a repetition operator, try to | 3671 | group that's operated on by a repetition operator, try to |
| 3661 | force exit from the ``loop,'' and restore the register | 3672 | force exit from the ``loop'', and restore the register |
| 3662 | information for this group that we had before trying this | 3673 | information for this group that we had before trying this |
| 3663 | last match. */ | 3674 | last match. */ |
| 3664 | if ((!MATCHED_SOMETHING (reg_info[*p]) | 3675 | if ((!MATCHED_SOMETHING (reg_info[*p]) |
| @@ -3802,7 +3813,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 3802 | case begline: | 3813 | case begline: |
| 3803 | DEBUG_PRINT1 ("EXECUTING begline.\n"); | 3814 | DEBUG_PRINT1 ("EXECUTING begline.\n"); |
| 3804 | 3815 | ||
| 3805 | if (AT_STRINGS_BEG ()) | 3816 | if (AT_STRINGS_BEG (d)) |
| 3806 | { | 3817 | { |
| 3807 | if (!bufp->not_bol) break; | 3818 | if (!bufp->not_bol) break; |
| 3808 | } | 3819 | } |
| @@ -3818,7 +3829,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 3818 | case endline: | 3829 | case endline: |
| 3819 | DEBUG_PRINT1 ("EXECUTING endline.\n"); | 3830 | DEBUG_PRINT1 ("EXECUTING endline.\n"); |
| 3820 | 3831 | ||
| 3821 | if (AT_STRINGS_END ()) | 3832 | if (AT_STRINGS_END (d)) |
| 3822 | { | 3833 | { |
| 3823 | if (!bufp->not_eol) break; | 3834 | if (!bufp->not_eol) break; |
| 3824 | } | 3835 | } |
| @@ -3835,7 +3846,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 3835 | /* Match at the very beginning of the data. */ | 3846 | /* Match at the very beginning of the data. */ |
| 3836 | case begbuf: | 3847 | case begbuf: |
| 3837 | DEBUG_PRINT1 ("EXECUTING begbuf.\n"); | 3848 | DEBUG_PRINT1 ("EXECUTING begbuf.\n"); |
| 3838 | if (AT_STRINGS_BEG ()) | 3849 | if (AT_STRINGS_BEG (d)) |
| 3839 | break; | 3850 | break; |
| 3840 | goto fail; | 3851 | goto fail; |
| 3841 | 3852 | ||
| @@ -3843,7 +3854,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 3843 | /* Match at the very end of the data. */ | 3854 | /* Match at the very end of the data. */ |
| 3844 | case endbuf: | 3855 | case endbuf: |
| 3845 | DEBUG_PRINT1 ("EXECUTING endbuf.\n"); | 3856 | DEBUG_PRINT1 ("EXECUTING endbuf.\n"); |
| 3846 | if (AT_STRINGS_END ()) | 3857 | if (AT_STRINGS_END (d)) |
| 3847 | break; | 3858 | break; |
| 3848 | goto fail; | 3859 | goto fail; |
| 3849 | 3860 | ||
| @@ -3897,7 +3908,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 3897 | the original * applied to a group), save the information | 3908 | the original * applied to a group), save the information |
| 3898 | for that group and all inner ones, so that if we fail back | 3909 | for that group and all inner ones, so that if we fail back |
| 3899 | to this point, the group's information will be correct. | 3910 | to this point, the group's information will be correct. |
| 3900 | For example, in \(a*\)*\1, we only need the preceding group, | 3911 | For example, in \(a*\)*\1, we need the preceding group, |
| 3901 | and in \(\(a*\)b*\)\2, we need the inner group. */ | 3912 | and in \(\(a*\)b*\)\2, we need the inner group. */ |
| 3902 | 3913 | ||
| 3903 | /* We can't use `p' to check ahead because we push | 3914 | /* We can't use `p' to check ahead because we push |
| @@ -3927,8 +3938,8 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 3927 | break; | 3938 | break; |
| 3928 | 3939 | ||
| 3929 | 3940 | ||
| 3930 | /* A smart repeat ends with a maybe_pop_jump. | 3941 | /* A smart repeat ends with `maybe_pop_jump'. |
| 3931 | We change it either to a pop_failure_jump or a jump. */ | 3942 | We change it to either `pop_failure_jump' or `jump'. */ |
| 3932 | case maybe_pop_jump: | 3943 | case maybe_pop_jump: |
| 3933 | EXTRACT_NUMBER_AND_INCR (mcnt, p); | 3944 | EXTRACT_NUMBER_AND_INCR (mcnt, p); |
| 3934 | DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt); | 3945 | DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt); |
| @@ -3956,10 +3967,21 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 3956 | 3967 | ||
| 3957 | /* If we're at the end of the pattern, we can change. */ | 3968 | /* If we're at the end of the pattern, we can change. */ |
| 3958 | if (p2 == pend) | 3969 | if (p2 == pend) |
| 3959 | { | 3970 | { /* But if we're also at the end of the string, we might |
| 3971 | as well skip changing anything. For example, in `a+' | ||
| 3972 | against `a', we'll have already matched the `a', and | ||
| 3973 | I don't see the the point of changing the opcode, | ||
| 3974 | popping the failure point, finding out it fails, and | ||
| 3975 | then going into our endgame. */ | ||
| 3976 | if (d == dend) | ||
| 3977 | { | ||
| 3978 | p = pend; | ||
| 3979 | DEBUG_PRINT1 (" End of pattern & string => done.\n"); | ||
| 3980 | continue; | ||
| 3981 | } | ||
| 3982 | |||
| 3960 | p[-3] = (unsigned char) pop_failure_jump; | 3983 | p[-3] = (unsigned char) pop_failure_jump; |
| 3961 | DEBUG_PRINT1 | 3984 | DEBUG_PRINT1 (" End of pattern => pop_failure_jump.\n"); |
| 3962 | (" End of pattern: change to `pop_failure_jump'.\n"); | ||
| 3963 | } | 3985 | } |
| 3964 | 3986 | ||
| 3965 | else if ((re_opcode_t) *p2 == exactn | 3987 | else if ((re_opcode_t) *p2 == exactn |
| @@ -3973,7 +3995,12 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 3973 | to the `maybe_finalize_jump' of this case. Examine what | 3995 | to the `maybe_finalize_jump' of this case. Examine what |
| 3974 | follows. */ | 3996 | follows. */ |
| 3975 | if ((re_opcode_t) p1[3] == exactn && p1[5] != c) | 3997 | if ((re_opcode_t) p1[3] == exactn && p1[5] != c) |
| 3976 | p[-3] = (unsigned char) pop_failure_jump; | 3998 | { |
| 3999 | p[-3] = (unsigned char) pop_failure_jump; | ||
| 4000 | DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", | ||
| 4001 | c, p1[5]); | ||
| 4002 | } | ||
| 4003 | |||
| 3977 | else if ((re_opcode_t) p1[3] == charset | 4004 | else if ((re_opcode_t) p1[3] == charset |
| 3978 | || (re_opcode_t) p1[3] == charset_not) | 4005 | || (re_opcode_t) p1[3] == charset_not) |
| 3979 | { | 4006 | { |
| @@ -3988,9 +4015,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 3988 | if (!not) | 4015 | if (!not) |
| 3989 | { | 4016 | { |
| 3990 | p[-3] = (unsigned char) pop_failure_jump; | 4017 | p[-3] = (unsigned char) pop_failure_jump; |
| 3991 | DEBUG_PRINT1 | 4018 | DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); |
| 3992 | (" No match: change to `pop_failure_jump'.\n"); | ||
| 3993 | |||
| 3994 | } | 4019 | } |
| 3995 | } | 4020 | } |
| 3996 | } | 4021 | } |
| @@ -3999,6 +4024,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 3999 | if ((re_opcode_t) p[-1] != pop_failure_jump) | 4024 | if ((re_opcode_t) p[-1] != pop_failure_jump) |
| 4000 | { | 4025 | { |
| 4001 | p[-1] = (unsigned char) jump; | 4026 | p[-1] = (unsigned char) jump; |
| 4027 | DEBUG_PRINT1 (" Match => jump.\n"); | ||
| 4002 | goto unconditional_jump; | 4028 | goto unconditional_jump; |
| 4003 | } | 4029 | } |
| 4004 | /* Note fall through. */ | 4030 | /* Note fall through. */ |
| @@ -4060,7 +4086,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4060 | 4086 | ||
| 4061 | 4087 | ||
| 4062 | /* At the end of an alternative, we need to push a dummy failure | 4088 | /* At the end of an alternative, we need to push a dummy failure |
| 4063 | point in case we are followed by a pop_failure_jump', because | 4089 | point in case we are followed by a `pop_failure_jump', because |
| 4064 | we don't want the failure point for the alternative to be | 4090 | we don't want the failure point for the alternative to be |
| 4065 | popped. For example, matching `(a|ab)*' against `aab' | 4091 | popped. For example, matching `(a|ab)*' against `aab' |
| 4066 | requires that we match the `ab' alternative. */ | 4092 | requires that we match the `ab' alternative. */ |
| @@ -4137,14 +4163,14 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4137 | 4163 | ||
| 4138 | case wordbeg: | 4164 | case wordbeg: |
| 4139 | DEBUG_PRINT1 ("EXECUTING wordbeg.\n"); | 4165 | DEBUG_PRINT1 ("EXECUTING wordbeg.\n"); |
| 4140 | if (LETTER_P (d) && (AT_STRINGS_BEG () || !LETTER_P (d - 1))) | 4166 | if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1))) |
| 4141 | break; | 4167 | break; |
| 4142 | goto fail; | 4168 | goto fail; |
| 4143 | 4169 | ||
| 4144 | case wordend: | 4170 | case wordend: |
| 4145 | DEBUG_PRINT1 ("EXECUTING wordend.\n"); | 4171 | DEBUG_PRINT1 ("EXECUTING wordend.\n"); |
| 4146 | if (!AT_STRINGS_BEG () && LETTER_P (d - 1) | 4172 | if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1) |
| 4147 | && (!LETTER_P (d) || AT_STRINGS_END ())) | 4173 | && (!WORDCHAR_P (d) || AT_STRINGS_END (d))) |
| 4148 | break; | 4174 | break; |
| 4149 | goto fail; | 4175 | goto fail; |
| 4150 | 4176 | ||
| @@ -4181,11 +4207,12 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4181 | goto matchsyntax; | 4207 | goto matchsyntax; |
| 4182 | 4208 | ||
| 4183 | case wordchar: | 4209 | case wordchar: |
| 4184 | DEBUG_PRINT1 ("EXECUTING wordchar.\n"); | 4210 | DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n"); |
| 4185 | mcnt = (int) Sword; | 4211 | mcnt = (int) Sword; |
| 4186 | matchsyntax: | 4212 | matchsyntax: |
| 4187 | PREFETCH (); | 4213 | PREFETCH (); |
| 4188 | if (SYNTAX (*d++) != (enum syntaxcode) mcnt) goto fail; | 4214 | if (SYNTAX (*d++) != (enum syntaxcode) mcnt) |
| 4215 | goto fail; | ||
| 4189 | SET_REGS_MATCHED (); | 4216 | SET_REGS_MATCHED (); |
| 4190 | break; | 4217 | break; |
| 4191 | 4218 | ||
| @@ -4195,11 +4222,12 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4195 | goto matchnotsyntax; | 4222 | goto matchnotsyntax; |
| 4196 | 4223 | ||
| 4197 | case notwordchar: | 4224 | case notwordchar: |
| 4198 | DEBUG_PRINT1 ("EXECUTING notwordchar.\n"); | 4225 | DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n"); |
| 4199 | mcnt = (int) Sword; | 4226 | mcnt = (int) Sword; |
| 4200 | matchnotsyntax: /* We goto here from notsyntaxspec. */ | 4227 | matchnotsyntax: |
| 4201 | PREFETCH (); | 4228 | PREFETCH (); |
| 4202 | if (SYNTAX (*d++) == (enum syntaxcode) mcnt) goto fail; | 4229 | if (SYNTAX (*d++) == (enum syntaxcode) mcnt) |
| 4230 | goto fail; | ||
| 4203 | SET_REGS_MATCHED (); | 4231 | SET_REGS_MATCHED (); |
| 4204 | break; | 4232 | break; |
| 4205 | 4233 | ||
| @@ -4207,17 +4235,19 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4207 | case wordchar: | 4235 | case wordchar: |
| 4208 | DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n"); | 4236 | DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n"); |
| 4209 | PREFETCH (); | 4237 | PREFETCH (); |
| 4210 | if (!LETTER_P (d)) | 4238 | if (!WORDCHAR_P (d)) |
| 4211 | goto fail; | 4239 | goto fail; |
| 4212 | SET_REGS_MATCHED (); | 4240 | SET_REGS_MATCHED (); |
| 4241 | d++; | ||
| 4213 | break; | 4242 | break; |
| 4214 | 4243 | ||
| 4215 | case notwordchar: | 4244 | case notwordchar: |
| 4216 | DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n"); | 4245 | DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n"); |
| 4217 | PREFETCH (); | 4246 | PREFETCH (); |
| 4218 | if (LETTER_P (d)) | 4247 | if (WORDCHAR_P (d)) |
| 4219 | goto fail; | 4248 | goto fail; |
| 4220 | SET_REGS_MATCHED (); | 4249 | SET_REGS_MATCHED (); |
| 4250 | d++; | ||
| 4221 | break; | 4251 | break; |
| 4222 | #endif /* not emacs */ | 4252 | #endif /* not emacs */ |
| 4223 | 4253 | ||
| @@ -4812,7 +4842,7 @@ regexec (preg, string, nmatch, pmatch, eflags) | |||
| 4812 | 4842 | ||
| 4813 | 4843 | ||
| 4814 | /* Returns a message corresponding to an error code, ERRCODE, returned | 4844 | /* Returns a message corresponding to an error code, ERRCODE, returned |
| 4815 | from either regcomp or regexec. */ | 4845 | from either regcomp or regexec. We don't use PREG here. */ |
| 4816 | 4846 | ||
| 4817 | size_t | 4847 | size_t |
| 4818 | regerror (errcode, preg, errbuf, errbuf_size) | 4848 | regerror (errcode, preg, errbuf, errbuf_size) |
diff --git a/src/regex.h b/src/regex.h index ef8e9a36697..e38853eaf65 100644 --- a/src/regex.h +++ b/src/regex.h | |||
| @@ -20,12 +20,15 @@ | |||
| 20 | #ifndef __REGEXP_LIBRARY_H__ | 20 | #ifndef __REGEXP_LIBRARY_H__ |
| 21 | #define __REGEXP_LIBRARY_H__ | 21 | #define __REGEXP_LIBRARY_H__ |
| 22 | 22 | ||
| 23 | /* POSIX says that <sys/types.h> must be included (by the caller) before | ||
| 24 | <regex.h>. */ | ||
| 25 | |||
| 23 | #ifdef VMS | 26 | #ifdef VMS |
| 24 | /* POSIX says that size_t should be in stddef.h. */ | 27 | /* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it |
| 28 | should be there. */ | ||
| 25 | #include <stddef.h> | 29 | #include <stddef.h> |
| 26 | #endif | 30 | #endif |
| 27 | 31 | ||
| 28 | /* POSIX says that <sys/types.h> must be included before <regex.h>. */ | ||
| 29 | 32 | ||
| 30 | /* The following bits are used to determine the regexp syntax we | 33 | /* The following bits are used to determine the regexp syntax we |
| 31 | recognize. The set/not-set meanings are chosen so that Emacs syntax | 34 | recognize. The set/not-set meanings are chosen so that Emacs syntax |
| @@ -162,6 +165,9 @@ extern reg_syntax_t re_syntax_options; | |||
| 162 | #define RE_SYNTAX_POSIX_EGREP \ | 165 | #define RE_SYNTAX_POSIX_EGREP \ |
| 163 | (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES) | 166 | (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES) |
| 164 | 167 | ||
| 168 | /* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */ | ||
| 169 | #define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC | ||
| 170 | |||
| 165 | #define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC | 171 | #define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC |
| 166 | 172 | ||
| 167 | /* Syntax bits common to both basic and extended POSIX regex syntax. */ | 173 | /* Syntax bits common to both basic and extended POSIX regex syntax. */ |
| @@ -316,12 +322,12 @@ struct re_pattern_buffer | |||
| 316 | #define REGS_FIXED 2 | 322 | #define REGS_FIXED 2 |
| 317 | unsigned regs_allocated : 2; | 323 | unsigned regs_allocated : 2; |
| 318 | 324 | ||
| 319 | /* Set to zero when regex_compile compiles a pattern; set to one | 325 | /* Set to zero when `regex_compile' compiles a pattern; set to one |
| 320 | by re_compile_fastmap when it updates the fastmap, if any. */ | 326 | by `re_compile_fastmap' if it updates the fastmap. */ |
| 321 | unsigned fastmap_accurate : 1; | 327 | unsigned fastmap_accurate : 1; |
| 322 | 328 | ||
| 323 | /* If set, regexec reports only success or failure and does not | 329 | /* If set, `re_match_2' does not return information about |
| 324 | return anything in pmatch. */ | 330 | subexpressions. */ |
| 325 | unsigned no_sub : 1; | 331 | unsigned no_sub : 1; |
| 326 | 332 | ||
| 327 | /* If set, a beginning-of-line anchor doesn't match at the | 333 | /* If set, a beginning-of-line anchor doesn't match at the |
| @@ -383,17 +389,17 @@ typedef struct | |||
| 383 | unfortunately clutters up the declarations a bit, but I think it's | 389 | unfortunately clutters up the declarations a bit, but I think it's |
| 384 | worth it. | 390 | worth it. |
| 385 | 391 | ||
| 386 | We also have to undo `const' if we are not ANSI and if it hasn't | 392 | We may also have to undo `const' if we are not ANSI -- but if it has |
| 387 | previously being taken care of. */ | 393 | already been defined, as by Autoconf's AC_CONST, don't do anything. */ |
| 388 | 394 | ||
| 389 | #if __STDC__ | 395 | #if __STDC__ |
| 390 | #define _RE_ARGS(args) args | 396 | #define _RE_ARGS(args) args |
| 391 | #else | 397 | #else /* not __STDC__ */ |
| 392 | #define _RE_ARGS(args) () | 398 | #define _RE_ARGS(args) () |
| 393 | #ifndef const | 399 | #if !const && !HAVE_CONST |
| 394 | #define const | 400 | #define const |
| 395 | #endif | 401 | #endif |
| 396 | #endif | 402 | #endif /* not __STDC__ */ |
| 397 | 403 | ||
| 398 | /* Sets the current default syntax to SYNTAX, and returns the old syntax. | 404 | /* Sets the current default syntax to SYNTAX, and returns the old syntax. |
| 399 | You can also simply assign to the `re_syntax_options' variable. */ | 405 | You can also simply assign to the `re_syntax_options' variable. */ |