aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorKarl Berry1992-11-21 01:51:33 +0000
committerKarl Berry1992-11-21 01:51:33 +0000
commit9114e2792fe477299d6d7f8856c616fe4ce31d21 (patch)
tree0289e6410211f571f96229fc8321d9e83b1abef1 /src
parent9549c46d0b7b76c104c152e78feb33029c0e562a (diff)
downloademacs-9114e2792fe477299d6d7f8856c616fe4ce31d21.tar.gz
emacs-9114e2792fe477299d6d7f8856c616fe4ce31d21.zip
*** empty log message ***
Diffstat (limited to 'src')
-rw-r--r--src/regex.c166
-rw-r--r--src/regex.h28
2 files changed, 115 insertions, 79 deletions
diff --git a/src/regex.c b/src/regex.c
index 71aa4cc87e1..112e0d6d7ad 100644
--- a/src/regex.c
+++ b/src/regex.c
@@ -47,9 +47,15 @@
47 `BSTRING', as far as I know, and neither of them use this code. */ 47 `BSTRING', as far as I know, and neither of them use this code. */
48#if USG || STDC_HEADERS 48#if USG || STDC_HEADERS
49#include <string.h> 49#include <string.h>
50#ifndef bcmp
50#define bcmp(s1, s2, n) memcmp ((s1), (s2), (n)) 51#define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
52#endif
53#ifndef bcopy
51#define bcopy(s, d, n) memcpy ((d), (s), (n)) 54#define bcopy(s, d, n) memcpy ((d), (s), (n))
55#endif
56#ifndef bzero
52#define bzero(s, n) memset ((s), 0, (n)) 57#define bzero(s, n) memset ((s), 0, (n))
58#endif
53#else 59#else
54#include <strings.h> 60#include <strings.h>
55#endif 61#endif
@@ -135,12 +141,8 @@ init_syntax_once ()
135 (Per Bothner suggested the basic approach.) */ 141 (Per Bothner suggested the basic approach.) */
136#undef SIGN_EXTEND_CHAR 142#undef SIGN_EXTEND_CHAR
137#if __STDC__ 143#if __STDC__
138#ifndef VMS
139#define SIGN_EXTEND_CHAR(c) ((signed char) (c)) 144#define SIGN_EXTEND_CHAR(c) ((signed char) (c))
140#else /* On VMS, VAXC doesn't recognize `signed' before `char' */ 145#else /* not __STDC__ */
141#define SIGN_EXTEND_CHAR(c) ((char) (c))
142#endif /* VMS */
143#else
144/* As in Harbison and Steele. */ 146/* As in Harbison and Steele. */
145#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128) 147#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
146#endif 148#endif
@@ -447,6 +449,7 @@ static int debug = 0;
447#define DEBUG_PRINT1(x) if (debug) printf (x) 449#define DEBUG_PRINT1(x) if (debug) printf (x)
448#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2) 450#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
449#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3) 451#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
452#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
450#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \ 453#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
451 if (debug) print_partial_compiled_pattern (s, e) 454 if (debug) print_partial_compiled_pattern (s, e)
452#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \ 455#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
@@ -760,6 +763,7 @@ print_double_string (where, string1, size1, string2, size2)
760#define DEBUG_PRINT1(x) 763#define DEBUG_PRINT1(x)
761#define DEBUG_PRINT2(x1, x2) 764#define DEBUG_PRINT2(x1, x2)
762#define DEBUG_PRINT3(x1, x2, x3) 765#define DEBUG_PRINT3(x1, x2, x3)
766#define DEBUG_PRINT4(x1, x2, x3, x4)
763#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) 767#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
764#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) 768#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
765 769
@@ -1025,9 +1029,9 @@ typedef struct
1025 `buffer' is the compiled pattern; 1029 `buffer' is the compiled pattern;
1026 `syntax' is set to SYNTAX; 1030 `syntax' is set to SYNTAX;
1027 `used' is set to the length of the compiled pattern; 1031 `used' is set to the length of the compiled pattern;
1028 `fastmap_accurate' is set to zero; 1032 `fastmap_accurate' is zero;
1029 `re_nsub' is set to the number of groups in PATTERN; 1033 `re_nsub' is the number of subexpressions in PATTERN;
1030 `not_bol' and `not_eol' are set to zero. 1034 `not_bol' and `not_eol' are zero;
1031 1035
1032 The `fastmap' and `newline_anchor' fields are neither 1036 The `fastmap' and `newline_anchor' fields are neither
1033 examined nor set. */ 1037 examined nor set. */
@@ -1676,10 +1680,10 @@ regex_compile (pattern, size, syntax, bufp)
1676 | v | v 1680 | v | v
1677 a | b | c 1681 a | b | c
1678 1682
1679 If we are at `b,' then fixup_alt_jump right now points to a 1683 If we are at `b', then fixup_alt_jump right now points to a
1680 three-byte space after `a.' We'll put in the jump, set 1684 three-byte space after `a'. We'll put in the jump, set
1681 fixup_alt_jump to right after `b,' and leave behind three 1685 fixup_alt_jump to right after `b', and leave behind three
1682 bytes which we'll fill in when we get to after `c.' */ 1686 bytes which we'll fill in when we get to after `c'. */
1683 1687
1684 if (fixup_alt_jump) 1688 if (fixup_alt_jump)
1685 STORE_JUMP (jump_past_alt, fixup_alt_jump, b); 1689 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
@@ -2320,6 +2324,7 @@ typedef struct
2320 int this_reg; \ 2324 int this_reg; \
2321 \ 2325 \
2322 DEBUG_STATEMENT (failure_id++); \ 2326 DEBUG_STATEMENT (failure_id++); \
2327 DEBUG_STATEMENT (nfailure_points_pushed++); \
2323 DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \ 2328 DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
2324 DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\ 2329 DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\
2325 DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\ 2330 DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\
@@ -2473,6 +2478,8 @@ typedef struct
2473 regstart[this_reg] = (const char *) POP_FAILURE_ITEM (); \ 2478 regstart[this_reg] = (const char *) POP_FAILURE_ITEM (); \
2474 DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \ 2479 DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
2475 } \ 2480 } \
2481 \
2482 DEBUG_STATEMENT (nfailure_points_popped++); \
2476} /* POP_FAILURE_POINT */ 2483} /* POP_FAILURE_POINT */
2477 2484
2478/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in 2485/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
@@ -2860,15 +2867,9 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
2860 else if (endpos > total_size) 2867 else if (endpos > total_size)
2861 range = total_size - startpos; 2868 range = total_size - startpos;
2862 2869
2863 /* Update the fastmap now if not correct already. */
2864 if (fastmap && !bufp->fastmap_accurate)
2865 if (re_compile_fastmap (bufp) == -2)
2866 return -2;
2867
2868 /* If the search isn't to be a backwards one, don't waste time in a 2870 /* If the search isn't to be a backwards one, don't waste time in a
2869 long search for a pattern that says it is anchored. */ 2871 search for a pattern that must be anchored. */
2870 if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf 2872 if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
2871 && range > 0)
2872 { 2873 {
2873 if (startpos > 0) 2874 if (startpos > 0)
2874 return -1; 2875 return -1;
@@ -2876,6 +2877,12 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
2876 range = 1; 2877 range = 1;
2877 } 2878 }
2878 2879
2880 /* Update the fastmap now if not correct already. */
2881 if (fastmap && !bufp->fastmap_accurate)
2882 if (re_compile_fastmap (bufp) == -2)
2883 return -2;
2884
2885 /* Loop through the string, looking for a place to start matching. */
2879 for (;;) 2886 for (;;)
2880 { 2887 {
2881 /* If a fastmap is supplied, skip quickly over characters that 2888 /* If a fastmap is supplied, skip quickly over characters that
@@ -2913,7 +2920,7 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
2913 ? string2[startpos - size1] 2920 ? string2[startpos - size1]
2914 : string1[startpos]); 2921 : string1[startpos]);
2915 2922
2916 if (!fastmap[TRANSLATE (c)]) 2923 if (!fastmap[(unsigned char) TRANSLATE (c)])
2917 goto advance; 2924 goto advance;
2918 } 2925 }
2919 } 2926 }
@@ -2987,12 +2994,9 @@ typedef union
2987#define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something) 2994#define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something)
2988 2995
2989 2996
2990/* Call this when have matched something; it sets `matched' flags for the 2997/* Call this when have matched a real character; it sets `matched' flags
2991 registers corresponding to the group of which we currently are inside. 2998 for the subexpressions which we are currently inside. Also records
2992 Also records whether this group ever matched something. We only care 2999 that those subexprs have matched. */
2993 about this information at `stop_memory', and then only about the
2994 previous time through the loop (if the group is starred or whatever).
2995 So it is ok to clear all the nonactive registers here. */
2996#define SET_REGS_MATCHED() \ 3000#define SET_REGS_MATCHED() \
2997 do \ 3001 do \
2998 { \ 3002 { \
@@ -3037,24 +3041,24 @@ typedef union
3037 3041
3038/* Test if at very beginning or at very end of the virtual concatenation 3042/* Test if at very beginning or at very end of the virtual concatenation
3039 of `string1' and `string2'. If only one string, it's `string2'. */ 3043 of `string1' and `string2'. If only one string, it's `string2'. */
3040#define AT_STRINGS_BEG() (d == (size1 ? string1 : string2) || !size2) 3044#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
3041#define AT_STRINGS_END() (d == end2) 3045#define AT_STRINGS_END(d) ((d) == end2)
3042 3046
3043 3047
3044/* Test if D points to a character which is word-constituent. We have 3048/* Test if D points to a character which is word-constituent. We have
3045 two special cases to check for: if past the end of string1, look at 3049 two special cases to check for: if past the end of string1, look at
3046 the first character in string2; and if before the beginning of 3050 the first character in string2; and if before the beginning of
3047 string2, look at the last character in string1. 3051 string2, look at the last character in string1. */
3048 3052#define WORDCHAR_P(d) \
3049 Assumes `string1' exists, so use in conjunction with AT_STRINGS_BEG (). */
3050#define LETTER_P(d) \
3051 (SYNTAX ((d) == end1 ? *string2 \ 3053 (SYNTAX ((d) == end1 ? *string2 \
3052 : (d) == string2 - 1 ? *(end1 - 1) : *(d)) == Sword) 3054 : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \
3055 == Sword)
3053 3056
3054/* Test if the character before D and the one at D differ with respect 3057/* Test if the character before D and the one at D differ with respect
3055 to being word-constituent. */ 3058 to being word-constituent. */
3056#define AT_WORD_BOUNDARY(d) \ 3059#define AT_WORD_BOUNDARY(d) \
3057 (AT_STRINGS_BEG () || AT_STRINGS_END () || LETTER_P (d - 1) != LETTER_P (d)) 3060 (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \
3061 || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
3058 3062
3059 3063
3060/* Free everything we malloc. */ 3064/* Free everything we malloc. */
@@ -3161,6 +3165,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
3161 fail_stack_type fail_stack; 3165 fail_stack_type fail_stack;
3162#ifdef DEBUG 3166#ifdef DEBUG
3163 static unsigned failure_id = 0; 3167 static unsigned failure_id = 0;
3168 unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
3164#endif 3169#endif
3165 3170
3166 /* We fill all the registers internally, independent of what we 3171 /* We fill all the registers internally, independent of what we
@@ -3254,8 +3259,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
3254 else 3259 else
3255 { 3260 {
3256 /* We must initialize all our variables to NULL, so that 3261 /* We must initialize all our variables to NULL, so that
3257 `FREE_VARIABLES' doesn't try to free them. Too bad this isn't 3262 `FREE_VARIABLES' doesn't try to free them. */
3258 Lisp, so we could have a list of variables. As it is, */
3259 regstart = regend = old_regstart = old_regend = best_regstart 3263 regstart = regend = old_regstart = old_regend = best_regstart
3260 = best_regend = reg_dummy = NULL; 3264 = best_regend = reg_dummy = NULL;
3261 reg_info = reg_info_dummy = (register_info_type *) NULL; 3265 reg_info = reg_info_dummy = (register_info_type *) NULL;
@@ -3339,8 +3343,10 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
3339 3343
3340 if (p == pend) 3344 if (p == pend)
3341 { /* End of pattern means we might have succeeded. */ 3345 { /* End of pattern means we might have succeeded. */
3342 DEBUG_PRINT1 ("End of pattern: "); 3346 DEBUG_PRINT1 ("end of pattern ... ");
3343 /* If not end of string, try backtracking. Otherwise done. */ 3347
3348 /* If we haven't matched the entire string, and we want the
3349 longest match, try backtracking. */
3344 if (d != end_match_2) 3350 if (d != end_match_2)
3345 { 3351 {
3346 DEBUG_PRINT1 ("backtracking.\n"); 3352 DEBUG_PRINT1 ("backtracking.\n");
@@ -3378,6 +3384,8 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
3378 For example, the pattern `x.*y.*z' against the 3384 For example, the pattern `x.*y.*z' against the
3379 strings `x-' and `y-z-', if the two strings are 3385 strings `x-' and `y-z-', if the two strings are
3380 not consecutive in memory. */ 3386 not consecutive in memory. */
3387 DEBUG_PRINT1 ("Restoring best registers.\n");
3388
3381 d = match_end; 3389 d = match_end;
3382 dend = ((d >= string1 && d <= end1) 3390 dend = ((d >= string1 && d <= end1)
3383 ? end_match_1 : end_match_2); 3391 ? end_match_1 : end_match_2);
@@ -3390,7 +3398,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
3390 } 3398 }
3391 } /* d != end_match_2 */ 3399 } /* d != end_match_2 */
3392 3400
3393 DEBUG_PRINT1 ("\nAccepting match.\n"); 3401 DEBUG_PRINT1 ("Accepting match.\n");
3394 3402
3395 /* If caller wants register contents data back, do it. */ 3403 /* If caller wants register contents data back, do it. */
3396 if (regs && !bufp->no_sub) 3404 if (regs && !bufp->no_sub)
@@ -3456,7 +3464,10 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
3456 } /* regs && !bufp->no_sub */ 3464 } /* regs && !bufp->no_sub */
3457 3465
3458 FREE_VARIABLES (); 3466 FREE_VARIABLES ();
3459 DEBUG_PRINT2 ("%d registers pushed.\n", num_regs_pushed); 3467 DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
3468 nfailure_points_pushed, nfailure_points_popped,
3469 nfailure_points_pushed - nfailure_points_popped);
3470 DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
3460 3471
3461 mcnt = d - pos - (MATCHING_IN_FIRST_STRING 3472 mcnt = d - pos - (MATCHING_IN_FIRST_STRING
3462 ? string1 3473 ? string1
@@ -3658,7 +3669,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
3658 3669
3659 /* If just failed to match something this time around with a 3670 /* If just failed to match something this time around with a
3660 group that's operated on by a repetition operator, try to 3671 group that's operated on by a repetition operator, try to
3661 force exit from the ``loop,'' and restore the register 3672 force exit from the ``loop'', and restore the register
3662 information for this group that we had before trying this 3673 information for this group that we had before trying this
3663 last match. */ 3674 last match. */
3664 if ((!MATCHED_SOMETHING (reg_info[*p]) 3675 if ((!MATCHED_SOMETHING (reg_info[*p])
@@ -3802,7 +3813,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
3802 case begline: 3813 case begline:
3803 DEBUG_PRINT1 ("EXECUTING begline.\n"); 3814 DEBUG_PRINT1 ("EXECUTING begline.\n");
3804 3815
3805 if (AT_STRINGS_BEG ()) 3816 if (AT_STRINGS_BEG (d))
3806 { 3817 {
3807 if (!bufp->not_bol) break; 3818 if (!bufp->not_bol) break;
3808 } 3819 }
@@ -3818,7 +3829,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
3818 case endline: 3829 case endline:
3819 DEBUG_PRINT1 ("EXECUTING endline.\n"); 3830 DEBUG_PRINT1 ("EXECUTING endline.\n");
3820 3831
3821 if (AT_STRINGS_END ()) 3832 if (AT_STRINGS_END (d))
3822 { 3833 {
3823 if (!bufp->not_eol) break; 3834 if (!bufp->not_eol) break;
3824 } 3835 }
@@ -3835,7 +3846,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
3835 /* Match at the very beginning of the data. */ 3846 /* Match at the very beginning of the data. */
3836 case begbuf: 3847 case begbuf:
3837 DEBUG_PRINT1 ("EXECUTING begbuf.\n"); 3848 DEBUG_PRINT1 ("EXECUTING begbuf.\n");
3838 if (AT_STRINGS_BEG ()) 3849 if (AT_STRINGS_BEG (d))
3839 break; 3850 break;
3840 goto fail; 3851 goto fail;
3841 3852
@@ -3843,7 +3854,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
3843 /* Match at the very end of the data. */ 3854 /* Match at the very end of the data. */
3844 case endbuf: 3855 case endbuf:
3845 DEBUG_PRINT1 ("EXECUTING endbuf.\n"); 3856 DEBUG_PRINT1 ("EXECUTING endbuf.\n");
3846 if (AT_STRINGS_END ()) 3857 if (AT_STRINGS_END (d))
3847 break; 3858 break;
3848 goto fail; 3859 goto fail;
3849 3860
@@ -3897,7 +3908,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
3897 the original * applied to a group), save the information 3908 the original * applied to a group), save the information
3898 for that group and all inner ones, so that if we fail back 3909 for that group and all inner ones, so that if we fail back
3899 to this point, the group's information will be correct. 3910 to this point, the group's information will be correct.
3900 For example, in \(a*\)*\1, we only need the preceding group, 3911 For example, in \(a*\)*\1, we need the preceding group,
3901 and in \(\(a*\)b*\)\2, we need the inner group. */ 3912 and in \(\(a*\)b*\)\2, we need the inner group. */
3902 3913
3903 /* We can't use `p' to check ahead because we push 3914 /* We can't use `p' to check ahead because we push
@@ -3927,8 +3938,8 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
3927 break; 3938 break;
3928 3939
3929 3940
3930 /* A smart repeat ends with a maybe_pop_jump. 3941 /* A smart repeat ends with `maybe_pop_jump'.
3931 We change it either to a pop_failure_jump or a jump. */ 3942 We change it to either `pop_failure_jump' or `jump'. */
3932 case maybe_pop_jump: 3943 case maybe_pop_jump:
3933 EXTRACT_NUMBER_AND_INCR (mcnt, p); 3944 EXTRACT_NUMBER_AND_INCR (mcnt, p);
3934 DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt); 3945 DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
@@ -3956,10 +3967,21 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
3956 3967
3957 /* If we're at the end of the pattern, we can change. */ 3968 /* If we're at the end of the pattern, we can change. */
3958 if (p2 == pend) 3969 if (p2 == pend)
3959 { 3970 { /* But if we're also at the end of the string, we might
3971 as well skip changing anything. For example, in `a+'
3972 against `a', we'll have already matched the `a', and
3973 I don't see the point of changing the opcode,
3974 popping the failure point, finding out it fails, and
3975 then going into our endgame. */
3976 if (d == dend)
3977 {
3978 p = pend;
3979 DEBUG_PRINT1 (" End of pattern & string => done.\n");
3980 continue;
3981 }
3982
3960 p[-3] = (unsigned char) pop_failure_jump; 3983 p[-3] = (unsigned char) pop_failure_jump;
3961 DEBUG_PRINT1 3984 DEBUG_PRINT1 (" End of pattern => pop_failure_jump.\n");
3962 (" End of pattern: change to `pop_failure_jump'.\n");
3963 } 3985 }
3964 3986
3965 else if ((re_opcode_t) *p2 == exactn 3987 else if ((re_opcode_t) *p2 == exactn
@@ -3973,7 +3995,12 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
3973 to the `maybe_finalize_jump' of this case. Examine what 3995 to the `maybe_finalize_jump' of this case. Examine what
3974 follows. */ 3996 follows. */
3975 if ((re_opcode_t) p1[3] == exactn && p1[5] != c) 3997 if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
3976 p[-3] = (unsigned char) pop_failure_jump; 3998 {
3999 p[-3] = (unsigned char) pop_failure_jump;
4000 DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
4001 c, p1[5]);
4002 }
4003
3977 else if ((re_opcode_t) p1[3] == charset 4004 else if ((re_opcode_t) p1[3] == charset
3978 || (re_opcode_t) p1[3] == charset_not) 4005 || (re_opcode_t) p1[3] == charset_not)
3979 { 4006 {
@@ -3988,9 +4015,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
3988 if (!not) 4015 if (!not)
3989 { 4016 {
3990 p[-3] = (unsigned char) pop_failure_jump; 4017 p[-3] = (unsigned char) pop_failure_jump;
3991 DEBUG_PRINT1 4018 DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
3992 (" No match: change to `pop_failure_jump'.\n");
3993
3994 } 4019 }
3995 } 4020 }
3996 } 4021 }
@@ -3999,6 +4024,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
3999 if ((re_opcode_t) p[-1] != pop_failure_jump) 4024 if ((re_opcode_t) p[-1] != pop_failure_jump)
4000 { 4025 {
4001 p[-1] = (unsigned char) jump; 4026 p[-1] = (unsigned char) jump;
4027 DEBUG_PRINT1 (" Match => jump.\n");
4002 goto unconditional_jump; 4028 goto unconditional_jump;
4003 } 4029 }
4004 /* Note fall through. */ 4030 /* Note fall through. */
@@ -4060,7 +4086,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
4060 4086
4061 4087
4062 /* At the end of an alternative, we need to push a dummy failure 4088 /* At the end of an alternative, we need to push a dummy failure
4063 point in case we are followed by a pop_failure_jump', because 4089 point in case we are followed by a `pop_failure_jump', because
4064 we don't want the failure point for the alternative to be 4090 we don't want the failure point for the alternative to be
4065 popped. For example, matching `(a|ab)*' against `aab' 4091 popped. For example, matching `(a|ab)*' against `aab'
4066 requires that we match the `ab' alternative. */ 4092 requires that we match the `ab' alternative. */
@@ -4137,14 +4163,14 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
4137 4163
4138 case wordbeg: 4164 case wordbeg:
4139 DEBUG_PRINT1 ("EXECUTING wordbeg.\n"); 4165 DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
4140 if (LETTER_P (d) && (AT_STRINGS_BEG () || !LETTER_P (d - 1))) 4166 if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
4141 break; 4167 break;
4142 goto fail; 4168 goto fail;
4143 4169
4144 case wordend: 4170 case wordend:
4145 DEBUG_PRINT1 ("EXECUTING wordend.\n"); 4171 DEBUG_PRINT1 ("EXECUTING wordend.\n");
4146 if (!AT_STRINGS_BEG () && LETTER_P (d - 1) 4172 if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
4147 && (!LETTER_P (d) || AT_STRINGS_END ())) 4173 && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
4148 break; 4174 break;
4149 goto fail; 4175 goto fail;
4150 4176
@@ -4181,11 +4207,12 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
4181 goto matchsyntax; 4207 goto matchsyntax;
4182 4208
4183 case wordchar: 4209 case wordchar:
4184 DEBUG_PRINT1 ("EXECUTING wordchar.\n"); 4210 DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
4185 mcnt = (int) Sword; 4211 mcnt = (int) Sword;
4186 matchsyntax: 4212 matchsyntax:
4187 PREFETCH (); 4213 PREFETCH ();
4188 if (SYNTAX (*d++) != (enum syntaxcode) mcnt) goto fail; 4214 if (SYNTAX (*d++) != (enum syntaxcode) mcnt)
4215 goto fail;
4189 SET_REGS_MATCHED (); 4216 SET_REGS_MATCHED ();
4190 break; 4217 break;
4191 4218
@@ -4195,11 +4222,12 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
4195 goto matchnotsyntax; 4222 goto matchnotsyntax;
4196 4223
4197 case notwordchar: 4224 case notwordchar:
4198 DEBUG_PRINT1 ("EXECUTING notwordchar.\n"); 4225 DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
4199 mcnt = (int) Sword; 4226 mcnt = (int) Sword;
4200 matchnotsyntax: /* We goto here from notsyntaxspec. */ 4227 matchnotsyntax:
4201 PREFETCH (); 4228 PREFETCH ();
4202 if (SYNTAX (*d++) == (enum syntaxcode) mcnt) goto fail; 4229 if (SYNTAX (*d++) == (enum syntaxcode) mcnt)
4230 goto fail;
4203 SET_REGS_MATCHED (); 4231 SET_REGS_MATCHED ();
4204 break; 4232 break;
4205 4233
@@ -4207,17 +4235,19 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
4207 case wordchar: 4235 case wordchar:
4208 DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n"); 4236 DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
4209 PREFETCH (); 4237 PREFETCH ();
4210 if (!LETTER_P (d)) 4238 if (!WORDCHAR_P (d))
4211 goto fail; 4239 goto fail;
4212 SET_REGS_MATCHED (); 4240 SET_REGS_MATCHED ();
4241 d++;
4213 break; 4242 break;
4214 4243
4215 case notwordchar: 4244 case notwordchar:
4216 DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n"); 4245 DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
4217 PREFETCH (); 4246 PREFETCH ();
4218 if (LETTER_P (d)) 4247 if (WORDCHAR_P (d))
4219 goto fail; 4248 goto fail;
4220 SET_REGS_MATCHED (); 4249 SET_REGS_MATCHED ();
4250 d++;
4221 break; 4251 break;
4222#endif /* not emacs */ 4252#endif /* not emacs */
4223 4253
@@ -4812,7 +4842,7 @@ regexec (preg, string, nmatch, pmatch, eflags)
4812 4842
4813 4843
4814/* Returns a message corresponding to an error code, ERRCODE, returned 4844/* Returns a message corresponding to an error code, ERRCODE, returned
4815 from either regcomp or regexec. */ 4845 from either regcomp or regexec. We don't use PREG here. */
4816 4846
4817size_t 4847size_t
4818regerror (errcode, preg, errbuf, errbuf_size) 4848regerror (errcode, preg, errbuf, errbuf_size)
diff --git a/src/regex.h b/src/regex.h
index ef8e9a36697..e38853eaf65 100644
--- a/src/regex.h
+++ b/src/regex.h
@@ -20,12 +20,15 @@
20#ifndef __REGEXP_LIBRARY_H__ 20#ifndef __REGEXP_LIBRARY_H__
21#define __REGEXP_LIBRARY_H__ 21#define __REGEXP_LIBRARY_H__
22 22
23/* POSIX says that <sys/types.h> must be included (by the caller) before
24 <regex.h>. */
25
23#ifdef VMS 26#ifdef VMS
24/* POSIX says that size_t should be in stddef.h. */ 27/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
28 should be there. */
25#include <stddef.h> 29#include <stddef.h>
26#endif 30#endif
27 31
28/* POSIX says that <sys/types.h> must be included before <regex.h>. */
29 32
30/* The following bits are used to determine the regexp syntax we 33/* The following bits are used to determine the regexp syntax we
31 recognize. The set/not-set meanings are chosen so that Emacs syntax 34 recognize. The set/not-set meanings are chosen so that Emacs syntax
@@ -162,6 +165,9 @@ extern reg_syntax_t re_syntax_options;
162#define RE_SYNTAX_POSIX_EGREP \ 165#define RE_SYNTAX_POSIX_EGREP \
163 (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES) 166 (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES)
164 167
168/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */
169#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
170
165#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC 171#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
166 172
167/* Syntax bits common to both basic and extended POSIX regex syntax. */ 173/* Syntax bits common to both basic and extended POSIX regex syntax. */
@@ -316,12 +322,12 @@ struct re_pattern_buffer
316#define REGS_FIXED 2 322#define REGS_FIXED 2
317 unsigned regs_allocated : 2; 323 unsigned regs_allocated : 2;
318 324
319 /* Set to zero when regex_compile compiles a pattern; set to one 325 /* Set to zero when `regex_compile' compiles a pattern; set to one
320 by re_compile_fastmap when it updates the fastmap, if any. */ 326 by `re_compile_fastmap' if it updates the fastmap. */
321 unsigned fastmap_accurate : 1; 327 unsigned fastmap_accurate : 1;
322 328
323 /* If set, regexec reports only success or failure and does not 329 /* If set, `re_match_2' does not return information about
324 return anything in pmatch. */ 330 subexpressions. */
325 unsigned no_sub : 1; 331 unsigned no_sub : 1;
326 332
327 /* If set, a beginning-of-line anchor doesn't match at the 333 /* If set, a beginning-of-line anchor doesn't match at the
@@ -383,17 +389,17 @@ typedef struct
383 unfortunately clutters up the declarations a bit, but I think it's 389 unfortunately clutters up the declarations a bit, but I think it's
384 worth it. 390 worth it.
385 391
386 We also have to undo `const' if we are not ANSI and if it hasn't 392 We may also have to undo `const' if we are not ANSI -- but if it has
387 previously being taken care of. */ 393 already been defined, as by Autoconf's AC_CONST, don't do anything. */
388 394
389#if __STDC__ 395#if __STDC__
390#define _RE_ARGS(args) args 396#define _RE_ARGS(args) args
391#else 397#else /* not __STDC__ */
392#define _RE_ARGS(args) () 398#define _RE_ARGS(args) ()
393#ifndef const 399#if !const && !HAVE_CONST
394#define const 400#define const
395#endif 401#endif
396#endif 402#endif /* not __STDC__ */
397 403
398/* Sets the current default syntax to SYNTAX, and return the old syntax. 404/* Sets the current default syntax to SYNTAX, and return the old syntax.
399 You can also simply assign to the `re_syntax_options' variable. */ 405 You can also simply assign to the `re_syntax_options' variable. */