diff options
| author | Stefan Monnier | 2000-03-19 23:23:50 +0000 |
|---|---|---|
| committer | Stefan Monnier | 2000-03-19 23:23:50 +0000 |
| commit | 4e8a91326953fc5ad2273c72f545a7ebdefacdd0 (patch) | |
| tree | deaae57ffc712de5bbc99a17eaca4f8914fc1999 /src | |
| parent | fc4e96c8e8a97327360cd3271381bf2de711d1eb (diff) | |
| download | emacs-4e8a91326953fc5ad2273c72f545a7ebdefacdd0.tar.gz emacs-4e8a91326953fc5ad2273c72f545a7ebdefacdd0.zip | |
(RE_STRING_CHAR): New macro.
(GET_CHAR_AFER_2): Remove.
(RE_TRANSLATE, RE_TRANSLATE_P): New macros moved from regex.h.
(enum re_opcode_t): Remove on_failure_jump_exclusive.
(print_partial_compiled_pattern, re_compile_fastmap)
(re_match_2_internal): Remove on_failure_jump_exclusive.
(regex_compile): Turn optimizable P+ loops into PP*, so that the
optimization only need to work for * (ie. can use of_keep_string_jump).
Remove the special case for .*\n since it is now covered by the general
optimization.
(re_search_2): Don't bother with `room'.
(skip_one_char): New function.
(skip_noops): Simplify since `memory' is not needed any more.
(mutually_exclusive_p): Restructure slightly to use `switch' and
add handling for "all" remaining cases.
(re_match_2_internal): Change on_failure_jump_smart to use
on_failure_keep_string_jump (and redirect the end-of-loop jump)
rather than on_failure_jump_exclusive.
Diffstat (limited to 'src')
| -rw-r--r-- | src/ChangeLog | 23 | ||||
| -rw-r--r-- | src/regex.c | 420 |
2 files changed, 242 insertions, 201 deletions
diff --git a/src/ChangeLog b/src/ChangeLog index 93e823be2d2..70b65ce807b 100644 --- a/src/ChangeLog +++ b/src/ChangeLog | |||
| @@ -1,3 +1,26 @@ | |||
| 1 | 2000-03-19 Stefan Monnier <monnier@cs.yale.edu> | ||
| 2 | |||
| 3 | * regex.h (RE_TRANSLATE. RE_TRANSLATE_P): Moved to regex.c. | ||
| 4 | |||
| 5 | * regex.c (RE_STRING_CHAR): New macro. | ||
| 6 | (GET_CHAR_AFER_2): Remove. | ||
| 7 | (RE_TRANSLATE, RE_TRANSLATE_P): New macros moved from regex.h. | ||
| 8 | (enum re_opcode_t): Remove on_failure_jump_exclusive. | ||
| 9 | (print_partial_compiled_pattern, re_compile_fastmap) | ||
| 10 | (re_match_2_internal): Remove on_failure_jump_exclusive. | ||
| 11 | (regex_compile): Turn optimizable P+ loops into PP*, so that the | ||
| 12 | optimization only need to work for * (ie. can use of_keep_string_jump). | ||
| 13 | Remove the special case for .*\n since it is now covered by the general | ||
| 14 | optimization. | ||
| 15 | (re_search_2): Don't bother with `room'. | ||
| 16 | (skip_one_char): New function. | ||
| 17 | (skip_noops): Simplify since `memory' is not needed any more. | ||
| 18 | (mutually_exclusive_p): Restructure slightly to use `switch' and | ||
| 19 | add handling for "all" remaining cases. | ||
| 20 | (re_match_2_internal): Change on_failure_jump_smart to use | ||
| 21 | on_failure_keep_string_jump (and redirect the end-of-loop jump) | ||
| 22 | rather than on_failure_jump_exclusive. | ||
| 23 | |||
| 1 | 2000-03-19 Gerd Moellmann <gerd@gnu.org> | 24 | 2000-03-19 Gerd Moellmann <gerd@gnu.org> |
| 2 | 25 | ||
| 3 | * xfns.c (select_visual): Don't set dpyinfo->n_planes to the | 26 | * xfns.c (select_visual): Don't set dpyinfo->n_planes to the |
diff --git a/src/regex.c b/src/regex.c index 82c5d76f4dc..711f7c7afa9 100644 --- a/src/regex.c +++ b/src/regex.c | |||
| @@ -81,6 +81,9 @@ | |||
| 81 | #define realloc xrealloc | 81 | #define realloc xrealloc |
| 82 | #define free xfree | 82 | #define free xfree |
| 83 | 83 | ||
| 84 | #define RE_STRING_CHAR(p, s) \ | ||
| 85 | (multibyte ? (STRING_CHAR (p, s)) : (*(p))) | ||
| 86 | |||
| 84 | #else /* not emacs */ | 87 | #else /* not emacs */ |
| 85 | 88 | ||
| 86 | /* If we are not linking with Emacs proper, | 89 | /* If we are not linking with Emacs proper, |
| @@ -187,12 +190,16 @@ init_syntax_once () | |||
| 187 | #define SAME_CHARSET_P(c1, c2) (1) | 190 | #define SAME_CHARSET_P(c1, c2) (1) |
| 188 | #define MULTIBYTE_FORM_LENGTH(p, s) (1) | 191 | #define MULTIBYTE_FORM_LENGTH(p, s) (1) |
| 189 | #define STRING_CHAR(p, s) (*(p)) | 192 | #define STRING_CHAR(p, s) (*(p)) |
| 193 | #define RE_STRING_CHAR STRING_CHAR | ||
| 190 | #define STRING_CHAR_AND_LENGTH(p, s, actual_len) ((actual_len) = 1, *(p)) | 194 | #define STRING_CHAR_AND_LENGTH(p, s, actual_len) ((actual_len) = 1, *(p)) |
| 191 | #define GET_CHAR_AFTER_2(c, p, str1, end1, str2, end2) \ | ||
| 192 | (c = ((p) == (end1) ? *(str2) : *(p))) | ||
| 193 | #define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \ | 195 | #define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \ |
| 194 | (c = ((p) == (str2) ? *((end1) - 1) : *((p) - 1))) | 196 | (c = ((p) == (str2) ? *((end1) - 1) : *((p) - 1))) |
| 195 | #endif /* not emacs */ | 197 | #endif /* not emacs */ |
| 198 | |||
| 199 | #ifndef RE_TRANSLATE | ||
| 200 | #define RE_TRANSLATE(TBL, C) ((unsigned char)(TBL)[C]) | ||
| 201 | #define RE_TRANSLATE_P(TBL) (TBL) | ||
| 202 | #endif | ||
| 196 | 203 | ||
| 197 | /* Get the interface, including the syntax bits. */ | 204 | /* Get the interface, including the syntax bits. */ |
| 198 | #include "regex.h" | 205 | #include "regex.h" |
| @@ -519,13 +526,6 @@ typedef enum | |||
| 519 | current string position when executed. */ | 526 | current string position when executed. */ |
| 520 | on_failure_keep_string_jump, | 527 | on_failure_keep_string_jump, |
| 521 | 528 | ||
| 522 | /* Like `on_failure_jump', except that it assumes that the | ||
| 523 | pattern following it is mutually exclusive with the pattern | ||
| 524 | at the destination of the jump: if one matches something, | ||
| 525 | the other won't match at all. | ||
| 526 | Always used via `on_failure_jump_smart'. */ | ||
| 527 | on_failure_jump_exclusive, | ||
| 528 | |||
| 529 | /* Just like `on_failure_jump', except that it checks that we | 529 | /* Just like `on_failure_jump', except that it checks that we |
| 530 | don't get stuck in an infinite loop (matching an empty string | 530 | don't get stuck in an infinite loop (matching an empty string |
| 531 | indefinitely). */ | 531 | indefinitely). */ |
| @@ -534,8 +534,9 @@ typedef enum | |||
| 534 | /* A smart `on_failure_jump' used for greedy * and + operators. | 534 | /* A smart `on_failure_jump' used for greedy * and + operators. |
| 535 | It analyses the loop before which it is put and if the | 535 | It analyses the loop before which it is put and if the |
| 536 | loop does not require backtracking, it changes itself to | 536 | loop does not require backtracking, it changes itself to |
| 537 | `on_failure_jump_exclusive', else it just defaults to | 537 | `on_failure_keep_string_jump' and short-circuits the loop, |
| 538 | changing itself into `on_failure_jump_loop'. */ | 538 | else it just defaults to changing itself into `on_failure_jump'. |
| 539 | It assumes that it is pointing to just past a `jump'. */ | ||
| 539 | on_failure_jump_smart, | 540 | on_failure_jump_smart, |
| 540 | 541 | ||
| 541 | /* Followed by two-byte relative address and two-byte number n. | 542 | /* Followed by two-byte relative address and two-byte number n. |
| @@ -943,11 +944,6 @@ print_partial_compiled_pattern (start, end) | |||
| 943 | printf ("/on_failure_keep_string_jump to %d", p + mcnt - start); | 944 | printf ("/on_failure_keep_string_jump to %d", p + mcnt - start); |
| 944 | break; | 945 | break; |
| 945 | 946 | ||
| 946 | case on_failure_jump_exclusive: | ||
| 947 | extract_number_and_incr (&mcnt, &p); | ||
| 948 | printf ("/on_failure_jump_exclusive to %d", p + mcnt - start); | ||
| 949 | break; | ||
| 950 | |||
| 951 | case on_failure_jump_loop: | 947 | case on_failure_jump_loop: |
| 952 | extract_number_and_incr (&mcnt, &p); | 948 | extract_number_and_incr (&mcnt, &p); |
| 953 | printf ("/on_failure_jump_loop to %d", p + mcnt - start); | 949 | printf ("/on_failure_jump_loop to %d", p + mcnt - start); |
| @@ -1532,6 +1528,7 @@ static boolean at_begline_loc_p _RE_ARGS((const unsigned char *pattern, | |||
| 1532 | static boolean at_endline_loc_p _RE_ARGS((const unsigned char *p, | 1528 | static boolean at_endline_loc_p _RE_ARGS((const unsigned char *p, |
| 1533 | const unsigned char *pend, | 1529 | const unsigned char *pend, |
| 1534 | reg_syntax_t syntax)); | 1530 | reg_syntax_t syntax)); |
| 1531 | static unsigned char *skip_one_char _RE_ARGS((unsigned char *p)); | ||
| 1535 | 1532 | ||
| 1536 | /* Fetch the next character in the uncompiled pattern---translating it | 1533 | /* Fetch the next character in the uncompiled pattern---translating it |
| 1537 | if necessary. Also cast from a signed character in the constant | 1534 | if necessary. Also cast from a signed character in the constant |
| @@ -2072,9 +2069,6 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2072 | } | 2069 | } |
| 2073 | 2070 | ||
| 2074 | { | 2071 | { |
| 2075 | /* Are we optimizing this jump? */ | ||
| 2076 | boolean keep_string_p = false; | ||
| 2077 | |||
| 2078 | /* 1 means zero (many) matches is allowed. */ | 2072 | /* 1 means zero (many) matches is allowed. */ |
| 2079 | boolean zero_times_ok = 0, many_times_ok = 0; | 2073 | boolean zero_times_ok = 0, many_times_ok = 0; |
| 2080 | boolean greedy = 1; | 2074 | boolean greedy = 1; |
| @@ -2129,7 +2123,7 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2129 | 2123 | ||
| 2130 | /* Star, etc. applied to an empty pattern is equivalent | 2124 | /* Star, etc. applied to an empty pattern is equivalent |
| 2131 | to an empty pattern. */ | 2125 | to an empty pattern. */ |
| 2132 | if (!laststart) | 2126 | if (!laststart || laststart == b) |
| 2133 | break; | 2127 | break; |
| 2134 | 2128 | ||
| 2135 | /* Now we know whether or not zero matches is allowed | 2129 | /* Now we know whether or not zero matches is allowed |
| @@ -2137,59 +2131,46 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2137 | if (greedy) | 2131 | if (greedy) |
| 2138 | { | 2132 | { |
| 2139 | if (many_times_ok) | 2133 | if (many_times_ok) |
| 2140 | { /* More than one repetition is allowed, so put in at the | 2134 | { |
| 2141 | end a backward relative jump from `b' to before the next | 2135 | boolean simple = skip_one_char (laststart) == b; |
| 2142 | jump we're going to put in below (which jumps from | 2136 | unsigned int startoffset = 0; |
| 2143 | laststart to after this jump). | 2137 | assert (skip_one_char (laststart) <= b); |
| 2144 | 2138 | ||
| 2145 | But if we are at the `*' in the exact sequence `.*\n', | 2139 | if (!zero_times_ok && simple) |
| 2146 | insert an unconditional jump backwards to the ., | 2140 | { /* Since simple * loops can be made faster by using |
| 2147 | instead of the beginning of the loop. This way we only | 2141 | on_failure_keep_string_jump, we turn simple P+ |
| 2148 | push a failure point once, instead of every time | 2142 | into PP* if P is simple. */ |
| 2149 | through the loop. */ | 2143 | unsigned char *p1, *p2; |
| 2150 | assert (p - 1 > pattern); | 2144 | startoffset = b - laststart; |
| 2151 | 2145 | GET_BUFFER_SPACE (startoffset); | |
| 2152 | /* Allocate the space for the jump. */ | 2146 | p1 = b; p2 = laststart; |
| 2153 | GET_BUFFER_SPACE (3); | 2147 | while (p2 < p1) |
| 2154 | 2148 | *b++ = *p2++; | |
| 2155 | /* We know we are not at the first character of the pattern, | 2149 | zero_times_ok = 1; |
| 2156 | because laststart was nonzero. And we've already | ||
| 2157 | incremented `p', by the way, to be the character after | ||
| 2158 | the `*'. Do we have to do something analogous here | ||
| 2159 | for null bytes, because of RE_DOT_NOT_NULL? */ | ||
| 2160 | if (TRANSLATE (*(p - 2)) == TRANSLATE ('.') | ||
| 2161 | && zero_times_ok | ||
| 2162 | && p < pend | ||
| 2163 | && TRANSLATE (*p) == TRANSLATE ('\n') | ||
| 2164 | && !(syntax & RE_DOT_NEWLINE)) | ||
| 2165 | { /* We have .*\n. */ | ||
| 2166 | STORE_JUMP (jump, b, laststart); | ||
| 2167 | keep_string_p = true; | ||
| 2168 | } | 2150 | } |
| 2151 | |||
| 2152 | GET_BUFFER_SPACE (6); | ||
| 2153 | if (!zero_times_ok) | ||
| 2154 | /* A + loop. */ | ||
| 2155 | STORE_JUMP (on_failure_jump_loop, b, b + 6); | ||
| 2169 | else | 2156 | else |
| 2170 | STORE_JUMP (jump, b, laststart - 3); | 2157 | /* Simple * loops can use on_failure_keep_string_jump |
| 2171 | 2158 | depending on what follows. But since we don't know | |
| 2172 | /* We've added more stuff to the buffer. */ | 2159 | that yet, we leave the decision up to |
| 2160 | on_failure_jump_smart. */ | ||
| 2161 | INSERT_JUMP (simple ? on_failure_jump_smart | ||
| 2162 | : on_failure_jump_loop, | ||
| 2163 | laststart + startoffset, b + 6); | ||
| 2173 | b += 3; | 2164 | b += 3; |
| 2174 | } | 2165 | STORE_JUMP (jump, b, laststart + startoffset); |
| 2175 | |||
| 2176 | /* On failure, jump from laststart to b + 3, which will be the | ||
| 2177 | end of the buffer after this jump is inserted. */ | ||
| 2178 | GET_BUFFER_SPACE (3); | ||
| 2179 | if (!zero_times_ok) | ||
| 2180 | { | ||
| 2181 | assert (many_times_ok); | ||
| 2182 | INSERT_JUMP (on_failure_jump_smart, b - 3, b + 3); | ||
| 2183 | pending_exact = 0; | ||
| 2184 | b += 3; | 2166 | b += 3; |
| 2185 | } | 2167 | } |
| 2186 | else | 2168 | else |
| 2187 | { | 2169 | { |
| 2188 | INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump | 2170 | /* A simple ? pattern. */ |
| 2189 | : !many_times_ok ? | 2171 | assert (zero_times_ok); |
| 2190 | on_failure_jump : on_failure_jump_smart, | 2172 | GET_BUFFER_SPACE (3); |
| 2191 | laststart, b + 3); | 2173 | INSERT_JUMP (on_failure_jump, laststart, b + 3); |
| 2192 | pending_exact = 0; | ||
| 2193 | b += 3; | 2174 | b += 3; |
| 2194 | } | 2175 | } |
| 2195 | } | 2176 | } |
| @@ -2224,6 +2205,7 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2224 | } | 2205 | } |
| 2225 | } | 2206 | } |
| 2226 | } | 2207 | } |
| 2208 | pending_exact = 0; | ||
| 2227 | break; | 2209 | break; |
| 2228 | 2210 | ||
| 2229 | 2211 | ||
| @@ -3217,7 +3199,7 @@ at_begline_loc_p (pattern, p, syntax) | |||
| 3217 | const unsigned char *pattern, *p; | 3199 | const unsigned char *pattern, *p; |
| 3218 | reg_syntax_t syntax; | 3200 | reg_syntax_t syntax; |
| 3219 | { | 3201 | { |
| 3220 | re_char *prev = p - 2; | 3202 | const unsigned char *prev = p - 2; |
| 3221 | boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\'; | 3203 | boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\'; |
| 3222 | 3204 | ||
| 3223 | return | 3205 | return |
| @@ -3236,9 +3218,9 @@ at_endline_loc_p (p, pend, syntax) | |||
| 3236 | const unsigned char *p, *pend; | 3218 | const unsigned char *p, *pend; |
| 3237 | reg_syntax_t syntax; | 3219 | reg_syntax_t syntax; |
| 3238 | { | 3220 | { |
| 3239 | re_char *next = p; | 3221 | const unsigned char *next = p; |
| 3240 | boolean next_backslash = *next == '\\'; | 3222 | boolean next_backslash = *next == '\\'; |
| 3241 | re_char *next_next = p + 1 < pend ? p + 1 : 0; | 3223 | const unsigned char *next_next = p + 1 < pend ? p + 1 : 0; |
| 3242 | 3224 | ||
| 3243 | return | 3225 | return |
| 3244 | /* Before a subexpression? */ | 3226 | /* Before a subexpression? */ |
| @@ -3663,7 +3645,6 @@ re_compile_fastmap (bufp) | |||
| 3663 | { | 3645 | { |
| 3664 | case on_failure_jump: | 3646 | case on_failure_jump: |
| 3665 | case on_failure_keep_string_jump: | 3647 | case on_failure_keep_string_jump: |
| 3666 | case on_failure_jump_exclusive: | ||
| 3667 | case on_failure_jump_loop: | 3648 | case on_failure_jump_loop: |
| 3668 | case on_failure_jump_smart: | 3649 | case on_failure_jump_smart: |
| 3669 | p++; | 3650 | p++; |
| @@ -3677,7 +3658,6 @@ re_compile_fastmap (bufp) | |||
| 3677 | 3658 | ||
| 3678 | case on_failure_jump: | 3659 | case on_failure_jump: |
| 3679 | case on_failure_keep_string_jump: | 3660 | case on_failure_keep_string_jump: |
| 3680 | case on_failure_jump_exclusive: | ||
| 3681 | case on_failure_jump_loop: | 3661 | case on_failure_jump_loop: |
| 3682 | case on_failure_jump_smart: | 3662 | case on_failure_jump_smart: |
| 3683 | handle_on_failure_jump: | 3663 | handle_on_failure_jump: |
| @@ -3987,11 +3967,9 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop) | |||
| 3987 | } | 3967 | } |
| 3988 | else /* Searching backwards. */ | 3968 | else /* Searching backwards. */ |
| 3989 | { | 3969 | { |
| 3990 | int room = (startpos >= size1 | 3970 | buf_ch = STRING_CHAR (d, (startpos >= size1 |
| 3991 | ? size2 + size1 - startpos | 3971 | ? size2 + size1 - startpos |
| 3992 | : size1 - startpos); | 3972 | : size1 - startpos)); |
| 3993 | |||
| 3994 | buf_ch = STRING_CHAR (d, room); | ||
| 3995 | if (RE_TRANSLATE_P (translate)) | 3973 | if (RE_TRANSLATE_P (translate)) |
| 3996 | buf_ch = RE_TRANSLATE (translate, buf_ch); | 3974 | buf_ch = RE_TRANSLATE (translate, buf_ch); |
| 3997 | 3975 | ||
| @@ -4153,11 +4131,54 @@ static int bcmp_translate (); | |||
| 4153 | 4131 | ||
| 4154 | /* Optimization routines. */ | 4132 | /* Optimization routines. */ |
| 4155 | 4133 | ||
| 4134 | /* If the operation is a match against one or more chars, | ||
| 4135 | return a pointer to the next operation, else return NULL. */ | ||
| 4136 | static unsigned char * | ||
| 4137 | skip_one_char (p) | ||
| 4138 | unsigned char *p; | ||
| 4139 | { | ||
| 4140 | switch (SWITCH_ENUM_CAST (*p++)) | ||
| 4141 | { | ||
| 4142 | case anychar: | ||
| 4143 | break; | ||
| 4144 | |||
| 4145 | case exactn: | ||
| 4146 | p += *p + 1; | ||
| 4147 | break; | ||
| 4148 | |||
| 4149 | case charset_not: | ||
| 4150 | case charset: | ||
| 4151 | if (CHARSET_RANGE_TABLE_EXISTS_P (p - 1)) | ||
| 4152 | { | ||
| 4153 | int mcnt; | ||
| 4154 | p = CHARSET_RANGE_TABLE (p - 1); | ||
| 4155 | EXTRACT_NUMBER_AND_INCR (mcnt, p); | ||
| 4156 | p = CHARSET_RANGE_TABLE_END (p, mcnt); | ||
| 4157 | } | ||
| 4158 | else | ||
| 4159 | p += 1 + CHARSET_BITMAP_SIZE (p - 1); | ||
| 4160 | break; | ||
| 4161 | |||
| 4162 | #ifdef emacs | ||
| 4163 | case syntaxspec: | ||
| 4164 | case notsyntaxspec: | ||
| 4165 | case categoryspec: | ||
| 4166 | case notcategoryspec: | ||
| 4167 | #endif /* emacs */ | ||
| 4168 | p++; | ||
| 4169 | break; | ||
| 4170 | |||
| 4171 | default: | ||
| 4172 | p = NULL; | ||
| 4173 | } | ||
| 4174 | return p; | ||
| 4175 | } | ||
| 4176 | |||
| 4177 | |||
| 4156 | /* Jump over non-matching operations. */ | 4178 | /* Jump over non-matching operations. */ |
| 4157 | static unsigned char * | 4179 | static unsigned char * |
| 4158 | skip_noops (p, pend, memory) | 4180 | skip_noops (p, pend) |
| 4159 | unsigned char *p, *pend; | 4181 | unsigned char *p, *pend; |
| 4160 | int memory; | ||
| 4161 | { | 4182 | { |
| 4162 | int mcnt; | 4183 | int mcnt; |
| 4163 | while (p < pend) | 4184 | while (p < pend) |
| @@ -4165,8 +4186,6 @@ skip_noops (p, pend, memory) | |||
| 4165 | switch (SWITCH_ENUM_CAST ((re_opcode_t) *p)) | 4186 | switch (SWITCH_ENUM_CAST ((re_opcode_t) *p)) |
| 4166 | { | 4187 | { |
| 4167 | case start_memory: | 4188 | case start_memory: |
| 4168 | if (!memory) | ||
| 4169 | return p; | ||
| 4170 | case stop_memory: | 4189 | case stop_memory: |
| 4171 | p += 2; break; | 4190 | p += 2; break; |
| 4172 | case no_op: | 4191 | case no_op: |
| @@ -4190,100 +4209,97 @@ mutually_exclusive_p (bufp, p1, p2) | |||
| 4190 | struct re_pattern_buffer *bufp; | 4209 | struct re_pattern_buffer *bufp; |
| 4191 | unsigned char *p1, *p2; | 4210 | unsigned char *p1, *p2; |
| 4192 | { | 4211 | { |
| 4193 | int multibyte = bufp->multibyte; | 4212 | re_opcode_t op2; |
| 4213 | const boolean multibyte = bufp->multibyte; | ||
| 4194 | unsigned char *pend = bufp->buffer + bufp->used; | 4214 | unsigned char *pend = bufp->buffer + bufp->used; |
| 4195 | 4215 | ||
| 4196 | assert (p1 >= bufp->buffer && p1 <= pend | 4216 | assert (p1 >= bufp->buffer && p1 < pend |
| 4197 | && p2 >= bufp->buffer && p2 <= pend); | 4217 | && p2 >= bufp->buffer && p2 <= pend); |
| 4198 | 4218 | ||
| 4199 | /* Skip over open/close-group commands. | 4219 | /* Skip over open/close-group commands. |
| 4200 | If what follows this loop is a ...+ construct, | 4220 | If what follows this loop is a ...+ construct, |
| 4201 | look at what begins its body, since we will have to | 4221 | look at what begins its body, since we will have to |
| 4202 | match at least one of that. */ | 4222 | match at least one of that. */ |
| 4203 | p2 = skip_noops (p2, pend, 1); | 4223 | p2 = skip_noops (p2, pend); |
| 4204 | /* The same skip can be done for p1, except that skipping over | 4224 | /* The same skip can be done for p1, except that this function |
| 4205 | start_memory is not a good idea (if there's a group inside | 4225 | is only used in the case where p1 is a simple match operator. */ |
| 4206 | the loop delimited by on_failure_jump_exclusive, then it | 4226 | /* p1 = skip_noops (p1, pend); */ |
| 4207 | can't optimize the push away (it still works properly, but | 4227 | |
| 4208 | slightly slower rather than faster)). */ | 4228 | assert (p1 >= bufp->buffer && p1 < pend |
| 4209 | p1 = skip_noops (p1, pend, 0); | 4229 | && p2 >= bufp->buffer && p2 <= pend); |
| 4210 | 4230 | ||
| 4211 | /* If we're at the end of the pattern, we can change. */ | 4231 | op2 = p2 == pend ? succeed : *p2; |
| 4212 | if (p2 == pend) | 4232 | |
| 4233 | switch (SWITCH_ENUM_CAST (op2)) | ||
| 4213 | { | 4234 | { |
| 4214 | switch (SWITCH_ENUM_CAST ((re_opcode_t) *p1)) | 4235 | case succeed: |
| 4236 | case endbuf: | ||
| 4237 | /* If we're at the end of the pattern, we can change. */ | ||
| 4238 | if (skip_one_char (p1)) | ||
| 4215 | { | 4239 | { |
| 4216 | case anychar: | ||
| 4217 | case charset_not: | ||
| 4218 | case charset: | ||
| 4219 | case exactn: | ||
| 4220 | DEBUG_PRINT1 (" End of pattern: fast loop.\n"); | 4240 | DEBUG_PRINT1 (" End of pattern: fast loop.\n"); |
| 4221 | return 1; | 4241 | return 1; |
| 4222 | default: | ||
| 4223 | return 0; | ||
| 4224 | } | 4242 | } |
| 4225 | } | 4243 | break; |
| 4226 | 4244 | ||
| 4227 | else if ((re_opcode_t) *p2 == exactn | 4245 | case endline: |
| 4228 | || (bufp->newline_anchor && (re_opcode_t) *p2 == endline)) | 4246 | if (!bufp->newline_anchor) |
| 4229 | { | 4247 | break; |
| 4230 | register unsigned int c | 4248 | /* Fallthrough */ |
| 4231 | = *p2 == (unsigned char) endline ? '\n' : p2[2]; | 4249 | case exactn: |
| 4250 | { | ||
| 4251 | register unsigned int c | ||
| 4252 | = (re_opcode_t) *p2 == endline ? '\n' | ||
| 4253 | : RE_STRING_CHAR(p2 + 2, pend - p2 - 2); | ||
| 4232 | 4254 | ||
| 4233 | if ((re_opcode_t) *p1 == exactn) | 4255 | if ((re_opcode_t) *p1 == exactn) |
| 4234 | { | 4256 | { |
| 4235 | if (!(multibyte /* && (c != '\n') */ | 4257 | if (c != RE_STRING_CHAR (p1 + 2, pend - p1 - 2)) |
| 4236 | && BASE_LEADING_CODE_P (c)) | 4258 | { |
| 4237 | ? c != p1[2] | 4259 | DEBUG_PRINT3 (" '%c' != '%c' => fast loop.\n", c, p1[2]); |
| 4238 | : (STRING_CHAR (&p2[2], pend - &p2[2]) | 4260 | return 1; |
| 4239 | != STRING_CHAR (&p1[2], pend - &p1[2]))) | 4261 | } |
| 4240 | { | 4262 | } |
| 4241 | DEBUG_PRINT3 (" '%c' != '%c' => fast loop.\n", c, p1[2]); | ||
| 4242 | return 1; | ||
| 4243 | } | ||
| 4244 | } | ||
| 4245 | 4263 | ||
| 4246 | else if ((re_opcode_t) *p1 == charset | 4264 | else if ((re_opcode_t) *p1 == charset |
| 4247 | || (re_opcode_t) *p1 == charset_not) | 4265 | || (re_opcode_t) *p1 == charset_not) |
| 4248 | { | 4266 | { |
| 4249 | int not = (re_opcode_t) *p1 == charset_not; | 4267 | int not = (re_opcode_t) *p1 == charset_not; |
| 4250 | 4268 | ||
| 4251 | if (multibyte /* && (c != '\n') */ | 4269 | /* Test if C is listed in charset (or charset_not) |
| 4252 | && BASE_LEADING_CODE_P (c)) | 4270 | at `p1'. */ |
| 4253 | c = STRING_CHAR (&p2[2], pend - &p2[2]); | 4271 | if (SINGLE_BYTE_CHAR_P (c)) |
| 4272 | { | ||
| 4273 | if (c < CHARSET_BITMAP_SIZE (p1) * BYTEWIDTH | ||
| 4274 | && p1[2 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) | ||
| 4275 | not = !not; | ||
| 4276 | } | ||
| 4277 | else if (CHARSET_RANGE_TABLE_EXISTS_P (p1)) | ||
| 4278 | CHARSET_LOOKUP_RANGE_TABLE (not, c, p1); | ||
| 4254 | 4279 | ||
| 4255 | /* Test if C is listed in charset (or charset_not) | 4280 | /* `not' is equal to 1 if c would match, which means |
| 4256 | at `p1'. */ | 4281 | that we can't change to pop_failure_jump. */ |
| 4257 | if (SINGLE_BYTE_CHAR_P (c)) | 4282 | if (!not) |
| 4258 | { | 4283 | { |
| 4259 | if (c < CHARSET_BITMAP_SIZE (p1) * BYTEWIDTH | 4284 | DEBUG_PRINT1 (" No match => fast loop.\n"); |
| 4260 | && p1[2 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) | 4285 | return 1; |
| 4261 | not = !not; | 4286 | } |
| 4262 | } | 4287 | } |
| 4263 | else if (CHARSET_RANGE_TABLE_EXISTS_P (p1)) | 4288 | else if ((re_opcode_t) *p1 == anychar |
| 4264 | CHARSET_LOOKUP_RANGE_TABLE (not, c, p1); | 4289 | && c == '\n') |
| 4290 | { | ||
| 4291 | DEBUG_PRINT1 (" . != \\n => fast loop.\n"); | ||
| 4292 | return 1; | ||
| 4293 | } | ||
| 4294 | } | ||
| 4295 | break; | ||
| 4265 | 4296 | ||
| 4266 | /* `not' is equal to 1 if c would match, which means | 4297 | case charset: |
| 4267 | that we can't change to pop_failure_jump. */ | 4298 | case charset_not: |
| 4268 | if (!not) | 4299 | { |
| 4269 | { | 4300 | if ((re_opcode_t) *p1 == exactn) |
| 4270 | DEBUG_PRINT1 (" No match => fast loop.\n"); | 4301 | /* Reuse the code above. */ |
| 4271 | return 1; | 4302 | return mutually_exclusive_p (bufp, p2, p1); |
| 4272 | } | ||
| 4273 | } | ||
| 4274 | else if ((re_opcode_t) *p1 == anychar | ||
| 4275 | && c == '\n') | ||
| 4276 | { | ||
| 4277 | DEBUG_PRINT1 (" . != \\n => fast loop.\n"); | ||
| 4278 | return 1; | ||
| 4279 | } | ||
| 4280 | } | ||
| 4281 | else if ((re_opcode_t) *p2 == charset | ||
| 4282 | || (re_opcode_t) *p2 == charset_not) | ||
| 4283 | { | ||
| 4284 | if ((re_opcode_t) *p1 == exactn) | ||
| 4285 | /* Reuse the code above. */ | ||
| 4286 | return mutually_exclusive_p (bufp, p2, p1); | ||
| 4287 | 4303 | ||
| 4288 | 4304 | ||
| 4289 | /* It is hard to list up all the character in charset | 4305 | /* It is hard to list up all the character in charset |
| @@ -4332,13 +4348,39 @@ mutually_exclusive_p (bufp, p1, p2) | |||
| 4332 | && ((p2[2 + idx] & ~ p1[2 + idx]) == 0)))) | 4348 | && ((p2[2 + idx] & ~ p1[2 + idx]) == 0)))) |
| 4333 | break; | 4349 | break; |
| 4334 | 4350 | ||
| 4335 | if (idx == p2[1]) | 4351 | if (idx == p2[1]) |
| 4336 | { | 4352 | { |
| 4337 | DEBUG_PRINT1 (" No match => fast loop.\n"); | 4353 | DEBUG_PRINT1 (" No match => fast loop.\n"); |
| 4338 | return 1; | 4354 | return 1; |
| 4339 | } | 4355 | } |
| 4340 | } | 4356 | } |
| 4341 | } | 4357 | } |
| 4358 | } | ||
| 4359 | |||
| 4360 | #ifdef emacs | ||
| 4361 | case wordend: | ||
| 4362 | case notsyntaxspec: | ||
| 4363 | return ((re_opcode_t) *p1 == syntaxspec | ||
| 4364 | && p1[1] == (op2 == wordend ? Sword : p2[1])); | ||
| 4365 | |||
| 4366 | case wordbeg: | ||
| 4367 | case syntaxspec: | ||
| 4368 | return ((re_opcode_t) *p1 == notsyntaxspec | ||
| 4369 | && p1[1] == (op2 == wordend ? Sword : p2[1])); | ||
| 4370 | |||
| 4371 | case wordbound: | ||
| 4372 | return (((re_opcode_t) *p1 == notsyntaxspec | ||
| 4373 | || (re_opcode_t) *p1 == syntaxspec) | ||
| 4374 | && p1[1] == Sword); | ||
| 4375 | |||
| 4376 | case categoryspec: | ||
| 4377 | return ((re_opcode_t) *p1 == notcategoryspec && p1[1] == p2[1]); | ||
| 4378 | case notcategoryspec: | ||
| 4379 | return ((re_opcode_t) *p1 == categoryspec && p1[1] == p2[1]); | ||
| 4380 | #endif /* emacs */ | ||
| 4381 | |||
| 4382 | default: | ||
| 4383 | ; | ||
| 4342 | } | 4384 | } |
| 4343 | 4385 | ||
| 4344 | /* Safe default. */ | 4386 | /* Safe default. */ |
| @@ -5157,32 +5199,9 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5157 | PUSH_FAILURE_POINT (p - 3, NULL); | 5199 | PUSH_FAILURE_POINT (p - 3, NULL); |
| 5158 | break; | 5200 | break; |
| 5159 | 5201 | ||
| 5160 | case on_failure_jump_exclusive: | ||
| 5161 | EXTRACT_NUMBER_AND_INCR (mcnt, p); | ||
| 5162 | DEBUG_PRINT3 ("EXECUTING on_failure_jump_exclusive %d (to %p):\n", | ||
| 5163 | mcnt, p + mcnt); | ||
| 5164 | |||
| 5165 | if (! FAIL_STACK_EMPTY () | ||
| 5166 | && FAILURE_PAT (TOP_FAILURE_HANDLE ()) == (p - 3) | ||
| 5167 | && fail_stack.avail == fail_stack.frame) | ||
| 5168 | { | ||
| 5169 | /* We are trying to push failure F2 onto the stack but there | ||
| 5170 | is already a failure F1 pushed from the same instruction. | ||
| 5171 | Between F1 and now, something has matched (else this is an | ||
| 5172 | improper use of on_failure_jump_exclusive), so that we know | ||
| 5173 | that the fail-destination of F1 cannot match, hence we can | ||
| 5174 | pop F1 before pushing F2. Instead of doing this pop/push, | ||
| 5175 | we manually turn F1 into F2. | ||
| 5176 | `fail_stack.avail == fail_stack.frame' makes sure | ||
| 5177 | that popping F1 doesn't involve registers, else | ||
| 5178 | this optimization cannot be done so trivially. */ | ||
| 5179 | assert (FAILURE_STR (TOP_FAILURE_HANDLE ()) != d); | ||
| 5180 | FAILURE_STR (TOP_FAILURE_HANDLE ()) = d; | ||
| 5181 | } | ||
| 5182 | else | ||
| 5183 | PUSH_FAILURE_POINT (p - 3, d); | ||
| 5184 | break; | ||
| 5185 | 5202 | ||
| 5203 | /* Simple loop detecting on_failure_jump: just check on the | ||
| 5204 | failure stack if the same spot was already hit earlier. */ | ||
| 5186 | case on_failure_jump_loop: | 5205 | case on_failure_jump_loop: |
| 5187 | on_failure: | 5206 | on_failure: |
| 5188 | EXTRACT_NUMBER_AND_INCR (mcnt, p); | 5207 | EXTRACT_NUMBER_AND_INCR (mcnt, p); |
| @@ -5215,13 +5234,13 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5215 | PUSH_FAILURE_POINT (p -3, d); | 5234 | PUSH_FAILURE_POINT (p -3, d); |
| 5216 | break; | 5235 | break; |
| 5217 | 5236 | ||
| 5218 | /* This operation is used for greedy * and +. | 5237 | /* This operation is used for greedy *. |
| 5219 | Compare the beginning of the repeat with what in the | 5238 | Compare the beginning of the repeat with what in the |
| 5220 | pattern follows its end. If we can establish that there | 5239 | pattern follows its end. If we can establish that there |
| 5221 | is nothing that they would both match, i.e., that we | 5240 | is nothing that they would both match, i.e., that we |
| 5222 | would have to backtrack because of (as in, e.g., `a*a') | 5241 | would have to backtrack because of (as in, e.g., `a*a') |
| 5223 | then we can use a non-backtracking loop based on | 5242 | then we can use a non-backtracking loop based on |
| 5224 | on_failure_jump_exclusive instead of on_failure_jump_loop. */ | 5243 | on_failure_keep_string_jump instead of on_failure_jump. */ |
| 5225 | case on_failure_jump_smart: | 5244 | case on_failure_jump_smart: |
| 5226 | QUIT; | 5245 | QUIT; |
| 5227 | EXTRACT_NUMBER_AND_INCR (mcnt, p); | 5246 | EXTRACT_NUMBER_AND_INCR (mcnt, p); |
| @@ -5234,18 +5253,25 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5234 | p -= 3; /* Reset so that we will re-execute the | 5253 | p -= 3; /* Reset so that we will re-execute the |
| 5235 | instruction once it's been changed. */ | 5254 | instruction once it's been changed. */ |
| 5236 | 5255 | ||
| 5256 | EXTRACT_NUMBER (mcnt, p2 - 2); | ||
| 5257 | |||
| 5258 | /* Ensure this is a indeed the trivial kind of loop | ||
| 5259 | we are expecting. */ | ||
| 5260 | assert (skip_one_char (p1) == p2 - 3); | ||
| 5261 | assert ((re_opcode_t) p2[-3] == jump && p2 + mcnt == p); | ||
| 5237 | DEBUG_STATEMENT (debug += 2); | 5262 | DEBUG_STATEMENT (debug += 2); |
| 5238 | if (mutually_exclusive_p (bufp, p1, p2)) | 5263 | if (mutually_exclusive_p (bufp, p1, p2)) |
| 5239 | { | 5264 | { |
| 5240 | /* Use a fast `on_failure_keep_string_jump' loop. */ | 5265 | /* Use a fast `on_failure_keep_string_jump' loop. */ |
| 5241 | *p = (unsigned char) on_failure_jump_exclusive; | 5266 | DEBUG_PRINT1 (" smart exclusive => fast loop.\n"); |
| 5242 | /* STORE_NUMBER (p2 - 2, mcnt + 3); */ | 5267 | *p = (unsigned char) on_failure_keep_string_jump; |
| 5268 | STORE_NUMBER (p2 - 2, mcnt + 3); | ||
| 5243 | } | 5269 | } |
| 5244 | else | 5270 | else |
| 5245 | { | 5271 | { |
| 5246 | /* Default to a safe `on_failure_jump' loop. */ | 5272 | /* Default to a safe `on_failure_jump' loop. */ |
| 5247 | DEBUG_PRINT1 (" smart default => slow loop.\n"); | 5273 | DEBUG_PRINT1 (" smart default => slow loop.\n"); |
| 5248 | *p = (unsigned char) on_failure_jump_loop; | 5274 | *p = (unsigned char) on_failure_jump; |
| 5249 | } | 5275 | } |
| 5250 | DEBUG_STATEMENT (debug -= 2); | 5276 | DEBUG_STATEMENT (debug -= 2); |
| 5251 | } | 5277 | } |
| @@ -5600,14 +5626,6 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5600 | assert (str == NULL); | 5626 | assert (str == NULL); |
| 5601 | goto continue_failure_jump; | 5627 | goto continue_failure_jump; |
| 5602 | 5628 | ||
| 5603 | case on_failure_jump_exclusive: | ||
| 5604 | /* If something has matched, the alternative will not match, | ||
| 5605 | so we might as well keep popping right away. */ | ||
| 5606 | if (0 /* d != str && d != string2 */) /* Don't bother. -sm */ | ||
| 5607 | /* (d == string2 && str == end1) => (d =~ str) */ | ||
| 5608 | goto fail; | ||
| 5609 | /* Fallthrough */ | ||
| 5610 | |||
| 5611 | case on_failure_jump_loop: | 5629 | case on_failure_jump_loop: |
| 5612 | case on_failure_jump: | 5630 | case on_failure_jump: |
| 5613 | case succeed_n: | 5631 | case succeed_n: |