aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author: Paul Eggert, 2018-08-05 18:41:20 -0700
committer: Paul Eggert, 2018-08-05 19:36:10 -0700
commit: 03dfb6061bfd78d74564d678213ef95728a5f9eb (patch)
tree: ab203ec6be71c599f7fb3bccdf5a8aa4832c3372 /src
parent: 3a6abe65c1324361bf0efcb65df61d22a39cfaaf (diff)
download: emacs-03dfb6061bfd78d74564d678213ef95728a5f9eb.tar.gz
download: emacs-03dfb6061bfd78d74564d678213ef95728a5f9eb.zip
Simplify regex-emacs by assuming Emacs syntax
* src/regex-emacs.c (reg_syntax_t)
(RE_BACKSLASH_ESCAPE_IN_LISTS, RE_BK_PLUS_QM)
(RE_CHAR_CLASSES, RE_CONTEXT_INDEP_ANCHORS)
(RE_CONTEXT_INDEP_OPS, RE_CONTEXT_INVALID_OPS)
(RE_DOT_NEWLINE, RE_DOT_NOT_NULL, RE_HAT_LISTS_NOT_NEWLINE)
(RE_INTERVALS, RE_LIMITED_OPS, RE_NEWLINE_ALT)
(RE_NO_BK_BRACES, RE_NO_BK_PARENS, RE_NO_BK_REFS)
(RE_NO_BK_VBAR, RE_NO_EMPTY_RANGES)
(RE_UNMATCHED_RIGHT_PAREN_ORD, RE_NO_POSIX_BACKTRACKING)
(RE_NO_GNU_OPS, RE_FRUGAL, RE_SHY_GROUPS)
(RE_NO_NEWLINE_ANCHOR, RE_SYNTAX_EMACS, RE_TRANSLATE_P): Remove.
All uses removed and resulting code simplified.
(TRANSLATE): Treat nil as an absent translation table, not zero.
All uses changed.
Diffstat (limited to 'src')
-rw-r--r-- src/regex-emacs.c 493
-rw-r--r-- src/search.c 4
2 files changed, 70 insertions, 427 deletions
diff --git a/src/regex-emacs.c b/src/regex-emacs.c
index eb5970ffcf1..1ceb67ad297 100644
--- a/src/regex-emacs.c
+++ b/src/regex-emacs.c
@@ -50,133 +50,6 @@
50 ints. But Emacs only runs on 32 bit platforms anyway. */ 50 ints. But Emacs only runs on 32 bit platforms anyway. */
51#define RE_DUP_MAX (0xffff) 51#define RE_DUP_MAX (0xffff)
52 52
53/* The following bits are used to determine the regexp syntax we
54 recognize. The set/not-set meanings where historically chosen so
55 that Emacs syntax had the value 0.
56 The bits are given in alphabetical order, and
57 the definitions shifted by one from the previous bit; thus, when we
58 add or remove a bit, only one other definition need change. */
59typedef unsigned long reg_syntax_t;
60
61/* If this bit is not set, then \ inside a bracket expression is literal.
62 If set, then such a \ quotes the following character. */
63#define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1)
64
65/* If this bit is not set, then + and ? are operators, and \+ and \? are
66 literals.
67 If set, then \+ and \? are operators and + and ? are literals. */
68#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
69
70/* If this bit is set, then character classes are supported. They are:
71 [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:],
72 [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
73 If not set, then character classes are not supported. */
74#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
75
76/* If this bit is set, then ^ and $ are always anchors (outside bracket
77 expressions, of course).
78 If this bit is not set, then it depends:
79 ^ is an anchor if it is at the beginning of a regular
80 expression or after an open-group or an alternation operator;
81 $ is an anchor if it is at the end of a regular expression, or
82 before a close-group or an alternation operator.
83
84 This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
85 POSIX draft 11.2 says that * etc. in leading positions is undefined.
86 We already implemented a previous draft which made those constructs
87 invalid, though, so we haven't changed the code back. */
88#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
89
90/* If this bit is set, then special characters are always special
91 regardless of where they are in the pattern.
92 If this bit is not set, then special characters are special only in
93 some contexts; otherwise they are ordinary. Specifically,
94 * + ? and intervals are only special when not after the beginning,
95 open-group, or alternation operator. */
96#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
97
98/* If this bit is set, then *, +, ?, and { cannot be first in an re or
99 immediately after an alternation or begin-group operator. */
100#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
101
102/* If this bit is set, then . matches newline.
103 If not set, then it doesn't. */
104#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
105
106/* If this bit is set, then . doesn't match NUL.
107 If not set, then it does. */
108#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
109
110/* If this bit is set, nonmatching lists [^...] do not match newline.
111 If not set, they do. */
112#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
113
114/* If this bit is set, either \{...\} or {...} defines an
115 interval, depending on RE_NO_BK_BRACES.
116 If not set, \{, \}, {, and } are literals. */
117#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
118
119/* If this bit is set, +, ? and | aren't recognized as operators.
120 If not set, they are. */
121#define RE_LIMITED_OPS (RE_INTERVALS << 1)
122
123/* If this bit is set, newline is an alternation operator.
124 If not set, newline is literal. */
125#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
126
127/* If this bit is set, then `{...}' defines an interval, and \{ and \}
128 are literals.
129 If not set, then `\{...\}' defines an interval. */
130#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
131
132/* If this bit is set, (...) defines a group, and \( and \) are literals.
133 If not set, \(...\) defines a group, and ( and ) are literals. */
134#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
135
136/* If this bit is set, then \<digit> matches <digit>.
137 If not set, then \<digit> is a back-reference. */
138#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
139
140/* If this bit is set, then | is an alternation operator, and \| is literal.
141 If not set, then \| is an alternation operator, and | is literal. */
142#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
143
144/* If this bit is set, then an ending range point collating higher
145 than the starting range point, as in [z-a], is invalid.
146 If not set, then when ending range point collates higher than the
147 starting range point, the range is ignored. */
148#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
149
150/* If this bit is set, then an unmatched ) is ordinary.
151 If not set, then an unmatched ) is invalid. */
152#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
153
154/* If this bit is set, succeed as soon as we match the whole pattern,
155 without further backtracking. */
156#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)
157
158/* If this bit is set, do not process the GNU regex operators.
159 If not set, then the GNU regex operators are recognized. */
160#define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)
161
162/* If this bit is set, then *?, +? and ?? match non greedily. */
163#define RE_FRUGAL (RE_NO_GNU_OPS << 1)
164
165/* If this bit is set, then (?:...) is treated as a shy group. */
166#define RE_SHY_GROUPS (RE_FRUGAL << 1)
167
168/* If this bit is set, ^ and $ only match at beg/end of buffer. */
169#define RE_NO_NEWLINE_ANCHOR (RE_SHY_GROUPS << 1)
170
171/* This global variable defines the particular regexp syntax to use (for
172 some interfaces). When a regexp is compiled, the syntax used is
173 stored in the pattern buffer, so changing this does not affect
174 already-compiled regexps. */
175/* extern reg_syntax_t re_syntax_options; */
176/* Define combinations of the above bits for the standard possibilities. */
177#define RE_SYNTAX_EMACS \
178 (RE_CHAR_CLASSES | RE_INTERVALS | RE_SHY_GROUPS | RE_FRUGAL)
179
180/* Make syntax table lookup grant data in gl_state. */ 53/* Make syntax table lookup grant data in gl_state. */
181#define SYNTAX(c) syntax_property (c, 1) 54#define SYNTAX(c) syntax_property (c, 1)
182 55
@@ -1299,10 +1172,8 @@ static void insert_op1 (re_opcode_t op, unsigned char *loc,
1299 int arg, unsigned char *end); 1172 int arg, unsigned char *end);
1300static void insert_op2 (re_opcode_t op, unsigned char *loc, 1173static void insert_op2 (re_opcode_t op, unsigned char *loc,
1301 int arg1, int arg2, unsigned char *end); 1174 int arg1, int arg2, unsigned char *end);
1302static bool at_begline_loc_p (re_char *pattern, re_char *p, 1175static bool at_begline_loc_p (re_char *pattern, re_char *p);
1303 reg_syntax_t syntax); 1176static bool at_endline_loc_p (re_char *p, re_char *pend);
1304static bool at_endline_loc_p (re_char *p, re_char *pend,
1305 reg_syntax_t syntax);
1306static re_char *skip_one_char (re_char *p); 1177static re_char *skip_one_char (re_char *p);
1307static int analyze_first (re_char *p, re_char *pend, 1178static int analyze_first (re_char *p, re_char *pend,
1308 char *fastmap, const int multibyte); 1179 char *fastmap, const int multibyte);
@@ -1319,15 +1190,7 @@ static int analyze_first (re_char *p, re_char *pend,
1319 1190
1320 1191
1321#define RE_TRANSLATE(TBL, C) char_table_translate (TBL, C) 1192#define RE_TRANSLATE(TBL, C) char_table_translate (TBL, C)
1322#define RE_TRANSLATE_P(TBL) (!EQ (TBL, make_number (0))) 1193#define TRANSLATE(d) (!NILP (translate) ? RE_TRANSLATE (translate, d) : (d))
1323
1324/* If `translate' is non-zero, return translate[D], else just D. We
1325 cast the subscript to translate because some data is declared as
1326 `char *', to avoid warnings when a string constant is passed. But
1327 when we use a character as a subscript we must make it unsigned. */
1328#define TRANSLATE(d) \
1329 (RE_TRANSLATE_P (translate) ? RE_TRANSLATE (translate, (d)) : (d))
1330
1331 1194
1332/* Macros for outputting the compiled pattern into `buffer'. */ 1195/* Macros for outputting the compiled pattern into `buffer'. */
1333 1196
@@ -1847,8 +1710,6 @@ regex_compile (re_char *pattern, size_t size,
1847 const char *whitespace_regexp, 1710 const char *whitespace_regexp,
1848 struct re_pattern_buffer *bufp) 1711 struct re_pattern_buffer *bufp)
1849{ 1712{
1850 reg_syntax_t syntax = RE_SYNTAX_EMACS;
1851
1852 /* We fetch characters from PATTERN here. */ 1713 /* We fetch characters from PATTERN here. */
1853 int c, c1; 1714 int c, c1;
1854 1715
@@ -2011,51 +1872,24 @@ regex_compile (re_char *pattern, size_t size,
2011 } 1872 }
2012 1873
2013 case '^': 1874 case '^':
2014 { 1875 if (! (p == pattern + 1 || at_begline_loc_p (pattern, p)))
2015 if ( /* If at start of pattern, it's an operator. */ 1876 goto normal_char;
2016 p == pattern + 1 1877 BUF_PUSH (begline);
2017 /* If context independent, it's an operator. */
2018 || syntax & RE_CONTEXT_INDEP_ANCHORS
2019 /* Otherwise, depends on what's come before. */
2020 || at_begline_loc_p (pattern, p, syntax))
2021 BUF_PUSH ((syntax & RE_NO_NEWLINE_ANCHOR) ? begbuf : begline);
2022 else
2023 goto normal_char;
2024 }
2025 break; 1878 break;
2026 1879
2027
2028 case '$': 1880 case '$':
2029 { 1881 if (! (p == pend || at_endline_loc_p (p, pend)))
2030 if ( /* If at end of pattern, it's an operator. */ 1882 goto normal_char;
2031 p == pend 1883 BUF_PUSH (endline);
2032 /* If context independent, it's an operator. */ 1884 break;
2033 || syntax & RE_CONTEXT_INDEP_ANCHORS
2034 /* Otherwise, depends on what's next. */
2035 || at_endline_loc_p (p, pend, syntax))
2036 BUF_PUSH ((syntax & RE_NO_NEWLINE_ANCHOR) ? endbuf : endline);
2037 else
2038 goto normal_char;
2039 }
2040 break;
2041 1885
2042 1886
2043 case '+': 1887 case '+':
2044 case '?': 1888 case '?':
2045 if ((syntax & RE_BK_PLUS_QM)
2046 || (syntax & RE_LIMITED_OPS))
2047 goto normal_char;
2048 FALLTHROUGH;
2049 case '*': 1889 case '*':
2050 handle_plus:
2051 /* If there is no previous pattern... */ 1890 /* If there is no previous pattern... */
2052 if (!laststart) 1891 if (!laststart)
2053 { 1892 goto normal_char;
2054 if (syntax & RE_CONTEXT_INVALID_OPS)
2055 FREE_STACK_RETURN (REG_BADRPT);
2056 else if (!(syntax & RE_CONTEXT_INDEP_OPS))
2057 goto normal_char;
2058 }
2059 1893
2060 { 1894 {
2061 /* 1 means zero (many) matches is allowed. */ 1895 /* 1 means zero (many) matches is allowed. */
@@ -2069,8 +1903,7 @@ regex_compile (re_char *pattern, size_t size,
2069 1903
2070 for (;;) 1904 for (;;)
2071 { 1905 {
2072 if ((syntax & RE_FRUGAL) 1906 if (c == '?' && (zero_times_ok || many_times_ok))
2073 && c == '?' && (zero_times_ok || many_times_ok))
2074 greedy = false; 1907 greedy = false;
2075 else 1908 else
2076 { 1909 {
@@ -2078,25 +1911,10 @@ regex_compile (re_char *pattern, size_t size,
2078 many_times_ok |= c != '?'; 1911 many_times_ok |= c != '?';
2079 } 1912 }
2080 1913
2081 if (p == pend) 1914 if (! (p < pend && (*p == '*' || *p == '+' || *p == '?')))
2082 break;
2083 else if (*p == '*'
2084 || (!(syntax & RE_BK_PLUS_QM)
2085 && (*p == '+' || *p == '?')))
2086 ;
2087 else if (syntax & RE_BK_PLUS_QM && *p == '\\')
2088 {
2089 if (p+1 == pend)
2090 FREE_STACK_RETURN (REG_EESCAPE);
2091 if (p[1] == '+' || p[1] == '?')
2092 PATFETCH (c); /* Gobble up the backslash. */
2093 else
2094 break;
2095 }
2096 else
2097 break; 1915 break;
2098 /* If we get here, we found another repeat character. */ 1916 /* If we get here, we found another repeat character. */
2099 PATFETCH (c); 1917 c = *p++;
2100 } 1918 }
2101 1919
2102 /* Star, etc. applied to an empty pattern is equivalent 1920 /* Star, etc. applied to an empty pattern is equivalent
@@ -2228,24 +2046,18 @@ regex_compile (re_char *pattern, size_t size,
2228 /* Clear the whole map. */ 2046 /* Clear the whole map. */
2229 memset (b, 0, (1 << BYTEWIDTH) / BYTEWIDTH); 2047 memset (b, 0, (1 << BYTEWIDTH) / BYTEWIDTH);
2230 2048
2231 /* charset_not matches newline according to a syntax bit. */
2232 if ((re_opcode_t) b[-2] == charset_not
2233 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
2234 SET_LIST_BIT ('\n');
2235
2236 /* Read in characters and ranges, setting map bits. */ 2049 /* Read in characters and ranges, setting map bits. */
2237 for (;;) 2050 for (;;)
2238 { 2051 {
2239 const unsigned char *p2 = p; 2052 const unsigned char *p2 = p;
2240 re_wctype_t cc;
2241 int ch; 2053 int ch;
2242 2054
2243 if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 2055 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2244 2056
2245 /* See if we're at the beginning of a possible character 2057 /* See if we're at the beginning of a possible character
2246 class. */ 2058 class. */
2247 if (syntax & RE_CHAR_CLASSES && 2059 re_wctype_t cc = re_wctype_parse (&p, pend - p);
2248 (cc = re_wctype_parse(&p, pend - p)) != -1) 2060 if (cc != -1)
2249 { 2061 {
2250 if (cc == 0) 2062 if (cc == 0)
2251 FREE_STACK_RETURN (REG_ECTYPE); 2063 FREE_STACK_RETURN (REG_ECTYPE);
@@ -2297,21 +2109,11 @@ regex_compile (re_char *pattern, size_t size,
2297 (let ((case-fold-search t)) (string-match "[A-_]" "A")) */ 2109 (let ((case-fold-search t)) (string-match "[A-_]" "A")) */
2298 PATFETCH (c); 2110 PATFETCH (c);
2299 2111
2300 /* \ might escape characters inside [...] and [^...]. */ 2112 /* Could be the end of the bracket expression. If it's
2301 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') 2113 not (i.e., when the bracket expression is `[]' so
2302 { 2114 far), the ']' character bit gets set way below. */
2303 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); 2115 if (c == ']' && p2 != p1)
2304 2116 break;
2305 PATFETCH (c);
2306 }
2307 else
2308 {
2309 /* Could be the end of the bracket expression. If it's
2310 not (i.e., when the bracket expression is `[]' so
2311 far), the ']' character bit gets set way below. */
2312 if (c == ']' && p2 != p1)
2313 break;
2314 }
2315 2117
2316 if (p < pend && p[0] == '-' && p[1] != ']') 2118 if (p < pend && p[0] == '-' && p[1] != ']')
2317 { 2119 {
@@ -2332,13 +2134,7 @@ regex_compile (re_char *pattern, size_t size,
2332 /* Range from C to C. */ 2134 /* Range from C to C. */
2333 c1 = c; 2135 c1 = c;
2334 2136
2335 if (c > c1) 2137 if (c <= c1)
2336 {
2337 if (syntax & RE_NO_EMPTY_RANGES)
2338 FREE_STACK_RETURN (REG_ERANGEX);
2339 /* Else, repeat the loop. */
2340 }
2341 else
2342 { 2138 {
2343 if (c < 128) 2139 if (c < 128)
2344 { 2140 {
@@ -2348,24 +2144,17 @@ regex_compile (re_char *pattern, size_t size,
2348 if (CHAR_BYTE8_P (c1)) 2144 if (CHAR_BYTE8_P (c1))
2349 c = BYTE8_TO_CHAR (128); 2145 c = BYTE8_TO_CHAR (128);
2350 } 2146 }
2351 if (c <= c1) 2147 if (CHAR_BYTE8_P (c))
2352 { 2148 {
2353 if (CHAR_BYTE8_P (c)) 2149 c = CHAR_TO_BYTE8 (c);
2354 { 2150 c1 = CHAR_TO_BYTE8 (c1);
2355 c = CHAR_TO_BYTE8 (c); 2151 for (; c <= c1; c++)
2356 c1 = CHAR_TO_BYTE8 (c1); 2152 SET_LIST_BIT (c);
2357 for (; c <= c1; c++)
2358 SET_LIST_BIT (c);
2359 }
2360 else if (multibyte)
2361 {
2362 SETUP_MULTIBYTE_RANGE (range_table_work, c, c1);
2363 }
2364 else
2365 {
2366 SETUP_UNIBYTE_RANGE (range_table_work, c, c1);
2367 }
2368 } 2153 }
2154 else if (multibyte)
2155 SETUP_MULTIBYTE_RANGE (range_table_work, c, c1);
2156 else
2157 SETUP_UNIBYTE_RANGE (range_table_work, c, c1);
2369 } 2158 }
2370 } 2159 }
2371 2160
@@ -2403,41 +2192,6 @@ regex_compile (re_char *pattern, size_t size,
2403 break; 2192 break;
2404 2193
2405 2194
2406 case '(':
2407 if (syntax & RE_NO_BK_PARENS)
2408 goto handle_open;
2409 else
2410 goto normal_char;
2411
2412
2413 case ')':
2414 if (syntax & RE_NO_BK_PARENS)
2415 goto handle_close;
2416 else
2417 goto normal_char;
2418
2419
2420 case '\n':
2421 if (syntax & RE_NEWLINE_ALT)
2422 goto handle_alt;
2423 else
2424 goto normal_char;
2425
2426
2427 case '|':
2428 if (syntax & RE_NO_BK_VBAR)
2429 goto handle_alt;
2430 else
2431 goto normal_char;
2432
2433
2434 case '{':
2435 if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
2436 goto handle_interval;
2437 else
2438 goto normal_char;
2439
2440
2441 case '\\': 2195 case '\\':
2442 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); 2196 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2443 2197
@@ -2449,17 +2203,13 @@ regex_compile (re_char *pattern, size_t size,
2449 switch (c) 2203 switch (c)
2450 { 2204 {
2451 case '(': 2205 case '(':
2452 if (syntax & RE_NO_BK_PARENS)
2453 goto normal_backslash;
2454
2455 handle_open:
2456 { 2206 {
2457 int shy = 0; 2207 int shy = 0;
2458 regnum_t regnum = 0; 2208 regnum_t regnum = 0;
2459 if (p+1 < pend) 2209 if (p+1 < pend)
2460 { 2210 {
2461 /* Look for a special (?...) construct */ 2211 /* Look for a special (?...) construct */
2462 if ((syntax & RE_SHY_GROUPS) && *p == '?') 2212 if (*p == '?')
2463 { 2213 {
2464 PATFETCH (c); /* Gobble up the '?'. */ 2214 PATFETCH (c); /* Gobble up the '?'. */
2465 while (!shy) 2215 while (!shy)
@@ -2540,27 +2290,14 @@ regex_compile (re_char *pattern, size_t size,
2540 } 2290 }
2541 2291
2542 case ')': 2292 case ')':
2543 if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
2544
2545 if (COMPILE_STACK_EMPTY) 2293 if (COMPILE_STACK_EMPTY)
2546 { 2294 FREE_STACK_RETURN (REG_ERPAREN);
2547 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2548 goto normal_backslash;
2549 else
2550 FREE_STACK_RETURN (REG_ERPAREN);
2551 }
2552 2295
2553 handle_close:
2554 FIXUP_ALT_JUMP (); 2296 FIXUP_ALT_JUMP ();
2555 2297
2556 /* See similar code for backslashed left paren above. */ 2298 /* See similar code for backslashed left paren above. */
2557 if (COMPILE_STACK_EMPTY) 2299 if (COMPILE_STACK_EMPTY)
2558 { 2300 FREE_STACK_RETURN (REG_ERPAREN);
2559 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2560 goto normal_char;
2561 else
2562 FREE_STACK_RETURN (REG_ERPAREN);
2563 }
2564 2301
2565 /* Since we just checked for an empty stack above, this 2302 /* Since we just checked for an empty stack above, this
2566 ``can't happen''. */ 2303 ``can't happen''. */
@@ -2593,12 +2330,6 @@ regex_compile (re_char *pattern, size_t size,
2593 2330
2594 2331
2595 case '|': /* `\|'. */ 2332 case '|': /* `\|'. */
2596 if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
2597 goto normal_backslash;
2598 handle_alt:
2599 if (syntax & RE_LIMITED_OPS)
2600 goto normal_char;
2601
2602 /* Insert before the previous alternative a jump which 2333 /* Insert before the previous alternative a jump which
2603 jumps to this alternative if the former fails. */ 2334 jumps to this alternative if the former fails. */
2604 GET_BUFFER_SPACE (3); 2335 GET_BUFFER_SPACE (3);
@@ -2637,17 +2368,7 @@ regex_compile (re_char *pattern, size_t size,
2637 2368
2638 2369
2639 case '{': 2370 case '{':
2640 /* If \{ is a literal. */
2641 if (!(syntax & RE_INTERVALS)
2642 /* If we're at `\{' and it's not the open-interval
2643 operator. */
2644 || (syntax & RE_NO_BK_BRACES))
2645 goto normal_backslash;
2646
2647 handle_interval:
2648 { 2371 {
2649 /* If got here, then the syntax allows intervals. */
2650
2651 /* At least (most) this many matches must be made. */ 2372 /* At least (most) this many matches must be made. */
2652 int lower_bound = 0, upper_bound = -1; 2373 int lower_bound = 0, upper_bound = -1;
2653 2374
@@ -2662,33 +2383,19 @@ regex_compile (re_char *pattern, size_t size,
2662 upper_bound = lower_bound; 2383 upper_bound = lower_bound;
2663 2384
2664 if (lower_bound < 0 2385 if (lower_bound < 0
2665 || (0 <= upper_bound && upper_bound < lower_bound)) 2386 || (0 <= upper_bound && upper_bound < lower_bound)
2387 || c != '\\')
2666 FREE_STACK_RETURN (REG_BADBR); 2388 FREE_STACK_RETURN (REG_BADBR);
2667 2389 if (p == pend)
2668 if (!(syntax & RE_NO_BK_BRACES)) 2390 FREE_STACK_RETURN (REG_EESCAPE);
2669 { 2391 if (*p++ != '}')
2670 if (c != '\\')
2671 FREE_STACK_RETURN (REG_BADBR);
2672 if (p == pend)
2673 FREE_STACK_RETURN (REG_EESCAPE);
2674 PATFETCH (c);
2675 }
2676
2677 if (c != '}')
2678 FREE_STACK_RETURN (REG_BADBR); 2392 FREE_STACK_RETURN (REG_BADBR);
2679 2393
2680 /* We just parsed a valid interval. */ 2394 /* We just parsed a valid interval. */
2681 2395
2682 /* If it's invalid to have no preceding re. */ 2396 /* If it's invalid to have no preceding re. */
2683 if (!laststart) 2397 if (!laststart)
2684 { 2398 goto unfetch_interval;
2685 if (syntax & RE_CONTEXT_INVALID_OPS)
2686 FREE_STACK_RETURN (REG_BADRPT);
2687 else if (syntax & RE_CONTEXT_INDEP_OPS)
2688 laststart = b;
2689 else
2690 goto unfetch_interval;
2691 }
2692 2399
2693 if (upper_bound == 0) 2400 if (upper_bound == 0)
2694 /* If the upper bound is zero, just drop the sub pattern 2401 /* If the upper bound is zero, just drop the sub pattern
@@ -2793,17 +2500,9 @@ regex_compile (re_char *pattern, size_t size,
2793 eassert (beg_interval); 2500 eassert (beg_interval);
2794 p = beg_interval; 2501 p = beg_interval;
2795 beg_interval = NULL; 2502 beg_interval = NULL;
2796 2503 eassert (p > pattern && p[-1] == '\\');
2797 /* normal_char and normal_backslash need `c'. */
2798 c = '{'; 2504 c = '{';
2799 2505 goto normal_char;
2800 if (!(syntax & RE_NO_BK_BRACES))
2801 {
2802 eassert (p > pattern && p[-1] == '\\');
2803 goto normal_backslash;
2804 }
2805 else
2806 goto normal_char;
2807 2506
2808 case '=': 2507 case '=':
2809 laststart = b; 2508 laststart = b;
@@ -2835,38 +2534,28 @@ regex_compile (re_char *pattern, size_t size,
2835 break; 2534 break;
2836 2535
2837 case 'w': 2536 case 'w':
2838 if (syntax & RE_NO_GNU_OPS)
2839 goto normal_char;
2840 laststart = b; 2537 laststart = b;
2841 BUF_PUSH_2 (syntaxspec, Sword); 2538 BUF_PUSH_2 (syntaxspec, Sword);
2842 break; 2539 break;
2843 2540
2844 2541
2845 case 'W': 2542 case 'W':
2846 if (syntax & RE_NO_GNU_OPS)
2847 goto normal_char;
2848 laststart = b; 2543 laststart = b;
2849 BUF_PUSH_2 (notsyntaxspec, Sword); 2544 BUF_PUSH_2 (notsyntaxspec, Sword);
2850 break; 2545 break;
2851 2546
2852 2547
2853 case '<': 2548 case '<':
2854 if (syntax & RE_NO_GNU_OPS)
2855 goto normal_char;
2856 laststart = b; 2549 laststart = b;
2857 BUF_PUSH (wordbeg); 2550 BUF_PUSH (wordbeg);
2858 break; 2551 break;
2859 2552
2860 case '>': 2553 case '>':
2861 if (syntax & RE_NO_GNU_OPS)
2862 goto normal_char;
2863 laststart = b; 2554 laststart = b;
2864 BUF_PUSH (wordend); 2555 BUF_PUSH (wordend);
2865 break; 2556 break;
2866 2557
2867 case '_': 2558 case '_':
2868 if (syntax & RE_NO_GNU_OPS)
2869 goto normal_char;
2870 laststart = b; 2559 laststart = b;
2871 PATFETCH (c); 2560 PATFETCH (c);
2872 if (c == '<') 2561 if (c == '<')
@@ -2878,38 +2567,25 @@ regex_compile (re_char *pattern, size_t size,
2878 break; 2567 break;
2879 2568
2880 case 'b': 2569 case 'b':
2881 if (syntax & RE_NO_GNU_OPS)
2882 goto normal_char;
2883 BUF_PUSH (wordbound); 2570 BUF_PUSH (wordbound);
2884 break; 2571 break;
2885 2572
2886 case 'B': 2573 case 'B':
2887 if (syntax & RE_NO_GNU_OPS)
2888 goto normal_char;
2889 BUF_PUSH (notwordbound); 2574 BUF_PUSH (notwordbound);
2890 break; 2575 break;
2891 2576
2892 case '`': 2577 case '`':
2893 if (syntax & RE_NO_GNU_OPS)
2894 goto normal_char;
2895 BUF_PUSH (begbuf); 2578 BUF_PUSH (begbuf);
2896 break; 2579 break;
2897 2580
2898 case '\'': 2581 case '\'':
2899 if (syntax & RE_NO_GNU_OPS)
2900 goto normal_char;
2901 BUF_PUSH (endbuf); 2582 BUF_PUSH (endbuf);
2902 break; 2583 break;
2903 2584
2904 case '1': case '2': case '3': case '4': case '5': 2585 case '1': case '2': case '3': case '4': case '5':
2905 case '6': case '7': case '8': case '9': 2586 case '6': case '7': case '8': case '9':
2906 { 2587 {
2907 regnum_t reg; 2588 regnum_t reg = c - '0';
2908
2909 if (syntax & RE_NO_BK_REFS)
2910 goto normal_backslash;
2911
2912 reg = c - '0';
2913 2589
2914 if (reg > bufp->re_nsub || reg < 1 2590 if (reg > bufp->re_nsub || reg < 1
2915 /* Can't back reference to a subexp before its end. */ 2591 /* Can't back reference to a subexp before its end. */
@@ -2921,16 +2597,7 @@ regex_compile (re_char *pattern, size_t size,
2921 } 2597 }
2922 break; 2598 break;
2923 2599
2924
2925 case '+':
2926 case '?':
2927 if (syntax & RE_BK_PLUS_QM)
2928 goto handle_plus;
2929 else
2930 goto normal_backslash;
2931
2932 default: 2600 default:
2933 normal_backslash:
2934 /* You might think it would be useful for \ to mean 2601 /* You might think it would be useful for \ to mean
2935 not to translate; but if we don't translate it 2602 not to translate; but if we don't translate it
2936 it will never match anything. */ 2603 it will never match anything. */
@@ -2952,14 +2619,9 @@ regex_compile (re_char *pattern, size_t size,
2952 || *pending_exact >= (1 << BYTEWIDTH) - MAX_MULTIBYTE_LENGTH 2619 || *pending_exact >= (1 << BYTEWIDTH) - MAX_MULTIBYTE_LENGTH
2953 2620
2954 /* If followed by a repetition operator. */ 2621 /* If followed by a repetition operator. */
2955 || (p != pend && (*p == '*' || *p == '^')) 2622 || (p != pend
2956 || ((syntax & RE_BK_PLUS_QM) 2623 && (*p == '*' || *p == '+' || *p == '?' || *p == '^'))
2957 ? p + 1 < pend && *p == '\\' && (p[1] == '+' || p[1] == '?') 2624 || (p + 1 < pend && p[0] == '\\' && p[1] == '{'))
2958 : p != pend && (*p == '+' || *p == '?'))
2959 || ((syntax & RE_INTERVALS)
2960 && ((syntax & RE_NO_BK_BRACES)
2961 ? p != pend && *p == '{'
2962 : p + 1 < pend && p[0] == '\\' && p[1] == '{')))
2963 { 2625 {
2964 /* Start building a new exactn. */ 2626 /* Start building a new exactn. */
2965 2627
@@ -3088,40 +2750,35 @@ insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, unsigned cha
3088 least one character before the ^. */ 2750 least one character before the ^. */
3089 2751
3090static bool 2752static bool
3091at_begline_loc_p (re_char *pattern, re_char *p, reg_syntax_t syntax) 2753at_begline_loc_p (re_char *pattern, re_char *p)
3092{ 2754{
3093 re_char *prev = p - 2; 2755 re_char *prev = p - 2;
3094 bool odd_backslashes;
3095
3096 /* After a subexpression? */
3097 if (*prev == '(')
3098 odd_backslashes = (syntax & RE_NO_BK_PARENS) == 0;
3099 2756
3100 /* After an alternative? */ 2757 switch (*prev)
3101 else if (*prev == '|')
3102 odd_backslashes = (syntax & RE_NO_BK_VBAR) == 0;
3103
3104 /* After a shy subexpression? */
3105 else if (*prev == ':' && (syntax & RE_SHY_GROUPS))
3106 { 2758 {
2759 case '(': /* After a subexpression. */
2760 case '|': /* After an alternative. */
2761 break;
2762
2763 case ':': /* After a shy subexpression. */
3107 /* Skip over optional regnum. */ 2764 /* Skip over optional regnum. */
3108 while (prev - 1 >= pattern && prev[-1] >= '0' && prev[-1] <= '9') 2765 while (prev > pattern && '0' <= prev[-1] && prev[-1] <= '9')
3109 --prev; 2766 --prev;
3110 2767
3111 if (!(prev - 2 >= pattern 2768 if (! (prev > pattern + 1 && prev[-1] == '?' && prev[-2] == '('))
3112 && prev[-1] == '?' && prev[-2] == '('))
3113 return false; 2769 return false;
3114 prev -= 2; 2770 prev -= 2;
3115 odd_backslashes = (syntax & RE_NO_BK_PARENS) == 0; 2771 break;
2772
2773 default:
2774 return false;
3116 } 2775 }
3117 else
3118 return false;
3119 2776
3120 /* Count the number of preceding backslashes. */ 2777 /* Count the number of preceding backslashes. */
3121 p = prev; 2778 p = prev;
3122 while (prev - 1 >= pattern && prev[-1] == '\\') 2779 while (prev > pattern && prev[-1] == '\\')
3123 --prev; 2780 --prev;
3124 return (p - prev) & odd_backslashes; 2781 return (p - prev) & 1;
3125} 2782}
3126 2783
3127 2784
@@ -3129,19 +2786,10 @@ at_begline_loc_p (re_char *pattern, re_char *p, reg_syntax_t syntax)
3129 at least one character after the $, i.e., `P < PEND'. */ 2786 at least one character after the $, i.e., `P < PEND'. */
3130 2787
3131static bool 2788static bool
3132at_endline_loc_p (re_char *p, re_char *pend, reg_syntax_t syntax) 2789at_endline_loc_p (re_char *p, re_char *pend)
3133{ 2790{
3134 re_char *next = p; 2791 /* Before a subexpression or an alternative? */
3135 bool next_backslash = *next == '\\'; 2792 return *p == '\\' && p + 1 < pend && (p[1] == ')' || p[1] == '|');
3136 re_char *next_next = p + 1 < pend ? p + 1 : 0;
3137
3138 return
3139 /* Before a subexpression? */
3140 (syntax & RE_NO_BK_PARENS ? *next == ')'
3141 : next_backslash && next_next && *next_next == ')')
3142 /* Before an alternative? */
3143 || (syntax & RE_NO_BK_VBAR ? *next == '|'
3144 : next_backslash && next_next && *next_next == '|');
3145} 2793}
3146 2794
3147 2795
@@ -3655,7 +3303,7 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1,
3655 3303
3656 /* Written out as an if-else to avoid testing `translate' 3304 /* Written out as an if-else to avoid testing `translate'
3657 inside the loop. */ 3305 inside the loop. */
3658 if (RE_TRANSLATE_P (translate)) 3306 if (!NILP (translate))
3659 { 3307 {
3660 if (multibyte) 3308 if (multibyte)
3661 while (range > lim) 3309 while (range > lim)
@@ -4643,12 +4291,11 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
4643 break; 4291 break;
4644 4292
4645 4293
4646 /* Match any character except possibly a newline or a null. */ 4294 /* Match any character except newline. */
4647 case anychar: 4295 case anychar:
4648 { 4296 {
4649 int buf_charlen; 4297 int buf_charlen;
4650 int buf_ch; 4298 int buf_ch;
4651 reg_syntax_t syntax;
4652 4299
4653 DEBUG_PRINT ("EXECUTING anychar.\n"); 4300 DEBUG_PRINT ("EXECUTING anychar.\n");
4654 4301
@@ -4656,11 +4303,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
4656 buf_ch = RE_STRING_CHAR_AND_LENGTH (d, buf_charlen, 4303 buf_ch = RE_STRING_CHAR_AND_LENGTH (d, buf_charlen,
4657 target_multibyte); 4304 target_multibyte);
4658 buf_ch = TRANSLATE (buf_ch); 4305 buf_ch = TRANSLATE (buf_ch);
4659 4306 if (buf_ch == '\n')
4660 syntax = RE_SYNTAX_EMACS;
4661
4662 if ((!(syntax & RE_DOT_NEWLINE) && buf_ch == '\n')
4663 || ((syntax & RE_DOT_NOT_NULL) && buf_ch == '\000'))
4664 goto fail; 4307 goto fail;
4665 4308
4666 DEBUG_PRINT (" Matched \"%d\".\n", *d); 4309 DEBUG_PRINT (" Matched \"%d\".\n", *d);
@@ -4826,7 +4469,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
4826 4469
4827 /* Compare that many; failure if mismatch, else move 4470 /* Compare that many; failure if mismatch, else move
4828 past them. */ 4471 past them. */
4829 if (RE_TRANSLATE_P (translate) 4472 if (!NILP (translate)
4830 ? bcmp_translate (d, d2, dcnt, translate, target_multibyte) 4473 ? bcmp_translate (d, d2, dcnt, translate, target_multibyte)
4831 : memcmp (d, d2, dcnt)) 4474 : memcmp (d, d2, dcnt))
4832 { 4475 {
diff --git a/src/search.c b/src/search.c
index f758bb9304a..4e5a2530114 100644
--- a/src/search.c
+++ b/src/search.c
@@ -132,7 +132,7 @@ compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern,
132 132
133 eassert (!cp->busy); 133 eassert (!cp->busy);
134 cp->regexp = Qnil; 134 cp->regexp = Qnil;
135 cp->buf.translate = (! NILP (translate) ? translate : make_number (0)); 135 cp->buf.translate = translate;
136 cp->posix = posix; 136 cp->posix = posix;
137 cp->buf.multibyte = STRING_MULTIBYTE (pattern); 137 cp->buf.multibyte = STRING_MULTIBYTE (pattern);
138 cp->buf.charset_unibyte = charset_unibyte; 138 cp->buf.charset_unibyte = charset_unibyte;
@@ -238,7 +238,7 @@ compile_pattern (Lisp_Object pattern, struct re_registers *regp,
238 && !cp->busy 238 && !cp->busy
239 && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern) 239 && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
240 && !NILP (Fstring_equal (cp->regexp, pattern)) 240 && !NILP (Fstring_equal (cp->regexp, pattern))
241 && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0))) 241 && EQ (cp->buf.translate, translate)
242 && cp->posix == posix 242 && cp->posix == posix
243 && (EQ (cp->syntax_table, Qt) 243 && (EQ (cp->syntax_table, Qt)
244 || EQ (cp->syntax_table, BVAR (current_buffer, syntax_table))) 244 || EQ (cp->syntax_table, BVAR (current_buffer, syntax_table)))