diff options
| author | Paul Eggert | 2018-08-05 18:41:20 -0700 |
|---|---|---|
| committer | Paul Eggert | 2018-08-05 19:36:10 -0700 |
| commit | 03dfb6061bfd78d74564d678213ef95728a5f9eb (patch) | |
| tree | ab203ec6be71c599f7fb3bccdf5a8aa4832c3372 /src | |
| parent | 3a6abe65c1324361bf0efcb65df61d22a39cfaaf (diff) | |
| download | emacs-03dfb6061bfd78d74564d678213ef95728a5f9eb.tar.gz emacs-03dfb6061bfd78d74564d678213ef95728a5f9eb.zip | |
Simplify regex-emacs by assuming Emacs syntax
* src/regex-emacs.c (reg_syntax_t)
(RE_BACKSLASH_ESCAPE_IN_LISTS, RE_BK_PLUS_QM)
(RE_CHAR_CLASSES, RE_CONTEXT_INDEP_ANCHORS)
(RE_CONTEXT_INDEP_OPS, RE_CONTEXT_INVALID_OPS)
(RE_DOT_NEWLINE, RE_DOT_NOT_NULL, RE_HAT_LISTS_NOT_NEWLINE)
(RE_INTERVALS, RE_LIMITED_OPS, RE_NEWLINE_ALT)
(RE_NO_BK_BRACES, RE_NO_BK_PARENS, RE_NO_BK_REFS)
(RE_NO_BK_VBAR, RE_NO_EMPTY_RANGES)
(RE_UNMATCHED_RIGHT_PAREN_ORD, RE_NO_POSIX_BACKTRACKING)
(RE_NO_GNU_OPS, RE_FRUGAL, RE_SHY_GROUPS)
(RE_NO_NEWLINE_ANCHOR, RE_SYNTAX_EMACS, RE_TRANSLATE_P):
Remove. All uses removed and resulting code simplified.
(TRANSLATE): Treat nil as an absent translation table, not zero.
All uses changed.
Diffstat (limited to 'src')
| -rw-r--r-- | src/regex-emacs.c | 493 | ||||
| -rw-r--r-- | src/search.c | 4 |
2 files changed, 70 insertions, 427 deletions
diff --git a/src/regex-emacs.c b/src/regex-emacs.c
index eb5970ffcf1..1ceb67ad297 100644
--- a/src/regex-emacs.c
+++ b/src/regex-emacs.c
| @@ -50,133 +50,6 @@ | |||
| 50 | ints. But Emacs only runs on 32 bit platforms anyway. */ | 50 | ints. But Emacs only runs on 32 bit platforms anyway. */ |
| 51 | #define RE_DUP_MAX (0xffff) | 51 | #define RE_DUP_MAX (0xffff) |
| 52 | 52 | ||
| 53 | /* The following bits are used to determine the regexp syntax we | ||
| 54 | recognize. The set/not-set meanings were historically chosen so | ||
| 55 | that Emacs syntax had the value 0. | ||
| 56 | The bits are given in alphabetical order, and | ||
| 57 | the definitions shifted by one from the previous bit; thus, when we | ||
| 58 | add or remove a bit, only one other definition need change. */ | ||
| 59 | typedef unsigned long reg_syntax_t; | ||
| 60 | |||
| 61 | /* If this bit is not set, then \ inside a bracket expression is literal. | ||
| 62 | If set, then such a \ quotes the following character. */ | ||
| 63 | #define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1) | ||
| 64 | |||
| 65 | /* If this bit is not set, then + and ? are operators, and \+ and \? are | ||
| 66 | literals. | ||
| 67 | If set, then \+ and \? are operators and + and ? are literals. */ | ||
| 68 | #define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1) | ||
| 69 | |||
| 70 | /* If this bit is set, then character classes are supported. They are: | ||
| 71 | [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:], | ||
| 72 | [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:]. | ||
| 73 | If not set, then character classes are not supported. */ | ||
| 74 | #define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1) | ||
| 75 | |||
| 76 | /* If this bit is set, then ^ and $ are always anchors (outside bracket | ||
| 77 | expressions, of course). | ||
| 78 | If this bit is not set, then it depends: | ||
| 79 | ^ is an anchor if it is at the beginning of a regular | ||
| 80 | expression or after an open-group or an alternation operator; | ||
| 81 | $ is an anchor if it is at the end of a regular expression, or | ||
| 82 | before a close-group or an alternation operator. | ||
| 83 | |||
| 84 | This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because | ||
| 85 | POSIX draft 11.2 says that * etc. in leading positions is undefined. | ||
| 86 | We already implemented a previous draft which made those constructs | ||
| 87 | invalid, though, so we haven't changed the code back. */ | ||
| 88 | #define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1) | ||
| 89 | |||
| 90 | /* If this bit is set, then special characters are always special | ||
| 91 | regardless of where they are in the pattern. | ||
| 92 | If this bit is not set, then special characters are special only in | ||
| 93 | some contexts; otherwise they are ordinary. Specifically, | ||
| 94 | * + ? and intervals are only special when not after the beginning, | ||
| 95 | open-group, or alternation operator. */ | ||
| 96 | #define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1) | ||
| 97 | |||
| 98 | /* If this bit is set, then *, +, ?, and { cannot be first in an re or | ||
| 99 | immediately after an alternation or begin-group operator. */ | ||
| 100 | #define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1) | ||
| 101 | |||
| 102 | /* If this bit is set, then . matches newline. | ||
| 103 | If not set, then it doesn't. */ | ||
| 104 | #define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1) | ||
| 105 | |||
| 106 | /* If this bit is set, then . doesn't match NUL. | ||
| 107 | If not set, then it does. */ | ||
| 108 | #define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1) | ||
| 109 | |||
| 110 | /* If this bit is set, nonmatching lists [^...] do not match newline. | ||
| 111 | If not set, they do. */ | ||
| 112 | #define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1) | ||
| 113 | |||
| 114 | /* If this bit is set, either \{...\} or {...} defines an | ||
| 115 | interval, depending on RE_NO_BK_BRACES. | ||
| 116 | If not set, \{, \}, {, and } are literals. */ | ||
| 117 | #define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1) | ||
| 118 | |||
| 119 | /* If this bit is set, +, ? and | aren't recognized as operators. | ||
| 120 | If not set, they are. */ | ||
| 121 | #define RE_LIMITED_OPS (RE_INTERVALS << 1) | ||
| 122 | |||
| 123 | /* If this bit is set, newline is an alternation operator. | ||
| 124 | If not set, newline is literal. */ | ||
| 125 | #define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1) | ||
| 126 | |||
| 127 | /* If this bit is set, then `{...}' defines an interval, and \{ and \} | ||
| 128 | are literals. | ||
| 129 | If not set, then `\{...\}' defines an interval. */ | ||
| 130 | #define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1) | ||
| 131 | |||
| 132 | /* If this bit is set, (...) defines a group, and \( and \) are literals. | ||
| 133 | If not set, \(...\) defines a group, and ( and ) are literals. */ | ||
| 134 | #define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1) | ||
| 135 | |||
| 136 | /* If this bit is set, then \<digit> matches <digit>. | ||
| 137 | If not set, then \<digit> is a back-reference. */ | ||
| 138 | #define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1) | ||
| 139 | |||
| 140 | /* If this bit is set, then | is an alternation operator, and \| is literal. | ||
| 141 | If not set, then \| is an alternation operator, and | is literal. */ | ||
| 142 | #define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1) | ||
| 143 | |||
| 144 | /* If this bit is set, then an ending range point collating higher | ||
| 145 | than the starting range point, as in [z-a], is invalid. | ||
| 146 | If not set, then when ending range point collates higher than the | ||
| 147 | starting range point, the range is ignored. */ | ||
| 148 | #define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1) | ||
| 149 | |||
| 150 | /* If this bit is set, then an unmatched ) is ordinary. | ||
| 151 | If not set, then an unmatched ) is invalid. */ | ||
| 152 | #define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1) | ||
| 153 | |||
| 154 | /* If this bit is set, succeed as soon as we match the whole pattern, | ||
| 155 | without further backtracking. */ | ||
| 156 | #define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1) | ||
| 157 | |||
| 158 | /* If this bit is set, do not process the GNU regex operators. | ||
| 159 | If not set, then the GNU regex operators are recognized. */ | ||
| 160 | #define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1) | ||
| 161 | |||
| 162 | /* If this bit is set, then *?, +? and ?? match non greedily. */ | ||
| 163 | #define RE_FRUGAL (RE_NO_GNU_OPS << 1) | ||
| 164 | |||
| 165 | /* If this bit is set, then (?:...) is treated as a shy group. */ | ||
| 166 | #define RE_SHY_GROUPS (RE_FRUGAL << 1) | ||
| 167 | |||
| 168 | /* If this bit is set, ^ and $ only match at beg/end of buffer. */ | ||
| 169 | #define RE_NO_NEWLINE_ANCHOR (RE_SHY_GROUPS << 1) | ||
| 170 | |||
| 171 | /* This global variable defines the particular regexp syntax to use (for | ||
| 172 | some interfaces). When a regexp is compiled, the syntax used is | ||
| 173 | stored in the pattern buffer, so changing this does not affect | ||
| 174 | already-compiled regexps. */ | ||
| 175 | /* extern reg_syntax_t re_syntax_options; */ | ||
| 176 | /* Define combinations of the above bits for the standard possibilities. */ | ||
| 177 | #define RE_SYNTAX_EMACS \ | ||
| 178 | (RE_CHAR_CLASSES | RE_INTERVALS | RE_SHY_GROUPS | RE_FRUGAL) | ||
| 179 | |||
| 180 | /* Make syntax table lookup grant data in gl_state. */ | 53 | /* Make syntax table lookup grant data in gl_state. */ |
| 181 | #define SYNTAX(c) syntax_property (c, 1) | 54 | #define SYNTAX(c) syntax_property (c, 1) |
| 182 | 55 | ||
| @@ -1299,10 +1172,8 @@ static void insert_op1 (re_opcode_t op, unsigned char *loc, | |||
| 1299 | int arg, unsigned char *end); | 1172 | int arg, unsigned char *end); |
| 1300 | static void insert_op2 (re_opcode_t op, unsigned char *loc, | 1173 | static void insert_op2 (re_opcode_t op, unsigned char *loc, |
| 1301 | int arg1, int arg2, unsigned char *end); | 1174 | int arg1, int arg2, unsigned char *end); |
| 1302 | static bool at_begline_loc_p (re_char *pattern, re_char *p, | 1175 | static bool at_begline_loc_p (re_char *pattern, re_char *p); |
| 1303 | reg_syntax_t syntax); | 1176 | static bool at_endline_loc_p (re_char *p, re_char *pend); |
| 1304 | static bool at_endline_loc_p (re_char *p, re_char *pend, | ||
| 1305 | reg_syntax_t syntax); | ||
| 1306 | static re_char *skip_one_char (re_char *p); | 1177 | static re_char *skip_one_char (re_char *p); |
| 1307 | static int analyze_first (re_char *p, re_char *pend, | 1178 | static int analyze_first (re_char *p, re_char *pend, |
| 1308 | char *fastmap, const int multibyte); | 1179 | char *fastmap, const int multibyte); |
| @@ -1319,15 +1190,7 @@ static int analyze_first (re_char *p, re_char *pend, | |||
| 1319 | 1190 | ||
| 1320 | 1191 | ||
| 1321 | #define RE_TRANSLATE(TBL, C) char_table_translate (TBL, C) | 1192 | #define RE_TRANSLATE(TBL, C) char_table_translate (TBL, C) |
| 1322 | #define RE_TRANSLATE_P(TBL) (!EQ (TBL, make_number (0))) | 1193 | #define TRANSLATE(d) (!NILP (translate) ? RE_TRANSLATE (translate, d) : (d)) |
| 1323 | |||
| 1324 | /* If `translate' is non-zero, return translate[D], else just D. We | ||
| 1325 | cast the subscript to translate because some data is declared as | ||
| 1326 | `char *', to avoid warnings when a string constant is passed. But | ||
| 1327 | when we use a character as a subscript we must make it unsigned. */ | ||
| 1328 | #define TRANSLATE(d) \ | ||
| 1329 | (RE_TRANSLATE_P (translate) ? RE_TRANSLATE (translate, (d)) : (d)) | ||
| 1330 | |||
| 1331 | 1194 | ||
| 1332 | /* Macros for outputting the compiled pattern into `buffer'. */ | 1195 | /* Macros for outputting the compiled pattern into `buffer'. */ |
| 1333 | 1196 | ||
| @@ -1847,8 +1710,6 @@ regex_compile (re_char *pattern, size_t size, | |||
| 1847 | const char *whitespace_regexp, | 1710 | const char *whitespace_regexp, |
| 1848 | struct re_pattern_buffer *bufp) | 1711 | struct re_pattern_buffer *bufp) |
| 1849 | { | 1712 | { |
| 1850 | reg_syntax_t syntax = RE_SYNTAX_EMACS; | ||
| 1851 | |||
| 1852 | /* We fetch characters from PATTERN here. */ | 1713 | /* We fetch characters from PATTERN here. */ |
| 1853 | int c, c1; | 1714 | int c, c1; |
| 1854 | 1715 | ||
| @@ -2011,51 +1872,24 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2011 | } | 1872 | } |
| 2012 | 1873 | ||
| 2013 | case '^': | 1874 | case '^': |
| 2014 | { | 1875 | if (! (p == pattern + 1 || at_begline_loc_p (pattern, p))) |
| 2015 | if ( /* If at start of pattern, it's an operator. */ | 1876 | goto normal_char; |
| 2016 | p == pattern + 1 | 1877 | BUF_PUSH (begline); |
| 2017 | /* If context independent, it's an operator. */ | ||
| 2018 | || syntax & RE_CONTEXT_INDEP_ANCHORS | ||
| 2019 | /* Otherwise, depends on what's come before. */ | ||
| 2020 | || at_begline_loc_p (pattern, p, syntax)) | ||
| 2021 | BUF_PUSH ((syntax & RE_NO_NEWLINE_ANCHOR) ? begbuf : begline); | ||
| 2022 | else | ||
| 2023 | goto normal_char; | ||
| 2024 | } | ||
| 2025 | break; | 1878 | break; |
| 2026 | 1879 | ||
| 2027 | |||
| 2028 | case '$': | 1880 | case '$': |
| 2029 | { | 1881 | if (! (p == pend || at_endline_loc_p (p, pend))) |
| 2030 | if ( /* If at end of pattern, it's an operator. */ | 1882 | goto normal_char; |
| 2031 | p == pend | 1883 | BUF_PUSH (endline); |
| 2032 | /* If context independent, it's an operator. */ | 1884 | break; |
| 2033 | || syntax & RE_CONTEXT_INDEP_ANCHORS | ||
| 2034 | /* Otherwise, depends on what's next. */ | ||
| 2035 | || at_endline_loc_p (p, pend, syntax)) | ||
| 2036 | BUF_PUSH ((syntax & RE_NO_NEWLINE_ANCHOR) ? endbuf : endline); | ||
| 2037 | else | ||
| 2038 | goto normal_char; | ||
| 2039 | } | ||
| 2040 | break; | ||
| 2041 | 1885 | ||
| 2042 | 1886 | ||
| 2043 | case '+': | 1887 | case '+': |
| 2044 | case '?': | 1888 | case '?': |
| 2045 | if ((syntax & RE_BK_PLUS_QM) | ||
| 2046 | || (syntax & RE_LIMITED_OPS)) | ||
| 2047 | goto normal_char; | ||
| 2048 | FALLTHROUGH; | ||
| 2049 | case '*': | 1889 | case '*': |
| 2050 | handle_plus: | ||
| 2051 | /* If there is no previous pattern... */ | 1890 | /* If there is no previous pattern... */ |
| 2052 | if (!laststart) | 1891 | if (!laststart) |
| 2053 | { | 1892 | goto normal_char; |
| 2054 | if (syntax & RE_CONTEXT_INVALID_OPS) | ||
| 2055 | FREE_STACK_RETURN (REG_BADRPT); | ||
| 2056 | else if (!(syntax & RE_CONTEXT_INDEP_OPS)) | ||
| 2057 | goto normal_char; | ||
| 2058 | } | ||
| 2059 | 1893 | ||
| 2060 | { | 1894 | { |
| 2061 | /* 1 means zero (many) matches is allowed. */ | 1895 | /* 1 means zero (many) matches is allowed. */ |
| @@ -2069,8 +1903,7 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2069 | 1903 | ||
| 2070 | for (;;) | 1904 | for (;;) |
| 2071 | { | 1905 | { |
| 2072 | if ((syntax & RE_FRUGAL) | 1906 | if (c == '?' && (zero_times_ok || many_times_ok)) |
| 2073 | && c == '?' && (zero_times_ok || many_times_ok)) | ||
| 2074 | greedy = false; | 1907 | greedy = false; |
| 2075 | else | 1908 | else |
| 2076 | { | 1909 | { |
| @@ -2078,25 +1911,10 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2078 | many_times_ok |= c != '?'; | 1911 | many_times_ok |= c != '?'; |
| 2079 | } | 1912 | } |
| 2080 | 1913 | ||
| 2081 | if (p == pend) | 1914 | if (! (p < pend && (*p == '*' || *p == '+' || *p == '?'))) |
| 2082 | break; | ||
| 2083 | else if (*p == '*' | ||
| 2084 | || (!(syntax & RE_BK_PLUS_QM) | ||
| 2085 | && (*p == '+' || *p == '?'))) | ||
| 2086 | ; | ||
| 2087 | else if (syntax & RE_BK_PLUS_QM && *p == '\\') | ||
| 2088 | { | ||
| 2089 | if (p+1 == pend) | ||
| 2090 | FREE_STACK_RETURN (REG_EESCAPE); | ||
| 2091 | if (p[1] == '+' || p[1] == '?') | ||
| 2092 | PATFETCH (c); /* Gobble up the backslash. */ | ||
| 2093 | else | ||
| 2094 | break; | ||
| 2095 | } | ||
| 2096 | else | ||
| 2097 | break; | 1915 | break; |
| 2098 | /* If we get here, we found another repeat character. */ | 1916 | /* If we get here, we found another repeat character. */ |
| 2099 | PATFETCH (c); | 1917 | c = *p++; |
| 2100 | } | 1918 | } |
| 2101 | 1919 | ||
| 2102 | /* Star, etc. applied to an empty pattern is equivalent | 1920 | /* Star, etc. applied to an empty pattern is equivalent |
| @@ -2228,24 +2046,18 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2228 | /* Clear the whole map. */ | 2046 | /* Clear the whole map. */ |
| 2229 | memset (b, 0, (1 << BYTEWIDTH) / BYTEWIDTH); | 2047 | memset (b, 0, (1 << BYTEWIDTH) / BYTEWIDTH); |
| 2230 | 2048 | ||
| 2231 | /* charset_not matches newline according to a syntax bit. */ | ||
| 2232 | if ((re_opcode_t) b[-2] == charset_not | ||
| 2233 | && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) | ||
| 2234 | SET_LIST_BIT ('\n'); | ||
| 2235 | |||
| 2236 | /* Read in characters and ranges, setting map bits. */ | 2049 | /* Read in characters and ranges, setting map bits. */ |
| 2237 | for (;;) | 2050 | for (;;) |
| 2238 | { | 2051 | { |
| 2239 | const unsigned char *p2 = p; | 2052 | const unsigned char *p2 = p; |
| 2240 | re_wctype_t cc; | ||
| 2241 | int ch; | 2053 | int ch; |
| 2242 | 2054 | ||
| 2243 | if (p == pend) FREE_STACK_RETURN (REG_EBRACK); | 2055 | if (p == pend) FREE_STACK_RETURN (REG_EBRACK); |
| 2244 | 2056 | ||
| 2245 | /* See if we're at the beginning of a possible character | 2057 | /* See if we're at the beginning of a possible character |
| 2246 | class. */ | 2058 | class. */ |
| 2247 | if (syntax & RE_CHAR_CLASSES && | 2059 | re_wctype_t cc = re_wctype_parse (&p, pend - p); |
| 2248 | (cc = re_wctype_parse(&p, pend - p)) != -1) | 2060 | if (cc != -1) |
| 2249 | { | 2061 | { |
| 2250 | if (cc == 0) | 2062 | if (cc == 0) |
| 2251 | FREE_STACK_RETURN (REG_ECTYPE); | 2063 | FREE_STACK_RETURN (REG_ECTYPE); |
| @@ -2297,21 +2109,11 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2297 | (let ((case-fold-search t)) (string-match "[A-_]" "A")) */ | 2109 | (let ((case-fold-search t)) (string-match "[A-_]" "A")) */ |
| 2298 | PATFETCH (c); | 2110 | PATFETCH (c); |
| 2299 | 2111 | ||
| 2300 | /* \ might escape characters inside [...] and [^...]. */ | 2112 | /* Could be the end of the bracket expression. If it's |
| 2301 | if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') | 2113 | not (i.e., when the bracket expression is `[]' so |
| 2302 | { | 2114 | far), the ']' character bit gets set way below. */ |
| 2303 | if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); | 2115 | if (c == ']' && p2 != p1) |
| 2304 | 2116 | break; | |
| 2305 | PATFETCH (c); | ||
| 2306 | } | ||
| 2307 | else | ||
| 2308 | { | ||
| 2309 | /* Could be the end of the bracket expression. If it's | ||
| 2310 | not (i.e., when the bracket expression is `[]' so | ||
| 2311 | far), the ']' character bit gets set way below. */ | ||
| 2312 | if (c == ']' && p2 != p1) | ||
| 2313 | break; | ||
| 2314 | } | ||
| 2315 | 2117 | ||
| 2316 | if (p < pend && p[0] == '-' && p[1] != ']') | 2118 | if (p < pend && p[0] == '-' && p[1] != ']') |
| 2317 | { | 2119 | { |
| @@ -2332,13 +2134,7 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2332 | /* Range from C to C. */ | 2134 | /* Range from C to C. */ |
| 2333 | c1 = c; | 2135 | c1 = c; |
| 2334 | 2136 | ||
| 2335 | if (c > c1) | 2137 | if (c <= c1) |
| 2336 | { | ||
| 2337 | if (syntax & RE_NO_EMPTY_RANGES) | ||
| 2338 | FREE_STACK_RETURN (REG_ERANGEX); | ||
| 2339 | /* Else, repeat the loop. */ | ||
| 2340 | } | ||
| 2341 | else | ||
| 2342 | { | 2138 | { |
| 2343 | if (c < 128) | 2139 | if (c < 128) |
| 2344 | { | 2140 | { |
| @@ -2348,24 +2144,17 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2348 | if (CHAR_BYTE8_P (c1)) | 2144 | if (CHAR_BYTE8_P (c1)) |
| 2349 | c = BYTE8_TO_CHAR (128); | 2145 | c = BYTE8_TO_CHAR (128); |
| 2350 | } | 2146 | } |
| 2351 | if (c <= c1) | 2147 | if (CHAR_BYTE8_P (c)) |
| 2352 | { | 2148 | { |
| 2353 | if (CHAR_BYTE8_P (c)) | 2149 | c = CHAR_TO_BYTE8 (c); |
| 2354 | { | 2150 | c1 = CHAR_TO_BYTE8 (c1); |
| 2355 | c = CHAR_TO_BYTE8 (c); | 2151 | for (; c <= c1; c++) |
| 2356 | c1 = CHAR_TO_BYTE8 (c1); | 2152 | SET_LIST_BIT (c); |
| 2357 | for (; c <= c1; c++) | ||
| 2358 | SET_LIST_BIT (c); | ||
| 2359 | } | ||
| 2360 | else if (multibyte) | ||
| 2361 | { | ||
| 2362 | SETUP_MULTIBYTE_RANGE (range_table_work, c, c1); | ||
| 2363 | } | ||
| 2364 | else | ||
| 2365 | { | ||
| 2366 | SETUP_UNIBYTE_RANGE (range_table_work, c, c1); | ||
| 2367 | } | ||
| 2368 | } | 2153 | } |
| 2154 | else if (multibyte) | ||
| 2155 | SETUP_MULTIBYTE_RANGE (range_table_work, c, c1); | ||
| 2156 | else | ||
| 2157 | SETUP_UNIBYTE_RANGE (range_table_work, c, c1); | ||
| 2369 | } | 2158 | } |
| 2370 | } | 2159 | } |
| 2371 | 2160 | ||
| @@ -2403,41 +2192,6 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2403 | break; | 2192 | break; |
| 2404 | 2193 | ||
| 2405 | 2194 | ||
| 2406 | case '(': | ||
| 2407 | if (syntax & RE_NO_BK_PARENS) | ||
| 2408 | goto handle_open; | ||
| 2409 | else | ||
| 2410 | goto normal_char; | ||
| 2411 | |||
| 2412 | |||
| 2413 | case ')': | ||
| 2414 | if (syntax & RE_NO_BK_PARENS) | ||
| 2415 | goto handle_close; | ||
| 2416 | else | ||
| 2417 | goto normal_char; | ||
| 2418 | |||
| 2419 | |||
| 2420 | case '\n': | ||
| 2421 | if (syntax & RE_NEWLINE_ALT) | ||
| 2422 | goto handle_alt; | ||
| 2423 | else | ||
| 2424 | goto normal_char; | ||
| 2425 | |||
| 2426 | |||
| 2427 | case '|': | ||
| 2428 | if (syntax & RE_NO_BK_VBAR) | ||
| 2429 | goto handle_alt; | ||
| 2430 | else | ||
| 2431 | goto normal_char; | ||
| 2432 | |||
| 2433 | |||
| 2434 | case '{': | ||
| 2435 | if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES) | ||
| 2436 | goto handle_interval; | ||
| 2437 | else | ||
| 2438 | goto normal_char; | ||
| 2439 | |||
| 2440 | |||
| 2441 | case '\\': | 2195 | case '\\': |
| 2442 | if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); | 2196 | if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); |
| 2443 | 2197 | ||
| @@ -2449,17 +2203,13 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2449 | switch (c) | 2203 | switch (c) |
| 2450 | { | 2204 | { |
| 2451 | case '(': | 2205 | case '(': |
| 2452 | if (syntax & RE_NO_BK_PARENS) | ||
| 2453 | goto normal_backslash; | ||
| 2454 | |||
| 2455 | handle_open: | ||
| 2456 | { | 2206 | { |
| 2457 | int shy = 0; | 2207 | int shy = 0; |
| 2458 | regnum_t regnum = 0; | 2208 | regnum_t regnum = 0; |
| 2459 | if (p+1 < pend) | 2209 | if (p+1 < pend) |
| 2460 | { | 2210 | { |
| 2461 | /* Look for a special (?...) construct */ | 2211 | /* Look for a special (?...) construct */ |
| 2462 | if ((syntax & RE_SHY_GROUPS) && *p == '?') | 2212 | if (*p == '?') |
| 2463 | { | 2213 | { |
| 2464 | PATFETCH (c); /* Gobble up the '?'. */ | 2214 | PATFETCH (c); /* Gobble up the '?'. */ |
| 2465 | while (!shy) | 2215 | while (!shy) |
| @@ -2540,27 +2290,14 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2540 | } | 2290 | } |
| 2541 | 2291 | ||
| 2542 | case ')': | 2292 | case ')': |
| 2543 | if (syntax & RE_NO_BK_PARENS) goto normal_backslash; | ||
| 2544 | |||
| 2545 | if (COMPILE_STACK_EMPTY) | 2293 | if (COMPILE_STACK_EMPTY) |
| 2546 | { | 2294 | FREE_STACK_RETURN (REG_ERPAREN); |
| 2547 | if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) | ||
| 2548 | goto normal_backslash; | ||
| 2549 | else | ||
| 2550 | FREE_STACK_RETURN (REG_ERPAREN); | ||
| 2551 | } | ||
| 2552 | 2295 | ||
| 2553 | handle_close: | ||
| 2554 | FIXUP_ALT_JUMP (); | 2296 | FIXUP_ALT_JUMP (); |
| 2555 | 2297 | ||
| 2556 | /* See similar code for backslashed left paren above. */ | 2298 | /* See similar code for backslashed left paren above. */ |
| 2557 | if (COMPILE_STACK_EMPTY) | 2299 | if (COMPILE_STACK_EMPTY) |
| 2558 | { | 2300 | FREE_STACK_RETURN (REG_ERPAREN); |
| 2559 | if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) | ||
| 2560 | goto normal_char; | ||
| 2561 | else | ||
| 2562 | FREE_STACK_RETURN (REG_ERPAREN); | ||
| 2563 | } | ||
| 2564 | 2301 | ||
| 2565 | /* Since we just checked for an empty stack above, this | 2302 | /* Since we just checked for an empty stack above, this |
| 2566 | ``can't happen''. */ | 2303 | ``can't happen''. */ |
| @@ -2593,12 +2330,6 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2593 | 2330 | ||
| 2594 | 2331 | ||
| 2595 | case '|': /* `\|'. */ | 2332 | case '|': /* `\|'. */ |
| 2596 | if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR) | ||
| 2597 | goto normal_backslash; | ||
| 2598 | handle_alt: | ||
| 2599 | if (syntax & RE_LIMITED_OPS) | ||
| 2600 | goto normal_char; | ||
| 2601 | |||
| 2602 | /* Insert before the previous alternative a jump which | 2333 | /* Insert before the previous alternative a jump which |
| 2603 | jumps to this alternative if the former fails. */ | 2334 | jumps to this alternative if the former fails. */ |
| 2604 | GET_BUFFER_SPACE (3); | 2335 | GET_BUFFER_SPACE (3); |
| @@ -2637,17 +2368,7 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2637 | 2368 | ||
| 2638 | 2369 | ||
| 2639 | case '{': | 2370 | case '{': |
| 2640 | /* If \{ is a literal. */ | ||
| 2641 | if (!(syntax & RE_INTERVALS) | ||
| 2642 | /* If we're at `\{' and it's not the open-interval | ||
| 2643 | operator. */ | ||
| 2644 | || (syntax & RE_NO_BK_BRACES)) | ||
| 2645 | goto normal_backslash; | ||
| 2646 | |||
| 2647 | handle_interval: | ||
| 2648 | { | 2371 | { |
| 2649 | /* If got here, then the syntax allows intervals. */ | ||
| 2650 | |||
| 2651 | /* At least (most) this many matches must be made. */ | 2372 | /* At least (most) this many matches must be made. */ |
| 2652 | int lower_bound = 0, upper_bound = -1; | 2373 | int lower_bound = 0, upper_bound = -1; |
| 2653 | 2374 | ||
| @@ -2662,33 +2383,19 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2662 | upper_bound = lower_bound; | 2383 | upper_bound = lower_bound; |
| 2663 | 2384 | ||
| 2664 | if (lower_bound < 0 | 2385 | if (lower_bound < 0 |
| 2665 | || (0 <= upper_bound && upper_bound < lower_bound)) | 2386 | || (0 <= upper_bound && upper_bound < lower_bound) |
| 2387 | || c != '\\') | ||
| 2666 | FREE_STACK_RETURN (REG_BADBR); | 2388 | FREE_STACK_RETURN (REG_BADBR); |
| 2667 | 2389 | if (p == pend) | |
| 2668 | if (!(syntax & RE_NO_BK_BRACES)) | 2390 | FREE_STACK_RETURN (REG_EESCAPE); |
| 2669 | { | 2391 | if (*p++ != '}') |
| 2670 | if (c != '\\') | ||
| 2671 | FREE_STACK_RETURN (REG_BADBR); | ||
| 2672 | if (p == pend) | ||
| 2673 | FREE_STACK_RETURN (REG_EESCAPE); | ||
| 2674 | PATFETCH (c); | ||
| 2675 | } | ||
| 2676 | |||
| 2677 | if (c != '}') | ||
| 2678 | FREE_STACK_RETURN (REG_BADBR); | 2392 | FREE_STACK_RETURN (REG_BADBR); |
| 2679 | 2393 | ||
| 2680 | /* We just parsed a valid interval. */ | 2394 | /* We just parsed a valid interval. */ |
| 2681 | 2395 | ||
| 2682 | /* If it's invalid to have no preceding re. */ | 2396 | /* If it's invalid to have no preceding re. */ |
| 2683 | if (!laststart) | 2397 | if (!laststart) |
| 2684 | { | 2398 | goto unfetch_interval; |
| 2685 | if (syntax & RE_CONTEXT_INVALID_OPS) | ||
| 2686 | FREE_STACK_RETURN (REG_BADRPT); | ||
| 2687 | else if (syntax & RE_CONTEXT_INDEP_OPS) | ||
| 2688 | laststart = b; | ||
| 2689 | else | ||
| 2690 | goto unfetch_interval; | ||
| 2691 | } | ||
| 2692 | 2399 | ||
| 2693 | if (upper_bound == 0) | 2400 | if (upper_bound == 0) |
| 2694 | /* If the upper bound is zero, just drop the sub pattern | 2401 | /* If the upper bound is zero, just drop the sub pattern |
| @@ -2793,17 +2500,9 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2793 | eassert (beg_interval); | 2500 | eassert (beg_interval); |
| 2794 | p = beg_interval; | 2501 | p = beg_interval; |
| 2795 | beg_interval = NULL; | 2502 | beg_interval = NULL; |
| 2796 | 2503 | eassert (p > pattern && p[-1] == '\\'); | |
| 2797 | /* normal_char and normal_backslash need `c'. */ | ||
| 2798 | c = '{'; | 2504 | c = '{'; |
| 2799 | 2505 | goto normal_char; | |
| 2800 | if (!(syntax & RE_NO_BK_BRACES)) | ||
| 2801 | { | ||
| 2802 | eassert (p > pattern && p[-1] == '\\'); | ||
| 2803 | goto normal_backslash; | ||
| 2804 | } | ||
| 2805 | else | ||
| 2806 | goto normal_char; | ||
| 2807 | 2506 | ||
| 2808 | case '=': | 2507 | case '=': |
| 2809 | laststart = b; | 2508 | laststart = b; |
| @@ -2835,38 +2534,28 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2835 | break; | 2534 | break; |
| 2836 | 2535 | ||
| 2837 | case 'w': | 2536 | case 'w': |
| 2838 | if (syntax & RE_NO_GNU_OPS) | ||
| 2839 | goto normal_char; | ||
| 2840 | laststart = b; | 2537 | laststart = b; |
| 2841 | BUF_PUSH_2 (syntaxspec, Sword); | 2538 | BUF_PUSH_2 (syntaxspec, Sword); |
| 2842 | break; | 2539 | break; |
| 2843 | 2540 | ||
| 2844 | 2541 | ||
| 2845 | case 'W': | 2542 | case 'W': |
| 2846 | if (syntax & RE_NO_GNU_OPS) | ||
| 2847 | goto normal_char; | ||
| 2848 | laststart = b; | 2543 | laststart = b; |
| 2849 | BUF_PUSH_2 (notsyntaxspec, Sword); | 2544 | BUF_PUSH_2 (notsyntaxspec, Sword); |
| 2850 | break; | 2545 | break; |
| 2851 | 2546 | ||
| 2852 | 2547 | ||
| 2853 | case '<': | 2548 | case '<': |
| 2854 | if (syntax & RE_NO_GNU_OPS) | ||
| 2855 | goto normal_char; | ||
| 2856 | laststart = b; | 2549 | laststart = b; |
| 2857 | BUF_PUSH (wordbeg); | 2550 | BUF_PUSH (wordbeg); |
| 2858 | break; | 2551 | break; |
| 2859 | 2552 | ||
| 2860 | case '>': | 2553 | case '>': |
| 2861 | if (syntax & RE_NO_GNU_OPS) | ||
| 2862 | goto normal_char; | ||
| 2863 | laststart = b; | 2554 | laststart = b; |
| 2864 | BUF_PUSH (wordend); | 2555 | BUF_PUSH (wordend); |
| 2865 | break; | 2556 | break; |
| 2866 | 2557 | ||
| 2867 | case '_': | 2558 | case '_': |
| 2868 | if (syntax & RE_NO_GNU_OPS) | ||
| 2869 | goto normal_char; | ||
| 2870 | laststart = b; | 2559 | laststart = b; |
| 2871 | PATFETCH (c); | 2560 | PATFETCH (c); |
| 2872 | if (c == '<') | 2561 | if (c == '<') |
| @@ -2878,38 +2567,25 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2878 | break; | 2567 | break; |
| 2879 | 2568 | ||
| 2880 | case 'b': | 2569 | case 'b': |
| 2881 | if (syntax & RE_NO_GNU_OPS) | ||
| 2882 | goto normal_char; | ||
| 2883 | BUF_PUSH (wordbound); | 2570 | BUF_PUSH (wordbound); |
| 2884 | break; | 2571 | break; |
| 2885 | 2572 | ||
| 2886 | case 'B': | 2573 | case 'B': |
| 2887 | if (syntax & RE_NO_GNU_OPS) | ||
| 2888 | goto normal_char; | ||
| 2889 | BUF_PUSH (notwordbound); | 2574 | BUF_PUSH (notwordbound); |
| 2890 | break; | 2575 | break; |
| 2891 | 2576 | ||
| 2892 | case '`': | 2577 | case '`': |
| 2893 | if (syntax & RE_NO_GNU_OPS) | ||
| 2894 | goto normal_char; | ||
| 2895 | BUF_PUSH (begbuf); | 2578 | BUF_PUSH (begbuf); |
| 2896 | break; | 2579 | break; |
| 2897 | 2580 | ||
| 2898 | case '\'': | 2581 | case '\'': |
| 2899 | if (syntax & RE_NO_GNU_OPS) | ||
| 2900 | goto normal_char; | ||
| 2901 | BUF_PUSH (endbuf); | 2582 | BUF_PUSH (endbuf); |
| 2902 | break; | 2583 | break; |
| 2903 | 2584 | ||
| 2904 | case '1': case '2': case '3': case '4': case '5': | 2585 | case '1': case '2': case '3': case '4': case '5': |
| 2905 | case '6': case '7': case '8': case '9': | 2586 | case '6': case '7': case '8': case '9': |
| 2906 | { | 2587 | { |
| 2907 | regnum_t reg; | 2588 | regnum_t reg = c - '0'; |
| 2908 | |||
| 2909 | if (syntax & RE_NO_BK_REFS) | ||
| 2910 | goto normal_backslash; | ||
| 2911 | |||
| 2912 | reg = c - '0'; | ||
| 2913 | 2589 | ||
| 2914 | if (reg > bufp->re_nsub || reg < 1 | 2590 | if (reg > bufp->re_nsub || reg < 1 |
| 2915 | /* Can't back reference to a subexp before its end. */ | 2591 | /* Can't back reference to a subexp before its end. */ |
| @@ -2921,16 +2597,7 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2921 | } | 2597 | } |
| 2922 | break; | 2598 | break; |
| 2923 | 2599 | ||
| 2924 | |||
| 2925 | case '+': | ||
| 2926 | case '?': | ||
| 2927 | if (syntax & RE_BK_PLUS_QM) | ||
| 2928 | goto handle_plus; | ||
| 2929 | else | ||
| 2930 | goto normal_backslash; | ||
| 2931 | |||
| 2932 | default: | 2600 | default: |
| 2933 | normal_backslash: | ||
| 2934 | /* You might think it would be useful for \ to mean | 2601 | /* You might think it would be useful for \ to mean |
| 2935 | not to translate; but if we don't translate it | 2602 | not to translate; but if we don't translate it |
| 2936 | it will never match anything. */ | 2603 | it will never match anything. */ |
| @@ -2952,14 +2619,9 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2952 | || *pending_exact >= (1 << BYTEWIDTH) - MAX_MULTIBYTE_LENGTH | 2619 | || *pending_exact >= (1 << BYTEWIDTH) - MAX_MULTIBYTE_LENGTH |
| 2953 | 2620 | ||
| 2954 | /* If followed by a repetition operator. */ | 2621 | /* If followed by a repetition operator. */ |
| 2955 | || (p != pend && (*p == '*' || *p == '^')) | 2622 | || (p != pend |
| 2956 | || ((syntax & RE_BK_PLUS_QM) | 2623 | && (*p == '*' || *p == '+' || *p == '?' || *p == '^')) |
| 2957 | ? p + 1 < pend && *p == '\\' && (p[1] == '+' || p[1] == '?') | 2624 | || (p + 1 < pend && p[0] == '\\' && p[1] == '{')) |
| 2958 | : p != pend && (*p == '+' || *p == '?')) | ||
| 2959 | || ((syntax & RE_INTERVALS) | ||
| 2960 | && ((syntax & RE_NO_BK_BRACES) | ||
| 2961 | ? p != pend && *p == '{' | ||
| 2962 | : p + 1 < pend && p[0] == '\\' && p[1] == '{'))) | ||
| 2963 | { | 2625 | { |
| 2964 | /* Start building a new exactn. */ | 2626 | /* Start building a new exactn. */ |
| 2965 | 2627 | ||
| @@ -3088,40 +2750,35 @@ insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, unsigned cha | |||
| 3088 | least one character before the ^. */ | 2750 | least one character before the ^. */ |
| 3089 | 2751 | ||
| 3090 | static bool | 2752 | static bool |
| 3091 | at_begline_loc_p (re_char *pattern, re_char *p, reg_syntax_t syntax) | 2753 | at_begline_loc_p (re_char *pattern, re_char *p) |
| 3092 | { | 2754 | { |
| 3093 | re_char *prev = p - 2; | 2755 | re_char *prev = p - 2; |
| 3094 | bool odd_backslashes; | ||
| 3095 | |||
| 3096 | /* After a subexpression? */ | ||
| 3097 | if (*prev == '(') | ||
| 3098 | odd_backslashes = (syntax & RE_NO_BK_PARENS) == 0; | ||
| 3099 | 2756 | ||
| 3100 | /* After an alternative? */ | 2757 | switch (*prev) |
| 3101 | else if (*prev == '|') | ||
| 3102 | odd_backslashes = (syntax & RE_NO_BK_VBAR) == 0; | ||
| 3103 | |||
| 3104 | /* After a shy subexpression? */ | ||
| 3105 | else if (*prev == ':' && (syntax & RE_SHY_GROUPS)) | ||
| 3106 | { | 2758 | { |
| 2759 | case '(': /* After a subexpression. */ | ||
| 2760 | case '|': /* After an alternative. */ | ||
| 2761 | break; | ||
| 2762 | |||
| 2763 | case ':': /* After a shy subexpression. */ | ||
| 3107 | /* Skip over optional regnum. */ | 2764 | /* Skip over optional regnum. */ |
| 3108 | while (prev - 1 >= pattern && prev[-1] >= '0' && prev[-1] <= '9') | 2765 | while (prev > pattern && '0' <= prev[-1] && prev[-1] <= '9') |
| 3109 | --prev; | 2766 | --prev; |
| 3110 | 2767 | ||
| 3111 | if (!(prev - 2 >= pattern | 2768 | if (! (prev > pattern + 1 && prev[-1] == '?' && prev[-2] == '(')) |
| 3112 | && prev[-1] == '?' && prev[-2] == '(')) | ||
| 3113 | return false; | 2769 | return false; |
| 3114 | prev -= 2; | 2770 | prev -= 2; |
| 3115 | odd_backslashes = (syntax & RE_NO_BK_PARENS) == 0; | 2771 | break; |
| 2772 | |||
| 2773 | default: | ||
| 2774 | return false; | ||
| 3116 | } | 2775 | } |
| 3117 | else | ||
| 3118 | return false; | ||
| 3119 | 2776 | ||
| 3120 | /* Count the number of preceding backslashes. */ | 2777 | /* Count the number of preceding backslashes. */ |
| 3121 | p = prev; | 2778 | p = prev; |
| 3122 | while (prev - 1 >= pattern && prev[-1] == '\\') | 2779 | while (prev > pattern && prev[-1] == '\\') |
| 3123 | --prev; | 2780 | --prev; |
| 3124 | return (p - prev) & odd_backslashes; | 2781 | return (p - prev) & 1; |
| 3125 | } | 2782 | } |
| 3126 | 2783 | ||
| 3127 | 2784 | ||
| @@ -3129,19 +2786,10 @@ at_begline_loc_p (re_char *pattern, re_char *p, reg_syntax_t syntax) | |||
| 3129 | at least one character after the $, i.e., `P < PEND'. */ | 2786 | at least one character after the $, i.e., `P < PEND'. */ |
| 3130 | 2787 | ||
| 3131 | static bool | 2788 | static bool |
| 3132 | at_endline_loc_p (re_char *p, re_char *pend, reg_syntax_t syntax) | 2789 | at_endline_loc_p (re_char *p, re_char *pend) |
| 3133 | { | 2790 | { |
| 3134 | re_char *next = p; | 2791 | /* Before a subexpression or an alternative? */ |
| 3135 | bool next_backslash = *next == '\\'; | 2792 | return *p == '\\' && p + 1 < pend && (p[1] == ')' || p[1] == '|'); |
| 3136 | re_char *next_next = p + 1 < pend ? p + 1 : 0; | ||
| 3137 | |||
| 3138 | return | ||
| 3139 | /* Before a subexpression? */ | ||
| 3140 | (syntax & RE_NO_BK_PARENS ? *next == ')' | ||
| 3141 | : next_backslash && next_next && *next_next == ')') | ||
| 3142 | /* Before an alternative? */ | ||
| 3143 | || (syntax & RE_NO_BK_VBAR ? *next == '|' | ||
| 3144 | : next_backslash && next_next && *next_next == '|'); | ||
| 3145 | } | 2793 | } |
| 3146 | 2794 | ||
| 3147 | 2795 | ||
| @@ -3655,7 +3303,7 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1, | |||
| 3655 | 3303 | ||
| 3656 | /* Written out as an if-else to avoid testing `translate' | 3304 | /* Written out as an if-else to avoid testing `translate' |
| 3657 | inside the loop. */ | 3305 | inside the loop. */ |
| 3658 | if (RE_TRANSLATE_P (translate)) | 3306 | if (!NILP (translate)) |
| 3659 | { | 3307 | { |
| 3660 | if (multibyte) | 3308 | if (multibyte) |
| 3661 | while (range > lim) | 3309 | while (range > lim) |
| @@ -4643,12 +4291,11 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 4643 | break; | 4291 | break; |
| 4644 | 4292 | ||
| 4645 | 4293 | ||
| 4646 | /* Match any character except possibly a newline or a null. */ | 4294 | /* Match any character except newline. */ |
| 4647 | case anychar: | 4295 | case anychar: |
| 4648 | { | 4296 | { |
| 4649 | int buf_charlen; | 4297 | int buf_charlen; |
| 4650 | int buf_ch; | 4298 | int buf_ch; |
| 4651 | reg_syntax_t syntax; | ||
| 4652 | 4299 | ||
| 4653 | DEBUG_PRINT ("EXECUTING anychar.\n"); | 4300 | DEBUG_PRINT ("EXECUTING anychar.\n"); |
| 4654 | 4301 | ||
| @@ -4656,11 +4303,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 4656 | buf_ch = RE_STRING_CHAR_AND_LENGTH (d, buf_charlen, | 4303 | buf_ch = RE_STRING_CHAR_AND_LENGTH (d, buf_charlen, |
| 4657 | target_multibyte); | 4304 | target_multibyte); |
| 4658 | buf_ch = TRANSLATE (buf_ch); | 4305 | buf_ch = TRANSLATE (buf_ch); |
| 4659 | 4306 | if (buf_ch == '\n') | |
| 4660 | syntax = RE_SYNTAX_EMACS; | ||
| 4661 | |||
| 4662 | if ((!(syntax & RE_DOT_NEWLINE) && buf_ch == '\n') | ||
| 4663 | || ((syntax & RE_DOT_NOT_NULL) && buf_ch == '\000')) | ||
| 4664 | goto fail; | 4307 | goto fail; |
| 4665 | 4308 | ||
| 4666 | DEBUG_PRINT (" Matched \"%d\".\n", *d); | 4309 | DEBUG_PRINT (" Matched \"%d\".\n", *d); |
| @@ -4826,7 +4469,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 4826 | 4469 | ||
| 4827 | /* Compare that many; failure if mismatch, else move | 4470 | /* Compare that many; failure if mismatch, else move |
| 4828 | past them. */ | 4471 | past them. */ |
| 4829 | if (RE_TRANSLATE_P (translate) | 4472 | if (!NILP (translate) |
| 4830 | ? bcmp_translate (d, d2, dcnt, translate, target_multibyte) | 4473 | ? bcmp_translate (d, d2, dcnt, translate, target_multibyte) |
| 4831 | : memcmp (d, d2, dcnt)) | 4474 | : memcmp (d, d2, dcnt)) |
| 4832 | { | 4475 | { |
diff --git a/src/search.c b/src/search.c index f758bb9304a..4e5a2530114 100644 --- a/src/search.c +++ b/src/search.c | |||
| @@ -132,7 +132,7 @@ compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern, | |||
| 132 | 132 | ||
| 133 | eassert (!cp->busy); | 133 | eassert (!cp->busy); |
| 134 | cp->regexp = Qnil; | 134 | cp->regexp = Qnil; |
| 135 | cp->buf.translate = (! NILP (translate) ? translate : make_number (0)); | 135 | cp->buf.translate = translate; |
| 136 | cp->posix = posix; | 136 | cp->posix = posix; |
| 137 | cp->buf.multibyte = STRING_MULTIBYTE (pattern); | 137 | cp->buf.multibyte = STRING_MULTIBYTE (pattern); |
| 138 | cp->buf.charset_unibyte = charset_unibyte; | 138 | cp->buf.charset_unibyte = charset_unibyte; |
| @@ -238,7 +238,7 @@ compile_pattern (Lisp_Object pattern, struct re_registers *regp, | |||
| 238 | && !cp->busy | 238 | && !cp->busy |
| 239 | && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern) | 239 | && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern) |
| 240 | && !NILP (Fstring_equal (cp->regexp, pattern)) | 240 | && !NILP (Fstring_equal (cp->regexp, pattern)) |
| 241 | && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0))) | 241 | && EQ (cp->buf.translate, translate) |
| 242 | && cp->posix == posix | 242 | && cp->posix == posix |
| 243 | && (EQ (cp->syntax_table, Qt) | 243 | && (EQ (cp->syntax_table, Qt) |
| 244 | || EQ (cp->syntax_table, BVAR (current_buffer, syntax_table))) | 244 | || EQ (cp->syntax_table, BVAR (current_buffer, syntax_table))) |