diff options
| author | Kenichi Handa | 2002-09-03 04:09:59 +0000 |
|---|---|---|
| committer | Kenichi Handa | 2002-09-03 04:09:59 +0000 |
| commit | 93daa011b35d0397b4503eff0d7d80e1b50a9bed (patch) | |
| tree | 5683a484834a16ead0fe3396235630210136b0e7 /src | |
| parent | bf2164799abc98b4aaafd9340ed77cdf2aa18370 (diff) | |
| download | emacs-93daa011b35d0397b4503eff0d7d80e1b50a9bed.tar.gz emacs-93daa011b35d0397b4503eff0d7d80e1b50a9bed.zip | |
(compile_pattern_1): Don't adjust the multibyteness of
the regexp pattern and the matching target. Set cp->buf.multibyte
to the multibyteness of the regexp pattern. Set
cp->buf.target_multibyte to the multibyteness of the matching
target.
(wordify): Use FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE instead of
FETCH_STRING_CHAR_ADVANCE.
(Freplace_match): Convert unibyte chars to multibyte.
Diffstat (limited to 'src')
| -rw-r--r-- | src/search.c | 64 |
1 file changed, 11 insertions, 53 deletions
diff --git a/src/search.c b/src/search.c index 6db6cc674e6..c9fd6655c29 100644 --- a/src/search.c +++ b/src/search.c | |||
| @@ -117,51 +117,20 @@ compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte) | |||
| 117 | int posix; | 117 | int posix; |
| 118 | int multibyte; | 118 | int multibyte; |
| 119 | { | 119 | { |
| 120 | unsigned char *raw_pattern; | ||
| 121 | int raw_pattern_size; | ||
| 122 | char *val; | 120 | char *val; |
| 123 | reg_syntax_t old; | 121 | reg_syntax_t old; |
| 124 | 122 | ||
| 125 | /* MULTIBYTE says whether the text to be searched is multibyte. | ||
| 126 | We must convert PATTERN to match that, or we will not really | ||
| 127 | find things right. */ | ||
| 128 | |||
| 129 | if (multibyte == STRING_MULTIBYTE (pattern)) | ||
| 130 | { | ||
| 131 | raw_pattern = (unsigned char *) XSTRING (pattern)->data; | ||
| 132 | raw_pattern_size = STRING_BYTES (XSTRING (pattern)); | ||
| 133 | } | ||
| 134 | else if (multibyte) | ||
| 135 | { | ||
| 136 | raw_pattern_size = count_size_as_multibyte (XSTRING (pattern)->data, | ||
| 137 | XSTRING (pattern)->size); | ||
| 138 | raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1); | ||
| 139 | copy_text (XSTRING (pattern)->data, raw_pattern, | ||
| 140 | XSTRING (pattern)->size, 0, 1); | ||
| 141 | } | ||
| 142 | else | ||
| 143 | { | ||
| 144 | /* Converting multibyte to single-byte. | ||
| 145 | |||
| 146 | ??? Perhaps this conversion should be done in a special way | ||
| 147 | by subtracting nonascii-insert-offset from each non-ASCII char, | ||
| 148 | so that only the multibyte chars which really correspond to | ||
| 149 | the chosen single-byte character set can possibly match. */ | ||
| 150 | raw_pattern_size = XSTRING (pattern)->size; | ||
| 151 | raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1); | ||
| 152 | copy_text (XSTRING (pattern)->data, raw_pattern, | ||
| 153 | STRING_BYTES (XSTRING (pattern)), 1, 0); | ||
| 154 | } | ||
| 155 | |||
| 156 | cp->regexp = Qnil; | 123 | cp->regexp = Qnil; |
| 157 | cp->buf.translate = (! NILP (translate) ? translate : make_number (0)); | 124 | cp->buf.translate = (! NILP (translate) ? translate : make_number (0)); |
| 158 | cp->posix = posix; | 125 | cp->posix = posix; |
| 159 | cp->buf.multibyte = multibyte; | 126 | cp->buf.multibyte = STRING_MULTIBYTE (pattern); |
| 127 | cp->buf.target_multibyte = multibyte; | ||
| 160 | BLOCK_INPUT; | 128 | BLOCK_INPUT; |
| 161 | old = re_set_syntax (RE_SYNTAX_EMACS | 129 | old = re_set_syntax (RE_SYNTAX_EMACS |
| 162 | | (posix ? 0 : RE_NO_POSIX_BACKTRACKING)); | 130 | | (posix ? 0 : RE_NO_POSIX_BACKTRACKING)); |
| 163 | val = (char *) re_compile_pattern ((char *)raw_pattern, | 131 | val = (char *) re_compile_pattern ((char *) (XSTRING (pattern)->data), |
| 164 | raw_pattern_size, &cp->buf); | 132 | STRING_BYTES (XSTRING (pattern)), |
| 133 | &cp->buf); | ||
| 165 | re_set_syntax (old); | 134 | re_set_syntax (old); |
| 166 | UNBLOCK_INPUT; | 135 | UNBLOCK_INPUT; |
| 167 | if (val) | 136 | if (val) |
| @@ -1952,7 +1921,7 @@ wordify (string) | |||
| 1952 | { | 1921 | { |
| 1953 | int c; | 1922 | int c; |
| 1954 | 1923 | ||
| 1955 | FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte); | 1924 | FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte); |
| 1956 | 1925 | ||
| 1957 | if (SYNTAX (c) != Sword) | 1926 | if (SYNTAX (c) != Sword) |
| 1958 | { | 1927 | { |
| @@ -1987,7 +1956,7 @@ wordify (string) | |||
| 1987 | int c; | 1956 | int c; |
| 1988 | int i_byte_orig = i_byte; | 1957 | int i_byte_orig = i_byte; |
| 1989 | 1958 | ||
| 1990 | FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte); | 1959 | FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte); |
| 1991 | 1960 | ||
| 1992 | if (SYNTAX (c) == Sword) | 1961 | if (SYNTAX (c) == Sword) |
| 1993 | { | 1962 | { |
| @@ -2245,21 +2214,14 @@ since only regular expressions have distinguished subexpressions. */) | |||
| 2245 | { | 2214 | { |
| 2246 | /* Decide how to casify by examining the matched text. */ | 2215 | /* Decide how to casify by examining the matched text. */ |
| 2247 | int last; | 2216 | int last; |
| 2248 | int multibyte; | ||
| 2249 | 2217 | ||
| 2250 | pos = search_regs.start[sub]; | 2218 | pos = search_regs.start[sub]; |
| 2251 | last = search_regs.end[sub]; | 2219 | last = search_regs.end[sub]; |
| 2252 | 2220 | ||
| 2253 | if (NILP (string)) | 2221 | if (NILP (string)) |
| 2254 | { | 2222 | pos_byte = CHAR_TO_BYTE (pos); |
| 2255 | pos_byte = CHAR_TO_BYTE (pos); | ||
| 2256 | multibyte = ! NILP (current_buffer->enable_multibyte_characters); | ||
| 2257 | } | ||
| 2258 | else | 2223 | else |
| 2259 | { | 2224 | pos_byte = string_char_to_byte (string, pos); |
| 2260 | pos_byte = string_char_to_byte (string, pos); | ||
| 2261 | multibyte = STRING_MULTIBYTE (string); | ||
| 2262 | } | ||
| 2263 | 2225 | ||
| 2264 | prevc = '\n'; | 2226 | prevc = '\n'; |
| 2265 | case_action = all_caps; | 2227 | case_action = all_caps; |
| @@ -2275,15 +2237,11 @@ since only regular expressions have distinguished subexpressions. */) | |||
| 2275 | { | 2237 | { |
| 2276 | if (NILP (string)) | 2238 | if (NILP (string)) |
| 2277 | { | 2239 | { |
| 2278 | c = FETCH_CHAR (pos_byte); | 2240 | c = FETCH_CHAR_AS_MULTIBYTE (pos_byte); |
| 2279 | INC_BOTH (pos, pos_byte); | 2241 | INC_BOTH (pos, pos_byte); |
| 2280 | } | 2242 | } |
| 2281 | else | 2243 | else |
| 2282 | FETCH_STRING_CHAR_ADVANCE (c, string, pos, pos_byte); | 2244 | FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte); |
| 2283 | if (! multibyte) | ||
| 2284 | { | ||
| 2285 | MAKE_CHAR_MULTIBYTE (c); | ||
| 2286 | } | ||
| 2287 | 2245 | ||
| 2288 | if (LOWERCASEP (c)) | 2246 | if (LOWERCASEP (c)) |
| 2289 | { | 2247 | { |