aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author Kenichi Handa 2002-09-03 04:09:59 +0000
committer Kenichi Handa 2002-09-03 04:09:59 +0000
commit 93daa011b35d0397b4503eff0d7d80e1b50a9bed (patch)
tree 5683a484834a16ead0fe3396235630210136b0e7 /src
parent bf2164799abc98b4aaafd9340ed77cdf2aa18370 (diff)
download emacs-93daa011b35d0397b4503eff0d7d80e1b50a9bed.tar.gz
emacs-93daa011b35d0397b4503eff0d7d80e1b50a9bed.zip
(compile_pattern_1): Don't adjust the multibyteness of
the regexp pattern and the matching target. Set cp->buf.multibyte to the multibyteness of the regexp pattern. Set cp->buf.target_multibyte to the multibyteness of the matching target. (wordify): Use FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE instead of FETCH_STRING_CHAR_ADVANCE. (Freplace_match): Convert unibyte chars to multibyte.
Diffstat (limited to 'src')
-rw-r--r-- src/search.c 64
1 files changed, 11 insertions, 53 deletions
diff --git a/src/search.c b/src/search.c
index 6db6cc674e6..c9fd6655c29 100644
--- a/src/search.c
+++ b/src/search.c
@@ -117,51 +117,20 @@ compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
117 int posix; 117 int posix;
118 int multibyte; 118 int multibyte;
119{ 119{
120 unsigned char *raw_pattern;
121 int raw_pattern_size;
122 char *val; 120 char *val;
123 reg_syntax_t old; 121 reg_syntax_t old;
124 122
125 /* MULTIBYTE says whether the text to be searched is multibyte.
126 We must convert PATTERN to match that, or we will not really
127 find things right. */
128
129 if (multibyte == STRING_MULTIBYTE (pattern))
130 {
131 raw_pattern = (unsigned char *) XSTRING (pattern)->data;
132 raw_pattern_size = STRING_BYTES (XSTRING (pattern));
133 }
134 else if (multibyte)
135 {
136 raw_pattern_size = count_size_as_multibyte (XSTRING (pattern)->data,
137 XSTRING (pattern)->size);
138 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
139 copy_text (XSTRING (pattern)->data, raw_pattern,
140 XSTRING (pattern)->size, 0, 1);
141 }
142 else
143 {
144 /* Converting multibyte to single-byte.
145
146 ??? Perhaps this conversion should be done in a special way
147 by subtracting nonascii-insert-offset from each non-ASCII char,
148 so that only the multibyte chars which really correspond to
149 the chosen single-byte character set can possibly match. */
150 raw_pattern_size = XSTRING (pattern)->size;
151 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
152 copy_text (XSTRING (pattern)->data, raw_pattern,
153 STRING_BYTES (XSTRING (pattern)), 1, 0);
154 }
155
156 cp->regexp = Qnil; 123 cp->regexp = Qnil;
157 cp->buf.translate = (! NILP (translate) ? translate : make_number (0)); 124 cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
158 cp->posix = posix; 125 cp->posix = posix;
159 cp->buf.multibyte = multibyte; 126 cp->buf.multibyte = STRING_MULTIBYTE (pattern);
127 cp->buf.target_multibyte = multibyte;
160 BLOCK_INPUT; 128 BLOCK_INPUT;
161 old = re_set_syntax (RE_SYNTAX_EMACS 129 old = re_set_syntax (RE_SYNTAX_EMACS
162 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING)); 130 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
163 val = (char *) re_compile_pattern ((char *)raw_pattern, 131 val = (char *) re_compile_pattern ((char *) (XSTRING (pattern)->data),
164 raw_pattern_size, &cp->buf); 132 STRING_BYTES (XSTRING (pattern)),
133 &cp->buf);
165 re_set_syntax (old); 134 re_set_syntax (old);
166 UNBLOCK_INPUT; 135 UNBLOCK_INPUT;
167 if (val) 136 if (val)
@@ -1952,7 +1921,7 @@ wordify (string)
1952 { 1921 {
1953 int c; 1922 int c;
1954 1923
1955 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte); 1924 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
1956 1925
1957 if (SYNTAX (c) != Sword) 1926 if (SYNTAX (c) != Sword)
1958 { 1927 {
@@ -1987,7 +1956,7 @@ wordify (string)
1987 int c; 1956 int c;
1988 int i_byte_orig = i_byte; 1957 int i_byte_orig = i_byte;
1989 1958
1990 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte); 1959 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
1991 1960
1992 if (SYNTAX (c) == Sword) 1961 if (SYNTAX (c) == Sword)
1993 { 1962 {
@@ -2245,21 +2214,14 @@ since only regular expressions have distinguished subexpressions. */)
2245 { 2214 {
2246 /* Decide how to casify by examining the matched text. */ 2215 /* Decide how to casify by examining the matched text. */
2247 int last; 2216 int last;
2248 int multibyte;
2249 2217
2250 pos = search_regs.start[sub]; 2218 pos = search_regs.start[sub];
2251 last = search_regs.end[sub]; 2219 last = search_regs.end[sub];
2252 2220
2253 if (NILP (string)) 2221 if (NILP (string))
2254 { 2222 pos_byte = CHAR_TO_BYTE (pos);
2255 pos_byte = CHAR_TO_BYTE (pos);
2256 multibyte = ! NILP (current_buffer->enable_multibyte_characters);
2257 }
2258 else 2223 else
2259 { 2224 pos_byte = string_char_to_byte (string, pos);
2260 pos_byte = string_char_to_byte (string, pos);
2261 multibyte = STRING_MULTIBYTE (string);
2262 }
2263 2225
2264 prevc = '\n'; 2226 prevc = '\n';
2265 case_action = all_caps; 2227 case_action = all_caps;
@@ -2275,15 +2237,11 @@ since only regular expressions have distinguished subexpressions. */)
2275 { 2237 {
2276 if (NILP (string)) 2238 if (NILP (string))
2277 { 2239 {
2278 c = FETCH_CHAR (pos_byte); 2240 c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
2279 INC_BOTH (pos, pos_byte); 2241 INC_BOTH (pos, pos_byte);
2280 } 2242 }
2281 else 2243 else
2282 FETCH_STRING_CHAR_ADVANCE (c, string, pos, pos_byte); 2244 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
2283 if (! multibyte)
2284 {
2285 MAKE_CHAR_MULTIBYTE (c);
2286 }
2287 2245
2288 if (LOWERCASEP (c)) 2246 if (LOWERCASEP (c))
2289 { 2247 {