diff options
| author | Richard M. Stallman | 2002-02-03 10:35:20 +0000 |
|---|---|---|
| committer | Richard M. Stallman | 2002-02-03 10:35:20 +0000 |
| commit | 5150eeecd86d45af4e4107a33b4a0dd8752e2cba (patch) | |
| tree | 45a9654ff561fb4526f20dd9e69cc6f76c876951 | |
| parent | 1ce2659dbe1e56ce2b738e2946c4cea38d665d7f (diff) | |
| download | emacs-5150eeecd86d45af4e4107a33b4a0dd8752e2cba.tar.gz emacs-5150eeecd86d45af4e4107a33b4a0dd8752e2cba.zip | |
(read1): Redesign strategy for force_multibyte and
force_singlebyte. Now is_multibyte records whether read_buffer
is multibyte. Encountering any multibyte character makes it so.
| -rw-r--r-- | src/lread.c | 89 |
1 files changed, 49 insertions, 40 deletions
diff --git a/src/lread.c b/src/lread.c
index ae8339ac3d0..10baf509918 100644
--- a/src/lread.c
+++ b/src/lread.c
| @@ -2145,14 +2145,16 @@ read1 (readcharfun, pch, first_in_list) | |||
| 2145 | char *p = read_buffer; | 2145 | char *p = read_buffer; |
| 2146 | char *end = read_buffer + read_buffer_size; | 2146 | char *end = read_buffer + read_buffer_size; |
| 2147 | register int c; | 2147 | register int c; |
| 2148 | /* Nonzero if we saw an escape sequence specifying | 2148 | /* 1 if we saw an escape sequence specifying |
| 2149 | a multibyte character. */ | 2149 | a multibyte character, or a multibyte character. */ |
| 2150 | int force_multibyte = 0; | 2150 | int force_multibyte = 0; |
| 2151 | /* Nonzero if we saw an escape sequence specifying | 2151 | /* 1 if we saw an escape sequence specifying |
| 2152 | a single-byte character. */ | 2152 | a single-byte character. */ |
| 2153 | int force_singlebyte = 0; | 2153 | int force_singlebyte = 0; |
| 2154 | /* 1 if read_buffer contains multibyte text now. */ | ||
| 2155 | int is_multibyte = 0; | ||
| 2154 | int cancel = 0; | 2156 | int cancel = 0; |
| 2155 | int nchars; | 2157 | int nchars = 0; |
| 2156 | 2158 | ||
| 2157 | while ((c = READCHAR) >= 0 | 2159 | while ((c = READCHAR) >= 0 |
| 2158 | && c != '\"') | 2160 | && c != '\"') |
| @@ -2186,39 +2188,47 @@ read1 (readcharfun, pch, first_in_list) | |||
| 2186 | force_multibyte = 1; | 2188 | force_multibyte = 1; |
| 2187 | } | 2189 | } |
| 2188 | 2190 | ||
| 2189 | if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_MODIFIER_MASK))) | 2191 | /* A character that must be multibyte forces multibyte. */ |
| 2192 | if (! SINGLE_BYTE_CHAR_P (c & ~CHAR_MODIFIER_MASK)) | ||
| 2193 | force_multibyte = 1; | ||
| 2194 | |||
| 2195 | /* If we just discovered the need to be multibyte, | ||
| 2196 | convert the text accumulated thus far. */ | ||
| 2197 | if (force_multibyte && ! is_multibyte) | ||
| 2190 | { | 2198 | { |
| 2191 | /* Any modifiers for a multibyte character are invalid. */ | 2199 | is_multibyte = 1; |
| 2192 | if (c & CHAR_MODIFIER_MASK) | 2200 | to_multibyte (&p, &end, &nchars); |
| 2193 | error ("Invalid modifier in string"); | ||
| 2194 | p += CHAR_STRING (c, p); | ||
| 2195 | force_multibyte = 1; | ||
| 2196 | } | 2201 | } |
| 2197 | else | ||
| 2198 | { | ||
| 2199 | /* Allow `\C- ' and `\C-?'. */ | ||
| 2200 | if (c == (CHAR_CTL | ' ')) | ||
| 2201 | c = 0; | ||
| 2202 | else if (c == (CHAR_CTL | '?')) | ||
| 2203 | c = 127; | ||
| 2204 | 2202 | ||
| 2205 | if (c & CHAR_SHIFT) | 2203 | /* Allow `\C- ' and `\C-?'. */ |
| 2206 | { | 2204 | if (c == (CHAR_CTL | ' ')) |
| 2207 | /* Shift modifier is valid only with [A-Za-z]. */ | 2205 | c = 0; |
| 2208 | if ((c & 0377) >= 'A' && (c & 0377) <= 'Z') | 2206 | else if (c == (CHAR_CTL | '?')) |
| 2209 | c &= ~CHAR_SHIFT; | 2207 | c = 127; |
| 2210 | else if ((c & 0377) >= 'a' && (c & 0377) <= 'z') | ||
| 2211 | c = (c & ~CHAR_SHIFT) - ('a' - 'A'); | ||
| 2212 | } | ||
| 2213 | 2208 | ||
| 2214 | if (c & CHAR_META) | 2209 | if (c & CHAR_SHIFT) |
| 2215 | /* Move the meta bit to the right place for a string. */ | 2210 | { |
| 2216 | c = (c & ~CHAR_META) | 0x80; | 2211 | /* Shift modifier is valid only with [A-Za-z]. */ |
| 2217 | if (c & ~0xff) | 2212 | if ((c & 0377) >= 'A' && (c & 0377) <= 'Z') |
| 2218 | error ("Invalid modifier in string"); | 2213 | c &= ~CHAR_SHIFT; |
| 2219 | *p++ = c; | 2214 | else if ((c & 0377) >= 'a' && (c & 0377) <= 'z') |
| 2215 | c = (c & ~CHAR_SHIFT) - ('a' - 'A'); | ||
| 2220 | } | 2216 | } |
| 2217 | |||
| 2218 | if (c & CHAR_META) | ||
| 2219 | /* Move the meta bit to the right place for a string. */ | ||
| 2220 | c = (c & ~CHAR_META) | 0x80; | ||
| 2221 | if (c & CHAR_MODIFIER_MASK) | ||
| 2222 | error ("Invalid modifier in string"); | ||
| 2223 | |||
| 2224 | if (is_multibyte) | ||
| 2225 | p += CHAR_STRING (c, p); | ||
| 2226 | else | ||
| 2227 | *p++ = c; | ||
| 2228 | |||
| 2229 | nchars++; | ||
| 2221 | } | 2230 | } |
| 2231 | |||
| 2222 | if (c < 0) | 2232 | if (c < 0) |
| 2223 | end_of_file_error (); | 2233 | end_of_file_error (); |
| 2224 | 2234 | ||
| @@ -2228,10 +2238,8 @@ read1 (readcharfun, pch, first_in_list) | |||
| 2228 | if (!NILP (Vpurify_flag) && NILP (Vdoc_file_name) && cancel) | 2238 | if (!NILP (Vpurify_flag) && NILP (Vdoc_file_name) && cancel) |
| 2229 | return make_number (0); | 2239 | return make_number (0); |
| 2230 | 2240 | ||
| 2231 | if (force_multibyte) | 2241 | if (is_multibyte || force_singlebyte) |
| 2232 | to_multibyte (&p, &end, &nchars); | 2242 | ; |
| 2233 | else if (force_singlebyte) | ||
| 2234 | nchars = p - read_buffer; | ||
| 2235 | else if (load_convert_to_unibyte) | 2243 | else if (load_convert_to_unibyte) |
| 2236 | { | 2244 | { |
| 2237 | Lisp_Object string; | 2245 | Lisp_Object string; |
| @@ -2242,6 +2250,8 @@ read1 (readcharfun, pch, first_in_list) | |||
| 2242 | p - read_buffer); | 2250 | p - read_buffer); |
| 2243 | return Fstring_make_unibyte (string); | 2251 | return Fstring_make_unibyte (string); |
| 2244 | } | 2252 | } |
| 2253 | /* We can make a unibyte string directly. */ | ||
| 2254 | is_multibyte = 0; | ||
| 2245 | } | 2255 | } |
| 2246 | else if (EQ (readcharfun, Qget_file_char) | 2256 | else if (EQ (readcharfun, Qget_file_char) |
| 2247 | || EQ (readcharfun, Qlambda)) | 2257 | || EQ (readcharfun, Qlambda)) |
| @@ -2252,19 +2262,18 @@ read1 (readcharfun, pch, first_in_list) | |||
| 2252 | for reading dynamic byte code (compiled with | 2262 | for reading dynamic byte code (compiled with |
| 2253 | byte-compile-dynamic = t). */ | 2263 | byte-compile-dynamic = t). */ |
| 2254 | to_multibyte (&p, &end, &nchars); | 2264 | to_multibyte (&p, &end, &nchars); |
| 2265 | is_multibyte = 1; | ||
| 2255 | } | 2266 | } |
| 2256 | else | 2267 | else |
| 2257 | /* In all other cases, if we read these bytes as | 2268 | /* In all other cases, if we read these bytes as |
| 2258 | separate characters, treat them as separate characters now. */ | 2269 | separate characters, treat them as separate characters now. */ |
| 2259 | nchars = p - read_buffer; | 2270 | ; |
| 2260 | 2271 | ||
| 2261 | if (read_pure) | 2272 | if (read_pure) |
| 2262 | return make_pure_string (read_buffer, nchars, p - read_buffer, | 2273 | return make_pure_string (read_buffer, nchars, p - read_buffer, |
| 2263 | (force_multibyte | 2274 | is_multibyte); |
| 2264 | || (p - read_buffer != nchars))); | ||
| 2265 | return make_specified_string (read_buffer, nchars, p - read_buffer, | 2275 | return make_specified_string (read_buffer, nchars, p - read_buffer, |
| 2266 | (force_multibyte | 2276 | is_multibyte); |
| 2267 | || (p - read_buffer != nchars))); | ||
| 2268 | } | 2277 | } |
| 2269 | 2278 | ||
| 2270 | case '.': | 2279 | case '.': |