aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorKenichi Handa2002-03-01 01:44:45 +0000
committerKenichi Handa2002-03-01 01:44:45 +0000
commit1571601b93f6e09394efba4b8b3252729ce5ce12 (patch)
treeead4156341a0ab9ba204473a01eab20953dd57cc /src
parent1842abb2bf55038f92160f01d26a3649b25d63e9 (diff)
downloademacs-1571601b93f6e09394efba4b8b3252729ce5ce12.tar.gz
emacs-1571601b93f6e09394efba4b8b3252729ce5ce12.zip
Include "character.h".
(read_multibyte): New arg NBYTES. (read_escape): The meaning of returned *BYTEREP changed. (to_multibyte): Deleted. (read1): Adjusted the handling of char table and string.
Diffstat (limited to 'src')
-rw-r--r--src/lread.c278
1 files changed, 149 insertions, 129 deletions
diff --git a/src/lread.c b/src/lread.c
index 10baf509918..360fa68386d 100644
--- a/src/lread.c
+++ b/src/lread.c
@@ -29,6 +29,7 @@ Boston, MA 02111-1307, USA. */
29#include "lisp.h" 29#include "lisp.h"
30#include "intervals.h" 30#include "intervals.h"
31#include "buffer.h" 31#include "buffer.h"
32#include "character.h"
32#include "charset.h" 33#include "charset.h"
33#include <epaths.h> 34#include <epaths.h>
34#include "commands.h" 35#include "commands.h"
@@ -1476,36 +1477,51 @@ static char *read_buffer;
1476 1477
1477/* Read multibyte form and return it as a character. C is a first 1478/* Read multibyte form and return it as a character. C is a first
1478 byte of multibyte form, and rest of them are read from 1479 byte of multibyte form, and rest of them are read from
1479 READCHARFUN. */ 1480 READCHARFUN. Store the byte length of the form into *NBYTES. */
1480 1481
1481static int 1482static int
1482read_multibyte (c, readcharfun) 1483read_multibyte (c, readcharfun, nbytes)
1483 register int c; 1484 register int c;
1484 Lisp_Object readcharfun; 1485 Lisp_Object readcharfun;
1486 int *nbytes;
1485{ 1487{
1486 /* We need the actual character code of this multibyte 1488 /* We need the actual character code of this multibyte
1487 characters. */ 1489 characters. */
1488 unsigned char str[MAX_MULTIBYTE_LENGTH]; 1490 unsigned char str[MAX_MULTIBYTE_LENGTH];
1489 int len = 0; 1491 int len = 0;
1490 int bytes; 1492 int bytes = BYTES_BY_CHAR_HEAD (c);
1491 1493
1492 str[len++] = c; 1494 str[len++] = c;
1493 while ((c = READCHAR) >= 0xA0 1495 while (len < bytes)
1494 && len < MAX_MULTIBYTE_LENGTH) 1496 {
1495 str[len++] = c; 1497 c = READCHAR;
1496 UNREAD (c); 1498 if (CHAR_HEAD_P (c))
1497 if (UNIBYTE_STR_AS_MULTIBYTE_P (str, len, bytes)) 1499 {
1498 return STRING_CHAR (str, len); 1500 UNREAD (c);
1501 break;
1502 }
1503 str[len++] = c;
1504 }
1505
1506 if (len == bytes && MULTIBYTE_LENGTH_NO_CHECK (str) > 0)
1507 {
1508 *nbytes = len;
1509 return STRING_CHAR (str, len);
1510 }
1499 /* The byte sequence is not valid as multibyte. Unread all bytes 1511 /* The byte sequence is not valid as multibyte. Unread all bytes
1500 but the first one, and return the first byte. */ 1512 but the first one, and return the first byte. */
1501 while (--len > 0) 1513 while (--len > 0)
1502 UNREAD (str[len]); 1514 UNREAD (str[len]);
1515 *nbytes = 1;
1503 return str[0]; 1516 return str[0];
1504} 1517}
1505 1518
1506/* Read a \-escape sequence, assuming we already read the `\'. 1519/* Read a \-escape sequence, assuming we already read the `\'.
1507 If the escape sequence forces unibyte, store 1 into *BYTEREP. 1520 If the escape sequence forces unibyte, store 1 into *BYTEREP.
1508 If the escape sequence forces multibyte, store 2 into *BYTEREP. 1521 If the escape sequence forces multibyte and the returned character
1522 is raw 8-bit char, store 2 into *BYTEREP.
1523 If the escape sequence forces multibyte and the returned character
1524 is not raw 8-bit char, store 3 into *BYTEREP.
1509 Otherwise store 0 into *BYTEREP. */ 1525 Otherwise store 0 into *BYTEREP. */
1510 1526
1511static int 1527static int
@@ -1640,7 +1656,10 @@ read_escape (readcharfun, stringp, byterep)
1640 } 1656 }
1641 } 1657 }
1642 1658
1643 *byterep = 1; 1659 if (c < 0x100)
1660 *byterep = 1;
1661 else
1662 *byterep = 3;
1644 return i; 1663 return i;
1645 } 1664 }
1646 1665
@@ -1648,6 +1667,7 @@ read_escape (readcharfun, stringp, byterep)
1648 /* A hex escape, as in ANSI C. */ 1667 /* A hex escape, as in ANSI C. */
1649 { 1668 {
1650 int i = 0; 1669 int i = 0;
1670 int count = 0;
1651 while (1) 1671 while (1)
1652 { 1672 {
1653 c = READCHAR; 1673 c = READCHAR;
@@ -1670,15 +1690,26 @@ read_escape (readcharfun, stringp, byterep)
1670 UNREAD (c); 1690 UNREAD (c);
1671 break; 1691 break;
1672 } 1692 }
1693 count++;
1673 } 1694 }
1674 1695
1675 *byterep = 2; 1696 if (count < 3 && i >= 0x80)
1697 *byterep = 2;
1698 else
1699 *byterep = 3;
1676 return i; 1700 return i;
1677 } 1701 }
1678 1702
1679 default: 1703 default:
1680 if (BASE_LEADING_CODE_P (c)) 1704 if (EQ (readcharfun, Qget_file_char)
1681 c = read_multibyte (c, readcharfun); 1705 && BASE_LEADING_CODE_P (c))
1706 {
1707 int nbytes;
1708
1709 c = read_multibyte (c, readcharfun, &nbytes);
1710 if (nbytes > 1)
1711 *byterep = 3;
1712 }
1682 return c; 1713 return c;
1683 } 1714 }
1684} 1715}
@@ -1750,43 +1781,6 @@ read_integer (readcharfun, radix)
1750} 1781}
1751 1782
1752 1783
1753/* Convert unibyte text in read_buffer to multibyte.
1754
1755 Initially, *P is a pointer after the end of the unibyte text, and
1756 the pointer *END points after the end of read_buffer.
1757
1758 If read_buffer doesn't have enough room to hold the result
1759 of the conversion, reallocate it and adjust *P and *END.
1760
1761 At the end, make *P point after the result of the conversion, and
1762 return in *NCHARS the number of characters in the converted
1763 text. */
1764
1765static void
1766to_multibyte (p, end, nchars)
1767 char **p, **end;
1768 int *nchars;
1769{
1770 int nbytes;
1771
1772 parse_str_as_multibyte (read_buffer, *p - read_buffer, &nbytes, nchars);
1773 if (read_buffer_size < 2 * nbytes)
1774 {
1775 int offset = *p - read_buffer;
1776 read_buffer_size = 2 * max (read_buffer_size, nbytes);
1777 read_buffer = (char *) xrealloc (read_buffer, read_buffer_size);
1778 *p = read_buffer + offset;
1779 *end = read_buffer + read_buffer_size;
1780 }
1781
1782 if (nbytes != *nchars)
1783 nbytes = str_as_multibyte (read_buffer, read_buffer_size,
1784 *p - read_buffer, nchars);
1785
1786 *p = read_buffer + nbytes;
1787}
1788
1789
1790/* If the next token is ')' or ']' or '.', we store that character 1784/* If the next token is ')' or ']' or '.', we store that character
1791 in *PCH and the return value is not interesting. Else, we store 1785 in *PCH and the return value is not interesting. Else, we store
1792 zero in *PCH and we read and return one lisp object. 1786 zero in *PCH and we read and return one lisp object.
@@ -1834,11 +1828,9 @@ read1 (readcharfun, pch, first_in_list)
1834 { 1828 {
1835 Lisp_Object tmp; 1829 Lisp_Object tmp;
1836 tmp = read_vector (readcharfun, 0); 1830 tmp = read_vector (readcharfun, 0);
1837 if (XVECTOR (tmp)->size < CHAR_TABLE_STANDARD_SLOTS 1831 if (XVECTOR (tmp)->size != VECSIZE (struct Lisp_Char_Table))
1838 || XVECTOR (tmp)->size > CHAR_TABLE_STANDARD_SLOTS + 10)
1839 error ("Invalid size char-table"); 1832 error ("Invalid size char-table");
1840 XSETCHAR_TABLE (tmp, XCHAR_TABLE (tmp)); 1833 XSETCHAR_TABLE (tmp, XCHAR_TABLE (tmp));
1841 XCHAR_TABLE (tmp)->top = Qt;
1842 return tmp; 1834 return tmp;
1843 } 1835 }
1844 else if (c == '^') 1836 else if (c == '^')
@@ -1847,11 +1839,18 @@ read1 (readcharfun, pch, first_in_list)
1847 if (c == '[') 1839 if (c == '[')
1848 { 1840 {
1849 Lisp_Object tmp; 1841 Lisp_Object tmp;
1842 int depth, size;
1843
1850 tmp = read_vector (readcharfun, 0); 1844 tmp = read_vector (readcharfun, 0);
1851 if (XVECTOR (tmp)->size != SUB_CHAR_TABLE_STANDARD_SLOTS) 1845 if (!INTEGERP (AREF (tmp, 0)))
1846 error ("Invalid depth in char-table");
1847 depth = XINT (AREF (tmp, 0));
1848 if (depth < 1 || depth > 3)
1849 error ("Invalid depth in char-table");
1850 size = XVECTOR (tmp)->size + 2;
1851 if (chartab_size [depth] != size)
1852 error ("Invalid size char-table"); 1852 error ("Invalid size char-table");
1853 XSETCHAR_TABLE (tmp, XCHAR_TABLE (tmp)); 1853 XSETSUB_CHAR_TABLE (tmp, XSUB_CHAR_TABLE (tmp));
1854 XCHAR_TABLE (tmp)->top = Qnil;
1855 return tmp; 1854 return tmp;
1856 } 1855 }
1857 Fsignal (Qinvalid_read_syntax, 1856 Fsignal (Qinvalid_read_syntax,
@@ -2134,8 +2133,9 @@ read1 (readcharfun, pch, first_in_list)
2134 2133
2135 if (c == '\\') 2134 if (c == '\\')
2136 c = read_escape (readcharfun, 0, &discard); 2135 c = read_escape (readcharfun, 0, &discard);
2137 else if (BASE_LEADING_CODE_P (c)) 2136 else if (EQ (readcharfun, Qget_file_char)
2138 c = read_multibyte (c, readcharfun); 2137 && BASE_LEADING_CODE_P (c))
2138 c = read_multibyte (c, readcharfun, &discard);
2139 2139
2140 return make_number (c); 2140 return make_number (c);
2141 } 2141 }
@@ -2145,14 +2145,12 @@ read1 (readcharfun, pch, first_in_list)
2145 char *p = read_buffer; 2145 char *p = read_buffer;
2146 char *end = read_buffer + read_buffer_size; 2146 char *end = read_buffer + read_buffer_size;
2147 register int c; 2147 register int c;
2148 /* 1 if we saw an escape sequence specifying 2148 /* Nonzero if we saw an escape sequence specifying
2149 a multibyte character, or a multibyte character. */ 2149 a multibyte character. */
2150 int force_multibyte = 0; 2150 int force_multibyte = 0;
2151 /* 1 if we saw an escape sequence specifying 2151 /* Nonzero if we saw an escape sequence specifying
2152 a single-byte character. */ 2152 a single-byte character. */
2153 int force_singlebyte = 0; 2153 int force_singlebyte = 0;
2154 /* 1 if read_buffer contains multibyte text now. */
2155 int is_multibyte = 0;
2156 int cancel = 0; 2154 int cancel = 0;
2157 int nchars = 0; 2155 int nchars = 0;
2158 2156
@@ -2170,6 +2168,7 @@ read1 (readcharfun, pch, first_in_list)
2170 2168
2171 if (c == '\\') 2169 if (c == '\\')
2172 { 2170 {
2171 int modifiers;
2173 int byterep; 2172 int byterep;
2174 2173
2175 c = read_escape (readcharfun, 1, &byterep); 2174 c = read_escape (readcharfun, 1, &byterep);
@@ -2182,53 +2181,92 @@ read1 (readcharfun, pch, first_in_list)
2182 continue; 2181 continue;
2183 } 2182 }
2184 2183
2184 modifiers = c & CHAR_MODIFIER_MASK;
2185 c = c & ~CHAR_MODIFIER_MASK;
2186
2185 if (byterep == 1) 2187 if (byterep == 1)
2186 force_singlebyte = 1; 2188 {
2187 else if (byterep == 2) 2189 force_singlebyte = 1;
2188 force_multibyte = 1; 2190 if (c >= 0x80)
2189 } 2191 /* Raw 8-bit code */
2192 c = BYTE8_TO_CHAR (c);
2193 }
2194 else if (byterep > 1)
2195 {
2196 force_multibyte = 1;
2197 if (byterep == 2)
2198 c = BYTE8_TO_CHAR (c);
2199 }
2200 else if (c >= 0x80)
2201 {
2202 force_singlebyte = 1;
2203 c = BYTE8_TO_CHAR (c);
2204 }
2190 2205
2191 /* A character that must be multibyte forces multibyte. */ 2206 if (ASCII_CHAR_P (c))
2192 if (! SINGLE_BYTE_CHAR_P (c & ~CHAR_MODIFIER_MASK)) 2207 {
2193 force_multibyte = 1; 2208 /* Allow `\C- ' and `\C-?'. */
2209 if (modifiers == CHAR_CTL)
2210 {
2211 if (c == ' ')
2212 c = 0, modifiers = 0;
2213 else if (c == '?')
2214 c = 127, modifiers = 0;
2215 }
2216 if (modifiers & CHAR_SHIFT)
2217 {
2218 /* Shift modifier is valid only with [A-Za-z]. */
2219 if (c >= 'A' && c <= 'Z')
2220 modifiers &= ~CHAR_SHIFT;
2221 else if (c >= 'a' && c <= 'z')
2222 c -= ('a' - 'A'), modifiers &= ~CHAR_SHIFT;
2223 }
2224
2225 if (modifiers & CHAR_META)
2226 {
2227 /* Move the meta bit to the right place for a
2228 string. */
2229 modifiers &= ~CHAR_META;
2230 c = BYTE8_TO_CHAR (c | 0x80);
2231 force_singlebyte = 1;
2232 }
2233 }
2194 2234
2195 /* If we just discovered the need to be multibyte, 2235 /* Any modifiers remaining are invalid. */
2196 convert the text accumulated thus far. */ 2236 if (modifiers)
2197 if (force_multibyte && ! is_multibyte) 2237 error ("Invalid modifier in string");
2198 { 2238 p += CHAR_STRING (c, (unsigned char *) p);
2199 is_multibyte = 1;
2200 to_multibyte (&p, &end, &nchars);
2201 } 2239 }
2202 2240 else if (c >= 0x80)
2203 /* Allow `\C- ' and `\C-?'. */
2204 if (c == (CHAR_CTL | ' '))
2205 c = 0;
2206 else if (c == (CHAR_CTL | '?'))
2207 c = 127;
2208
2209 if (c & CHAR_SHIFT)
2210 { 2241 {
2211 /* Shift modifier is valid only with [A-Za-z]. */ 2242 if (EQ (readcharfun, Qget_file_char))
2212 if ((c & 0377) >= 'A' && (c & 0377) <= 'Z') 2243 {
2213 c &= ~CHAR_SHIFT; 2244 if (BASE_LEADING_CODE_P (c))
2214 else if ((c & 0377) >= 'a' && (c & 0377) <= 'z') 2245 {
2215 c = (c & ~CHAR_SHIFT) - ('a' - 'A'); 2246 int nbytes;
2247 c = read_multibyte (c, readcharfun, &nbytes);
2248 if (nbytes > 1)
2249 force_multibyte = 1;
2250 else
2251 {
2252 force_singlebyte = 1;
2253 c = BYTE8_TO_CHAR (c);
2254 }
2255 }
2256 else
2257 {
2258 force_singlebyte = 1;
2259 c = BYTE8_TO_CHAR (c);
2260 }
2261 }
2262 else
2263 force_multibyte = 1;
2264 p += CHAR_STRING (c, (unsigned char *) p);
2216 } 2265 }
2217
2218 if (c & CHAR_META)
2219 /* Move the meta bit to the right place for a string. */
2220 c = (c & ~CHAR_META) | 0x80;
2221 if (c & CHAR_MODIFIER_MASK)
2222 error ("Invalid modifier in string");
2223
2224 if (is_multibyte)
2225 p += CHAR_STRING (c, p);
2226 else 2266 else
2227 *p++ = c; 2267 *p++ = c;
2228
2229 nchars++; 2268 nchars++;
2230 } 2269 }
2231
2232 if (c < 0) 2270 if (c < 0)
2233 end_of_file_error (); 2271 end_of_file_error ();
2234 2272
@@ -2238,42 +2276,24 @@ read1 (readcharfun, pch, first_in_list)
2238 if (!NILP (Vpurify_flag) && NILP (Vdoc_file_name) && cancel) 2276 if (!NILP (Vpurify_flag) && NILP (Vdoc_file_name) && cancel)
2239 return make_number (0); 2277 return make_number (0);
2240 2278
2241 if (is_multibyte || force_singlebyte) 2279 if (force_multibyte)
2280 /* READ_BUFFER already contains valid multibyte forms. */
2242 ; 2281 ;
2243 else if (load_convert_to_unibyte) 2282 else if (force_singlebyte)
2244 { 2283 {
2245 Lisp_Object string; 2284 nchars = str_as_unibyte (read_buffer, p - read_buffer);
2246 to_multibyte (&p, &end, &nchars); 2285 p = read_buffer + nchars;
2247 if (p - read_buffer != nchars)
2248 {
2249 string = make_multibyte_string (read_buffer, nchars,
2250 p - read_buffer);
2251 return Fstring_make_unibyte (string);
2252 }
2253 /* We can make a unibyte string directly. */
2254 is_multibyte = 0;
2255 }
2256 else if (EQ (readcharfun, Qget_file_char)
2257 || EQ (readcharfun, Qlambda))
2258 {
2259 /* Nowadays, reading directly from a file is used only for
2260 compiled Emacs Lisp files, and those always use the
2261 Emacs internal encoding. Meanwhile, Qlambda is used
2262 for reading dynamic byte code (compiled with
2263 byte-compile-dynamic = t). */
2264 to_multibyte (&p, &end, &nchars);
2265 is_multibyte = 1;
2266 } 2286 }
2267 else 2287 else
2268 /* In all other cases, if we read these bytes as 2288 /* Otherwise, READ_BUFFER contains only ASCII. */
2269 separate characters, treat them as separate characters now. */
2270 ;
2271 2289
2272 if (read_pure) 2290 if (read_pure)
2273 return make_pure_string (read_buffer, nchars, p - read_buffer, 2291 return make_pure_string (read_buffer, nchars, p - read_buffer,
2274 is_multibyte); 2292 (force_multibyte
2293 || (p - read_buffer != nchars)));
2275 return make_specified_string (read_buffer, nchars, p - read_buffer, 2294 return make_specified_string (read_buffer, nchars, p - read_buffer,
2276 is_multibyte); 2295 (force_multibyte
2296 || (p - read_buffer != nchars)));
2277 } 2297 }
2278 2298
2279 case '.': 2299 case '.':