diff options
| author | Richard M. Stallman | 1998-04-23 21:22:51 +0000 |
|---|---|---|
| committer | Richard M. Stallman | 1998-04-23 21:22:51 +0000 |
| commit | bed23cb24b269a01a437ca83e26bc0830ff5cfa5 (patch) | |
| tree | ae1b2558d24a121f24c3c7e9e168d5ed0846e951 | |
| parent | d67e2df9c3ab30fea86d9a9981e3f7e7dcddab4e (diff) | |
| download | emacs-bed23cb24b269a01a437ca83e26bc0830ff5cfa5.tar.gz emacs-bed23cb24b269a01a437ca83e26bc0830ff5cfa5.zip | |
(read_from_string_index_byte): New variable.
(read_from_string_index): Now counts characters.
(readchar, unreadchar, Fread_from_string): Changed accordingly.
(readchar): Read a multibyte char all at once
from a buffer, marker or string.
(unreadchar): Unread a multibyte char all at once.
(read1): Properly handle non-escaped multibyte chars.
They force a string to be multibyte.
When reading direct from a file, any multibyte sequence means
a multibyte string.
Insist on MAX_LENGTH_OF_MULTI_BYTE_FORM bytes when checking
for read_buffer full; this way need not check specially for multibyte.
| -rw-r--r-- | src/lread.c | 225 |
1 files changed, 114 insertions, 111 deletions
diff --git a/src/lread.c b/src/lread.c index 18fc07246ad..895fb6caa19 100644 --- a/src/lread.c +++ b/src/lread.c | |||
| @@ -131,6 +131,7 @@ static int read_pure; | |||
| 131 | 131 | ||
| 132 | /* For use within read-from-string (this reader is non-reentrant!!) */ | 132 | /* For use within read-from-string (this reader is non-reentrant!!) */ |
| 133 | static int read_from_string_index; | 133 | static int read_from_string_index; |
| 134 | static int read_from_string_index_byte; | ||
| 134 | static int read_from_string_limit; | 135 | static int read_from_string_limit; |
| 135 | 136 | ||
| 136 | /* Number of bytes left to read in the buffer character | 137 | /* Number of bytes left to read in the buffer character |
| @@ -169,64 +170,59 @@ readchar (readcharfun) | |||
| 169 | Lisp_Object readcharfun; | 170 | Lisp_Object readcharfun; |
| 170 | { | 171 | { |
| 171 | Lisp_Object tem; | 172 | Lisp_Object tem; |
| 172 | register struct buffer *inbuffer; | ||
| 173 | register int c, mpos; | 173 | register int c, mpos; |
| 174 | 174 | ||
| 175 | if (BUFFERP (readcharfun)) | 175 | if (BUFFERP (readcharfun)) |
| 176 | { | 176 | { |
| 177 | inbuffer = XBUFFER (readcharfun); | 177 | register struct buffer *inbuffer = XBUFFER (readcharfun); |
| 178 | 178 | ||
| 179 | if (readchar_backlog == 0) | 179 | int pt_byte = BUF_PT_BYTE (inbuffer); |
| 180 | { | 180 | int orig_pt_byte = pt_byte; |
| 181 | int pt_byte = BUF_PT_BYTE (inbuffer); | ||
| 182 | int orig_pt_byte = pt_byte; | ||
| 183 | 181 | ||
| 184 | if (pt_byte >= BUF_ZV_BYTE (inbuffer)) | 182 | if (pt_byte >= BUF_ZV_BYTE (inbuffer)) |
| 185 | return -1; | 183 | return -1; |
| 186 | 184 | ||
| 187 | if (! NILP (inbuffer->enable_multibyte_characters)) | 185 | if (! NILP (inbuffer->enable_multibyte_characters)) |
| 188 | BUF_INC_POS (inbuffer, pt_byte); | 186 | { |
| 189 | else | 187 | unsigned char *p = BUF_BYTE_ADDRESS (inbuffer, pt_byte); |
| 190 | pt_byte++; | 188 | BUF_INC_POS (inbuffer, pt_byte); |
| 191 | SET_BUF_PT_BOTH (inbuffer, BUF_PT (inbuffer) + 1, pt_byte); | 189 | c = STRING_CHAR (p, pt_byte - orig_pt_byte); |
| 192 | readchar_backlog = pt_byte - orig_pt_byte; | 190 | } |
| 191 | else | ||
| 192 | { | ||
| 193 | c = BUF_FETCH_BYTE (inbuffer, pt_byte); | ||
| 194 | pt_byte++; | ||
| 193 | } | 195 | } |
| 196 | SET_BUF_PT_BOTH (inbuffer, BUF_PT (inbuffer) + 1, pt_byte); | ||
| 194 | 197 | ||
| 195 | /* We get the address of the byte just passed, | 198 | return c; |
| 196 | which is the last byte of the character. | ||
| 197 | The other bytes in this character are consecutive with it, | ||
| 198 | because the gap can't be in the middle of a character. */ | ||
| 199 | return *(BUF_BYTE_ADDRESS (inbuffer, BUF_PT_BYTE (inbuffer) - 1) | ||
| 200 | - --readchar_backlog); | ||
| 201 | } | 199 | } |
| 202 | if (MARKERP (readcharfun)) | 200 | if (MARKERP (readcharfun)) |
| 203 | { | 201 | { |
| 204 | inbuffer = XMARKER (readcharfun)->buffer; | 202 | register struct buffer *inbuffer = XMARKER (readcharfun)->buffer; |
| 205 | |||
| 206 | if (readchar_backlog == 0) | ||
| 207 | { | ||
| 208 | int bytepos = marker_byte_position (readcharfun); | ||
| 209 | int orig_bytepos = bytepos; | ||
| 210 | 203 | ||
| 211 | if (bytepos >= BUF_ZV_BYTE (inbuffer)) | 204 | int bytepos = marker_byte_position (readcharfun); |
| 212 | return -1; | 205 | int orig_bytepos = bytepos; |
| 213 | 206 | ||
| 214 | if (! NILP (inbuffer->enable_multibyte_characters)) | 207 | if (bytepos >= BUF_ZV_BYTE (inbuffer)) |
| 215 | INC_POS (bytepos); | 208 | return -1; |
| 216 | else | ||
| 217 | bytepos++; | ||
| 218 | XMARKER (readcharfun)->bytepos = bytepos; | ||
| 219 | XMARKER (readcharfun)->charpos++; | ||
| 220 | 209 | ||
| 221 | readchar_backlog = bytepos - orig_bytepos; | 210 | if (! NILP (inbuffer->enable_multibyte_characters)) |
| 211 | { | ||
| 212 | unsigned char *p = BUF_BYTE_ADDRESS (inbuffer, bytepos); | ||
| 213 | BUF_INC_POS (inbuffer, bytepos); | ||
| 214 | c = STRING_CHAR (p, bytepos - orig_bytepos); | ||
| 215 | } | ||
| 216 | else | ||
| 217 | { | ||
| 218 | c = BUF_FETCH_BYTE (inbuffer, bytepos); | ||
| 219 | bytepos++; | ||
| 222 | } | 220 | } |
| 223 | 221 | ||
| 224 | /* We get the address of the byte just passed, | 222 | XMARKER (readcharfun)->bytepos = bytepos; |
| 225 | which is the last byte of the character. | 223 | XMARKER (readcharfun)->charpos++; |
| 226 | The other bytes in this character are consecutive with it, | 224 | |
| 227 | because the gap can't be in the middle of a character. */ | 225 | return c; |
| 228 | return *(BUF_BYTE_ADDRESS (inbuffer, XMARKER (readcharfun)->bytepos - 1) | ||
| 229 | - --readchar_backlog); | ||
| 230 | } | 226 | } |
| 231 | if (EQ (readcharfun, Qget_file_char)) | 227 | if (EQ (readcharfun, Qget_file_char)) |
| 232 | { | 228 | { |
| @@ -244,13 +240,14 @@ readchar (readcharfun) | |||
| 244 | 240 | ||
| 245 | if (STRINGP (readcharfun)) | 241 | if (STRINGP (readcharfun)) |
| 246 | { | 242 | { |
| 247 | register int c; | 243 | if (read_from_string_index >= read_from_string_limit) |
| 248 | /* This used to be return of a conditional expression, | ||
| 249 | but that truncated -1 to a char on VMS. */ | ||
| 250 | if (read_from_string_index < read_from_string_limit) | ||
| 251 | c = XSTRING (readcharfun)->data[read_from_string_index++]; | ||
| 252 | else | ||
| 253 | c = -1; | 244 | c = -1; |
| 245 | else if (STRING_MULTIBYTE (readcharfun)) | ||
| 246 | FETCH_STRING_CHAR_ADVANCE (c, readcharfun, | ||
| 247 | read_from_string_index, | ||
| 248 | read_from_string_index_byte); | ||
| 249 | else | ||
| 250 | c = XSTRING (readcharfun)->data[read_from_string_index++]; | ||
| 254 | 251 | ||
| 255 | return c; | 252 | return c; |
| 256 | } | 253 | } |
| @@ -276,42 +273,36 @@ unreadchar (readcharfun, c) | |||
| 276 | ; | 273 | ; |
| 277 | else if (BUFFERP (readcharfun)) | 274 | else if (BUFFERP (readcharfun)) |
| 278 | { | 275 | { |
| 279 | if (!SINGLE_BYTE_CHAR_P (c)) | 276 | struct buffer *b = XBUFFER (readcharfun); |
| 280 | readchar_backlog++; | 277 | int bytepos = BUF_PT_BYTE (b); |
| 281 | else | ||
| 282 | { | ||
| 283 | struct buffer *b = XBUFFER (readcharfun); | ||
| 284 | int bytepos = BUF_PT_BYTE (b); | ||
| 285 | 278 | ||
| 286 | BUF_PT (b)--; | 279 | BUF_PT (b)--; |
| 287 | if (! NILP (b->enable_multibyte_characters)) | 280 | if (! NILP (b->enable_multibyte_characters)) |
| 288 | BUF_DEC_POS (b, bytepos); | 281 | BUF_DEC_POS (b, bytepos); |
| 289 | else | 282 | else |
| 290 | bytepos--; | 283 | bytepos--; |
| 291 | 284 | ||
| 292 | BUF_PT_BYTE (b) = bytepos; | 285 | BUF_PT_BYTE (b) = bytepos; |
| 293 | } | ||
| 294 | } | 286 | } |
| 295 | else if (MARKERP (readcharfun)) | 287 | else if (MARKERP (readcharfun)) |
| 296 | { | 288 | { |
| 297 | if (!SINGLE_BYTE_CHAR_P (c)) | 289 | struct buffer *b = XMARKER (readcharfun)->buffer; |
| 298 | readchar_backlog++; | 290 | int bytepos = XMARKER (readcharfun)->bytepos; |
| 299 | else | ||
| 300 | { | ||
| 301 | struct buffer *b = XMARKER (readcharfun)->buffer; | ||
| 302 | int bytepos = XMARKER (readcharfun)->bytepos; | ||
| 303 | 291 | ||
| 304 | XMARKER (readcharfun)->charpos--; | 292 | XMARKER (readcharfun)->charpos--; |
| 305 | if (! NILP (b->enable_multibyte_characters)) | 293 | if (! NILP (b->enable_multibyte_characters)) |
| 306 | BUF_DEC_POS (b, bytepos); | 294 | BUF_DEC_POS (b, bytepos); |
| 307 | else | 295 | else |
| 308 | bytepos--; | 296 | bytepos--; |
| 309 | 297 | ||
| 310 | XMARKER (readcharfun)->bytepos = bytepos; | 298 | XMARKER (readcharfun)->bytepos = bytepos; |
| 311 | } | ||
| 312 | } | 299 | } |
| 313 | else if (STRINGP (readcharfun)) | 300 | else if (STRINGP (readcharfun)) |
| 314 | read_from_string_index--; | 301 | { |
| 302 | read_from_string_index--; | ||
| 303 | read_from_string_index_byte | ||
| 304 | = string_char_to_byte (readcharfun, read_from_string_index); | ||
| 305 | } | ||
| 315 | else if (EQ (readcharfun, Qget_file_char)) | 306 | else if (EQ (readcharfun, Qget_file_char)) |
| 316 | ungetc (c, instream); | 307 | ungetc (c, instream); |
| 317 | else | 308 | else |
| @@ -321,7 +312,7 @@ unreadchar (readcharfun, c) | |||
| 321 | static Lisp_Object read0 (), read1 (), read_list (), read_vector (); | 312 | static Lisp_Object read0 (), read1 (), read_list (), read_vector (); |
| 322 | static int read_multibyte (); | 313 | static int read_multibyte (); |
| 323 | 314 | ||
| 324 | /* get a character from the tty */ | 315 | /* Get a character from the tty. */ |
| 325 | 316 | ||
| 326 | extern Lisp_Object read_char (); | 317 | extern Lisp_Object read_char (); |
| 327 | 318 | ||
| @@ -1180,12 +1171,11 @@ START and END optionally delimit a substring of STRING from which to read;\n\ | |||
| 1180 | CHECK_STRING (string,0); | 1171 | CHECK_STRING (string,0); |
| 1181 | 1172 | ||
| 1182 | if (NILP (end)) | 1173 | if (NILP (end)) |
| 1183 | endval = STRING_BYTES (XSTRING (string)); | 1174 | endval = XSTRING (string)->size; |
| 1184 | else | 1175 | else |
| 1185 | { | 1176 | { |
| 1186 | CHECK_NUMBER (end, 2); | 1177 | CHECK_NUMBER (end, 2); |
| 1187 | endval = string_char_to_byte (string, XINT (end)); | 1178 | if (endval < 0 || endval > XSTRING (string)->size) |
| 1188 | if (endval < 0 || endval > STRING_BYTES (XSTRING (string))) | ||
| 1189 | args_out_of_range (string, end); | 1179 | args_out_of_range (string, end); |
| 1190 | } | 1180 | } |
| 1191 | 1181 | ||
| @@ -1194,21 +1184,19 @@ START and END optionally delimit a substring of STRING from which to read;\n\ | |||
| 1194 | else | 1184 | else |
| 1195 | { | 1185 | { |
| 1196 | CHECK_NUMBER (start, 1); | 1186 | CHECK_NUMBER (start, 1); |
| 1197 | startval = string_char_to_byte (string, XINT (start)); | ||
| 1198 | if (startval < 0 || startval > endval) | 1187 | if (startval < 0 || startval > endval) |
| 1199 | args_out_of_range (string, start); | 1188 | args_out_of_range (string, start); |
| 1200 | } | 1189 | } |
| 1201 | 1190 | ||
| 1202 | read_from_string_index = startval; | 1191 | read_from_string_index = startval; |
| 1192 | read_from_string_index_byte = string_char_to_byte (string, startval); | ||
| 1203 | read_from_string_limit = endval; | 1193 | read_from_string_limit = endval; |
| 1204 | 1194 | ||
| 1205 | new_backquote_flag = 0; | 1195 | new_backquote_flag = 0; |
| 1206 | read_objects = Qnil; | 1196 | read_objects = Qnil; |
| 1207 | 1197 | ||
| 1208 | tem = read0 (string); | 1198 | tem = read0 (string); |
| 1209 | endval = string_byte_to_char (string, | 1199 | return Fcons (tem, make_number (read_from_string_index)); |
| 1210 | read_from_string_index); | ||
| 1211 | return Fcons (tem, make_number (endval)); | ||
| 1212 | } | 1200 | } |
| 1213 | 1201 | ||
| 1214 | /* Use this for recursive reads, in contexts where internal tokens | 1202 | /* Use this for recursive reads, in contexts where internal tokens |
| @@ -1744,49 +1732,45 @@ read1 (readcharfun, pch, first_in_list) | |||
| 1744 | while ((c = READCHAR) >= 0 | 1732 | while ((c = READCHAR) >= 0 |
| 1745 | && c != '\"') | 1733 | && c != '\"') |
| 1746 | { | 1734 | { |
| 1747 | if (p == end) | 1735 | if (end - p < MAX_LENGTH_OF_MULTI_BYTE_FORM) |
| 1748 | { | 1736 | { |
| 1749 | char *new = (char *) xrealloc (read_buffer, read_buffer_size *= 2); | 1737 | char *new = (char *) xrealloc (read_buffer, read_buffer_size *= 2); |
| 1750 | p += new - read_buffer; | 1738 | p += new - read_buffer; |
| 1751 | read_buffer += new - read_buffer; | 1739 | read_buffer += new - read_buffer; |
| 1752 | end = read_buffer + read_buffer_size; | 1740 | end = read_buffer + read_buffer_size; |
| 1753 | } | 1741 | } |
| 1742 | |||
| 1754 | if (c == '\\') | 1743 | if (c == '\\') |
| 1755 | { | 1744 | { |
| 1756 | c = read_escape (readcharfun, 1); | 1745 | c = read_escape (readcharfun, 1); |
| 1757 | if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_META))) | 1746 | |
| 1747 | /* C is -1 if \ newline has just been seen */ | ||
| 1748 | if (c == -1) | ||
| 1758 | { | 1749 | { |
| 1759 | unsigned char workbuf[4]; | 1750 | if (p == read_buffer) |
| 1760 | unsigned char *str = workbuf; | 1751 | cancel = 1; |
| 1761 | int length; | ||
| 1762 | |||
| 1763 | length = non_ascii_char_to_string (c, workbuf, &str); | ||
| 1764 | if (length > 1) | ||
| 1765 | force_multibyte = 1; | ||
| 1766 | |||
| 1767 | if (p + length > end) | ||
| 1768 | { | ||
| 1769 | char *new = (char *) xrealloc (read_buffer, read_buffer_size *= 2); | ||
| 1770 | p += new - read_buffer; | ||
| 1771 | read_buffer += new - read_buffer; | ||
| 1772 | end = read_buffer + read_buffer_size; | ||
| 1773 | } | ||
| 1774 | |||
| 1775 | bcopy (str, p, length); | ||
| 1776 | p += length; | ||
| 1777 | continue; | 1752 | continue; |
| 1778 | } | 1753 | } |
| 1754 | |||
| 1779 | /* If an escape specifies a non-ASCII single-byte character, | 1755 | /* If an escape specifies a non-ASCII single-byte character, |
| 1780 | this must be a unibyte string. */ | 1756 | this must be a unibyte string. */ |
| 1781 | else if (! ASCII_BYTE_P (c)) | 1757 | if (SINGLE_BYTE_CHAR_P ((c & ~CHAR_META)) |
| 1758 | && ! ASCII_BYTE_P (c)) | ||
| 1782 | force_singlebyte = 1; | 1759 | force_singlebyte = 1; |
| 1783 | } | 1760 | } |
| 1784 | 1761 | ||
| 1785 | /* c is -1 if \ newline has just been seen */ | 1762 | if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_META))) |
| 1786 | if (c == -1) | ||
| 1787 | { | 1763 | { |
| 1788 | if (p == read_buffer) | 1764 | unsigned char workbuf[4]; |
| 1789 | cancel = 1; | 1765 | unsigned char *str = workbuf; |
| 1766 | int length; | ||
| 1767 | |||
| 1768 | length = non_ascii_char_to_string (c, workbuf, &str); | ||
| 1769 | if (length > 1) | ||
| 1770 | force_multibyte = 1; | ||
| 1771 | |||
| 1772 | bcopy (str, p, length); | ||
| 1773 | p += length; | ||
| 1790 | } | 1774 | } |
| 1791 | else | 1775 | else |
| 1792 | { | 1776 | { |
| @@ -1814,7 +1798,7 @@ read1 (readcharfun, pch, first_in_list) | |||
| 1814 | return make_number (0); | 1798 | return make_number (0); |
| 1815 | 1799 | ||
| 1816 | if (force_singlebyte && force_multibyte) | 1800 | if (force_singlebyte && force_multibyte) |
| 1817 | error ("Multibyte and single-byte escapes in one string constant"); | 1801 | error ("Multibyte and unibyte characters in one string constant"); |
| 1818 | 1802 | ||
| 1819 | if (force_singlebyte) | 1803 | if (force_singlebyte) |
| 1820 | nchars = p - read_buffer; | 1804 | nchars = p - read_buffer; |
| @@ -1831,7 +1815,14 @@ read1 (readcharfun, pch, first_in_list) | |||
| 1831 | return Fstring_make_unibyte (string); | 1815 | return Fstring_make_unibyte (string); |
| 1832 | } | 1816 | } |
| 1833 | } | 1817 | } |
| 1818 | else if (EQ (readcharfun, Qget_file_char)) | ||
| 1819 | /* Nowadays, reading directly from a file | ||
| 1820 | is used only for compiled Emacs Lisp files, | ||
| 1821 | and those always use the Emacs internal encoding. */ | ||
| 1822 | nchars = multibyte_chars_in_text (read_buffer, p - read_buffer); | ||
| 1834 | else | 1823 | else |
| 1824 | /* In all other cases, if we read these bytes as | ||
| 1825 | separate characters, treat them as separate characters now. */ | ||
| 1835 | nchars = p - read_buffer; | 1826 | nchars = p - read_buffer; |
| 1836 | 1827 | ||
| 1837 | if (read_pure) | 1828 | if (read_pure) |
| @@ -1884,7 +1875,7 @@ read1 (readcharfun, pch, first_in_list) | |||
| 1884 | || c == '[' || c == ']' || c == '#' | 1875 | || c == '[' || c == ']' || c == '#' |
| 1885 | )) | 1876 | )) |
| 1886 | { | 1877 | { |
| 1887 | if (p == end) | 1878 | if (end - p < MAX_LENGTH_OF_MULTI_BYTE_FORM) |
| 1888 | { | 1879 | { |
| 1889 | register char *new = (char *) xrealloc (read_buffer, read_buffer_size *= 2); | 1880 | register char *new = (char *) xrealloc (read_buffer, read_buffer_size *= 2); |
| 1890 | p += new - read_buffer; | 1881 | p += new - read_buffer; |
| @@ -1897,7 +1888,19 @@ read1 (readcharfun, pch, first_in_list) | |||
| 1897 | quoted = 1; | 1888 | quoted = 1; |
| 1898 | } | 1889 | } |
| 1899 | 1890 | ||
| 1900 | *p++ = c; | 1891 | if (! SINGLE_BYTE_CHAR_P (c)) |
| 1892 | { | ||
| 1893 | unsigned char workbuf[4]; | ||
| 1894 | unsigned char *str = workbuf; | ||
| 1895 | int length; | ||
| 1896 | |||
| 1897 | length = non_ascii_char_to_string (c, workbuf, &str); | ||
| 1898 | |||
| 1899 | bcopy (str, p, length); | ||
| 1900 | p += length; | ||
| 1901 | } | ||
| 1902 | else | ||
| 1903 | *p++ = c; | ||
| 1901 | 1904 | ||
| 1902 | c = READCHAR; | 1905 | c = READCHAR; |
| 1903 | } | 1906 | } |
| @@ -2553,7 +2556,7 @@ init_obarray () | |||
| 2553 | Qvariable_documentation = intern ("variable-documentation"); | 2556 | Qvariable_documentation = intern ("variable-documentation"); |
| 2554 | staticpro (&Qvariable_documentation); | 2557 | staticpro (&Qvariable_documentation); |
| 2555 | 2558 | ||
| 2556 | read_buffer_size = 100; | 2559 | read_buffer_size = 100 + MAX_LENGTH_OF_MULTI_BYTE_FORM; |
| 2557 | read_buffer = (char *) malloc (read_buffer_size); | 2560 | read_buffer = (char *) malloc (read_buffer_size); |
| 2558 | } | 2561 | } |
| 2559 | 2562 | ||