(read_from_string_index_byte): New variable.

(read_from_string_index): Now counts characters. (readchar, unreadchar, Fread_from_string): Changed accordingly. (readchar): Read a multibyte char all at once from a buffer, marker or string. (unreadchar): Unread a multibyte char all at once. (read1): Properly handle non-escaped multibyte chars. They force a string to be multibyte. When reading direct from a file, any multibyte sequence means a multibyte string. Insist on MAX_LENGTH_OF_MULTI_BYTE_FORM bytes when checking for read_buffer full; this way need not check specially for multibyte.
author: Richard M. Stallman 1998-04-23 21:22:51 +0000
committer: Richard M. Stallman 1998-04-23 21:22:51 +0000
commit: bed23cb24b269a01a437ca83e26bc0830ff5cfa5 (patch)
tree: ae1b2558d24a121f24c3c7e9e168d5ed0846e951
parent: d67e2df9c3ab30fea86d9a9981e3f7e7dcddab4e (diff)
download: emacs-bed23cb24b269a01a437ca83e26bc0830ff5cfa5.tar.gz
emacs-bed23cb24b269a01a437ca83e26bc0830ff5cfa5.zip
1 files changed, 114 insertions, 111 deletions
diff --git a/src/lread.c b/src/lread.c
index 18fc07246ad..895fb6caa19 100644
--- a/src/lread.c
+++ b/src/lread.c
@@ -131,6 +131,7 @@ static int read_pure;
 /* For use within read-from-string (this reader is non-reentrant!!)  */
 static int read_from_string_index;
+static int read_from_string_index_byte;
 static int read_from_string_limit;
 /* Number of bytes left to read in the buffer character
@@ -169,64 +170,59 @@ readchar (readcharfun)
     Lisp_Object readcharfun;
 {
  Lisp_Object tem;
-  register struct buffer *inbuffer;
  register int c, mpos;
  if (BUFFERP (readcharfun))
    {
-      inbuffer = XBUFFER (readcharfun);
+      register struct buffer *inbuffer = XBUFFER (readcharfun);
-      if (readchar_backlog == 0)
+      int pt_byte = BUF_PT_BYTE (inbuffer);
-        {
+      int orig_pt_byte = pt_byte;
-          int pt_byte = BUF_PT_BYTE (inbuffer);
-          int orig_pt_byte = pt_byte;
-          if (pt_byte >= BUF_ZV_BYTE (inbuffer))
+      if (pt_byte >= BUF_ZV_BYTE (inbuffer))
-            return -1;
+        return -1;
-          if (! NILP (inbuffer->enable_multibyte_characters))
+      if (! NILP (inbuffer->enable_multibyte_characters))
-            BUF_INC_POS (inbuffer, pt_byte);
+        {
-          else
+          unsigned char *p = BUF_BYTE_ADDRESS (inbuffer, pt_byte);
-            pt_byte++;
+          BUF_INC_POS (inbuffer, pt_byte);
-          SET_BUF_PT_BOTH (inbuffer, BUF_PT (inbuffer) + 1, pt_byte);
+          c = STRING_CHAR (p, pt_byte - orig_pt_byte);
-          readchar_backlog = pt_byte - orig_pt_byte;
+        }
+      else
+        {
+          c = BUF_FETCH_BYTE (inbuffer, pt_byte);
+          pt_byte++;
        }
+      SET_BUF_PT_BOTH (inbuffer, BUF_PT (inbuffer) + 1, pt_byte);
-      /* We get the address of the byte just passed,
+      return c;
-         which is the last byte of the character.
-         The other bytes in this character are consecutive with it,
-         because the gap can't be in the middle of a character.  */
-      return *(BUF_BYTE_ADDRESS (inbuffer, BUF_PT_BYTE (inbuffer) - 1)
-               - --readchar_backlog);
    }
  if (MARKERP (readcharfun))
    {
-      inbuffer = XMARKER (readcharfun)->buffer;
+      register struct buffer *inbuffer = XMARKER (readcharfun)->buffer;
-      if (readchar_backlog == 0)
-        {
-          int bytepos = marker_byte_position (readcharfun);
-          int orig_bytepos = bytepos;
-          if (bytepos >= BUF_ZV_BYTE (inbuffer))
+      int bytepos = marker_byte_position (readcharfun);
-            return -1;
+      int orig_bytepos = bytepos;
-          if (! NILP (inbuffer->enable_multibyte_characters))
+      if (bytepos >= BUF_ZV_BYTE (inbuffer))
-            INC_POS (bytepos);
+        return -1;
-          else
-            bytepos++;
-          XMARKER (readcharfun)->bytepos = bytepos;
-          XMARKER (readcharfun)->charpos++;
-          readchar_backlog = bytepos - orig_bytepos;
+      if (! NILP (inbuffer->enable_multibyte_characters))
+        {
+          unsigned char *p = BUF_BYTE_ADDRESS (inbuffer, bytepos);
+          BUF_INC_POS (inbuffer, bytepos);
+          c = STRING_CHAR (p, bytepos - orig_bytepos);
+        }
+      else
+        {
+          c = BUF_FETCH_BYTE (inbuffer, bytepos);
+          bytepos++;
        }
-      /* We get the address of the byte just passed,
+      XMARKER (readcharfun)->bytepos = bytepos;
-         which is the last byte of the character.
+      XMARKER (readcharfun)->charpos++;
-         The other bytes in this character are consecutive with it,
-         because the gap can't be in the middle of a character.  */
+      return c;
-      return *(BUF_BYTE_ADDRESS (inbuffer, XMARKER (readcharfun)->bytepos - 1)
-               - --readchar_backlog);
    }
  if (EQ (readcharfun, Qget_file_char))
    {
@@ -244,13 +240,14 @@ readchar (readcharfun)
  if (STRINGP (readcharfun))
    {
-      register int c;
+      if (read_from_string_index >= read_from_string_limit)
-      /* This used to be return of a conditional expression,
-         but that truncated -1 to a char on VMS.  */
-      if (read_from_string_index < read_from_string_limit)
-        c = XSTRING (readcharfun)->data[read_from_string_index++];
-      else
        c = -1;
+      else if (STRING_MULTIBYTE (readcharfun))
+        FETCH_STRING_CHAR_ADVANCE (c, readcharfun,
+                                   read_from_string_index,
+                                   read_from_string_index_byte);
+      else
+        c = XSTRING (readcharfun)->data[read_from_string_index++];
      return c;
    }
@@ -276,42 +273,36 @@ unreadchar (readcharfun, c)
    ;
  else if (BUFFERP (readcharfun))
    {
-      if (!SINGLE_BYTE_CHAR_P (c))
+      struct buffer *b = XBUFFER (readcharfun);
-        readchar_backlog++;
+      int bytepos = BUF_PT_BYTE (b);
-      else
-        {
-          struct buffer *b = XBUFFER (readcharfun);
-          int bytepos = BUF_PT_BYTE (b);
-          BUF_PT (b)--;
+      BUF_PT (b)--;
-          if (! NILP (b->enable_multibyte_characters))
+      if (! NILP (b->enable_multibyte_characters))
-            BUF_DEC_POS (b, bytepos);
+        BUF_DEC_POS (b, bytepos);
-          else
+      else
-            bytepos--;
+        bytepos--;
-          BUF_PT_BYTE (b) = bytepos;
+      BUF_PT_BYTE (b) = bytepos;
-        }
    }
  else if (MARKERP (readcharfun))
    {
-      if (!SINGLE_BYTE_CHAR_P (c))
+      struct buffer *b = XMARKER (readcharfun)->buffer;
-        readchar_backlog++;
+      int bytepos = XMARKER (readcharfun)->bytepos;
-      else
-        {
-          struct buffer *b = XMARKER (readcharfun)->buffer;
-          int bytepos = XMARKER (readcharfun)->bytepos;
-          XMARKER (readcharfun)->charpos--;
+      XMARKER (readcharfun)->charpos--;
-          if (! NILP (b->enable_multibyte_characters))
+      if (! NILP (b->enable_multibyte_characters))
-            BUF_DEC_POS (b, bytepos);
+        BUF_DEC_POS (b, bytepos);
-          else
+      else
-            bytepos--;
+        bytepos--;
-          XMARKER (readcharfun)->bytepos = bytepos;
+      XMARKER (readcharfun)->bytepos = bytepos;
-        }
    }
  else if (STRINGP (readcharfun))
-    read_from_string_index--;
+    {
+      read_from_string_index--;
+      read_from_string_index_byte
+        = string_char_to_byte (readcharfun, read_from_string_index);
+    }
  else if (EQ (readcharfun, Qget_file_char))
    ungetc (c, instream);
  else
@@ -321,7 +312,7 @@ unreadchar (readcharfun, c)
 static Lisp_Object read0 (), read1 (), read_list (), read_vector ();
 static int read_multibyte ();
-/* get a character from the tty */
+/* Get a character from the tty.  */
 extern Lisp_Object read_char ();
@@ -1180,12 +1171,11 @@ START and END optionally delimit a substring of STRING from which to read;\n\
  CHECK_STRING (string,0);
  if (NILP (end))
-    endval = STRING_BYTES (XSTRING (string));
+    endval = XSTRING (string)->size;
  else
    {
      CHECK_NUMBER (end, 2);
-      endval = string_char_to_byte (string, XINT (end));
+      if (endval < 0 || endval > XSTRING (string)->size)
-      if (endval < 0 || endval > STRING_BYTES (XSTRING (string)))
        args_out_of_range (string, end);
    }
@@ -1194,21 +1184,19 @@ START and END optionally delimit a substring of STRING from which to read;\n\
  else
    {
      CHECK_NUMBER (start, 1);
-      startval = string_char_to_byte (string, XINT (start));
      if (startval < 0 || startval > endval)
        args_out_of_range (string, start);
    }
  read_from_string_index = startval;
+  read_from_string_index_byte = string_char_to_byte (string, startval);
  read_from_string_limit = endval;
  new_backquote_flag = 0;
  read_objects = Qnil;
  tem = read0 (string);
-  endval = string_byte_to_char (string,
+  return Fcons (tem, make_number (read_from_string_index));
-                                read_from_string_index);
-  return Fcons (tem, make_number (endval));
 }
 /* Use this for recursive reads, in contexts where internal tokens
@@ -1744,49 +1732,45 @@ read1 (readcharfun, pch, first_in_list)
        while ((c = READCHAR) >= 0
               && c != '\"')
          {
-            if (p == end)
+            if (end - p < MAX_LENGTH_OF_MULTI_BYTE_FORM)
              {
                char *new = (char *) xrealloc (read_buffer, read_buffer_size *= 2);
                p += new - read_buffer;
                read_buffer += new - read_buffer;
                end = read_buffer + read_buffer_size;
              }
            if (c == '\\')
              {
                c = read_escape (readcharfun, 1);
-                if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_META)))
+                /* C is -1 if \ newline has just been seen */
+                if (c == -1)
                  {
-                    unsigned char workbuf[4];
+                    if (p == read_buffer)
-                    unsigned char *str = workbuf;
+                      cancel = 1;
-                    int length;
-                    length = non_ascii_char_to_string (c, workbuf, &str);
-                    if (length > 1)
-                      force_multibyte = 1;
-                    if (p + length > end)
-                      {
-                        char *new = (char *) xrealloc (read_buffer, read_buffer_size *= 2);
-                        p += new - read_buffer;
-                        read_buffer += new - read_buffer;
-                        end = read_buffer + read_buffer_size;
-                      }
- 
-                    bcopy (str, p, length);
-                    p += length;
                    continue;
                  }
                /* If an escape specifies a non-ASCII single-byte character,
                   this must be a unibyte string.  */
-                else if (! ASCII_BYTE_P (c))
+                if (SINGLE_BYTE_CHAR_P ((c & ~CHAR_META))
+                    && ! ASCII_BYTE_P (c))
                  force_singlebyte = 1;
              }
-            /* c is -1 if \ newline has just been seen */
+            if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_META)))
-            if (c == -1)
              {
-                if (p == read_buffer)
+                unsigned char workbuf[4];
-                  cancel = 1;
+                unsigned char *str = workbuf;
+                int length;
+                length = non_ascii_char_to_string (c, workbuf, &str);
+                if (length > 1)
+                  force_multibyte = 1;
+                bcopy (str, p, length);
+                p += length;
              }
            else
              {
@@ -1814,7 +1798,7 @@ read1 (readcharfun, pch, first_in_list)
          return make_number (0);
        if (force_singlebyte && force_multibyte)
-          error ("Multibyte and single-byte escapes in one string constant");
+          error ("Multibyte and unibyte characters in one string constant");
        if (force_singlebyte)
          nchars = p - read_buffer;
@@ -1831,7 +1815,14 @@ read1 (readcharfun, pch, first_in_list)
                return Fstring_make_unibyte (string);
              }
          }
+        else if (EQ (readcharfun, Qget_file_char))
+          /* Nowadays, reading directly from a file
+             is used only for compiled Emacs Lisp files,
+             and those always use the Emacs internal encoding.  */
+          nchars = multibyte_chars_in_text (read_buffer, p - read_buffer);
        else
+          /* In all other cases, if we read these bytes as
+             separate characters, treat them as separate characters now.  */
          nchars = p - read_buffer;
        if (read_pure)
@@ -1884,7 +1875,7 @@ read1 (readcharfun, pch, first_in_list)
                      || c == '[' || c == ']' || c == '#'
                      ))
            {
-              if (p == end)
+              if (end - p < MAX_LENGTH_OF_MULTI_BYTE_FORM)
                {
                  register char *new = (char *) xrealloc (read_buffer, read_buffer_size *= 2);
                  p += new - read_buffer;
@@ -1897,7 +1888,19 @@ read1 (readcharfun, pch, first_in_list)
                  quoted = 1;
                }
-              *p++ = c;
+              if (! SINGLE_BYTE_CHAR_P (c))
+                {
+                  unsigned char workbuf[4];
+                  unsigned char *str = workbuf;
+                  int length;
+                  length = non_ascii_char_to_string (c, workbuf, &str);
+                  bcopy (str, p, length);
+                  p += length;
+                }
+              else
+                *p++ = c;
              c = READCHAR;
            }
@@ -2553,7 +2556,7 @@ init_obarray ()
  Qvariable_documentation = intern ("variable-documentation");
  staticpro (&Qvariable_documentation);
-  read_buffer_size = 100;
+  read_buffer_size = 100 + MAX_LENGTH_OF_MULTI_BYTE_FORM;
  read_buffer = (char *) malloc (read_buffer_size);
 }
author	Richard M. Stallman	1998-04-23 21:22:51 +0000
committer	Richard M. Stallman	1998-04-23 21:22:51 +0000
commit	bed23cb24b269a01a437ca83e26bc0830ff5cfa5 (patch)
tree	ae1b2558d24a121f24c3c7e9e168d5ed0846e951
parent	d67e2df9c3ab30fea86d9a9981e3f7e7dcddab4e (diff)
download	emacs-bed23cb24b269a01a437ca83e26bc0830ff5cfa5.tar.gz emacs-bed23cb24b269a01a437ca83e26bc0830ff5cfa5.zip

diff --git a/src/lread.c b/src/lread.c index 18fc07246ad..895fb6caa19 100644 --- a/src/lread.c +++ b/src/lread.c
@@ -131,6 +131,7 @@ static int read_pure;
131		131
132	/* For use within read-from-string (this reader is non-reentrant!!) */	132	/* For use within read-from-string (this reader is non-reentrant!!) */
133	static int read_from_string_index;	133	static int read_from_string_index;
		134	static int read_from_string_index_byte;
134	static int read_from_string_limit;	135	static int read_from_string_limit;
135		136
136	/* Number of bytes left to read in the buffer character	137	/* Number of bytes left to read in the buffer character
@@ -169,64 +170,59 @@ readchar (readcharfun)
169	Lisp_Object readcharfun;	170	Lisp_Object readcharfun;
170	{	171	{
171	Lisp_Object tem;	172	Lisp_Object tem;
172	register struct buffer *inbuffer;
173	register int c, mpos;	173	register int c, mpos;
174		174
175	if (BUFFERP (readcharfun))	175	if (BUFFERP (readcharfun))
176	{	176	{
177	inbuffer = XBUFFER (readcharfun);	177	register struct buffer *inbuffer = XBUFFER (readcharfun);
178		178
179	if (readchar_backlog == 0)	179	int pt_byte = BUF_PT_BYTE (inbuffer);
180	{	180	int orig_pt_byte = pt_byte;
181	int pt_byte = BUF_PT_BYTE (inbuffer);
182	int orig_pt_byte = pt_byte;
183		181
184	if (pt_byte >= BUF_ZV_BYTE (inbuffer))	182	if (pt_byte >= BUF_ZV_BYTE (inbuffer))
185	return -1;	183	return -1;
186		184
187	if (! NILP (inbuffer->enable_multibyte_characters))	185	if (! NILP (inbuffer->enable_multibyte_characters))
188	BUF_INC_POS (inbuffer, pt_byte);	186	{
189	else	187	unsigned char *p = BUF_BYTE_ADDRESS (inbuffer, pt_byte);
190	pt_byte++;	188	BUF_INC_POS (inbuffer, pt_byte);
191	SET_BUF_PT_BOTH (inbuffer, BUF_PT (inbuffer) + 1, pt_byte);	189	c = STRING_CHAR (p, pt_byte - orig_pt_byte);
192	readchar_backlog = pt_byte - orig_pt_byte;	190	}
		191	else
		192	{
		193	c = BUF_FETCH_BYTE (inbuffer, pt_byte);
		194	pt_byte++;
193	}	195	}
		196	SET_BUF_PT_BOTH (inbuffer, BUF_PT (inbuffer) + 1, pt_byte);
194		197
195	/* We get the address of the byte just passed,	198	return c;
196	which is the last byte of the character.
197	The other bytes in this character are consecutive with it,
198	because the gap can't be in the middle of a character. */
199	return *(BUF_BYTE_ADDRESS (inbuffer, BUF_PT_BYTE (inbuffer) - 1)
200	- --readchar_backlog);
201	}	199	}
202	if (MARKERP (readcharfun))	200	if (MARKERP (readcharfun))
203	{	201	{
204	inbuffer = XMARKER (readcharfun)->buffer;	202	register struct buffer *inbuffer = XMARKER (readcharfun)->buffer;
205
206	if (readchar_backlog == 0)
207	{
208	int bytepos = marker_byte_position (readcharfun);
209	int orig_bytepos = bytepos;
210		203
211	if (bytepos >= BUF_ZV_BYTE (inbuffer))	204	int bytepos = marker_byte_position (readcharfun);
212	return -1;	205	int orig_bytepos = bytepos;
213		206
214	if (! NILP (inbuffer->enable_multibyte_characters))	207	if (bytepos >= BUF_ZV_BYTE (inbuffer))
215	INC_POS (bytepos);	208	return -1;
216	else
217	bytepos++;
218	XMARKER (readcharfun)->bytepos = bytepos;
219	XMARKER (readcharfun)->charpos++;
220		209
221	readchar_backlog = bytepos - orig_bytepos;	210	if (! NILP (inbuffer->enable_multibyte_characters))
		211	{
		212	unsigned char *p = BUF_BYTE_ADDRESS (inbuffer, bytepos);
		213	BUF_INC_POS (inbuffer, bytepos);
		214	c = STRING_CHAR (p, bytepos - orig_bytepos);
		215	}
		216	else
		217	{
		218	c = BUF_FETCH_BYTE (inbuffer, bytepos);
		219	bytepos++;
222	}	220	}
223		221
224	/* We get the address of the byte just passed,	222	XMARKER (readcharfun)->bytepos = bytepos;
225	which is the last byte of the character.	223	XMARKER (readcharfun)->charpos++;
226	The other bytes in this character are consecutive with it,	224
227	because the gap can't be in the middle of a character. */	225	return c;
228	return *(BUF_BYTE_ADDRESS (inbuffer, XMARKER (readcharfun)->bytepos - 1)
229	- --readchar_backlog);
230	}	226	}
231	if (EQ (readcharfun, Qget_file_char))	227	if (EQ (readcharfun, Qget_file_char))
232	{	228	{
@@ -244,13 +240,14 @@ readchar (readcharfun)
244		240
245	if (STRINGP (readcharfun))	241	if (STRINGP (readcharfun))
246	{	242	{
247	register int c;	243	if (read_from_string_index >= read_from_string_limit)
248	/* This used to be return of a conditional expression,
249	but that truncated -1 to a char on VMS. */
250	if (read_from_string_index < read_from_string_limit)
251	c = XSTRING (readcharfun)->data[read_from_string_index++];
252	else
253	c = -1;	244	c = -1;
		245	else if (STRING_MULTIBYTE (readcharfun))
		246	FETCH_STRING_CHAR_ADVANCE (c, readcharfun,
		247	read_from_string_index,
		248	read_from_string_index_byte);
		249	else
		250	c = XSTRING (readcharfun)->data[read_from_string_index++];
254		251
255	return c;	252	return c;
256	}	253	}
@@ -276,42 +273,36 @@ unreadchar (readcharfun, c)
276	;	273	;
277	else if (BUFFERP (readcharfun))	274	else if (BUFFERP (readcharfun))
278	{	275	{
279	if (!SINGLE_BYTE_CHAR_P (c))	276	struct buffer *b = XBUFFER (readcharfun);
280	readchar_backlog++;	277	int bytepos = BUF_PT_BYTE (b);
281	else
282	{
283	struct buffer *b = XBUFFER (readcharfun);
284	int bytepos = BUF_PT_BYTE (b);
285		278
286	BUF_PT (b)--;	279	BUF_PT (b)--;
287	if (! NILP (b->enable_multibyte_characters))	280	if (! NILP (b->enable_multibyte_characters))
288	BUF_DEC_POS (b, bytepos);	281	BUF_DEC_POS (b, bytepos);
289	else	282	else
290	bytepos--;	283	bytepos--;
291		284
292	BUF_PT_BYTE (b) = bytepos;	285	BUF_PT_BYTE (b) = bytepos;
293	}
294	}	286	}
295	else if (MARKERP (readcharfun))	287	else if (MARKERP (readcharfun))
296	{	288	{
297	if (!SINGLE_BYTE_CHAR_P (c))	289	struct buffer *b = XMARKER (readcharfun)->buffer;
298	readchar_backlog++;	290	int bytepos = XMARKER (readcharfun)->bytepos;
299	else
300	{
301	struct buffer *b = XMARKER (readcharfun)->buffer;
302	int bytepos = XMARKER (readcharfun)->bytepos;
303		291
304	XMARKER (readcharfun)->charpos--;	292	XMARKER (readcharfun)->charpos--;
305	if (! NILP (b->enable_multibyte_characters))	293	if (! NILP (b->enable_multibyte_characters))
306	BUF_DEC_POS (b, bytepos);	294	BUF_DEC_POS (b, bytepos);
307	else	295	else
308	bytepos--;	296	bytepos--;
309		297
310	XMARKER (readcharfun)->bytepos = bytepos;	298	XMARKER (readcharfun)->bytepos = bytepos;
311	}
312	}	299	}
313	else if (STRINGP (readcharfun))	300	else if (STRINGP (readcharfun))
314	read_from_string_index--;	301	{
		302	read_from_string_index--;
		303	read_from_string_index_byte
		304	= string_char_to_byte (readcharfun, read_from_string_index);
		305	}
315	else if (EQ (readcharfun, Qget_file_char))	306	else if (EQ (readcharfun, Qget_file_char))
316	ungetc (c, instream);	307	ungetc (c, instream);
317	else	308	else
@@ -321,7 +312,7 @@ unreadchar (readcharfun, c)
321	static Lisp_Object read0 (), read1 (), read_list (), read_vector ();	312	static Lisp_Object read0 (), read1 (), read_list (), read_vector ();
322	static int read_multibyte ();	313	static int read_multibyte ();
323		314
324	/* get a character from the tty */	315	/* Get a character from the tty. */
325		316
326	extern Lisp_Object read_char ();	317	extern Lisp_Object read_char ();
327		318
@@ -1180,12 +1171,11 @@ START and END optionally delimit a substring of STRING from which to read;\n\
1180	CHECK_STRING (string,0);	1171	CHECK_STRING (string,0);
1181		1172
1182	if (NILP (end))	1173	if (NILP (end))
1183	endval = STRING_BYTES (XSTRING (string));	1174	endval = XSTRING (string)->size;
1184	else	1175	else
1185	{	1176	{
1186	CHECK_NUMBER (end, 2);	1177	CHECK_NUMBER (end, 2);
1187	endval = string_char_to_byte (string, XINT (end));	1178	if (endval < 0 \|\| endval > XSTRING (string)->size)
1188	if (endval < 0 \|\| endval > STRING_BYTES (XSTRING (string)))
1189	args_out_of_range (string, end);	1179	args_out_of_range (string, end);
1190	}	1180	}
1191		1181
@@ -1194,21 +1184,19 @@ START and END optionally delimit a substring of STRING from which to read;\n\
1194	else	1184	else
1195	{	1185	{
1196	CHECK_NUMBER (start, 1);	1186	CHECK_NUMBER (start, 1);
1197	startval = string_char_to_byte (string, XINT (start));
1198	if (startval < 0 \|\| startval > endval)	1187	if (startval < 0 \|\| startval > endval)
1199	args_out_of_range (string, start);	1188	args_out_of_range (string, start);
1200	}	1189	}
1201		1190
1202	read_from_string_index = startval;	1191	read_from_string_index = startval;
		1192	read_from_string_index_byte = string_char_to_byte (string, startval);
1203	read_from_string_limit = endval;	1193	read_from_string_limit = endval;
1204		1194
1205	new_backquote_flag = 0;	1195	new_backquote_flag = 0;
1206	read_objects = Qnil;	1196	read_objects = Qnil;
1207		1197
1208	tem = read0 (string);	1198	tem = read0 (string);
1209	endval = string_byte_to_char (string,	1199	return Fcons (tem, make_number (read_from_string_index));
1210	read_from_string_index);
1211	return Fcons (tem, make_number (endval));
1212	}	1200	}
1213		1201
1214	/* Use this for recursive reads, in contexts where internal tokens	1202	/* Use this for recursive reads, in contexts where internal tokens
@@ -1744,49 +1732,45 @@ read1 (readcharfun, pch, first_in_list)
1744	while ((c = READCHAR) >= 0	1732	while ((c = READCHAR) >= 0
1745	&& c != '\"')	1733	&& c != '\"')
1746	{	1734	{
1747	if (p == end)	1735	if (end - p < MAX_LENGTH_OF_MULTI_BYTE_FORM)
1748	{	1736	{
1749	char new = (char ) xrealloc (read_buffer, read_buffer_size *= 2);	1737	char new = (char ) xrealloc (read_buffer, read_buffer_size *= 2);
1750	p += new - read_buffer;	1738	p += new - read_buffer;
1751	read_buffer += new - read_buffer;	1739	read_buffer += new - read_buffer;
1752	end = read_buffer + read_buffer_size;	1740	end = read_buffer + read_buffer_size;
1753	}	1741	}
		1742
1754	if (c == '\\')	1743	if (c == '\\')
1755	{	1744	{
1756	c = read_escape (readcharfun, 1);	1745	c = read_escape (readcharfun, 1);
1757	if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_META)))	1746
		1747	/* C is -1 if \ newline has just been seen */
		1748	if (c == -1)
1758	{	1749	{
1759	unsigned char workbuf[4];	1750	if (p == read_buffer)
1760	unsigned char *str = workbuf;	1751	cancel = 1;
1761	int length;
1762
1763	length = non_ascii_char_to_string (c, workbuf, &str);
1764	if (length > 1)
1765	force_multibyte = 1;
1766
1767	if (p + length > end)
1768	{
1769	char new = (char ) xrealloc (read_buffer, read_buffer_size *= 2);
1770	p += new - read_buffer;
1771	read_buffer += new - read_buffer;
1772	end = read_buffer + read_buffer_size;
1773	}
1774
1775	bcopy (str, p, length);
1776	p += length;
1777	continue;	1752	continue;
1778	}	1753	}
		1754
1779	/* If an escape specifies a non-ASCII single-byte character,	1755	/* If an escape specifies a non-ASCII single-byte character,
1780	this must be a unibyte string. */	1756	this must be a unibyte string. */
1781	else if (! ASCII_BYTE_P (c))	1757	if (SINGLE_BYTE_CHAR_P ((c & ~CHAR_META))
		1758	&& ! ASCII_BYTE_P (c))
1782	force_singlebyte = 1;	1759	force_singlebyte = 1;
1783	}	1760	}
1784		1761
1785	/* c is -1 if \ newline has just been seen */	1762	if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_META)))
1786	if (c == -1)
1787	{	1763	{
1788	if (p == read_buffer)	1764	unsigned char workbuf[4];
1789	cancel = 1;	1765	unsigned char *str = workbuf;
		1766	int length;
		1767
		1768	length = non_ascii_char_to_string (c, workbuf, &str);
		1769	if (length > 1)
		1770	force_multibyte = 1;
		1771
		1772	bcopy (str, p, length);
		1773	p += length;
1790	}	1774	}
1791	else	1775	else
1792	{	1776	{
@@ -1814,7 +1798,7 @@ read1 (readcharfun, pch, first_in_list)
1814	return make_number (0);	1798	return make_number (0);
1815		1799
1816	if (force_singlebyte && force_multibyte)	1800	if (force_singlebyte && force_multibyte)
1817	error ("Multibyte and single-byte escapes in one string constant");	1801	error ("Multibyte and unibyte characters in one string constant");
1818		1802
1819	if (force_singlebyte)	1803	if (force_singlebyte)
1820	nchars = p - read_buffer;	1804	nchars = p - read_buffer;
@@ -1831,7 +1815,14 @@ read1 (readcharfun, pch, first_in_list)
1831	return Fstring_make_unibyte (string);	1815	return Fstring_make_unibyte (string);
1832	}	1816	}
1833	}	1817	}
		1818	else if (EQ (readcharfun, Qget_file_char))
		1819	/* Nowadays, reading directly from a file
		1820	is used only for compiled Emacs Lisp files,
		1821	and those always use the Emacs internal encoding. */
		1822	nchars = multibyte_chars_in_text (read_buffer, p - read_buffer);
1834	else	1823	else
		1824	/* In all other cases, if we read these bytes as
		1825	separate characters, treat them as separate characters now. */
1835	nchars = p - read_buffer;	1826	nchars = p - read_buffer;
1836		1827
1837	if (read_pure)	1828	if (read_pure)
@@ -1884,7 +1875,7 @@ read1 (readcharfun, pch, first_in_list)
1884	\|\| c == '[' \|\| c == ']' \|\| c == '#'	1875	\|\| c == '[' \|\| c == ']' \|\| c == '#'
1885	))	1876	))
1886	{	1877	{
1887	if (p == end)	1878	if (end - p < MAX_LENGTH_OF_MULTI_BYTE_FORM)
1888	{	1879	{
1889	register char new = (char ) xrealloc (read_buffer, read_buffer_size *= 2);	1880	register char new = (char ) xrealloc (read_buffer, read_buffer_size *= 2);
1890	p += new - read_buffer;	1881	p += new - read_buffer;
@@ -1897,7 +1888,19 @@ read1 (readcharfun, pch, first_in_list)
1897	quoted = 1;	1888	quoted = 1;
1898	}	1889	}
1899		1890
1900	*p++ = c;	1891	if (! SINGLE_BYTE_CHAR_P (c))
		1892	{
		1893	unsigned char workbuf[4];
		1894	unsigned char *str = workbuf;
		1895	int length;
		1896
		1897	length = non_ascii_char_to_string (c, workbuf, &str);
		1898
		1899	bcopy (str, p, length);
		1900	p += length;
		1901	}
		1902	else
		1903	*p++ = c;
1901		1904
1902	c = READCHAR;	1905	c = READCHAR;
1903	}	1906	}
@@ -2553,7 +2556,7 @@ init_obarray ()
2553	Qvariable_documentation = intern ("variable-documentation");	2556	Qvariable_documentation = intern ("variable-documentation");
2554	staticpro (&Qvariable_documentation);	2557	staticpro (&Qvariable_documentation);
2555		2558
2556	read_buffer_size = 100;	2559	read_buffer_size = 100 + MAX_LENGTH_OF_MULTI_BYTE_FORM;
2557	read_buffer = (char *) malloc (read_buffer_size);	2560	read_buffer = (char *) malloc (read_buffer_size);
2558	}	2561	}
2559		2562