Include "character.h".

(read_multibyte): New arg NBYTES. (read_escape): The meaning of returned *BYTEREP changed. (to_multibyte): Deleted. (read1): Adjuted the handling of char table and string.
author: Kenichi Handa 2002-03-01 01:44:45 +0000
committer: Kenichi Handa 2002-03-01 01:44:45 +0000
commit: 1571601b93f6e09394efba4b8b3252729ce5ce12 (patch)
tree: ead4156341a0ab9ba204473a01eab20953dd57cc /src
parent: 1842abb2bf55038f92160f01d26a3649b25d63e9 (diff)
download: emacs-1571601b93f6e09394efba4b8b3252729ce5ce12.tar.gz
emacs-1571601b93f6e09394efba4b8b3252729ce5ce12.zip
1 files changed, 149 insertions, 129 deletions
diff --git a/src/lread.c b/src/lread.c
index 10baf509918..360fa68386d 100644
--- a/src/lread.c
+++ b/src/lread.c
@@ -29,6 +29,7 @@ Boston, MA 02111-1307, USA.  */
 #include "lisp.h"
 #include "intervals.h"
 #include "buffer.h"
+#include "character.h"
 #include "charset.h"
 #include <epaths.h>
 #include "commands.h"
@@ -1476,36 +1477,51 @@ static char *read_buffer;
 /* Read multibyte form and return it as a character.  C is a first
   byte of multibyte form, and rest of them are read from
-   READCHARFUN.  */
+   READCHARFUN.  Store the byte length of the form into *NBYTES.  */
 static int
-read_multibyte (c, readcharfun)
+read_multibyte (c, readcharfun, nbytes)
     register int c;
     Lisp_Object readcharfun;
+     int *nbytes;
 {
  /* We need the actual character code of this multibyte
     characters.  */
  unsigned char str[MAX_MULTIBYTE_LENGTH];
  int len = 0;
-  int bytes;
+  int bytes = BYTES_BY_CHAR_HEAD (c);
  str[len++] = c;
-  while ((c = READCHAR) >= 0xA0
+  while (len < bytes)
-         && len < MAX_MULTIBYTE_LENGTH)
+    {
-    str[len++] = c;
+      c = READCHAR;
-  UNREAD (c);
+      if (CHAR_HEAD_P (c))
-  if (UNIBYTE_STR_AS_MULTIBYTE_P (str, len, bytes))
+        {
-    return STRING_CHAR (str, len);
+          UNREAD (c);
+          break;
+        }
+      str[len++] = c;
+    }
+  if (len == bytes && MULTIBYTE_LENGTH_NO_CHECK (str) > 0)
+    {
+      *nbytes = len;
+      return STRING_CHAR (str, len);
+    }
  /* The byte sequence is not valid as multibyte.  Unread all bytes
     but the first one, and return the first byte.  */
  while (--len > 0)
    UNREAD (str[len]);
+  *nbytes = 1;
  return str[0];
 }
 /* Read a \-escape sequence, assuming we already read the `\'.
   If the escape sequence forces unibyte, store 1 into *BYTEREP.
-   If the escape sequence forces multibyte, store 2 into *BYTEREP.
+   If the escape sequence forces multibyte and the returned character
+   is raw 8-bit char, store 2 into *BYTEREP.
+   If the escape sequence forces multibyte and the returned character
+   is not raw 8-bit char, store 3 into *BYTEREP.
   Otherwise store 0 into *BYTEREP.  */
 static int
@@ -1640,7 +1656,10 @@ read_escape (readcharfun, stringp, byterep)
              }
          }
        
-        *byterep = 1;
+        if (c < 0x100)
+          *byterep = 1;
+        else
+          *byterep = 3;
        return i;
      }
@@ -1648,6 +1667,7 @@ read_escape (readcharfun, stringp, byterep)
      /* A hex escape, as in ANSI C.  */
      {
        int i = 0;
+        int count = 0;
        while (1)
          {
            c = READCHAR;
@@ -1670,15 +1690,26 @@ read_escape (readcharfun, stringp, byterep)
                UNREAD (c);
                break;
              }
+            count++;
          }
-        *byterep = 2;
+        if (count < 3 && i >= 0x80)
+          *byterep = 2;
+        else
+          *byterep = 3;
        return i;
      }
    default:
-      if (BASE_LEADING_CODE_P (c))
+      if (EQ (readcharfun, Qget_file_char)
-        c = read_multibyte (c, readcharfun);
+          && BASE_LEADING_CODE_P (c))
+        {
+          int nbytes;
+          c = read_multibyte (c, readcharfun, &nbytes);
+          if (nbytes > 1)
+            *byterep = 3;
+        }
      return c;
    }
 }
@@ -1750,43 +1781,6 @@ read_integer (readcharfun, radix)
 }
-/* Convert unibyte text in read_buffer to multibyte.
-   Initially, *P is a pointer after the end of the unibyte text, and
-   the pointer *END points after the end of read_buffer.
-   If read_buffer doesn't have enough room to hold the result
-   of the conversion, reallocate it and adjust *P and *END.
-   At the end, make *P point after the result of the conversion, and
-   return in *NCHARS the number of characters in the converted
-   text.  */
-static void
-to_multibyte (p, end, nchars)
-     char **p, **end;
-     int *nchars;
-{
-  int nbytes;
-  parse_str_as_multibyte (read_buffer, *p - read_buffer, &nbytes, nchars);
-  if (read_buffer_size < 2 * nbytes)
-    {
-      int offset = *p - read_buffer;
-      read_buffer_size = 2 * max (read_buffer_size, nbytes);
-      read_buffer = (char *) xrealloc (read_buffer, read_buffer_size);
-      *p = read_buffer + offset;
-      *end = read_buffer + read_buffer_size;
-    }
-  if (nbytes != *nchars)
-    nbytes = str_as_multibyte (read_buffer, read_buffer_size,
-                               *p - read_buffer, nchars);
-  
-  *p = read_buffer + nbytes;
-}
 /* If the next token is ')' or ']' or '.', we store that character
   in *PCH and the return value is not interesting.  Else, we store
   zero in *PCH and we read and return one lisp object.
@@ -1834,11 +1828,9 @@ read1 (readcharfun, pch, first_in_list)
            {
              Lisp_Object tmp;
              tmp = read_vector (readcharfun, 0);
-              if (XVECTOR (tmp)->size < CHAR_TABLE_STANDARD_SLOTS
+              if (XVECTOR (tmp)->size != VECSIZE (struct Lisp_Char_Table))
-                  || XVECTOR (tmp)->size > CHAR_TABLE_STANDARD_SLOTS + 10)
                error ("Invalid size char-table");
              XSETCHAR_TABLE (tmp, XCHAR_TABLE (tmp));
-              XCHAR_TABLE (tmp)->top = Qt;
              return tmp;
            }
          else if (c == '^')
@@ -1847,11 +1839,18 @@ read1 (readcharfun, pch, first_in_list)
              if (c == '[')
                {
                  Lisp_Object tmp;
+                  int depth, size;
+                  
                  tmp = read_vector (readcharfun, 0);
-                  if (XVECTOR (tmp)->size != SUB_CHAR_TABLE_STANDARD_SLOTS)
+                  if (!INTEGERP (AREF (tmp, 0)))
+                    error ("Invalid depth in char-table");
+                  depth = XINT (AREF (tmp, 0));
+                  if (depth < 1 || depth > 3)
+                    error ("Invalid depth in char-table");
+                  size = XVECTOR (tmp)->size + 2;
+                  if (chartab_size [depth] != size)
                    error ("Invalid size char-table");
-                  XSETCHAR_TABLE (tmp, XCHAR_TABLE (tmp));
+                  XSETSUB_CHAR_TABLE (tmp, XSUB_CHAR_TABLE (tmp));
-                  XCHAR_TABLE (tmp)->top = Qnil;
                  return tmp;
                }
              Fsignal (Qinvalid_read_syntax,
@@ -2134,8 +2133,9 @@ read1 (readcharfun, pch, first_in_list)
        if (c == '\\')
          c = read_escape (readcharfun, 0, &discard);
-        else if (BASE_LEADING_CODE_P (c))
+        else if (EQ (readcharfun, Qget_file_char)
-          c = read_multibyte (c, readcharfun);
+                 && BASE_LEADING_CODE_P (c))
+          c = read_multibyte (c, readcharfun, &discard);
        return make_number (c);
      }
@@ -2145,14 +2145,12 @@ read1 (readcharfun, pch, first_in_list)
        char *p = read_buffer;
        char *end = read_buffer + read_buffer_size;
        register int c;
-        /* 1 if we saw an escape sequence specifying
+        /* Nonzero if we saw an escape sequence specifying
-           a multibyte character, or a multibyte character.  */
+           a multibyte character.  */
        int force_multibyte = 0;
-        /* 1 if we saw an escape sequence specifying
+        /* Nonzero if we saw an escape sequence specifying
           a single-byte character.  */
        int force_singlebyte = 0;
-        /* 1 if read_buffer contains multibyte text now.  */
-        int is_multibyte = 0;
        int cancel = 0;
        int nchars = 0;
@@ -2170,6 +2168,7 @@ read1 (readcharfun, pch, first_in_list)
            if (c == '\\')
              {
+                int modifiers;
                int byterep;
                c = read_escape (readcharfun, 1, &byterep);
@@ -2182,53 +2181,92 @@ read1 (readcharfun, pch, first_in_list)
                    continue;
                  }
+                modifiers = c & CHAR_MODIFIER_MASK;
+                c = c & ~CHAR_MODIFIER_MASK;
                if (byterep == 1)
-                  force_singlebyte = 1;
+                  {
-                else if (byterep == 2)
+                    force_singlebyte = 1;
-                  force_multibyte = 1;
+                    if (c >= 0x80)
-              }
+                      /*  Raw 8-bit code */
+                      c = BYTE8_TO_CHAR (c);
+                  }
+                else if (byterep > 1)
+                  {
+                    force_multibyte = 1;
+                    if (byterep == 2)
+                      c = BYTE8_TO_CHAR (c);
+                  }
+                else if (c >= 0x80)
+                  {
+                    force_singlebyte = 1;
+                    c = BYTE8_TO_CHAR (c);
+                  }
-            /* A character that must be multibyte forces multibyte.  */
+                if (ASCII_CHAR_P (c))
-            if (! SINGLE_BYTE_CHAR_P (c & ~CHAR_MODIFIER_MASK))
+                  {
-              force_multibyte = 1;
+                    /* Allow `\C- ' and `\C-?'.  */
+                    if (modifiers == CHAR_CTL)
+                      {
+                        if (c == ' ')
+                          c = 0, modifiers = 0;
+                        else if (c == '?')
+                          c = 127, modifiers = 0;
+                      }
+                    if (modifiers & CHAR_SHIFT)
+                      {
+                        /* Shift modifier is valid only with [A-Za-z].  */
+                        if (c >= 'A' && c <= 'Z')
+                          modifiers &= ~CHAR_SHIFT;
+                        else if (c >= 'a' && c <= 'z')
+                          c -= ('a' - 'A'), modifiers &= ~CHAR_SHIFT;
+                      }
+                    if (modifiers & CHAR_META)
+                      {
+                        /* Move the meta bit to the right place for a
+                           string.  */
+                        modifiers &= ~CHAR_META;
+                        c = BYTE8_TO_CHAR (c | 0x80);
+                        force_singlebyte = 1;
+                      }
+                  }
-            /* If we just discovered the need to be multibyte,
+                /* Any modifiers remaining are invalid.  */
-               convert the text accumulated thus far.  */
+                if (modifiers)
-            if (force_multibyte && ! is_multibyte)
+                  error ("Invalid modifier in string");
-              {
+                p += CHAR_STRING (c, (unsigned char *) p);
-                is_multibyte = 1;
-                to_multibyte (&p, &end, &nchars);
              }
+            else if (c >= 0x80)
-            /* Allow `\C- ' and `\C-?'.  */
-            if (c == (CHAR_CTL | ' '))
-              c = 0;
-            else if (c == (CHAR_CTL | '?'))
-              c = 127;
-            if (c & CHAR_SHIFT)
              {
-                /* Shift modifier is valid only with [A-Za-z].  */
+                if (EQ (readcharfun, Qget_file_char))
-                if ((c & 0377) >= 'A' && (c & 0377) <= 'Z')
+                  {
-                  c &= ~CHAR_SHIFT;
+                    if (BASE_LEADING_CODE_P (c))
-                else if ((c & 0377) >= 'a' && (c & 0377) <= 'z')
+                      {
-                  c = (c & ~CHAR_SHIFT) - ('a' - 'A');
+                        int nbytes;
+                        c = read_multibyte (c, readcharfun, &nbytes);
+                        if (nbytes > 1)
+                          force_multibyte = 1;
+                        else
+                          {
+                            force_singlebyte = 1;
+                            c = BYTE8_TO_CHAR (c);
+                          }
+                      }
+                    else
+                      {
+                        force_singlebyte = 1;
+                        c = BYTE8_TO_CHAR (c);
+                      }
+                  }
+                else
+                  force_multibyte = 1;
+                p += CHAR_STRING (c, (unsigned char *) p);
              }
-            if (c & CHAR_META)
-              /* Move the meta bit to the right place for a string.  */
-              c = (c & ~CHAR_META) | 0x80;
-            if (c & CHAR_MODIFIER_MASK)
-              error ("Invalid modifier in string");
-            if (is_multibyte)
-              p += CHAR_STRING (c, p);
            else
              *p++ = c;
            nchars++;
          }
        if (c < 0)
          end_of_file_error ();
@@ -2238,42 +2276,24 @@ read1 (readcharfun, pch, first_in_list)
        if (!NILP (Vpurify_flag) && NILP (Vdoc_file_name) && cancel)
          return make_number (0);
-        if (is_multibyte || force_singlebyte)
+        if (force_multibyte)
+          /* READ_BUFFER already contains valid multibyte forms.  */
          ;
-        else if (load_convert_to_unibyte)
+        else if (force_singlebyte)
          {
-            Lisp_Object string;
+            nchars = str_as_unibyte (read_buffer, p - read_buffer);
-            to_multibyte (&p, &end, &nchars);
+            p = read_buffer + nchars;
-            if (p - read_buffer != nchars)
-              {
-                string = make_multibyte_string (read_buffer, nchars,
-                                                p - read_buffer);
-                return Fstring_make_unibyte (string);
-              }
-            /* We can make a unibyte string directly.  */
-            is_multibyte = 0;
-          }
-        else if (EQ (readcharfun, Qget_file_char)
-                 || EQ (readcharfun, Qlambda))
-          {
-            /* Nowadays, reading directly from a file is used only for
-               compiled Emacs Lisp files, and those always use the
-               Emacs internal encoding.  Meanwhile, Qlambda is used
-               for reading dynamic byte code (compiled with
-               byte-compile-dynamic = t).  */
-            to_multibyte (&p, &end, &nchars);
-            is_multibyte = 1;
          }
        else
-          /* In all other cases, if we read these bytes as
+          /* Otherwise, READ_BUFFER contains only ASCII.  */
-             separate characters, treat them as separate characters now.  */
-          ;
        if (read_pure)
          return make_pure_string (read_buffer, nchars, p - read_buffer,
-                                   is_multibyte);
+                                   (force_multibyte
+                                    || (p - read_buffer != nchars)));
        return make_specified_string (read_buffer, nchars, p - read_buffer,
-                                      is_multibyte);
+                                      (force_multibyte
+                                       || (p - read_buffer != nchars)));
      }
    case '.':
author	Kenichi Handa	2002-03-01 01:44:45 +0000
committer	Kenichi Handa	2002-03-01 01:44:45 +0000
commit	1571601b93f6e09394efba4b8b3252729ce5ce12 (patch)
tree	ead4156341a0ab9ba204473a01eab20953dd57cc /src
parent	1842abb2bf55038f92160f01d26a3649b25d63e9 (diff)
download	emacs-1571601b93f6e09394efba4b8b3252729ce5ce12.tar.gz emacs-1571601b93f6e09394efba4b8b3252729ce5ce12.zip

diff --git a/src/lread.c b/src/lread.c index 10baf509918..360fa68386d 100644 --- a/src/lread.c +++ b/src/lread.c
@@ -29,6 +29,7 @@ Boston, MA 02111-1307, USA. */
29	#include "lisp.h"	29	#include "lisp.h"
30	#include "intervals.h"	30	#include "intervals.h"
31	#include "buffer.h"	31	#include "buffer.h"
		32	#include "character.h"
32	#include "charset.h"	33	#include "charset.h"
33	#include <epaths.h>	34	#include <epaths.h>
34	#include "commands.h"	35	#include "commands.h"
@@ -1476,36 +1477,51 @@ static char *read_buffer;
1476		1477
1477	/* Read multibyte form and return it as a character. C is a first	1478	/* Read multibyte form and return it as a character. C is a first
1478	byte of multibyte form, and rest of them are read from	1479	byte of multibyte form, and rest of them are read from
1479	READCHARFUN. */	1480	READCHARFUN. Store the byte length of the form into NBYTES. /
1480		1481
1481	static int	1482	static int
1482	read_multibyte (c, readcharfun)	1483	read_multibyte (c, readcharfun, nbytes)
1483	register int c;	1484	register int c;
1484	Lisp_Object readcharfun;	1485	Lisp_Object readcharfun;
		1486	int *nbytes;
1485	{	1487	{
1486	/* We need the actual character code of this multibyte	1488	/* We need the actual character code of this multibyte
1487	characters. */	1489	characters. */
1488	unsigned char str[MAX_MULTIBYTE_LENGTH];	1490	unsigned char str[MAX_MULTIBYTE_LENGTH];
1489	int len = 0;	1491	int len = 0;
1490	int bytes;	1492	int bytes = BYTES_BY_CHAR_HEAD (c);
1491		1493
1492	str[len++] = c;	1494	str[len++] = c;
1493	while ((c = READCHAR) >= 0xA0	1495	while (len < bytes)
1494	&& len < MAX_MULTIBYTE_LENGTH)	1496	{
1495	str[len++] = c;	1497	c = READCHAR;
1496	UNREAD (c);	1498	if (CHAR_HEAD_P (c))
1497	if (UNIBYTE_STR_AS_MULTIBYTE_P (str, len, bytes))	1499	{
1498	return STRING_CHAR (str, len);	1500	UNREAD (c);
		1501	break;
		1502	}
		1503	str[len++] = c;
		1504	}
		1505
		1506	if (len == bytes && MULTIBYTE_LENGTH_NO_CHECK (str) > 0)
		1507	{
		1508	*nbytes = len;
		1509	return STRING_CHAR (str, len);
		1510	}
1499	/* The byte sequence is not valid as multibyte. Unread all bytes	1511	/* The byte sequence is not valid as multibyte. Unread all bytes
1500	but the first one, and return the first byte. */	1512	but the first one, and return the first byte. */
1501	while (--len > 0)	1513	while (--len > 0)
1502	UNREAD (str[len]);	1514	UNREAD (str[len]);
		1515	*nbytes = 1;
1503	return str[0];	1516	return str[0];
1504	}	1517	}
1505		1518
1506	/* Read a \-escape sequence, assuming we already read the `\'.	1519	/* Read a \-escape sequence, assuming we already read the `\'.
1507	If the escape sequence forces unibyte, store 1 into *BYTEREP.	1520	If the escape sequence forces unibyte, store 1 into *BYTEREP.
1508	If the escape sequence forces multibyte, store 2 into *BYTEREP.	1521	If the escape sequence forces multibyte and the returned character
		1522	is raw 8-bit char, store 2 into *BYTEREP.
		1523	If the escape sequence forces multibyte and the returned character
		1524	is not raw 8-bit char, store 3 into *BYTEREP.
1509	Otherwise store 0 into BYTEREP. /	1525	Otherwise store 0 into BYTEREP. /
1510		1526
1511	static int	1527	static int
@@ -1640,7 +1656,10 @@ read_escape (readcharfun, stringp, byterep)
1640	}	1656	}
1641	}	1657	}
1642		1658
1643	*byterep = 1;	1659	if (c < 0x100)
		1660	*byterep = 1;
		1661	else
		1662	*byterep = 3;
1644	return i;	1663	return i;
1645	}	1664	}
1646		1665
@@ -1648,6 +1667,7 @@ read_escape (readcharfun, stringp, byterep)
1648	/* A hex escape, as in ANSI C. */	1667	/* A hex escape, as in ANSI C. */
1649	{	1668	{
1650	int i = 0;	1669	int i = 0;
		1670	int count = 0;
1651	while (1)	1671	while (1)
1652	{	1672	{
1653	c = READCHAR;	1673	c = READCHAR;
@@ -1670,15 +1690,26 @@ read_escape (readcharfun, stringp, byterep)
1670	UNREAD (c);	1690	UNREAD (c);
1671	break;	1691	break;
1672	}	1692	}
		1693	count++;
1673	}	1694	}
1674		1695
1675	*byterep = 2;	1696	if (count < 3 && i >= 0x80)
		1697	*byterep = 2;
		1698	else
		1699	*byterep = 3;
1676	return i;	1700	return i;
1677	}	1701	}
1678		1702
1679	default:	1703	default:
1680	if (BASE_LEADING_CODE_P (c))	1704	if (EQ (readcharfun, Qget_file_char)
1681	c = read_multibyte (c, readcharfun);	1705	&& BASE_LEADING_CODE_P (c))
		1706	{
		1707	int nbytes;
		1708
		1709	c = read_multibyte (c, readcharfun, &nbytes);
		1710	if (nbytes > 1)
		1711	*byterep = 3;
		1712	}
1682	return c;	1713	return c;
1683	}	1714	}
1684	}	1715	}
@@ -1750,43 +1781,6 @@ read_integer (readcharfun, radix)
1750	}	1781	}
1751		1782
1752		1783
1753	/* Convert unibyte text in read_buffer to multibyte.
1754
1755	Initially, *P is a pointer after the end of the unibyte text, and
1756	the pointer *END points after the end of read_buffer.
1757
1758	If read_buffer doesn't have enough room to hold the result
1759	of the conversion, reallocate it and adjust P and END.
1760
1761	At the end, make *P point after the result of the conversion, and
1762	return in *NCHARS the number of characters in the converted
1763	text. */
1764
1765	static void
1766	to_multibyte (p, end, nchars)
1767	char p, end;
1768	int *nchars;
1769	{
1770	int nbytes;
1771
1772	parse_str_as_multibyte (read_buffer, *p - read_buffer, &nbytes, nchars);
1773	if (read_buffer_size < 2 * nbytes)
1774	{
1775	int offset = *p - read_buffer;
1776	read_buffer_size = 2 * max (read_buffer_size, nbytes);
1777	read_buffer = (char *) xrealloc (read_buffer, read_buffer_size);
1778	*p = read_buffer + offset;
1779	*end = read_buffer + read_buffer_size;
1780	}
1781
1782	if (nbytes != *nchars)
1783	nbytes = str_as_multibyte (read_buffer, read_buffer_size,
1784	*p - read_buffer, nchars);
1785
1786	*p = read_buffer + nbytes;
1787	}
1788
1789
1790	/* If the next token is ')' or ']' or '.', we store that character	1784	/* If the next token is ')' or ']' or '.', we store that character
1791	in *PCH and the return value is not interesting. Else, we store	1785	in *PCH and the return value is not interesting. Else, we store
1792	zero in *PCH and we read and return one lisp object.	1786	zero in *PCH and we read and return one lisp object.
@@ -1834,11 +1828,9 @@ read1 (readcharfun, pch, first_in_list)
1834	{	1828	{
1835	Lisp_Object tmp;	1829	Lisp_Object tmp;
1836	tmp = read_vector (readcharfun, 0);	1830	tmp = read_vector (readcharfun, 0);
1837	if (XVECTOR (tmp)->size < CHAR_TABLE_STANDARD_SLOTS	1831	if (XVECTOR (tmp)->size != VECSIZE (struct Lisp_Char_Table))
1838	\|\| XVECTOR (tmp)->size > CHAR_TABLE_STANDARD_SLOTS + 10)
1839	error ("Invalid size char-table");	1832	error ("Invalid size char-table");
1840	XSETCHAR_TABLE (tmp, XCHAR_TABLE (tmp));	1833	XSETCHAR_TABLE (tmp, XCHAR_TABLE (tmp));
1841	XCHAR_TABLE (tmp)->top = Qt;
1842	return tmp;	1834	return tmp;
1843	}	1835	}
1844	else if (c == '^')	1836	else if (c == '^')
@@ -1847,11 +1839,18 @@ read1 (readcharfun, pch, first_in_list)
1847	if (c == '[')	1839	if (c == '[')
1848	{	1840	{
1849	Lisp_Object tmp;	1841	Lisp_Object tmp;
		1842	int depth, size;
		1843
1850	tmp = read_vector (readcharfun, 0);	1844	tmp = read_vector (readcharfun, 0);
1851	if (XVECTOR (tmp)->size != SUB_CHAR_TABLE_STANDARD_SLOTS)	1845	if (!INTEGERP (AREF (tmp, 0)))
		1846	error ("Invalid depth in char-table");
		1847	depth = XINT (AREF (tmp, 0));
		1848	if (depth < 1 \|\| depth > 3)
		1849	error ("Invalid depth in char-table");
		1850	size = XVECTOR (tmp)->size + 2;
		1851	if (chartab_size [depth] != size)
1852	error ("Invalid size char-table");	1852	error ("Invalid size char-table");
1853	XSETCHAR_TABLE (tmp, XCHAR_TABLE (tmp));	1853	XSETSUB_CHAR_TABLE (tmp, XSUB_CHAR_TABLE (tmp));
1854	XCHAR_TABLE (tmp)->top = Qnil;
1855	return tmp;	1854	return tmp;
1856	}	1855	}
1857	Fsignal (Qinvalid_read_syntax,	1856	Fsignal (Qinvalid_read_syntax,
@@ -2134,8 +2133,9 @@ read1 (readcharfun, pch, first_in_list)
2134		2133
2135	if (c == '\\')	2134	if (c == '\\')
2136	c = read_escape (readcharfun, 0, &discard);	2135	c = read_escape (readcharfun, 0, &discard);
2137	else if (BASE_LEADING_CODE_P (c))	2136	else if (EQ (readcharfun, Qget_file_char)
2138	c = read_multibyte (c, readcharfun);	2137	&& BASE_LEADING_CODE_P (c))
		2138	c = read_multibyte (c, readcharfun, &discard);
2139		2139
2140	return make_number (c);	2140	return make_number (c);
2141	}	2141	}
@@ -2145,14 +2145,12 @@ read1 (readcharfun, pch, first_in_list)
2145	char *p = read_buffer;	2145	char *p = read_buffer;
2146	char *end = read_buffer + read_buffer_size;	2146	char *end = read_buffer + read_buffer_size;
2147	register int c;	2147	register int c;
2148	/* 1 if we saw an escape sequence specifying	2148	/* Nonzero if we saw an escape sequence specifying
2149	a multibyte character, or a multibyte character. */	2149	a multibyte character. */
2150	int force_multibyte = 0;	2150	int force_multibyte = 0;
2151	/* 1 if we saw an escape sequence specifying	2151	/* Nonzero if we saw an escape sequence specifying
2152	a single-byte character. */	2152	a single-byte character. */
2153	int force_singlebyte = 0;	2153	int force_singlebyte = 0;
2154	/* 1 if read_buffer contains multibyte text now. */
2155	int is_multibyte = 0;
2156	int cancel = 0;	2154	int cancel = 0;
2157	int nchars = 0;	2155	int nchars = 0;
2158		2156
@@ -2170,6 +2168,7 @@ read1 (readcharfun, pch, first_in_list)
2170		2168
2171	if (c == '\\')	2169	if (c == '\\')
2172	{	2170	{
		2171	int modifiers;
2173	int byterep;	2172	int byterep;
2174		2173
2175	c = read_escape (readcharfun, 1, &byterep);	2174	c = read_escape (readcharfun, 1, &byterep);
@@ -2182,53 +2181,92 @@ read1 (readcharfun, pch, first_in_list)
2182	continue;	2181	continue;
2183	}	2182	}
2184		2183
		2184	modifiers = c & CHAR_MODIFIER_MASK;
		2185	c = c & ~CHAR_MODIFIER_MASK;
		2186
2185	if (byterep == 1)	2187	if (byterep == 1)
2186	force_singlebyte = 1;	2188	{
2187	else if (byterep == 2)	2189	force_singlebyte = 1;
2188	force_multibyte = 1;	2190	if (c >= 0x80)
2189	}	2191	/* Raw 8-bit code */
		2192	c = BYTE8_TO_CHAR (c);
		2193	}
		2194	else if (byterep > 1)
		2195	{
		2196	force_multibyte = 1;
		2197	if (byterep == 2)
		2198	c = BYTE8_TO_CHAR (c);
		2199	}
		2200	else if (c >= 0x80)
		2201	{
		2202	force_singlebyte = 1;
		2203	c = BYTE8_TO_CHAR (c);
		2204	}
2190		2205
2191	/* A character that must be multibyte forces multibyte. */	2206	if (ASCII_CHAR_P (c))
2192	if (! SINGLE_BYTE_CHAR_P (c & ~CHAR_MODIFIER_MASK))	2207	{
2193	force_multibyte = 1;	2208	/* Allow `\C- ' and `\C-?'. */
		2209	if (modifiers == CHAR_CTL)
		2210	{
		2211	if (c == ' ')
		2212	c = 0, modifiers = 0;
		2213	else if (c == '?')
		2214	c = 127, modifiers = 0;
		2215	}
		2216	if (modifiers & CHAR_SHIFT)
		2217	{
		2218	/* Shift modifier is valid only with [A-Za-z]. */
		2219	if (c >= 'A' && c <= 'Z')
		2220	modifiers &= ~CHAR_SHIFT;
		2221	else if (c >= 'a' && c <= 'z')
		2222	c -= ('a' - 'A'), modifiers &= ~CHAR_SHIFT;
		2223	}
		2224
		2225	if (modifiers & CHAR_META)
		2226	{
		2227	/* Move the meta bit to the right place for a
		2228	string. */
		2229	modifiers &= ~CHAR_META;
		2230	c = BYTE8_TO_CHAR (c \| 0x80);
		2231	force_singlebyte = 1;
		2232	}
		2233	}
2194		2234
2195	/* If we just discovered the need to be multibyte,	2235	/* Any modifiers remaining are invalid. */
2196	convert the text accumulated thus far. */	2236	if (modifiers)
2197	if (force_multibyte && ! is_multibyte)	2237	error ("Invalid modifier in string");
2198	{	2238	p += CHAR_STRING (c, (unsigned char *) p);
2199	is_multibyte = 1;
2200	to_multibyte (&p, &end, &nchars);
2201	}	2239	}
2202		2240	else if (c >= 0x80)
2203	/* Allow `\C- ' and `\C-?'. */
2204	if (c == (CHAR_CTL \| ' '))
2205	c = 0;
2206	else if (c == (CHAR_CTL \| '?'))
2207	c = 127;
2208
2209	if (c & CHAR_SHIFT)
2210	{	2241	{
2211	/* Shift modifier is valid only with [A-Za-z]. */	2242	if (EQ (readcharfun, Qget_file_char))
2212	if ((c & 0377) >= 'A' && (c & 0377) <= 'Z')	2243	{
2213	c &= ~CHAR_SHIFT;	2244	if (BASE_LEADING_CODE_P (c))
2214	else if ((c & 0377) >= 'a' && (c & 0377) <= 'z')	2245	{
2215	c = (c & ~CHAR_SHIFT) - ('a' - 'A');	2246	int nbytes;
		2247	c = read_multibyte (c, readcharfun, &nbytes);
		2248	if (nbytes > 1)
		2249	force_multibyte = 1;
		2250	else
		2251	{
		2252	force_singlebyte = 1;
		2253	c = BYTE8_TO_CHAR (c);
		2254	}
		2255	}
		2256	else
		2257	{
		2258	force_singlebyte = 1;
		2259	c = BYTE8_TO_CHAR (c);
		2260	}
		2261	}
		2262	else
		2263	force_multibyte = 1;
		2264	p += CHAR_STRING (c, (unsigned char *) p);
2216	}	2265	}
2217
2218	if (c & CHAR_META)
2219	/* Move the meta bit to the right place for a string. */
2220	c = (c & ~CHAR_META) \| 0x80;
2221	if (c & CHAR_MODIFIER_MASK)
2222	error ("Invalid modifier in string");
2223
2224	if (is_multibyte)
2225	p += CHAR_STRING (c, p);
2226	else	2266	else
2227	*p++ = c;	2267	*p++ = c;
2228
2229	nchars++;	2268	nchars++;
2230	}	2269	}
2231
2232	if (c < 0)	2270	if (c < 0)
2233	end_of_file_error ();	2271	end_of_file_error ();
2234		2272
@@ -2238,42 +2276,24 @@ read1 (readcharfun, pch, first_in_list)
2238	if (!NILP (Vpurify_flag) && NILP (Vdoc_file_name) && cancel)	2276	if (!NILP (Vpurify_flag) && NILP (Vdoc_file_name) && cancel)
2239	return make_number (0);	2277	return make_number (0);
2240		2278
2241	if (is_multibyte \|\| force_singlebyte)	2279	if (force_multibyte)
		2280	/* READ_BUFFER already contains valid multibyte forms. */
2242	;	2281	;
2243	else if (load_convert_to_unibyte)	2282	else if (force_singlebyte)
2244	{	2283	{
2245	Lisp_Object string;	2284	nchars = str_as_unibyte (read_buffer, p - read_buffer);
2246	to_multibyte (&p, &end, &nchars);	2285	p = read_buffer + nchars;
2247	if (p - read_buffer != nchars)
2248	{
2249	string = make_multibyte_string (read_buffer, nchars,
2250	p - read_buffer);
2251	return Fstring_make_unibyte (string);
2252	}
2253	/* We can make a unibyte string directly. */
2254	is_multibyte = 0;
2255	}
2256	else if (EQ (readcharfun, Qget_file_char)
2257	\|\| EQ (readcharfun, Qlambda))
2258	{
2259	/* Nowadays, reading directly from a file is used only for
2260	compiled Emacs Lisp files, and those always use the
2261	Emacs internal encoding. Meanwhile, Qlambda is used
2262	for reading dynamic byte code (compiled with
2263	byte-compile-dynamic = t). */
2264	to_multibyte (&p, &end, &nchars);
2265	is_multibyte = 1;
2266	}	2286	}
2267	else	2287	else
2268	/* In all other cases, if we read these bytes as	2288	/* Otherwise, READ_BUFFER contains only ASCII. */
2269	separate characters, treat them as separate characters now. */
2270	;
2271		2289
2272	if (read_pure)	2290	if (read_pure)
2273	return make_pure_string (read_buffer, nchars, p - read_buffer,	2291	return make_pure_string (read_buffer, nchars, p - read_buffer,
2274	is_multibyte);	2292	(force_multibyte
		2293	\|\| (p - read_buffer != nchars)));
2275	return make_specified_string (read_buffer, nchars, p - read_buffer,	2294	return make_specified_string (read_buffer, nchars, p - read_buffer,
2276	is_multibyte);	2295	(force_multibyte
		2296	\|\| (p - read_buffer != nchars)));
2277	}	2297	}
2278		2298
2279	case '.':	2299	case '.':