Include "coding.h".

(Qget_emacs_mule_file_char, Qload_force_doc_strings, load_each_byte, unread_char): New variables. (readchar_backlog): This variable deleted. (readchar): Return a character unless load_each_byte is nonzero. Handle the case that readcharfun is Qget_emacs_mule_file_char or a cons. If unread_char is not -1, simply return it. (unreadchar): Handle the case that readcharfun is Qget_emacs_mule_file_char or a cons. Set unread_char if necessary. (read_multibyte): This function deleted. (readbyte_for_lambda, readbyte_from_file, readbyte_from_string) (read_emacs_mule_char): New functions. (Fload): Even if the file doesn't have the extention ".elc", if safe_to_load_p returns a positive version number, assume that the file contains bytecompiled code. If the version is less than 22, load the file while decoding multibyte sequences by emacs-mule. (readevalloop): Don't use readchar_backlog. (Fread): Likewise. Pay attention to the case that STREAM is a cons. (Fread_from_string): Pay attention to the case that STREAM is a cons. (read_escape): The arg BYTEREP deleted. (read1): Set load_each_byte to 1 temporarily while handling #@NUMBER. Don't call read_multibyte. (read_vector): Call Fread with a cons. If readcharfun is Qget_emacs_mule_file_char, decode the read string by emacs-mule. (read_list): If doc_reference is 2, make the cdr part string as unibyte. (syms_of_lread): Intern and staticpro Qget_emacs_mule_file_char and Qload_force_doc_strings.
author: Kenichi Handa 2002-07-24 10:51:15 +0000
committer: Kenichi Handa 2002-07-24 10:51:15 +0000
commit: 8792be6679372e6ba9908123fb1cf81798b6fd61 (patch)
tree: 70380692661ef6f5bfb1e2515f2e3f2f5c7248ff /src
parent: 571407d6e005f973a75f9db8afcb3f92a203b9b1 (diff)
download: emacs-8792be6679372e6ba9908123fb1cf81798b6fd61.tar.gz
emacs-8792be6679372e6ba9908123fb1cf81798b6fd61.zip
1 files changed, 350 insertions, 253 deletions
diff --git a/src/lread.c b/src/lread.c
index b4dc71514d9..3d4a6394185 100644
--- a/src/lread.c
+++ b/src/lread.c
@@ -31,6 +31,7 @@ Boston, MA 02111-1307, USA.  */
 #include "buffer.h"
 #include "character.h"
 #include "charset.h"
+#include "coding.h"
 #include <epaths.h>
 #include "commands.h"
 #include "keyboard.h"
@@ -83,6 +84,12 @@ Lisp_Object Qascii_character, Qload, Qload_file_name;
 Lisp_Object Qbackquote, Qcomma, Qcomma_at, Qcomma_dot, Qfunction;
 Lisp_Object Qinhibit_file_name_operation;
+/* Used instead of Qget_file_char while loading *.elc files compiled
+   by Emacs 21 or older.  */
+static Lisp_Object Qget_emacs_mule_file_char;
+static Lisp_Object Qload_force_doc_strings;
 extern Lisp_Object Qevent_symbol_element_mask;
 extern Lisp_Object Qfile_exists_p;
@@ -126,6 +133,11 @@ static int load_force_doc_strings;
 /* Nonzero means read should convert strings to unibyte.  */
 static int load_convert_to_unibyte;
+/* Nonzero means READCHAR should read bytes one by one (not character)
+   when READCHARFUN is Qget_file_char or Qget_emacs_mule_file_char.
+   This is set to 1 by read1 temporarily while handling #@NUMBER.  */
+static int load_each_byte;
 /* Function to use for loading an Emacs lisp source file (not
   compiled) instead of readevalloop.  */
 Lisp_Object Vload_source_file_function;
@@ -147,10 +159,6 @@ static int read_from_string_index;
 static int read_from_string_index_byte;
 static int read_from_string_limit;
-/* Number of bytes left to read in the buffer character
-   that `readchar' has already advanced over.  */
-static int readchar_backlog;
 /* This contains the last string skipped with #@.  */
 static char *saved_doc_string;
 /* Length of buffer allocated in saved_doc_string.  */
@@ -190,6 +198,9 @@ int load_dangerous_libraries;
 static Lisp_Object Vbytecomp_version_regexp;
+static int read_emacs_mule_char P_ ((int, int (*) (int, Lisp_Object),
+                                     Lisp_Object));
 static void readevalloop P_ ((Lisp_Object, FILE*, Lisp_Object, 
                              Lisp_Object (*) (), int,
                              Lisp_Object, Lisp_Object));
@@ -197,23 +208,42 @@ static Lisp_Object load_unwind P_ ((Lisp_Object));
 static Lisp_Object load_descriptor_unwind P_ ((Lisp_Object));
+/* Functions that read one byte from the current source READCHARFUN
+   or unreads one byte.  If the integer argument C is -1, it returns
+   one read byte, or -1 when there's no more byte in the source.  If C
+   is 0 or positive, it unreads C, and the return value is not
+   interesting.  */
+static int readbyte_for_lambda P_ ((int, Lisp_Object));
+static int readbyte_from_file P_ ((int, Lisp_Object));
+static int readbyte_from_string P_ ((int, Lisp_Object));
 /* Handle unreading and rereading of characters.
   Write READCHAR to read a character,
   UNREAD(c) to unread c to be read again.
-   These macros actually read/unread a byte code, multibyte characters
+   These macros correctly read/unread multibyte characters.  */
-   are not handled here.  The caller should manage them if necessary.
- */
 #define READCHAR readchar (readcharfun)
 #define UNREAD(c) unreadchar (readcharfun, c)
+/* When READCHARFUN is Qget_file_char, Qget_emacs_mule_file_char,
+   Qlambda, or a cons, we use this to keep unread character because a
+   file stream can't handle multibyte-char unreading.  The value -1
+   means that there's no unread character. */
+static int unread_char;
 static int
 readchar (readcharfun)
     Lisp_Object readcharfun;
 {
  Lisp_Object tem;
  register int c;
+  int (*readbyte) P_ ((int, Lisp_Object));
+  unsigned char buf[MAX_MULTIBYTE_LENGTH];
+  int i, len;
+  int emacs_mule_encoding = 0;
  if (BUFFERP (readcharfun))
    {
@@ -221,19 +251,9 @@ readchar (readcharfun)
      int pt_byte = BUF_PT_BYTE (inbuffer);
-      if (readchar_backlog > 0)
-        /* We get the address of the byte just passed,
-           which is the last byte of the character.
-           The other bytes in this character are consecutive with it,
-           because the gap can't be in the middle of a character.  */
-        return *(BUF_BYTE_ADDRESS (inbuffer, BUF_PT_BYTE (inbuffer) - 1)
-                 - --readchar_backlog);
      if (pt_byte >= BUF_ZV_BYTE (inbuffer))
        return -1;
-      readchar_backlog = -1;
      if (! NILP (inbuffer->enable_multibyte_characters))
        {
          /* Fetch the character code from the buffer.  */
@@ -244,6 +264,8 @@ readchar (readcharfun)
      else
        {
          c = BUF_FETCH_BYTE (inbuffer, pt_byte);
+          if (! ASCII_BYTE_P (c))
+            c = BYTE8_TO_CHAR (c);
          pt_byte++;
        }
      SET_BUF_PT_BOTH (inbuffer, BUF_PT (inbuffer) + 1, pt_byte);
@@ -256,19 +278,9 @@ readchar (readcharfun)
      int bytepos = marker_byte_position (readcharfun);
-      if (readchar_backlog > 0)
-        /* We get the address of the byte just passed,
-           which is the last byte of the character.
-           The other bytes in this character are consecutive with it,
-           because the gap can't be in the middle of a character.  */
-        return *(BUF_BYTE_ADDRESS (inbuffer, XMARKER (readcharfun)->bytepos - 1)
-                 - --readchar_backlog);
      if (bytepos >= BUF_ZV_BYTE (inbuffer))
        return -1;
-      readchar_backlog = -1;
      if (! NILP (inbuffer->enable_multibyte_characters))
        {
          /* Fetch the character code from the buffer.  */
@@ -279,6 +291,8 @@ readchar (readcharfun)
      else
        {
          c = BUF_FETCH_BYTE (inbuffer, bytepos);
+          if (! ASCII_BYTE_P (c))
+            c = BYTE8_TO_CHAR (c);
          bytepos++;
        }
@@ -289,22 +303,15 @@ readchar (readcharfun)
    }
  if (EQ (readcharfun, Qlambda))
-    return read_bytecode_char (0);
+    {
+      readbyte = readbyte_for_lambda;
+      goto read_multibyte;
+    }
  if (EQ (readcharfun, Qget_file_char))
    {
-      c = getc (instream);
+      readbyte = readbyte_from_file;
-#ifdef EINTR
+      goto read_multibyte;
-      /* Interrupted reads have been observed while reading over the network */
-      while (c == EOF && ferror (instream) && errno == EINTR)
-        {
-          clearerr (instream);
-          c = getc (instream);
-        }
-#endif
-      return c;
    }
  if (STRINGP (readcharfun))
    {
      if (read_from_string_index >= read_from_string_limit)
@@ -316,14 +323,61 @@ readchar (readcharfun)
      return c;
    }
+  if (CONSP (readcharfun))
+    {
+      /* This is the case that read_vector is reading from a unibyte
+         string that contains a byte sequence previously skipped
+         because of #@NUMBER.  The car part of readcharfun is that
+         string, and the cdr part is a value of readcharfun given to
+         read_vector.  */
+      readbyte = readbyte_from_string;
+      if (EQ (XCDR (readcharfun), Qget_emacs_mule_file_char))
+        emacs_mule_encoding = 1;
+      goto read_multibyte;
+    }
+  if (EQ (readcharfun, Qget_emacs_mule_file_char))
+    {
+      readbyte = readbyte_from_file;
+      emacs_mule_encoding = 1;
+      goto read_multibyte;
+    }
  tem = call0 (readcharfun);
  if (NILP (tem))
    return -1;
  return XINT (tem);
+ read_multibyte:
+  if (unread_char >= 0)
+    {
+      c = unread_char;
+      unread_char = -1;
+      return c;
+    }
+  c = (*readbyte) (-1, readcharfun);
+  if (c < 0 || ASCII_BYTE_P (c) || load_each_byte)
+    return c;
+  if (emacs_mule_encoding)
+    return read_emacs_mule_char (c, readbyte, readcharfun);
+  i = 0;
+  buf[i++] = c;
+  len = BYTES_BY_CHAR_HEAD (c);
+  while (i < len)
+    {
+      c = (*readbyte) (-1, readcharfun);
+      if (c < 0 || ! TRAILING_CODE_P (c))
+        {
+          while (--i > 1)
+            (*readbyte) (buf[i], readcharfun);
+          return BYTE8_TO_CHAR (buf[0]);
+        }
+      buf[i++] = c;
+    }
+  return STRING_CHAR (buf, i);
 }
 /* Unread the character C in the way appropriate for the stream READCHARFUN.
   If the stream is a user function, call it with the char as argument.  */
@@ -341,36 +395,26 @@ unreadchar (readcharfun, c)
      struct buffer *b = XBUFFER (readcharfun);
      int bytepos = BUF_PT_BYTE (b);
-      if (readchar_backlog >= 0)
+      BUF_PT (b)--;
-        readchar_backlog++;
+      if (! NILP (b->enable_multibyte_characters))
+        BUF_DEC_POS (b, bytepos);
      else
-        {
+        bytepos--;
-          BUF_PT (b)--;
-          if (! NILP (b->enable_multibyte_characters))
-            BUF_DEC_POS (b, bytepos);
-          else
-            bytepos--;
-          BUF_PT_BYTE (b) = bytepos;
+      BUF_PT_BYTE (b) = bytepos;
-        }
    }
  else if (MARKERP (readcharfun))
    {
      struct buffer *b = XMARKER (readcharfun)->buffer;
      int bytepos = XMARKER (readcharfun)->bytepos;
-      if (readchar_backlog >= 0)
+      XMARKER (readcharfun)->charpos--;
-        readchar_backlog++;
+      if (! NILP (b->enable_multibyte_characters))
+        BUF_DEC_POS (b, bytepos);
      else
-        {
+        bytepos--;
-          XMARKER (readcharfun)->charpos--;
-          if (! NILP (b->enable_multibyte_characters))
-            BUF_DEC_POS (b, bytepos);
-          else
-            bytepos--;
-          XMARKER (readcharfun)->bytepos = bytepos;
+      XMARKER (readcharfun)->bytepos = bytepos;
-        }
    }
  else if (STRINGP (readcharfun))
    {
@@ -378,16 +422,152 @@ unreadchar (readcharfun, c)
      read_from_string_index_byte
        = string_char_to_byte (readcharfun, read_from_string_index);
    }
+  else if (CONSP (readcharfun))
+    {
+      unread_char = c;
+    }
  else if (EQ (readcharfun, Qlambda))
-    read_bytecode_char (1);
+    {
-  else if (EQ (readcharfun, Qget_file_char))
+      unread_char = c;
-    ungetc (c, instream);
+    }
+  else if (EQ (readcharfun, Qget_file_char)
+           || EQ (readcharfun, Qget_emacs_mule_file_char))
+    {
+      if (load_each_byte)
+        ungetc (c, instream);
+      else
+        unread_char = c;
+    }
  else
    call1 (readcharfun, make_number (c));
 }
+static int
+readbyte_for_lambda (c, readcharfun)
+     int c;
+     Lisp_Object readcharfun;
+{
+  return read_bytecode_char (c >= 0);
+}
+static int
+readbyte_from_file (c, readcharfun)
+     int c;
+     Lisp_Object readcharfun;
+{
+  if (c >= 0)
+    {
+      ungetc (c, instream);
+      return 0;
+    }
+  c = getc (instream);
+#ifdef EINTR
+  /* Interrupted reads have been observed while reading over the network */
+  while (c == EOF && ferror (instream) && errno == EINTR)
+    {
+      clearerr (instream);
+      c = getc (instream);
+    }
+#endif
+  return (c == EOF ? -1 : c);
+}
+static int
+readbyte_from_string (c, readcharfun)
+     int c;
+     Lisp_Object readcharfun;
+{
+  Lisp_Object string = XCAR (readcharfun);
+  if (c >= 0)
+    {
+      read_from_string_index--;
+      read_from_string_index_byte
+        = string_char_to_byte (string, read_from_string_index);
+    }
+  
+  if (read_from_string_index >= read_from_string_limit)
+    c = -1;
+  else
+    FETCH_STRING_CHAR_ADVANCE (c, string,
+                               read_from_string_index,
+                               read_from_string_index_byte);
+  return c;
+}
+/* Read one non-ASCII character from INSTREAM.  The character is
+   encoded in `emacs-mule' and the first byte is already read in
+   C.  */
+extern char emacs_mule_bytes[256];
+static int
+read_emacs_mule_char (c, readbyte, readcharfun)
+     int c;
+     int (*readbyte) P_ ((int, Lisp_Object));
+     Lisp_Object readcharfun;
+{
+  /* Emacs-mule coding uses at most 4-byte for one character.  */
+  unsigned char buf[4];
+  int len = emacs_mule_bytes[c];
+  struct charset *charset;
+  int i;
+  unsigned code;
+  if (len == 1)
+    /* C is not a valid leading-code of `emacs-mule'.  */
+    return BYTE8_TO_CHAR (c);
+  i = 0;
+  buf[i++] = c;
+  while (i < len)
+    {
+      c = (*readbyte) (-1, readcharfun);
+      if (c < 0xA0)
+        {
+          while (--i > 1)
+            (*readbyte) (buf[i], readcharfun);
+          return BYTE8_TO_CHAR (buf[0]);
+        }
+      buf[i++] = c;
+    }
+  if (len == 2)
+    {
+      charset = emacs_mule_charset[c];
+      code = buf[1] & 0x7F;
+    }
+  else if (len == 3)
+    {
+      if (buf[0] == EMACS_MULE_LEADING_CODE_PRIVATE_11
+          || buf[0] == EMACS_MULE_LEADING_CODE_PRIVATE_12)
+        {
+          charset = emacs_mule_charset[buf[1]];
+          code = buf[2] & 0x7F;
+        }
+      else
+        {
+          charset = emacs_mule_charset[buf[0]];
+          code = ((buf[1] << 8) | buf[2]) & 0x7F7F;
+        }
+    }
+  else
+    {
+      charset = emacs_mule_charset[buf[1]];
+      code = ((buf[2] << 8) | buf[3]) & 0x7F7F;      
+    }
+  c = DECODE_CHAR (charset, code);
+  if (c < 0)
+    Fsignal (Qinvalid_read_syntax,
+             Fcons (build_string ("invalid multibyte form"), Qnil));
+  return c;
+}
 static Lisp_Object read0 (), read1 (), read_list (), read_vector ();
-static int read_multibyte ();
 static Lisp_Object substitute_object_recurse ();
 static void        substitute_object_in_subtree (), substitute_in_interval ();
@@ -553,11 +733,11 @@ DEFUN ("get-file-char", Fget_file_char, Sget_file_char, 0, 0, 0,
-/* Value is non-zero if the file asswociated with file descriptor FD
+/* Value is a version number of byte compiled code if the file
-   is a compiled Lisp file that's safe to load.  Only files compiled
+   associated with file descriptor FD is a compiled Lisp file that's
-   with Emacs are safe to load.  Files compiled with XEmacs can lead
+   safe to load.  Only files compiled with Emacs are safe to load.
-   to a crash in Fbyte_code because of an incompatible change in the
+   Files compiled with XEmacs can lead to a crash in Fbyte_code
-   byte compiler.  */
+   because of an incompatible change in the byte compiler.  */
 static int
 safe_to_load_p (fd)
@@ -578,7 +758,7 @@ safe_to_load_p (fd)
         with NUL bytes following it, but note the version.  */
      for (i = 0; i < nbytes && buf[i] != '\n'; ++i)
        if (i == 4)
-          version = buf[i];
+          version = buf[i];
      if (i < nbytes
          && fast_c_string_match_ignore_case (Vbytecomp_version_regexp,
@@ -637,6 +817,8 @@ Return t if file exists.  */)
  Lisp_Object handler;
  int safe_p = 1;
  char *fmode = "r";
+  int version;
 #ifdef DOS_NT
  fmode = "rt";
 #endif /* DOS_NT */
@@ -744,17 +926,20 @@ Return t if file exists.  */)
    Vloads_in_progress = Fcons (found, Vloads_in_progress);
  }
+  version = -1;
  if (!bcmp (&(XSTRING (found)->data[STRING_BYTES (XSTRING (found)) - 4]),
-             ".elc", 4))
+             ".elc", 4)
+      || (version = safe_to_load_p (fd)) > 0)
    /* Load .elc files directly, but not when they are
       remote and have no handler!  */
    {
      if (fd != -2)
        {
          struct stat s1, s2;
-          int result, version;
+          int result;
-          if (!(version = safe_to_load_p (fd)))
+          if (version < 0
+              && ! (version = safe_to_load_p (fd)))
            {
              safe_p = 0;
              if (!load_dangerous_libraries)
@@ -769,30 +954,6 @@ Return t if file exists.  */)
          compiled = 1;
-          if (version == 20)    /* 21 isn't used */
-            /* We're loading something compiled with Mule 3, 4 or 5,
-               and thus potentially emacs-mule-encoded; load it with
-               code conversion.  (Perhaps the test should actually be
-               <22?)  We could check further on whether the comment
-               mentions multibyte and only code-convert if it does.  I
-               doubt it's worth the effort. -- fx  */
-            {
-              Lisp_Object val;
-              if (fd >= 0)
-                emacs_close (fd);
-              /* load-with-code-conversion currently fails with
-                 emacs-mule non-ASCII doc strings.  */
-              error ("Can't currently load Emacs 20/1-compiled files: %s",
-                     XSTRING (found)->data);
-#if 0
-              val = call4 (intern ("load-with-code-conversion"), found, file,
-                           NILP (noerror) ? Qnil : Qt,
-                           NILP (nomessage) ? Qnil : Qt);
-#endif
-              return unbind_to (count, val);
-            }
 #ifdef DOS_NT
          fmode = "rb";
 #endif /* DOS_NT */
@@ -868,7 +1029,16 @@ Return t if file exists.  */)
  load_descriptor_list
    = Fcons (make_number (fileno (stream)), load_descriptor_list);
  load_in_progress++;
-  readevalloop (Qget_file_char, stream, file, Feval, 0, Qnil, Qnil);
+  if (version >= 22)
+    readevalloop (Qget_file_char, stream, file, Feval, 0, Qnil, Qnil);
+  else
+    {
+      /* We can't handle a file which was compiled with
+         byte-compile-dynamic by older version of Emacs.  */
+      specbind (Qload_force_doc_strings, Qt);
+      readevalloop (Qget_emacs_mule_file_char, stream, file, Feval, 0,
+                    Qnil, Qnil);
+    }
  unbind_to (count, Qnil);
  /* Run any load-hooks for this file.  */
@@ -1242,8 +1412,6 @@ readevalloop (readcharfun, stream, sourcename, evalfun, printflag, unibyte, read
  record_unwind_protect (readevalloop_1, load_convert_to_unibyte ? Qt : Qnil);
  load_convert_to_unibyte = !NILP (unibyte);
-  readchar_backlog = -1;
  GCPRO1 (sourcename);
  LOADHIST_ATTACH (sourcename);
@@ -1423,14 +1591,13 @@ STREAM or the value of `standard-input' may be:
  if (EQ (stream, Qt))
    stream = Qread_char;
-  readchar_backlog = -1;
  new_backquote_flag = 0;
  read_objects = Qnil;
  if (EQ (stream, Qread_char))
    return Fread_minibuffer (build_string ("Lisp expression: "), Qnil);
-  if (STRINGP (stream))
+  if (STRINGP (stream) || ((CONSP (stream) && STRINGP (XCAR (stream)))))
    return Fcar (Fread_from_string (stream, Qnil, Qnil));
  return read0 (stream);
@@ -1445,18 +1612,23 @@ START and END optionally delimit a substring of STRING from which to read;
     Lisp_Object string, start, end;
 {
  int startval, endval;
+  Lisp_Object str;
  Lisp_Object tem;
-  CHECK_STRING (string);
+  if (CONSP (string))
+    str = XCAR (string);
+  else
+    str = string;
+  CHECK_STRING (str);
  if (NILP (end))
-    endval = XSTRING (string)->size;
+    endval = XSTRING (str)->size;
  else
    {
      CHECK_NUMBER (end);
      endval = XINT (end);
-      if (endval < 0 || endval > XSTRING (string)->size)
+      if (endval < 0 || endval > XSTRING (str)->size)
-        args_out_of_range (string, end);
+        args_out_of_range (str, end);
    }
  if (NILP (start))
@@ -1466,11 +1638,11 @@ START and END optionally delimit a substring of STRING from which to read;
      CHECK_NUMBER (start);
      startval = XINT (start);
      if (startval < 0 || startval > endval)
-        args_out_of_range (string, start);
+        args_out_of_range (str, start);
    }
  read_from_string_index = startval;
-  read_from_string_index_byte = string_char_to_byte (string, startval);
+  read_from_string_index_byte = string_char_to_byte (str, startval);
  read_from_string_limit = endval;
  new_backquote_flag = 0;
@@ -1502,65 +1674,16 @@ read0 (readcharfun)
 static int read_buffer_size;
 static char *read_buffer;
-/* Read multibyte form and return it as a character.  C is a first
-   byte of multibyte form, and rest of them are read from
-   READCHARFUN.  Store the byte length of the form into *NBYTES.  */
-static int
-read_multibyte (c, readcharfun, nbytes)
-     register int c;
-     Lisp_Object readcharfun;
-     int *nbytes;
-{
-  /* We need the actual character code of this multibyte
-     characters.  */
-  unsigned char str[MAX_MULTIBYTE_LENGTH];
-  int len = 0;
-  int bytes = BYTES_BY_CHAR_HEAD (c);
-  str[len++] = c;
-  while (len < bytes)
-    {
-      c = READCHAR;
-      if (CHAR_HEAD_P (c))
-        {
-          UNREAD (c);
-          break;
-        }
-      str[len++] = c;
-    }
-  if (len == bytes && MULTIBYTE_LENGTH_NO_CHECK (str) > 0)
-    {
-      *nbytes = len;
-      return STRING_CHAR (str, len);
-    }
-  /* The byte sequence is not valid as multibyte.  Unread all bytes
-     but the first one, and return the first byte.  */
-  while (--len > 0)
-    UNREAD (str[len]);
-  *nbytes = 1;
-  return str[0];
-}
 /* Read a \-escape sequence, assuming we already read the `\'.
-   If the escape sequence forces unibyte, store 1 into *BYTEREP.
+   If the escape sequence forces unibyte, return eight-bit-char.  */
-   If the escape sequence forces multibyte and the returned character
-   is raw 8-bit char, store 2 into *BYTEREP.
-   If the escape sequence forces multibyte and the returned character
-   is not raw 8-bit char, store 3 into *BYTEREP.
-   Otherwise store 0 into *BYTEREP.  */
 static int
-read_escape (readcharfun, stringp, byterep)
+read_escape (readcharfun, stringp)
     Lisp_Object readcharfun;
     int stringp;
-     int *byterep;
 {
  register int c = READCHAR;
-  *byterep = 0;
  switch (c)
    {
    case -1:
@@ -1597,7 +1720,7 @@ read_escape (readcharfun, stringp, byterep)
        error ("Invalid escape character syntax");
      c = READCHAR;
      if (c == '\\')
-        c = read_escape (readcharfun, 0, byterep);
+        c = read_escape (readcharfun, 0);
      return c | meta_modifier;
    case 'S':
@@ -1606,7 +1729,7 @@ read_escape (readcharfun, stringp, byterep)
        error ("Invalid escape character syntax");
      c = READCHAR;
      if (c == '\\')
-        c = read_escape (readcharfun, 0, byterep);
+        c = read_escape (readcharfun, 0);
      return c | shift_modifier;
    case 'H':
@@ -1615,7 +1738,7 @@ read_escape (readcharfun, stringp, byterep)
        error ("Invalid escape character syntax");
      c = READCHAR;
      if (c == '\\')
-        c = read_escape (readcharfun, 0, byterep);
+        c = read_escape (readcharfun, 0);
      return c | hyper_modifier;
    case 'A':
@@ -1624,7 +1747,7 @@ read_escape (readcharfun, stringp, byterep)
        error ("Invalid escape character syntax");
      c = READCHAR;
      if (c == '\\')
-        c = read_escape (readcharfun, 0, byterep);
+        c = read_escape (readcharfun, 0);
      return c | alt_modifier;
    case 's':
@@ -1633,7 +1756,7 @@ read_escape (readcharfun, stringp, byterep)
        error ("Invalid escape character syntax");
      c = READCHAR;
      if (c == '\\')
-        c = read_escape (readcharfun, 0, byterep);
+        c = read_escape (readcharfun, 0);
      return c | super_modifier;
    case 'C':
@@ -1643,7 +1766,7 @@ read_escape (readcharfun, stringp, byterep)
    case '^':
      c = READCHAR;
      if (c == '\\')
-        c = read_escape (readcharfun, 0, byterep);
+        c = read_escape (readcharfun, 0);
      if ((c & ~CHAR_MODIFIER_MASK) == '?')
        return 0177 | (c & CHAR_MODIFIER_MASK);
      else if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_MODIFIER_MASK)))
@@ -1683,10 +1806,8 @@ read_escape (readcharfun, stringp, byterep)
              }
          }
        
-        if (c < 0x100)
+        if (! ASCII_BYTE_P (i))
-          *byterep = 1;
+          i = BYTE8_TO_CHAR (i);
-        else
-          *byterep = 3;
        return i;
      }
@@ -1721,22 +1842,11 @@ read_escape (readcharfun, stringp, byterep)
          }
        if (count < 3 && i >= 0x80)
-          *byterep = 2;
+          return BYTE8_TO_CHAR (i);
-        else
-          *byterep = 3;
        return i;
      }
    default:
-      if (EQ (readcharfun, Qget_file_char)
-          && BASE_LEADING_CODE_P (c))
-        {
-          int nbytes;
-          c = read_multibyte (c, readcharfun, &nbytes);
-          if (nbytes > 1)
-            *byterep = 3;
-        }
      return c;
    }
 }
@@ -1824,6 +1934,7 @@ read1 (readcharfun, pch, first_in_list)
  int uninterned_symbol = 0;
  *pch = 0;
+  load_each_byte = 0;
 retry:
@@ -1898,12 +2009,14 @@ read1 (readcharfun, pch, first_in_list)
              UNREAD (c);
              tmp = read1 (readcharfun, pch, first_in_list);
-              if (size_in_chars != XSTRING (tmp)->size
+              if (STRING_MULTIBYTE (tmp)
-                  /* We used to print 1 char too many
+                  || (size_in_chars != XSTRING (tmp)->size
-                     when the number of bits was a multiple of 8.
+                      /* We used to print 1 char too many
-                     Accept such input in case it came from an old version.  */
+                         when the number of bits was a multiple of 8.
-                  && ! (XFASTINT (length)
+                         Accept such input in case it came from an old
-                        == (XSTRING (tmp)->size - 1) * BITS_PER_CHAR))
+                         version.  */
+                      && ! (XFASTINT (length)
+                            == (XSTRING (tmp)->size - 1) * BITS_PER_CHAR)))
                Fsignal (Qinvalid_read_syntax,
                         Fcons (make_string ("#&...", 5), Qnil));
                
@@ -1969,6 +2082,7 @@ read1 (readcharfun, pch, first_in_list)
        {
          int i, nskip = 0;
+          load_each_byte = 1;
          /* Read a decimal integer.  */
          while ((c = READCHAR) >= 0
                 && c >= '0' && c <= '9')
@@ -1979,7 +2093,9 @@ read1 (readcharfun, pch, first_in_list)
          if (c >= 0)
            UNREAD (c);
          
-          if (load_force_doc_strings && EQ (readcharfun, Qget_file_char))
+          if (load_force_doc_strings
+              && (EQ (readcharfun, Qget_file_char)
+                  || EQ (readcharfun, Qget_emacs_mule_file_char)))
            {
              /* If we are supposed to force doc strings into core right now,
                 record the last string that we skipped,
@@ -2031,6 +2147,7 @@ read1 (readcharfun, pch, first_in_list)
                c = READCHAR;
            }
+          load_each_byte = 0;
          goto retry;
        }
      if (c == '$')
@@ -2152,17 +2269,18 @@ read1 (readcharfun, pch, first_in_list)
    case '?':
      {
-        int discard;
+        int modifiers;
        c = READCHAR;
        if (c < 0)
          end_of_file_error ();
        if (c == '\\')
-          c = read_escape (readcharfun, 0, &discard);
+          c = read_escape (readcharfun, 0);
-        else if (EQ (readcharfun, Qget_file_char)
+        modifiers = c & CHAR_MODIFIER_MASK;
-                 && BASE_LEADING_CODE_P (c))
+        c &= ~CHAR_MODIFIER_MASK;
-          c = read_multibyte (c, readcharfun, &discard);
+        if (CHAR_BYTE8_P (c))
+          c = CHAR_TO_BYTE8 (c);
+        c |= modifiers;
        return make_number (c);
      }
@@ -2196,9 +2314,8 @@ read1 (readcharfun, pch, first_in_list)
            if (c == '\\')
              {
                int modifiers;
-                int byterep;
-                c = read_escape (readcharfun, 1, &byterep);
+                c = read_escape (readcharfun, 1);
                /* C is -1 if \ newline has just been seen */
                if (c == -1)
@@ -2211,26 +2328,11 @@ read1 (readcharfun, pch, first_in_list)
                modifiers = c & CHAR_MODIFIER_MASK;
                c = c & ~CHAR_MODIFIER_MASK;
-                if (byterep == 1)
+                if (CHAR_BYTE8_P (c))
-                  {
+                  force_singlebyte = 1;
-                    force_singlebyte = 1;
+                else if (! ASCII_CHAR_P (c))
-                    if (c >= 0x80)
+                  force_multibyte = 1;
-                      /*  Raw 8-bit code */
+                else            /* i.e. ASCII_CHAR_P (c) */
-                      c = BYTE8_TO_CHAR (c);
-                  }
-                else if (byterep > 1)
-                  {
-                    force_multibyte = 1;
-                    if (byterep == 2)
-                      c = BYTE8_TO_CHAR (c);
-                  }
-                else if (c >= 0x80)
-                  {
-                    force_singlebyte = 1;
-                    c = BYTE8_TO_CHAR (c);
-                  }
-                if (ASCII_CHAR_P (c))
                  {
                    /* Allow `\C- ' and `\C-?'.  */
                    if (modifiers == CHAR_CTL)
@@ -2264,34 +2366,10 @@ read1 (readcharfun, pch, first_in_list)
                  error ("Invalid modifier in string");
                p += CHAR_STRING (c, (unsigned char *) p);
              }
-            else if (c >= 0x80)
+            else
              {
-                if (EQ (readcharfun, Qget_file_char))
-                  {
-                    if (BASE_LEADING_CODE_P (c))
-                      {
-                        int nbytes;
-                        c = read_multibyte (c, readcharfun, &nbytes);
-                        if (nbytes > 1)
-                          force_multibyte = 1;
-                        else
-                          {
-                            force_singlebyte = 1;
-                            c = BYTE8_TO_CHAR (c);
-                          }
-                      }
-                    else
-                      {
-                        force_singlebyte = 1;
-                        c = BYTE8_TO_CHAR (c);
-                      }
-                  }
-                else
-                  force_multibyte = 1;
                p += CHAR_STRING (c, (unsigned char *) p);
              }
-            else
-              *p++ = c;
            nchars++;
          }
        if (c < 0)
@@ -2371,11 +2449,7 @@ read1 (readcharfun, pch, first_in_list)
                  quoted = 1;
                }
-              if (! SINGLE_BYTE_CHAR_P (c))
+              p += CHAR_STRING (c, p);
-                p += CHAR_STRING (c, p);
-              else
-                *p++ = c;
              c = READCHAR;
            }
@@ -2702,7 +2776,7 @@ read_vector (readcharfun, bytecodeflag)
                  XSTRING (bytestr)->size = STRING_BYTES (XSTRING (bytestr));
                  SET_STRING_BYTES (XSTRING (bytestr), -1);
-                  item = Fread (bytestr);
+                  item = Fread (Fcons (bytestr, readcharfun));
                  if (!CONSP (item))
                    error ("invalid byte code");
@@ -2715,6 +2789,15 @@ read_vector (readcharfun, bytecodeflag)
              /* Now handle the bytecode slot.  */
              ptr[COMPILED_BYTECODE] = read_pure ? Fpurecopy (bytestr) : bytestr;
            }
+          else if (i == COMPILED_DOC_STRING
+                   && STRINGP (item)
+                   && ! STRING_MULTIBYTE (item))
+            {
+              if (EQ (readcharfun, Qget_emacs_mule_file_char))
+                item = Fdecode_coding_string (item, Qemacs_mule, Qnil, Qnil);
+              else
+                item = Fstring_as_multibyte (item);
+            }
        }
      ptr[i] = read_pure ? Fpurecopy (item) : item;
      otem = XCONS (tem);
@@ -2812,7 +2895,14 @@ read_list (flag, readcharfun)
                  if (doc_reference == 2)
                    {
                      /* Get a doc string from the file we are loading.
-                         If it's in saved_doc_string, get it from there.  */
+                         If it's in saved_doc_string, get it from there.
+                         Here, we don't know if the string is a
+                         bytecode string or a doc string.  As a
+                         bytecode string must be unibyte, we always
+                         return a unibyte string.  If it is actually a
+                         doc string, caller must make it
+                         multibyte.  */
                      int pos = XINT (XCDR (val));
                      /* Position is negative for user variables.  */
                      if (pos < 0) pos = -pos;
@@ -2844,8 +2934,8 @@ read_list (flag, readcharfun)
                                saved_doc_string[to++] = c;
                            }
-                          return make_string (saved_doc_string + start,
+                          return make_unibyte_string (saved_doc_string + start,
-                                              to - start);
+                                                      to - start);
                        }
                      /* Look in prev_saved_doc_string the same way.  */
                      else if (pos >= prev_saved_doc_string_position
@@ -2876,11 +2966,12 @@ read_list (flag, readcharfun)
                                prev_saved_doc_string[to++] = c;
                            }
-                          return make_string (prev_saved_doc_string + start,
+                          return make_unibyte_string (prev_saved_doc_string
-                                              to - start);
+                                                      + start,
+                                                      to - start);
                        }
                      else
-                        return get_doc_string (val, 0, 0);
+                        return get_doc_string (val, 1, 0);
                    }
                  return val;
@@ -3764,6 +3855,12 @@ to load.  See also `load-dangerous-libraries'.  */);
  Qget_file_char = intern ("get-file-char");
  staticpro (&Qget_file_char);
+  Qget_emacs_mule_file_char = intern ("get-emacs-mule-file-char");
+  staticpro (&Qget_emacs_mule_file_char);
+  Qload_force_doc_strings = intern ("load-force-doc-strings");
+  staticpro (&Qload_force_doc_strings);
  Qbackquote = intern ("`");
  staticpro (&Qbackquote);
  Qcomma = intern (",");
author	Kenichi Handa	2002-07-24 10:51:15 +0000
committer	Kenichi Handa	2002-07-24 10:51:15 +0000
commit	8792be6679372e6ba9908123fb1cf81798b6fd61 (patch)
tree	70380692661ef6f5bfb1e2515f2e3f2f5c7248ff /src
parent	571407d6e005f973a75f9db8afcb3f92a203b9b1 (diff)
download	emacs-8792be6679372e6ba9908123fb1cf81798b6fd61.tar.gz emacs-8792be6679372e6ba9908123fb1cf81798b6fd61.zip