(Valternate_charset_table): The variable deleted.

(Venable_character_unification, Vstandard_character_unification_table_for_read, Vstandard_character_unification_table_for_write, Qcharacter_unification_table): New variables. (syms_of_coding): Initialize and declare them. (DECODE_ISO_CHARACTER): Modified to handle a character unification table instead of Valternate_charset_table. (DECODE_DESIGNATION): Delete handling of Valternate_charset_table. (decode_coding_iso2022): Handle a character unification table. (ENCODE_ISO_CHARACTER): New macro. (encode_designation_at_bol): Handle a character unification table. Do not return -1 even if end-of-line is not in the current run. (encode_coding_iso2022): Handle a character unification table. Call macro ENCODE_ISO_CHARACTER instead of calling ENCODE_ISO_CHARACTER_DIMENSION1 and ENCODE_ISO_CHARACTER_DIMENSION2 directly. Check the size of carryover before copying it to destination. (setup_coding_system): Initialize the member character_unification_table of the struct coding system to Qnil. (Fset_keyboard_coding_system): Doc string augmented.
author: Kenichi Handa 1997-05-10 03:37:01 +0000
committer: Kenichi Handa 1997-05-10 03:37:01 +0000
commit: bdd9fb4867851e75fd60a4fe0100dc719a09c049 (patch)
tree: f2e9db7c535ba7cc954a134263ad74ca786d0ed6 /src/coding.c
parent: ceb5851081f93fe19e70072bb971cbd868f17e6f (diff)
download: emacs-bdd9fb4867851e75fd60a4fe0100dc719a09c049.tar.gz
emacs-bdd9fb4867851e75fd60a4fe0100dc719a09c049.zip
1 files changed, 152 insertions, 95 deletions
diff --git a/src/coding.c b/src/coding.c
index faae05d9136..bcc603a2c63 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -303,8 +303,15 @@ char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {
  "coding-category-binary"
 };
-/* Alist of charsets vs the alternate charsets.  */
+/* Flag to tell if we look up unification table on character code
-Lisp_Object Valternate_charset_table;
+   conversion.  */
+Lisp_Object Venable_character_unification;
+/* Standard unification table to look up on reading (decoding).  */
+Lisp_Object Vstandard_character_unification_table_for_read;
+/* Standard unification table to look up on writing (encoding).  */
+Lisp_Object Vstandard_character_unification_table_for_write;
+Lisp_Object Qcharacter_unification_table;
 /* Alist of charsets vs revision number.  */
 Lisp_Object Vcharset_revision_alist;
@@ -650,44 +657,46 @@ detect_coding_iso2022 (src, src_end)
 }
 /* Decode a character of which charset is CHARSET and the 1st position
-   code is C1.  If dimension of CHARSET 2, the 2nd position code is
+   code is C1.  If dimension of CHARSET is 2, the 2nd position code is
   fetched from SRC and set to C2.  If CHARSET is negative, it means
   that we are decoding ill formed text, and what we can do is just to
   read C1 as is.  */
-#define DECODE_ISO_CHARACTER(charset, c1)                       \
+#define DECODE_ISO_CHARACTER(charset, c1)                               \
-  do {                                                          \
+  do {                                                                  \
-    if ((charset) >= 0 && CHARSET_DIMENSION (charset) == 2)     \
+    int c_alt, charset_alt = (charset);                                 \
-      ONE_MORE_BYTE (c2);                                       \
+    if (COMPOSING_HEAD_P (coding->composing))                           \
-    if (COMPOSING_HEAD_P (coding->composing))                   \
+      {                                                                 \
-      {                                                         \
+        *dst++ = LEADING_CODE_COMPOSITION;                              \
-        *dst++ = LEADING_CODE_COMPOSITION;                      \
+        if (COMPOSING_WITH_RULE_P (coding->composing))                  \
-        if (COMPOSING_WITH_RULE_P (coding->composing))          \
+          /* To tell composition rules are embeded.  */                 \
-          /* To tell composition rules are embeded.  */         \
+          *dst++ = 0xFF;                                                \
-          *dst++ = 0xFF;                                        \
+        coding->composing += 2;                                         \
-        coding->composing += 2;                                 \
+      }                                                                 \
-      }                                                         \
+    if ((charset) >= 0)                                                 \
-    if ((charset) < 0)                                          \
+      {                                                                 \
-      *dst++ = c1;                                              \
+        if (CHARSET_DIMENSION (charset) == 2)                           \
-    else if ((charset) == CHARSET_ASCII)                        \
+          ONE_MORE_BYTE (c2);                                           \
-      DECODE_CHARACTER_ASCII (c1);                              \
+        if (!NILP (unification_table)                                   \
-    else if (CHARSET_DIMENSION (charset) == 1)                  \
+            && ((c_alt = unify_char (unification_table,                 \
-      DECODE_CHARACTER_DIMENSION1 (charset, c1);                \
+                                     -1, (charset), c1, c2)) >= 0))     \
-    else                                                        \
+          SPLIT_CHAR (c_alt, charset_alt, c1, c2);                      \
-      DECODE_CHARACTER_DIMENSION2 (charset, c1, c2);            \
+      }                                                                 \
-    if (COMPOSING_WITH_RULE_P (coding->composing))              \
+    if (charset_alt == CHARSET_ASCII || charset_alt < 0)                \
-      /* To tell a composition rule follows.  */                \
+      DECODE_CHARACTER_ASCII (c1);                                      \
-      coding->composing = COMPOSING_WITH_RULE_RULE;             \
+    else if (CHARSET_DIMENSION (charset_alt) == 1)                      \
+      DECODE_CHARACTER_DIMENSION1 (charset_alt, c1);                    \
+    else                                                                \
+      DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2);                \
+    if (COMPOSING_WITH_RULE_P (coding->composing))                      \
+      /* To tell a composition rule follows.  */                        \
+      coding->composing = COMPOSING_WITH_RULE_RULE;                     \
  } while (0)
 /* Set designation state into CODING.  */
 #define DECODE_DESIGNATION(reg, dimension, chars, final_char)           \
  do {                                                                  \
    int charset = ISO_CHARSET_TABLE (dimension, chars, final_char);     \
-    Lisp_Object temp                                                    \
-      = Fassq (CHARSET_SYMBOL (charset), Valternate_charset_table);     \
-    if (! NILP (temp))                                                  \
-      charset = get_charset_id (XCONS (temp)->cdr);                     \
    if (charset >= 0)                                                   \
      {                                                                 \
        if (coding->direction == 1                                      \
@@ -719,6 +728,10 @@ decode_coding_iso2022 (coding, source, destination,
  /* Charsets invoked to graphic plane 0 and 1 respectively.  */
  int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
  int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
+  Lisp_Object unification_table = coding->character_unification_table;
+  if (!NILP (Venable_character_unification) && NILP (unification_table))
+    unification_table = Vstandard_character_unification_table_for_read;
  while (src < src_end && dst < adjusted_dst_end)
    {
@@ -728,7 +741,7 @@ decode_coding_iso2022 (coding, source, destination,
         ONE_MORE_BYTE or TWO_MORE_BYTES).  In that case, SRC is reset
         to SRC_BASE before exiting.  */
      unsigned char *src_base = src;
-      unsigned char c1 = *src++, c2, cmprule;
+      int c1 = *src++, c2;
      switch (iso_code_class [c1])
        {
@@ -1167,6 +1180,21 @@ decode_coding_iso2022 (coding, source, destination,
      dst = encode_invocation_designation (charset, coding, dst);       \
  } while (1)
+#define ENCODE_ISO_CHARACTER(charset, c1, c2)                             \
+  do {                                                                    \
+    int c_alt, charset_alt;                                               \
+    if (!NILP (unification_table)                                         \
+        && ((c_alt = unify_char (unification_table, -1, charset, c1, c2)) \
+            < 0))                                                         \
+      SPLIT_CHAR (c_alt, charset_alt, c1, c2);                            \
+    else                                                                  \
+      charset_alt = charset;                                              \
+    if (CHARSET_DIMENSION (charset_alt) == 1)                             \
+      ENCODE_ISO_CHARACTER_DIMENSION1 (charset_alt, c1);                  \
+    else                                                                  \
+      ENCODE_ISO_CHARACTER_DIMENSION2 (charset_alt, c1, c2);              \
+  } while (0)
 /* Produce designation and invocation codes at a place pointed by DST
   to use CHARSET.  The element `spec.iso2022' of *CODING is updated.
   Return new DST.  */
@@ -1266,48 +1294,57 @@ encode_invocation_designation (charset, coding, dst)
          (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg), reg, coding); \
  } while (0)
-int
+/* Produce designation sequences of charsets in the line started from
-encode_designation_at_bol (coding, src, src_end, dstp)
+   *SRC to a place pointed by *DSTP.
+   If the current block ends before any end-of-line, we may fail to
+   find all the necessary designations.  */
+encode_designation_at_bol (coding, table, src, src_end, dstp)
     struct coding_system *coding;
+     Lisp_Object table;
     unsigned char *src, *src_end, **dstp;
 {
-  int charset, reg, r[4];
+  int charset, c, found = 0, reg;
-  unsigned char *dst = *dstp, c;
+  /* Table of charsets to be designated to each graphic register.  */
-  for (reg = 0; reg < 4; reg++) r[reg] = -1;
+  int r[4];
-  while (src < src_end && (c = *src++) != '\n')
+  unsigned char *dst = *dstp;
+  for (reg = 0; reg < 4; reg++)
+    r[reg] = -1;
+  while (src < src_end && *src != '\n' && found < 4)
    {
-      switch (emacs_code_class[c])
+      int bytes = BYTES_BY_CHAR_HEAD (*src);
+      
+      if (NILP (table))
+        charset = CHARSET_AT (src);
+      else
        {
-        case EMACS_ascii_code:
+          int c_alt, c1, c2;
-          charset = CHARSET_ASCII;
-          break;
+          SPLIT_STRING(src, bytes, charset, c1, c2);
-        case EMACS_leading_code_2:
+          if ((c_alt = unify_char (table, -1, charset, c1, c2)) >= 0)
-          if (++src >= src_end) continue;
+            charset = CHAR_CHARSET (c_alt);
-          charset = c;
-          break;
-        case EMACS_leading_code_3:
-          if ((src += 2) >= src_end) continue;
-          charset =  (c < LEADING_CODE_PRIVATE_11 ? c : *(src - 2));
-          break;
-        case EMACS_leading_code_4:
-          if ((src += 3) >= src_end) continue;
-          charset = *(src - 3);
-          break;
-        default:
-          continue;
        }
      reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
-      if (r[reg] < 0
+      if (r[reg] < 0)
-          && CODING_SPEC_ISO_DESIGNATION (coding, reg) != charset)
+        {
-        r[reg] = charset;
+          found++;
+          r[reg] = charset;
+        }
+      src += bytes;
+    }
+  if (found)
+    {
+      for (reg = 0; reg < 4; reg++)
+        if (r[reg] >= 0
+            && CODING_SPEC_ISO_DESIGNATION (coding, reg) != r[reg])
+          ENCODE_DESIGNATION (r[reg], reg, coding);
+      *dstp = dst;
    }
-  if (c != '\n' && !coding->last_block)
-    return -1;
-  for (reg = 0; reg < 4; reg++)
-    if (r[reg] >= 0)
-      ENCODE_DESIGNATION (r[reg], reg, coding);
-  *dstp = dst;
-  return 0;
 }
 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
@@ -1328,6 +1365,10 @@ encode_coding_iso2022 (coding, source, destination,
     from DST_END to assure overflow checking is necessary only at the
     head of loop.  */
  unsigned char *adjusted_dst_end = dst_end - 19;
+  Lisp_Object unification_table = coding->character_unification_table;
+  if (!NILP (Venable_character_unification) && NILP (unification_table))
+    unification_table = Vstandard_character_unification_table_for_write;
  while (src < src_end && dst < adjusted_dst_end)
    {
@@ -1337,18 +1378,14 @@ encode_coding_iso2022 (coding, source, destination,
         TWO_MORE_BYTES, and THREE_MORE_BYTES).  In that case, SRC is
         reset to SRC_BASE before exiting.  */
      unsigned char *src_base = src;
-      unsigned char c1, c2, c3, c4;
+      int charset, c1, c2, c3, c4;
-      int charset;
      if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL
          && CODING_SPEC_ISO_BOL (coding))
        {
-          /* We have to produce destination sequences now.  */
+          /* We have to produce designation sequences if any now.  */
-          if (encode_designation_at_bol (coding, src, src_end, &dst) < 0)
+          encode_designation_at_bol (coding, unification_table,
-            /* We can't find end of line in the current block.  Let's
+                                     src, src_end, &dst);
-             repeat encoding starting from the current position
-             pointed by SRC.  */
-            break;
          CODING_SPEC_ISO_BOL (coding) = 0;
        }
@@ -1393,7 +1430,7 @@ encode_coding_iso2022 (coding, source, destination,
      switch (emacs_code_class[c1])
        {
        case EMACS_ascii_code:
-          ENCODE_ISO_CHARACTER_DIMENSION1 (CHARSET_ASCII, c1);
+          ENCODE_ISO_CHARACTER (CHARSET_ASCII, c1, /* dummy */ c2);
          break;
        case EMACS_control_code:
@@ -1431,20 +1468,20 @@ encode_coding_iso2022 (coding, source, destination,
        case EMACS_leading_code_2:
          ONE_MORE_BYTE (c2);
-          ENCODE_ISO_CHARACTER_DIMENSION1 (c1, c2);
+          ENCODE_ISO_CHARACTER (c1, c2, /* dummy */ c3);
          break;
        case EMACS_leading_code_3:
          TWO_MORE_BYTES (c2, c3);
          if (c1 < LEADING_CODE_PRIVATE_11)
-            ENCODE_ISO_CHARACTER_DIMENSION2 (c1, c2, c3);
+            ENCODE_ISO_CHARACTER (c1, c2, c3);
          else
-            ENCODE_ISO_CHARACTER_DIMENSION1 (c2, c3);
+            ENCODE_ISO_CHARACTER (c2, c3, /* dummy */ c4);
          break;
        case EMACS_leading_code_4:
          THREE_MORE_BYTES (c2, c3, c4);
-          ENCODE_ISO_CHARACTER_DIMENSION2 (c2, c3, c4);
+          ENCODE_ISO_CHARACTER (c2, c3, c4);
          break;
        case EMACS_leading_code_composition:
@@ -1472,20 +1509,21 @@ encode_coding_iso2022 (coding, source, destination,
    label_end_of_loop:
      coding->carryover_size = src - src_base;
      bcopy (src_base, coding->carryover, coding->carryover_size);
-      src = src_base;
      break;
    }
  /* If this is the last block of the text to be encoded, we must
-     reset the state of graphic planes and registers to initial one.
+     reset graphic planes and registers to the initial state.  */
-     In addition, we had better just flush out all remaining codes in
+  if (src >= src_end && coding->last_block)
-     the text although they are not valid characters.  */
-  if (coding->last_block)
    {
      ENCODE_RESET_PLANE_AND_REGISTER;
-      bcopy(src, dst, src_end - src);
+      if (coding->carryover_size > 0
-      dst += (src_end - src);
+          && coding->carryover_size < (dst_end - dst))
-      src = src_end;
+        {
+          bcopy (coding->carryover, dst, coding->carryover_size);
+          dst += coding->carryover_size;
+          coding->carryover_size = 0;
+        }
    }
  *consumed = src - source;
  return dst - destination;
@@ -2063,6 +2101,9 @@ setup_coding_system (coding_system, coding)
  coding->direction = 0;
  coding->carryover_size = 0;
  coding->post_read_conversion = coding->pre_write_conversion = Qnil;
+  /* We have not yet implemented a way to specify unification table in
+     a coding system.  */
+  coding->character_unification_table = Qnil;
  Vlast_coding_system_used = coding->symbol = coding_system;
  eol_type = Qnil;
@@ -3316,10 +3357,13 @@ DEFUN ("terminal-coding-system",
 }
 DEFUN ("set-keyboard-coding-system",
-       Fset_keyboard_coding_system, Sset_keyboard_coding_system, 1, 1,
+       Fset_keyboard_coding_system, Sset_keyboard_coding_system, 1, 1, 0,
-       "zCoding-system for keyboard input: ",
+  "Set coding-system of codes sent from terminal keyboard to CODING-SYSTEM.\n\
-  "Set coding-system of what is sent from terminal keyboard to CODING-SYSTEM.\n\
+In Encoded-kbd minor mode, user inputs are decoded\n\
-All inputs from terminal are decoded from this coding-system.")
+accoding to CODING-SYSTEM.\n\
+Do not call this function directly, but use the command\n\
+encoded-kbd-set-coding-system to activate Encoded-kbd mode\n\
+with a specific coding system.")
  (coding_system)
     Lisp_Object coding_system;
 {
@@ -3529,6 +3573,11 @@ syms_of_coding ()
      }
  }
+  Qcharacter_unification_table = intern ("character-unification-table");
+  staticpro (&Qcharacter_unification_table);
+  Fput (Qcharacter_unification_table, Qchar_table_extra_slots,
+        make_number (0));
  defsubr (&Scoding_system_vector);
  defsubr (&Scoding_system_p);
  defsubr (&Sread_coding_system);
@@ -3613,11 +3662,19 @@ See the documentation of `find-coding-system' for more detail.");
    "Mnemonic character indicating end-of-line format is not yet decided.");
  eol_mnemonic_undecided = '-';
-  DEFVAR_LISP ("alternate-charset-table", &Valternate_charset_table,
+  DEFVAR_LISP ("enable-character-unification", &Venable_character_unification,
-    "Alist of charsets vs the alternate charsets.\n\
+    "Non-nil means ISO 2022 encoder/decoder do character unification.");
-While decoding, if a charset (car part of an element) is found,\n\
+  Venable_character_unification = Qt;
-decode it as the alternate charset (cdr part of the element).");
-  Valternate_charset_table = Qnil;
+  DEFVAR_LISP ("standard-character-unification-table-for-read",
+    &Vstandard_character_unification_table_for_read,
+    "Table for unifying characters when reading.");
+  Vstandard_character_unification_table_for_read = Qnil;
+  DEFVAR_LISP ("standard-character-unification-table-for-write",
+    &Vstandard_character_unification_table_for_write,
+    "Table for unifying characters when writing.");
+  Vstandard_character_unification_table_for_write = Qnil;
  DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist,
    "Alist of charsets vs revision numbers.\n\
author	Kenichi Handa	1997-05-10 03:37:01 +0000
committer	Kenichi Handa	1997-05-10 03:37:01 +0000
commit	bdd9fb4867851e75fd60a4fe0100dc719a09c049 (patch)
tree	f2e9db7c535ba7cc954a134263ad74ca786d0ed6 /src/coding.c
parent	ceb5851081f93fe19e70072bb971cbd868f17e6f (diff)
download	emacs-bdd9fb4867851e75fd60a4fe0100dc719a09c049.tar.gz emacs-bdd9fb4867851e75fd60a4fe0100dc719a09c049.zip

diff --git a/src/coding.c b/src/coding.c index faae05d9136..bcc603a2c63 100644 --- a/src/coding.c +++ b/src/coding.c
@@ -303,8 +303,15 @@ char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {
303	"coding-category-binary"	303	"coding-category-binary"
304	};	304	};
305		305
306	/* Alist of charsets vs the alternate charsets. */	306	/* Flag to tell if we look up unification table on character code
307	Lisp_Object Valternate_charset_table;	307	conversion. */
		308	Lisp_Object Venable_character_unification;
		309	/* Standard unification table to look up on reading (decoding). */
		310	Lisp_Object Vstandard_character_unification_table_for_read;
		311	/* Standard unification table to look up on writing (encoding). */
		312	Lisp_Object Vstandard_character_unification_table_for_write;
		313
		314	Lisp_Object Qcharacter_unification_table;
308		315
309	/* Alist of charsets vs revision number. */	316	/* Alist of charsets vs revision number. */
310	Lisp_Object Vcharset_revision_alist;	317	Lisp_Object Vcharset_revision_alist;
@@ -650,44 +657,46 @@ detect_coding_iso2022 (src, src_end)
650	}	657	}
651		658
652	/* Decode a character of which charset is CHARSET and the 1st position	659	/* Decode a character of which charset is CHARSET and the 1st position
653	code is C1. If dimension of CHARSET 2, the 2nd position code is	660	code is C1. If dimension of CHARSET is 2, the 2nd position code is
654	fetched from SRC and set to C2. If CHARSET is negative, it means	661	fetched from SRC and set to C2. If CHARSET is negative, it means
655	that we are decoding ill formed text, and what we can do is just to	662	that we are decoding ill formed text, and what we can do is just to
656	read C1 as is. */	663	read C1 as is. */
657		664
658	#define DECODE_ISO_CHARACTER(charset, c1) \	665	#define DECODE_ISO_CHARACTER(charset, c1) \
659	do { \	666	do { \
660	if ((charset) >= 0 && CHARSET_DIMENSION (charset) == 2) \	667	int c_alt, charset_alt = (charset); \
661	ONE_MORE_BYTE (c2); \	668	if (COMPOSING_HEAD_P (coding->composing)) \
662	if (COMPOSING_HEAD_P (coding->composing)) \	669	{ \
663	{ \	670	*dst++ = LEADING_CODE_COMPOSITION; \
664	*dst++ = LEADING_CODE_COMPOSITION; \	671	if (COMPOSING_WITH_RULE_P (coding->composing)) \
665	if (COMPOSING_WITH_RULE_P (coding->composing)) \	672	/* To tell composition rules are embeded. */ \
666	/* To tell composition rules are embeded. */ \	673	*dst++ = 0xFF; \
667	*dst++ = 0xFF; \	674	coding->composing += 2; \
668	coding->composing += 2; \	675	} \
669	} \	676	if ((charset) >= 0) \
670	if ((charset) < 0) \	677	{ \
671	*dst++ = c1; \	678	if (CHARSET_DIMENSION (charset) == 2) \
672	else if ((charset) == CHARSET_ASCII) \	679	ONE_MORE_BYTE (c2); \
673	DECODE_CHARACTER_ASCII (c1); \	680	if (!NILP (unification_table) \
674	else if (CHARSET_DIMENSION (charset) == 1) \	681	&& ((c_alt = unify_char (unification_table, \
675	DECODE_CHARACTER_DIMENSION1 (charset, c1); \	682	-1, (charset), c1, c2)) >= 0)) \
676	else \	683	SPLIT_CHAR (c_alt, charset_alt, c1, c2); \
677	DECODE_CHARACTER_DIMENSION2 (charset, c1, c2); \	684	} \
678	if (COMPOSING_WITH_RULE_P (coding->composing)) \	685	if (charset_alt == CHARSET_ASCII || charset_alt < 0) \
679	/* To tell a composition rule follows. */ \	686	DECODE_CHARACTER_ASCII (c1); \
680	coding->composing = COMPOSING_WITH_RULE_RULE; \	687	else if (CHARSET_DIMENSION (charset_alt) == 1) \
		688	DECODE_CHARACTER_DIMENSION1 (charset_alt, c1); \
		689	else \
		690	DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2); \
		691	if (COMPOSING_WITH_RULE_P (coding->composing)) \
		692	/* To tell a composition rule follows. */ \
		693	coding->composing = COMPOSING_WITH_RULE_RULE; \
681	} while (0)	694	} while (0)
682		695
683	/* Set designation state into CODING. */	696	/* Set designation state into CODING. */
684	#define DECODE_DESIGNATION(reg, dimension, chars, final_char) \	697	#define DECODE_DESIGNATION(reg, dimension, chars, final_char) \
685	do { \	698	do { \
686	int charset = ISO_CHARSET_TABLE (dimension, chars, final_char); \	699	int charset = ISO_CHARSET_TABLE (dimension, chars, final_char); \
687	Lisp_Object temp \
688	= Fassq (CHARSET_SYMBOL (charset), Valternate_charset_table); \
689	if (! NILP (temp)) \
690	charset = get_charset_id (XCONS (temp)->cdr); \
691	if (charset >= 0) \	700	if (charset >= 0) \
692	{ \	701	{ \
693	if (coding->direction == 1 \	702	if (coding->direction == 1 \
@@ -719,6 +728,10 @@ decode_coding_iso2022 (coding, source, destination,
719	/* Charsets invoked to graphic plane 0 and 1 respectively. */	728	/* Charsets invoked to graphic plane 0 and 1 respectively. */
720	int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);	729	int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
721	int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);	730	int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
		731	Lisp_Object unification_table = coding->character_unification_table;
		732
		733	if (!NILP (Venable_character_unification) && NILP (unification_table))
		734	unification_table = Vstandard_character_unification_table_for_read;
722		735
723	while (src < src_end && dst < adjusted_dst_end)	736	while (src < src_end && dst < adjusted_dst_end)
724	{	737	{
@@ -728,7 +741,7 @@ decode_coding_iso2022 (coding, source, destination,
728	ONE_MORE_BYTE or TWO_MORE_BYTES). In that case, SRC is reset	741	ONE_MORE_BYTE or TWO_MORE_BYTES). In that case, SRC is reset
729	to SRC_BASE before exiting. */	742	to SRC_BASE before exiting. */
730	unsigned char *src_base = src;	743	unsigned char *src_base = src;
731	unsigned char c1 = *src++, c2, cmprule;	744	int c1 = *src++, c2;
732		745
733	switch (iso_code_class [c1])	746	switch (iso_code_class [c1])
734	{	747	{
@@ -1167,6 +1180,21 @@ decode_coding_iso2022 (coding, source, destination,
1167	dst = encode_invocation_designation (charset, coding, dst); \	1180	dst = encode_invocation_designation (charset, coding, dst); \
1168	} while (1)	1181	} while (1)
1169		1182
		1183	#define ENCODE_ISO_CHARACTER(charset, c1, c2) \
		1184	do { \
		1185	int c_alt, charset_alt; \
		1186	if (!NILP (unification_table) \
		1187	&& ((c_alt = unify_char (unification_table, -1, charset, c1, c2)) \
		1188	< 0)) \
		1189	SPLIT_CHAR (c_alt, charset_alt, c1, c2); \
		1190	else \
		1191	charset_alt = charset; \
		1192	if (CHARSET_DIMENSION (charset_alt) == 1) \
		1193	ENCODE_ISO_CHARACTER_DIMENSION1 (charset_alt, c1); \
		1194	else \
		1195	ENCODE_ISO_CHARACTER_DIMENSION2 (charset_alt, c1, c2); \
		1196	} while (0)
		1197
1170	/* Produce designation and invocation codes at a place pointed by DST	1198	/* Produce designation and invocation codes at a place pointed by DST
1171	to use CHARSET. The element `spec.iso2022' of *CODING is updated.	1199	to use CHARSET. The element `spec.iso2022' of *CODING is updated.
1172	Return new DST. */	1200	Return new DST. */
@@ -1266,48 +1294,57 @@ encode_invocation_designation (charset, coding, dst)
1266	(CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg), reg, coding); \	1294	(CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg), reg, coding); \
1267	} while (0)	1295	} while (0)
1268		1296
1269	int	1297	/* Produce designation sequences of charsets in the line started from
1270	encode_designation_at_bol (coding, src, src_end, dstp)	1298	*SRC to a place pointed by *DSTP.
		1299
		1300	If the current block ends before any end-of-line, we may fail to
		1301	find all the necessary designations. */
		1302	encode_designation_at_bol (coding, table, src, src_end, dstp)
1271	struct coding_system *coding;	1303	struct coding_system *coding;
		1304	Lisp_Object table;
1272	unsigned char *src, *src_end, **dstp;	1305	unsigned char *src, *src_end, **dstp;
1273	{	1306	{
1274	int charset, reg, r[4];	1307	int charset, c, found = 0, reg;
1275	unsigned char *dst = *dstp, c;	1308	/* Table of charsets to be designated to each graphic register. */
1276	for (reg = 0; reg < 4; reg++) r[reg] = -1;	1309	int r[4];
1277	while (src < src_end && (c = *src++) != '\n')	1310	unsigned char *dst = *dstp;
		1311
		1312	for (reg = 0; reg < 4; reg++)
		1313	r[reg] = -1;
		1314
		1315	while (src < src_end && *src != '\n' && found < 4)
1278	{	1316	{
1279	switch (emacs_code_class[c])	1317	int bytes = BYTES_BY_CHAR_HEAD (*src);
		1318
		1319	if (NILP (table))
		1320	charset = CHARSET_AT (src);
		1321	else
1280	{	1322	{
1281	case EMACS_ascii_code:	1323	int c_alt, c1, c2;
1282	charset = CHARSET_ASCII;	1324
1283	break;	1325	SPLIT_STRING(src, bytes, charset, c1, c2);
1284	case EMACS_leading_code_2:	1326	if ((c_alt = unify_char (table, -1, charset, c1, c2)) >= 0)
1285	if (++src >= src_end) continue;	1327	charset = CHAR_CHARSET (c_alt);
1286	charset = c;
1287	break;
1288	case EMACS_leading_code_3:
1289	if ((src += 2) >= src_end) continue;
1290	charset = (c < LEADING_CODE_PRIVATE_11 ? c : *(src - 2));
1291	break;
1292	case EMACS_leading_code_4:
1293	if ((src += 3) >= src_end) continue;
1294	charset = *(src - 3);
1295	break;
1296	default:
1297	continue;
1298	}	1328	}
		1329
1299	reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);	1330	reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
1300	if (r[reg] < 0	1331	if (r[reg] < 0)
1301	&& CODING_SPEC_ISO_DESIGNATION (coding, reg) != charset)	1332	{
1302	r[reg] = charset;	1333	found++;
		1334	r[reg] = charset;
		1335	}
		1336
		1337	src += bytes;
		1338	}
		1339
		1340	if (found)
		1341	{
		1342	for (reg = 0; reg < 4; reg++)
		1343	if (r[reg] >= 0
		1344	&& CODING_SPEC_ISO_DESIGNATION (coding, reg) != r[reg])
		1345	ENCODE_DESIGNATION (r[reg], reg, coding);
		1346	*dstp = dst;
1303	}	1347	}
1304	if (c != '\n' && !coding->last_block)
1305	return -1;
1306	for (reg = 0; reg < 4; reg++)
1307	if (r[reg] >= 0)
1308	ENCODE_DESIGNATION (r[reg], reg, coding);
1309	*dstp = dst;
1310	return 0;
1311	}	1348	}
1312		1349
1313	/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */	1350	/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
@@ -1328,6 +1365,10 @@ encode_coding_iso2022 (coding, source, destination,
1328	from DST_END to assure overflow checking is necessary only at the	1365	from DST_END to assure overflow checking is necessary only at the
1329	head of loop. */	1366	head of loop. */
1330	unsigned char *adjusted_dst_end = dst_end - 19;	1367	unsigned char *adjusted_dst_end = dst_end - 19;
		1368	Lisp_Object unification_table = coding->character_unification_table;
		1369
		1370	if (!NILP (Venable_character_unification) && NILP (unification_table))
		1371	unification_table = Vstandard_character_unification_table_for_write;
1331		1372
1332	while (src < src_end && dst < adjusted_dst_end)	1373	while (src < src_end && dst < adjusted_dst_end)
1333	{	1374	{
@@ -1337,18 +1378,14 @@ encode_coding_iso2022 (coding, source, destination,
1337	TWO_MORE_BYTES, and THREE_MORE_BYTES). In that case, SRC is	1378	TWO_MORE_BYTES, and THREE_MORE_BYTES). In that case, SRC is
1338	reset to SRC_BASE before exiting. */	1379	reset to SRC_BASE before exiting. */
1339	unsigned char *src_base = src;	1380	unsigned char *src_base = src;
1340	unsigned char c1, c2, c3, c4;	1381	int charset, c1, c2, c3, c4;
1341	int charset;
1342		1382
1343	if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL	1383	if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL
1344	&& CODING_SPEC_ISO_BOL (coding))	1384	&& CODING_SPEC_ISO_BOL (coding))
1345	{	1385	{
1346	/* We have to produce destination sequences now. */	1386	/* We have to produce designation sequences if any now. */
1347	if (encode_designation_at_bol (coding, src, src_end, &dst) < 0)	1387	encode_designation_at_bol (coding, unification_table,
1348	/* We can't find end of line in the current block. Let's	1388	src, src_end, &dst);
1349	repeat encoding starting from the current position
1350	pointed by SRC. */
1351	break;
1352	CODING_SPEC_ISO_BOL (coding) = 0;	1389	CODING_SPEC_ISO_BOL (coding) = 0;
1353	}	1390	}
1354		1391
@@ -1393,7 +1430,7 @@ encode_coding_iso2022 (coding, source, destination,
1393	switch (emacs_code_class[c1])	1430	switch (emacs_code_class[c1])
1394	{	1431	{
1395	case EMACS_ascii_code:	1432	case EMACS_ascii_code:
1396	ENCODE_ISO_CHARACTER_DIMENSION1 (CHARSET_ASCII, c1);	1433	ENCODE_ISO_CHARACTER (CHARSET_ASCII, c1, /* dummy */ c2);
1397	break;	1434	break;
1398		1435
1399	case EMACS_control_code:	1436	case EMACS_control_code:
@@ -1431,20 +1468,20 @@ encode_coding_iso2022 (coding, source, destination,
1431		1468
1432	case EMACS_leading_code_2:	1469	case EMACS_leading_code_2:
1433	ONE_MORE_BYTE (c2);	1470	ONE_MORE_BYTE (c2);
1434	ENCODE_ISO_CHARACTER_DIMENSION1 (c1, c2);	1471	ENCODE_ISO_CHARACTER (c1, c2, /* dummy */ c3);
1435	break;	1472	break;
1436		1473
1437	case EMACS_leading_code_3:	1474	case EMACS_leading_code_3:
1438	TWO_MORE_BYTES (c2, c3);	1475	TWO_MORE_BYTES (c2, c3);
1439	if (c1 < LEADING_CODE_PRIVATE_11)	1476	if (c1 < LEADING_CODE_PRIVATE_11)
1440	ENCODE_ISO_CHARACTER_DIMENSION2 (c1, c2, c3);	1477	ENCODE_ISO_CHARACTER (c1, c2, c3);
1441	else	1478	else
1442	ENCODE_ISO_CHARACTER_DIMENSION1 (c2, c3);	1479	ENCODE_ISO_CHARACTER (c2, c3, /* dummy */ c4);
1443	break;	1480	break;
1444		1481
1445	case EMACS_leading_code_4:	1482	case EMACS_leading_code_4:
1446	THREE_MORE_BYTES (c2, c3, c4);	1483	THREE_MORE_BYTES (c2, c3, c4);
1447	ENCODE_ISO_CHARACTER_DIMENSION2 (c2, c3, c4);	1484	ENCODE_ISO_CHARACTER (c2, c3, c4);
1448	break;	1485	break;
1449		1486
1450	case EMACS_leading_code_composition:	1487	case EMACS_leading_code_composition:
@@ -1472,20 +1509,21 @@ encode_coding_iso2022 (coding, source, destination,
1472	label_end_of_loop:	1509	label_end_of_loop:
1473	coding->carryover_size = src - src_base;	1510	coding->carryover_size = src - src_base;
1474	bcopy (src_base, coding->carryover, coding->carryover_size);	1511	bcopy (src_base, coding->carryover, coding->carryover_size);
1475	src = src_base;
1476	break;	1512	break;
1477	}	1513	}
1478		1514
1479	/* If this is the last block of the text to be encoded, we must	1515	/* If this is the last block of the text to be encoded, we must
1480	reset the state of graphic planes and registers to initial one.	1516	reset graphic planes and registers to the initial state. */
1481	In addition, we had better just flush out all remaining codes in	1517	if (src >= src_end && coding->last_block)
1482	the text although they are not valid characters. */
1483	if (coding->last_block)
1484	{	1518	{
1485	ENCODE_RESET_PLANE_AND_REGISTER;	1519	ENCODE_RESET_PLANE_AND_REGISTER;
1486	bcopy(src, dst, src_end - src);	1520	if (coding->carryover_size > 0
1487	dst += (src_end - src);	1521	&& coding->carryover_size < (dst_end - dst))
1488	src = src_end;	1522	{
		1523	bcopy (coding->carryover, dst, coding->carryover_size);
		1524	dst += coding->carryover_size;
		1525	coding->carryover_size = 0;
		1526	}
1489	}	1527	}
1490	*consumed = src - source;	1528	*consumed = src - source;
1491	return dst - destination;	1529	return dst - destination;
@@ -2063,6 +2101,9 @@ setup_coding_system (coding_system, coding)
2063	coding->direction = 0;	2101	coding->direction = 0;
2064	coding->carryover_size = 0;	2102	coding->carryover_size = 0;
2065	coding->post_read_conversion = coding->pre_write_conversion = Qnil;	2103	coding->post_read_conversion = coding->pre_write_conversion = Qnil;
		2104	/* We have not yet implemented a way to specify a unification table in
		2105	a coding system. */
		2106	coding->character_unification_table = Qnil;
2066		2107
2067	Vlast_coding_system_used = coding->symbol = coding_system;	2108	Vlast_coding_system_used = coding->symbol = coding_system;
2068	eol_type = Qnil;	2109	eol_type = Qnil;
@@ -3316,10 +3357,13 @@ DEFUN ("terminal-coding-system",
3316	}	3357	}
3317		3358
3318	DEFUN ("set-keyboard-coding-system",	3359	DEFUN ("set-keyboard-coding-system",
3319	Fset_keyboard_coding_system, Sset_keyboard_coding_system, 1, 1,	3360	Fset_keyboard_coding_system, Sset_keyboard_coding_system, 1, 1, 0,
3320	"zCoding-system for keyboard input: ",	3361	"Set coding-system of codes sent from terminal keyboard to CODING-SYSTEM.\n\
3321	"Set coding-system of what is sent from terminal keyboard to CODING-SYSTEM.\n\	3362	In Encoded-kbd minor mode, user inputs are decoded\n\
3322	All inputs from terminal are decoded from this coding-system.")	3363	according to CODING-SYSTEM.\n\
		3364	Do not call this function directly, but use the command\n\
		3365	encoded-kbd-set-coding-system to activate Encoded-kbd mode\n\
		3366	with a specific coding system.")
3323	(coding_system)	3367	(coding_system)
3324	Lisp_Object coding_system;	3368	Lisp_Object coding_system;
3325	{	3369	{
@@ -3529,6 +3573,11 @@ syms_of_coding ()
3529	}	3573	}
3530	}	3574	}
3531		3575
		3576	Qcharacter_unification_table = intern ("character-unification-table");
		3577	staticpro (&Qcharacter_unification_table);
		3578	Fput (Qcharacter_unification_table, Qchar_table_extra_slots,
		3579	make_number (0));
		3580
3532	defsubr (&Scoding_system_vector);	3581	defsubr (&Scoding_system_vector);
3533	defsubr (&Scoding_system_p);	3582	defsubr (&Scoding_system_p);
3534	defsubr (&Sread_coding_system);	3583	defsubr (&Sread_coding_system);
@@ -3613,11 +3662,19 @@ See the documentation of `find-coding-system' for more detail.");
3613	"Mnemonic character indicating end-of-line format is not yet decided.");	3662	"Mnemonic character indicating end-of-line format is not yet decided.");
3614	eol_mnemonic_undecided = '-';	3663	eol_mnemonic_undecided = '-';
3615		3664
3616	DEFVAR_LISP ("alternate-charset-table", &Valternate_charset_table,	3665	DEFVAR_LISP ("enable-character-unification", &Venable_character_unification,
3617	"Alist of charsets vs the alternate charsets.\n\	3666	"Non-nil means ISO 2022 encoder/decoder do character unification.");
3618	While decoding, if a charset (car part of an element) is found,\n\	3667	Venable_character_unification = Qt;
3619	decode it as the alternate charset (cdr part of the element).");	3668
3620	Valternate_charset_table = Qnil;	3669	DEFVAR_LISP ("standard-character-unification-table-for-read",
		3670	&Vstandard_character_unification_table_for_read,
		3671	"Table for unifying characters when reading.");
		3672	Vstandard_character_unification_table_for_read = Qnil;
		3673
		3674	DEFVAR_LISP ("standard-character-unification-table-for-write",
		3675	&Vstandard_character_unification_table_for_write,
		3676	"Table for unifying characters when writing.");
		3677	Vstandard_character_unification_table_for_write = Qnil;
3621		3678
3622	DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist,	3679	DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist,
3623	"Alist of charsets vs revision numbers.\n\	3680	"Alist of charsets vs revision numbers.\n\