(MAKE_NON_ASCII_CHAR): Handle the case that C1 or C2 are negative.
(MAKE_CHAR): Don't set MSBs of C1 and C2 to 0.
(VALID_MULTIBYTE_CHAR_P): This macro deleted.
(PARSE_COMPOSITE_SEQ): New macro.
(PARSE_CHARACTER_SEQ): New macro.
(PARSE_MULTIBYTE_SEQ): New macro.
(CHAR_PRINTABLE_P): New macro.
(STRING_CHAR): Adjusted for the change of string_to_non_ascii_char.
(STRING_CHAR_AND_LENGTH): Likewise.
(STRING_CHAR_AND_CHAR_LENGTH): Define it as STRING_CHAR_AND_LENGTH.
(INC_POS): Use the macro PARSE_MULTIBYTE_SEQ.
(DEC_POS, BUF_INC_POS, BUF_DEC_POS): Likewise.
author: Kenichi Handa 1999-09-03 01:28:42 +0000
committer: Kenichi Handa 1999-09-03 01:28:42 +0000
commit: 384107f281bd6a43a4c66e9bbb1826e1bc8cec05 (patch)
tree: 9d31c1a4424483e5165f51a2f553b402513bc19e /src
parent: a260318815a268ae273669e3116274732e1f1945 (diff)
download: emacs-384107f281bd6a43a4c66e9bbb1826e1bc8cec05.tar.gz
emacs-384107f281bd6a43a4c66e9bbb1826e1bc8cec05.zip
1 files changed, 149 insertions, 102 deletions
diff --git a/src/charset.h b/src/charset.h
index 1b2ac59bec5..775b755c372 100644
--- a/src/charset.h
+++ b/src/charset.h
@@ -376,19 +376,19 @@ extern Lisp_Object Vcharset_symbol_table;
 #define CHARSET_SYMBOL(charset) \
  XVECTOR (Vcharset_symbol_table)->contents[charset]
-/* 1 if CHARSET is valid, else 0.  */
+/* 1 if CHARSET is in valid value range, else 0.  */
 #define CHARSET_VALID_P(charset)                                         \
  ((charset) == 0                                                        \
   || ((charset) >= 0x80 && (charset) <= MAX_CHARSET_OFFICIAL_DIMENSION2) \
   || ((charset) >= MIN_CHARSET_PRIVATE_DIMENSION1 && (charset) <= MAX_CHARSET))
-/* 1 if CHARSET is already defined, else 0.  */
+/* 1 if CHARSET is already defined (and not CHARSET_COMPOSITION), else 0.  */
 #define CHARSET_DEFINED_P(charset)                      \
  (((charset) >= 0) && ((charset) <= MAX_CHARSET)       \
   && !NILP (CHARSET_TABLE_ENTRY (charset)))
 /* Since the information CHARSET-BYTES and CHARSET-WIDTH of
-   Vcharset_table can be retrieved only from the first byte of
+   Vcharset_table can be retrieved only the first byte of
   multi-byte form (an ASCII code or a base leading-code), we provide
   here tables to be used by macros BYTES_BY_CHAR_HEAD and
   WIDTH_BY_CHAR_HEAD for faster information retrieval.  */
@@ -451,12 +451,16 @@ extern int width_by_char_head[256];
   position-codes are C1 and C2.  DIMENSION1 character ignores C2.  */
 #define MAKE_NON_ASCII_CHAR(charset, c1, c2)                            \
  ((charset) == CHARSET_COMPOSITION                                     \
-   ? MAKE_COMPOSITE_CHAR (((c1) << 7) + (c2))                           \
+   ? ((c2) < 0                                                          \
+      ? (((charset) - 0x70) << 7) + (c1)                                \
+      : MAKE_COMPOSITE_CHAR (((c1) << 7) + (c2)))                       \
   : (! CHARSET_DEFINED_P (charset) || CHARSET_DIMENSION (charset) == 1 \
-      ? (((charset) - 0x70) << 7) | (c1)                                \
+      ? (((charset) - 0x70) << 7) | ((c1) <= 0 ? 0 : (c1))              \
      : ((charset) < MIN_CHARSET_PRIVATE_DIMENSION2                     \
-         ? (((charset) - 0x8F) << 14) | ((c1) << 7) | (c2)              \
+         ? ((((charset) - 0x8F) << 14)                                  \
-         : (((charset) - 0xE0) << 14) | ((c1) << 7) | (c2))))
+            | ((c1) <= 0 ? 0 : ((c1) << 7)) | ((c2) <= 0 ? 0 : (c2)))   \
+         : ((((charset) - 0xE0) << 14)                                  \
+            | ((c1) <= 0 ? 0 : ((c1) << 7)) | ((c2) <= 0 ? 0 : (c2))))))
 /* Return a composite character of which CMPCHAR-ID is ID.  */
 #define MAKE_COMPOSITE_CHAR(id) (MIN_CHAR_COMPOSITION + (id))
@@ -466,10 +470,10 @@ extern int width_by_char_head[256];
 /* Return a character of which charset is CHARSET and position-codes
   are C1 and C2.  DIMENSION1 character ignores C2.  */
-#define MAKE_CHAR(charset, c1, c2)                      \
+#define MAKE_CHAR(charset, c1, c2)      \
-  ((charset) == CHARSET_ASCII                           \
+  ((charset) == CHARSET_ASCII           \
-   ? (c1)                                               \
+   ? (c1)                               \
-   : MAKE_NON_ASCII_CHAR ((charset), (c1) & 0x7F, (c2) & 0x7F))
+   : MAKE_NON_ASCII_CHAR ((charset), (c1), (c2)))
 /* If GENERICP is nonzero, return nonzero iff C is a valid normal or
   generic character.  If GENERICP is zero, return nonzero iff C is a
@@ -484,22 +488,43 @@ extern int width_by_char_head[256];
 #define DEFAULT_NONASCII_INSERT_OFFSET 0x800
-/* Check if the character C is valid as a multibyte character.  */
+/* Parse string STR of length LENGTH (>= 2) and check if a composite
+   character is at STR.  Actually, the whole multibyte sequence
-#define VALID_MULTIBYTE_CHAR_P(c)                                         \
+   starting with LEADING_CODE_COMPOSITION is treated as a single
-  ((c) < MIN_CHAR_OFFICIAL_DIMENSION2                                     \
+   multibyte character.  So, here, we just set BYTES to LENGTH.  */
-   ? (!NILP (XCHAR_TABLE (Vcharset_table)->contents[CHAR_FIELD2 (c)       \
-                                                   + 0xF0])               \
+#define PARSE_COMPOSITE_SEQ(str, length, bytes) \
-      && CHAR_FIELD3 (c) >= 32)                                           \
+  do {                                          \
-   : ((c) < MIN_CHAR_PRIVATE_DIMENSION2                                   \
+    (bytes) = (length);                         \
-      ? (!NILP (XCHAR_TABLE (Vcharset_table)->contents[CHAR_FIELD1 (c)    \
+  } while (0)
-                                                      + 0x10F])           \
-         && CHAR_FIELD2 (c) >= 32 && CHAR_FIELD3 (c) >= 32)               \
-      : ((c) < MIN_CHAR_COMPOSITION                                       \
+/* Parse string STR of length LENGTH (>= 2) and check if a
-         ? (!NILP (XCHAR_TABLE (Vcharset_table)->contents[CHAR_FIELD1 (c) \
+   non-composite multibyte character is at STR.  Set BYTES to the
-                                                         + 0x160])        \
+   actual sequence length.  */
-            && CHAR_FIELD2 (c) >= 32 && CHAR_FIELD3 (c) >= 32)            \
-         : (c) < MIN_CHAR_COMPOSITION + n_cmpchars)))
+#define PARSE_CHARACTER_SEQ(str, length, bytes) \
+  do {                                          \
+    (bytes) = BYTES_BY_CHAR_HEAD ((str)[0]);    \
+    if ((bytes) > (length))                     \
+      (bytes) = (length);                       \
+  } while (0)
+/* Parse string STR of length LENGTH and check if a multibyte
+   characters is at STR.  If so, set BYTES for that character, else
+   set BYTES to 1.  */
+#define PARSE_MULTIBYTE_SEQ(str, length, bytes)                 \
+  do {                                                          \
+    int i = 1;                                                  \
+    while (i < (length) && ! CHAR_HEAD_P ((str)[i])) i++;       \
+    if (i == 1)                                                 \
+      (bytes) = 1;                                              \
+    else if ((str)[0] == LEADING_CODE_COMPOSITION)              \
+      PARSE_COMPOSITE_SEQ (str, i, bytes);                      \
+    else                                                        \
+      PARSE_CHARACTER_SEQ (str, i, bytes);                      \
+  } while (0)
 /* The charset of non-ASCII character C is stored in CHARSET, and the
   position-codes of C are stored in C1 and C2.
@@ -521,13 +546,20 @@ extern int width_by_char_head[256];
 /* The charset of character C is stored in CHARSET, and the
   position-codes of C are stored in C1 and C2.
-   We store -1 in C2 if the character is just 2 bytes.  */
+   We store -1 in C2 if the dimension of the charset 1.  */
 #define SPLIT_CHAR(c, charset, c1, c2)                  \
  (SINGLE_BYTE_CHAR_P (c)                               \
   ? charset = CHARSET_ASCII, c1 = (c), c2 = -1         \
   : SPLIT_NON_ASCII_CHAR (c, charset, c1, c2))
+/* Return 1 iff character C has valid printable glyph.  */
+#define CHAR_PRINTABLE_P(c)             \
+  (SINGLE_BYTE_CHAR_P (c)               \
+   || ((c) >= MIN_CHAR_COMPOSITION      \
+       ? (c) < MAX_CHAR                 \
+       : char_printable_p (c)))
 /* The charset of the character at STR is stored in CHARSET, and the
   position-codes are stored in C1 and C2.
   We store -1 in C2 if the character is just 2 bytes.
@@ -580,23 +612,20 @@ extern int iso_charset_table[2][2][128];
 #define STRING_CHAR(str, len)                           \
  (BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) == 1     \
   ? (unsigned char) *(str)                             \
-   : string_to_non_ascii_char (str, len, 0, 0))
+   : string_to_non_ascii_char (str, len, 0))
-/* This is like STRING_CHAR but the third arg ACTUAL_LEN is set to
+/* This is like STRING_CHAR but the third arg ACTUAL_LEN is set to the
-   the length of the multi-byte form.  Just to know the length, use
+   length of the multi-byte form.  Just to know the length, use
   MULTIBYTE_FORM_LENGTH.  */
-#define STRING_CHAR_AND_LENGTH(str, len, actual_len)            \
+#define STRING_CHAR_AND_LENGTH(str, len, actual_len)    \
-  (BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) == 1             \
+  (BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) == 1     \
-   ? (actual_len = 1), (unsigned char) *(str)                   \
+   ? ((actual_len) = 1), (unsigned char) *(str)         \
-   : string_to_non_ascii_char (str, len, &actual_len, 0))
+   : string_to_non_ascii_char (str, len, &(actual_len)))
 /* This is like STRING_CHAR_AND_LENGTH but the third arg ACTUAL_LEN
   does not include garbage bytes following the multibyte character.  */
-#define STRING_CHAR_AND_CHAR_LENGTH(str, len, actual_len)       \
+#define STRING_CHAR_AND_CHAR_LENGTH STRING_CHAR_AND_LENGTH
-  (BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) == 1             \
-   ? (actual_len = 1), (unsigned char) *(str)                   \
-   : string_to_non_ascii_char (str, len, &actual_len, 1))
 /* Fetch the "next" multibyte character from Lisp string STRING
   at byte position BYTEIDX, character position CHARIDX.
@@ -654,36 +683,45 @@ else
 #ifdef emacs
-/* Increase the buffer point POS of the current buffer to the next
+/* Increase the buffer byte position POS_BYTE of the current buffer to
-   character boundary.  This macro relies on the fact that *GPT_ADDR
+   the next character boundary.  This macro relies on the fact that
-   and *Z_ADDR are always accessible and the values are '\0'.  No
+   *GPT_ADDR and *Z_ADDR are always accessible and the values are
-   range checking of POS.  */
+   '\0'.  No range checking of POS.  */
-#define INC_POS(pos)                            \
+#define INC_POS(pos_byte)                               \
-  do {                                          \
+  do {                                                  \
-    unsigned char *p = BYTE_POS_ADDR (pos);     \
+    unsigned char *p = BYTE_POS_ADDR (pos_byte);        \
-    pos++;                                      \
+    if (BASE_LEADING_CODE_P (*p))                       \
-    if (BASE_LEADING_CODE_P (*p++))             \
+      {                                                 \
-      while (!CHAR_HEAD_P (*p)) p++, pos++;     \
+        int len, bytes;                                 \
+        len = Z_BYTE - pos_byte;                        \
+        PARSE_MULTIBYTE_SEQ (p, len, bytes);            \
+        pos_byte += bytes;                              \
+      }                                                 \
+    else                                                \
+      pos_byte++;                                       \
  } while (0)
-/* Decrease the buffer point POS of the current buffer to the previous
+/* Decrease the buffer byte position POS_BYTE of the current buffer to
-   character boundary.  No range checking of POS.  */
+   the previous character boundary.  No range checking of POS.  */
-#define DEC_POS(pos)                                            \
+#define DEC_POS(pos_byte)                                               \
-  do {                                                          \
+  do {                                                                  \
-    unsigned char *p, *p_min;                                   \
+    unsigned char *p, *p_min;                                           \
-                                                                \
+                                                                        \
-    pos--;                                                      \
+    pos_byte--;                                                         \
-    if (pos < GPT_BYTE)                                         \
+    if (pos_byte < GPT_BYTE)                                            \
-      p = BEG_ADDR + pos - 1, p_min = BEG_ADDR;                 \
+      p = BEG_ADDR + pos_byte - 1, p_min = BEG_ADDR;                    \
-    else                                                        \
+    else                                                                \
-      p = BEG_ADDR + GAP_SIZE + pos - 1, p_min = GAP_END_ADDR;  \
+      p = BEG_ADDR + GAP_SIZE + pos_byte - 1, p_min = GAP_END_ADDR;     \
-    if (p > p_min && !CHAR_HEAD_P (*p))                         \
+    if (p > p_min && !CHAR_HEAD_P (*p))                                 \
-      {                                                         \
+      {                                                                 \
-        int pos_saved = pos--;                                  \
+        unsigned char *pend = p--;                                      \
-        p--;                                                    \
+        int len, bytes;                                                 \
-        while (p > p_min && !CHAR_HEAD_P (*p)) p--, pos--;      \
+        while (p > p_min && !CHAR_HEAD_P (*p)) p--;                     \
-        if (!BASE_LEADING_CODE_P (*p)) pos = pos_saved;         \
+        len = pend + 1 - p;                                             \
-      }                                                         \
+        PARSE_MULTIBYTE_SEQ (p, len, bytes);                            \
+        if (bytes == len)                                               \
+          pos_byte -= len - 1;                                          \
+      }                                                                 \
  } while (0)
 /* Increment both CHARPOS and BYTEPOS, each in the appropriate way.  */
@@ -712,41 +750,50 @@ do								\
  }                                                             \
 while (0)
-/* Increase the buffer point POS of the current buffer to the next
+/* Increase the buffer byte position POS_BYTE of the current buffer to
-   character boundary.  This macro relies on the fact that *GPT_ADDR
+   the next character boundary.  This macro relies on the fact that
-   and *Z_ADDR are always accessible and the values are '\0'.  No
+   *GPT_ADDR and *Z_ADDR are always accessible and the values are
-   range checking of POS.  */
+   '\0'.  No range checking of POS_BYTE.  */
-#define BUF_INC_POS(buf, pos)                           \
+#define BUF_INC_POS(buf, pos_byte)                              \
-  do {                                                  \
+  do {                                                          \
-    unsigned char *p = BUF_BYTE_ADDRESS (buf, pos);     \
+    unsigned char *p = BUF_BYTE_ADDRESS (buf, pos_byte);        \
-    pos++;                                              \
+    if (BASE_LEADING_CODE_P (*p))                               \
-    if (BASE_LEADING_CODE_P (*p++))                     \
-      while (!CHAR_HEAD_P (*p)) p++, pos++;             \
-  } while (0)
-/* Decrease the buffer point POS of the current buffer to the previous
-   character boundary.  No range checking of POS.  */
-#define BUF_DEC_POS(buf, pos)                                   \
-  do {                                                          \
-    unsigned char *p, *p_min;                                   \
-    int pos_saved = --pos;                                      \
-    if (pos < BUF_GPT_BYTE (buf))                               \
-      {                                                         \
-        p = BUF_BEG_ADDR (buf) + pos - 1;                       \
-        p_min = BUF_BEG_ADDR (buf);                             \
-      }                                                         \
-    else                                                        \
-      {                                                         \
-        p = BUF_BEG_ADDR (buf) + BUF_GAP_SIZE (buf) + pos - 1;  \
-        p_min = BUF_GAP_END_ADDR (buf);                         \
-      }                                                         \
-    if (p > p_min && !CHAR_HEAD_P (*p))                         \
      {                                                         \
-        int pos_saved = pos--;                                  \
+        int len, bytes;                                         \
-        p--;                                                    \
+        len = BUF_Z_BYTE (buf) - pos_byte;                      \
-        while (p > p_min && !CHAR_HEAD_P (*p)) p--, pos--;      \
+        PARSE_MULTIBYTE_SEQ (p, len, bytes);                    \
-        if (!BASE_LEADING_CODE_P (*p)) pos = pos_saved;         \
+        pos_byte += bytes;                                      \
      }                                                         \
+    else                                                        \
+      pos_byte++;                                               \
+  } while (0)
+/* Decrease the buffer byte position POS_BYTE of the current buffer to
+   the previous character boundary.  No range checking of POS_BYTE.  */
+#define BUF_DEC_POS(buf, pos_byte)                                      \
+  do {                                                                  \
+    unsigned char *p, *p_min;                                           \
+    pos_byte--;                                                         \
+    if (pos_byte < BUF_GPT_BYTE (buf))                                  \
+      {                                                                 \
+        p = BUF_BEG_ADDR (buf) + pos_byte - 1;                          \
+        p_min = BUF_BEG_ADDR (buf);                                     \
+      }                                                                 \
+    else                                                                \
+      {                                                                 \
+        p = BUF_BEG_ADDR (buf) + BUF_GAP_SIZE (buf) + pos_byte - 1;     \
+        p_min = BUF_GAP_END_ADDR (buf);                                 \
+      }                                                                 \
+    if (p > p_min && !CHAR_HEAD_P (*p))                                 \
+      {                                                                 \
+        unsigned char *pend = p--;                                      \
+        int len, bytes;                                                 \
+        while (p > p_min && !CHAR_HEAD_P (*p)) p--;                     \
+        len = pend + 1 - p;                                             \
+        PARSE_MULTIBYTE_SEQ (p, len, bytes);                            \
+        if (bytes == len)                                               \
+          pos_byte -= len - 1;                                          \
+      }                                                                 \
  } while (0)
 #endif /* emacs */
@@ -806,9 +853,9 @@ extern void invalid_character P_ ((int));
 extern int translate_char P_ ((Lisp_Object, int, int, int, int));
 extern int split_non_ascii_string P_ ((const unsigned char *, int, int *,
                                       unsigned char *, unsigned char *));
-extern int string_to_non_ascii_char P_ ((const unsigned char *, int, int *,
+extern int string_to_non_ascii_char P_ ((const unsigned char *, int, int *));
-                                         int));
 extern int non_ascii_char_to_string P_ ((int, unsigned char *, unsigned char **));
+extern int char_printable_p P_ ((int c));
 extern int multibyte_form_length P_ ((const unsigned char *, int));
 extern int str_cmpchar_id P_ ((const unsigned char *, int));
 extern int get_charset_id P_ ((Lisp_Object));
author	Kenichi Handa	1999-09-03 01:28:42 +0000
committer	Kenichi Handa	1999-09-03 01:28:42 +0000
commit	384107f281bd6a43a4c66e9bbb1826e1bc8cec05 (patch)
tree	9d31c1a4424483e5165f51a2f553b402513bc19e /src
parent	a260318815a268ae273669e3116274732e1f1945 (diff)
download	emacs-384107f281bd6a43a4c66e9bbb1826e1bc8cec05.tar.gz emacs-384107f281bd6a43a4c66e9bbb1826e1bc8cec05.zip

diff --git a/src/charset.h b/src/charset.h
index 1b2ac59bec5..775b755c372 100644
--- a/src/charset.h
+++ b/src/charset.h
@@ -376,19 +376,19 @@ extern Lisp_Object Vcharset_symbol_table;
376	#define CHARSET_SYMBOL(charset) \	376	#define CHARSET_SYMBOL(charset) \
377	XVECTOR (Vcharset_symbol_table)->contents[charset]	377	XVECTOR (Vcharset_symbol_table)->contents[charset]
378		378
379	/* 1 if CHARSET is valid, else 0. */	379	/* 1 if CHARSET is in valid value range, else 0. */
380	#define CHARSET_VALID_P(charset) \	380	#define CHARSET_VALID_P(charset) \
381	((charset) == 0 \	381	((charset) == 0 \
382	|| ((charset) >= 0x80 && (charset) <= MAX_CHARSET_OFFICIAL_DIMENSION2) \	382	|| ((charset) >= 0x80 && (charset) <= MAX_CHARSET_OFFICIAL_DIMENSION2) \
383	|| ((charset) >= MIN_CHARSET_PRIVATE_DIMENSION1 && (charset) <= MAX_CHARSET))	383	|| ((charset) >= MIN_CHARSET_PRIVATE_DIMENSION1 && (charset) <= MAX_CHARSET))
384		384
385	/* 1 if CHARSET is already defined, else 0. */	385	/* 1 if CHARSET is already defined (and not CHARSET_COMPOSITION), else 0. */
386	#define CHARSET_DEFINED_P(charset) \	386	#define CHARSET_DEFINED_P(charset) \
387	(((charset) >= 0) && ((charset) <= MAX_CHARSET) \	387	(((charset) >= 0) && ((charset) <= MAX_CHARSET) \
388	&& !NILP (CHARSET_TABLE_ENTRY (charset)))	388	&& !NILP (CHARSET_TABLE_ENTRY (charset)))
389		389
390	/* Since the information CHARSET-BYTES and CHARSET-WIDTH of	390	/* Since the information CHARSET-BYTES and CHARSET-WIDTH of
391	Vcharset_table can be retrieved only from the first byte of	391	Vcharset_table can be retrieved only the first byte of
392	multi-byte form (an ASCII code or a base leading-code), we provide	392	multi-byte form (an ASCII code or a base leading-code), we provide
393	here tables to be used by macros BYTES_BY_CHAR_HEAD and	393	here tables to be used by macros BYTES_BY_CHAR_HEAD and
394	WIDTH_BY_CHAR_HEAD for faster information retrieval. */	394	WIDTH_BY_CHAR_HEAD for faster information retrieval. */
@@ -451,12 +451,16 @@ extern int width_by_char_head[256];
451	position-codes are C1 and C2. DIMENSION1 character ignores C2. */	451	position-codes are C1 and C2. DIMENSION1 character ignores C2. */
452	#define MAKE_NON_ASCII_CHAR(charset, c1, c2) \	452	#define MAKE_NON_ASCII_CHAR(charset, c1, c2) \
453	((charset) == CHARSET_COMPOSITION \	453	((charset) == CHARSET_COMPOSITION \
454	? MAKE_COMPOSITE_CHAR (((c1) << 7) + (c2)) \	454	? ((c2) < 0 \
		455	? (((charset) - 0x70) << 7) + (c1) \
		456	: MAKE_COMPOSITE_CHAR (((c1) << 7) + (c2))) \
455	: (! CHARSET_DEFINED_P (charset) || CHARSET_DIMENSION (charset) == 1 \	457	: (! CHARSET_DEFINED_P (charset) || CHARSET_DIMENSION (charset) == 1 \
456	? (((charset) - 0x70) << 7) | (c1) \	458	? (((charset) - 0x70) << 7) | ((c1) <= 0 ? 0 : (c1)) \
457	: ((charset) < MIN_CHARSET_PRIVATE_DIMENSION2 \	459	: ((charset) < MIN_CHARSET_PRIVATE_DIMENSION2 \
458	? (((charset) - 0x8F) << 14) | ((c1) << 7) | (c2) \	460	? ((((charset) - 0x8F) << 14) \
459	: (((charset) - 0xE0) << 14) | ((c1) << 7) | (c2))))	461	| ((c1) <= 0 ? 0 : ((c1) << 7)) | ((c2) <= 0 ? 0 : (c2))) \
		462	: ((((charset) - 0xE0) << 14) \
		463	| ((c1) <= 0 ? 0 : ((c1) << 7)) | ((c2) <= 0 ? 0 : (c2))))))
460		464
461	/* Return a composite character of which CMPCHAR-ID is ID. */	465	/* Return a composite character of which CMPCHAR-ID is ID. */
462	#define MAKE_COMPOSITE_CHAR(id) (MIN_CHAR_COMPOSITION + (id))	466	#define MAKE_COMPOSITE_CHAR(id) (MIN_CHAR_COMPOSITION + (id))
@@ -466,10 +470,10 @@ extern int width_by_char_head[256];
466		470
467	/* Return a character of which charset is CHARSET and position-codes	471	/* Return a character of which charset is CHARSET and position-codes
468	are C1 and C2. DIMENSION1 character ignores C2. */	472	are C1 and C2. DIMENSION1 character ignores C2. */
469	#define MAKE_CHAR(charset, c1, c2) \	473	#define MAKE_CHAR(charset, c1, c2) \
470	((charset) == CHARSET_ASCII \	474	((charset) == CHARSET_ASCII \
471	? (c1) \	475	? (c1) \
472	: MAKE_NON_ASCII_CHAR ((charset), (c1) & 0x7F, (c2) & 0x7F))	476	: MAKE_NON_ASCII_CHAR ((charset), (c1), (c2)))
473		477
474	/* If GENERICP is nonzero, return nonzero iff C is a valid normal or	478	/* If GENERICP is nonzero, return nonzero iff C is a valid normal or
475	generic character. If GENERICP is zero, return nonzero iff C is a	479	generic character. If GENERICP is zero, return nonzero iff C is a
@@ -484,22 +488,43 @@ extern int width_by_char_head[256];
484		488
485	#define DEFAULT_NONASCII_INSERT_OFFSET 0x800	489	#define DEFAULT_NONASCII_INSERT_OFFSET 0x800
486		490
487	/* Check if the character C is valid as a multibyte character. */	491	/* Parse string STR of length LENGTH (>= 2) and check if a composite
488		492	character is at STR. Actually, the whole multibyte sequence
489	#define VALID_MULTIBYTE_CHAR_P(c) \	493	starting with LEADING_CODE_COMPOSITION is treated as a single
490	((c) < MIN_CHAR_OFFICIAL_DIMENSION2 \	494	multibyte character. So, here, we just set BYTES to LENGTH. */
491	? (!NILP (XCHAR_TABLE (Vcharset_table)->contents[CHAR_FIELD2 (c) \	495
492	+ 0xF0]) \	496	#define PARSE_COMPOSITE_SEQ(str, length, bytes) \
493	&& CHAR_FIELD3 (c) >= 32) \	497	do { \
494	: ((c) < MIN_CHAR_PRIVATE_DIMENSION2 \	498	(bytes) = (length); \
495	? (!NILP (XCHAR_TABLE (Vcharset_table)->contents[CHAR_FIELD1 (c) \	499	} while (0)
496	+ 0x10F]) \	500
497	&& CHAR_FIELD2 (c) >= 32 && CHAR_FIELD3 (c) >= 32) \	501
498	: ((c) < MIN_CHAR_COMPOSITION \	502	/* Parse string STR of length LENGTH (>= 2) and check if a
499	? (!NILP (XCHAR_TABLE (Vcharset_table)->contents[CHAR_FIELD1 (c) \	503	non-composite multibyte character is at STR. Set BYTES to the
500	+ 0x160]) \	504	actual sequence length. */
501	&& CHAR_FIELD2 (c) >= 32 && CHAR_FIELD3 (c) >= 32) \	505
502	: (c) < MIN_CHAR_COMPOSITION + n_cmpchars)))	506	#define PARSE_CHARACTER_SEQ(str, length, bytes) \
		507	do { \
		508	(bytes) = BYTES_BY_CHAR_HEAD ((str)[0]); \
		509	if ((bytes) > (length)) \
		510	(bytes) = (length); \
		511	} while (0)
		512
		513	/* Parse string STR of length LENGTH and check if a multibyte
		514	characters is at STR. If so, set BYTES for that character, else
		515	set BYTES to 1. */
		516
		517	#define PARSE_MULTIBYTE_SEQ(str, length, bytes) \
		518	do { \
		519	int i = 1; \
		520	while (i < (length) && ! CHAR_HEAD_P ((str)[i])) i++; \
		521	if (i == 1) \
		522	(bytes) = 1; \
		523	else if ((str)[0] == LEADING_CODE_COMPOSITION) \
		524	PARSE_COMPOSITE_SEQ (str, i, bytes); \
		525	else \
		526	PARSE_CHARACTER_SEQ (str, i, bytes); \
		527	} while (0)
503		528
504	/* The charset of non-ASCII character C is stored in CHARSET, and the	529	/* The charset of non-ASCII character C is stored in CHARSET, and the
505	position-codes of C are stored in C1 and C2.	530	position-codes of C are stored in C1 and C2.
@@ -521,13 +546,20 @@ extern int width_by_char_head[256];
521		546
522	/* The charset of character C is stored in CHARSET, and the	547	/* The charset of character C is stored in CHARSET, and the
523	position-codes of C are stored in C1 and C2.	548	position-codes of C are stored in C1 and C2.
524	We store -1 in C2 if the character is just 2 bytes. */	549	We store -1 in C2 if the dimension of the charset 1. */
525		550
526	#define SPLIT_CHAR(c, charset, c1, c2) \	551	#define SPLIT_CHAR(c, charset, c1, c2) \
527	(SINGLE_BYTE_CHAR_P (c) \	552	(SINGLE_BYTE_CHAR_P (c) \
528	? charset = CHARSET_ASCII, c1 = (c), c2 = -1 \	553	? charset = CHARSET_ASCII, c1 = (c), c2 = -1 \
529	: SPLIT_NON_ASCII_CHAR (c, charset, c1, c2))	554	: SPLIT_NON_ASCII_CHAR (c, charset, c1, c2))
530		555
		556	/* Return 1 iff character C has valid printable glyph. */
		557	#define CHAR_PRINTABLE_P(c) \
		558	(SINGLE_BYTE_CHAR_P (c) \
		559	|| ((c) >= MIN_CHAR_COMPOSITION \
		560	? (c) < MAX_CHAR \
		561	: char_printable_p (c)))
		562
531	/* The charset of the character at STR is stored in CHARSET, and the	563	/* The charset of the character at STR is stored in CHARSET, and the
532	position-codes are stored in C1 and C2.	564	position-codes are stored in C1 and C2.
533	We store -1 in C2 if the character is just 2 bytes.	565	We store -1 in C2 if the character is just 2 bytes.
@@ -580,23 +612,20 @@ extern int iso_charset_table[2][2][128];
580	#define STRING_CHAR(str, len) \	612	#define STRING_CHAR(str, len) \
581	(BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) == 1 \	613	(BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) == 1 \
582	? (unsigned char) *(str) \	614	? (unsigned char) *(str) \
583	: string_to_non_ascii_char (str, len, 0, 0))	615	: string_to_non_ascii_char (str, len, 0))
584		616
585	/* This is like STRING_CHAR but the third arg ACTUAL_LEN is set to	617	/* This is like STRING_CHAR but the third arg ACTUAL_LEN is set to the
586	the length of the multi-byte form. Just to know the length, use	618	length of the multi-byte form. Just to know the length, use
587	MULTIBYTE_FORM_LENGTH. */	619	MULTIBYTE_FORM_LENGTH. */
588		620
589	#define STRING_CHAR_AND_LENGTH(str, len, actual_len) \	621	#define STRING_CHAR_AND_LENGTH(str, len, actual_len) \
590	(BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) == 1 \	622	(BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) == 1 \
591	? (actual_len = 1), (unsigned char) *(str) \	623	? ((actual_len) = 1), (unsigned char) *(str) \
592	: string_to_non_ascii_char (str, len, &actual_len, 0))	624	: string_to_non_ascii_char (str, len, &(actual_len)))
593		625
594	/* This is like STRING_CHAR_AND_LENGTH but the third arg ACTUAL_LEN	626	/* This is like STRING_CHAR_AND_LENGTH but the third arg ACTUAL_LEN
595	does not include garbage bytes following the multibyte character. */	627	does not include garbage bytes following the multibyte character. */
596	#define STRING_CHAR_AND_CHAR_LENGTH(str, len, actual_len) \	628	#define STRING_CHAR_AND_CHAR_LENGTH STRING_CHAR_AND_LENGTH
597	(BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) == 1 \
598	? (actual_len = 1), (unsigned char) *(str) \
599	: string_to_non_ascii_char (str, len, &actual_len, 1))
600		629
601	/* Fetch the "next" multibyte character from Lisp string STRING	630	/* Fetch the "next" multibyte character from Lisp string STRING
602	at byte position BYTEIDX, character position CHARIDX.	631	at byte position BYTEIDX, character position CHARIDX.
@@ -654,36 +683,45 @@ else
654		683
655	#ifdef emacs	684	#ifdef emacs
656		685
657	/* Increase the buffer point POS of the current buffer to the next	686	/* Increase the buffer byte position POS_BYTE of the current buffer to
658	character boundary. This macro relies on the fact that *GPT_ADDR	687	the next character boundary. This macro relies on the fact that
659	and *Z_ADDR are always accessible and the values are '\0'. No	688	*GPT_ADDR and *Z_ADDR are always accessible and the values are
660	range checking of POS. */	689	'\0'. No range checking of POS. */
661	#define INC_POS(pos) \	690	#define INC_POS(pos_byte) \
662	do { \	691	do { \
663	unsigned char *p = BYTE_POS_ADDR (pos); \	692	unsigned char *p = BYTE_POS_ADDR (pos_byte); \
664	pos++; \	693	if (BASE_LEADING_CODE_P (*p)) \
665	if (BASE_LEADING_CODE_P (*p++)) \	694	{ \
666	while (!CHAR_HEAD_P (*p)) p++, pos++; \	695	int len, bytes; \
		696	len = Z_BYTE - pos_byte; \
		697	PARSE_MULTIBYTE_SEQ (p, len, bytes); \
		698	pos_byte += bytes; \
		699	} \
		700	else \
		701	pos_byte++; \
667	} while (0)	702	} while (0)
668		703
669	/* Decrease the buffer point POS of the current buffer to the previous	704	/* Decrease the buffer byte position POS_BYTE of the current buffer to
670	character boundary. No range checking of POS. */	705	the previous character boundary. No range checking of POS. */
671	#define DEC_POS(pos) \	706	#define DEC_POS(pos_byte) \
672	do { \	707	do { \
673	unsigned char *p, *p_min; \	708	unsigned char *p, *p_min; \
674	\	709	\
675	pos--; \	710	pos_byte--; \
676	if (pos < GPT_BYTE) \	711	if (pos_byte < GPT_BYTE) \
677	p = BEG_ADDR + pos - 1, p_min = BEG_ADDR; \	712	p = BEG_ADDR + pos_byte - 1, p_min = BEG_ADDR; \
678	else \	713	else \
679	p = BEG_ADDR + GAP_SIZE + pos - 1, p_min = GAP_END_ADDR; \	714	p = BEG_ADDR + GAP_SIZE + pos_byte - 1, p_min = GAP_END_ADDR; \
680	if (p > p_min && !CHAR_HEAD_P (*p)) \	715	if (p > p_min && !CHAR_HEAD_P (*p)) \
681	{ \	716	{ \
682	int pos_saved = pos--; \	717	unsigned char *pend = p--; \
683	p--; \	718	int len, bytes; \
684	while (p > p_min && !CHAR_HEAD_P (*p)) p--, pos--; \	719	while (p > p_min && !CHAR_HEAD_P (*p)) p--; \
685	if (!BASE_LEADING_CODE_P (*p)) pos = pos_saved; \	720	len = pend + 1 - p; \
686	} \	721	PARSE_MULTIBYTE_SEQ (p, len, bytes); \
		722	if (bytes == len) \
		723	pos_byte -= len - 1; \
		724	} \
687	} while (0)	725	} while (0)
688		726
689	/* Increment both CHARPOS and BYTEPOS, each in the appropriate way. */	727	/* Increment both CHARPOS and BYTEPOS, each in the appropriate way. */
@@ -712,41 +750,50 @@ do \
712	} \	750	} \
713	while (0)	751	while (0)
714		752
715	/* Increase the buffer point POS of the current buffer to the next	753	/* Increase the buffer byte position POS_BYTE of the current buffer to
716	character boundary. This macro relies on the fact that *GPT_ADDR	754	the next character boundary. This macro relies on the fact that
717	and *Z_ADDR are always accessible and the values are '\0'. No	755	*GPT_ADDR and *Z_ADDR are always accessible and the values are
718	range checking of POS. */	756	'\0'. No range checking of POS_BYTE. */
719	#define BUF_INC_POS(buf, pos) \	757	#define BUF_INC_POS(buf, pos_byte) \
720	do { \	758	do { \
721	unsigned char *p = BUF_BYTE_ADDRESS (buf, pos); \	759	unsigned char *p = BUF_BYTE_ADDRESS (buf, pos_byte); \
722	pos++; \	760	if (BASE_LEADING_CODE_P (*p)) \
723	if (BASE_LEADING_CODE_P (*p++)) \
724	while (!CHAR_HEAD_P (*p)) p++, pos++; \
725	} while (0)
726
727	/* Decrease the buffer point POS of the current buffer to the previous
728	character boundary. No range checking of POS. */
729	#define BUF_DEC_POS(buf, pos) \
730	do { \
731	unsigned char *p, *p_min; \
732	int pos_saved = --pos; \
733	if (pos < BUF_GPT_BYTE (buf)) \
734	{ \
735	p = BUF_BEG_ADDR (buf) + pos - 1; \
736	p_min = BUF_BEG_ADDR (buf); \
737	} \
738	else \
739	{ \
740	p = BUF_BEG_ADDR (buf) + BUF_GAP_SIZE (buf) + pos - 1; \
741	p_min = BUF_GAP_END_ADDR (buf); \
742	} \
743	if (p > p_min && !CHAR_HEAD_P (*p)) \
744	{ \	761	{ \
745	int pos_saved = pos--; \	762	int len, bytes; \
746	p--; \	763	len = BUF_Z_BYTE (buf) - pos_byte; \
747	while (p > p_min && !CHAR_HEAD_P (*p)) p--, pos--; \	764	PARSE_MULTIBYTE_SEQ (p, len, bytes); \
748	if (!BASE_LEADING_CODE_P (*p)) pos = pos_saved; \	765	pos_byte += bytes; \
749	} \	766	} \
		767	else \
		768	pos_byte++; \
		769	} while (0)
		770
		771	/* Decrease the buffer byte position POS_BYTE of the current buffer to
		772	the previous character boundary. No range checking of POS_BYTE. */
		773	#define BUF_DEC_POS(buf, pos_byte) \
		774	do { \
		775	unsigned char *p, *p_min; \
		776	pos_byte--; \
		777	if (pos_byte < BUF_GPT_BYTE (buf)) \
		778	{ \
		779	p = BUF_BEG_ADDR (buf) + pos_byte - 1; \
		780	p_min = BUF_BEG_ADDR (buf); \
		781	} \
		782	else \
		783	{ \
		784	p = BUF_BEG_ADDR (buf) + BUF_GAP_SIZE (buf) + pos_byte - 1; \
		785	p_min = BUF_GAP_END_ADDR (buf); \
		786	} \
		787	if (p > p_min && !CHAR_HEAD_P (*p)) \
		788	{ \
		789	unsigned char *pend = p--; \
		790	int len, bytes; \
		791	while (p > p_min && !CHAR_HEAD_P (*p)) p--; \
		792	len = pend + 1 - p; \
		793	PARSE_MULTIBYTE_SEQ (p, len, bytes); \
		794	if (bytes == len) \
		795	pos_byte -= len - 1; \
		796	} \
750	} while (0)	797	} while (0)
751		798
752	#endif /* emacs */	799	#endif /* emacs */
@@ -806,9 +853,9 @@ extern void invalid_character P_ ((int));
806	extern int translate_char P_ ((Lisp_Object, int, int, int, int));	853	extern int translate_char P_ ((Lisp_Object, int, int, int, int));
807	extern int split_non_ascii_string P_ ((const unsigned char *, int, int *,	854	extern int split_non_ascii_string P_ ((const unsigned char *, int, int *,
808	unsigned char *, unsigned char *));	855	unsigned char *, unsigned char *));
809	extern int string_to_non_ascii_char P_ ((const unsigned char *, int, int *,	856	extern int string_to_non_ascii_char P_ ((const unsigned char *, int, int *));
810	int));
811	extern int non_ascii_char_to_string P_ ((int, unsigned char *, unsigned char **));	857	extern int non_ascii_char_to_string P_ ((int, unsigned char *, unsigned char **));
		858	extern int char_printable_p P_ ((int c));
812	extern int multibyte_form_length P_ ((const unsigned char *, int));	859	extern int multibyte_form_length P_ ((const unsigned char *, int));
813	extern int str_cmpchar_id P_ ((const unsigned char *, int));	860	extern int str_cmpchar_id P_ ((const unsigned char *, int));
814	extern int get_charset_id P_ ((Lisp_Object));	861	extern int get_charset_id P_ ((Lisp_Object));