In this entry, just `Modified' means that the code for a composite character is deleted.

(LEADING_CODE_COMPOSITION) (CHARSET_COMPOSITION) (charset_composition) (MIN_CHAR_COMPOSITION) (MAX_CHAR_COMPOSITION) (GENERIC_COMPOSITION_CHAR) (COMPOSITE_CHAR_P) (MAKE_COMPOSITE_CHAR) (COMPOSITE_CHAR_ID) (PARSE_COMPOSITE_SEQ) (PARSE_CHARACTER_SEQ): Deleted. (MAX_CHAR) (CHARSET_VALID_P) (CHARSET_DEFINED_P) (CHARSET_AT) (FIRST_CHARSET_AT) (SAME_CHARSET_P) (MAKE_NON_ASCII_CHAR) (PARSE_MULTIBYTE_SEQ) (SPLIT_NON_ASCII_CHAR) (CHAR_PRINTABLE_P): Modified. (SPLIT_STRING): Call split_string, not split_non_ascii_string. (CHAR_STRING): Delete WORKBUF argument. Call char_to_string, not non_ascii_char_to_string. (STRING_CHAR): Call string_to_char, not string_to_non_ascii_char. (STRING_CHAR_AND_LENGTH): Likewise. (FETCH_CHAR_ADVANCE): New macro. (MAX_COMPONENT_COUNT) (struct cmpchar_info): Deleted. (MAX_MULTIBYTE_LENGTH): New macro. (MAX_LENGTH_OF_MULTI_BYTE_FORM): Deleted. (find_charset_in_str): Argument adjusted. (CHAR_LEN): Modified.
author: Kenichi Handa 1999-12-15 00:04:14 +0000
committer: Kenichi Handa 1999-12-15 00:04:14 +0000
commit: f49b37c9a3ab3b8d4cee577dbe3b224e648a6c85 (patch)
tree: cac527955f26dd3dbee3617f985aeec1d7025132 /src
parent: 6982083b65de7526b4ff88815524addbbbceee80 (diff)
download: emacs-f49b37c9a3ab3b8d4cee577dbe3b224e648a6c85.tar.gz
emacs-f49b37c9a3ab3b8d4cee577dbe3b224e648a6c85.zip
1 files changed, 106 insertions, 255 deletions
diff --git a/src/charset.h b/src/charset.h
index ed5b3a7d2cf..c0b7e4d9eb4 100644
--- a/src/charset.h
+++ b/src/charset.h
@@ -1,4 +1,4 @@
-/* Header for multilingual character handler.
+/* Header for multibyte character handler.
   Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   Licensed to the Free Software Foundation.
@@ -27,21 +27,20 @@ Boston, MA 02111-1307, USA.  */
  A character set ("charset" hereafter) is a meaningful collection
  (i.e. language, culture, functionality, etc) of characters.  Emacs
  handles multiple charsets at once.  Each charset corresponds to one
-  of ISO charsets (except for a special charset for composition
+  of ISO charsets.  Emacs identifies a charset by a unique
-  characters).  Emacs identifies a charset by a unique identification
+  identification number, whereas ISO identifies a charset by a triplet
-  number, whereas ISO identifies a charset by a triplet of DIMENSION,
+  of DIMENSION, CHARS and FINAL-CHAR.  So, hereafter, just saying
-  CHARS and FINAL-CHAR.  So, hereafter, just saying "charset" means an
+  "charset" means an identification number (integer value).
-  identification number (integer value).
-  The value range of charset is 0x00, 0x80..0xFE.  There are four
+  The value range of charset is 0x00, 0x81..0xFE.  There are four
  kinds of charset depending on DIMENSION (1 or 2) and CHARS (94 or
  96).  For instance, a charset of DIMENSION2_CHARS94 contains 94x94
+  characters.
  Within Emacs Lisp, a charset is treated as a symbol which has a
  property `charset'.  The property value is a vector containing
  various information about the charset.  For readability of C code,
-  we use the following convention on C variable names:
+  we use the following convention for C variable names:
        charset_symbol: Emacs Lisp symbol of a charset
        charset_id: Emacs Lisp integer of an identification number of a charset
        charset: C integer of an identification number of a charset
@@ -55,7 +54,7 @@ Boston, MA 02111-1307, USA.  */
  character in Emacs' buffer and string.
  We call a charset which has extended leading-code as "private
-  charset" because those are mainly for a charset which is not
+  charset" because those are mainly for a charset which is not yet
  registered by ISO.  On the contrary, we call a charset which does
  not have extended leading-code as "official charset".
@@ -65,7 +64,7 @@ Boston, MA 02111-1307, USA.  */
  0x00          official dim1    -- none --             -- none --
                (ASCII)
  0x01..0x7F    --never used--
-  0x80          COMPOSITION      same as charset        -- none --
+  0x80          --never used--
  0x81..0x8F    official dim1    same as charset        -- none --
  0x90..0x99    official dim2    same as charset        -- none --
  0x9A..0x9F    --never used--
@@ -80,20 +79,9 @@ Boston, MA 02111-1307, USA.  */
  0xFF          --never used--
  ---------------------------------------------------------------------------
-  In the table, "COMPOSITION" means a charset for a composite
-  character which is a character composed from several (up to 16)
-  non-composite characters (components).  Although a composite
-  character can contain components of many charsets, a composite
-  character itself belongs to the charset CHARSET-COMPOSITION.  See
-  the document "GENERAL NOTE on COMPOSITE CHARACTER" below for more
-  detail.
 */
 /* Definition of special leading-codes.  */
-/* Base leading-code.  */
-/* Special leading-code followed by components of a composite character.  */
-#define LEADING_CODE_COMPOSITION        0x80
 /* Leading-code followed by extended leading-code.  */
 #define LEADING_CODE_PRIVATE_11 0x9A /* for private DIMENSION1 of 1-column */
 #define LEADING_CODE_PRIVATE_12 0x9B /* for private DIMENSION1 of 2-column */
@@ -122,10 +110,8 @@ Boston, MA 02111-1307, USA.  */
 /* Definition of special charsets.  */
 #define CHARSET_ASCII           0
-#define CHARSET_COMPOSITION     0x80
 extern int charset_ascii;       /* ASCII */
-extern int charset_composition; /* for a composite character */
 extern int charset_latin_iso8859_1; /* ISO8859-1 (Latin-1) */
 extern int charset_jisx0208_1978; /* JISX0208.1978 (Japanese Kanji old set) */
 extern int charset_jisx0208;    /* JISX0208.1983 (Japanese Kanji) */
@@ -194,35 +180,6 @@ extern int charset_big5_2;	/* Big5 Level 2 (Chinese Traditional) */
 */
-/*** GENERAL NOTE on COMPOSITE CHARACTER ***
-  A composite character is a character composed from several (up to
-  16) non-composite characters (components).  Although each component
-  can belong to any charset, a composite character itself belongs to
-  the charset `charset-composition' and is assigned a special
-  leading-code `LEADING_CODE_COMPOSITION' for multi-byte form.  See
-  the document "2. Emacs internal format handlers" in `coding.c' for
-  more detail about multi-byte form.
-  A character code of composite character has special format.  In the
-  above document, FIELD1 of a composite character is 0x1F.  Each
-  composite character is assigned a sequential number CMPCHAR-ID.
-  FIELD2 and FIELD3 are combined to make 14bits field for holding
-  CMPCHAR-ID, which means that Emacs can handle at most 2^14 (= 16384)
-  composite characters at once.
-  -----------------------------------------------------------------------
-  charset               FIELD1 (5-bit)      FIELD2&3 (14-bit)
-  -----------------------------------------------------------------------
-  CHARSET-COMPOSITION   0x1F                CMPCHAR-ID
-  -----------------------------------------------------------------------  
-  Emacs assigns CMPCHAR-ID to a composite character only when it
-  requires the character code of the composite character (e.g. while
-  displaying the composite character).
-*/
 /* Masks of each field of character code.  */
 #define CHAR_FIELD1_MASK (0x1F << 14)
 #define CHAR_FIELD2_MASK (0x7F << 7)
@@ -242,17 +199,11 @@ extern int charset_big5_2;	/* Big5 Level 2 (Chinese Traditional) */
  ((MIN_CHARSET_OFFICIAL_DIMENSION2 - 0x8F) << 14)
 #define MIN_CHAR_PRIVATE_DIMENSION2 \
  ((MIN_CHARSET_PRIVATE_DIMENSION2 - 0xE0) << 14)
-#define MIN_CHAR_COMPOSITION \
+/* Maximum character code currently used plus 1.  */
-  (0x1F << 14)
+#define MAX_CHAR (0x1F << 14)
-#define MAX_CHAR_COMPOSITION (GLYPH_MASK_CHAR - 1)
-/* A generic character for composition characters.  */
-#define GENERIC_COMPOSITION_CHAR (GLYPH_MASK_CHAR)
 /* 1 if C is an ASCII character, else 0.  */
 #define SINGLE_BYTE_CHAR_P(c) ((c) >= 0 && (c) < 0x100)
-/* 1 if C is an composite character, else 0.  */
-#define COMPOSITE_CHAR_P(c) ((c) >= MIN_CHAR_COMPOSITION)
 /* 1 if BYTE is a character in itself, in multibyte mode.  */
 #define ASCII_BYTE_P(byte) ((byte) < 0x80)
@@ -379,10 +330,10 @@ extern Lisp_Object Vcharset_symbol_table;
 /* 1 if CHARSET is in valid value range, else 0.  */
 #define CHARSET_VALID_P(charset)                                         \
  ((charset) == 0                                                        \
-   || ((charset) >= 0x80 && (charset) <= MAX_CHARSET_OFFICIAL_DIMENSION2) \
+   || ((charset) > 0x80 && (charset) <= MAX_CHARSET_OFFICIAL_DIMENSION2) \
   || ((charset) >= MIN_CHARSET_PRIVATE_DIMENSION1 && (charset) <= MAX_CHARSET))
-/* 1 if CHARSET is already defined (and not CHARSET_COMPOSITION), else 0.  */
+/* 1 if CHARSET is already defined, else 0.  */
 #define CHARSET_DEFINED_P(charset)                      \
  (((charset) >= 0) && ((charset) <= MAX_CHARSET)       \
   && !NILP (CHARSET_TABLE_ENTRY (charset)))
@@ -406,67 +357,47 @@ extern int width_by_char_head[256];
      ? CHAR_FIELD2 (c) + 0x70                  \
      : ((c) < MIN_CHAR_PRIVATE_DIMENSION2      \
         ? CHAR_FIELD1 (c) + 0x8F               \
-         : ((c) < MIN_CHAR_COMPOSITION          \
+         : CHAR_FIELD1 (c) + 0xE0)))
-            ? CHAR_FIELD1 (c) + 0xE0            \
-            : ((c) <= MAX_CHAR_COMPOSITION      \
-               ? CHARSET_COMPOSITION            \
-               : CHARSET_ASCII)))))
 /* Return charset at the place pointed by P.  */
-#define CHARSET_AT(p)                           \
+#define CHARSET_AT(p)                           \
-  (*(p) < 0x80                                  \
+  (*(p) < 0x80                                  \
-   ? CHARSET_ASCII                              \
+   ? CHARSET_ASCII                              \
-   : (*(p) == LEADING_CODE_COMPOSITION          \
+   : (*(p) < LEADING_CODE_PRIVATE_11            \
-      ? CHARSET_COMPOSITION                     \
+      ? (int)*(p)                               \
-      : (*(p) < LEADING_CODE_PRIVATE_11         \
+      : (*(p) <= LEADING_CODE_PRIVATE_22        \
-         ? (int)*(p)                            \
+         ? (int)*((p) + 1)                      \
-         : (*(p) <= LEADING_CODE_PRIVATE_22     \
+         : -1)))
-            ? (int)*((p) + 1)                   \
-            : -1))))
 /* Same as `CHARSET_AT ()' but perhaps runs faster because of an
   additional argument C which is the code (byte) at P.  */
-#define FIRST_CHARSET_AT(p, c)                  \
+#define FIRST_CHARSET_AT(p, c)          \
-  ((c) < 0x80                                   \
+  ((c) < 0x80                           \
-   ? CHARSET_ASCII                              \
+   ? CHARSET_ASCII                      \
-   : ((c) == LEADING_CODE_COMPOSITION           \
+   : ((c) < LEADING_CODE_PRIVATE_11     \
-      ? CHARSET_COMPOSITION                     \
+      ? (int)(c)                        \
-      : ((c) < LEADING_CODE_PRIVATE_11          \
+      : ((c) <= LEADING_CODE_PRIVATE_22 \
-         ? (int)(c)                             \
+         ? (int)*((p) + 1)              \
-         : ((c) <= LEADING_CODE_PRIVATE_22      \
+         : -1)))
-            ? (int)*((p) + 1)                   \
-            : -1))))
+/* Check if two characters C1 and C2 belong to the same charset.  */
+#define SAME_CHARSET_P(c1, c2)                                  \
-/* Check if two characters C1 and C2 belong to the same charset.
+  (SINGLE_BYTE_CHAR_P (c1)                                      \
-   Always return 0 for composite characters.  */
+   ? SINGLE_BYTE_CHAR_P (c2)                                    \
-#define SAME_CHARSET_P(c1, c2)                                  \
+   : (c1 < MIN_CHAR_OFFICIAL_DIMENSION2                         \
-  (c1 < MIN_CHAR_COMPOSITION                                    \
+      ? (c1 & CHAR_FIELD2_MASK) == (c2 & CHAR_FIELD2_MASK)      \
-   && (SINGLE_BYTE_CHAR_P (c1)                                  \
+      : (c1 & CHAR_FIELD1_MASK) == (c2 & CHAR_FIELD1_MASK)))
-       ? SINGLE_BYTE_CHAR_P (c2)                                \
-       : (c1 < MIN_CHAR_OFFICIAL_DIMENSION2                     \
-          ? (c1 & CHAR_FIELD2_MASK) == (c2 & CHAR_FIELD2_MASK)  \
-          : (c1 & CHAR_FIELD1_MASK) == (c2 & CHAR_FIELD1_MASK))))
 /* Return a non-ASCII character of which charset is CHARSET and
   position-codes are C1 and C2.  DIMENSION1 character ignores C2.  */
 #define MAKE_NON_ASCII_CHAR(charset, c1, c2)                            \
-  ((charset) == CHARSET_COMPOSITION                                     \
+  (! CHARSET_DEFINED_P (charset) || CHARSET_DIMENSION (charset) == 1    \
-   ? ((c2) < 0                                                          \
+   ? (((charset) - 0x70) << 7) | ((c1) <= 0 ? 0 : (c1))                 \
-      ? (((charset) - 0x70) << 7) + (c1)                                \
+   : ((charset) < MIN_CHARSET_PRIVATE_DIMENSION2                        \
-      : MAKE_COMPOSITE_CHAR (((c1) << 7) + (c2)))                       \
+      ? ((((charset) - 0x8F) << 14)                                     \
-   : (! CHARSET_DEFINED_P (charset) || CHARSET_DIMENSION (charset) == 1 \
+         | ((c1) <= 0 ? 0 : ((c1) << 7)) | ((c2) <= 0 ? 0 : (c2)))      \
-      ? (((charset) - 0x70) << 7) | ((c1) <= 0 ? 0 : (c1))              \
+      : ((((charset) - 0xE0) << 14)                                     \
-      : ((charset) < MIN_CHARSET_PRIVATE_DIMENSION2                     \
+         | ((c1) <= 0 ? 0 : ((c1) << 7)) | ((c2) <= 0 ? 0 : (c2)))))
-         ? ((((charset) - 0x8F) << 14)                                  \
-            | ((c1) <= 0 ? 0 : ((c1) << 7)) | ((c2) <= 0 ? 0 : (c2)))   \
-         : ((((charset) - 0xE0) << 14)                                  \
-            | ((c1) <= 0 ? 0 : ((c1) << 7)) | ((c2) <= 0 ? 0 : (c2))))))
-/* Return a composite character of which CMPCHAR-ID is ID.  */
-#define MAKE_COMPOSITE_CHAR(id) (MIN_CHAR_COMPOSITION + (id))
-/* Return CMPCHAR-ID of a composite character C.  */
-#define COMPOSITE_CHAR_ID(c) ((c) - MIN_CHAR_COMPOSITION)
 /* Return a character of which charset is CHARSET and position-codes
   are C1 and C2.  DIMENSION1 character ignores C2.  */
@@ -488,41 +419,6 @@ extern int width_by_char_head[256];
 #define DEFAULT_NONASCII_INSERT_OFFSET 0x800
-/* Parse composite character string STR of length LENGTH (>= 2) and
-   set BYTES to the length of actual multibyte sequence.
-   It is assumed that *STR is LEADING_CODE_COMPOSITION and the
-   following (LENGTH - 1) bytes satisfy !CHAR_HEAD_P.
-   Actually, the whole multibyte sequence starting with
-   LEADING_CODE_COMPOSITION is treated as a single multibyte
-   character.  So, here, we just set BYTES to LENGTH.
-   This macro should be called only from PARSE_MULTIBYTE_SEQ.  */
-#define PARSE_COMPOSITE_SEQ(str, length, bytes) \
-  do {                                          \
-    (bytes) = (length);                         \
-  } while (0)
-/* Parse non-composite multibyte character string STR of length
-   LENGTH (>= 2) and set BYTES to the length of actual multibyte
-   sequence.
-   It is assumed that *STR is one of base leading codes (excluding
-   LEADING_CODE_COMPOSITION) and the following (LENGTH - 1) bytes
-   satisfy !CHAR_HEAD_P.
-   This macro should be called only from PARSE_MULTIBYTE_SEQ.  */
-#define PARSE_CHARACTER_SEQ(str, length, bytes) \
-  do {                                          \
-    (bytes) = BYTES_BY_CHAR_HEAD ((str)[0]);    \
-    if ((bytes) > (length))                     \
-      (bytes) = (length);                       \
-  } while (0)
 /* Parse string STR of length LENGTH and check if a multibyte
   character is at STR.  If so, set BYTES for that character, else
   set BYTES to 1.  */
@@ -530,17 +426,14 @@ extern int width_by_char_head[256];
 #define PARSE_MULTIBYTE_SEQ(str, length, bytes)                 \
  do {                                                          \
    int i = 1;                                                  \
-    if (ASCII_BYTE_P (*str))                                    \
+    while (i < (length) && ! CHAR_HEAD_P ((str)[i])) i++;       \
-      bytes = 1;                                                \
+    if (i == 1)                                                 \
+      (bytes) = 1;                                              \
    else                                                        \
      {                                                         \
-        while (i < (length) && ! CHAR_HEAD_P ((str)[i])) i++;   \
+        (bytes) = BYTES_BY_CHAR_HEAD ((str)[0]);                \
-        if (i == 1)                                             \
+        if ((bytes) > (length))                                 \
-          (bytes) = 1;                                          \
+          (bytes) = (length);                                   \
-        else if ((str)[0] == LEADING_CODE_COMPOSITION)          \
-          PARSE_COMPOSITE_SEQ (str, i, bytes);                  \
-        else                                                    \
-          PARSE_CHARACTER_SEQ (str, i, bytes);                  \
      }                                                         \
  } while (0)
@@ -550,16 +443,14 @@ extern int width_by_char_head[256];
   Do not use this macro for an ASCII character.  */
-#define SPLIT_NON_ASCII_CHAR(c, charset, c1, c2)                         \
+#define SPLIT_NON_ASCII_CHAR(c, charset, c1, c2)                        \
-  ((c) & CHAR_FIELD1_MASK                                                \
+  ((c) & CHAR_FIELD1_MASK                                               \
-   ? (charset = ((c) < MIN_CHAR_COMPOSITION                              \
+   ? (charset = (CHAR_FIELD1 (c)                                        \
-                 ? (CHAR_FIELD1 (c)                                      \
+                 + ((c) < MIN_CHAR_PRIVATE_DIMENSION2 ? 0x8F : 0xE0)),  \
-                    + ((c) < MIN_CHAR_PRIVATE_DIMENSION2 ? 0x8F : 0xE0)) \
+      c1 = CHAR_FIELD2 (c),                                             \
-                 : CHARSET_COMPOSITION),                                 \
+      c2 = CHAR_FIELD3 (c))                                             \
-      c1 = CHAR_FIELD2 (c),                                              \
+   : (charset = CHAR_FIELD2 (c) + 0x70,                                 \
-      c2 = CHAR_FIELD3 (c))                                              \
+      c1 = CHAR_FIELD3 (c),                                             \
-   : (charset = CHAR_FIELD2 (c) + 0x70,                                  \
-      c1 = CHAR_FIELD3 (c),                                              \
      c2 = -1))
 /* The charset of character C is stored in CHARSET, and the
@@ -572,25 +463,19 @@ extern int width_by_char_head[256];
   : SPLIT_NON_ASCII_CHAR (c, charset, c1, c2))
 /* Return 1 iff character C has valid printable glyph.  */
-#define CHAR_PRINTABLE_P(c)             \
+#define CHAR_PRINTABLE_P(c)     \
-  (SINGLE_BYTE_CHAR_P (c)               \
+  (SINGLE_BYTE_CHAR_P (c)       \
-   || ((c) >= MIN_CHAR_COMPOSITION      \
+   || char_printable_p (c))
-       ? (c) < MAX_CHAR                 \
-       : char_printable_p (c)))
 /* The charset of the character at STR is stored in CHARSET, and the
   position-codes are stored in C1 and C2.
-   We store -1 in C2 if the character is just 2 bytes.
+   We store -1 in C2 if the character is just 2 bytes.  */
-   If the character is a composite character, the upper 7-bit and
-   lower 7-bit of CMPCHAR-ID are set in C1 and C2 respectively.  No
-   range checking.  */
-#define SPLIT_STRING(str, len, charset, c1, c2)                         \
+#define SPLIT_STRING(str, len, charset, c1, c2)                 \
-  ((BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) < 2                     \
+  ((BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) < 2             \
-    || BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) > len                \
+    || BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) > len        \
-    || split_non_ascii_string (str, len, &charset, &c1, &c2) < 0)       \
+    || split_string (str, len, &charset, &c1, &c2) < 0)         \
-   ? c1 = *(str), charset = CHARSET_ASCII                               \
+   ? c1 = *(str), charset = CHARSET_ASCII                       \
   : charset)
 /* Mapping table from ISO2022's charset (specified by DIMENSION,
@@ -612,16 +497,15 @@ extern int iso_charset_table[2][2][128];
   representations: multi-byte form and single-word form (character
   code).  */
-/* Set STR a pointer to the multi-byte form of the character C.  If C
+/* Store multi-byte form of the character C in STR.  The caller should
-   is not a composite character, the multi-byte form is set in WORKBUF
+   allocate at least 4-byte area at STR in advance.  Returns the
-   and STR points WORKBUF.  The caller should allocate at least 4-byte
+   length of the multi-byte form.  If C is an invalid character code,
-   area at WORKBUF in advance.  Returns the length of the multi-byte
+   signal an error.  */
-   form.  If C is an invalid character code, signal an error.  */
-#define CHAR_STRING(c, workbuf, str)                    \
+#define CHAR_STRING(c, str)             \
-  (SINGLE_BYTE_CHAR_P (c)                               \
+  (SINGLE_BYTE_CHAR_P (c)               \
-   ? *(str = workbuf) = (unsigned char)(c), 1           \
+   ? *(str) = (unsigned char)(c), 1     \
-   : non_ascii_char_to_string (c, workbuf, (unsigned char **)&str))
+   : char_to_string (c, (unsigned char *)str))
 /* Return a character code of the character of which multi-byte form
   is at STR and the length is LEN.  If STR doesn't contain valid
@@ -630,7 +514,7 @@ extern int iso_charset_table[2][2][128];
 #define STRING_CHAR(str, len)                           \
  (BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) == 1     \
   ? (unsigned char) *(str)                             \
-   : string_to_non_ascii_char (str, len, 0))
+   : string_to_char (str, len, 0))
 /* This is like STRING_CHAR but the third arg ACTUAL_LEN is set to the
   length of the multi-byte form.  Just to know the length, use
@@ -639,7 +523,7 @@ extern int iso_charset_table[2][2][128];
 #define STRING_CHAR_AND_LENGTH(str, len, actual_len)    \
  (BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) == 1     \
   ? ((actual_len) = 1), (unsigned char) *(str)         \
-   : string_to_non_ascii_char (str, len, &(actual_len)))
+   : string_to_char (str, len, &(actual_len)))
 /* Fetch the "next" multibyte character from Lisp string STRING
   at byte position BYTEIDX, character position CHARIDX.
@@ -665,6 +549,26 @@ if (1)									      \
  }                                                                           \
 else
+/* Like FETCH_STRING_CHAR_SPACE_LEFT but fetch character from the
+   current buffer.  */
+#define FETCH_CHAR_ADVANCE(OUTPUT, CHARIDX, BYTEIDX)                      \
+if (1)                                                                    \
+  {                                                                       \
+    unsigned char *fetch_buf_char_ptr = BYTE_POS_ADDR (BYTEIDX);          \
+    int fetch_buf_char_space_left = ((CHARIDX < GPT ? GPT_BYTE : Z_BYTE)  \
+                                       - BYTEIDX);                        \
+    int actual_len;                                                       \
+                                                                          \
+    OUTPUT                                                                \
+        = STRING_CHAR_AND_LENGTH (fetch_buf_char_ptr,                     \
+                                  fetch_buf_char_space_left, actual_len); \
+                                                                          \
+    BYTEIDX += actual_len;                                                \
+    CHARIDX++;                                                            \
+  }                                                                       \
+else
 /* Return the length of the multi-byte form at string STR of length LEN.  */
 #define MULTIBYTE_FORM_LENGTH(str, len)                 \
@@ -812,70 +716,21 @@ while (0)
 #endif /* emacs */
-/* Maximum counts of components in one composite character.  */
+/* This is the maximum byte length of multi-byte sequence.  */
-#define MAX_COMPONENT_COUNT 16
+#define MAX_MULTIBYTE_LENGTH 4
-/* Structure to hold information of a composite character.  */
-struct cmpchar_info {
-  /* Byte length of the composite character.  */
-  int len;
-  /* Multi-byte form of the composite character.  */
-  unsigned char *data;
-  /* Length of glyph codes.  */
-  int glyph_len;
-  /* Width of the overall glyph of the composite character.  */
-  int width;
-  /* Pointer to an array of glyph codes of the composite character.
-     This actually contains only character code, no face.  */
-  GLYPH *glyph;
-  /* Pointer to an array of composition rules.  The value has the form:
-        (0xA0 + ((GLOBAL-REF-POINT << 2) | NEW-REF-POINT))
-     where each XXX-REF-POINT is 0..8.  */
-  unsigned char *cmp_rule;
-  /* Pointer to an array of x-axis offset of left edge of glyphs
-     relative to the left of of glyph[0] except for the first element
-     which is the absolute offset from the left edge of overall glyph.
-     The actual pixel offset should be calculated by multiplying each
-     frame's one column width by this value:
-        (i.e. FONT_WIDTH (f->output_data.x->font) * col_offset[N]).  */
-  float *col_offset;
-  /* Work slot used by `dumpglyphs' (xterm.c).  */
-  int face_work;
-};
-/* Table of pointers to the structure `cmpchar_info' indexed by
-   CMPCHAR-ID.  */
-extern struct cmpchar_info **cmpchar_table;
-/* Number of the current composite characters.  */
-extern int n_cmpchars;
-/* This is the maximum length of multi-byte form.  */
-#define MAX_LENGTH_OF_MULTI_BYTE_FORM (MAX_COMPONENT_COUNT * 6)
-/* Maximum character code currently used.  */
-#define MAX_CHAR (MIN_CHAR_COMPOSITION + n_cmpchars)
 extern void invalid_character P_ ((int));
 extern int translate_char P_ ((Lisp_Object, int, int, int, int));
-extern int split_non_ascii_string P_ ((const unsigned char *, int, int *,
+extern int split_string P_ ((const unsigned char *, int, int *,
                                       unsigned char *, unsigned char *));
-extern int string_to_non_ascii_char P_ ((const unsigned char *, int, int *));
+extern int char_to_string P_ ((int, unsigned char *));
-extern int non_ascii_char_to_string P_ ((int, unsigned char *, unsigned char **));
+extern int string_to_char P_ ((const unsigned char *, int, int *));
 extern int char_printable_p P_ ((int c));
 extern int multibyte_form_length P_ ((const unsigned char *, int));
-extern int str_cmpchar_id P_ ((const unsigned char *, int));
 extern int get_charset_id P_ ((Lisp_Object));
-extern int cmpchar_component P_ ((int, int, int));
 extern int find_charset_in_str P_ ((unsigned char *, int, int *,
-                                    Lisp_Object, int, int));
+                                    Lisp_Object, int));
 extern int strwidth P_ ((unsigned char *, int));
 extern int char_bytes P_ ((int));
 extern int char_valid_p P_ ((int, int));
@@ -902,10 +757,6 @@ extern Lisp_Object Vauto_fill_chars;
 /* Length of C in bytes.  */
-#define CHAR_LEN(C)                                     \
+#define CHAR_LEN(C) CHARSET_BYTES (CHAR_CHARSET ((C)))
-     (CHAR_CHARSET ((C)) == CHARSET_COMPOSITION         \
-      ? cmpchar_table[COMPOSITE_CHAR_ID ((C))]->len     \
-      : CHARSET_BYTES (CHAR_CHARSET ((C))))
 #endif /* _CHARSET_H */
author	Kenichi Handa	1999-12-15 00:04:14 +0000
committer	Kenichi Handa	1999-12-15 00:04:14 +0000
commit	f49b37c9a3ab3b8d4cee577dbe3b224e648a6c85 (patch)
tree	cac527955f26dd3dbee3617f985aeec1d7025132 /src
parent	6982083b65de7526b4ff88815524addbbbceee80 (diff)
download	emacs-f49b37c9a3ab3b8d4cee577dbe3b224e648a6c85.tar.gz emacs-f49b37c9a3ab3b8d4cee577dbe3b224e648a6c85.zip