diff options
| author | Kenichi Handa | 2002-03-01 02:12:59 +0000 |
|---|---|---|
| committer | Kenichi Handa | 2002-03-01 02:12:59 +0000 |
| commit | c0e17dd87eb56bb2de52a0f7699fc33596c035f0 (patch) | |
| tree | 63ff061c0793eba7aeaade7addfc2e0246d8c94e | |
| parent | 97941b05253cdac11a83c12dd4fec72e82a189b0 (diff) | |
| download | emacs-c0e17dd87eb56bb2de52a0f7699fc33596c035f0.tar.gz emacs-c0e17dd87eb56bb2de52a0f7699fc33596c035f0.zip | |
Fully Re-written.
| -rw-r--r-- | lisp/international/mule-conf.el | 1125 |
1 files changed, 728 insertions, 397 deletions
diff --git a/lisp/international/mule-conf.el b/lisp/international/mule-conf.el index fe3d6a2e494..31452b34081 100644 --- a/lisp/international/mule-conf.el +++ b/lisp/international/mule-conf.el | |||
| @@ -2,6 +2,9 @@ | |||
| 2 | 2 | ||
| 3 | ;; Copyright (C) 1997 Electrotechnical Laboratory, JAPAN. | 3 | ;; Copyright (C) 1997 Electrotechnical Laboratory, JAPAN. |
| 4 | ;; Licensed to the Free Software Foundation. | 4 | ;; Licensed to the Free Software Foundation. |
| 5 | ;; Copyright (C) 2001, 2002 | ||
| 6 | ;; National Institute of Advanced Industrial Science and Technology (AIST) | ||
| 7 | ;; Registration Number H13PRO009 | ||
| 5 | 8 | ||
| 6 | ;; Keywords: mule, multilingual, character set, coding system | 9 | ;; Keywords: mule, multilingual, character set, coding system |
| 7 | 10 | ||
| @@ -30,263 +33,569 @@ | |||
| 30 | 33 | ||
| 31 | ;;; Definitions of character sets. | 34 | ;;; Definitions of character sets. |
| 32 | 35 | ||
| 33 | ;; Basic (official) character sets. These character sets are treated | 36 | ;; The charsets `ascii' and `unicode' are already defined in charset.c |
| 34 | ;; efficiently with respect to buffer memory. | 37 | ;; as below: |
| 35 | 38 | ;; | |
| 36 | ;; Syntax: | 39 | ;; (define-charset 'ascii |
| 37 | ;; (define-charset CHARSET-ID CHARSET | 40 | ;; "" |
| 38 | ;; [ DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE | 41 | ;; :dimension 1 |
| 39 | ;; SHORT-NAME LONG-NAME DESCRIPTION ]) | 42 | ;; :code-space [0 127] |
| 40 | ;; ASCII charset is defined in src/charset.c as below. | 43 | ;; :iso-final-char ?A |
| 41 | ;; (define-charset 0 ascii | 44 | ;; :ascii-compatible-p t |
| 42 | ;; [1 94 1 0 ?B 0 "ASCII" "ASCII" "ASCII (ISO646 IRV)"]) | 45 | ;; :emacs-mule-id 0 |
| 43 | 46 | ;; :code-offset 0) | |
| 44 | ;; 1-byte charsets. Valid range of CHARSET-ID is 128..143. | 47 | ;; |
| 45 | 48 | ;; (define-charset 'unicode | |
| 46 | ;; CHARSET-ID 128 is not used. | 49 | ;; "" |
| 47 | 50 | ;; :dimension 3 | |
| 48 | (define-charset 129 'latin-iso8859-1 | 51 | ;; :code-space [0 255 0 255 0 16] |
| 49 | [1 96 1 0 ?A 1 "RHP of Latin-1" "RHP of Latin-1 (ISO 8859-1): ISO-IR-100" | 52 | ;; :ascii-compatible-p t |
| 50 | "Right-Hand Part of Latin Alphabet 1 (ISO/IEC 8859-1): ISO-IR-100."]) | 53 | ;; :code-offset 0) |
| 51 | (define-charset 130 'latin-iso8859-2 | 54 | ;; |
| 52 | [1 96 1 0 ?B 1 "RHP of Latin-2" "RHP of Latin-2 (ISO 8859-2): ISO-IR-101" | 55 | ;; We now set :docstring, :short-name, and :long-name properties. |
| 53 | "Right-Hand Part of Latin Alphabet 2 (ISO/IEC 8859-2): ISO-IR-101."]) | 56 | |
| 54 | (define-charset 131 'latin-iso8859-3 | 57 | (put-charset-property |
| 55 | [1 96 1 0 ?C 1 "RHP of Latin-3" "RHP of Latin-3 (ISO 8859-3): ISO-IR-109" | 58 | 'ascii :docstring "ASCII (ISO646 IRV)") |
| 56 | "Right-Hand Part of Latin Alphabet 3 (ISO/IEC 8859-3): ISO-IR-109."]) | 59 | (put-charset-property |
| 57 | (define-charset 132 'latin-iso8859-4 | 60 | 'ascii :short-name "ASCII") |
| 58 | [1 96 1 0 ?D 1 "RHP of Latin-4" "RHP of Latin-4 (ISO 8859-4): ISO-IR-110" | 61 | (put-charset-property |
| 59 | "Right-Hand Part of Latin Alphabet 4 (ISO/IEC 8859-4): ISO-IR-110."]) | 62 | 'ascii :long-name "ASCII (ISO646 IRV)") |
| 60 | (define-charset 133 'thai-tis620 | 63 | (put-charset-property |
| 61 | [1 96 1 0 ?T 1 "RHP of TIS620" "RHP of Thai (TIS620): ISO-IR-166" | 64 | 'unicode :docstring "Unicode (ISO10646)") |
| 62 | "Right-Hand Part of TIS620.2533 (Thai): ISO-IR-166."]) | 65 | (put-charset-property |
| 63 | (define-charset 134 'greek-iso8859-7 | 66 | 'unicode :short-name "Unicode") |
| 64 | [1 96 1 0 ?F 1 "RHP of ISO8859/7" "RHP of Greek (ISO 8859-7): ISO-IR-126" | 67 | (put-charset-property |
| 65 | "Right-Hand Part of Latin/Greek Alphabet (ISO/IEC 8859-7): ISO-IR-126."]) | 68 | 'unicode :long-name "Unicode (ISO10646)") |
| 66 | (define-charset 135 'arabic-iso8859-6 | 69 | |
| 67 | [1 96 1 1 ?G 1 "RHP of ISO8859/6" "RHP of Arabic (ISO 8859-6): ISO-IR-127" | 70 | (define-charset-alias 'ucs 'unicode) |
| 68 | "Right-Hand Part of Latin/Arabic Alphabet (ISO/IEC 8859-6): ISO-IR-127."]) | 71 | |
| 69 | (define-charset 136 'hebrew-iso8859-8 | 72 | (define-charset 'emacs |
| 70 | [1 96 1 1 ?H 1 "RHP of ISO8859/8" "RHP of Hebrew (ISO 8859-8): ISO-IR-138" | 73 | "Full Emacs characters." |
| 71 | "Right-Hand Part of Latin/Hebrew Alphabet (ISO/IEC 8859-8): ISO-IR-138."]) | 74 | :ascii-compatible-p t |
| 72 | (define-charset 137 'katakana-jisx0201 | 75 | :code-space [ 0 255 0 255 0 63 ] |
| 73 | [1 94 1 0 ?I 1 "JISX0201 Katakana" "Japanese Katakana (JISX0201.1976)" | 76 | :code-offset 0 |
| 74 | "Katakana Part of JISX0201.1976."]) | 77 | :supplementary-p t) |
| 75 | (define-charset 138 'latin-jisx0201 | 78 | |
| 76 | [1 94 1 0 ?J 0 "JISX0201 Roman" "Japanese Roman (JISX0201.1976)" | 79 | (define-charset 'iso-8859-1 |
| 77 | "Roman Part of JISX0201.1976."]) | 80 | "Laint-1 (ISO/IEC 8859-1)" |
| 78 | 81 | :short-name "Latin-1" | |
| 79 | ;; CHARSET-ID is not used 139. | 82 | :ascii-compatible-p t |
| 80 | 83 | :code-space [0 255] | |
| 81 | (define-charset 140 'cyrillic-iso8859-5 | 84 | :code-offset 0) |
| 82 | [1 96 1 0 ?L 1 "RHP of ISO8859/5" "RHP of Cyrillic (ISO 8859-5): ISO-IR-144" | 85 | |
| 83 | "Right-Hand Part of Latin/Cyrillic Alphabet (ISO/IEC 8859-5): ISO-IR-144."]) | 86 | (define-charset 'latin-iso8859-1 |
| 84 | (define-charset 141 'latin-iso8859-9 | 87 | "Right-Hand Part of ISO/IEC 8859/1 (Latin-1): ISO-IR-100" |
| 85 | [1 96 1 0 ?M 1 "RHP of Latin-5" "RHP of Latin-5 (ISO 8859-9): ISO-IR-148" | 88 | :short-name "RHP of Latin-1" |
| 86 | "Right-Hand Part of Latin Alphabet 5 (ISO/IEC 8859-9): ISO-IR-148."]) | 89 | :long-name "RHP of ISO/IEC 8859/1 (Latin-1): ISO-IR-100" |
| 87 | (define-charset 142 'latin-iso8859-15 | 90 | :iso-final-char ?A |
| 88 | [1 96 1 0 ?b 1 "RHP of Latin-9" "RHP of Latin-9 (ISO 8859-15): ISO-IR-203" | 91 | :emacs-mule-id 129 |
| 89 | "Right-Hand Part of Latin Alphabet 9 (ISO/IEC 8859-15): ISO-IR-203."]) | 92 | :code-space [32 127] |
| 90 | (define-charset 143 'latin-iso8859-14 | 93 | :code-offset 160) |
| 91 | [1 96 1 0 ?_ 1 "RHP of Latin-8" "RHP of Latin-8 (ISO 8859-14): ISO-IR-199" | 94 | |
| 92 | "Right-Hand Part of Latin Alphabet 8 (ISO/IEC 8859-14): ISO-IR-199."]) | 95 | (define-charset 'eight-bit-control |
| 93 | 96 | "8-bit control code (0x80..0x9F)" | |
| 94 | ;; 2-byte charsets. Valid range of CHARSET-ID is 144..153. | 97 | :short-name "8-bit control code" |
| 95 | 98 | :code-space [128 159] | |
| 96 | (define-charset 144 'japanese-jisx0208-1978 | 99 | :code-offset 128) |
| 97 | [2 94 2 0 ?@ 0 "JISX0208.1978" "JISX0208.1978 (Japanese): ISO-IR-42" | 100 | |
| 98 | "JISX0208.1978 Japanese Kanji (so called \"old JIS\"): ISO-IR-42."]) | 101 | (define-charset 'eight-bit-graphic |
| 99 | (define-charset 145 'chinese-gb2312 | 102 | "8-bit graphic code (0xA0..0xFF)" |
| 100 | [2 94 2 0 ?A 0 "GB2312" "GB2312: ISO-IR-58" | 103 | :short-name "8-bit graphic code" |
| 101 | "GB2312 Chinese simplified: ISO-IR-58."]) | 104 | :code-space [160 255] |
| 102 | (define-charset 146 'japanese-jisx0208 | 105 | :code-offset 160) |
| 103 | [2 94 2 0 ?B 0 "JISX0208" "JISX0208.1983/1990 (Japanese): ISO-IR-87" | 106 | |
| 104 | "JISX0208.1983/1990 Japanese Kanji: ISO-IR-87."]) | 107 | (defmacro define-iso-single-byte-charset (symbol iso-symbol name nickname |
| 105 | (define-charset 147 'korean-ksc5601 | 108 | iso-ir iso-final |
| 106 | [2 94 2 0 ?C 0 "KSC5601" "KSC5601 (Korean): ISO-IR-149" | 109 | emacs-mule-id map) |
| 107 | "KSC5601 Korean Hangul and Hanja: ISO-IR-149."]) | 110 | "For internal use only." |
| 108 | (define-charset 148 'japanese-jisx0212 | 111 | `(progn |
| 109 | [2 94 2 0 ?D 0 "JISX0212" "JISX0212 (Japanese): ISO-IR-159" | 112 | (define-charset ,symbol |
| 110 | "JISX0212 Japanese supplement: ISO-IR-159."]) | 113 | ,name |
| 111 | (define-charset 149 'chinese-cns11643-1 | 114 | :short-name ,nickname |
| 112 | [2 94 2 0 ?G 0 "CNS11643-1" "CNS11643-1 (Chinese traditional): ISO-IR-171" | 115 | :long-name ,name |
| 113 | "CNS11643 Plane 1 Chinese traditional: ISO-IR-171."]) | 116 | :ascii-compatible-p t |
| 114 | (define-charset 150 'chinese-cns11643-2 | 117 | :code-space [0 255] |
| 115 | [2 94 2 0 ?H 0 "CNS11643-2" "CNS11643-2 (Chinese traditional): ISO-IR-172" | 118 | :map ,map) |
| 116 | "CNS11643 Plane 2 Chinese traditional: ISO-IR-172."]) | 119 | (if ,iso-symbol |
| 117 | (define-charset 151 'japanese-jisx0213-1 | 120 | (define-charset ,iso-symbol |
| 118 | [2 94 2 0 ?O 0 "JISX0213-1" "JISX0213-1" "JISX0213 Plane 1 (Japanese)"]) | 121 | (if ,iso-ir |
| 119 | (define-charset 152 'chinese-big5-1 | 122 | (format "Right-Hand Part of %s (%s): ISO-IR-%d" |
| 120 | [2 94 2 0 ?0 0 "Big5 (Level-1)" "Big5 (Level-1) A141-C67F" | 123 | ,name ,nickname ,iso-ir) |
| 121 | "Frequently used part (A141-C67F) of Big5 (Chinese traditional)."]) | 124 | (format "Right-Hand Part of %s (%s)" ,name ,nickname)) |
| 122 | (define-charset 153 'chinese-big5-2 | 125 | :short-name (format "RHP of %s" ,name) |
| 123 | [2 94 2 0 ?1 0 "Big5 (Level-2)" "Big5 (Level-2) C940-FEFE" | 126 | :long-name (format "RHP of %s (%s)" ,name ,nickname) |
| 124 | "Less frequently used part (C940-FEFE) of Big5 (Chinese traditional)."]) | 127 | :iso-final-char ,iso-final |
| 125 | 128 | :emacs-mule-id ,emacs-mule-id | |
| 126 | ;; Additional (private) character sets. These character sets are | 129 | :code-space [32 127] |
| 127 | ;; treated less space-efficiently in the buffer. | 130 | :parents (list (cons ,symbol 128)))))) |
| 128 | 131 | ||
| 129 | ;; Syntax: | 132 | (define-iso-single-byte-charset 'iso-8859-2 'latin-iso8859-2 |
| 130 | ;; (define-charset CHARSET-ID CHARSET | 133 | "ISO/IEC 8859/2" "Latin-2" 101 ?B 130 "8859-2") |
| 131 | ;; [ DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE | 134 | |
| 132 | ;; SHORT-NAME LONG-NAME DESCRIPTION ]) | 135 | (define-iso-single-byte-charset 'iso-8859-3 'latin-iso8859-3 |
| 133 | 136 | "ISO/IEC 8859/3" "Latin-3" 109 ?C 131 "8859-3") | |
| 134 | ;; ISO-2022 allows a use of character sets not registered in ISO with | 137 | |
| 135 | ;; final characters `0' (0x30) through `?' (0x3F). Among them, Emacs | 138 | (define-iso-single-byte-charset 'iso-8859-4 'latin-iso8859-4 |
| 136 | ;; reserves `0' through `9' to support several private character sets. | 139 | "ISO/IEC 8859/4" "Latin-4" 110 ?D 132 "8859-4") |
| 137 | ;; The remaining final characters `:' through `?' are for users. | 140 | |
| 138 | 141 | (define-iso-single-byte-charset 'iso-8859-5 'cyrillic-iso8859-5 | |
| 139 | ;; 1-byte 1-column charsets. Valid range of CHARSET-ID is 160..223. | 142 | "ISO/IEC 8859/5" "Latin/Cyrillic" 144 ?L 140 "8859-5") |
| 140 | 143 | ||
| 141 | (define-charset 160 'chinese-sisheng | 144 | (define-iso-single-byte-charset 'iso-8859-6 'arabic-iso8859-6 |
| 142 | [1 94 1 0 ?0 0 "SiSheng" "SiSheng (PinYin/ZhuYin)" | 145 | "ISO/IEC 8859/6" "Latin/Arabic" 127 ?G 135 "8859-6") |
| 143 | "Sisheng characters (vowels with tone marks) for Pinyin/Zhuyin."]) | 146 | |
| 144 | 147 | (define-iso-single-byte-charset 'iso-8859-7 'greek-iso8859-7 | |
| 145 | ;; IPA characters for phonetic symbols. | 148 | "ISO/IEC 8859/7" "Latin/Greek" 126 ?F 134 "8859-7") |
| 146 | (define-charset 161 'ipa | 149 | |
| 147 | [1 96 1 0 ?0 1 "IPA" "IPA" | 150 | (define-iso-single-byte-charset 'iso-8859-8 'hebrew-iso8859-8 |
| 148 | "IPA (International Phonetic Association) characters."]) | 151 | "ISO/IEC 8859/8" "Latin/Hebrew" 138 ?H 136 "8859-8") |
| 149 | 152 | ||
| 150 | ;; Vietnamese VISCII. VISCII is 1-byte character set which contains | 153 | (define-iso-single-byte-charset 'iso-8859-9 'latin-iso8859-9 |
| 151 | ;; more than 96 characters. Since Emacs can't handle it as one | 154 | "ISO/IEC 8859/9" "Latin-5" 148 ?M 141 "8859-9") |
| 152 | ;; character set, it is divided into two: lower case letters and upper | 155 | |
| 153 | ;; case letters. | 156 | (define-iso-single-byte-charset 'iso-8859-13 'latin-iso8859-13 |
| 154 | (define-charset 162 'vietnamese-viscii-lower | 157 | "ISO/IEC 8859/13" "Latin-7" nil nil nil "8859-13") |
| 155 | [1 96 1 0 ?1 1 "VISCII lower" "VISCII lower-case" | 158 | |
| 156 | "Vietnamese VISCII1.1 lower-case characters."]) | 159 | (define-iso-single-byte-charset 'iso-8859-14 'latin-iso8859-14 |
| 157 | (define-charset 163 'vietnamese-viscii-upper | 160 | "ISO/IEC 8859/14" "Latin-8" 199 ?_ 143 "8859-14") |
| 158 | [1 96 1 0 ?2 1 "VISCII upper" "VISCII upper-case" | 161 | |
| 159 | "Vietnamese VISCII1.1 upper-case characters."]) | 162 | (define-iso-single-byte-charset 'iso-8859-15 'latin-iso8859-15 |
| 163 | "ISO/IEC 8859/15" "Latin-9" 203 ?b 142 "8859-15") | ||
| 164 | |||
| 165 | (define-charset 'thai-tis620 | ||
| 166 | "TIS620.2533" | ||
| 167 | :short-name "TIS620.2533" | ||
| 168 | :iso-final-char ?T | ||
| 169 | :emacs-mule-id 133 | ||
| 170 | :code-space [32 127] | ||
| 171 | :code-offset #x0E00) | ||
| 172 | |||
| 173 | (define-charset 'tis620-2533 | ||
| 174 | "TIS620.2533" | ||
| 175 | :short-name "TIS620.2533" | ||
| 176 | :ascii-compatible-p t | ||
| 177 | :code-space [0 255] | ||
| 178 | :parents '(ascii eight-bit-control (thai-tis620 . -128))) | ||
| 179 | |||
| 180 | (define-charset 'jisx0201 | ||
| 181 | "JISX0201" | ||
| 182 | :short-name "JISX0201" | ||
| 183 | :long-name "JISX0201" | ||
| 184 | :code-space [33 254] | ||
| 185 | :map "jisx0201") | ||
| 186 | |||
| 187 | (define-charset 'latin-jisx0201 | ||
| 188 | "Roman Part of JISX0201.1976" | ||
| 189 | :short-name "JISX0201 Roman" | ||
| 190 | :long-name "Japanese Roman (JISX0201.1976)" | ||
| 191 | :iso-final-char ?J | ||
| 192 | :emacs-mule-id 138 | ||
| 193 | :code-space [33 126] | ||
| 194 | :parents '(jisx0201)) | ||
| 195 | |||
| 196 | (define-charset 'katakana-jisx0201 | ||
| 197 | "Katakana Part of JISX0201.1976" | ||
| 198 | :short-name "JISX0201 Katakana" | ||
| 199 | :long-name "Japanese Katakana (JISX0201.1976)" | ||
| 200 | :iso-final-char ?I | ||
| 201 | :emacs-mule-id 137 | ||
| 202 | :code-space [33 126] | ||
| 203 | :parents '((jisx0201 . #x80))) | ||
| 204 | |||
| 205 | (define-charset 'chinese-gb2312 | ||
| 206 | "GB2312 Chinese simplified: ISO-IR-58" | ||
| 207 | :short-name "GB2312" | ||
| 208 | :long-name "GB2312: ISO-IR-58" | ||
| 209 | :iso-final-char ?A | ||
| 210 | :emacs-mule-id 145 | ||
| 211 | :code-space [33 126 33 126] | ||
| 212 | :code-offset #x110000 | ||
| 213 | :unify-map "gb2312-1980") | ||
| 214 | |||
| 215 | (define-charset 'chinese-cns11643-1 | ||
| 216 | "CNS11643 Plane 1 Chinese traditional: ISO-IR-171" | ||
| 217 | :short-name "CNS11643-1" | ||
| 218 | :long-name "CNS11643-1 (Chinese traditional): ISO-IR-171" | ||
| 219 | :iso-final-char ?G | ||
| 220 | :emacs-mule-id 149 | ||
| 221 | :code-space [33 126 33 126] | ||
| 222 | :code-offset #x114000 | ||
| 223 | :unify-map "cns11643-1") | ||
| 224 | |||
| 225 | (define-charset 'chinese-cns11643-2 | ||
| 226 | "CNS11643 Plane 2 Chinese traditional: ISO-IR-172" | ||
| 227 | :short-name "CNS11643-2" | ||
| 228 | :long-name "CNS11643-2 (Chinese traditional): ISO-IR-172" | ||
| 229 | :iso-final-char ?H | ||
| 230 | :emacs-mule-id 150 | ||
| 231 | :code-space [33 126 33 126] | ||
| 232 | :code-offset #x118000 | ||
| 233 | :unify-map "cns11643-2") | ||
| 234 | |||
| 235 | (define-charset 'chinese-cns11643-3 | ||
| 236 | "CNS11643 Plane 3 Chinese Traditional: ISO-IR-183" | ||
| 237 | :short-name "CNS11643-3" | ||
| 238 | :long-name "CNS11643-3 (Chinese traditional): ISO-IR-183" | ||
| 239 | :iso-final-char ?I | ||
| 240 | :code-space [33 126 33 126] | ||
| 241 | :emacs-mule-id 246 | ||
| 242 | :code-offset #x11C000) | ||
| 243 | |||
| 244 | (define-charset 'chinese-cns11643-4 | ||
| 245 | "CNS11643 Plane 4 Chinese Traditional: ISO-IR-184" | ||
| 246 | :short-name "CNS11643-4" | ||
| 247 | :long-name "CNS11643-4 (Chinese traditional): ISO-IR-184" | ||
| 248 | :iso-final-char ?J | ||
| 249 | :emacs-mule-id 247 | ||
| 250 | :code-space [33 126 33 126] | ||
| 251 | :code-offset #x120000) | ||
| 252 | |||
| 253 | (define-charset 'chinese-cns11643-5 | ||
| 254 | "CNS11643 Plane 5 Chinese Traditional: ISO-IR-185" | ||
| 255 | :short-name "CNS11643-5" | ||
| 256 | :long-name "CNS11643-5 (Chinese traditional): ISO-IR-185" | ||
| 257 | :iso-final-char ?K | ||
| 258 | :emacs-mule-id 248 | ||
| 259 | :code-space [33 126 33 126] | ||
| 260 | :code-offset #x124000) | ||
| 261 | |||
| 262 | (define-charset 'chinese-cns11643-6 | ||
| 263 | "CNS11643 Plane 6 Chinese Traditional: ISO-IR-186" | ||
| 264 | :short-name "CNS11643-6" | ||
| 265 | :long-name "CNS11643-6 (Chinese traditional): ISO-IR-186" | ||
| 266 | :iso-final-char ?L | ||
| 267 | :emacs-mule-id 249 | ||
| 268 | :code-space [33 126 33 126] | ||
| 269 | :code-offset #x128000) | ||
| 270 | |||
| 271 | (define-charset 'chinese-cns11643-7 | ||
| 272 | "CNS11643 Plane 7 Chinese Traditional: ISO-IR-187" | ||
| 273 | :short-name "CNS11643-7" | ||
| 274 | :long-name "CNS11643-7 (Chinese traditional): ISO-IR-187" | ||
| 275 | :iso-final-char ?M | ||
| 276 | :emacs-mule-id 250 | ||
| 277 | :code-space [33 126 33 126] | ||
| 278 | :code-offset #x12C000) | ||
| 279 | |||
| 280 | (define-charset 'big5 | ||
| 281 | "Big5 (Chinese traditional)" | ||
| 282 | :short-name "Big5" | ||
| 283 | :long-name "Big5" | ||
| 284 | :code-space [#x40 #xFE #xA1 #xFE] | ||
| 285 | :code-offset #x130000 | ||
| 286 | :unify-map "big5") | ||
| 287 | |||
| 288 | (define-charset 'chinese-big5-1 | ||
| 289 | "Frequentry used part (A141-C67E) of Big5 (Chinese traditional)" | ||
| 290 | :short-name "Big5 (Level-1)" | ||
| 291 | :long-name "Big5 (Level-1) A141-C67F" | ||
| 292 | :iso-final-char ?0 | ||
| 293 | :emacs-mule-id 152 | ||
| 294 | :code-space [#x21 #x7E #x21 #x7E] | ||
| 295 | :code-offset #x134000 | ||
| 296 | :unify-map "big5-1") | ||
| 297 | |||
| 298 | (define-charset 'chinese-big5-2 | ||
| 299 | "Less frequentry used part (C940-FEFE) of Big5 (Chinese traditional)" | ||
| 300 | :short-name "Big5 (Level-2)" | ||
| 301 | :long-name "Big5 (Level-2) C940-FEFE" | ||
| 302 | :iso-final-char ?1 | ||
| 303 | :emacs-mule-id 153 | ||
| 304 | :code-space [#x21 #x7E #x21 #x7E] | ||
| 305 | :code-offset #x138000 | ||
| 306 | :unify-map "big5-2") | ||
| 307 | |||
| 308 | (define-charset 'japanese-jisx0208 | ||
| 309 | "JISX0208.1983/1990 Japanese Kanji: ISO-IR-87" | ||
| 310 | :short-name "JISX0208" | ||
| 311 | :long-name "JISX0208.1983/1990 (Japanese): ISO-IR-87" | ||
| 312 | :iso-final-char ?B | ||
| 313 | :emacs-mule-id 146 | ||
| 314 | :code-space [33 126 33 126] | ||
| 315 | :code-offset #x140000 | ||
| 316 | :unify-map "jisx0208-1990") | ||
| 317 | |||
| 318 | (define-charset 'japanese-jisx0208-1978 | ||
| 319 | "JISX0208.1978 Japanese Kanji (so called \"old JIS\"): ISO-IR-42" | ||
| 320 | :short-name "JISX0208.1978" | ||
| 321 | :long-name "JISX0208.1978 (Japanese): ISO-IR-42" | ||
| 322 | :iso-final-char ?@ | ||
| 323 | :emacs-mule-id 144 | ||
| 324 | :code-space [33 126 33 126] | ||
| 325 | :code-offset #x144000 | ||
| 326 | :unify-map "jisx0208-1978") | ||
| 327 | |||
| 328 | (define-charset 'japanese-jisx0212 | ||
| 329 | "JISX0212 Japanese supplement: ISO-IR-159" | ||
| 330 | :short-name "JISX0212" | ||
| 331 | :long-name "JISX0212 (Japanese): ISO-IR-159" | ||
| 332 | :iso-final-char ?D | ||
| 333 | :emacs-mule-id 148 | ||
| 334 | :code-space [33 126 33 126] | ||
| 335 | :code-offset #x148000 | ||
| 336 | :unify-map "jisx0212-1990") | ||
| 337 | |||
| 338 | (define-charset 'japanese-jisx0213-1 | ||
| 339 | "JISX0213 Plane 1 (Japanese)" | ||
| 340 | :short-name "JISX0213-1" | ||
| 341 | :long-name "JISX0213-1" | ||
| 342 | :iso-final-char ?O | ||
| 343 | :emacs-mule-id 151 | ||
| 344 | :code-space [33 126 33 126] | ||
| 345 | :code-offset #x14C000) | ||
| 346 | |||
| 347 | (define-charset 'japanese-jisx0213-2 | ||
| 348 | "JISX0213 Plane 2 (Japanese)" | ||
| 349 | :short-name "JISX0213-2" | ||
| 350 | :long-name "JISX0213-2" | ||
| 351 | :iso-final-char ?P | ||
| 352 | :emacs-mule-id 254 | ||
| 353 | :code-space [33 126 33 126] | ||
| 354 | :code-offset #x150000) | ||
| 355 | |||
| 356 | (define-charset 'korean-ksc5601 | ||
| 357 | "KSC5601 Korean Hangul and Hanja: ISO-IR-149" | ||
| 358 | :short-name "KSC5601" | ||
| 359 | :long-name "KSC5601 (Korean): ISO-IR-149" | ||
| 360 | :iso-final-char ?C | ||
| 361 | :emacs-mule-id 147 | ||
| 362 | :code-space [33 126 33 126] | ||
| 363 | :map "ksc5601-1987") | ||
| 364 | |||
| 365 | (define-charset 'chinese-sisheng | ||
| 366 | "SiSheng characters for PinYin/ZhuYin" | ||
| 367 | :short-name "SiSheng" | ||
| 368 | :long-name "SiSheng (PinYin/ZhuYin)" | ||
| 369 | :iso-final-char ?0 | ||
| 370 | :emacs-mule-id 160 | ||
| 371 | :code-space [33 126] | ||
| 372 | :code-offset #x200000) | ||
| 373 | |||
| 374 | (define-charset 'ipa | ||
| 375 | "IPA (International Phonetic Association)" | ||
| 376 | :short-name "IPA" | ||
| 377 | :long-name "IPA" | ||
| 378 | :iso-final-char ?0 | ||
| 379 | :emacs-mule-id 161 | ||
| 380 | :code-space [32 127] | ||
| 381 | :code-offset #x200080) | ||
| 382 | |||
| 383 | (define-charset 'viscii | ||
| 384 | "VISCII1.1" | ||
| 385 | :short-name "VISCII" | ||
| 386 | :long-name "VISCII 1.1" | ||
| 387 | :code-space [0 255] | ||
| 388 | :map "viscii") | ||
| 389 | |||
| 390 | (define-charset 'vietnamese-viscii-lower | ||
| 391 | "VISCII1.1 lower-case" | ||
| 392 | :short-name "VISCII lower" | ||
| 393 | :long-name "VISCII lower-case" | ||
| 394 | :iso-final-char ?1 | ||
| 395 | :emacs-mule-id 162 | ||
| 396 | :code-space [32 127] | ||
| 397 | :map "viscii-lower") | ||
| 398 | |||
| 399 | (define-charset 'vietnamese-viscii-upper | ||
| 400 | "VISCII1.1 upper-case" | ||
| 401 | :short-name "VISCII upper" | ||
| 402 | :long-name "VISCII upper-case" | ||
| 403 | :iso-final-char ?2 | ||
| 404 | :emacs-mule-id 163 | ||
| 405 | :code-space [32 127] | ||
| 406 | :map "viscii-upper") | ||
| 407 | |||
| 408 | (define-charset 'vscii | ||
| 409 | "VSCII1.1" | ||
| 410 | :short-name "VSCII" | ||
| 411 | :long-name "VSCII" | ||
| 412 | :code-space [0 255] | ||
| 413 | :map "vscii") | ||
| 414 | |||
| 415 | (define-charset 'koi8-r | ||
| 416 | "KOI8-R" | ||
| 417 | :short-name "KOI8-R" | ||
| 418 | :long-name "KOI8-R" | ||
| 419 | :ascii-compatible-p t | ||
| 420 | :code-space [0 255] | ||
| 421 | :map "koi8-r") | ||
| 422 | |||
| 423 | (define-charset-alias 'koi8 'koi8-r) | ||
| 424 | |||
| 425 | (define-charset 'alternativnyj | ||
| 426 | "ALTERNATIVNYJ" | ||
| 427 | :short-name "alternativnyj" | ||
| 428 | :long-name "alternativnyj" | ||
| 429 | :ascii-compatible-p t | ||
| 430 | :code-space [0 255] | ||
| 431 | :map "ibm866") | ||
| 160 | 432 | ||
| 161 | ;; For Arabic, we need three different types of character sets. | 433 | ;; For Arabic, we need three different types of character sets. |
| 162 | ;; Digits are of direction left-to-right and of width 1-column. | 434 | ;; Digits are of direction left-to-right and of width 1-column. |
| 163 | ;; Others are of direction right-to-left and of width 1-column or | 435 | ;; Others are of direction right-to-left and of width 1-column or |
| 164 | ;; 2-column. | 436 | ;; 2-column. |
| 165 | (define-charset 164 'arabic-digit | 437 | (define-charset 'arabic-digit |
| 166 | [1 94 1 0 ?2 0 "Arabic digit" "Arabic digit" | 438 | "Arabic digit" |
| 167 | "Arabic digits."]) | 439 | :short-name "Arabic digit" |
| 168 | (define-charset 165 'arabic-1-column | 440 | :long-name "Arabic digit" |
| 169 | [1 94 1 1 ?3 0 "Arabic 1-col" "Arabic 1-column" | 441 | :iso-final-char ?2 |
| 170 | "Arabic 1-column width glyphs."]) | 442 | :emacs-mule-id 164 |
| 171 | 443 | :code-space [34 42] | |
| 172 | ;; ASCII with right-to-left direction. | 444 | :code-offset #x0600) |
| 173 | (define-charset 166 'ascii-right-to-left | 445 | |
| 174 | [1 94 1 1 ?B 0 "rev ASCII" "ASCII with right-to-left direction" | 446 | (define-charset 'arabic-1-column |
| 175 | "ASCII (left half of ISO 8859-1) with right-to-left direction."]) | 447 | "Arabic 1-column" |
| 448 | :short-name "Arabic 1-col" | ||
| 449 | :long-name "Arabic 1-column" | ||
| 450 | :iso-final-char ?3 | ||
| 451 | :emacs-mule-id 165 | ||
| 452 | :code-space [33 126] | ||
| 453 | :code-offset #x200100) | ||
| 454 | |||
| 455 | (define-charset 'arabic-2-column | ||
| 456 | "Arabic 2-column" | ||
| 457 | :short-name "Arabic 2-col" | ||
| 458 | :long-name "Arabic 2-column" | ||
| 459 | :iso-final-char ?4 | ||
| 460 | :emacs-mule-id 224 | ||
| 461 | :code-space [33 126] | ||
| 462 | :code-offset #x200180) | ||
| 176 | 463 | ||
| 177 | ;; Lao script. | 464 | ;; Lao script. |
| 178 | ;; ISO10646's 0x0E80..0x0EDF are mapped to 0x20..0x7F. | 465 | ;; Codes 0x21..0x7E are mapped to Unicode U+0E81..U+0EDF. |
| 179 | (define-charset 167 'lao | 466 | (define-charset 'lao |
| 180 | [1 94 1 0 ?1 0 "Lao" "Lao" | 467 | "Lao characters (ISO10646 0E81..0EDF)" |
| 181 | "Lao characters (U+0E80..U+0EDF)."]) | 468 | :short-name "Lao" |
| 469 | :long-name "Lao" | ||
| 470 | :iso-final-char ?1 | ||
| 471 | :emacs-mule-id 167 | ||
| 472 | :code-space [33 126] | ||
| 473 | :code-offset #x0E81) | ||
| 474 | |||
| 475 | (define-charset 'mule-lao | ||
| 476 | "Lao characters (ISO10646 0E81..0EDF)" | ||
| 477 | :short-name "Lao" | ||
| 478 | :long-name "Lao" | ||
| 479 | :code-space [0 255] | ||
| 480 | :parents '(ascii eight-bit-control (lao . -128))) | ||
| 182 | 481 | ||
| 183 | ;; CHARSET-IDs 168..223 are not used. | ||
| 184 | |||
| 185 | ;; 1-byte 2-column charsets. Valid range of CHARSET-ID is 224..239. | ||
| 186 | |||
| 187 | (define-charset 224 'arabic-2-column | ||
| 188 | [1 94 2 1 ?4 0 "Arabic 2-col" "Arabic 2-column" | ||
| 189 | "Arabic 2-column glyphs."]) | ||
| 190 | 482 | ||
| 191 | ;; Indian scripts. Symbolic charset for data exchange. Glyphs are | 483 | ;; Indian scripts. Symbolic charset for data exchange. Glyphs are |
| 192 | ;; not assigned. They are automatically converted to each Indian | 484 | ;; not assigned. They are automatically converted to each Indian |
| 193 | ;; script which IS-13194 supports. | 485 | ;; script which IS-13194 supports. |
| 194 | 486 | ||
| 195 | (define-charset 225 'indian-is13194 | 487 | (define-charset 'indian-is13194 |
| 196 | [1 94 2 0 ?5 1 "IS 13194" "Indian IS 13194" | 488 | "Generic Indian charset for data exchange with IS 13194" |
| 197 | "Generic Indian character set for data exchange with IS 13194."]) | 489 | :short-name "IS 13194" |
| 198 | 490 | :long-name "Indian IS 13194" | |
| 199 | ;; CHARSET-IDs 226..239 are not used. | 491 | :iso-final-char ?5 |
| 200 | 492 | :emacs-mule-id 225 | |
| 201 | (define-charset 240 'indian-glyph | 493 | :code-space [33 126] |
| 202 | [2 96 1 0 ?4 0 "Indian glyph" "Indian glyph" | 494 | :code-offset #x180000) |
| 203 | "Glyphs for Indian characters."]) | 495 | |
| 204 | ;; 240 used to be [2 94 1 0 ?6 0 "Indian 1-col" "Indian 1 Column"] | 496 | (define-charset 'indian-glyph |
| 205 | 497 | "Glyphs for Indian characters." | |
| 206 | ;; 2-byte 1-column charsets. Valid range of CHARSET-ID is 240..244. | 498 | :short-name "Indian glyph" |
| 499 | :long-name "Indian glyph" | ||
| 500 | :iso-final-char ?4 | ||
| 501 | :emacs-mule-id 240 | ||
| 502 | :code-space [32 127 32 127] | ||
| 503 | :code-offset #x180100) | ||
| 207 | 504 | ||
| 208 | ;; Actual Glyph for 1-column width. | 505 | ;; Actual Glyph for 1-column width. |
| 209 | (define-charset 241 'tibetan-1-column | 506 | (define-charset 'indian-1-column |
| 210 | [2 94 1 0 ?8 0 "Tibetan 1-col" "Tibetan 1 column" | 507 | "Indian charset for 1-column width glyphs" |
| 211 | "Tibetan 1-column glyphs."]) | 508 | :short-name "Indian 1-col" |
| 212 | 509 | :long-name "Indian 1 Column" | |
| 213 | ;; Subsets of Unicode. | 510 | :iso-final-char ?6 |
| 214 | 511 | :emacs-mule-id 240 | |
| 215 | (define-charset 242 'mule-unicode-2500-33ff | 512 | :code-space [33 126 33 126] |
| 216 | [2 96 1 0 ?2 0 "Unicode subset 2" "Unicode subset (U+2500..U+33FF)" | 513 | :code-offset #x184000) |
| 217 | "Unicode characters of the range U+2500..U+33FF."]) | ||
| 218 | |||
| 219 | (define-charset 243 'mule-unicode-e000-ffff | ||
| 220 | [2 96 1 0 ?3 0 "Unicode subset 3" "Unicode subset (U+E000+FFFF)" | ||
| 221 | "Unicode characters of the range U+E000..U+FFFF."]) | ||
| 222 | |||
| 223 | (define-charset 244 'mule-unicode-0100-24ff | ||
| 224 | [2 96 1 0 ?1 0 "Unicode subset" "Unicode subset (U+0100..U+24FF)" | ||
| 225 | "Unicode characters of the range U+0100..U+24FF."]) | ||
| 226 | |||
| 227 | ;; 2-byte 2-column charsets. Valid range of CHARSET-ID is 245..254. | ||
| 228 | |||
| 229 | ;; Ethiopic characters (Amahric and Tigrigna). | ||
| 230 | (define-charset 245 'ethiopic | ||
| 231 | [2 94 2 0 ?3 0 "Ethiopic" "Ethiopic characters" | ||
| 232 | "Ethiopic characters."]) | ||
| 233 | |||
| 234 | ;; Chinese CNS11643 Plane3 thru Plane7. Although these are official | ||
| 235 | ;; character sets, the use is rare and don't have to be treated | ||
| 236 | ;; space-efficiently in the buffer. | ||
| 237 | (define-charset 246 'chinese-cns11643-3 | ||
| 238 | [2 94 2 0 ?I 0 "CNS11643-3" "CNS11643-3 (Chinese traditional): ISO-IR-183" | ||
| 239 | "CNS11643 Plane 3 Chinese Traditional: ISO-IR-183."]) | ||
| 240 | (define-charset 247 'chinese-cns11643-4 | ||
| 241 | [2 94 2 0 ?J 0 "CNS11643-4" "CNS11643-4 (Chinese traditional): ISO-IR-184" | ||
| 242 | "CNS11643 Plane 4 Chinese Traditional: ISO-IR-184."]) | ||
| 243 | (define-charset 248 'chinese-cns11643-5 | ||
| 244 | [2 94 2 0 ?K 0 "CNS11643-5" "CNS11643-5 (Chinese traditional): ISO-IR-185" | ||
| 245 | "CNS11643 Plane 5 Chinese Traditional: ISO-IR-185."]) | ||
| 246 | (define-charset 249 'chinese-cns11643-6 | ||
| 247 | [2 94 2 0 ?L 0 "CNS11643-6" "CNS11643-6 (Chinese traditional): ISO-IR-186" | ||
| 248 | "CNS11643 Plane 6 Chinese Traditional: ISO-IR-186."]) | ||
| 249 | (define-charset 250 'chinese-cns11643-7 | ||
| 250 | [2 94 2 0 ?M 0 "CNS11643-7" "CNS11643-7 (Chinese traditional): ISO-IR-187" | ||
| 251 | "CNS11643 Plane 7 Chinese Traditional: ISO-IR-187."]) | ||
| 252 | 514 | ||
| 253 | ;; Actual Glyph for 2-column width. | 515 | ;; Actual Glyph for 2-column width. |
| 254 | (define-charset 251 'indian-2-column | 516 | (define-charset 'indian-2-column |
| 255 | [2 94 2 0 ?5 0 "Indian 2-col" "Indian 2 Column" | 517 | "Indian charset for 2-column width glyphs" |
| 256 | "Indian character set for 2-column width glyphs."]) | 518 | :short-name "Indian 2-col" |
| 257 | ;; old indian-1-column characters will be translated to indian-2-column. | 519 | :long-name "Indian 2 Column" |
| 258 | (declare-equiv-charset 2 94 ?6 'indian-2-column) | 520 | :iso-final-char ?5 |
| 259 | 521 | :emacs-mule-id 251 | |
| 260 | ;; Tibetan script. | 522 | :code-space [33 126 33 126] |
| 261 | (define-charset 252 'tibetan | 523 | :parents '(indian-1-column)) |
| 262 | [2 94 2 0 ?7 0 "Tibetan 2-col" "Tibetan 2 column" | 524 | |
| 263 | "Tibetan 2-column width glyphs."]) | 525 | (define-charset 'tibetan |
| 264 | 526 | "Tibetan characters" | |
| 265 | ;; CHARSET-ID 253 is not used. | 527 | :iso-final-char ?7 |
| 266 | 528 | :short-name "Tibetan 2-col" | |
| 267 | ;; JISX0213 Plane 2 | 529 | :long-name "Tibetan 2 column" |
| 268 | (define-charset 254 'japanese-jisx0213-2 | 530 | :iso-final-char ?7 |
| 269 | [2 94 2 0 ?P 0 "JISX0213-2" "JISX0213-2" | 531 | :emacs-mule-id 252 |
| 270 | "JISX0213 Plane 2 (Japanese)."]) | 532 | :code-space [33 126 33 126] |
| 271 | 533 | :code-offset #x190000) | |
| 272 | ;; Tell C code charset ID's of several charsets. | 534 | |
| 273 | (setup-special-charsets) | 535 | (define-charset 'tibetan-1-column |
| 536 | "Tibetan 1 column glyph" | ||
| 537 | :short-name "Tibetan 1-col" | ||
| 538 | :long-name "Tibetan 1 column" | ||
| 539 | :iso-final-char ?8 | ||
| 540 | :emacs-mule-id 241 | ||
| 541 | :code-space [33 126 33 37] | ||
| 542 | :parents '(tibetan)) | ||
| 274 | 543 | ||
| 544 | ;; Subsets of Unicode. | ||
| 545 | (define-charset 'mule-unicode-2500-33ff | ||
| 546 | "Unicode characters of the range U+2500..U+33FF." | ||
| 547 | :short-name "Unicode subset 2" | ||
| 548 | :long-name "Unicode subset (U+2500..U+33FF)" | ||
| 549 | :iso-final-char ?2 | ||
| 550 | :emacs-mule-id 242 | ||
| 551 | :code-space [#x20 #x7f #x20 #x47] | ||
| 552 | :code-offset #x2500) | ||
| 553 | |||
| 554 | (define-charset 'mule-unicode-e000-ffff | ||
| 555 | "Unicode characters of the range U+E000..U+FFFF." | ||
| 556 | :short-name "Unicode subset 3" | ||
| 557 | :long-name "Unicode subset (U+E000..U+FFFF)" | ||
| 558 | :iso-final-char ?3 | ||
| 559 | :emacs-mule-id 243 | ||
| 560 | :code-space [#x20 #x7F #x20 #x75] | ||
| 561 | :code-offset #xE000) | ||
| 562 | |||
| 563 | (define-charset 'mule-unicode-0100-24ff | ||
| 564 | "Unicode characters of the range U+0100..U+24FF." | ||
| 565 | :short-name "Unicode subset" | ||
| 566 | :long-name "Unicode subset (U+0100..U+24FF)" | ||
| 567 | :iso-final-char ?1 | ||
| 568 | :emacs-mule-id 244 | ||
| 569 | :code-space [#x20 #x7F #x20 #x7F] | ||
| 570 | :code-offset #x100) | ||
| 571 | |||
| 572 | (define-charset 'ethiopic | ||
| 573 | "Ethiopic characters for Amharic and Tigrigna." | ||
| 574 | :short-name "Ethiopic" | ||
| 575 | :long-name "Ethiopic characters" | ||
| 576 | :iso-final-char ?3 | ||
| 577 | :emacs-mule-id 245 | ||
| 578 | :code-space [33 126 33 126] | ||
| 579 | :code-offset #x1A0000) | ||
| 580 | |||
| 581 | (define-charset 'mac-roman | ||
| 582 | "Mac Roman charset" | ||
| 583 | :short-name "Mac Roman" | ||
| 584 | :long-name "Mac Roman" | ||
| 585 | :ascii-compatible-p t | ||
| 586 | :code-space [0 255] | ||
| 587 | :map "mac-roman") | ||
| 588 | |||
| 589 | (unify-charset 'chinese-gb2312) | ||
| 590 | (unify-charset 'chinese-cns11643-1) | ||
| 591 | (unify-charset 'chinese-cns11643-2) | ||
| 592 | (unify-charset 'big5) | ||
| 593 | (unify-charset 'chinese-big5-1) | ||
| 594 | (unify-charset 'chinese-big5-2) | ||
| 275 | 595 | ||
| 276 | ;; These are tables for translating characters on decoding and | 596 | ;; These are tables for translating characters on decoding and |
| 277 | ;; encoding. | 597 | ;; encoding. |
| 278 | (define-translation-table | 598 | (setq standard-translation-table-for-decode nil) |
| 279 | 'oldjis-newjis-jisroman-ascii | ||
| 280 | (list (cons (make-char 'japanese-jisx0208-1978) | ||
| 281 | (make-char 'japanese-jisx0208)) | ||
| 282 | (cons (make-char 'latin-jisx0201) (make-char 'ascii)))) | ||
| 283 | (aset (get 'oldjis-newjis-jisroman-ascii 'translation-table) | ||
| 284 | (make-char 'latin-jisx0201 92) (make-char 'latin-jisx0201 92)) | ||
| 285 | (aset (get 'oldjis-newjis-jisroman-ascii 'translation-table) | ||
| 286 | (make-char 'latin-jisx0201 126) (make-char 'latin-jisx0201 126)) | ||
| 287 | |||
| 288 | (setq standard-translation-table-for-decode | ||
| 289 | (get 'oldjis-newjis-jisroman-ascii 'translation-table)) | ||
| 290 | 599 | ||
| 291 | (setq standard-translation-table-for-encode nil) | 600 | (setq standard-translation-table-for-encode nil) |
| 292 | 601 | ||
| @@ -296,130 +605,173 @@ | |||
| 296 | 605 | ||
| 297 | ;;; Make fundamental coding systems. | 606 | ;;; Make fundamental coding systems. |
| 298 | 607 | ||
| 299 | ;; Miscellaneous coding systems which can't be made by | 608 | ;; The coding system `no-conversion' is already defined in coding.c as |
| 300 | ;; `make-coding-system'. | 609 | ;; below: |
| 610 | ;; | ||
| 611 | ;; (define-coding-system 'no-conversion | ||
| 612 | ;; "Do no conversion." | ||
| 613 | ;; :coding-type 'raw-text | ||
| 614 | ;; :mnemonic ?=) | ||
| 301 | 615 | ||
| 302 | (put 'no-conversion 'coding-system | 616 | (define-coding-system 'raw-text |
| 303 | (vector nil ?= "Do no conversion. | 617 | "Raw text, which means text contains random 8-bit codes. |
| 618 | Encoding text with this coding system produces the actual byte | ||
| 619 | sequence of the text in buffers and strings. An exception is made for | ||
| 620 | eight-bit-control characters. Each of them is encoded into a single | ||
| 621 | byte. | ||
| 304 | 622 | ||
| 305 | When you visit a file with this coding, the file is read into a | 623 | When you visit a file with this coding, the file is read into a |
| 306 | unibyte buffer as is, thus each byte of a file is treated as a | 624 | unibyte buffer as is (except for EOL format), thus each byte of a file |
| 307 | character." | 625 | is treated as a character." |
| 308 | (list 'coding-category 'coding-category-binary | 626 | :coding-type 'raw-text |
| 309 | 'alias-coding-systems '(no-conversion)) | 627 | :mnemonic ?t) |
| 310 | nil)) | 628 | |
| 311 | (put 'no-conversion 'eol-type 0) | 629 | (define-coding-system 'undecided |
| 312 | (put 'coding-category-binary 'coding-systems '(no-conversion)) | 630 | "No conversion on encoding, automatic conversion on decoding" |
| 313 | (setq coding-system-list '(no-conversion)) | 631 | :coding-type 'undecided |
| 314 | (setq coding-system-alist '(("no-conversion"))) | 632 | :mnemonic ?- |
| 315 | (register-char-codings 'no-conversion t) | 633 | :charset-list '(ascii)) |
| 316 | |||
| 317 | (define-coding-system-alias 'binary 'no-conversion) | ||
| 318 | |||
| 319 | (put 'undecided 'coding-system | ||
| 320 | (vector t ?- "No conversion on encoding, automatic conversion on decoding" | ||
| 321 | (list 'alias-coding-systems '(undecided) | ||
| 322 | 'safe-charsets '(ascii)) | ||
| 323 | nil)) | ||
| 324 | (setq coding-system-list (cons 'undecided coding-system-list)) | ||
| 325 | (setq coding-system-alist (cons '("undecided") coding-system-alist)) | ||
| 326 | (put 'undecided 'eol-type | ||
| 327 | (make-subsidiary-coding-system 'undecided)) | ||
| 328 | 634 | ||
| 329 | (define-coding-system-alias 'unix 'undecided-unix) | 635 | (define-coding-system-alias 'unix 'undecided-unix) |
| 330 | (define-coding-system-alias 'dos 'undecided-dos) | 636 | (define-coding-system-alias 'dos 'undecided-dos) |
| 331 | (define-coding-system-alias 'mac 'undecided-mac) | 637 | (define-coding-system-alias 'mac 'undecided-mac) |
| 332 | 638 | ||
| 333 | ;; Coding systems not specific to each language environment. | 639 | (define-coding-system 'iso-latin-1 |
| 334 | 640 | "ISO 2022 based 8-bit encoding for Latin-1 (MIME:ISO-8859-1)." | |
| 335 | (make-coding-system | 641 | :coding-type 'iso-2022 |
| 336 | 'emacs-mule 0 ?= | 642 | :mnemonic ?1 |
| 337 | "Emacs internal format used in buffer and string. | 643 | :charset-list '(ascii latin-iso8859-1) |
| 644 | :designation [ascii latin-iso8859-1 nil nil] | ||
| 645 | :mime-charset 'iso-8859-1) | ||
| 338 | 646 | ||
| 339 | Encoding text with this coding system produces the actual byte | 647 | (define-coding-system-alias 'iso-8859-1 'iso-latin-1) |
| 340 | sequence of the text in buffers and strings. An exception is made for | 648 | (define-coding-system-alias 'latin-1 'iso-latin-1) |
| 341 | eight-bit-control characters. Each of them is encoded into a single | ||
| 342 | byte." | ||
| 343 | nil | ||
| 344 | '((safe-charsets . t))) | ||
| 345 | 649 | ||
| 346 | (make-coding-system | 650 | ;; Coding systems not specific to each language environment. |
| 347 | 'raw-text 5 ?t | ||
| 348 | "Raw text, which means text contains random 8-bit codes. | ||
| 349 | Encoding text with this coding system produces the actual byte | ||
| 350 | sequence of the text in buffers and strings. An exception is made for | ||
| 351 | eight-bit-control characters. Each of them is encoded into a single | ||
| 352 | byte. | ||
| 353 | 651 | ||
| 354 | When you visit a file with this coding, the file is read into a | 652 | (define-coding-system 'emacs-mule |
| 355 | unibyte buffer as is (except for EOL format), thus each byte of a file | 653 | "Emacs 21 internal format used in buffer and string." |
| 356 | is treated as a character." | 654 | :coding-type 'emacs-mule |
| 357 | nil | 655 | :mnemonic ?M) |
| 358 | '((safe-charsets . t))) | 656 | |
| 359 | 657 | (define-coding-system 'utf-8 | |
| 360 | (make-coding-system | 658 | "UTF-8." |
| 361 | 'iso-2022-7bit 2 ?J | 659 | :coding-type 'utf-8 |
| 362 | "ISO 2022 based 7-bit encoding using only G0" | 660 | :mnemonic ?U |
| 363 | '((ascii t) nil nil nil | 661 | :charset-list '(unicode)) |
| 364 | short ascii-eol ascii-cntl seven) | 662 | |
| 365 | '((safe-charsets . t) | 663 | (define-coding-system-alias 'mule-utf-8 'utf-8) |
| 366 | (composition . t))) | 664 | |
| 367 | 665 | (define-coding-system 'utf-8-emacs | |
| 368 | (make-coding-system | 666 | "UTF-8 with full support for Emacs characters." |
| 369 | 'iso-2022-7bit-ss2 2 ?$ | 667 | :coding-type 'utf-8 |
| 370 | "ISO 2022 based 7-bit encoding using SS2 for 96-charset" | 668 | :mnemonic ?U |
| 371 | '((ascii t) nil t nil | 669 | :charset-list '(emacs)) |
| 372 | short ascii-eol ascii-cntl seven nil single-shift) | 670 | |
| 373 | '((safe-charsets . t) | 671 | (define-coding-system 'utf-16 |
| 374 | (composition . t))) | 672 | "UTF-16" |
| 375 | 673 | :coding-type 'utf-16 | |
| 376 | (make-coding-system | 674 | :mnemonic ?U |
| 377 | 'iso-2022-7bit-lock 2 ?& | 675 | :charset-list '(unicode)) |
| 378 | "ISO-2022 coding system using Locking-Shift for 96-charset" | 676 | |
| 379 | '((ascii t) t nil nil | 677 | (define-coding-system 'utf-16-le-nosig |
| 380 | nil ascii-eol ascii-cntl seven locking-shift) | 678 | "UTF-16, little endian, no signature" |
| 381 | '((safe-charsets . t) | 679 | :coding-type 'utf-16 |
| 382 | (composition . t))) | 680 | :mnemonic ?U |
| 681 | :charset-list '(unicode) | ||
| 682 | :endian 'little) | ||
| 683 | |||
| 684 | (define-coding-system 'utf-16-be-nosig | ||
| 685 | "UTF-16, big endian, no signature" | ||
| 686 | :coding-type 'utf-16 | ||
| 687 | :mnemonic ?U | ||
| 688 | :charset-list '(unicode) | ||
| 689 | :endian 'big) | ||
| 690 | |||
| 691 | (define-coding-system 'utf-16-le | ||
| 692 | "UTF-16, little endian, with signature" | ||
| 693 | :coding-type 'utf-16 | ||
| 694 | :mnemonic ?U | ||
| 695 | :charset-list '(unicode) | ||
| 696 | :signature t | ||
| 697 | :endian 'little) | ||
| 698 | |||
| 699 | (define-coding-system 'utf-16-be | ||
| 700 | "UTF-16, big endian, with signature" | ||
| 701 | :coding-type 'utf-16 | ||
| 702 | :mnemonic ?U | ||
| 703 | :charset-list '(unicode) | ||
| 704 | :signature t | ||
| 705 | :endian 'big) | ||
| 706 | |||
| 707 | (define-coding-system 'iso-2022-7bit | ||
| 708 | "ISO 2022 based 7-bit encoding using only G0" | ||
| 709 | :coding-type 'iso-2022 | ||
| 710 | :mnemonic ?J | ||
| 711 | :charset-list 'iso-2022 | ||
| 712 | :designation [(ascii t) nil nil nil] | ||
| 713 | :flags '(short ascii-at-eol ascii-at-cntl 7-bit designation composition)) | ||
| 714 | |||
| 715 | (define-coding-system 'iso-2022-7bit-ss2 | ||
| 716 | "ISO 2022 based 7-bit encoding using SS2 for 96-charset" | ||
| 717 | :coding-type 'iso-2022 | ||
| 718 | :mnemonic ?$ | ||
| 719 | :charset-list 'iso-2022 | ||
| 720 | :designation [(ascii 94) nil (nil 96) nil] | ||
| 721 | :flags '(short ascii-at-eol ascii-at-cntl 7-bit | ||
| 722 | designation single-shift composition)) | ||
| 723 | |||
| 724 | (define-coding-system 'iso-2022-7bit-lock | ||
| 725 | "ISO-2022 coding system using Locking-Shift for 96-charset" | ||
| 726 | :coding-type 'iso-2022 | ||
| 727 | :mnemonic ?& | ||
| 728 | :charset-list 'iso-2022 | ||
| 729 | :designation [(ascii 94) (nil 96) nil nil] | ||
| 730 | :flags '(ascii-at-eol ascii-at-cntl 7-bit | ||
| 731 | designation locking-shift composition)) | ||
| 383 | 732 | ||
| 384 | (define-coding-system-alias 'iso-2022-int-1 'iso-2022-7bit-lock) | 733 | (define-coding-system-alias 'iso-2022-int-1 'iso-2022-7bit-lock) |
| 385 | 734 | ||
| 386 | (make-coding-system | 735 | (define-coding-system 'iso-2022-7bit-lock-ss2 |
| 387 | 'iso-2022-7bit-lock-ss2 2 ?i | 736 | "Mixture of ISO-2022-JP, ISO-2022-KR, and ISO-2022-CN" |
| 388 | "Mixture of ISO-2022-JP, ISO-2022-KR, and ISO-2022-CN" | 737 | :coding-type 'iso-2022 |
| 389 | '((ascii t) | 738 | :mnemonic ?i |
| 390 | (nil korean-ksc5601 chinese-gb2312 chinese-cns11643-1 t) | 739 | :charset-list '(ascii |
| 391 | (nil chinese-cns11643-2) | 740 | japanese-jisx0208 japanese-jisx0208-1978 latin-jisx0201 |
| 392 | (nil chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5 | 741 | korean-ksc5601 |
| 393 | chinese-cns11643-6 chinese-cns11643-7) | 742 | chinese-gb2312 |
| 394 | short ascii-eol ascii-cntl seven locking-shift single-shift nil nil nil | 743 | chinese-cns11643-1 chinese-cns11643-2 chinese-cns11643-3 |
| 395 | init-bol) | 744 | chinese-cns11643-4 chinese-cns11643-5 chinese-cns11643-6 |
| 396 | '((safe-charsets ascii japanese-jisx0208 japanese-jisx0208-1978 latin-jisx0201 | 745 | chinese-cns11643-7) |
| 397 | korean-ksc5601 chinese-gb2312 chinese-cns11643-1 | 746 | :designation [(ascii 94) |
| 398 | chinese-cns11643-2 chinese-cns11643-3 chinese-cns11643-4 | 747 | (nil korean-ksc5601 chinese-gb2312 chinese-cns11643-1 96) |
| 399 | chinese-cns11643-5 chinese-cns11643-6 chinese-cns11643-7) | 748 | (nil chinese-cns11643-2) |
| 400 | (composition . t))) | 749 | (nil chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5 |
| 750 | chinese-cns11643-6 chinese-cns11643-7)] | ||
| 751 | :flags '(short ascii-at-eol ascii-at-cntl 7-bit locking-shift | ||
| 752 | single-shift init-bol)) | ||
| 401 | 753 | ||
| 402 | (define-coding-system-alias 'iso-2022-cjk 'iso-2022-7bit-lock-ss2) | 754 | (define-coding-system-alias 'iso-2022-cjk 'iso-2022-7bit-lock-ss2) |
| 403 | 755 | ||
| 404 | (make-coding-system | 756 | (define-coding-system 'iso-2022-8bit-ss2 |
| 405 | 'iso-2022-8bit-ss2 2 ?@ | 757 | "ISO 2022 based 8-bit encoding using SS2 for 96-charset" |
| 406 | "ISO 2022 based 8-bit encoding using SS2 for 96-charset" | 758 | :coding-type 'iso-2022 |
| 407 | '((ascii t) nil t nil | 759 | :mnemonic ?@ |
| 408 | nil ascii-eol ascii-cntl nil nil single-shift) | 760 | :charset-list 'iso-2022 |
| 409 | '((safe-charsets . t) | 761 | :designation [(ascii 94) nil (nil 96) nil] |
| 410 | (composition . t))) | 762 | :flags '(ascii-at-eol ascii-at-cntl designation single-shift composition)) |
| 411 | 763 | ||
| 412 | (make-coding-system | 764 | (define-coding-system 'compound-text |
| 413 | 'compound-text 2 ?x | 765 | "Compound text based generic encoding for decoding unknown messages. |
| 414 | "Compound text based generic encoding for decoding unknown messages. | ||
| 415 | 766 | ||
| 416 | This coding system does not support ICCCM Extended Segments." | 767 | This coding system does not support ICCCM Extended Segments." |
| 417 | '((ascii t) (latin-iso8859-1 katakana-jisx0201 t) t t | 768 | :coding-type 'iso-2022 |
| 418 | nil ascii-eol ascii-cntl nil locking-shift single-shift nil nil nil | 769 | :mnemonic ?x |
| 419 | init-bol nil nil) | 770 | :charset-list 'iso-2022 |
| 420 | '((safe-charsets . t) | 771 | :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil] |
| 421 | (mime-charset . x-ctext) | 772 | :flags '(ascii-at-eol ascii-at-cntl |
| 422 | (composition . t))) | 773 | designation locking-shift single-shift composition) |
| 774 | :mime-charset 'x-ctext) | ||
| 423 | 775 | ||
| 424 | (define-coding-system-alias 'x-ctext 'compound-text) | 776 | (define-coding-system-alias 'x-ctext 'compound-text) |
| 425 | (define-coding-system-alias 'ctext 'compound-text) | 777 | (define-coding-system-alias 'ctext 'compound-text) |
| @@ -429,44 +781,48 @@ This coding system does not support ICCCM Extended Segments." | |||
| 429 | ;; compound-text-with-extensions, see mule.el. Note that this should | 781 | ;; compound-text-with-extensions, see mule.el. Note that this should |
| 430 | ;; not have a mime-charset property, to prevent it from showing up | 782 | ;; not have a mime-charset property, to prevent it from showing up |
| 431 | ;; close to the beginning of coding systems ordered by priority. | 783 | ;; close to the beginning of coding systems ordered by priority. |
| 432 | (make-coding-system | 784 | (define-coding-system 'ctext-no-compositions 2 ?x |
| 433 | 'ctext-no-compositions 2 ?x | ||
| 434 | "Compound text based generic encoding for decoding unknown messages. | 785 | "Compound text based generic encoding for decoding unknown messages. |
| 435 | 786 | ||
| 436 | Like `compound-text', but does not produce escape sequences for compositions." | 787 | Like `compound-text', but does not produce escape sequences for compositions." |
| 437 | '((ascii t) (latin-iso8859-1 katakana-jisx0201 t) t t | 788 | :coding-type 'iso-2022 |
| 438 | nil ascii-eol ascii-cntl nil locking-shift single-shift nil nil nil | 789 | :mnemonic ?x |
| 439 | init-bol nil nil) | 790 | :charset-list 'iso-2022 |
| 440 | '((safe-charsets . t))) | 791 | :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil] |
| 441 | 792 | :flags '(ascii-at-eol ascii-at-cntl | |
| 442 | (make-coding-system | 793 | designation locking-shift single-shift)) |
| 443 | 'compound-text-with-extensions 5 ?x | 794 | |
| 795 | (define-coding-system 'compound-text-with-extensions | ||
| 444 | "Compound text encoding with ICCCM Extended Segment extensions. | 796 | "Compound text encoding with ICCCM Extended Segment extensions. |
| 445 | 797 | ||
| 446 | This coding system should be used only for X selections. It is inappropriate | 798 | This coding system should be used only for X selections. It is inappropriate |
| 447 | for decoding and encoding files, process I/O, etc." | 799 | for decoding and encoding files, process I/O, etc." |
| 448 | nil | 800 | :coding-type 'raw-text |
| 449 | '((post-read-conversion . ctext-post-read-conversion) | 801 | :mnemonic ?x |
| 450 | (pre-write-conversion . ctext-pre-write-conversion))) | 802 | :post-read-conversion 'ctext-post-read-conversion |
| 803 | :pre-write-conversion 'ctext-pre-write-conversion) | ||
| 451 | 804 | ||
| 452 | (define-coding-system-alias | 805 | (define-coding-system-alias |
| 453 | 'x-ctext-with-extensions 'compound-text-with-extensions) | 806 | 'x-ctext-with-extensions 'compound-text-with-extensions) |
| 454 | (define-coding-system-alias | 807 | (define-coding-system-alias |
| 455 | 'ctext-with-extensions 'compound-text-with-extensions) | 808 | 'ctext-with-extensions 'compound-text-with-extensions) |
| 456 | 809 | ||
| 457 | (make-coding-system | 810 | (define-coding-system 'us-ascii |
| 458 | 'iso-safe 2 ?- | 811 | "Convert all characters but ASCII to `?'." |
| 459 | "Convert all characters but ASCII to `?'." | 812 | :coding-type 'charset |
| 460 | '(ascii nil nil nil | 813 | :mnemonic ?- |
| 461 | nil ascii-eol ascii-cntl nil nil nil nil nil nil nil nil t) | 814 | :charset-list '(ascii) |
| 462 | '((safe-charsets ascii))) | 815 | :default-char ?? |
| 816 | :mime-charset 'us-ascii) | ||
| 817 | |||
| 818 | (define-coding-system-alias 'iso-safe 'us-ascii) | ||
| 463 | 819 | ||
| 464 | ;; Use iso-safe for terminal output if some other coding system is not | 820 | ;; Use us-ascii for terminal output if some other coding system is not |
| 465 | ;; specified explicitly. | 821 | ;; specified explicitly. |
| 466 | (set-safe-terminal-coding-system-internal 'iso-safe) | 822 | (set-safe-terminal-coding-system-internal 'us-ascii) |
| 467 | 823 | ||
| 468 | ;; The other coding-systems are defined in each language specific | 824 | ;; The other coding-systems are defined in each language specific |
| 469 | ;; section of languages.el. | 825 | ;; files under lisp/language. |
| 470 | 826 | ||
| 471 | ;; Normally, set coding system to `undecided' before reading a file. | 827 | ;; Normally, set coding system to `undecided' before reading a file. |
| 472 | ;; Compiled Emacs Lisp files (*.elc) are not decoded at all, | 828 | ;; Compiled Emacs Lisp files (*.elc) are not decoded at all, |
| @@ -493,45 +849,20 @@ for decoding and encoding files, process I/O, etc." | |||
| 493 | ;; values are set by the command `set-language-environment' for each | 849 | ;; values are set by the command `set-language-environment' for each |
| 494 | ;; language environment. | 850 | ;; language environment. |
| 495 | 851 | ||
| 496 | (setq coding-category-emacs-mule 'emacs-mule | 852 | (set-coding-system-priority |
| 497 | coding-category-sjis 'japanese-shift-jis | 853 | 'iso-latin-1 |
| 498 | coding-category-iso-7 'iso-2022-7bit | 854 | 'utf-8 |
| 499 | coding-category-iso-7-tight 'iso-2022-jp | 855 | 'iso-2022-7bit |
| 500 | coding-category-iso-8-1 'iso-latin-1 | 856 | ) |
| 501 | coding-category-iso-8-2 'iso-latin-1 | ||
| 502 | coding-category-iso-7-else 'iso-2022-7bit-lock | ||
| 503 | coding-category-iso-8-else 'iso-2022-8bit-ss2 | ||
| 504 | coding-category-ccl nil | ||
| 505 | coding-category-utf-8 'mule-utf-8 | ||
| 506 | coding-category-utf-16-be nil | ||
| 507 | coding-category-utf-16-le nil | ||
| 508 | coding-category-big5 'chinese-big5 | ||
| 509 | coding-category-raw-text 'raw-text | ||
| 510 | coding-category-binary 'no-conversion) | ||
| 511 | |||
| 512 | (set-coding-priority | ||
| 513 | '(coding-category-iso-8-1 | ||
| 514 | coding-category-iso-8-2 | ||
| 515 | coding-category-iso-7-tight | ||
| 516 | coding-category-iso-7 | ||
| 517 | coding-category-iso-7-else | ||
| 518 | coding-category-iso-8-else | ||
| 519 | coding-category-emacs-mule | ||
| 520 | coding-category-raw-text | ||
| 521 | coding-category-sjis | ||
| 522 | coding-category-big5 | ||
| 523 | coding-category-ccl | ||
| 524 | coding-category-binary | ||
| 525 | coding-category-utf-8 | ||
| 526 | coding-category-utf-16-be | ||
| 527 | coding-category-utf-16-le)) | ||
| 528 | 857 | ||
| 529 | 858 | ||
| 530 | ;;; Miscellaneous settings. | 859 | ;;; Miscellaneous settings. |
| 531 | (aset latin-extra-code-table ?\222 t) | ||
| 532 | (aset latin-extra-code-table ?\223 t) | ||
| 533 | (aset latin-extra-code-table ?\224 t) | ||
| 534 | 860 | ||
| 535 | (update-coding-systems-internal) | 861 | ;; Make all multibyte characters self-insert. |
| 862 | (set-char-table-range (nth 1 global-map) | ||
| 863 | (cons (make-char 'unicode 128) (max-char)) | ||
| 864 | 'self-insert-command) | ||
| 865 | |||
| 866 | (aset latin-extra-code-table ?\222 t) | ||
| 536 | 867 | ||
| 537 | ;;; mule-conf.el ends here | 868 | ;;; mule-conf.el ends here |