diff options
| author | Kenichi Handa | 2002-09-30 06:35:13 +0000 |
|---|---|---|
| committer | Kenichi Handa | 2002-09-30 06:35:13 +0000 |
| commit | ad88f5c5c78eb5b1794166433ec8fe39e99eeb1b (patch) | |
| tree | 9e1bc949d2ee8bf28817ab39af3f30117b7bf9d9 | |
| parent | d98276be20e3821dd3d410950258ac814b0d28af (diff) | |
| download | emacs-ad88f5c5c78eb5b1794166433ec8fe39e99eeb1b.tar.gz emacs-ad88f5c5c78eb5b1794166433ec8fe39e99eeb1b.zip | |
(ucs-mule-to-mule-unicode): Don't define
this translation-table name here.
(utf-translation-table-for-encode): New translation-table name.
(utf-fragmentation-table): Renamed from utf-8-fragmentation-table.
(utf-defragmentation-table): New variable.
(ucs-mule-cjk-to-unicode): Renamed from utf-8-subst-rev-table.
(utf-subst-table-for-encode): New translation-table name.
(ucs-unicode-to-mule-cjk): Renamed from utf-8-subst-table.
(utf-subst-table-for-decode): New translation-table name.
(utf-fragment-on-decoding): Renamed from
utf-8-fragment-on-decoding. Correctly handle the case that
unify-8859-on-encoding-mode is off. Handle mule-utf-16-le and
mule-utf-16-be too.
(utf-translate-cjk): Renamed from utf-8-translate-cjk. Handle
mule-utf-16-le and mule-utf-16-be too.
(ccl-decode-mule-utf-8): Refer to utf-translation-table-for-decode
and utf-subst-table-for-decode.
(ccl-encode-mule-utf-8): Refer to utf-translation-table-for-encode
and utf-subst-table-for-encode.
(mule-utf-8): Fix `safe-charsets' property, put `dependency'
property.
| -rw-r--r-- | lisp/international/utf-8.el | 260 |
1 file changed, 149 insertions, 111 deletions
diff --git a/lisp/international/utf-8.el b/lisp/international/utf-8.el
index e201c025ade..fcc35243231 100644
--- a/lisp/international/utf-8.el
+++ b/lisp/international/utf-8.el
| @@ -46,12 +46,13 @@ | |||
| 46 | ;; Fixme: note that reading and writing invalid utf-8 may not be | 46 | ;; Fixme: note that reading and writing invalid utf-8 may not be |
| 47 | ;; idempotent -- to represent the bytes to fix that needs a new charset. | 47 | ;; idempotent -- to represent the bytes to fix that needs a new charset. |
| 48 | ;; | 48 | ;; |
| 49 | ;; Characters from other character sets can be encoded with | 49 | ;; Characters from other character sets can be encoded with mule-utf-8 |
| 50 | ;; mule-utf-8 by populating the table `ucs-mule-to-mule-unicode' and | 50 | ;; by populating the translation-table |
| 51 | ;; registering the translation with `register-char-codings'. Hash | 51 | ;; `utf-translation-table-for-encode' and registering the translation |
| 52 | ;; tables `utf-8-subst-table' and `utf-8-subst-rev-table' are used to | 52 | ;; with `register-char-codings'. Hash tables |
| 53 | ;; support encoding and decoding of about a quarter of the CJK space | 53 | ;; `utf-subst-table-for-decode' and `utf-subst-table-for-encode' are |
| 54 | ;; between U+3400 and U+DFFF. | 54 | ;; used to support encoding and decoding of about a quarter of the CJK |
| 55 | ;; space between U+3400 and U+DFFF. | ||
| 55 | 56 | ||
| 56 | ;; UTF-8 is defined in RFC 2279. A sketch of the encoding is: | 57 | ;; UTF-8 is defined in RFC 2279. A sketch of the encoding is: |
| 57 | 58 | ||
| @@ -64,34 +65,58 @@ | |||
| 64 | 65 | ||
| 65 | ;;; Code: | 66 | ;;; Code: |
| 66 | 67 | ||
| 67 | (defvar ucs-mule-to-mule-unicode (make-translation-table) | 68 | (defvar ucs-mule-to-mule-unicode (make-char-table 'translation-table nil) |
| 68 | "Translation table for encoding to `mule-utf-8'.") | 69 | "Char table mapping characters to latin-iso8859-1 or mule-unicode-*. |
| 69 | (define-translation-table 'ucs-mule-to-mule-unicode | ||
| 70 | ucs-mule-to-mule-unicode) | ||
| 71 | 70 | ||
| 72 | (defvar utf-8-subst-table (make-hash-table :test 'eq)) | 71 | If `unify-8859-on-encoding-mode' is non-nil, this table populates the |
| 73 | (defvar utf-8-subst-rev-table (make-hash-table :test 'eq)) | 72 | translation-table named `utf-translation-table-for-encode'.") |
| 74 | (define-translation-hash-table 'utf-8-subst-table utf-8-subst-table) | 73 | |
| 75 | (define-translation-hash-table 'utf-8-subst-rev-table utf-8-subst-rev-table) | 74 | (define-translation-table 'utf-translation-table-for-encode) |
| 76 | 75 | ||
| 77 | (defvar utf-8-translation-table-for-decode (make-translation-table) | ||
| 78 | "Translation table applied after decoding utf-8 to mule-unicode. | ||
| 79 | This is only actually applied to characters which would normally be | ||
| 80 | decoded into mule-unicode-0100-24ff.") | ||
| 81 | (define-translation-table 'utf-8-translation-table-for-decode | ||
| 82 | utf-8-translation-table-for-decode) | ||
| 83 | 76 | ||
| 84 | ;; Map Cyrillic and Greek to iso-8859 charsets, which take half the | 77 | ;; Map Cyrillic and Greek to iso-8859 charsets, which take half the |
| 85 | ;; space of mule-unicode. For Latin scripts this isn't very | 78 | ;; space of mule-unicode. For Latin scripts this isn't very |
| 86 | ;; important. Hebrew and Arabic might go here too when there's proper | 79 | ;; important. Hebrew and Arabic might go here too when there's proper |
| 87 | ;; support for them. | 80 | ;; support for them. |
| 88 | (defvar utf-8-fragmentation-table (make-translation-table) | 81 | |
| 89 | "Char table normally mapping non-Latin mule-unicode-... characters to iso8859. | 82 | (defvar utf-fragmentation-table (make-char-table 'translation-table nil) |
| 90 | Used as the value of `utf-8-translation-table-for-decode' in | 83 | "Char-table normally mapping non-Latin mule-unicode-* chars to iso-8859-*. |
| 91 | `utf-8-fragment-on-decoding' mode.") | 84 | |
| 85 | If `utf-fragment-on-decoding' is non-nil, this table populates the | ||
| 86 | translation-table named `utf-translation-table-for-decode'") | ||
| 87 | |||
| 88 | (defvar utf-defragmentation-table (make-char-table 'translation-table nil) | ||
| 89 | "Char-table for reverse mapping of `utf-fragmentation-table'. | ||
| 90 | |||
| 91 | If `utf-fragment-on-decoding' is non-nil and | ||
| 92 | `unify-8859-on-encoding-mode' is nil, this table populates the | ||
| 93 | translation-table named `utf-translation-table-for-encode'") | ||
| 94 | |||
| 95 | (define-translation-table 'utf-translation-table-for-decode) | ||
| 96 | |||
| 97 | |||
| 98 | (defvar ucs-mule-cjk-to-unicode (make-hash-table :test 'eq) | ||
| 99 | "Hash table mapping Emacs CJK character sets to Unicode code points. | ||
| 100 | |||
| 101 | If `utf-translate-cjk' is non-nil, this table populates the | ||
| 102 | translation-hash-table named `utf-subst-table-for-encode'.") | ||
| 103 | |||
| 104 | (define-translation-hash-table 'utf-subst-table-for-encode | ||
| 105 | (make-hash-table :test 'eq)) | ||
| 106 | |||
| 107 | (defvar ucs-unicode-to-mule-cjk (make-hash-table :test 'eq) | ||
| 108 | "Hash table mapping Unicode code points to Emacs CJK character sets. | ||
| 109 | |||
| 110 | If `utf-translate-cjk' is non-nil, this table populates the | ||
| 111 | translation-hash-table named `utf-subst-table-for-decode'.") | ||
| 112 | |||
| 113 | (define-translation-hash-table 'utf-subst-table-for-decode | ||
| 114 | (make-hash-table :test 'eq)) | ||
| 115 | |||
| 92 | (mapc | 116 | (mapc |
| 93 | (lambda (pair) | 117 | (lambda (pair) |
| 94 | (aset utf-8-fragmentation-table (car pair) (cdr pair))) | 118 | (aset utf-fragmentation-table (car pair) (cdr pair)) |
| 119 | (aset utf-defragmentation-table (cdr pair) (car pair))) | ||
| 95 | '((?$,1&d(B . ?,F4(B) (?$,1&e(B . ?,F5(B) (?$,1&f(B . ?,F6(B) (?$,1&h(B . ?,F8(B) (?$,1&i(B . ?,F9(B) | 120 | '((?$,1&d(B . ?,F4(B) (?$,1&e(B . ?,F5(B) (?$,1&f(B . ?,F6(B) (?$,1&h(B . ?,F8(B) (?$,1&i(B . ?,F9(B) |
| 96 | (?$,1&j(B . ?,F:(B) (?$,1&l(B . ?,F<(B) (?$,1&n(B . ?,F>(B) (?$,1&o(B . ?,F?(B) (?$,1&p(B . ?,F@(B) | 121 | (?$,1&j(B . ?,F:(B) (?$,1&l(B . ?,F<(B) (?$,1&n(B . ?,F>(B) (?$,1&o(B . ?,F?(B) (?$,1&p(B . ?,F@(B) |
| 97 | (?$,1&q(B . ?,FA(B) (?$,1&r(B . ?,FB(B) (?$,1&s(B . ?,FC(B) (?$,1&t(B . ?,FD(B) (?$,1&u(B . ?,FE(B) | 122 | (?$,1&q(B . ?,FA(B) (?$,1&r(B . ?,FB(B) (?$,1&s(B . ?,FC(B) (?$,1&t(B . ?,FD(B) (?$,1&u(B . ?,FE(B) |
| @@ -128,8 +153,9 @@ Used as the value of `utf-8-translation-table-for-decode' in | |||
| 128 | (?$,1(w(B . ?,Lw(B) (?$,1(x(B . ?,Lx(B) (?$,1(y(B . ?,Ly(B) (?$,1(z(B . ?,Lz(B) (?$,1({(B . ?,L{(B) | 153 | (?$,1(w(B . ?,Lw(B) (?$,1(x(B . ?,Lx(B) (?$,1(y(B . ?,Ly(B) (?$,1(z(B . ?,Lz(B) (?$,1({(B . ?,L{(B) |
| 129 | (?$,1(|(B . ?,L|(B) (?$,1(~(B . ?,L~(B) (?$,1((B . ?,L(B))) | 154 | (?$,1(|(B . ?,L|(B) (?$,1(~(B . ?,L~(B) (?$,1((B . ?,L(B))) |
| 130 | 155 | ||
| 131 | (defcustom utf-8-fragment-on-decoding nil | 156 | |
| 132 | "Whether or not to decode some scripts in UTF-8 text into iso8859 charsets. | 157 | (defcustom utf-fragment-on-decoding nil |
| 158 | "Whether or not to decode some chars in UTF-8/16 text into iso8859 charsets. | ||
| 133 | Setting this means that the relevant Cyrillic and Greek characters are | 159 | Setting this means that the relevant Cyrillic and Greek characters are |
| 134 | decoded into the iso8859 charsets rather than into | 160 | decoded into the iso8859 charsets rather than into |
| 135 | mule-unicode-0100-24ff. The iso8859 charsets take half as much space | 161 | mule-unicode-0100-24ff. The iso8859 charsets take half as much space |
| @@ -140,40 +166,81 @@ for mechanisms to make this largely transparent. | |||
| 140 | 166 | ||
| 141 | Setting this variable outside customize has no effect." | 167 | Setting this variable outside customize has no effect." |
| 142 | :set (lambda (s v) | 168 | :set (lambda (s v) |
| 143 | (setq utf-8-translation-table-for-decode | 169 | (if v |
| 144 | (if v | 170 | (progn |
| 145 | utf-8-fragmentation-table | 171 | (define-translation-table 'utf-translation-table-for-decode |
| 146 | (make-translation-table))) | 172 | utf-fragmentation-table) |
| 147 | (define-translation-table 'utf-8-translation-table-for-decode | 173 | ;; Even if unify-8859-on-encoding-mode is off, make |
| 148 | utf-8-translation-table-for-decode) | 174 | ;; mule-utf-* encode characters in |
| 175 | ;; utf-fragmentation-table. | ||
| 176 | (unless (eq (get 'utf-translation-table-for-encode | ||
| 177 | 'translation-table) | ||
| 178 | ucs-mule-to-mule-unicode) | ||
| 179 | (define-translation-table 'utf-translation-table-for-encode | ||
| 180 | utf-defragmentation-table) | ||
| 181 | (dolist (coding '(mule-utf-8 mule-utf-16-be mule-utf-16-le)) | ||
| 182 | (register-char-codings coding utf-defragmentation-table)))) | ||
| 183 | (define-translation-table 'utf-translation-table-for-decode) | ||
| 184 | ;; When unify-8859-on-encoding-mode is off, be sure to make | ||
| 185 | ;; mule-utf-* disabled for characters in | ||
| 186 | ;; utf-fragmentation-table. | ||
| 187 | (unless (eq (get 'utf-translation-table-for-encode | ||
| 188 | 'translation-table) | ||
| 189 | ucs-mule-to-mule-unicode) | ||
| 190 | (define-translation-table 'utf-translation-table-for-encode) | ||
| 191 | (map-char-table | ||
| 192 | (lambda (key val) | ||
| 193 | (if (and (>= key 128) val) | ||
| 194 | (aset char-coding-system-table key | ||
| 195 | (delq 'mule-utf-8 | ||
| 196 | (delq 'mule-utf-16-le | ||
| 197 | (delq 'mule-utf-16-be | ||
| 198 | (aref char-coding-system-table | ||
| 199 | key))))))) | ||
| 200 | utf-defragmentation-table))) | ||
| 149 | (set-default s v)) | 201 | (set-default s v)) |
| 150 | :version "21.4" | 202 | :version "21.4" |
| 151 | :type 'boolean | 203 | :type 'boolean |
| 152 | :group 'mule) | 204 | :group 'mule) |
| 153 | 205 | ||
| 154 | (defcustom utf-8-translate-cjk nil | 206 | (defcustom utf-translate-cjk nil |
| 155 | "Whether the `mule-utf-8' coding system should encode many CJK characters. | 207 | "Whether the UTF based coding systems should decode/encode CJK characters. |
| 156 | 208 | ||
| 157 | Enabling this loads tables which enable the coding system to encode | 209 | Enabling this loads tables which enable the coding systems: |
| 158 | characters in the charsets `korean-ksc5601', `chinese-gb2312' and | 210 | mule-utf-8, mule-utf-16-le, mule-utf-16-be |
| 211 | to encode characters in the charsets `korean-ksc5601', `chinese-gb2312' and | ||
| 159 | `japanese-jisx0208', and to decode the corresponding unicodes into | 212 | `japanese-jisx0208', and to decode the corresponding unicodes into |
| 160 | such characters. This works by loading the library `utf-8-subst'; see | 213 | such characters. This works by loading the library `utf-8-subst'; see |
| 161 | its commentary. The tables are fairly large (about 33000 entries), so this | 214 | its commentary. The tables are fairly large (about 33000 entries), so this |
| 162 | option is not the default." | 215 | option is not the default." |
| 163 | :link '(emacs-commentary-link "utf-8-subst") | 216 | :link '(emacs-commentary-link "utf-8-subst") |
| 164 | :set (lambda (s v) | 217 | :set (lambda (s v) |
| 165 | (when v | 218 | (if v |
| 166 | (require 'utf-8-subst) | 219 | (progn |
| 167 | (let ((table (make-char-table 'translation-table))) | 220 | (require 'utf-8-subst) |
| 168 | (coding-system-put 'mule-utf-8 'safe-charsets | 221 | (let ((table (make-char-table 'translation-table))) |
| 169 | (append (coding-system-get 'mule-utf-8 | 222 | (maphash (lambda (k v) |
| 170 | 'safe-charsets) | 223 | (aset table k t)) |
| 171 | '(korean-ksc5601 chinese-gb2312 | 224 | ucs-mule-cjk-to-unicode) |
| 172 | japanese-jisx0208))) | 225 | (register-char-codings 'mule-utf-8 table) |
| 173 | (maphash (lambda (k v) | 226 | (register-char-codings 'mule-utf-16-le table) |
| 174 | (aset table k v)) | 227 | (register-char-codings 'mule-utf-16-be table)) |
| 175 | utf-8-subst-rev-table) | 228 | (define-translation-hash-table 'utf-subst-table-for-decode |
| 176 | (register-char-codings 'mule-utf-8 table))) | 229 | ucs-unicode-to-mule-cjk) |
| 230 | (define-translation-hash-table 'utf-subst-table-for-encode | ||
| 231 | ucs-mule-cjk-to-unicode)) | ||
| 232 | (map-char-table | ||
| 233 | (lambda (k v) | ||
| 234 | (if (gethash k ucs-mule-cjk-to-unicode) | ||
| 235 | (aset char-coding-system-table k | ||
| 236 | (delq 'mule-utf-8 | ||
| 237 | (delq 'mule-utf-16-le | ||
| 238 | (delq 'mule-utf-16-be v)))))) | ||
| 239 | char-coding-system-table) | ||
| 240 | (define-translation-hash-table 'utf-subst-table-for-decode | ||
| 241 | (make-hash-table :test 'eq)) | ||
| 242 | (define-translation-hash-table 'utf-subst-table-for-encode | ||
| 243 | (make-hash-table :test 'eq))) | ||
| 177 | (set-default s v)) | 244 | (set-default s v)) |
| 178 | :version "21.4" | 245 | :version "21.4" |
| 179 | :type 'boolean | 246 | :type 'boolean |
| @@ -263,7 +330,7 @@ option is not the default." | |||
| 263 | (r1 %= 96) | 330 | (r1 %= 96) |
| 264 | (r1 += (r2 + 32)) | 331 | (r1 += (r2 + 32)) |
| 265 | (translate-character | 332 | (translate-character |
| 266 | utf-8-translation-table-for-decode r0 r1) | 333 | utf-translation-table-for-decode r0 r1) |
| 267 | (write-multibyte-character r0 r1)))))))) | 334 | (write-multibyte-character r0 r1)))))))) |
| 268 | 335 | ||
| 269 | ;; 3byte encoding | 336 | ;; 3byte encoding |
| @@ -308,14 +375,15 @@ option is not the default." | |||
| 308 | (r1 = (r7 + 32)) | 375 | (r1 = (r7 + 32)) |
| 309 | (r1 += ((r3 + 32) << 7)) | 376 | (r1 += ((r3 + 32) << 7)) |
| 310 | (translate-character | 377 | (translate-character |
| 311 | utf-8-translation-table-for-decode r0 r1) | 378 | utf-translation-table-for-decode r0 r1) |
| 312 | (write-multibyte-character r0 r1)) | 379 | (write-multibyte-character r0 r1)) |
| 313 | 380 | ||
| 314 | ;; mule-unicode-2500-33ff | 381 | ;; mule-unicode-2500-33ff |
| 315 | ;; Fixme: Perhaps allow translation via | 382 | ;; Fixme: Perhaps allow translation via |
| 316 | ;; utf-8-subst-table for #x2e80 up, so that we use | 383 | ;; utf-subst-table-for-decode for #x2e80 up, so |
| 317 | ;; consistent charsets for all of CJK. Would need | 384 | ;; that we use consistent charsets for all of |
| 318 | ;; corresponding change to encoding tables. | 385 | ;; CJK. Would need corresponding change to |
| 386 | ;; encoding tables. | ||
| 319 | (if (r3 < #x3400) | 387 | (if (r3 < #x3400) |
| 320 | ((r0 = ,(charset-id 'mule-unicode-2500-33ff)) | 388 | ((r0 = ,(charset-id 'mule-unicode-2500-33ff)) |
| 321 | (r3 -= #x2500) | 389 | (r3 -= #x2500) |
| @@ -329,7 +397,7 @@ option is not the default." | |||
| 329 | ;; them as eight-bit-{control|graphic}. | 397 | ;; them as eight-bit-{control|graphic}. |
| 330 | (if (r3 < #xd800) | 398 | (if (r3 < #xd800) |
| 331 | ((r4 = r3) ; don't zap r3 | 399 | ((r4 = r3) ; don't zap r3 |
| 332 | (lookup-integer utf-8-subst-table r4 r5) | 400 | (lookup-integer utf-subst-table-for-decode r4 r5) |
| 333 | (if r7 | 401 | (if r7 |
| 334 | ;; got a translation | 402 | ;; got a translation |
| 335 | ((write-multibyte-character r4 r5) | 403 | ((write-multibyte-character r4 r5) |
| @@ -370,7 +438,7 @@ option is not the default." | |||
| 370 | (if (r0 < #xfe) | 438 | (if (r0 < #xfe) |
| 371 | ;; 4byte encoding | 439 | ;; 4byte encoding |
| 372 | ;; keep those bytes as eight-bit-{control|graphic} | 440 | ;; keep those bytes as eight-bit-{control|graphic} |
| 373 | ;; Fixme: allow lookup in utf-8-subst-table. | 441 | ;; Fixme: allow lookup in utf-subst-table-for-decode. |
| 374 | ((read r1 r2 r3) | 442 | ((read r1 r2 r3) |
| 375 | ;; r0 > #xf0, thus eight-bit-graphic | 443 | ;; r0 > #xf0, thus eight-bit-graphic |
| 376 | (write-multibyte-character r6 r0) | 444 | (write-multibyte-character r6 r0) |
| @@ -409,8 +477,8 @@ option is not the default." | |||
| 409 | 477 | ||
| 410 | "CCL program to decode UTF-8. | 478 | "CCL program to decode UTF-8. |
| 411 | Basic decoding is done into the charsets ascii, latin-iso8859-1 and | 479 | Basic decoding is done into the charsets ascii, latin-iso8859-1 and |
| 412 | mule-unicode-*, but see also `utf-8-translation-table-for-decode' and | 480 | mule-unicode-*, but see also `utf-fragmentation-table' and |
| 413 | `utf-8-subst-table'. | 481 | `ucs-mule-cjk-to-unicode'. |
| 414 | Encodings of un-representable Unicode characters are decoded asis into | 482 | Encodings of un-representable Unicode characters are decoded asis into |
| 415 | eight-bit-control and eight-bit-graphic characters.") | 483 | eight-bit-control and eight-bit-graphic characters.") |
| 416 | 484 | ||
| @@ -421,7 +489,7 @@ eight-bit-control and eight-bit-graphic characters.") | |||
| 421 | (if (r5 < 0) | 489 | (if (r5 < 0) |
| 422 | ((r1 = -1) | 490 | ((r1 = -1) |
| 423 | (read-multibyte-character r0 r1) | 491 | (read-multibyte-character r0 r1) |
| 424 | (translate-character ucs-mule-to-mule-unicode r0 r1)) | 492 | (translate-character utf-translation-table-for-encode r0 r1)) |
| 425 | (;; We have already done read-multibyte-character. | 493 | (;; We have already done read-multibyte-character. |
| 426 | (r0 = r5) | 494 | (r0 = r5) |
| 427 | (r1 = r6) | 495 | (r1 = r6) |
| @@ -516,7 +584,7 @@ eight-bit-control and eight-bit-graphic characters.") | |||
| 516 | ((write #xc2) | 584 | ((write #xc2) |
| 517 | (write r1))))))) | 585 | (write r1))))))) |
| 518 | 586 | ||
| 519 | ((lookup-character utf-8-subst-rev-table r0 r1) | 587 | ((lookup-character utf-subst-table-for-encode r0 r1) |
| 520 | (if r7 ; lookup succeeded | 588 | (if r7 ; lookup succeeded |
| 521 | ((r1 = (((r0 & #xf000) >> 12) | #xe0)) | 589 | ((r1 = (((r0 & #xf000) >> 12) | #xe0)) |
| 522 | (r2 = ((r0 & #x3f) | #x80)) | 590 | (r2 = ((r0 & #x3f) | #x80)) |
| @@ -538,10 +606,6 @@ eight-bit-control and eight-bit-graphic characters.") | |||
| 538 | 606 | ||
| 539 | "CCL program to encode into UTF-8.") | 607 | "CCL program to encode into UTF-8.") |
| 540 | 608 | ||
| 541 | ;; Dummy definition so that the CCL can be checked correctly; the | ||
| 542 | ;; actual data are loaded on demand. | ||
| 543 | (unless (boundp 'ucs-mule-8859-to-mule-unicode) ; don't zap it | ||
| 544 | (define-translation-table 'ucs-mule-8859-to-mule-unicode)) | ||
| 545 | 609 | ||
| 546 | (define-ccl-program ccl-untranslated-to-ucs | 610 | (define-ccl-program ccl-untranslated-to-ucs |
| 547 | `(0 | 611 | `(0 |
| @@ -648,7 +712,7 @@ Also compose particular scripts if `utf-8-compose-scripts' is non-nil." | |||
| 648 | ;; ucs-tables is preloaded | 712 | ;; ucs-tables is preloaded |
| 649 | ;; (defun utf-8-pre-write-conversion (beg end) | 713 | ;; (defun utf-8-pre-write-conversion (beg end) |
| 650 | ;; "Semi-dummy pre-write function effectively to autoload ucs-tables." | 714 | ;; "Semi-dummy pre-write function effectively to autoload ucs-tables." |
| 651 | ;; ;; Ensure translation table is loaded. | 715 | ;; ;; Ensure translation-table is loaded. |
| 652 | ;; (require 'ucs-tables) | 716 | ;; (require 'ucs-tables) |
| 653 | ;; ;; Don't do this again. | 717 | ;; ;; Don't do this again. |
| 654 | ;; (coding-system-put 'mule-utf-8 'pre-write-conversion nil) | 718 | ;; (coding-system-put 'mule-utf-8 'pre-write-conversion nil) |
| @@ -657,33 +721,21 @@ Also compose particular scripts if `utf-8-compose-scripts' is non-nil." | |||
| 657 | (make-coding-system | 721 | (make-coding-system |
| 658 | 'mule-utf-8 4 ?u | 722 | 'mule-utf-8 4 ?u |
| 659 | "UTF-8 encoding for Emacs-supported Unicode characters. | 723 | "UTF-8 encoding for Emacs-supported Unicode characters. |
| 660 | The supported Emacs character sets are the following, plus any other | 724 | It supports Unicode characters of these ranges: |
| 661 | characters included in the tables `ucs-mule-to-mule-unicode' and | 725 | U+0000..U+33FF, U+E000..U+FFFF. |
| 662 | `utf-8-subst-rev-table': | 726 | They correspond to these Emacs character sets: |
| 663 | ascii | 727 | ascii, latin-iso8859-1, mule-unicode-0100-24ff, |
| 664 | eight-bit-control | 728 | mule-unicode-2500-33ff, mule-unicode-e000-ffff |
| 665 | eight-bit-graphic | 729 | |
| 666 | latin-iso8859-1 | 730 | On decoding (e.g. reading a file), Unicode characters not in the above |
| 667 | latin-iso8859-2 | 731 | ranges are decoded into sequences of eight-bit-control and |
| 668 | latin-iso8859-3 | 732 | eight-bit-graphic characters to preserve their byte sequences. The |
| 669 | latin-iso8859-4 | 733 | byte sequence is preserved on i/o for valid utf-8, but not necessarily |
| 670 | cyrillic-iso8859-5 | 734 | for invalid utf-8. |
| 671 | greek-iso8859-7 | 735 | |
| 672 | hebrew-iso8859-8 | 736 | On encoding (e.g. writing a file), Emacs characters not belonging to |
| 673 | latin-iso8859-9 | 737 | any of the character sets listed above are encoded into the UTF-8 byte |
| 674 | latin-iso8859-14 | 738 | sequence representing U+FFFD (REPLACEMENT CHARACTER)." |
| 675 | latin-iso8859-15 | ||
| 676 | mule-unicode-0100-24ff | ||
| 677 | mule-unicode-2500-33ff | ||
| 678 | mule-unicode-e000-ffff | ||
| 679 | |||
| 680 | Unicode characters out of the ranges U+0000-U+33FF and U+E200-U+FFFF | ||
| 681 | may be decoded into korean-ksc5601, chinese-gb2312, japanese-jisx0208 | ||
| 682 | \(see user option `utf-8-translate-cjk'); otherwise, sequences of | ||
| 683 | eight-bit-control and eight-bit-graphic characters are used to | ||
| 684 | preserve their byte sequences, and these are composed to display as a | ||
| 685 | single character. Emacs characters that otherwise can't be encoded | ||
| 686 | are encoded as U+FFFD." | ||
| 687 | 739 | ||
| 688 | '(ccl-decode-mule-utf-8 . ccl-encode-mule-utf-8) | 740 | '(ccl-decode-mule-utf-8 . ccl-encode-mule-utf-8) |
| 689 | '((safe-charsets | 741 | '((safe-charsets |
| @@ -691,24 +743,6 @@ are encoded as U+FFFD." | |||
| 691 | eight-bit-control | 743 | eight-bit-control |
| 692 | eight-bit-graphic | 744 | eight-bit-graphic |
| 693 | latin-iso8859-1 | 745 | latin-iso8859-1 |
| 694 | latin-iso8859-15 | ||
| 695 | latin-iso8859-14 | ||
| 696 | latin-iso8859-9 | ||
| 697 | hebrew-iso8859-8 | ||
| 698 | greek-iso8859-7 | ||
| 699 | cyrillic-iso8859-5 | ||
| 700 | latin-iso8859-4 | ||
| 701 | latin-iso8859-3 | ||
| 702 | latin-iso8859-2 | ||
| 703 | vietnamese-viscii-lower | ||
| 704 | vietnamese-viscii-upper | ||
| 705 | thai-tis620 | ||
| 706 | ipa | ||
| 707 | ethiopic | ||
| 708 | indian-is13194 | ||
| 709 | katakana-jisx0201 | ||
| 710 | chinese-sisheng | ||
| 711 | lao | ||
| 712 | mule-unicode-0100-24ff | 746 | mule-unicode-0100-24ff |
| 713 | mule-unicode-2500-33ff | 747 | mule-unicode-2500-33ff |
| 714 | mule-unicode-e000-ffff) | 748 | mule-unicode-e000-ffff) |
| @@ -716,7 +750,11 @@ are encoded as U+FFFD." | |||
| 716 | (coding-category . coding-category-utf-8) | 750 | (coding-category . coding-category-utf-8) |
| 717 | (valid-codes (0 . 255)) | 751 | (valid-codes (0 . 255)) |
| 718 | ;; (pre-write-conversion . utf-8-pre-write-conversion) | 752 | ;; (pre-write-conversion . utf-8-pre-write-conversion) |
| 719 | (post-read-conversion . utf-8-post-read-conversion))) | 753 | (post-read-conversion . utf-8-post-read-conversion) |
| 754 | (dependency unify-8859-on-encoding-mode | ||
| 755 | unify-8859-on-decoding-mode | ||
| 756 | utf-fragment-on-decoding | ||
| 757 | utf-translate-cjk))) | ||
| 720 | 758 | ||
| 721 | (define-coding-system-alias 'utf-8 'mule-utf-8) | 759 | (define-coding-system-alias 'utf-8 'mule-utf-8) |
| 722 | 760 | ||