(ucs-mule-to-mule-unicode): New

translation table. (ccl-encode-mule-utf-8): Use it. (utf-8-untranslated-to-ucs, utf-8-help-echo, utf-8-compose) (utf-8-post-read-conversion, utf-8-pre-write-conversion): New function. (utf-8-subst-table): New variable. (utf-8-compose-scripts): New option. (mule-utf-8): Update safe-charsets, pre-write and post-read conversion.
author: Dave Love 2001-12-07 14:26:02 +0000
committer: Dave Love 2001-12-07 14:26:02 +0000
commit: aa2e3f49f337b3a14e208d1f2ddb34912730c535 (patch)
tree: 1269fbc291ca9363b5431b844d0c3be76db7f44a
parent: 6964db142f8436c6af64cb2ac64a3961d02a8162 (diff)
download: emacs-aa2e3f49f337b3a14e208d1f2ddb34912730c535.tar.gz
emacs-aa2e3f49f337b3a14e208d1f2ddb34912730c535.zip
1 files changed, 171 insertions, 24 deletions
diff --git a/lisp/international/utf-8.el b/lisp/international/utf-8.el
index 85f60409567..f04fc45e7e9 100644
--- a/lisp/international/utf-8.el
+++ b/lisp/international/utf-8.el
@@ -1,4 +1,4 @@
-;;; utf-8.el --- limited UTF-8 decoding/encoding support
+;;; utf-8.el --- limited UTF-8 decoding/encoding support -*- coding: iso-2022-7bit-*-
 ;; Copyright (C) 2001 Electrotechnical Laboratory, JAPAN.
 ;; Licensed to the Free Software Foundation.
@@ -26,8 +26,8 @@
 ;;; Commentary:
-;; The coding-system `mule-utf-8' supports encoding/decoding of the
+;; The coding-system `mule-utf-8' basically supports encoding/decoding
-;; following character sets to and from UTF-8:
+;; of the following character sets to and from UTF-8:
 ;;
 ;;   ascii
 ;;   eight-bit-control
@@ -36,15 +36,14 @@
 ;;   mule-unicode-2500-33ff
 ;;   mule-unicode-e000-ffff
 ;;
-;; Characters of other character sets cannot be encoded with
-;; mule-utf-8.  Note that the mule-unicode charsets currently lack
-;; case and syntax information, so things like `downcase' will only
-;; work for characters from ASCII and Latin-1.
-;;
 ;; On decoding, Unicode characters that do not fit into the above
 ;; character sets are handled as `eight-bit-control' or
 ;; `eight-bit-graphic' characters to retain the information about the
 ;; original byte sequence.
+;;
+;; Characters from other character sets can be encoded with
+;; mule-utf-8 by populating the table `ucs-mule-to-mule-unicode' and
+;; registering the translation with `register-char-codings'.
 ;; UTF-8 is defined in RFC 2279.  A sketch of the encoding is:
@@ -57,6 +56,11 @@
 ;;; Code:
+(defvar ucs-mule-to-mule-unicode (make-translation-table)
+  "Translation table for encoding to `mule-utf-8'.")
+;; Could have been done by ucs-tables loaded before.
+(unless (get 'ucs-mule-to-mule-unicode 'translation-table)
+  (define-translation-table 'ucs-mule-to-mule-unicode ucs-mule-to-mule-unicode))
 (define-ccl-program ccl-decode-mule-utf-8
  ;;
  ;;        charset         | bytes in utf-8 | bytes in emacs
@@ -64,6 +68,7 @@
  ;;         ascii          |       1        |       1
  ;; -----------------------+----------------+---------------
  ;;    eight-bit-control   |       2        |       2
+  ;;    eight-bit-graphic   |       2        |       1
  ;;     latin-iso8859-1    |       2        |       2
  ;; -----------------------+----------------+---------------
  ;; mule-unicode-0100-24ff |       2        |       4
@@ -228,7 +233,8 @@ characters.")
     (loop
      (if (r5 < 0)
          ((r1 = -1)
-           (read-multibyte-character r0 r1))
+           (read-multibyte-character r0 r1)
+           (translate-character ucs-mule-to-mule-unicode r0 r1))
        (;; We have already done read-multibyte-character.
         (r0 = r5)
         (r1 = r6)
@@ -340,26 +346,126 @@ Only characters from the charsets ascii, eight-bit-control,
 eight-bit-graphic, latin-iso8859-1 and mule-unicode-* are recognized.
 Others are encoded as U+FFFD.")
+;; Dummy definition so that the CCL can be checked correctly; the
+;; actual data are loaded on demand.
+(unless (boundp 'ucs-mule-8859-to-mule-unicode) ; don't zap it
+  (define-translation-table 'ucs-mule-8859-to-mule-unicode))
+(defsubst utf-8-untranslated-to-ucs ()
+  (let ((b1 (char-after))
+        (b2 (char-after (1+ (point))))
+        (b3 (char-after (+ 2 (point))))
+        (b4 (char-after (+ 4 (point)))))
+    (if (and b1 b2 b3)
+        (cond ((< b1 ?\xf0)
+               (setq b2 (lsh (logand b2 ?\x3f) 6))
+               (setq b3 (logand b3 ?\x3f))
+               (logior b3 (logior b2 (lsh (logand b1 ?\x0f) 12))))
+              (b4
+               (setq b2 (lsh (logand b2 ?\x3f) 12))
+               (setq b3 (lsh (logand b3 ?\x3f) 6))
+               (setq b4 (logand b4 ?\x3f))
+               (logior b4 (logior b3 (logior b2 (lsh (logand b1 ?\x07)
+                                                     18)))))))))
+(defun utf-8-help-echo (window object position)
+  (format "Untranslated Unicode U+%04X"
+          (get-char-property position 'untranslated-utf-8 object)))
+(defvar utf-8-subst-table nil
+  "If non-nil, a hash table mapping `untranslatable utf-8' to Emacs characters.")
+;; We compose the untranslatable sequences into a single character.
+;; This is infelicitous for editing, because there's currently no
+;; mechanism for treating compositions as atomic, but is OK for
+;; display.  We try to compose an appropriate character from a hash
+;; table of CJK characters to display correctly.  Otherwise we use
+;; U+FFFD.  What we really should have is hash table lookup from CCL
+;; so that we could do this properly.  This function GCs too much.
+(defsubst utf-8-compose ()
+  "Put a suitable composition on an untranslatable sequence.
+Return the sequence's length."
+  (let* ((u (utf-8-untranslated-to-ucs))
+         (l (and u (if (>= u ?\x10000)
+                       4
+                     3)))
+         (subst (and utf-8-subst-table (gethash u utf-8-subst-table))))
+    (when u
+      (put-text-property (point) (min (point-max) (+ l (point)))
+                         'untranslated-utf-8 u)
+      (unless subst
+          (put-text-property (point) (min (point-max) (+ l (point)))
+                             'help-echo 'utf-8-help-echo)
+          (setq subst ?$,3u=(B))
+      (compose-region (point) (+ l (point)) subst)
+      l)))
+(defcustom utf-8-compose-scripts nil
+  "*Non-nil means compose various scipts on decoding utf-8 text."
+  :group 'mule
+  :type 'boolean)       ; omitted in Emacs 21.1
+(defun utf-8-post-read-conversion (length)
+  "Compose untranslated utf-8 sequences into single characters.
+Also compose particular scripts if `utf-8-compose-scripts' is non-nil."
+  (save-excursion
+    ;; Can't do eval-when-compile to insert a multibyte constant
+    ;; version of the string in the loop, since it's always loaded as
+    ;; unibyte from a byte-compiled file.
+    (let ((range (string-as-multibyte "^\341-\377"))) 
+      (while (and (skip-chars-forward
+                   range)
+                  (not (eobp)))
+        (forward-char (utf-8-compose)))))
+  ;; Fixme: Takahashi-san implies it may not work this easily -- needs
+  ;; checking with him.
+  (when (and utf-8-compose-scripts (> length 1))
+    ;; These currently have definitions which cover the relevant
+    ;; Unicodes.  We could avoid loading thai-util &c by checking
+    ;; whether the region contains any characters with the appropriate
+    ;; categories.  There aren't yet Unicode-based rules for Tibetan.
+    (save-excursion (setq length (diacritic-post-read-conversion length)))
+    (save-excursion (setq length (thai-post-read-conversion length)))
+    (save-excursion (setq length (lao-post-read-conversion length)))
+    (save-excursion (setq length (devanagari-post-read-conversion length))))
+  length)
+(defun utf-8-pre-write-conversion (beg end)
+  "Semi-dummy pre-write function effectively to autoload ucs-tables."
+  ;; Ensure translation table is loaded.
+  (require 'ucs-tables)
+  ;; Don't do this again.
+  (coding-system-put 'mule-utf-8 'pre-write-conversion nil)
+  nil)
 (make-coding-system
 'mule-utf-8 4 ?u
 "UTF-8 encoding for Emacs-supported Unicode characters.
-The supported Emacs character sets are:
+The supported Emacs character sets are the following, plus others
-   ascii
+which may be included in the translation table
-   eight-bit-control
+`ucs-mule-to-mule-unicode':
-   eight-bit-graphic
+ ascii
-   latin-iso8859-1
+ eight-bit-control
-   mule-unicode-0100-24ff
+ eight-bit-graphic
-   mule-unicode-2500-33ff
+ latin-iso8859-1
-   mule-unicode-e000-ffff
+ latin-iso8859-2
+ latin-iso8859-3
+ latin-iso8859-4
+ cyrillic-iso8859-5
+ greek-iso8859-7
+ hebrew-iso8859-8
+ latin-iso8859-9
+ latin-iso8859-14
+ latin-iso8859-15
+ mule-unicode-0100-24ff
+ mule-unicode-2500-33ff
+ mule-unicode-e000-ffff
 Unicode characters out of the ranges U+0000-U+33FF and U+E200-U+FFFF
 are decoded into sequences of eight-bit-control and eight-bit-graphic
-characters to preserve their byte sequences.  Emacs characters out of
+characters to preserve their byte sequences and composed to display as
-these ranges are encoded into U+FFFD.
+a single character.  Emacs characters that can't be encoded to these
+ranges are encoded as U+FFFD."
-Note that, currently, characters in the mule-unicode charsets have no
-syntax and case information.  Thus, for instance, upper- and
-lower-casing commands won't work with them."
 '(ccl-decode-mule-utf-8 . ccl-encode-mule-utf-8)
 '((safe-charsets
@@ -367,13 +473,54 @@ lower-casing commands won't work with them."
    eight-bit-control
    eight-bit-graphic
    latin-iso8859-1
+    latin-iso8859-15
+    latin-iso8859-14
+    latin-iso8859-9
+    hebrew-iso8859-8
+    greek-iso8859-7
+    cyrillic-iso8859-5
+    latin-iso8859-4
+    latin-iso8859-3
+    latin-iso8859-2
+    vietnamese-viscii-lower
+    vietnamese-viscii-upper
+    thai-tis620
+    ipa
+    ethiopic
+    indian-is13194
+    katakana-jisx0201
+    chinese-sisheng
+    lao
    mule-unicode-0100-24ff
    mule-unicode-2500-33ff
    mule-unicode-e000-ffff)
   (mime-charset . utf-8)
   (coding-category . coding-category-utf-8)
-   (valid-codes (0 . 255))))
+   (valid-codes (0 . 255))
+   (pre-write-conversion . utf-8-pre-write-conversion)
+   (post-read-conversion . utf-8-post-read-conversion)))
 (define-coding-system-alias 'utf-8 'mule-utf-8)
+;; I think this needs special private charsets defined for the
+;; untranslated sequences, if it's going to work well.
+;;; (defun utf-8-compose-function (pos to pattern &optional string)
+;;;   (let* ((prop (get-char-property pos 'composition string))
+;;;      (l (and prop (- (cadr prop) (car prop)))))
+;;;     (cond ((and l (> l (- to pos)))
+;;;        (delete-region pos to))
+;;;       ((and (> (char-after pos) 224)
+;;;             (< (char-after pos) 256)
+;;;             (save-restriction
+;;;               (narrow-to-region pos to)
+;;;               (utf-8-compose)))
+;;;        t))))
+;;; (dotimes (i 96)
+;;;   (aset composition-function-table
+;;;     (+ 128 i)
+;;;     `((,(string-as-multibyte "[\200-\237\240-\377]")
+;;;        . utf-8-compose-function))))
 ;;; utf-8.el ends here
author	Dave Love	2001-12-07 14:26:02 +0000
committer	Dave Love	2001-12-07 14:26:02 +0000
commit	aa2e3f49f337b3a14e208d1f2ddb34912730c535 (patch)
tree	1269fbc291ca9363b5431b844d0c3be76db7f44a
parent	6964db142f8436c6af64cb2ac64a3961d02a8162 (diff)
download	emacs-aa2e3f49f337b3a14e208d1f2ddb34912730c535.tar.gz emacs-aa2e3f49f337b3a14e208d1f2ddb34912730c535.zip

diff --git a/lisp/international/utf-8.el b/lisp/international/utf-8.el index 85f60409567..f04fc45e7e9 100644 --- a/lisp/international/utf-8.el +++ b/lisp/international/utf-8.el
@@ -1,4 +1,4 @@
1	;;; utf-8.el --- limited UTF-8 decoding/encoding support	1	;;; utf-8.el --- limited UTF-8 decoding/encoding support -- coding: iso-2022-7bit--
2		2
3	;; Copyright (C) 2001 Electrotechnical Laboratory, JAPAN.	3	;; Copyright (C) 2001 Electrotechnical Laboratory, JAPAN.
4	;; Licensed to the Free Software Foundation.	4	;; Licensed to the Free Software Foundation.
@@ -26,8 +26,8 @@
26		26
27	;;; Commentary:	27	;;; Commentary:
28		28
29	;; The coding-system `mule-utf-8' supports encoding/decoding of the	29	;; The coding-system `mule-utf-8' basically supports encoding/decoding
30	;; following character sets to and from UTF-8:	30	;; of the following character sets to and from UTF-8:
31	;;	31	;;
32	;; ascii	32	;; ascii
33	;; eight-bit-control	33	;; eight-bit-control
@@ -36,15 +36,14 @@
36	;; mule-unicode-2500-33ff	36	;; mule-unicode-2500-33ff
37	;; mule-unicode-e000-ffff	37	;; mule-unicode-e000-ffff
38	;;	38	;;
39	;; Characters of other character sets cannot be encoded with
40	;; mule-utf-8. Note that the mule-unicode charsets currently lack
41	;; case and syntax information, so things like `downcase' will only
42	;; work for characters from ASCII and Latin-1.
43	;;
44	;; On decoding, Unicode characters that do not fit into the above	39	;; On decoding, Unicode characters that do not fit into the above
45	;; character sets are handled as `eight-bit-control' or	40	;; character sets are handled as `eight-bit-control' or
46	;; `eight-bit-graphic' characters to retain the information about the	41	;; `eight-bit-graphic' characters to retain the information about the
47	;; original byte sequence.	42	;; original byte sequence.
		43	;;
		44	;; Characters from other character sets can be encoded with
		45	;; mule-utf-8 by populating the table `ucs-mule-to-mule-unicode' and
		46	;; registering the translation with `register-char-codings'.
48		47
49	;; UTF-8 is defined in RFC 2279. A sketch of the encoding is:	48	;; UTF-8 is defined in RFC 2279. A sketch of the encoding is:
50		49
@@ -57,6 +56,11 @@
57		56
58	;;; Code:	57	;;; Code:
59		58
		59	(defvar ucs-mule-to-mule-unicode (make-translation-table)
		60	"Translation table for encoding to `mule-utf-8'.")
		61	;; Could have been done by ucs-tables loaded before.
		62	(unless (get 'ucs-mule-to-mule-unicode 'translation-table)
		63	(define-translation-table 'ucs-mule-to-mule-unicode ucs-mule-to-mule-unicode))
60	(define-ccl-program ccl-decode-mule-utf-8	64	(define-ccl-program ccl-decode-mule-utf-8
61	;;	65	;;
62	;; charset \| bytes in utf-8 \| bytes in emacs	66	;; charset \| bytes in utf-8 \| bytes in emacs
@@ -64,6 +68,7 @@
64	;; ascii \| 1 \| 1	68	;; ascii \| 1 \| 1
65	;; -----------------------+----------------+---------------	69	;; -----------------------+----------------+---------------
66	;; eight-bit-control \| 2 \| 2	70	;; eight-bit-control \| 2 \| 2
		71	;; eight-bit-graphic \| 2 \| 1
67	;; latin-iso8859-1 \| 2 \| 2	72	;; latin-iso8859-1 \| 2 \| 2
68	;; -----------------------+----------------+---------------	73	;; -----------------------+----------------+---------------
69	;; mule-unicode-0100-24ff \| 2 \| 4	74	;; mule-unicode-0100-24ff \| 2 \| 4
@@ -228,7 +233,8 @@ characters.")
228	(loop	233	(loop
229	(if (r5 < 0)	234	(if (r5 < 0)
230	((r1 = -1)	235	((r1 = -1)
231	(read-multibyte-character r0 r1))	236	(read-multibyte-character r0 r1)
		237	(translate-character ucs-mule-to-mule-unicode r0 r1))
232	(;; We have already done read-multibyte-character.	238	(;; We have already done read-multibyte-character.
233	(r0 = r5)	239	(r0 = r5)
234	(r1 = r6)	240	(r1 = r6)
@@ -340,26 +346,126 @@ Only characters from the charsets ascii, eight-bit-control,
340	eight-bit-graphic, latin-iso8859-1 and mule-unicode-* are recognized.	346	eight-bit-graphic, latin-iso8859-1 and mule-unicode-* are recognized.
341	Others are encoded as U+FFFD.")	347	Others are encoded as U+FFFD.")
342		348
		349	;; Dummy definition so that the CCL can be checked correctly; the
		350	;; actual data are loaded on demand.
		351	(unless (boundp 'ucs-mule-8859-to-mule-unicode) ; don't zap it
		352	(define-translation-table 'ucs-mule-8859-to-mule-unicode))
		353
		354	(defsubst utf-8-untranslated-to-ucs ()
		355	(let ((b1 (char-after))
		356	(b2 (char-after (1+ (point))))
		357	(b3 (char-after (+ 2 (point))))
		358	(b4 (char-after (+ 4 (point)))))
		359	(if (and b1 b2 b3)
		360	(cond ((< b1 ?\xf0)
		361	(setq b2 (lsh (logand b2 ?\x3f) 6))
		362	(setq b3 (logand b3 ?\x3f))
		363	(logior b3 (logior b2 (lsh (logand b1 ?\x0f) 12))))
		364	(b4
		365	(setq b2 (lsh (logand b2 ?\x3f) 12))
		366	(setq b3 (lsh (logand b3 ?\x3f) 6))
		367	(setq b4 (logand b4 ?\x3f))
		368	(logior b4 (logior b3 (logior b2 (lsh (logand b1 ?\x07)
		369	18)))))))))
		370
		371	(defun utf-8-help-echo (window object position)
		372	(format "Untranslated Unicode U+%04X"
		373	(get-char-property position 'untranslated-utf-8 object)))
		374
		375	(defvar utf-8-subst-table nil
		376	"If non-nil, a hash table mapping `untranslatable utf-8' to Emacs characters.")
		377
		378	;; We compose the untranslatable sequences into a single character.
		379	;; This is infelicitous for editing, because there's currently no
		380	;; mechanism for treating compositions as atomic, but is OK for
		381	;; display. We try to compose an appropriate character from a hash
		382	;; table of CJK characters to display correctly. Otherwise we use
		383	;; U+FFFD. What we really should have is hash table lookup from CCL
		384	;; so that we could do this properly. This function GCs too much.
		385	(defsubst utf-8-compose ()
		386	"Put a suitable composition on an untranslatable sequence.
		387	Return the sequence's length."
		388	(let* ((u (utf-8-untranslated-to-ucs))
		389	(l (and u (if (>= u ?\x10000)
		390	4
		391	3)))
		392	(subst (and utf-8-subst-table (gethash u utf-8-subst-table))))
		393	(when u
		394	(put-text-property (point) (min (point-max) (+ l (point)))
		395	'untranslated-utf-8 u)
		396	(unless subst
		397	(put-text-property (point) (min (point-max) (+ l (point)))
		398	'help-echo 'utf-8-help-echo)
		399	(setq subst ?$,3u=(B))
		400	(compose-region (point) (+ l (point)) subst)
		401	l)))
		402
		403	(defcustom utf-8-compose-scripts nil
		404	"*Non-nil means compose various scipts on decoding utf-8 text."
		405	:group 'mule
		406	:type 'boolean) ; omitted in Emacs 21.1
		407
		408	(defun utf-8-post-read-conversion (length)
		409	"Compose untranslated utf-8 sequences into single characters.
		410	Also compose particular scripts if `utf-8-compose-scripts' is non-nil."
		411	(save-excursion
		412	;; Can't do eval-when-compile to insert a multibyte constant
		413	;; version of the string in the loop, since it's always loaded as
		414	;; unibyte from a byte-compiled file.
		415	(let ((range (string-as-multibyte "^\341-\377")))
		416	(while (and (skip-chars-forward
		417	range)
		418	(not (eobp)))
		419	(forward-char (utf-8-compose)))))
		420	;; Fixme: Takahashi-san implies it may not work this easily -- needs
		421	;; checking with him.
		422	(when (and utf-8-compose-scripts (> length 1))
		423	;; These currently have definitions which cover the relevant
		424	;; Unicodes. We could avoid loading thai-util &c by checking
		425	;; whether the region contains any characters with the appropriate
		426	;; categories. There aren't yet Unicode-based rules for Tibetan.
		427	(save-excursion (setq length (diacritic-post-read-conversion length)))
		428	(save-excursion (setq length (thai-post-read-conversion length)))
		429	(save-excursion (setq length (lao-post-read-conversion length)))
		430	(save-excursion (setq length (devanagari-post-read-conversion length))))
		431	length)
		432
		433	(defun utf-8-pre-write-conversion (beg end)
		434	"Semi-dummy pre-write function effectively to autoload ucs-tables."
		435	;; Ensure translation table is loaded.
		436	(require 'ucs-tables)
		437	;; Don't do this again.
		438	(coding-system-put 'mule-utf-8 'pre-write-conversion nil)
		439	nil)
		440
343	(make-coding-system	441	(make-coding-system
344	'mule-utf-8 4 ?u	442	'mule-utf-8 4 ?u
345	"UTF-8 encoding for Emacs-supported Unicode characters.	443	"UTF-8 encoding for Emacs-supported Unicode characters.
346	The supported Emacs character sets are:	444	The supported Emacs character sets are the following, plus others
347	ascii	445	which may be included in the translation table
348	eight-bit-control	446	`ucs-mule-to-mule-unicode':
349	eight-bit-graphic	447	ascii
350	latin-iso8859-1	448	eight-bit-control
351	mule-unicode-0100-24ff	449	eight-bit-graphic
352	mule-unicode-2500-33ff	450	latin-iso8859-1
353	mule-unicode-e000-ffff	451	latin-iso8859-2
		452	latin-iso8859-3
		453	latin-iso8859-4
		454	cyrillic-iso8859-5
		455	greek-iso8859-7
		456	hebrew-iso8859-8
		457	latin-iso8859-9
		458	latin-iso8859-14
		459	latin-iso8859-15
		460	mule-unicode-0100-24ff
		461	mule-unicode-2500-33ff
		462	mule-unicode-e000-ffff
354		463
355	Unicode characters out of the ranges U+0000-U+33FF and U+E200-U+FFFF	464	Unicode characters out of the ranges U+0000-U+33FF and U+E200-U+FFFF
356	are decoded into sequences of eight-bit-control and eight-bit-graphic	465	are decoded into sequences of eight-bit-control and eight-bit-graphic
357	characters to preserve their byte sequences. Emacs characters out of	466	characters to preserve their byte sequences and composed to display as
358	these ranges are encoded into U+FFFD.	467	a single character. Emacs characters that can't be encoded to these
359		468	ranges are encoded as U+FFFD."
360	Note that, currently, characters in the mule-unicode charsets have no
361	syntax and case information. Thus, for instance, upper- and
362	lower-casing commands won't work with them."
363		469
364	'(ccl-decode-mule-utf-8 . ccl-encode-mule-utf-8)	470	'(ccl-decode-mule-utf-8 . ccl-encode-mule-utf-8)
365	'((safe-charsets	471	'((safe-charsets
@@ -367,13 +473,54 @@ lower-casing commands won't work with them."
367	eight-bit-control	473	eight-bit-control
368	eight-bit-graphic	474	eight-bit-graphic
369	latin-iso8859-1	475	latin-iso8859-1
		476	latin-iso8859-15
		477	latin-iso8859-14
		478	latin-iso8859-9
		479	hebrew-iso8859-8
		480	greek-iso8859-7
		481	cyrillic-iso8859-5
		482	latin-iso8859-4
		483	latin-iso8859-3
		484	latin-iso8859-2
		485	vietnamese-viscii-lower
		486	vietnamese-viscii-upper
		487	thai-tis620
		488	ipa
		489	ethiopic
		490	indian-is13194
		491	katakana-jisx0201
		492	chinese-sisheng
		493	lao
370	mule-unicode-0100-24ff	494	mule-unicode-0100-24ff
371	mule-unicode-2500-33ff	495	mule-unicode-2500-33ff
372	mule-unicode-e000-ffff)	496	mule-unicode-e000-ffff)
373	(mime-charset . utf-8)	497	(mime-charset . utf-8)
374	(coding-category . coding-category-utf-8)	498	(coding-category . coding-category-utf-8)
375	(valid-codes (0 . 255))))	499	(valid-codes (0 . 255))
		500	(pre-write-conversion . utf-8-pre-write-conversion)
		501	(post-read-conversion . utf-8-post-read-conversion)))
376		502
377	(define-coding-system-alias 'utf-8 'mule-utf-8)	503	(define-coding-system-alias 'utf-8 'mule-utf-8)
378		504
		505	;; I think this needs special private charsets defined for the
		506	;; untranslated sequences, if it's going to work well.
		507
		508	;;; (defun utf-8-compose-function (pos to pattern &optional string)
		509	;;; (let* ((prop (get-char-property pos 'composition string))
		510	;;; (l (and prop (- (cadr prop) (car prop)))))
		511	;;; (cond ((and l (> l (- to pos)))
		512	;;; (delete-region pos to))
		513	;;; ((and (> (char-after pos) 224)
		514	;;; (< (char-after pos) 256)
		515	;;; (save-restriction
		516	;;; (narrow-to-region pos to)
		517	;;; (utf-8-compose)))
		518	;;; t))))
		519
		520	;;; (dotimes (i 96)
		521	;;; (aset composition-function-table
		522	;;; (+ 128 i)
		523	;;; `((,(string-as-multibyte "[\200-\237\240-\377]")
		524	;;; . utf-8-compose-function))))
		525
379	;;; utf-8.el ends here	526	;;; utf-8.el ends here