aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKenichi Handa2002-09-30 06:38:13 +0000
committerKenichi Handa2002-09-30 06:38:13 +0000
commit7d38f8fcf6702e5783c45860442922f243ab45ea (patch)
tree7885a5dc0b2a06f48431425fc3be578d33853764
parent0405f493393a1bd1d86d019ead9aa1384eb6257a (diff)
downloademacs-7d38f8fcf6702e5783c45860442922f243ab45ea.tar.gz
emacs-7d38f8fcf6702e5783c45860442922f243ab45ea.zip
Don't bind
utf-8-translation-table-for-decode while setting up ucs-mule-8859-to-ucs-table, etc. Add `dependency' property to iso-8859-* coding systems. (ucs-unify-8859): Arguments changed to FOR-ENCODE and FOR-DECODE. If FOR-DECODE is non-nil, make ucs-mule-8859-to-mule-unicode populate the translation table named ucs-translation-table-for-decode. If FOR-ENCODE is non-nil, make ucs-mule-to-mule-unicode populate the translation table named utf-translation-table-for-encode. Call register-char-codings for mule-utf-16-be and mule-utf-16-le too. (ucs-fragment-8859): Arguments changed to FOR-ENCODE and FOR-DECODE. If FOR-DECODE is non-nil, make the translation table named ucs-translation-table-for-decode vacant. If FOR-ENCODE is non-nil, make a proper char-table populate the translation table named utf-translation-table-for-encode. Call register-char-codings for all mule-utf-* to reset their status to the original. (unify-8859-on-encoding-mode): Call ucs-unify-8859 and ucs-fragment-8859 with fixed arguments. Set the version to 21.3. (unify-8859-on-decoding-mode): Likewise. Remove dependency. (ccl-encode-unicode-font): Deleted. (ucs-tables-unload-hook): Deleted.
-rw-r--r--lisp/international/ucs-tables.el448
1 files changed, 225 insertions, 223 deletions
diff --git a/lisp/international/ucs-tables.el b/lisp/international/ucs-tables.el
index 15c9e1b52e9..033b951ce2a 100644
--- a/lisp/international/ucs-tables.el
+++ b/lisp/international/ucs-tables.el
@@ -48,12 +48,12 @@
48;; all that users normally care about unifying although, for instance, 48;; all that users normally care about unifying although, for instance,
49;; Greek occurs in as many as nine Emacs charsets. 49;; Greek occurs in as many as nine Emacs charsets.
50 50
51;; The translation table `ucs-mule-to-mule-unicode' is populated, 51;; The translation-table `utf-translation-table-for-encode' is
52;; which could be used for more general unification on decoding. This 52;; populated, which could be used for more general unification on
53;; is used by the `mule-utf-8' coding system to encode extra 53;; decoding. This is used by the `mule-utf-8' coding system to encode
54;; characters, and also by the coding systems set up by code-pages.el. 54;; extra characters, and also by the coding systems set up by
55;; The decoding tables here take account of 55;; code-pages.el. The decoding tables here take account of
56;; `utf-8-fragment-on-decoding' which may specify decoding Greek and 56;; `utf-fragment-on-decoding' which may specify decoding Greek and
57;; Cyrillic into 8859 charsets. 57;; Cyrillic into 8859 charsets.
58 58
59;; Unification also puts a `translation-table-for-input' property on 59;; Unification also puts a `translation-table-for-input' property on
@@ -89,13 +89,13 @@
89;;; Define tables, to be populated later. 89;;; Define tables, to be populated later.
90 90
91(defvar ucs-mule-8859-to-ucs-table (make-translation-table) 91(defvar ucs-mule-8859-to-ucs-table (make-translation-table)
92 "Translation table from Emacs ISO-8859 characters to Unicode. 92 "Char table from Emacs ISO-8859 characters to Unicode.
93This maps Emacs characters from the non-Latin-1 93This maps Emacs characters from the non-Latin-1
94...-iso8859-... charsets to their Unicode code points. This is a 94...-iso8859-... charsets to their Unicode code points. This is a
95many-to-one mapping.") 95many-to-one mapping.")
96 96
97(defvar ucs-mule-8859-to-mule-unicode (make-translation-table) 97(defvar ucs-mule-8859-to-mule-unicode (make-translation-table)
98 "Translation table from Emacs ISO-8859 characters to Mule Unicode. 98 "Char table from Emacs ISO-8859 characters to Mule Unicode.
99This maps Emacs characters from the non-Latin-1 99This maps Emacs characters from the non-Latin-1
100...-iso8859-... charsets to characters from the 100...-iso8859-... charsets to characters from the
101mule-unicode-... charsets. This is a many-to-one mapping. The 101mule-unicode-... charsets. This is a many-to-one mapping. The
@@ -157,6 +157,8 @@ Translates from the iso8859 charsets and `mule-unicode-0100-24ff'.")
157 "Used as `translation-table-for-encode' for iso-8859-15. 157 "Used as `translation-table-for-encode' for iso-8859-15.
158Translates from the iso8859 charsets and `mule-unicode-0100-24ff'.") 158Translates from the iso8859 charsets and `mule-unicode-0100-24ff'.")
159 159
160(defvar translation-table-for-input (make-translation-table))
161
160;;; Set up the tables. 162;;; Set up the tables.
161 163
162;; Most of these tables were derived from ones in Mule-UCS. 164;; Most of these tables were derived from ones in Mule-UCS.
@@ -1097,51 +1099,57 @@ Translates from the iso8859 charsets and `mule-unicode-0100-24ff'.")
1097 (setq i (1+ i))) 1099 (setq i (1+ i)))
1098 (nreverse l)))) 1100 (nreverse l))))
1099 1101
1102 ;; Note: Here, using decode-char is safe because
1103 ;; utf-fragment-on-decoding is by default nil, thus the translation
1104 ;; table `utf-translation-table-for-decode' does nothing.
1105
1100 ;; Convert the lists to the basic char tables. 1106 ;; Convert the lists to the basic char tables.
1101 ;; Ensure `decode-char' doesn't use the fragmentation table. 1107 (dolist (n (list 15 14 9 8 7 5 4 3 2 1))
1102 ;; Fixme: handa suggests using the RESTRICTION arg. 1108 (let ((alist (symbol-value (intern (format "ucs-8859-%d-alist" n)))))
1103 (let ((utf-8-translation-table-for-decode (make-translation-table))) 1109 (dolist (pair alist)
1104 (dolist (n (list 15 14 9 8 7 5 4 3 2 1)) 1110 (let ((mule (car pair))
1105 (let ((alist (symbol-value (intern (format "ucs-8859-%d-alist" n))))) 1111 (uc (cdr pair))
1106 (dolist (pair alist) 1112 (mu (decode-char 'ucs (cdr pair))))
1107 (let ((mule (car pair)) 1113 (aset ucs-mule-8859-to-ucs-table mule uc)
1108 (uc (cdr pair)) 1114 ;; (aset ucs-ucs-to-mule-8859-table uc mule)
1109 (mu (decode-char 'ucs (cdr pair)))) 1115 ;; (aset ucs-mule-unicode-to-mule-8859 mu mule)
1110 (aset ucs-mule-8859-to-ucs-table mule uc) 1116 (aset ucs-mule-8859-to-mule-unicode mule mu)
1111 ;; (aset ucs-ucs-to-mule-8859-table uc mule) 1117 (aset ucs-mule-to-mule-unicode mule mu)))))
1112 ;; (aset ucs-mule-unicode-to-mule-8859 mu mule) 1118
1113 (aset ucs-mule-8859-to-mule-unicode mule mu) 1119 ;; Derive tables that can be used as per-coding-system
1114 (aset ucs-mule-to-mule-unicode mule mu))))) 1120 ;; `translation-table-for-encode's.
1115 ;; Derive tables that can be used as per-coding-system 1121 (dolist (n (list 15 14 9 8 7 5 4 3 2 1))
1116 ;; `translation-table-for-encode's. 1122 (let* ((alist (symbol-value (intern (format "ucs-8859-%d-alist" n))))
1117 (dolist (n (list 15 14 9 8 7 5 4 3 2 1)) 1123 (encode-translator (set (intern (format "ucs-8859-%d-encode-table"
1118 (let* ((alist (symbol-value (intern (format "ucs-8859-%d-alist" n)))) 1124 n))
1119 (encode-translator (set (intern (format "ucs-8859-%d-encode-table" 1125 (make-translation-table)))
1120 n)) 1126 (coding-system
1121 (make-translation-table))) 1127 (coding-system-base (intern (format "iso-8859-%d" n))))
1122 elt) 1128 (dependency (coding-system-get coding-system 'dependency))
1123 ;; Start with the mule-unicode component. 1129 elt)
1124 (dolist (pair alist) 1130 ;; Start with the mule-unicode component (except for latin-iso8859-1).
1125 (let ((mule (car pair)) 1131 (if (/= n 1)
1126 (mu (decode-char 'ucs (cdr pair)))) 1132 (dolist (pair alist)
1127 (aset encode-translator mu mule))) 1133 (let ((mule (car pair))
1128 ;; Find characters from other 8859 sets which map to the same 1134 (mu (decode-char 'ucs (cdr pair))))
1129 ;; unicode as some character in this set. 1135 (aset encode-translator mu mule))))
1130 (map-char-table (lambda (k v) 1136 ;; Find characters from other 8859 sets which map to the same
1131 (if (and (setq elt (rassq v alist)) 1137 ;; unicode as some character in this set.
1132 (not (assq k alist))) 1138 (map-char-table (lambda (k v)
1133 (aset encode-translator k (car elt)))) 1139 (if (and (setq elt (rassq v alist))
1134 ucs-mule-8859-to-ucs-table) 1140 (not (assq k alist)))
1135 (optimize-char-table encode-translator))))) 1141 (aset encode-translator k (car elt))))
1136 1142 ucs-mule-8859-to-ucs-table)
1137;; Register for use in CCL. 1143 (optimize-char-table encode-translator)
1138(define-translation-table 'ucs-mule-8859-to-mule-unicode 1144
1139 ucs-mule-8859-to-mule-unicode) 1145 (or (memq 'unify-8859-on-encoding-mode dependency)
1140(define-translation-table 'ucs-mule-to-mule-unicode 1146 (setq dependency (cons 'unify-8859-on-encoding-mode dependency)))
1141 ucs-mule-to-mule-unicode) 1147 (or (memq 'unify-8859-on-decoding-mode dependency)
1142 1148 (setq dependency (cons 'unify-8859-on-decoding-mode dependency)))
1143(defun ucs-unify-8859 (&optional encode-only) 1149 (coding-system-put coding-system 'dependency dependency))))
1144 "Set up translation tables for unifying characters from ISO 8859. 1150
1151(defun ucs-unify-8859 (for-encode for-decode)
1152 "Set up translation-tables for unifying characters from ISO 8859.
1145 1153
1146On decoding, non-ASCII characters are mapped into the `iso-latin-1' 1154On decoding, non-ASCII characters are mapped into the `iso-latin-1'
1147and `mule-unicode-0100-24ff' charsets. On encoding, these are mapped 1155and `mule-unicode-0100-24ff' charsets. On encoding, these are mapped
@@ -1149,139 +1157,129 @@ back appropriate for the coding system.
1149 1157
1150With prefix arg, do unification on encoding only, i.e. don't unify 1158With prefix arg, do unification on encoding only, i.e. don't unify
1151everything on input operations." 1159everything on input operations."
1152 (interactive "P") 1160 (when for-decode
1153 (unless encode-only
1154 ;; Unify 8859 on decoding. (Non-CCL coding systems only.) 1161 ;; Unify 8859 on decoding. (Non-CCL coding systems only.)
1155 (if utf-8-fragment-on-decoding 1162 (if utf-fragment-on-decoding
1156 (progn (map-char-table 1163 (progn (map-char-table
1157 (lambda (k v) 1164 (lambda (k v)
1158 (if v (aset ucs-mule-to-mule-unicode v nil))) 1165 (if v (aset ucs-mule-8859-to-mule-unicode v nil)))
1159 utf-8-translation-table-for-decode) 1166 utf-fragmentation-table)
1160 (optimize-char-table ucs-mule-to-mule-unicode)) 1167 (optimize-char-table ucs-mule-8859-to-mule-unicode))
1161 ;; Reset in case it was changed. 1168 ;; Reset in case it was changed.
1162 (map-char-table 1169 (map-char-table
1163 (lambda (k v) 1170 (lambda (k v)
1164 (if v (aset ucs-mule-to-mule-unicode v k))) 1171 (if v (aset ucs-mule-8859-to-mule-unicode v k)))
1165 utf-8-translation-table-for-decode)) 1172 utf-fragmentation-table))
1173
1174 ;; For non-CCL coding systems (e.g. iso-latin-2).
1166 (set-char-table-parent standard-translation-table-for-decode 1175 (set-char-table-parent standard-translation-table-for-decode
1167 ucs-mule-8859-to-mule-unicode) 1176 ucs-mule-8859-to-mule-unicode)
1177 ;; For CCL coding systems other than mule-utf-*
1178 (define-translation-table 'ucs-translation-table-for-decode
1179 ucs-mule-8859-to-mule-unicode)
1180
1168 ;; Translate Quail input globally. 1181 ;; Translate Quail input globally.
1169 (setq-default translation-table-for-input ucs-mule-to-mule-unicode) 1182 (setq-default translation-table-for-input ucs-mule-to-mule-unicode)
1170 ;; In case these are set up, but we should use the global 1183 ;; In case these are set up, but we should use the global
1171 ;; translation table. 1184 ;; translation-table.
1172 (remove-hook 'quail-activate-hook 'ucs-quail-activate) 1185 (remove-hook 'quail-activate-hook 'ucs-quail-activate)
1173 (remove-hook 'minibuffer-setup-hook 'ucs-minibuffer-setup)) 1186 (remove-hook 'minibuffer-setup-hook 'ucs-minibuffer-setup))
1174 ;; Adjust the 8859 coding systems to fragment the unified characters 1187
1175 ;; on encoding. 1188 (when for-encode
1176 (dolist (n '(1 2 3 4 5 7 8 9 14 15)) 1189 ;; Make mule-utf-* encode all characters in ucs-mule-to-mule-unicode.
1177 (let* ((coding-system 1190 (let ((coding-list '(mule-utf-8 mule-utf-16-be mule-utf-16-le)))
1178 (coding-system-base (intern (format "iso-8859-%d" n)))) 1191 (define-translation-table 'utf-translation-table-for-encode
1179 (table (symbol-value 1192 ucs-mule-to-mule-unicode)
1180 (intern (format "ucs-8859-%d-encode-table" n)))) 1193 (dolist (coding coding-list)
1181 (safe (coding-system-get coding-system 'safe-chars))) 1194 (set-char-table-parent (coding-system-get coding 'safe-chars)
1182 ;; Actually, the coding system's safe-chars are not normally 1195 ucs-mule-to-mule-unicode)
1183 ;; used after they've been registered, but we might as well 1196 (register-char-codings coding ucs-mule-to-mule-unicode)))
1184 ;; record them. Setting the parent here is a convenience. 1197
1185 (set-char-table-parent safe table) 1198 ;; Adjust the 8859 coding systems to fragment the unified characters
1186 ;; Update the table of what encodes to what. 1199 ;; on encoding.
1187 (register-char-codings coding-system table) 1200 (dolist (n '(1 2 3 4 5 7 8 9 14 15))
1188 (coding-system-put coding-system 'translation-table-for-encode table) 1201 (let* ((coding-system
1189 (coding-system-put coding-system 'translation-table-for-input table))) 1202 (coding-system-base (intern (format "iso-8859-%d" n))))
1190 ;; Arrange local translation tables for Quail input. 1203 (table (symbol-value
1191 (add-hook 'quail-activate-hook 'ucs-quail-activate) 1204 (intern (format "ucs-8859-%d-encode-table" n))))
1192 (add-hook 'minibuffer-setup-hook 'ucs-minibuffer-setup)) 1205 (safe (coding-system-get coding-system 'safe-chars)))
1193 1206 ;; Actually, the coding system's safe-chars are not normally
1194(defun ucs-fragment-8859 (&optional encode-only) 1207 ;; used after they've been registered, but we might as well
1208 ;; record them. Setting the parent here is a convenience.
1209 (set-char-table-parent safe table)
1210 ;; Update the table of what encodes to what.
1211 (register-char-codings coding-system table)
1212 (coding-system-put coding-system 'translation-table-for-encode table)
1213 (coding-system-put coding-system 'translation-table-for-input table)))
1214 ;; Arrange local translation-tables for Quail input.
1215 (add-hook 'quail-activate-hook 'ucs-quail-activate)
1216 (add-hook 'minibuffer-setup-hook 'ucs-minibuffer-setup)))
1217
1218(defun ucs-fragment-8859 (for-encode for-decode)
1195 "Undo the unification done by `ucs-unify-8859'. 1219 "Undo the unification done by `ucs-unify-8859'.
1196With prefix arg, undo unification on encoding only, i.e. don't undo 1220With prefix arg, undo unification on encoding only, i.e. don't undo
1197unification on input operations." 1221unification on input operations."
1198 (interactive "P") 1222 (when for-decode
1199 ;; Maybe fix decoding. 1223 ;; Don't Unify 8859 on decoding.
1200 (unless encode-only 1224 ;; For non-CCL coding systems (e.g. iso-latin-2).
1201 ;; Unify 8859 on decoding. (Non-CCL coding systems only.)
1202 (set-char-table-parent standard-translation-table-for-decode nil) 1225 (set-char-table-parent standard-translation-table-for-decode nil)
1226 ;; For CCL coding systems other than mule-utf-* (e.g. cyrillic-koi8).
1227 (define-translation-table 'ucs-translation-table-for-decode)
1228 ;; For Quail input.
1203 (setq-default translation-table-for-input nil)) 1229 (setq-default translation-table-for-input nil))
1204 ;; Fix encoding. For each charset, remove the entries in 1230
1205 ;; `char-coding-system-table' added to its safe-chars table (as its 1231 (when for-encode
1206 ;; parent). 1232 ;; Make mule-utf-* disabled for all characters in
1207 (dolist (n '(1 2 3 4 5 7 8 9 14 15)) 1233 ;; ucs-mule-to-mule-unicode but what originally supported and what
1208 (let* ((coding-system 1234 ;; translated bt utf-translation-table-for-decode when
1209 (coding-system-base (intern (format "iso-8859-%d" n)))) 1235 ;; `utf-fragment-on-decoding' is non-nil.
1210 (table (symbol-value 1236 (let ((coding-list '(mule-utf-8 mule-utf-16-be mule-utf-16-le))
1211 (intern (format "ucs-8859-%d-encode-table" n)))) 1237 (safe (coding-system-get 'mule-utf-8 'safe-chars)))
1212 (safe (coding-system-get coding-system 'safe-chars))) 1238 (dolist (coding coding-list)
1239 (set-char-table-parent (coding-system-get coding 'safe-chars) nil))
1240 ;; Here we assume that all mule-utf-* have the same character
1241 ;; repertory, thus we can use SAFE for all of them.
1213 (map-char-table 1242 (map-char-table
1214 (lambda (key val) 1243 (lambda (key val)
1215 (if (and (>= key 128) val) 1244 (if (and (>= key 128) val
1216 (let ((codings (aref char-coding-system-table key))) 1245 (not (aref safe key)))
1217 (aset char-coding-system-table key 1246 (aset char-coding-system-table key
1218 (delq coding-system codings))))) 1247 (delq 'mule-utf-8
1219 (char-table-parent safe)) 1248 (delq 'mule-utf-16-le
1220 (set-char-table-parent safe nil) 1249 (delq 'mule-utf-16-be
1221 (coding-system-put coding-system 'translation-table-for-encode nil) 1250 (aref char-coding-system-table key)))))))
1222 (coding-system-put coding-system 'translation-table-for-input nil))) 1251 ucs-mule-to-mule-unicode)
1223 (optimize-char-table char-coding-system-table) 1252
1224 (remove-hook 'quail-activate-hook 'ucs-quail-activate) 1253 (if (not utf-fragment-on-decoding)
1225 (remove-hook 'minibuffer-setup-hook 'ucs-minibuffer-setup)) 1254 (define-translation-table 'utf-translation-table-for-encode)
1226 1255 (define-translation-table 'utf-translation-table-for-encode
1227(define-minor-mode unify-8859-on-encoding-mode 1256 utf-defragmentation-table)
1228 "Set up translation tables for unifying ISO 8859 characters on encoding. 1257 (dolist (coding coding-list)
1229 1258 (register-char-codings coding utf-defragmentation-table))))
1230The ISO 8859 characters sets overlap, e.g. 8859-1 (Latin-1) and 1259
12318859-15 (Latin-9) differ only in a few characters. Emacs normally 1260 ;; For each charset, remove the entries in
1232distinguishes equivalent characters from those ISO-8859 character sets 1261 ;; `char-coding-system-table' added to its safe-chars table (as
1233which are built in to Emacs. This behaviour is essentially inherited 1262 ;; its parent).
1234from the European-originated international standards. Treating them 1263 (dolist (n '(1 2 3 4 5 7 8 9 14 15))
1235equivalently, by translating to and from a single representation is 1264 (let* ((coding-system
1236called `unification'. (The `utf-8' coding system treats the 1265 (coding-system-base (intern (format "iso-8859-%d" n))))
1237characters of European scripts in a unified manner.) 1266 (table (symbol-value
1238 1267 (intern (format "ucs-8859-%d-encode-table" n))))
1239In this mode, on encoding -- i.e. output operations -- non-ASCII 1268 (safe (coding-system-get coding-system 'safe-chars)))
1240characters from the built-in ISO 8859 and `mule-unicode-0100-24ff' 1269 (when (char-table-parent safe)
1241charsets are handled automatically by the coding system used if it can 1270 (map-char-table
1242represent them. Thus, say, an e-acute from the Latin-1 charset (the 1271 (lambda (key val)
1243unified representation) in a buffer saved as Latin-9 will be encoded 1272 (if (and (>= key 128) val)
1244directly to a byte value 233. By default, in contrast, you would be 1273 (let ((codings (aref char-coding-system-table key)))
1245prompted for a general coding system to use for saving the file, which 1274 (aset char-coding-system-table key
1246can cope with separate Latin-1 and Latin-9 representations of e-acute. 1275 (delq coding-system codings)))))
1247 1276 (char-table-parent safe))
1248Also sets hooks that arrange `translation-table-for-input' to be set 1277 (set-char-table-parent safe nil))
1249up locally when Quail input methods are activated. This will often 1278 (coding-system-put coding-system 'translation-table-for-encode nil)
1250allow input generated by Quail input methods to conform with what the 1279 (coding-system-put coding-system 'translation-table-for-input nil)))
1251buffer's file coding system can encode. Thus you could use a Latin-2 1280 (optimize-char-table char-coding-system-table)
1252input method to search for e-acute in a Latin-1 buffer. 1281 (remove-hook 'quail-activate-hook 'ucs-quail-activate)
1253 1282 (remove-hook 'minibuffer-setup-hook 'ucs-minibuffer-setup)))
1254See also command `unify-8859-on-decoding-mode'."
1255 :group 'mule
1256 :global t
1257 :init-value t
1258 (if unify-8859-on-encoding-mode
1259 (ucs-unify-8859 t)
1260 (ucs-fragment-8859 t)))
1261
1262(custom-add-version 'unify-8859-on-encoding-mode "21.4")
1263
1264(define-minor-mode unify-8859-on-decoding-mode
1265 "Set up translation tables for unifying ISO 8859 characters on decoding.
1266On decoding, i.e. input operations, non-ASCII characters from the
1267built-in ISO 8859 charsets are unified by mapping them into the
1268`iso-latin-1' and `mule-unicode-0100-24ff' charsets.
1269
1270Also sets `translation-table-for-input' globally, so that Quail input
1271methods produce unified characters.
1272
1273See also command `unify-8859-on-encoding-mode' and the user option
1274`utf-8-fragment-on-decoding'."
1275 :group 'mule
1276 :global t
1277 :init-value nil
1278 (if unify-8859-on-decoding-mode
1279 (ucs-unify-8859)
1280 (ucs-fragment-8859)))
1281
1282(custom-add-dependencies 'unify-8859-on-decoding-mode
1283 '(utf-8-fragment-on-decoding))
1284(custom-add-version 'unify-8859-on-decoding-mode "21.4")
1285 1283
1286(defun ucs-insert (arg) 1284(defun ucs-insert (arg)
1287 "Insert the Emacs character representation of the given Unicode. 1285 "Insert the Emacs character representation of the given Unicode.
@@ -2456,15 +2454,70 @@ Interactively, prompts for a hex string giving the code."
2456 ((memq cs '(lao thai-tis620 tibetan-iso-8bit)) 2454 ((memq cs '(lao thai-tis620 tibetan-iso-8bit))
2457 (coding-system-put cs 'translation-table-for-input cs))))) 2455 (coding-system-put cs 'translation-table-for-input cs)))))
2458 (dolist (c safe-charsets) 2456 (dolist (c safe-charsets)
2459 (aset table (make-char c) t)) 2457 (aset table (make-char c) t))))
2460 (coding-system-put 'mule-utf-8 'safe-charsets
2461 (append (coding-system-get 'mule-utf-8 'safe-charsets)
2462 safe-charsets))
2463 (register-char-codings 'mule-utf-8 table)))
2464 2458
2465(defvar translation-table-for-input (make-translation-table)) 2459(define-minor-mode unify-8859-on-encoding-mode
2460 "Set up translation-tables for unifying ISO 8859 characters on encoding.
2461
2462The ISO 8859 characters sets overlap, e.g. 8859-1 (Latin-1) and
24638859-15 (Latin-9) differ only in a few characters. Emacs normally
2464distinguishes equivalent characters from those ISO-8859 character sets
2465which are built in to Emacs. This behaviour is essentially inherited
2466from the European-originated international standards. Treating them
2467equivalently, by translating to and from a single representation is
2468called `unification'. (The `utf-8' coding system treats the
2469characters of European scripts in a unified manner.)
2470
2471In this mode, on encoding -- i.e. output operations -- non-ASCII
2472characters from the built-in ISO 8859 and `mule-unicode-0100-24ff'
2473charsets are handled automatically by the coding system used if it can
2474represent them. Thus, say, an e-acute from the Latin-1 charset (the
2475unified representation) in a buffer saved as Latin-9 will be encoded
2476directly to a byte value 233. By default, in contrast, you would be
2477prompted for a general coding system to use for saving the file, which
2478can cope with separate Latin-1 and Latin-9 representations of e-acute.
2466 2479
2467;; Arrange to set up the translation table for Quail. This probably 2480Also sets hooks that arrange `translation-table-for-input' to be set
2481up locally when Quail input methods are activated. This will often
2482allow input generated by Quail input methods to conform with what the
2483buffer's file coding system can encode. Thus you could use a Latin-2
2484input method to search for e-acute in a Latin-1 buffer.
2485
2486See also command `unify-8859-on-decoding-mode'."
2487 :group 'mule
2488 :global t
2489 :init-value t
2490 (if unify-8859-on-encoding-mode
2491 (ucs-unify-8859 t nil)
2492 (ucs-fragment-8859 t nil)))
2493
2494(custom-add-version 'unify-8859-on-encoding-mode "21.3")
2495
2496(define-minor-mode unify-8859-on-decoding-mode
2497 "Set up translation-tables for unifying ISO 8859 characters on decoding.
2498On decoding, i.e. input operations, non-ASCII characters from the
2499built-in ISO 8859 charsets are unified by mapping them into the
2500`iso-latin-1' and `mule-unicode-0100-24ff' charsets.
2501
2502Also sets `translation-table-for-input' globally, so that Quail input
2503methods produce unified characters.
2504
2505See also command `unify-8859-on-encoding-mode' and the user option
2506`utf-fragment-on-decoding'."
2507 :group 'mule
2508 :global t
2509 :init-value nil
2510 (if unify-8859-on-decoding-mode
2511 (ucs-unify-8859 nil t)
2512 (ucs-fragment-8859 nil t)))
2513
2514(custom-add-version 'unify-8859-on-decoding-mode "21.3")
2515
2516;; Synchronize the status with the initial value of
2517;; unify-8859-on-encoding-mode and unify-8859-on-decoding-mode.
2518(ucs-unify-8859 t nil)
2519
2520;; Arrange to set up the translation-table for Quail. This probably
2468;; isn't foolproof. 2521;; isn't foolproof.
2469(defun ucs-quail-activate () 2522(defun ucs-quail-activate ()
2470 "Set up an appropriate `translation-table-for-input' for current buffer. 2523 "Set up an appropriate `translation-table-for-input' for current buffer.
@@ -2489,57 +2542,6 @@ Intended to be added to `minibuffer-setup-hook'."
2489 (cadr (buffer-list)))) 2542 (cadr (buffer-list))))
2490 buffer-file-coding-system))) 2543 buffer-file-coding-system)))
2491 2544
2492;; Modified to allow display of arbitrary characters with an
2493;; iso-10646-encoded (`Unicode') font.
2494(define-ccl-program ccl-encode-unicode-font
2495 `(0
2496 ((if (r0 == ,(charset-id 'ascii))
2497 ((r2 = r1)
2498 (r1 = 0))
2499 (
2500 ;; Look for a translation for non-ASCII chars. For a 2D
2501 ;; charset, produce a single code for the translation.
2502 ;; Official 2D sets are in the charset id range [#x90,#x99],
2503 ;; private ones in the range [#xf0,#xfe] (with #xff not used).
2504 ;; Fixme: Is there a better way to do this?
2505 (r3 = (r0 >= #x90))
2506 (r3 &= (r0 <= #x99))
2507 (r3 |= (r0 >= #xf0))
2508 (if r3 ; 2D input
2509 (r1 = ((r1 << 7) | r2)))
2510 (translate-character ucs-mule-to-mule-unicode r0 r1)
2511 (r3 = (r0 >= #x90))
2512 (r3 &= (r0 <= #x99))
2513 (r3 |= (r0 >= #xf0))
2514 (if r3 ; 2D translation
2515 ((r2 = (r1 & 127))
2516 (r1 = (r1 >> 7))))
2517 (if (r0 == ,(charset-id 'latin-iso8859-1))
2518 ((r2 = (r1 + 128))
2519 (r1 = 0))
2520 (if (r0 == ,(charset-id 'mule-unicode-0100-24ff))
2521 ((r1 *= 96)
2522 (r1 += r2)
2523 (r1 += ,(- #x100 (* 32 96) 32))
2524 (r1 >8= 0)
2525 (r2 = r7))
2526 (if (r0 == ,(charset-id 'mule-unicode-2500-33ff))
2527 ((r1 *= 96)
2528 (r1 += r2)
2529 (r1 += ,(- #x2500 (* 32 96) 32))
2530 (r1 >8= 0)
2531 (r2 = r7))
2532 (if (r0 == ,(charset-id 'mule-unicode-e000-ffff))
2533 ((r1 *= 96)
2534 (r1 += r2)
2535 (r1 += ,(- #xe000 (* 32 96) 32))
2536 (r1 >8= 0)
2537 (r2 = r7))))))))))
2538 "Encode characters for display with iso10646 font.
2539Translate through table `ucs-mule-to-mule-unicode' initially.")
2540
2541(defalias 'ucs-tables-unload-hook 'ucs-fragment-8859)
2542
2543(provide 'ucs-tables) 2545(provide 'ucs-tables)
2544 2546
2545;;; ucs-tables.el ends here 2547;;; ucs-tables.el ends here