diff options
| author | Kenichi Handa | 2002-09-30 06:38:13 +0000 |
|---|---|---|
| committer | Kenichi Handa | 2002-09-30 06:38:13 +0000 |
| commit | 7d38f8fcf6702e5783c45860442922f243ab45ea (patch) | |
| tree | 7885a5dc0b2a06f48431425fc3be578d33853764 | |
| parent | 0405f493393a1bd1d86d019ead9aa1384eb6257a (diff) | |
| download | emacs-7d38f8fcf6702e5783c45860442922f243ab45ea.tar.gz emacs-7d38f8fcf6702e5783c45860442922f243ab45ea.zip | |
Don't bind
utf-8-translation-table-for-decode while setting up
ucs-mule-8859-to-ucs-table, etc. Add `dependency' property to
iso-8859-* coding systems.
(ucs-unify-8859): Arguments changed to FOR-ENCODE and FOR-DECODE.
If FOR-DECODE is non-nil, make ucs-mule-8859-to-mule-unicode
populate the translation table named
ucs-translation-table-for-decode. If FOR-ENCODE is non-nil, make
ucs-mule-to-mule-unicode populate the translation table named
utf-translation-table-for-encode. Call register-char-codings for
mule-utf-16-be and mule-utf-16-le too.
(ucs-fragment-8859): Arguments changed to FOR-ENCODE and
FOR-DECODE. If FOR-DECODE is non-nil, make the translation table
named ucs-translation-table-for-decode vacant. If FOR-ENCODE is
non-nil, make a proper char-table populate the translation table
named utf-translation-table-for-encode. Call register-char-codings
for all mule-utf-* to reset their status to the original.
(unify-8859-on-encoding-mode): Call ucs-unify-8859 and
ucs-fragment-8859 with fixed arguments. Set the version to 21.3.
(unify-8859-on-decoding-mode): Likewise. Remove dependency.
(ccl-encode-unicode-font): Deleted.
(ucs-tables-unload-hook): Deleted.
| -rw-r--r-- | lisp/international/ucs-tables.el | 448 |
1 files changed, 225 insertions, 223 deletions
diff --git a/lisp/international/ucs-tables.el b/lisp/international/ucs-tables.el index 15c9e1b52e9..033b951ce2a 100644 --- a/lisp/international/ucs-tables.el +++ b/lisp/international/ucs-tables.el | |||
| @@ -48,12 +48,12 @@ | |||
| 48 | ;; all that users normally care about unifying although, for instance, | 48 | ;; all that users normally care about unifying although, for instance, |
| 49 | ;; Greek occurs in as many as nine Emacs charsets. | 49 | ;; Greek occurs in as many as nine Emacs charsets. |
| 50 | 50 | ||
| 51 | ;; The translation table `ucs-mule-to-mule-unicode' is populated, | 51 | ;; The translation-table `utf-translation-table-for-encode' is |
| 52 | ;; which could be used for more general unification on decoding. This | 52 | ;; populated, which could be used for more general unification on |
| 53 | ;; is used by the `mule-utf-8' coding system to encode extra | 53 | ;; decoding. This is used by the `mule-utf-8' coding system to encode |
| 54 | ;; characters, and also by the coding systems set up by code-pages.el. | 54 | ;; extra characters, and also by the coding systems set up by |
| 55 | ;; The decoding tables here take account of | 55 | ;; code-pages.el. The decoding tables here take account of |
| 56 | ;; `utf-8-fragment-on-decoding' which may specify decoding Greek and | 56 | ;; `utf-fragment-on-decoding' which may specify decoding Greek and |
| 57 | ;; Cyrillic into 8859 charsets. | 57 | ;; Cyrillic into 8859 charsets. |
| 58 | 58 | ||
| 59 | ;; Unification also puts a `translation-table-for-input' property on | 59 | ;; Unification also puts a `translation-table-for-input' property on |
| @@ -89,13 +89,13 @@ | |||
| 89 | ;;; Define tables, to be populated later. | 89 | ;;; Define tables, to be populated later. |
| 90 | 90 | ||
| 91 | (defvar ucs-mule-8859-to-ucs-table (make-translation-table) | 91 | (defvar ucs-mule-8859-to-ucs-table (make-translation-table) |
| 92 | "Translation table from Emacs ISO-8859 characters to Unicode. | 92 | "Char table from Emacs ISO-8859 characters to Unicode. |
| 93 | This maps Emacs characters from the non-Latin-1 | 93 | This maps Emacs characters from the non-Latin-1 |
| 94 | ...-iso8859-... charsets to their Unicode code points. This is a | 94 | ...-iso8859-... charsets to their Unicode code points. This is a |
| 95 | many-to-one mapping.") | 95 | many-to-one mapping.") |
| 96 | 96 | ||
| 97 | (defvar ucs-mule-8859-to-mule-unicode (make-translation-table) | 97 | (defvar ucs-mule-8859-to-mule-unicode (make-translation-table) |
| 98 | "Translation table from Emacs ISO-8859 characters to Mule Unicode. | 98 | "Char table from Emacs ISO-8859 characters to Mule Unicode. |
| 99 | This maps Emacs characters from the non-Latin-1 | 99 | This maps Emacs characters from the non-Latin-1 |
| 100 | ...-iso8859-... charsets to characters from the | 100 | ...-iso8859-... charsets to characters from the |
| 101 | mule-unicode-... charsets. This is a many-to-one mapping. The | 101 | mule-unicode-... charsets. This is a many-to-one mapping. The |
| @@ -157,6 +157,8 @@ Translates from the iso8859 charsets and `mule-unicode-0100-24ff'.") | |||
| 157 | "Used as `translation-table-for-encode' for iso-8859-15. | 157 | "Used as `translation-table-for-encode' for iso-8859-15. |
| 158 | Translates from the iso8859 charsets and `mule-unicode-0100-24ff'.") | 158 | Translates from the iso8859 charsets and `mule-unicode-0100-24ff'.") |
| 159 | 159 | ||
| 160 | (defvar translation-table-for-input (make-translation-table)) | ||
| 161 | |||
| 160 | ;;; Set up the tables. | 162 | ;;; Set up the tables. |
| 161 | 163 | ||
| 162 | ;; Most of these tables were derived from ones in Mule-UCS. | 164 | ;; Most of these tables were derived from ones in Mule-UCS. |
| @@ -1097,51 +1099,57 @@ Translates from the iso8859 charsets and `mule-unicode-0100-24ff'.") | |||
| 1097 | (setq i (1+ i))) | 1099 | (setq i (1+ i))) |
| 1098 | (nreverse l)))) | 1100 | (nreverse l)))) |
| 1099 | 1101 | ||
| 1102 | ;; Note: Here, using decode-char is safe because | ||
| 1103 | ;; utf-fragment-on-decoding is by default nil, thus the translation | ||
| 1104 | ;; table `utf-translation-table-for-decode' does nothing. | ||
| 1105 | |||
| 1100 | ;; Convert the lists to the basic char tables. | 1106 | ;; Convert the lists to the basic char tables. |
| 1101 | ;; Ensure `decode-char' doesn't use the fragmentation table. | 1107 | (dolist (n (list 15 14 9 8 7 5 4 3 2 1)) |
| 1102 | ;; Fixme: handa suggests using the RESTRICTION arg. | 1108 | (let ((alist (symbol-value (intern (format "ucs-8859-%d-alist" n))))) |
| 1103 | (let ((utf-8-translation-table-for-decode (make-translation-table))) | 1109 | (dolist (pair alist) |
| 1104 | (dolist (n (list 15 14 9 8 7 5 4 3 2 1)) | 1110 | (let ((mule (car pair)) |
| 1105 | (let ((alist (symbol-value (intern (format "ucs-8859-%d-alist" n))))) | 1111 | (uc (cdr pair)) |
| 1106 | (dolist (pair alist) | 1112 | (mu (decode-char 'ucs (cdr pair)))) |
| 1107 | (let ((mule (car pair)) | 1113 | (aset ucs-mule-8859-to-ucs-table mule uc) |
| 1108 | (uc (cdr pair)) | 1114 | ;; (aset ucs-ucs-to-mule-8859-table uc mule) |
| 1109 | (mu (decode-char 'ucs (cdr pair)))) | 1115 | ;; (aset ucs-mule-unicode-to-mule-8859 mu mule) |
| 1110 | (aset ucs-mule-8859-to-ucs-table mule uc) | 1116 | (aset ucs-mule-8859-to-mule-unicode mule mu) |
| 1111 | ;; (aset ucs-ucs-to-mule-8859-table uc mule) | 1117 | (aset ucs-mule-to-mule-unicode mule mu))))) |
| 1112 | ;; (aset ucs-mule-unicode-to-mule-8859 mu mule) | 1118 | |
| 1113 | (aset ucs-mule-8859-to-mule-unicode mule mu) | 1119 | ;; Derive tables that can be used as per-coding-system |
| 1114 | (aset ucs-mule-to-mule-unicode mule mu))))) | 1120 | ;; `translation-table-for-encode's. |
| 1115 | ;; Derive tables that can be used as per-coding-system | 1121 | (dolist (n (list 15 14 9 8 7 5 4 3 2 1)) |
| 1116 | ;; `translation-table-for-encode's. | 1122 | (let* ((alist (symbol-value (intern (format "ucs-8859-%d-alist" n)))) |
| 1117 | (dolist (n (list 15 14 9 8 7 5 4 3 2 1)) | 1123 | (encode-translator (set (intern (format "ucs-8859-%d-encode-table" |
| 1118 | (let* ((alist (symbol-value (intern (format "ucs-8859-%d-alist" n)))) | 1124 | n)) |
| 1119 | (encode-translator (set (intern (format "ucs-8859-%d-encode-table" | 1125 | (make-translation-table))) |
| 1120 | n)) | 1126 | (coding-system |
| 1121 | (make-translation-table))) | 1127 | (coding-system-base (intern (format "iso-8859-%d" n)))) |
| 1122 | elt) | 1128 | (dependency (coding-system-get coding-system 'dependency)) |
| 1123 | ;; Start with the mule-unicode component. | 1129 | elt) |
| 1124 | (dolist (pair alist) | 1130 | ;; Start with the mule-unicode component (except for latin-iso8859-1). |
| 1125 | (let ((mule (car pair)) | 1131 | (if (/= n 1) |
| 1126 | (mu (decode-char 'ucs (cdr pair)))) | 1132 | (dolist (pair alist) |
| 1127 | (aset encode-translator mu mule))) | 1133 | (let ((mule (car pair)) |
| 1128 | ;; Find characters from other 8859 sets which map to the same | 1134 | (mu (decode-char 'ucs (cdr pair)))) |
| 1129 | ;; unicode as some character in this set. | 1135 | (aset encode-translator mu mule)))) |
| 1130 | (map-char-table (lambda (k v) | 1136 | ;; Find characters from other 8859 sets which map to the same |
| 1131 | (if (and (setq elt (rassq v alist)) | 1137 | ;; unicode as some character in this set. |
| 1132 | (not (assq k alist))) | 1138 | (map-char-table (lambda (k v) |
| 1133 | (aset encode-translator k (car elt)))) | 1139 | (if (and (setq elt (rassq v alist)) |
| 1134 | ucs-mule-8859-to-ucs-table) | 1140 | (not (assq k alist))) |
| 1135 | (optimize-char-table encode-translator))))) | 1141 | (aset encode-translator k (car elt)))) |
| 1136 | 1142 | ucs-mule-8859-to-ucs-table) | |
| 1137 | ;; Register for use in CCL. | 1143 | (optimize-char-table encode-translator) |
| 1138 | (define-translation-table 'ucs-mule-8859-to-mule-unicode | 1144 | |
| 1139 | ucs-mule-8859-to-mule-unicode) | 1145 | (or (memq 'unify-8859-on-encoding-mode dependency) |
| 1140 | (define-translation-table 'ucs-mule-to-mule-unicode | 1146 | (setq dependency (cons 'unify-8859-on-encoding-mode dependency))) |
| 1141 | ucs-mule-to-mule-unicode) | 1147 | (or (memq 'unify-8859-on-decoding-mode dependency) |
| 1142 | 1148 | (setq dependency (cons 'unify-8859-on-decoding-mode dependency))) | |
| 1143 | (defun ucs-unify-8859 (&optional encode-only) | 1149 | (coding-system-put coding-system 'dependency dependency)))) |
| 1144 | "Set up translation tables for unifying characters from ISO 8859. | 1150 | |
| 1151 | (defun ucs-unify-8859 (for-encode for-decode) | ||
| 1152 | "Set up translation-tables for unifying characters from ISO 8859. | ||
| 1145 | 1153 | ||
| 1146 | On decoding, non-ASCII characters are mapped into the `iso-latin-1' | 1154 | On decoding, non-ASCII characters are mapped into the `iso-latin-1' |
| 1147 | and `mule-unicode-0100-24ff' charsets. On encoding, these are mapped | 1155 | and `mule-unicode-0100-24ff' charsets. On encoding, these are mapped |
| @@ -1149,139 +1157,129 @@ back appropriate for the coding system. | |||
| 1149 | 1157 | ||
| 1150 | With prefix arg, do unification on encoding only, i.e. don't unify | 1158 | With prefix arg, do unification on encoding only, i.e. don't unify |
| 1151 | everything on input operations." | 1159 | everything on input operations." |
| 1152 | (interactive "P") | 1160 | (when for-decode |
| 1153 | (unless encode-only | ||
| 1154 | ;; Unify 8859 on decoding. (Non-CCL coding systems only.) | 1161 | ;; Unify 8859 on decoding. (Non-CCL coding systems only.) |
| 1155 | (if utf-8-fragment-on-decoding | 1162 | (if utf-fragment-on-decoding |
| 1156 | (progn (map-char-table | 1163 | (progn (map-char-table |
| 1157 | (lambda (k v) | 1164 | (lambda (k v) |
| 1158 | (if v (aset ucs-mule-to-mule-unicode v nil))) | 1165 | (if v (aset ucs-mule-8859-to-mule-unicode v nil))) |
| 1159 | utf-8-translation-table-for-decode) | 1166 | utf-fragmentation-table) |
| 1160 | (optimize-char-table ucs-mule-to-mule-unicode)) | 1167 | (optimize-char-table ucs-mule-8859-to-mule-unicode)) |
| 1161 | ;; Reset in case it was changed. | 1168 | ;; Reset in case it was changed. |
| 1162 | (map-char-table | 1169 | (map-char-table |
| 1163 | (lambda (k v) | 1170 | (lambda (k v) |
| 1164 | (if v (aset ucs-mule-to-mule-unicode v k))) | 1171 | (if v (aset ucs-mule-8859-to-mule-unicode v k))) |
| 1165 | utf-8-translation-table-for-decode)) | 1172 | utf-fragmentation-table)) |
| 1173 | |||
| 1174 | ;; For non-CCL coding systems (e.g. iso-latin-2). | ||
| 1166 | (set-char-table-parent standard-translation-table-for-decode | 1175 | (set-char-table-parent standard-translation-table-for-decode |
| 1167 | ucs-mule-8859-to-mule-unicode) | 1176 | ucs-mule-8859-to-mule-unicode) |
| 1177 | ;; For CCL coding systems other than mule-utf-* | ||
| 1178 | (define-translation-table 'ucs-translation-table-for-decode | ||
| 1179 | ucs-mule-8859-to-mule-unicode) | ||
| 1180 | |||
| 1168 | ;; Translate Quail input globally. | 1181 | ;; Translate Quail input globally. |
| 1169 | (setq-default translation-table-for-input ucs-mule-to-mule-unicode) | 1182 | (setq-default translation-table-for-input ucs-mule-to-mule-unicode) |
| 1170 | ;; In case these are set up, but we should use the global | 1183 | ;; In case these are set up, but we should use the global |
| 1171 | ;; translation table. | 1184 | ;; translation-table. |
| 1172 | (remove-hook 'quail-activate-hook 'ucs-quail-activate) | 1185 | (remove-hook 'quail-activate-hook 'ucs-quail-activate) |
| 1173 | (remove-hook 'minibuffer-setup-hook 'ucs-minibuffer-setup)) | 1186 | (remove-hook 'minibuffer-setup-hook 'ucs-minibuffer-setup)) |
| 1174 | ;; Adjust the 8859 coding systems to fragment the unified characters | 1187 | |
| 1175 | ;; on encoding. | 1188 | (when for-encode |
| 1176 | (dolist (n '(1 2 3 4 5 7 8 9 14 15)) | 1189 | ;; Make mule-utf-* encode all characters in ucs-mule-to-mule-unicode. |
| 1177 | (let* ((coding-system | 1190 | (let ((coding-list '(mule-utf-8 mule-utf-16-be mule-utf-16-le))) |
| 1178 | (coding-system-base (intern (format "iso-8859-%d" n)))) | 1191 | (define-translation-table 'utf-translation-table-for-encode |
| 1179 | (table (symbol-value | 1192 | ucs-mule-to-mule-unicode) |
| 1180 | (intern (format "ucs-8859-%d-encode-table" n)))) | 1193 | (dolist (coding coding-list) |
| 1181 | (safe (coding-system-get coding-system 'safe-chars))) | 1194 | (set-char-table-parent (coding-system-get coding 'safe-chars) |
| 1182 | ;; Actually, the coding system's safe-chars are not normally | 1195 | ucs-mule-to-mule-unicode) |
| 1183 | ;; used after they've been registered, but we might as well | 1196 | (register-char-codings coding ucs-mule-to-mule-unicode))) |
| 1184 | ;; record them. Setting the parent here is a convenience. | 1197 | |
| 1185 | (set-char-table-parent safe table) | 1198 | ;; Adjust the 8859 coding systems to fragment the unified characters |
| 1186 | ;; Update the table of what encodes to what. | 1199 | ;; on encoding. |
| 1187 | (register-char-codings coding-system table) | 1200 | (dolist (n '(1 2 3 4 5 7 8 9 14 15)) |
| 1188 | (coding-system-put coding-system 'translation-table-for-encode table) | 1201 | (let* ((coding-system |
| 1189 | (coding-system-put coding-system 'translation-table-for-input table))) | 1202 | (coding-system-base (intern (format "iso-8859-%d" n)))) |
| 1190 | ;; Arrange local translation tables for Quail input. | 1203 | (table (symbol-value |
| 1191 | (add-hook 'quail-activate-hook 'ucs-quail-activate) | 1204 | (intern (format "ucs-8859-%d-encode-table" n)))) |
| 1192 | (add-hook 'minibuffer-setup-hook 'ucs-minibuffer-setup)) | 1205 | (safe (coding-system-get coding-system 'safe-chars))) |
| 1193 | 1206 | ;; Actually, the coding system's safe-chars are not normally | |
| 1194 | (defun ucs-fragment-8859 (&optional encode-only) | 1207 | ;; used after they've been registered, but we might as well |
| 1208 | ;; record them. Setting the parent here is a convenience. | ||
| 1209 | (set-char-table-parent safe table) | ||
| 1210 | ;; Update the table of what encodes to what. | ||
| 1211 | (register-char-codings coding-system table) | ||
| 1212 | (coding-system-put coding-system 'translation-table-for-encode table) | ||
| 1213 | (coding-system-put coding-system 'translation-table-for-input table))) | ||
| 1214 | ;; Arrange local translation-tables for Quail input. | ||
| 1215 | (add-hook 'quail-activate-hook 'ucs-quail-activate) | ||
| 1216 | (add-hook 'minibuffer-setup-hook 'ucs-minibuffer-setup))) | ||
| 1217 | |||
| 1218 | (defun ucs-fragment-8859 (for-encode for-decode) | ||
| 1195 | "Undo the unification done by `ucs-unify-8859'. | 1219 | "Undo the unification done by `ucs-unify-8859'. |
| 1196 | With prefix arg, undo unification on encoding only, i.e. don't undo | 1220 | With prefix arg, undo unification on encoding only, i.e. don't undo |
| 1197 | unification on input operations." | 1221 | unification on input operations." |
| 1198 | (interactive "P") | 1222 | (when for-decode |
| 1199 | ;; Maybe fix decoding. | 1223 | ;; Don't Unify 8859 on decoding. |
| 1200 | (unless encode-only | 1224 | ;; For non-CCL coding systems (e.g. iso-latin-2). |
| 1201 | ;; Unify 8859 on decoding. (Non-CCL coding systems only.) | ||
| 1202 | (set-char-table-parent standard-translation-table-for-decode nil) | 1225 | (set-char-table-parent standard-translation-table-for-decode nil) |
| 1226 | ;; For CCL coding systems other than mule-utf-* (e.g. cyrillic-koi8). | ||
| 1227 | (define-translation-table 'ucs-translation-table-for-decode) | ||
| 1228 | ;; For Quail input. | ||
| 1203 | (setq-default translation-table-for-input nil)) | 1229 | (setq-default translation-table-for-input nil)) |
| 1204 | ;; Fix encoding. For each charset, remove the entries in | 1230 | |
| 1205 | ;; `char-coding-system-table' added to its safe-chars table (as its | 1231 | (when for-encode |
| 1206 | ;; parent). | 1232 | ;; Make mule-utf-* disabled for all characters in |
| 1207 | (dolist (n '(1 2 3 4 5 7 8 9 14 15)) | 1233 | ;; ucs-mule-to-mule-unicode but what originally supported and what |
| 1208 | (let* ((coding-system | 1234 | ;; translated bt utf-translation-table-for-decode when |
| 1209 | (coding-system-base (intern (format "iso-8859-%d" n)))) | 1235 | ;; `utf-fragment-on-decoding' is non-nil. |
| 1210 | (table (symbol-value | 1236 | (let ((coding-list '(mule-utf-8 mule-utf-16-be mule-utf-16-le)) |
| 1211 | (intern (format "ucs-8859-%d-encode-table" n)))) | 1237 | (safe (coding-system-get 'mule-utf-8 'safe-chars))) |
| 1212 | (safe (coding-system-get coding-system 'safe-chars))) | 1238 | (dolist (coding coding-list) |
| 1239 | (set-char-table-parent (coding-system-get coding 'safe-chars) nil)) | ||
| 1240 | ;; Here we assume that all mule-utf-* have the same character | ||
| 1241 | ;; repertory, thus we can use SAFE for all of them. | ||
| 1213 | (map-char-table | 1242 | (map-char-table |
| 1214 | (lambda (key val) | 1243 | (lambda (key val) |
| 1215 | (if (and (>= key 128) val) | 1244 | (if (and (>= key 128) val |
| 1216 | (let ((codings (aref char-coding-system-table key))) | 1245 | (not (aref safe key))) |
| 1217 | (aset char-coding-system-table key | 1246 | (aset char-coding-system-table key |
| 1218 | (delq coding-system codings))))) | 1247 | (delq 'mule-utf-8 |
| 1219 | (char-table-parent safe)) | 1248 | (delq 'mule-utf-16-le |
| 1220 | (set-char-table-parent safe nil) | 1249 | (delq 'mule-utf-16-be |
| 1221 | (coding-system-put coding-system 'translation-table-for-encode nil) | 1250 | (aref char-coding-system-table key))))))) |
| 1222 | (coding-system-put coding-system 'translation-table-for-input nil))) | 1251 | ucs-mule-to-mule-unicode) |
| 1223 | (optimize-char-table char-coding-system-table) | 1252 | |
| 1224 | (remove-hook 'quail-activate-hook 'ucs-quail-activate) | 1253 | (if (not utf-fragment-on-decoding) |
| 1225 | (remove-hook 'minibuffer-setup-hook 'ucs-minibuffer-setup)) | 1254 | (define-translation-table 'utf-translation-table-for-encode) |
| 1226 | 1255 | (define-translation-table 'utf-translation-table-for-encode | |
| 1227 | (define-minor-mode unify-8859-on-encoding-mode | 1256 | utf-defragmentation-table) |
| 1228 | "Set up translation tables for unifying ISO 8859 characters on encoding. | 1257 | (dolist (coding coding-list) |
| 1229 | 1258 | (register-char-codings coding utf-defragmentation-table)))) | |
| 1230 | The ISO 8859 characters sets overlap, e.g. 8859-1 (Latin-1) and | 1259 | |
| 1231 | 8859-15 (Latin-9) differ only in a few characters. Emacs normally | 1260 | ;; For each charset, remove the entries in |
| 1232 | distinguishes equivalent characters from those ISO-8859 character sets | 1261 | ;; `char-coding-system-table' added to its safe-chars table (as |
| 1233 | which are built in to Emacs. This behaviour is essentially inherited | 1262 | ;; its parent). |
| 1234 | from the European-originated international standards. Treating them | 1263 | (dolist (n '(1 2 3 4 5 7 8 9 14 15)) |
| 1235 | equivalently, by translating to and from a single representation is | 1264 | (let* ((coding-system |
| 1236 | called `unification'. (The `utf-8' coding system treats the | 1265 | (coding-system-base (intern (format "iso-8859-%d" n)))) |
| 1237 | characters of European scripts in a unified manner.) | 1266 | (table (symbol-value |
| 1238 | 1267 | (intern (format "ucs-8859-%d-encode-table" n)))) | |
| 1239 | In this mode, on encoding -- i.e. output operations -- non-ASCII | 1268 | (safe (coding-system-get coding-system 'safe-chars))) |
| 1240 | characters from the built-in ISO 8859 and `mule-unicode-0100-24ff' | 1269 | (when (char-table-parent safe) |
| 1241 | charsets are handled automatically by the coding system used if it can | 1270 | (map-char-table |
| 1242 | represent them. Thus, say, an e-acute from the Latin-1 charset (the | 1271 | (lambda (key val) |
| 1243 | unified representation) in a buffer saved as Latin-9 will be encoded | 1272 | (if (and (>= key 128) val) |
| 1244 | directly to a byte value 233. By default, in contrast, you would be | 1273 | (let ((codings (aref char-coding-system-table key))) |
| 1245 | prompted for a general coding system to use for saving the file, which | 1274 | (aset char-coding-system-table key |
| 1246 | can cope with separate Latin-1 and Latin-9 representations of e-acute. | 1275 | (delq coding-system codings))))) |
| 1247 | 1276 | (char-table-parent safe)) | |
| 1248 | Also sets hooks that arrange `translation-table-for-input' to be set | 1277 | (set-char-table-parent safe nil)) |
| 1249 | up locally when Quail input methods are activated. This will often | 1278 | (coding-system-put coding-system 'translation-table-for-encode nil) |
| 1250 | allow input generated by Quail input methods to conform with what the | 1279 | (coding-system-put coding-system 'translation-table-for-input nil))) |
| 1251 | buffer's file coding system can encode. Thus you could use a Latin-2 | 1280 | (optimize-char-table char-coding-system-table) |
| 1252 | input method to search for e-acute in a Latin-1 buffer. | 1281 | (remove-hook 'quail-activate-hook 'ucs-quail-activate) |
| 1253 | 1282 | (remove-hook 'minibuffer-setup-hook 'ucs-minibuffer-setup))) | |
| 1254 | See also command `unify-8859-on-decoding-mode'." | ||
| 1255 | :group 'mule | ||
| 1256 | :global t | ||
| 1257 | :init-value t | ||
| 1258 | (if unify-8859-on-encoding-mode | ||
| 1259 | (ucs-unify-8859 t) | ||
| 1260 | (ucs-fragment-8859 t))) | ||
| 1261 | |||
| 1262 | (custom-add-version 'unify-8859-on-encoding-mode "21.4") | ||
| 1263 | |||
| 1264 | (define-minor-mode unify-8859-on-decoding-mode | ||
| 1265 | "Set up translation tables for unifying ISO 8859 characters on decoding. | ||
| 1266 | On decoding, i.e. input operations, non-ASCII characters from the | ||
| 1267 | built-in ISO 8859 charsets are unified by mapping them into the | ||
| 1268 | `iso-latin-1' and `mule-unicode-0100-24ff' charsets. | ||
| 1269 | |||
| 1270 | Also sets `translation-table-for-input' globally, so that Quail input | ||
| 1271 | methods produce unified characters. | ||
| 1272 | |||
| 1273 | See also command `unify-8859-on-encoding-mode' and the user option | ||
| 1274 | `utf-8-fragment-on-decoding'." | ||
| 1275 | :group 'mule | ||
| 1276 | :global t | ||
| 1277 | :init-value nil | ||
| 1278 | (if unify-8859-on-decoding-mode | ||
| 1279 | (ucs-unify-8859) | ||
| 1280 | (ucs-fragment-8859))) | ||
| 1281 | |||
| 1282 | (custom-add-dependencies 'unify-8859-on-decoding-mode | ||
| 1283 | '(utf-8-fragment-on-decoding)) | ||
| 1284 | (custom-add-version 'unify-8859-on-decoding-mode "21.4") | ||
| 1285 | 1283 | ||
| 1286 | (defun ucs-insert (arg) | 1284 | (defun ucs-insert (arg) |
| 1287 | "Insert the Emacs character representation of the given Unicode. | 1285 | "Insert the Emacs character representation of the given Unicode. |
| @@ -2456,15 +2454,70 @@ Interactively, prompts for a hex string giving the code." | |||
| 2456 | ((memq cs '(lao thai-tis620 tibetan-iso-8bit)) | 2454 | ((memq cs '(lao thai-tis620 tibetan-iso-8bit)) |
| 2457 | (coding-system-put cs 'translation-table-for-input cs))))) | 2455 | (coding-system-put cs 'translation-table-for-input cs))))) |
| 2458 | (dolist (c safe-charsets) | 2456 | (dolist (c safe-charsets) |
| 2459 | (aset table (make-char c) t)) | 2457 | (aset table (make-char c) t)))) |
| 2460 | (coding-system-put 'mule-utf-8 'safe-charsets | ||
| 2461 | (append (coding-system-get 'mule-utf-8 'safe-charsets) | ||
| 2462 | safe-charsets)) | ||
| 2463 | (register-char-codings 'mule-utf-8 table))) | ||
| 2464 | 2458 | ||
| 2465 | (defvar translation-table-for-input (make-translation-table)) | 2459 | (define-minor-mode unify-8859-on-encoding-mode |
| 2460 | "Set up translation-tables for unifying ISO 8859 characters on encoding. | ||
| 2461 | |||
| 2462 | The ISO 8859 characters sets overlap, e.g. 8859-1 (Latin-1) and | ||
| 2463 | 8859-15 (Latin-9) differ only in a few characters. Emacs normally | ||
| 2464 | distinguishes equivalent characters from those ISO-8859 character sets | ||
| 2465 | which are built in to Emacs. This behaviour is essentially inherited | ||
| 2466 | from the European-originated international standards. Treating them | ||
| 2467 | equivalently, by translating to and from a single representation is | ||
| 2468 | called `unification'. (The `utf-8' coding system treats the | ||
| 2469 | characters of European scripts in a unified manner.) | ||
| 2470 | |||
| 2471 | In this mode, on encoding -- i.e. output operations -- non-ASCII | ||
| 2472 | characters from the built-in ISO 8859 and `mule-unicode-0100-24ff' | ||
| 2473 | charsets are handled automatically by the coding system used if it can | ||
| 2474 | represent them. Thus, say, an e-acute from the Latin-1 charset (the | ||
| 2475 | unified representation) in a buffer saved as Latin-9 will be encoded | ||
| 2476 | directly to a byte value 233. By default, in contrast, you would be | ||
| 2477 | prompted for a general coding system to use for saving the file, which | ||
| 2478 | can cope with separate Latin-1 and Latin-9 representations of e-acute. | ||
| 2466 | 2479 | ||
| 2467 | ;; Arrange to set up the translation table for Quail. This probably | 2480 | Also sets hooks that arrange `translation-table-for-input' to be set |
| 2481 | up locally when Quail input methods are activated. This will often | ||
| 2482 | allow input generated by Quail input methods to conform with what the | ||
| 2483 | buffer's file coding system can encode. Thus you could use a Latin-2 | ||
| 2484 | input method to search for e-acute in a Latin-1 buffer. | ||
| 2485 | |||
| 2486 | See also command `unify-8859-on-decoding-mode'." | ||
| 2487 | :group 'mule | ||
| 2488 | :global t | ||
| 2489 | :init-value t | ||
| 2490 | (if unify-8859-on-encoding-mode | ||
| 2491 | (ucs-unify-8859 t nil) | ||
| 2492 | (ucs-fragment-8859 t nil))) | ||
| 2493 | |||
| 2494 | (custom-add-version 'unify-8859-on-encoding-mode "21.3") | ||
| 2495 | |||
| 2496 | (define-minor-mode unify-8859-on-decoding-mode | ||
| 2497 | "Set up translation-tables for unifying ISO 8859 characters on decoding. | ||
| 2498 | On decoding, i.e. input operations, non-ASCII characters from the | ||
| 2499 | built-in ISO 8859 charsets are unified by mapping them into the | ||
| 2500 | `iso-latin-1' and `mule-unicode-0100-24ff' charsets. | ||
| 2501 | |||
| 2502 | Also sets `translation-table-for-input' globally, so that Quail input | ||
| 2503 | methods produce unified characters. | ||
| 2504 | |||
| 2505 | See also command `unify-8859-on-encoding-mode' and the user option | ||
| 2506 | `utf-fragment-on-decoding'." | ||
| 2507 | :group 'mule | ||
| 2508 | :global t | ||
| 2509 | :init-value nil | ||
| 2510 | (if unify-8859-on-decoding-mode | ||
| 2511 | (ucs-unify-8859 nil t) | ||
| 2512 | (ucs-fragment-8859 nil t))) | ||
| 2513 | |||
| 2514 | (custom-add-version 'unify-8859-on-decoding-mode "21.3") | ||
| 2515 | |||
| 2516 | ;; Synchronize the status with the initial value of | ||
| 2517 | ;; unify-8859-on-encoding-mode and unify-8859-on-decoding-mode. | ||
| 2518 | (ucs-unify-8859 t nil) | ||
| 2519 | |||
| 2520 | ;; Arrange to set up the translation-table for Quail. This probably | ||
| 2468 | ;; isn't foolproof. | 2521 | ;; isn't foolproof. |
| 2469 | (defun ucs-quail-activate () | 2522 | (defun ucs-quail-activate () |
| 2470 | "Set up an appropriate `translation-table-for-input' for current buffer. | 2523 | "Set up an appropriate `translation-table-for-input' for current buffer. |
| @@ -2489,57 +2542,6 @@ Intended to be added to `minibuffer-setup-hook'." | |||
| 2489 | (cadr (buffer-list)))) | 2542 | (cadr (buffer-list)))) |
| 2490 | buffer-file-coding-system))) | 2543 | buffer-file-coding-system))) |
| 2491 | 2544 | ||
| 2492 | ;; Modified to allow display of arbitrary characters with an | ||
| 2493 | ;; iso-10646-encoded (`Unicode') font. | ||
| 2494 | (define-ccl-program ccl-encode-unicode-font | ||
| 2495 | `(0 | ||
| 2496 | ((if (r0 == ,(charset-id 'ascii)) | ||
| 2497 | ((r2 = r1) | ||
| 2498 | (r1 = 0)) | ||
| 2499 | ( | ||
| 2500 | ;; Look for a translation for non-ASCII chars. For a 2D | ||
| 2501 | ;; charset, produce a single code for the translation. | ||
| 2502 | ;; Official 2D sets are in the charset id range [#x90,#x99], | ||
| 2503 | ;; private ones in the range [#xf0,#xfe] (with #xff not used). | ||
| 2504 | ;; Fixme: Is there a better way to do this? | ||
| 2505 | (r3 = (r0 >= #x90)) | ||
| 2506 | (r3 &= (r0 <= #x99)) | ||
| 2507 | (r3 |= (r0 >= #xf0)) | ||
| 2508 | (if r3 ; 2D input | ||
| 2509 | (r1 = ((r1 << 7) | r2))) | ||
| 2510 | (translate-character ucs-mule-to-mule-unicode r0 r1) | ||
| 2511 | (r3 = (r0 >= #x90)) | ||
| 2512 | (r3 &= (r0 <= #x99)) | ||
| 2513 | (r3 |= (r0 >= #xf0)) | ||
| 2514 | (if r3 ; 2D translation | ||
| 2515 | ((r2 = (r1 & 127)) | ||
| 2516 | (r1 = (r1 >> 7)))) | ||
| 2517 | (if (r0 == ,(charset-id 'latin-iso8859-1)) | ||
| 2518 | ((r2 = (r1 + 128)) | ||
| 2519 | (r1 = 0)) | ||
| 2520 | (if (r0 == ,(charset-id 'mule-unicode-0100-24ff)) | ||
| 2521 | ((r1 *= 96) | ||
| 2522 | (r1 += r2) | ||
| 2523 | (r1 += ,(- #x100 (* 32 96) 32)) | ||
| 2524 | (r1 >8= 0) | ||
| 2525 | (r2 = r7)) | ||
| 2526 | (if (r0 == ,(charset-id 'mule-unicode-2500-33ff)) | ||
| 2527 | ((r1 *= 96) | ||
| 2528 | (r1 += r2) | ||
| 2529 | (r1 += ,(- #x2500 (* 32 96) 32)) | ||
| 2530 | (r1 >8= 0) | ||
| 2531 | (r2 = r7)) | ||
| 2532 | (if (r0 == ,(charset-id 'mule-unicode-e000-ffff)) | ||
| 2533 | ((r1 *= 96) | ||
| 2534 | (r1 += r2) | ||
| 2535 | (r1 += ,(- #xe000 (* 32 96) 32)) | ||
| 2536 | (r1 >8= 0) | ||
| 2537 | (r2 = r7)))))))))) | ||
| 2538 | "Encode characters for display with iso10646 font. | ||
| 2539 | Translate through table `ucs-mule-to-mule-unicode' initially.") | ||
| 2540 | |||
| 2541 | (defalias 'ucs-tables-unload-hook 'ucs-fragment-8859) | ||
| 2542 | |||
| 2543 | (provide 'ucs-tables) | 2545 | (provide 'ucs-tables) |
| 2544 | 2546 | ||
| 2545 | ;;; ucs-tables.el ends here | 2547 | ;;; ucs-tables.el ends here |