diff options
| author | Dave Love | 2002-07-17 19:21:41 +0000 |
|---|---|---|
| committer | Dave Love | 2002-07-17 19:21:41 +0000 |
| commit | 939e3d9bf6b6d906b84801cb4cdeb3bc2feb0292 (patch) | |
| tree | 1cbf45f99a60f149f47a7dea8b32dba9b0940d54 | |
| parent | 9fb1b1a8198d3cb4fff9489790b99f205711ab53 (diff) | |
| download | emacs-939e3d9bf6b6d906b84801cb4cdeb3bc2feb0292.tar.gz emacs-939e3d9bf6b6d906b84801cb4cdeb3bc2feb0292.zip | |
Optimize tables. Deal with some
non-8859 charsets.
(ucs-mule-to-mule-unicode): New.
(ucs-unify-8859): Use utf-8-fragment-on-decoding, set up Quail
translation.
(ucs-fragment-8859): Modified consistent with ucs-unify-8859.
(unify-8859-on-encoding-mode): Doc mod. Fix custom version.
(unify-8859-on-decoding-mode): Doc mod. Change code. Fix custom
version. Add custom dependencies.
(ucs-insert): Check for null from decode-char.
(translation-table-for-input, ucs-quail-activate)
(ucs-minibuffer-setup, ccl-encode-unicode-font)
(ucs-tables-unload-hook): New.
| -rw-r--r-- | lisp/international/ucs-tables.el | 310 |
1 files changed, 191 insertions, 119 deletions
diff --git a/lisp/international/ucs-tables.el b/lisp/international/ucs-tables.el index 076772e3e10..e59e7b3872a 100644 --- a/lisp/international/ucs-tables.el +++ b/lisp/international/ucs-tables.el | |||
| @@ -25,12 +25,12 @@ | |||
| 25 | ;;; Commentary: | 25 | ;;; Commentary: |
| 26 | 26 | ||
| 27 | ;; This file provides tables mapping between Unicode numbers and | 27 | ;; This file provides tables mapping between Unicode numbers and |
| 28 | ;; emacs-mule characters from the iso8859 charsets (and others). It | 28 | ;; emacs-mule characters from the iso-8859 charsets (and others). It |
| 29 | ;; also provides some auxiliary functions. | 29 | ;; also provides some auxiliary functions. |
| 30 | 30 | ||
| 31 | ;; These tables are used to construct other mappings between the Mule | 31 | ;; These tables are used to construct other mappings between the Mule |
| 32 | ;; iso8859 charsets and the emacs-unicode charsets and a table that | 32 | ;; iso8859 charsets and the emacs-unicode charsets and a table that |
| 33 | ;; unifies iso8859 characters using a single charset as far as | 33 | ;; unifies iso-8859 characters using a single charset as far as |
| 34 | ;; possible. These tables are used by latin1-disp.el to display some | 34 | ;; possible. These tables are used by latin1-disp.el to display some |
| 35 | ;; Unicode characters without a Unicode font and by utf-8.el to unify | 35 | ;; Unicode characters without a Unicode font and by utf-8.el to unify |
| 36 | ;; Latin-N as far as possible on encoding. | 36 | ;; Latin-N as far as possible on encoding. |
| @@ -44,14 +44,46 @@ | |||
| 44 | ;; not idempotent. | 44 | ;; not idempotent. |
| 45 | 45 | ||
| 46 | ;; Global minor modes are provided to unify on encoding and decoding. | 46 | ;; Global minor modes are provided to unify on encoding and decoding. |
| 47 | 47 | ;; These could be extended to non-iso-8859 charsets. However 8859 is | |
| 48 | ;; The translation table `ucs-mule-to-mule-unicode' is populated. | 48 | ;; all that users normally care about unifying although, for instance, |
| 49 | ;; This is used by the `mule-utf-8' coding system to encode extra | 49 | ;; Greek occurs in as many as nine Emacs charsets. |
| 50 | ;; characters. | 50 | |
| 51 | ;; The translation table `ucs-mule-to-mule-unicode' is populated, | ||
| 52 | ;; which could be used for more general unification on decoding. This | ||
| 53 | ;; is used by the `mule-utf-8' coding system to encode extra | ||
| 54 | ;; characters, and also by the coding systems set up by code-pages.el. | ||
| 55 | ;; The decoding tables here take account of | ||
| 56 | ;; `utf-8-fragment-on-decoding' which may specify decoding Greek and | ||
| 57 | ;; Cyrillic into 8859 charsets. | ||
| 58 | |||
| 59 | ;; Unification also puts a `translation-table-for-input' property on | ||
| 60 | ;; relevant coding coding systems and arranges for the | ||
| 61 | ;; `translation-table-for-input' variable to be set either globally or | ||
| 62 | ;; locally. This is used by Quail input methods to translate input | ||
| 63 | ;; characters appropriately for the buffer's coding system (if | ||
| 64 | ;; possible). Unification on decoding sets it globally to translate | ||
| 65 | ;; to Unicode. Unification on encoding uses hooks to set it up | ||
| 66 | ;; locally to buffers. Thus in the latter case, typing `"a' into a | ||
| 67 | ;; Latin-1 buffer using the `latin-2-prefix' method translates the | ||
| 68 | ;; generated latin-iso8859-2 `,Bd(B' into latin-iso8859-1 `,Ad(B'. | ||
| 69 | |||
| 70 | ;; NB, this code depends on the default value of | ||
| 71 | ;; `enable-character-translation'. (Making it nil would anyway lead | ||
| 72 | ;; to inconsistent behaviour between CCL-based coding systems which | ||
| 73 | ;; use explicit translation tables and the rest.) | ||
| 51 | 74 | ||
| 52 | ;; Command `ucs-insert' is convenient for inserting a given Unicode. | 75 | ;; Command `ucs-insert' is convenient for inserting a given Unicode. |
| 53 | ;; (See also the `ucs' input method.) | 76 | ;; (See also the `ucs' input method.) |
| 54 | 77 | ||
| 78 | ;; A replacement CCL program is provided which allows characters in | ||
| 79 | ;; the `ucs-mule-to-mule-unicode' table to be displayed with an | ||
| 80 | ;; iso-10646-encoded font. E.g. to use a `Unicode' font for Cyrillic: | ||
| 81 | ;; | ||
| 82 | ;; (set-fontset-font "fontset-startup" | ||
| 83 | ;; (cons (make-char 'cyrillic-iso8859-5 160) | ||
| 84 | ;; (make-char 'cyrillic-iso8859-5 255)) | ||
| 85 | ;; '(nil . "ISO10646-1")) | ||
| 86 | |||
| 55 | ;;; Code: | 87 | ;;; Code: |
| 56 | 88 | ||
| 57 | ;;; Define tables, to be populated later. | 89 | ;;; Define tables, to be populated later. |
| @@ -1067,11 +1099,7 @@ Translates from the iso8859 charsets and `mule-unicode-0100-24ff'.") | |||
| 1067 | (push (cons (make-char 'latin-iso8859-1 (- i 128)) i) | 1099 | (push (cons (make-char 'latin-iso8859-1 (- i 128)) i) |
| 1068 | l) | 1100 | l) |
| 1069 | (setq i (1+ i))) | 1101 | (setq i (1+ i))) |
| 1070 | (nreverse l))) | 1102 | (nreverse l)))) |
| 1071 | |||
| 1072 | ;; (case-table (standard-case-table)) | ||
| 1073 | ;; (syntax-table (standard-syntax-table)) | ||
| 1074 | ) | ||
| 1075 | 1103 | ||
| 1076 | ;; Convert the lists to the basic char tables. | 1104 | ;; Convert the lists to the basic char tables. |
| 1077 | (dolist (n (list 15 14 9 8 7 5 4 3 2 1)) | 1105 | (dolist (n (list 15 14 9 8 7 5 4 3 2 1)) |
| @@ -1084,41 +1112,11 @@ Translates from the iso8859 charsets and `mule-unicode-0100-24ff'.") | |||
| 1084 | ;; (aset ucs-ucs-to-mule-8859-table uc mule) | 1112 | ;; (aset ucs-ucs-to-mule-8859-table uc mule) |
| 1085 | ;; (aset ucs-mule-unicode-to-mule-8859 mu mule) | 1113 | ;; (aset ucs-mule-unicode-to-mule-8859 mu mule) |
| 1086 | (aset ucs-mule-8859-to-mule-unicode mule mu) | 1114 | (aset ucs-mule-8859-to-mule-unicode mule mu) |
| 1087 | (aset ucs-mule-to-mule-unicode mule mu))) | 1115 | (aset ucs-mule-to-mule-unicode mule mu))))) |
| 1088 | ;; I think this is actually done OK in characters.el. | 1116 | ;; The table optimizing here and elsewhere probably isn't very |
| 1089 | ;; Probably things like accents shouldn't have word syntax, but the | 1117 | ;; useful, but seems good practice. |
| 1090 | ;; Latin-N syntax tables currently aren't consistent for such | 1118 | (optimize-char-table ucs-mule-to-mule-unicode) |
| 1091 | ;; characters anyhow. | 1119 | (optimize-char-table ucs-mule-8859-to-mule-unicode) |
| 1092 | ;; ;; Make the mule-unicode characters inherit syntax and case info | ||
| 1093 | ;; ;; if they don't already have it. | ||
| 1094 | ;; (dolist (pair alist) | ||
| 1095 | ;; (let ((mule (car pair)) | ||
| 1096 | ;; (uc (cdr pair)) | ||
| 1097 | ;; (mu (decode-char 'ucs (cdr pair)))) | ||
| 1098 | ;; (let ((syntax (aref syntax-table mule))) | ||
| 1099 | ;; (if (eq mule (downcase mule)) | ||
| 1100 | ;; (if (eq mule (upcase mule)) ; non-letter or uncased letter | ||
| 1101 | ;; (progn | ||
| 1102 | ;; (if (= 4 (car syntax)) ; left delim | ||
| 1103 | ;; (progn | ||
| 1104 | ;; (aset syntax-table | ||
| 1105 | ;; mu | ||
| 1106 | ;; (cons 4 (aref ucs-mule-8859-to-mule-unicode | ||
| 1107 | ;; (cdr syntax)))) | ||
| 1108 | ;; (aset syntax-table | ||
| 1109 | ;; (aref ucs-mule-8859-to-mule-unicode | ||
| 1110 | ;; (cdr syntax)) | ||
| 1111 | ;; (cons 5 mu))) | ||
| 1112 | ;; (aset syntax-table mu syntax)) | ||
| 1113 | ;; (aset case-table mu mu))) | ||
| 1114 | ;; ;; Upper case letter | ||
| 1115 | ;; (let ((lower (aref ucs-mule-8859-to-mule-unicode | ||
| 1116 | ;; (aref case-table mule)))) | ||
| 1117 | ;; (aset case-table mu lower) | ||
| 1118 | ;; (aset case-table lower lower) | ||
| 1119 | ;; (modify-syntax-entry lower "w " syntax-table) | ||
| 1120 | ;; (modify-syntax-entry mu "w " syntax-table)))))) | ||
| 1121 | )) | ||
| 1122 | ;; Derive tables that can be used as per-coding-system | 1120 | ;; Derive tables that can be used as per-coding-system |
| 1123 | ;; `translation-table-for-encode's. | 1121 | ;; `translation-table-for-encode's. |
| 1124 | (dolist (n (list 15 14 9 8 7 5 4 3 2 1)) | 1122 | (dolist (n (list 15 14 9 8 7 5 4 3 2 1)) |
| @@ -1138,15 +1136,15 @@ Translates from the iso8859 charsets and `mule-unicode-0100-24ff'.") | |||
| 1138 | (if (and (setq elt (rassq v alist)) | 1136 | (if (and (setq elt (rassq v alist)) |
| 1139 | (not (assq k alist))) | 1137 | (not (assq k alist))) |
| 1140 | (aset encode-translator k (car elt)))) | 1138 | (aset encode-translator k (car elt)))) |
| 1141 | ucs-mule-8859-to-ucs-table)))) | 1139 | ucs-mule-8859-to-ucs-table) |
| 1140 | (optimize-char-table encode-translator)))) | ||
| 1142 | 1141 | ||
| 1143 | ;; Register for use in CCL. | 1142 | ;; Register for use in CCL. |
| 1144 | (define-translation-table 'ucs-mule-8859-to-mule-unicode | 1143 | (define-translation-table 'ucs-mule-8859-to-mule-unicode |
| 1145 | ucs-mule-8859-to-mule-unicode) | 1144 | ucs-mule-8859-to-mule-unicode) |
| 1145 | (define-translation-table 'ucs-mule-to-mule-unicode | ||
| 1146 | ucs-mule-to-mule-unicode) | ||
| 1146 | 1147 | ||
| 1147 | ;; Fixme: Make this reversible, which means frobbing | ||
| 1148 | ;; `char-coding-system-table' directly to remove what we added -- see | ||
| 1149 | ;; codepages.el. Also make it a user option. | ||
| 1150 | (defun ucs-unify-8859 (&optional encode-only) | 1148 | (defun ucs-unify-8859 (&optional encode-only) |
| 1151 | "Set up translation tables for unifying characters from ISO 8859. | 1149 | "Set up translation tables for unifying characters from ISO 8859. |
| 1152 | 1150 | ||
| @@ -1159,7 +1157,24 @@ everything on input operations." | |||
| 1159 | (interactive "P") | 1157 | (interactive "P") |
| 1160 | (unless encode-only | 1158 | (unless encode-only |
| 1161 | ;; Unify 8859 on decoding. (Non-CCL coding systems only.) | 1159 | ;; Unify 8859 on decoding. (Non-CCL coding systems only.) |
| 1162 | (unify-8859-on-decoding-mode 1)) | 1160 | (if utf-8-fragment-on-decoding |
| 1161 | (map-char-table | ||
| 1162 | (lambda (k v) | ||
| 1163 | (if v (aset ucs-mule-to-mule-unicode v nil))) | ||
| 1164 | utf-8-translation-table-for-decode) | ||
| 1165 | ;; Reset in case it was changed. | ||
| 1166 | (map-char-table | ||
| 1167 | (lambda (k v) | ||
| 1168 | (if v (aset ucs-mule-to-mule-unicode v k))) | ||
| 1169 | utf-8-translation-table-for-decode)) | ||
| 1170 | (set-char-table-parent standard-translation-table-for-decode | ||
| 1171 | ucs-mule-8859-to-mule-unicode) | ||
| 1172 | ;; Translate Quail input globally. | ||
| 1173 | (setq-default translation-table-for-input ucs-mule-to-mule-unicode) | ||
| 1174 | ;; In case these are set up, but we should use the global | ||
| 1175 | ;; translation table. | ||
| 1176 | (remove-hook 'quail-activate-hook 'ucs-quail-activate) | ||
| 1177 | (remove-hook 'minibuffer-setup-hook 'ucs-minibuffer-setup)) | ||
| 1163 | ;; Adjust the 8859 coding systems to fragment the unified characters | 1178 | ;; Adjust the 8859 coding systems to fragment the unified characters |
| 1164 | ;; on encoding. | 1179 | ;; on encoding. |
| 1165 | (dolist (n '(1 2 3 4 5 7 8 9 14 15)) | 1180 | (dolist (n '(1 2 3 4 5 7 8 9 14 15)) |
| @@ -1174,58 +1189,11 @@ everything on input operations." | |||
| 1174 | (set-char-table-parent safe table) | 1189 | (set-char-table-parent safe table) |
| 1175 | ;; Update the table of what encodes to what. | 1190 | ;; Update the table of what encodes to what. |
| 1176 | (register-char-codings coding-system table) | 1191 | (register-char-codings coding-system table) |
| 1177 | (coding-system-put coding-system 'translation-table-for-encode table))) | 1192 | (coding-system-put coding-system 'translation-table-for-encode table) |
| 1178 | 1193 | (coding-system-put coding-system 'translation-table-for-input table))) | |
| 1179 | ;;; The following works for the bundled coding systems, but it's | 1194 | ;; Arrange local translation tables for Quail input. |
| 1180 | ;;; better to use the Unicode-based ones and make it irrelevant. | 1195 | (add-hook 'quail-activate-hook 'ucs-quail-activate) |
| 1181 | 1196 | (add-hook 'minibuffer-setup-hook 'ucs-minibuffer-setup)) | |
| 1182 | ;;; ;; Update the Cyrillic special cases. | ||
| 1183 | ;;; ;; `translation-table-for-encode' doesn't work for CCL coding | ||
| 1184 | ;;; ;; systems, and `standard-translation-table-for-decode' isn't | ||
| 1185 | ;;; ;; applied. | ||
| 1186 | ;;; (let ((table (get 'cyrillic-koi8-r-encode-table 'translation-table))) | ||
| 1187 | ;;; (map-char-table | ||
| 1188 | ;;; (lambda (k v) | ||
| 1189 | ;;; (aset table | ||
| 1190 | ;;; (or (aref ucs-8859-5-encode-table k) | ||
| 1191 | ;;; k) | ||
| 1192 | ;;; v)) | ||
| 1193 | ;;; table) | ||
| 1194 | ;;; (register-char-codings 'cyrillic-koi8 table)) | ||
| 1195 | ;;; (let ((table (get 'cyrillic-koi8-r-nonascii-translation-table | ||
| 1196 | ;;; 'translation-table))) | ||
| 1197 | ;;; (map-char-table | ||
| 1198 | ;;; (lambda (k v) | ||
| 1199 | ;;; (if v (aset table k (or (aref ucs-mule-8859-to-mule-unicode v) | ||
| 1200 | ;;; v)))) | ||
| 1201 | ;;; table)) | ||
| 1202 | ;;; ;; Redefine this, since the orginal only translated 8859-5. | ||
| 1203 | ;;; (define-ccl-program ccl-encode-koi8 | ||
| 1204 | ;;; `(1 | ||
| 1205 | ;;; ((loop | ||
| 1206 | ;;; (read-multibyte-character r0 r1) | ||
| 1207 | ;;; (translate-character cyrillic-koi8-r-encode-table r0 r1) | ||
| 1208 | ;;; (write-repeat r1)))) | ||
| 1209 | ;;; "CCL program to encode KOI8.") | ||
| 1210 | ;;; (let ((table (get 'cyrillic-alternativnyj-encode-table 'translation-table))) | ||
| 1211 | ;;; (map-char-table | ||
| 1212 | ;;; (lambda (k v) | ||
| 1213 | ;;; (aset table | ||
| 1214 | ;;; (or (aref ucs-8859-5-encode-table k) | ||
| 1215 | ;;; k) | ||
| 1216 | ;;; v)) | ||
| 1217 | ;;; table) | ||
| 1218 | ;;; (register-char-codings 'cyrillic-alternativnyj table)) | ||
| 1219 | ;;; (let ((table (get 'cyrillic-alternativnyj-nonascii-translation-table | ||
| 1220 | ;;; 'translation-table))) | ||
| 1221 | ;;; (map-char-table | ||
| 1222 | ;;; (lambda (k v) | ||
| 1223 | ;;; (if v (aset table | ||
| 1224 | ;;; k | ||
| 1225 | ;;; (or (aref ucs-mule-8859-to-mule-unicode v) | ||
| 1226 | ;;; v)))) | ||
| 1227 | ;;; table)) | ||
| 1228 | ) | ||
| 1229 | 1197 | ||
| 1230 | (defun ucs-fragment-8859 (&optional encode-only) | 1198 | (defun ucs-fragment-8859 (&optional encode-only) |
| 1231 | "Undo the unification done by `ucs-unify-8859'. | 1199 | "Undo the unification done by `ucs-unify-8859'. |
| @@ -1235,7 +1203,8 @@ unification on input operations." | |||
| 1235 | ;; Maybe fix decoding. | 1203 | ;; Maybe fix decoding. |
| 1236 | (unless encode-only | 1204 | (unless encode-only |
| 1237 | ;; Unify 8859 on decoding. (Non-CCL coding systems only.) | 1205 | ;; Unify 8859 on decoding. (Non-CCL coding systems only.) |
| 1238 | (unify-8859-on-decoding-mode -1)) | 1206 | (set-char-table-parent standard-translation-table-for-decode nil) |
| 1207 | (setq-default translation-table-for-input nil)) | ||
| 1239 | ;; Fix encoding. For each charset, remove the entries in | 1208 | ;; Fix encoding. For each charset, remove the entries in |
| 1240 | ;; `char-coding-system-table' added to its safe-chars table (as its | 1209 | ;; `char-coding-system-table' added to its safe-chars table (as its |
| 1241 | ;; parent). | 1210 | ;; parent). |
| @@ -1253,7 +1222,11 @@ unification on input operations." | |||
| 1253 | (delq coding-system codings))))) | 1222 | (delq coding-system codings))))) |
| 1254 | (char-table-parent safe)) | 1223 | (char-table-parent safe)) |
| 1255 | (set-char-table-parent safe nil) | 1224 | (set-char-table-parent safe nil) |
| 1256 | (coding-system-put coding-system 'translation-table-for-encode nil)))) | 1225 | (coding-system-put coding-system 'translation-table-for-encode nil) |
| 1226 | (coding-system-put coding-system 'translation-table-for-input nil))) | ||
| 1227 | (optimize-char-table char-coding-system-table) | ||
| 1228 | (remove-hook 'quail-activate-hook 'ucs-quail-activate) | ||
| 1229 | (remove-hook 'minibuffer-setup-hook 'ucs-minibuffer-setup)) | ||
| 1257 | 1230 | ||
| 1258 | (define-minor-mode unify-8859-on-encoding-mode | 1231 | (define-minor-mode unify-8859-on-encoding-mode |
| 1259 | "Set up translation tables for unifying ISO 8859 characters on encoding. | 1232 | "Set up translation tables for unifying ISO 8859 characters on encoding. |
| @@ -1276,42 +1249,54 @@ directly to a byte value 233. By default, in contrast, you would be | |||
| 1276 | prompted for a general coding system to use for saving the file, which | 1249 | prompted for a general coding system to use for saving the file, which |
| 1277 | can cope with separate Latin-1 and Latin-9 representations of e-acute. | 1250 | can cope with separate Latin-1 and Latin-9 representations of e-acute. |
| 1278 | 1251 | ||
| 1252 | Also sets hooks that arrange `translation-table-for-input' to be set | ||
| 1253 | up locally when Quail input methods are activated. This will often | ||
| 1254 | allow input generated by Quail input methods to conform with what the | ||
| 1255 | buffer's file coding system can encode. Thus you could use a Latin-2 | ||
| 1256 | input method to search for e-acute in a Latin-1 buffer. | ||
| 1257 | |||
| 1279 | See also command `unify-8859-on-decoding-mode'." | 1258 | See also command `unify-8859-on-decoding-mode'." |
| 1280 | :group 'mule | 1259 | :group 'mule |
| 1281 | :global t | 1260 | :global t |
| 1282 | :version "21.3" ; who knows...? | ||
| 1283 | :init-value nil | 1261 | :init-value nil |
| 1284 | (if unify-8859-on-encoding-mode | 1262 | (if unify-8859-on-encoding-mode |
| 1285 | (ucs-unify-8859 t) | 1263 | (ucs-unify-8859 t) |
| 1286 | (ucs-fragment-8859 t))) | 1264 | (ucs-fragment-8859 t))) |
| 1287 | 1265 | ||
| 1266 | (custom-add-version 'unify-8859-on-encoding-mode "21.4") | ||
| 1267 | |||
| 1288 | (define-minor-mode unify-8859-on-decoding-mode | 1268 | (define-minor-mode unify-8859-on-decoding-mode |
| 1289 | "Set up translation table for unifying ISO 8859 characters on decoding. | 1269 | "Set up translation tables for unifying ISO 8859 characters on decoding. |
| 1290 | On decoding -- i.e. input operations -- non-ASCII characters from the | 1270 | On decoding, i.e. input operations, non-ASCII characters from the |
| 1291 | built-in ISO 8859 charsets are unified by mapping them into the | 1271 | built-in ISO 8859 charsets are unified by mapping them into the |
| 1292 | `iso-latin-1' and `mule-unicode-0100-24ff' charsets. | 1272 | `iso-latin-1' and `mule-unicode-0100-24ff' charsets. |
| 1293 | 1273 | ||
| 1294 | This sets the parent of `standard-translation-table-for-decode'. | ||
| 1295 | Also sets `translation-table-for-input' globally, so that Quail input | 1274 | Also sets `translation-table-for-input' globally, so that Quail input |
| 1296 | methods produce unified characters. | 1275 | methods produce unified characters. |
| 1297 | 1276 | ||
| 1298 | See also command `unify-8859-on-encoding-mode'." | 1277 | See also command `unify-8859-on-encoding-mode' and the user option |
| 1278 | `utf-8-fragment-on-decoding'." | ||
| 1299 | :group 'mule | 1279 | :group 'mule |
| 1300 | :global t | 1280 | :global t |
| 1301 | :version "21.3" ; who knows...? | ||
| 1302 | :init-value nil | 1281 | :init-value nil |
| 1303 | (let ((table (if unify-8859-on-decoding-mode ucs-mule-8859-to-mule-unicode))) | 1282 | (if unify-8859-on-decoding-mode |
| 1304 | (set-char-table-parent standard-translation-table-for-decode table) | 1283 | (ucs-unify-8859) |
| 1305 | (setq-default translation-table-for-input table))) | 1284 | (ucs-fragment-8859))) |
| 1285 | |||
| 1286 | (custom-add-dependencies 'unify-8859-on-decoding-mode | ||
| 1287 | '(utf-8-fragment-on-decoding)) | ||
| 1288 | (custom-add-version 'unify-8859-on-decoding-mode "21.4") | ||
| 1306 | 1289 | ||
| 1307 | (defun ucs-insert (arg) | 1290 | (defun ucs-insert (arg) |
| 1308 | "Insert the Emacs character representation of the given Unicode. | 1291 | "Insert the Emacs character representation of the given Unicode. |
| 1309 | Interactively, prompts for a hex string giving the code." | 1292 | Interactively, prompts for a hex string giving the code." |
| 1310 | (interactive "sUnicode (hex): ") | 1293 | (interactive "sUnicode (hex): ") |
| 1311 | (insert (or (decode-char 'ucs (if (integerp arg) | 1294 | (let ((c (decode-char 'ucs (if (integerp arg) |
| 1312 | arg | 1295 | arg |
| 1313 | (string-to-number arg 16))) | 1296 | (string-to-number arg 16))))) |
| 1314 | (error "Unknown Unicode character")))) | 1297 | (if c |
| 1298 | (insert c) | ||
| 1299 | (error "Character can't be decoded to UCS")))) | ||
| 1315 | 1300 | ||
| 1316 | ;;; Dealing with non-8859 character sets. | 1301 | ;;; Dealing with non-8859 character sets. |
| 1317 | 1302 | ||
| @@ -2458,11 +2443,23 @@ Interactively, prompts for a hex string giving the code." | |||
| 2458 | (aset ucs-mule-to-mule-unicode (car pair) (cdr pair)) | 2443 | (aset ucs-mule-to-mule-unicode (car pair) (cdr pair)) |
| 2459 | (if encode-translator | 2444 | (if encode-translator |
| 2460 | (aset encode-translator (cdr pair) (car pair)))) | 2445 | (aset encode-translator (cdr pair) (car pair)))) |
| 2446 | (if encode-translator | ||
| 2447 | (optimize-char-table encode-translator)) | ||
| 2461 | (if (charsetp cs) | 2448 | (if (charsetp cs) |
| 2462 | (push cs safe-charsets) | 2449 | (push cs safe-charsets) |
| 2463 | (setq safe-charsets | 2450 | (setq safe-charsets |
| 2464 | (append (delq 'ascii (coding-system-get cs 'safe-charsets)) | 2451 | (append (delq 'ascii (coding-system-get cs 'safe-charsets)) |
| 2465 | safe-charsets))))) | 2452 | safe-charsets))) |
| 2453 | (cond ((eq cs 'vietnamese-viscii) | ||
| 2454 | (coding-system-put 'vietnamese-viscii | ||
| 2455 | 'translation-table-for-input | ||
| 2456 | encode-translator) | ||
| 2457 | (coding-system-put 'vietnamese-viqr | ||
| 2458 | 'translation-table-for-input | ||
| 2459 | encode-translator)) | ||
| 2460 | ((memq cs '(lao thai-tis620 tibetan-iso-8bit)) | ||
| 2461 | (coding-system-put cs 'translation-table-for-input cs))))) | ||
| 2462 | (optimize-char-table ucs-mule-to-mule-unicode) | ||
| 2466 | (dolist (c safe-charsets) | 2463 | (dolist (c safe-charsets) |
| 2467 | (aset table (make-char c) t)) | 2464 | (aset table (make-char c) t)) |
| 2468 | (coding-system-put 'mule-utf-8 'safe-charsets | 2465 | (coding-system-put 'mule-utf-8 'safe-charsets |
| @@ -2470,6 +2467,81 @@ Interactively, prompts for a hex string giving the code." | |||
| 2470 | safe-charsets)) | 2467 | safe-charsets)) |
| 2471 | (register-char-codings 'mule-utf-8 table))) | 2468 | (register-char-codings 'mule-utf-8 table))) |
| 2472 | 2469 | ||
| 2470 | (defvar translation-table-for-input (make-translation-table)) | ||
| 2471 | |||
| 2472 | ;; Arrange to set up the translation table for Quail. This probably | ||
| 2473 | ;; isn't foolproof. | ||
| 2474 | (defun ucs-quail-activate () | ||
| 2475 | "Set up an appropriate `translation-table-for-input' for current buffer. | ||
| 2476 | Intended to be added to `quail-activate-hook'." | ||
| 2477 | (let ((cs (coding-system-base buffer-file-coding-system))) | ||
| 2478 | (if (eq cs 'undecided) | ||
| 2479 | (setq cs (coding-system-base default-buffer-file-coding-system))) | ||
| 2480 | (if (coding-system-get cs 'translation-table-for-input) | ||
| 2481 | (set (make-variable-buffer-local 'translation-table-for-input) | ||
| 2482 | (coding-system-get cs 'translation-table-for-input))))) | ||
| 2483 | |||
| 2484 | ;; The minibuffer needs to acquire a `buffer-file-coding-system' for | ||
| 2485 | ;; the above to work in it. | ||
| 2486 | (defun ucs-minibuffer-setup () | ||
| 2487 | "Set up an appropriate `buffer-file-coding-system' for current buffer. | ||
| 2488 | Does so by inheriting it from the cadr of the current buffer list. | ||
| 2489 | Intended to be added to `minibuffer-setup-hook'." | ||
| 2490 | (set (make-local-variable 'buffer-file-coding-system) | ||
| 2491 | (with-current-buffer (cadr (buffer-list)) | ||
| 2492 | buffer-file-coding-system))) | ||
| 2493 | |||
| 2494 | ;; Modified to allow display of arbitrary characters with an | ||
| 2495 | ;; iso-10646-encoded (`Unicode') font. | ||
| 2496 | (define-ccl-program ccl-encode-unicode-font | ||
| 2497 | `(0 | ||
| 2498 | ((if (r0 == ,(charset-id 'ascii)) | ||
| 2499 | ((r2 = r1) | ||
| 2500 | (r1 = 0)) | ||
| 2501 | ( | ||
| 2502 | ;; Look for a translation for non-ASCII chars. For a 2D | ||
| 2503 | ;; charset, produce a single code for the translation. | ||
| 2504 | ;; Official 2D sets are in the charset id range [#x90,#x99], | ||
| 2505 | ;; private ones in the range [#xf0,#xfe] (with #xff not used). | ||
| 2506 | ;; Fixme: Is there a better way to do this? | ||
| 2507 | (r3 = (r0 >= #x90)) | ||
| 2508 | (r3 &= (r0 <= #x99)) | ||
| 2509 | (r3 |= (r0 >= #xf0)) | ||
| 2510 | (if r3 ; 2D input | ||
| 2511 | (r1 = ((r1 << 7) | r2))) | ||
| 2512 | (translate-character ucs-mule-to-mule-unicode r0 r1) | ||
| 2513 | (r3 = (r0 >= #x90)) | ||
| 2514 | (r3 &= (r0 <= #x99)) | ||
| 2515 | (r3 |= (r0 >= #xf0)) | ||
| 2516 | (if r3 ; 2D translation | ||
| 2517 | ((r2 = (r1 & 127)) | ||
| 2518 | (r1 = (r1 >> 7)))) | ||
| 2519 | (if (r0 == ,(charset-id 'latin-iso8859-1)) | ||
| 2520 | ((r2 = (r1 + 128)) | ||
| 2521 | (r1 = 0)) | ||
| 2522 | (if (r0 == ,(charset-id 'mule-unicode-0100-24ff)) | ||
| 2523 | ((r1 *= 96) | ||
| 2524 | (r1 += r2) | ||
| 2525 | (r1 += ,(- #x100 (* 32 96) 32)) | ||
| 2526 | (r1 >8= 0) | ||
| 2527 | (r2 = r7)) | ||
| 2528 | (if (r0 == ,(charset-id 'mule-unicode-2500-33ff)) | ||
| 2529 | ((r1 *= 96) | ||
| 2530 | (r1 += r2) | ||
| 2531 | (r1 += ,(- #x2500 (* 32 96) 32)) | ||
| 2532 | (r1 >8= 0) | ||
| 2533 | (r2 = r7)) | ||
| 2534 | (if (r0 == ,(charset-id 'mule-unicode-e000-ffff)) | ||
| 2535 | ((r1 *= 96) | ||
| 2536 | (r1 += r2) | ||
| 2537 | (r1 += ,(- #xe000 (* 32 96) 32)) | ||
| 2538 | (r1 >8= 0) | ||
| 2539 | (r2 = r7)))))))))) | ||
| 2540 | "Encode characters for display with iso10646 font. | ||
| 2541 | Translate through table `ucs-mule-to-mule-unicode' initially.") | ||
| 2542 | |||
| 2543 | (defalias 'ucs-tables-unload-hook 'ucs-fragment-8859) | ||
| 2544 | |||
| 2473 | (provide 'ucs-tables) | 2545 | (provide 'ucs-tables) |
| 2474 | 2546 | ||
| 2475 | ;;; ucs-tables.el ends here | 2547 | ;;; ucs-tables.el ends here |