diff options
| author | Eli Zaretskii | 1998-12-17 17:01:11 +0000 |
|---|---|---|
| committer | Eli Zaretskii | 1998-12-17 17:01:11 +0000 |
| commit | 75e98450b2d34ed4cf91ae06ee80411f42306d19 (patch) | |
| tree | 63e0083ce5422c79763c66644de5dfdc8e2bb7c0 | |
| parent | ca493964089ecf3912a3f16e9118bb4e2bddacd7 (diff) | |
| download | emacs-75e98450b2d34ed4cf91ae06ee80411f42306d19.tar.gz emacs-75e98450b2d34ed4cf91ae06ee80411f42306d19.zip | |
Initial revision
| -rw-r--r-- | lisp/international/codepage.el | 530 |
1 files changed, 530 insertions, 0 deletions
diff --git a/lisp/international/codepage.el b/lisp/international/codepage.el new file mode 100644 index 00000000000..bd7dc8bf8a4 --- /dev/null +++ b/lisp/international/codepage.el | |||
| @@ -0,0 +1,530 @@ | |||
| 1 | ;;; codepage.el --- MS-DOS specific coding systems. | ||
| 2 | |||
| 3 | ;; Copyright (C) 1998 Free Software Foundation, Inc. | ||
| 4 | |||
| 5 | ;; Author: Eli Zaretskii | ||
| 6 | ;; Maintainer: FSF | ||
| 7 | ;; Keywords: i18n ms-dos codepage | ||
| 8 | |||
| 9 | ;; This file is part of GNU Emacs. | ||
| 10 | |||
| 11 | ;; GNU Emacs is free software; you can redistribute it and/or modify | ||
| 12 | ;; it under the terms of the GNU General Public License as published by | ||
| 13 | ;; the Free Software Foundation; either version 2, or (at your option) | ||
| 14 | ;; any later version. | ||
| 15 | |||
| 16 | ;; GNU Emacs is distributed in the hope that it will be useful, | ||
| 17 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 18 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 19 | ;; GNU General Public License for more details. | ||
| 20 | |||
| 21 | ;; You should have received a copy of the GNU General Public License | ||
| 22 | ;; along with GNU Emacs; see the file COPYING. If not, write to the | ||
| 23 | ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 24 | ;; Boston, MA 02111-1307, USA. | ||
| 25 | |||
| 26 | ;;; Commentary: | ||
| 27 | |||
| 28 | ;; Special coding systems for DOS codepage support. | ||
| 29 | ;; | ||
| 30 | ;; These coding systems perform conversion from the DOS codepage encoding | ||
| 31 | ;; to one of the ISO-8859 character sets. Each codepage has its corresponding | ||
| 32 | ;; ISO-8859 charset, chosen so as to be able to convert all (or most) of the | ||
| 33 | ;; characters. The idea is that Emacs internally works with the usual MULE | ||
| 34 | ;; charsets, and the conversion to and from the DOS codepage is performed | ||
| 35 | ;; on I/O only. | ||
| 36 | ;; See term/internal.el for the complementary setup of the DOS terminal | ||
| 37 | ;; display and input methods. | ||
| 38 | ;; | ||
| 39 | ;; Thanks to Ken'ichi Handa <handa@etl.go.jp> for writing the CCL | ||
| 40 | ;; encoders/decoders, and for help in debugging this code. | ||
| 41 | |||
| 42 | ;;; Code: | ||
| 43 | |||
| 44 | (defun cp-coding-system-for-codepage-1 (coding mnemonic iso-name | ||
| 45 | decoder encoder) | ||
| 46 | "Make coding system CODING for a DOS codepage using translation tables. | ||
| 47 | MNEMONIC is a character to be displayed on the mode line for the coding system. | ||
| 48 | ISO-NAME is the name of the ISO-8859 charset which corresponds to this | ||
| 49 | codepage. | ||
| 50 | DECODER is a translation table for converting characters in the DOS codepage | ||
| 51 | encoding to Emacs multibyte characters. | ||
| 52 | ENCODER is a translation table for encoding Emacs multibyte characters into | ||
| 53 | external DOS codepage codes. | ||
| 54 | |||
| 55 | Note that the coding systems created by this function don't support | ||
| 56 | automatic detection of the EOL format. Use explicit -dos or -unix variants | ||
| 57 | as appropriate (Mac EOL style is not supported, as it doesn't make sense for | ||
| 58 | these coding systems). | ||
| 59 | |||
| 60 | If the coding system's name ends with \"-dos\", this function automatically | ||
| 61 | creates a coding system which converts from and to DOS EOL format; otherwise | ||
| 62 | the created coding system assumes Unix-style EOL (i.e., it doesn't perform | ||
| 63 | any EOL conversions)." | ||
| 64 | (save-match-data | ||
| 65 | (let* ((coding-name (symbol-name coding)) | ||
| 66 | (eol-type (string-match "-\\(dos\\|unix\\)\\'" coding-name)) ; index of a trailing "-dos"/"-unix", or nil | ||
| 67 | (dos-p | ||
| 68 | (and eol-type | ||
| 69 | (string= "-dos" (substring coding-name eol-type)))) | ||
| 70 | (coding-sans-eol ; CODING's name with the EOL suffix (if any) removed | ||
| 71 | (if eol-type (substring coding-name 0 eol-type) coding-name)) | ||
| 72 | (ccl-decoder | ||
| 73 | (if dos-p | ||
| 74 | (ccl-compile | ||
| 75 | `(4 (loop (read r1) | ||
| 76 | (if (r1 != ?\r) ; a CR byte writes nothing, so CRs are dropped on decoding | ||
| 77 | (if (r1 >= 128) ; only codes >= 128 go through DECODER | ||
| 78 | ((r0 = ,(charset-id 'ascii)) | ||
| 79 | (translate-character ,decoder r0 r1) | ||
| 80 | (if (r0 == ,(charset-id 'ascii)) ; charset still ASCII after translation: write r1 as a single byte | ||
| 81 | (write r1) | ||
| 82 | (write-multibyte-character r0 r1))) | ||
| 83 | (write r1))) | ||
| 84 | (repeat)))) | ||
| 85 | (ccl-compile ; non-DOS variant: same as above, minus the CR test | ||
| 86 | `(4 (loop (read r1) | ||
| 87 | (if (r1 >= 128) | ||
| 88 | ((r0 = ,(charset-id 'ascii)) | ||
| 89 | (translate-character ,decoder r0 r1) | ||
| 90 | (if (r0 == ,(charset-id 'ascii)) | ||
| 91 | (write r1) | ||
| 92 | (write-multibyte-character r0 r1))) | ||
| 93 | (write r1)) | ||
| 94 | (repeat)))))) | ||
| 95 | (ccl-encoder | ||
| 96 | (if dos-p | ||
| 97 | (ccl-compile | ||
| 98 | `(1 (loop (read-multibyte-character r0 r1) | ||
| 99 | (if (r1 == ?\n) | ||
| 100 | (write ?\r) ; CR here; the LF itself is written by write-repeat below | ||
| 101 | (if (r0 != ,(charset-id 'ascii)) ; ASCII characters bypass ENCODER | ||
| 102 | ((translate-character ,encoder r0 r1) | ||
| 103 | (if (r0 == ,(charset-id 'japanese-jisx0208)) | ||
| 104 | ((r1 = ??) ; writes one `?' here; write-repeat below writes the second | ||
| 105 | (write r1)))))) | ||
| 106 | (write-repeat r1)))) | ||
| 107 | (ccl-compile ; non-DOS variant: same as above, minus the LF test | ||
| 108 | `(1 (loop (read-multibyte-character r0 r1) | ||
| 109 | (if (r0 != ,(charset-id 'ascii)) | ||
| 110 | ((translate-character ,encoder r0 r1) | ||
| 111 | (if (r0 == ,(charset-id 'japanese-jisx0208)) | ||
| 112 | ((r1 = ??) | ||
| 113 | (write r1))))) | ||
| 114 | (write-repeat r1))))))) | ||
| 115 | (if (memq coding coding-system-list) ; remove CODING from the list before (re)defining it | ||
| 116 | (setq coding-system-list (delq coding coding-system-list))) | ||
| 117 | (make-coding-system | ||
| 118 | coding 4 mnemonic | ||
| 119 | (concat "8-bit encoding of " (symbol-name iso-name) | ||
| 120 | " characters using IBM codepage " (substring coding-sans-eol 2)) ; drops the first two chars of the name (presumably the "cp" prefix) | ||
| 121 | (cons ccl-decoder ccl-encoder) | ||
| 122 | `((safe-charsets ascii ,iso-name))) | ||
| 123 | (put coding 'eol-type (if dos-p 1 0))))) ; eol-type property: 1 for a -dos name, 0 otherwise | ||
| 124 | |||
| 125 | (defun cp-decoding-vector-for-codepage (table charset offset) | ||
| 126 | "Create a vector for decoding IBM PC characters using conversion table | ||
| 127 | TABLE into an ISO-8859 character set CHARSET whose first non-ASCII | ||
| 128 | character is generated by (make-char CHARSET OFFSET).  The result has 256 elements, indexed by codepage code." | ||
| 129 | (let* ((len (length table)) | ||
| 130 | (undefined-char ; filler for codes that TABLE does not mention | ||
| 131 | (if (eq system-type 'ms-dos) | ||
| 132 | (if dos-unsupported-char-glyph | ||
| 133 | (logand dos-unsupported-char-glyph 255) ; keep only the low 8 bits | ||
| 134 | 127) | ||
| 135 | 32)) ; on other systems: 32 (ASCII space) | ||
| 136 | (vec1 (make-vector 256 undefined-char)) | ||
| 137 | (i 0)) | ||
| 138 | (while (< i offset) ; codes below OFFSET map to themselves | ||
| 139 | (aset vec1 i i) | ||
| 140 | (setq i (1+ i))) | ||
| 141 | (setq i 0) | ||
| 142 | (while (< i len) | ||
| 143 | (if (aref table i) ; nil entries in TABLE are skipped | ||
| 144 | (aset vec1 (aref table i) (make-char charset (+ i offset)))) ; slot = codepage code, value = Emacs character | ||
| 145 | (setq i (1+ i))) | ||
| 146 | vec1)) | ||
| 147 | |||
| 148 | ;;; You don't think I created all these tables below by hand, do you? | ||
| 149 | ;;; The following Awk script will create the table for cp850-to-Latin-1 | ||
| 150 | ;;; conversion from the RFC 1345 file (the other tables are left as an | ||
| 151 | ;;; exercise): | ||
| 152 | ;;; BEGIN { n_pages = 11; | ||
| 153 | ;;; pn["IBM437"] = 0; pn["IBM850"] = 1; pn["IBM851"] = 2; | ||
| 154 | ;;; pn["IBM852"] = 3; pn["IBM855"] = 4; pn["IBM860"] = 5; | ||
| 155 | ;;; pn["IBM861"] = 6; pn["IBM862"] = 7; pn["IBM863"] = 8; | ||
| 156 | ;;; pn["IBM864"] = 9; pn["IBM865"] = 10; | ||
| 157 | ;;; } | ||
| 158 | ;;; $1 == "&charset" { charset = $2; } | ||
| 159 | ;;; $1 == "&code" { code = $2; } | ||
| 160 | ;;; /^ [^&]/ { | ||
| 161 | ;;; if ((charset ~ /^IBM(437|8(5[0125]|6[0-5]))$/) || (charset ~ /^ISO_8859-1/)) | ||
| 162 | ;;; { | ||
| 163 | ;;; for (i = 1; i <= NF; i++) | ||
| 164 | ;;; chars[charset,code++] = $i; | ||
| 165 | ;;; } | ||
| 166 | ;;; } | ||
| 167 | ;;; | ||
| 168 | ;;; END { | ||
| 169 | ;;; for (i = 160; i < 256; i++) | ||
| 170 | ;;; { | ||
| 171 | ;;; c = chars["ISO_8859-1:1987",i]; | ||
| 172 | ;;; if (c == "??") # skip unused positions | ||
| 173 | ;;; { | ||
| 174 | ;;; printf " nil"; | ||
| 175 | ;;; if ((i - 159)%16 == 0) | ||
| 176 | ;;; printf "\n"; | ||
| 177 | ;;; continue; | ||
| 178 | ;;; } | ||
| 179 | ;;; found = 0; | ||
| 180 | ;;; for (j in pn) | ||
| 181 | ;;; map[j] = "nil"; | ||
| 182 | ;;; for (combined in chars) | ||
| 183 | ;;; { | ||
| 184 | ;;; candidate = chars[combined]; | ||
| 185 | ;;; split (combined, separate, SUBSEP); | ||
| 186 | ;;; if (separate[1] == "IBM850" && candidate == c) | ||
| 187 | ;;; { | ||
| 188 | ;;; found = 1; | ||
| 189 | ;;; map[separate[1]] = separate[2]; | ||
| 190 | ;;; } | ||
| 191 | ;;; } | ||
| 192 | ;;; printf " %s", map["IBM850"]; | ||
| 193 | ;;; if ((i - 159)%16 == 0) | ||
| 194 | ;;; printf "\n"; | ||
| 195 | ;;; } | ||
| 196 | ;;; } | ||
| 197 | |||
| 198 | ;;; WARNING WARNING WARNING!!! | ||
| 199 | ;;; | ||
| 200 | ;;; If you want to get fancy with these tables, remember that the inverse | ||
| 201 | ;;; tables, created by `cp-decoding-vector-for-codepage' above, are installed | ||
| 202 | ;;; on MS-DOS as nonascii-translation-table (see `dos-codepage-setup' in | ||
| 203 | ;;; internal.el). Therefore, you should NOT put any codes below 128 in | ||
| 204 | ;;; these tables! Otherwise, various Emacs commands and functions will | ||
| 205 | ;;; mysteriously fail! For example, a typical screwup is to map the Latin-N | ||
| 206 | ;;; acute accent character to the apostrophe, and have all regexps which | ||
| 207 | ;;; end with "\\'" begin to fail (e.g., the automatic setting of the major | ||
| 208 | ;;; mode by file name extension will stop working). | ||
| 209 | ;;; | ||
| 210 | ;;; You HAVE BEEN warned! | ||
| 211 | |||
| 212 | ;; US/English/PC-8/IBM-2. This doesn't support Latin-1 characters very | ||
| 213 | ;; well, but why not use what we can salvage? | ||
| 214 | (defvar cp437-decode-table | ||
| 215 | ;; Nth element is the code of a cp437 glyph for the multibyte | ||
| 216 | ;; character created by (make-char 'latin-iso8859-1 (+ N 160)). | ||
| 217 | ;; A nil element means there's no corresponding cp437 glyph. | ||
| 218 | [ | ||
| 219 | 255 173 155 156 nil 157 179 nil nil nil 166 174 170 196 nil nil | ||
| 220 | 248 241 253 nil nil nil nil 249 nil nil 167 175 172 171 nil 168 | ||
| 221 | nil nil nil nil 142 143 146 128 nil 144 nil nil nil nil nil nil | ||
| 222 | nil 165 nil nil nil nil 153 nil nil nil nil nil 154 nil nil 225 | ||
| 223 | 133 160 131 nil 132 134 145 135 138 130 136 137 141 161 140 139 | ||
| 224 | nil 164 149 162 147 nil 148 246 nil 151 163 150 129 nil nil 152] | ||
| 225 | "Table for converting ISO-8859-1 characters into codepage 437 glyphs.") | ||
| 226 | (setplist 'cp437-decode-table ; plist read by cp-charset/language/offset-for-codepage; NOTE(review): setplist replaces the whole plist -- confirm the defvar docstring survives | ||
| 227 | '(charset latin-iso8859-1 language "Latin-1" offset 160)) | ||
| 228 | |||
| 229 | ;; Multilingual (Latin-1) | ||
| 230 | (defvar cp850-decode-table | ||
| 231 | ;; Nth element is the code of a cp850 glyph for the multibyte | ||
| 232 | ;; character created by (make-char 'latin-iso8859-1 (+ N 160)). | ||
| 233 | ;; A nil element means there's no corresponding cp850 glyph. | ||
| 234 | [ | ||
| 235 | 255 173 189 156 207 190 221 245 249 184 166 174 170 240 169 nil | ||
| 236 | 248 241 253 252 239 nil 244 nil nil 251 167 175 172 171 243 168 | ||
| 237 | 183 181 182 199 142 143 146 128 212 144 210 211 222 214 215 216 | ||
| 238 | 209 165 227 224 226 229 153 158 157 235 233 234 154 237 231 225 | ||
| 239 | 133 160 131 198 132 134 145 135 138 130 136 137 141 161 140 139 | ||
| 240 | 208 164 149 162 147 228 148 246 155 151 163 150 129 236 232 152] | ||
| 241 | "Table for converting ISO-8859-1 characters into codepage 850 glyphs.") | ||
| 242 | (setplist 'cp850-decode-table ; plist read by cp-charset/language/offset-for-codepage; NOTE(review): setplist replaces the whole plist -- confirm the defvar docstring survives | ||
| 243 | '(charset latin-iso8859-1 language "Latin-1" offset 160)) | ||
| 244 | |||
| 245 | ;; Greek | ||
| 246 | (defvar cp851-decode-table | ||
| 247 | [ ; Nth element: cp851 glyph code for (make-char 'greek-iso8859-7 (+ N 160)); nil = no such glyph | ||
| 248 | 255 nil nil 156 nil nil nil 245 249 nil nil 174 nil 240 nil nil | ||
| 249 | 248 241 nil nil 239 nil 134 nil 141 143 144 175 146 171 149 152 | ||
| 250 | 161 164 165 166 167 168 169 170 172 173 181 182 184 183 189 190 | ||
| 251 | 198 199 nil 207 208 209 210 211 212 213 nil nil 155 157 158 159 | ||
| 252 | 252 214 215 216 221 222 224 225 226 227 228 229 230 231 232 233 | ||
| 253 | 234 235 237 236 238 242 243 244 246 250 160 251 162 163 253 nil] | ||
| 254 | "Table for converting ISO-8859-7 characters into codepage 851 glyphs.") | ||
| 255 | (setplist 'cp851-decode-table ; plist read by cp-charset/language/offset-for-codepage; NOTE(review): setplist replaces the whole plist -- confirm the defvar docstring survives | ||
| 256 | '(charset greek-iso8859-7 language "Greek" offset 160)) | ||
| 257 | |||
| 258 | ;; Slavic/Eastern Europe (Latin-2) | ||
| 259 | (defvar cp852-decode-table | ||
| 260 | [ ; Nth element: cp852 glyph code for (make-char 'latin-iso8859-2 (+ N 160)); nil = no such glyph | ||
| 261 | 255 164 244 157 207 149 151 245 249 230 184 155 141 240 166 189 | ||
| 262 | 248 165 247 136 239 150 152 243 242 231 173 156 171 241 167 190 | ||
| 263 | 232 181 182 198 142 145 143 128 172 144 168 211 183 214 nil 210 | ||
| 264 | 209 227 213 224 226 138 153 158 252 222 233 235 154 237 221 225 | ||
| 265 | 234 160 131 199 132 146 134 135 159 130 169 137 216 161 140 212 | ||
| 266 | 208 228 229 162 147 139 148 246 253 133 163 251 129 236 238 250] | ||
| 267 | "Table for converting ISO-8859-2 characters into codepage 852 glyphs.") | ||
| 268 | (setplist 'cp852-decode-table ; plist read by cp-charset/language/offset-for-codepage; NOTE(review): setplist replaces the whole plist -- confirm the defvar docstring survives | ||
| 269 | '(charset latin-iso8859-2 language "Latin-2" offset 160)) | ||
| 270 | |||
| 271 | ;; Russian | ||
| 272 | (defvar cp855-decode-table | ||
| 273 | [ ; Nth element: cp855 glyph code for (make-char 'cyrillic-iso8859-5 (+ N 160)); nil = no such glyph | ||
| 274 | 255 133 129 131 135 137 139 141 143 145 147 nil 151 240 153 155 | ||
| 275 | 161 163 236 173 167 169 234 244 184 190 199 209 211 213 215 221 | ||
| 276 | 226 228 230 232 171 182 165 252 246 250 159 242 238 248 157 224 | ||
| 277 | 160 162 235 172 166 168 233 243 183 189 198 208 210 212 214 216 | ||
| 278 | 225 227 229 231 170 181 164 251 245 249 158 241 237 247 156 222 | ||
| 279 | nil 132 128 130 134 136 138 140 142 144 146 148 150 nil 152 154] | ||
| 280 | "Table for converting ISO-8859-5 characters into codepage 855 glyphs.") | ||
| 281 | (setplist 'cp855-decode-table ; plist read by cp-charset/language/offset-for-codepage; NOTE(review): setplist replaces the whole plist -- confirm the defvar docstring survives | ||
| 282 | '(charset cyrillic-iso8859-5 language "Cyrillic-ISO" offset 160)) | ||
| 283 | |||
| 284 | ;; Turkish | ||
| 285 | (defvar cp857-decode-table | ||
| 286 | [ ; Nth element: cp857 glyph code for (make-char 'latin-iso8859-3 (+ N 160)); nil = no such glyph.  NOTE(review): only 89 elements here (rows of 14/15) vs. 6 rows of 16 in the other tables -- verify the N alignment | ||
| 287 | 255 nil nil 156 207 nil 245 249 152 158 166 nil 240 nil | ||
| 288 | 248 nil 253 252 239 nil nil nil nil 141 159 167 nil 171 nil | ||
| 289 | 183 181 182 142 nil nil 128 212 144 210 211 222 214 215 216 | ||
| 290 | 165 227 224 226 nil 153 232 nil 235 233 234 154 nil nil 225 | ||
| 291 | 133 160 131 132 nil nil 135 138 130 136 137 236 161 140 139 | ||
| 292 | 164 149 162 147 nil 148 246 nil 151 163 150 129 nil nil 250] | ||
| 293 | "Table for converting ISO-8859-3 characters into codepage 857 glyphs.") | ||
| 294 | (setplist 'cp857-decode-table ; plist read by cp-charset/language/offset-for-codepage; NOTE(review): setplist replaces the whole plist -- confirm the defvar docstring survives | ||
| 295 | '(charset latin-iso8859-3 language "Latin-3" offset 160)) | ||
| 296 | |||
| 297 | ;; Portuguese | ||
| 298 | (defvar cp860-decode-table | ||
| 299 | [ ; Nth element: cp860 glyph code for (make-char 'latin-iso8859-1 (+ N 160)); nil = no such glyph | ||
| 300 | 255 173 155 156 nil nil 179 nil nil nil 166 174 170 nil nil nil | ||
| 301 | nil 241 253 nil nil nil nil 249 nil nil 167 175 172 171 nil 168 | ||
| 302 | 145 134 143 142 nil nil nil 128 146 144 137 nil 152 nil 139 nil | ||
| 303 | nil 165 159 169 140 153 nil nil nil 157 150 nil 154 nil nil nil | ||
| 304 | 133 160 131 132 nil nil nil 135 138 130 136 nil 141 161 nil nil | ||
| 305 | nil 164 149 162 147 148 nil 246 nil 151 163 nil 129 nil nil nil] | ||
| 306 | "Table for converting ISO-8859-1 characters into codepage 860 glyphs.") | ||
| 307 | (setplist 'cp860-decode-table ; plist read by cp-charset/language/offset-for-codepage; NOTE(review): setplist replaces the whole plist -- confirm the defvar docstring survives | ||
| 308 | '(charset latin-iso8859-1 language "Latin-1" offset 160)) | ||
| 309 | |||
| 310 | ;; Icelandic | ||
| 311 | (defvar cp861-decode-table | ||
| 312 | [ ; Nth element: cp861 glyph code for (make-char 'latin-iso8859-1 (+ N 160)); nil = no such glyph | ||
| 313 | 255 173 nil 156 nil nil nil nil nil nil nil 174 170 nil nil nil | ||
| 314 | nil 241 253 nil nil nil nil 249 nil nil nil 175 172 171 nil 168 | ||
| 315 | nil 164 nil nil 142 143 146 128 nil 144 nil nil nil 165 nil nil | ||
| 316 | 139 nil 159 166 nil nil 153 nil 157 nil 167 nil 154 151 141 nil | ||
| 317 | 133 160 131 nil 132 134 145 135 138 130 136 137 nil 161 nil nil | ||
| 318 | 140 nil nil 162 147 nil 148 246 155 nil 163 150 129 152 149 nil] | ||
| 319 | "Table for converting ISO-8859-1 characters into codepage 861 glyphs.") | ||
| 320 | (setplist 'cp861-decode-table ; plist read by cp-charset/language/offset-for-codepage; NOTE(review): setplist replaces the whole plist -- confirm the defvar docstring survives | ||
| 321 | '(charset latin-iso8859-1 language "Latin-1" offset 160)) | ||
| 322 | |||
| 323 | ;; Hebrew | ||
| 324 | (defvar cp862-decode-table | ||
| 325 | ;; Nth element is the code of a cp862 glyph for the multibyte | ||
| 326 | ;; character created by (make-char 'hebrew-iso8859-8 (+ N 160)). | ||
| 327 | ;; A nil element means there's no corresponding cp862 glyph. | ||
| 328 | [ | ||
| 329 | 255 173 155 156 nil 157 179 nil nil nil nil 174 170 196 nil nil | ||
| 330 | 248 241 253 nil nil 230 nil 249 nil nil 246 175 172 171 nil nil | ||
| 331 | nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil | ||
| 332 | nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil 205 | ||
| 333 | 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 | ||
| 334 | 144 145 146 147 148 149 150 151 152 153 154 nil nil nil nil nil] | ||
| 335 | "Table for converting ISO-8859-8 characters into codepage 862 glyphs.") | ||
| 336 | (setplist 'cp862-decode-table ; plist read by cp-charset/language/offset-for-codepage; NOTE(review): setplist replaces the whole plist -- confirm the defvar docstring survives | ||
| 337 | '(charset hebrew-iso8859-8 language "Hebrew" offset 160)) | ||
| 338 | |||
| 339 | ;; French Canadian | ||
| 340 | (defvar cp863-decode-table | ||
| 341 | [ ; Nth element: cp863 glyph code for (make-char 'latin-iso8859-1 (+ N 160)); nil = no such glyph | ||
| 342 | 255 nil 155 156 152 nil 160 143 164 nil nil 174 170 nil nil 167 | ||
| 343 | nil 241 253 166 161 nil 134 249 165 nil nil 175 172 171 173 nil | ||
| 344 | 142 nil 132 nil nil nil nil 128 145 144 146 148 nil nil 168 149 | ||
| 345 | nil nil nil nil 153 nil nil nil nil 157 nil 158 154 nil nil nil | ||
| 346 | 133 nil 131 nil nil nil nil 135 138 130 136 137 141 nil 140 139 | ||
| 347 | nil nil nil 162 147 nil nil 246 nil 151 163 150 129 nil nil nil] | ||
| 348 | "Table for converting ISO-8859-1 characters into codepage 863 glyphs.") | ||
| 349 | (setplist 'cp863-decode-table ; plist read by cp-charset/language/offset-for-codepage; NOTE(review): setplist replaces the whole plist -- confirm the defvar docstring survives | ||
| 350 | '(charset latin-iso8859-1 language "Latin-1" offset 160)) | ||
| 351 | |||
| 352 | ;; Arabic | ||
| 353 | ;; FIXME: Emacs doesn't seem to support the "Arabic" language | ||
| 354 | ;; environment yet. So this is only partially usable, for now | ||
| 355 | (defvar cp864-decode-table | ||
| 356 | [ ; Nth element: cp864 glyph code for (make-char 'arabic-iso8859-6 (+ N 160)); nil = no such glyph | ||
| 357 | 255 nil nil nil 164 nil nil nil nil nil nil nil 172 161 nil nil | ||
| 358 | nil nil nil nil nil nil nil nil nil nil nil 187 nil nil nil 191 | ||
| 359 | nil 193 194 195 196 nil 198 199 169 201 170 171 173 174 175 207 | ||
| 360 | 208 209 210 188 189 190 235 215 216 223 238 nil nil nil nil nil | ||
| 361 | 224 247 248 252 251 239 242 243 232 233 253 nil nil nil nil nil | ||
| 362 | nil 241 nil nil nil nil nil nil nil nil nil nil nil nil nil nil] | ||
| 363 | "Table for converting ISO-8859-6 characters into codepage 864 glyphs.") | ||
| 364 | (setplist 'cp864-decode-table ; plist read by cp-charset/language/offset-for-codepage (language is nil here); NOTE(review): setplist replaces the whole plist -- confirm the defvar docstring survives | ||
| 365 | '(charset arabic-iso8859-6 language nil offset 160)) | ||
| 366 | |||
| 367 | ;; Nordic (Norwegian/Danish) | ||
| 368 | (defvar cp865-decode-table | ||
| 369 | [ ; Nth element: cp865 glyph code for (make-char 'latin-iso8859-1 (+ N 160)); nil = no such glyph | ||
| 370 | 255 173 nil 156 nil nil nil nil nil nil 166 174 170 nil nil nil | ||
| 371 | nil 241 253 nil nil nil nil 249 nil nil 167 175 172 171 nil 168 | ||
| 372 | nil nil nil nil 142 143 146 128 nil 144 nil nil nil nil nil nil | ||
| 373 | nil 165 nil nil nil nil 153 nil 157 nil nil nil 154 nil nil nil | ||
| 374 | 133 160 131 nil 132 134 145 135 138 130 136 137 141 161 140 139 | ||
| 375 | nil 164 149 162 147 nil 148 246 155 151 163 150 129 nil nil 152] | ||
| 376 | "Table for converting ISO-8859-1 characters into codepage 865 glyphs.") | ||
| 377 | (setplist 'cp865-decode-table ; plist read by cp-charset/language/offset-for-codepage; NOTE(review): setplist replaces the whole plist -- confirm the defvar docstring survives | ||
| 378 | '(charset latin-iso8859-1 language "Latin-1" offset 160)) | ||
| 379 | |||
| 380 | ;; Greek (yes, another one!) | ||
| 381 | (defvar cp869-decode-table | ||
| 382 | [ ; Nth element: cp869 glyph code for (make-char 'greek-iso8859-7 (+ N 160)); nil = no such glyph | ||
| 383 | 255 139 140 156 nil nil 138 245 249 151 nil 174 137 240 nil 142 | ||
| 384 | 248 241 153 154 239 247 134 136 141 143 144 175 146 171 149 152 | ||
| 385 | 161 164 165 166 167 168 169 170 172 173 181 182 183 184 189 190 | ||
| 386 | 198 199 nil 207 208 209 210 211 212 213 145 150 155 157 158 159 | ||
| 387 | 252 214 215 216 221 222 224 225 226 227 228 229 230 231 232 233 | ||
| 388 | 234 235 237 236 238 242 243 244 246 250 160 251 162 163 253 nil] | ||
| 389 | "Table for converting ISO-8859-7 characters into codepage 869 glyphs.") | ||
| 390 | (setplist 'cp869-decode-table ; plist read by cp-charset/language/offset-for-codepage; NOTE(review): setplist replaces the whole plist -- confirm the defvar docstring survives | ||
| 391 | '(charset greek-iso8859-7 language "Greek" offset 160)) | ||
| 392 | |||
| 393 | ;; Conversion from codepage 775 to Latin-4 for Baltic countries. | ||
| 394 | (defvar cp775-decode-table | ||
| 395 | [ ; Nth element: cp775 glyph code for (make-char 'latin-iso8859-4 (+ N 160)); nil = no such glyph | ||
| 396 | 255 181 nil 138 150 nil 234 245 166 190 237 149 173 240 207 nil | ||
| 397 | 248 208 nil 139 239 nil 235 nil nil 213 137 133 nil nil 216 nil | ||
| 398 | 160 nil nil nil 142 143 146 189 182 144 183 nil 184 nil nil 161 | ||
| 399 | nil 238 226 232 nil 229 153 158 157 198 nil nil 154 nil 199 225 | ||
| 400 | 131 nil nil nil 132 134 145 212 209 130 210 nil 211 nil nil 140 | ||
| 401 | nil 236 147 233 nil 228 148 nil 155 214 nil nil 129 nil 215 nil] | ||
| 402 | "Table for converting ISO-8859-4 characters into codepage 775 glyphs.") | ||
| 403 | (setplist 'cp775-decode-table ; plist read by cp-charset/language/offset-for-codepage; NOTE(review): setplist replaces the whole plist -- confirm the defvar docstring survives | ||
| 404 | '(charset latin-iso8859-4 language "Latin-4" offset 160)) | ||
| 405 | |||
| 406 | ;;;###autoload | ||
| 407 | (defun cp-make-coding-systems-for-codepage (codepage iso-name offset) | ||
| 408 | "Create 2 coding systems to convert IBM CODEPAGE into charset ISO-NAME | ||
| 409 | whose first character is at offset OFFSET from the beginning of 8-bit | ||
| 410 | ASCII table.  The coding systems are named CODEPAGE-dos and CODEPAGE-unix. | ||
| 411 | |||
| 412 | The two coding systems are identical except for the EOL conversion: one | ||
| 413 | of them decodes DOS-style EOLs, the other assumes Unix style and doesn't | ||
| 414 | perform any EOL conversions." | ||
| 415 | (let* ((decode-table (intern (format "%s-decode-table" codepage))) ; symbol whose value is the glyph table (read with symbol-value below) | ||
| 416 | (nonascii-table | ||
| 417 | (intern (format "%s-nonascii-translation-table" codepage))) | ||
| 418 | (decode-translation | ||
| 419 | (intern (format "%s-decode-translation-table" codepage))) | ||
| 420 | (encode-translation | ||
| 421 | (intern (format "%s-encode-translation-table" codepage))) | ||
| 422 | (codepage-dos | ||
| 423 | (intern (format "%s-dos" codepage))) | ||
| 424 | (codepage-unix | ||
| 425 | (intern (format "%s-unix" codepage)))) | ||
| 426 | (set nonascii-table ; sets the variable CODEPAGE-nonascii-translation-table | ||
| 427 | (make-translation-table-from-vector | ||
| 428 | (cp-decoding-vector-for-codepage | ||
| 429 | (symbol-value decode-table) iso-name offset))) | ||
| 430 | (define-translation-table encode-translation ; extra slot 0 of the decoding table serves as the encoding table (presumably the reverse map -- confirm) | ||
| 431 | (char-table-extra-slot (symbol-value nonascii-table) 0)) | ||
| 432 | ;; For charsets other than ascii and ISO-NAME, map to `?' for | ||
| 433 | ;; one-column charsets, and to a japanese-jisx0208 character for | ||
| 434 | ;; wide-column charsets.  The CCL encoder converts that Japanese | ||
| 435 | ;; character to "??". | ||
| 436 | (let ((tbl (char-table-extra-slot (symbol-value nonascii-table) 0)) | ||
| 437 | (charsets (delq 'ascii (delq iso-name | ||
| 438 | (copy-sequence charset-list)))) ; copy first: delq is destructive | ||
| 439 | (wide-column-char (make-char 'japanese-jisx0208 32 32))) | ||
| 440 | (while charsets | ||
| 441 | (aset tbl (make-char (car charsets)) | ||
| 442 | (if (= (charset-width (car charsets)) 1) ?? wide-column-char)) | ||
| 443 | (setq charsets (cdr charsets)))) | ||
| 444 | (define-translation-table decode-translation | ||
| 445 | (symbol-value nonascii-table)) | ||
| 446 | (cp-coding-system-for-codepage-1 | ||
| 447 | codepage-dos ?D iso-name decode-translation encode-translation) | ||
| 448 | (cp-coding-system-for-codepage-1 ; NOTE(review): same mnemonic ?D as the -dos variant -- confirm intended | ||
| 449 | codepage-unix ?D iso-name decode-translation encode-translation))) | ||
| 450 | |||
| 451 | (defun cp-codepage-decoder (codepage) | ||
| 452 | "If CODEPAGE is the name of a supported codepage, return the symbol naming | ||
| 453 | its decode table (`CODEPAGE-decode-table'); otherwise return nil.  CODEPAGE may be a string or a symbol." | ||
| 454 | (let ((cp (if (symbolp codepage) (symbol-name codepage) codepage))) | ||
| 455 | (cond | ||
| 456 | ((stringp cp) | ||
| 457 | (intern-soft (format "%s-decode-table" cp))) ; intern-soft: nil unless such a symbol is already interned | ||
| 458 | (t nil)))) | ||
| 459 | |||
| 460 | ;;;###autoload | ||
| 461 | (defun cp-charset-for-codepage (codepage) | ||
| 462 | "Return the charset for which there is a translation table to DOS CODEPAGE. | ||
| 463 | CODEPAGE must be the name of a DOS codepage, a string such as \"cp850\"; signal an error if it is unsupported." | ||
| 464 | (let ((cp-decoder (cp-codepage-decoder codepage))) | ||
| 465 | (if (null cp-decoder) | ||
| 466 | (error "Unsupported codepage %s" codepage) | ||
| 467 | (get cp-decoder 'charset)))) ; `charset' property of the CODEPAGE-decode-table symbol | ||
| 468 | |||
| 469 | ;;;###autoload | ||
| 470 | (defun cp-language-for-codepage (codepage) | ||
| 471 | "Return the name of the MULE language environment for CODEPAGE. | ||
| 472 | CODEPAGE must be the name of a DOS codepage, a string such as \"cp850\"; signal an error if it is unsupported." | ||
| 473 | (let ((cp-decoder (cp-codepage-decoder codepage))) | ||
| 474 | (if (null cp-decoder) | ||
| 475 | (error "Unsupported codepage %s" codepage) | ||
| 476 | (get cp-decoder 'language)))) ; `language' property of the CODEPAGE-decode-table symbol | ||
| 477 | |||
| 478 | ;;;###autoload | ||
| 479 | (defun cp-offset-for-codepage (codepage) | ||
| 480 | "Return the offset to be used in setting up coding systems for CODEPAGE. | ||
| 481 | CODEPAGE must be the name of a DOS codepage, a string such as \"cp850\"; signal an error if it is unsupported." | ||
| 482 | (let ((cp-decoder (cp-codepage-decoder codepage))) | ||
| 483 | (if (null cp-decoder) | ||
| 484 | (error "Unsupported codepage %s" codepage) | ||
| 485 | (get cp-decoder 'offset)))) ; `offset' property of the CODEPAGE-decode-table symbol | ||
| 486 | |||
| 487 | ;;;###autoload | ||
| 488 | (defun cp-supported-codepages () | ||
| 489 | "Return an alist of supported codepages. | ||
| 490 | |||
| 491 | Each association in the alist has the form (NNN . CHARSET), where NNN is the | ||
| 492 | codepage number (a string), and CHARSET is the MULE charset which is the closest match | ||
| 493 | for the character set supported by that codepage. | ||
| 494 | |||
| 495 | A codepage NNN is supported if a variable called `cpNNN-decode-table' exists, | ||
| 496 | is a vector, and has a charset property." | ||
| 497 | (save-match-data | ||
| 498 | (let (alist chset sname) | ||
| 499 | (mapatoms ; scan every interned symbol | ||
| 500 | (function | ||
| 501 | (lambda (sym) | ||
| 502 | (if (and (boundp sym) | ||
| 503 | (string-match "\\`cp\\([1-9][0-9][0-9]\\)-decode-table\\'" ; only three-digit numbers with a non-zero first digit match | ||
| 504 | (setq sname (symbol-name sym))) | ||
| 505 | (vectorp (symbol-value sym)) | ||
| 506 | (setq chset (get sym 'charset))) ; doubles as the "has a charset property" test | ||
| 507 | (setq alist | ||
| 508 | (cons (cons (match-string 1 sname) chset) alist)))))) | ||
| 509 | alist))) | ||
| 510 | |||
| 511 | ;;;###autoload | ||
| 512 | (defun codepage-setup (codepage) | ||
| 513 | "Create 2 coding systems for codepage CODEPAGE: cpCODEPAGE-dos and | ||
| 514 | cpCODEPAGE-unix.  CODEPAGE is the codepage number (e.g. \"437\"), without the \"cp\" prefix. | ||
| 515 | |||
| 516 | These coding systems are meant for encoding and decoding 8-bit non-ASCII | ||
| 517 | characters used by the IBM codepages, typically in conjunction with files | ||
| 518 | read/written by MS-DOS software, or for display on an MS-DOS terminal." | ||
| 519 | (interactive | ||
| 520 | (let ((completion-ignore-case t) | ||
| 521 | (candidates (cp-supported-codepages))) ; alist of (NNN . CHARSET) used as the completion table | ||
| 522 | (list (completing-read "Setup DOS Codepage: (default 437) " candidates | ||
| 523 | nil t nil nil "437")))) | ||
| 524 | (let ((cp (format "cp%s" codepage))) ; the helpers below expect the "cp"-prefixed name | ||
| 525 | (cp-make-coding-systems-for-codepage | ||
| 526 | cp (cp-charset-for-codepage cp) (cp-offset-for-codepage cp)))) | ||
| 527 | |||
| 528 | (provide 'codepage) ; announce the feature so that (require 'codepage) succeeds | ||
| 529 | |||
| 530 | ;;; codepage.el ends here | ||