diff options
| author | Eli Zaretskii | 1999-03-08 12:06:07 +0000 |
|---|---|---|
| committer | Eli Zaretskii | 1999-03-08 12:06:07 +0000 |
| commit | 7e37faa3567ef7c4a101b0c1a182ce45920a4648 (patch) | |
| tree | 2314e98c48707649b8f5a4d794c643d504f74d25 | |
| parent | ec81e97abceba8b3b56100ffe7380473feaa61f4 (diff) | |
| download | emacs-7e37faa3567ef7c4a101b0c1a182ce45920a4648.tar.gz emacs-7e37faa3567ef7c4a101b0c1a182ce45920a4648.zip | |
(cp1250-decode-table, cp1251-decode-table, cp1253-decode-table,
cp1257-decode-table): New translation tables for MS Windows
codepages.
(cp-make-coding-systems-for-codepage): Accept 4 digit
codepages.
| -rw-r--r-- | lisp/international/codepage.el | 74 |
1 files changed, 73 insertions, 1 deletions
diff --git a/lisp/international/codepage.el b/lisp/international/codepage.el index 6f595c61b90..b71bfdc65bb 100644 --- a/lisp/international/codepage.el +++ b/lisp/international/codepage.el | |||
| @@ -416,6 +416,78 @@ character is generated by (make-char CHARSET OFFSET)." | |||
| 416 | (setplist 'cp775-decode-table | 416 | (setplist 'cp775-decode-table |
| 417 | '(charset latin-iso8859-4 language "Latin-4" offset 160)) | 417 | '(charset latin-iso8859-4 language "Latin-4" offset 160)) |
| 418 | 418 | ||
| 419 | ;; Support for the Windows 12xx series of codepages that MS has | ||
| 420 | ;; butchered from the ISO-8859 specs. This does not add support for | ||
| 421 | ;; the extended characters that MS has added in the 128 - 159 coding | ||
| 422 | ;; range; it only translates those characters that can be expressed in | ||
| 423 | ;; the corresponding ISO-8859 codepage. | ||
| 424 | |||
| 425 | ;; Codepage Mapping: | ||
| 426 | ;; | ||
| 427 | ;; Windows-1250: ISO-8859-2 (Central Europe) - differs in some positions | ||
| 428 | ;; Windows-1251: ISO-8859-5 (Cyrillic) - differs wildly | ||
| 429 | ;; Windows-1252: ISO-8859-1 (West Europe) - exact match | ||
| 430 | ;; Windows-1253: ISO-8859-7 (Greek) - differs in some positions | ||
| 431 | ;; Windows-1254: ISO-8859-9 (Turkish) - exact match | ||
| 432 | ;; Windows-1255: ISO-8859-8 (Hebrew) - exact match | ||
| 433 | ;; Windows-1256: ISO-8859-6 (Arabic) - half match | ||
| 434 | ;; Windows-1257: ISO-8859-4 (Baltic) - differs, future Latin-7 | ||
| 435 | ;; Windows-1258: VISCII (Vietnamese) - Completely different | ||
| 436 | |||
| 437 | (defvar cp1250-decode-table | ||
| 438 | [ | ||
| 439 | 160 165 162 163 164 188 140 167 168 138 170 141 143 173 142 175 | ||
| 440 | 176 185 178 179 180 190 156 161 184 154 186 157 159 189 158 191 | ||
| 441 | 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 | ||
| 442 | 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 | ||
| 443 | 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 | ||
| 444 | 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 ] | ||
| 445 | "ISO-8859-2 to Windows-1250 (Central Europe) codepage decoding table") | ||
| 446 | (setplist 'cp1250-decode-table | ||
| 447 | '(charset latin-iso8859-2 language "Latin-2" offset 160)) | ||
| 448 | |||
| 449 | (defvar cp1251-decode-table | ||
| 450 | [ | ||
| 451 | 160 168 128 129 170 189 178 175 163 138 140 142 141 173 161 143 | ||
| 452 | 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 | ||
| 453 | 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 | ||
| 454 | 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 | ||
| 455 | 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 | ||
| 456 | 185 184 144 131 186 190 179 191 188 154 156 158 157 167 162 159 ] | ||
| 457 | "ISO-8859-5 to Windows-1251 (Cyrillic) codepage decoding table") | ||
| 458 | (setplist 'cp1251-decode-table | ||
| 459 | '(charset cyrillic-iso8859-5 language "Cyrillic-ISO" offset 160)) | ||
| 460 | |||
| 461 | ;; cp1253 is missing nbsp so we cannot quite translate perfectly. It | ||
| 462 | ;; also has two micro/mu characters which would require more complex | ||
| 463 | ;; processing to accommodate. | ||
| 464 | (defvar cp1253-decode-table | ||
| 465 | [ | ||
| 466 | nil 145 146 163 nil nil 166 167 168 169 nil 171 172 173 nil 151 | ||
| 467 | 176 177 178 179 180 161 162 183 184 185 186 187 188 189 190 191 | ||
| 468 | 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 | ||
| 469 | 208 209 nil 211 212 213 214 215 216 217 218 219 220 221 222 223 | ||
| 470 | 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 | ||
| 471 | 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 nil ] | ||
| 472 | "ISO-8859-7 to Windows-1253 (Greek) codepage decoding table") | ||
| 473 | (setplist 'cp1253-decode-table | ||
| 474 | '(charset greek-iso8859-7 language "Greek" offset 160)) | ||
| 475 | |||
| 476 | ;; Since Latin-7 is not yet official, and Emacs does not support it, | ||
| 477 | ;; provide translation between Windows-1257 and Latin-4 the best we | ||
| 478 | ;; can. | ||
| 479 | (defvar cp1257-decode-table | ||
| 480 | [ | ||
| 481 | 160 192 nil 170 164 nil 207 167 nil 208 199 204 nil 173 222 nil | ||
| 482 | 176 224 nil 186 nil nil 239 nil nil 240 231 236 nil nil 254 nil | ||
| 483 | 194 nil nil nil 196 197 175 193 200 201 198 nil 203 nil nil 206 | ||
| 484 | nil 210 212 205 nil 213 214 215 168 216 nil nil 220 nil 219 223 | ||
| 485 | 226 nil nil nil 228 229 191 225 232 233 230 nil 235 nil nil 238 | ||
| 486 | nil 242 244 237 nil 245 246 247 184 248 nil nil 252 nil 251 nil ] | ||
| 487 | "ISO-8859-4 to Windows-1257 (Baltic) codepage decoding table") | ||
| 488 | (setplist 'cp1257-decode-table | ||
| 489 | '(charset latin-iso8859-4 language "Latin-4" offset 160)) | ||
| 490 | |||
| 419 | ;;;###autoload | 491 | ;;;###autoload |
| 420 | (defun cp-make-coding-systems-for-codepage (codepage iso-name offset) | 492 | (defun cp-make-coding-systems-for-codepage (codepage iso-name offset) |
| 421 | "Create a coding system to convert IBM CODEPAGE into charset ISO-NAME | 493 | "Create a coding system to convert IBM CODEPAGE into charset ISO-NAME |
| @@ -514,7 +586,7 @@ is a vector, and has a charset property." | |||
| 514 | (function | 586 | (function |
| 515 | (lambda (sym) | 587 | (lambda (sym) |
| 516 | (if (and (boundp sym) | 588 | (if (and (boundp sym) |
| 517 | (string-match "\\`cp\\([1-9][0-9][0-9]\\)-decode-table\\'" | 589 | (string-match "\\`cp\\([1-9][0-9][0-9][0-9]?\\)-decode-table\\'" |
| 518 | (setq sname (symbol-name sym))) | 590 | (setq sname (symbol-name sym))) |
| 519 | (vectorp (symbol-value sym)) | 591 | (vectorp (symbol-value sym)) |
| 520 | (setq chset (get sym 'charset))) | 592 | (setq chset (get sym 'charset))) |