diff options
| author | Reiner Steib | 2006-04-17 19:37:15 +0000 |
|---|---|---|
| committer | Reiner Steib | 2006-04-17 19:37:15 +0000 |
| commit | bd29ba2089a66563ec538a3399d038007de6136f (patch) | |
| tree | bd35af7325bd4181398ec3aedc2391a3c59e1014 | |
| parent | 18c06a99aa65121a4c09138403a7b494b7d41d37 (diff) | |
| download | emacs-bd29ba2089a66563ec538a3399d038007de6136f.tar.gz emacs-bd29ba2089a66563ec538a3399d038007de6136f.zip | |
* mm-util.el (mm-charset-synonym-alist): Improve doc string.
(mm-charset-override-alist): New variable.
(mm-charset-to-coding-system): Use it.
(mm-codepage-setup): New helper function.
(mm-charset-eval-alist): New variable.
(mm-charset-to-coding-system): Use mm-charset-eval-alist. Warn
about unknown charsets. Add allow-override. Use
`mm-charset-override-alist' only when decoding.
(mm-detect-mime-charset-region): Use :mime-charset.
* mm-bodies.el (mm-decode-body, mm-decode-string): Call
`mm-charset-to-coding-system' with allow-override argument.
| -rw-r--r-- | lisp/gnus/ChangeLog | 15 | ||||
| -rw-r--r-- | lisp/gnus/mm-bodies.el | 13 | ||||
| -rw-r--r-- | lisp/gnus/mm-util.el | 121 |
3 files changed, 142 insertions, 7 deletions
diff --git a/lisp/gnus/ChangeLog b/lisp/gnus/ChangeLog index 50a7262e1a3..09dbe9e0027 100644 --- a/lisp/gnus/ChangeLog +++ b/lisp/gnus/ChangeLog | |||
| @@ -1,5 +1,20 @@ | |||
| 1 | 2006-04-17 Reiner Steib <Reiner.Steib@gmx.de> | 1 | 2006-04-17 Reiner Steib <Reiner.Steib@gmx.de> |
| 2 | 2 | ||
| 3 | [ Merge from Gnus trunk. ] | ||
| 4 | |||
| 5 | * mm-util.el (mm-charset-synonym-alist): Improve doc string. | ||
| 6 | (mm-charset-override-alist): New variable. | ||
| 7 | (mm-charset-to-coding-system): Use it. | ||
| 8 | (mm-codepage-setup): New helper function. | ||
| 9 | (mm-charset-eval-alist): New variable. | ||
| 10 | (mm-charset-to-coding-system): Use mm-charset-eval-alist. Warn | ||
| 11 | about unknown charsets. Add allow-override. Use | ||
| 12 | `mm-charset-override-alist' only when decoding. | ||
| 13 | (mm-detect-mime-charset-region): Use :mime-charset. | ||
| 14 | |||
| 15 | * mm-bodies.el (mm-decode-body, mm-decode-string): Call | ||
| 16 | `mm-charset-to-coding-system' with allow-override argument. | ||
| 17 | |||
| 3 | * message.el (message-tool-bar-zap-list, message-tool-bar) | 18 | * message.el (message-tool-bar-zap-list, message-tool-bar) |
| 4 | (message-tool-bar-gnome, message-tool-bar-retro): New variables. | 19 | (message-tool-bar-gnome, message-tool-bar-retro): New variables. |
| 5 | (message-tool-bar-local-item-from-menu): Remove. | 20 | (message-tool-bar-local-item-from-menu): Remove. |
diff --git a/lisp/gnus/mm-bodies.el b/lisp/gnus/mm-bodies.el index c58eb6bd41d..a10b8b28399 100644 --- a/lisp/gnus/mm-bodies.el +++ b/lisp/gnus/mm-bodies.el | |||
| @@ -56,6 +56,8 @@ | |||
| 56 | ;; known to break servers. | 56 | ;; known to break servers. |
| 57 | ;; Note: UTF-16 variants are invalid for text parts [RFC 2781], | 57 | ;; Note: UTF-16 variants are invalid for text parts [RFC 2781], |
| 58 | ;; so this can't happen :-/. | 58 | ;; so this can't happen :-/. |
| 59 | ;; PPS: Yes, it can happen if the user specifies UTF-16 in the MML | ||
| 60 | ;; markup. - jh. | ||
| 59 | (utf-16 . base64) | 61 | (utf-16 . base64) |
| 60 | (utf-16be . base64) | 62 | (utf-16be . base64) |
| 61 | (utf-16le . base64)) | 63 | (utf-16le . base64)) |
| @@ -251,7 +253,10 @@ decoding. If it is nil, default to `mail-parse-charset'." | |||
| 251 | (mm-decode-content-transfer-encoding encoding type)) | 253 | (mm-decode-content-transfer-encoding encoding type)) |
| 252 | (when (and (featurep 'mule) ;; Fixme: Wrong test for unibyte session. | 254 | (when (and (featurep 'mule) ;; Fixme: Wrong test for unibyte session. |
| 253 | (not (eq charset 'gnus-decoded))) | 255 | (not (eq charset 'gnus-decoded))) |
| 254 | (let ((coding-system (mm-charset-to-coding-system charset))) | 256 | (let ((coding-system (mm-charset-to-coding-system |
| 257 | ;; Allow overwrite using | ||
| 258 | ;; `mm-charset-override-alist'. | ||
| 259 | charset nil t))) | ||
| 255 | (if (and (not coding-system) | 260 | (if (and (not coding-system) |
| 256 | (listp mail-parse-ignored-charsets) | 261 | (listp mail-parse-ignored-charsets) |
| 257 | (memq 'gnus-unknown mail-parse-ignored-charsets)) | 262 | (memq 'gnus-unknown mail-parse-ignored-charsets)) |
| @@ -282,7 +287,11 @@ decoding. If it is nil, default to `mail-parse-charset'." | |||
| 282 | (setq charset mail-parse-charset)) | 287 | (setq charset mail-parse-charset)) |
| 283 | (or | 288 | (or |
| 284 | (when (featurep 'mule) | 289 | (when (featurep 'mule) |
| 285 | (let ((coding-system (mm-charset-to-coding-system charset))) | 290 | (let ((coding-system (mm-charset-to-coding-system |
| 291 | charset | ||
| 292 | ;; Allow overwrite using | ||
| 293 | ;; `mm-charset-override-alist'. | ||
| 294 | nil t))) | ||
| 286 | (if (and (not coding-system) | 295 | (if (and (not coding-system) |
| 287 | (listp mail-parse-ignored-charsets) | 296 | (listp mail-parse-ignored-charsets) |
| 288 | (memq 'gnus-unknown mail-parse-ignored-charsets)) | 297 | (memq 'gnus-unknown mail-parse-ignored-charsets)) |
diff --git a/lisp/gnus/mm-util.el b/lisp/gnus/mm-util.el index a8c1f3a87a1..e16750cfcf6 100644 --- a/lisp/gnus/mm-util.el +++ b/lisp/gnus/mm-util.el | |||
| @@ -177,6 +177,29 @@ system object in XEmacs." | |||
| 177 | ;; no-MULE XEmacs: | 177 | ;; no-MULE XEmacs: |
| 178 | (car (memq cs (mm-get-coding-system-list)))))) | 178 | (car (memq cs (mm-get-coding-system-list)))))) |
| 179 | 179 | ||
| 180 | (defun mm-codepage-setup (number &optional alias) | ||
| 181 | "Create a coding system cpNUMBER. | ||
| 182 | The coding system is created using `codepage-setup'. If ALIAS is | ||
| 183 | non-nil, an alias is created and added to | ||
| 184 | `mm-charset-synonym-alist'. If ALIAS is a string, it's used as | ||
| 185 | the alias. Else windows-NUMBER is used." | ||
| 186 | (interactive | ||
| 187 | (let ((completion-ignore-case t) | ||
| 188 | (candidates (cp-supported-codepages))) | ||
| 189 | (list (completing-read "Setup DOS Codepage: (default 437) " candidates | ||
| 190 | nil t nil nil "437")))) | ||
| 191 | (when alias | ||
| 192 | (setq alias (if (stringp alias) | ||
| 193 | (intern alias) | ||
| 194 | (intern (format "windows-%s" number))))) | ||
| 195 | (let* ((cp (intern (format "cp%s" number)))) | ||
| 196 | (unless (mm-coding-system-p cp) | ||
| 197 | (codepage-setup number)) | ||
| 198 | (when (and alias | ||
| 199 | ;; Don't add alias if setup of cp failed. | ||
| 200 | (mm-coding-system-p cp)) | ||
| 201 | (add-to-list 'mm-charset-synonym-alist (cons alias cp))))) | ||
| 202 | |||
| 180 | (defvar mm-charset-synonym-alist | 203 | (defvar mm-charset-synonym-alist |
| 181 | `( | 204 | `( |
| 182 | ;; Not in XEmacs, but it's not a proper MIME charset anyhow. | 205 | ;; Not in XEmacs, but it's not a proper MIME charset anyhow. |
| @@ -200,8 +223,61 @@ system object in XEmacs." | |||
| 200 | ,@(if (and (not (mm-coding-system-p 'windows-1250)) | 223 | ,@(if (and (not (mm-coding-system-p 'windows-1250)) |
| 201 | (mm-coding-system-p 'cp1250)) | 224 | (mm-coding-system-p 'cp1250)) |
| 202 | '((windows-1250 . cp1250))) | 225 | '((windows-1250 . cp1250))) |
| 226 | ;; A Microsoft misunderstanding. | ||
| 227 | ,@(if (and (not (mm-coding-system-p 'unicode)) | ||
| 228 | (mm-coding-system-p 'utf-16-le)) | ||
| 229 | '((unicode . utf-16-le))) | ||
| 230 | ;; A Microsoft misunderstanding. | ||
| 231 | ,@(unless (mm-coding-system-p 'ks_c_5601-1987) | ||
| 232 | (if (mm-coding-system-p 'cp949) | ||
| 233 | '((ks_c_5601-1987 . cp949)) | ||
| 234 | '((ks_c_5601-1987 . euc-kr)))) | ||
| 203 | ) | 235 | ) |
| 204 | "A mapping from invalid charset names to the real charset names.") | 236 | "A mapping from unknown or invalid charset names to the real charset names.") |
| 237 | |||
| 238 | (defcustom mm-charset-override-alist | ||
| 239 | `((iso-8859-1 . windows-1252)) | ||
| 240 | "A mapping from undesired charset names to their replacement. | ||
| 241 | |||
| 242 | You may add pairs like (iso-8859-1 . windows-1252) here, | ||
| 243 | i.e. treat iso-8859-1 as windows-1252. windows-1252 is a | ||
| 244 | superset of iso-8859-1." | ||
| 245 | :type '(list (set :inline t | ||
| 246 | (const (iso-8859-1 . windows-1252)) | ||
| 247 | (const (undecided . windows-1252))) | ||
| 248 | (repeat :inline t | ||
| 249 | :tag "Other options" | ||
| 250 | (cons (symbol :tag "From charset") | ||
| 251 | (symbol :tag "To charset")))) | ||
| 252 | :version "23.0" ;; No Gnus | ||
| 253 | :group 'mime) | ||
| 254 | |||
| 255 | (defcustom mm-charset-eval-alist | ||
| 256 | (if (featurep 'xemacs) | ||
| 257 | nil ;; I don't know what would be useful for XEmacs. | ||
| 258 | '(;; Emacs 21 offers 1250 1251 1253 1257. Emacs 22 provides autoloads for | ||
| 259 | ;; 1250-1258 (i.e. `mm-codepage-setup' does nothing). | ||
| 260 | (windows-1250 . (mm-codepage-setup 1250 t)) | ||
| 261 | (windows-1251 . (mm-codepage-setup 1251 t)) | ||
| 262 | (windows-1253 . (mm-codepage-setup 1253 t)) | ||
| 263 | (windows-1257 . (mm-codepage-setup 1257 t)))) | ||
| 264 | "An alist of (CHARSET . FORM) pairs. | ||
| 265 | If an article is encoded in an unknown CHARSET, FORM is | ||
| 266 | evaluated. This allows to load additional libraries providing | ||
| 267 | charsets on demand. If supported by your Emacs version, you | ||
| 268 | could use `autoload-coding-system' here." | ||
| 269 | :version "23.0" ;; No Gnus | ||
| 270 | :type '(list (set :inline t | ||
| 271 | (const (windows-1250 . (mm-codepage-setup 1250 t))) | ||
| 272 | (const (windows-1251 . (mm-codepage-setup 1251 t))) | ||
| 273 | (const (windows-1253 . (mm-codepage-setup 1253 t))) | ||
| 274 | (const (windows-1257 . (mm-codepage-setup 1257 t))) | ||
| 275 | (const (cp850 . (mm-codepage-setup 850 nil)))) | ||
| 276 | (repeat :inline t | ||
| 277 | :tag "Other options" | ||
| 278 | (cons (symbol :tag "charset") | ||
| 279 | (symbol :tag "form")))) | ||
| 280 | :group 'mime) | ||
| 205 | 281 | ||
| 206 | (defvar mm-binary-coding-system | 282 | (defvar mm-binary-coding-system |
| 207 | (cond | 283 | (cond |
| @@ -426,11 +502,17 @@ mail with multiple parts is preferred to sending a Unicode one.") | |||
| 426 | (pop alist)) | 502 | (pop alist)) |
| 427 | out))) | 503 | out))) |
| 428 | 504 | ||
| 429 | (defun mm-charset-to-coding-system (charset &optional lbt) | 505 | (defun mm-charset-to-coding-system (charset &optional lbt |
| 506 | allow-override) | ||
| 430 | "Return coding-system corresponding to CHARSET. | 507 | "Return coding-system corresponding to CHARSET. |
| 431 | CHARSET is a symbol naming a MIME charset. | 508 | CHARSET is a symbol naming a MIME charset. |
| 432 | If optional argument LBT (`unix', `dos' or `mac') is specified, it is | 509 | If optional argument LBT (`unix', `dos' or `mac') is specified, it is |
| 433 | used as the line break code type of the coding system." | 510 | used as the line break code type of the coding system. |
| 511 | |||
| 512 | If ALLOW-OVERRIDE is given, use `mm-charset-override-alist' to | ||
| 513 | map undesired charset names to their replacement. This should | ||
| 514 | only be used for decoding, not for encoding." | ||
| 515 | ;; OVERRIDE is used (only) in `mm-decode-body' and `mm-decode-string'. | ||
| 434 | (when (stringp charset) | 516 | (when (stringp charset) |
| 435 | (setq charset (intern (downcase charset)))) | 517 | (setq charset (intern (downcase charset)))) |
| 436 | (when lbt | 518 | (when lbt |
| @@ -442,6 +524,11 @@ used as the line break code type of the coding system." | |||
| 442 | ((or (null (mm-get-coding-system-list)) | 524 | ((or (null (mm-get-coding-system-list)) |
| 443 | (not (fboundp 'coding-system-get))) | 525 | (not (fboundp 'coding-system-get))) |
| 444 | charset) | 526 | charset) |
| 527 | ;; Check override list quite early. Should only used for decoding, not for | ||
| 528 | ;; encoding! | ||
| 529 | ((and allow-override | ||
| 530 | (let ((cs (cdr (assq charset mm-charset-override-alist)))) | ||
| 531 | (and cs (mm-coding-system-p cs) cs)))) | ||
| 445 | ;; ascii | 532 | ;; ascii |
| 446 | ((eq charset 'us-ascii) | 533 | ((eq charset 'us-ascii) |
| 447 | 'ascii) | 534 | 'ascii) |
| @@ -454,9 +541,27 @@ used as the line break code type of the coding system." | |||
| 454 | ;;; (eq charset (coding-system-get charset 'mime-charset)) | 541 | ;;; (eq charset (coding-system-get charset 'mime-charset)) |
| 455 | ) | 542 | ) |
| 456 | charset) | 543 | charset) |
| 544 | ;; Eval expressions from `mm-charset-eval-alist' | ||
| 545 | ((let* ((el (assq charset mm-charset-eval-alist)) | ||
| 546 | (cs (car el)) | ||
| 547 | (form (cdr el))) | ||
| 548 | (and cs | ||
| 549 | form | ||
| 550 | (prog2 | ||
| 551 | ;; Avoid errors... | ||
| 552 | (condition-case nil (eval form) (error nil)) | ||
| 553 | ;; (message "Failed to eval `%s'" form)) | ||
| 554 | (mm-coding-system-p cs) | ||
| 555 | (message "Added charset `%s' via `mm-charset-eval-alist'" cs)) | ||
| 556 | cs))) | ||
| 457 | ;; Translate invalid charsets. | 557 | ;; Translate invalid charsets. |
| 458 | ((let ((cs (cdr (assq charset mm-charset-synonym-alist)))) | 558 | ((let ((cs (cdr (assq charset mm-charset-synonym-alist)))) |
| 459 | (and cs (mm-coding-system-p cs) cs))) | 559 | (and cs |
| 560 | (mm-coding-system-p cs) | ||
| 561 | ;; (message | ||
| 562 | ;; "Using synonym `%s' from `mm-charset-synonym-alist' for `%s'" | ||
| 563 | ;; cs charset) | ||
| 564 | cs))) | ||
| 460 | ;; Last resort: search the coding system list for entries which | 565 | ;; Last resort: search the coding system list for entries which |
| 461 | ;; have the right mime-charset in case the canonical name isn't | 566 | ;; have the right mime-charset in case the canonical name isn't |
| 462 | ;; defined (though it should be). | 567 | ;; defined (though it should be). |
| @@ -468,6 +573,11 @@ used as the line break code type of the coding system." | |||
| 468 | (eq charset (or (coding-system-get c :mime-charset) | 573 | (eq charset (or (coding-system-get c :mime-charset) |
| 469 | (coding-system-get c 'mime-charset)))) | 574 | (coding-system-get c 'mime-charset)))) |
| 470 | (setq cs c))) | 575 | (setq cs c))) |
| 576 | (unless cs | ||
| 577 | ;; Warn the user about unknown charset: | ||
| 578 | (if (fboundp 'gnus-message) | ||
| 579 | (gnus-message 7 "Unknown charset: %s" charset) | ||
| 580 | (message "Unknown charset: %s" charset))) | ||
| 471 | cs)))) | 581 | cs)))) |
| 472 | 582 | ||
| 473 | (defsubst mm-replace-chars-in-string (string from to) | 583 | (defsubst mm-replace-chars-in-string (string from to) |
| @@ -1070,7 +1180,8 @@ If SUFFIX is non-nil, add that at the end of the file name." | |||
| 1070 | (defun mm-detect-mime-charset-region (start end) | 1180 | (defun mm-detect-mime-charset-region (start end) |
| 1071 | "Detect MIME charset of the text in the region between START and END." | 1181 | "Detect MIME charset of the text in the region between START and END." |
| 1072 | (let ((cs (mm-detect-coding-region start end))) | 1182 | (let ((cs (mm-detect-coding-region start end))) |
| 1073 | (coding-system-get cs 'mime-charset))) | 1183 | (or (coding-system-get cs :mime-charset) |
| 1184 | (coding-system-get cs 'mime-charset)))) | ||
| 1074 | (defun mm-detect-mime-charset-region (start end) | 1185 | (defun mm-detect-mime-charset-region (start end) |
| 1075 | "Detect MIME charset of the text in the region between START and END." | 1186 | "Detect MIME charset of the text in the region between START and END." |
| 1076 | (let ((cs (mm-detect-coding-region start end))) | 1187 | (let ((cs (mm-detect-coding-region start end))) |