diff options
| author | Katsumi Yamaoka | 2008-12-15 04:15:35 +0000 |
|---|---|---|
| committer | Katsumi Yamaoka | 2008-12-15 04:15:35 +0000 |
| commit | 96a222012f17c0c20b6b34ece57620d7c88e4ebb (patch) | |
| tree | f21c666709c59cfd385559af2e1ebc49ca80c67a | |
| parent | 75f23946ee80c2e25a2a0fa8a0b93c12a699d052 (diff) | |
| download | emacs-96a222012f17c0c20b6b34ece57620d7c88e4ebb.tar.gz emacs-96a222012f17c0c20b6b34ece57620d7c88e4ebb.zip | |
(mm-charset-eval-alist): Define it before mm-charset-to-coding-system.
(mm-charset-to-coding-system): Add optional argument `silent';
define it before mm-charset-override-alist.
(mm-charset-override-alist): Add `(gb2312 . gbk)' to the default value if it
can be used in Emacs currently running; silence mm-charset-to-coding-system.
| -rw-r--r-- | lisp/gnus/ChangeLog | 10 | ||||
| -rw-r--r-- | lisp/gnus/mm-util.el | 239 |
2 files changed, 141 insertions, 108 deletions
diff --git a/lisp/gnus/ChangeLog b/lisp/gnus/ChangeLog index 3e445c847c1..9e58c6046b0 100644 --- a/lisp/gnus/ChangeLog +++ b/lisp/gnus/ChangeLog | |||
| @@ -1,3 +1,13 @@ | |||
| 1 | 2008-12-15 Katsumi Yamaoka <yamaoka@jpl.org> | ||
| 2 | |||
| 3 | * mm-util.el (mm-charset-eval-alist): | ||
| 4 | Define it before mm-charset-to-coding-system. | ||
| 5 | (mm-charset-to-coding-system): Add optional argument `silent'; | ||
| 6 | define it before mm-charset-override-alist. | ||
| 7 | (mm-charset-override-alist): Add `(gb2312 . gbk)' to the | ||
| 8 | default value if it can be used in Emacs currently running; | ||
| 9 | silence mm-charset-to-coding-system. | ||
| 10 | |||
| 1 | 2008-12-10 Katsumi Yamaoka <yamaoka@jpl.org> | 11 | 2008-12-10 Katsumi Yamaoka <yamaoka@jpl.org> |
| 2 | 12 | ||
| 3 | * rfc2047.el (rfc2047-charset-to-coding-system): Add new argument | 13 | * rfc2047.el (rfc2047-charset-to-coding-system): Add new argument |
diff --git a/lisp/gnus/mm-util.el b/lisp/gnus/mm-util.el index 4630cefc25e..e6f0f26c56a 100644 --- a/lisp/gnus/mm-util.el +++ b/lisp/gnus/mm-util.el | |||
| @@ -388,8 +388,125 @@ Unless LIST is given, `mm-codepage-ibm-list' is used." | |||
| 388 | (mm-setup-codepage-iso-8859) | 388 | (mm-setup-codepage-iso-8859) |
| 389 | (mm-setup-codepage-ibm) | 389 | (mm-setup-codepage-ibm) |
| 390 | 390 | ||
| 391 | ;; Note: this has to be defined before `mm-charset-to-coding-system'. | ||
| 392 | (defcustom mm-charset-eval-alist | ||
| 393 | (if (featurep 'xemacs) | ||
| 394 | nil ;; I don't know what would be useful for XEmacs. | ||
| 395 | '(;; Emacs 21 offers 1250 1251 1253 1257. Emacs 22 provides autoloads for | ||
| 396 | ;; 1250-1258 (i.e. `mm-codepage-setup' does nothing). | ||
| 397 | (windows-1250 . (mm-codepage-setup 1250 t)) | ||
| 398 | (windows-1251 . (mm-codepage-setup 1251 t)) | ||
| 399 | (windows-1253 . (mm-codepage-setup 1253 t)) | ||
| 400 | (windows-1257 . (mm-codepage-setup 1257 t)))) | ||
| 401 | "An alist of (CHARSET . FORM) pairs. | ||
| 402 | If an article is encoded in an unknown CHARSET, FORM is | ||
| 403 | evaluated. This allows to load additional libraries providing | ||
| 404 | charsets on demand. If supported by your Emacs version, you | ||
| 405 | could use `autoload-coding-system' here." | ||
| 406 | :version "22.1" ;; Gnus 5.10.9 | ||
| 407 | :type '(list (set :inline t | ||
| 408 | (const (windows-1250 . (mm-codepage-setup 1250 t))) | ||
| 409 | (const (windows-1251 . (mm-codepage-setup 1251 t))) | ||
| 410 | (const (windows-1253 . (mm-codepage-setup 1253 t))) | ||
| 411 | (const (windows-1257 . (mm-codepage-setup 1257 t))) | ||
| 412 | (const (cp850 . (mm-codepage-setup 850 nil)))) | ||
| 413 | (repeat :inline t | ||
| 414 | :tag "Other options" | ||
| 415 | (cons (symbol :tag "charset") | ||
| 416 | (symbol :tag "form")))) | ||
| 417 | :group 'mime) | ||
| 418 | (put 'mm-charset-eval-alist 'risky-local-variable t) | ||
| 419 | |||
| 420 | ;; Note: this function has to be defined before `mm-charset-override-alist' | ||
| 421 | ;; since it will use this function in order to determine its default value | ||
| 422 | ;; when loading mm-util.elc. | ||
| 423 | (defun mm-charset-to-coding-system (charset &optional lbt | ||
| 424 | allow-override silent) | ||
| 425 | "Return coding-system corresponding to CHARSET. | ||
| 426 | CHARSET is a symbol naming a MIME charset. | ||
| 427 | If optional argument LBT (`unix', `dos' or `mac') is specified, it is | ||
| 428 | used as the line break code type of the coding system. | ||
| 429 | |||
| 430 | If ALLOW-OVERRIDE is given, use `mm-charset-override-alist' to | ||
| 431 | map undesired charset names to their replacement. This should | ||
| 432 | only be used for decoding, not for encoding. | ||
| 433 | |||
| 434 | A non-nil value of SILENT means don't issue a warning even if CHARSET | ||
| 435 | is not available." | ||
| 436 | ;; OVERRIDE is used (only) in `mm-decode-body' and `mm-decode-string'. | ||
| 437 | (when (stringp charset) | ||
| 438 | (setq charset (intern (downcase charset)))) | ||
| 439 | (when lbt | ||
| 440 | (setq charset (intern (format "%s-%s" charset lbt)))) | ||
| 441 | (cond | ||
| 442 | ((null charset) | ||
| 443 | charset) | ||
| 444 | ;; Running in a non-MULE environment. | ||
| 445 | ((or (null (mm-get-coding-system-list)) | ||
| 446 | (not (fboundp 'coding-system-get))) | ||
| 447 | charset) | ||
| 448 | ;; Check override list quite early. Should only used for decoding, not for | ||
| 449 | ;; encoding! | ||
| 450 | ((and allow-override | ||
| 451 | (let ((cs (cdr (assq charset mm-charset-override-alist)))) | ||
| 452 | (and cs (mm-coding-system-p cs) cs)))) | ||
| 453 | ;; ascii | ||
| 454 | ((eq charset 'us-ascii) | ||
| 455 | 'ascii) | ||
| 456 | ;; Check to see whether we can handle this charset. (This depends | ||
| 457 | ;; on there being some coding system matching each `mime-charset' | ||
| 458 | ;; property defined, as there should be.) | ||
| 459 | ((and (mm-coding-system-p charset) | ||
| 460 | ;;; Doing this would potentially weed out incorrect charsets. | ||
| 461 | ;;; charset | ||
| 462 | ;;; (eq charset (coding-system-get charset 'mime-charset)) | ||
| 463 | ) | ||
| 464 | charset) | ||
| 465 | ;; Eval expressions from `mm-charset-eval-alist' | ||
| 466 | ((let* ((el (assq charset mm-charset-eval-alist)) | ||
| 467 | (cs (car el)) | ||
| 468 | (form (cdr el))) | ||
| 469 | (and cs | ||
| 470 | form | ||
| 471 | (prog2 | ||
| 472 | ;; Avoid errors... | ||
| 473 | (condition-case nil (eval form) (error nil)) | ||
| 474 | ;; (message "Failed to eval `%s'" form)) | ||
| 475 | (mm-coding-system-p cs) | ||
| 476 | (message "Added charset `%s' via `mm-charset-eval-alist'" cs)) | ||
| 477 | cs))) | ||
| 478 | ;; Translate invalid charsets. | ||
| 479 | ((let ((cs (cdr (assq charset mm-charset-synonym-alist)))) | ||
| 480 | (and cs | ||
| 481 | (mm-coding-system-p cs) | ||
| 482 | ;; (message | ||
| 483 | ;; "Using synonym `%s' from `mm-charset-synonym-alist' for `%s'" | ||
| 484 | ;; cs charset) | ||
| 485 | cs))) | ||
| 486 | ;; Last resort: search the coding system list for entries which | ||
| 487 | ;; have the right mime-charset in case the canonical name isn't | ||
| 488 | ;; defined (though it should be). | ||
| 489 | ((let (cs) | ||
| 490 | ;; mm-get-coding-system-list returns a list of cs without lbt. | ||
| 491 | ;; Do we need -lbt? | ||
| 492 | (dolist (c (mm-get-coding-system-list)) | ||
| 493 | (if (and (null cs) | ||
| 494 | (eq charset (or (coding-system-get c :mime-charset) | ||
| 495 | (coding-system-get c 'mime-charset)))) | ||
| 496 | (setq cs c))) | ||
| 497 | (unless (or silent cs) | ||
| 498 | ;; Warn the user about unknown charset: | ||
| 499 | (if (fboundp 'gnus-message) | ||
| 500 | (gnus-message 7 "Unknown charset: %s" charset) | ||
| 501 | (message "Unknown charset: %s" charset))) | ||
| 502 | cs)))) | ||
| 503 | |||
| 504 | ;; Note: `mm-charset-to-coding-system' has to be defined before this. | ||
| 391 | (defcustom mm-charset-override-alist | 505 | (defcustom mm-charset-override-alist |
| 392 | '((iso-8859-1 . windows-1252) | 506 | ;; Note: pairs that cannot be used in the Emacs version currently running |
| 507 | ;; will be removed. | ||
| 508 | '((gb2312 . gbk) | ||
| 509 | (iso-8859-1 . windows-1252) | ||
| 393 | (iso-8859-8 . windows-1255) | 510 | (iso-8859-8 . windows-1255) |
| 394 | (iso-8859-9 . windows-1254)) | 511 | (iso-8859-9 . windows-1254)) |
| 395 | "A mapping from undesired charset names to their replacement. | 512 | "A mapping from undesired charset names to their replacement. |
| @@ -404,7 +521,8 @@ superset of iso-8859-1." | |||
| 404 | (let ((defaults | 521 | (let ((defaults |
| 405 | (delq nil | 522 | (delq nil |
| 406 | (mapcar (lambda (pair) | 523 | (mapcar (lambda (pair) |
| 407 | (if (mm-charset-to-coding-system (cdr pair)) | 524 | (if (mm-charset-to-coding-system (cdr pair) |
| 525 | nil nil t) | ||
| 408 | pair)) | 526 | pair)) |
| 409 | '((gb2312 . gbk) | 527 | '((gb2312 . gbk) |
| 410 | (iso-8859-1 . windows-1252) | 528 | (iso-8859-1 . windows-1252) |
| @@ -433,37 +551,20 @@ superset of iso-8859-1." | |||
| 433 | (cons :format "%v" | 551 | (cons :format "%v" |
| 434 | (symbol :size 3 :format "(%v") | 552 | (symbol :size 3 :format "(%v") |
| 435 | (symbol :size 3 :format " . %v)\n"))))))) | 553 | (symbol :size 3 :format " . %v)\n"))))))) |
| 554 | ;; Remove pairs that cannot be used in the Emacs version currently | ||
| 555 | ;; running. Note that this section will be evaluated when loading | ||
| 556 | ;; mm-util.elc. | ||
| 557 | :set (lambda (symbol value) | ||
| 558 | (custom-set-default | ||
| 559 | symbol (delq nil | ||
| 560 | (mapcar (lambda (pair) | ||
| 561 | (if (mm-charset-to-coding-system (cdr pair) | ||
| 562 | nil nil t) | ||
| 563 | pair)) | ||
| 564 | value)))) | ||
| 436 | :version "22.1" ;; Gnus 5.10.9 | 565 | :version "22.1" ;; Gnus 5.10.9 |
| 437 | :group 'mime) | 566 | :group 'mime) |
| 438 | 567 | ||
| 439 | (defcustom mm-charset-eval-alist | ||
| 440 | (if (featurep 'xemacs) | ||
| 441 | nil ;; I don't know what would be useful for XEmacs. | ||
| 442 | '(;; Emacs 21 offers 1250 1251 1253 1257. Emacs 22 provides autoloads for | ||
| 443 | ;; 1250-1258 (i.e. `mm-codepage-setup' does nothing). | ||
| 444 | (windows-1250 . (mm-codepage-setup 1250 t)) | ||
| 445 | (windows-1251 . (mm-codepage-setup 1251 t)) | ||
| 446 | (windows-1253 . (mm-codepage-setup 1253 t)) | ||
| 447 | (windows-1257 . (mm-codepage-setup 1257 t)))) | ||
| 448 | "An alist of (CHARSET . FORM) pairs. | ||
| 449 | If an article is encoded in an unknown CHARSET, FORM is | ||
| 450 | evaluated. This allows to load additional libraries providing | ||
| 451 | charsets on demand. If supported by your Emacs version, you | ||
| 452 | could use `autoload-coding-system' here." | ||
| 453 | :version "22.1" ;; Gnus 5.10.9 | ||
| 454 | :type '(list (set :inline t | ||
| 455 | (const (windows-1250 . (mm-codepage-setup 1250 t))) | ||
| 456 | (const (windows-1251 . (mm-codepage-setup 1251 t))) | ||
| 457 | (const (windows-1253 . (mm-codepage-setup 1253 t))) | ||
| 458 | (const (windows-1257 . (mm-codepage-setup 1257 t))) | ||
| 459 | (const (cp850 . (mm-codepage-setup 850 nil)))) | ||
| 460 | (repeat :inline t | ||
| 461 | :tag "Other options" | ||
| 462 | (cons (symbol :tag "charset") | ||
| 463 | (symbol :tag "form")))) | ||
| 464 | :group 'mime) | ||
| 465 | (put 'mm-charset-eval-alist 'risky-local-variable t) | ||
| 466 | |||
| 467 | (defvar mm-binary-coding-system | 568 | (defvar mm-binary-coding-system |
| 468 | (cond | 569 | (cond |
| 469 | ((mm-coding-system-p 'binary) 'binary) | 570 | ((mm-coding-system-p 'binary) 'binary) |
| @@ -690,84 +791,6 @@ mail with multiple parts is preferred to sending a Unicode one.") | |||
| 690 | (pop alist)) | 791 | (pop alist)) |
| 691 | out))) | 792 | out))) |
| 692 | 793 | ||
| 693 | (defun mm-charset-to-coding-system (charset &optional lbt | ||
| 694 | allow-override) | ||
| 695 | "Return coding-system corresponding to CHARSET. | ||
| 696 | CHARSET is a symbol naming a MIME charset. | ||
| 697 | If optional argument LBT (`unix', `dos' or `mac') is specified, it is | ||
| 698 | used as the line break code type of the coding system. | ||
| 699 | |||
| 700 | If ALLOW-OVERRIDE is given, use `mm-charset-override-alist' to | ||
| 701 | map undesired charset names to their replacement. This should | ||
| 702 | only be used for decoding, not for encoding." | ||
| 703 | ;; OVERRIDE is used (only) in `mm-decode-body' and `mm-decode-string'. | ||
| 704 | (when (stringp charset) | ||
| 705 | (setq charset (intern (downcase charset)))) | ||
| 706 | (when lbt | ||
| 707 | (setq charset (intern (format "%s-%s" charset lbt)))) | ||
| 708 | (cond | ||
| 709 | ((null charset) | ||
| 710 | charset) | ||
| 711 | ;; Running in a non-MULE environment. | ||
| 712 | ((or (null (mm-get-coding-system-list)) | ||
| 713 | (not (fboundp 'coding-system-get))) | ||
| 714 | charset) | ||
| 715 | ;; Check override list quite early. Should only used for decoding, not for | ||
| 716 | ;; encoding! | ||
| 717 | ((and allow-override | ||
| 718 | (let ((cs (cdr (assq charset mm-charset-override-alist)))) | ||
| 719 | (and cs (mm-coding-system-p cs) cs)))) | ||
| 720 | ;; ascii | ||
| 721 | ((eq charset 'us-ascii) | ||
| 722 | 'ascii) | ||
| 723 | ;; Check to see whether we can handle this charset. (This depends | ||
| 724 | ;; on there being some coding system matching each `mime-charset' | ||
| 725 | ;; property defined, as there should be.) | ||
| 726 | ((and (mm-coding-system-p charset) | ||
| 727 | ;;; Doing this would potentially weed out incorrect charsets. | ||
| 728 | ;;; charset | ||
| 729 | ;;; (eq charset (coding-system-get charset 'mime-charset)) | ||
| 730 | ) | ||
| 731 | charset) | ||
| 732 | ;; Eval expressions from `mm-charset-eval-alist' | ||
| 733 | ((let* ((el (assq charset mm-charset-eval-alist)) | ||
| 734 | (cs (car el)) | ||
| 735 | (form (cdr el))) | ||
| 736 | (and cs | ||
| 737 | form | ||
| 738 | (prog2 | ||
| 739 | ;; Avoid errors... | ||
| 740 | (condition-case nil (eval form) (error nil)) | ||
| 741 | ;; (message "Failed to eval `%s'" form)) | ||
| 742 | (mm-coding-system-p cs) | ||
| 743 | (message "Added charset `%s' via `mm-charset-eval-alist'" cs)) | ||
| 744 | cs))) | ||
| 745 | ;; Translate invalid charsets. | ||
| 746 | ((let ((cs (cdr (assq charset mm-charset-synonym-alist)))) | ||
| 747 | (and cs | ||
| 748 | (mm-coding-system-p cs) | ||
| 749 | ;; (message | ||
| 750 | ;; "Using synonym `%s' from `mm-charset-synonym-alist' for `%s'" | ||
| 751 | ;; cs charset) | ||
| 752 | cs))) | ||
| 753 | ;; Last resort: search the coding system list for entries which | ||
| 754 | ;; have the right mime-charset in case the canonical name isn't | ||
| 755 | ;; defined (though it should be). | ||
| 756 | ((let (cs) | ||
| 757 | ;; mm-get-coding-system-list returns a list of cs without lbt. | ||
| 758 | ;; Do we need -lbt? | ||
| 759 | (dolist (c (mm-get-coding-system-list)) | ||
| 760 | (if (and (null cs) | ||
| 761 | (eq charset (or (coding-system-get c :mime-charset) | ||
| 762 | (coding-system-get c 'mime-charset)))) | ||
| 763 | (setq cs c))) | ||
| 764 | (unless cs | ||
| 765 | ;; Warn the user about unknown charset: | ||
| 766 | (if (fboundp 'gnus-message) | ||
| 767 | (gnus-message 7 "Unknown charset: %s" charset) | ||
| 768 | (message "Unknown charset: %s" charset))) | ||
| 769 | cs)))) | ||
| 770 | |||
| 771 | (eval-and-compile | 794 | (eval-and-compile |
| 772 | (defvar mm-emacs-mule (and (not (featurep 'xemacs)) | 795 | (defvar mm-emacs-mule (and (not (featurep 'xemacs)) |
| 773 | (boundp 'default-enable-multibyte-characters) | 796 | (boundp 'default-enable-multibyte-characters) |