diff options
| author | Katsumi Yamaoka | 2017-02-17 09:52:09 +0000 |
|---|---|---|
| committer | Katsumi Yamaoka | 2017-02-17 09:52:09 +0000 |
| commit | 79f017d5c3019f8bc2a5014beda28bb3b829a8e3 (patch) | |
| tree | 68602a635d4213a17e846db41b3f96075e2c9421 | |
| parent | 78f869687e86d4a9f91003dbbbbacde2e2741487 (diff) | |
| download | emacs-79f017d5c3019f8bc2a5014beda28bb3b829a8e3.tar.gz emacs-79f017d5c3019f8bc2a5014beda28bb3b829a8e3.zip | |
mm-shr: Prefer charset specified in html meta tag
* lisp/gnus/mm-decode.el (mm-shr): Prefer the charset specified in the
HTML meta tag over mail-parse-charset when there is no charset spec
in the MIME header.
| -rw-r--r-- | lisp/gnus/mm-decode.el | 68 |
1 files changed, 36 insertions, 32 deletions
diff --git a/lisp/gnus/mm-decode.el b/lisp/gnus/mm-decode.el index 989d4b8ea17..6b539399596 100644 --- a/lisp/gnus/mm-decode.el +++ b/lisp/gnus/mm-decode.el | |||
| @@ -1793,40 +1793,44 @@ If RECURSIVE, search recursively." | |||
| 1793 | (buffer-string)))))) | 1793 | (buffer-string)))))) |
| 1794 | (shr-inhibit-images mm-html-inhibit-images) | 1794 | (shr-inhibit-images mm-html-inhibit-images) |
| 1795 | (shr-blocked-images mm-html-blocked-images) | 1795 | (shr-blocked-images mm-html-blocked-images) |
| 1796 | charset coding char) | 1796 | charset coding char document) |
| 1797 | (unless handle | 1797 | (mm-with-part (or handle (setq handle (mm-dissect-buffer t))) |
| 1798 | (setq handle (mm-dissect-buffer t))) | 1798 | (setq case-fold-search t) |
| 1799 | (and (setq charset | 1799 | (setq charset |
| 1800 | (or (mail-content-type-get (mm-handle-type handle) 'charset) | 1800 | (or (mail-content-type-get (mm-handle-type handle) 'charset) |
| 1801 | mail-parse-charset)) | 1801 | (progn |
| 1802 | (setq coding (mm-charset-to-coding-system charset nil t)) | 1802 | (goto-char (point-min)) |
| 1803 | (eq coding 'ascii) | 1803 | (and (re-search-forward "\ |
| 1804 | (setq coding nil)) | 1804 | <meta\\s-+http-equiv=[\"']?content-type[\"']?\\s-+content=[\"']?\ |
| 1805 | text/\\(\\sw+\\)\\(?:;\\s-*charset=\\([^\t\n\r \"'>]+\\)\\)?[^>]*>" nil t) | ||
| 1806 | (setq coding | ||
| 1807 | (mm-charset-to-coding-system (match-string 2) | ||
| 1808 | nil t)) | ||
| 1809 | (string-match "\\`html\\'" (match-string 1)))) | ||
| 1810 | mail-parse-charset)) | ||
| 1811 | (when (or coding | ||
| 1812 | (setq coding (mm-charset-to-coding-system charset nil t))) | ||
| 1813 | (insert (prog1 | ||
| 1814 | (decode-coding-string (buffer-string) coding) | ||
| 1815 | (erase-buffer) | ||
| 1816 | (set-buffer-multibyte t)))) | ||
| 1817 | (goto-char (point-min)) | ||
| 1818 | (while (re-search-forward | ||
| 1819 | "&#\\(?:x\\([89][0-9a-f]\\)\\|\\(1[2-5][0-9]\\)\\);" nil t) | ||
| 1820 | (when (setq char | ||
| 1821 | (cdr (assq (if (match-beginning 1) | ||
| 1822 | (string-to-number (match-string 1) 16) | ||
| 1823 | (string-to-number (match-string 2))) | ||
| 1824 | mm-extra-numeric-entities))) | ||
| 1825 | (replace-match (char-to-string char)))) | ||
| 1826 | ;; Remove "soft hyphens". | ||
| 1827 | (goto-char (point-min)) | ||
| 1828 | (while (search-forward "" nil t) | ||
| 1829 | (replace-match "" t t)) | ||
| 1830 | (setq document (libxml-parse-html-region (point-min) (point-max)))) | ||
| 1805 | (save-restriction | 1831 | (save-restriction |
| 1806 | (narrow-to-region (point) (point)) | 1832 | (narrow-to-region (point) (point)) |
| 1807 | (shr-insert-document | 1833 | (shr-insert-document document) |
| 1808 | (mm-with-part handle | ||
| 1809 | (insert (prog1 | ||
| 1810 | (if coding | ||
| 1811 | (decode-coding-string (buffer-string) coding) | ||
| 1812 | (buffer-string)) | ||
| 1813 | (erase-buffer) | ||
| 1814 | (mm-enable-multibyte))) | ||
| 1815 | (goto-char (point-min)) | ||
| 1816 | (setq case-fold-search t) | ||
| 1817 | (while (re-search-forward | ||
| 1818 | "&#\\(?:x\\([89][0-9a-f]\\)\\|\\(1[2-5][0-9]\\)\\);" nil t) | ||
| 1819 | (when (setq char | ||
| 1820 | (cdr (assq (if (match-beginning 1) | ||
| 1821 | (string-to-number (match-string 1) 16) | ||
| 1822 | (string-to-number (match-string 2))) | ||
| 1823 | mm-extra-numeric-entities))) | ||
| 1824 | (replace-match (char-to-string char)))) | ||
| 1825 | ;; Remove "soft hyphens". | ||
| 1826 | (goto-char (point-min)) | ||
| 1827 | (while (search-forward "" nil t) | ||
| 1828 | (replace-match "" t t)) | ||
| 1829 | (libxml-parse-html-region (point-min) (point-max)))) | ||
| 1830 | (unless (bobp) | 1834 | (unless (bobp) |
| 1831 | (insert "\n")) | 1835 | (insert "\n")) |
| 1832 | (mm-convert-shr-links) | 1836 | (mm-convert-shr-links) |