aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Katsumi Yamaoka 2017-02-17 09:52:09 +0000
committer Katsumi Yamaoka 2017-02-17 09:52:09 +0000
commit 79f017d5c3019f8bc2a5014beda28bb3b829a8e3 (patch)
tree 68602a635d4213a17e846db41b3f96075e2c9421
parent 78f869687e86d4a9f91003dbbbbacde2e2741487 (diff)
download emacs-79f017d5c3019f8bc2a5014beda28bb3b829a8e3.tar.gz
emacs-79f017d5c3019f8bc2a5014beda28bb3b829a8e3.zip
mm-shr: Prefer charset specified in html meta tag
* lisp/gnus/mm-decode.el (mm-shr): Prefer the charset specified in the HTML meta tag over mail-parse-charset when there is no charset spec in the MIME header.
-rw-r--r-- lisp/gnus/mm-decode.el 68
1 files changed, 36 insertions, 32 deletions
diff --git a/lisp/gnus/mm-decode.el b/lisp/gnus/mm-decode.el
index 989d4b8ea17..6b539399596 100644
--- a/lisp/gnus/mm-decode.el
+++ b/lisp/gnus/mm-decode.el
@@ -1793,40 +1793,44 @@ If RECURSIVE, search recursively."
1793 (buffer-string)))))) 1793 (buffer-string))))))
1794 (shr-inhibit-images mm-html-inhibit-images) 1794 (shr-inhibit-images mm-html-inhibit-images)
1795 (shr-blocked-images mm-html-blocked-images) 1795 (shr-blocked-images mm-html-blocked-images)
1796 charset coding char) 1796 charset coding char document)
1797 (unless handle 1797 (mm-with-part (or handle (setq handle (mm-dissect-buffer t)))
1798 (setq handle (mm-dissect-buffer t))) 1798 (setq case-fold-search t)
1799 (and (setq charset 1799 (setq charset
1800 (or (mail-content-type-get (mm-handle-type handle) 'charset) 1800 (or (mail-content-type-get (mm-handle-type handle) 'charset)
1801 mail-parse-charset)) 1801 (progn
1802 (setq coding (mm-charset-to-coding-system charset nil t)) 1802 (goto-char (point-min))
1803 (eq coding 'ascii) 1803 (and (re-search-forward "\
1804 (setq coding nil)) 1804<meta\\s-+http-equiv=[\"']?content-type[\"']?\\s-+content=[\"']?\
1805text/\\(\\sw+\\)\\(?:;\\s-*charset=\\([^\t\n\r \"'>]+\\)\\)?[^>]*>" nil t)
1806 (setq coding
1807 (mm-charset-to-coding-system (match-string 2)
1808 nil t))
1809 (string-match "\\`html\\'" (match-string 1))))
1810 mail-parse-charset))
1811 (when (or coding
1812 (setq coding (mm-charset-to-coding-system charset nil t)))
1813 (insert (prog1
1814 (decode-coding-string (buffer-string) coding)
1815 (erase-buffer)
1816 (set-buffer-multibyte t))))
1817 (goto-char (point-min))
1818 (while (re-search-forward
1819 "&#\\(?:x\\([89][0-9a-f]\\)\\|\\(1[2-5][0-9]\\)\\);" nil t)
1820 (when (setq char
1821 (cdr (assq (if (match-beginning 1)
1822 (string-to-number (match-string 1) 16)
1823 (string-to-number (match-string 2)))
1824 mm-extra-numeric-entities)))
1825 (replace-match (char-to-string char))))
1826 ;; Remove "soft hyphens".
1827 (goto-char (point-min))
1828 (while (search-forward "­" nil t)
1829 (replace-match "" t t))
1830 (setq document (libxml-parse-html-region (point-min) (point-max))))
1805 (save-restriction 1831 (save-restriction
1806 (narrow-to-region (point) (point)) 1832 (narrow-to-region (point) (point))
1807 (shr-insert-document 1833 (shr-insert-document document)
1808 (mm-with-part handle
1809 (insert (prog1
1810 (if coding
1811 (decode-coding-string (buffer-string) coding)
1812 (buffer-string))
1813 (erase-buffer)
1814 (mm-enable-multibyte)))
1815 (goto-char (point-min))
1816 (setq case-fold-search t)
1817 (while (re-search-forward
1818 "&#\\(?:x\\([89][0-9a-f]\\)\\|\\(1[2-5][0-9]\\)\\);" nil t)
1819 (when (setq char
1820 (cdr (assq (if (match-beginning 1)
1821 (string-to-number (match-string 1) 16)
1822 (string-to-number (match-string 2)))
1823 mm-extra-numeric-entities)))
1824 (replace-match (char-to-string char))))
1825 ;; Remove "soft hyphens".
1826 (goto-char (point-min))
1827 (while (search-forward "­" nil t)
1828 (replace-match "" t t))
1829 (libxml-parse-html-region (point-min) (point-max))))
1830 (unless (bobp) 1834 (unless (bobp)
1831 (insert "\n")) 1835 (insert "\n"))
1832 (mm-convert-shr-links) 1836 (mm-convert-shr-links)