aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Eli Zaretskii 2017-12-15 11:06:07 +0200
committer Eli Zaretskii 2017-12-15 11:06:07 +0200
commit 889f07c352f7e0deccf59353a60a45f2716551d8 (patch)
tree 91d6574814bd5359b7498f747389f6ab00763536
parent a2697fac0ec0d4dd915b619bb76792121514acfa (diff)
download emacs-889f07c352f7e0deccf59353a60a45f2716551d8.tar.gz
emacs-889f07c352f7e0deccf59353a60a45f2716551d8.zip
Better support utf-8-with-signature and utf-8-hfs in XML/HTML
* lisp/international/mule.el (sgml-xml-auto-coding-function): Support UTF-8 with BOM and utf-8-hfs as variants of UTF-8, and obey the buffer's encoding if it is one of these variants, instead of re-encoding in UTF-8 proper. (Bug#20623)
-rw-r--r-- lisp/international/mule.el 15
1 files changed, 13 insertions, 2 deletions
diff --git a/lisp/international/mule.el b/lisp/international/mule.el
index 857fa800eb4..81c04db90e9 100644
--- a/lisp/international/mule.el
+++ b/lisp/international/mule.el
@@ -2493,7 +2493,17 @@ This function is intended to be added to `auto-coding-functions'."
2493 (let* ((match (match-string 1)) 2493 (let* ((match (match-string 1))
2494 (sym (intern (downcase match)))) 2494 (sym (intern (downcase match))))
2495 (if (coding-system-p sym) 2495 (if (coding-system-p sym)
2496 sym 2496 ;; If the encoding tag is UTF-8 and the buffer's
2497 ;; encoding is one of the variants of UTF-8, use the
2498 ;; buffer's encoding. This allows, e.g., saving an
2499 ;; XML file as UTF-8 with BOM when the tag says UTF-8.
2500 (let ((sym-type (coding-system-type sym))
2501 (bfcs-type
2502 (coding-system-type buffer-file-coding-system)))
2503 (if (and (coding-system-equal 'utf-8 sym-type)
2504 (coding-system-equal 'utf-8 bfcs-type))
2505 buffer-file-coding-system
2506 sym))
2497 (message "Warning: unknown coding system \"%s\"" match) 2507 (message "Warning: unknown coding system \"%s\"" match)
2498 nil)) 2508 nil))
2499 ;; Files without an encoding tag should be UTF-8. But users 2509 ;; Files without an encoding tag should be UTF-8. But users
@@ -2506,7 +2516,8 @@ This function is intended to be added to `auto-coding-functions'."
2506 (coding-system-base 2516 (coding-system-base
2507 (detect-coding-region (point-min) size t))))) 2517 (detect-coding-region (point-min) size t)))))
2508 ;; Pure ASCII always comes back as undecided. 2518 ;; Pure ASCII always comes back as undecided.
2509 (if (memq detected '(utf-8 undecided)) 2519 (if (memq detected
2520 '(utf-8 'utf-8-with-signature 'utf-8-hfs undecided))
2510 'utf-8 2521 'utf-8
2511 (warn "File contents detected as %s. 2522 (warn "File contents detected as %s.
2512 Consider adding an encoding attribute to the xml declaration, 2523 Consider adding an encoding attribute to the xml declaration,