aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Jason Rumney 2008-02-18 01:45:54 +0000
committer Jason Rumney 2008-02-18 01:45:54 +0000
commit c657861758d1fd5b70dac9869336d33f9b36a609 (patch)
tree e5e08521934122eea0af9fd1f04edac9aeb65312
parent a70f5385f7ec2839d876af8e960839307f325d0a (diff)
download emacs-c657861758d1fd5b70dac9869336d33f9b36a609.tar.gz
emacs-c657861758d1fd5b70dac9869336d33f9b36a609.zip
* international/mule.el (sgml-xml-auto-coding-function): Detect
and warn if file encoding is not utf-8 and encoding not specified.
(xml-find-file-coding-system): New function.
* international/mule-conf.el (file-coding-system-alist): Use it.
-rw-r--r-- lisp/ChangeLog 7
-rw-r--r-- lisp/international/mule-conf.el 6
-rw-r--r-- lisp/international/mule.el 41
3 files changed, 48 insertions, 6 deletions
diff --git a/lisp/ChangeLog b/lisp/ChangeLog
index 7a236b3f620..fc62689b43a 100644
--- a/lisp/ChangeLog
+++ b/lisp/ChangeLog
@@ -1,3 +1,10 @@
1 2008-02-18 Jason Rumney <jasonr@gnu.org>
2
3 * international/mule.el (sgml-xml-auto-coding-function): Detect
4 and warn if file encoding is not utf-8 and encoding not specified.
5 (xml-find-file-coding-system): New function.
6 * international/mule-conf.el (file-coding-system-alist): Use it.
7
1 2008-02-17 Glenn Morris <rgm@gnu.org> 8 2008-02-17 Glenn Morris <rgm@gnu.org>
2 9
3 * international/mule-cmds.el (set-locale-environment): Pass 10 * international/mule-cmds.el (set-locale-environment): Pass
diff --git a/lisp/international/mule-conf.el b/lisp/international/mule-conf.el
index 1184612ed40..cd3b5a352fd 100644
--- a/lisp/international/mule-conf.el
+++ b/lisp/international/mule-conf.el
@@ -1470,11 +1470,7 @@ for decoding and encoding files, process I/O, etc."
1470(setq file-coding-system-alist 1470(setq file-coding-system-alist
1471 '(("\\.elc\\'" . utf-8-emacs) 1471 '(("\\.elc\\'" . utf-8-emacs)
1472 ("\\.utf\\(-8\\)?\\'" . utf-8) 1472 ("\\.utf\\(-8\\)?\\'" . utf-8)
1473 ;; This is the defined default for XML documents. It may be 1473 ("\\.xml\\'" . xml-find-file-coding-system)
1474 ;; overridden by a charset specification in the header. That
1475 ;; should be grokked by the auto-coding mechanism, but rms
1476 ;; vetoed that. -- fx
1477 ("\\.xml\\'" . utf-8)
1478 ;; We use raw-text for reading loaddefs.el so that if it 1474 ;; We use raw-text for reading loaddefs.el so that if it
1479 ;; happens to have DOS or Mac EOLs, they are converted to 1475 ;; happens to have DOS or Mac EOLs, they are converted to
1480 ;; newlines. This is required to make the special treatment 1476 ;; newlines. This is required to make the special treatment
diff --git a/lisp/international/mule.el b/lisp/international/mule.el
index c1723523b28..7952c7a6878 100644
--- a/lisp/international/mule.el
+++ b/lisp/international/mule.el
@@ -2288,7 +2288,22 @@ This function is intended to be added to `auto-coding-functions'."
2288 sym 2288 sym
2289 (message "Warning: unknown coding system \"%s\"" match) 2289 (message "Warning: unknown coding system \"%s\"" match)
2290 nil)) 2290 nil))
2291 'utf-8))))) 2291 ;; Files without an encoding tag should be UTF-8. But users
2292 ;; may be naive about encodings, and have saved the file from
2293 ;; another editor that does not help them get the encoding right.
2294 ;; Detect the encoding and warn the user if it is detected as
2295 ;; something other than UTF-8.
2296 (let ((detected
2297 (with-coding-priority '(utf-8)
2298 (coding-system-base
2299 (detect-coding-region (point-min) size t)))))
2300 ;; Pure ASCII always comes back as undecided.
2301 (if (memq detected '(utf-8 undecided))
2302 'utf-8
2303 (warn "File contents detected as %s.
2304 Consider adding an encoding attribute to the xml declaration,
2305 or saving as utf-8, as mandated by the xml specification." detected)
2306 detected)))))))
2292 2307
2293(defun sgml-html-meta-auto-coding-function (size) 2308(defun sgml-html-meta-auto-coding-function (size)
2294 "If the buffer has an HTML meta tag, use it to determine encoding. 2309 "If the buffer has an HTML meta tag, use it to determine encoding.
@@ -2314,6 +2329,30 @@ This function is intended to be added to `auto-coding-functions'."
2314 (message "Warning: unknown coding system \"%s\"" match) 2329 (message "Warning: unknown coding system \"%s\"" match)
2315 nil))))) 2330 nil)))))
2316 2331
2332(defun xml-find-file-coding-system (args)
2333 "Determine the coding system of an XML file without a declaration.
2334Strictly speaking, the file should be utf-8, but mistakes are
2335made, and there are genuine cases where XML fragments are saved,
2336with the encoding properly specified in a master document, or
2337added by processing software."
2338 (if (eq (car args) 'insert-file-contents)
2339 (let ((detected
2340 (with-coding-priority '(utf-8)
2341 (coding-system-base
2342 (detect-coding-region (point-min) (point-max) t)))))
2343 ;; Pure ASCII always comes back as undecided.
2344 (if (memq detected '(utf-8 undecided))
2345 'utf-8
2346 (warn "File contents detected as %s.
2347 Consider adding an xml declaration with the encoding specified,
2348 or saving as utf-8, as mandated by the xml specification." detected)
2349 detected))
2350 ;; Don't interfere with the user's wishes for saving the buffer.
2351 ;; We did what we could when the buffer was created to ensure the
2352 ;; correct encoding was used, or the user was warned, so any
2353 ;; non-conformity here is deliberate on the part of the user.
2354 'undecided))
2355
2317;;; 2356;;;
2318(provide 'mule) 2357(provide 'mule)
2319 2358