diff options
| author | Jason Rumney | 2008-02-18 01:45:54 +0000 |
|---|---|---|
| committer | Jason Rumney | 2008-02-18 01:45:54 +0000 |
| commit | c657861758d1fd5b70dac9869336d33f9b36a609 (patch) | |
| tree | e5e08521934122eea0af9fd1f04edac9aeb65312 | |
| parent | a70f5385f7ec2839d876af8e960839307f325d0a (diff) | |
| download | emacs-c657861758d1fd5b70dac9869336d33f9b36a609.tar.gz emacs-c657861758d1fd5b70dac9869336d33f9b36a609.zip | |
* international/mule.el (sgml-xml-auto-coding-function): Detect
and warn if file encoding is not utf-8 and encoding not specified.
(xml-find-file-coding-system): New function.
* international/mule-conf.el (file-coding-system-alist): Use it.
| -rw-r--r-- | lisp/ChangeLog | 7 | ||||
| -rw-r--r-- | lisp/international/mule-conf.el | 6 | ||||
| -rw-r--r-- | lisp/international/mule.el | 41 |
3 files changed, 48 insertions, 6 deletions
diff --git a/lisp/ChangeLog b/lisp/ChangeLog
index 7a236b3f620..fc62689b43a 100644
--- a/lisp/ChangeLog
+++ b/lisp/ChangeLog
| @@ -1,3 +1,10 @@ | |||
| 1 | 2008-02-18 Jason Rumney <jasonr@gnu.org> | ||
| 2 | |||
| 3 | * international/mule.el (sgml-xml-auto-coding-function): Detect | ||
| 4 | and warn if file encoding is not utf-8 and encoding not specified. | ||
| 5 | (xml-find-file-coding-system): New function. | ||
| 6 | * international/mule-conf.el (file-coding-system-alist): Use it. | ||
| 7 | |||
| 1 | 2008-02-17 Glenn Morris <rgm@gnu.org> | 8 | 2008-02-17 Glenn Morris <rgm@gnu.org> |
| 2 | 9 | ||
| 3 | * international/mule-cmds.el (set-locale-environment): Pass | 10 | * international/mule-cmds.el (set-locale-environment): Pass |
diff --git a/lisp/international/mule-conf.el b/lisp/international/mule-conf.el
index 1184612ed40..cd3b5a352fd 100644
--- a/lisp/international/mule-conf.el
+++ b/lisp/international/mule-conf.el
| @@ -1470,11 +1470,7 @@ for decoding and encoding files, process I/O, etc." | |||
| 1470 | (setq file-coding-system-alist | 1470 | (setq file-coding-system-alist |
| 1471 | '(("\\.elc\\'" . utf-8-emacs) | 1471 | '(("\\.elc\\'" . utf-8-emacs) |
| 1472 | ("\\.utf\\(-8\\)?\\'" . utf-8) | 1472 | ("\\.utf\\(-8\\)?\\'" . utf-8) |
| 1473 | ;; This is the defined default for XML documents. It may be | 1473 | ("\\.xml\\'" . xml-find-file-coding-system) |
| 1474 | ;; overridden by a charset specification in the header. That | ||
| 1475 | ;; should be grokked by the auto-coding mechanism, but rms | ||
| 1476 | ;; vetoed that. -- fx | ||
| 1477 | ("\\.xml\\'" . utf-8) | ||
| 1478 | ;; We use raw-text for reading loaddefs.el so that if it | 1474 | ;; We use raw-text for reading loaddefs.el so that if it |
| 1479 | ;; happens to have DOS or Mac EOLs, they are converted to | 1475 | ;; happens to have DOS or Mac EOLs, they are converted to |
| 1480 | ;; newlines. This is required to make the special treatment | 1476 | ;; newlines. This is required to make the special treatment |
diff --git a/lisp/international/mule.el b/lisp/international/mule.el
index c1723523b28..7952c7a6878 100644
--- a/lisp/international/mule.el
+++ b/lisp/international/mule.el
| @@ -2288,7 +2288,22 @@ This function is intended to be added to `auto-coding-functions'." | |||
| 2288 | sym | 2288 | sym |
| 2289 | (message "Warning: unknown coding system \"%s\"" match) | 2289 | (message "Warning: unknown coding system \"%s\"" match) |
| 2290 | nil)) | 2290 | nil)) |
| 2291 | 'utf-8))))) | 2291 | ;; Files without an encoding tag should be UTF-8. But users |
| 2292 | ;; may be naive about encodings, and have saved the file from | ||
| 2293 | ;; another editor that does not help them get the encoding right. | ||
| 2294 | ;; Detect the encoding and warn the user if it is detected as | ||
| 2295 | ;; something other than UTF-8. | ||
| 2296 | (let ((detected | ||
| 2297 | (with-coding-priority '(utf-8) | ||
| 2298 | (coding-system-base | ||
| 2299 | (detect-coding-region (point-min) size t))))) | ||
| 2300 | ;; Pure ASCII always comes back as undecided. | ||
| 2301 | (if (memq detected '(utf-8 undecided)) | ||
| 2302 | 'utf-8 | ||
| 2303 | (warn "File contents detected as %s. | ||
| 2304 | Consider adding an encoding attribute to the xml declaration, | ||
| 2305 | or saving as utf-8, as mandated by the xml specification." detected) | ||
| 2306 | detected))))))) | ||
| 2292 | 2307 | ||
| 2293 | (defun sgml-html-meta-auto-coding-function (size) | 2308 | (defun sgml-html-meta-auto-coding-function (size) |
| 2294 | "If the buffer has an HTML meta tag, use it to determine encoding. | 2309 | "If the buffer has an HTML meta tag, use it to determine encoding. |
| @@ -2314,6 +2329,30 @@ This function is intended to be added to `auto-coding-functions'." | |||
| 2314 | (message "Warning: unknown coding system \"%s\"" match) | 2329 | (message "Warning: unknown coding system \"%s\"" match) |
| 2315 | nil))))) | 2330 | nil))))) |
| 2316 | 2331 | ||
| 2332 | (defun xml-find-file-coding-system (args) | ||
| 2333 | "Determine the coding system of an XML file without a declaration. | ||
| 2334 | Strictly speaking, the file should be utf-8, but mistakes are | ||
| 2335 | made, and there are genuine cases where XML fragments are saved, | ||
| 2336 | with the encoding properly specified in a master document, or | ||
| 2337 | added by processing software." | ||
| 2338 | (if (eq (car args) 'insert-file-contents) | ||
| 2339 | (let ((detected | ||
| 2340 | (with-coding-priority '(utf-8) | ||
| 2341 | (coding-system-base | ||
| 2342 | (detect-coding-region (point-min) (point-max) t))))) | ||
| 2343 | ;; Pure ASCII always comes back as undecided. | ||
| 2344 | (if (memq detected '(utf-8 undecided)) | ||
| 2345 | 'utf-8 | ||
| 2346 | (warn "File contents detected as %s. | ||
| 2347 | Consider adding an xml declaration with the encoding specified, | ||
| 2348 | or saving as utf-8, as mandated by the xml specification." detected) | ||
| 2349 | detected)) | ||
| 2350 | ;; Don't interfere with the user's wishes for saving the buffer. | ||
| 2351 | ;; We did what we could when the buffer was created to ensure the | ||
| 2352 | ;; correct encoding was used, or the user was warned, so any | ||
| 2353 | ;; non-conformity here is deliberate on the part of the user. | ||
| 2354 | 'undecided)) | ||
| 2355 | |||
| 2317 | ;;; | 2356 | ;;; |
| 2318 | (provide 'mule) | 2357 | (provide 'mule) |
| 2319 | 2358 | ||