diff options
| author | Philipp Stephani | 2020-05-23 13:56:09 +0200 |
|---|---|---|
| committer | Philipp Stephani | 2020-05-23 14:01:17 +0200 |
| commit | f8581bcf6a1942ebd331cae20e32945a3a86a3d1 (patch) | |
| tree | 3f1a946cba9cdac5b03f09ec1b01be962256295f | |
| parent | 232bb691c1095574b85b358c7f33a46d2ea79f29 (diff) | |
| download | emacs-f8581bcf6a1942ebd331cae20e32945a3a86a3d1.tar.gz emacs-f8581bcf6a1942ebd331cae20e32945a3a86a3d1.zip | |
Reject invalid characters in XML strings (Bug#41094).
* lisp/xml.el (xml-escape-string): Search for invalid characters.
(xml-invalid-character): New error symbol.
* test/lisp/xml-tests.el (xml-print-invalid-cdata): New unit test.
* etc/NEWS: Document new behavior.
| -rw-r--r-- | etc/NEWS | 7 | ||||
| -rw-r--r-- | lisp/xml.el | 13 | ||||
| -rw-r--r-- | test/lisp/xml-tests.el | 10 |
3 files changed, 29 insertions, 1 deletions
| @@ -393,6 +393,13 @@ component are now rejected by 'json-read' and friends. This makes | |||
| 393 | them more compliant with the JSON specification and consistent with | 393 | them more compliant with the JSON specification and consistent with |
| 394 | the native JSON parsing functions. | 394 | the native JSON parsing functions. |
| 395 | 395 | ||
| 396 | ** xml.el | ||
| 397 | |||
| 398 | *** XML serialization functions now reject invalid characters. | ||
| 399 | Previously 'xml-print' would produce invalid XML when given a string | ||
| 400 | with characters that are not valid in XML (see | ||
| 401 | https://www.w3.org/TR/xml/#charsets). Now it rejects such strings. | ||
| 402 | |||
| 396 | 403 | ||
| 397 | * New Modes and Packages in Emacs 28.1 | 404 | * New Modes and Packages in Emacs 28.1 |
| 398 | 405 | ||
diff --git a/lisp/xml.el b/lisp/xml.el index dc774a202cf..767cf042846 100644 --- a/lisp/xml.el +++ b/lisp/xml.el | |||
| @@ -1023,9 +1023,17 @@ entity references (e.g., replace each & with &amp;). | |||
| 1023 | XML character data must not contain & or < characters, nor the > | 1023 | XML character data must not contain & or < characters, nor the > |
| 1024 | character under some circumstances. The XML spec does not impose | 1024 | character under some circumstances. The XML spec does not impose |
| 1025 | restriction on \" or \\=', but we just substitute for these too | 1025 | restriction on \" or \\=', but we just substitute for these too |
| 1026 | \(as is permitted by the spec)." | 1026 | \(as is permitted by the spec). |
| 1027 | |||
| 1028 | If STRING contains characters that are invalid in XML (as defined | ||
| 1029 | by https://www.w3.org/TR/xml/#charsets), signal an error of type | ||
| 1030 | `xml-invalid-character'." | ||
| 1027 | (with-temp-buffer | 1031 | (with-temp-buffer |
| 1028 | (insert string) | 1032 | (insert string) |
| 1033 | (goto-char (point-min)) | ||
| 1034 | (when (re-search-forward | ||
| 1035 | "[^\u0009\u000A\u000D\u0020-\uD7FF\uE000-\uFFFD\U00010000-\U0010FFFF]") | ||
| 1036 | (signal 'xml-invalid-character (list (char-before) (match-beginning 0)))) | ||
| 1029 | (dolist (substitution '(("&" . "&amp;") | 1037 | (dolist (substitution '(("&" . "&amp;") |
| 1030 | ("<" . "&lt;") | 1038 | ("<" . "&lt;") |
| 1031 | (">" . "&gt;") | 1039 | (">" . "&gt;") |
| @@ -1036,6 +1044,9 @@ restriction on \" or \\=', but we just substitute for these too | |||
| 1036 | (replace-match (cdr substitution) t t nil))) | 1044 | (replace-match (cdr substitution) t t nil))) |
| 1037 | (buffer-string))) | 1045 | (buffer-string))) |
| 1038 | 1046 | ||
| 1047 | (define-error 'xml-invalid-character "Invalid XML character" | ||
| 1048 | 'wrong-type-argument) | ||
| 1049 | |||
| 1039 | (defun xml-debug-print-internal (xml indent-string) | 1050 | (defun xml-debug-print-internal (xml indent-string) |
| 1040 | "Outputs the XML tree in the current buffer. | 1051 | "Outputs the XML tree in the current buffer. |
| 1041 | The first line is indented with INDENT-STRING." | 1052 | The first line is indented with INDENT-STRING." |
diff --git a/test/lisp/xml-tests.el b/test/lisp/xml-tests.el index 57e685cd347..72c78d00e3e 100644 --- a/test/lisp/xml-tests.el +++ b/test/lisp/xml-tests.el | |||
| @@ -164,6 +164,16 @@ Parser is called with and without 'symbol-qnames argument.") | |||
| 164 | (should (equal (cdr xml-parse-test--namespace-attribute-qnames) | 164 | (should (equal (cdr xml-parse-test--namespace-attribute-qnames) |
| 165 | (xml-parse-region nil nil nil nil 'symbol-qnames))))) | 165 | (xml-parse-region nil nil nil nil 'symbol-qnames))))) |
| 166 | 166 | ||
| 167 | (ert-deftest xml-print-invalid-cdata () | ||
| 168 | "Check that Bug#41094 is fixed." | ||
| 169 | (with-temp-buffer | ||
| 170 | (should (equal (should-error (xml-print '((foo () "\0"))) | ||
| 171 | :type 'xml-invalid-character) | ||
| 172 | '(xml-invalid-character 0 1))) | ||
| 173 | (should (equal (should-error (xml-print '((foo () "\u00FF \xFF"))) | ||
| 174 | :type 'xml-invalid-character) | ||
| 175 | '(xml-invalid-character #x3FFFFF 3))))) | ||
| 176 | |||
| 167 | ;; Local Variables: | 177 | ;; Local Variables: |
| 168 | ;; no-byte-compile: t | 178 | ;; no-byte-compile: t |
| 169 | ;; End: | 179 | ;; End: |