aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Philipp Stephani, 2020-05-23 13:56:09 +0200
committer: Philipp Stephani, 2020-05-23 14:01:17 +0200
commit: f8581bcf6a1942ebd331cae20e32945a3a86a3d1 (patch)
tree: 3f1a946cba9cdac5b03f09ec1b01be962256295f
parent: 232bb691c1095574b85b358c7f33a46d2ea79f29 (diff)
download: emacs-f8581bcf6a1942ebd331cae20e32945a3a86a3d1.tar.gz
emacs-f8581bcf6a1942ebd331cae20e32945a3a86a3d1.zip
Reject invalid characters in XML strings (Bug#41094).
* lisp/xml.el (xml-escape-string): Search for invalid characters. (xml-invalid-character): New error symbol. * test/lisp/xml-tests.el (xml-print-invalid-cdata): New unit test. * etc/NEWS: Document new behavior.
-rw-r--r-- etc/NEWS 7
-rw-r--r-- lisp/xml.el 13
-rw-r--r-- test/lisp/xml-tests.el 10
3 files changed, 29 insertions, 1 deletions
diff --git a/etc/NEWS b/etc/NEWS
index 32b59cb76fc..efad273da6c 100644
--- a/etc/NEWS
+++ b/etc/NEWS
@@ -393,6 +393,13 @@ component are now rejected by 'json-read' and friends. This makes
393them more compliant with the JSON specification and consistent with 393them more compliant with the JSON specification and consistent with
394the native JSON parsing functions. 394the native JSON parsing functions.
395 395
396** xml.el
397
398*** XML serialization functions now reject invalid characters.
399Previously 'xml-print' would produce invalid XML when given a string
400with characters that are not valid in XML (see
401https://www.w3.org/TR/xml/#charsets). Now it rejects such strings.
402
396 403
397* New Modes and Packages in Emacs 28.1 404* New Modes and Packages in Emacs 28.1
398 405
diff --git a/lisp/xml.el b/lisp/xml.el
index dc774a202cf..767cf042846 100644
--- a/lisp/xml.el
+++ b/lisp/xml.el
@@ -1023,9 +1023,17 @@ entity references (e.g., replace each & with &amp;).
1023XML character data must not contain & or < characters, nor the > 1023XML character data must not contain & or < characters, nor the >
1024character under some circumstances. The XML spec does not impose 1024character under some circumstances. The XML spec does not impose
1025restriction on \" or \\=', but we just substitute for these too 1025restriction on \" or \\=', but we just substitute for these too
1026\(as is permitted by the spec)." 1026\(as is permitted by the spec).
1027
1028If STRING contains characters that are invalid in XML (as defined
1029by https://www.w3.org/TR/xml/#charsets), signal an error of type
1030`xml-invalid-character'."
1027 (with-temp-buffer 1031 (with-temp-buffer
1028 (insert string) 1032 (insert string)
1033 (goto-char (point-min))
1034 (when (re-search-forward
1035 "[^\u0009\u000A\u000D\u0020-\uD7FF\uE000-\uFFFD\U00010000-\U0010FFFF]" nil t) ; BOUND nil, NOERROR t: a clean string yields nil rather than signaling `search-failed'
1036 (signal 'xml-invalid-character (list (char-before) (match-beginning 0))))
1029 (dolist (substitution '(("&" . "&amp;") 1037 (dolist (substitution '(("&" . "&amp;")
1030 ("<" . "&lt;") 1038 ("<" . "&lt;")
1031 (">" . "&gt;") 1039 (">" . "&gt;")
@@ -1036,6 +1044,9 @@ restriction on \" or \\=', but we just substitute for these too
1036 (replace-match (cdr substitution) t t nil))) 1044 (replace-match (cdr substitution) t t nil)))
1037 (buffer-string))) 1045 (buffer-string)))
1038 1046
1047(define-error 'xml-invalid-character "Invalid XML character"
1048 'wrong-type-argument)
1049
1039(defun xml-debug-print-internal (xml indent-string) 1050(defun xml-debug-print-internal (xml indent-string)
1040 "Outputs the XML tree in the current buffer. 1051 "Outputs the XML tree in the current buffer.
1041The first line is indented with INDENT-STRING." 1052The first line is indented with INDENT-STRING."
diff --git a/test/lisp/xml-tests.el b/test/lisp/xml-tests.el
index 57e685cd347..72c78d00e3e 100644
--- a/test/lisp/xml-tests.el
+++ b/test/lisp/xml-tests.el
@@ -164,6 +164,16 @@ Parser is called with and without 'symbol-qnames argument.")
164 (should (equal (cdr xml-parse-test--namespace-attribute-qnames) 164 (should (equal (cdr xml-parse-test--namespace-attribute-qnames)
165 (xml-parse-region nil nil nil nil 'symbol-qnames))))) 165 (xml-parse-region nil nil nil nil 'symbol-qnames)))))
166 166
167(ert-deftest xml-print-invalid-cdata ()
168 "Check that Bug#41094 is fixed."
169 (with-temp-buffer
170 (should (equal (should-error (xml-print '((foo () "\0")))
171 :type 'xml-invalid-character)
172 '(xml-invalid-character 0 1)))
173 (should (equal (should-error (xml-print '((foo () "\u00FF \xFF")))
174 :type 'xml-invalid-character)
175 '(xml-invalid-character #x3FFFFF 3)))))
176
167;; Local Variables: 177;; Local Variables:
168;; no-byte-compile: t 178;; no-byte-compile: t
169;; End: 179;; End: