aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- etc/NEWS 7
-rw-r--r-- lisp/xml.el 13
-rw-r--r-- test/lisp/xml-tests.el 10
3 files changed, 29 insertions, 1 deletions
diff --git a/etc/NEWS b/etc/NEWS
index 32b59cb76fc..efad273da6c 100644
--- a/etc/NEWS
+++ b/etc/NEWS
@@ -393,6 +393,13 @@ component are now rejected by 'json-read' and friends. This makes
393them more compliant with the JSON specification and consistent with 393them more compliant with the JSON specification and consistent with
394the native JSON parsing functions. 394the native JSON parsing functions.
395 395
396** xml.el
397
398*** XML serialization functions now reject invalid characters.
399Previously 'xml-print' would produce invalid XML when given a string
400with characters that are not valid in XML (see
401https://www.w3.org/TR/xml/#charsets). Now it rejects such strings.
402
396 403
397* New Modes and Packages in Emacs 28.1 404* New Modes and Packages in Emacs 28.1
398 405
diff --git a/lisp/xml.el b/lisp/xml.el
index dc774a202cf..767cf042846 100644
--- a/lisp/xml.el
+++ b/lisp/xml.el
@@ -1023,9 +1023,17 @@ entity references (e.g., replace each & with &amp;).
1023XML character data must not contain & or < characters, nor the > 1023XML character data must not contain & or < characters, nor the >
1024character under some circumstances. The XML spec does not impose 1024character under some circumstances. The XML spec does not impose
1025restriction on \" or \\=', but we just substitute for these too 1025restriction on \" or \\=', but we just substitute for these too
1026\(as is permitted by the spec)." 1026\(as is permitted by the spec).
1027
1028If STRING contains characters that are invalid in XML (as defined
1029by https://www.w3.org/TR/xml/#charsets), signal an error of type
1030`xml-invalid-character'."
1027 (with-temp-buffer 1031 (with-temp-buffer
1028 (insert string) 1032 (insert string)
1033 (goto-char (point-min))
1034 (when (re-search-forward ; find the first character outside the ranges listed in the class below
1035 "[^\u0009\u000A\u000D\u0020-\uD7FF\uE000-\uFFFD\U00010000-\U0010FFFF]" nil t) ; NOERROR = t: return nil instead of signaling `search-failed' when STRING is clean
1036 (signal 'xml-invalid-character (list (char-before) (match-beginning 0)))) ; error data is (CHAR POSITION)
1029 (dolist (substitution '(("&" . "&amp;") 1037 (dolist (substitution '(("&" . "&amp;")
1030 ("<" . "&lt;") 1038 ("<" . "&lt;")
1031 (">" . "&gt;") 1039 (">" . "&gt;")
@@ -1036,6 +1044,9 @@ restriction on \" or \\=', but we just substitute for these too
1036 (replace-match (cdr substitution) t t nil))) 1044 (replace-match (cdr substitution) t t nil)))
1037 (buffer-string))) 1045 (buffer-string)))
1038 1046
1047(define-error 'xml-invalid-character "Invalid XML character" ; condition signaled when text contains a character not valid in XML
1048 'wrong-type-argument) ; parent condition, so `wrong-type-argument' handlers also catch it
1049
1039(defun xml-debug-print-internal (xml indent-string) 1050(defun xml-debug-print-internal (xml indent-string)
1040 "Outputs the XML tree in the current buffer. 1051 "Outputs the XML tree in the current buffer.
1041The first line is indented with INDENT-STRING." 1052The first line is indented with INDENT-STRING."
diff --git a/test/lisp/xml-tests.el b/test/lisp/xml-tests.el
index 57e685cd347..72c78d00e3e 100644
--- a/test/lisp/xml-tests.el
+++ b/test/lisp/xml-tests.el
@@ -164,6 +164,16 @@ Parser is called with and without 'symbol-qnames argument.")
164 (should (equal (cdr xml-parse-test--namespace-attribute-qnames) 164 (should (equal (cdr xml-parse-test--namespace-attribute-qnames)
165 (xml-parse-region nil nil nil nil 'symbol-qnames))))) 165 (xml-parse-region nil nil nil nil 'symbol-qnames)))))
166 166
167(ert-deftest xml-print-invalid-cdata () ; regression test: `xml-print' must signal on invalid character data
168 "Check that Bug#41094 is fixed."
169 (with-temp-buffer ; presumably because `xml-print' writes into the current buffer -- confirm
170 (should (equal (should-error (xml-print '((foo () "\0"))) ; text node holding a NUL character
171 :type 'xml-invalid-character)
172 '(xml-invalid-character 0 1))) ; expected error: (CONDITION CHAR POSITION) -- NUL at position 1
173 (should (equal (should-error (xml-print '((foo () "\u00FF \xFF"))) ; two valid characters, then \xFF
174 :type 'xml-invalid-character)
175 '(xml-invalid-character #x3FFFFF 3))))) ; offending char is third; #x3FFFFF is presumably the raw byte \xFF -- confirm
176
167;; Local Variables: 177;; Local Variables:
168;; no-byte-compile: t 178;; no-byte-compile: t
169;; End: 179;; End: