aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChong Yidong2012-07-01 15:17:05 +0800
committerChong Yidong2012-07-01 15:17:05 +0800
commitfbf2e7ad3bd676083dae339aba16bf812dfc51a3 (patch)
tree1ee6f4f014de8f97f8a711f58d3323aebbf8ce41
parentb95b72547b5a2c5e4e294e9e703d3a85928f58f4 (diff)
downloademacs-fbf2e7ad3bd676083dae339aba16bf812dfc51a3.tar.gz
emacs-fbf2e7ad3bd676083dae339aba16bf812dfc51a3.zip
Improve xml parameter entity parsing, and add a new ERT test.
* test/automated/xml-parse-tests.el: New file. * lisp/xml.el (xml--parse-buffer): New function. Move most of xml-parse-region here. (xml-parse-region): Copy region into a temporary buffer, since parameter entity substitution requires changing buffer contents. Use xml--parse-buffer. (xml-parse-file): Use xml--parse-buffer. (xml-parse-dtd): Make parameter entity substitution work right.
-rw-r--r--lisp/ChangeLog10
-rw-r--r--lisp/xml.el180
-rw-r--r--test/ChangeLog4
-rw-r--r--test/automated/xml-parse-tests.el57
4 files changed, 178 insertions, 73 deletions
diff --git a/lisp/ChangeLog b/lisp/ChangeLog
index 0cae8a88e77..3156dc412e3 100644
--- a/lisp/ChangeLog
+++ b/lisp/ChangeLog
@@ -1,3 +1,13 @@
12012-07-01 Chong Yidong <cyd@gnu.org>
2
3 * xml.el (xml--parse-buffer): New function. Move most of
4 xml-parse-region here.
5 (xml-parse-region): Copy region into a temporary buffer, since
6 parameter entity substitution requires changing buffer contents.
7 Use xml--parse-buffer.
8 (xml-parse-file): Use xml--parse-buffer.
9 (xml-parse-dtd): Make parameter entity substitution work right.
10
12012-06-30 Glenn Morris <rgm@gnu.org> 112012-06-30 Glenn Morris <rgm@gnu.org>
2 12
3 * comint.el (follow-comint-scroll-to-bottom): Fix declaration. 13 * comint.el (follow-comint-scroll-to-bottom): Fix declaration.
diff --git a/lisp/xml.el b/lisp/xml.el
index a9e1b2c2830..841e19a174a 100644
--- a/lisp/xml.el
+++ b/lisp/xml.el
@@ -165,23 +165,12 @@ See also `xml-get-attribute-or-nil'."
165;;;###autoload 165;;;###autoload
166(defun xml-parse-file (file &optional parse-dtd parse-ns) 166(defun xml-parse-file (file &optional parse-dtd parse-ns)
167 "Parse the well-formed XML file FILE. 167 "Parse the well-formed XML file FILE.
168If FILE is already visited, use its buffer and don't kill it. 168Return the top node with all its children.
169Returns the top node with all its children.
170If PARSE-DTD is non-nil, the DTD is parsed rather than skipped. 169If PARSE-DTD is non-nil, the DTD is parsed rather than skipped.
171If PARSE-NS is non-nil, then QNAMES are expanded." 170If PARSE-NS is non-nil, then QNAMES are expanded."
172 (if (get-file-buffer file) 171 (with-temp-buffer
173 (with-current-buffer (get-file-buffer file) 172 (insert-file-contents file)
174 (save-excursion 173 (xml--parse-buffer parse-dtd parse-ns)))
175 (xml-parse-region (point-min)
176 (point-max)
177 (current-buffer)
178 parse-dtd parse-ns)))
179 (with-temp-buffer
180 (insert-file-contents file)
181 (xml-parse-region (point-min)
182 (point-max)
183 (current-buffer)
184 parse-dtd parse-ns))))
185 174
186(eval-and-compile 175(eval-and-compile
187(let* ((start-chars (concat "[:alpha:]:_")) 176(let* ((start-chars (concat "[:alpha:]:_"))
@@ -320,42 +309,44 @@ and returned as the first element of the list.
320If PARSE-NS is non-nil, then QNAMES are expanded." 309If PARSE-NS is non-nil, then QNAMES are expanded."
321 ;; Use fixed syntax table to ensure regexp char classes and syntax 310 ;; Use fixed syntax table to ensure regexp char classes and syntax
322 ;; specs DTRT. 311 ;; specs DTRT.
312 (unless buffer
313 (setq buffer (current-buffer)))
314 (with-temp-buffer
315 (insert-buffer-substring buffer beg end)
316 (xml--parse-buffer parse-dtd parse-ns)))
317
318(defun xml--parse-buffer (parse-dtd parse-ns)
323 (with-syntax-table (standard-syntax-table) 319 (with-syntax-table (standard-syntax-table)
324 (let ((case-fold-search nil) ; XML is case-sensitive. 320 (let ((case-fold-search nil) ; XML is case-sensitive.
325 ;; Prevent entity definitions from changing the defaults 321 ;; Prevent entity definitions from changing the defaults
326 (xml-entity-alist xml-entity-alist) 322 (xml-entity-alist xml-entity-alist)
327 (xml-parameter-entity-alist xml-parameter-entity-alist) 323 (xml-parameter-entity-alist xml-parameter-entity-alist)
328 xml result dtd) 324 xml result dtd)
329 (save-excursion 325 (goto-char (point-min))
330 (if buffer 326 (while (not (eobp))
331 (set-buffer buffer)) 327 (if (search-forward "<" nil t)
332 (save-restriction 328 (progn
333 (narrow-to-region beg end) 329 (forward-char -1)
334 (goto-char (point-min)) 330 (setq result (xml-parse-tag parse-dtd parse-ns))
335 (while (not (eobp)) 331 (cond
336 (if (search-forward "<" nil t) 332 ((null result)
337 (progn 333 ;; Not looking at an xml start tag.
338 (forward-char -1) 334 (unless (eobp)
339 (setq result (xml-parse-tag parse-dtd parse-ns)) 335 (forward-char 1)))
340 (cond 336 ((and xml (not xml-sub-parser))
341 ((null result) 337 ;; Translation of rule [1] of XML specifications
342 ;; Not looking at an xml start tag. 338 (error "XML: (Not Well-Formed) Only one root tag allowed"))
343 (unless (eobp) 339 ((and (listp (car result))
344 (forward-char 1))) 340 parse-dtd)
345 ((and xml (not xml-sub-parser)) 341 (setq dtd (car result))
346 ;; Translation of rule [1] of XML specifications 342 (if (cdr result) ; possible leading comment
347 (error "XML: (Not Well-Formed) Only one root tag allowed")) 343 (add-to-list 'xml (cdr result))))
348 ((and (listp (car result)) 344 (t
349 parse-dtd) 345 (add-to-list 'xml result))))
350 (setq dtd (car result)) 346 (goto-char (point-max))))
351 (if (cdr result) ; possible leading comment 347 (if parse-dtd
352 (add-to-list 'xml (cdr result)))) 348 (cons dtd (nreverse xml))
353 (t 349 (nreverse xml)))))
354 (add-to-list 'xml result))))
355 (goto-char (point-max))))
356 (if parse-dtd
357 (cons dtd (nreverse xml))
358 (nreverse xml)))))))
359 350
360(defun xml-maybe-do-ns (name default xml-ns) 351(defun xml-maybe-do-ns (name default xml-ns)
361 "Perform any namespace expansion. 352 "Perform any namespace expansion.
@@ -600,7 +591,10 @@ This follows the rule [28] in the XML specifications."
600 ;; Get the name of the document 591 ;; Get the name of the document
601 (looking-at xml-name-regexp) 592 (looking-at xml-name-regexp)
602 (let ((dtd (list (match-string-no-properties 0) 'dtd)) 593 (let ((dtd (list (match-string-no-properties 0) 'dtd))
603 (xml-parameter-entity-alist xml-parameter-entity-alist)) 594 (xml-parameter-entity-alist xml-parameter-entity-alist)
595 (parameter-entity-re (eval-when-compile
596 (concat "%\\(" xml-name-re "\\);")))
597 next-parameter-entity)
604 (goto-char (match-end 0)) 598 (goto-char (match-end 0))
605 (skip-syntax-forward " ") 599 (skip-syntax-forward " ")
606 600
@@ -638,13 +632,28 @@ This follows the rule [28] in the XML specifications."
638 (error "XML: Bad DTD")) 632 (error "XML: Bad DTD"))
639 (forward-char) 633 (forward-char)
640 634
635 ;; [2.8]: "markup declarations may be made up in whole or in
636 ;; part of the replacement text of parameter entities."
637
638 ;; Since parameter entities are valid only within the DTD, we
639 ;; first search for the position of the next possible parameter
640 ;; entity. Then, search for the next DTD element; if it ends
641 ;; before the next parameter entity, expand the parameter entity
642 ;; and try again.
643 (setq next-parameter-entity
644 (save-excursion
645 (if (re-search-forward parameter-entity-re nil t)
646 (match-beginning 0))))
647
641 ;; Parse the rest of the DTD 648 ;; Parse the rest of the DTD
642 ;; Fixme: Deal with NOTATION, PIs. 649 ;; Fixme: Deal with NOTATION, PIs.
643 (while (not (looking-at "\\s-*\\]")) 650 (while (not (looking-at "\\s-*\\]"))
644 (skip-syntax-forward " ") 651 (skip-syntax-forward " ")
645 (cond 652 (cond
646 ;; Element declaration [45]: 653 ;; Element declaration [45]:
647 ((looking-at "<!ELEMENT\\s-+\\([[:alnum:].%;]+\\)\\s-+\\([^>]+\\)>") 654 ((and (looking-at "<!ELEMENT\\s-+\\([[:alnum:].%;]+\\)\\s-+\\([^>]+\\)>")
655 (or (null next-parameter-entity)
656 (<= (match-end 0) next-parameter-entity)))
648 (let ((element (match-string-no-properties 1)) 657 (let ((element (match-string-no-properties 1))
649 (type (match-string-no-properties 2)) 658 (type (match-string-no-properties 2))
650 (end-pos (match-end 0))) 659 (end-pos (match-end 0)))
@@ -672,19 +681,31 @@ This follows the rule [28] in the XML specifications."
672 (goto-char end-pos))) 681 (goto-char end-pos)))
673 682
674 ;; Attribute-list declaration [52] (currently unsupported): 683 ;; Attribute-list declaration [52] (currently unsupported):
675 ((looking-at (concat "<!ATTLIST[ \t\n\r]*\\(" xml-name-re 684 ((and (looking-at (eval-when-compile
676 "\\)[ \t\n\r]*\\(" xml-att-def-re 685 (concat "<!ATTLIST[ \t\n\r]*\\(" xml-name-re
677 "\\)*[ \t\n\r]*>")) 686 "\\)[ \t\n\r]*\\(" xml-att-def-re
687 "\\)*[ \t\n\r]*>")))
688 (or (null next-parameter-entity)
689 (<= (match-end 0) next-parameter-entity)))
678 (goto-char (match-end 0))) 690 (goto-char (match-end 0)))
679 691
680 ;; Comments (skip to end): 692 ;; Comments (skip to end, ignoring parameter entity):
681 ((looking-at "<!--") 693 ((looking-at "<!--")
682 (search-forward "-->")) 694 (search-forward "-->")
695 (and next-parameter-entity
696 (> (point) next-parameter-entity)
697 (setq next-parameter-entity
698 (save-excursion
699 (if (re-search-forward parameter-entity-re nil t)
700 (match-beginning 0))))))
683 701
684 ;; Internal entity declarations: 702 ;; Internal entity declarations:
685 ((looking-at (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\(" 703 ((and (looking-at (eval-when-compile
686 xml-name-re "\\)[ \t\n\r]*\\(" 704 (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\("
687 xml-entity-value-re "\\)[ \t\n\r]*>")) 705 xml-name-re "\\)[ \t\n\r]*\\("
706 xml-entity-value-re "\\)[ \t\n\r]*>")))
707 (or (null next-parameter-entity)
708 (<= (match-end 0) next-parameter-entity)))
688 (let* ((name (prog1 (match-string-no-properties 2) 709 (let* ((name (prog1 (match-string-no-properties 2)
689 (goto-char (match-end 0)))) 710 (goto-char (match-end 0))))
690 (alist (if (match-string 1) 711 (alist (if (match-string 1)
@@ -700,26 +721,39 @@ This follows the rule [28] in the XML specifications."
700 (set alist (cons (cons name value) (symbol-value alist)))))) 721 (set alist (cons (cons name value) (symbol-value alist))))))
701 722
702 ;; External entity declarations (currently unsupported): 723 ;; External entity declarations (currently unsupported):
703 ((or (looking-at (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\(" 724 ((and (or (looking-at (eval-when-compile
704 xml-name-re "\\)[ \t\n\r]+SYSTEM[ \t\n\r]+" 725 (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\("
705 "\\(\"[^\"]*\"\\|'[^']*'\\)[ \t\n\r]*>")) 726 xml-name-re "\\)[ \t\n\r]+SYSTEM[ \t\n\r]+"
706 (looking-at (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\(" 727 "\\(\"[^\"]*\"\\|'[^']*'\\)[ \t\n\r]*>")))
707 xml-name-re "\\)[ \t\n\r]+PUBLIC[ \t\n\r]+" 728 (looking-at (eval-when-compile
708 "\"[- \r\na-zA-Z0-9'()+,./:=?;!*#@$_%]*\"" 729 (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\("
709 "\\|'[- \r\na-zA-Z0-9()+,./:=?;!*#@$_%]*'" 730 xml-name-re "\\)[ \t\n\r]+PUBLIC[ \t\n\r]+"
710 "[ \t\n\r]+\\(\"[^\"]*\"\\|'[^']*'\\)" 731 "\"[- \r\na-zA-Z0-9'()+,./:=?;!*#@$_%]*\""
711 "[ \t\n\r]*>"))) 732 "\\|'[- \r\na-zA-Z0-9()+,./:=?;!*#@$_%]*'"
733 "[ \t\n\r]+\\(\"[^\"]*\"\\|'[^']*'\\)"
734 "[ \t\n\r]*>"))))
735 (or (null next-parameter-entity)
736 (<= (match-end 0) next-parameter-entity)))
712 (goto-char (match-end 0))) 737 (goto-char (match-end 0)))
713 738
714 ;; Parameter entity: 739 ;; If a parameter entity is in the way, expand it.
715 ((looking-at (concat "%\\(" xml-name-re "\\);")) 740 (next-parameter-entity
716 (goto-char (match-end 0)) 741 (save-excursion
717 (let* ((entity (match-string 1)) 742 (goto-char next-parameter-entity)
718 (end (point-marker)) 743 (unless (looking-at parameter-entity-re)
719 (elt (assoc entity xml-parameter-entity-alist))) 744 (error "XML: Internal error"))
720 (when elt 745 (let* ((entity (match-string 1))
721 (replace-match (cdr elt) t t) 746 (beg (point-marker))
722 (goto-char end)))) 747 (elt (assoc entity xml-parameter-entity-alist)))
748 (if elt
749 (progn
750 (replace-match (cdr elt) t t)
751 ;; The replacement can itself be a parameter entity.
752 (goto-char next-parameter-entity))
753 (goto-char (match-end 0))))
754 (setq next-parameter-entity
755 (if (re-search-forward parameter-entity-re nil t)
756 (match-beginning 0)))))
723 757
724 ;; Anything else: 758 ;; Anything else:
725 (xml-validating-parser 759 (xml-validating-parser
diff --git a/test/ChangeLog b/test/ChangeLog
index 45fc70e0440..d9d9bc5a9fa 100644
--- a/test/ChangeLog
+++ b/test/ChangeLog
@@ -1,3 +1,7 @@
12012-07-01 Chong Yidong <cyd@gnu.org>
2
3 * automated/xml-parse-tests.el: New file.
4
12012-06-27 Stefan Monnier <monnier@iro.umontreal.ca> 52012-06-27 Stefan Monnier <monnier@iro.umontreal.ca>
2 6
3 * automated/ert-x-tests.el (ert-test-run-tests-interactively-2): 7 * automated/ert-x-tests.el (ert-test-run-tests-interactively-2):
diff --git a/test/automated/xml-parse-tests.el b/test/automated/xml-parse-tests.el
new file mode 100644
index 00000000000..8e8ef291bdc
--- /dev/null
+++ b/test/automated/xml-parse-tests.el
@@ -0,0 +1,57 @@
1;;; xml-parse-tests.el --- Test suite for XML parsing.
2
3;; Copyright (C) 2012 Free Software Foundation, Inc.
4
5;; Author: Chong Yidong <cyd@stupidchicken.com>
6;; Keywords: internal
7;; Human-Keywords: internal
8
9;; This file is part of GNU Emacs.
10
11;; GNU Emacs is free software: you can redistribute it and/or modify
12;; it under the terms of the GNU General Public License as published by
13;; the Free Software Foundation, either version 3 of the License, or
14;; (at your option) any later version.
15
16;; GNU Emacs is distributed in the hope that it will be useful,
17;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19;; GNU General Public License for more details.
20
21;; You should have received a copy of the GNU General Public License
22;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
23
24;;; Commentary:
25
26;; Type M-x test-xml-parse RET to generate the test buffer.
27
28;;; Code:
29
30(require 'xml)
31
32(defvar xml-parse-tests--data
33 '(;; General entity substitution
34 ("<?xml version=\"1.0\"?><!DOCTYPE foo SYSTEM \"bar.dtd\" [<!ENTITY ent \"AbC\">]><foo a=\"b\"><bar>&ent;;</bar></foo>" .
35 ((foo ((a . "b")) (bar nil "AbC;"))))
36 ;; Parameter entity substitution
37 ("<?xml version=\"1.0\"?><!DOCTYPE foo SYSTEM \"bar.dtd\" [<!ENTITY % pent \"AbC\"><!ENTITY ent \"%pent;\">]><foo a=\"b\"><bar>&ent;;</bar></foo>" .
38 ((foo ((a . "b")) (bar nil "AbC;"))))
39 ;; Tricky parameter entity substitution (like XML spec Appendix D)
40 ("<?xml version='1.0'?><!DOCTYPE foo [ <!ENTITY % xx '&#37;zz;'><!ENTITY % zz '&#60;!ENTITY ent \"b\" >' > %xx; ]><foo>A&ent;C</foo>" .
41 ((foo nil "AbC"))))
42 "Alist of XML strings and their expected parse trees.")
43
44(ert-deftest xml-parse-tests ()
45 "Test XML parsing."
46 (with-temp-buffer
47 (dolist (test xml-parse-tests--data)
48 (erase-buffer)
49 (insert (car test))
50 (should (equal (cdr test)
51 (xml-parse-region (point-min) (point-max)))))))
52
53;; Local Variables:
54;; no-byte-compile: t
55;; End:
56
57;;; xml-parse-tests.el ends here.