diff options
| author | Chong Yidong | 2012-07-01 15:17:05 +0800 |
|---|---|---|
| committer | Chong Yidong | 2012-07-01 15:17:05 +0800 |
| commit | fbf2e7ad3bd676083dae339aba16bf812dfc51a3 (patch) | |
| tree | 1ee6f4f014de8f97f8a711f58d3323aebbf8ce41 | |
| parent | b95b72547b5a2c5e4e294e9e703d3a85928f58f4 (diff) | |
| download | emacs-fbf2e7ad3bd676083dae339aba16bf812dfc51a3.tar.gz emacs-fbf2e7ad3bd676083dae339aba16bf812dfc51a3.zip | |
Improve xml parameter entity parsing, and add a new ERT test.
* test/automated/xml-parse-tests.el: New file.
* lisp/xml.el (xml--parse-buffer): New function. Move most of
xml-parse-region here.
(xml-parse-region): Copy region into a temporary buffer, since
parameter entity substitution requires changing buffer contents.
Use xml--parse-buffer.
(xml-parse-file): Use xml--parse-buffer.
(xml-parse-dtd): Make parameter entity substitution work right.
| -rw-r--r-- | lisp/ChangeLog | 10 | ||||
| -rw-r--r-- | lisp/xml.el | 180 | ||||
| -rw-r--r-- | test/ChangeLog | 4 | ||||
| -rw-r--r-- | test/automated/xml-parse-tests.el | 57 |
4 files changed, 178 insertions, 73 deletions
diff --git a/lisp/ChangeLog b/lisp/ChangeLog index 0cae8a88e77..3156dc412e3 100644 --- a/lisp/ChangeLog +++ b/lisp/ChangeLog | |||
| @@ -1,3 +1,13 @@ | |||
| 1 | 2012-07-01 Chong Yidong <cyd@gnu.org> | ||
| 2 | |||
| 3 | * xml.el (xml--parse-buffer): New function. Move most of | ||
| 4 | xml-parse-region here. | ||
| 5 | (xml-parse-region): Copy region into a temporary buffer, since | ||
| 6 | parameter entity substitution requires changing buffer contents. | ||
| 7 | Use xml--parse-buffer. | ||
| 8 | (xml-parse-file): Use xml--parse-buffer. | ||
| 9 | (xml-parse-dtd): Make parameter entity substitution work right. | ||
| 10 | |||
| 1 | 2012-06-30 Glenn Morris <rgm@gnu.org> | 11 | 2012-06-30 Glenn Morris <rgm@gnu.org> |
| 2 | 12 | ||
| 3 | * comint.el (follow-comint-scroll-to-bottom): Fix declaration. | 13 | * comint.el (follow-comint-scroll-to-bottom): Fix declaration. |
diff --git a/lisp/xml.el b/lisp/xml.el index a9e1b2c2830..841e19a174a 100644 --- a/lisp/xml.el +++ b/lisp/xml.el | |||
| @@ -165,23 +165,12 @@ See also `xml-get-attribute-or-nil'." | |||
| 165 | ;;;###autoload | 165 | ;;;###autoload |
| 166 | (defun xml-parse-file (file &optional parse-dtd parse-ns) | 166 | (defun xml-parse-file (file &optional parse-dtd parse-ns) |
| 167 | "Parse the well-formed XML file FILE. | 167 | "Parse the well-formed XML file FILE. |
| 168 | If FILE is already visited, use its buffer and don't kill it. | 168 | Return the top node with all its children. |
| 169 | Returns the top node with all its children. | ||
| 170 | If PARSE-DTD is non-nil, the DTD is parsed rather than skipped. | 169 | If PARSE-DTD is non-nil, the DTD is parsed rather than skipped. |
| 171 | If PARSE-NS is non-nil, then QNAMES are expanded." | 170 | If PARSE-NS is non-nil, then QNAMES are expanded." |
| 172 | (if (get-file-buffer file) | 171 | (with-temp-buffer |
| 173 | (with-current-buffer (get-file-buffer file) | 172 | (insert-file-contents file) |
| 174 | (save-excursion | 173 | (xml--parse-buffer parse-dtd parse-ns))) |
| 175 | (xml-parse-region (point-min) | ||
| 176 | (point-max) | ||
| 177 | (current-buffer) | ||
| 178 | parse-dtd parse-ns))) | ||
| 179 | (with-temp-buffer | ||
| 180 | (insert-file-contents file) | ||
| 181 | (xml-parse-region (point-min) | ||
| 182 | (point-max) | ||
| 183 | (current-buffer) | ||
| 184 | parse-dtd parse-ns)))) | ||
| 185 | 174 | ||
| 186 | (eval-and-compile | 175 | (eval-and-compile |
| 187 | (let* ((start-chars (concat "[:alpha:]:_")) | 176 | (let* ((start-chars (concat "[:alpha:]:_")) |
| @@ -320,42 +309,44 @@ and returned as the first element of the list. | |||
| 320 | If PARSE-NS is non-nil, then QNAMES are expanded." | 309 | If PARSE-NS is non-nil, then QNAMES are expanded." |
| 321 | ;; Use fixed syntax table to ensure regexp char classes and syntax | 310 | ;; Use fixed syntax table to ensure regexp char classes and syntax |
| 322 | ;; specs DTRT. | 311 | ;; specs DTRT. |
| 312 | (unless buffer | ||
| 313 | (setq buffer (current-buffer))) | ||
| 314 | (with-temp-buffer | ||
| 315 | (insert-buffer-substring buffer beg end) | ||
| 316 | (xml--parse-buffer parse-dtd parse-ns))) | ||
| 317 | |||
| 318 | (defun xml--parse-buffer (parse-dtd parse-ns) | ||
| 323 | (with-syntax-table (standard-syntax-table) | 319 | (with-syntax-table (standard-syntax-table) |
| 324 | (let ((case-fold-search nil) ; XML is case-sensitive. | 320 | (let ((case-fold-search nil) ; XML is case-sensitive. |
| 325 | ;; Prevent entity definitions from changing the defaults | 321 | ;; Prevent entity definitions from changing the defaults |
| 326 | (xml-entity-alist xml-entity-alist) | 322 | (xml-entity-alist xml-entity-alist) |
| 327 | (xml-parameter-entity-alist xml-parameter-entity-alist) | 323 | (xml-parameter-entity-alist xml-parameter-entity-alist) |
| 328 | xml result dtd) | 324 | xml result dtd) |
| 329 | (save-excursion | 325 | (goto-char (point-min)) |
| 330 | (if buffer | 326 | (while (not (eobp)) |
| 331 | (set-buffer buffer)) | 327 | (if (search-forward "<" nil t) |
| 332 | (save-restriction | 328 | (progn |
| 333 | (narrow-to-region beg end) | 329 | (forward-char -1) |
| 334 | (goto-char (point-min)) | 330 | (setq result (xml-parse-tag parse-dtd parse-ns)) |
| 335 | (while (not (eobp)) | 331 | (cond |
| 336 | (if (search-forward "<" nil t) | 332 | ((null result) |
| 337 | (progn | 333 | ;; Not looking at an xml start tag. |
| 338 | (forward-char -1) | 334 | (unless (eobp) |
| 339 | (setq result (xml-parse-tag parse-dtd parse-ns)) | 335 | (forward-char 1))) |
| 340 | (cond | 336 | ((and xml (not xml-sub-parser)) |
| 341 | ((null result) | 337 | ;; Translation of rule [1] of XML specifications |
| 342 | ;; Not looking at an xml start tag. | 338 | (error "XML: (Not Well-Formed) Only one root tag allowed")) |
| 343 | (unless (eobp) | 339 | ((and (listp (car result)) |
| 344 | (forward-char 1))) | 340 | parse-dtd) |
| 345 | ((and xml (not xml-sub-parser)) | 341 | (setq dtd (car result)) |
| 346 | ;; Translation of rule [1] of XML specifications | 342 | (if (cdr result) ; possible leading comment |
| 347 | (error "XML: (Not Well-Formed) Only one root tag allowed")) | 343 | (add-to-list 'xml (cdr result)))) |
| 348 | ((and (listp (car result)) | 344 | (t |
| 349 | parse-dtd) | 345 | (add-to-list 'xml result)))) |
| 350 | (setq dtd (car result)) | 346 | (goto-char (point-max)))) |
| 351 | (if (cdr result) ; possible leading comment | 347 | (if parse-dtd |
| 352 | (add-to-list 'xml (cdr result)))) | 348 | (cons dtd (nreverse xml)) |
| 353 | (t | 349 | (nreverse xml))))) |
| 354 | (add-to-list 'xml result)))) | ||
| 355 | (goto-char (point-max)))) | ||
| 356 | (if parse-dtd | ||
| 357 | (cons dtd (nreverse xml)) | ||
| 358 | (nreverse xml))))))) | ||
| 359 | 350 | ||
| 360 | (defun xml-maybe-do-ns (name default xml-ns) | 351 | (defun xml-maybe-do-ns (name default xml-ns) |
| 361 | "Perform any namespace expansion. | 352 | "Perform any namespace expansion. |
| @@ -600,7 +591,10 @@ This follows the rule [28] in the XML specifications." | |||
| 600 | ;; Get the name of the document | 591 | ;; Get the name of the document |
| 601 | (looking-at xml-name-regexp) | 592 | (looking-at xml-name-regexp) |
| 602 | (let ((dtd (list (match-string-no-properties 0) 'dtd)) | 593 | (let ((dtd (list (match-string-no-properties 0) 'dtd)) |
| 603 | (xml-parameter-entity-alist xml-parameter-entity-alist)) | 594 | (xml-parameter-entity-alist xml-parameter-entity-alist) |
| 595 | (parameter-entity-re (eval-when-compile | ||
| 596 | (concat "%\\(" xml-name-re "\\);"))) | ||
| 597 | next-parameter-entity) | ||
| 604 | (goto-char (match-end 0)) | 598 | (goto-char (match-end 0)) |
| 605 | (skip-syntax-forward " ") | 599 | (skip-syntax-forward " ") |
| 606 | 600 | ||
| @@ -638,13 +632,28 @@ This follows the rule [28] in the XML specifications." | |||
| 638 | (error "XML: Bad DTD")) | 632 | (error "XML: Bad DTD")) |
| 639 | (forward-char) | 633 | (forward-char) |
| 640 | 634 | ||
| 635 | ;; [2.8]: "markup declarations may be made up in whole or in | ||
| 636 | ;; part of the replacement text of parameter entities." | ||
| 637 | |||
| 638 | ;; Since parameter entities are valid only within the DTD, we | ||
| 639 | ;; first search for the position of the next possible parameter | ||
| 640 | ;; entity. Then, search for the next DTD element; if it ends | ||
| 641 | ;; before the next parameter entity, expand the parameter entity | ||
| 642 | ;; and try again. | ||
| 643 | (setq next-parameter-entity | ||
| 644 | (save-excursion | ||
| 645 | (if (re-search-forward parameter-entity-re nil t) | ||
| 646 | (match-beginning 0)))) | ||
| 647 | |||
| 641 | ;; Parse the rest of the DTD | 648 | ;; Parse the rest of the DTD |
| 642 | ;; Fixme: Deal with NOTATION, PIs. | 649 | ;; Fixme: Deal with NOTATION, PIs. |
| 643 | (while (not (looking-at "\\s-*\\]")) | 650 | (while (not (looking-at "\\s-*\\]")) |
| 644 | (skip-syntax-forward " ") | 651 | (skip-syntax-forward " ") |
| 645 | (cond | 652 | (cond |
| 646 | ;; Element declaration [45]: | 653 | ;; Element declaration [45]: |
| 647 | ((looking-at "<!ELEMENT\\s-+\\([[:alnum:].%;]+\\)\\s-+\\([^>]+\\)>") | 654 | ((and (looking-at "<!ELEMENT\\s-+\\([[:alnum:].%;]+\\)\\s-+\\([^>]+\\)>") |
| 655 | (or (null next-parameter-entity) | ||
| 656 | (<= (match-end 0) next-parameter-entity))) | ||
| 648 | (let ((element (match-string-no-properties 1)) | 657 | (let ((element (match-string-no-properties 1)) |
| 649 | (type (match-string-no-properties 2)) | 658 | (type (match-string-no-properties 2)) |
| 650 | (end-pos (match-end 0))) | 659 | (end-pos (match-end 0))) |
| @@ -672,19 +681,31 @@ This follows the rule [28] in the XML specifications." | |||
| 672 | (goto-char end-pos))) | 681 | (goto-char end-pos))) |
| 673 | 682 | ||
| 674 | ;; Attribute-list declaration [52] (currently unsupported): | 683 | ;; Attribute-list declaration [52] (currently unsupported): |
| 675 | ((looking-at (concat "<!ATTLIST[ \t\n\r]*\\(" xml-name-re | 684 | ((and (looking-at (eval-when-compile |
| 676 | "\\)[ \t\n\r]*\\(" xml-att-def-re | 685 | (concat "<!ATTLIST[ \t\n\r]*\\(" xml-name-re |
| 677 | "\\)*[ \t\n\r]*>")) | 686 | "\\)[ \t\n\r]*\\(" xml-att-def-re |
| 687 | "\\)*[ \t\n\r]*>"))) | ||
| 688 | (or (null next-parameter-entity) | ||
| 689 | (<= (match-end 0) next-parameter-entity))) | ||
| 678 | (goto-char (match-end 0))) | 690 | (goto-char (match-end 0))) |
| 679 | 691 | ||
| 680 | ;; Comments (skip to end): | 692 | ;; Comments (skip to end, ignoring parameter entity): |
| 681 | ((looking-at "<!--") | 693 | ((looking-at "<!--") |
| 682 | (search-forward "-->")) | 694 | (search-forward "-->") |
| 695 | (and next-parameter-entity | ||
| 696 | (> (point) next-parameter-entity) | ||
| 697 | (setq next-parameter-entity | ||
| 698 | (save-excursion | ||
| 699 | (if (re-search-forward parameter-entity-re nil t) | ||
| 700 | (match-beginning 0)))))) | ||
| 683 | 701 | ||
| 684 | ;; Internal entity declarations: | 702 | ;; Internal entity declarations: |
| 685 | ((looking-at (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\(" | 703 | ((and (looking-at (eval-when-compile |
| 686 | xml-name-re "\\)[ \t\n\r]*\\(" | 704 | (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\(" |
| 687 | xml-entity-value-re "\\)[ \t\n\r]*>")) | 705 | xml-name-re "\\)[ \t\n\r]*\\(" |
| 706 | xml-entity-value-re "\\)[ \t\n\r]*>"))) | ||
| 707 | (or (null next-parameter-entity) | ||
| 708 | (<= (match-end 0) next-parameter-entity))) | ||
| 688 | (let* ((name (prog1 (match-string-no-properties 2) | 709 | (let* ((name (prog1 (match-string-no-properties 2) |
| 689 | (goto-char (match-end 0)))) | 710 | (goto-char (match-end 0)))) |
| 690 | (alist (if (match-string 1) | 711 | (alist (if (match-string 1) |
| @@ -700,26 +721,39 @@ This follows the rule [28] in the XML specifications." | |||
| 700 | (set alist (cons (cons name value) (symbol-value alist)))))) | 721 | (set alist (cons (cons name value) (symbol-value alist)))))) |
| 701 | 722 | ||
| 702 | ;; External entity declarations (currently unsupported): | 723 | ;; External entity declarations (currently unsupported): |
| 703 | ((or (looking-at (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\(" | 724 | ((and (or (looking-at (eval-when-compile |
| 704 | xml-name-re "\\)[ \t\n\r]+SYSTEM[ \t\n\r]+" | 725 | (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\(" |
| 705 | "\\(\"[^\"]*\"\\|'[^']*'\\)[ \t\n\r]*>")) | 726 | xml-name-re "\\)[ \t\n\r]+SYSTEM[ \t\n\r]+" |
| 706 | (looking-at (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\(" | 727 | "\\(\"[^\"]*\"\\|'[^']*'\\)[ \t\n\r]*>"))) |
| 707 | xml-name-re "\\)[ \t\n\r]+PUBLIC[ \t\n\r]+" | 728 | (looking-at (eval-when-compile |
| 708 | "\"[- \r\na-zA-Z0-9'()+,./:=?;!*#@$_%]*\"" | 729 | (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\(" |
| 709 | "\\|'[- \r\na-zA-Z0-9()+,./:=?;!*#@$_%]*'" | 730 | xml-name-re "\\)[ \t\n\r]+PUBLIC[ \t\n\r]+" |
| 710 | "[ \t\n\r]+\\(\"[^\"]*\"\\|'[^']*'\\)" | 731 | "\"[- \r\na-zA-Z0-9'()+,./:=?;!*#@$_%]*\"" |
| 711 | "[ \t\n\r]*>"))) | 732 | "\\|'[- \r\na-zA-Z0-9()+,./:=?;!*#@$_%]*'" |
| 733 | "[ \t\n\r]+\\(\"[^\"]*\"\\|'[^']*'\\)" | ||
| 734 | "[ \t\n\r]*>")))) | ||
| 735 | (or (null next-parameter-entity) | ||
| 736 | (<= (match-end 0) next-parameter-entity))) | ||
| 712 | (goto-char (match-end 0))) | 737 | (goto-char (match-end 0))) |
| 713 | 738 | ||
| 714 | ;; Parameter entity: | 739 | ;; If a parameter entity is in the way, expand it. |
| 715 | ((looking-at (concat "%\\(" xml-name-re "\\);")) | 740 | (next-parameter-entity |
| 716 | (goto-char (match-end 0)) | 741 | (save-excursion |
| 717 | (let* ((entity (match-string 1)) | 742 | (goto-char next-parameter-entity) |
| 718 | (end (point-marker)) | 743 | (unless (looking-at parameter-entity-re) |
| 719 | (elt (assoc entity xml-parameter-entity-alist))) | 744 | (error "XML: Internal error")) |
| 720 | (when elt | 745 | (let* ((entity (match-string 1)) |
| 721 | (replace-match (cdr elt) t t) | 746 | (beg (point-marker)) |
| 722 | (goto-char end)))) | 747 | (elt (assoc entity xml-parameter-entity-alist))) |
| 748 | (if elt | ||
| 749 | (progn | ||
| 750 | (replace-match (cdr elt) t t) | ||
| 751 | ;; The replacement can itself be a parameter entity. | ||
| 752 | (goto-char next-parameter-entity)) | ||
| 753 | (goto-char (match-end 0)))) | ||
| 754 | (setq next-parameter-entity | ||
| 755 | (if (re-search-forward parameter-entity-re nil t) | ||
| 756 | (match-beginning 0))))) | ||
| 723 | 757 | ||
| 724 | ;; Anything else: | 758 | ;; Anything else: |
| 725 | (xml-validating-parser | 759 | (xml-validating-parser |
diff --git a/test/ChangeLog b/test/ChangeLog index 45fc70e0440..d9d9bc5a9fa 100644 --- a/test/ChangeLog +++ b/test/ChangeLog | |||
| @@ -1,3 +1,7 @@ | |||
| 1 | 2012-07-01 Chong Yidong <cyd@gnu.org> | ||
| 2 | |||
| 3 | * automated/xml-parse-tests.el: New file. | ||
| 4 | |||
| 1 | 2012-06-27 Stefan Monnier <monnier@iro.umontreal.ca> | 5 | 2012-06-27 Stefan Monnier <monnier@iro.umontreal.ca> |
| 2 | 6 | ||
| 3 | * automated/ert-x-tests.el (ert-test-run-tests-interactively-2): | 7 | * automated/ert-x-tests.el (ert-test-run-tests-interactively-2): |
diff --git a/test/automated/xml-parse-tests.el b/test/automated/xml-parse-tests.el new file mode 100644 index 00000000000..8e8ef291bdc --- /dev/null +++ b/test/automated/xml-parse-tests.el | |||
| @@ -0,0 +1,57 @@ | |||
| 1 | ;;; xml-parse-tests.el --- Test suite for XML parsing. | ||
| 2 | |||
| 3 | ;; Copyright (C) 2012 Free Software Foundation, Inc. | ||
| 4 | |||
| 5 | ;; Author: Chong Yidong <cyd@stupidchicken.com> | ||
| 6 | ;; Keywords: internal | ||
| 7 | ;; Human-Keywords: internal | ||
| 8 | |||
| 9 | ;; This file is part of GNU Emacs. | ||
| 10 | |||
| 11 | ;; GNU Emacs is free software: you can redistribute it and/or modify | ||
| 12 | ;; it under the terms of the GNU General Public License as published by | ||
| 13 | ;; the Free Software Foundation, either version 3 of the License, or | ||
| 14 | ;; (at your option) any later version. | ||
| 15 | |||
| 16 | ;; GNU Emacs is distributed in the hope that it will be useful, | ||
| 17 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 18 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 19 | ;; GNU General Public License for more details. | ||
| 20 | |||
| 21 | ;; You should have received a copy of the GNU General Public License | ||
| 22 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. | ||
| 23 | |||
| 24 | ;;; Commentary: | ||
| 25 | |||
| 26 | ;; Type M-x test-xml-parse RET to generate the test buffer. | ||
| 27 | |||
| 28 | ;;; Code: | ||
| 29 | |||
| 30 | (require 'xml) | ||
| 31 | |||
| 32 | (defvar xml-parse-tests--data | ||
| 33 | '(;; General entity substitution | ||
| 34 | ("<?xml version=\"1.0\"?><!DOCTYPE foo SYSTEM \"bar.dtd\" [<!ENTITY ent \"AbC\">]><foo a=\"b\"><bar>&ent;;</bar></foo>" . | ||
| 35 | ((foo ((a . "b")) (bar nil "AbC;")))) | ||
| 36 | ;; Parameter entity substitution | ||
| 37 | ("<?xml version=\"1.0\"?><!DOCTYPE foo SYSTEM \"bar.dtd\" [<!ENTITY % pent \"AbC\"><!ENTITY ent \"%pent;\">]><foo a=\"b\"><bar>&ent;;</bar></foo>" . | ||
| 38 | ((foo ((a . "b")) (bar nil "AbC;")))) | ||
| 39 | ;; Tricky parameter entity substitution (like XML spec Appendix D) | ||
| 40 | ("<?xml version='1.0'?><!DOCTYPE foo [ <!ENTITY % xx '%zz;'><!ENTITY % zz '<!ENTITY ent \"b\" >' > %xx; ]><foo>A&ent;C</foo>" . | ||
| 41 | ((foo nil "AbC")))) | ||
| 42 | "Alist of XML strings and their expected parse trees.") | ||
| 43 | |||
| 44 | (ert-deftest xml-parse-tests () | ||
| 45 | "Test XML parsing." | ||
| 46 | (with-temp-buffer | ||
| 47 | (dolist (test xml-parse-tests--data) | ||
| 48 | (erase-buffer) | ||
| 49 | (insert (car test)) | ||
| 50 | (should (equal (cdr test) | ||
| 51 | (xml-parse-region (point-min) (point-max))))))) | ||
| 52 | |||
| 53 | ;; Local Variables: | ||
| 54 | ;; no-byte-compile: t | ||
| 55 | ;; End: | ||
| 56 | |||
| 57 | ;;; xml-parse-tests.el ends here. | ||