diff options
| author | Chong Yidong | 2012-06-30 19:33:22 +0800 |
|---|---|---|
| committer | Chong Yidong | 2012-06-30 19:33:22 +0800 |
| commit | 7f3fbd5d73bff96d42ef087ec87b662005242842 (patch) | |
| tree | 7eb23afaced6e84f54696528db6df5d40d4e4f13 | |
| parent | c5e4379cef494adeed3ce4ba3c610819c088cda8 (diff) | |
| download | emacs-7f3fbd5d73bff96d42ef087ec87b662005242842.tar.gz emacs-7f3fbd5d73bff96d42ef087ec87b662005242842.zip | |
* xml.el: Implement XML parameter entities.
(xml-parameter-entity-alist): New variable.
(xml-parse-region, xml-parse-fragment): Preserve previous values
of xml-entity-alist and xml-parameter-entity-alist, so that
repeated calls on different documents do not change them.
(xml-parse-tag): Fix doctype regexp.
(xml--entity-replacement-text): New function.
(xml-parse-dtd): Use it. Don't handle system entities; doing that
properly requires url retrieval which is unimplemented.
(xml-escape-string): Doc fix.
| -rw-r--r-- | lisp/ChangeLog | 13 | ||||
| -rw-r--r-- | lisp/xml.el | 240 |
2 files changed, 145 insertions, 108 deletions
diff --git a/lisp/ChangeLog b/lisp/ChangeLog index f2fa5a37ac7..dddfce0414c 100644 --- a/lisp/ChangeLog +++ b/lisp/ChangeLog | |||
| @@ -1,3 +1,16 @@ | |||
| 1 | 2012-06-30 Chong Yidong <cyd@gnu.org> | ||
| 2 | |||
| 3 | * xml.el: Implement XML parameter entities. | ||
| 4 | (xml-parameter-entity-alist): New variable. | ||
| 5 | (xml-parse-region, xml-parse-fragment): Preserve previous values | ||
| 6 | of xml-entity-alist and xml-parameter-entity-alist, so that | ||
| 7 | repeated calls on different documents do not change them. | ||
| 8 | (xml-parse-tag): Fix doctype regexp. | ||
| 9 | (xml--entity-replacement-text): New function. | ||
| 10 | (xml-parse-dtd): Use it. Don't handle system entities; doing that | ||
| 11 | properly requires url retrieval which is unimplemented. | ||
| 12 | (xml-escape-string): Doc fix. | ||
| 13 | |||
| 1 | 2012-06-30 Stefan Monnier <monnier@iro.umontreal.ca> | 14 | 2012-06-30 Stefan Monnier <monnier@iro.umontreal.ca> |
| 2 | 15 | ||
| 3 | * emacs-lisp/cl-lib.el (cl-pushnew): Use macroexp-let2. | 16 | * emacs-lisp/cl-lib.el (cl-pushnew): Use macroexp-let2. |
diff --git a/lisp/xml.el b/lisp/xml.el index d1e824c4ece..f135bdfabe4 100644 --- a/lisp/xml.el +++ b/lisp/xml.el | |||
| @@ -95,10 +95,13 @@ | |||
| 95 | ("apos" . "'") | 95 | ("apos" . "'") |
| 96 | ("quot" . "\"") | 96 | ("quot" . "\"") |
| 97 | ("amp" . "&")) | 97 | ("amp" . "&")) |
| 98 | "The defined entities. Entities are added to this when the DTD is parsed.") | 98 | "Alist of defined XML entities.") |
| 99 | |||
| 100 | (defvar xml-parameter-entity-alist nil | ||
| 101 | "Alist of defined XML parametric entities.") | ||
| 99 | 102 | ||
| 100 | (defvar xml-sub-parser nil | 103 | (defvar xml-sub-parser nil |
| 101 | "Dynamically set this to a non-nil value if you want to parse an XML fragment.") | 104 | "Non-nil when the XML parser is parsing an XML fragment.") |
| 102 | 105 | ||
| 103 | (defvar xml-validating-parser nil | 106 | (defvar xml-validating-parser nil |
| 104 | "Set to non-nil to get validity checking.") | 107 | "Set to non-nil to get validity checking.") |
| @@ -308,6 +311,9 @@ If PARSE-NS is non-nil, then QNAMES are expanded." | |||
| 308 | ;; specs DTRT. | 311 | ;; specs DTRT. |
| 309 | (with-syntax-table (standard-syntax-table) | 312 | (with-syntax-table (standard-syntax-table) |
| 310 | (let ((case-fold-search nil) ; XML is case-sensitive. | 313 | (let ((case-fold-search nil) ; XML is case-sensitive. |
| 314 | ;; Prevent entity definitions from changing the defaults | ||
| 315 | (xml-entity-alist xml-entity-alist) | ||
| 316 | (xml-parameter-entity-alist xml-parameter-entity-alist) | ||
| 311 | xml result dtd) | 317 | xml result dtd) |
| 312 | (save-excursion | 318 | (save-excursion |
| 313 | (if buffer | 319 | (if buffer |
| @@ -366,6 +372,9 @@ specify that the name shouldn't be given a namespace." | |||
| 366 | (defun xml-parse-fragment (&optional parse-dtd parse-ns) | 372 | (defun xml-parse-fragment (&optional parse-dtd parse-ns) |
| 367 | "Parse xml-like fragments." | 373 | "Parse xml-like fragments." |
| 368 | (let ((xml-sub-parser t) | 374 | (let ((xml-sub-parser t) |
| 375 | ;; Prevent entity definitions from changing the defaults | ||
| 376 | (xml-entity-alist xml-entity-alist) | ||
| 377 | (xml-parameter-entity-alist xml-parameter-entity-alist) | ||
| 369 | children) | 378 | children) |
| 370 | (while (not (eobp)) | 379 | (while (not (eobp)) |
| 371 | (let ((bit (xml-parse-tag | 380 | (let ((bit (xml-parse-tag |
| @@ -413,7 +422,7 @@ Returns one of: | |||
| 413 | (buffer-substring-no-properties pos (match-beginning 0)) | 422 | (buffer-substring-no-properties pos (match-beginning 0)) |
| 414 | (xml-parse-string)))) | 423 | (xml-parse-string)))) |
| 415 | ;; DTD for the document | 424 | ;; DTD for the document |
| 416 | ((looking-at "<!DOCTYPE") | 425 | ((looking-at "<!DOCTYPE[ \t\n\r]") |
| 417 | (let ((dtd (xml-parse-dtd parse-ns))) | 426 | (let ((dtd (xml-parse-dtd parse-ns))) |
| 418 | (skip-syntax-forward " ") | 427 | (skip-syntax-forward " ") |
| 419 | (if xml-validating-parser | 428 | (if xml-validating-parser |
| @@ -580,11 +589,11 @@ This follows the rule [28] in the XML specifications." | |||
| 580 | ;; Get the name of the document | 589 | ;; Get the name of the document |
| 581 | (looking-at xml-name-regexp) | 590 | (looking-at xml-name-regexp) |
| 582 | (let ((dtd (list (match-string-no-properties 0) 'dtd)) | 591 | (let ((dtd (list (match-string-no-properties 0) 'dtd)) |
| 583 | type element end-pos) | 592 | (xml-parameter-entity-alist xml-parameter-entity-alist)) |
| 584 | (goto-char (match-end 0)) | 593 | (goto-char (match-end 0)) |
| 585 | |||
| 586 | (skip-syntax-forward " ") | 594 | (skip-syntax-forward " ") |
| 587 | ;; XML [75] | 595 | |
| 596 | ;; External subset (XML [75]) | ||
| 588 | (cond ((looking-at "PUBLIC\\s-+") | 597 | (cond ((looking-at "PUBLIC\\s-+") |
| 589 | (goto-char (match-end 0)) | 598 | (goto-char (match-end 0)) |
| 590 | (unless (or (re-search-forward | 599 | (unless (or (re-search-forward |
| @@ -607,119 +616,137 @@ This follows the rule [28] in the XML specifications." | |||
| 607 | (error "XML: Missing System ID")) | 616 | (error "XML: Missing System ID")) |
| 608 | (push (list (match-string-no-properties 1) 'system) dtd))) | 617 | (push (list (match-string-no-properties 1) 'system) dtd))) |
| 609 | (skip-syntax-forward " ") | 618 | (skip-syntax-forward " ") |
| 610 | (if (eq ?> (char-after)) | 619 | |
| 611 | (forward-char) | 620 | (if (eq (char-after) ?>) |
| 612 | (if (not (eq (char-after) ?\[)) | 621 | |
| 613 | (error "XML: Bad DTD") | 622 | ;; No internal subset |
| 614 | (forward-char) | 623 | (forward-char) |
| 615 | ;; Parse the rest of the DTD | ||
| 616 | ;; Fixme: Deal with NOTATION, PIs. | ||
| 617 | (while (not (looking-at "\\s-*\\]")) | ||
| 618 | (skip-syntax-forward " ") | ||
| 619 | (cond | ||
| 620 | |||
| 621 | ;; Translation of rule [45] of XML specifications | ||
| 622 | ((looking-at | ||
| 623 | "<!ELEMENT\\s-+\\([[:alnum:].%;]+\\)\\s-+\\([^>]+\\)>") | ||
| 624 | |||
| 625 | (setq element (match-string-no-properties 1) | ||
| 626 | type (match-string-no-properties 2)) | ||
| 627 | (setq end-pos (match-end 0)) | ||
| 628 | 624 | ||
| 629 | ;; Translation of rule [46] of XML specifications | 625 | ;; Internal subset (XML [28b]) |
| 626 | (unless (eq (char-after) ?\[) | ||
| 627 | (error "XML: Bad DTD")) | ||
| 628 | (forward-char) | ||
| 629 | |||
| 630 | ;; Parse the rest of the DTD | ||
| 631 | ;; Fixme: Deal with NOTATION, PIs. | ||
| 632 | (while (not (looking-at "\\s-*\\]")) | ||
| 633 | (skip-syntax-forward " ") | ||
| 634 | (cond | ||
| 635 | ;; Element declaration [45]: | ||
| 636 | ((looking-at "<!ELEMENT\\s-+\\([[:alnum:].%;]+\\)\\s-+\\([^>]+\\)>") | ||
| 637 | (let ((element (match-string-no-properties 1)) | ||
| 638 | (type (match-string-no-properties 2)) | ||
| 639 | (end-pos (match-end 0))) | ||
| 640 | ;; Translation of rule [46] of XML specifications | ||
| 630 | (cond | 641 | (cond |
| 631 | ((string-match "^EMPTY[ \t\n\r]*$" type) ;; empty declaration | 642 | ((string-match "^EMPTY[ \t\n\r]*$" type) ; empty declaration |
| 632 | (setq type 'empty)) | 643 | (setq type 'empty)) |
| 633 | ((string-match "^ANY[ \t\n\r]*$" type) ;; any type of contents | 644 | ((string-match "^ANY[ \t\n\r]*$" type) ; any type of contents |
| 634 | (setq type 'any)) | 645 | (setq type 'any)) |
| 635 | ((string-match "^(\\(.*\\))[ \t\n\r]*$" type) ;; children ([47]) | 646 | ((string-match "^(\\(.*\\))[ \t\n\r]*$" type) ; children ([47]) |
| 636 | (setq type (xml-parse-elem-type (match-string-no-properties 1 type)))) | 647 | (setq type (xml-parse-elem-type (match-string-no-properties 1 type)))) |
| 637 | ((string-match "^%[^;]+;[ \t\n\r]*$" type) ;; substitution | 648 | ((string-match "^%[^;]+;[ \t\n\r]*$" type) ; substitution |
| 638 | nil) | 649 | nil) |
| 639 | (t | 650 | (xml-validating-parser |
| 640 | (if xml-validating-parser | 651 | (error "XML: (Validity) Invalid element type in the DTD"))) |
| 641 | (error "XML: (Validity) Invalid element type in the DTD")))) | ||
| 642 | 652 | ||
| 643 | ;; rule [45]: the element declaration must be unique | 653 | ;; rule [45]: the element declaration must be unique |
| 644 | (if (and (assoc element dtd) | 654 | (and (assoc element dtd) |
| 645 | xml-validating-parser) | 655 | xml-validating-parser |
| 646 | (error "XML: (Validity) Element declarations must be unique in a DTD (<%s>)" | 656 | (error "XML: (Validity) DTD element declarations must be unique (<%s>)" |
| 647 | element)) | 657 | element)) |
| 648 | 658 | ||
| 649 | ;; Store the element in the DTD | 659 | ;; Store the element in the DTD |
| 650 | (push (list element type) dtd) | 660 | (push (list element type) dtd) |
| 651 | (goto-char end-pos)) | 661 | (goto-char end-pos))) |
| 652 | 662 | ||
| 653 | ;; Translation of rule [52] of XML specifications | 663 | ;; Attribute-list declaration [52] (currently unsupported): |
| 654 | ((looking-at (concat "<!ATTLIST[ \t\n\r]*\\(" xml-name-re | 664 | ((looking-at (concat "<!ATTLIST[ \t\n\r]*\\(" xml-name-re |
| 655 | "\\)[ \t\n\r]*\\(" xml-att-def-re | 665 | "\\)[ \t\n\r]*\\(" xml-att-def-re |
| 656 | "\\)*[ \t\n\r]*>")) | 666 | "\\)*[ \t\n\r]*>")) |
| 657 | 667 | (goto-char (match-end 0))) | |
| 658 | ;; We don't do anything with ATTLIST currently | 668 | |
| 659 | (goto-char (match-end 0))) | 669 | ;; Comments (skip to end): |
| 660 | 670 | ((looking-at "<!--") | |
| 661 | ((looking-at "<!--") | 671 | (search-forward "-->")) |
| 662 | (search-forward "-->")) | 672 | |
| 663 | ((looking-at (concat "<!ENTITY[ \t\n\r]*\\(" xml-name-re | 673 | ;; Internal entity declarations: |
| 664 | "\\)[ \t\n\r]*\\(" xml-entity-value-re | 674 | ((looking-at (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\(" |
| 665 | "\\)[ \t\n\r]*>")) | 675 | xml-name-re "\\)[ \t\n\r]*\\(" |
| 666 | (let ((name (match-string-no-properties 1)) | 676 | xml-entity-value-re "\\)[ \t\n\r]*>")) |
| 667 | (value (substring (match-string-no-properties 2) 1 | 677 | (let* ((name (prog1 (match-string-no-properties 2) |
| 668 | (- (length (match-string-no-properties 2)) 1)))) | 678 | (goto-char (match-end 0)))) |
| 669 | (goto-char (match-end 0)) | 679 | (alist (if (match-string 1) |
| 670 | (setq xml-entity-alist | 680 | 'xml-parameter-entity-alist |
| 671 | (append xml-entity-alist | 681 | 'xml-entity-alist)) |
| 672 | (list (cons name | 682 | ;; Retrieve the deplacement text: |
| 673 | (with-temp-buffer | 683 | (value (xml--entity-replacement-text |
| 674 | (insert value) | 684 | ;; Entity value, sans quotation marks: |
| 675 | (goto-char (point-min)) | 685 | (substring (match-string-no-properties 3) 1 -1)))) |
| 676 | (xml-parse-fragment | 686 | ;; If the same entity is declared more than once, the |
| 677 | xml-validating-parser | 687 | ;; first declaration is binding. |
| 678 | parse-ns)))))))) | 688 | (unless (assoc name (symbol-value alist)) |
| 679 | ((or (looking-at (concat "<!ENTITY[ \t\n\r]+\\(" xml-name-re | 689 | (set alist (cons (cons name value) (symbol-value alist)))))) |
| 680 | "\\)[ \t\n\r]+SYSTEM[ \t\n\r]+" | 690 | |
| 681 | "\\(\"[^\"]*\"\\|'[^']*'\\)[ \t\n\r]*>")) | 691 | ;; External entity declarations (currently unsupported): |
| 682 | (looking-at (concat "<!ENTITY[ \t\n\r]+\\(" xml-name-re | 692 | ((or (looking-at (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\(" |
| 683 | "\\)[ \t\n\r]+PUBLIC[ \t\n\r]+" | 693 | xml-name-re "\\)[ \t\n\r]+SYSTEM[ \t\n\r]+" |
| 684 | "\"[- \r\na-zA-Z0-9'()+,./:=?;!*#@$_%]*\"" | 694 | "\\(\"[^\"]*\"\\|'[^']*'\\)[ \t\n\r]*>")) |
| 685 | "\\|'[- \r\na-zA-Z0-9()+,./:=?;!*#@$_%]*'" | 695 | (looking-at (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\(" |
| 686 | "[ \t\n\r]+\\(\"[^\"]*\"\\|'[^']*'\\)" | 696 | xml-name-re "\\)[ \t\n\r]+PUBLIC[ \t\n\r]+" |
| 687 | "[ \t\n\r]*>"))) | 697 | "\"[- \r\na-zA-Z0-9'()+,./:=?;!*#@$_%]*\"" |
| 688 | (let ((name (match-string-no-properties 1)) | 698 | "\\|'[- \r\na-zA-Z0-9()+,./:=?;!*#@$_%]*'" |
| 689 | (file (substring (match-string-no-properties 2) 1 | 699 | "[ \t\n\r]+\\(\"[^\"]*\"\\|'[^']*'\\)" |
| 690 | (- (length (match-string-no-properties 2)) 1)))) | 700 | "[ \t\n\r]*>"))) |
| 691 | (goto-char (match-end 0)) | 701 | (goto-char (match-end 0))) |
| 692 | (setq xml-entity-alist | 702 | |
| 693 | (append xml-entity-alist | 703 | ;; Parameter entity: |
| 694 | (list (cons name (with-temp-buffer | 704 | ((looking-at (concat "%\\(" xml-name-re "\\);")) |
| 695 | (insert-file-contents file) | 705 | (goto-char (match-end 0)) |
| 696 | (goto-char (point-min)) | 706 | (let* ((entity (match-string 1)) |
| 697 | (xml-parse-fragment | 707 | (end (point-marker)) |
| 698 | xml-validating-parser | 708 | (elt (assoc entity xml-parameter-entity-alist))) |
| 699 | parse-ns)))))))) | 709 | (when elt |
| 700 | ;; skip parameter entity declarations | 710 | (replace-match (cdr elt) t t) |
| 701 | ((or (looking-at (concat "<!ENTITY[ \t\n\r]+%[ \t\n\r]+\\(" xml-name-re | 711 | (goto-char end)))) |
| 702 | "\\)[ \t\n\r]+SYSTEM[ \t\n\r]+" | 712 | |
| 703 | "\\(\"[^\"]*\"\\|'[^']*'\\)[ \t\n\r]*>")) | 713 | ;; Anything else: |
| 704 | (looking-at (concat "<!ENTITY[ \t\n\r]+" | 714 | (xml-validating-parser |
| 705 | "%[ \t\n\r]+" | 715 | (error "XML: (Validity) Invalid DTD item")))) |
| 706 | "\\(" xml-name-re "\\)[ \t\n\r]+" | 716 | |
| 707 | "PUBLIC[ \t\n\r]+" | ||
| 708 | "\\(\"[- \r\na-zA-Z0-9'()+,./:=?;!*#@$_%]*\"" | ||
| 709 | "\\|'[- \r\na-zA-Z0-9()+,./:=?;!*#@$_%]*'\\)[ \t\n\r]+" | ||
| 710 | "\\(\"[^\"]+\"\\|'[^']+'\\)" | ||
| 711 | "[ \t\n\r]*>"))) | ||
| 712 | (goto-char (match-end 0))) | ||
| 713 | ;; skip parameter entities | ||
| 714 | ((looking-at (concat "%" xml-name-re ";")) | ||
| 715 | (goto-char (match-end 0))) | ||
| 716 | (t | ||
| 717 | (when xml-validating-parser | ||
| 718 | (error "XML: (Validity) Invalid DTD item")))))) | ||
| 719 | (if (looking-at "\\s-*]>") | 717 | (if (looking-at "\\s-*]>") |
| 720 | (goto-char (match-end 0)))) | 718 | (goto-char (match-end 0)))) |
| 721 | (nreverse dtd))) | 719 | (nreverse dtd))) |
| 722 | 720 | ||
| 721 | (defun xml--entity-replacement-text (string) | ||
| 722 | "Return the replacement text for the entity value STRING. | ||
| 723 | The replacement text is obtained by replacing character | ||
| 724 | references and parameter-entity references." | ||
| 725 | (let ((ref-re (eval-when-compile | ||
| 726 | (concat "\\(?:&#\\([0-9]+\\)\\|&#x\\([0-9a-fA-F]+\\)\\|%\\(" | ||
| 727 | xml-name-re "\\)\\);"))) | ||
| 728 | children) | ||
| 729 | (while (string-match ref-re string) | ||
| 730 | (push (substring string 0 (match-beginning 0)) children) | ||
| 731 | (let ((remainder (substring string (match-end 0))) | ||
| 732 | ref val) | ||
| 733 | (cond ((setq ref (match-string 1 string)) | ||
| 734 | ;; Decimal character reference | ||
| 735 | (setq val (decode-char 'ucs (string-to-number ref))) | ||
| 736 | (if val (push (string val) children))) | ||
| 737 | ;; Hexadecimal character reference | ||
| 738 | ((setq ref (match-string 2 string)) | ||
| 739 | (setq val (decode-char 'ucs (string-to-number ref 16))) | ||
| 740 | (if val (push (string val) children))) | ||
| 741 | ;; Parameter entity reference | ||
| 742 | ((setq ref (match-string 3 string)) | ||
| 743 | (setq val (assoc ref xml-parameter-entity-alist)) | ||
| 744 | (if val | ||
| 745 | (push (cdr val) children) | ||
| 746 | (push (concat "%" ref ";") children)))) | ||
| 747 | (setq string remainder))) | ||
| 748 | (mapconcat 'identity (nreverse (cons string children)) ""))) | ||
| 749 | |||
| 723 | (defun xml-parse-elem-type (string) | 750 | (defun xml-parse-elem-type (string) |
| 724 | "Convert element type STRING into a Lisp structure." | 751 | "Convert element type STRING into a Lisp structure." |
| 725 | 752 | ||
| @@ -864,15 +891,12 @@ The first line is indented with the optional INDENT-STRING." | |||
| 864 | (defalias 'xml-print 'xml-debug-print) | 891 | (defalias 'xml-print 'xml-debug-print) |
| 865 | 892 | ||
| 866 | (defun xml-escape-string (string) | 893 | (defun xml-escape-string (string) |
| 867 | "Return the string with entity substitutions made from | 894 | "Return STRING with entity substitutions made from `xml-entity-alist'." |
| 868 | xml-entity-alist." | ||
| 869 | (mapconcat (lambda (byte) | 895 | (mapconcat (lambda (byte) |
| 870 | (let ((char (char-to-string byte))) | 896 | (let ((char (char-to-string byte))) |
| 871 | (if (rassoc char xml-entity-alist) | 897 | (if (rassoc char xml-entity-alist) |
| 872 | (concat "&" (car (rassoc char xml-entity-alist)) ";") | 898 | (concat "&" (car (rassoc char xml-entity-alist)) ";") |
| 873 | char))) | 899 | char))) |
| 874 | ;; This differs from the non-unicode branch. Just | ||
| 875 | ;; grabbing the string works here. | ||
| 876 | string "")) | 900 | string "")) |
| 877 | 901 | ||
| 878 | (defun xml-debug-print-internal (xml indent-string) | 902 | (defun xml-debug-print-internal (xml indent-string) |