aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChong Yidong2012-06-30 19:33:22 +0800
committerChong Yidong2012-06-30 19:33:22 +0800
commit7f3fbd5d73bff96d42ef087ec87b662005242842 (patch)
tree7eb23afaced6e84f54696528db6df5d40d4e4f13
parentc5e4379cef494adeed3ce4ba3c610819c088cda8 (diff)
downloademacs-7f3fbd5d73bff96d42ef087ec87b662005242842.tar.gz
emacs-7f3fbd5d73bff96d42ef087ec87b662005242842.zip
* xml.el: Implement XML parameter entities.
(xml-parameter-entity-alist): New variable. (xml-parse-region, xml-parse-fragment): Preserve previous values of xml-entity-alist and xml-parameter-entity-alist, so that repeated calls on different documents do not change them. (xml-parse-tag): Fix doctype regexp. (xml--entity-replacement-text): New function. (xml-parse-dtd): Use it. Don't handle system entities; doing that properly requires url retrieval which is unimplemented. (xml-escape-string): Doc fix.
-rw-r--r--lisp/ChangeLog13
-rw-r--r--lisp/xml.el240
2 files changed, 145 insertions, 108 deletions
diff --git a/lisp/ChangeLog b/lisp/ChangeLog
index f2fa5a37ac7..dddfce0414c 100644
--- a/lisp/ChangeLog
+++ b/lisp/ChangeLog
@@ -1,3 +1,16 @@
12012-06-30 Chong Yidong <cyd@gnu.org>
2
3 * xml.el: Implement XML parameter entities.
4 (xml-parameter-entity-alist): New variable.
5 (xml-parse-region, xml-parse-fragment): Preserve previous values
6 of xml-entity-alist and xml-parameter-entity-alist, so that
7 repeated calls on different documents do not change them.
8 (xml-parse-tag): Fix doctype regexp.
9 (xml--entity-replacement-text): New function.
10 (xml-parse-dtd): Use it. Don't handle system entities; doing that
11 properly requires url retrieval which is unimplemented.
12 (xml-escape-string): Doc fix.
13
12012-06-30 Stefan Monnier <monnier@iro.umontreal.ca> 142012-06-30 Stefan Monnier <monnier@iro.umontreal.ca>
2 15
3 * emacs-lisp/cl-lib.el (cl-pushnew): Use macroexp-let2. 16 * emacs-lisp/cl-lib.el (cl-pushnew): Use macroexp-let2.
diff --git a/lisp/xml.el b/lisp/xml.el
index d1e824c4ece..f135bdfabe4 100644
--- a/lisp/xml.el
+++ b/lisp/xml.el
@@ -95,10 +95,13 @@
95 ("apos" . "'") 95 ("apos" . "'")
96 ("quot" . "\"") 96 ("quot" . "\"")
97 ("amp" . "&")) 97 ("amp" . "&"))
98 "The defined entities. Entities are added to this when the DTD is parsed.") 98 "Alist of defined XML entities.")
99
100(defvar xml-parameter-entity-alist nil
101 "Alist of defined XML parametric entities.")
99 102
100(defvar xml-sub-parser nil 103(defvar xml-sub-parser nil
101 "Dynamically set this to a non-nil value if you want to parse an XML fragment.") 104 "Non-nil when the XML parser is parsing an XML fragment.")
102 105
103(defvar xml-validating-parser nil 106(defvar xml-validating-parser nil
104 "Set to non-nil to get validity checking.") 107 "Set to non-nil to get validity checking.")
@@ -308,6 +311,9 @@ If PARSE-NS is non-nil, then QNAMES are expanded."
308 ;; specs DTRT. 311 ;; specs DTRT.
309 (with-syntax-table (standard-syntax-table) 312 (with-syntax-table (standard-syntax-table)
310 (let ((case-fold-search nil) ; XML is case-sensitive. 313 (let ((case-fold-search nil) ; XML is case-sensitive.
314 ;; Prevent entity definitions from changing the defaults
315 (xml-entity-alist xml-entity-alist)
316 (xml-parameter-entity-alist xml-parameter-entity-alist)
311 xml result dtd) 317 xml result dtd)
312 (save-excursion 318 (save-excursion
313 (if buffer 319 (if buffer
@@ -366,6 +372,9 @@ specify that the name shouldn't be given a namespace."
366(defun xml-parse-fragment (&optional parse-dtd parse-ns) 372(defun xml-parse-fragment (&optional parse-dtd parse-ns)
367 "Parse xml-like fragments." 373 "Parse xml-like fragments."
368 (let ((xml-sub-parser t) 374 (let ((xml-sub-parser t)
375 ;; Prevent entity definitions from changing the defaults
376 (xml-entity-alist xml-entity-alist)
377 (xml-parameter-entity-alist xml-parameter-entity-alist)
369 children) 378 children)
370 (while (not (eobp)) 379 (while (not (eobp))
371 (let ((bit (xml-parse-tag 380 (let ((bit (xml-parse-tag
@@ -413,7 +422,7 @@ Returns one of:
413 (buffer-substring-no-properties pos (match-beginning 0)) 422 (buffer-substring-no-properties pos (match-beginning 0))
414 (xml-parse-string)))) 423 (xml-parse-string))))
415 ;; DTD for the document 424 ;; DTD for the document
416 ((looking-at "<!DOCTYPE") 425 ((looking-at "<!DOCTYPE[ \t\n\r]")
417 (let ((dtd (xml-parse-dtd parse-ns))) 426 (let ((dtd (xml-parse-dtd parse-ns)))
418 (skip-syntax-forward " ") 427 (skip-syntax-forward " ")
419 (if xml-validating-parser 428 (if xml-validating-parser
@@ -580,11 +589,11 @@ This follows the rule [28] in the XML specifications."
580 ;; Get the name of the document 589 ;; Get the name of the document
581 (looking-at xml-name-regexp) 590 (looking-at xml-name-regexp)
582 (let ((dtd (list (match-string-no-properties 0) 'dtd)) 591 (let ((dtd (list (match-string-no-properties 0) 'dtd))
583 type element end-pos) 592 (xml-parameter-entity-alist xml-parameter-entity-alist))
584 (goto-char (match-end 0)) 593 (goto-char (match-end 0))
585
586 (skip-syntax-forward " ") 594 (skip-syntax-forward " ")
587 ;; XML [75] 595
596 ;; External subset (XML [75])
588 (cond ((looking-at "PUBLIC\\s-+") 597 (cond ((looking-at "PUBLIC\\s-+")
589 (goto-char (match-end 0)) 598 (goto-char (match-end 0))
590 (unless (or (re-search-forward 599 (unless (or (re-search-forward
@@ -607,119 +616,137 @@ This follows the rule [28] in the XML specifications."
607 (error "XML: Missing System ID")) 616 (error "XML: Missing System ID"))
608 (push (list (match-string-no-properties 1) 'system) dtd))) 617 (push (list (match-string-no-properties 1) 'system) dtd)))
609 (skip-syntax-forward " ") 618 (skip-syntax-forward " ")
610 (if (eq ?> (char-after)) 619
611 (forward-char) 620 (if (eq (char-after) ?>)
612 (if (not (eq (char-after) ?\[)) 621
613 (error "XML: Bad DTD") 622 ;; No internal subset
614 (forward-char) 623 (forward-char)
615 ;; Parse the rest of the DTD
616 ;; Fixme: Deal with NOTATION, PIs.
617 (while (not (looking-at "\\s-*\\]"))
618 (skip-syntax-forward " ")
619 (cond
620
621 ;; Translation of rule [45] of XML specifications
622 ((looking-at
623 "<!ELEMENT\\s-+\\([[:alnum:].%;]+\\)\\s-+\\([^>]+\\)>")
624
625 (setq element (match-string-no-properties 1)
626 type (match-string-no-properties 2))
627 (setq end-pos (match-end 0))
628 624
629 ;; Translation of rule [46] of XML specifications 625 ;; Internal subset (XML [28b])
626 (unless (eq (char-after) ?\[)
627 (error "XML: Bad DTD"))
628 (forward-char)
629
630 ;; Parse the rest of the DTD
631 ;; Fixme: Deal with NOTATION, PIs.
632 (while (not (looking-at "\\s-*\\]"))
633 (skip-syntax-forward " ")
634 (cond
635 ;; Element declaration [45]:
636 ((looking-at "<!ELEMENT\\s-+\\([[:alnum:].%;]+\\)\\s-+\\([^>]+\\)>")
637 (let ((element (match-string-no-properties 1))
638 (type (match-string-no-properties 2))
639 (end-pos (match-end 0)))
640 ;; Translation of rule [46] of XML specifications
630 (cond 641 (cond
631 ((string-match "^EMPTY[ \t\n\r]*$" type) ;; empty declaration 642 ((string-match "^EMPTY[ \t\n\r]*$" type) ; empty declaration
632 (setq type 'empty)) 643 (setq type 'empty))
633 ((string-match "^ANY[ \t\n\r]*$" type) ;; any type of contents 644 ((string-match "^ANY[ \t\n\r]*$" type) ; any type of contents
634 (setq type 'any)) 645 (setq type 'any))
635 ((string-match "^(\\(.*\\))[ \t\n\r]*$" type) ;; children ([47]) 646 ((string-match "^(\\(.*\\))[ \t\n\r]*$" type) ; children ([47])
636 (setq type (xml-parse-elem-type (match-string-no-properties 1 type)))) 647 (setq type (xml-parse-elem-type (match-string-no-properties 1 type))))
637 ((string-match "^%[^;]+;[ \t\n\r]*$" type) ;; substitution 648 ((string-match "^%[^;]+;[ \t\n\r]*$" type) ; substitution
638 nil) 649 nil)
639 (t 650 (xml-validating-parser
640 (if xml-validating-parser 651 (error "XML: (Validity) Invalid element type in the DTD")))
641 (error "XML: (Validity) Invalid element type in the DTD"))))
642 652
643 ;; rule [45]: the element declaration must be unique 653 ;; rule [45]: the element declaration must be unique
644 (if (and (assoc element dtd) 654 (and (assoc element dtd)
645 xml-validating-parser) 655 xml-validating-parser
646 (error "XML: (Validity) Element declarations must be unique in a DTD (<%s>)" 656 (error "XML: (Validity) DTD element declarations must be unique (<%s>)"
647 element)) 657 element))
648 658
649 ;; Store the element in the DTD 659 ;; Store the element in the DTD
650 (push (list element type) dtd) 660 (push (list element type) dtd)
651 (goto-char end-pos)) 661 (goto-char end-pos)))
652 662
653 ;; Translation of rule [52] of XML specifications 663 ;; Attribute-list declaration [52] (currently unsupported):
654 ((looking-at (concat "<!ATTLIST[ \t\n\r]*\\(" xml-name-re 664 ((looking-at (concat "<!ATTLIST[ \t\n\r]*\\(" xml-name-re
655 "\\)[ \t\n\r]*\\(" xml-att-def-re 665 "\\)[ \t\n\r]*\\(" xml-att-def-re
656 "\\)*[ \t\n\r]*>")) 666 "\\)*[ \t\n\r]*>"))
657 667 (goto-char (match-end 0)))
658 ;; We don't do anything with ATTLIST currently 668
659 (goto-char (match-end 0))) 669 ;; Comments (skip to end):
660 670 ((looking-at "<!--")
661 ((looking-at "<!--") 671 (search-forward "-->"))
662 (search-forward "-->")) 672
663 ((looking-at (concat "<!ENTITY[ \t\n\r]*\\(" xml-name-re 673 ;; Internal entity declarations:
664 "\\)[ \t\n\r]*\\(" xml-entity-value-re 674 ((looking-at (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\("
665 "\\)[ \t\n\r]*>")) 675 xml-name-re "\\)[ \t\n\r]*\\("
666 (let ((name (match-string-no-properties 1)) 676 xml-entity-value-re "\\)[ \t\n\r]*>"))
667 (value (substring (match-string-no-properties 2) 1 677 (let* ((name (prog1 (match-string-no-properties 2)
668 (- (length (match-string-no-properties 2)) 1)))) 678 (goto-char (match-end 0))))
669 (goto-char (match-end 0)) 679 (alist (if (match-string 1)
670 (setq xml-entity-alist 680 'xml-parameter-entity-alist
671 (append xml-entity-alist 681 'xml-entity-alist))
672 (list (cons name 682 ;; Retrieve the deplacement text:
673 (with-temp-buffer 683 (value (xml--entity-replacement-text
674 (insert value) 684 ;; Entity value, sans quotation marks:
675 (goto-char (point-min)) 685 (substring (match-string-no-properties 3) 1 -1))))
676 (xml-parse-fragment 686 ;; If the same entity is declared more than once, the
677 xml-validating-parser 687 ;; first declaration is binding.
678 parse-ns)))))))) 688 (unless (assoc name (symbol-value alist))
679 ((or (looking-at (concat "<!ENTITY[ \t\n\r]+\\(" xml-name-re 689 (set alist (cons (cons name value) (symbol-value alist))))))
680 "\\)[ \t\n\r]+SYSTEM[ \t\n\r]+" 690
681 "\\(\"[^\"]*\"\\|'[^']*'\\)[ \t\n\r]*>")) 691 ;; External entity declarations (currently unsupported):
682 (looking-at (concat "<!ENTITY[ \t\n\r]+\\(" xml-name-re 692 ((or (looking-at (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\("
683 "\\)[ \t\n\r]+PUBLIC[ \t\n\r]+" 693 xml-name-re "\\)[ \t\n\r]+SYSTEM[ \t\n\r]+"
684 "\"[- \r\na-zA-Z0-9'()+,./:=?;!*#@$_%]*\"" 694 "\\(\"[^\"]*\"\\|'[^']*'\\)[ \t\n\r]*>"))
685 "\\|'[- \r\na-zA-Z0-9()+,./:=?;!*#@$_%]*'" 695 (looking-at (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\("
686 "[ \t\n\r]+\\(\"[^\"]*\"\\|'[^']*'\\)" 696 xml-name-re "\\)[ \t\n\r]+PUBLIC[ \t\n\r]+"
687 "[ \t\n\r]*>"))) 697 "\"[- \r\na-zA-Z0-9'()+,./:=?;!*#@$_%]*\""
688 (let ((name (match-string-no-properties 1)) 698 "\\|'[- \r\na-zA-Z0-9()+,./:=?;!*#@$_%]*'"
689 (file (substring (match-string-no-properties 2) 1 699 "[ \t\n\r]+\\(\"[^\"]*\"\\|'[^']*'\\)"
690 (- (length (match-string-no-properties 2)) 1)))) 700 "[ \t\n\r]*>")))
691 (goto-char (match-end 0)) 701 (goto-char (match-end 0)))
692 (setq xml-entity-alist 702
693 (append xml-entity-alist 703 ;; Parameter entity:
694 (list (cons name (with-temp-buffer 704 ((looking-at (concat "%\\(" xml-name-re "\\);"))
695 (insert-file-contents file) 705 (goto-char (match-end 0))
696 (goto-char (point-min)) 706 (let* ((entity (match-string 1))
697 (xml-parse-fragment 707 (end (point-marker))
698 xml-validating-parser 708 (elt (assoc entity xml-parameter-entity-alist)))
699 parse-ns)))))))) 709 (when elt
700 ;; skip parameter entity declarations 710 (replace-match (cdr elt) t t)
701 ((or (looking-at (concat "<!ENTITY[ \t\n\r]+%[ \t\n\r]+\\(" xml-name-re 711 (goto-char end))))
702 "\\)[ \t\n\r]+SYSTEM[ \t\n\r]+" 712
703 "\\(\"[^\"]*\"\\|'[^']*'\\)[ \t\n\r]*>")) 713 ;; Anything else:
704 (looking-at (concat "<!ENTITY[ \t\n\r]+" 714 (xml-validating-parser
705 "%[ \t\n\r]+" 715 (error "XML: (Validity) Invalid DTD item"))))
706 "\\(" xml-name-re "\\)[ \t\n\r]+" 716
707 "PUBLIC[ \t\n\r]+"
708 "\\(\"[- \r\na-zA-Z0-9'()+,./:=?;!*#@$_%]*\""
709 "\\|'[- \r\na-zA-Z0-9()+,./:=?;!*#@$_%]*'\\)[ \t\n\r]+"
710 "\\(\"[^\"]+\"\\|'[^']+'\\)"
711 "[ \t\n\r]*>")))
712 (goto-char (match-end 0)))
713 ;; skip parameter entities
714 ((looking-at (concat "%" xml-name-re ";"))
715 (goto-char (match-end 0)))
716 (t
717 (when xml-validating-parser
718 (error "XML: (Validity) Invalid DTD item"))))))
719 (if (looking-at "\\s-*]>") 717 (if (looking-at "\\s-*]>")
720 (goto-char (match-end 0)))) 718 (goto-char (match-end 0))))
721 (nreverse dtd))) 719 (nreverse dtd)))
722 720
721(defun xml--entity-replacement-text (string)
722 "Return the replacement text for the entity value STRING.
723The replacement text is obtained by replacing character
724references and parameter-entity references."
725 (let ((ref-re (eval-when-compile
726 (concat "\\(?:&#\\([0-9]+\\)\\|&#x\\([0-9a-fA-F]+\\)\\|%\\("
727 xml-name-re "\\)\\);")))
728 children)
729 (while (string-match ref-re string)
730 (push (substring string 0 (match-beginning 0)) children)
731 (let ((remainder (substring string (match-end 0)))
732 ref val)
733 (cond ((setq ref (match-string 1 string))
734 ;; Decimal character reference
735 (setq val (decode-char 'ucs (string-to-number ref)))
736 (if val (push (string val) children)))
737 ;; Hexadecimal character reference
738 ((setq ref (match-string 2 string))
739 (setq val (decode-char 'ucs (string-to-number ref 16)))
740 (if val (push (string val) children)))
741 ;; Parameter entity reference
742 ((setq ref (match-string 3 string))
743 (setq val (assoc ref xml-parameter-entity-alist))
744 (if val
745 (push (cdr val) children)
746 (push (concat "%" ref ";") children))))
747 (setq string remainder)))
748 (mapconcat 'identity (nreverse (cons string children)) "")))
749
723(defun xml-parse-elem-type (string) 750(defun xml-parse-elem-type (string)
724 "Convert element type STRING into a Lisp structure." 751 "Convert element type STRING into a Lisp structure."
725 752
@@ -864,15 +891,12 @@ The first line is indented with the optional INDENT-STRING."
864(defalias 'xml-print 'xml-debug-print) 891(defalias 'xml-print 'xml-debug-print)
865 892
866(defun xml-escape-string (string) 893(defun xml-escape-string (string)
867 "Return the string with entity substitutions made from 894 "Return STRING with entity substitutions made from `xml-entity-alist'."
868xml-entity-alist."
869 (mapconcat (lambda (byte) 895 (mapconcat (lambda (byte)
870 (let ((char (char-to-string byte))) 896 (let ((char (char-to-string byte)))
871 (if (rassoc char xml-entity-alist) 897 (if (rassoc char xml-entity-alist)
872 (concat "&" (car (rassoc char xml-entity-alist)) ";") 898 (concat "&" (car (rassoc char xml-entity-alist)) ";")
873 char))) 899 char)))
874 ;; This differs from the non-unicode branch. Just
875 ;; grabbing the string works here.
876 string "")) 900 string ""))
877 901
878(defun xml-debug-print-internal (xml indent-string) 902(defun xml-debug-print-internal (xml indent-string)