aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChong Yidong2007-03-17 18:55:52 +0000
committerChong Yidong2007-03-17 18:55:52 +0000
commitf6fcdfff176e2acb3fb5e3c6847fb5664ef01035 (patch)
treee9c4e009ec54d345f11d74da308607f3bfcf6345
parent19f512103c1dd481aaceaf2d7071d4ba0c246729 (diff)
downloademacs-f6fcdfff176e2acb3fb5e3c6847fb5664ef01035.tar.gz
emacs-f6fcdfff176e2acb3fb5e3c6847fb5664ef01035.zip
* xml.el (xml-parse-tag, xml-parse-string, xml-parse-attlist)
(xml-parse-dtd, xml-parse-elem-type, xml-substitute-special): Return to use of the -no-properties variants. There was consensus on emacs-devel that the speed of these variants was prefered since we are usually parsing files (from the internet or on disk) instead of XML created in Emacs.
-rw-r--r--lisp/xml.el58
1 files changed, 31 insertions, 27 deletions
diff --git a/lisp/xml.el b/lisp/xml.el
index 5b694d50f12..f7045779ca4 100644
--- a/lisp/xml.el
+++ b/lisp/xml.el
@@ -76,8 +76,12 @@
76 76
77;;; Code: 77;;; Code:
78 78
79;; Note that {buffer-substring,match-string}-no-properties were 79;; Note that buffer-substring and match-string were formerly used in
80;; formerly used in several places, but that removes composition info. 80;; several places, because the -no-properties variants remove
81;; composition info. However, after some discussion on emacs-devel,
82;; the consensus was that the speed of the -no-properties variants was
83;; a worthwhile tradeoff especially since we're usually parsing files
84;; instead of hand-crafted XML.
81 85
82;;******************************************************************* 86;;*******************************************************************
83;;** 87;;**
@@ -406,7 +410,7 @@ Returns one of:
406 (unless (search-forward "]]>" nil t) 410 (unless (search-forward "]]>" nil t)
407 (error "XML: (Not Well Formed) CDATA section does not end anywhere in the document")) 411 (error "XML: (Not Well Formed) CDATA section does not end anywhere in the document"))
408 (concat 412 (concat
409 (buffer-substring pos (match-beginning 0)) 413 (buffer-substring-no-properties pos (match-beginning 0))
410 (xml-parse-string)))) 414 (xml-parse-string))))
411 ;; DTD for the document 415 ;; DTD for the document
412 ((looking-at "<!DOCTYPE") 416 ((looking-at "<!DOCTYPE")
@@ -427,7 +431,7 @@ Returns one of:
427 (goto-char (match-end 1)) 431 (goto-char (match-end 1))
428 432
429 ;; Parse this node 433 ;; Parse this node
430 (let* ((node-name (match-string 1)) 434 (let* ((node-name (match-string-no-properties 1))
431 ;; Parse the attribute list. 435 ;; Parse the attribute list.
432 (attrs (xml-parse-attlist xml-ns)) 436 (attrs (xml-parse-attlist xml-ns))
433 children pos) 437 children pos)
@@ -480,7 +484,7 @@ Returns one of:
480 (nreverse children))) 484 (nreverse children)))
481 ;; This was an invalid start tag (Expected ">", but didn't see it.) 485 ;; This was an invalid start tag (Expected ">", but didn't see it.)
482 (error "XML: (Well-Formed) Couldn't parse tag: %s" 486 (error "XML: (Well-Formed) Couldn't parse tag: %s"
483 (buffer-substring (- (point) 10) (+ (point) 1))))))) 487 (buffer-substring-no-properties (- (point) 10) (+ (point) 1)))))))
484 (t ;; (Not one of PI, CDATA, Comment, End tag, or Start tag) 488 (t ;; (Not one of PI, CDATA, Comment, End tag, or Start tag)
485 (unless xml-sub-parser ; Usually, we error out. 489 (unless xml-sub-parser ; Usually, we error out.
486 (error "XML: (Well-Formed) Invalid character")) 490 (error "XML: (Well-Formed) Invalid character"))
@@ -495,7 +499,7 @@ Returns one of:
495 (string (progn (if (search-forward "<" nil t) 499 (string (progn (if (search-forward "<" nil t)
496 (forward-char -1) 500 (forward-char -1)
497 (goto-char (point-max))) 501 (goto-char (point-max)))
498 (buffer-substring pos (point))))) 502 (buffer-substring-no-properties pos (point)))))
499 ;; Clean up the string. As per XML specifications, the XML 503 ;; Clean up the string. As per XML specifications, the XML
500 ;; processor should always pass the whole string to the 504 ;; processor should always pass the whole string to the
501 ;; application. But \r's should be replaced: 505 ;; application. But \r's should be replaced:
@@ -516,7 +520,7 @@ Leave point at the first non-blank character after the tag."
516 (while (looking-at (eval-when-compile 520 (while (looking-at (eval-when-compile
517 (concat "\\(" xml-name-regexp "\\)\\s-*=\\s-*"))) 521 (concat "\\(" xml-name-regexp "\\)\\s-*=\\s-*")))
518 (setq end-pos (match-end 0)) 522 (setq end-pos (match-end 0))
519 (setq name (xml-maybe-do-ns (match-string 1) nil xml-ns)) 523 (setq name (xml-maybe-do-ns (match-string-no-properties 1) nil xml-ns))
520 (goto-char end-pos) 524 (goto-char end-pos)
521 525
522 ;; See also: http://www.w3.org/TR/2000/REC-xml-20001006#AVNormalize 526 ;; See also: http://www.w3.org/TR/2000/REC-xml-20001006#AVNormalize
@@ -535,7 +539,7 @@ Leave point at the first non-blank character after the tag."
535 539
536 ;; Multiple whitespace characters should be replaced with a single one 540 ;; Multiple whitespace characters should be replaced with a single one
537 ;; in the attributes 541 ;; in the attributes
538 (let ((string (match-string 1)) 542 (let ((string (match-string-no-properties 1))
539 (pos 0)) 543 (pos 0))
540 (replace-regexp-in-string "\\s-\\{2,\\}" " " string) 544 (replace-regexp-in-string "\\s-\\{2,\\}" " " string)
541 (let ((expansion (xml-substitute-special string))) 545 (let ((expansion (xml-substitute-special string)))
@@ -575,7 +579,7 @@ This follows the rule [28] in the XML specifications."
575 579
576 ;; Get the name of the document 580 ;; Get the name of the document
577 (looking-at xml-name-regexp) 581 (looking-at xml-name-regexp)
578 (let ((dtd (list (match-string 0) 'dtd)) 582 (let ((dtd (list (match-string-no-properties 0) 'dtd))
579 type element end-pos) 583 type element end-pos)
580 (goto-char (match-end 0)) 584 (goto-char (match-end 0))
581 585
@@ -590,18 +594,18 @@ This follows the rule [28] in the XML specifications."
590 "\\='\\([[:space:][:alnum:]-()+,./:=?;!*#@$_%]*\\)'" 594 "\\='\\([[:space:][:alnum:]-()+,./:=?;!*#@$_%]*\\)'"
591 nil t)) 595 nil t))
592 (error "XML: Missing Public ID")) 596 (error "XML: Missing Public ID"))
593 (let ((pubid (match-string 1))) 597 (let ((pubid (match-string-no-properties 1)))
594 (skip-syntax-forward " ") 598 (skip-syntax-forward " ")
595 (unless (or (re-search-forward "\\='\\([^']*\\)'" nil t) 599 (unless (or (re-search-forward "\\='\\([^']*\\)'" nil t)
596 (re-search-forward "\\=\"\\([^\"]*\\)\"" nil t)) 600 (re-search-forward "\\=\"\\([^\"]*\\)\"" nil t))
597 (error "XML: Missing System ID")) 601 (error "XML: Missing System ID"))
598 (push (list pubid (match-string 1) 'public) dtd))) 602 (push (list pubid (match-string-no-properties 1) 'public) dtd)))
599 ((looking-at "SYSTEM\\s-+") 603 ((looking-at "SYSTEM\\s-+")
600 (goto-char (match-end 0)) 604 (goto-char (match-end 0))
601 (unless (or (re-search-forward "\\='\\([^']*\\)'" nil t) 605 (unless (or (re-search-forward "\\='\\([^']*\\)'" nil t)
602 (re-search-forward "\\=\"\\([^\"]*\\)\"" nil t)) 606 (re-search-forward "\\=\"\\([^\"]*\\)\"" nil t))
603 (error "XML: Missing System ID")) 607 (error "XML: Missing System ID"))
604 (push (list (match-string 1) 'system) dtd))) 608 (push (list (match-string-no-properties 1) 'system) dtd)))
605 (skip-syntax-forward " ") 609 (skip-syntax-forward " ")
606 (if (eq ?> (char-after)) 610 (if (eq ?> (char-after))
607 (forward-char) 611 (forward-char)
@@ -618,7 +622,7 @@ This follows the rule [28] in the XML specifications."
618 ((looking-at 622 ((looking-at
619 "<!ELEMENT\\s-+\\([[:alnum:].%;]+\\)\\s-+\\([^>]+\\)>") 623 "<!ELEMENT\\s-+\\([[:alnum:].%;]+\\)\\s-+\\([^>]+\\)>")
620 624
621 (setq element (match-string 1) 625 (setq element (match-string-no-properties 1)
622 type (match-string-no-properties 2)) 626 type (match-string-no-properties 2))
623 (setq end-pos (match-end 0)) 627 (setq end-pos (match-end 0))
624 628
@@ -629,7 +633,7 @@ This follows the rule [28] in the XML specifications."
629 ((string-match "^ANY[ \t\n\r]*$" type) ;; any type of contents 633 ((string-match "^ANY[ \t\n\r]*$" type) ;; any type of contents
630 (setq type 'any)) 634 (setq type 'any))
631 ((string-match "^(\\(.*\\))[ \t\n\r]*$" type) ;; children ([47]) 635 ((string-match "^(\\(.*\\))[ \t\n\r]*$" type) ;; children ([47])
632 (setq type (xml-parse-elem-type (match-string 1 type)))) 636 (setq type (xml-parse-elem-type (match-string-no-properties 1 type))))
633 ((string-match "^%[^;]+;[ \t\n\r]*$" type) ;; substitution 637 ((string-match "^%[^;]+;[ \t\n\r]*$" type) ;; substitution
634 nil) 638 nil)
635 (t 639 (t
@@ -659,9 +663,9 @@ This follows the rule [28] in the XML specifications."
659 ((looking-at (concat "<!ENTITY[ \t\n\r]*\\(" xml-name-re 663 ((looking-at (concat "<!ENTITY[ \t\n\r]*\\(" xml-name-re
660 "\\)[ \t\n\r]*\\(" xml-entity-value-re 664 "\\)[ \t\n\r]*\\(" xml-entity-value-re
661 "\\)[ \t\n\r]*>")) 665 "\\)[ \t\n\r]*>"))
662 (let ((name (match-string 1)) 666 (let ((name (match-string-no-properties 1))
663 (value (substring (match-string 2) 1 667 (value (substring (match-string-no-properties 2) 1
664 (- (length (match-string 2)) 1)))) 668 (- (length (match-string-no-properties 2)) 1))))
665 (goto-char (match-end 0)) 669 (goto-char (match-end 0))
666 (setq xml-entity-alist 670 (setq xml-entity-alist
667 (append xml-entity-alist 671 (append xml-entity-alist
@@ -681,9 +685,9 @@ This follows the rule [28] in the XML specifications."
681 "\\|'[- \r\na-zA-Z0-9()+,./:=?;!*#@$_%]*'" 685 "\\|'[- \r\na-zA-Z0-9()+,./:=?;!*#@$_%]*'"
682 "[ \t\n\r]+\\(\"[^\"]*\"\\|'[^']*'\\)" 686 "[ \t\n\r]+\\(\"[^\"]*\"\\|'[^']*'\\)"
683 "[ \t\n\r]*>"))) 687 "[ \t\n\r]*>")))
684 (let ((name (match-string 1)) 688 (let ((name (match-string-no-properties 1))
685 (file (substring (match-string 2) 1 689 (file (substring (match-string-no-properties 2) 1
686 (- (length (match-string 2)) 1)))) 690 (- (length (match-string-no-properties 2)) 1))))
687 (goto-char (match-end 0)) 691 (goto-char (match-end 0))
688 (setq xml-entity-alist 692 (setq xml-entity-alist
689 (append xml-entity-alist 693 (append xml-entity-alist
@@ -722,8 +726,8 @@ This follows the rule [28] in the XML specifications."
722 (let (elem modifier) 726 (let (elem modifier)
723 (if (string-match "(\\([^)]+\\))\\([+*?]?\\)" string) 727 (if (string-match "(\\([^)]+\\))\\([+*?]?\\)" string)
724 (progn 728 (progn
725 (setq elem (match-string 1 string) 729 (setq elem (match-string-no-properties 1 string)
726 modifier (match-string 2 string)) 730 modifier (match-string-no-properties 2 string))
727 (if (string-match "|" elem) 731 (if (string-match "|" elem)
728 (setq elem (cons 'choice 732 (setq elem (cons 'choice
729 (mapcar 'xml-parse-elem-type 733 (mapcar 'xml-parse-elem-type
@@ -733,8 +737,8 @@ This follows the rule [28] in the XML specifications."
733 (mapcar 'xml-parse-elem-type 737 (mapcar 'xml-parse-elem-type
734 (split-string elem ","))))))) 738 (split-string elem ",")))))))
735 (if (string-match "[ \t\n\r]*\\([^+*?]+\\)\\([+*?]?\\)" string) 739 (if (string-match "[ \t\n\r]*\\([^+*?]+\\)\\([+*?]?\\)" string)
736 (setq elem (match-string 1 string) 740 (setq elem (match-string-no-properties 1 string)
737 modifier (match-string 2 string)))) 741 modifier (match-string-no-properties 2 string))))
738 742
739 (if (and (stringp elem) (string= elem "#PCDATA")) 743 (if (and (stringp elem) (string= elem "#PCDATA"))
740 (setq elem 'pcdata)) 744 (setq elem 'pcdata))
@@ -765,19 +769,19 @@ This follows the rule [28] in the XML specifications."
765 children end-point) 769 children end-point)
766 (while (string-match "&\\([^;]*\\);" string point) 770 (while (string-match "&\\([^;]*\\);" string point)
767 (setq end-point (match-end 0)) 771 (setq end-point (match-end 0))
768 (let* ((this-part (match-string 1 string)) 772 (let* ((this-part (match-string-no-properties 1 string))
769 (prev-part (substring string point (match-beginning 0))) 773 (prev-part (substring string point (match-beginning 0)))
770 (entity (assoc this-part xml-entity-alist)) 774 (entity (assoc this-part xml-entity-alist))
771 (expansion 775 (expansion
772 (cond ((string-match "#\\([0-9]+\\)" this-part) 776 (cond ((string-match "#\\([0-9]+\\)" this-part)
773 (let ((c (decode-char 777 (let ((c (decode-char
774 'ucs 778 'ucs
775 (string-to-number (match-string 1 this-part))))) 779 (string-to-number (match-string-no-properties 1 this-part)))))
776 (if c (string c)))) 780 (if c (string c))))
777 ((string-match "#x\\([[:xdigit:]]+\\)" this-part) 781 ((string-match "#x\\([[:xdigit:]]+\\)" this-part)
778 (let ((c (decode-char 782 (let ((c (decode-char
779 'ucs 783 'ucs
780 (string-to-number (match-string 1 this-part) 16)))) 784 (string-to-number (match-string-no-properties 1 this-part) 16))))
781 (if c (string c)))) 785 (if c (string c))))
782 (entity 786 (entity
783 (cdr entity)) 787 (cdr entity))