aboutsummaryrefslogtreecommitdiffstats
path: root/lisp
diff options
context:
space:
mode:
authorChong Yidong2007-03-17 18:55:52 +0000
committerChong Yidong2007-03-17 18:55:52 +0000
commitf6fcdfff176e2acb3fb5e3c6847fb5664ef01035 (patch)
treee9c4e009ec54d345f11d74da308607f3bfcf6345 /lisp
parent19f512103c1dd481aaceaf2d7071d4ba0c246729 (diff)
downloademacs-f6fcdfff176e2acb3fb5e3c6847fb5664ef01035.tar.gz
emacs-f6fcdfff176e2acb3fb5e3c6847fb5664ef01035.zip
* xml.el (xml-parse-tag, xml-parse-string, xml-parse-attlist)
(xml-parse-dtd, xml-parse-elem-type, xml-substitute-special): Return to use of the -no-properties variants. There was consensus on emacs-devel that the speed of these variants was prefered since we are usually parsing files (from the internet or on disk) instead of XML created in Emacs.
Diffstat (limited to 'lisp')
-rw-r--r--lisp/xml.el58
1 files changed, 31 insertions, 27 deletions
diff --git a/lisp/xml.el b/lisp/xml.el
index 5b694d50f12..f7045779ca4 100644
--- a/lisp/xml.el
+++ b/lisp/xml.el
@@ -76,8 +76,12 @@
76 76
77;;; Code: 77;;; Code:
78 78
79;; Note that {buffer-substring,match-string}-no-properties were 79;; Note that buffer-substring and match-string were formerly used in
80;; formerly used in several places, but that removes composition info. 80;; several places, because the -no-properties variants remove
81;; composition info. However, after some discussion on emacs-devel,
82;; the consensus was that the speed of the -no-properties variants was
83;; a worthwhile tradeoff especially since we're usually parsing files
84;; instead of hand-crafted XML.
81 85
82;;******************************************************************* 86;;*******************************************************************
83;;** 87;;**
@@ -406,7 +410,7 @@ Returns one of:
406 (unless (search-forward "]]>" nil t) 410 (unless (search-forward "]]>" nil t)
407 (error "XML: (Not Well Formed) CDATA section does not end anywhere in the document")) 411 (error "XML: (Not Well Formed) CDATA section does not end anywhere in the document"))
408 (concat 412 (concat
409 (buffer-substring pos (match-beginning 0)) 413 (buffer-substring-no-properties pos (match-beginning 0))
410 (xml-parse-string)))) 414 (xml-parse-string))))
411 ;; DTD for the document 415 ;; DTD for the document
412 ((looking-at "<!DOCTYPE") 416 ((looking-at "<!DOCTYPE")
@@ -427,7 +431,7 @@ Returns one of:
427 (goto-char (match-end 1)) 431 (goto-char (match-end 1))
428 432
429 ;; Parse this node 433 ;; Parse this node
430 (let* ((node-name (match-string 1)) 434 (let* ((node-name (match-string-no-properties 1))
431 ;; Parse the attribute list. 435 ;; Parse the attribute list.
432 (attrs (xml-parse-attlist xml-ns)) 436 (attrs (xml-parse-attlist xml-ns))
433 children pos) 437 children pos)
@@ -480,7 +484,7 @@ Returns one of:
480 (nreverse children))) 484 (nreverse children)))
481 ;; This was an invalid start tag (Expected ">", but didn't see it.) 485 ;; This was an invalid start tag (Expected ">", but didn't see it.)
482 (error "XML: (Well-Formed) Couldn't parse tag: %s" 486 (error "XML: (Well-Formed) Couldn't parse tag: %s"
483 (buffer-substring (- (point) 10) (+ (point) 1))))))) 487 (buffer-substring-no-properties (- (point) 10) (+ (point) 1)))))))
484 (t ;; (Not one of PI, CDATA, Comment, End tag, or Start tag) 488 (t ;; (Not one of PI, CDATA, Comment, End tag, or Start tag)
485 (unless xml-sub-parser ; Usually, we error out. 489 (unless xml-sub-parser ; Usually, we error out.
486 (error "XML: (Well-Formed) Invalid character")) 490 (error "XML: (Well-Formed) Invalid character"))
@@ -495,7 +499,7 @@ Returns one of:
495 (string (progn (if (search-forward "<" nil t) 499 (string (progn (if (search-forward "<" nil t)
496 (forward-char -1) 500 (forward-char -1)
497 (goto-char (point-max))) 501 (goto-char (point-max)))
498 (buffer-substring pos (point))))) 502 (buffer-substring-no-properties pos (point)))))
499 ;; Clean up the string. As per XML specifications, the XML 503 ;; Clean up the string. As per XML specifications, the XML
500 ;; processor should always pass the whole string to the 504 ;; processor should always pass the whole string to the
501 ;; application. But \r's should be replaced: 505 ;; application. But \r's should be replaced:
@@ -516,7 +520,7 @@ Leave point at the first non-blank character after the tag."
516 (while (looking-at (eval-when-compile 520 (while (looking-at (eval-when-compile
517 (concat "\\(" xml-name-regexp "\\)\\s-*=\\s-*"))) 521 (concat "\\(" xml-name-regexp "\\)\\s-*=\\s-*")))
518 (setq end-pos (match-end 0)) 522 (setq end-pos (match-end 0))
519 (setq name (xml-maybe-do-ns (match-string 1) nil xml-ns)) 523 (setq name (xml-maybe-do-ns (match-string-no-properties 1) nil xml-ns))
520 (goto-char end-pos) 524 (goto-char end-pos)
521 525
522 ;; See also: http://www.w3.org/TR/2000/REC-xml-20001006#AVNormalize 526 ;; See also: http://www.w3.org/TR/2000/REC-xml-20001006#AVNormalize
@@ -535,7 +539,7 @@ Leave point at the first non-blank character after the tag."
535 539
536 ;; Multiple whitespace characters should be replaced with a single one 540 ;; Multiple whitespace characters should be replaced with a single one
537 ;; in the attributes 541 ;; in the attributes
538 (let ((string (match-string 1)) 542 (let ((string (match-string-no-properties 1))
539 (pos 0)) 543 (pos 0))
540 (replace-regexp-in-string "\\s-\\{2,\\}" " " string) 544 (replace-regexp-in-string "\\s-\\{2,\\}" " " string)
541 (let ((expansion (xml-substitute-special string))) 545 (let ((expansion (xml-substitute-special string)))
@@ -575,7 +579,7 @@ This follows the rule [28] in the XML specifications."
575 579
576 ;; Get the name of the document 580 ;; Get the name of the document
577 (looking-at xml-name-regexp) 581 (looking-at xml-name-regexp)
578 (let ((dtd (list (match-string 0) 'dtd)) 582 (let ((dtd (list (match-string-no-properties 0) 'dtd))
579 type element end-pos) 583 type element end-pos)
580 (goto-char (match-end 0)) 584 (goto-char (match-end 0))
581 585
@@ -590,18 +594,18 @@ This follows the rule [28] in the XML specifications."
590 "\\='\\([[:space:][:alnum:]-()+,./:=?;!*#@$_%]*\\)'" 594 "\\='\\([[:space:][:alnum:]-()+,./:=?;!*#@$_%]*\\)'"
591 nil t)) 595 nil t))
592 (error "XML: Missing Public ID")) 596 (error "XML: Missing Public ID"))
593 (let ((pubid (match-string 1))) 597 (let ((pubid (match-string-no-properties 1)))
594 (skip-syntax-forward " ") 598 (skip-syntax-forward " ")
595 (unless (or (re-search-forward "\\='\\([^']*\\)'" nil t) 599 (unless (or (re-search-forward "\\='\\([^']*\\)'" nil t)
596 (re-search-forward "\\=\"\\([^\"]*\\)\"" nil t)) 600 (re-search-forward "\\=\"\\([^\"]*\\)\"" nil t))
597 (error "XML: Missing System ID")) 601 (error "XML: Missing System ID"))
598 (push (list pubid (match-string 1) 'public) dtd))) 602 (push (list pubid (match-string-no-properties 1) 'public) dtd)))
599 ((looking-at "SYSTEM\\s-+") 603 ((looking-at "SYSTEM\\s-+")
600 (goto-char (match-end 0)) 604 (goto-char (match-end 0))
601 (unless (or (re-search-forward "\\='\\([^']*\\)'" nil t) 605 (unless (or (re-search-forward "\\='\\([^']*\\)'" nil t)
602 (re-search-forward "\\=\"\\([^\"]*\\)\"" nil t)) 606 (re-search-forward "\\=\"\\([^\"]*\\)\"" nil t))
603 (error "XML: Missing System ID")) 607 (error "XML: Missing System ID"))
604 (push (list (match-string 1) 'system) dtd))) 608 (push (list (match-string-no-properties 1) 'system) dtd)))
605 (skip-syntax-forward " ") 609 (skip-syntax-forward " ")
606 (if (eq ?> (char-after)) 610 (if (eq ?> (char-after))
607 (forward-char) 611 (forward-char)
@@ -618,7 +622,7 @@ This follows the rule [28] in the XML specifications."
618 ((looking-at 622 ((looking-at
619 "<!ELEMENT\\s-+\\([[:alnum:].%;]+\\)\\s-+\\([^>]+\\)>") 623 "<!ELEMENT\\s-+\\([[:alnum:].%;]+\\)\\s-+\\([^>]+\\)>")
620 624
621 (setq element (match-string 1) 625 (setq element (match-string-no-properties 1)
622 type (match-string-no-properties 2)) 626 type (match-string-no-properties 2))
623 (setq end-pos (match-end 0)) 627 (setq end-pos (match-end 0))
624 628
@@ -629,7 +633,7 @@ This follows the rule [28] in the XML specifications."
629 ((string-match "^ANY[ \t\n\r]*$" type) ;; any type of contents 633 ((string-match "^ANY[ \t\n\r]*$" type) ;; any type of contents
630 (setq type 'any)) 634 (setq type 'any))
631 ((string-match "^(\\(.*\\))[ \t\n\r]*$" type) ;; children ([47]) 635 ((string-match "^(\\(.*\\))[ \t\n\r]*$" type) ;; children ([47])
632 (setq type (xml-parse-elem-type (match-string 1 type)))) 636 (setq type (xml-parse-elem-type (match-string-no-properties 1 type))))
633 ((string-match "^%[^;]+;[ \t\n\r]*$" type) ;; substitution 637 ((string-match "^%[^;]+;[ \t\n\r]*$" type) ;; substitution
634 nil) 638 nil)
635 (t 639 (t
@@ -659,9 +663,9 @@ This follows the rule [28] in the XML specifications."
659 ((looking-at (concat "<!ENTITY[ \t\n\r]*\\(" xml-name-re 663 ((looking-at (concat "<!ENTITY[ \t\n\r]*\\(" xml-name-re
660 "\\)[ \t\n\r]*\\(" xml-entity-value-re 664 "\\)[ \t\n\r]*\\(" xml-entity-value-re
661 "\\)[ \t\n\r]*>")) 665 "\\)[ \t\n\r]*>"))
662 (let ((name (match-string 1)) 666 (let ((name (match-string-no-properties 1))
663 (value (substring (match-string 2) 1 667 (value (substring (match-string-no-properties 2) 1
664 (- (length (match-string 2)) 1)))) 668 (- (length (match-string-no-properties 2)) 1))))
665 (goto-char (match-end 0)) 669 (goto-char (match-end 0))
666 (setq xml-entity-alist 670 (setq xml-entity-alist
667 (append xml-entity-alist 671 (append xml-entity-alist
@@ -681,9 +685,9 @@ This follows the rule [28] in the XML specifications."
681 "\\|'[- \r\na-zA-Z0-9()+,./:=?;!*#@$_%]*'" 685 "\\|'[- \r\na-zA-Z0-9()+,./:=?;!*#@$_%]*'"
682 "[ \t\n\r]+\\(\"[^\"]*\"\\|'[^']*'\\)" 686 "[ \t\n\r]+\\(\"[^\"]*\"\\|'[^']*'\\)"
683 "[ \t\n\r]*>"))) 687 "[ \t\n\r]*>")))
684 (let ((name (match-string 1)) 688 (let ((name (match-string-no-properties 1))
685 (file (substring (match-string 2) 1 689 (file (substring (match-string-no-properties 2) 1
686 (- (length (match-string 2)) 1)))) 690 (- (length (match-string-no-properties 2)) 1))))
687 (goto-char (match-end 0)) 691 (goto-char (match-end 0))
688 (setq xml-entity-alist 692 (setq xml-entity-alist
689 (append xml-entity-alist 693 (append xml-entity-alist
@@ -722,8 +726,8 @@ This follows the rule [28] in the XML specifications."
722 (let (elem modifier) 726 (let (elem modifier)
723 (if (string-match "(\\([^)]+\\))\\([+*?]?\\)" string) 727 (if (string-match "(\\([^)]+\\))\\([+*?]?\\)" string)
724 (progn 728 (progn
725 (setq elem (match-string 1 string) 729 (setq elem (match-string-no-properties 1 string)
726 modifier (match-string 2 string)) 730 modifier (match-string-no-properties 2 string))
727 (if (string-match "|" elem) 731 (if (string-match "|" elem)
728 (setq elem (cons 'choice 732 (setq elem (cons 'choice
729 (mapcar 'xml-parse-elem-type 733 (mapcar 'xml-parse-elem-type
@@ -733,8 +737,8 @@ This follows the rule [28] in the XML specifications."
733 (mapcar 'xml-parse-elem-type 737 (mapcar 'xml-parse-elem-type
734 (split-string elem ","))))))) 738 (split-string elem ",")))))))
735 (if (string-match "[ \t\n\r]*\\([^+*?]+\\)\\([+*?]?\\)" string) 739 (if (string-match "[ \t\n\r]*\\([^+*?]+\\)\\([+*?]?\\)" string)
736 (setq elem (match-string 1 string) 740 (setq elem (match-string-no-properties 1 string)
737 modifier (match-string 2 string)))) 741 modifier (match-string-no-properties 2 string))))
738 742
739 (if (and (stringp elem) (string= elem "#PCDATA")) 743 (if (and (stringp elem) (string= elem "#PCDATA"))
740 (setq elem 'pcdata)) 744 (setq elem 'pcdata))
@@ -765,19 +769,19 @@ This follows the rule [28] in the XML specifications."
765 children end-point) 769 children end-point)
766 (while (string-match "&\\([^;]*\\);" string point) 770 (while (string-match "&\\([^;]*\\);" string point)
767 (setq end-point (match-end 0)) 771 (setq end-point (match-end 0))
768 (let* ((this-part (match-string 1 string)) 772 (let* ((this-part (match-string-no-properties 1 string))
769 (prev-part (substring string point (match-beginning 0))) 773 (prev-part (substring string point (match-beginning 0)))
770 (entity (assoc this-part xml-entity-alist)) 774 (entity (assoc this-part xml-entity-alist))
771 (expansion 775 (expansion
772 (cond ((string-match "#\\([0-9]+\\)" this-part) 776 (cond ((string-match "#\\([0-9]+\\)" this-part)
773 (let ((c (decode-char 777 (let ((c (decode-char
774 'ucs 778 'ucs
775 (string-to-number (match-string 1 this-part))))) 779 (string-to-number (match-string-no-properties 1 this-part)))))
776 (if c (string c)))) 780 (if c (string c))))
777 ((string-match "#x\\([[:xdigit:]]+\\)" this-part) 781 ((string-match "#x\\([[:xdigit:]]+\\)" this-part)
778 (let ((c (decode-char 782 (let ((c (decode-char
779 'ucs 783 'ucs
780 (string-to-number (match-string 1 this-part) 16)))) 784 (string-to-number (match-string-no-properties 1 this-part) 16))))
781 (if c (string c)))) 785 (if c (string c))))
782 (entity 786 (entity
783 (cdr entity)) 787 (cdr entity))