diff options
| author | Chong Yidong | 2007-03-17 18:55:52 +0000 |
|---|---|---|
| committer | Chong Yidong | 2007-03-17 18:55:52 +0000 |
| commit | f6fcdfff176e2acb3fb5e3c6847fb5664ef01035 (patch) | |
| tree | e9c4e009ec54d345f11d74da308607f3bfcf6345 | |
| parent | 19f512103c1dd481aaceaf2d7071d4ba0c246729 (diff) | |
| download | emacs-f6fcdfff176e2acb3fb5e3c6847fb5664ef01035.tar.gz emacs-f6fcdfff176e2acb3fb5e3c6847fb5664ef01035.zip | |
* xml.el (xml-parse-tag, xml-parse-string, xml-parse-attlist)
(xml-parse-dtd, xml-parse-elem-type, xml-substitute-special):
Return to use of the -no-properties variants. There was
consensus on emacs-devel that the speed of these variants was
prefered since we are usually parsing files (from the internet
or on disk) instead of XML created in Emacs.
| -rw-r--r-- | lisp/xml.el | 58 |
1 files changed, 31 insertions, 27 deletions
diff --git a/lisp/xml.el b/lisp/xml.el index 5b694d50f12..f7045779ca4 100644 --- a/lisp/xml.el +++ b/lisp/xml.el | |||
| @@ -76,8 +76,12 @@ | |||
| 76 | 76 | ||
| 77 | ;;; Code: | 77 | ;;; Code: |
| 78 | 78 | ||
| 79 | ;; Note that {buffer-substring,match-string}-no-properties were | 79 | ;; Note that buffer-substring and match-string were formerly used in |
| 80 | ;; formerly used in several places, but that removes composition info. | 80 | ;; several places, because the -no-properties variants remove |
| 81 | ;; composition info. However, after some discussion on emacs-devel, | ||
| 82 | ;; the consensus was that the speed of the -no-properties variants was | ||
| 83 | ;; a worthwhile tradeoff especially since we're usually parsing files | ||
| 84 | ;; instead of hand-crafted XML. | ||
| 81 | 85 | ||
| 82 | ;;******************************************************************* | 86 | ;;******************************************************************* |
| 83 | ;;** | 87 | ;;** |
| @@ -406,7 +410,7 @@ Returns one of: | |||
| 406 | (unless (search-forward "]]>" nil t) | 410 | (unless (search-forward "]]>" nil t) |
| 407 | (error "XML: (Not Well Formed) CDATA section does not end anywhere in the document")) | 411 | (error "XML: (Not Well Formed) CDATA section does not end anywhere in the document")) |
| 408 | (concat | 412 | (concat |
| 409 | (buffer-substring pos (match-beginning 0)) | 413 | (buffer-substring-no-properties pos (match-beginning 0)) |
| 410 | (xml-parse-string)))) | 414 | (xml-parse-string)))) |
| 411 | ;; DTD for the document | 415 | ;; DTD for the document |
| 412 | ((looking-at "<!DOCTYPE") | 416 | ((looking-at "<!DOCTYPE") |
| @@ -427,7 +431,7 @@ Returns one of: | |||
| 427 | (goto-char (match-end 1)) | 431 | (goto-char (match-end 1)) |
| 428 | 432 | ||
| 429 | ;; Parse this node | 433 | ;; Parse this node |
| 430 | (let* ((node-name (match-string 1)) | 434 | (let* ((node-name (match-string-no-properties 1)) |
| 431 | ;; Parse the attribute list. | 435 | ;; Parse the attribute list. |
| 432 | (attrs (xml-parse-attlist xml-ns)) | 436 | (attrs (xml-parse-attlist xml-ns)) |
| 433 | children pos) | 437 | children pos) |
| @@ -480,7 +484,7 @@ Returns one of: | |||
| 480 | (nreverse children))) | 484 | (nreverse children))) |
| 481 | ;; This was an invalid start tag (Expected ">", but didn't see it.) | 485 | ;; This was an invalid start tag (Expected ">", but didn't see it.) |
| 482 | (error "XML: (Well-Formed) Couldn't parse tag: %s" | 486 | (error "XML: (Well-Formed) Couldn't parse tag: %s" |
| 483 | (buffer-substring (- (point) 10) (+ (point) 1))))))) | 487 | (buffer-substring-no-properties (- (point) 10) (+ (point) 1))))))) |
| 484 | (t ;; (Not one of PI, CDATA, Comment, End tag, or Start tag) | 488 | (t ;; (Not one of PI, CDATA, Comment, End tag, or Start tag) |
| 485 | (unless xml-sub-parser ; Usually, we error out. | 489 | (unless xml-sub-parser ; Usually, we error out. |
| 486 | (error "XML: (Well-Formed) Invalid character")) | 490 | (error "XML: (Well-Formed) Invalid character")) |
| @@ -495,7 +499,7 @@ Returns one of: | |||
| 495 | (string (progn (if (search-forward "<" nil t) | 499 | (string (progn (if (search-forward "<" nil t) |
| 496 | (forward-char -1) | 500 | (forward-char -1) |
| 497 | (goto-char (point-max))) | 501 | (goto-char (point-max))) |
| 498 | (buffer-substring pos (point))))) | 502 | (buffer-substring-no-properties pos (point))))) |
| 499 | ;; Clean up the string. As per XML specifications, the XML | 503 | ;; Clean up the string. As per XML specifications, the XML |
| 500 | ;; processor should always pass the whole string to the | 504 | ;; processor should always pass the whole string to the |
| 501 | ;; application. But \r's should be replaced: | 505 | ;; application. But \r's should be replaced: |
| @@ -516,7 +520,7 @@ Leave point at the first non-blank character after the tag." | |||
| 516 | (while (looking-at (eval-when-compile | 520 | (while (looking-at (eval-when-compile |
| 517 | (concat "\\(" xml-name-regexp "\\)\\s-*=\\s-*"))) | 521 | (concat "\\(" xml-name-regexp "\\)\\s-*=\\s-*"))) |
| 518 | (setq end-pos (match-end 0)) | 522 | (setq end-pos (match-end 0)) |
| 519 | (setq name (xml-maybe-do-ns (match-string 1) nil xml-ns)) | 523 | (setq name (xml-maybe-do-ns (match-string-no-properties 1) nil xml-ns)) |
| 520 | (goto-char end-pos) | 524 | (goto-char end-pos) |
| 521 | 525 | ||
| 522 | ;; See also: http://www.w3.org/TR/2000/REC-xml-20001006#AVNormalize | 526 | ;; See also: http://www.w3.org/TR/2000/REC-xml-20001006#AVNormalize |
| @@ -535,7 +539,7 @@ Leave point at the first non-blank character after the tag." | |||
| 535 | 539 | ||
| 536 | ;; Multiple whitespace characters should be replaced with a single one | 540 | ;; Multiple whitespace characters should be replaced with a single one |
| 537 | ;; in the attributes | 541 | ;; in the attributes |
| 538 | (let ((string (match-string 1)) | 542 | (let ((string (match-string-no-properties 1)) |
| 539 | (pos 0)) | 543 | (pos 0)) |
| 540 | (replace-regexp-in-string "\\s-\\{2,\\}" " " string) | 544 | (replace-regexp-in-string "\\s-\\{2,\\}" " " string) |
| 541 | (let ((expansion (xml-substitute-special string))) | 545 | (let ((expansion (xml-substitute-special string))) |
| @@ -575,7 +579,7 @@ This follows the rule [28] in the XML specifications." | |||
| 575 | 579 | ||
| 576 | ;; Get the name of the document | 580 | ;; Get the name of the document |
| 577 | (looking-at xml-name-regexp) | 581 | (looking-at xml-name-regexp) |
| 578 | (let ((dtd (list (match-string 0) 'dtd)) | 582 | (let ((dtd (list (match-string-no-properties 0) 'dtd)) |
| 579 | type element end-pos) | 583 | type element end-pos) |
| 580 | (goto-char (match-end 0)) | 584 | (goto-char (match-end 0)) |
| 581 | 585 | ||
| @@ -590,18 +594,18 @@ This follows the rule [28] in the XML specifications." | |||
| 590 | "\\='\\([[:space:][:alnum:]-()+,./:=?;!*#@$_%]*\\)'" | 594 | "\\='\\([[:space:][:alnum:]-()+,./:=?;!*#@$_%]*\\)'" |
| 591 | nil t)) | 595 | nil t)) |
| 592 | (error "XML: Missing Public ID")) | 596 | (error "XML: Missing Public ID")) |
| 593 | (let ((pubid (match-string 1))) | 597 | (let ((pubid (match-string-no-properties 1))) |
| 594 | (skip-syntax-forward " ") | 598 | (skip-syntax-forward " ") |
| 595 | (unless (or (re-search-forward "\\='\\([^']*\\)'" nil t) | 599 | (unless (or (re-search-forward "\\='\\([^']*\\)'" nil t) |
| 596 | (re-search-forward "\\=\"\\([^\"]*\\)\"" nil t)) | 600 | (re-search-forward "\\=\"\\([^\"]*\\)\"" nil t)) |
| 597 | (error "XML: Missing System ID")) | 601 | (error "XML: Missing System ID")) |
| 598 | (push (list pubid (match-string 1) 'public) dtd))) | 602 | (push (list pubid (match-string-no-properties 1) 'public) dtd))) |
| 599 | ((looking-at "SYSTEM\\s-+") | 603 | ((looking-at "SYSTEM\\s-+") |
| 600 | (goto-char (match-end 0)) | 604 | (goto-char (match-end 0)) |
| 601 | (unless (or (re-search-forward "\\='\\([^']*\\)'" nil t) | 605 | (unless (or (re-search-forward "\\='\\([^']*\\)'" nil t) |
| 602 | (re-search-forward "\\=\"\\([^\"]*\\)\"" nil t)) | 606 | (re-search-forward "\\=\"\\([^\"]*\\)\"" nil t)) |
| 603 | (error "XML: Missing System ID")) | 607 | (error "XML: Missing System ID")) |
| 604 | (push (list (match-string 1) 'system) dtd))) | 608 | (push (list (match-string-no-properties 1) 'system) dtd))) |
| 605 | (skip-syntax-forward " ") | 609 | (skip-syntax-forward " ") |
| 606 | (if (eq ?> (char-after)) | 610 | (if (eq ?> (char-after)) |
| 607 | (forward-char) | 611 | (forward-char) |
| @@ -618,7 +622,7 @@ This follows the rule [28] in the XML specifications." | |||
| 618 | ((looking-at | 622 | ((looking-at |
| 619 | "<!ELEMENT\\s-+\\([[:alnum:].%;]+\\)\\s-+\\([^>]+\\)>") | 623 | "<!ELEMENT\\s-+\\([[:alnum:].%;]+\\)\\s-+\\([^>]+\\)>") |
| 620 | 624 | ||
| 621 | (setq element (match-string 1) | 625 | (setq element (match-string-no-properties 1) |
| 622 | type (match-string-no-properties 2)) | 626 | type (match-string-no-properties 2)) |
| 623 | (setq end-pos (match-end 0)) | 627 | (setq end-pos (match-end 0)) |
| 624 | 628 | ||
| @@ -629,7 +633,7 @@ This follows the rule [28] in the XML specifications." | |||
| 629 | ((string-match "^ANY[ \t\n\r]*$" type) ;; any type of contents | 633 | ((string-match "^ANY[ \t\n\r]*$" type) ;; any type of contents |
| 630 | (setq type 'any)) | 634 | (setq type 'any)) |
| 631 | ((string-match "^(\\(.*\\))[ \t\n\r]*$" type) ;; children ([47]) | 635 | ((string-match "^(\\(.*\\))[ \t\n\r]*$" type) ;; children ([47]) |
| 632 | (setq type (xml-parse-elem-type (match-string 1 type)))) | 636 | (setq type (xml-parse-elem-type (match-string-no-properties 1 type)))) |
| 633 | ((string-match "^%[^;]+;[ \t\n\r]*$" type) ;; substitution | 637 | ((string-match "^%[^;]+;[ \t\n\r]*$" type) ;; substitution |
| 634 | nil) | 638 | nil) |
| 635 | (t | 639 | (t |
| @@ -659,9 +663,9 @@ This follows the rule [28] in the XML specifications." | |||
| 659 | ((looking-at (concat "<!ENTITY[ \t\n\r]*\\(" xml-name-re | 663 | ((looking-at (concat "<!ENTITY[ \t\n\r]*\\(" xml-name-re |
| 660 | "\\)[ \t\n\r]*\\(" xml-entity-value-re | 664 | "\\)[ \t\n\r]*\\(" xml-entity-value-re |
| 661 | "\\)[ \t\n\r]*>")) | 665 | "\\)[ \t\n\r]*>")) |
| 662 | (let ((name (match-string 1)) | 666 | (let ((name (match-string-no-properties 1)) |
| 663 | (value (substring (match-string 2) 1 | 667 | (value (substring (match-string-no-properties 2) 1 |
| 664 | (- (length (match-string 2)) 1)))) | 668 | (- (length (match-string-no-properties 2)) 1)))) |
| 665 | (goto-char (match-end 0)) | 669 | (goto-char (match-end 0)) |
| 666 | (setq xml-entity-alist | 670 | (setq xml-entity-alist |
| 667 | (append xml-entity-alist | 671 | (append xml-entity-alist |
| @@ -681,9 +685,9 @@ This follows the rule [28] in the XML specifications." | |||
| 681 | "\\|'[- \r\na-zA-Z0-9()+,./:=?;!*#@$_%]*'" | 685 | "\\|'[- \r\na-zA-Z0-9()+,./:=?;!*#@$_%]*'" |
| 682 | "[ \t\n\r]+\\(\"[^\"]*\"\\|'[^']*'\\)" | 686 | "[ \t\n\r]+\\(\"[^\"]*\"\\|'[^']*'\\)" |
| 683 | "[ \t\n\r]*>"))) | 687 | "[ \t\n\r]*>"))) |
| 684 | (let ((name (match-string 1)) | 688 | (let ((name (match-string-no-properties 1)) |
| 685 | (file (substring (match-string 2) 1 | 689 | (file (substring (match-string-no-properties 2) 1 |
| 686 | (- (length (match-string 2)) 1)))) | 690 | (- (length (match-string-no-properties 2)) 1)))) |
| 687 | (goto-char (match-end 0)) | 691 | (goto-char (match-end 0)) |
| 688 | (setq xml-entity-alist | 692 | (setq xml-entity-alist |
| 689 | (append xml-entity-alist | 693 | (append xml-entity-alist |
| @@ -722,8 +726,8 @@ This follows the rule [28] in the XML specifications." | |||
| 722 | (let (elem modifier) | 726 | (let (elem modifier) |
| 723 | (if (string-match "(\\([^)]+\\))\\([+*?]?\\)" string) | 727 | (if (string-match "(\\([^)]+\\))\\([+*?]?\\)" string) |
| 724 | (progn | 728 | (progn |
| 725 | (setq elem (match-string 1 string) | 729 | (setq elem (match-string-no-properties 1 string) |
| 726 | modifier (match-string 2 string)) | 730 | modifier (match-string-no-properties 2 string)) |
| 727 | (if (string-match "|" elem) | 731 | (if (string-match "|" elem) |
| 728 | (setq elem (cons 'choice | 732 | (setq elem (cons 'choice |
| 729 | (mapcar 'xml-parse-elem-type | 733 | (mapcar 'xml-parse-elem-type |
| @@ -733,8 +737,8 @@ This follows the rule [28] in the XML specifications." | |||
| 733 | (mapcar 'xml-parse-elem-type | 737 | (mapcar 'xml-parse-elem-type |
| 734 | (split-string elem ","))))))) | 738 | (split-string elem ","))))))) |
| 735 | (if (string-match "[ \t\n\r]*\\([^+*?]+\\)\\([+*?]?\\)" string) | 739 | (if (string-match "[ \t\n\r]*\\([^+*?]+\\)\\([+*?]?\\)" string) |
| 736 | (setq elem (match-string 1 string) | 740 | (setq elem (match-string-no-properties 1 string) |
| 737 | modifier (match-string 2 string)))) | 741 | modifier (match-string-no-properties 2 string)))) |
| 738 | 742 | ||
| 739 | (if (and (stringp elem) (string= elem "#PCDATA")) | 743 | (if (and (stringp elem) (string= elem "#PCDATA")) |
| 740 | (setq elem 'pcdata)) | 744 | (setq elem 'pcdata)) |
| @@ -765,19 +769,19 @@ This follows the rule [28] in the XML specifications." | |||
| 765 | children end-point) | 769 | children end-point) |
| 766 | (while (string-match "&\\([^;]*\\);" string point) | 770 | (while (string-match "&\\([^;]*\\);" string point) |
| 767 | (setq end-point (match-end 0)) | 771 | (setq end-point (match-end 0)) |
| 768 | (let* ((this-part (match-string 1 string)) | 772 | (let* ((this-part (match-string-no-properties 1 string)) |
| 769 | (prev-part (substring string point (match-beginning 0))) | 773 | (prev-part (substring string point (match-beginning 0))) |
| 770 | (entity (assoc this-part xml-entity-alist)) | 774 | (entity (assoc this-part xml-entity-alist)) |
| 771 | (expansion | 775 | (expansion |
| 772 | (cond ((string-match "#\\([0-9]+\\)" this-part) | 776 | (cond ((string-match "#\\([0-9]+\\)" this-part) |
| 773 | (let ((c (decode-char | 777 | (let ((c (decode-char |
| 774 | 'ucs | 778 | 'ucs |
| 775 | (string-to-number (match-string 1 this-part))))) | 779 | (string-to-number (match-string-no-properties 1 this-part))))) |
| 776 | (if c (string c)))) | 780 | (if c (string c)))) |
| 777 | ((string-match "#x\\([[:xdigit:]]+\\)" this-part) | 781 | ((string-match "#x\\([[:xdigit:]]+\\)" this-part) |
| 778 | (let ((c (decode-char | 782 | (let ((c (decode-char |
| 779 | 'ucs | 783 | 'ucs |
| 780 | (string-to-number (match-string 1 this-part) 16)))) | 784 | (string-to-number (match-string-no-properties 1 this-part) 16)))) |
| 781 | (if c (string c)))) | 785 | (if c (string c)))) |
| 782 | (entity | 786 | (entity |
| 783 | (cdr entity)) | 787 | (cdr entity)) |