aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChong Yidong2012-07-03 00:21:54 +0800
committerChong Yidong2012-07-03 00:21:54 +0800
commita7aef6f5c6e22b167ea0234ab84c0308201d681b (patch)
treefd09ca2bd61cdc2e8b8e222b73578c669fa2e354
parent2b5208f18115bd0f364c11cbdc8124878158927a (diff)
downloademacs-a7aef6f5c6e22b167ea0234ab84c0308201d681b.tar.gz
emacs-a7aef6f5c6e22b167ea0234ab84c0308201d681b.zip
* lisp/xml.el: Handle entity and character reference expansion correctly.
(xml-default-ns): New variable. (xml-entity-alist): Use XML spec definitions for lt and amp. (xml-parse-region): Make first two arguments optional. Discard text properties. (xml-parse-tag-1): New function, spun off from xml-parse-tag. All callers changed. (xml-parse-tag): Call xml-parse-tag-1. For backward compatibility, this function should not modify buffer contents. (xml-parse-tag-1): Fix opening-tag regexp. (xml-parse-string): Rewrite, handling entity and character references properly. (xml--entity-replacement-text): Signal an error if a parameter entity is undefined. * test/automated/xml-parse-tests.el (xml-parse-tests--data): More testcases.
-rw-r--r--lisp/ChangeLog18
-rw-r--r--lisp/xml.el372
-rw-r--r--test/ChangeLog5
-rw-r--r--test/automated/xml-parse-tests.el15
4 files changed, 220 insertions, 190 deletions
diff --git a/lisp/ChangeLog b/lisp/ChangeLog
index 0e7c49342c7..bab4085587e 100644
--- a/lisp/ChangeLog
+++ b/lisp/ChangeLog
@@ -1,3 +1,21 @@
12012-07-02 Chong Yidong <cyd@gnu.org>
2
3 * xml.el: Fix entity and character reference expansion, allowing
4 them to expand into markup as per XML spec.
5 (xml-default-ns): New variable.
6 (xml-entity-alist): Use XML spec definitions for lt and amp.
7 (xml-parse-region): Make first two arguments optional. Discard
8 text properties.
9 (xml-parse-tag-1): New function, spun off from xml-parse-tag. All
10 callers changed.
11 (xml-parse-tag): Call xml-parse-tag-1. For backward
12 compatibility, this function should not modify buffer contents.
13 (xml-parse-tag-1): Fix opening-tag regexp.
14 (xml-parse-string): Rewrite, handling entity and character
15 references properly.
16 (xml--entity-replacement-text): Signal an error if a parameter
17 entity is undefined.
18
12012-07-02 Stefan Monnier <monnier@iro.umontreal.ca> 192012-07-02 Stefan Monnier <monnier@iro.umontreal.ca>
2 20
3 * comint.el (comint-output-filter): Filter out repeated prompts. 21 * comint.el (comint-output-filter): Filter out repeated prompts.
diff --git a/lisp/xml.el b/lisp/xml.el
index 5c1d2390a23..a3e279b41bd 100644
--- a/lisp/xml.el
+++ b/lisp/xml.el
@@ -80,22 +80,23 @@
80;; a worthwhile tradeoff especially since we're usually parsing files 80;; a worthwhile tradeoff especially since we're usually parsing files
81;; instead of hand-crafted XML. 81;; instead of hand-crafted XML.
82 82
83;;******************************************************************* 83;;; Macros to parse the list
84;;**
85;;** Macros to parse the list
86;;**
87;;*******************************************************************
88 84
89(defconst xml-undefined-entity "?" 85(defconst xml-undefined-entity "?"
90 "What to substitute for undefined entities") 86 "What to substitute for undefined entities")
91 87
88(defconst xml-default-ns '(("" . "")
89 ("xml" . "http://www.w3.org/XML/1998/namespace")
90 ("xmlns" . "http://www.w3.org/2000/xmlns/"))
91 "Alist mapping default XML namespaces to their URIs.")
92
92(defvar xml-entity-alist 93(defvar xml-entity-alist
93 '(("lt" . "<") 94 '(("lt" . "&#60;")
94 ("gt" . ">") 95 ("gt" . ">")
95 ("apos" . "'") 96 ("apos" . "'")
96 ("quot" . "\"") 97 ("quot" . "\"")
97 ("amp" . "&")) 98 ("amp" . "&#38;"))
98 "Alist of defined XML entities.") 99 "Alist mapping XML entities to their replacement text.")
99 100
100(defvar xml-parameter-entity-alist nil 101(defvar xml-parameter-entity-alist nil
101 "Alist of defined XML parametric entities.") 102 "Alist of defined XML parametric entities.")
@@ -156,11 +157,7 @@ An empty string is returned if the attribute was not found.
156See also `xml-get-attribute-or-nil'." 157See also `xml-get-attribute-or-nil'."
157 (or (xml-get-attribute-or-nil node attribute) "")) 158 (or (xml-get-attribute-or-nil node attribute) ""))
158 159
159;;******************************************************************* 160;;; Creating the list
160;;**
161;;** Creating the list
162;;**
163;;*******************************************************************
164 161
165;;;###autoload 162;;;###autoload
166(defun xml-parse-file (file &optional parse-dtd parse-ns) 163(defun xml-parse-file (file &optional parse-dtd parse-ns)
@@ -299,8 +296,10 @@ If PARSE-NS is non-nil, then QNAMES are expanded."
299;; prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? 296;; prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
300 297
301;;;###autoload 298;;;###autoload
302(defun xml-parse-region (beg end &optional buffer parse-dtd parse-ns) 299(defun xml-parse-region (&optional beg end buffer parse-dtd parse-ns)
303 "Parse the region from BEG to END in BUFFER. 300 "Parse the region from BEG to END in BUFFER.
301If BEG is nil, it defaults to `point-min'.
302If END is nil, it defaults to `point-max'.
304If BUFFER is nil, it defaults to the current buffer. 303If BUFFER is nil, it defaults to the current buffer.
305Returns the XML list for the region, or raises an error if the region 304Returns the XML list for the region, or raises an error if the region
306is not well-formed XML. 305is not well-formed XML.
@@ -312,7 +311,7 @@ If PARSE-NS is non-nil, then QNAMES are expanded."
312 (unless buffer 311 (unless buffer
313 (setq buffer (current-buffer))) 312 (setq buffer (current-buffer)))
314 (with-temp-buffer 313 (with-temp-buffer
315 (insert-buffer-substring buffer beg end) 314 (insert-buffer-substring-no-properties buffer beg end)
316 (xml--parse-buffer parse-dtd parse-ns))) 315 (xml--parse-buffer parse-dtd parse-ns)))
317 316
318(defun xml--parse-buffer (parse-dtd parse-ns) 317(defun xml--parse-buffer (parse-dtd parse-ns)
@@ -327,7 +326,7 @@ If PARSE-NS is non-nil, then QNAMES are expanded."
327 (if (search-forward "<" nil t) 326 (if (search-forward "<" nil t)
328 (progn 327 (progn
329 (forward-char -1) 328 (forward-char -1)
330 (setq result (xml-parse-tag parse-dtd parse-ns)) 329 (setq result (xml-parse-tag-1 parse-dtd parse-ns))
331 (cond 330 (cond
332 ((null result) 331 ((null result)
333 ;; Not looking at an xml start tag. 332 ;; Not looking at an xml start tag.
@@ -379,8 +378,7 @@ specify that the name shouldn't be given a namespace."
379 (xml-parameter-entity-alist xml-parameter-entity-alist) 378 (xml-parameter-entity-alist xml-parameter-entity-alist)
380 children) 379 children)
381 (while (not (eobp)) 380 (while (not (eobp))
382 (let ((bit (xml-parse-tag 381 (let ((bit (xml-parse-tag-1 parse-dtd parse-ns)))
383 parse-dtd parse-ns)))
384 (if children 382 (if children
385 (setq children (append (list bit) children)) 383 (setq children (append (list bit) children))
386 (if (stringp bit) 384 (if (stringp bit)
@@ -392,30 +390,32 @@ specify that the name shouldn't be given a namespace."
392 "Parse the tag at point. 390 "Parse the tag at point.
393If PARSE-DTD is non-nil, the DTD of the document, if any, is parsed and 391If PARSE-DTD is non-nil, the DTD of the document, if any, is parsed and
394returned as the first element in the list. 392returned as the first element in the list.
395If PARSE-NS is non-nil, then QNAMES are expanded. 393If PARSE-NS is non-nil, expand QNAMES; if the value of PARSE-NS
396Returns one of: 394is a list, use it as an alist mapping namespaces to URIs.
395
396Return one of:
397 - a list : the matching node 397 - a list : the matching node
398 - nil : the point is not looking at a tag. 398 - nil : the point is not looking at a tag.
399 - a pair : the first element is the DTD, the second is the node." 399 - a pair : the first element is the DTD, the second is the node."
400 (let ((buf (current-buffer))
401 (pos (point)))
402 (with-temp-buffer
403 (insert-buffer-substring-no-properties buf pos)
404 (goto-char (point-min))
405 (xml-parse-tag-1 parse-dtd parse-ns))))
406
407(defun xml-parse-tag-1 (&optional parse-dtd parse-ns)
408 "Like `xml-parse-tag', but possibly modify the buffer while working."
400 (let ((xml-validating-parser (or parse-dtd xml-validating-parser)) 409 (let ((xml-validating-parser (or parse-dtd xml-validating-parser))
401 (xml-ns (if (consp parse-ns) 410 (xml-ns (cond ((consp parse-ns) parse-ns)
402 parse-ns 411 (parse-ns xml-default-ns))))
403 (if parse-ns
404 (list
405 ;; Default for empty prefix is no namespace
406 (cons "" "")
407 ;; "xml" namespace
408 (cons "xml" "http://www.w3.org/XML/1998/namespace")
409 ;; We need to seed the xmlns namespace
410 (cons "xmlns" "http://www.w3.org/2000/xmlns/"))))))
411 (cond 412 (cond
412 ;; Processing instructions (like the <?xml version="1.0"?> tag at the 413 ;; Processing instructions, like <?xml version="1.0"?>.
413 ;; beginning of a document).
414 ((looking-at "<\\?") 414 ((looking-at "<\\?")
415 (search-forward "?>") 415 (search-forward "?>")
416 (skip-syntax-forward " ") 416 (skip-syntax-forward " ")
417 (xml-parse-tag parse-dtd xml-ns)) 417 (xml-parse-tag-1 parse-dtd xml-ns))
418 ;; Character data (CDATA) sections, in which no tag should be interpreted 418 ;; Character data (CDATA) sections, in which no tag should be interpreted
419 ((looking-at "<!\\[CDATA\\[") 419 ((looking-at "<!\\[CDATA\\[")
420 (let ((pos (match-end 0))) 420 (let ((pos (match-end 0)))
421 (unless (search-forward "]]>" nil t) 421 (unless (search-forward "]]>" nil t)
@@ -423,33 +423,32 @@ Returns one of:
423 (concat 423 (concat
424 (buffer-substring-no-properties pos (match-beginning 0)) 424 (buffer-substring-no-properties pos (match-beginning 0))
425 (xml-parse-string)))) 425 (xml-parse-string))))
426 ;; DTD for the document 426 ;; DTD for the document
427 ((looking-at "<!DOCTYPE[ \t\n\r]") 427 ((looking-at "<!DOCTYPE[ \t\n\r]")
428 (let ((dtd (xml-parse-dtd parse-ns))) 428 (let ((dtd (xml-parse-dtd parse-ns)))
429 (skip-syntax-forward " ") 429 (skip-syntax-forward " ")
430 (if xml-validating-parser 430 (if xml-validating-parser
431 (cons dtd (xml-parse-tag nil xml-ns)) 431 (cons dtd (xml-parse-tag-1 nil xml-ns))
432 (xml-parse-tag nil xml-ns)))) 432 (xml-parse-tag-1 nil xml-ns))))
433 ;; skip comments 433 ;; skip comments
434 ((looking-at "<!--") 434 ((looking-at "<!--")
435 (search-forward "-->") 435 (search-forward "-->")
436 ;; FIXME: This loses the skipped-over spaces.
436 (skip-syntax-forward " ") 437 (skip-syntax-forward " ")
437 (unless (eobp) 438 (unless (eobp)
438 (let ((xml-sub-parser t)) 439 (let ((xml-sub-parser t))
439 (xml-parse-tag parse-dtd xml-ns)))) 440 (xml-parse-tag-1 parse-dtd xml-ns))))
440 ;; end tag 441 ;; end tag
441 ((looking-at "</") 442 ((looking-at "</")
442 '()) 443 '())
443 ;; opening tag 444 ;; opening tag
444 ((looking-at "<\\([^/>[:space:]]+\\)") 445 ((looking-at (eval-when-compile (concat "<\\(" xml-name-re "\\)")))
445 (goto-char (match-end 1)) 446 (goto-char (match-end 1))
446
447 ;; Parse this node 447 ;; Parse this node
448 (let* ((node-name (match-string-no-properties 1)) 448 (let* ((node-name (match-string-no-properties 1))
449 ;; Parse the attribute list. 449 ;; Parse the attribute list.
450 (attrs (xml-parse-attlist xml-ns)) 450 (attrs (xml-parse-attlist xml-ns))
451 children) 451 children)
452
453 ;; add the xmlns:* attrs to our cache 452 ;; add the xmlns:* attrs to our cache
454 (when (consp xml-ns) 453 (when (consp xml-ns)
455 (dolist (attr attrs) 454 (dolist (attr attrs)
@@ -458,70 +457,114 @@ Returns one of:
458 (caar attr))) 457 (caar attr)))
459 (push (cons (cdar attr) (cdr attr)) 458 (push (cons (cdar attr) (cdr attr))
460 xml-ns)))) 459 xml-ns))))
461
462 (setq children (list attrs (xml-maybe-do-ns node-name "" xml-ns))) 460 (setq children (list attrs (xml-maybe-do-ns node-name "" xml-ns)))
461 (cond
462 ;; is this an empty element ?
463 ((looking-at "/>")
464 (forward-char 2)
465 (nreverse children))
466 ;; is this a valid start tag ?
467 ((eq (char-after) ?>)
468 (forward-char 1)
469 ;; Now check that we have the right end-tag.
470 (let ((end (concat "</" node-name "\\s-*>")))
471 (while (not (looking-at end))
472 (cond
473 ((eobp)
474 (error "XML: (Not Well-Formed) End of buffer while reading element `%s'"
475 node-name))
476 ((looking-at "</")
477 (forward-char 2)
478 (error "XML: (Not Well-Formed) Invalid end tag `%s' (expecting `%s')"
479 (let ((pos (point)))
480 (buffer-substring pos (if (re-search-forward "\\s-*>" nil t)
481 (match-beginning 0)
482 (point-max))))
483 node-name))
484 ;; Read a sub-element and push it onto CHILDREN.
485 ((= (char-after) ?<)
486 (let ((tag (xml-parse-tag-1 nil xml-ns)))
487 (when tag
488 (push tag children))))
489 ;; Read some character data.
490 (t
491 (let ((expansion (xml-parse-string)))
492 (push (if (stringp (car children))
493 ;; If two strings were separated by a
494 ;; comment, concat them.
495 (concat (pop children) expansion)
496 expansion)
497 children)))))
498 ;; Move point past the end-tag.
499 (goto-char (match-end 0))
500 (nreverse children)))
501 ;; Otherwise this was an invalid start tag (expected ">" not found.)
502 (t
503 (error "XML: (Well-Formed) Couldn't parse tag: %s"
504 (buffer-substring-no-properties (- (point) 10) (+ (point) 1)))))))
463 505
464 ;; is this an empty element ? 506 ;; (Not one of PI, CDATA, Comment, End tag, or Start tag)
465 (if (looking-at "/>") 507 (t
466 (progn 508 (unless xml-sub-parser ; Usually, we error out.
467 (forward-char 2)
468 (nreverse children))
469
470 ;; is this a valid start tag ?
471 (if (eq (char-after) ?>)
472 (progn
473 (forward-char 1)
474 ;; Now check that we have the right end-tag. Note that this
475 ;; one might contain spaces after the tag name
476 (let ((end (concat "</" node-name "\\s-*>")))
477 (while (not (looking-at end))
478 (cond
479 ((looking-at "</")
480 (error "XML: (Not Well-Formed) Invalid end tag (expecting %s) at pos %d"
481 node-name (point)))
482 ((= (char-after) ?<)
483 (let ((tag (xml-parse-tag nil xml-ns)))
484 (when tag
485 (push tag children))))
486 (t
487 (let ((expansion (xml-parse-string)))
488 (setq children
489 (if (stringp expansion)
490 (if (stringp (car children))
491 ;; The two strings were separated by a comment.
492 (setq children (append (list (concat (car children) expansion))
493 (cdr children)))
494 (setq children (append (list expansion) children)))
495 (setq children (append expansion children))))))))
496
497 (goto-char (match-end 0))
498 (nreverse children)))
499 ;; This was an invalid start tag (Expected ">", but didn't see it.)
500 (error "XML: (Well-Formed) Couldn't parse tag: %s"
501 (buffer-substring-no-properties (- (point) 10) (+ (point) 1)))))))
502 (t ;; (Not one of PI, CDATA, Comment, End tag, or Start tag)
503 (unless xml-sub-parser ; Usually, we error out.
504 (error "XML: (Well-Formed) Invalid character")) 509 (error "XML: (Well-Formed) Invalid character"))
505
506 ;; However, if we're parsing incrementally, then we need to deal 510 ;; However, if we're parsing incrementally, then we need to deal
507 ;; with stray CDATA. 511 ;; with stray CDATA.
508 (xml-parse-string))))) 512 (xml-parse-string)))))
509 513
510(defun xml-parse-string () 514(defun xml-parse-string ()
511 "Parse the next whatever. Could be a string, or an element." 515 "Parse character data at point, and return it as a string.
512 (let* ((pos (point)) 516Leave point at the start of the next thing to parse. This
513 (string (progn (skip-chars-forward "^<") 517function can modify the buffer by expanding entity and character
514 (buffer-substring-no-properties pos (point))))) 518references."
515 ;; Clean up the string. As per XML specifications, the XML 519 (let ((start (point))
516 ;; processor should always pass the whole string to the 520 ref val)
517 ;; application. But \r's should be replaced: 521 (while (and (not (eobp))
518 ;; http://www.w3.org/TR/2000/REC-xml-20001006#sec-line-ends 522 (not (looking-at "<")))
519 (setq pos 0) 523 ;; Find the next < or & character.
520 (while (string-match "\r\n?" string pos) 524 (skip-chars-forward "^<&")
521 (setq string (replace-match "\n" t t string)) 525 (when (eq (char-after) ?&)
522 (setq pos (1+ (match-beginning 0)))) 526 ;; If we find an entity or character reference, expand it.
523 527 (unless (looking-at (eval-when-compile
524 (xml-substitute-special string))) 528 (concat "&\\(?:#\\([0-9]+\\)\\|#x\\([0-9a-fA-F]+\\)\\|\\("
529 xml-name-re "\\)\\);")))
530 (error "XML: (Not Well-Formed) Invalid entity reference"))
531 ;; For a character reference, the next entity or character
532 ;; reference must be after the replacement. [4.6] "Numerical
533 ;; character references are expanded immediately when
534 ;; recognized and MUST be treated as character data."
535 (cond ((setq ref (match-string 1))
536 ;; Decimal character reference
537 (setq val (save-match-data
538 (decode-char 'ucs (string-to-number ref))))
539 (and (null val)
540 xml-validating-parser
541 (error "XML: (Validity) Invalid character `%s'" ref))
542 (replace-match (or (string val) xml-undefined-entity) t t))
543 ;; Hexadecimal character reference
544 ((setq ref (match-string 2))
545 (setq val (save-match-data
546 (decode-char 'ucs (string-to-number ref 16))))
547 (and (null val)
548 xml-validating-parser
549 (error "XML: (Validity) Invalid character `x%s'" ref))
550 (replace-match (or (string val) xml-undefined-entity) t t))
551 ;; For an entity reference, search again from the start
552 ;; of the replaced text, since the replacement can
553 ;; contain entity or character references, or markup.
554 ((setq ref (match-string 3))
555 (setq val (assoc ref xml-entity-alist))
556 (and (null val)
557 xml-validating-parser
558 (error "XML: (Validity) Undefined entity `%s'" ref))
559 (replace-match (cdr val) t t)
560 (goto-char (match-beginning 0))))))
561 ;; [2.11] Clean up line breaks.
562 (let ((end-marker (point-marker)))
563 (goto-char start)
564 (while (re-search-forward "\r\n?" end-marker t)
565 (replace-match "\n" t t))
566 (goto-char end-marker)
567 (buffer-substring start (point)))))
525 568
526(defun xml-parse-attlist (&optional xml-ns) 569(defun xml-parse-attlist (&optional xml-ns)
527 "Return the attribute-list after point. 570 "Return the attribute-list after point.
@@ -564,15 +607,11 @@ Leave point at the first non-blank character after the tag."
564 (skip-syntax-forward " ")) 607 (skip-syntax-forward " "))
565 (nreverse attlist))) 608 (nreverse attlist)))
566 609
567;;******************************************************************* 610;;; DTD (document type declaration)
568;;**
569;;** The DTD (document type declaration)
570;;** The following functions know how to skip or parse the DTD of
571;;** a document
572;;**
573;;*******************************************************************
574 611
575;; Fixme: This fails at least if the DTD contains conditional sections. 612;; The following functions know how to skip or parse the DTD of a
613;; document. FIXME: it fails at least if the DTD contains conditional
614;; sections.
576 615
577(defun xml-skip-dtd () 616(defun xml-skip-dtd ()
578 "Skip the DTD at point. 617 "Skip the DTD at point.
@@ -789,9 +828,10 @@ references and parameter-entity references."
789 ;; Parameter entity reference 828 ;; Parameter entity reference
790 ((setq ref (match-string 3 string)) 829 ((setq ref (match-string 3 string))
791 (setq val (assoc ref xml-parameter-entity-alist)) 830 (setq val (assoc ref xml-parameter-entity-alist))
792 (if val 831 (and (null val)
793 (push (cdr val) children) 832 xml-validating-parser
794 (push (concat "%" ref ";") children)))) 833 (error "XML: (Validity) Undefined parameter entity `%s'" ref))
834 (push (or (cdr val) xml-undefined-entity) children)))
795 (setq string remainder))) 835 (setq string remainder)))
796 (mapconcat 'identity (nreverse (cons string children)) ""))) 836 (mapconcat 'identity (nreverse (cons string children)) "")))
797 837
@@ -828,79 +868,40 @@ references and parameter-entity references."
828 (t 868 (t
829 elem)))) 869 elem))))
830 870
831;;******************************************************************* 871;;; Substituting special XML sequences
832;;**
833;;** Substituting special XML sequences
834;;**
835;;*******************************************************************
836 872
837(defun xml-substitute-special (string) 873(defun xml-substitute-special (string)
838 "Return STRING, after substituting entity references." 874 "Return STRING, after substituting entity and character references.
839 ;; This originally made repeated passes through the string from the 875STRING is assumed to occur in an XML attribute value."
840 ;; beginning, which isn't correct, since then either "&amp;amp;" or 876 (let ((ref-re (eval-when-compile
841 ;; "&#38;amp;" won't DTRT. 877 (concat "&\\(?:#\\(x\\)?\\([0-9]+\\)\\|\\("
842 878 xml-name-re "\\)\\);")))
843 (let ((point 0) 879 children)
844 children end-point) 880 (while (string-match ref-re string)
845 (while (string-match "&\\([^;]*\\);" string point) 881 (push (substring string 0 (match-beginning 0)) children)
846 (setq end-point (match-end 0)) 882 (let* ((remainder (substring string (match-end 0)))
847 (let* ((this-part (match-string-no-properties 1 string)) 883 (ref (match-string 2 string)))
848 (prev-part (substring string point (match-beginning 0))) 884 (if ref
849 (entity (assoc this-part xml-entity-alist)) 885 ;; [4.6] Character references are included as
850 (expansion 886 ;; character data.
851 (cond ((string-match "#\\([0-9]+\\)" this-part) 887 (let ((val (decode-char 'ucs (string-to-number
852 (let ((c (decode-char 888 ref (if (match-string 1 string) 16)))))
853 'ucs 889 (push (cond (val (string val))
854 (string-to-number (match-string-no-properties 1 this-part))))) 890 (xml-validating-parser
855 (if c (string c)))) 891 (error "XML: (Validity) Undefined character `x%s'" ref))
856 ((string-match "#x\\([[:xdigit:]]+\\)" this-part) 892 (t xml-undefined-entity))
857 (let ((c (decode-char 893 children)
858 'ucs 894 (setq string remainder))
859 (string-to-number (match-string-no-properties 1 this-part) 16)))) 895 ;; [4.4.5] Entity references are "included in literal".
860 (if c (string c)))) 896 ;; Note that we don't need do anything special to treat
861 (entity 897 ;; quotes as normal data characters.
862 (cdr entity)) 898 (setq ref (match-string 3 string))
863 ((eq (length this-part) 0) 899 (let ((val (or (cdr (assoc ref xml-entity-alist))
864 (error "XML: (Not Well-Formed) No entity given")) 900 (if xml-validating-parser
865 (t 901 (error "XML: (Validity) Undefined entity `%s'" ref)
866 (if xml-validating-parser 902 xml-undefined-entity))))
867 (error "XML: (Validity) Undefined entity `%s'" 903 (setq string (concat val remainder))))))
868 this-part) 904 (mapconcat 'identity (nreverse (cons string children)) "")))
869 xml-undefined-entity)))))
870
871 (cond ((null children)
872 ;; FIXME: If we have an entity that expands into XML, this won't work.
873 (setq children
874 (concat prev-part expansion)))
875 ((stringp children)
876 (if (stringp expansion)
877 (setq children (concat children prev-part expansion))
878 (setq children (list expansion (concat prev-part children)))))
879 ((and (stringp expansion)
880 (stringp (car children)))
881 (setcar children (concat prev-part expansion (car children))))
882 ((stringp expansion)
883 (setq children (append (concat prev-part expansion)
884 children)))
885 ((stringp (car children))
886 (setcar children (concat (car children) prev-part))
887 (setq children (append expansion children)))
888 (t
889 (setq children (list expansion
890 prev-part
891 children))))
892 (setq point end-point)))
893 (cond ((stringp children)
894 (concat children (substring string point)))
895 ((stringp (car (last children)))
896 (concat (car (last children)) (substring string point)))
897 ((null children)
898 string)
899 (t
900 (concat (mapconcat 'identity
901 (nreverse children)
902 "")
903 (substring string point))))))
904 905
905(defun xml-substitute-numeric-entities (string) 906(defun xml-substitute-numeric-entities (string)
906 "Substitute SGML numeric entities by their respective utf characters. 907 "Substitute SGML numeric entities by their respective utf characters.
@@ -921,12 +922,7 @@ by \"*\"."
921 string) 922 string)
922 nil)) 923 nil))
923 924
924;;******************************************************************* 925;;; Printing a parse tree (mainly for debugging).
925;;**
926;;** Printing a tree.
927;;** This function is intended mainly for debugging purposes.
928;;**
929;;*******************************************************************
930 926
931(defun xml-debug-print (xml &optional indent-string) 927(defun xml-debug-print (xml &optional indent-string)
932 "Outputs the XML in the current buffer. 928 "Outputs the XML in the current buffer.
diff --git a/test/ChangeLog b/test/ChangeLog
index d9d9bc5a9fa..3ff7124893a 100644
--- a/test/ChangeLog
+++ b/test/ChangeLog
@@ -1,3 +1,8 @@
12012-07-02 Chong Yidong <cyd@gnu.org>
2
3 * automated/xml-parse-tests.el (xml-parse-tests--data): More
4 testcases.
5
12012-07-01 Chong Yidong <cyd@gnu.org> 62012-07-01 Chong Yidong <cyd@gnu.org>
2 7
3 * automated/xml-parse-tests.el: New file. 8 * automated/xml-parse-tests.el: New file.
diff --git a/test/automated/xml-parse-tests.el b/test/automated/xml-parse-tests.el
index 8322a8c6ff9..ec3d7ca3065 100644
--- a/test/automated/xml-parse-tests.el
+++ b/test/automated/xml-parse-tests.el
@@ -33,15 +33,26 @@
33 '(;; General entity substitution 33 '(;; General entity substitution
34 ("<?xml version=\"1.0\"?><!DOCTYPE foo SYSTEM \"bar.dtd\" [<!ENTITY ent \"AbC\">]><foo a=\"b\"><bar>&ent;;</bar></foo>" . 34 ("<?xml version=\"1.0\"?><!DOCTYPE foo SYSTEM \"bar.dtd\" [<!ENTITY ent \"AbC\">]><foo a=\"b\"><bar>&ent;;</bar></foo>" .
35 ((foo ((a . "b")) (bar nil "AbC;")))) 35 ((foo ((a . "b")) (bar nil "AbC;"))))
36 ("<?xml version=\"1.0\"?><foo>&amp;amp;&#38;apos;&apos;&lt;&gt;&quot;</foo>" .
37 ((foo () "&amp;&apos;'<>\"")))
36 ;; Parameter entity substitution 38 ;; Parameter entity substitution
37 ("<?xml version=\"1.0\"?><!DOCTYPE foo SYSTEM \"bar.dtd\" [<!ENTITY % pent \"AbC\"><!ENTITY ent \"%pent;\">]><foo a=\"b\"><bar>&ent;;</bar></foo>" . 39 ("<?xml version=\"1.0\"?><!DOCTYPE foo SYSTEM \"bar.dtd\" [<!ENTITY % pent \"AbC\"><!ENTITY ent \"%pent;\">]><foo a=\"b\"><bar>&ent;;</bar></foo>" .
38 ((foo ((a . "b")) (bar nil "AbC;")))) 40 ((foo ((a . "b")) (bar nil "AbC;"))))
39 ;; Tricky parameter entity substitution (like XML spec Appendix D) 41 ;; Tricky parameter entity substitution (like XML spec Appendix D)
40 ("<?xml version='1.0'?><!DOCTYPE foo [ <!ENTITY % xx '&#37;zz;'><!ENTITY % zz '&#60;!ENTITY ent \"b\" >' > %xx; ]><foo>A&ent;C</foo>" . 42 ("<?xml version='1.0'?><!DOCTYPE foo [ <!ENTITY % xx '&#37;zz;'><!ENTITY % zz '&#60;!ENTITY ent \"b\" >' > %xx; ]><foo>A&ent;C</foo>" .
41 ((foo nil "AbC"))) 43 ((foo () "AbC")))
42 ;; Bug#7172 44 ;; Bug#7172
43 ("<?xml version=\"1.0\"?><!DOCTYPE foo [ <!ELEMENT EXAM_PLE EMPTY> ]><foo></foo>" . 45 ("<?xml version=\"1.0\"?><!DOCTYPE foo [ <!ELEMENT EXAM_PLE EMPTY> ]><foo></foo>" .
44 ((foo nil)))) 46 ((foo ())))
47 ;; Entities referencing entities, in character data
48 ("<!DOCTYPE foo [ <!ENTITY b \"B\"><!ENTITY abc \"a&b;c\">]><foo>&abc;</foo>" .
49 ((foo () "aBc")))
50 ;; Entities referencing entities, in attribute values
51 ("<!DOCTYPE foo [ <!ENTITY b \"B\"><!ENTITY abc \"a&b;c\">]><foo a=\"-&abc;-\">1</foo>" .
52 ((foo ((a . "-aBc-")) "1")))
53 ;; Character references must be treated as character data
54 ("<foo>AT&amp;T;</foo>" . ((foo () "AT&T;")))
55 ("<foo>&#38;amp;</foo>" . ((foo () "&amp;"))))
45 "Alist of XML strings and their expected parse trees.") 56 "Alist of XML strings and their expected parse trees.")
46 57
47(ert-deftest xml-parse-tests () 58(ert-deftest xml-parse-tests ()