diff options
| author | Chong Yidong | 2012-07-03 00:21:54 +0800 |
|---|---|---|
| committer | Chong Yidong | 2012-07-03 00:21:54 +0800 |
| commit | a7aef6f5c6e22b167ea0234ab84c0308201d681b (patch) | |
| tree | fd09ca2bd61cdc2e8b8e222b73578c669fa2e354 | |
| parent | 2b5208f18115bd0f364c11cbdc8124878158927a (diff) | |
| download | emacs-a7aef6f5c6e22b167ea0234ab84c0308201d681b.tar.gz emacs-a7aef6f5c6e22b167ea0234ab84c0308201d681b.zip | |
* lisp/xml.el: Handle entity and character reference expansion correctly.
(xml-default-ns): New variable.
(xml-entity-alist): Use XML spec definitions for lt and amp.
(xml-parse-region): Make first two arguments optional. Discard
text properties.
(xml-parse-tag-1): New function, spun off from xml-parse-tag. All
callers changed.
(xml-parse-tag): Call xml-parse-tag-1. For backward
compatibility, this function should not modify buffer contents.
(xml-parse-tag-1): Fix opening-tag regexp.
(xml-parse-string): Rewrite, handling entity and character
references properly.
(xml--entity-replacement-text): Signal an error if a parameter
entity is undefined.
* test/automated/xml-parse-tests.el (xml-parse-tests--data): More
testcases.
| -rw-r--r-- | lisp/ChangeLog | 18 | ||||
| -rw-r--r-- | lisp/xml.el | 372 | ||||
| -rw-r--r-- | test/ChangeLog | 5 | ||||
| -rw-r--r-- | test/automated/xml-parse-tests.el | 15 |
4 files changed, 220 insertions, 190 deletions
diff --git a/lisp/ChangeLog b/lisp/ChangeLog index 0e7c49342c7..bab4085587e 100644 --- a/lisp/ChangeLog +++ b/lisp/ChangeLog | |||
| @@ -1,3 +1,21 @@ | |||
| 1 | 2012-07-02 Chong Yidong <cyd@gnu.org> | ||
| 2 | |||
| 3 | * xml.el: Fix entity and character reference expansion, allowing | ||
| 4 | them to expand into markup as per XML spec. | ||
| 5 | (xml-default-ns): New variable. | ||
| 6 | (xml-entity-alist): Use XML spec definitions for lt and amp. | ||
| 7 | (xml-parse-region): Make first two arguments optional. Discard | ||
| 8 | text properties. | ||
| 9 | (xml-parse-tag-1): New function, spun off from xml-parse-tag. All | ||
| 10 | callers changed. | ||
| 11 | (xml-parse-tag): Call xml-parse-tag-1. For backward | ||
| 12 | compatibility, this function should not modify buffer contents. | ||
| 13 | (xml-parse-tag-1): Fix opening-tag regexp. | ||
| 14 | (xml-parse-string): Rewrite, handling entity and character | ||
| 15 | references properly. | ||
| 16 | (xml--entity-replacement-text): Signal an error if a parameter | ||
| 17 | entity is undefined. | ||
| 18 | |||
| 1 | 2012-07-02 Stefan Monnier <monnier@iro.umontreal.ca> | 19 | 2012-07-02 Stefan Monnier <monnier@iro.umontreal.ca> |
| 2 | 20 | ||
| 3 | * comint.el (comint-output-filter): Filter out repeated prompts. | 21 | * comint.el (comint-output-filter): Filter out repeated prompts. |
diff --git a/lisp/xml.el b/lisp/xml.el index 5c1d2390a23..a3e279b41bd 100644 --- a/lisp/xml.el +++ b/lisp/xml.el | |||
| @@ -80,22 +80,23 @@ | |||
| 80 | ;; a worthwhile tradeoff especially since we're usually parsing files | 80 | ;; a worthwhile tradeoff especially since we're usually parsing files |
| 81 | ;; instead of hand-crafted XML. | 81 | ;; instead of hand-crafted XML. |
| 82 | 82 | ||
| 83 | ;;******************************************************************* | 83 | ;;; Macros to parse the list |
| 84 | ;;** | ||
| 85 | ;;** Macros to parse the list | ||
| 86 | ;;** | ||
| 87 | ;;******************************************************************* | ||
| 88 | 84 | ||
| 89 | (defconst xml-undefined-entity "?" | 85 | (defconst xml-undefined-entity "?" |
| 90 | "What to substitute for undefined entities") | 86 | "What to substitute for undefined entities") |
| 91 | 87 | ||
| 88 | (defconst xml-default-ns '(("" . "") | ||
| 89 | ("xml" . "http://www.w3.org/XML/1998/namespace") | ||
| 90 | ("xmlns" . "http://www.w3.org/2000/xmlns/")) | ||
| 91 | "Alist mapping default XML namespaces to their URIs.") | ||
| 92 | |||
| 92 | (defvar xml-entity-alist | 93 | (defvar xml-entity-alist |
| 93 | '(("lt" . "<") | 94 | '(("lt" . "<") |
| 94 | ("gt" . ">") | 95 | ("gt" . ">") |
| 95 | ("apos" . "'") | 96 | ("apos" . "'") |
| 96 | ("quot" . "\"") | 97 | ("quot" . "\"") |
| 97 | ("amp" . "&")) | 98 | ("amp" . "&")) |
| 98 | "Alist of defined XML entities.") | 99 | "Alist mapping XML entities to their replacement text.") |
| 99 | 100 | ||
| 100 | (defvar xml-parameter-entity-alist nil | 101 | (defvar xml-parameter-entity-alist nil |
| 101 | "Alist of defined XML parametric entities.") | 102 | "Alist of defined XML parametric entities.") |
| @@ -156,11 +157,7 @@ An empty string is returned if the attribute was not found. | |||
| 156 | See also `xml-get-attribute-or-nil'." | 157 | See also `xml-get-attribute-or-nil'." |
| 157 | (or (xml-get-attribute-or-nil node attribute) "")) | 158 | (or (xml-get-attribute-or-nil node attribute) "")) |
| 158 | 159 | ||
| 159 | ;;******************************************************************* | 160 | ;;; Creating the list |
| 160 | ;;** | ||
| 161 | ;;** Creating the list | ||
| 162 | ;;** | ||
| 163 | ;;******************************************************************* | ||
| 164 | 161 | ||
| 165 | ;;;###autoload | 162 | ;;;###autoload |
| 166 | (defun xml-parse-file (file &optional parse-dtd parse-ns) | 163 | (defun xml-parse-file (file &optional parse-dtd parse-ns) |
| @@ -299,8 +296,10 @@ If PARSE-NS is non-nil, then QNAMES are expanded." | |||
| 299 | ;; prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? | 296 | ;; prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? |
| 300 | 297 | ||
| 301 | ;;;###autoload | 298 | ;;;###autoload |
| 302 | (defun xml-parse-region (beg end &optional buffer parse-dtd parse-ns) | 299 | (defun xml-parse-region (&optional beg end buffer parse-dtd parse-ns) |
| 303 | "Parse the region from BEG to END in BUFFER. | 300 | "Parse the region from BEG to END in BUFFER. |
| 301 | If BEG is nil, it defaults to `point-min'. | ||
| 302 | If END is nil, it defaults to `point-max'. | ||
| 304 | If BUFFER is nil, it defaults to the current buffer. | 303 | If BUFFER is nil, it defaults to the current buffer. |
| 305 | Returns the XML list for the region, or raises an error if the region | 304 | Returns the XML list for the region, or raises an error if the region |
| 306 | is not well-formed XML. | 305 | is not well-formed XML. |
| @@ -312,7 +311,7 @@ If PARSE-NS is non-nil, then QNAMES are expanded." | |||
| 312 | (unless buffer | 311 | (unless buffer |
| 313 | (setq buffer (current-buffer))) | 312 | (setq buffer (current-buffer))) |
| 314 | (with-temp-buffer | 313 | (with-temp-buffer |
| 315 | (insert-buffer-substring buffer beg end) | 314 | (insert-buffer-substring-no-properties buffer beg end) |
| 316 | (xml--parse-buffer parse-dtd parse-ns))) | 315 | (xml--parse-buffer parse-dtd parse-ns))) |
| 317 | 316 | ||
| 318 | (defun xml--parse-buffer (parse-dtd parse-ns) | 317 | (defun xml--parse-buffer (parse-dtd parse-ns) |
| @@ -327,7 +326,7 @@ If PARSE-NS is non-nil, then QNAMES are expanded." | |||
| 327 | (if (search-forward "<" nil t) | 326 | (if (search-forward "<" nil t) |
| 328 | (progn | 327 | (progn |
| 329 | (forward-char -1) | 328 | (forward-char -1) |
| 330 | (setq result (xml-parse-tag parse-dtd parse-ns)) | 329 | (setq result (xml-parse-tag-1 parse-dtd parse-ns)) |
| 331 | (cond | 330 | (cond |
| 332 | ((null result) | 331 | ((null result) |
| 333 | ;; Not looking at an xml start tag. | 332 | ;; Not looking at an xml start tag. |
| @@ -379,8 +378,7 @@ specify that the name shouldn't be given a namespace." | |||
| 379 | (xml-parameter-entity-alist xml-parameter-entity-alist) | 378 | (xml-parameter-entity-alist xml-parameter-entity-alist) |
| 380 | children) | 379 | children) |
| 381 | (while (not (eobp)) | 380 | (while (not (eobp)) |
| 382 | (let ((bit (xml-parse-tag | 381 | (let ((bit (xml-parse-tag-1 parse-dtd parse-ns))) |
| 383 | parse-dtd parse-ns))) | ||
| 384 | (if children | 382 | (if children |
| 385 | (setq children (append (list bit) children)) | 383 | (setq children (append (list bit) children)) |
| 386 | (if (stringp bit) | 384 | (if (stringp bit) |
| @@ -392,30 +390,32 @@ specify that the name shouldn't be given a namespace." | |||
| 392 | "Parse the tag at point. | 390 | "Parse the tag at point. |
| 393 | If PARSE-DTD is non-nil, the DTD of the document, if any, is parsed and | 391 | If PARSE-DTD is non-nil, the DTD of the document, if any, is parsed and |
| 394 | returned as the first element in the list. | 392 | returned as the first element in the list. |
| 395 | If PARSE-NS is non-nil, then QNAMES are expanded. | 393 | If PARSE-NS is non-nil, expand QNAMES; if the value of PARSE-NS |
| 396 | Returns one of: | 394 | is a list, use it as an alist mapping namespaces to URIs. |
| 395 | |||
| 396 | Return one of: | ||
| 397 | - a list : the matching node | 397 | - a list : the matching node |
| 398 | - nil : the point is not looking at a tag. | 398 | - nil : the point is not looking at a tag. |
| 399 | - a pair : the first element is the DTD, the second is the node." | 399 | - a pair : the first element is the DTD, the second is the node." |
| 400 | (let ((buf (current-buffer)) | ||
| 401 | (pos (point))) | ||
| 402 | (with-temp-buffer | ||
| 403 | (insert-buffer-substring-no-properties buf pos) | ||
| 404 | (goto-char (point-min)) | ||
| 405 | (xml-parse-tag-1 parse-dtd parse-ns)))) | ||
| 406 | |||
| 407 | (defun xml-parse-tag-1 (&optional parse-dtd parse-ns) | ||
| 408 | "Like `xml-parse-tag', but possibly modify the buffer while working." | ||
| 400 | (let ((xml-validating-parser (or parse-dtd xml-validating-parser)) | 409 | (let ((xml-validating-parser (or parse-dtd xml-validating-parser)) |
| 401 | (xml-ns (if (consp parse-ns) | 410 | (xml-ns (cond ((consp parse-ns) parse-ns) |
| 402 | parse-ns | 411 | (parse-ns xml-default-ns)))) |
| 403 | (if parse-ns | ||
| 404 | (list | ||
| 405 | ;; Default for empty prefix is no namespace | ||
| 406 | (cons "" "") | ||
| 407 | ;; "xml" namespace | ||
| 408 | (cons "xml" "http://www.w3.org/XML/1998/namespace") | ||
| 409 | ;; We need to seed the xmlns namespace | ||
| 410 | (cons "xmlns" "http://www.w3.org/2000/xmlns/")))))) | ||
| 411 | (cond | 412 | (cond |
| 412 | ;; Processing instructions (like the <?xml version="1.0"?> tag at the | 413 | ;; Processing instructions, like <?xml version="1.0"?>. |
| 413 | ;; beginning of a document). | ||
| 414 | ((looking-at "<\\?") | 414 | ((looking-at "<\\?") |
| 415 | (search-forward "?>") | 415 | (search-forward "?>") |
| 416 | (skip-syntax-forward " ") | 416 | (skip-syntax-forward " ") |
| 417 | (xml-parse-tag parse-dtd xml-ns)) | 417 | (xml-parse-tag-1 parse-dtd xml-ns)) |
| 418 | ;; Character data (CDATA) sections, in which no tag should be interpreted | 418 | ;; Character data (CDATA) sections, in which no tag should be interpreted |
| 419 | ((looking-at "<!\\[CDATA\\[") | 419 | ((looking-at "<!\\[CDATA\\[") |
| 420 | (let ((pos (match-end 0))) | 420 | (let ((pos (match-end 0))) |
| 421 | (unless (search-forward "]]>" nil t) | 421 | (unless (search-forward "]]>" nil t) |
| @@ -423,33 +423,32 @@ Returns one of: | |||
| 423 | (concat | 423 | (concat |
| 424 | (buffer-substring-no-properties pos (match-beginning 0)) | 424 | (buffer-substring-no-properties pos (match-beginning 0)) |
| 425 | (xml-parse-string)))) | 425 | (xml-parse-string)))) |
| 426 | ;; DTD for the document | 426 | ;; DTD for the document |
| 427 | ((looking-at "<!DOCTYPE[ \t\n\r]") | 427 | ((looking-at "<!DOCTYPE[ \t\n\r]") |
| 428 | (let ((dtd (xml-parse-dtd parse-ns))) | 428 | (let ((dtd (xml-parse-dtd parse-ns))) |
| 429 | (skip-syntax-forward " ") | 429 | (skip-syntax-forward " ") |
| 430 | (if xml-validating-parser | 430 | (if xml-validating-parser |
| 431 | (cons dtd (xml-parse-tag nil xml-ns)) | 431 | (cons dtd (xml-parse-tag-1 nil xml-ns)) |
| 432 | (xml-parse-tag nil xml-ns)))) | 432 | (xml-parse-tag-1 nil xml-ns)))) |
| 433 | ;; skip comments | 433 | ;; skip comments |
| 434 | ((looking-at "<!--") | 434 | ((looking-at "<!--") |
| 435 | (search-forward "-->") | 435 | (search-forward "-->") |
| 436 | ;; FIXME: This loses the skipped-over spaces. | ||
| 436 | (skip-syntax-forward " ") | 437 | (skip-syntax-forward " ") |
| 437 | (unless (eobp) | 438 | (unless (eobp) |
| 438 | (let ((xml-sub-parser t)) | 439 | (let ((xml-sub-parser t)) |
| 439 | (xml-parse-tag parse-dtd xml-ns)))) | 440 | (xml-parse-tag-1 parse-dtd xml-ns)))) |
| 440 | ;; end tag | 441 | ;; end tag |
| 441 | ((looking-at "</") | 442 | ((looking-at "</") |
| 442 | '()) | 443 | '()) |
| 443 | ;; opening tag | 444 | ;; opening tag |
| 444 | ((looking-at "<\\([^/>[:space:]]+\\)") | 445 | ((looking-at (eval-when-compile (concat "<\\(" xml-name-re "\\)"))) |
| 445 | (goto-char (match-end 1)) | 446 | (goto-char (match-end 1)) |
| 446 | |||
| 447 | ;; Parse this node | 447 | ;; Parse this node |
| 448 | (let* ((node-name (match-string-no-properties 1)) | 448 | (let* ((node-name (match-string-no-properties 1)) |
| 449 | ;; Parse the attribute list. | 449 | ;; Parse the attribute list. |
| 450 | (attrs (xml-parse-attlist xml-ns)) | 450 | (attrs (xml-parse-attlist xml-ns)) |
| 451 | children) | 451 | children) |
| 452 | |||
| 453 | ;; add the xmlns:* attrs to our cache | 452 | ;; add the xmlns:* attrs to our cache |
| 454 | (when (consp xml-ns) | 453 | (when (consp xml-ns) |
| 455 | (dolist (attr attrs) | 454 | (dolist (attr attrs) |
| @@ -458,70 +457,114 @@ Returns one of: | |||
| 458 | (caar attr))) | 457 | (caar attr))) |
| 459 | (push (cons (cdar attr) (cdr attr)) | 458 | (push (cons (cdar attr) (cdr attr)) |
| 460 | xml-ns)))) | 459 | xml-ns)))) |
| 461 | |||
| 462 | (setq children (list attrs (xml-maybe-do-ns node-name "" xml-ns))) | 460 | (setq children (list attrs (xml-maybe-do-ns node-name "" xml-ns))) |
| 461 | (cond | ||
| 462 | ;; is this an empty element ? | ||
| 463 | ((looking-at "/>") | ||
| 464 | (forward-char 2) | ||
| 465 | (nreverse children)) | ||
| 466 | ;; is this a valid start tag ? | ||
| 467 | ((eq (char-after) ?>) | ||
| 468 | (forward-char 1) | ||
| 469 | ;; Now check that we have the right end-tag. | ||
| 470 | (let ((end (concat "</" node-name "\\s-*>"))) | ||
| 471 | (while (not (looking-at end)) | ||
| 472 | (cond | ||
| 473 | ((eobp) | ||
| 474 | (error "XML: (Not Well-Formed) End of buffer while reading element `%s'" | ||
| 475 | node-name)) | ||
| 476 | ((looking-at "</") | ||
| 477 | (forward-char 2) | ||
| 478 | (error "XML: (Not Well-Formed) Invalid end tag `%s' (expecting `%s')" | ||
| 479 | (let ((pos (point))) | ||
| 480 | (buffer-substring pos (if (re-search-forward "\\s-*>" nil t) | ||
| 481 | (match-beginning 0) | ||
| 482 | (point-max)))) | ||
| 483 | node-name)) | ||
| 484 | ;; Read a sub-element and push it onto CHILDREN. | ||
| 485 | ((= (char-after) ?<) | ||
| 486 | (let ((tag (xml-parse-tag-1 nil xml-ns))) | ||
| 487 | (when tag | ||
| 488 | (push tag children)))) | ||
| 489 | ;; Read some character data. | ||
| 490 | (t | ||
| 491 | (let ((expansion (xml-parse-string))) | ||
| 492 | (push (if (stringp (car children)) | ||
| 493 | ;; If two strings were separated by a | ||
| 494 | ;; comment, concat them. | ||
| 495 | (concat (pop children) expansion) | ||
| 496 | expansion) | ||
| 497 | children))))) | ||
| 498 | ;; Move point past the end-tag. | ||
| 499 | (goto-char (match-end 0)) | ||
| 500 | (nreverse children))) | ||
| 501 | ;; Otherwise this was an invalid start tag (expected ">" not found.) | ||
| 502 | (t | ||
| 503 | (error "XML: (Well-Formed) Couldn't parse tag: %s" | ||
| 504 | (buffer-substring-no-properties (- (point) 10) (+ (point) 1))))))) | ||
| 463 | 505 | ||
| 464 | ;; is this an empty element ? | 506 | ;; (Not one of PI, CDATA, Comment, End tag, or Start tag) |
| 465 | (if (looking-at "/>") | 507 | (t |
| 466 | (progn | 508 | (unless xml-sub-parser ; Usually, we error out. |
| 467 | (forward-char 2) | ||
| 468 | (nreverse children)) | ||
| 469 | |||
| 470 | ;; is this a valid start tag ? | ||
| 471 | (if (eq (char-after) ?>) | ||
| 472 | (progn | ||
| 473 | (forward-char 1) | ||
| 474 | ;; Now check that we have the right end-tag. Note that this | ||
| 475 | ;; one might contain spaces after the tag name | ||
| 476 | (let ((end (concat "</" node-name "\\s-*>"))) | ||
| 477 | (while (not (looking-at end)) | ||
| 478 | (cond | ||
| 479 | ((looking-at "</") | ||
| 480 | (error "XML: (Not Well-Formed) Invalid end tag (expecting %s) at pos %d" | ||
| 481 | node-name (point))) | ||
| 482 | ((= (char-after) ?<) | ||
| 483 | (let ((tag (xml-parse-tag nil xml-ns))) | ||
| 484 | (when tag | ||
| 485 | (push tag children)))) | ||
| 486 | (t | ||
| 487 | (let ((expansion (xml-parse-string))) | ||
| 488 | (setq children | ||
| 489 | (if (stringp expansion) | ||
| 490 | (if (stringp (car children)) | ||
| 491 | ;; The two strings were separated by a comment. | ||
| 492 | (setq children (append (list (concat (car children) expansion)) | ||
| 493 | (cdr children))) | ||
| 494 | (setq children (append (list expansion) children))) | ||
| 495 | (setq children (append expansion children)))))))) | ||
| 496 | |||
| 497 | (goto-char (match-end 0)) | ||
| 498 | (nreverse children))) | ||
| 499 | ;; This was an invalid start tag (Expected ">", but didn't see it.) | ||
| 500 | (error "XML: (Well-Formed) Couldn't parse tag: %s" | ||
| 501 | (buffer-substring-no-properties (- (point) 10) (+ (point) 1))))))) | ||
| 502 | (t ;; (Not one of PI, CDATA, Comment, End tag, or Start tag) | ||
| 503 | (unless xml-sub-parser ; Usually, we error out. | ||
| 504 | (error "XML: (Well-Formed) Invalid character")) | 509 | (error "XML: (Well-Formed) Invalid character")) |
| 505 | |||
| 506 | ;; However, if we're parsing incrementally, then we need to deal | 510 | ;; However, if we're parsing incrementally, then we need to deal |
| 507 | ;; with stray CDATA. | 511 | ;; with stray CDATA. |
| 508 | (xml-parse-string))))) | 512 | (xml-parse-string))))) |
| 509 | 513 | ||
| 510 | (defun xml-parse-string () | 514 | (defun xml-parse-string () |
| 511 | "Parse the next whatever. Could be a string, or an element." | 515 | "Parse character data at point, and return it as a string. |
| 512 | (let* ((pos (point)) | 516 | Leave point at the start of the next thing to parse. This |
| 513 | (string (progn (skip-chars-forward "^<") | 517 | function can modify the buffer by expanding entity and character |
| 514 | (buffer-substring-no-properties pos (point))))) | 518 | references." |
| 515 | ;; Clean up the string. As per XML specifications, the XML | 519 | (let ((start (point)) |
| 516 | ;; processor should always pass the whole string to the | 520 | ref val) |
| 517 | ;; application. But \r's should be replaced: | 521 | (while (and (not (eobp)) |
| 518 | ;; http://www.w3.org/TR/2000/REC-xml-20001006#sec-line-ends | 522 | (not (looking-at "<"))) |
| 519 | (setq pos 0) | 523 | ;; Find the next < or & character. |
| 520 | (while (string-match "\r\n?" string pos) | 524 | (skip-chars-forward "^<&") |
| 521 | (setq string (replace-match "\n" t t string)) | 525 | (when (eq (char-after) ?&) |
| 522 | (setq pos (1+ (match-beginning 0)))) | 526 | ;; If we find an entity or character reference, expand it. |
| 523 | 527 | (unless (looking-at (eval-when-compile | |
| 524 | (xml-substitute-special string))) | 528 | (concat "&\\(?:#\\([0-9]+\\)\\|#x\\([0-9a-fA-F]+\\)\\|\\(" |
| 529 | xml-name-re "\\)\\);"))) | ||
| 530 | (error "XML: (Not Well-Formed) Invalid entity reference")) | ||
| 531 | ;; For a character reference, the next entity or character | ||
| 532 | ;; reference must be after the replacement. [4.6] "Numerical | ||
| 533 | ;; character references are expanded immediately when | ||
| 534 | ;; recognized and MUST be treated as character data." | ||
| 535 | (cond ((setq ref (match-string 1)) | ||
| 536 | ;; Decimal character reference | ||
| 537 | (setq val (save-match-data | ||
| 538 | (decode-char 'ucs (string-to-number ref)))) | ||
| 539 | (and (null val) | ||
| 540 | xml-validating-parser | ||
| 541 | (error "XML: (Validity) Invalid character `%s'" ref)) | ||
| 542 | (replace-match (or (string val) xml-undefined-entity) t t)) | ||
| 543 | ;; Hexadecimal character reference | ||
| 544 | ((setq ref (match-string 2)) | ||
| 545 | (setq val (save-match-data | ||
| 546 | (decode-char 'ucs (string-to-number ref 16)))) | ||
| 547 | (and (null val) | ||
| 548 | xml-validating-parser | ||
| 549 | (error "XML: (Validity) Invalid character `x%s'" ref)) | ||
| 550 | (replace-match (or (string val) xml-undefined-entity) t t)) | ||
| 551 | ;; For an entity reference, search again from the start | ||
| 552 | ;; of the replaced text, since the replacement can | ||
| 553 | ;; contain entity or character references, or markup. | ||
| 554 | ((setq ref (match-string 3)) | ||
| 555 | (setq val (assoc ref xml-entity-alist)) | ||
| 556 | (and (null val) | ||
| 557 | xml-validating-parser | ||
| 558 | (error "XML: (Validity) Undefined entity `%s'" ref)) | ||
| 559 | (replace-match (cdr val) t t) | ||
| 560 | (goto-char (match-beginning 0)))))) | ||
| 561 | ;; [2.11] Clean up line breaks. | ||
| 562 | (let ((end-marker (point-marker))) | ||
| 563 | (goto-char start) | ||
| 564 | (while (re-search-forward "\r\n?" end-marker t) | ||
| 565 | (replace-match "\n" t t)) | ||
| 566 | (goto-char end-marker) | ||
| 567 | (buffer-substring start (point))))) | ||
| 525 | 568 | ||
| 526 | (defun xml-parse-attlist (&optional xml-ns) | 569 | (defun xml-parse-attlist (&optional xml-ns) |
| 527 | "Return the attribute-list after point. | 570 | "Return the attribute-list after point. |
| @@ -564,15 +607,11 @@ Leave point at the first non-blank character after the tag." | |||
| 564 | (skip-syntax-forward " ")) | 607 | (skip-syntax-forward " ")) |
| 565 | (nreverse attlist))) | 608 | (nreverse attlist))) |
| 566 | 609 | ||
| 567 | ;;******************************************************************* | 610 | ;;; DTD (document type declaration) |
| 568 | ;;** | ||
| 569 | ;;** The DTD (document type declaration) | ||
| 570 | ;;** The following functions know how to skip or parse the DTD of | ||
| 571 | ;;** a document | ||
| 572 | ;;** | ||
| 573 | ;;******************************************************************* | ||
| 574 | 611 | ||
| 575 | ;; Fixme: This fails at least if the DTD contains conditional sections. | 612 | ;; The following functions know how to skip or parse the DTD of a |
| 613 | ;; document. FIXME: it fails at least if the DTD contains conditional | ||
| 614 | ;; sections. | ||
| 576 | 615 | ||
| 577 | (defun xml-skip-dtd () | 616 | (defun xml-skip-dtd () |
| 578 | "Skip the DTD at point. | 617 | "Skip the DTD at point. |
| @@ -789,9 +828,10 @@ references and parameter-entity references." | |||
| 789 | ;; Parameter entity reference | 828 | ;; Parameter entity reference |
| 790 | ((setq ref (match-string 3 string)) | 829 | ((setq ref (match-string 3 string)) |
| 791 | (setq val (assoc ref xml-parameter-entity-alist)) | 830 | (setq val (assoc ref xml-parameter-entity-alist)) |
| 792 | (if val | 831 | (and (null val) |
| 793 | (push (cdr val) children) | 832 | xml-validating-parser |
| 794 | (push (concat "%" ref ";") children)))) | 833 | (error "XML: (Validity) Undefined parameter entity `%s'" ref)) |
| 834 | (push (or (cdr val) xml-undefined-entity) children))) | ||
| 795 | (setq string remainder))) | 835 | (setq string remainder))) |
| 796 | (mapconcat 'identity (nreverse (cons string children)) ""))) | 836 | (mapconcat 'identity (nreverse (cons string children)) ""))) |
| 797 | 837 | ||
| @@ -828,79 +868,40 @@ references and parameter-entity references." | |||
| 828 | (t | 868 | (t |
| 829 | elem)))) | 869 | elem)))) |
| 830 | 870 | ||
| 831 | ;;******************************************************************* | 871 | ;;; Substituting special XML sequences |
| 832 | ;;** | ||
| 833 | ;;** Substituting special XML sequences | ||
| 834 | ;;** | ||
| 835 | ;;******************************************************************* | ||
| 836 | 872 | ||
| 837 | (defun xml-substitute-special (string) | 873 | (defun xml-substitute-special (string) |
| 838 | "Return STRING, after substituting entity references." | 874 | "Return STRING, after substituting entity and character references. |
| 839 | ;; This originally made repeated passes through the string from the | 875 | STRING is assumed to occur in an XML attribute value." |
| 840 | ;; beginning, which isn't correct, since then either "&amp;" or | 876 | (let ((ref-re (eval-when-compile |
| 841 | ;; "&amp;" won't DTRT. | 877 | (concat "&\\(?:#\\(x\\)?\\([0-9]+\\)\\|\\(" |
| 842 | 878 | xml-name-re "\\)\\);"))) | |
| 843 | (let ((point 0) | 879 | children) |
| 844 | children end-point) | 880 | (while (string-match ref-re string) |
| 845 | (while (string-match "&\\([^;]*\\);" string point) | 881 | (push (substring string 0 (match-beginning 0)) children) |
| 846 | (setq end-point (match-end 0)) | 882 | (let* ((remainder (substring string (match-end 0))) |
| 847 | (let* ((this-part (match-string-no-properties 1 string)) | 883 | (ref (match-string 2 string))) |
| 848 | (prev-part (substring string point (match-beginning 0))) | 884 | (if ref |
| 849 | (entity (assoc this-part xml-entity-alist)) | 885 | ;; [4.6] Character references are included as |
| 850 | (expansion | 886 | ;; character data. |
| 851 | (cond ((string-match "#\\([0-9]+\\)" this-part) | 887 | (let ((val (decode-char 'ucs (string-to-number |
| 852 | (let ((c (decode-char | 888 | ref (if (match-string 1 string) 16))))) |
| 853 | 'ucs | 889 | (push (cond (val (string val)) |
| 854 | (string-to-number (match-string-no-properties 1 this-part))))) | 890 | (xml-validating-parser |
| 855 | (if c (string c)))) | 891 | (error "XML: (Validity) Undefined character `x%s'" ref)) |
| 856 | ((string-match "#x\\([[:xdigit:]]+\\)" this-part) | 892 | (t xml-undefined-entity)) |
| 857 | (let ((c (decode-char | 893 | children) |
| 858 | 'ucs | 894 | (setq string remainder)) |
| 859 | (string-to-number (match-string-no-properties 1 this-part) 16)))) | 895 | ;; [4.4.5] Entity references are "included in literal". |
| 860 | (if c (string c)))) | 896 | ;; Note that we don't need do anything special to treat |
| 861 | (entity | 897 | ;; quotes as normal data characters. |
| 862 | (cdr entity)) | 898 | (setq ref (match-string 3 string)) |
| 863 | ((eq (length this-part) 0) | 899 | (let ((val (or (cdr (assoc ref xml-entity-alist)) |
| 864 | (error "XML: (Not Well-Formed) No entity given")) | 900 | (if xml-validating-parser |
| 865 | (t | 901 | (error "XML: (Validity) Undefined entity `%s'" ref) |
| 866 | (if xml-validating-parser | 902 | xml-undefined-entity)))) |
| 867 | (error "XML: (Validity) Undefined entity `%s'" | 903 | (setq string (concat val remainder)))))) |
| 868 | this-part) | 904 | (mapconcat 'identity (nreverse (cons string children)) ""))) |
| 869 | xml-undefined-entity))))) | ||
| 870 | |||
| 871 | (cond ((null children) | ||
| 872 | ;; FIXME: If we have an entity that expands into XML, this won't work. | ||
| 873 | (setq children | ||
| 874 | (concat prev-part expansion))) | ||
| 875 | ((stringp children) | ||
| 876 | (if (stringp expansion) | ||
| 877 | (setq children (concat children prev-part expansion)) | ||
| 878 | (setq children (list expansion (concat prev-part children))))) | ||
| 879 | ((and (stringp expansion) | ||
| 880 | (stringp (car children))) | ||
| 881 | (setcar children (concat prev-part expansion (car children)))) | ||
| 882 | ((stringp expansion) | ||
| 883 | (setq children (append (concat prev-part expansion) | ||
| 884 | children))) | ||
| 885 | ((stringp (car children)) | ||
| 886 | (setcar children (concat (car children) prev-part)) | ||
| 887 | (setq children (append expansion children))) | ||
| 888 | (t | ||
| 889 | (setq children (list expansion | ||
| 890 | prev-part | ||
| 891 | children)))) | ||
| 892 | (setq point end-point))) | ||
| 893 | (cond ((stringp children) | ||
| 894 | (concat children (substring string point))) | ||
| 895 | ((stringp (car (last children))) | ||
| 896 | (concat (car (last children)) (substring string point))) | ||
| 897 | ((null children) | ||
| 898 | string) | ||
| 899 | (t | ||
| 900 | (concat (mapconcat 'identity | ||
| 901 | (nreverse children) | ||
| 902 | "") | ||
| 903 | (substring string point)))))) | ||
| 904 | 905 | ||
| 905 | (defun xml-substitute-numeric-entities (string) | 906 | (defun xml-substitute-numeric-entities (string) |
| 906 | "Substitute SGML numeric entities by their respective utf characters. | 907 | "Substitute SGML numeric entities by their respective utf characters. |
| @@ -921,12 +922,7 @@ by \"*\"." | |||
| 921 | string) | 922 | string) |
| 922 | nil)) | 923 | nil)) |
| 923 | 924 | ||
| 924 | ;;******************************************************************* | 925 | ;;; Printing a parse tree (mainly for debugging). |
| 925 | ;;** | ||
| 926 | ;;** Printing a tree. | ||
| 927 | ;;** This function is intended mainly for debugging purposes. | ||
| 928 | ;;** | ||
| 929 | ;;******************************************************************* | ||
| 930 | 926 | ||
| 931 | (defun xml-debug-print (xml &optional indent-string) | 927 | (defun xml-debug-print (xml &optional indent-string) |
| 932 | "Outputs the XML in the current buffer. | 928 | "Outputs the XML in the current buffer. |
diff --git a/test/ChangeLog b/test/ChangeLog index d9d9bc5a9fa..3ff7124893a 100644 --- a/test/ChangeLog +++ b/test/ChangeLog | |||
| @@ -1,3 +1,8 @@ | |||
| 1 | 2012-07-02 Chong Yidong <cyd@gnu.org> | ||
| 2 | |||
| 3 | * automated/xml-parse-tests.el (xml-parse-tests--data): More | ||
| 4 | testcases. | ||
| 5 | |||
| 1 | 2012-07-01 Chong Yidong <cyd@gnu.org> | 6 | 2012-07-01 Chong Yidong <cyd@gnu.org> |
| 2 | 7 | ||
| 3 | * automated/xml-parse-tests.el: New file. | 8 | * automated/xml-parse-tests.el: New file. |
diff --git a/test/automated/xml-parse-tests.el b/test/automated/xml-parse-tests.el index 8322a8c6ff9..ec3d7ca3065 100644 --- a/test/automated/xml-parse-tests.el +++ b/test/automated/xml-parse-tests.el | |||
| @@ -33,15 +33,26 @@ | |||
| 33 | '(;; General entity substitution | 33 | '(;; General entity substitution |
| 34 | ("<?xml version=\"1.0\"?><!DOCTYPE foo SYSTEM \"bar.dtd\" [<!ENTITY ent \"AbC\">]><foo a=\"b\"><bar>&ent;;</bar></foo>" . | 34 | ("<?xml version=\"1.0\"?><!DOCTYPE foo SYSTEM \"bar.dtd\" [<!ENTITY ent \"AbC\">]><foo a=\"b\"><bar>&ent;;</bar></foo>" . |
| 35 | ((foo ((a . "b")) (bar nil "AbC;")))) | 35 | ((foo ((a . "b")) (bar nil "AbC;")))) |
| 36 | ("<?xml version=\"1.0\"?><foo>&amp;&apos;'<>"</foo>" . | ||
| 37 | ((foo () "&''<>\""))) | ||
| 36 | ;; Parameter entity substitution | 38 | ;; Parameter entity substitution |
| 37 | ("<?xml version=\"1.0\"?><!DOCTYPE foo SYSTEM \"bar.dtd\" [<!ENTITY % pent \"AbC\"><!ENTITY ent \"%pent;\">]><foo a=\"b\"><bar>&ent;;</bar></foo>" . | 39 | ("<?xml version=\"1.0\"?><!DOCTYPE foo SYSTEM \"bar.dtd\" [<!ENTITY % pent \"AbC\"><!ENTITY ent \"%pent;\">]><foo a=\"b\"><bar>&ent;;</bar></foo>" . |
| 38 | ((foo ((a . "b")) (bar nil "AbC;")))) | 40 | ((foo ((a . "b")) (bar nil "AbC;")))) |
| 39 | ;; Tricky parameter entity substitution (like XML spec Appendix D) | 41 | ;; Tricky parameter entity substitution (like XML spec Appendix D) |
| 40 | ("<?xml version='1.0'?><!DOCTYPE foo [ <!ENTITY % xx '%zz;'><!ENTITY % zz '<!ENTITY ent \"b\" >' > %xx; ]><foo>A&ent;C</foo>" . | 42 | ("<?xml version='1.0'?><!DOCTYPE foo [ <!ENTITY % xx '%zz;'><!ENTITY % zz '<!ENTITY ent \"b\" >' > %xx; ]><foo>A&ent;C</foo>" . |
| 41 | ((foo nil "AbC"))) | 43 | ((foo () "AbC"))) |
| 42 | ;; Bug#7172 | 44 | ;; Bug#7172 |
| 43 | ("<?xml version=\"1.0\"?><!DOCTYPE foo [ <!ELEMENT EXAM_PLE EMPTY> ]><foo></foo>" . | 45 | ("<?xml version=\"1.0\"?><!DOCTYPE foo [ <!ELEMENT EXAM_PLE EMPTY> ]><foo></foo>" . |
| 44 | ((foo nil)))) | 46 | ((foo ()))) |
| 47 | ;; Entities referencing entities, in character data | ||
| 48 | ("<!DOCTYPE foo [ <!ENTITY b \"B\"><!ENTITY abc \"a&b;c\">]><foo>&abc;</foo>" . | ||
| 49 | ((foo () "aBc"))) | ||
| 50 | ;; Entities referencing entities, in attribute values | ||
| 51 | ("<!DOCTYPE foo [ <!ENTITY b \"B\"><!ENTITY abc \"a&b;c\">]><foo a=\"-&abc;-\">1</foo>" . | ||
| 52 | ((foo ((a . "-aBc-")) "1"))) | ||
| 53 | ;; Character references must be treated as character data | ||
| 54 | ("<foo>AT&T;</foo>" . ((foo () "AT&T;"))) | ||
| 55 | ("<foo>&amp;</foo>" . ((foo () "&")))) | ||
| 45 | "Alist of XML strings and their expected parse trees.") | 56 | "Alist of XML strings and their expected parse trees.") |
| 46 | 57 | ||
| 47 | (ert-deftest xml-parse-tests () | 58 | (ert-deftest xml-parse-tests () |