diff options
| author | Mark A. Hershberger | 2004-07-09 14:22:33 +0000 |
|---|---|---|
| committer | Mark A. Hershberger | 2004-07-09 14:22:33 +0000 |
| commit | 6d12a4dfbcb5680fafac89769e1a2f111fdcc587 (patch) | |
| tree | bb56765fcd16836c88d2c647a12bb13dfe936c5c /lisp/xml.el | |
| parent | bcdf2143d312616d027880b16a6f43f4e0066792 (diff) | |
| download | emacs-6d12a4dfbcb5680fafac89769e1a2f111fdcc587.tar.gz emacs-6d12a4dfbcb5680fafac89769e1a2f111fdcc587.zip | |
2004-07-09 Mark A. Hershberger <mah@everybody.org>
* xml.el (xml-maybe-do-ns, xml-parse-tag): Produce elements in the
form
(("ns" . "element") (attr-list) children) instead of
((:ns . "element") (attr-list) children) in order to reduce the
number of symbols used.
(xml-skip-dtd): Change to use xml-parse-dtd but set
xml-validating-parsing to nil.
(xml-parse-dtd): Parse entity deleclarations in DOCTYPEs.
(xml-substitute-entity): Remove in favor of new entity substitution.
(xml-substitute-special): Rewrite in to substitute complex
entities from DOCTYPE declarations.
(xml-parse-fragment): Parse fragments from entity deleclarations.
(xml-parse-region, xml-parse-tag, xml-parse-attlist)
(xml-parse-dtd, xml-substitute-special): Make validity checks
conditioned on xml-validating-parser. Add "Not Well Formed" to
error messages about well-formedness.
Diffstat (limited to 'lisp/xml.el')
| -rw-r--r-- | lisp/xml.el | 400 |
1 files changed, 273 insertions, 127 deletions
diff --git a/lisp/xml.el b/lisp/xml.el index 03ef6346c70..993ef59b276 100644 --- a/lisp/xml.el +++ b/lisp/xml.el | |||
| @@ -84,6 +84,20 @@ | |||
| 84 | ;;** | 84 | ;;** |
| 85 | ;;******************************************************************* | 85 | ;;******************************************************************* |
| 86 | 86 | ||
| 87 | (defvar xml-entity-alist | ||
| 88 | '(("lt" . "<") | ||
| 89 | ("gt" . ">") | ||
| 90 | ("apos" . "'") | ||
| 91 | ("quot" . "\"") | ||
| 92 | ("amp" . "&")) | ||
| 93 | "The defined entities. Entities are added to this when the DTD is parsed.") | ||
| 94 | |||
| 95 | (defvar xml-sub-parser nil | ||
| 96 | "Dynamically set this to a non-nil value if you want to parse an XML fragment.") | ||
| 97 | |||
| 98 | (defvar xml-validating-parser nil | ||
| 99 | "Set to non-nil to get validity checking.") | ||
| 100 | |||
| 87 | (defsubst xml-node-name (node) | 101 | (defsubst xml-node-name (node) |
| 88 | "Return the tag associated with NODE. | 102 | "Return the tag associated with NODE. |
| 89 | Without namespace-aware parsing, the tag is a symbol. | 103 | Without namespace-aware parsing, the tag is a symbol. |
| @@ -164,6 +178,48 @@ If PARSE-NS is non-nil, then QNAMES are expanded." | |||
| 164 | (kill-buffer (current-buffer))) | 178 | (kill-buffer (current-buffer))) |
| 165 | xml))) | 179 | xml))) |
| 166 | 180 | ||
| 181 | |||
| 182 | (let* ((start-chars (concat ":[:alpha:]_")) | ||
| 183 | (name-chars (concat "-[:digit:]." start-chars)) | ||
| 184 | ;;[3] S ::= (#x20 | #x9 | #xD | #xA)+ | ||
| 185 | (whitespace "[ \t\n\r]")) | ||
| 186 | ;;[4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | ||
| 187 | ;; | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | ||
| 188 | ;; | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | ||
| 189 | ;; | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF] | ||
| 190 | (defvar xml-name-start-char-re (concat "[" start-chars "]")) | ||
| 191 | ;;[4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040] | ||
| 192 | (defvar xml-name-char-re (concat "[" name-chars "]")) | ||
| 193 | ;;[5] Name ::= NameStartChar (NameChar)* | ||
| 194 | (defvar xml-name-re (concat xml-name-start-char-re xml-name-char-re "*")) | ||
| 195 | ;;[6] Names ::= Name (#x20 Name)* | ||
| 196 | (defvar xml-names-re (concat xml-name-re "\\(?: " xml-name-re "\\)*")) | ||
| 197 | ;;[7] Nmtoken ::= (NameChar)+ | ||
| 198 | (defvar xml-nmtoken-re (concat xml-name-char-re "+")) | ||
| 199 | ;;[8] Nmtokens ::= Nmtoken (#x20 Nmtoken)* | ||
| 200 | (defvar xml-nmtokens-re (concat xml-nmtoken-re "\\(?: " xml-name-re "\\)*")) | ||
| 201 | ;;[66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' | ||
| 202 | (defvar xml-char-ref-re "\\(?:&#[0-9]+;\\|&#x[0-9a-fA-F]+;\\)") | ||
| 203 | ;;[68] EntityRef ::= '&' Name ';' | ||
| 204 | (defvar xml-entity-ref (concat "&" xml-name-re ";")) | ||
| 205 | ;;[69] PEReference ::= '%' Name ';' | ||
| 206 | (defvar xml-pe-reference-re (concat "%" xml-name-re ";")) | ||
| 207 | ;;[67] Reference ::= EntityRef | CharRef | ||
| 208 | (defvar xml-reference-re (concat "\\(?:" xml-entity-ref "\\|" xml-char-ref-re "\\)")) | ||
| 209 | ;;[9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | ||
| 210 | ;; | "'" ([^%&'] | PEReference | Reference)* "'" | ||
| 211 | (defvar xml-entity-value-re (concat "\\(?:\"\\(?:[^%&\"]\\|" xml-pe-reference-re | ||
| 212 | "\\|" xml-reference-re "\\)*\"\\|'\\(?:[^%&']\\|" | ||
| 213 | xml-pe-reference-re "\\|" xml-reference-re "\\)*'\\)"))) | ||
| 214 | ;;[75] ExternalID ::= 'SYSTEM' S SystemLiteral | ||
| 215 | ;; | 'PUBLIC' S PubidLiteral S SystemLiteral | ||
| 216 | ;;[76] NDataDecl ::= S 'NDATA' S | ||
| 217 | ;;[73] EntityDef ::= EntityValue| (ExternalID NDataDecl?) | ||
| 218 | ;;[71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' | ||
| 219 | ;;[74] PEDef ::= EntityValue | ExternalID | ||
| 220 | ;;[72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' | ||
| 221 | ;;[70] EntityDecl ::= GEDecl | PEDecl | ||
| 222 | |||
| 167 | ;; Note that this is setup so that we can do whitespace-skipping with | 223 | ;; Note that this is setup so that we can do whitespace-skipping with |
| 168 | ;; `(skip-syntax-forward " ")', inter alia. Previously this was slow | 224 | ;; `(skip-syntax-forward " ")', inter alia. Previously this was slow |
| 169 | ;; compared with `re-search-forward', but that has been fixed. Also | 225 | ;; compared with `re-search-forward', but that has been fixed. Also |
| @@ -229,9 +285,9 @@ If PARSE-NS is non-nil, then QNAMES are expanded." | |||
| 229 | (progn | 285 | (progn |
| 230 | (forward-char -1) | 286 | (forward-char -1) |
| 231 | (setq result (xml-parse-tag parse-dtd parse-ns)) | 287 | (setq result (xml-parse-tag parse-dtd parse-ns)) |
| 232 | (if (and xml result) | 288 | (if (and xml result (not xml-sub-parser)) |
| 233 | ;; translation of rule [1] of XML specifications | 289 | ;; translation of rule [1] of XML specifications |
| 234 | (error "XML files can have only one toplevel tag") | 290 | (error "XML: (Not Well-Formed) Only one root tag allowed") |
| 235 | (cond | 291 | (cond |
| 236 | ((null result)) | 292 | ((null result)) |
| 237 | ((and (listp (car result)) | 293 | ((and (listp (car result)) |
| @@ -265,10 +321,24 @@ specify that the name shouldn't be given a namespace." | |||
| 265 | ;; matching cons in xml-ns. In which case we | 321 | ;; matching cons in xml-ns. In which case we |
| 266 | (ns (or (cdr (assoc (if special "xmlns" prefix) | 322 | (ns (or (cdr (assoc (if special "xmlns" prefix) |
| 267 | xml-ns)) | 323 | xml-ns)) |
| 268 | :))) | 324 | ""))) |
| 269 | (cons ns (if special "" lname))) | 325 | (cons ns (if special "" lname))) |
| 270 | (intern name))) | 326 | (intern name))) |
| 271 | 327 | ||
| 328 | (defun xml-parse-fragment (&optional parse-dtd parse-ns) | ||
| 329 | "Parse xml-like fragments." | ||
| 330 | (let ((xml-sub-parser t) | ||
| 331 | children) | ||
| 332 | (while (not (eobp)) | ||
| 333 | (let ((bit (xml-parse-tag | ||
| 334 | parse-dtd parse-ns))) | ||
| 335 | (if children | ||
| 336 | (setq children (append (list bit) children)) | ||
| 337 | (if (stringp bit) | ||
| 338 | (setq children (list bit)) | ||
| 339 | (setq children bit))))) | ||
| 340 | (reverse children))) | ||
| 341 | |||
| 272 | (defun xml-parse-tag (&optional parse-dtd parse-ns) | 342 | (defun xml-parse-tag (&optional parse-dtd parse-ns) |
| 273 | "Parse the tag at point. | 343 | "Parse the tag at point. |
| 274 | If PARSE-DTD is non-nil, the DTD of the document, if any, is parsed and | 344 | If PARSE-DTD is non-nil, the DTD of the document, if any, is parsed and |
| @@ -278,16 +348,17 @@ Returns one of: | |||
| 278 | - a list : the matching node | 348 | - a list : the matching node |
| 279 | - nil : the point is not looking at a tag. | 349 | - nil : the point is not looking at a tag. |
| 280 | - a pair : the first element is the DTD, the second is the node." | 350 | - a pair : the first element is the DTD, the second is the node." |
| 281 | (let ((xml-ns (if (consp parse-ns) | 351 | (let ((xml-validating-parser (or parse-dtd xml-validating-parser)) |
| 352 | (xml-ns (if (consp parse-ns) | ||
| 282 | parse-ns | 353 | parse-ns |
| 283 | (if parse-ns | 354 | (if parse-ns |
| 284 | (list | 355 | (list |
| 285 | ;; Default for empty prefix is no namespace | 356 | ;; Default for empty prefix is no namespace |
| 286 | (cons "" :) | 357 | (cons "" "") |
| 287 | ;; "xml" namespace | 358 | ;; "xml" namespace |
| 288 | (cons "xml" :http://www.w3.org/XML/1998/namespace) | 359 | (cons "xml" "http://www.w3.org/XML/1998/namespace") |
| 289 | ;; We need to seed the xmlns namespace | 360 | ;; We need to seed the xmlns namespace |
| 290 | (cons "xmlns" :http://www.w3.org/2000/xmlns/)))))) | 361 | (cons "xmlns" "http://www.w3.org/2000/xmlns/")))))) |
| 291 | (cond | 362 | (cond |
| 292 | ;; Processing instructions (like the <?xml version="1.0"?> tag at the | 363 | ;; Processing instructions (like the <?xml version="1.0"?> tag at the |
| 293 | ;; beginning of a document). | 364 | ;; beginning of a document). |
| @@ -299,18 +370,15 @@ Returns one of: | |||
| 299 | ((looking-at "<!\\[CDATA\\[") | 370 | ((looking-at "<!\\[CDATA\\[") |
| 300 | (let ((pos (match-end 0))) | 371 | (let ((pos (match-end 0))) |
| 301 | (unless (search-forward "]]>" nil t) | 372 | (unless (search-forward "]]>" nil t) |
| 302 | (error "CDATA section does not end anywhere in the document")) | 373 | (error "XML: (Not Well Formed) CDATA section does not end anywhere in the document")) |
| 303 | (buffer-substring pos (match-beginning 0)))) | 374 | (buffer-substring pos (match-beginning 0)))) |
| 304 | ;; DTD for the document | 375 | ;; DTD for the document |
| 305 | ((looking-at "<!DOCTYPE") | 376 | ((looking-at "<!DOCTYPE") |
| 306 | (let (dtd) | 377 | (let ((dtd (xml-parse-dtd parse-ns))) |
| 307 | (if parse-dtd | 378 | (skip-syntax-forward " ") |
| 308 | (setq dtd (xml-parse-dtd)) | 379 | (if xml-validating-parser |
| 309 | (xml-skip-dtd)) | 380 | (cons dtd (xml-parse-tag nil xml-ns)) |
| 310 | (skip-syntax-forward " ") | 381 | (xml-parse-tag nil xml-ns)))) |
| 311 | (if dtd | ||
| 312 | (cons dtd (xml-parse-tag nil xml-ns)) | ||
| 313 | (xml-parse-tag nil xml-ns)))) | ||
| 314 | ;; skip comments | 382 | ;; skip comments |
| 315 | ((looking-at "<!--") | 383 | ((looking-at "<!--") |
| 316 | (search-forward "-->") | 384 | (search-forward "-->") |
| @@ -332,65 +400,76 @@ Returns one of: | |||
| 332 | (when (consp xml-ns) | 400 | (when (consp xml-ns) |
| 333 | (dolist (attr attrs) | 401 | (dolist (attr attrs) |
| 334 | (when (and (consp (car attr)) | 402 | (when (and (consp (car attr)) |
| 335 | (eq :http://www.w3.org/2000/xmlns/ | 403 | (equal "http://www.w3.org/2000/xmlns/" |
| 336 | (caar attr))) | 404 | (caar attr))) |
| 337 | (push (cons (cdar attr) (intern (concat ":" (cdr attr)))) | 405 | (push (cons (cdar attr) (cdr attr)) |
| 338 | xml-ns)))) | 406 | xml-ns)))) |
| 339 | 407 | ||
| 340 | (setq children (list attrs (xml-maybe-do-ns node-name "" xml-ns))) | 408 | (setq children (list attrs (xml-maybe-do-ns node-name "" xml-ns))) |
| 341 | 409 | ||
| 342 | ;; is this an empty element ? | 410 | ;; is this an empty element ? |
| 343 | (if (looking-at "/>") | 411 | (if (looking-at "/>") |
| 344 | (progn | ||
| 345 | (forward-char 2) | ||
| 346 | (nreverse children)) | ||
| 347 | |||
| 348 | ;; is this a valid start tag ? | ||
| 349 | (if (eq (char-after) ?>) | ||
| 350 | (progn | 412 | (progn |
| 351 | (forward-char 1) | 413 | (forward-char 2) |
| 352 | ;; Now check that we have the right end-tag. Note that this | ||
| 353 | ;; one might contain spaces after the tag name | ||
| 354 | (let ((end (concat "</" node-name "\\s-*>"))) | ||
| 355 | (while (not (looking-at end)) | ||
| 356 | (cond | ||
| 357 | ((looking-at "</") | ||
| 358 | (error "XML: Invalid end tag (expecting %s) at pos %d" | ||
| 359 | node-name (point))) | ||
| 360 | ((= (char-after) ?<) | ||
| 361 | (let ((tag (xml-parse-tag nil xml-ns))) | ||
| 362 | (when tag | ||
| 363 | (push tag children)))) | ||
| 364 | (t | ||
| 365 | (setq pos (point)) | ||
| 366 | (search-forward "<") | ||
| 367 | (forward-char -1) | ||
| 368 | (let ((string (buffer-substring pos (point))) | ||
| 369 | (pos 0)) | ||
| 370 | |||
| 371 | ;; Clean up the string. As per XML | ||
| 372 | ;; specifications, the XML processor should | ||
| 373 | ;; always pass the whole string to the | ||
| 374 | ;; application. But \r's should be replaced: | ||
| 375 | ;; http://www.w3.org/TR/2000/REC-xml-20001006#sec-line-ends | ||
| 376 | (while (string-match "\r\n?" string pos) | ||
| 377 | (setq string (replace-match "\n" t t string)) | ||
| 378 | (setq pos (1+ (match-beginning 0)))) | ||
| 379 | |||
| 380 | (setq string (xml-substitute-special string)) | ||
| 381 | (setq children | ||
| 382 | (if (stringp (car children)) | ||
| 383 | ;; The two strings were separated by a comment. | ||
| 384 | (cons (concat (car children) string) | ||
| 385 | (cdr children)) | ||
| 386 | (cons string children)))))))) | ||
| 387 | |||
| 388 | (goto-char (match-end 0)) | ||
| 389 | (nreverse children)) | 414 | (nreverse children)) |
| 390 | ;; This was an invalid start tag | 415 | |
| 391 | (error "XML: Invalid attribute list"))))) | 416 | ;; is this a valid start tag ? |
| 392 | (t ;; This is not a tag. | 417 | (if (eq (char-after) ?>) |
| 393 | (error "XML: Invalid character"))))) | 418 | (progn |
| 419 | (forward-char 1) | ||
| 420 | ;; Now check that we have the right end-tag. Note that this | ||
| 421 | ;; one might contain spaces after the tag name | ||
| 422 | (let ((end (concat "</" node-name "\\s-*>"))) | ||
| 423 | (while (not (looking-at end)) | ||
| 424 | (cond | ||
| 425 | ((looking-at "</") | ||
| 426 | (error "XML: (Not Well-Formed) Invalid end tag (expecting %s) at pos %d" | ||
| 427 | node-name (point))) | ||
| 428 | ((= (char-after) ?<) | ||
| 429 | (let ((tag (xml-parse-tag nil xml-ns))) | ||
| 430 | (when tag | ||
| 431 | (push tag children)))) | ||
| 432 | (t | ||
| 433 | (let ((expansion (xml-parse-string))) | ||
| 434 | (setq children | ||
| 435 | (if (stringp expansion) | ||
| 436 | (if (stringp (car children)) | ||
| 437 | ;; The two strings were separated by a comment. | ||
| 438 | (setq children (append (concat (car children) expansion) | ||
| 439 | (cdr children))) | ||
| 440 | (setq children (append (list expansion) children))) | ||
| 441 | (setq children (append expansion children)))))))) | ||
| 442 | |||
| 443 | (goto-char (match-end 0)) | ||
| 444 | (nreverse children))) | ||
| 445 | ;; This was an invalid start tag (Expected ">", but didn't see it.) | ||
| 446 | (error "XML: (Well-Formed) Couldn't parse tag: %s" | ||
| 447 | (buffer-substring (- (point) 10) (+ (point) 1))))))) | ||
| 448 | (t ;; (Not one of PI, CDATA, Comment, End tag, or Start tag) | ||
| 449 | (unless xml-sub-parser ; Usually, we error out. | ||
| 450 | (error "XML: (Well-Formed) Invalid character")) | ||
| 451 | |||
| 452 | ;; However, if we're parsing incrementally, then we need to deal | ||
| 453 | ;; with stray CDATA. | ||
| 454 | (xml-parse-string))))) | ||
| 455 | |||
| 456 | (defun xml-parse-string () | ||
| 457 | "Parse the next whatever. Could be a string, or an element." | ||
| 458 | (let* ((pos (point)) | ||
| 459 | (string (progn (if (search-forward "<" nil t) | ||
| 460 | (forward-char -1) | ||
| 461 | (goto-char (point-max))) | ||
| 462 | (buffer-substring pos (point))))) | ||
| 463 | ;; Clean up the string. As per XML specifications, the XML | ||
| 464 | ;; processor should always pass the whole string to the | ||
| 465 | ;; application. But \r's should be replaced: | ||
| 466 | ;; http://www.w3.org/TR/2000/REC-xml-20001006#sec-line-ends | ||
| 467 | (setq pos 0) | ||
| 468 | (while (string-match "\r\n?" string pos) | ||
| 469 | (setq string (replace-match "\n" t t string)) | ||
| 470 | (setq pos (1+ (match-beginning 0)))) | ||
| 471 | |||
| 472 | (xml-substitute-special string))) | ||
| 394 | 473 | ||
| 395 | (defun xml-parse-attlist (&optional xml-ns) | 474 | (defun xml-parse-attlist (&optional xml-ns) |
| 396 | "Return the attribute-list after point. | 475 | "Return the attribute-list after point. |
| @@ -412,18 +491,23 @@ Leave point at the first non-blank character after the tag." | |||
| 412 | (setq end-pos (match-end 0)) | 491 | (setq end-pos (match-end 0)) |
| 413 | (if (looking-at "'\\([^']*\\)'") | 492 | (if (looking-at "'\\([^']*\\)'") |
| 414 | (setq end-pos (match-end 0)) | 493 | (setq end-pos (match-end 0)) |
| 415 | (error "XML: Attribute values must be given between quotes"))) | 494 | (error "XML: (Not Well-Formed) Attribute values must be given between quotes"))) |
| 416 | 495 | ||
| 417 | ;; Each attribute must be unique within a given element | 496 | ;; Each attribute must be unique within a given element |
| 418 | (if (assoc name attlist) | 497 | (if (assoc name attlist) |
| 419 | (error "XML: each attribute must be unique within an element")) | 498 | (error "XML: (Not Well-Formed) Each attribute must be unique within an element")) |
| 420 | 499 | ||
| 421 | ;; Multiple whitespace characters should be replaced with a single one | 500 | ;; Multiple whitespace characters should be replaced with a single one |
| 422 | ;; in the attributes | 501 | ;; in the attributes |
| 423 | (let ((string (match-string 1)) | 502 | (let ((string (match-string 1)) |
| 424 | (pos 0)) | 503 | (pos 0)) |
| 425 | (replace-regexp-in-string "\\s-\\{2,\\}" " " string) | 504 | (replace-regexp-in-string "\\s-\\{2,\\}" " " string) |
| 426 | (push (cons name (xml-substitute-special string)) attlist)) | 505 | (let ((expansion (xml-substitute-special string))) |
| 506 | (unless (stringp expansion) | ||
| 507 | ; We say this is the constraint. It is acctually that | ||
| 508 | ; external entities nor "<" can be in an attribute value. | ||
| 509 | (error "XML: (Not Well-Formed) Entities in attributes cannot expand into elements")) | ||
| 510 | (push (cons name expansion) attlist))) | ||
| 427 | 511 | ||
| 428 | (goto-char end-pos) | 512 | (goto-char end-pos) |
| 429 | (skip-syntax-forward " ")) | 513 | (skip-syntax-forward " ")) |
| @@ -442,24 +526,16 @@ Leave point at the first non-blank character after the tag." | |||
| 442 | (defun xml-skip-dtd () | 526 | (defun xml-skip-dtd () |
| 443 | "Skip the DTD at point. | 527 | "Skip the DTD at point. |
| 444 | This follows the rule [28] in the XML specifications." | 528 | This follows the rule [28] in the XML specifications." |
| 445 | (forward-char (length "<!DOCTYPE")) | 529 | (let ((xml-validating-parser nil)) |
| 446 | (if (looking-at "\\s-*>") | 530 | (xml-parse-dtd))) |
| 447 | (error "XML: invalid DTD (excepting name of the document)")) | ||
| 448 | (condition-case nil | ||
| 449 | (progn | ||
| 450 | (forward-sexp) | ||
| 451 | (skip-syntax-forward " ") | ||
| 452 | (if (looking-at "\\[") | ||
| 453 | (re-search-forward "]\\s-*>") | ||
| 454 | (search-forward ">"))) | ||
| 455 | (error (error "XML: No end to the DTD")))) | ||
| 456 | 531 | ||
| 457 | (defun xml-parse-dtd () | 532 | (defun xml-parse-dtd (&optional parse-ns) |
| 458 | "Parse the DTD at point." | 533 | "Parse the DTD at point." |
| 459 | (forward-char (eval-when-compile (length "<!DOCTYPE"))) | 534 | (forward-char (eval-when-compile (length "<!DOCTYPE"))) |
| 460 | (skip-syntax-forward " ") | 535 | (skip-syntax-forward " ") |
| 461 | (if (looking-at ">") | 536 | (if (and (looking-at ">") |
| 462 | (error "XML: invalid DTD (excepting name of the document)")) | 537 | xml-validating-parser) |
| 538 | (error "XML: (Validity) Invalid DTD (expecting name of the document)")) | ||
| 463 | 539 | ||
| 464 | ;; Get the name of the document | 540 | ;; Get the name of the document |
| 465 | (looking-at xml-name-regexp) | 541 | (looking-at xml-name-regexp) |
| @@ -477,27 +553,27 @@ This follows the rule [28] in the XML specifications." | |||
| 477 | (re-search-forward | 553 | (re-search-forward |
| 478 | "\\='\\([[:space:][:alnum:]-()+,./:=?;!*#@$_%]*\\)'" | 554 | "\\='\\([[:space:][:alnum:]-()+,./:=?;!*#@$_%]*\\)'" |
| 479 | nil t)) | 555 | nil t)) |
| 480 | (error "XML: missing public id")) | 556 | (error "XML: Missing Public ID")) |
| 481 | (let ((pubid (match-string 1))) | 557 | (let ((pubid (match-string 1))) |
| 558 | (skip-syntax-forward " ") | ||
| 482 | (unless (or (re-search-forward "\\='\\([^']*\\)'" nil t) | 559 | (unless (or (re-search-forward "\\='\\([^']*\\)'" nil t) |
| 483 | (re-search-forward "\\=\"\\([^\"]*\\)\"" nil t)) | 560 | (re-search-forward "\\=\"\\([^\"]*\\)\"" nil t)) |
| 484 | (error "XML: missing system id")) | 561 | (error "XML: Missing System ID")) |
| 485 | (push (list pubid (match-string 1) 'public) dtd))) | 562 | (push (list pubid (match-string 1) 'public) dtd))) |
| 486 | ((looking-at "SYSTEM\\s-+") | 563 | ((looking-at "SYSTEM\\s-+") |
| 487 | (goto-char (match-end 0)) | 564 | (goto-char (match-end 0)) |
| 488 | (unless (or (re-search-forward "\\='\\([^']*\\)'" nil t) | 565 | (unless (or (re-search-forward "\\='\\([^']*\\)'" nil t) |
| 489 | (re-search-forward "\\=\"\\([^\"]*\\)\"" nil t)) | 566 | (re-search-forward "\\=\"\\([^\"]*\\)\"" nil t)) |
| 490 | (error "XML: missing system id")) | 567 | (error "XML: Missing System ID")) |
| 491 | (push (list (match-string 1) 'system) dtd))) | 568 | (push (list (match-string 1) 'system) dtd))) |
| 492 | (skip-syntax-forward " ") | 569 | (skip-syntax-forward " ") |
| 493 | (if (eq ?> (char-after)) | 570 | (if (eq ?> (char-after)) |
| 494 | (forward-char) | 571 | (forward-char) |
| 495 | (skip-syntax-forward " ") | ||
| 496 | (if (not (eq (char-after) ?\[)) | 572 | (if (not (eq (char-after) ?\[)) |
| 497 | (error "XML: bad DTD") | 573 | (error "XML: Bad DTD") |
| 498 | (forward-char) | 574 | (forward-char) |
| 499 | ;; Parse the rest of the DTD | 575 | ;; Parse the rest of the DTD |
| 500 | ;; Fixme: Deal with ENTITY, ATTLIST, NOTATION, PIs. | 576 | ;; Fixme: Deal with ATTLIST, NOTATION, PIs. |
| 501 | (while (not (looking-at "\\s-*\\]")) | 577 | (while (not (looking-at "\\s-*\\]")) |
| 502 | (skip-syntax-forward " ") | 578 | (skip-syntax-forward " ") |
| 503 | (cond | 579 | (cond |
| @@ -521,11 +597,13 @@ This follows the rule [28] in the XML specifications." | |||
| 521 | ((string-match "^%[^;]+;[ \t\n\r]*$" type) ;; substitution | 597 | ((string-match "^%[^;]+;[ \t\n\r]*$" type) ;; substitution |
| 522 | nil) | 598 | nil) |
| 523 | (t | 599 | (t |
| 524 | (error "XML: Invalid element type in the DTD"))) | 600 | (if xml-validating-parser |
| 601 | error "XML: (Validity) Invalid element type in the DTD"))) | ||
| 525 | 602 | ||
| 526 | ;; rule [45]: the element declaration must be unique | 603 | ;; rule [45]: the element declaration must be unique |
| 527 | (if (assoc element dtd) | 604 | (if (and (assoc element dtd) |
| 528 | (error "XML: element declarations must be unique in a DTD (<%s>)" | 605 | xml-validating-parser) |
| 606 | (error "XML: (Validity) Element declarations must be unique in a DTD (<%s>)" | ||
| 529 | element)) | 607 | element)) |
| 530 | 608 | ||
| 531 | ;; Store the element in the DTD | 609 | ;; Store the element in the DTD |
| @@ -533,12 +611,49 @@ This follows the rule [28] in the XML specifications." | |||
| 533 | (goto-char end-pos)) | 611 | (goto-char end-pos)) |
| 534 | ((looking-at "<!--") | 612 | ((looking-at "<!--") |
| 535 | (search-forward "-->")) | 613 | (search-forward "-->")) |
| 536 | 614 | ((looking-at (concat "<!ENTITY[ \t\n\r]*\\(" xml-name-re | |
| 615 | "\\)[ \t\n\r]*\\(" xml-entity-value-re | ||
| 616 | "\\)[ \t\n\r]*>")) | ||
| 617 | (let ((name (buffer-substring (nth 2 (match-data)) | ||
| 618 | (nth 3 (match-data)))) | ||
| 619 | (value (buffer-substring (+ (nth 4 (match-data)) 1) | ||
| 620 | (- (nth 5 (match-data)) 1)))) | ||
| 621 | (goto-char (nth 1 (match-data))) | ||
| 622 | (setq xml-entity-alist | ||
| 623 | (append xml-entity-alist | ||
| 624 | (list (cons name | ||
| 625 | (with-temp-buffer | ||
| 626 | (insert value) | ||
| 627 | (goto-char (point-min)) | ||
| 628 | (xml-parse-fragment | ||
| 629 | xml-validating-parser | ||
| 630 | parse-ns)))))))) | ||
| 631 | ((or (looking-at (concat "<!ENTITY[ \t\n\r]+\\(" xml-name-re | ||
| 632 | "\\)[ \t\n\r]+SYSTEM[ \t\n\r]+" | ||
| 633 | "\\(\"[^\"]*\"\\|'[^']*'\\)[ \t\n\r]*>")) | ||
| 634 | (looking-at (concat "<!ENTITY[ \t\n\r]+\\(" xml-name-re | ||
| 635 | "\\)[ \t\n\r]+PUBLIC[ \t\n\r]+" | ||
| 636 | "\"[- \r\na-zA-Z0-9'()+,./:=?;!*#@$_%]*\"" | ||
| 637 | "\\|'[- \r\na-zA-Z0-9()+,./:=?;!*#@$_%]*'" | ||
| 638 | "[ \t\n\r]+\\(\"[^\"]*\"\\|'[^']*'\\)" | ||
| 639 | "[ \t\n\r]*>"))) | ||
| 640 | (let ((name (buffer-substring (nth 2 (match-data)) | ||
| 641 | (nth 3 (match-data)))) | ||
| 642 | (file (buffer-substring (+ (nth 4 (match-data)) 1) | ||
| 643 | (- (nth 5 (match-data)) 1)))) | ||
| 644 | (goto-char (nth 1 (match-data))) | ||
| 645 | (setq xml-entity-alist | ||
| 646 | (append xml-entity-alist | ||
| 647 | (list (cons name (with-temp-buffer | ||
| 648 | (insert-file-contents file) | ||
| 649 | (goto-char (point-min)) | ||
| 650 | (xml-parse-fragment | ||
| 651 | xml-validating-parser | ||
| 652 | parse-ns)))))))) | ||
| 537 | (t | 653 | (t |
| 538 | (error "XML: Invalid DTD item"))) | 654 | (error "XML: (Validity) Invalid DTD item"))))) |
| 539 | 655 | (if (looking-at "\\s-*]>") | |
| 540 | ;; Skip the end of the DTD | 656 | (goto-char (nth 1 (match-data))))) |
| 541 | (search-forward ">")))) | ||
| 542 | (nreverse dtd))) | 657 | (nreverse dtd))) |
| 543 | 658 | ||
| 544 | (defun xml-parse-elem-type (string) | 659 | (defun xml-parse-elem-type (string) |
| @@ -580,41 +695,72 @@ This follows the rule [28] in the XML specifications." | |||
| 580 | ;;** | 695 | ;;** |
| 581 | ;;******************************************************************* | 696 | ;;******************************************************************* |
| 582 | 697 | ||
| 583 | (eval-when-compile | ||
| 584 | (defvar str)) ; dynamic from replace-regexp-in-string | ||
| 585 | |||
| 586 | ;; Fixme: Take declared entities from the DTD when they're available. | ||
| 587 | (defun xml-substitute-entity (match) | ||
| 588 | "Subroutine of `xml-substitute-special'." | ||
| 589 | (save-match-data | ||
| 590 | (let ((match1 (match-string 1 str))) | ||
| 591 | (cond ((string= match1 "lt") "<") | ||
| 592 | ((string= match1 "gt") ">") | ||
| 593 | ((string= match1 "apos") "'") | ||
| 594 | ((string= match1 "quot") "\"") | ||
| 595 | ((string= match1 "amp") "&") | ||
| 596 | ((and (string-match "#\\([0-9]+\\)" match1) | ||
| 597 | (let ((c (decode-char | ||
| 598 | 'ucs | ||
| 599 | (string-to-number (match-string 1 match1))))) | ||
| 600 | (if c (string c))))) ; else unrepresentable | ||
| 601 | ((and (string-match "#x\\([[:xdigit:]]+\\)" match1) | ||
| 602 | (let ((c (decode-char | ||
| 603 | 'ucs | ||
| 604 | (string-to-number (match-string 1 match1) 16)))) | ||
| 605 | (if c (string c))))) | ||
| 606 | ;; Default to asis. Arguably, unrepresentable code points | ||
| 607 | ;; might be best replaced with U+FFFD. | ||
| 608 | (t match))))) | ||
| 609 | |||
| 610 | (defun xml-substitute-special (string) | 698 | (defun xml-substitute-special (string) |
| 611 | "Return STRING, after subsituting entity references." | 699 | "Return STRING, after subsituting entity references." |
| 612 | ;; This originally made repeated passes through the string from the | 700 | ;; This originally made repeated passes through the string from the |
| 613 | ;; beginning, which isn't correct, since then either "&amp;" or | 701 | ;; beginning, which isn't correct, since then either "&amp;" or |
| 614 | ;; "&amp;" won't DTRT. | 702 | ;; "&amp;" won't DTRT. |
| 615 | (replace-regexp-in-string "&\\([^;]+\\);" | ||
| 616 | #'xml-substitute-entity string t t)) | ||
| 617 | 703 | ||
| 704 | (let ((point 0) | ||
| 705 | children end-point) | ||
| 706 | (while (string-match "&\\([^;]+\\);" string point) | ||
| 707 | (setq end-point (match-end 0)) | ||
| 708 | (let* ((this-part (match-string 1 string)) | ||
| 709 | (prev-part (substring string point (match-beginning 0))) | ||
| 710 | (entity (assoc this-part xml-entity-alist)) | ||
| 711 | (expansion | ||
| 712 | (cond ((string-match "#\\([0-9]+\\)" this-part) | ||
| 713 | (let ((c (decode-char | ||
| 714 | 'ucs | ||
| 715 | (string-to-number (match-string 1 this-part))))) | ||
| 716 | (if c (string c)))) | ||
| 717 | ((string-match "#x\\([[:xdigit:]]+\\)" this-part) | ||
| 718 | (let ((c (decode-char | ||
| 719 | 'ucs | ||
| 720 | (string-to-number (match-string 1 this-part) 16)))) | ||
| 721 | (if c (string c)))) | ||
| 722 | (entity | ||
| 723 | (cdr entity)) | ||
| 724 | (t | ||
| 725 | (if xml-validating-parser | ||
| 726 | (error "XML: (Validity) Undefined entity `%s'" | ||
| 727 | (match-string 1 this-part))))))) | ||
| 728 | |||
| 729 | (cond ((null children) | ||
| 730 | (if (stringp expansion) | ||
| 731 | (setq children (concat prev-part expansion)) | ||
| 732 | (if (stringp (car (last expansion))) | ||
| 733 | (progn | ||
| 734 | (setq children | ||
| 735 | (list (concat prev-part (car expansion)) | ||
| 736 | (cdr expansion)))) | ||
| 737 | (setq children (append expansion prev-part))))) | ||
| 738 | ((stringp children) | ||
| 739 | (if (stringp expansion) | ||
| 740 | (setq children (concat children prev-part expansion)) | ||
| 741 | (setq children (list expansion (concat prev-part children))))) | ||
| 742 | ((and (stringp expansion) | ||
| 743 | (stringp (car children))) | ||
| 744 | (setcar children (concat prev-part expansion (car children)))) | ||
| 745 | ((stringp expansion) | ||
| 746 | (setq children (append (concat prev-part expansion) | ||
| 747 | children))) | ||
| 748 | ((stringp (car children)) | ||
| 749 | (setcar children (concat (car children) prev-part)) | ||
| 750 | (setq children (append expansion children))) | ||
| 751 | (t | ||
| 752 | (setq children (list expansion | ||
| 753 | prev-part | ||
| 754 | children)))) | ||
| 755 | (setq point end-point))) | ||
| 756 | (cond ((stringp children) | ||
| 757 | (concat children (substring string point))) | ||
| 758 | ((stringp (car (last children))) | ||
| 759 | (concat (car children) (substring string point))) | ||
| 760 | ((null children) | ||
| 761 | string) | ||
| 762 | (t | ||
| 763 | (nreverse children))))) | ||
| 618 | ;;******************************************************************* | 764 | ;;******************************************************************* |
| 619 | ;;** | 765 | ;;** |
| 620 | ;;** Printing a tree. | 766 | ;;** Printing a tree. |