aboutsummaryrefslogtreecommitdiffstats
path: root/lisp
diff options
context:
space:
mode:
authorMark A. Hershberger2004-07-09 14:22:33 +0000
committerMark A. Hershberger2004-07-09 14:22:33 +0000
commit6d12a4dfbcb5680fafac89769e1a2f111fdcc587 (patch)
treebb56765fcd16836c88d2c647a12bb13dfe936c5c /lisp
parentbcdf2143d312616d027880b16a6f43f4e0066792 (diff)
downloademacs-6d12a4dfbcb5680fafac89769e1a2f111fdcc587.tar.gz
emacs-6d12a4dfbcb5680fafac89769e1a2f111fdcc587.zip
2004-07-09 Mark A. Hershberger <mah@everybody.org>
* xml.el (xml-maybe-do-ns, xml-parse-tag): Produce elements in the form (("ns" . "element") (attr-list) children) instead of ((:ns . "element") (attr-list) children) in order to reduce the number of symbols used. (xml-skip-dtd): Change to use xml-parse-dtd but set xml-validating-parsing to nil. (xml-parse-dtd): Parse entity deleclarations in DOCTYPEs. (xml-substitute-entity): Remove in favor of new entity substitution. (xml-substitute-special): Rewrite in to substitute complex entities from DOCTYPE declarations. (xml-parse-fragment): Parse fragments from entity deleclarations. (xml-parse-region, xml-parse-tag, xml-parse-attlist) (xml-parse-dtd, xml-substitute-special): Make validity checks conditioned on xml-validating-parser. Add "Not Well Formed" to error messages about well-formedness.
Diffstat (limited to 'lisp')
-rw-r--r--lisp/ChangeLog19
-rw-r--r--lisp/xml.el400
2 files changed, 292 insertions, 127 deletions
diff --git a/lisp/ChangeLog b/lisp/ChangeLog
index b36e1bae10c..8c8dc99040f 100644
--- a/lisp/ChangeLog
+++ b/lisp/ChangeLog
@@ -1,3 +1,22 @@
12004-07-09 Mark A. Hershberger <mah@everybody.org>
2
3 * xml.el (xml-maybe-do-ns, xml-parse-tag): Produce elements in the
4 form
5 (("ns" . "element") (attr-list) children) instead of
6 ((:ns . "element") (attr-list) children) in order to reduce the
7 number of symbols used.
8 (xml-skip-dtd): Change to use xml-parse-dtd but set
9 xml-validating-parsing to nil.
10 (xml-parse-dtd): Parse entity deleclarations in DOCTYPEs.
11 (xml-substitute-entity): Remove in favor of new entity substitution.
12 (xml-substitute-special): Rewrite in to substitute complex
13 entities from DOCTYPE declarations.
14 (xml-parse-fragment): Parse fragments from entity deleclarations.
15 (xml-parse-region, xml-parse-tag, xml-parse-attlist)
16 (xml-parse-dtd, xml-substitute-special): Make validity checks
17 conditioned on xml-validating-parser. Add "Not Well Formed" to
18 error messages about well-formedness.
19
12004-07-08 Steven Tamm <steventamm@mac.com> 202004-07-08 Steven Tamm <steventamm@mac.com>
2 21
3 * term/mac-win.el (mac-scroll-ignore-events, mac-scroll-down) 22 * term/mac-win.el (mac-scroll-ignore-events, mac-scroll-down)
diff --git a/lisp/xml.el b/lisp/xml.el
index 03ef6346c70..993ef59b276 100644
--- a/lisp/xml.el
+++ b/lisp/xml.el
@@ -84,6 +84,20 @@
84;;** 84;;**
85;;******************************************************************* 85;;*******************************************************************
86 86
87(defvar xml-entity-alist
88 '(("lt" . "<")
89 ("gt" . ">")
90 ("apos" . "'")
91 ("quot" . "\"")
92 ("amp" . "&"))
93 "The defined entities. Entities are added to this when the DTD is parsed.")
94
95(defvar xml-sub-parser nil
96 "Dynamically set this to a non-nil value if you want to parse an XML fragment.")
97
98(defvar xml-validating-parser nil
99 "Set to non-nil to get validity checking.")
100
87(defsubst xml-node-name (node) 101(defsubst xml-node-name (node)
88 "Return the tag associated with NODE. 102 "Return the tag associated with NODE.
89Without namespace-aware parsing, the tag is a symbol. 103Without namespace-aware parsing, the tag is a symbol.
@@ -164,6 +178,48 @@ If PARSE-NS is non-nil, then QNAMES are expanded."
164 (kill-buffer (current-buffer))) 178 (kill-buffer (current-buffer)))
165 xml))) 179 xml)))
166 180
181
182(let* ((start-chars (concat ":[:alpha:]_"))
183 (name-chars (concat "-[:digit:]." start-chars))
184;;[3] S ::= (#x20 | #x9 | #xD | #xA)+
185 (whitespace "[ \t\n\r]"))
186;;[4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6]
187;; | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF]
188;; | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF]
189;; | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
190 (defvar xml-name-start-char-re (concat "[" start-chars "]"))
191;;[4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
192 (defvar xml-name-char-re (concat "[" name-chars "]"))
193;;[5] Name ::= NameStartChar (NameChar)*
194 (defvar xml-name-re (concat xml-name-start-char-re xml-name-char-re "*"))
195;;[6] Names ::= Name (#x20 Name)*
196 (defvar xml-names-re (concat xml-name-re "\\(?: " xml-name-re "\\)*"))
197;;[7] Nmtoken ::= (NameChar)+
198 (defvar xml-nmtoken-re (concat xml-name-char-re "+"))
199;;[8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
200 (defvar xml-nmtokens-re (concat xml-nmtoken-re "\\(?: " xml-name-re "\\)*"))
201;;[66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
202 (defvar xml-char-ref-re "\\(?:&#[0-9]+;\\|&#x[0-9a-fA-F]+;\\)")
203;;[68] EntityRef ::= '&' Name ';'
204 (defvar xml-entity-ref (concat "&" xml-name-re ";"))
205;;[69] PEReference ::= '%' Name ';'
206 (defvar xml-pe-reference-re (concat "%" xml-name-re ";"))
207;;[67] Reference ::= EntityRef | CharRef
208 (defvar xml-reference-re (concat "\\(?:" xml-entity-ref "\\|" xml-char-ref-re "\\)"))
209;;[9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"'
210;; | "'" ([^%&'] | PEReference | Reference)* "'"
211 (defvar xml-entity-value-re (concat "\\(?:\"\\(?:[^%&\"]\\|" xml-pe-reference-re
212 "\\|" xml-reference-re "\\)*\"\\|'\\(?:[^%&']\\|"
213 xml-pe-reference-re "\\|" xml-reference-re "\\)*'\\)")))
214;;[75] ExternalID ::= 'SYSTEM' S SystemLiteral
215;; | 'PUBLIC' S PubidLiteral S SystemLiteral
216;;[76] NDataDecl ::= S 'NDATA' S
217;;[73] EntityDef ::= EntityValue| (ExternalID NDataDecl?)
218;;[71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
219;;[74] PEDef ::= EntityValue | ExternalID
220;;[72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
221;;[70] EntityDecl ::= GEDecl | PEDecl
222
167;; Note that this is setup so that we can do whitespace-skipping with 223;; Note that this is setup so that we can do whitespace-skipping with
168;; `(skip-syntax-forward " ")', inter alia. Previously this was slow 224;; `(skip-syntax-forward " ")', inter alia. Previously this was slow
169;; compared with `re-search-forward', but that has been fixed. Also 225;; compared with `re-search-forward', but that has been fixed. Also
@@ -229,9 +285,9 @@ If PARSE-NS is non-nil, then QNAMES are expanded."
229 (progn 285 (progn
230 (forward-char -1) 286 (forward-char -1)
231 (setq result (xml-parse-tag parse-dtd parse-ns)) 287 (setq result (xml-parse-tag parse-dtd parse-ns))
232 (if (and xml result) 288 (if (and xml result (not xml-sub-parser))
233 ;; translation of rule [1] of XML specifications 289 ;; translation of rule [1] of XML specifications
234 (error "XML files can have only one toplevel tag") 290 (error "XML: (Not Well-Formed) Only one root tag allowed")
235 (cond 291 (cond
236 ((null result)) 292 ((null result))
237 ((and (listp (car result)) 293 ((and (listp (car result))
@@ -265,10 +321,24 @@ specify that the name shouldn't be given a namespace."
265 ;; matching cons in xml-ns. In which case we 321 ;; matching cons in xml-ns. In which case we
266 (ns (or (cdr (assoc (if special "xmlns" prefix) 322 (ns (or (cdr (assoc (if special "xmlns" prefix)
267 xml-ns)) 323 xml-ns))
268 :))) 324 "")))
269 (cons ns (if special "" lname))) 325 (cons ns (if special "" lname)))
270 (intern name))) 326 (intern name)))
271 327
328(defun xml-parse-fragment (&optional parse-dtd parse-ns)
329 "Parse xml-like fragments."
330 (let ((xml-sub-parser t)
331 children)
332 (while (not (eobp))
333 (let ((bit (xml-parse-tag
334 parse-dtd parse-ns)))
335 (if children
336 (setq children (append (list bit) children))
337 (if (stringp bit)
338 (setq children (list bit))
339 (setq children bit)))))
340 (reverse children)))
341
272(defun xml-parse-tag (&optional parse-dtd parse-ns) 342(defun xml-parse-tag (&optional parse-dtd parse-ns)
273 "Parse the tag at point. 343 "Parse the tag at point.
274If PARSE-DTD is non-nil, the DTD of the document, if any, is parsed and 344If PARSE-DTD is non-nil, the DTD of the document, if any, is parsed and
@@ -278,16 +348,17 @@ Returns one of:
278 - a list : the matching node 348 - a list : the matching node
279 - nil : the point is not looking at a tag. 349 - nil : the point is not looking at a tag.
280 - a pair : the first element is the DTD, the second is the node." 350 - a pair : the first element is the DTD, the second is the node."
281 (let ((xml-ns (if (consp parse-ns) 351 (let ((xml-validating-parser (or parse-dtd xml-validating-parser))
352 (xml-ns (if (consp parse-ns)
282 parse-ns 353 parse-ns
283 (if parse-ns 354 (if parse-ns
284 (list 355 (list
285 ;; Default for empty prefix is no namespace 356 ;; Default for empty prefix is no namespace
286 (cons "" :) 357 (cons "" "")
287 ;; "xml" namespace 358 ;; "xml" namespace
288 (cons "xml" :http://www.w3.org/XML/1998/namespace) 359 (cons "xml" "http://www.w3.org/XML/1998/namespace")
289 ;; We need to seed the xmlns namespace 360 ;; We need to seed the xmlns namespace
290 (cons "xmlns" :http://www.w3.org/2000/xmlns/)))))) 361 (cons "xmlns" "http://www.w3.org/2000/xmlns/"))))))
291 (cond 362 (cond
292 ;; Processing instructions (like the <?xml version="1.0"?> tag at the 363 ;; Processing instructions (like the <?xml version="1.0"?> tag at the
293 ;; beginning of a document). 364 ;; beginning of a document).
@@ -299,18 +370,15 @@ Returns one of:
299 ((looking-at "<!\\[CDATA\\[") 370 ((looking-at "<!\\[CDATA\\[")
300 (let ((pos (match-end 0))) 371 (let ((pos (match-end 0)))
301 (unless (search-forward "]]>" nil t) 372 (unless (search-forward "]]>" nil t)
302 (error "CDATA section does not end anywhere in the document")) 373 (error "XML: (Not Well Formed) CDATA section does not end anywhere in the document"))
303 (buffer-substring pos (match-beginning 0)))) 374 (buffer-substring pos (match-beginning 0))))
304 ;; DTD for the document 375 ;; DTD for the document
305 ((looking-at "<!DOCTYPE") 376 ((looking-at "<!DOCTYPE")
306 (let (dtd) 377 (let ((dtd (xml-parse-dtd parse-ns)))
307 (if parse-dtd 378 (skip-syntax-forward " ")
308 (setq dtd (xml-parse-dtd)) 379 (if xml-validating-parser
309 (xml-skip-dtd)) 380 (cons dtd (xml-parse-tag nil xml-ns))
310 (skip-syntax-forward " ") 381 (xml-parse-tag nil xml-ns))))
311 (if dtd
312 (cons dtd (xml-parse-tag nil xml-ns))
313 (xml-parse-tag nil xml-ns))))
314 ;; skip comments 382 ;; skip comments
315 ((looking-at "<!--") 383 ((looking-at "<!--")
316 (search-forward "-->") 384 (search-forward "-->")
@@ -332,65 +400,76 @@ Returns one of:
332 (when (consp xml-ns) 400 (when (consp xml-ns)
333 (dolist (attr attrs) 401 (dolist (attr attrs)
334 (when (and (consp (car attr)) 402 (when (and (consp (car attr))
335 (eq :http://www.w3.org/2000/xmlns/ 403 (equal "http://www.w3.org/2000/xmlns/"
336 (caar attr))) 404 (caar attr)))
337 (push (cons (cdar attr) (intern (concat ":" (cdr attr)))) 405 (push (cons (cdar attr) (cdr attr))
338 xml-ns)))) 406 xml-ns))))
339 407
340 (setq children (list attrs (xml-maybe-do-ns node-name "" xml-ns))) 408 (setq children (list attrs (xml-maybe-do-ns node-name "" xml-ns)))
341 409
342 ;; is this an empty element ? 410 ;; is this an empty element ?
343 (if (looking-at "/>") 411 (if (looking-at "/>")
344 (progn
345 (forward-char 2)
346 (nreverse children))
347
348 ;; is this a valid start tag ?
349 (if (eq (char-after) ?>)
350 (progn 412 (progn
351 (forward-char 1) 413 (forward-char 2)
352 ;; Now check that we have the right end-tag. Note that this
353 ;; one might contain spaces after the tag name
354 (let ((end (concat "</" node-name "\\s-*>")))
355 (while (not (looking-at end))
356 (cond
357 ((looking-at "</")
358 (error "XML: Invalid end tag (expecting %s) at pos %d"
359 node-name (point)))
360 ((= (char-after) ?<)
361 (let ((tag (xml-parse-tag nil xml-ns)))
362 (when tag
363 (push tag children))))
364 (t
365 (setq pos (point))
366 (search-forward "<")
367 (forward-char -1)
368 (let ((string (buffer-substring pos (point)))
369 (pos 0))
370
371 ;; Clean up the string. As per XML
372 ;; specifications, the XML processor should
373 ;; always pass the whole string to the
374 ;; application. But \r's should be replaced:
375 ;; http://www.w3.org/TR/2000/REC-xml-20001006#sec-line-ends
376 (while (string-match "\r\n?" string pos)
377 (setq string (replace-match "\n" t t string))
378 (setq pos (1+ (match-beginning 0))))
379
380 (setq string (xml-substitute-special string))
381 (setq children
382 (if (stringp (car children))
383 ;; The two strings were separated by a comment.
384 (cons (concat (car children) string)
385 (cdr children))
386 (cons string children))))))))
387
388 (goto-char (match-end 0))
389 (nreverse children)) 414 (nreverse children))
390 ;; This was an invalid start tag 415
391 (error "XML: Invalid attribute list"))))) 416 ;; is this a valid start tag ?
392 (t ;; This is not a tag. 417 (if (eq (char-after) ?>)
393 (error "XML: Invalid character"))))) 418 (progn
419 (forward-char 1)
420 ;; Now check that we have the right end-tag. Note that this
421 ;; one might contain spaces after the tag name
422 (let ((end (concat "</" node-name "\\s-*>")))
423 (while (not (looking-at end))
424 (cond
425 ((looking-at "</")
426 (error "XML: (Not Well-Formed) Invalid end tag (expecting %s) at pos %d"
427 node-name (point)))
428 ((= (char-after) ?<)
429 (let ((tag (xml-parse-tag nil xml-ns)))
430 (when tag
431 (push tag children))))
432 (t
433 (let ((expansion (xml-parse-string)))
434 (setq children
435 (if (stringp expansion)
436 (if (stringp (car children))
437 ;; The two strings were separated by a comment.
438 (setq children (append (concat (car children) expansion)
439 (cdr children)))
440 (setq children (append (list expansion) children)))
441 (setq children (append expansion children))))))))
442
443 (goto-char (match-end 0))
444 (nreverse children)))
445 ;; This was an invalid start tag (Expected ">", but didn't see it.)
446 (error "XML: (Well-Formed) Couldn't parse tag: %s"
447 (buffer-substring (- (point) 10) (+ (point) 1)))))))
448 (t ;; (Not one of PI, CDATA, Comment, End tag, or Start tag)
449 (unless xml-sub-parser ; Usually, we error out.
450 (error "XML: (Well-Formed) Invalid character"))
451
452 ;; However, if we're parsing incrementally, then we need to deal
453 ;; with stray CDATA.
454 (xml-parse-string)))))
455
456(defun xml-parse-string ()
457 "Parse the next whatever. Could be a string, or an element."
458 (let* ((pos (point))
459 (string (progn (if (search-forward "<" nil t)
460 (forward-char -1)
461 (goto-char (point-max)))
462 (buffer-substring pos (point)))))
463 ;; Clean up the string. As per XML specifications, the XML
464 ;; processor should always pass the whole string to the
465 ;; application. But \r's should be replaced:
466 ;; http://www.w3.org/TR/2000/REC-xml-20001006#sec-line-ends
467 (setq pos 0)
468 (while (string-match "\r\n?" string pos)
469 (setq string (replace-match "\n" t t string))
470 (setq pos (1+ (match-beginning 0))))
471
472 (xml-substitute-special string)))
394 473
395(defun xml-parse-attlist (&optional xml-ns) 474(defun xml-parse-attlist (&optional xml-ns)
396 "Return the attribute-list after point. 475 "Return the attribute-list after point.
@@ -412,18 +491,23 @@ Leave point at the first non-blank character after the tag."
412 (setq end-pos (match-end 0)) 491 (setq end-pos (match-end 0))
413 (if (looking-at "'\\([^']*\\)'") 492 (if (looking-at "'\\([^']*\\)'")
414 (setq end-pos (match-end 0)) 493 (setq end-pos (match-end 0))
415 (error "XML: Attribute values must be given between quotes"))) 494 (error "XML: (Not Well-Formed) Attribute values must be given between quotes")))
416 495
417 ;; Each attribute must be unique within a given element 496 ;; Each attribute must be unique within a given element
418 (if (assoc name attlist) 497 (if (assoc name attlist)
419 (error "XML: each attribute must be unique within an element")) 498 (error "XML: (Not Well-Formed) Each attribute must be unique within an element"))
420 499
421 ;; Multiple whitespace characters should be replaced with a single one 500 ;; Multiple whitespace characters should be replaced with a single one
422 ;; in the attributes 501 ;; in the attributes
423 (let ((string (match-string 1)) 502 (let ((string (match-string 1))
424 (pos 0)) 503 (pos 0))
425 (replace-regexp-in-string "\\s-\\{2,\\}" " " string) 504 (replace-regexp-in-string "\\s-\\{2,\\}" " " string)
426 (push (cons name (xml-substitute-special string)) attlist)) 505 (let ((expansion (xml-substitute-special string)))
506 (unless (stringp expansion)
507 ; We say this is the constraint. It is acctually that
508 ; external entities nor "<" can be in an attribute value.
509 (error "XML: (Not Well-Formed) Entities in attributes cannot expand into elements"))
510 (push (cons name expansion) attlist)))
427 511
428 (goto-char end-pos) 512 (goto-char end-pos)
429 (skip-syntax-forward " ")) 513 (skip-syntax-forward " "))
@@ -442,24 +526,16 @@ Leave point at the first non-blank character after the tag."
442(defun xml-skip-dtd () 526(defun xml-skip-dtd ()
443 "Skip the DTD at point. 527 "Skip the DTD at point.
444This follows the rule [28] in the XML specifications." 528This follows the rule [28] in the XML specifications."
445 (forward-char (length "<!DOCTYPE")) 529 (let ((xml-validating-parser nil))
446 (if (looking-at "\\s-*>") 530 (xml-parse-dtd)))
447 (error "XML: invalid DTD (excepting name of the document)"))
448 (condition-case nil
449 (progn
450 (forward-sexp)
451 (skip-syntax-forward " ")
452 (if (looking-at "\\[")
453 (re-search-forward "]\\s-*>")
454 (search-forward ">")))
455 (error (error "XML: No end to the DTD"))))
456 531
457(defun xml-parse-dtd () 532(defun xml-parse-dtd (&optional parse-ns)
458 "Parse the DTD at point." 533 "Parse the DTD at point."
459 (forward-char (eval-when-compile (length "<!DOCTYPE"))) 534 (forward-char (eval-when-compile (length "<!DOCTYPE")))
460 (skip-syntax-forward " ") 535 (skip-syntax-forward " ")
461 (if (looking-at ">") 536 (if (and (looking-at ">")
462 (error "XML: invalid DTD (excepting name of the document)")) 537 xml-validating-parser)
538 (error "XML: (Validity) Invalid DTD (expecting name of the document)"))
463 539
464 ;; Get the name of the document 540 ;; Get the name of the document
465 (looking-at xml-name-regexp) 541 (looking-at xml-name-regexp)
@@ -477,27 +553,27 @@ This follows the rule [28] in the XML specifications."
477 (re-search-forward 553 (re-search-forward
478 "\\='\\([[:space:][:alnum:]-()+,./:=?;!*#@$_%]*\\)'" 554 "\\='\\([[:space:][:alnum:]-()+,./:=?;!*#@$_%]*\\)'"
479 nil t)) 555 nil t))
480 (error "XML: missing public id")) 556 (error "XML: Missing Public ID"))
481 (let ((pubid (match-string 1))) 557 (let ((pubid (match-string 1)))
558 (skip-syntax-forward " ")
482 (unless (or (re-search-forward "\\='\\([^']*\\)'" nil t) 559 (unless (or (re-search-forward "\\='\\([^']*\\)'" nil t)
483 (re-search-forward "\\=\"\\([^\"]*\\)\"" nil t)) 560 (re-search-forward "\\=\"\\([^\"]*\\)\"" nil t))
484 (error "XML: missing system id")) 561 (error "XML: Missing System ID"))
485 (push (list pubid (match-string 1) 'public) dtd))) 562 (push (list pubid (match-string 1) 'public) dtd)))
486 ((looking-at "SYSTEM\\s-+") 563 ((looking-at "SYSTEM\\s-+")
487 (goto-char (match-end 0)) 564 (goto-char (match-end 0))
488 (unless (or (re-search-forward "\\='\\([^']*\\)'" nil t) 565 (unless (or (re-search-forward "\\='\\([^']*\\)'" nil t)
489 (re-search-forward "\\=\"\\([^\"]*\\)\"" nil t)) 566 (re-search-forward "\\=\"\\([^\"]*\\)\"" nil t))
490 (error "XML: missing system id")) 567 (error "XML: Missing System ID"))
491 (push (list (match-string 1) 'system) dtd))) 568 (push (list (match-string 1) 'system) dtd)))
492 (skip-syntax-forward " ") 569 (skip-syntax-forward " ")
493 (if (eq ?> (char-after)) 570 (if (eq ?> (char-after))
494 (forward-char) 571 (forward-char)
495 (skip-syntax-forward " ")
496 (if (not (eq (char-after) ?\[)) 572 (if (not (eq (char-after) ?\[))
497 (error "XML: bad DTD") 573 (error "XML: Bad DTD")
498 (forward-char) 574 (forward-char)
499 ;; Parse the rest of the DTD 575 ;; Parse the rest of the DTD
500 ;; Fixme: Deal with ENTITY, ATTLIST, NOTATION, PIs. 576 ;; Fixme: Deal with ATTLIST, NOTATION, PIs.
501 (while (not (looking-at "\\s-*\\]")) 577 (while (not (looking-at "\\s-*\\]"))
502 (skip-syntax-forward " ") 578 (skip-syntax-forward " ")
503 (cond 579 (cond
@@ -521,11 +597,13 @@ This follows the rule [28] in the XML specifications."
521 ((string-match "^%[^;]+;[ \t\n\r]*$" type) ;; substitution 597 ((string-match "^%[^;]+;[ \t\n\r]*$" type) ;; substitution
522 nil) 598 nil)
523 (t 599 (t
524 (error "XML: Invalid element type in the DTD"))) 600 (if xml-validating-parser
601 error "XML: (Validity) Invalid element type in the DTD")))
525 602
526 ;; rule [45]: the element declaration must be unique 603 ;; rule [45]: the element declaration must be unique
527 (if (assoc element dtd) 604 (if (and (assoc element dtd)
528 (error "XML: element declarations must be unique in a DTD (<%s>)" 605 xml-validating-parser)
606 (error "XML: (Validity) Element declarations must be unique in a DTD (<%s>)"
529 element)) 607 element))
530 608
531 ;; Store the element in the DTD 609 ;; Store the element in the DTD
@@ -533,12 +611,49 @@ This follows the rule [28] in the XML specifications."
533 (goto-char end-pos)) 611 (goto-char end-pos))
534 ((looking-at "<!--") 612 ((looking-at "<!--")
535 (search-forward "-->")) 613 (search-forward "-->"))
536 614 ((looking-at (concat "<!ENTITY[ \t\n\r]*\\(" xml-name-re
615 "\\)[ \t\n\r]*\\(" xml-entity-value-re
616 "\\)[ \t\n\r]*>"))
617 (let ((name (buffer-substring (nth 2 (match-data))
618 (nth 3 (match-data))))
619 (value (buffer-substring (+ (nth 4 (match-data)) 1)
620 (- (nth 5 (match-data)) 1))))
621 (goto-char (nth 1 (match-data)))
622 (setq xml-entity-alist
623 (append xml-entity-alist
624 (list (cons name
625 (with-temp-buffer
626 (insert value)
627 (goto-char (point-min))
628 (xml-parse-fragment
629 xml-validating-parser
630 parse-ns))))))))
631 ((or (looking-at (concat "<!ENTITY[ \t\n\r]+\\(" xml-name-re
632 "\\)[ \t\n\r]+SYSTEM[ \t\n\r]+"
633 "\\(\"[^\"]*\"\\|'[^']*'\\)[ \t\n\r]*>"))
634 (looking-at (concat "<!ENTITY[ \t\n\r]+\\(" xml-name-re
635 "\\)[ \t\n\r]+PUBLIC[ \t\n\r]+"
636 "\"[- \r\na-zA-Z0-9'()+,./:=?;!*#@$_%]*\""
637 "\\|'[- \r\na-zA-Z0-9()+,./:=?;!*#@$_%]*'"
638 "[ \t\n\r]+\\(\"[^\"]*\"\\|'[^']*'\\)"
639 "[ \t\n\r]*>")))
640 (let ((name (buffer-substring (nth 2 (match-data))
641 (nth 3 (match-data))))
642 (file (buffer-substring (+ (nth 4 (match-data)) 1)
643 (- (nth 5 (match-data)) 1))))
644 (goto-char (nth 1 (match-data)))
645 (setq xml-entity-alist
646 (append xml-entity-alist
647 (list (cons name (with-temp-buffer
648 (insert-file-contents file)
649 (goto-char (point-min))
650 (xml-parse-fragment
651 xml-validating-parser
652 parse-ns))))))))
537 (t 653 (t
538 (error "XML: Invalid DTD item"))) 654 (error "XML: (Validity) Invalid DTD item")))))
539 655 (if (looking-at "\\s-*]>")
540 ;; Skip the end of the DTD 656 (goto-char (nth 1 (match-data)))))
541 (search-forward ">"))))
542 (nreverse dtd))) 657 (nreverse dtd)))
543 658
544(defun xml-parse-elem-type (string) 659(defun xml-parse-elem-type (string)
@@ -580,41 +695,72 @@ This follows the rule [28] in the XML specifications."
580;;** 695;;**
581;;******************************************************************* 696;;*******************************************************************
582 697
583(eval-when-compile
584 (defvar str)) ; dynamic from replace-regexp-in-string
585
586;; Fixme: Take declared entities from the DTD when they're available.
587(defun xml-substitute-entity (match)
588 "Subroutine of `xml-substitute-special'."
589 (save-match-data
590 (let ((match1 (match-string 1 str)))
591 (cond ((string= match1 "lt") "<")
592 ((string= match1 "gt") ">")
593 ((string= match1 "apos") "'")
594 ((string= match1 "quot") "\"")
595 ((string= match1 "amp") "&")
596 ((and (string-match "#\\([0-9]+\\)" match1)
597 (let ((c (decode-char
598 'ucs
599 (string-to-number (match-string 1 match1)))))
600 (if c (string c))))) ; else unrepresentable
601 ((and (string-match "#x\\([[:xdigit:]]+\\)" match1)
602 (let ((c (decode-char
603 'ucs
604 (string-to-number (match-string 1 match1) 16))))
605 (if c (string c)))))
606 ;; Default to asis. Arguably, unrepresentable code points
607 ;; might be best replaced with U+FFFD.
608 (t match)))))
609
610(defun xml-substitute-special (string) 698(defun xml-substitute-special (string)
611 "Return STRING, after subsituting entity references." 699 "Return STRING, after subsituting entity references."
612 ;; This originally made repeated passes through the string from the 700 ;; This originally made repeated passes through the string from the
613 ;; beginning, which isn't correct, since then either "&amp;amp;" or 701 ;; beginning, which isn't correct, since then either "&amp;amp;" or
614 ;; "&#38;amp;" won't DTRT. 702 ;; "&#38;amp;" won't DTRT.
615 (replace-regexp-in-string "&\\([^;]+\\);"
616 #'xml-substitute-entity string t t))
617 703
704 (let ((point 0)
705 children end-point)
706 (while (string-match "&\\([^;]+\\);" string point)
707 (setq end-point (match-end 0))
708 (let* ((this-part (match-string 1 string))
709 (prev-part (substring string point (match-beginning 0)))
710 (entity (assoc this-part xml-entity-alist))
711 (expansion
712 (cond ((string-match "#\\([0-9]+\\)" this-part)
713 (let ((c (decode-char
714 'ucs
715 (string-to-number (match-string 1 this-part)))))
716 (if c (string c))))
717 ((string-match "#x\\([[:xdigit:]]+\\)" this-part)
718 (let ((c (decode-char
719 'ucs
720 (string-to-number (match-string 1 this-part) 16))))
721 (if c (string c))))
722 (entity
723 (cdr entity))
724 (t
725 (if xml-validating-parser
726 (error "XML: (Validity) Undefined entity `%s'"
727 (match-string 1 this-part)))))))
728
729 (cond ((null children)
730 (if (stringp expansion)
731 (setq children (concat prev-part expansion))
732 (if (stringp (car (last expansion)))
733 (progn
734 (setq children
735 (list (concat prev-part (car expansion))
736 (cdr expansion))))
737 (setq children (append expansion prev-part)))))
738 ((stringp children)
739 (if (stringp expansion)
740 (setq children (concat children prev-part expansion))
741 (setq children (list expansion (concat prev-part children)))))
742 ((and (stringp expansion)
743 (stringp (car children)))
744 (setcar children (concat prev-part expansion (car children))))
745 ((stringp expansion)
746 (setq children (append (concat prev-part expansion)
747 children)))
748 ((stringp (car children))
749 (setcar children (concat (car children) prev-part))
750 (setq children (append expansion children)))
751 (t
752 (setq children (list expansion
753 prev-part
754 children))))
755 (setq point end-point)))
756 (cond ((stringp children)
757 (concat children (substring string point)))
758 ((stringp (car (last children)))
759 (concat (car children) (substring string point)))
760 ((null children)
761 string)
762 (t
763 (nreverse children)))))
618;;******************************************************************* 764;;*******************************************************************
619;;** 765;;**
620;;** Printing a tree. 766;;** Printing a tree.