diff options
| author | Vinicius Jose Latorre | 2004-04-05 01:48:53 +0000 |
|---|---|---|
| committer | Vinicius Jose Latorre | 2004-04-05 01:48:53 +0000 |
| commit | 728df3d91cf2590d25999343b4f0daf417809828 (patch) | |
| tree | 2e91e939aec1c9089871632d7aafc18373d0b6ba | |
| parent | ab19c39b45e8225b32a44ae2aa335c5870dd3e98 (diff) | |
| download | emacs-728df3d91cf2590d25999343b4f0daf417809828.tar.gz emacs-728df3d91cf2590d25999343b4f0daf417809828.zip | |
Parser for DTD (Data Type Definition for XML).
| -rw-r--r-- | lisp/progmodes/ebnf-dtd.el | 1350 |
1 files changed, 1350 insertions, 0 deletions
diff --git a/lisp/progmodes/ebnf-dtd.el b/lisp/progmodes/ebnf-dtd.el new file mode 100644 index 00000000000..45c8abd0fad --- /dev/null +++ b/lisp/progmodes/ebnf-dtd.el | |||
| @@ -0,0 +1,1350 @@ | |||
| 1 | ;;; ebnf-dtd.el --- parser for DTD (Document Type Definition for XML) | |
| 2 | |||
| 3 | ;; Copyright (C) 2004 Free Software Foundation, Inc. | |
| 4 | |||
| 5 | ;; Author: Vinicius Jose Latorre <viniciusjl@ig.com.br> | ||
| 6 | ;; Maintainer: Vinicius Jose Latorre <viniciusjl@ig.com.br> | ||
| 7 | ;; Time-stamp: <2004/04/04 21:50:16 vinicius> | ||
| 8 | ;; Keywords: wp, ebnf, PostScript | ||
| 9 | ;; Version: 1.0 | ||
| 10 | |||
| 11 | ;; This file is part of GNU Emacs. | ||
| 12 | |||
| 13 | ;; GNU Emacs is free software; you can redistribute it and/or modify | ||
| 14 | ;; it under the terms of the GNU General Public License as published by | ||
| 15 | ;; the Free Software Foundation; either version 2, or (at your option) | ||
| 16 | ;; any later version. | ||
| 17 | |||
| 18 | ;; GNU Emacs is distributed in the hope that it will be useful, | ||
| 19 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 20 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 21 | ;; GNU General Public License for more details. | ||
| 22 | |||
| 23 | ;; You should have received a copy of the GNU General Public License | ||
| 24 | ;; along with GNU Emacs; see the file COPYING. If not, write to the | ||
| 25 | ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 26 | ;; Boston, MA 02111-1307, USA. | ||
| 27 | |||
| 28 | ;;; Commentary: | ||
| 29 | |||
| 30 | ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | ||
| 31 | ;; | ||
| 32 | ;; | ||
| 33 | ;; This is part of ebnf2ps package. | ||
| 34 | ;; | ||
| 35 | ;; This package defines a parser for DTD (Document Type Definition for XML). | |
| 36 | ;; | ||
| 37 | ;; See ebnf2ps.el for documentation. | ||
| 38 | ;; | ||
| 39 | ;; | ||
| 40 | ;; DTD Syntax | ||
| 41 | ;; ---------- | ||
| 42 | ;; | ||
| 43 | ;; See the URLs: | ||
| 44 | ;; `http://www.w3.org/TR/2004/REC-xml-20040204/' | ||
| 45 | ;; (Extensible Markup Language (XML) 1.0 (Third Edition)) | ||
| 46 | ;; `http://www.w3.org/TR/html40/' | ||
| 47 | ;; (HTML 4.01 Specification) | ||
| 48 | ;; `http://www.w3.org/TR/NOTE-html-970421' | ||
| 49 | ;; (HTML DTD with support for Style Sheets) | ||
| 50 | ;; | ||
| 51 | ;; | ||
| 52 | ;; /* Document */ | ||
| 53 | ;; | ||
| 54 | ;; document ::= prolog element Misc* | ||
| 55 | ;; /* Note that *only* the prolog will be parsed */ | ||
| 56 | ;; | ||
| 57 | ;; | ||
| 58 | ;; /* Characters */ | ||
| 59 | ;; | ||
| 60 | ;; Char ::= #x9 | #xA | #xD | ||
| 61 | ;; | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] | ||
| 62 | ;; /* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. */ | ||
| 63 | ;; | ||
| 64 | ;; /* NOTE: | ||
| 65 | ;; | ||
| 66 | ;; Document authors are encouraged to avoid "compatibility characters", as | ||
| 67 | ;; defined in section 6.8 of [Unicode] (see also D21 in section 3.6 of | ||
| 68 | ;; [Unicode3]). The characters defined in the following ranges are also | ||
| 69 | ;; discouraged. They are either control characters or permanently undefined | ||
| 70 | ;; Unicode characters: | ||
| 71 | ;; | ||
| 72 | ;; [#x7F-#x84], [#x86-#x9F], [#xFDD0-#xFDDF], | ||
| 73 | ;; [#x1FFFE-#x1FFFF], [#x2FFFE-#x2FFFF], [#x3FFFE-#x3FFFF], | |
| 74 | ;; [#x4FFFE-#x4FFFF], [#x5FFFE-#x5FFFF], [#x6FFFE-#x6FFFF], | |
| 75 | ;; [#x7FFFE-#x7FFFF], [#x8FFFE-#x8FFFF], [#x9FFFE-#x9FFFF], | |
| 76 | ;; [#xAFFFE-#xAFFFF], [#xBFFFE-#xBFFFF], [#xCFFFE-#xCFFFF], | |
| 77 | ;; [#xDFFFE-#xDFFFF], [#xEFFFE-#xEFFFF], [#xFFFFE-#xFFFFF], | |
| 78 | ;; [#x10FFFE-#x10FFFF]. */ | |
| 79 | ;; | ||
| 80 | ;; | ||
| 81 | ;; /* White Space */ | ||
| 82 | ;; | ||
| 83 | ;; S ::= (#x20 | #x9 | #xD | #xA)+ | ||
| 84 | ;; | ||
| 85 | ;; | ||
| 86 | ;; /* Names and Tokens */ | ||
| 87 | ;; | ||
| 88 | ;; NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | ||
| 89 | ;; | CombiningChar | Extender | ||
| 90 | ;; | ||
| 91 | ;; Name ::= (Letter | '_' | ':') (NameChar)* | ||
| 92 | ;; | ||
| 93 | ;; Names ::= Name (#x20 Name)* | ||
| 94 | ;; | ||
| 95 | ;; Nmtoken ::= (NameChar)+ | ||
| 96 | ;; | ||
| 97 | ;; Nmtokens ::= Nmtoken (#x20 Nmtoken)* | ||
| 98 | ;; | ||
| 99 | ;; | ||
| 100 | ;; /* Literals */ | ||
| 101 | ;; | ||
| 102 | ;; EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | ||
| 103 | ;; | "'" ([^%&'] | PEReference | Reference)* "'" | ||
| 104 | ;; | ||
| 105 | ;; AttValue ::= '"' ([^<&"] | Reference)* '"' | ||
| 106 | ;; | "'" ([^<&'] | Reference)* "'" | ||
| 107 | ;; | ||
| 108 | ;; SystemLiteral ::= ('"' [^"]* '"') | ||
| 109 | ;; | ("'" [^']* "'") | ||
| 110 | ;; | ||
| 111 | ;; PubidLiteral ::= '"' PubidChar* '"' | ||
| 112 | ;; | "'" (PubidChar - "'")* "'" | ||
| 113 | ;; | ||
| 114 | ;; PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] | ||
| 115 | ;; | ||
| 116 | ;; /* NOTE: | ||
| 117 | ;; | ||
| 118 | ;; Although the EntityValue production allows the definition of a general | ||
| 119 | ;; entity consisting of a single explicit < in the literal (e.g., <!ENTITY | ||
| 120 | ;; mylt "<">), it is strongly advised to avoid this practice since any | ||
| 121 | ;; reference to that entity will cause a well-formedness error. */ | ||
| 122 | ;; | ||
| 123 | ;; | ||
| 124 | ;; /* Character Data */ | ||
| 125 | ;; | ||
| 126 | ;; CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) | ||
| 127 | ;; | ||
| 128 | ;; | ||
| 129 | ;; /* Comments */ | ||
| 130 | ;; | ||
| 131 | ;; Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' | ||
| 132 | ;; | ||
| 133 | ;; | ||
| 134 | ;; /* Processing Instructions */ | ||
| 135 | ;; | ||
| 136 | ;; PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' | ||
| 137 | ;; | ||
| 138 | ;; PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) | ||
| 139 | ;; | ||
| 140 | ;; | ||
| 141 | ;; /* CDATA Sections */ | ||
| 142 | ;; | ||
| 143 | ;; CDSect ::= CDStart CData CDEnd | ||
| 144 | ;; | ||
| 145 | ;; CDStart ::= '<![CDATA[' | ||
| 146 | ;; | ||
| 147 | ;; CData ::= (Char* - (Char* ']]>' Char*)) | ||
| 148 | ;; | ||
| 149 | ;; CDEnd ::= ']]>' | ||
| 150 | ;; | ||
| 151 | ;; | ||
| 152 | ;; /* Prolog */ | ||
| 153 | ;; | ||
| 154 | ;; prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? | ||
| 155 | ;; | ||
| 156 | ;; XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' | ||
| 157 | ;; | ||
| 158 | ;; VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') | ||
| 159 | ;; | ||
| 160 | ;; Eq ::= S? '=' S? | ||
| 161 | ;; | ||
| 162 | ;; VersionNum ::= '1.0' | ||
| 163 | ;; | ||
| 164 | ;; Misc ::= Comment | PI | S | ||
| 165 | ;; | ||
| 166 | ;; | ||
| 167 | ;; /* Document Type Definition */ | ||
| 168 | ;; | ||
| 169 | ;; doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? | ||
| 170 | ;; ('[' intSubset ']' S?)? '>' | ||
| 171 | ;; [VC: Root Element Type] | ||
| 172 | ;; [WFC: External Subset] | ||
| 173 | ;; | ||
| 174 | ;; DeclSep ::= PEReference | S | ||
| 175 | ;; [WFC: PE Between Declarations] | ||
| 176 | ;; | ||
| 177 | ;; intSubset ::= (markupdecl | DeclSep)* | ||
| 178 | ;; | ||
| 179 | ;; markupdecl ::= elementdecl | AttlistDecl | EntityDecl | ||
| 180 | ;; | NotationDecl | PI | Comment | ||
| 181 | ;; [VC: Proper Declaration/PE Nesting] | ||
| 182 | ;; [WFC: PEs in Internal Subset] | ||
| 183 | ;; | ||
| 184 | ;; | ||
| 185 | ;; /* External Subset */ | ||
| 186 | ;; | ||
| 187 | ;; extSubset ::= TextDecl? extSubsetDecl | ||
| 188 | ;; | ||
| 189 | ;; extSubsetDecl ::= ( markupdecl | conditionalSect | DeclSep)* | ||
| 190 | ;; | ||
| 191 | ;; | ||
| 192 | ;; /* Standalone Document Declaration */ | ||
| 193 | ;; | ||
| 194 | ;; SDDecl ::= S 'standalone' Eq | ||
| 195 | ;; (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) | ||
| 196 | ;; [VC: Standalone Document Declaration] | ||
| 197 | ;; | ||
| 198 | ;; | ||
| 199 | ;; /* Element */ | ||
| 200 | ;; | ||
| 201 | ;; element ::= EmptyElemTag | STag content ETag | ||
| 202 | ;; [WFC: Element Type Match] | ||
| 203 | ;; [VC: Element Valid] | ||
| 204 | ;; | ||
| 205 | ;; | ||
| 206 | ;; /* Start-tag */ | ||
| 207 | ;; | ||
| 208 | ;; STag ::= '<' Name (S Attribute)* S? '>' | ||
| 209 | ;; [WFC: Unique Att Spec] | ||
| 210 | ;; | ||
| 211 | ;; Attribute ::= Name Eq AttValue | ||
| 212 | ;; [VC: Attribute Value Type] | ||
| 213 | ;; [WFC: No External Entity References] | ||
| 214 | ;; [WFC: No < in Attribute Values] | ||
| 215 | ;; | ||
| 216 | ;; | ||
| 217 | ;; /* End-tag */ | ||
| 218 | ;; | ||
| 219 | ;; ETag ::= '</' Name S? '>' | ||
| 220 | ;; | ||
| 221 | ;; | ||
| 222 | ;; /* Content of Elements */ | ||
| 223 | ;; | ||
| 224 | ;; content ::= CharData? | ||
| 225 | ;; ((element | Reference | CDSect | PI | Comment) CharData?)* | ||
| 226 | ;; | ||
| 227 | ;; | ||
| 228 | ;; /* Tags for Empty Elements */ | ||
| 229 | ;; | ||
| 230 | ;; EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' | ||
| 231 | ;; [WFC: Unique Att Spec] | ||
| 232 | ;; | ||
| 233 | ;; | ||
| 234 | ;; /* Element Type Declaration */ | ||
| 235 | ;; | ||
| 236 | ;; elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>' | ||
| 237 | ;; [VC: Unique Element Type Declaration] | ||
| 238 | ;; | ||
| 239 | ;; contentspec ::= 'EMPTY' | 'ANY' | Mixed | children | ||
| 240 | ;; | ||
| 241 | ;; | ||
| 242 | ;; /* Element-content Models */ | ||
| 243 | ;; | ||
| 244 | ;; children ::= (choice | seq) ('?' | '*' | '+')? | ||
| 245 | ;; | ||
| 246 | ;; cp ::= (Name | choice | seq) ('?' | '*' | '+')? | ||
| 247 | ;; | ||
| 248 | ;; choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')' | ||
| 249 | ;; [VC: Proper Group/PE Nesting] | ||
| 250 | ;; | ||
| 251 | ;; seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' | ||
| 252 | ;; [VC: Proper Group/PE Nesting] | ||
| 253 | ;; | ||
| 254 | ;; | ||
| 255 | ;; /* Mixed-content Declaration */ | ||
| 256 | ;; | ||
| 257 | ;; Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | ||
| 258 | ;; | '(' S? '#PCDATA' S? ')' | ||
| 259 | ;; [VC: Proper Group/PE Nesting] | ||
| 260 | ;; [VC: No Duplicate Types] | ||
| 261 | ;; | ||
| 262 | ;; | ||
| 263 | ;; /* Attribute-list Declaration */ | ||
| 264 | ;; | ||
| 265 | ;; AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>' | ||
| 266 | ;; | ||
| 267 | ;; AttDef ::= S Name S AttType S DefaultDecl | ||
| 268 | ;; | ||
| 269 | ;; | ||
| 270 | ;; /* Attribute Types */ | ||
| 271 | ;; | ||
| 272 | ;; AttType ::= StringType | TokenizedType | EnumeratedType | ||
| 273 | ;; | ||
| 274 | ;; StringType ::= 'CDATA' | ||
| 275 | ;; | ||
| 276 | ;; TokenizedType ::= 'ID' [VC: ID] | ||
| 277 | ;; [VC: One ID per Element Type] | ||
| 278 | ;; [VC: ID Attribute Default] | ||
| 279 | ;; | 'IDREF' [VC: IDREF] | ||
| 280 | ;; | 'IDREFS' [VC: IDREF] | ||
| 281 | ;; | 'ENTITY' [VC: Entity Name] | ||
| 282 | ;; | 'ENTITIES' [VC: Entity Name] | ||
| 283 | ;; | 'NMTOKEN' [VC: Name Token] | ||
| 284 | ;; | 'NMTOKENS' [VC: Name Token] | ||
| 285 | ;; | ||
| 286 | ;; | ||
| 287 | ;; /* Enumerated Attribute Types */ | ||
| 288 | ;; | ||
| 289 | ;; EnumeratedType ::= NotationType | Enumeration | ||
| 290 | ;; | ||
| 291 | ;; NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' | ||
| 292 | ;; [VC: Notation Attributes] | ||
| 293 | ;; [VC: One Notation Per Element Type] | ||
| 294 | ;; [VC: No Notation on Empty Element] | ||
| 295 | ;; [VC: No Duplicate Tokens] | ||
| 296 | ;; | ||
| 297 | ;; Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' | ||
| 298 | ;; [VC: Enumeration] | ||
| 299 | ;; [VC: No Duplicate Tokens] | ||
| 300 | ;; | ||
| 301 | ;; | ||
| 302 | ;; /* Attribute Defaults */ | ||
| 303 | ;; | ||
| 304 | ;; DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | ||
| 305 | ;; | (('#FIXED' S)? AttValue) | ||
| 306 | ;; [VC: Required Attribute] | ||
| 307 | ;; [VC: Attribute Default Value Syntactically Correct] | ||
| 308 | ;; [WFC: No < in Attribute Values] | ||
| 309 | ;; [VC: Fixed Attribute Default] | ||
| 310 | ;; | ||
| 311 | ;; | ||
| 312 | ;; /* Conditional Section */ | ||
| 313 | ;; | ||
| 314 | ;; conditionalSect ::= includeSect | ignoreSect | ||
| 315 | ;; | ||
| 316 | ;; includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>' | ||
| 317 | ;; [VC: Proper Conditional Section/PE Nesting] | ||
| 318 | ;; | ||
| 319 | ;; ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>' | ||
| 320 | ;; [VC: Proper Conditional Section/PE Nesting] | ||
| 321 | ;; | ||
| 322 | ;; ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)* | ||
| 323 | ;; | ||
| 324 | ;; Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) | ||
| 325 | ;; | ||
| 326 | ;; | ||
| 327 | ;; /* Character Reference */ | ||
| 328 | ;; | ||
| 329 | ;; CharRef ::= '&#' [0-9]+ ';' | ||
| 330 | ;; | '&#x' [0-9a-fA-F]+ ';' | ||
| 331 | ;; [WFC: Legal Character] | ||
| 332 | ;; | ||
| 333 | ;; | ||
| 334 | ;; /* Entity Reference */ | ||
| 335 | ;; | ||
| 336 | ;; Reference ::= EntityRef | CharRef | ||
| 337 | ;; | ||
| 338 | ;; EntityRef ::= '&' Name ';' | ||
| 339 | ;; [WFC: Entity Declared] | ||
| 340 | ;; [VC: Entity Declared] | ||
| 341 | ;; [WFC: Parsed Entity] | ||
| 342 | ;; [WFC: No Recursion] | ||
| 343 | ;; | ||
| 344 | ;; PEReference ::= '%' Name ';' | ||
| 345 | ;; [VC: Entity Declared] | ||
| 346 | ;; [WFC: No Recursion] | ||
| 347 | ;; [WFC: In DTD] | ||
| 348 | ;; | ||
| 349 | ;; | ||
| 350 | ;; /* Entity Declaration */ | ||
| 351 | ;; | ||
| 352 | ;; EntityDecl ::= GEDecl | PEDecl | ||
| 353 | ;; | ||
| 354 | ;; GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' | ||
| 355 | ;; | ||
| 356 | ;; PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' | ||
| 357 | ;; | ||
| 358 | ;; EntityDef ::= EntityValue | (ExternalID NDataDecl?) | ||
| 359 | ;; | ||
| 360 | ;; PEDef ::= EntityValue | ExternalID | ||
| 361 | ;; | ||
| 362 | ;; | ||
| 363 | ;; /* External Entity Declaration */ | ||
| 364 | ;; | ||
| 365 | ;; ExternalID ::= 'SYSTEM' S SystemLiteral | ||
| 366 | ;; | 'PUBLIC' S PubidLiteral S SystemLiteral | ||
| 367 | ;; | ||
| 368 | ;; NDataDecl ::= S 'NDATA' S Name | ||
| 369 | ;; [VC: Notation Declared] | ||
| 370 | ;; | ||
| 371 | ;; | ||
| 372 | ;; /* Text Declaration */ | ||
| 373 | ;; | ||
| 374 | ;; TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' | ||
| 375 | ;; | ||
| 376 | ;; | ||
| 377 | ;; /* Well-Formed External Parsed Entity */ | ||
| 378 | ;; | ||
| 379 | ;; extParsedEnt ::= TextDecl? content | ||
| 380 | ;; | ||
| 381 | ;; | ||
| 382 | ;; /* Encoding Declaration */ | ||
| 383 | ;; | ||
| 384 | ;; EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) | ||
| 385 | ;; | ||
| 386 | ;; EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* | ||
| 387 | ;; /* Encoding name contains only Latin characters */ | ||
| 388 | ;; | ||
| 389 | ;; | ||
| 390 | ;; /* Notation Declarations */ | ||
| 391 | ;; | ||
| 392 | ;; NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' | ||
| 393 | ;; [VC: Unique Notation Name] | ||
| 394 | ;; | ||
| 395 | ;; PublicID ::= 'PUBLIC' S PubidLiteral | ||
| 396 | ;; | ||
| 397 | ;; | ||
| 398 | ;; /* Characters */ | ||
| 399 | ;; | ||
| 400 | ;; Letter ::= BaseChar | Ideographic | ||
| 401 | ;; | ||
| 402 | ;; BaseChar ::= [#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6] | ||
| 403 | ;; | [#x00D8-#x00F6] | [#x00F8-#x00FF] | [#x0100-#x0131] | ||
| 404 | ;; | [#x0134-#x013E] | [#x0141-#x0148] | [#x014A-#x017E] | ||
| 405 | ;; | [#x0180-#x01C3] | [#x01CD-#x01F0] | [#x01F4-#x01F5] | ||
| 406 | ;; | [#x01FA-#x0217] | [#x0250-#x02A8] | [#x02BB-#x02C1] | ||
| 407 | ;; | #x0386 | [#x0388-#x038A] | #x038C | ||
| 408 | ;; | [#x038E-#x03A1] | [#x03A3-#x03CE] | [#x03D0-#x03D6] | ||
| 409 | ;; | #x03DA | #x03DC | #x03DE | ||
| 410 | ;; | #x03E0 | [#x03E2-#x03F3] | [#x0401-#x040C] | ||
| 411 | ;; | [#x040E-#x044F] | [#x0451-#x045C] | [#x045E-#x0481] | ||
| 412 | ;; | [#x0490-#x04C4] | [#x04C7-#x04C8] | [#x04CB-#x04CC] | ||
| 413 | ;; | [#x04D0-#x04EB] | [#x04EE-#x04F5] | [#x04F8-#x04F9] | ||
| 414 | ;; | [#x0531-#x0556] | #x0559 | [#x0561-#x0586] | ||
| 415 | ;; | [#x05D0-#x05EA] | [#x05F0-#x05F2] | [#x0621-#x063A] | ||
| 416 | ;; | [#x0641-#x064A] | [#x0671-#x06B7] | [#x06BA-#x06BE] | ||
| 417 | ;; | [#x06C0-#x06CE] | [#x06D0-#x06D3] | #x06D5 | ||
| 418 | ;; | [#x06E5-#x06E6] | [#x0905-#x0939] | #x093D | ||
| 419 | ;; | [#x0958-#x0961] | [#x0985-#x098C] | [#x098F-#x0990] | ||
| 420 | ;; | [#x0993-#x09A8] | [#x09AA-#x09B0] | #x09B2 | ||
| 421 | ;; | [#x09B6-#x09B9] | [#x09DC-#x09DD] | [#x09DF-#x09E1] | ||
| 422 | ;; | [#x09F0-#x09F1] | [#x0A05-#x0A0A] | [#x0A0F-#x0A10] | ||
| 423 | ;; | [#x0A13-#x0A28] | [#x0A2A-#x0A30] | [#x0A32-#x0A33] | ||
| 424 | ;; | [#x0A35-#x0A36] | [#x0A38-#x0A39] | [#x0A59-#x0A5C] | ||
| 425 | ;; | #x0A5E | [#x0A72-#x0A74] | [#x0A85-#x0A8B] | ||
| 426 | ;; | #x0A8D | [#x0A8F-#x0A91] | [#x0A93-#x0AA8] | ||
| 427 | ;; | [#x0AAA-#x0AB0] | [#x0AB2-#x0AB3] | [#x0AB5-#x0AB9] | ||
| 428 | ;; | #x0ABD | #x0AE0 | [#x0B05-#x0B0C] | ||
| 429 | ;; | [#x0B0F-#x0B10] | [#x0B13-#x0B28] | [#x0B2A-#x0B30] | ||
| 430 | ;; | [#x0B32-#x0B33] | [#x0B36-#x0B39] | #x0B3D | ||
| 431 | ;; | [#x0B5C-#x0B5D] | [#x0B5F-#x0B61] | [#x0B85-#x0B8A] | ||
| 432 | ;; | [#x0B8E-#x0B90] | [#x0B92-#x0B95] | [#x0B99-#x0B9A] | ||
| 433 | ;; | #x0B9C | [#x0B9E-#x0B9F] | [#x0BA3-#x0BA4] | ||
| 434 | ;; | [#x0BA8-#x0BAA] | [#x0BAE-#x0BB5] | [#x0BB7-#x0BB9] | ||
| 435 | ;; | [#x0C05-#x0C0C] | [#x0C0E-#x0C10] | [#x0C12-#x0C28] | ||
| 436 | ;; | [#x0C2A-#x0C33] | [#x0C35-#x0C39] | [#x0C60-#x0C61] | ||
| 437 | ;; | [#x0C85-#x0C8C] | [#x0C8E-#x0C90] | [#x0C92-#x0CA8] | ||
| 438 | ;; | [#x0CAA-#x0CB3] | [#x0CB5-#x0CB9] | #x0CDE | ||
| 439 | ;; | [#x0CE0-#x0CE1] | [#x0D05-#x0D0C] | [#x0D0E-#x0D10] | ||
| 440 | ;; | [#x0D12-#x0D28] | [#x0D2A-#x0D39] | [#x0D60-#x0D61] | ||
| 441 | ;; | [#x0E01-#x0E2E] | #x0E30 | [#x0E32-#x0E33] | ||
| 442 | ;; | [#x0E40-#x0E45] | [#x0E81-#x0E82] | #x0E84 | ||
| 443 | ;; | [#x0E87-#x0E88] | #x0E8A | #x0E8D | ||
| 444 | ;; | [#x0E94-#x0E97] | [#x0E99-#x0E9F] | [#x0EA1-#x0EA3] | ||
| 445 | ;; | #x0EA5 | #x0EA7 | [#x0EAA-#x0EAB] | ||
| 446 | ;; | [#x0EAD-#x0EAE] | #x0EB0 | [#x0EB2-#x0EB3] | ||
| 447 | ;; | #x0EBD | [#x0EC0-#x0EC4] | [#x0F40-#x0F47] | ||
| 448 | ;; | [#x0F49-#x0F69] | [#x10A0-#x10C5] | [#x10D0-#x10F6] | ||
| 449 | ;; | #x1100 | [#x1102-#x1103] | [#x1105-#x1107] | ||
| 450 | ;; | #x1109 | [#x110B-#x110C] | [#x110E-#x1112] | ||
| 451 | ;; | #x113C | #x113E | #x1140 | ||
| 452 | ;; | #x114C | #x114E | #x1150 | ||
| 453 | ;; | [#x1154-#x1155] | #x1159 | [#x115F-#x1161] | ||
| 454 | ;; | #x1163 | #x1165 | #x1167 | ||
| 455 | ;; | #x1169 | [#x116D-#x116E] | [#x1172-#x1173] | ||
| 456 | ;; | #x1175 | #x119E | #x11A8 | ||
| 457 | ;; | #x11AB | [#x11AE-#x11AF] | [#x11B7-#x11B8] | ||
| 458 | ;; | #x11BA | [#x11BC-#x11C2] | #x11EB | ||
| 459 | ;; | #x11F0 | #x11F9 | [#x1E00-#x1E9B] | ||
| 460 | ;; | [#x1EA0-#x1EF9] | [#x1F00-#x1F15] | [#x1F18-#x1F1D] | ||
| 461 | ;; | [#x1F20-#x1F45] | [#x1F48-#x1F4D] | [#x1F50-#x1F57] | ||
| 462 | ;; | #x1F59 | #x1F5B | #x1F5D | ||
| 463 | ;; | [#x1F5F-#x1F7D] | [#x1F80-#x1FB4] | [#x1FB6-#x1FBC] | ||
| 464 | ;; | #x1FBE | [#x1FC2-#x1FC4] | [#x1FC6-#x1FCC] | ||
| 465 | ;; | [#x1FD0-#x1FD3] | [#x1FD6-#x1FDB] | [#x1FE0-#x1FEC] | ||
| 466 | ;; | [#x1FF2-#x1FF4] | [#x1FF6-#x1FFC] | #x2126 | ||
| 467 | ;; | [#x212A-#x212B] | #x212E | [#x2180-#x2182] | ||
| 468 | ;; | [#x3041-#x3094] | [#x30A1-#x30FA] | [#x3105-#x312C] | ||
| 469 | ;; | [#xAC00-#xD7A3] | ||
| 470 | ;; | ||
| 471 | ;; Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029] | ||
| 472 | ;; | ||
| 473 | ;; CombiningChar ::= [#x0300-#x0345] | [#x0360-#x0361] | [#x0483-#x0486] | ||
| 474 | ;; | [#x0591-#x05A1] | [#x05A3-#x05B9] | [#x05BB-#x05BD] | ||
| 475 | ;; | #x05BF | [#x05C1-#x05C2] | #x05C4 | ||
| 476 | ;; | [#x064B-#x0652] | #x0670 | [#x06D6-#x06DC] | ||
| 477 | ;; | [#x06DD-#x06DF] | [#x06E0-#x06E4] | [#x06E7-#x06E8] | ||
| 478 | ;; | [#x06EA-#x06ED] | [#x0901-#x0903] | #x093C | ||
| 479 | ;; | [#x093E-#x094C] | #x094D | [#x0951-#x0954] | ||
| 480 | ;; | [#x0962-#x0963] | [#x0981-#x0983] | #x09BC | ||
| 481 | ;; | #x09BE | #x09BF | [#x09C0-#x09C4] | ||
| 482 | ;; | [#x09C7-#x09C8] | [#x09CB-#x09CD] | #x09D7 | ||
| 483 | ;; | [#x09E2-#x09E3] | #x0A02 | #x0A3C | ||
| 484 | ;; | #x0A3E | #x0A3F | [#x0A40-#x0A42] | ||
| 485 | ;; | [#x0A47-#x0A48] | [#x0A4B-#x0A4D] | [#x0A70-#x0A71] | ||
| 486 | ;; | [#x0A81-#x0A83] | #x0ABC | [#x0ABE-#x0AC5] | ||
| 487 | ;; | [#x0AC7-#x0AC9] | [#x0ACB-#x0ACD] | [#x0B01-#x0B03] | ||
| 488 | ;; | #x0B3C | [#x0B3E-#x0B43] | [#x0B47-#x0B48] | ||
| 489 | ;; | [#x0B4B-#x0B4D] | [#x0B56-#x0B57] | [#x0B82-#x0B83] | ||
| 490 | ;; | [#x0BBE-#x0BC2] | [#x0BC6-#x0BC8] | [#x0BCA-#x0BCD] | ||
| 491 | ;; | #x0BD7 | [#x0C01-#x0C03] | [#x0C3E-#x0C44] | ||
| 492 | ;; | [#x0C46-#x0C48] | [#x0C4A-#x0C4D] | [#x0C55-#x0C56] | ||
| 493 | ;; | [#x0C82-#x0C83] | [#x0CBE-#x0CC4] | [#x0CC6-#x0CC8] | ||
| 494 | ;; | [#x0CCA-#x0CCD] | [#x0CD5-#x0CD6] | [#x0D02-#x0D03] | ||
| 495 | ;; | [#x0D3E-#x0D43] | [#x0D46-#x0D48] | [#x0D4A-#x0D4D] | ||
| 496 | ;; | #x0D57 | #x0E31 | [#x0E34-#x0E3A] | ||
| 497 | ;; | [#x0E47-#x0E4E] | #x0EB1 | [#x0EB4-#x0EB9] | ||
| 498 | ;; | [#x0EBB-#x0EBC] | [#x0EC8-#x0ECD] | [#x0F18-#x0F19] | ||
| 499 | ;; | #x0F35 | #x0F37 | #x0F39 | ||
| 500 | ;; | #x0F3E | #x0F3F | [#x0F71-#x0F84] | ||
| 501 | ;; | [#x0F86-#x0F8B] | [#x0F90-#x0F95] | #x0F97 | ||
| 502 | ;; | [#x0F99-#x0FAD] | [#x0FB1-#x0FB7] | #x0FB9 | ||
| 503 | ;; | [#x20D0-#x20DC] | #x20E1 | [#x302A-#x302F] | ||
| 504 | ;; | #x3099 | #x309A | ||
| 505 | ;; | ||
| 506 | ;; Digit ::= [#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9] | ||
| 507 | ;; | [#x0966-#x096F] | [#x09E6-#x09EF] | [#x0A66-#x0A6F] | ||
| 508 | ;; | [#x0AE6-#x0AEF] | [#x0B66-#x0B6F] | [#x0BE7-#x0BEF] | ||
| 509 | ;; | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] | [#x0D66-#x0D6F] | ||
| 510 | ;; | [#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29] | ||
| 511 | ;; | ||
| 512 | ;; Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | #x0E46 | #x0EC6 | ||
| 513 | ;; | #x3005 | [#x3031-#x3035] | [#x309D-#x309E] | [#x30FC-#x30FE] | ||
| 514 | ;; | ||
| 515 | ;; | ||
| 516 | ;; NOTES | ||
| 517 | ;; ----- | ||
| 518 | ;; | ||
| 519 | ;; At the moment, only the `<!ELEMENT' generates a syntactic chart. The | |
| 520 | ;; `<!ATTLIST', `<!NOTATION' and `<!ENTITY' are syntactically checked but they | |
| 521 | ;; don't generate a syntactic chart. | |
| 522 | ;; | ||
| 523 | ;; Besides the syntax above, ebnf-dtd also accepts a `pure' dtd file. An | ||
| 524 | ;; example of a `pure' dtd file is: | ||
| 525 | ;; | ||
| 526 | ;; <?xml version="1.0" encoding="UTF-8"?> | ||
| 527 | ;; <!-- | ||
| 528 | ;; The main element. | ||
| 529 | ;; --> | ||
| 530 | ;; <!ELEMENT workflow (registers?, trigger-functions?, initial-actions, | ||
| 531 | ;; steps, splits?, joins?)> | ||
| 532 | ;; <!-- | ||
| 533 | ;; An action that can be executed (id must be unique among actions for | ||
| 534 | ;; the enclosing step). | ||
| 535 | ;; Used in: actions | ||
| 536 | ;; --> | ||
| 537 | ;; <!ELEMENT action (restrict-to, validators?, pre-functions?, results, | ||
| 538 | ;; post-functions?)> | ||
| 539 | ;; <!ATTLIST action | ||
| 540 | ;; id CDATA #REQUIRED | ||
| 541 | ;; name CDATA #REQUIRED | ||
| 542 | ;; > | ||
| 543 | ;; | ||
| 544 | ;; | ||
| 545 | ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | ||
| 546 | |||
| 547 | ;;; Code: | ||
| 548 | |||
| 549 | |||
| 550 | (require 'ebnf-otz) | ||
| 551 | |||
| 552 | |||
(defvar ebnf-dtd-lex nil
  "Value associated with the token most recently returned by `ebnf-dtd-lex'.
The parser functions read it right after calling the `ebnf-dtd-lex'
function, e.g. to get the text of a name or of a string.")
| 555 | |||
| 556 | |||
| 557 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | ||
| 558 | ;; Syntactic analyzer | ||
| 559 | |||
| 560 | |||
| 561 | ;;; document ::= prolog element Misc* | ||
| 562 | ;;; /* Note that *only* the prolog will be parsed */ | ||
| 563 | |||
(defun ebnf-dtd-parser (start)
  "Parse a DTD starting at buffer position START and return the rule list.

Only the prolog is parsed: the optional XML declaration, processing
instructions, and then either a DOCTYPE internal subset or a bare
sequence of markup declarations (a `pure' DTD file).  Every non-nil
rule returned by `ebnf-dtd-intsubset' is handed to
`ebnf-add-empty-rule-list' and, when that returns nil, pushed onto
the result, so the list is in most-recent-first order.

`ebnf-limit' is used to compute the progress percentage that is
reported through `ebnf-message-float'.  Point is moved back to its
original position when parsing completes normally.

Signal an error if the input is empty, if the DOCTYPE is not
properly terminated, or if a subset terminator shows up where it is
not expected."
  (let ((total (+ (- ebnf-limit start) 1))
        (bias (1- start))
        (origin (point))
        rule-list token rule the-end)
    (goto-char start)
    (setq token (ebnf-dtd-lex))
    (and (eq token 'end-of-input)
         (error "Empty DTD file"))
    ;; The prolog returns (TOKEN . END-TOKEN): TOKEN is the first token of
    ;; the declarations and END-TOKEN is the token that terminates them.
    (setq token (ebnf-dtd-prolog token))
    (unless (eq (car token) 'end-prolog)
      (setq the-end (cdr token)
            token (car token))
      (while (not (eq token the-end))
        ;; `ebnf-dtd-intsubset' returns `end-subset' and `end-of-input'
        ;; unchanged and without consuming input.  If one of them shows up
        ;; while the *other* one is the expected terminator, the loop would
        ;; never finish, so report the problem instead.
        (cond ((eq token 'end-of-input)
               (error "Missing end of DOCTYPE internal subset"))
              ((eq token 'end-subset)
               (error "Unexpected end of DOCTYPE internal subset")))
        (ebnf-message-float
         "Parsing...%s%%"
         (/ (* (- (point) bias) 100.0) total))
        (setq token (ebnf-dtd-intsubset token)
              rule (cdr token)
              token (car token))
        ;; annotations come back with a nil rule; only real rules are kept
        (or (null rule)
            (ebnf-add-empty-rule-list rule)
            (setq rule-list (cons rule rule-list))))
      ;; after `]' of the internal subset there must be the closing `>'
      (or (eq the-end 'end-of-input)
          (eq (ebnf-dtd-lex) 'end-decl)
          (error "Missing end of DOCTYPE"))
      ;; adjust message, 'cause *only* prolog will be parsed
      (ebnf-message-float "Parsing...%s%%" 100.0))
    (goto-char origin)
    rule-list))
| 595 | |||
| 596 | |||
| 597 | ;;; prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? | ||
| 598 | ;;; | ||
| 599 | ;;; XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' | ||
| 600 | ;;; | ||
| 601 | ;;; VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') | ||
| 602 | ;;; | ||
| 603 | ;;; Eq ::= S? '=' S? | ||
| 604 | ;;; | ||
| 605 | ;;; VersionNum ::= '1.0' | ||
| 606 | ;;; | ||
| 607 | ;;; Misc ::= Comment | PI | S | ||
| 608 | ;;; | ||
| 609 | ;;; EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) | ||
| 610 | ;;; | ||
| 611 | ;;; EncName ::= [A-Za-z] ([-A-Za-z0-9._])* | ||
| 612 | ;;; /* Encoding name contains only Latin characters */ | ||
| 613 | ;;; | ||
| 614 | ;;; SDDecl ::= S 'standalone' Eq | ||
| 615 | ;;; (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) | ||
| 616 | ;;; | ||
| 617 | ;;; doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? | ||
| 618 | ;;; ('[' intSubset ']' S?)? '>' | ||
| 619 | |||
| 620 | |||
(defun ebnf-dtd-prolog (token)
  "Parse the XML prolog whose first token is TOKEN.

The prolog is an optional `<?xml ...?>' declaration (version,
optional encoding, optional standalone), followed by any number of
processing instructions and then, optionally, a DOCTYPE declaration.

Return a cons (NEXT . END) where NEXT is the first token to be
handled by the caller and END is the token that terminates the
declarations:

  (TOKEN . end-subset)        DOCTYPE with an internal subset; TOKEN
                              is the first token after `['.
  (end-prolog . end-subset)   DOCTYPE without internal subset, or
                              nothing to parse at all.
  (TOKEN . end-of-input)      no DOCTYPE: TOKEN is a markup declaration
                              of a `pure' DTD file.

Signal an error if the XML declaration or the DOCTYPE is malformed."
  (when (and (eq token 'begin-pi) (string= ebnf-dtd-lex "xml"))
    ;; version = "1.0"
    (setq token (ebnf-dtd-attribute (ebnf-dtd-lex) 'version-attr
                                    "^1\\.0$" "XML version"))
    ;; ( encoding = "encoding name" )?
    (setq token (ebnf-dtd-attribute-optional
                 token 'encoding-attr
                 "^[A-Za-z][-A-Za-z0-9._]*$" "XML encoding"))
    ;; ( standalone = ( "yes" | "no" ) )?
    ;; In Emacs regexps alternation is `\\|'; a bare `|' is a literal
    ;; character, so the alternatives must be written with `\\(...\\|...\\)'.
    (setq token (ebnf-dtd-attribute-optional
                 token 'standalone-attr
                 "^\\(yes\\|no\\)$" "XML standalone"))
    (or (eq token 'end-pi)
        (error "Missing end of XML processing instruction"))
    ;; The XML declaration was consumed: fetch the token that follows it.
    ;; This is done only here, so that when there is no XML declaration
    ;; the TOKEN received from the caller is not thrown away.
    (setq token (ebnf-dtd-lex)))
  ;; processing instructions
  (setq token (ebnf-dtd-pi token))
  (cond
   ;; DOCTYPE
   ((eq token 'doctype-decl)
    (or (eq (ebnf-dtd-lex) 'name)
        (error "Document type name is missing"))
    (cons (if (eq (ebnf-dtd-externalid) 'begin-subset)
              (ebnf-dtd-lex)
            'end-prolog)
          'end-subset))
   ;; `pure' DTD file: declarations without the enclosing DOCTYPE
   ((memq token '(element-decl attlist-decl entity-decl notation-decl))
    (cons token 'end-of-input))
   (t
    '(end-prolog . end-subset))
   ))
| 652 | |||
| 653 | |||
(defun ebnf-dtd-attribute (token attr match attr-name)
  "Parse a mandatory attribute; TOKEN must be the attribute token ATTR.
Signal an error mentioning ATTR-NAME if TOKEN is not ATTR.  Otherwise
delegate to `ebnf-dtd-attribute-optional', passing MATCH along, and
return whatever it returns."
  (unless (eq token attr)
    (error "%s attribute is missing" attr-name))
  (ebnf-dtd-attribute-optional token attr match attr-name))
| 658 | |||
| 659 | |||
(defun ebnf-dtd-attribute-optional (token attr match attr-name)
  "Parse an optional attribute introduced by the token ATTR.

If TOKEN is ATTR, the next two tokens must be `equal' and `string',
and the string text (left in the variable `ebnf-dtd-lex') must match
the regexp MATCH; otherwise an error mentioning ATTR-NAME is
signaled.  In that case return the token that follows the string.

If TOKEN is not ATTR, return TOKEN unchanged and consume nothing."
  (when (eq token attr)
    (or (and (eq (ebnf-dtd-lex) 'equal)
             (eq (ebnf-dtd-lex) 'string)
             (string-match match ebnf-dtd-lex))
        ;; ATTR-NAME already carries its own prefix (e.g. "XML version"),
        ;; so the message uses the same shape as the "is missing" message
        ;; in `ebnf-dtd-attribute' instead of prepending "XML" again.
        (error "%s attribute is invalid" attr-name))
    (setq token (ebnf-dtd-lex)))
  token)
| 668 | |||
| 669 | |||
| 670 | ;;; ExternalID ::= 'SYSTEM' S SystemLiteral | ||
| 671 | ;;; | 'PUBLIC' S PubidLiteral S SystemLiteral | ||
| 672 | |||
| 673 | |||
(defun ebnf-dtd-externalid (&optional token)
  "Parse an external identifier (SYSTEM or PUBLIC form).

When TOKEN is non-nil it is taken as the first token and the
external id is mandatory: an error is signaled if TOKEN is neither
`system' nor `public'.  When TOKEN is nil the first token is read
with `ebnf-dtd-lex' and the external id is optional: a token other
than `system' or `public' is simply returned.

After a SYSTEM or PUBLIC identifier, return the value of
`ebnf-dtd-systemliteral'."
  (let ((required (and token t))
        (first (or token (ebnf-dtd-lex))))
    (cond ((eq first 'public)
           ;; PUBLIC takes a public id literal and then a system literal
           (ebnf-dtd-pubidliteral)
           (ebnf-dtd-systemliteral))
          ((eq first 'system)
           (ebnf-dtd-systemliteral))
          (required
           (error "Missing `SYSTEM' or `PUBLIC' in external id"))
          (t
           first))))
| 686 | |||
| 687 | |||
| 688 | ;;; SystemLiteral ::= ('"' [^"]* '"') | ||
| 689 | ;;; | ("'" [^']* "'") | ||
| 690 | |||
| 691 | |||
(defun ebnf-dtd-systemliteral ()
  "Parse a system literal and return the token that follows it.
The next token must be `string'; otherwise signal an error."
  (unless (eq (ebnf-dtd-lex) 'string)
    (error "System identifier is invalid"))
  (ebnf-dtd-lex))
| 696 | |||
| 697 | |||
| 698 | ;;; PubidLiteral ::= '"' PubidChar* '"' | ||
| 699 | ;;; | "'" (PubidChar - "'")* "'" | ||
| 700 | ;;; | ||
| 701 | ;;; PubidChar ::= [-'()+,./:=?;!*#@$_%\n\r a-zA-Z0-9] | ||
| 702 | |||
| 703 | |||
(defun ebnf-dtd-pubidliteral ()
  "Parse a public identifier literal.
The next token must be `string' and its text (in the variable
`ebnf-dtd-lex') must consist only of PubidChar characters;
otherwise signal an error.  On success return the non-nil result of
`string-match'.  The token after the literal is NOT read."
  (let ((valid (and (eq (ebnf-dtd-lex) 'string)
                    (string-match "^[-'()+,./:=?;!*#@$_%\n\r a-zA-Z0-9]*$"
                                  ebnf-dtd-lex))))
    (or valid
        (error "Public identifier is invalid"))))
| 709 | |||
| 710 | |||
| 711 | ;;; PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' | ||
| 712 | ;;; | ||
| 713 | ;;; PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) | ||
| 714 | |||
| 715 | |||
(defun ebnf-dtd-pi (token)
  "Skip consecutive processing instructions, starting at TOKEN.

While TOKEN is `begin-pi', check that the PI target (in the variable
`ebnf-dtd-lex') is not `xml' in any letter case, discard every token
up to and including `end-pi', and read the next token.

Return the first token that is not `begin-pi'; TOKEN itself is
returned when it does not start a processing instruction.

Signal an error if the target is `XML' or if the input ends before
the processing instruction is closed."
  (while (eq token 'begin-pi)
    (and (string-match "^[xX][mM][lL]$" ebnf-dtd-lex)
         (error "Processing instruction name can not be `XML'"))
    ;; Discard the PI contents.  Stop with an error at end of input:
    ;; NOTE(review): this assumes the lexer keeps returning `end-of-input'
    ;; once the text is exhausted, in which case waiting only for `end-pi'
    ;; would never terminate -- confirm against `ebnf-dtd-lex'.
    (while (not (eq (setq token (ebnf-dtd-lex)) 'end-pi))
      (and (eq token 'end-of-input)
           (error "Missing end of processing instruction")))
    (setq token (ebnf-dtd-lex)))
  token)
| 723 | |||
| 724 | |||
| 725 | ;;; doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? | ||
| 726 | ;;; ('[' intSubset ']' S?)? '>' | ||
| 727 | ;;; | ||
| 728 | ;;; intSubset ::= (markupdecl | DeclSep)* | ||
| 729 | ;;; | ||
| 730 | ;;; DeclSep ::= PEReference | S | ||
| 731 | ;;; | ||
| 732 | ;;; markupdecl ::= elementdecl | AttlistDecl | EntityDecl | ||
| 733 | ;;; | NotationDecl | PI | Comment | ||
| 734 | |||
| 735 | |||
(defun ebnf-dtd-intsubset (token)
  "Parse one item of the internal subset, starting at TOKEN.

Leading processing instructions are skipped with `ebnf-dtd-pi'.
Then dispatch on the token:

  `element-decl'   result of `ebnf-dtd-elementdecl'
  `attlist-decl'   result of `ebnf-dtd-attlistdecl'
  `entity-decl'    result of `ebnf-dtd-entitydecl'
  `notation-decl'  result of `ebnf-dtd-notationdecl'
  `pe-ref'         (NEXT-TOKEN), i.e. a cons with a nil cdr
  `end-subset' or `end-of-input'
                   (TOKEN), returned without reading more input

Any other token signals an error."
  ;; PI - Processing Instruction
  (when (eq token 'begin-pi)
    (setq token (ebnf-dtd-pi token)))
  (cond
   ((eq token 'element-decl)            ; rule
    (ebnf-dtd-elementdecl))
   ((eq token 'attlist-decl)            ; annotation
    (ebnf-dtd-attlistdecl))
   ((eq token 'entity-decl)             ; annotation
    (ebnf-dtd-entitydecl))
   ((eq token 'notation-decl)           ; annotation
    (ebnf-dtd-notationdecl))
   ((eq token 'pe-ref)                  ; annotation
    (list (ebnf-dtd-lex)))
   ((memq token '(end-subset end-of-input))
    (list token))
   (t
    (error "Invalid DOCTYPE element"))))
| 756 | |||
| 757 | |||
| 758 | ;;; elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>' | ||
| 759 | ;;; | ||
| 760 | ;;; contentspec ::= 'EMPTY' | 'ANY' | Mixed | children | ||
| 761 | ;;; | ||
| 762 | ;;; Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | ||
| 763 | ;;; | '(' S? '#PCDATA' S? ')' | ||
| 764 | ;;; | ||
| 765 | ;;; children ::= (choice | seq) ('?' | '*' | '+')? | ||
| 766 | ;;; | ||
| 767 | ;;; choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')' | ||
| 768 | ;;; | ||
| 769 | ;;; seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' | ||
| 770 | ;;; | ||
| 771 | ;;; cp ::= (Name | choice | seq) ('?' | '*' | '+')? | ||
| 772 | |||
| 773 | |||
(defun ebnf-dtd-elementdecl ()
  "Parse an ELEMENT declaration, after `<!ELEMENT' has been read.

The pending value of `ebnf-action' is captured for the production and
then reset to nil.  Return a cons (TOKEN . PRODUCTION), where TOKEN is
the token following the closing `>'.  Signal an error on a bad element
name, bad content specification or missing `>'."
  (let ((action ebnf-action)
        name body)
    (setq ebnf-action nil)
    (unless (eq (ebnf-dtd-lex) 'name)
      (error "Invalid ELEMENT name"))
    (setq name ebnf-dtd-lex)
    ;; contentspec: each branch yields (NEXT-TOKEN . NODE)
    (let ((token (ebnf-dtd-lex)))
      (setq body
            (cond
             ;; Mixed | children
             ((eq token 'begin-group)
              (let ((first (ebnf-dtd-lex)))
                (if (eq first 'pcdata)
                    (ebnf-dtd-mixed)
                  (ebnf-dtd-children first))))
             ;; EMPTY | ANY: build the terminal before reading further,
             ;; since the next lexer call may replace the token text.
             ((memq token '(empty any))
              (let ((term (ebnf-make-terminal ebnf-dtd-lex)))
                (cons (ebnf-dtd-lex) term)))
             (t
              (error "Invalid ELEMENT content")))))
    (unless (eq (car body) 'end-decl)
      (error "Missing `>' in ELEMENT declaration"))
    (ebnf-eps-add-production name)
    (let ((next (ebnf-dtd-lex)))
      (cons next
            (ebnf-make-production name (cdr body) action)))))
| 798 | |||
| 799 | |||
| 800 | ;;; Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | ||
| 801 | ;;; | '(' S? '#PCDATA' S? ')' | ||
| 802 | |||
| 803 | |||
(defun ebnf-dtd-mixed ()
  "Parse mixed content, after `(' and `#PCDATA' have been read.

Accept `| Name' repetitions, the closing `)' and the trailing `*'.
The `*' is mandatory when at least one name was given and optional
otherwise.  Return the result of `ebnf-token-alternative', which
`ebnf-dtd-elementdecl' uses as (TOKEN . NODE), where TOKEN is the token
following the mixed content.  Signal an error on a bad name, a missing
`)' or a missing `*'."
  ;; ALT collects the alternatives in reverse order; it starts with the
  ;; terminal for #PCDATA, whose text is still in `ebnf-dtd-lex'.
  (let* ((alt             (cons (ebnf-make-terminal ebnf-dtd-lex) nil))
         (token           (ebnf-dtd-lex))
         (has-alternative (eq token 'alternative)))
    (while (eq token 'alternative)
      (or (eq (ebnf-dtd-lex) 'name)
          (error "Invalid name"))
      ;; Store a terminal node, like `ebnf-dtd-cp' does for names, not
      ;; the bare name string.
      (setq alt   (cons (ebnf-make-terminal ebnf-dtd-lex) alt)
            token (ebnf-dtd-lex)))
    (or (eq token 'end-group)
        (error "Missing `)'"))
    (setq token (ebnf-dtd-lex))
    (cond
     ;; `)*' -- required after alternatives, but `(#PCDATA)*' is also
     ;; valid according to the Mixed production.
     ((eq token 'zero-or-more)
      (setq token (ebnf-dtd-lex)))
     (has-alternative
      (error "Missing `*'")))
    ;; NOTE(review): the repetition implied by `)*' is not represented
    ;; in the returned node; confirm whether that is intended.
    (ebnf-token-alternative alt (cons token nil))))
| 819 | |||
| 820 | |||
| 821 | ;;; children ::= (choice | seq) ('?' | '*' | '+')? | ||
| 822 | |||
| 823 | |||
(defun ebnf-dtd-children (token)
  "Parse element content whose group was opened just before TOKEN.

Parse the group with `ebnf-dtd-choice-seq', then hand it to
`ebnf-dtd-operators' to apply an optional `?', `*' or `+' suffix.
Return what `ebnf-dtd-operators' returns."
  (let ((group (ebnf-dtd-choice-seq token)))
    (ebnf-dtd-operators group)))
| 826 | |||
| 827 | |||
| 828 | ;;; choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')' | ||
| 829 | ;;; | ||
| 830 | ;;; seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' | ||
| 831 | |||
| 832 | |||
(defun ebnf-dtd-choice-seq (token)
  "Parse the inside of a group; TOKEN is the first token after `('.

The group is a choice (`|' separated), a sequence (`,' separated) or a
single content particle.  Return the node built for the group; the
closing `)' is consumed.  Signal an error if `)' is missing."
  ;; From here on TOKEN is a cons (NEXT-TOKEN . NODE) as returned by
  ;; `ebnf-dtd-cp'.
  (setq token (ebnf-dtd-cp token))
  (let (elist)
    (cond
     ;; choice
     ((eq (car token) 'alternative)
      (while (eq (car token) 'alternative)
        (setq elist (cons (cdr token) elist)
              token (ebnf-dtd-cp (ebnf-dtd-lex))))
      ;; `ebnf-token-alternative' yields (TOKEN . NODE) -- that is how
      ;; `ebnf-dtd-mixed' and `ebnf-dtd-elementdecl' use its result --
      ;; so keep only the node, as the other two branches do.
      (setq elist (cdr (ebnf-token-alternative elist token))))
     ;; seq
     ((eq (car token) 'comma)
      (while (eq (car token) 'comma)
        (setq elist (cons (cdr token) elist)
              token (ebnf-dtd-cp (ebnf-dtd-lex))))
      (setq elist (ebnf-token-sequence (cons (cdr token) elist))))
     ;; only one element
     (t
      (setq elist (cdr token))))
    (or (eq (car token) 'end-group)
        (error "Missing `)' in ELEMENT content"))
    elist))
| 855 | |||
| 856 | |||
| 857 | ;;; cp ::= (Name | choice | seq) ('?' | '*' | '+')? | ||
| 858 | |||
| 859 | |||
(defun ebnf-dtd-cp (token)
  "Parse one content particle whose first token is TOKEN.

A particle is a name or a nested group, optionally followed by `?',
`*' or `+'.  Return what `ebnf-dtd-operators' returns for it.
Signal an error if TOKEN starts neither a name nor a group."
  (let ((particle
         (cond
          ;; nested choice or seq
          ((eq token 'begin-group)
           (ebnf-dtd-choice-seq (ebnf-dtd-lex)))
          ;; element name; its text is in `ebnf-dtd-lex'
          ((eq token 'name)
           (ebnf-make-terminal ebnf-dtd-lex))
          (t
           (error "Invalid element")))))
    (ebnf-dtd-operators particle)))
| 868 | |||
| 869 | |||
| 870 | ;;; elm ('?' | '*' | '+')? | ||
| 871 | |||
| 872 | |||
(defun ebnf-dtd-operators (elm)
  "Apply an optional occurrence operator to ELM.

Read the next token.  If it is `?', `*' or `+', wrap ELM accordingly
and read one more token.  Return a cons (TOKEN . NODE), where TOKEN is
the first token not consumed here and NODE is ELM, wrapped or not."
  (let ((token (ebnf-dtd-lex)))
    (if (not (memq token '(optional zero-or-more one-or-more)))
        ;; only element
        (cons token elm)
      ;; Read the following token before building the node, so the
      ;; evaluation order stays: lexer first, node second.
      (let ((next (ebnf-dtd-lex)))
        (cons next
              (cond ((eq token 'optional)      ; ? - optional
                     (ebnf-token-optional elm))
                    ((eq token 'zero-or-more)  ; * - zero or more
                     (ebnf-make-zero-or-more elm))
                    (t                         ; + - one or more
                     (ebnf-make-one-or-more elm))))))))
| 884 | |||
| 885 | |||
| 886 | ;;; AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>' | ||
| 887 | ;;; | ||
| 888 | ;;; AttDef ::= S Name S AttType S DefaultDecl | ||
| 889 | ;;; | ||
| 890 | ;;; AttType ::= StringType | TokenizedType | EnumeratedType | ||
| 891 | ;;; | ||
| 892 | ;;; StringType ::= 'CDATA' | ||
| 893 | ;;; | ||
| 894 | ;;; TokenizedType ::= 'ID' | ||
| 895 | ;;; | 'IDREF' | ||
| 896 | ;;; | 'IDREFS' | ||
| 897 | ;;; | 'ENTITY' | ||
| 898 | ;;; | 'ENTITIES' | ||
| 899 | ;;; | 'NMTOKEN' | ||
| 900 | ;;; | 'NMTOKENS' | ||
| 901 | ;;; | ||
| 902 | ;;; EnumeratedType ::= NotationType | Enumeration | ||
| 903 | ;;; | ||
| 904 | ;;; NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' | ||
| 905 | ;;; | ||
| 906 | ;;; Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' | ||
| 907 | ;;; | ||
| 908 | ;;; DefaultDecl ::= '#REQUIRED' | ||
| 909 | ;;; | '#IMPLIED' | ||
| 910 | ;;; | (('#FIXED' S)? AttValue) | ||
| 911 | ;;; | ||
| 912 | ;;; | ||
| 913 | ;;; AttValue ::= '"' ([^<&"] | Reference)* '"' | ||
| 914 | ;;; | "'" ([^<&'] | Reference)* "'" | ||
| 915 | ;;; | ||
| 916 | ;;; Reference ::= EntityRef | CharRef | ||
| 917 | ;;; | ||
| 918 | ;;; EntityRef ::= '&' Name ';' | ||
| 919 | ;;; | ||
| 920 | ;;; CharRef ::= '&#' [0-9]+ ';' | ||
| 921 | ;;; | '&#x' [0-9a-fA-F]+ ';' | ||
| 922 | |||
| 923 | ;;; "^\\(&\\([A-Za-z_:][-A-Za-z0-9._:]*\\|#\\(x[0-9a-fA-F]+\\|[0-9]+\\)\\);\\|[^<&]\\)*$" | ||
| 924 | |||
| 925 | |||
(defun ebnf-dtd-attlistdecl ()
  "Parse an ATTLIST declaration, after `<!ATTLIST' has been read.

Each attribute definition is checked (name, type, default) and then
discarded.  Return a cons (TOKEN), where TOKEN is the token following
the closing `>'.  Signal an error on any syntax violation."
  (unless (eq (ebnf-dtd-lex) 'name)
    (error "Invalid ATTLIST name"))
  (let ((token (ebnf-dtd-lex)))
    ;; one iteration per attribute definition; TOKEN is its name
    (while (eq token 'name)
      ;; type
      (let ((type (ebnf-dtd-lex)))
        (cond
         ;; string and tokenized types need no further parsing
         ((memq type
                '(cdata id idref idrefs entity entities nmtoken nmtokens)))
         ((eq type 'begin-group)
          (ebnf-dtd-namelist "enumeration" '(name name-char)))
         ((eq type 'notation)
          (unless (eq (ebnf-dtd-lex) 'begin-group)
            (error "Missing `(' in NOTATION type in ATTLIST declaration"))
          (ebnf-dtd-namelist "NOTATION" '(name)))
         (t
          (error "Invalid type in ATTLIST declaration"))))
      ;; default value
      (let ((default (ebnf-dtd-lex)))
        (unless (memq default '(required implied))
          ;; optional #FIXED, then a mandatory attribute value
          (when (eq default 'fixed)
            (setq default (ebnf-dtd-lex)))
          (unless (and (eq default 'string)
                       (string-match
                        "^\\(&\\([A-Za-z_:][-A-Za-z0-9._:]*\\|#\\(x[0-9a-fA-F]+\\|[0-9]+\\)\\);\\|[^<&]\\)*$"
                        ebnf-dtd-lex))
            (error "Invalid default value in ATTLIST declaration"))))
      (setq token (ebnf-dtd-lex)))
    (unless (eq token 'end-decl)
      (error "Missing `>' in end of ATTLIST"))
    (list (ebnf-dtd-lex))))
| 957 | |||
| 958 | |||
(defun ebnf-dtd-namelist (type name-list)
  "Parse a `|' separated list of names up to the closing `)'.

NAME-LIST is the list of token symbols accepted as a list item.
TYPE is a string naming the construct; it is only used in error
messages.  Return t; signal an error on a bad item or missing `)'."
  (let ((token 'alternative))
    ;; at least one item is read; each further `|' demands another one
    (while (eq token 'alternative)
      (unless (memq (ebnf-dtd-lex) name-list)
        (error "Invalid name in %s type in ATTLIST declaration" type))
      (setq token (ebnf-dtd-lex)))
    (or (eq token 'end-group)
        (error "Missing `)' in %s type in ATTLIST declaration" type))))
| 967 | |||
| 968 | |||
| 969 | ;;; EntityDecl ::= GEDecl | PEDecl | ||
| 970 | ;;; | ||
| 971 | ;;; GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' | ||
| 972 | ;;; | ||
| 973 | ;;; PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' | ||
| 974 | ;;; | ||
| 975 | ;;; EntityDef ::= EntityValue | (ExternalID NDataDecl?) | ||
| 976 | ;;; | ||
| 977 | ;;; PEDef ::= EntityValue | ExternalID | ||
| 978 | ;;; | ||
| 979 | ;;; NDataDecl ::= S 'NDATA' S Name | ||
| 980 | ;;; | ||
| 981 | ;;; | ||
| 982 | ;;; EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | ||
| 983 | ;;; | "'" ([^%&'] | PEReference | Reference)* "'" | ||
| 984 | ;;; | ||
| 985 | ;;; PEReference ::= '%' Name ';' | ||
| 986 | ;;; | ||
| 987 | ;;; Reference ::= EntityRef | CharRef | ||
| 988 | ;;; | ||
| 989 | ;;; EntityRef ::= '&' Name ';' | ||
| 990 | ;;; | ||
| 991 | ;;; CharRef ::= '&#' [0-9]+ ';' | ||
| 992 | ;;; | '&#x' [0-9a-fA-F]+ ';' | ||
| 993 | |||
| 994 | ;;; "^\\(%[A-Za-z_:][-A-Za-z0-9._:]*;\\|&\\([A-Za-z_:][-A-Za-z0-9._:]*\\|#\\(x[0-9a-fA-F]+\\|[0-9]+\\)\\);\\|[^%&]\\)*$" | ||
| 995 | |||
| 996 | |||
(defun ebnf-dtd-entitydecl ()
  "Parse an ENTITY declaration, after `<!ENTITY' has been read.

Both general and parameter entity declarations are accepted; the
declaration is checked and then discarded.  Return a cons (TOKEN),
where TOKEN is the token following the closing `>'.
Signal an error on any syntax violation."
  (let* ((first  (ebnf-dtd-lex))
         (pedecl (eq first 'percent))   ; non-nil for `<!ENTITY % ...'
         (token  (if pedecl (ebnf-dtd-lex) first)))
    (unless (eq token 'name)
      (error "Invalid name of ENTITY"))
    (setq token (ebnf-dtd-lex))
    (cond
     ;; external identifier; NDATA is only accepted for general entities
     ((not (eq token 'string))
      (setq token (ebnf-dtd-externalid token))
      (when (and (eq token 'ndata) (not pedecl))
        (unless (eq (ebnf-dtd-lex) 'name)
          (error "Invalid NDATA name"))
        (setq token (ebnf-dtd-lex))))
     ;; entity value literal
     ((string-match
       "^\\(%[A-Za-z_:][-A-Za-z0-9._:]*;\\|&\\([A-Za-z_:][-A-Za-z0-9._:]*\\|#\\(x[0-9a-fA-F]+\\|[0-9]+\\)\\);\\|[^%&]\\)*$"
       ebnf-dtd-lex)
      (setq token (ebnf-dtd-lex)))
     (t
      (error "Invalid ENTITY definition")))
    (unless (eq token 'end-decl)
      (error "Missing `>' in end of ENTITY"))
    (list (ebnf-dtd-lex))))
| 1019 | |||
| 1020 | |||
| 1021 | ;;; NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' | ||
| 1022 | ;;; | ||
| 1023 | ;;; PublicID ::= 'PUBLIC' S PubidLiteral | ||
| 1024 | |||
| 1025 | |||
(defun ebnf-dtd-notationdecl ()
  "Parse a NOTATION declaration, after `<!NOTATION' has been read.

The declaration is checked and then discarded.  Return a cons (TOKEN),
where TOKEN is the token following the closing `>'.
Signal an error on a bad name or a missing `>'."
  (unless (eq (ebnf-dtd-lex) 'name)
    (error "Invalid name NOTATION"))
  ;; the identifier parser hands back the token it stopped at
  (let ((token (ebnf-dtd-externalid-or-publicid)))
    (unless (eq token 'end-decl)
      (error "Missing `>' in end of NOTATION")))
  (list (ebnf-dtd-lex)))
| 1032 | |||
| 1033 | |||
| 1034 | ;;; ExternalID ::= 'SYSTEM' S SystemLiteral | ||
| 1035 | ;;; | 'PUBLIC' S PubidLiteral S SystemLiteral | ||
| 1036 | ;;; | ||
| 1037 | ;;; PublicID ::= 'PUBLIC' S PubidLiteral | ||
| 1038 | |||
| 1039 | |||
(defun ebnf-dtd-externalid-or-publicid ()
  "Parse an external or public identifier of a NOTATION declaration.

For PUBLIC, check the public identifier literal, skip the system
literal if one follows, and return the first token after them.
For SYSTEM, return the value of `ebnf-dtd-systemliteral'.
Signal an error if neither keyword is found."
  (let ((token (ebnf-dtd-lex)))
    (cond
     ((eq token 'public)
      (ebnf-dtd-pubidliteral)
      ;; the system literal is optional here (PublicID production)
      (let ((next (ebnf-dtd-lex)))
        (if (eq next 'string)
            (ebnf-dtd-lex)
          next)))
     ((eq token 'system)
      ;; NOTE(review): the caller compares this value with `end-decl';
      ;; confirm that `ebnf-dtd-systemliteral' returns the token that
      ;; follows the literal.
      (ebnf-dtd-systemliteral))
     (t
      (error "Missing `SYSTEM' or `PUBLIC'")))))
| 1051 | |||
| 1052 | |||
| 1053 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | ||
| 1054 | ;; Lexical analyzer | ||
| 1055 | |||
| 1056 | |||
(defconst ebnf-dtd-token-table
  (make-vector 256 'error)
  "Vector used to map characters to a lexical token.
It has one slot per character code from 0 to 255.  Every slot starts
as `error'; `ebnf-dtd-initialize' stores the real token classes.")
| 1059 | |||
| 1060 | |||
(defun ebnf-dtd-initialize ()
  "Initialize EBNF token table.
Store in `ebnf-dtd-token-table' the token class of every character the
DTD lexer understands.  Slots not set here keep their current value."
  ;; control character & control 8-bit character are set to `error'
  ;; Character ranges, as (FIRST LAST CLASS) with both ends included.
  (dolist (range '((?\060 ?\071 name-char) ; digits: 0-9
                   (?\101 ?\132 name)      ; printable character: A-Z
                   (?\141 ?\172 name)      ; printable character: a-z
                   (?\240 ?\377 name)))    ; European 8-bit accented characters
    (let ((char  (car range))
          (last  (nth 1 range))
          (class (nth 2 range)))
      (while (<= char last)
        (aset ebnf-dtd-token-table char class)
        (setq char (1+ char)))))
  ;; Single characters, as (CHAR CLASS); applied after the ranges.
  (dolist (entry '(;; Override name characters:
                   (?_ name)
                   (?: name)
                   (?. name-char)
                   (?- name-char)
                   ;; Override space characters:
                   (?\n space)          ; [NL] linefeed
                   (?\r space)          ; [CR] carriage return
                   (?\t space)          ; [HT] horizontal tab
                   (?\040 space)        ; [SP] space
                   ;; Override other lexical characters:
                   (?= equal)
                   (?, comma)
                   (?* zero-or-more)
                   (?+ one-or-more)
                   (?| alternative)
                   (?% percent)
                   (?& ampersand)
                   (?# hash)
                   (?\? interrogation)
                   (?\" double-quote)
                   (?\' single-quote)
                   (?< less-than)
                   (?> end-decl)
                   (?\( begin-group)
                   (?\) end-group)
                   (?\[ begin-subset)))
    (aset ebnf-dtd-token-table (car entry) (nth 1 entry)))
  ;; Kept as the last form so the function value is unchanged.
  (aset ebnf-dtd-token-table ?\] 'end-subset))
| 1112 | |||
| 1113 | |||
;; replace the range "\240-\377" (see `ebnf-range-regexp').
(defconst ebnf-dtd-name-chars
  (ebnf-range-regexp "-._:0-9A-Za-z" ?\240 ?\377)
  "Character set of the characters accepted inside a DTD name.
The lexer passes it to `ebnf-buffer-substring' to collect a name.")
| 1117 | |||
| 1118 | |||
(defconst ebnf-dtd-decl-alist
  '(("ATTLIST"  . attlist-decl)
    ("DOCTYPE"  . doctype-decl)
    ("ELEMENT"  . element-decl)
    ("ENTITY"   . entity-decl)
    ("NOTATION" . notation-decl))
  "Alist mapping the name that follows `<!' to its lexical token.
`ebnf-dtd-lex' signals an error for a name that is not listed here.")
| 1125 | |||
| 1126 | |||
(defconst ebnf-dtd-element-alist
  '(("#FIXED"    . fixed)
    ("#IMPLIED"  . implied)
    ("#PCDATA"   . pcdata)
    ("#REQUIRED" . required))
  "Alist mapping a keyword introduced by `#' to its lexical token.
`ebnf-dtd-lex' signals an error for a keyword that is not listed here.")
| 1132 | |||
| 1133 | |||
(defconst ebnf-dtd-name-alist
  '(("ANY"        . any)
    ("CDATA"      . cdata)
    ("EMPTY"      . empty)
    ("ENTITIES"   . entities)
    ("ENTITY"     . entity)
    ("ID"         . id)
    ("IDREF"      . idref)
    ("IDREFS"     . idrefs)
    ("NDATA"      . ndata)
    ("NMTOKEN"    . nmtoken)
    ("NMTOKENS"   . nmtokens)
    ("NOTATION"   . notation)
    ("PUBLIC"     . public)
    ("SYSTEM"     . system)
    ("encoding"   . encoding-attr)
    ("standalone" . standalone-attr)
    ("version"    . version-attr))
  "Alist mapping a reserved name to its lexical token.
`ebnf-dtd-lex' returns the mapped token for a listed name; any other
name is returned as token `name' or `name-char'.")
| 1152 | |||
| 1153 | |||
(defun ebnf-dtd-lex ()
  "Lexical analyser for DTD.

Return a lexical token.

White space and comments before the token are skipped.  For tokens
that carry text (names, strings, references, `#' keywords and
processing instruction targets) the text is stored in variable
`ebnf-dtd-lex'.  Return `end-of-input' once point has reached
`ebnf-limit'.  Signal an error on invalid input.

See documentation for variable `ebnf-dtd-lex'."
  (if (>= (point) ebnf-limit)
      'end-of-input
    (let (token)
      ;; skip spaces and comments
      ;; The limit is tested on every iteration: a comment may end
      ;; exactly at `ebnf-limit', and text beyond the limit must not be
      ;; examined (it could even be another comment, whose processing
      ;; would change `ebnf-action').
      (while (and (< (point) ebnf-limit)
                  (if (> (following-char) 255)
                      ;; the token table only covers codes 0-255
                      (progn
                        (setq token 'error)
                        nil)
                    (setq token (aref ebnf-dtd-token-table (following-char)))
                    (cond
                     ((eq token 'space)
                      (skip-chars-forward " \n\r\t" ebnf-limit)
                      (< (point) ebnf-limit))
                     ((and (eq token 'less-than)
                           (looking-at "<!--"))
                      (ebnf-dtd-skip-comment))
                     (t nil)
                     ))))
      (cond
       ;; end of input
       ((>= (point) ebnf-limit)
        'end-of-input)
       ;; error
       ((eq token 'error)
        (error "Illegal character"))
       ;; beginning of declaration:
       ;; <?name, <!ATTLIST, <!DOCTYPE, <!ELEMENT, <!ENTITY, <!NOTATION
       ((eq token 'less-than)
        (forward-char)
        (let ((char (following-char)))
          (cond ((= char ?\?)           ; <?
                 (forward-char)
                 (setq ebnf-dtd-lex (ebnf-buffer-substring ebnf-dtd-name-chars))
                 'begin-pi)
                ((= char ?!)            ; <!
                 (forward-char)
                 (let ((decl (ebnf-buffer-substring ebnf-dtd-name-chars)))
                   (or (cdr (assoc decl ebnf-dtd-decl-alist))
                       (error "Invalid declaration name `%s'" decl))))
                (t                      ; <x
                 (error "Invalid declaration `<%c'" char)))))
       ;; name, namechar
       ((memq token '(name name-char))
        (setq ebnf-dtd-lex (ebnf-buffer-substring ebnf-dtd-name-chars))
        ;; reserved names have their own token
        (or (cdr (assoc ebnf-dtd-lex ebnf-dtd-name-alist))
            token))
       ;; ?, ?>
       ((eq token 'interrogation)
        (forward-char)
        (if (/= (following-char) ?>)
            'optional
          (forward-char)
          'end-pi))
       ;; #FIXED, #IMPLIED, #PCDATA, #REQUIRED
       ((eq token 'hash)
        (forward-char)
        (setq ebnf-dtd-lex
              (concat "#" (ebnf-buffer-substring ebnf-dtd-name-chars)))
        (or (cdr (assoc ebnf-dtd-lex ebnf-dtd-element-alist))
            (error "Invalid element `%s'" ebnf-dtd-lex)))
       ;; "string"
       ((eq token 'double-quote)
        (setq ebnf-dtd-lex (ebnf-dtd-string ?\"))
        'string)
       ;; 'string'
       ((eq token 'single-quote)
        (setq ebnf-dtd-lex (ebnf-dtd-string ?\'))
        'string)
       ;; %, %name;
       ((eq token 'percent)
        (forward-char)
        ;; a `%' followed by white space is the PEDecl marker
        (if (looking-at "[ \n\r\t]")
            'percent
          (setq ebnf-dtd-lex (ebnf-dtd-name-ref "%"))
          'pe-ref))
       ;; &#...;, &#x...;, &name;
       ((eq token 'ampersand)
        (forward-char)
        (if (/= (following-char) ?#)
            (progn
              ;; &name;
              (setq ebnf-dtd-lex (ebnf-dtd-name-ref "&"))
              'entity-ref)
          ;; &#...;, &#x...;
          (forward-char)
          (setq ebnf-dtd-lex (if (/= (following-char) ?x)
                                 (ebnf-dtd-char-ref "&#" "0-9")
                               (forward-char)
                               (ebnf-dtd-char-ref "&#x" "0-9a-fA-F")))
          'char-ref))
       ;; miscellaneous: (, ), [, ], =, |, *, +, >, `,'
       (t
        (forward-char)
        token)
       ))))
| 1255 | |||
| 1256 | |||
(defun ebnf-dtd-name-ref (start)
  "Read a named reference whose introducer START was already skipped.
START is the string \"%\" or \"&\".  The work is done by
`ebnf-dtd-char-ref', called with the name characters as the allowed
set; return its value."
  (let ((allowed ebnf-dtd-name-chars))
    (ebnf-dtd-char-ref start allowed)))
| 1259 | |||
| 1260 | |||
(defun ebnf-dtd-char-ref (start chars)
  "Read the body of a reference and its closing `;'.

START is the introducer string already skipped by the caller; it is
only used to rebuild the text.  CHARS is the character set accepted in
the body.  Return the full reference text, START followed by the body
and `;'.  Signal an error if the body is not followed by `;'."
  (let ((body (ebnf-buffer-substring chars)))
    (if (/= (following-char) ?\;)
        (error "Invalid element `%s%s%c'" start body (following-char))
      ;; step over the `;'
      (forward-char)
      (format "%s%s;" start body))))
| 1267 | |||
| 1268 | |||
;; replace the range "\240-\377" (see `ebnf-range-regexp').
(defconst ebnf-dtd-double-string-chars
  (ebnf-range-regexp "\t -!#-~" ?\240 ?\377)
  "Character set skipped inside a string delimited by double quotes.
Used by `ebnf-dtd-string'; the double quote itself is left out.")

(defconst ebnf-dtd-single-string-chars
  (ebnf-range-regexp "\t -&(-~" ?\240 ?\377)
  "Character set skipped inside a string delimited by single quotes.
Used by `ebnf-dtd-string'; the single quote itself is left out.")
| 1274 | |||
| 1275 | |||
(defun ebnf-dtd-string (delim)
  "Read the string literal at point and return its contents.

Point must be at the opening delimiter; DELIM is that delimiter
character.  Point is left after the closing delimiter.  The returned
text has no delimiters and no text properties.
Signal an error if the closing delimiter is not found."
  ;; step over the opening delimiter
  (forward-char)
  (let ((start   (point))
        (allowed (if (= delim ?\")
                     ebnf-dtd-double-string-chars
                   ebnf-dtd-single-string-chars)))
    (skip-chars-forward allowed ebnf-limit)
    (unless (= (following-char) delim)
      (error "Missing string delimiter `%c'" delim))
    (let ((end (point)))
      ;; step over the closing delimiter
      (forward-char)
      (buffer-substring-no-properties start end))))
| 1291 | |||
| 1292 | |||
;; replace the range "\177-\237" (see `ebnf-range-regexp').
(defconst ebnf-dtd-comment-chars
  (ebnf-range-regexp "^-\000-\010\013\014\016-\037" ?\177 ?\237)
  "Character set skipped in one step inside a comment.
Used by `ebnf-dtd-skip-comment'.  It is a negated set: `-' and the
listed control characters stop the skipping.")

(defconst ebnf-dtd-filename-chars
  (ebnf-range-regexp "^-\000-\037" ?\177 ?\237)
  "Character set collected in one step as part of an EPS file name.
Used by `ebnf-dtd-eps-filename'.  It is a negated set: `-' and the
listed control characters stop the collecting.")
| 1298 | |||
| 1299 | |||
(defun ebnf-dtd-skip-comment ()
  "Skip the comment at point; point must be at its opening `<!--'.

The character right after `<!--' may select an action: while
`ebnf-eps-executing' is non-nil, `[' and `]' add or remove an EPS
context; otherwise it is looked up in `ebnf-comment-table' and the
result is stored in `ebnf-action'.

Return t if the closing `-->' was found and skipped, nil if
`ebnf-limit' was reached first.  Signal an error if the comment
contains a character that is not allowed."
  (forward-char 4)                      ; <!--
  (cond
   ;; open EPS file
   ((and ebnf-eps-executing (= (following-char) ?\[))
    (ebnf-eps-add-context (ebnf-dtd-eps-filename)))
   ;; close EPS file
   ((and ebnf-eps-executing (= (following-char) ?\]))
    (ebnf-eps-remove-context (ebnf-dtd-eps-filename)))
   ;; any other action in comment
   ;; NOTE(review): assumes `ebnf-comment-table' has a slot for the
   ;; character found here -- confirm for character codes above 255.
   (t
    (setq ebnf-action (aref ebnf-comment-table (following-char))))
   )
  ;; Skip the comment text.  The skipping stops at `-' or at a
  ;; character that is not allowed.  Only a `-' that does not start
  ;; `-->' is stepped over; stopping anywhere else ends the loop, so
  ;; that an illegal character reaches the check below instead of being
  ;; retried forever.
  (while (progn
           (skip-chars-forward ebnf-dtd-comment-chars ebnf-limit)
           (and (< (point) ebnf-limit)
                (= (following-char) ?-)
                (not (looking-at "-->"))))
    ;; One dash at a time, so the terminator is still seen in `--->'.
    (forward-char))
  ;; check for a valid end of comment
  (cond ((>= (point) ebnf-limit)
         nil)
        ((looking-at "-->")
         (forward-char 3)
         t)
        (t
         (error "Illegal character"))
        ))
| 1327 | |||
| 1328 | |||
(defun ebnf-dtd-eps-filename ()
  "Read the EPS file name given in a comment and return it.

Point must be at the `[' or `]' that introduces the name.  The name
runs up to `-->', `ebnf-limit' or the first character that is neither
`-' nor in `ebnf-dtd-filename-chars'; dashes that do not start `-->'
belong to the name."
  ;; step over the `[' or `]'
  (forward-char)
  ;; PIECES holds the parts of the name, most recent first.
  (let ((pieces (list (ebnf-buffer-substring ebnf-dtd-filename-chars))))
    (while (and (< (point) ebnf-limit)
                (= (following-char) ?-) ; may be \n, \t, \r
                (not (looking-at "-->")))
      ;; a run of dashes, then the next run of ordinary characters
      (setq pieces (cons (ebnf-buffer-substring "-") pieces))
      (setq pieces (cons (ebnf-buffer-substring ebnf-dtd-filename-chars)
                         pieces)))
    (apply 'concat (nreverse pieces))))
| 1341 | |||
| 1342 | |||
| 1343 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | ||
| 1344 | |||
| 1345 | |||
| 1346 | (provide 'ebnf-dtd) | ||
| 1347 | |||
| 1348 | |||
| 1349 | ;;; arch-tag: | ||
| 1350 | ;;; ebnf-dtd.el ends here | ||