diff options
| author | Vinicius Jose Latorre | 2004-03-28 22:48:32 +0000 |
|---|---|---|
| committer | Vinicius Jose Latorre | 2004-03-28 22:48:32 +0000 |
| commit | f3c3dee6b271b8507c0caa96b2e02b1d50aacdea (patch) | |
| tree | 46d2bdcc8c49f9cc2723061ba69e3ba32241cb2f | |
| parent | 9d59cbb052bb97870a3d28ed6f79e386e13b003e (diff) | |
| download | emacs-f3c3dee6b271b8507c0caa96b2e02b1d50aacdea.tar.gz emacs-f3c3dee6b271b8507c0caa96b2e02b1d50aacdea.zip | |
Parser for EBNF used to specify XML (EBNFX)
| -rw-r--r-- | lisp/progmodes/ebnf-ebx.el | 673 |
1 files changed, 673 insertions, 0 deletions
diff --git a/lisp/progmodes/ebnf-ebx.el b/lisp/progmodes/ebnf-ebx.el new file mode 100644 index 00000000000..bdd4dd030d3 --- /dev/null +++ b/lisp/progmodes/ebnf-ebx.el | |||
| @@ -0,0 +1,673 @@ | |||
| 1 | ;;; ebnf-ebx.el --- parser for EBNF used to specify XML (EBNFX) | ||
| 2 | |||
| 3 | ;; Copyright (C) 2004 Free Sofware Foundation, Inc. | ||
| 4 | |||
| 5 | ;; Author: Vinicius Jose Latorre <viniciusjl@ig.com.br> | ||
| 6 | ;; Maintainer: Vinicius Jose Latorre <viniciusjl@ig.com.br> | ||
| 7 | ;; Time-stamp: <2004/03/22 08:53:21 vinicius> | ||
| 8 | ;; Keywords: wp, ebnf, PostScript | ||
| 9 | ;; Version: 1.0 | ||
| 10 | |||
| 11 | ;; This file is part of GNU Emacs. | ||
| 12 | |||
| 13 | ;; GNU Emacs is free software; you can redistribute it and/or modify | ||
| 14 | ;; it under the terms of the GNU General Public License as published by | ||
| 15 | ;; the Free Software Foundation; either version 2, or (at your option) | ||
| 16 | ;; any later version. | ||
| 17 | |||
| 18 | ;; GNU Emacs is distributed in the hope that it will be useful, | ||
| 19 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 20 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 21 | ;; GNU General Public License for more details. | ||
| 22 | |||
| 23 | ;; You should have received a copy of the GNU General Public License | ||
| 24 | ;; along with GNU Emacs; see the file COPYING. If not, write to the | ||
| 25 | ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 26 | ;; Boston, MA 02111-1307, USA. | ||
| 27 | |||
| 28 | ;;; Commentary: | ||
| 29 | |||
| 30 | ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | ||
| 31 | ;; | ||
| 32 | ;; | ||
| 33 | ;; This is part of ebnf2ps package. | ||
| 34 | ;; | ||
| 35 | ;; This package defines a parser for EBNF used to specify XML (EBNFX). | ||
| 36 | ;; | ||
| 37 | ;; See ebnf2ps.el for documentation. | ||
| 38 | ;; | ||
| 39 | ;; | ||
| 40 | ;; EBNFX Syntax | ||
| 41 | ;; ------------ | ||
| 42 | ;; | ||
| 43 | ;; See the URL: | ||
| 44 | ;; `http://www.w3.org/TR/2004/REC-xml-20040204/#sec-notation' | ||
| 45 | ;; (Extensible Markup Language (XML) 1.0 (Third Edition)) | ||
| 46 | ;; | ||
| 47 | ;; | ||
| 48 | ;; rule ::= symbol '::=' expression | ||
| 49 | ;; /* rules are separated by at least one blank line. */ | ||
| 50 | ;; | ||
| 51 | ;; expression ::= concatenation ('|' concatenation)* | ||
| 52 | ;; | ||
| 53 | ;; concatenation ::= exception* | ||
| 54 | ;; | ||
| 55 | ;; exception ::= term ('-' term)? | ||
| 56 | ;; | ||
| 57 | ;; term ::= factor ('*' | '+' | '?')? | ||
| 58 | ;; | ||
| 59 | ;; factor ::= hex-char+ | ||
| 60 | ;; | '[' '^'? ( char ( '-' char )? )+ ']' | ||
| 61 | ;; | '"' 'string' '"' | ||
| 62 | ;; | "'" "string" "'" | ||
| 63 | ;; | '(' expression ')' | ||
| 64 | ;; | symbol | ||
| 65 | ;; | ||
| 66 | ;; symbol ::= 'upper or lower case letter' | ||
| 67 | ;; ('upper or lower case letter' | '-' | '_')* | ||
| 68 | ;; /* upper and lower 8-bit accentuated characters are included */ | ||
| 69 | ;; | ||
| 70 | ;; hex-char ::= '#x' [0-9A-Fa-f]+ | ||
| 71 | ;; | ||
| 72 | ;; char ::= hex-char | 'any character except control characters' | ||
| 73 | ;; /* 8-bit accentuated characters are included */ | ||
| 74 | ;; | ||
| 75 | ;; any-char ::= char | 'newline' | 'tab' | ||
| 76 | ;; | ||
| 77 | ;; ignore ::= '[' ('wfc' | 'WFC' | 'vc' | 'VC') ':' ( any-char - ']' )* ']' | ||
| 78 | ;; | ||
| 79 | ;; comment ::= '/*' ( any-char - '*/' ) '*/' | ||
| 80 | ;; | ||
| 81 | ;; | ||
| 82 | ;; Below is the Notation section extracted from the URL cited above. | ||
| 83 | ;; | ||
| 84 | ;; 6 Notation | ||
| 85 | ;; | ||
| 86 | ;; The formal grammar of XML is given in this specification using a simple | ||
| 87 | ;; Extended Backus-Naur Form (EBNF) notation. Each rule in the grammar defines | ||
| 88 | ;; one symbol, in the form | ||
| 89 | ;; | ||
| 90 | ;; symbol ::= expression | ||
| 91 | ;; | ||
| 92 | ;; Symbols are written with an initial capital letter if they are the start | ||
| 93 | ;; symbol of a regular language, otherwise with an initial lowercase letter. | ||
| 94 | ;; Literal strings are quoted. | ||
| 95 | ;; | ||
| 96 | ;; Within the expression on the right-hand side of a rule, the following | ||
| 97 | ;; expressions are used to match strings of one or more characters: | ||
| 98 | ;; | ||
| 99 | ;; #xN | ||
| 100 | ;; | ||
| 101 | ;; where N is a hexadecimal integer, the expression matches the character | ||
| 102 | ;; whose number (code point) in ISO/IEC 10646 is N. The number of leading | ||
| 103 | ;; zeros in the #xN form is insignificant. | ||
| 104 | ;; | ||
| 105 | ;; [a-zA-Z], [#xN-#xN] | ||
| 106 | ;; | ||
| 107 | ;; matches any Char with a value in the range(s) indicated (inclusive). | ||
| 108 | ;; | ||
| 109 | ;; [abc], [#xN#xN#xN] | ||
| 110 | ;; | ||
| 111 | ;; matches any Char with a value among the characters enumerated. | ||
| 112 | ;; Enumerations and ranges can be mixed in one set of brackets. | ||
| 113 | ;; | ||
| 114 | ;; [^a-z], [^#xN-#xN] | ||
| 115 | ;; | ||
| 116 | ;; matches any Char with a value outside the range indicated. | ||
| 117 | ;; | ||
| 118 | ;; [^abc], [^#xN#xN#xN] | ||
| 119 | ;; | ||
| 120 | ;; matches any Char with a value not among the characters given. | ||
| 121 | ;; Enumerations and ranges of forbidden values can be mixed in one set of | ||
| 122 | ;; brackets. | ||
| 123 | ;; | ||
| 124 | ;; "string" | ||
| 125 | ;; | ||
| 126 | ;; matches a literal string matching that given inside the double quotes. | ||
| 127 | ;; | ||
| 128 | ;; 'string' | ||
| 129 | ;; | ||
| 130 | ;; matches a literal string matching that given inside the single quotes. | ||
| 131 | ;; | ||
| 132 | ;; These symbols may be combined to match more complex patterns as follows, | ||
| 133 | ;; where A and B represent simple expressions: | ||
| 134 | ;; | ||
| 135 | ;; (expression) | ||
| 136 | ;; | ||
| 137 | ;; expression is treated as a unit and may be combined as described in this | ||
| 138 | ;; list. | ||
| 139 | ;; | ||
| 140 | ;; A? | ||
| 141 | ;; | ||
| 142 | ;; matches A or nothing; optional A. | ||
| 143 | ;; | ||
| 144 | ;; A B | ||
| 145 | ;; | ||
| 146 | ;; matches A followed by B. This operator has higher precedence than | ||
| 147 | ;; alternation; thus A B | C D is identical to (A B) | (C D). | ||
| 148 | ;; | ||
| 149 | ;; A | B | ||
| 150 | ;; | ||
| 151 | ;; matches A or B. | ||
| 152 | ;; | ||
| 153 | ;; A - B | ||
| 154 | ;; | ||
| 155 | ;; matches any string that matches A but does not match B. | ||
| 156 | ;; | ||
| 157 | ;; A+ | ||
| 158 | ;; | ||
| 159 | ;; matches one or more occurrences of A. Concatenation has higher | ||
| 160 | ;; precedence than alternation; thus A+ | B+ is identical to (A+) | (B+). | ||
| 161 | ;; | ||
| 162 | ;; A* | ||
| 163 | ;; | ||
| 164 | ;; matches zero or more occurrences of A. Concatenation has higher | ||
| 165 | ;; precedence than alternation; thus A* | B* is identical to (A*) | (B*). | ||
| 166 | ;; | ||
| 167 | ;; Other notations used in the productions are: | ||
| 168 | ;; | ||
| 169 | ;; /* ... */ | ||
| 170 | ;; | ||
| 171 | ;; comment. | ||
| 172 | ;; | ||
| 173 | ;; [ wfc: ... ] | ||
| 174 | ;; | ||
| 175 | ;; well-formedness constraint; this identifies by name a constraint on | ||
| 176 | ;; well-formed documents associated with a production. | ||
| 177 | ;; | ||
| 178 | ;; [ vc: ... ] | ||
| 179 | ;; | ||
| 180 | ;; validity constraint; this identifies by name a constraint on valid | ||
| 181 | ;; documents associated with a production. | ||
| 182 | ;; | ||
| 183 | ;; | ||
| 184 | ;; Differences Between EBNFX And ebnf2ps EBNFX | ||
| 185 | ;; ------------------------------------------- | ||
| 186 | ;; | ||
| 187 | ;; Besides the characters that EBNFX accepts, ebnf2ps EBNFX accepts also the | ||
| 188 | ;; underscore (_) and minus (-) for rule name and european 8-bit accentuated | ||
| 189 | ;; characters (from \240 to \377) for rule name, string and comment. Also | ||
| 190 | ;; rule name can start with upper case letter. | ||
| 191 | ;; | ||
| 192 | ;; | ||
| 193 | ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | ||
| 194 | |||
| 195 | ;;; Code: | ||
| 196 | |||
| 197 | |||
| 198 | (require 'ebnf-otz) | ||
| 199 | |||
| 200 | |||
| 201 | (defvar ebnf-ebx-lex nil | ||
| 202 | "Value returned by `ebnf-ebx-lex' function.") | ||
| 203 | |||
| 204 | |||
| 205 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | ||
| 206 | ;; Syntactic analyzer | ||
| 207 | |||
| 208 | |||
| 209 | ;;; rulelist ::= rule+ | ||
| 210 | |||
| 211 | (defun ebnf-ebx-parser (start) | ||
| 212 | "EBNFX parser." | ||
| 213 | (let ((total (+ (- ebnf-limit start) 1)) | ||
| 214 | (bias (1- start)) | ||
| 215 | (origin (point)) | ||
| 216 | rule-list token rule) | ||
| 217 | (goto-char start) | ||
| 218 | (setq token (ebnf-ebx-lex)) | ||
| 219 | (and (eq token 'end-of-input) | ||
| 220 | (error "Invalid EBNFX file format")) | ||
| 221 | (and (eq token 'end-of-rule) | ||
| 222 | (setq token (ebnf-ebx-lex))) | ||
| 223 | (while (not (eq token 'end-of-input)) | ||
| 224 | (ebnf-message-float | ||
| 225 | "Parsing...%s%%" | ||
| 226 | (/ (* (- (point) bias) 100.0) total)) | ||
| 227 | (setq token (ebnf-ebx-rule token) | ||
| 228 | rule (cdr token) | ||
| 229 | token (car token)) | ||
| 230 | (or (ebnf-add-empty-rule-list rule) | ||
| 231 | (setq rule-list (cons rule rule-list)))) | ||
| 232 | (goto-char origin) | ||
| 233 | rule-list)) | ||
| 234 | |||
| 235 | |||
| 236 | ;;; rule ::= symbol '::=' expression | ||
| 237 | |||
| 238 | |||
| 239 | (defun ebnf-ebx-rule (token) | ||
| 240 | (let ((name ebnf-ebx-lex) | ||
| 241 | (action ebnf-action) | ||
| 242 | elements) | ||
| 243 | (setq ebnf-action nil) | ||
| 244 | (or (eq token 'non-terminal) | ||
| 245 | (error "Invalid rule name")) | ||
| 246 | (setq token (ebnf-ebx-lex)) | ||
| 247 | (or (eq token 'production) | ||
| 248 | (error "Invalid rule: missing `::='")) | ||
| 249 | (setq elements (ebnf-ebx-expression)) | ||
| 250 | (or (memq (car elements) '(end-of-rule end-of-input)) | ||
| 251 | (error "Invalid rule: there is no end of rule")) | ||
| 252 | (setq elements (cdr elements)) | ||
| 253 | (ebnf-eps-add-production name) | ||
| 254 | (cons (ebnf-ebx-lex) | ||
| 255 | (ebnf-make-production name elements action)))) | ||
| 256 | |||
| 257 | |||
| 258 | ;; expression ::= concatenation ('|' concatenation)* | ||
| 259 | |||
| 260 | |||
| 261 | (defun ebnf-ebx-expression () | ||
| 262 | (let (body concatenation) | ||
| 263 | (while (eq (car (setq concatenation | ||
| 264 | (ebnf-ebx-concatenation (ebnf-ebx-lex)))) | ||
| 265 | 'alternative) | ||
| 266 | (setq body (cons (cdr concatenation) body))) | ||
| 267 | (ebnf-token-alternative body concatenation))) | ||
| 268 | |||
| 269 | |||
| 270 | ;; concatenation ::= exception* | ||
| 271 | |||
| 272 | |||
| 273 | (defun ebnf-ebx-concatenation (token) | ||
| 274 | (let ((term (ebnf-ebx-exception token)) | ||
| 275 | seq) | ||
| 276 | (or (setq token (car term) | ||
| 277 | term (cdr term)) | ||
| 278 | (error "Empty element")) | ||
| 279 | (setq seq (cons term seq)) | ||
| 280 | (while (setq term (ebnf-ebx-exception token) | ||
| 281 | token (car term) | ||
| 282 | term (cdr term)) | ||
| 283 | (setq seq (cons term seq))) | ||
| 284 | (cons token | ||
| 285 | (if (= (length seq) 1) | ||
| 286 | ;; sequence with only one element | ||
| 287 | (car seq) | ||
| 288 | ;; a real sequence | ||
| 289 | (ebnf-make-sequence (nreverse seq)))))) | ||
| 290 | |||
| 291 | |||
| 292 | ;;; exception ::= term ('-' term)? | ||
| 293 | |||
| 294 | |||
| 295 | (defun ebnf-ebx-exception (token) | ||
| 296 | (let ((term (ebnf-ebx-term token))) | ||
| 297 | (if (eq (car term) 'exception) | ||
| 298 | (let ((except (ebnf-ebx-term (ebnf-ebx-lex)))) | ||
| 299 | (cons (car except) | ||
| 300 | (ebnf-make-except (cdr term) (cdr except)))) | ||
| 301 | term))) | ||
| 302 | |||
| 303 | |||
| 304 | |||
| 305 | ;;; term ::= factor ('*' | '+' | '?')? | ||
| 306 | |||
| 307 | |||
| 308 | (defun ebnf-ebx-term (token) | ||
| 309 | (let ((factor (ebnf-ebx-factor token))) | ||
| 310 | (when factor | ||
| 311 | (setq token (ebnf-ebx-lex)) | ||
| 312 | (cond ((eq token 'zero-or-more) | ||
| 313 | (setq factor (ebnf-make-zero-or-more factor) | ||
| 314 | token (ebnf-ebx-lex))) | ||
| 315 | ((eq token 'one-or-more) | ||
| 316 | (setq factor (ebnf-make-one-or-more factor) | ||
| 317 | token (ebnf-ebx-lex))) | ||
| 318 | ((eq token 'optional) | ||
| 319 | (setq factor (ebnf-token-optional factor) | ||
| 320 | token (ebnf-ebx-lex))))) | ||
| 321 | (cons token factor))) | ||
| 322 | |||
| 323 | |||
| 324 | ;;; factor ::= hex-char+ | ||
| 325 | ;;; | '[' '^'? ( char ( '-' char )? )+ ']' | ||
| 326 | ;;; | '"' 'string' '"' | ||
| 327 | ;;; | "'" "string" "'" | ||
| 328 | ;;; | '(' expression ')' | ||
| 329 | ;;; | symbol | ||
| 330 | ;;; | ||
| 331 | ;;; symbol ::= 'upper or lower case letter' | ||
| 332 | ;;; ('upper or lower case letter' | '-' | '_')* | ||
| 333 | ;;; /* upper and lower 8-bit accentuated characters are included */ | ||
| 334 | ;;; | ||
| 335 | ;;; hex-char ::= '#x' [0-9A-Fa-f]+ | ||
| 336 | ;;; | ||
| 337 | ;;; char ::= hex-char | 'any character except control characters' | ||
| 338 | ;;; /* 8-bit accentuated characters are included */ | ||
| 339 | ;;; | ||
| 340 | ;;; any-char ::= char | 'newline' | 'tab' | ||
| 341 | |||
| 342 | |||
| 343 | (defun ebnf-ebx-factor (token) | ||
| 344 | (cond | ||
| 345 | ;; terminal | ||
| 346 | ((eq token 'terminal) | ||
| 347 | (ebnf-make-terminal ebnf-ebx-lex)) | ||
| 348 | ;; non-terminal | ||
| 349 | ((eq token 'non-terminal) | ||
| 350 | (ebnf-make-non-terminal ebnf-ebx-lex)) | ||
| 351 | ;; group | ||
| 352 | ((eq token 'begin-group) | ||
| 353 | (let ((body (ebnf-ebx-expression))) | ||
| 354 | (or (eq (car body) 'end-group) | ||
| 355 | (error "Missing `)'")) | ||
| 356 | (cdr body))) | ||
| 357 | ;; no element | ||
| 358 | (t | ||
| 359 | nil) | ||
| 360 | )) | ||
| 361 | |||
| 362 | |||
| 363 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | ||
| 364 | ;; Lexical analyzer | ||
| 365 | |||
| 366 | |||
| 367 | (defconst ebnf-ebx-token-table (make-vector 256 'error) | ||
| 368 | "Vector used to map characters to a lexical token.") | ||
| 369 | |||
| 370 | |||
| 371 | (defun ebnf-ebx-initialize () | ||
| 372 | "Initialize EBNFX token table." | ||
| 373 | ;; control character & control 8-bit character are set to `error' | ||
| 374 | (let ((char ?\101)) | ||
| 375 | ;; printable character: A-Z | ||
| 376 | (while (< char ?\133) | ||
| 377 | (aset ebnf-ebx-token-table char 'non-terminal) | ||
| 378 | (setq char (1+ char))) | ||
| 379 | ;; printable character: a-z | ||
| 380 | (setq char ?\141) | ||
| 381 | (while (< char ?\173) | ||
| 382 | (aset ebnf-ebx-token-table char 'non-terminal) | ||
| 383 | (setq char (1+ char))) | ||
| 384 | ;; European 8-bit accentuated characters: | ||
| 385 | (setq char ?\240) | ||
| 386 | (while (< char ?\400) | ||
| 387 | (aset ebnf-ebx-token-table char 'non-terminal) | ||
| 388 | (setq char (1+ char))) | ||
| 389 | ;; Override end of line characters: | ||
| 390 | (aset ebnf-ebx-token-table ?\n 'end-of-rule) ; [NL] linefeed | ||
| 391 | (aset ebnf-ebx-token-table ?\r 'end-of-rule) ; [CR] carriage return | ||
| 392 | ;; Override space characters: | ||
| 393 | (aset ebnf-ebx-token-table ?\013 'space) ; [VT] vertical tab | ||
| 394 | (aset ebnf-ebx-token-table ?\t 'space) ; [HT] horizontal tab | ||
| 395 | (aset ebnf-ebx-token-table ?\ 'space) ; [SP] space | ||
| 396 | ;; Override form feed character: | ||
| 397 | (aset ebnf-ebx-token-table ?\f 'form-feed) ; [FF] form feed | ||
| 398 | ;; Override other lexical characters: | ||
| 399 | (aset ebnf-ebx-token-table ?# 'hash) | ||
| 400 | (aset ebnf-ebx-token-table ?\" 'double-quote) | ||
| 401 | (aset ebnf-ebx-token-table ?\' 'single-quote) | ||
| 402 | (aset ebnf-ebx-token-table ?\( 'begin-group) | ||
| 403 | (aset ebnf-ebx-token-table ?\) 'end-group) | ||
| 404 | (aset ebnf-ebx-token-table ?- 'exception) | ||
| 405 | (aset ebnf-ebx-token-table ?: 'colon) | ||
| 406 | (aset ebnf-ebx-token-table ?\[ 'begin-square) | ||
| 407 | (aset ebnf-ebx-token-table ?| 'alternative) | ||
| 408 | (aset ebnf-ebx-token-table ?* 'zero-or-more) | ||
| 409 | (aset ebnf-ebx-token-table ?+ 'one-or-more) | ||
| 410 | (aset ebnf-ebx-token-table ?\? 'optional) | ||
| 411 | ;; Override comment character: | ||
| 412 | (aset ebnf-ebx-token-table ?/ 'comment))) | ||
| 413 | |||
| 414 | |||
| 415 | ;; replace the range "\240-\377" (see `ebnf-range-regexp'). | ||
| 416 | (defconst ebnf-ebx-non-terminal-chars | ||
| 417 | (ebnf-range-regexp "-_A-Za-z" ?\240 ?\377)) | ||
| 418 | (defconst ebnf-ebx-non-terminal-letter-chars | ||
| 419 | (ebnf-range-regexp "A-Za-z" ?\240 ?\377)) | ||
| 420 | |||
| 421 | |||
| 422 | (defun ebnf-ebx-lex () | ||
| 423 | "Lexical analyser for EBNFX. | ||
| 424 | |||
| 425 | Return a lexical token. | ||
| 426 | |||
| 427 | See documentation for variable `ebnf-ebx-lex'." | ||
| 428 | (if (>= (point) ebnf-limit) | ||
| 429 | 'end-of-input | ||
| 430 | (let (token) | ||
| 431 | ;; skip spaces and comments | ||
| 432 | (while (if (> (following-char) 255) | ||
| 433 | (progn | ||
| 434 | (setq token 'error) | ||
| 435 | nil) | ||
| 436 | (setq token (aref ebnf-ebx-token-table (following-char))) | ||
| 437 | (cond | ||
| 438 | ((eq token 'space) | ||
| 439 | (skip-chars-forward " \013\t" ebnf-limit) | ||
| 440 | (< (point) ebnf-limit)) | ||
| 441 | ((eq token 'comment) | ||
| 442 | (ebnf-ebx-skip-comment)) | ||
| 443 | ((eq token 'form-feed) | ||
| 444 | (forward-char) | ||
| 445 | (setq ebnf-action 'form-feed)) | ||
| 446 | ((eq token 'end-of-rule) | ||
| 447 | (ebnf-ebx-skip-end-of-rule)) | ||
| 448 | ((and (eq token 'begin-square) | ||
| 449 | (let ((case-fold-search t)) | ||
| 450 | (looking-at "\\[\\(wfc\\|vc\\):"))) | ||
| 451 | (ebnf-ebx-skip-constraint)) | ||
| 452 | (t nil) | ||
| 453 | ))) | ||
| 454 | (cond | ||
| 455 | ;; end of input | ||
| 456 | ((>= (point) ebnf-limit) | ||
| 457 | 'end-of-input) | ||
| 458 | ;; error | ||
| 459 | ((eq token 'error) | ||
| 460 | (error "Illegal character")) | ||
| 461 | ;; end of rule | ||
| 462 | ((eq token 'end-of-rule) | ||
| 463 | 'end-of-rule) | ||
| 464 | ;; terminal: #x [0-9A-Fa-f]+ | ||
| 465 | ((eq token 'hash) | ||
| 466 | (setq ebnf-ebx-lex (ebnf-ebx-character)) | ||
| 467 | 'terminal) | ||
| 468 | ;; terminal: "string" | ||
| 469 | ((eq token 'double-quote) | ||
| 470 | (setq ebnf-ebx-lex (ebnf-ebx-string ?\")) | ||
| 471 | 'terminal) | ||
| 472 | ;; terminal: 'string' | ||
| 473 | ((eq token 'single-quote) | ||
| 474 | (setq ebnf-ebx-lex (ebnf-ebx-string ?\')) | ||
| 475 | 'terminal) | ||
| 476 | ;; terminal: [ ^? ( char ( - char )? )+ ] | ||
| 477 | ((eq token 'begin-square) | ||
| 478 | (setq ebnf-ebx-lex (ebnf-ebx-range)) | ||
| 479 | 'terminal) | ||
| 480 | ;; non-terminal: NAME | ||
| 481 | ((eq token 'non-terminal) | ||
| 482 | (setq ebnf-ebx-lex | ||
| 483 | (ebnf-buffer-substring ebnf-ebx-non-terminal-chars)) | ||
| 484 | 'non-terminal) | ||
| 485 | ;; colon: ::= | ||
| 486 | ((eq token 'colon) | ||
| 487 | (or (looking-at "::=") | ||
| 488 | (error "Missing `::=' token")) | ||
| 489 | (forward-char 3) | ||
| 490 | 'production) | ||
| 491 | ;; miscellaneous: (, ), *, +, ?, |, - | ||
| 492 | (t | ||
| 493 | (forward-char) | ||
| 494 | token) | ||
| 495 | )))) | ||
| 496 | |||
| 497 | |||
| 498 | ;; replace the range "\177-\237" (see `ebnf-range-regexp'). | ||
| 499 | (defconst ebnf-ebx-constraint-chars | ||
| 500 | (ebnf-range-regexp "^\000-\010\016-\037]" ?\177 ?\237)) | ||
| 501 | |||
| 502 | |||
| 503 | (defun ebnf-ebx-skip-constraint () | ||
| 504 | (or (> (skip-chars-forward ebnf-ebx-constraint-chars ebnf-limit) 0) | ||
| 505 | (error "Invalid character")) | ||
| 506 | (or (= (following-char) ?\]) | ||
| 507 | (error "Missing end of constraint `]'")) | ||
| 508 | (forward-char) | ||
| 509 | t) | ||
| 510 | |||
| 511 | |||
| 512 | |||
| 513 | (defun ebnf-ebx-skip-end-of-rule () | ||
| 514 | (let (eor-p) | ||
| 515 | (while (progn | ||
| 516 | ;; end of rule ==> 2 or more consecutive end of lines | ||
| 517 | (setq eor-p (or (> (skip-chars-forward "\r\n" ebnf-limit) 1) | ||
| 518 | eor-p)) | ||
| 519 | ;; skip spaces | ||
| 520 | (skip-chars-forward " \013\t" ebnf-limit) | ||
| 521 | ;; skip comments | ||
| 522 | (and (= (following-char) ?/) | ||
| 523 | (ebnf-ebx-skip-comment)))) | ||
| 524 | (not eor-p))) | ||
| 525 | |||
| 526 | |||
| 527 | ;; replace the range "\177-\237" (see `ebnf-range-regexp'). | ||
| 528 | (defconst ebnf-ebx-comment-chars | ||
| 529 | (ebnf-range-regexp "^\000-\010\016-\037\\*" ?\177 ?\237)) | ||
| 530 | (defconst ebnf-ebx-filename-chars | ||
| 531 | (ebnf-range-regexp "^\000-\037\\*" ?\177 ?\237)) | ||
| 532 | |||
| 533 | |||
| 534 | (defun ebnf-ebx-skip-comment () | ||
| 535 | (forward-char) | ||
| 536 | (or (= (following-char) ?*) | ||
| 537 | (error "Invalid beginning of comment")) | ||
| 538 | (forward-char) | ||
| 539 | (cond | ||
| 540 | ;; open EPS file | ||
| 541 | ((and ebnf-eps-executing (= (following-char) ?\[)) | ||
| 542 | (ebnf-eps-add-context (ebnf-ebx-eps-filename))) | ||
| 543 | ;; close EPS file | ||
| 544 | ((and ebnf-eps-executing (= (following-char) ?\])) | ||
| 545 | (ebnf-eps-remove-context (ebnf-ebx-eps-filename))) | ||
| 546 | ;; any other action in comment | ||
| 547 | (t | ||
| 548 | (setq ebnf-action (aref ebnf-comment-table (following-char)))) | ||
| 549 | ) | ||
| 550 | (while (progn | ||
| 551 | (skip-chars-forward ebnf-ebx-comment-chars ebnf-limit) | ||
| 552 | (or (= (following-char) ?*) | ||
| 553 | (error "Missing end of comment")) | ||
| 554 | (forward-char) | ||
| 555 | (and (/= (following-char) ?/) | ||
| 556 | (< (point) ebnf-limit)))) | ||
| 557 | ;; check for a valid end of comment | ||
| 558 | (and (>= (point) ebnf-limit) | ||
| 559 | (error "Missing end of comment")) | ||
| 560 | (forward-char) | ||
| 561 | t) | ||
| 562 | |||
| 563 | |||
| 564 | (defun ebnf-ebx-eps-filename () | ||
| 565 | (forward-char) | ||
| 566 | (let (fname nchar) | ||
| 567 | (while (progn | ||
| 568 | (setq fname | ||
| 569 | (concat fname | ||
| 570 | (ebnf-buffer-substring ebnf-ebx-filename-chars))) | ||
| 571 | (and (< (point) ebnf-limit) | ||
| 572 | (> (setq nchar (skip-chars-forward "*" ebnf-limit)) 0) | ||
| 573 | (< (point) ebnf-limit) | ||
| 574 | (/= (following-char) ?/))) | ||
| 575 | (setq fname (concat fname (make-string nchar ?*)) | ||
| 576 | nchar nil)) | ||
| 577 | (if (or (not nchar) (= nchar 0)) | ||
| 578 | fname | ||
| 579 | (and (< (point) ebnf-limit) | ||
| 580 | (= (following-char) ?/) | ||
| 581 | (setq nchar (1- nchar))) | ||
| 582 | (concat fname (make-string nchar ?*))))) | ||
| 583 | |||
| 584 | |||
| 585 | ;; replace the range "\240-\377" (see `ebnf-range-regexp'). | ||
| 586 | (defconst ebnf-ebx-double-string-chars | ||
| 587 | (ebnf-range-regexp "\t -!#-~" ?\240 ?\377)) | ||
| 588 | (defconst ebnf-ebx-single-string-chars | ||
| 589 | (ebnf-range-regexp "\t -&(-~" ?\240 ?\377)) | ||
| 590 | |||
| 591 | |||
| 592 | (defun ebnf-ebx-string (delim) | ||
| 593 | (buffer-substring-no-properties | ||
| 594 | (progn | ||
| 595 | (forward-char) | ||
| 596 | (point)) | ||
| 597 | (progn | ||
| 598 | (skip-chars-forward (if (= delim ?\") | ||
| 599 | ebnf-ebx-double-string-chars | ||
| 600 | ebnf-ebx-single-string-chars) | ||
| 601 | ebnf-limit) | ||
| 602 | (or (= (following-char) delim) | ||
| 603 | (error "Missing string delimiter `%c'" delim)) | ||
| 604 | (prog1 | ||
| 605 | (point) | ||
| 606 | (forward-char))))) | ||
| 607 | |||
| 608 | |||
| 609 | (defun ebnf-ebx-character () | ||
| 610 | ;; #x [0-9A-Fa-f]+ | ||
| 611 | (buffer-substring-no-properties | ||
| 612 | (point) | ||
| 613 | (progn | ||
| 614 | (ebnf-ebx-hex-character) | ||
| 615 | (point)))) | ||
| 616 | |||
| 617 | |||
| 618 | (defun ebnf-ebx-range () | ||
| 619 | ;; [ ^? ( char ( - char )? )+ ] | ||
| 620 | (buffer-substring-no-properties | ||
| 621 | (point) | ||
| 622 | (progn | ||
| 623 | (forward-char) | ||
| 624 | (and (= (following-char) ?^) | ||
| 625 | (forward-char)) | ||
| 626 | (and (= (following-char) ?-) | ||
| 627 | (forward-char)) | ||
| 628 | (while (progn | ||
| 629 | (ebnf-ebx-any-character) | ||
| 630 | (when (= (following-char) ?-) | ||
| 631 | (forward-char) | ||
| 632 | (ebnf-ebx-any-character)) | ||
| 633 | (and (/= (following-char) ?\]) | ||
| 634 | (< (point) ebnf-limit)))) | ||
| 635 | (and (>= (point) ebnf-limit) | ||
| 636 | (error "Missing end of character range `]'")) | ||
| 637 | (forward-char) | ||
| 638 | (point)))) | ||
| 639 | |||
| 640 | |||
| 641 | (defun ebnf-ebx-any-character () | ||
| 642 | (let ((char (following-char))) | ||
| 643 | (cond ((= char ?#) | ||
| 644 | (ebnf-ebx-hex-character t)) | ||
| 645 | ((or (and (<= ?\ char) (<= char ?\")) ; # | ||
| 646 | (and (<= ?$ char) (<= char ?,)) ; - | ||
| 647 | (and (<= ?. char) (<= char ?\\)) ; ] | ||
| 648 | (and (<= ?^ char) (<= char ?~)) | ||
| 649 | (and (<= ?\240 char) (<= char ?\377))) | ||
| 650 | (forward-char)) | ||
| 651 | (t | ||
| 652 | (error "Invalid character `%c'" char))))) | ||
| 653 | |||
| 654 | |||
| 655 | (defun ebnf-ebx-hex-character (&optional no-error) | ||
| 656 | ;; #x [0-9A-Fa-f]+ | ||
| 657 | (forward-char) | ||
| 658 | (if (/= (following-char) ?x) | ||
| 659 | (or no-error | ||
| 660 | (error "Invalid hexadecimal character")) | ||
| 661 | (forward-char) | ||
| 662 | (or (> (skip-chars-forward "0-9A-Fa-f" ebnf-limit) 0) | ||
| 663 | (error "Invalid hexadecimal character")))) | ||
| 664 | |||
| 665 | |||
| 666 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | ||
| 667 | |||
| 668 | |||
| 669 | (provide 'ebnf-ebx) | ||
| 670 | |||
| 671 | |||
| 672 | ;;; arch-tag: | ||
| 673 | ;;; ebnf-ebx.el ends here | ||