diff options
| -rw-r--r-- | lisp/cedet/semantic/wisent.el | 346 |
1 files changed, 346 insertions, 0 deletions
diff --git a/lisp/cedet/semantic/wisent.el b/lisp/cedet/semantic/wisent.el new file mode 100644 index 00000000000..35671aa7ada --- /dev/null +++ b/lisp/cedet/semantic/wisent.el | |||
| @@ -0,0 +1,346 @@ | |||
| 1 | ;;; semantic/wisent.el --- Wisent - Semantic gateway | ||
| 2 | |||
| 3 | ;;; Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007 | ||
| 4 | ;;; Free Software Foundation, Inc. | ||
| 5 | |||
| 6 | ;; Author: David Ponce <david@dponce.com> | ||
| 7 | ;; Maintainer: David Ponce <david@dponce.com> | ||
| 8 | ;; Created: 30 Aug 2001 | ||
| 9 | ;; Keywords: syntax | ||
| 10 | |||
| 11 | ;; This file is part of GNU Emacs. | ||
| 12 | |||
| 13 | ;; GNU Emacs is free software: you can redistribute it and/or modify | ||
| 14 | ;; it under the terms of the GNU General Public License as published by | ||
| 15 | ;; the Free Software Foundation, either version 3 of the License, or | ||
| 16 | ;; (at your option) any later version. | ||
| 17 | |||
| 18 | ;; GNU Emacs is distributed in the hope that it will be useful, | ||
| 19 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 20 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 21 | ;; GNU General Public License for more details. | ||
| 22 | |||
| 23 | ;; You should have received a copy of the GNU General Public License | ||
| 24 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. | ||
| 25 | |||
| 26 | ;;; Commentary: | ||
| 27 | ;; | ||
| 28 | ;; Here are functions necessary to use the Wisent LALR parser from | ||
| 29 | ;; Semantic environment. | ||
| 30 | |||
| 31 | ;;; History: | ||
| 32 | ;; | ||
| 33 | |||
| 34 | ;;; Code: | ||
| 35 | |||
| 36 | (require 'semantic) | ||
| 37 | (require 'semantic/wisent/wisent) | ||
| 38 | |||
| 39 | ;;; Lexical analysis | ||
| 40 | ;; | ||
| 41 | (defvar wisent-lex-istream nil | ||
| 42 | "Input stream of `semantic-lex' syntactic tokens.") | ||
| 43 | |||
| 44 | (defvar wisent-lex-lookahead nil | ||
| 45 | "Extra lookahead token. | ||
| 46 | When non-nil it is directly returned by `wisent-lex-function'.") | ||
| 47 | |||
| 48 | ;; Maintain this alias for compatibility until all WY grammars have | ||
| 49 | ;; been translated again to Elisp code. | ||
| 50 | (semantic-alias-obsolete 'wisent-lex-make-token-table | ||
| 51 | 'semantic-lex-make-type-table) | ||
| 52 | |||
| 53 | (defmacro wisent-lex-eoi () | ||
| 54 | "Return an End-Of-Input lexical token. | ||
| 55 | The EOI token is like this: ($EOI "" POINT-MAX . POINT-MAX)." | ||
| 56 | `(cons ',wisent-eoi-term | ||
| 57 | (cons "" | ||
| 58 | (cons (point-max) (point-max))))) | ||
| 59 | |||
| 60 | (defmacro define-wisent-lexer (name doc &rest body) | ||
| 61 | "Create a new lexical analyzer with NAME. | ||
| 62 | DOC is a documentation string describing this analyzer. | ||
| 63 | When a token is available in `wisent-lex-istream', eval BODY forms | ||
| 64 | sequentially. BODY must return a lexical token for the LALR parser. | ||
| 65 | |||
| 66 | Each token in input was produced by `semantic-lex', it is a list: | ||
| 67 | |||
| 68 | (TOKSYM START . END) | ||
| 69 | |||
| 70 | TOKSYM is a terminal symbol used in the grammar. | ||
| 71 | START and END mark boundary in the current buffer of that token's | ||
| 72 | value. | ||
| 73 | |||
| 74 | Returned tokens must have the form: | ||
| 75 | |||
| 76 | (TOKSYM VALUE START . END) | ||
| 77 | |||
| 78 | where VALUE is the buffer substring between START and END positions." | ||
| 79 | `(defun | ||
| 80 | ,name () ,doc | ||
| 81 | (cond | ||
| 82 | (wisent-lex-lookahead | ||
| 83 | (prog1 wisent-lex-lookahead | ||
| 84 | (setq wisent-lex-lookahead nil))) | ||
| 85 | (wisent-lex-istream | ||
| 86 | ,@body) | ||
| 87 | ((wisent-lex-eoi))))) | ||
| 88 | |||
| 89 | (define-wisent-lexer wisent-lex | ||
| 90 | "Return the next available lexical token in Wisent's form. | ||
| 91 | The variable `wisent-lex-istream' contains the list of lexical tokens | ||
| 92 | produced by `semantic-lex'. Pop the next token available and convert | ||
| 93 | it to a form suitable for the Wisent's parser." | ||
| 94 | (let* ((tk (car wisent-lex-istream))) | ||
| 95 | ;; Eat input stream | ||
| 96 | (setq wisent-lex-istream (cdr wisent-lex-istream)) | ||
| 97 | (cons (semantic-lex-token-class tk) | ||
| 98 | (cons (semantic-lex-token-text tk) | ||
| 99 | (semantic-lex-token-bounds tk))))) | ||
| 100 | |||
| 101 | ;;; Syntax analysis | ||
| 102 | ;; | ||
| 103 | (defvar wisent-error-function nil | ||
| 104 | "Function used to report parse error. | ||
| 105 | By default use the function `wisent-message'.") | ||
| 106 | (make-variable-buffer-local 'wisent-error-function) | ||
| 107 | |||
| 108 | (defvar wisent-lexer-function 'wisent-lex | ||
| 109 | "Function used to obtain the next lexical token in input. | ||
| 110 | Should be a lexical analyzer created with `define-wisent-lexer'.") | ||
| 111 | (make-variable-buffer-local 'wisent-lexer-function) | ||
| 112 | |||
| 113 | ;; Tag production | ||
| 114 | ;; | ||
| 115 | (defsubst wisent-raw-tag (semantic-tag) | ||
| 116 | "Return raw form of given Semantic tag SEMANTIC-TAG. | ||
| 117 | Should be used in semantic actions, in grammars, to build a Semantic | ||
| 118 | parse tree." | ||
| 119 | (nconc semantic-tag | ||
| 120 | (if (or $region | ||
| 121 | (setq $region (nthcdr 2 wisent-input))) | ||
| 122 | (list (car $region) (cdr $region)) | ||
| 123 | (list (point-max) (point-max))))) | ||
| 124 | |||
| 125 | (defsubst wisent-cook-tag (raw-tag) | ||
| 126 | "From raw form of Semantic tag RAW-TAG, return a list of cooked tags. | ||
| 127 | Should be used in semantic actions, in grammars, to build a Semantic | ||
| 128 | parse tree." | ||
| 129 | (let* ((cooked (semantic--tag-expand raw-tag)) | ||
| 130 | (l cooked)) | ||
| 131 | (while l | ||
| 132 | (semantic--tag-put-property (car l) 'reparse-symbol $nterm) | ||
| 133 | (setq l (cdr l))) | ||
| 134 | cooked)) | ||
| 135 | |||
| 136 | ;; Unmatched syntax collector | ||
| 137 | ;; | ||
| 138 | (defun wisent-collect-unmatched-syntax (nomatch) | ||
| 139 | "Add lexical token NOMATCH to the cache of unmatched tokens. | ||
| 140 | See also the variable `semantic-unmatched-syntax-cache'. | ||
| 141 | |||
| 142 | NOMATCH is in Wisent's form: (SYMBOL VALUE START . END) | ||
| 143 | and will be collected in `semantic-lex' form: (SYMBOL START . END)." | ||
| 144 | (let ((region (cddr nomatch))) | ||
| 145 | (and (number-or-marker-p (car region)) | ||
| 146 | (number-or-marker-p (cdr region)) | ||
| 147 | (setq semantic-unmatched-syntax-cache | ||
| 148 | (cons (cons (car nomatch) region) | ||
| 149 | semantic-unmatched-syntax-cache))))) | ||
| 150 | |||
| 151 | ;; Parser plug-ins | ||
| 152 | ;; | ||
| 153 | ;; The following functions permit to plug the Wisent LALR parser in | ||
| 154 | ;; Semantic toolkit. They use the standard API provided by Semantic | ||
| 155 | ;; to plug parsers in. | ||
| 156 | ;; | ||
| 157 | ;; Two plug-ins are available, BUT ONLY ONE MUST BE USED AT A TIME: | ||
| 158 | ;; | ||
| 159 | ;; - `wisent-parse-stream' designed to override the standard function | ||
| 160 | ;; `semantic-parse-stream'. | ||
| 161 | ;; | ||
| 162 | ;; - `wisent-parse-region' designed to override the standard function | ||
| 163 | ;; `semantic-parse-region'. | ||
| 164 | ;; | ||
| 165 | ;; Maybe the latter is faster because it eliminates a lot of function | ||
| 166 | ;; call. | ||
| 167 | ;; | ||
| 168 | (defun wisent-parse-stream (stream goal) | ||
| 169 | "Parse STREAM using the Wisent LALR parser. | ||
| 170 | GOAL is a nonterminal symbol to start parsing at. | ||
| 171 | Return the list (STREAM SEMANTIC-STREAM) where STREAM are those | ||
| 172 | elements of STREAM that have not been used. SEMANTIC-STREAM is the | ||
| 173 | list of semantic tags found. | ||
| 174 | The LALR parser automaton must be available in buffer local variable | ||
| 175 | `semantic--parse-table'. | ||
| 176 | |||
| 177 | Must be installed by `semantic-install-function-overrides' to override | ||
| 178 | the standard function `semantic-parse-stream'." | ||
| 179 | (let (wisent-lex-istream wisent-lex-lookahead la-elt cache) | ||
| 180 | |||
| 181 | ;; IMPLEMENTATION NOTES: | ||
| 182 | ;; `wisent-parse' returns a lookahead token when it stopped | ||
| 183 | ;; parsing before encountering the end of input. To re-enter the | ||
| 184 | ;; parser it is necessary to push back in the lexical input stream | ||
| 185 | ;; the last lookahead token issued. Because the format of | ||
| 186 | ;; lookahead tokens and tokens in STREAM can be different the | ||
| 187 | ;; lookahead token is put in the variable `wisent-lex-lookahead' | ||
| 188 | ;; before calling `wisent-parse'. Wisent's lexers always pop the | ||
| 189 | ;; next lexical token from that variable when non nil, then from | ||
| 190 | ;; the lexical input stream. | ||
| 191 | ;; | ||
| 192 | ;; The first element of STREAM is used to keep lookahead tokens | ||
| 193 | ;; across successive calls to `wisent-parse-stream'. In fact | ||
| 194 | ;; what is kept is a stack of lookaheads encountered so far. It | ||
| 195 | ;; is cleared when `wisent-parse' returns a valid semantic tag, | ||
| 196 | ;; or twice the same lookahead token! The latter indicates that | ||
| 197 | ;; there is a syntax error on that token. If so, tokens currently | ||
| 198 | ;; in the lookahead stack have not been used, and are moved into | ||
| 199 | ;; `semantic-unmatched-syntax-cache'. When the parser will be | ||
| 200 | ;; re-entered, a new lexical token will be read from STREAM. | ||
| 201 | ;; | ||
| 202 | ;; The first element of STREAM that contains the lookahead stack | ||
| 203 | ;; has this format (compatible with the format of `semantic-lex' | ||
| 204 | ;; tokens): | ||
| 205 | ;; | ||
| 206 | ;; (LOOKAHEAD-STACK START . END) | ||
| 207 | ;; | ||
| 208 | ;; where LOOKAHEAD-STACK is a list of lookahead tokens. And | ||
| 209 | ;; START/END are the bounds of the lookahead at top of stack. | ||
| 210 | |||
| 211 | ;; Retrieve lookahead token from stack | ||
| 212 | (setq la-elt (car stream)) | ||
| 213 | (if (consp (car la-elt)) | ||
| 214 | ;; The first elt of STREAM contains a lookahead stack | ||
| 215 | (setq wisent-lex-lookahead (caar la-elt) | ||
| 216 | stream (cdr stream)) | ||
| 217 | (setq la-elt nil)) | ||
| 218 | ;; Parse | ||
| 219 | (setq wisent-lex-istream stream | ||
| 220 | cache (semantic-safe "wisent-parse-stream: %s" | ||
| 221 | (condition-case error-to-filter | ||
| 222 | (wisent-parse semantic--parse-table | ||
| 223 | wisent-lexer-function | ||
| 224 | wisent-error-function | ||
| 225 | goal) | ||
| 226 | (args-out-of-range | ||
| 227 | (if (and (not debug-on-error) | ||
| 228 | (= wisent-parse-max-stack-size | ||
| 229 | (nth 2 error-to-filter))) | ||
| 230 | (progn | ||
| 231 | (message "wisent-parse-stream: %s" | ||
| 232 | (error-message-string error-to-filter)) | ||
| 233 | (message "wisent-parse-max-stack-size \ | ||
| 234 | might need to be increased")) | ||
| 235 | (apply 'signal error-to-filter)))))) | ||
| 236 | ;; Manage returned lookahead token | ||
| 237 | (if wisent-lookahead | ||
| 238 | (if (eq (caar la-elt) wisent-lookahead) | ||
| 239 | ;; It is already at top of lookahead stack | ||
| 240 | (progn | ||
| 241 | (setq cache nil | ||
| 242 | la-elt (car la-elt)) | ||
| 243 | (while la-elt | ||
| 244 | ;; Collect unmatched tokens from the stack | ||
| 245 | (run-hook-with-args | ||
| 246 | 'wisent-discarding-token-functions (car la-elt)) | ||
| 247 | (setq la-elt (cdr la-elt)))) | ||
| 248 | ;; New lookahead token | ||
| 249 | (if (or (consp cache) ;; Clear the stack if parse succeeded | ||
| 250 | (null la-elt)) | ||
| 251 | (setq la-elt (cons nil nil))) | ||
| 252 | ;; Push it into the stack | ||
| 253 | (setcar la-elt (cons wisent-lookahead (car la-elt))) | ||
| 254 | ;; Update START/END | ||
| 255 | (setcdr la-elt (cddr wisent-lookahead)) | ||
| 256 | ;; Push (LOOKAHEAD-STACK START . END) in STREAM | ||
| 257 | (setq wisent-lex-istream (cons la-elt wisent-lex-istream)))) | ||
| 258 | ;; Return (STREAM SEMANTIC-STREAM) | ||
| 259 | (list wisent-lex-istream | ||
| 260 | (if (consp cache) cache '(nil)) | ||
| 261 | ))) | ||
| 262 | |||
| 263 | (defun wisent-parse-region (start end &optional goal depth returnonerror) | ||
| 264 | "Parse the area between START and END using the Wisent LALR parser. | ||
| 265 | Return the list of semantic tags found. | ||
| 266 | Optional arguments GOAL is a nonterminal symbol to start parsing at, | ||
| 267 | DEPTH is the lexical depth to scan, and RETURNONERROR is a flag to | ||
| 268 | stop parsing on syntax error, when non-nil. | ||
| 269 | The LALR parser automaton must be available in buffer local variable | ||
| 270 | `semantic--parse-table'. | ||
| 271 | |||
| 272 | Must be installed by `semantic-install-function-overrides' to override | ||
| 273 | the standard function `semantic-parse-region'." | ||
| 274 | (if (or (< start (point-min)) (> end (point-max)) (< end start)) | ||
| 275 | (error "Invalid bounds [%s %s] passed to `wisent-parse-region'" | ||
| 276 | start end)) | ||
| 277 | (let* ((case-fold-search semantic-case-fold) | ||
| 278 | (wisent-lex-istream (semantic-lex start end depth)) | ||
| 279 | ptree tag cooked lstack wisent-lex-lookahead) | ||
| 280 | ;; Loop while there are lexical tokens available | ||
| 281 | (while wisent-lex-istream | ||
| 282 | ;; Parse | ||
| 283 | (setq wisent-lex-lookahead (car lstack) | ||
| 284 | tag (semantic-safe "wisent-parse-region: %s" | ||
| 285 | (wisent-parse semantic--parse-table | ||
| 286 | wisent-lexer-function | ||
| 287 | wisent-error-function | ||
| 288 | goal))) | ||
| 289 | ;; Manage returned lookahead token | ||
| 290 | (if wisent-lookahead | ||
| 291 | (if (eq (car lstack) wisent-lookahead) | ||
| 292 | ;; It is already at top of lookahead stack | ||
| 293 | (progn | ||
| 294 | (setq tag nil) | ||
| 295 | (while lstack | ||
| 296 | ;; Collect unmatched tokens from lookahead stack | ||
| 297 | (run-hook-with-args | ||
| 298 | 'wisent-discarding-token-functions (car lstack)) | ||
| 299 | (setq lstack (cdr lstack)))) | ||
| 300 | ;; Push new lookahead token into the stack | ||
| 301 | (setq lstack (cons wisent-lookahead lstack)))) | ||
| 302 | ;; Manage the parser result | ||
| 303 | (cond | ||
| 304 | ;; Parse succeeded, cook result | ||
| 305 | ((consp tag) | ||
| 306 | (setq lstack nil ;; Clear the lookahead stack | ||
| 307 | cooked (semantic--tag-expand tag) | ||
| 308 | ptree (append cooked ptree)) | ||
| 309 | (while cooked | ||
| 310 | (setq tag (car cooked) | ||
| 311 | cooked (cdr cooked)) | ||
| 312 | (or (semantic--tag-get-property tag 'reparse-symbol) | ||
| 313 | (semantic--tag-put-property tag 'reparse-symbol goal))) | ||
| 314 | ) | ||
| 315 | ;; Return on error if requested | ||
| 316 | (returnonerror | ||
| 317 | (setq wisent-lex-istream nil) | ||
| 318 | )) | ||
| 319 | ;; Work in progress... | ||
| 320 | (if wisent-lex-istream | ||
| 321 | (and (eq semantic-working-type 'percent) | ||
| 322 | (boundp 'semantic--progress-reporter) | ||
| 323 | semantic--progress-reporter | ||
| 324 | (progress-reporter-update | ||
| 325 | semantic--progress-reporter | ||
| 326 | (/ (* 100 (semantic-lex-token-start | ||
| 327 | (car wisent-lex-istream))) | ||
| 328 | (point-max)))))) | ||
| 329 | ;; Return parse tree | ||
| 330 | (nreverse ptree))) | ||
| 331 | |||
| 332 | ;;; Interfacing with edebug | ||
| 333 | ;; | ||
| 334 | (add-hook | ||
| 335 | 'edebug-setup-hook | ||
| 336 | #'(lambda () | ||
| 337 | |||
| 338 | (def-edebug-spec define-wisent-lexer | ||
| 339 | (&define name stringp def-body) | ||
| 340 | ) | ||
| 341 | |||
| 342 | )) | ||
| 343 | |||
| 344 | (provide 'semantic/wisent) | ||
| 345 | |||
| 346 | ;;; semantic/wisent.el ends here | ||