diff options
| author | Stefan Monnier | 2000-05-22 04:29:52 +0000 |
|---|---|---|
| committer | Stefan Monnier | 2000-05-22 04:29:52 +0000 |
| commit | 40aeecadb8fc54c941eb36c6584654627c300c39 (patch) | |
| tree | 6d7351b72361f557263738ede14be73163765d20 | |
| parent | c9d80d38167190dcf2c1d049b723e7a70e0d7538 (diff) | |
| download | emacs-40aeecadb8fc54c941eb36c6584654627c300c39.tar.gz emacs-40aeecadb8fc54c941eb36c6584654627c300c39.zip | |
Rewritten to take advantage of shy-groups and
intervals which makes it heaps simpler.
| -rw-r--r-- | lisp/ChangeLog | 3 | ||||
| -rw-r--r-- | lisp/emacs-lisp/sregex.el | 628 |
2 files changed, 146 insertions, 485 deletions
diff --git a/lisp/ChangeLog b/lisp/ChangeLog index 737524baa43..04aa3be2b30 100644 --- a/lisp/ChangeLog +++ b/lisp/ChangeLog | |||
| @@ -1,5 +1,8 @@ | |||
| 1 | 2000-05-22 Stefan Monnier <monnier@cs.yale.edu> | 1 | 2000-05-22 Stefan Monnier <monnier@cs.yale.edu> |
| 2 | 2 | ||
| 3 | * emacs-lisp/sregex.el: Rewritten to take advantage of shy-groups and | ||
| 4 | intervals which makes it heaps simpler. | ||
| 5 | |||
| 3 | * newcomment.el (comment-region-internal): Go back to BEG after quoting | 6 | * newcomment.el (comment-region-internal): Go back to BEG after quoting |
| 4 | the nested comment markers. | 7 | the nested comment markers. |
| 5 | 8 | ||
diff --git a/lisp/emacs-lisp/sregex.el b/lisp/emacs-lisp/sregex.el index 09fc2313675..2c808eba5be 100644 --- a/lisp/emacs-lisp/sregex.el +++ b/lisp/emacs-lisp/sregex.el | |||
| @@ -1,6 +1,6 @@ | |||
| 1 | ;;; sregex.el --- symbolic regular expressions | 1 | ;;; sregex.el --- symbolic regular expressions |
| 2 | 2 | ||
| 3 | ;; Copyright (C) 1997, 1998 Free Software Foundation, Inc. | 3 | ;; Copyright (C) 1997, 1998, 2000 Free Software Foundation, Inc. |
| 4 | 4 | ||
| 5 | ;; Author: Bob Glickstein <bobg+sregex@zanshin.com> | 5 | ;; Author: Bob Glickstein <bobg+sregex@zanshin.com> |
| 6 | ;; Maintainer: Bob Glickstein <bobg+sregex@zanshin.com> | 6 | ;; Maintainer: Bob Glickstein <bobg+sregex@zanshin.com> |
| @@ -48,7 +48,7 @@ | |||
| 48 | ;; to overcome operator precedence; that also happens automatically. | 48 | ;; to overcome operator precedence; that also happens automatically. |
| 49 | ;; For example: | 49 | ;; For example: |
| 50 | 50 | ||
| 51 | ;; (sregexq (opt (or "Bob" "Robert"))) => "\\(Bob\\|Robert\\)?" | 51 | ;; (sregexq (opt (or "Bob" "Robert"))) => "\\(?:Bob\\|Robert\\)?" |
| 52 | 52 | ||
| 53 | ;; It *is* possible to group parts of the expression in order to refer | 53 | ;; It *is* possible to group parts of the expression in order to refer |
| 54 | ;; to them with numbered backreferences: | 54 | ;; to them with numbered backreferences: |
| @@ -57,14 +57,6 @@ | |||
| 57 | ;; ", Spot, " | 57 | ;; ", Spot, " |
| 58 | ;; (backref 1)) => "\\(Go\\|Run\\), Spot, \\1" | 58 | ;; (backref 1)) => "\\(Go\\|Run\\), Spot, \\1" |
| 59 | 59 | ||
| 60 | ;; If `sregexq' needs to introduce its own grouping parentheses, it | ||
| 61 | ;; will automatically renumber your backreferences: | ||
| 62 | |||
| 63 | ;; (sregexq (opt "resent-") | ||
| 64 | ;; (group (or "to" "cc" "bcc")) | ||
| 65 | ;; ": " | ||
| 66 | ;; (backref 1)) => "\\(resent-\\)?\\(to\\|cc\\|bcc\\): \\2" | ||
| 67 | |||
| 68 | ;; `sregexq' is a macro. Each time it is used, it constructs a simple | 60 | ;; `sregexq' is a macro. Each time it is used, it constructs a simple |
| 69 | ;; Lisp expression that then invokes a moderately complex engine to | 61 | ;; Lisp expression that then invokes a moderately complex engine to |
| 70 | ;; interpret the sregex and render the string form. Because of this, | 62 | ;; interpret the sregex and render the string form. Because of this, |
| @@ -99,47 +91,6 @@ | |||
| 99 | ;; (digits '(1+ (char (?0 . ?9))))) | 91 | ;; (digits '(1+ (char (?0 . ?9))))) |
| 100 | ;; (sregex 'bol dotstar ":" whitespace digits)) => "^.*:\\s-+[0-9]+" | 92 | ;; (sregex 'bol dotstar ":" whitespace digits)) => "^.*:\\s-+[0-9]+" |
| 101 | 93 | ||
| 102 | ;; This package also provides sregex-specific versions of the Emacs | ||
| 103 | ;; functions `replace-match', `match-string', | ||
| 104 | ;; `match-string-no-properties', `match-beginning', `match-end', and | ||
| 105 | ;; `match-data'. In each case, the sregex version's name begins with | ||
| 106 | ;; `sregex-' and takes one additional optional parameter, an sregex | ||
| 107 | ;; "info" object. Each of these functions is concerned with numbered | ||
| 108 | ;; submatches. Since sregex may renumber submatches, alternate | ||
| 109 | ;; versions of these functions are needed that know how to adjust the | ||
| 110 | ;; supplied number. | ||
| 111 | |||
| 112 | ;; The sregex info object for the most recently evaluated sregex can | ||
| 113 | ;; be obtained with `sregex-info'; so if you precompute your sregexes | ||
| 114 | ;; and you plan to use `replace-match' or one of the others with it, | ||
| 115 | ;; you need to record the info object for later use: | ||
| 116 | |||
| 117 | ;; (let* ((regex (sregexq (opt "resent-") | ||
| 118 | ;; (group (or "to" "cc" "bcc")) | ||
| 119 | ;; ":")) | ||
| 120 | ;; (regex-info (sregex-info))) | ||
| 121 | ;; ... | ||
| 122 | ;; (if (re-search-forward regex ...) | ||
| 123 | ;; (let ((which (sregex-match-string 1 nil regex-info))) | ||
| 124 | ;; ...))) | ||
| 125 | |||
| 126 | ;; In this example, `regex' is "\\(resent-\\)?\\(to\\|cc\\|bcc\\):", | ||
| 127 | ;; so the call to (sregex-match-string 1 ...) is automatically turned | ||
| 128 | ;; into a call to (match-string 2 ...). | ||
| 129 | |||
| 130 | ;; If the sregex info argument to `sregex-replace-match', | ||
| 131 | ;; `sregex-match-string', `sregex-match-string-no-properties', | ||
| 132 | ;; `sregex-match-beginning', `sregex-match-end', or | ||
| 133 | ;; `sregex-match-data' is omitted, the current value of (sregex-info) | ||
| 134 | ;; is used. | ||
| 135 | |||
| 136 | ;; You can do your own sregex submatch renumbering with | ||
| 137 | ;; `sregex-backref-num'. | ||
| 138 | |||
| 139 | ;; Finally, `sregex-save-match-data' is like `save-match-data' but | ||
| 140 | ;; also saves and restores the information maintained by | ||
| 141 | ;; `sregex-info'. | ||
| 142 | |||
| 143 | ;; To use this package in a Lisp program, simply (require 'sregex). | 94 | ;; To use this package in a Lisp program, simply (require 'sregex). |
| 144 | 95 | ||
| 145 | ;; Here are the clauses allowed in an `sregex' or `sregexq' | 96 | ;; Here are the clauses allowed in an `sregex' or `sregexq' |
| @@ -165,23 +116,21 @@ | |||
| 165 | 116 | ||
| 166 | ;; - (sequence CLAUSE ...) | 117 | ;; - (sequence CLAUSE ...) |
| 167 | 118 | ||
| 168 | ;; Groups the given CLAUSEs; may or may not use "\\(" and "\\)". | 119 | ;; Groups the given CLAUSEs; may or may not use "\\(?:" and "\\)". |
| 169 | ;; Clauses groups by `sequence' do not count for purposes of | 120 | ;; Clauses grouped by `sequence' do not count for purposes of |
| 170 | ;; numbering backreferences. Use `sequence' in situations like | 121 | ;; numbering backreferences. Use `sequence' in situations like |
| 171 | ;; this: | 122 | ;; this: |
| 172 | 123 | ||
| 173 | ;; (sregexq (or "dog" "cat" | 124 | ;; (sregexq (or "dog" "cat" |
| 174 | ;; (sequence (opt "sea ") "monkey"))) | 125 | ;; (sequence (opt "sea ") "monkey"))) |
| 175 | ;; => "dog\\|cat\\|\\(sea \\)?monkey" | 126 | ;; => "dog\\|cat\\|\\(?:sea \\)?monkey" |
| 176 | 127 | ||
| 177 | ;; where a single `or' alternate needs to contain multiple | 128 | ;; where a single `or' alternate needs to contain multiple |
| 178 | ;; subclauses. | 129 | ;; subclauses. |
| 179 | 130 | ||
| 180 | ;; - (backref N) | 131 | ;; - (backref N) |
| 181 | ;; Matches the same string previously matched by the Nth "group" in | 132 | ;; Matches the same string previously matched by the Nth "group" in |
| 182 | ;; the same sregex. N is a positive integer. In the resulting | 133 | ;; the same sregex. N is a positive integer. |
| 183 | ;; regex, N may be adjusted to account for automatically introduced | ||
| 184 | ;; groups. | ||
| 185 | 134 | ||
| 186 | ;; - (or CLAUSE ...) | 135 | ;; - (or CLAUSE ...) |
| 187 | ;; Matches any one of the CLAUSEs by separating them with "\\|". | 136 | ;; Matches any one of the CLAUSEs by separating them with "\\|". |
| @@ -276,158 +225,37 @@ | |||
| 276 | 225 | ||
| 277 | ;;; To do: | 226 | ;;; To do: |
| 278 | 227 | ||
| 279 | ;; Make (sregexq (or "a" (sequence "b" "c"))) return "a\\|bc" instead | ||
| 280 | ;; of "a\\|\\(bc\\)" | ||
| 281 | |||
| 282 | ;; An earlier version of this package could optionally translate the | 228 | ;; An earlier version of this package could optionally translate the |
| 283 | ;; symbolic regex into other languages' syntaxes, e.g. Perl. For | 229 | ;; symbolic regex into other languages' syntaxes, e.g. Perl. For |
| 284 | ;; instance, with Perl syntax selected, (sregexq (or "ab" "cd")) would | 230 | ;; instance, with Perl syntax selected, (sregexq (or "ab" "cd")) would |
| 285 | ;; yield "ab|cd" instead of "ab\\|cd". It might be useful to restore | 231 | ;; yield "ab|cd" instead of "ab\\|cd". It might be useful to restore |
| 286 | ;; such a facility. | 232 | ;; such a facility. |
| 287 | 233 | ||
| 288 | ;;; Bugs: | 234 | ;; - handle multibyte chars in sregex--char-aux |
| 235 | ;; - add support for character classes ([:blank:], ...) | ||
| 236 | ;; - add support for non-greedy operators *? and +? | ||
| 237 | ;; - bug: (sregexq (opt (opt ?a))) returns "a??" which is a non-greedy "a?" | ||
| 289 | 238 | ||
| 290 | ;; The (regex REGEX) form can confuse the code that distinguishes | 239 | ;;; Bugs: |
| 291 | ;; introduced groups from user-specified groups. Try to avoid using | ||
| 292 | ;; grouping within a `regex' form. Failing that, try to avoid using | ||
| 293 | ;; backrefs if you're using `regex'. | ||
| 294 | 240 | ||
| 295 | ;;; Code: | 241 | ;;; Code: |
| 296 | 242 | ||
| 297 | (defsubst sregex--value-unitp (val) (nth 0 val)) | 243 | (eval-when-compile (require 'cl)) |
| 298 | (defsubst sregex--value-groups (val) (nth 1 val)) | ||
| 299 | (defsubst sregex--value-tree (val) (nth 2 val)) | ||
| 300 | |||
| 301 | (defun sregex--make-value (unitp groups tree) | ||
| 302 | (list unitp groups tree)) | ||
| 303 | |||
| 304 | (defvar sregex--current-sregex nil | ||
| 305 | "Global state for `sregex-info'.") | ||
| 306 | |||
| 307 | (defun sregex-info () | ||
| 308 | "Return extra information about the latest call to `sregex'. | ||
| 309 | This extra information is needed in order to adjust user-requested | ||
| 310 | backreference numbers to numbers suitable for the generated regexp. | ||
| 311 | See e.g. `sregex-match-string' and `sregex-backref-num'." | ||
| 312 | sregex--current-sregex) | ||
| 313 | |||
| 314 | ; (require 'advice) | ||
| 315 | ; (defadvice save-match-data (around sregex-save-match-data protect) | ||
| 316 | ; (let ((sregex--saved-sregex sregex--current-sregex)) | ||
| 317 | ; (unwind-protect | ||
| 318 | ; ad-do-it | ||
| 319 | ; (setq sregex--current-sregex sregex--saved-sregex)))) | ||
| 320 | (defmacro sregex-save-match-data (&rest forms) | ||
| 321 | "Like `save-match-data', but also saves and restores `sregex-info' data." | ||
| 322 | `(let ((sregex--saved-sregex sregex--current-sregex)) | ||
| 323 | (unwind-protect | ||
| 324 | (save-match-data ,@forms) | ||
| 325 | (setq sregex--current-sregex sregex--saved-sregex)))) | ||
| 326 | |||
| 327 | (defun sregex-replace-match (replacement | ||
| 328 | &optional fixedcase literal string subexp sregex) | ||
| 329 | "Like `replace-match', for a regexp made with `sregex'. | ||
| 330 | This takes one additional optional argument, the `sregex' info, which | ||
| 331 | can be obtained with `sregex-info'. The SUBEXP argument is adjusted | ||
| 332 | to allow for \"introduced groups\". If the extra argument is omitted | ||
| 333 | or nil, it defaults to the current value of (sregex-info)." | ||
| 334 | (replace-match replacement fixedcase literal string | ||
| 335 | (and subexp | ||
| 336 | (sregex-backref-num subexp sregex)))) | ||
| 337 | |||
| 338 | (defun sregex-match-string (count &optional in-string sregex) | ||
| 339 | "Like `match-string', for a regexp made with `sregex'. | ||
| 340 | This takes one additional optional argument, the `sregex' info, which | ||
| 341 | can be obtained with `sregex-info'. The COUNT argument is adjusted to | ||
| 342 | allow for \"introduced groups\". If the extra argument is omitted or | ||
| 343 | nil, it defaults to the current value of (sregex-info)." | ||
| 344 | (match-string (and count | ||
| 345 | (sregex-backref-num count sregex)) | ||
| 346 | in-string)) | ||
| 347 | 244 | ||
| 245 | ;; Compatibility code for when we didn't have shy-groups | ||
| 246 | (defvar sregex--current-sregex nil) | ||
| 247 | (defun sregex-info () nil) | ||
| 248 | (defmacro sregex-save-match-data (&rest forms) (cons 'save-match-data forms)) | ||
| 249 | (defun sregex-replace-match (r &optional f l str subexp x) | ||
| 250 | (replace-match r f l str subexp)) | ||
| 251 | (defun sregex-match-string (c &optional i x) (match-string c i)) | ||
| 348 | (defun sregex-match-string-no-properties (count &optional in-string sregex) | 252 | (defun sregex-match-string-no-properties (count &optional in-string sregex) |
| 349 | "Like `match-string-no-properties', for a regexp made with `sregex'. | 253 | (match-string-no-properties count in-string)) |
| 350 | This takes one additional optional argument, the `sregex' info, which | 254 | (defun sregex-match-beginning (count &optional sregex) (match-beginning count)) |
| 351 | can be obtained with `sregex-info'. The COUNT argument is adjusted to | 255 | (defun sregex-match-end (count &optional sregex) (match-end count)) |
| 352 | allow for \"introduced groups\". If the extra argument is omitted or | 256 | (defun sregex-match-data (&optional sregex) (match-data)) |
| 353 | nil, it defaults to the current value of (sregex-info)." | 257 | (defun sregex-backref-num (n &optional sregex) n) |
| 354 | (match-string-no-properties | 258 | |
| 355 | (and count | ||
| 356 | (sregex-backref-num count sregex)) | ||
| 357 | in-string)) | ||
| 358 | |||
| 359 | (defun sregex-match-beginning (count &optional sregex) | ||
| 360 | "Like `match-beginning', for a regexp made with `sregex'. | ||
| 361 | This takes one additional optional argument, the `sregex' info, which | ||
| 362 | can be obtained with `sregex-info'. The COUNT argument is adjusted to | ||
| 363 | allow for \"introduced groups\". If the extra argument is omitted or | ||
| 364 | nil, it defaults to the current value of (sregex-info)." | ||
| 365 | (match-beginning (sregex-backref-num count sregex))) | ||
| 366 | |||
| 367 | (defun sregex-match-end (count &optional sregex) | ||
| 368 | "Like `match-end', for a regexp made with `sregex'. | ||
| 369 | This takes one additional optional argument, the `sregex' info, which | ||
| 370 | can be obtained with `sregex-info'. The COUNT argument is adjusted to | ||
| 371 | allow for \"introduced groups\". If the extra argument is omitted or | ||
| 372 | nil, it defaults to the current value of (sregex-info)." | ||
| 373 | (match-end (sregex-backref-num count sregex))) | ||
| 374 | |||
| 375 | (defun sregex-match-data (&optional sregex) | ||
| 376 | "Like `match-data', for a regexp made with `sregex'. | ||
| 377 | This takes one additional optional argument, the `sregex' info, which | ||
| 378 | can be obtained with `sregex-info'. \"Introduced groups\" are removed | ||
| 379 | from the result. If the extra argument is omitted or nil, it defaults | ||
| 380 | to the current value of (sregex-info)." | ||
| 381 | (let* ((data (match-data)) | ||
| 382 | (groups (sregex--value-groups (or sregex | ||
| 383 | sregex--current-sregex))) | ||
| 384 | (result (list (car (cdr data)) | ||
| 385 | (car data)))) | ||
| 386 | (setq data (cdr (cdr data))) | ||
| 387 | (while data | ||
| 388 | (if (car groups) | ||
| 389 | (setq result (append (list (car (cdr data)) | ||
| 390 | (car data)) | ||
| 391 | result))) | ||
| 392 | (setq groups (cdr groups) | ||
| 393 | data (cdr (cdr data)))) | ||
| 394 | (reverse result))) | ||
| 395 | |||
| 396 | (defun sregex--render-tree (tree sregex) | ||
| 397 | (let ((key (car tree))) | ||
| 398 | (cond ((eq key 'str) | ||
| 399 | (cdr tree)) | ||
| 400 | ((eq key 'or) | ||
| 401 | (mapconcat '(lambda (x) | ||
| 402 | (sregex--render-tree x sregex)) | ||
| 403 | (cdr tree) | ||
| 404 | "\\|")) | ||
| 405 | ((eq key 'sequence) | ||
| 406 | (apply 'concat | ||
| 407 | (mapcar '(lambda (x) | ||
| 408 | (sregex--render-tree x sregex)) | ||
| 409 | (cdr tree)))) | ||
| 410 | ((eq key 'group) | ||
| 411 | (concat "\\(" | ||
| 412 | (sregex--render-tree (cdr tree) sregex) | ||
| 413 | "\\)")) | ||
| 414 | ((eq key 'opt) | ||
| 415 | (concat (sregex--render-tree (cdr tree) sregex) | ||
| 416 | "?")) | ||
| 417 | ((eq key '0+) | ||
| 418 | (concat (sregex--render-tree (cdr tree) sregex) | ||
| 419 | "*")) | ||
| 420 | ((eq key '1+) | ||
| 421 | (concat (sregex--render-tree (cdr tree) sregex) | ||
| 422 | "+")) | ||
| 423 | ((eq key 'backref) | ||
| 424 | (let ((num (sregex-backref-num (cdr tree) sregex))) | ||
| 425 | (if (> num 9) | ||
| 426 | (error "sregex: backref number %d too high after adjustment" | ||
| 427 | num) | ||
| 428 | (concat "\\" (int-to-string num))))) | ||
| 429 | (t (error "sregex internal error: unknown tree type %S" | ||
| 430 | key))))) | ||
| 431 | 259 | ||
| 432 | (defun sregex (&rest exps) | 260 | (defun sregex (&rest exps) |
| 433 | "Symbolic regular expression interpreter. | 261 | "Symbolic regular expression interpreter. |
| @@ -443,10 +271,7 @@ subexpressions: | |||
| 443 | (whitespace '(1+ (syntax ?-))) | 271 | (whitespace '(1+ (syntax ?-))) |
| 444 | (digits '(1+ (char (?0 . ?9))))) | 272 | (digits '(1+ (char (?0 . ?9))))) |
| 445 | (sregex 'bol dotstar \":\" whitespace digits)) => \"^.*:\\\\s-+[0-9]+\"" | 273 | (sregex 'bol dotstar \":\" whitespace digits)) => \"^.*:\\\\s-+[0-9]+\"" |
| 446 | (progn | 274 | (sregex--sequence exps nil)) |
| 447 | (setq sregex--current-sregex (sregex--sequence exps nil)) | ||
| 448 | (sregex--render-tree (sregex--value-tree sregex--current-sregex) | ||
| 449 | sregex--current-sregex))) | ||
| 450 | 275 | ||
| 451 | (defmacro sregexq (&rest exps) | 276 | (defmacro sregexq (&rest exps) |
| 452 | "Symbolic regular expression interpreter. | 277 | "Symbolic regular expression interpreter. |
| @@ -546,22 +371,20 @@ Here are the clauses allowed in an `sregex' or `sregexq' expression: | |||
| 546 | - (sequence CLAUSE ...) | 371 | - (sequence CLAUSE ...) |
| 547 | 372 | ||
| 548 | Groups the given CLAUSEs; may or may not use \"\\\\(\" and \"\\\\)\". | 373 | Groups the given CLAUSEs; may or may not use \"\\\\(\" and \"\\\\)\". |
| 549 | Clauses groups by `sequence' do not count for purposes of | 374 | Clauses grouped by `sequence' do not count for purposes of |
| 550 | numbering backreferences. Use `sequence' in situations like | 375 | numbering backreferences. Use `sequence' in situations like |
| 551 | this: | 376 | this: |
| 552 | 377 | ||
| 553 | (sregexq (or \"dog\" \"cat\" | 378 | (sregexq (or \"dog\" \"cat\" |
| 554 | (sequence (opt \"sea \") \"monkey\"))) | 379 | (sequence (opt \"sea \") \"monkey\"))) |
| 555 | => \"dog\\\\|cat\\\\|\\\\(sea \\\\)?monkey\" | 380 | => \"dog\\\\|cat\\\\|\\\\(?:sea \\\\)?monkey\" |
| 556 | 381 | ||
| 557 | where a single `or' alternate needs to contain multiple | 382 | where a single `or' alternate needs to contain multiple |
| 558 | subclauses. | 383 | subclauses. |
| 559 | 384 | ||
| 560 | - (backref N) | 385 | - (backref N) |
| 561 | Matches the same string previously matched by the Nth \"group\" in | 386 | Matches the same string previously matched by the Nth \"group\" in |
| 562 | the same sregex. N is a positive integer. In the resulting | 387 | the same sregex. N is a positive integer. |
| 563 | regex, N may be adjusted to account for automatically introduced | ||
| 564 | groups. | ||
| 565 | 388 | ||
| 566 | - (or CLAUSE ...) | 389 | - (or CLAUSE ...) |
| 567 | Matches any one of the CLAUSEs by separating them with \"\\\\|\". | 390 | Matches any one of the CLAUSEs by separating them with \"\\\\|\". |
| @@ -639,10 +462,7 @@ Here are the clauses allowed in an `sregex' or `sregexq' expression: | |||
| 639 | This is a \"trapdoor\" for including ordinary regular expression | 462 | This is a \"trapdoor\" for including ordinary regular expression |
| 640 | strings in the result. Some regular expressions are clearer when | 463 | strings in the result. Some regular expressions are clearer when |
| 641 | written the old way: \"[a-z]\" vs. (sregexq (char (?a . ?z))), for | 464 | written the old way: \"[a-z]\" vs. (sregexq (char (?a . ?z))), for |
| 642 | instance. However, using this can confuse the code that | 465 | instance. |
| 643 | distinguishes introduced groups from user-specified groups. Avoid | ||
| 644 | using grouping within a `regex' form. Failing that, avoid using | ||
| 645 | backrefs if you're using `regex'. | ||
| 646 | 466 | ||
| 647 | Each CHAR-CLAUSE that is passed to (char ...) and (not-char ...) | 467 | Each CHAR-CLAUSE that is passed to (char ...) and (not-char ...) |
| 648 | has one of the following forms: | 468 | has one of the following forms: |
| @@ -659,290 +479,128 @@ has one of the following forms: | |||
| 659 | `(apply 'sregex ',exps)) | 479 | `(apply 'sregex ',exps)) |
| 660 | 480 | ||
| 661 | (defun sregex--engine (exp combine) | 481 | (defun sregex--engine (exp combine) |
| 662 | (let* ((val (cond ((stringp exp) | 482 | (cond |
| 663 | (sregex--make-value (or (not (eq combine 'suffix)) | 483 | ((stringp exp) |
| 664 | (= (length exp) 1)) | 484 | (if (and combine |
| 665 | nil | 485 | (eq combine 'suffix) |
| 666 | (cons 'str | 486 | (/= (length exp) 1)) |
| 667 | (regexp-quote exp)))) | 487 | (concat "\\(?:" (regexp-quote exp) "\\)") |
| 668 | ((symbolp exp) | 488 | (regexp-quote exp))) |
| 669 | (funcall (intern (concat "sregex--" | 489 | ((symbolp exp) |
| 670 | (symbol-name exp))) | 490 | (ecase exp |
| 671 | combine)) | 491 | (any ".") |
| 672 | ((consp exp) | 492 | (bol "^") |
| 673 | (funcall (intern (concat "sregex--" | 493 | (eol "$") |
| 674 | (symbol-name (car exp)))) | 494 | (wordchar "\\w") |
| 675 | (cdr exp) | 495 | (not-wordchar "\\W") |
| 676 | combine)) | 496 | (bot "\\`") |
| 677 | (t (error "Invalid expression: %s" exp)))) | 497 | (eot "\\'") |
| 678 | (unitp (sregex--value-unitp val)) | 498 | (point "\\=") |
| 679 | (groups (sregex--value-groups val)) | 499 | (word-boundary "\\b") |
| 680 | (tree (sregex--value-tree val))) | 500 | (not-word-boundary "\\B") |
| 681 | (if (and combine (not unitp)) | 501 | (bow "\\<") |
| 682 | (sregex--make-value t | 502 | (eow "\\>"))) |
| 683 | (cons nil groups) | 503 | ((consp exp) |
| 684 | (cons 'group tree)) | 504 | (funcall (intern (concat "sregex--" |
| 685 | (sregex--make-value unitp groups tree)))) | 505 | (symbol-name (car exp)))) |
| 506 | (cdr exp) | ||
| 507 | combine)) | ||
| 508 | (t (error "Invalid expression: %s" exp)))) | ||
| 686 | 509 | ||
| 687 | (defun sregex--sequence (exps combine) | 510 | (defun sregex--sequence (exps combine) |
| 688 | (if (= (length exps) 1) | 511 | (if (= (length exps) 1) (sregex--engine (car exps) combine) |
| 689 | (sregex--engine (car exps) combine) | 512 | (let ((re (mapconcat |
| 690 | (let ((groups nil) | 513 | (lambda (e) (sregex--engine e 'concat)) |
| 691 | (trees nil)) ;grows in reverse | 514 | exps ""))) |
| 692 | (while exps | ||
| 693 | (let ((val (sregex--engine (car exps) 'concat))) | ||
| 694 | (setq groups (append groups | ||
| 695 | (sregex--value-groups val)) | ||
| 696 | trees (cons (sregex--value-tree val) trees) | ||
| 697 | exps (cdr exps)))) | ||
| 698 | (setq trees (nreverse trees)) | ||
| 699 | (if (eq combine 'suffix) | 515 | (if (eq combine 'suffix) |
| 700 | (sregex--make-value t | 516 | (concat "\\(?:" re "\\)") |
| 701 | (cons nil groups) | 517 | re)))) |
| 702 | (cons 'group | ||
| 703 | (cons 'sequence trees))) | ||
| 704 | (sregex--make-value (not (eq combine 'suffix)) | ||
| 705 | groups | ||
| 706 | (cons 'sequence trees)))))) | ||
| 707 | |||
| 708 | (defun sregex--group (exps combine) | ||
| 709 | (let ((val (sregex--sequence exps nil))) | ||
| 710 | (sregex--make-value t | ||
| 711 | (cons t (sregex--value-groups val)) | ||
| 712 | (cons 'group (sregex--value-tree val))))) | ||
| 713 | |||
| 714 | (defun sregex-backref-num (n &optional sregex) | ||
| 715 | "Adjust backreference number N according to SREGEX. | ||
| 716 | When `sregex' introduces parenthesized groups that the user didn't ask | ||
| 717 | for, the numbering of the groups that the user *did* ask for gets all | ||
| 718 | out of whack. This function accounts for introduced groups. Example: | ||
| 719 | |||
| 720 | (sregexq (opt \"ab\") | ||
| 721 | (group (or \"c\" \"d\"))) => \"\\\\(ab\\\\)?\\\\(c\\\\|d\\\\)\" | ||
| 722 | (setq info (sregex-info)) | ||
| 723 | (sregex-backref-num 1 info) => 2 | ||
| 724 | |||
| 725 | The SREGEX parameter is optional and defaults to the current value of | ||
| 726 | `sregex-info'." | ||
| 727 | (let ((groups (sregex--value-groups (or sregex | ||
| 728 | sregex--current-sregex))) | ||
| 729 | (result 0)) | ||
| 730 | (while (and groups (> n 0)) | ||
| 731 | (if (car groups) | ||
| 732 | (setq n (1- n))) | ||
| 733 | (setq result (1+ result) | ||
| 734 | groups (cdr groups))) | ||
| 735 | result)) | ||
| 736 | |||
| 737 | (defun sregex--backref (exps combine) | ||
| 738 | (sregex--make-value t nil (cons 'backref (car exps)))) | ||
| 739 | |||
| 740 | (defun sregex--any (combine) | ||
| 741 | (sregex--make-value t nil '(str . "."))) | ||
| 742 | |||
| 743 | (defun sregex--opt (exps combine) | ||
| 744 | (let ((val (sregex--sequence exps 'suffix))) | ||
| 745 | (sregex--make-value t | ||
| 746 | (sregex--value-groups val) | ||
| 747 | (cons 'opt (sregex--value-tree val))))) | ||
| 748 | |||
| 749 | (defun sregex--0+ (exps combine) | ||
| 750 | (let ((val (sregex--sequence exps 'suffix))) | ||
| 751 | (sregex--make-value t | ||
| 752 | (sregex--value-groups val) | ||
| 753 | (cons '0+ (sregex--value-tree val))))) | ||
| 754 | (defun sregex--1+ (exps combine) | ||
| 755 | (let ((val (sregex--sequence exps 'suffix))) | ||
| 756 | (sregex--make-value t | ||
| 757 | (sregex--value-groups val) | ||
| 758 | (cons '1+ (sregex--value-tree val))))) | ||
| 759 | |||
| 760 | (defun sregex--repeat (exps combine) | ||
| 761 | (let ((min (or (car exps) 0)) | ||
| 762 | (max (car (cdr exps)))) | ||
| 763 | (setq exps (cdr (cdr exps))) | ||
| 764 | (cond ((zerop min) | ||
| 765 | (cond ((equal max 0) ;degenerate | ||
| 766 | (sregex--make-value t nil nil)) | ||
| 767 | ((equal max 1) | ||
| 768 | (sregex--opt exps combine)) | ||
| 769 | ((not max) | ||
| 770 | (sregex--0+ exps combine)) | ||
| 771 | (t (sregex--sequence (make-list max | ||
| 772 | (cons 'opt exps)) | ||
| 773 | combine)))) | ||
| 774 | ((= min 1) | ||
| 775 | (cond ((equal max 1) | ||
| 776 | (sregex--sequence exps combine)) | ||
| 777 | ((not max) | ||
| 778 | (sregex--1+ exps combine)) | ||
| 779 | (t (sregex--sequence (append exps | ||
| 780 | (make-list (1- max) | ||
| 781 | (cons 'opt exps))) | ||
| 782 | combine)))) | ||
| 783 | (t (sregex--sequence (append exps | ||
| 784 | (list (append (list 'repeat | ||
| 785 | (1- min) | ||
| 786 | (and max | ||
| 787 | (1- max))) | ||
| 788 | exps))) | ||
| 789 | combine))))) | ||
| 790 | 518 | ||
| 791 | (defun sregex--or (exps combine) | 519 | (defun sregex--or (exps combine) |
| 792 | (if (= (length exps) 1) | 520 | (if (= (length exps) 1) (sregex--engine (car exps) combine) |
| 793 | (sregex--engine (car exps) combine) | 521 | (let ((re (mapconcat |
| 794 | (let ((groups nil) | 522 | (lambda (e) (sregex--engine e 'or)) |
| 795 | (trees nil)) | 523 | exps "\\|"))) |
| 796 | (while exps | 524 | (if (not (eq combine 'or)) |
| 797 | (let ((val (sregex--engine (car exps) 'or))) | 525 | (concat "\\(?:" re "\\)") |
| 798 | (setq groups (append groups | 526 | re)))) |
| 799 | (sregex--value-groups val)) | 527 | |
| 800 | trees (cons (sregex--value-tree val) trees) | 528 | (defun sregex--group (exps combine) (concat "\\(" (sregex--sequence exps nil) "\\)")) |
| 801 | exps (cdr exps)))) | 529 | |
| 802 | (sregex--make-value (eq combine 'or) | 530 | (defun sregex--backref (exps combine) (concat "\\" (int-to-string (car exps)))) |
| 803 | groups | 531 | (defun sregex--opt (exps combine) (concat (sregex--sequence exps 'suffix) "?")) |
| 804 | (cons 'or (nreverse trees)))))) | 532 | (defun sregex--0+ (exps combine) (concat (sregex--sequence exps 'suffix) "*")) |
| 805 | 533 | (defun sregex--1+ (exps combine) (concat (sregex--sequence exps 'suffix) "+")) | |
| 806 | (defmacro sregex--char-range-aux () | 534 | |
| 807 | '(if start | 535 | (defun sregex--char (exps combine) (sregex--char-aux nil exps)) |
| 808 | (let (startc endc) | 536 | (defun sregex--not-char (exps combine) (sregex--char-aux t exps)) |
| 809 | (if (and (<= 32 start) | 537 | |
| 810 | (<= start 127)) | 538 | (defun sregex--syntax (exps combine) (format "\\s%c" (car exps))) |
| 811 | (setq startc (char-to-string start) | 539 | (defun sregex--not-syntax (exps combine) (format "\\S%c" (car exps))) |
| 812 | endc (char-to-string end)) | 540 | |
| 813 | (setq startc (format "\\%03o" start) | 541 | (defun sregex--regex (exps combine) |
| 814 | endc (format "\\%03o" end))) | 542 | (if combine (concat "\\(?:" (car exps) "\\)") (car exps))) |
| 815 | (if (> end start) | 543 | |
| 816 | (if (> end (+ start 1)) | 544 | (defun sregex--repeat (exps combine) |
| 817 | (setq class (concat class startc "-" endc)) | 545 | (let* ((min (or (pop exps) 0)) |
| 818 | (setq class (concat class startc endc))) | 546 | (minstr (number-to-string min)) |
| 819 | (setq class (concat class startc)))))) | 547 | (max (pop exps))) |
| 820 | 548 | (concat (sregex--sequence exps 'suffix) | |
| 821 | (defmacro sregex--char-range (rstart rend) | 549 | (concat "\\{" minstr "," |
| 822 | `(let ((i ,rstart) | 550 | (when max (number-to-string max)) "\\}")))) |
| 823 | start end) | 551 | |
| 824 | (while (<= i ,rend) | 552 | (defun sregex--char-range (start end) |
| 825 | (if (aref chars i) | 553 | (let ((startc (char-to-string start)) |
| 826 | (progn | 554 | (endc (char-to-string end))) |
| 827 | (if start | 555 | (cond |
| 828 | (setq end i) | 556 | ((> end (+ start 2)) (concat startc "-" endc)) |
| 829 | (setq start i | 557 | ((> end (+ start 1)) (concat startc (char-to-string (1+ start)) endc)) |
| 830 | end i)) | 558 | ((> end start) (concat startc endc)) |
| 831 | (aset chars i nil)) | 559 | (t startc)))) |
| 832 | (sregex--char-range-aux) | ||
| 833 | (setq start nil | ||
| 834 | end nil)) | ||
| 835 | (setq i (1+ i))) | ||
| 836 | (sregex--char-range-aux))) | ||
| 837 | 560 | ||
| 838 | (defun sregex--char-aux (complement args) | 561 | (defun sregex--char-aux (complement args) |
| 839 | (let ((chars (make-vector 256 nil))) | 562 | ;; regex-opt does the same, we should join effort. |
| 840 | (while args | 563 | (let ((chars (make-bool-vector 256 nil))) ; Yeah, right! |
| 841 | (let ((arg (car args))) | 564 | (dolist (arg args) |
| 842 | (cond ((integerp arg) | 565 | (cond ((integerp arg) (aset chars arg t)) |
| 843 | (aset chars arg t)) | 566 | ((stringp arg) (mapcar (lambda (c) (aset chars c t)) arg)) |
| 844 | ((stringp arg) | 567 | ((consp arg) |
| 845 | (mapcar (function | 568 | (let ((start (car arg)) |
| 846 | (lambda (c) | 569 | (end (cdr arg))) |
| 847 | (aset chars c t))) | 570 | (when (> start end) |
| 848 | arg)) | 571 | (let ((tmp start)) (setq start end) (setq end tmp))) |
| 849 | ((consp arg) | 572 | ;; now start <= end |
| 850 | (let ((start (car arg)) | 573 | (let ((i start)) |
| 851 | (end (cdr arg))) | 574 | (while (<= i end) |
| 852 | (if (> start end) | 575 | (aset chars i t) |
| 853 | (let ((tmp start)) | 576 | (setq i (1+ i)))))))) |
| 854 | (setq start end | ||
| 855 | end tmp))) | ||
| 856 | ;; now start <= end | ||
| 857 | (let ((i start)) | ||
| 858 | (while (<= i end) | ||
| 859 | (aset chars i t) | ||
| 860 | (setq i (1+ i)))))))) | ||
| 861 | (setq args (cdr args))) | ||
| 862 | ;; now chars is a map of the characters in the class | 577 | ;; now chars is a map of the characters in the class |
| 863 | (let ((class "") | 578 | (let ((caret (aref chars ?^)) |
| 864 | (caret (aref chars ?^))) | 579 | (dash (aref chars ?-)) |
| 580 | (class (if (aref chars ?\]) "]" ""))) | ||
| 865 | (aset chars ?^ nil) | 581 | (aset chars ?^ nil) |
| 866 | (if (aref chars ?\]) | 582 | (aset chars ?- nil) |
| 867 | (progn | 583 | (aset chars ?\] nil) |
| 868 | (setq class (concat class "]")) | 584 | |
| 869 | (aset chars ?\] nil))) | 585 | (let (start end) |
| 870 | (if (aref chars ?-) | 586 | (dotimes (i 256) |
| 871 | (progn | 587 | (if (aref chars i) |
| 872 | (setq class (concat class "-")) | 588 | (progn |
| 873 | (aset chars ?- nil))) | 589 | (unless start (setq start i)) |
| 874 | (if (aref chars ?\\) | 590 | (setq end i) |
| 875 | (progn | 591 | (aset chars i nil)) |
| 876 | (setq class (concat class "\\\\")) | 592 | (when start |
| 877 | (aset chars ?\\ nil))) | 593 | (setq class (concat class (sregex--char-range start end))) |
| 878 | 594 | (setq start nil)))) | |
| 879 | (sregex--char-range ?A ?Z) | 595 | (if start |
| 880 | (sregex--char-range ?a ?z) | 596 | (setq class (concat class (sregex--char-range start end))))) |
| 881 | (sregex--char-range ?0 ?9) | 597 | |
| 882 | 598 | (if (> (length class) 0) | |
| 883 | (let ((i 32)) | 599 | (setq class (concat class (if caret "^") (if dash "-"))) |
| 884 | (while (< i 128) | 600 | (setq class (concat class (if dash "-") (if caret "^")))) |
| 885 | (if (aref chars i) | 601 | (if (and (not complement) (= (length class) 1)) |
| 886 | (progn | 602 | (regexp-quote class) |
| 887 | (setq class (concat class (char-to-string i))) | 603 | (concat "[" (if complement "^") class "]"))))) |
| 888 | (aset chars i nil))) | ||
| 889 | (setq i (1+ i)))) | ||
| 890 | |||
| 891 | (sregex--char-range 0 31) | ||
| 892 | (sregex--char-range 128 255) | ||
| 893 | |||
| 894 | (let ((i 0)) | ||
| 895 | (while (< i 256) | ||
| 896 | (if (aref chars i) | ||
| 897 | (setq class (concat class (format "\\%03o" i)))) | ||
| 898 | (setq i (1+ i)))) | ||
| 899 | |||
| 900 | (if caret | ||
| 901 | (setq class (concat class "^"))) | ||
| 902 | (concat "[" (if complement "^") class "]")))) | ||
| 903 | |||
| 904 | (defun sregex--char (exps combine) | ||
| 905 | (sregex--make-value t nil (cons 'str (sregex--char-aux nil exps)))) | ||
| 906 | (defun sregex--not-char (exps combine) | ||
| 907 | (sregex--make-value t nil (cons 'str (sregex--char-aux t exps)))) | ||
| 908 | |||
| 909 | (defun sregex--bol (combine) | ||
| 910 | (sregex--make-value t nil '(str . "^"))) | ||
| 911 | (defun sregex--eol (combine) | ||
| 912 | (sregex--make-value t nil '(str . "$"))) | ||
| 913 | |||
| 914 | (defun sregex--wordchar (combine) | ||
| 915 | (sregex--make-value t nil '(str . "\\w"))) | ||
| 916 | (defun sregex--not-wordchar (combine) | ||
| 917 | (sregex--make-value t nil '(str . "\\W"))) | ||
| 918 | |||
| 919 | (defun sregex--syntax (exps combine) | ||
| 920 | (sregex--make-value t nil (cons 'str (format "\\s%c" (car exps))))) | ||
| 921 | (defun sregex--not-syntax (exps combine) | ||
| 922 | (sregex--make-value t nil (cons 'str (format "\\S%c" (car exps))))) | ||
| 923 | |||
| 924 | (defun sregex--bot (combine) | ||
| 925 | (sregex--make-value t nil (cons 'str "\\`"))) | ||
| 926 | (defun sregex--eot (combine) | ||
| 927 | (sregex--make-value t nil (cons 'str "\\'"))) | ||
| 928 | |||
| 929 | (defun sregex--point (combine) | ||
| 930 | (sregex--make-value t nil '(str . "\\="))) | ||
| 931 | |||
| 932 | (defun sregex--word-boundary (combine) | ||
| 933 | (sregex--make-value t nil '(str . "\\b"))) | ||
| 934 | (defun sregex--not-word-boundary (combine) | ||
| 935 | (sregex--make-value t nil '(str . "\\B"))) | ||
| 936 | |||
| 937 | (defun sregex--bow (combine) | ||
| 938 | (sregex--make-value t nil '(str . "\\<"))) | ||
| 939 | (defun sregex--eow (combine) | ||
| 940 | (sregex--make-value t nil '(str . "\\>"))) | ||
| 941 | |||
| 942 | |||
| 943 | ;; trapdoor - usage discouraged | ||
| 944 | (defun sregex--regex (exps combine) | ||
| 945 | (sregex--make-value nil nil (car exps))) | ||
| 946 | 604 | ||
| 947 | (provide 'sregex) | 605 | (provide 'sregex) |
| 948 | 606 | ||