diff options
| author | Richard M. Stallman | 2004-11-19 19:36:09 +0000 |
|---|---|---|
| committer | Richard M. Stallman | 2004-11-19 19:36:09 +0000 |
| commit | f9b0fd9964ef94bce907db72a826df1347a9ffed (patch) | |
| tree | 7f1c68a41081d8c324facd0ba6353859f57a79f6 /src | |
| parent | e1c1c5a7f4b92cadec79150bd55a96f33ad0742a (diff) | |
| download | emacs-f9b0fd9964ef94bce907db72a826df1347a9ffed.tar.gz emacs-f9b0fd9964ef94bce907db72a826df1347a9ffed.zip | |
(regex_compile): Substitute whitespace_regexp for spaces, if it is nonzero.
(whitespace_regexp): New variable.
(re_set_whitespace_regexp): New function.
Diffstat (limited to 'src')
| -rw-r--r-- | src/regex.c | 73 |
1 files changed, 71 insertions, 2 deletions
diff --git a/src/regex.c b/src/regex.c index 36787238a32..459c0ab26b1 100644 --- a/src/regex.c +++ b/src/regex.c | |||
| @@ -1250,7 +1250,7 @@ reg_syntax_t re_syntax_options; | |||
| 1250 | 1250 | ||
| 1251 | reg_syntax_t | 1251 | reg_syntax_t |
| 1252 | re_set_syntax (syntax) | 1252 | re_set_syntax (syntax) |
| 1253 | reg_syntax_t syntax; | 1253 | reg_syntax_t syntax; |
| 1254 | { | 1254 | { |
| 1255 | reg_syntax_t ret = re_syntax_options; | 1255 | reg_syntax_t ret = re_syntax_options; |
| 1256 | 1256 | ||
| @@ -1258,6 +1258,17 @@ re_set_syntax (syntax) | |||
| 1258 | return ret; | 1258 | return ret; |
| 1259 | } | 1259 | } |
| 1260 | WEAK_ALIAS (__re_set_syntax, re_set_syntax) | 1260 | WEAK_ALIAS (__re_set_syntax, re_set_syntax) |
| 1261 | |||
| 1262 | /* Regexp to use to replace spaces, or NULL meaning don't.  Set by re_set_whitespace_regexp; only the pointer is stored (no copy is made), and regex_compile measures it with strlen, so it must be NUL-terminated. */ | ||
| 1263 | static re_char *whitespace_regexp; | ||
| 1264 | |||
| 1265 | void | ||
| 1266 | re_set_whitespace_regexp (regexp) | ||
| 1267 | re_char *regexp; | ||
| 1268 | { | ||
| 1269 | whitespace_regexp = regexp; | ||
| 1270 | } | ||
| 1271 | WEAK_ALIAS (__re_set_whitespace_regexp, re_set_whitespace_regexp) | ||
| 1261 | 1272 | ||
| 1262 | /* This table gives an error message for each of the error codes listed | 1273 | /* This table gives an error message for each of the error codes listed |
| 1263 | in regex.h. Obviously the order here has to be same as there. | 1274 | in regex.h. Obviously the order here has to be same as there. |
| @@ -2436,6 +2447,15 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2436 | /* If the object matched can contain multibyte characters. */ | 2447 | /* If the object matched can contain multibyte characters. */ |
| 2437 | const boolean multibyte = RE_MULTIBYTE_P (bufp); | 2448 | const boolean multibyte = RE_MULTIBYTE_P (bufp); |
| 2438 | 2449 | ||
| 2450 | /* Nonzero if we have pushed down into a subpattern. */ | ||
| 2451 | int in_subpattern = 0; | ||
| 2452 | |||
| 2453 | /* These hold the values of p, pattern, and pend from the main | ||
| 2454 | pattern when we have pushed into a subpattern. */ | ||
| 2455 | re_char *main_p; | ||
| 2456 | re_char *main_pattern; | ||
| 2457 | re_char *main_pend; | ||
| 2458 | |||
| 2439 | #ifdef DEBUG | 2459 | #ifdef DEBUG |
| 2440 | debug++; | 2460 | debug++; |
| 2441 | DEBUG_PRINT1 ("\nCompiling pattern: "); | 2461 | DEBUG_PRINT1 ("\nCompiling pattern: "); |
| @@ -2498,12 +2518,61 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2498 | begalt = b = bufp->buffer; | 2518 | begalt = b = bufp->buffer; |
| 2499 | 2519 | ||
| 2500 | /* Loop through the uncompiled pattern until we're at the end. */ | 2520 | /* Loop through the uncompiled pattern until we're at the end. */ |
| 2501 | while (p != pend) | 2521 | while (1) |
| 2502 | { | 2522 | { |
| 2523 | if (p == pend) | ||
| 2524 | { | ||
| 2525 | /* If this is the end of an included regexp, | ||
| 2526 | pop back to the main regexp and try again. */ | ||
| 2527 | if (in_subpattern) | ||
| 2528 | { | ||
| 2529 | in_subpattern = 0; | ||
| 2530 | pattern = main_pattern; | ||
| 2531 | p = main_p; | ||
| 2532 | pend = main_pend; | ||
| 2533 | continue; | ||
| 2534 | } | ||
| 2535 | /* If this is the end of the main regexp, we are done. */ | ||
| 2536 | break; | ||
| 2537 | } | ||
| 2538 | |||
| 2503 | PATFETCH (c); | 2539 | PATFETCH (c); |
| 2504 | 2540 | ||
| 2505 | switch (c) | 2541 | switch (c) |
| 2506 | { | 2542 | { |
| 2543 | case ' ': | ||
| 2544 | { | ||
| 2545 | re_char *p1 = p; | ||
| 2546 | |||
| 2547 | /* If there's no special whitespace regexp, treat | ||
| 2548 | spaces normally. */ | ||
| 2549 | if (!whitespace_regexp) | ||
| 2550 | goto normal_char; | ||
| 2551 | |||
| 2552 | /* Peek past following spaces. */ | ||
| 2553 | while (p1 != pend) | ||
| 2554 | { | ||
| 2555 | if (*p1 != ' ') | ||
| 2556 | break; | ||
| 2557 | p1++; | ||
| 2558 | } | ||
| 2559 | /* If the spaces are followed by a repetition op, | ||
| 2560 | treat them normally. */ | ||
| 2561 | if (p1 == pend | ||
| 2562 | || (*p1 == '*' || *p1 == '+' || *p1 == '?' | ||
| 2563 | || (*p1 == '\\' && p1 + 1 != pend && p1[1] == '{'))) | ||
| 2564 | goto normal_char; | ||
| 2565 | |||
| 2566 | /* Replace the spaces with the whitespace regexp. */ | ||
| 2567 | in_subpattern = 1; | ||
| 2568 | main_p = p1; | ||
| 2569 | main_pend = pend; | ||
| 2570 | main_pattern = pattern; | ||
| 2571 | p = pattern = whitespace_regexp; | ||
| 2572 | pend = p + strlen (p); | ||
| 2573 | break; | ||
| 2574 | } | ||
| 2575 | |||
| 2507 | case '^': | 2576 | case '^': |
| 2508 | { | 2577 | { |
| 2509 | if ( /* If at start of pattern, it's an operator. */ | 2578 | if ( /* If at start of pattern, it's an operator. */ |