diff options
| author | Stefan Monnier | 2000-08-31 17:19:15 +0000 |
|---|---|---|
| committer | Stefan Monnier | 2000-08-31 17:19:15 +0000 |
| commit | c0f9ea083ec038ab5aa3049cd12268f4c0597578 (patch) | |
| tree | e5e8312ae28a82f673f1d647a8d375186e719bbd /src | |
| parent | d04efc645a1888c81bb6b0237cfc341ab24f4390 (diff) | |
| download | emacs-c0f9ea083ec038ab5aa3049cd12268f4c0597578.tar.gz emacs-c0f9ea083ec038ab5aa3049cd12268f4c0597578.zip | |
* regex.h (RE_NO_NEWLINE_ANCHOR): New syntax flag.
(struct re_pattern_buffer): Remove newline_anchor.
* regex.c: Keep namespace clean for GNU libc by renaming <fun>
to __<fun> and using `weak_alias (__<fun>, <fun>)'.
(re_max_failures, fail_stack): Use size_t rather than unsigned.
(regex_compile): For ^ and $, choose between buffer and line (beg|end)
depending on the new RE_NO_NEWLINE_ANCHOR syntax flag.
(print_compiled_pattern, re_search_2, mutually_exclusive_p)
(re_match_2_internal, re_compile_pattern, re_comp, regcomp):
Get rid of references to newline_anchor.
(regcomp): Allocate and precompute a fastmap.
Diffstat (limited to 'src')
| -rw-r--r-- | src/ChangeLog | 14 | ||||
| -rw-r--r-- | src/regex.c | 149 | ||||
| -rw-r--r-- | src/regex.h | 14 |
3 files changed, 121 insertions, 56 deletions
diff --git a/src/ChangeLog b/src/ChangeLog
index f72cf43685a..27b79e8998a 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
| @@ -1,3 +1,17 @@ | |||
| 1 | 2000-08-31 Stefan Monnier <monnier@cs.yale.edu> | ||
| 2 | |||
| 3 | * regex.h (RE_NO_NEWLINE_ANCHOR): New syntax flag. | ||
| 4 | (struct re_pattern_buffer): Remove newline_anchor. | ||
| 5 | * regex.c: Keep namespace clean for GNU libc by renaming <fun> | ||
| 6 | to __<fun> and using `weak_alias (__<fun>, <fun>)'. | ||
| 7 | (re_max_failures, fail_stack): Use size_t rather than unsigned. | ||
| 8 | (regex_compile): For ^ and $, choose between buffer and line (beg|end) | ||
| 9 | depending on the new RE_NO_NEWLINE_ANCHOR syntax flag. | ||
| 10 | (print_compiled_pattern, re_search_2, mutually_exclusive_p) | ||
| 11 | (re_match_2_internal, re_compile_pattern, re_comp, regcomp): | ||
| 12 | Get rid of references to newline_anchor. | ||
| 13 | (regcomp): Allocate and precompute a fastmap. | ||
| 14 | |||
| 1 | 2000-08-31 Gerd Moellmann <gerd@gnu.org> | 15 | 2000-08-31 Gerd Moellmann <gerd@gnu.org> |
| 2 | 16 | ||
| 3 | * lread.c (openp): GCPRO local variable `filename'. | 17 | * lread.c (openp): GCPRO local variable `filename'. |
diff --git a/src/regex.c b/src/regex.c
index 71c9dfe4507..f779d9d82e1 100644
--- a/src/regex.c
+++ b/src/regex.c
| @@ -25,7 +25,6 @@ | |||
| 25 | - replace succeed_n + jump_n with a combined operation so that the counter | 25 | - replace succeed_n + jump_n with a combined operation so that the counter |
| 26 | can simply be decremented when popping the failure_point without having | 26 | can simply be decremented when popping the failure_point without having |
| 27 | to stack up failure_count entries. | 27 | to stack up failure_count entries. |
| 28 | - get rid of `newline_anchor'. | ||
| 29 | */ | 28 | */ |
| 30 | 29 | ||
| 31 | /* AIX requires this to be the first thing in the file. */ | 30 | /* AIX requires this to be the first thing in the file. */ |
| @@ -47,6 +46,38 @@ | |||
| 47 | # include <sys/types.h> | 46 | # include <sys/types.h> |
| 48 | #endif | 47 | #endif |
| 49 | 48 | ||
| 49 | #ifdef _LIBC | ||
| 50 | /* We have to keep the namespace clean. */ | ||
| 51 | # define regfree(preg) __regfree (preg) | ||
| 52 | # define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef) | ||
| 53 | # define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags) | ||
| 54 | # define regerror(errcode, preg, errbuf, errbuf_size) \ | ||
| 55 | __regerror(errcode, preg, errbuf, errbuf_size) | ||
| 56 | # define re_set_registers(bu, re, nu, st, en) \ | ||
| 57 | __re_set_registers (bu, re, nu, st, en) | ||
| 58 | # define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \ | ||
| 59 | __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) | ||
| 60 | # define re_match(bufp, string, size, pos, regs) \ | ||
| 61 | __re_match (bufp, string, size, pos, regs) | ||
| 62 | # define re_search(bufp, string, size, startpos, range, regs) \ | ||
| 63 | __re_search (bufp, string, size, startpos, range, regs) | ||
| 64 | # define re_compile_pattern(pattern, length, bufp) \ | ||
| 65 | __re_compile_pattern (pattern, length, bufp) | ||
| 66 | # define re_set_syntax(syntax) __re_set_syntax (syntax) | ||
| 67 | # define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \ | ||
| 68 | __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop) | ||
| 69 | # define re_compile_fastmap(bufp) __re_compile_fastmap (bufp) | ||
| 70 | |||
| 71 | # define WEAK_ALIAS(a,b) weak_alias (a, b) | ||
| 72 | |||
| 73 | /* We are also using some library internals. */ | ||
| 74 | # include <locale/localeinfo.h> | ||
| 75 | # include <locale/elem-hash.h> | ||
| 76 | # include <langinfo.h> | ||
| 77 | #else | ||
| 78 | # define WEAK_ALIAS(a,b) | ||
| 79 | #endif | ||
| 80 | |||
| 50 | /* This is for other GNU distributions with internationalized messages. */ | 81 | /* This is for other GNU distributions with internationalized messages. */ |
| 51 | #if HAVE_LIBINTL_H || defined _LIBC | 82 | #if HAVE_LIBINTL_H || defined _LIBC |
| 52 | # include <libintl.h> | 83 | # include <libintl.h> |
| @@ -1108,7 +1139,6 @@ print_compiled_pattern (bufp) | |||
| 1108 | printf ("re_nsub: %d\t", bufp->re_nsub); | 1139 | printf ("re_nsub: %d\t", bufp->re_nsub); |
| 1109 | printf ("regs_alloc: %d\t", bufp->regs_allocated); | 1140 | printf ("regs_alloc: %d\t", bufp->regs_allocated); |
| 1110 | printf ("can_be_null: %d\t", bufp->can_be_null); | 1141 | printf ("can_be_null: %d\t", bufp->can_be_null); |
| 1111 | printf ("newline_anchor: %d\n", bufp->newline_anchor); | ||
| 1112 | printf ("no_sub: %d\t", bufp->no_sub); | 1142 | printf ("no_sub: %d\t", bufp->no_sub); |
| 1113 | printf ("not_bol: %d\t", bufp->not_bol); | 1143 | printf ("not_bol: %d\t", bufp->not_bol); |
| 1114 | printf ("not_eol: %d\t", bufp->not_eol); | 1144 | printf ("not_eol: %d\t", bufp->not_eol); |
| @@ -1184,6 +1214,7 @@ re_set_syntax (syntax) | |||
| 1184 | re_syntax_options = syntax; | 1214 | re_syntax_options = syntax; |
| 1185 | return ret; | 1215 | return ret; |
| 1186 | } | 1216 | } |
| 1217 | WEAK_ALIAS (__re_set_syntax, re_set_syntax) | ||
| 1187 | 1218 | ||
| 1188 | /* This table gives an error message for each of the error codes listed | 1219 | /* This table gives an error message for each of the error codes listed |
| 1189 | in regex.h. Obviously the order here has to be same as there. | 1220 | in regex.h. Obviously the order here has to be same as there. |
| @@ -1264,21 +1295,22 @@ static const char *re_error_msgid[] = | |||
| 1264 | /* Roughly the maximum number of failure points on the stack. Would be | 1295 | /* Roughly the maximum number of failure points on the stack. Would be |
| 1265 | exactly that if always used TYPICAL_FAILURE_SIZE items each time we failed. | 1296 | exactly that if always used TYPICAL_FAILURE_SIZE items each time we failed. |
| 1266 | This is a variable only so users of regex can assign to it; we never | 1297 | This is a variable only so users of regex can assign to it; we never |
| 1267 | change it ourselves. */ | 1298 | change it ourselves. */ |
| 1268 | #if defined MATCH_MAY_ALLOCATE | 1299 | # if defined MATCH_MAY_ALLOCATE |
| 1269 | /* Note that 4400 is enough to cause a crash on Alpha OSF/1, | 1300 | /* Note that 4400 was enough to cause a crash on Alpha OSF/1, |
| 1270 | whose default stack limit is 2mb. In order for a larger | 1301 | whose default stack limit is 2mb. In order for a larger |
| 1271 | value to work reliably, you have to try to make it accord | 1302 | value to work reliably, you have to try to make it accord |
| 1272 | with the process stack limit. */ | 1303 | with the process stack limit. */ |
| 1273 | int re_max_failures = 40000; | 1304 | size_t re_max_failures = 40000; |
| 1274 | #else | 1305 | # else |
| 1275 | int re_max_failures = 4000; | 1306 | size_t re_max_failures = 4000; |
| 1276 | #endif | 1307 | # endif |
| 1277 | 1308 | ||
| 1278 | union fail_stack_elt | 1309 | union fail_stack_elt |
| 1279 | { | 1310 | { |
| 1280 | const unsigned char *pointer; | 1311 | const unsigned char *pointer; |
| 1281 | unsigned int integer; | 1312 | /* This should be the biggest `int' that's no bigger than a pointer. */ |
| 1313 | long integer; | ||
| 1282 | }; | 1314 | }; |
| 1283 | 1315 | ||
| 1284 | typedef union fail_stack_elt fail_stack_elt_t; | 1316 | typedef union fail_stack_elt fail_stack_elt_t; |
| @@ -1286,9 +1318,9 @@ typedef union fail_stack_elt fail_stack_elt_t; | |||
| 1286 | typedef struct | 1318 | typedef struct |
| 1287 | { | 1319 | { |
| 1288 | fail_stack_elt_t *stack; | 1320 | fail_stack_elt_t *stack; |
| 1289 | unsigned size; | 1321 | size_t size; |
| 1290 | unsigned avail; /* Offset of next open position. */ | 1322 | size_t avail; /* Offset of next open position. */ |
| 1291 | unsigned frame; /* Offset of the cur constructed frame. */ | 1323 | size_t frame; /* Offset of the cur constructed frame. */ |
| 1292 | } fail_stack_type; | 1324 | } fail_stack_type; |
| 1293 | 1325 | ||
| 1294 | #define PATTERN_STACK_EMPTY() (fail_stack.avail == 0) | 1326 | #define PATTERN_STACK_EMPTY() (fail_stack.avail == 0) |
| @@ -1963,8 +1995,7 @@ static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type | |||
| 1963 | `re_nsub' is the number of subexpressions in PATTERN; | 1995 | `re_nsub' is the number of subexpressions in PATTERN; |
| 1964 | `not_bol' and `not_eol' are zero; | 1996 | `not_bol' and `not_eol' are zero; |
| 1965 | 1997 | ||
| 1966 | The `fastmap' and `newline_anchor' fields are neither | 1998 | The `fastmap' field is neither examined nor set. */ |
| 1967 | examined nor set. */ | ||
| 1968 | 1999 | ||
| 1969 | /* Insert the `jump' from the end of last alternative to "here". | 2000 | /* Insert the `jump' from the end of last alternative to "here". |
| 1970 | The space for the jump has already been allocated. */ | 2001 | The space for the jump has already been allocated. */ |
| @@ -2126,7 +2157,7 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2126 | || syntax & RE_CONTEXT_INDEP_ANCHORS | 2157 | || syntax & RE_CONTEXT_INDEP_ANCHORS |
| 2127 | /* Otherwise, depends on what's come before. */ | 2158 | /* Otherwise, depends on what's come before. */ |
| 2128 | || at_begline_loc_p (pattern, p, syntax)) | 2159 | || at_begline_loc_p (pattern, p, syntax)) |
| 2129 | BUF_PUSH (begline); | 2160 | BUF_PUSH ((syntax & RE_NO_NEWLINE_ANCHOR) ? begbuf : begline); |
| 2130 | else | 2161 | else |
| 2131 | goto normal_char; | 2162 | goto normal_char; |
| 2132 | } | 2163 | } |
| @@ -2141,7 +2172,7 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2141 | || syntax & RE_CONTEXT_INDEP_ANCHORS | 2172 | || syntax & RE_CONTEXT_INDEP_ANCHORS |
| 2142 | /* Otherwise, depends on what's next. */ | 2173 | /* Otherwise, depends on what's next. */ |
| 2143 | || at_endline_loc_p (p, pend, syntax)) | 2174 | || at_endline_loc_p (p, pend, syntax)) |
| 2144 | BUF_PUSH (endline); | 2175 | BUF_PUSH ((syntax & RE_NO_NEWLINE_ANCHOR) ? endbuf : endline); |
| 2145 | else | 2176 | else |
| 2146 | goto normal_char; | 2177 | goto normal_char; |
| 2147 | } | 2178 | } |
| @@ -3399,7 +3430,6 @@ analyse_first (p, pend, fastmap, multibyte) | |||
| 3399 | so that `p' is monotonically increasing. More to the point, we | 3430 | so that `p' is monotonically increasing. More to the point, we |
| 3400 | never set `p' (or push) anything `<= p1'. */ | 3431 | never set `p' (or push) anything `<= p1'. */ |
| 3401 | 3432 | ||
| 3402 | /* If can_be_null is set, then the fastmap will not be used anyway. */ | ||
| 3403 | while (1) | 3433 | while (1) |
| 3404 | { | 3434 | { |
| 3405 | /* `p1' is used as a marker of how far back a `on_failure_jump' | 3435 | /* `p1' is used as a marker of how far back a `on_failure_jump' |
| @@ -3689,9 +3719,9 @@ re_compile_fastmap (bufp) | |||
| 3689 | 3719 | ||
| 3690 | analysis = analyse_first (bufp->buffer, bufp->buffer + bufp->used, | 3720 | analysis = analyse_first (bufp->buffer, bufp->buffer + bufp->used, |
| 3691 | fastmap, RE_MULTIBYTE_P (bufp)); | 3721 | fastmap, RE_MULTIBYTE_P (bufp)); |
| 3722 | bufp->can_be_null = (analysis != 0); | ||
| 3692 | if (analysis < -1) | 3723 | if (analysis < -1) |
| 3693 | return analysis; | 3724 | return analysis; |
| 3694 | bufp->can_be_null = (analysis != 0); | ||
| 3695 | return 0; | 3725 | return 0; |
| 3696 | } /* re_compile_fastmap */ | 3726 | } /* re_compile_fastmap */ |
| 3697 | 3727 | ||
| @@ -3729,6 +3759,7 @@ re_set_registers (bufp, regs, num_regs, starts, ends) | |||
| 3729 | regs->start = regs->end = (regoff_t *) 0; | 3759 | regs->start = regs->end = (regoff_t *) 0; |
| 3730 | } | 3760 | } |
| 3731 | } | 3761 | } |
| 3762 | WEAK_ALIAS (__re_set_registers, re_set_registers) | ||
| 3732 | 3763 | ||
| 3733 | /* Searching routines. */ | 3764 | /* Searching routines. */ |
| 3734 | 3765 | ||
| @@ -3745,6 +3776,7 @@ re_search (bufp, string, size, startpos, range, regs) | |||
| 3745 | return re_search_2 (bufp, NULL, 0, string, size, startpos, range, | 3776 | return re_search_2 (bufp, NULL, 0, string, size, startpos, range, |
| 3746 | regs, size); | 3777 | regs, size); |
| 3747 | } | 3778 | } |
| 3779 | WEAK_ALIAS (__re_search, re_search) | ||
| 3748 | 3780 | ||
| 3749 | /* End address of virtual concatenation of string. */ | 3781 | /* End address of virtual concatenation of string. */ |
| 3750 | #define STOP_ADDR_VSTRING(P) \ | 3782 | #define STOP_ADDR_VSTRING(P) \ |
| @@ -3792,7 +3824,7 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop) | |||
| 3792 | register RE_TRANSLATE_TYPE translate = bufp->translate; | 3824 | register RE_TRANSLATE_TYPE translate = bufp->translate; |
| 3793 | int total_size = size1 + size2; | 3825 | int total_size = size1 + size2; |
| 3794 | int endpos = startpos + range; | 3826 | int endpos = startpos + range; |
| 3795 | int anchored_start = 0; | 3827 | boolean anchored_start; |
| 3796 | 3828 | ||
| 3797 | /* Nonzero if we have to concern multibyte character. */ | 3829 | /* Nonzero if we have to concern multibyte character. */ |
| 3798 | const boolean multibyte = RE_MULTIBYTE_P (bufp); | 3830 | const boolean multibyte = RE_MULTIBYTE_P (bufp); |
| @@ -3836,8 +3868,7 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop) | |||
| 3836 | return -2; | 3868 | return -2; |
| 3837 | 3869 | ||
| 3838 | /* See whether the pattern is anchored. */ | 3870 | /* See whether the pattern is anchored. */ |
| 3839 | if (bufp->buffer[0] == begline) | 3871 | anchored_start = (bufp->buffer[0] == begline); |
| 3840 | anchored_start = 1; | ||
| 3841 | 3872 | ||
| 3842 | #ifdef emacs | 3873 | #ifdef emacs |
| 3843 | gl_state.object = re_match_object; | 3874 | gl_state.object = re_match_object; |
| @@ -3857,10 +3888,9 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop) | |||
| 3857 | because that case doesn't repeat. */ | 3888 | because that case doesn't repeat. */ |
| 3858 | if (anchored_start && startpos > 0) | 3889 | if (anchored_start && startpos > 0) |
| 3859 | { | 3890 | { |
| 3860 | if (! (bufp->newline_anchor | 3891 | if (! ((startpos <= size1 ? string1[startpos - 1] |
| 3861 | && ((startpos <= size1 ? string1[startpos - 1] | 3892 | : string2[startpos - size1 - 1]) |
| 3862 | : string2[startpos - size1 - 1]) | 3893 | == '\n')) |
| 3863 | == '\n'))) | ||
| 3864 | goto advance; | 3894 | goto advance; |
| 3865 | } | 3895 | } |
| 3866 | 3896 | ||
| @@ -4009,6 +4039,7 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop) | |||
| 4009 | } | 4039 | } |
| 4010 | return -1; | 4040 | return -1; |
| 4011 | } /* re_search_2 */ | 4041 | } /* re_search_2 */ |
| 4042 | WEAK_ALIAS (__re_search_2, re_search_2) | ||
| 4012 | 4043 | ||
| 4013 | /* Declarations and macros for re_match_2. */ | 4044 | /* Declarations and macros for re_match_2. */ |
| 4014 | 4045 | ||
| @@ -4213,9 +4244,6 @@ mutually_exclusive_p (bufp, p1, p2) | |||
| 4213 | break; | 4244 | break; |
| 4214 | 4245 | ||
| 4215 | case endline: | 4246 | case endline: |
| 4216 | if (!bufp->newline_anchor) | ||
| 4217 | break; | ||
| 4218 | /* Fallthrough */ | ||
| 4219 | case exactn: | 4247 | case exactn: |
| 4220 | { | 4248 | { |
| 4221 | register unsigned int c | 4249 | register unsigned int c |
| @@ -4377,6 +4405,7 @@ re_match (bufp, string, size, pos, regs) | |||
| 4377 | # endif | 4405 | # endif |
| 4378 | return result; | 4406 | return result; |
| 4379 | } | 4407 | } |
| 4408 | WEAK_ALIAS (__re_match, re_match) | ||
| 4380 | #endif /* not emacs */ | 4409 | #endif /* not emacs */ |
| 4381 | 4410 | ||
| 4382 | #ifdef emacs | 4411 | #ifdef emacs |
| @@ -4424,6 +4453,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4424 | #endif | 4453 | #endif |
| 4425 | return result; | 4454 | return result; |
| 4426 | } | 4455 | } |
| 4456 | WEAK_ALIAS (__re_match_2, re_match_2) | ||
| 4427 | 4457 | ||
| 4428 | /* This is a separate function so that we can force an alloca cleanup | 4458 | /* This is a separate function so that we can force an alloca cleanup |
| 4429 | afterwards. */ | 4459 | afterwards. */ |
| @@ -5089,8 +5119,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5089 | 5119 | ||
| 5090 | 5120 | ||
| 5091 | /* begline matches the empty string at the beginning of the string | 5121 | /* begline matches the empty string at the beginning of the string |
| 5092 | (unless `not_bol' is set in `bufp'), and, if | 5122 | (unless `not_bol' is set in `bufp'), and after newlines. */ |
| 5093 | `newline_anchor' is set, after newlines. */ | ||
| 5094 | case begline: | 5123 | case begline: |
| 5095 | DEBUG_PRINT1 ("EXECUTING begline.\n"); | 5124 | DEBUG_PRINT1 ("EXECUTING begline.\n"); |
| 5096 | 5125 | ||
| @@ -5102,7 +5131,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5102 | { | 5131 | { |
| 5103 | unsigned char c; | 5132 | unsigned char c; |
| 5104 | GET_CHAR_BEFORE_2 (c, d, string1, end1, string2, end2); | 5133 | GET_CHAR_BEFORE_2 (c, d, string1, end1, string2, end2); |
| 5105 | if (c == '\n' && bufp->newline_anchor) | 5134 | if (c == '\n') |
| 5106 | break; | 5135 | break; |
| 5107 | } | 5136 | } |
| 5108 | /* In all other cases, we fail. */ | 5137 | /* In all other cases, we fail. */ |
| @@ -5120,7 +5149,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5120 | else | 5149 | else |
| 5121 | { | 5150 | { |
| 5122 | PREFETCH_NOLIMIT (); | 5151 | PREFETCH_NOLIMIT (); |
| 5123 | if (*d == '\n' && bufp->newline_anchor) | 5152 | if (*d == '\n') |
| 5124 | break; | 5153 | break; |
| 5125 | } | 5154 | } |
| 5126 | goto fail; | 5155 | goto fail; |
| @@ -5645,15 +5674,13 @@ re_compile_pattern (pattern, length, bufp) | |||
| 5645 | setting no_sub. */ | 5674 | setting no_sub. */ |
| 5646 | bufp->no_sub = 0; | 5675 | bufp->no_sub = 0; |
| 5647 | 5676 | ||
| 5648 | /* Match anchors at newline. */ | ||
| 5649 | bufp->newline_anchor = 1; | ||
| 5650 | |||
| 5651 | ret = regex_compile ((re_char*) pattern, length, re_syntax_options, bufp); | 5677 | ret = regex_compile ((re_char*) pattern, length, re_syntax_options, bufp); |
| 5652 | 5678 | ||
| 5653 | if (!ret) | 5679 | if (!ret) |
| 5654 | return NULL; | 5680 | return NULL; |
| 5655 | return gettext (re_error_msgid[(int) ret]); | 5681 | return gettext (re_error_msgid[(int) ret]); |
| 5656 | } | 5682 | } |
| 5683 | WEAK_ALIAS (__re_compile_pattern, re_compile_pattern) | ||
| 5657 | 5684 | ||
| 5658 | /* Entry points compatible with 4.2 BSD regex library. We don't define | 5685 | /* Entry points compatible with 4.2 BSD regex library. We don't define |
| 5659 | them unless specifically requested. */ | 5686 | them unless specifically requested. */ |
| @@ -5700,9 +5727,6 @@ re_comp (s) | |||
| 5700 | /* Since `re_exec' always passes NULL for the `regs' argument, we | 5727 | /* Since `re_exec' always passes NULL for the `regs' argument, we |
| 5701 | don't need to initialize the pattern buffer fields which affect it. */ | 5728 | don't need to initialize the pattern buffer fields which affect it. */ |
| 5702 | 5729 | ||
| 5703 | /* Match anchors at newlines. */ | ||
| 5704 | re_comp_buf.newline_anchor = 1; | ||
| 5705 | |||
| 5706 | ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf); | 5730 | ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf); |
| 5707 | 5731 | ||
| 5708 | if (!ret) | 5732 | if (!ret) |
| @@ -5740,8 +5764,8 @@ re_exec (s) | |||
| 5740 | `syntax' to RE_SYNTAX_POSIX_EXTENDED if the | 5764 | `syntax' to RE_SYNTAX_POSIX_EXTENDED if the |
| 5741 | REG_EXTENDED bit in CFLAGS is set; otherwise, to | 5765 | REG_EXTENDED bit in CFLAGS is set; otherwise, to |
| 5742 | RE_SYNTAX_POSIX_BASIC; | 5766 | RE_SYNTAX_POSIX_BASIC; |
| 5743 | `newline_anchor' to REG_NEWLINE being set in CFLAGS; | 5767 | `fastmap' to an allocated space for the fastmap; |
| 5744 | `fastmap' and `fastmap_accurate' to zero; | 5768 | `fastmap_accurate' to zero; |
| 5745 | `re_nsub' to the number of subexpressions in PATTERN. | 5769 | `re_nsub' to the number of subexpressions in PATTERN. |
| 5746 | 5770 | ||
| 5747 | PATTERN is the address of the pattern string. | 5771 | PATTERN is the address of the pattern string. |
| @@ -5780,11 +5804,8 @@ regcomp (preg, pattern, cflags) | |||
| 5780 | preg->allocated = 0; | 5804 | preg->allocated = 0; |
| 5781 | preg->used = 0; | 5805 | preg->used = 0; |
| 5782 | 5806 | ||
| 5783 | /* Don't bother to use a fastmap when searching. This simplifies the | 5807 | /* Try to allocate space for the fastmap. */ |
| 5784 | REG_NEWLINE case: if we used a fastmap, we'd have to put all the | 5808 | preg->fastmap = (char *) malloc (1 << BYTEWIDTH); |
| 5785 | characters after newlines into the fastmap. This way, we just try | ||
| 5786 | every character. */ | ||
| 5787 | preg->fastmap = 0; | ||
| 5788 | 5809 | ||
| 5789 | if (cflags & REG_ICASE) | 5810 | if (cflags & REG_ICASE) |
| 5790 | { | 5811 | { |
| @@ -5808,11 +5829,9 @@ regcomp (preg, pattern, cflags) | |||
| 5808 | { /* REG_NEWLINE implies neither . nor [^...] match newline. */ | 5829 | { /* REG_NEWLINE implies neither . nor [^...] match newline. */ |
| 5809 | syntax &= ~RE_DOT_NEWLINE; | 5830 | syntax &= ~RE_DOT_NEWLINE; |
| 5810 | syntax |= RE_HAT_LISTS_NOT_NEWLINE; | 5831 | syntax |= RE_HAT_LISTS_NOT_NEWLINE; |
| 5811 | /* It also changes the matching behavior. */ | ||
| 5812 | preg->newline_anchor = 1; | ||
| 5813 | } | 5832 | } |
| 5814 | else | 5833 | else |
| 5815 | preg->newline_anchor = 0; | 5834 | syntax |= RE_NO_NEWLINE_ANCHOR; |
| 5816 | 5835 | ||
| 5817 | preg->no_sub = !!(cflags & REG_NOSUB); | 5836 | preg->no_sub = !!(cflags & REG_NOSUB); |
| 5818 | 5837 | ||
| @@ -5822,10 +5841,22 @@ regcomp (preg, pattern, cflags) | |||
| 5822 | 5841 | ||
| 5823 | /* POSIX doesn't distinguish between an unmatched open-group and an | 5842 | /* POSIX doesn't distinguish between an unmatched open-group and an |
| 5824 | unmatched close-group: both are REG_EPAREN. */ | 5843 | unmatched close-group: both are REG_EPAREN. */ |
| 5825 | if (ret == REG_ERPAREN) ret = REG_EPAREN; | 5844 | if (ret == REG_ERPAREN) |
| 5826 | 5845 | ret = REG_EPAREN; | |
| 5846 | |||
| 5847 | if (ret == REG_NOERROR && preg->fastmap) | ||
| 5848 | { /* Compute the fastmap now, since regexec cannot modify the pattern | ||
| 5849 | buffer. */ | ||
| 5850 | re_compile_fastmap (preg); | ||
| 5851 | if (preg->can_be_null) | ||
| 5852 | { /* The fastmap can't be used anyway. */ | ||
| 5853 | free (preg->fastmap); | ||
| 5854 | preg->fastmap = NULL; | ||
| 5855 | } | ||
| 5856 | } | ||
| 5827 | return (int) ret; | 5857 | return (int) ret; |
| 5828 | } | 5858 | } |
| 5859 | WEAK_ALIAS (__regcomp, regcomp) | ||
| 5829 | 5860 | ||
| 5830 | 5861 | ||
| 5831 | /* regexec searches for a given pattern, specified by PREG, in the | 5862 | /* regexec searches for a given pattern, specified by PREG, in the |
| @@ -5854,7 +5885,7 @@ regexec (preg, string, nmatch, pmatch, eflags) | |||
| 5854 | struct re_registers regs; | 5885 | struct re_registers regs; |
| 5855 | regex_t private_preg; | 5886 | regex_t private_preg; |
| 5856 | int len = strlen (string); | 5887 | int len = strlen (string); |
| 5857 | boolean want_reg_info = !preg->no_sub && nmatch > 0; | 5888 | boolean want_reg_info = !preg->no_sub && nmatch > 0 && pmatch; |
| 5858 | 5889 | ||
| 5859 | private_preg = *preg; | 5890 | private_preg = *preg; |
| 5860 | 5891 | ||
| @@ -5875,6 +5906,15 @@ regexec (preg, string, nmatch, pmatch, eflags) | |||
| 5875 | regs.end = regs.start + nmatch; | 5906 | regs.end = regs.start + nmatch; |
| 5876 | } | 5907 | } |
| 5877 | 5908 | ||
| 5909 | /* Instead of using not_eol to implement REG_NOTEOL, we could simply | ||
| 5910 | pass (&private_preg, string, len + 1, 0, len, ...) pretending the string | ||
| 5911 | was a little bit longer but still only matching the real part. | ||
| 5912 | This works because the `endline' will check for a '\n' and will find a | ||
| 5913 | '\0', correctly deciding that this is not the end of a line. | ||
| 5914 | But it doesn't work out so nicely for REG_NOTBOL, since we don't have | ||
| 5915 | a convenient '\0' there. For all we know, the string could be preceded | ||
| 5916 | by '\n' which would throw things off. */ | ||
| 5917 | |||
| 5878 | /* Perform the searching operation. */ | 5918 | /* Perform the searching operation. */ |
| 5879 | ret = re_search (&private_preg, string, len, | 5919 | ret = re_search (&private_preg, string, len, |
| 5880 | /* start: */ 0, /* range: */ len, | 5920 | /* start: */ 0, /* range: */ len, |
| @@ -5901,6 +5941,7 @@ regexec (preg, string, nmatch, pmatch, eflags) | |||
| 5901 | /* We want zero return to mean success, unlike `re_search'. */ | 5941 | /* We want zero return to mean success, unlike `re_search'. */ |
| 5902 | return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH; | 5942 | return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH; |
| 5903 | } | 5943 | } |
| 5944 | WEAK_ALIAS (__regexec, regexec) | ||
| 5904 | 5945 | ||
| 5905 | 5946 | ||
| 5906 | /* Returns a message corresponding to an error code, ERRCODE, returned | 5947 | /* Returns a message corresponding to an error code, ERRCODE, returned |
| @@ -5941,6 +5982,7 @@ regerror (errcode, preg, errbuf, errbuf_size) | |||
| 5941 | 5982 | ||
| 5942 | return msg_size; | 5983 | return msg_size; |
| 5943 | } | 5984 | } |
| 5985 | WEAK_ALIAS (__regerror, regerror) | ||
| 5944 | 5986 | ||
| 5945 | 5987 | ||
| 5946 | /* Free dynamically allocated space used by PREG. */ | 5988 | /* Free dynamically allocated space used by PREG. */ |
| @@ -5965,5 +6007,6 @@ regfree (preg) | |||
| 5965 | free (preg->translate); | 6007 | free (preg->translate); |
| 5966 | preg->translate = NULL; | 6008 | preg->translate = NULL; |
| 5967 | } | 6009 | } |
| 6010 | WEAK_ALIAS (__regfree, regfree) | ||
| 5968 | 6011 | ||
| 5969 | #endif /* not emacs */ | 6012 | #endif /* not emacs */ |
diff --git a/src/regex.h b/src/regex.h
index 46f2a633c3a..ef4284cdce2 100644
--- a/src/regex.h
+++ b/src/regex.h
| @@ -150,6 +150,17 @@ typedef unsigned long int reg_syntax_t; | |||
| 150 | /* If this bit is set, then (?:...) is treated as a shy group. */ | 150 | /* If this bit is set, then (?:...) is treated as a shy group. */ |
| 151 | #define RE_SHY_GROUPS (RE_FRUGAL << 1) | 151 | #define RE_SHY_GROUPS (RE_FRUGAL << 1) |
| 152 | 152 | ||
| 153 | /* If this bit is set, ^ and $ only match at beg/end of buffer. */ | ||
| 154 | #define RE_NO_NEWLINE_ANCHOR (RE_SHY_GROUPS << 1) | ||
| 155 | |||
| 156 | /* If this bit is set, turn on internal regex debugging. | ||
| 157 | If not set, and debugging was on, turn it off. | ||
| 158 | This only works if regex.c is compiled -DDEBUG. | ||
| 159 | We define this bit always, so that all that's needed to turn on | ||
| 160 | debugging is to recompile regex.c; the calling code can always have | ||
| 161 | this bit set, and it won't affect anything in the normal case. */ | ||
| 162 | #define RE_DEBUG (RE_NO_NEWLINE_ANCHOR << 1) | ||
| 163 | |||
| 153 | /* This global variable defines the particular regexp syntax to use (for | 164 | /* This global variable defines the particular regexp syntax to use (for |
| 154 | some interfaces). When a regexp is compiled, the syntax used is | 165 | some interfaces). When a regexp is compiled, the syntax used is |
| 155 | stored in the pattern buffer, so changing this does not affect | 166 | stored in the pattern buffer, so changing this does not affect |
| @@ -379,9 +390,6 @@ struct re_pattern_buffer | |||
| 379 | /* Similarly for an end-of-line anchor. */ | 390 | /* Similarly for an end-of-line anchor. */ |
| 380 | unsigned not_eol : 1; | 391 | unsigned not_eol : 1; |
| 381 | 392 | ||
| 382 | /* If true, an anchor at a newline matches. */ | ||
| 383 | unsigned newline_anchor : 1; | ||
| 384 | |||
| 385 | #ifdef emacs | 393 | #ifdef emacs |
| 386 | /* If true, multi-byte form in the `buffer' should be recognized as a | 394 | /* If true, multi-byte form in the `buffer' should be recognized as a |
| 387 | multibyte character. */ | 395 | multibyte character. */ |