aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author Stefan Monnier 2000-08-31 17:19:15 +0000
committer Stefan Monnier 2000-08-31 17:19:15 +0000
commit c0f9ea083ec038ab5aa3049cd12268f4c0597578 (patch)
tree e5e8312ae28a82f673f1d647a8d375186e719bbd /src
parent d04efc645a1888c81bb6b0237cfc341ab24f4390 (diff)
download emacs-c0f9ea083ec038ab5aa3049cd12268f4c0597578.tar.gz
emacs-c0f9ea083ec038ab5aa3049cd12268f4c0597578.zip
* regex.h (RE_NO_NEWLINE_ANCHOR): New syntax flag.
(struct re_pattern_buffer): Remove newline_anchor.
* regex.c: Keep namespace clean for GNU libc by renaming <fun> to __<fun> and using `weak_alias (__<fun>, <fun>)'.
(re_max_failures, fail_stack): Use size_t rather than unsigned.
(regex_compile): For ^ and $, choose between buffer and line (beg|end) depending on the new RE_NO_NEWLINE_ANCHOR syntax flag.
(print_compiled_pattern, re_search_2, mutually_exclusive_p) (re_match_2_internal, re_compile_pattern, re_comp, regcomp): Get rid of references to newline_anchor.
(regcomp): Allocate and precompute a fastmap.
Diffstat (limited to 'src')
-rw-r--r-- src/ChangeLog 14
-rw-r--r-- src/regex.c 149
-rw-r--r-- src/regex.h 14
3 files changed, 121 insertions, 56 deletions
diff --git a/src/ChangeLog b/src/ChangeLog
index f72cf43685a..27b79e8998a 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,17 @@
1 2000-08-31 Stefan Monnier <monnier@cs.yale.edu>
2
3 * regex.h (RE_NO_NEWLINE_ANCHOR): New syntax flag.
4 (struct re_pattern_buffer): Remove newline_anchor.
5 * regex.c: Keep namespace clean for GNU libc by renaming <fun>
6 to __<fun> and using `weak_alias (__<fun>, <fun>)'.
7 (re_max_failures, fail_stack): Use size_t rather than unsigned.
8 (regex_compile): For ^ and $, choose between buffer and line (beg|end)
9 depending on the new RE_NO_NEWLINE_ANCHOR syntax flag.
10 (print_compiled_pattern, re_search_2, mutually_exclusive_p)
11 (re_match_2_internal, re_compile_pattern, re_comp, regcomp):
12 Get rid of references to newline_anchor.
13 (regcomp): Allocate and precompute a fastmap.
14
1 2000-08-31 Gerd Moellmann <gerd@gnu.org> 15 2000-08-31 Gerd Moellmann <gerd@gnu.org>
2 16
3 * lread.c (openp): GCPRO local variable `filename'. 17 * lread.c (openp): GCPRO local variable `filename'.
diff --git a/src/regex.c b/src/regex.c
index 71c9dfe4507..f779d9d82e1 100644
--- a/src/regex.c
+++ b/src/regex.c
@@ -25,7 +25,6 @@
25 - replace succeed_n + jump_n with a combined operation so that the counter 25 - replace succeed_n + jump_n with a combined operation so that the counter
26 can simply be decremented when popping the failure_point without having 26 can simply be decremented when popping the failure_point without having
27 to stack up failure_count entries. 27 to stack up failure_count entries.
28 - get rid of `newline_anchor'.
29 */ 28 */
30 29
31/* AIX requires this to be the first thing in the file. */ 30/* AIX requires this to be the first thing in the file. */
@@ -47,6 +46,38 @@
47# include <sys/types.h> 46# include <sys/types.h>
48#endif 47#endif
49 48
49#ifdef _LIBC
50/* We have to keep the namespace clean. */
51# define regfree(preg) __regfree (preg)
52# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
53# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
54# define regerror(errcode, preg, errbuf, errbuf_size) \
55 __regerror(errcode, preg, errbuf, errbuf_size)
56# define re_set_registers(bu, re, nu, st, en) \
57 __re_set_registers (bu, re, nu, st, en)
58# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
59 __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
60# define re_match(bufp, string, size, pos, regs) \
61 __re_match (bufp, string, size, pos, regs)
62# define re_search(bufp, string, size, startpos, range, regs) \
63 __re_search (bufp, string, size, startpos, range, regs)
64# define re_compile_pattern(pattern, length, bufp) \
65 __re_compile_pattern (pattern, length, bufp)
66# define re_set_syntax(syntax) __re_set_syntax (syntax)
67# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
68 __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
69# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
70
71# define WEAK_ALIAS(a,b) weak_alias (a, b)
72
73/* We are also using some library internals. */
74# include <locale/localeinfo.h>
75# include <locale/elem-hash.h>
76# include <langinfo.h>
77#else
78# define WEAK_ALIAS(a,b)
79#endif
80
50/* This is for other GNU distributions with internationalized messages. */ 81/* This is for other GNU distributions with internationalized messages. */
51#if HAVE_LIBINTL_H || defined _LIBC 82#if HAVE_LIBINTL_H || defined _LIBC
52# include <libintl.h> 83# include <libintl.h>
@@ -1108,7 +1139,6 @@ print_compiled_pattern (bufp)
1108 printf ("re_nsub: %d\t", bufp->re_nsub); 1139 printf ("re_nsub: %d\t", bufp->re_nsub);
1109 printf ("regs_alloc: %d\t", bufp->regs_allocated); 1140 printf ("regs_alloc: %d\t", bufp->regs_allocated);
1110 printf ("can_be_null: %d\t", bufp->can_be_null); 1141 printf ("can_be_null: %d\t", bufp->can_be_null);
1111 printf ("newline_anchor: %d\n", bufp->newline_anchor);
1112 printf ("no_sub: %d\t", bufp->no_sub); 1142 printf ("no_sub: %d\t", bufp->no_sub);
1113 printf ("not_bol: %d\t", bufp->not_bol); 1143 printf ("not_bol: %d\t", bufp->not_bol);
1114 printf ("not_eol: %d\t", bufp->not_eol); 1144 printf ("not_eol: %d\t", bufp->not_eol);
@@ -1184,6 +1214,7 @@ re_set_syntax (syntax)
1184 re_syntax_options = syntax; 1214 re_syntax_options = syntax;
1185 return ret; 1215 return ret;
1186} 1216}
1217WEAK_ALIAS (__re_set_syntax, re_set_syntax)
1187 1218
1188/* This table gives an error message for each of the error codes listed 1219/* This table gives an error message for each of the error codes listed
1189 in regex.h. Obviously the order here has to be same as there. 1220 in regex.h. Obviously the order here has to be same as there.
@@ -1264,21 +1295,22 @@ static const char *re_error_msgid[] =
1264/* Roughly the maximum number of failure points on the stack. Would be 1295/* Roughly the maximum number of failure points on the stack. Would be
1265 exactly that if always used TYPICAL_FAILURE_SIZE items each time we failed. 1296 exactly that if always used TYPICAL_FAILURE_SIZE items each time we failed.
1266 This is a variable only so users of regex can assign to it; we never 1297 This is a variable only so users of regex can assign to it; we never
1267 change it ourselves. */ 1298 change it ourselves. */
1268#if defined MATCH_MAY_ALLOCATE 1299# if defined MATCH_MAY_ALLOCATE
1269/* Note that 4400 is enough to cause a crash on Alpha OSF/1, 1300/* Note that 4400 was enough to cause a crash on Alpha OSF/1,
1270 whose default stack limit is 2mb. In order for a larger 1301 whose default stack limit is 2mb. In order for a larger
1271 value to work reliably, you have to try to make it accord 1302 value to work reliably, you have to try to make it accord
1272 with the process stack limit. */ 1303 with the process stack limit. */
1273int re_max_failures = 40000; 1304size_t re_max_failures = 40000;
1274#else 1305# else
1275int re_max_failures = 4000; 1306size_t re_max_failures = 4000;
1276#endif 1307# endif
1277 1308
1278union fail_stack_elt 1309union fail_stack_elt
1279{ 1310{
1280 const unsigned char *pointer; 1311 const unsigned char *pointer;
1281 unsigned int integer; 1312 /* This should be the biggest `int' that's no bigger than a pointer. */
1313 long integer;
1282}; 1314};
1283 1315
1284typedef union fail_stack_elt fail_stack_elt_t; 1316typedef union fail_stack_elt fail_stack_elt_t;
@@ -1286,9 +1318,9 @@ typedef union fail_stack_elt fail_stack_elt_t;
1286typedef struct 1318typedef struct
1287{ 1319{
1288 fail_stack_elt_t *stack; 1320 fail_stack_elt_t *stack;
1289 unsigned size; 1321 size_t size;
1290 unsigned avail; /* Offset of next open position. */ 1322 size_t avail; /* Offset of next open position. */
1291 unsigned frame; /* Offset of the cur constructed frame. */ 1323 size_t frame; /* Offset of the cur constructed frame. */
1292} fail_stack_type; 1324} fail_stack_type;
1293 1325
1294#define PATTERN_STACK_EMPTY() (fail_stack.avail == 0) 1326#define PATTERN_STACK_EMPTY() (fail_stack.avail == 0)
@@ -1963,8 +1995,7 @@ static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type
1963 `re_nsub' is the number of subexpressions in PATTERN; 1995 `re_nsub' is the number of subexpressions in PATTERN;
1964 `not_bol' and `not_eol' are zero; 1996 `not_bol' and `not_eol' are zero;
1965 1997
1966 The `fastmap' and `newline_anchor' fields are neither 1998 The `fastmap' field is neither examined nor set. */
1967 examined nor set. */
1968 1999
1969/* Insert the `jump' from the end of last alternative to "here". 2000/* Insert the `jump' from the end of last alternative to "here".
1970 The space for the jump has already been allocated. */ 2001 The space for the jump has already been allocated. */
@@ -2126,7 +2157,7 @@ regex_compile (pattern, size, syntax, bufp)
2126 || syntax & RE_CONTEXT_INDEP_ANCHORS 2157 || syntax & RE_CONTEXT_INDEP_ANCHORS
2127 /* Otherwise, depends on what's come before. */ 2158 /* Otherwise, depends on what's come before. */
2128 || at_begline_loc_p (pattern, p, syntax)) 2159 || at_begline_loc_p (pattern, p, syntax))
2129 BUF_PUSH (begline); 2160 BUF_PUSH ((syntax & RE_NO_NEWLINE_ANCHOR) ? begbuf : begline);
2130 else 2161 else
2131 goto normal_char; 2162 goto normal_char;
2132 } 2163 }
@@ -2141,7 +2172,7 @@ regex_compile (pattern, size, syntax, bufp)
2141 || syntax & RE_CONTEXT_INDEP_ANCHORS 2172 || syntax & RE_CONTEXT_INDEP_ANCHORS
2142 /* Otherwise, depends on what's next. */ 2173 /* Otherwise, depends on what's next. */
2143 || at_endline_loc_p (p, pend, syntax)) 2174 || at_endline_loc_p (p, pend, syntax))
2144 BUF_PUSH (endline); 2175 BUF_PUSH ((syntax & RE_NO_NEWLINE_ANCHOR) ? endbuf : endline);
2145 else 2176 else
2146 goto normal_char; 2177 goto normal_char;
2147 } 2178 }
@@ -3399,7 +3430,6 @@ analyse_first (p, pend, fastmap, multibyte)
3399 so that `p' is monotonically increasing. More to the point, we 3430 so that `p' is monotonically increasing. More to the point, we
3400 never set `p' (or push) anything `<= p1'. */ 3431 never set `p' (or push) anything `<= p1'. */
3401 3432
3402 /* If can_be_null is set, then the fastmap will not be used anyway. */
3403 while (1) 3433 while (1)
3404 { 3434 {
3405 /* `p1' is used as a marker of how far back a `on_failure_jump' 3435 /* `p1' is used as a marker of how far back a `on_failure_jump'
@@ -3689,9 +3719,9 @@ re_compile_fastmap (bufp)
3689 3719
3690 analysis = analyse_first (bufp->buffer, bufp->buffer + bufp->used, 3720 analysis = analyse_first (bufp->buffer, bufp->buffer + bufp->used,
3691 fastmap, RE_MULTIBYTE_P (bufp)); 3721 fastmap, RE_MULTIBYTE_P (bufp));
3722 bufp->can_be_null = (analysis != 0);
3692 if (analysis < -1) 3723 if (analysis < -1)
3693 return analysis; 3724 return analysis;
3694 bufp->can_be_null = (analysis != 0);
3695 return 0; 3725 return 0;
3696} /* re_compile_fastmap */ 3726} /* re_compile_fastmap */
3697 3727
@@ -3729,6 +3759,7 @@ re_set_registers (bufp, regs, num_regs, starts, ends)
3729 regs->start = regs->end = (regoff_t *) 0; 3759 regs->start = regs->end = (regoff_t *) 0;
3730 } 3760 }
3731} 3761}
3762WEAK_ALIAS (__re_set_registers, re_set_registers)
3732 3763
3733/* Searching routines. */ 3764/* Searching routines. */
3734 3765
@@ -3745,6 +3776,7 @@ re_search (bufp, string, size, startpos, range, regs)
3745 return re_search_2 (bufp, NULL, 0, string, size, startpos, range, 3776 return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
3746 regs, size); 3777 regs, size);
3747} 3778}
3779WEAK_ALIAS (__re_search, re_search)
3748 3780
3749/* End address of virtual concatenation of string. */ 3781/* End address of virtual concatenation of string. */
3750#define STOP_ADDR_VSTRING(P) \ 3782#define STOP_ADDR_VSTRING(P) \
@@ -3792,7 +3824,7 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
3792 register RE_TRANSLATE_TYPE translate = bufp->translate; 3824 register RE_TRANSLATE_TYPE translate = bufp->translate;
3793 int total_size = size1 + size2; 3825 int total_size = size1 + size2;
3794 int endpos = startpos + range; 3826 int endpos = startpos + range;
3795 int anchored_start = 0; 3827 boolean anchored_start;
3796 3828
3797 /* Nonzero if we have to concern multibyte character. */ 3829 /* Nonzero if we have to concern multibyte character. */
3798 const boolean multibyte = RE_MULTIBYTE_P (bufp); 3830 const boolean multibyte = RE_MULTIBYTE_P (bufp);
@@ -3836,8 +3868,7 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
3836 return -2; 3868 return -2;
3837 3869
3838 /* See whether the pattern is anchored. */ 3870 /* See whether the pattern is anchored. */
3839 if (bufp->buffer[0] == begline) 3871 anchored_start = (bufp->buffer[0] == begline);
3840 anchored_start = 1;
3841 3872
3842#ifdef emacs 3873#ifdef emacs
3843 gl_state.object = re_match_object; 3874 gl_state.object = re_match_object;
@@ -3857,10 +3888,9 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
3857 because that case doesn't repeat. */ 3888 because that case doesn't repeat. */
3858 if (anchored_start && startpos > 0) 3889 if (anchored_start && startpos > 0)
3859 { 3890 {
3860 if (! (bufp->newline_anchor 3891 if (! ((startpos <= size1 ? string1[startpos - 1]
3861 && ((startpos <= size1 ? string1[startpos - 1] 3892 : string2[startpos - size1 - 1])
3862 : string2[startpos - size1 - 1]) 3893 == '\n'))
3863 == '\n')))
3864 goto advance; 3894 goto advance;
3865 } 3895 }
3866 3896
@@ -4009,6 +4039,7 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
4009 } 4039 }
4010 return -1; 4040 return -1;
4011} /* re_search_2 */ 4041} /* re_search_2 */
4042WEAK_ALIAS (__re_search_2, re_search_2)
4012 4043
4013/* Declarations and macros for re_match_2. */ 4044/* Declarations and macros for re_match_2. */
4014 4045
@@ -4213,9 +4244,6 @@ mutually_exclusive_p (bufp, p1, p2)
4213 break; 4244 break;
4214 4245
4215 case endline: 4246 case endline:
4216 if (!bufp->newline_anchor)
4217 break;
4218 /* Fallthrough */
4219 case exactn: 4247 case exactn:
4220 { 4248 {
4221 register unsigned int c 4249 register unsigned int c
@@ -4377,6 +4405,7 @@ re_match (bufp, string, size, pos, regs)
4377# endif 4405# endif
4378 return result; 4406 return result;
4379} 4407}
4408WEAK_ALIAS (__re_match, re_match)
4380#endif /* not emacs */ 4409#endif /* not emacs */
4381 4410
4382#ifdef emacs 4411#ifdef emacs
@@ -4424,6 +4453,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
4424#endif 4453#endif
4425 return result; 4454 return result;
4426} 4455}
4456WEAK_ALIAS (__re_match_2, re_match_2)
4427 4457
4428/* This is a separate function so that we can force an alloca cleanup 4458/* This is a separate function so that we can force an alloca cleanup
4429 afterwards. */ 4459 afterwards. */
@@ -5089,8 +5119,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5089 5119
5090 5120
5091 /* begline matches the empty string at the beginning of the string 5121 /* begline matches the empty string at the beginning of the string
5092 (unless `not_bol' is set in `bufp'), and, if 5122 (unless `not_bol' is set in `bufp'), and after newlines. */
5093 `newline_anchor' is set, after newlines. */
5094 case begline: 5123 case begline:
5095 DEBUG_PRINT1 ("EXECUTING begline.\n"); 5124 DEBUG_PRINT1 ("EXECUTING begline.\n");
5096 5125
@@ -5102,7 +5131,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5102 { 5131 {
5103 unsigned char c; 5132 unsigned char c;
5104 GET_CHAR_BEFORE_2 (c, d, string1, end1, string2, end2); 5133 GET_CHAR_BEFORE_2 (c, d, string1, end1, string2, end2);
5105 if (c == '\n' && bufp->newline_anchor) 5134 if (c == '\n')
5106 break; 5135 break;
5107 } 5136 }
5108 /* In all other cases, we fail. */ 5137 /* In all other cases, we fail. */
@@ -5120,7 +5149,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5120 else 5149 else
5121 { 5150 {
5122 PREFETCH_NOLIMIT (); 5151 PREFETCH_NOLIMIT ();
5123 if (*d == '\n' && bufp->newline_anchor) 5152 if (*d == '\n')
5124 break; 5153 break;
5125 } 5154 }
5126 goto fail; 5155 goto fail;
@@ -5645,15 +5674,13 @@ re_compile_pattern (pattern, length, bufp)
5645 setting no_sub. */ 5674 setting no_sub. */
5646 bufp->no_sub = 0; 5675 bufp->no_sub = 0;
5647 5676
5648 /* Match anchors at newline. */
5649 bufp->newline_anchor = 1;
5650
5651 ret = regex_compile ((re_char*) pattern, length, re_syntax_options, bufp); 5677 ret = regex_compile ((re_char*) pattern, length, re_syntax_options, bufp);
5652 5678
5653 if (!ret) 5679 if (!ret)
5654 return NULL; 5680 return NULL;
5655 return gettext (re_error_msgid[(int) ret]); 5681 return gettext (re_error_msgid[(int) ret]);
5656} 5682}
5683WEAK_ALIAS (__re_compile_pattern, re_compile_pattern)
5657 5684
5658/* Entry points compatible with 4.2 BSD regex library. We don't define 5685/* Entry points compatible with 4.2 BSD regex library. We don't define
5659 them unless specifically requested. */ 5686 them unless specifically requested. */
@@ -5700,9 +5727,6 @@ re_comp (s)
5700 /* Since `re_exec' always passes NULL for the `regs' argument, we 5727 /* Since `re_exec' always passes NULL for the `regs' argument, we
5701 don't need to initialize the pattern buffer fields which affect it. */ 5728 don't need to initialize the pattern buffer fields which affect it. */
5702 5729
5703 /* Match anchors at newlines. */
5704 re_comp_buf.newline_anchor = 1;
5705
5706 ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf); 5730 ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
5707 5731
5708 if (!ret) 5732 if (!ret)
@@ -5740,8 +5764,8 @@ re_exec (s)
5740 `syntax' to RE_SYNTAX_POSIX_EXTENDED if the 5764 `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
5741 REG_EXTENDED bit in CFLAGS is set; otherwise, to 5765 REG_EXTENDED bit in CFLAGS is set; otherwise, to
5742 RE_SYNTAX_POSIX_BASIC; 5766 RE_SYNTAX_POSIX_BASIC;
5743 `newline_anchor' to REG_NEWLINE being set in CFLAGS; 5767 `fastmap' to an allocated space for the fastmap;
5744 `fastmap' and `fastmap_accurate' to zero; 5768 `fastmap_accurate' to zero;
5745 `re_nsub' to the number of subexpressions in PATTERN. 5769 `re_nsub' to the number of subexpressions in PATTERN.
5746 5770
5747 PATTERN is the address of the pattern string. 5771 PATTERN is the address of the pattern string.
@@ -5780,11 +5804,8 @@ regcomp (preg, pattern, cflags)
5780 preg->allocated = 0; 5804 preg->allocated = 0;
5781 preg->used = 0; 5805 preg->used = 0;
5782 5806
5783 /* Don't bother to use a fastmap when searching. This simplifies the 5807 /* Try to allocate space for the fastmap. */
5784 REG_NEWLINE case: if we used a fastmap, we'd have to put all the 5808 preg->fastmap = (char *) malloc (1 << BYTEWIDTH);
5785 characters after newlines into the fastmap. This way, we just try
5786 every character. */
5787 preg->fastmap = 0;
5788 5809
5789 if (cflags & REG_ICASE) 5810 if (cflags & REG_ICASE)
5790 { 5811 {
@@ -5808,11 +5829,9 @@ regcomp (preg, pattern, cflags)
5808 { /* REG_NEWLINE implies neither . nor [^...] match newline. */ 5829 { /* REG_NEWLINE implies neither . nor [^...] match newline. */
5809 syntax &= ~RE_DOT_NEWLINE; 5830 syntax &= ~RE_DOT_NEWLINE;
5810 syntax |= RE_HAT_LISTS_NOT_NEWLINE; 5831 syntax |= RE_HAT_LISTS_NOT_NEWLINE;
5811 /* It also changes the matching behavior. */
5812 preg->newline_anchor = 1;
5813 } 5832 }
5814 else 5833 else
5815 preg->newline_anchor = 0; 5834 syntax |= RE_NO_NEWLINE_ANCHOR;
5816 5835
5817 preg->no_sub = !!(cflags & REG_NOSUB); 5836 preg->no_sub = !!(cflags & REG_NOSUB);
5818 5837
@@ -5822,10 +5841,22 @@ regcomp (preg, pattern, cflags)
5822 5841
5823 /* POSIX doesn't distinguish between an unmatched open-group and an 5842 /* POSIX doesn't distinguish between an unmatched open-group and an
5824 unmatched close-group: both are REG_EPAREN. */ 5843 unmatched close-group: both are REG_EPAREN. */
5825 if (ret == REG_ERPAREN) ret = REG_EPAREN; 5844 if (ret == REG_ERPAREN)
5826 5845 ret = REG_EPAREN;
5846
5847 if (ret == REG_NOERROR && preg->fastmap)
5848 { /* Compute the fastmap now, since regexec cannot modify the pattern
5849 buffer. */
5850 re_compile_fastmap (preg);
5851 if (preg->can_be_null)
5852 { /* The fastmap can't be used anyway. */
5853 free (preg->fastmap);
5854 preg->fastmap = NULL;
5855 }
5856 }
5827 return (int) ret; 5857 return (int) ret;
5828} 5858}
5859WEAK_ALIAS (__regcomp, regcomp)
5829 5860
5830 5861
5831/* regexec searches for a given pattern, specified by PREG, in the 5862/* regexec searches for a given pattern, specified by PREG, in the
@@ -5854,7 +5885,7 @@ regexec (preg, string, nmatch, pmatch, eflags)
5854 struct re_registers regs; 5885 struct re_registers regs;
5855 regex_t private_preg; 5886 regex_t private_preg;
5856 int len = strlen (string); 5887 int len = strlen (string);
5857 boolean want_reg_info = !preg->no_sub && nmatch > 0; 5888 boolean want_reg_info = !preg->no_sub && nmatch > 0 && pmatch;
5858 5889
5859 private_preg = *preg; 5890 private_preg = *preg;
5860 5891
@@ -5875,6 +5906,15 @@ regexec (preg, string, nmatch, pmatch, eflags)
5875 regs.end = regs.start + nmatch; 5906 regs.end = regs.start + nmatch;
5876 } 5907 }
5877 5908
5909 /* Instead of using not_eol to implement REG_NOTEOL, we could simply
5910 pass (&private_preg, string, len + 1, 0, len, ...) pretending the string
5911 was a little bit longer but still only matching the real part.
5912 This works because the `endline' will check for a '\n' and will find a
5913 '\0', correctly deciding that this is not the end of a line.
5914 But it doesn't work out so nicely for REG_NOTBOL, since we don't have
5915 a convenient '\0' there. For all we know, the string could be preceded
5916 by '\n' which would throw things off. */
5917
5878 /* Perform the searching operation. */ 5918 /* Perform the searching operation. */
5879 ret = re_search (&private_preg, string, len, 5919 ret = re_search (&private_preg, string, len,
5880 /* start: */ 0, /* range: */ len, 5920 /* start: */ 0, /* range: */ len,
@@ -5901,6 +5941,7 @@ regexec (preg, string, nmatch, pmatch, eflags)
5901 /* We want zero return to mean success, unlike `re_search'. */ 5941 /* We want zero return to mean success, unlike `re_search'. */
5902 return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH; 5942 return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
5903} 5943}
5944WEAK_ALIAS (__regexec, regexec)
5904 5945
5905 5946
5906/* Returns a message corresponding to an error code, ERRCODE, returned 5947/* Returns a message corresponding to an error code, ERRCODE, returned
@@ -5941,6 +5982,7 @@ regerror (errcode, preg, errbuf, errbuf_size)
5941 5982
5942 return msg_size; 5983 return msg_size;
5943} 5984}
5985WEAK_ALIAS (__regerror, regerror)
5944 5986
5945 5987
5946/* Free dynamically allocated space used by PREG. */ 5988/* Free dynamically allocated space used by PREG. */
@@ -5965,5 +6007,6 @@ regfree (preg)
5965 free (preg->translate); 6007 free (preg->translate);
5966 preg->translate = NULL; 6008 preg->translate = NULL;
5967} 6009}
6010WEAK_ALIAS (__regfree, regfree)
5968 6011
5969#endif /* not emacs */ 6012#endif /* not emacs */
diff --git a/src/regex.h b/src/regex.h
index 46f2a633c3a..ef4284cdce2 100644
--- a/src/regex.h
+++ b/src/regex.h
@@ -150,6 +150,17 @@ typedef unsigned long int reg_syntax_t;
150/* If this bit is set, then (?:...) is treated as a shy group. */ 150/* If this bit is set, then (?:...) is treated as a shy group. */
151#define RE_SHY_GROUPS (RE_FRUGAL << 1) 151#define RE_SHY_GROUPS (RE_FRUGAL << 1)
152 152
153/* If this bit is set, ^ and $ only match at beg/end of buffer. */
154#define RE_NO_NEWLINE_ANCHOR (RE_SHY_GROUPS << 1)
155
156/* If this bit is set, turn on internal regex debugging.
157 If not set, and debugging was on, turn it off.
158 This only works if regex.c is compiled -DDEBUG.
159 We define this bit always, so that all that's needed to turn on
160 debugging is to recompile regex.c; the calling code can always have
161 this bit set, and it won't affect anything in the normal case. */
162#define RE_DEBUG (RE_NO_NEWLINE_ANCHOR << 1)
163
153/* This global variable defines the particular regexp syntax to use (for 164/* This global variable defines the particular regexp syntax to use (for
154 some interfaces). When a regexp is compiled, the syntax used is 165 some interfaces). When a regexp is compiled, the syntax used is
155 stored in the pattern buffer, so changing this does not affect 166 stored in the pattern buffer, so changing this does not affect
@@ -379,9 +390,6 @@ struct re_pattern_buffer
379 /* Similarly for an end-of-line anchor. */ 390 /* Similarly for an end-of-line anchor. */
380 unsigned not_eol : 1; 391 unsigned not_eol : 1;
381 392
382 /* If true, an anchor at a newline matches. */
383 unsigned newline_anchor : 1;
384
385#ifdef emacs 393#ifdef emacs
386 /* If true, multi-byte form in the `buffer' should be recognized as a 394 /* If true, multi-byte form in the `buffer' should be recognized as a
387 multibyte character. */ 395 multibyte character. */