about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Paul Eggert 2018-08-05 18:41:20 -0700
committer Paul Eggert 2018-08-05 19:36:09 -0700
commit 3a6abe65c1324361bf0efcb65df61d22a39cfaaf (patch)
tree 90ecb27f9ecbb8a0f8d9b24cf67a809b52b0b32d
parent d904cc83f3036db96107a3976cee1a0112547de6 (diff)
download emacs-3a6abe65c1324361bf0efcb65df61d22a39cfaaf.tar.gz
emacs-3a6abe65c1324361bf0efcb65df61d22a39cfaaf.zip
Simplify regex-emacs code by assuming Emacs
* src/regex-emacs.c: Omit no-longer-needed AIX code. Don’t ignore GCC warnings. Include regex-emacs.h immediately after config.h, to test that it’s independent. Omit the "#ifndef emacs" and "#ifdef REGEX_MALLOC" and "#if WIDE_CHAR_SUPPORT" or "#ifdef _REGEX_RE_COMP", code, as we are no longer interested in compiling outside Emacs (with or without debugging or native wide char support) or in avoiding alloca. (REGEX_EMACS_DEBUG, regex_emacs_debug): Rename from DEBUG and debug, to avoid collision with other DEBUGS. All uses changed. In debugging output, change %ld and %zd to %zu when appropriate. No need to include stddef.h, stdlib.h, sys/types.h, wchar.h, wctype.h, locale/localeinfo.h, locale/elem-hash.h, langinfo.h, libintl.h, unistd.h, stdbool.h, string.h, stdio.h, assert.h. All uses of assert changed to eassert. (RE_DUP_MAX, reg_syntax_t, RE_BACKSLASH_ESCAPE_IN_LISTS) (RE_BK_PLUS_QM, RE_CHAR_CLASSES, RE_CONTEXT_INDEP_ANCHORS) (RE_CONTEXT_INDEP_OPS, RE_CONTEXT_INVALID_OPS, RE_DOT_NEWLINE) (RE_DOT_NOT_NULL, RE_HAT_LISTS_NOT_NEWLINE, RE_INTERVALS) (RE_LIMITED_OPS, RE_NEWLINE_ALT, RE_NO_BK_BRACES) (RE_NO_BK_PARENS, RE_NO_BK_REFS, RE_NO_BK_VBAR) (RE_NO_EMPTY_RANGES, RE_UNMATCHED_RIGHT_PAREN_ORD) (RE_NO_POSIX_BACKTRACKING, RE_NO_GNU_OPS, RE_FRUGAL) (RE_SHY_GROUPS, RE_NO_NEWLINE_ANCHOR, RE_SYNTAX_EMACS) (REG_NOERROR, REG_NOMATCH, REG_BADPAT, REG_ECOLLATE) (REG_ECTYPE, REG_EESCAPE, REG_ESUBREG, REG_EBRACK, REG_EPAREN) (REG_EBRACE, REG_BADBR, REG_ERANGE, REG_ESPACE, REG_BADRPT) (REG_EEND, REG_ESIZE, REG_ERPAREN, REG_ERANGEX, REG_ESIZEBR) (reg_errcode_t, REGS_UNALLOCATED, REGS_REALLOCATE, REGS_FIXED) (RE_NREGS, RE_TRANSLATE, RE_TRANSLATE_P): Move here from regex-emacs.h. (RE_NREGS): Define unconditionally. (boolean): Remove. All uses replaced by bool. (WIDE_CHAR_SUPPORT, regfree, regexec, regcomp, regerror): (re_set_syntax, re_syntax_options, WEAK_ALIAS, gettext, gettext_noop): Remove. All uses removed. (malloc, realloc, free): Do not redefine. 
Adjust all callers to use xmalloc, xrealloc, xfree instead. (re_error_msgid): Use C99 to avoid need to keep in same order as reg_error_t. (REGEX_USE_SAFE_ALLOCA): Simplify by using USE_SAFE_ALLOCA. (REGEX_ALLOCATE, REGEX_REALLOCATE, REGEX_FREE, REGEX_ALLOCATE_STACK) (REGEX_REALLOCATE_STACK, REGEX_FREE_STACK): Remove. All callers changed to use the non-REGEX_MALLOC version. (REGEX_TALLOC): Remove. All callers changed to use SAFE_ALLOCA. (re_set_syntax): Remove; unused. (MATCH_MAY_ALLOCATE): Remove; now always true. All uses simplified. (INIT_FAILURE_ALLOC): Define unconditionally. (re_compile_fastmap): Now static. (re_compile_pattern): Avoid unnecessary cast. * src/regex-emacs.h (EMACS_REGEX_H): Renamed from _REGEX_H to avoid possible collision with glibc. Don’t include sys/types.h. All uses of ssize_t changed to ptrdiff_t. Don’t worry about C++ or VMS. Assume emacs is defined and that _REGEX_RE_COMP and WIDE_CHAR_SUPPORT are not. Define struct re_registers before including lisp.h. (REG_ENOSYS, RE_TRANSLATE_TYPE): Remove; all uses replaced by Lisp_Object. (regoff_t): Remove. All uses replaced with ptrdiff_t. (re_match, regcomp, regexec, regerror, regfree): Remove decl of nonexistent functions. (RE_DEBUG, RE_SYNTAX_AWK, RE_SYNTAX_GNU_AWK) (RE_SYNTAX_POSIX_AWK, RE_SYNTAX_GREP, RE_SYNTAX_EGREP) (RE_SYNTAX_POSIX_EGREP, RE_SYNTAX_ED, RE_SYNTAX_SED) (_RE_SYNTAX_POSIX_COMMON, RE_SYNTAX_POSIX_BASIC) (RE_SYNTAX_POSIX_MINIMAL_BASIC, RE_SYNTAX_POSIX_EXTENDED) (RE_SYNTAX_POSIX_MINIMAL_EXTENDED, REG_EXTENDED, REG_ICASE) (REG_NEWLINE, REG_NOSUB, REG_NOTBOL, REG_NOTEOL, regmatch_t): Remove; unused. * src/search.c (Fset_match_data): Simplify range test now that we know it’s ptrdiff_t.
-rw-r--r-- src/regex-emacs.c 2013
-rw-r--r-- src/regex-emacs.h 543
-rw-r--r-- src/search.c 21
-rw-r--r-- src/thread.h 4
4 files changed, 500 insertions, 2081 deletions
diff --git a/src/regex-emacs.c b/src/regex-emacs.c
index 08fc8c67f1c..eb5970ffcf1 100644
--- a/src/regex-emacs.c
+++ b/src/regex-emacs.c
@@ -21,159 +21,187 @@
21 - structure the opcode space into opcode+flag. 21 - structure the opcode space into opcode+flag.
22 - merge with glibc's regex.[ch]. 22 - merge with glibc's regex.[ch].
23 - replace (succeed_n + jump_n + set_number_at) with something that doesn't 23 - replace (succeed_n + jump_n + set_number_at) with something that doesn't
24 need to modify the compiled regexp so that re_match can be reentrant. 24 need to modify the compiled regexp so that re_search can be reentrant.
25 - get rid of on_failure_jump_smart by doing the optimization in re_comp 25 - get rid of on_failure_jump_smart by doing the optimization in re_comp
26 rather than at run-time, so that re_match can be reentrant. 26 rather than at run-time, so that re_search can be reentrant.
27*/ 27*/
28 28
29/* AIX requires this to be the first thing in the file. */
30#if defined _AIX && !defined REGEX_MALLOC
31 #pragma alloca
32#endif
33
34/* Ignore some GCC warnings for now. This section should go away
35 once the Emacs and Gnulib regex code is merged. */
36#if 4 < __GNUC__ + (5 <= __GNUC_MINOR__) || defined __clang__
37# pragma GCC diagnostic ignored "-Wstrict-overflow"
38# ifndef emacs
39# pragma GCC diagnostic ignored "-Wunused-function"
40# pragma GCC diagnostic ignored "-Wunused-macros"
41# pragma GCC diagnostic ignored "-Wunused-result"
42# pragma GCC diagnostic ignored "-Wunused-variable"
43# endif
44#endif
45
46#if 4 < __GNUC__ + (6 <= __GNUC_MINOR__) && ! defined __clang__
47# pragma GCC diagnostic ignored "-Wunused-but-set-variable"
48#endif
49
50#include <config.h> 29#include <config.h>
51 30
52#include <stddef.h> 31/* Get the interface, including the syntax bits. */
53#include <stdlib.h> 32#include "regex-emacs.h"
54
55#ifdef emacs
56/* We need this for `regex-emacs.h', and perhaps for the Emacs include
57 files. */
58# include <sys/types.h>
59#endif
60
61/* Whether to use ISO C Amendment 1 wide char functions.
62 Those should not be used for Emacs since it uses its own. */
63#if defined _LIBC
64#define WIDE_CHAR_SUPPORT 1
65#else
66#define WIDE_CHAR_SUPPORT \
67 (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC && !emacs)
68#endif
69 33
70/* For platform which support the ISO C amendment 1 functionality we 34#include <stdlib.h>
71 support user defined character classes. */
72#if WIDE_CHAR_SUPPORT
73/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */
74# include <wchar.h>
75# include <wctype.h>
76#endif
77 35
78#ifdef _LIBC 36#include "character.h"
79/* We have to keep the namespace clean. */ 37#include "buffer.h"
80# define regfree(preg) __regfree (preg)
81# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
82# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
83# define regerror(err_code, preg, errbuf, errbuf_size) \
84 __regerror (err_code, preg, errbuf, errbuf_size)
85# define re_set_registers(bu, re, nu, st, en) \
86 __re_set_registers (bu, re, nu, st, en)
87# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
88 __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
89# define re_match(bufp, string, size, pos, regs) \
90 __re_match (bufp, string, size, pos, regs)
91# define re_search(bufp, string, size, startpos, range, regs) \
92 __re_search (bufp, string, size, startpos, range, regs)
93# define re_compile_pattern(pattern, length, bufp) \
94 __re_compile_pattern (pattern, length, bufp)
95# define re_set_syntax(syntax) __re_set_syntax (syntax)
96# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
97 __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
98# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
99
100/* Make sure we call libc's function even if the user overrides them. */
101# define btowc __btowc
102# define iswctype __iswctype
103# define wctype __wctype
104
105# define WEAK_ALIAS(a,b) weak_alias (a, b)
106
107/* We are also using some library internals. */
108# include <locale/localeinfo.h>
109# include <locale/elem-hash.h>
110# include <langinfo.h>
111#else
112# define WEAK_ALIAS(a,b)
113#endif
114 38
115/* This is for other GNU distributions with internationalized messages. */ 39#include "syntax.h"
116#if HAVE_LIBINTL_H || defined _LIBC 40#include "category.h"
117# include <libintl.h>
118#else
119# define gettext(msgid) (msgid)
120#endif
121 41
122#ifndef gettext_noop 42/* Maximum number of duplicates an interval can allow. Some systems
123/* This define is so xgettext can find the internationalizable 43 define this in other header files, but we want our
124 strings. */ 44 value, so remove any previous define. */
125# define gettext_noop(String) String 45#ifdef RE_DUP_MAX
46# undef RE_DUP_MAX
126#endif 47#endif
127 48/* Repeat counts are stored in opcodes as 2 byte integers. This was
128/* The `emacs' switch turns on certain matching commands 49 previously limited to 7fff because the parsing code uses signed
129 that make sense only in Emacs. */ 50 ints. But Emacs only runs on 32 bit platforms anyway. */
130#ifdef emacs 51#define RE_DUP_MAX (0xffff)
131 52
132# include "lisp.h" 53/* The following bits are used to determine the regexp syntax we
133# include "character.h" 54 recognize. The set/not-set meanings were historically chosen so
134# include "buffer.h" 55 that Emacs syntax had the value 0.
135 56 The bits are given in alphabetical order, and
136# include "syntax.h" 57 the definitions shifted by one from the previous bit; thus, when we
137# include "category.h" 58 add or remove a bit, only one other definition need change. */
59typedef unsigned long reg_syntax_t;
60
61/* If this bit is not set, then \ inside a bracket expression is literal.
62 If set, then such a \ quotes the following character. */
63#define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1)
64
65/* If this bit is not set, then + and ? are operators, and \+ and \? are
66 literals.
67 If set, then \+ and \? are operators and + and ? are literals. */
68#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
69
70/* If this bit is set, then character classes are supported. They are:
71 [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:],
72 [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
73 If not set, then character classes are not supported. */
74#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
75
76/* If this bit is set, then ^ and $ are always anchors (outside bracket
77 expressions, of course).
78 If this bit is not set, then it depends:
79 ^ is an anchor if it is at the beginning of a regular
80 expression or after an open-group or an alternation operator;
81 $ is an anchor if it is at the end of a regular expression, or
82 before a close-group or an alternation operator.
83
84 This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
85 POSIX draft 11.2 says that * etc. in leading positions is undefined.
86 We already implemented a previous draft which made those constructs
87 invalid, though, so we haven't changed the code back. */
88#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
89
90/* If this bit is set, then special characters are always special
91 regardless of where they are in the pattern.
92 If this bit is not set, then special characters are special only in
93 some contexts; otherwise they are ordinary. Specifically,
94 * + ? and intervals are only special when not after the beginning,
95 open-group, or alternation operator. */
96#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
97
98/* If this bit is set, then *, +, ?, and { cannot be first in an re or
99 immediately after an alternation or begin-group operator. */
100#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
101
102/* If this bit is set, then . matches newline.
103 If not set, then it doesn't. */
104#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
105
106/* If this bit is set, then . doesn't match NUL.
107 If not set, then it does. */
108#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
109
110/* If this bit is set, nonmatching lists [^...] do not match newline.
111 If not set, they do. */
112#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
113
114/* If this bit is set, either \{...\} or {...} defines an
115 interval, depending on RE_NO_BK_BRACES.
116 If not set, \{, \}, {, and } are literals. */
117#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
118
119/* If this bit is set, +, ? and | aren't recognized as operators.
120 If not set, they are. */
121#define RE_LIMITED_OPS (RE_INTERVALS << 1)
122
123/* If this bit is set, newline is an alternation operator.
124 If not set, newline is literal. */
125#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
126
127/* If this bit is set, then `{...}' defines an interval, and \{ and \}
128 are literals.
129 If not set, then `\{...\}' defines an interval. */
130#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
131
132/* If this bit is set, (...) defines a group, and \( and \) are literals.
133 If not set, \(...\) defines a group, and ( and ) are literals. */
134#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
135
136/* If this bit is set, then \<digit> matches <digit>.
137 If not set, then \<digit> is a back-reference. */
138#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
139
140/* If this bit is set, then | is an alternation operator, and \| is literal.
141 If not set, then \| is an alternation operator, and | is literal. */
142#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
143
144/* If this bit is set, then an ending range point collating higher
145 than the starting range point, as in [z-a], is invalid.
146 If not set, then when ending range point collates higher than the
147 starting range point, the range is ignored. */
148#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
149
150/* If this bit is set, then an unmatched ) is ordinary.
151 If not set, then an unmatched ) is invalid. */
152#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
153
154/* If this bit is set, succeed as soon as we match the whole pattern,
155 without further backtracking. */
156#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)
157
158/* If this bit is set, do not process the GNU regex operators.
159 If not set, then the GNU regex operators are recognized. */
160#define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)
161
162/* If this bit is set, then *?, +? and ?? match non greedily. */
163#define RE_FRUGAL (RE_NO_GNU_OPS << 1)
164
165/* If this bit is set, then (?:...) is treated as a shy group. */
166#define RE_SHY_GROUPS (RE_FRUGAL << 1)
167
168/* If this bit is set, ^ and $ only match at beg/end of buffer. */
169#define RE_NO_NEWLINE_ANCHOR (RE_SHY_GROUPS << 1)
170
171/* This global variable defines the particular regexp syntax to use (for
172 some interfaces). When a regexp is compiled, the syntax used is
173 stored in the pattern buffer, so changing this does not affect
174 already-compiled regexps. */
175/* extern reg_syntax_t re_syntax_options; */
176/* Define combinations of the above bits for the standard possibilities. */
177#define RE_SYNTAX_EMACS \
178 (RE_CHAR_CLASSES | RE_INTERVALS | RE_SHY_GROUPS | RE_FRUGAL)
138 179
139/* Make syntax table lookup grant data in gl_state. */ 180/* Make syntax table lookup grant data in gl_state. */
140# define SYNTAX(c) syntax_property (c, 1) 181#define SYNTAX(c) syntax_property (c, 1)
141
142# ifdef malloc
143# undef malloc
144# endif
145# define malloc xmalloc
146# ifdef realloc
147# undef realloc
148# endif
149# define realloc xrealloc
150# ifdef free
151# undef free
152# endif
153# define free xfree
154 182
155/* Converts the pointer to the char to BEG-based offset from the start. */ 183/* Converts the pointer to the char to BEG-based offset from the start. */
156# define PTR_TO_OFFSET(d) POS_AS_IN_BUFFER (POINTER_TO_OFFSET (d)) 184#define PTR_TO_OFFSET(d) POS_AS_IN_BUFFER (POINTER_TO_OFFSET (d))
157/* Strings are 0-indexed, buffers are 1-indexed; we pun on the boolean 185/* Strings are 0-indexed, buffers are 1-indexed; we pun on the boolean
158 result to get the right base index. */ 186 result to get the right base index. */
159# define POS_AS_IN_BUFFER(p) \ 187#define POS_AS_IN_BUFFER(p) \
160 ((p) + (NILP (gl_state.object) || BUFFERP (gl_state.object))) 188 ((p) + (NILP (gl_state.object) || BUFFERP (gl_state.object)))
161 189
162# define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte) 190#define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte)
163# define RE_TARGET_MULTIBYTE_P(bufp) ((bufp)->target_multibyte) 191#define RE_TARGET_MULTIBYTE_P(bufp) ((bufp)->target_multibyte)
164# define RE_STRING_CHAR(p, multibyte) \ 192#define RE_STRING_CHAR(p, multibyte) \
165 (multibyte ? (STRING_CHAR (p)) : (*(p))) 193 (multibyte ? (STRING_CHAR (p)) : (*(p)))
166# define RE_STRING_CHAR_AND_LENGTH(p, len, multibyte) \ 194#define RE_STRING_CHAR_AND_LENGTH(p, len, multibyte) \
167 (multibyte ? (STRING_CHAR_AND_LENGTH (p, len)) : ((len) = 1, *(p))) 195 (multibyte ? (STRING_CHAR_AND_LENGTH (p, len)) : ((len) = 1, *(p)))
168 196
169# define RE_CHAR_TO_MULTIBYTE(c) UNIBYTE_TO_CHAR (c) 197#define RE_CHAR_TO_MULTIBYTE(c) UNIBYTE_TO_CHAR (c)
170 198
171# define RE_CHAR_TO_UNIBYTE(c) CHAR_TO_BYTE_SAFE (c) 199#define RE_CHAR_TO_UNIBYTE(c) CHAR_TO_BYTE_SAFE (c)
172 200
173/* Set C a (possibly converted to multibyte) character before P. P 201/* Set C a (possibly converted to multibyte) character before P. P
174 points into a string which is the virtual concatenation of STR1 202 points into a string which is the virtual concatenation of STR1
175 (which ends at END1) or STR2 (which ends at END2). */ 203 (which ends at END1) or STR2 (which ends at END2). */
176# define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \ 204#define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \
177 do { \ 205 do { \
178 if (target_multibyte) \ 206 if (target_multibyte) \
179 { \ 207 { \
@@ -191,7 +219,7 @@
191 219
192/* Set C a (possibly converted to multibyte) character at P, and set 220/* Set C a (possibly converted to multibyte) character at P, and set
193 LEN to the byte length of that character. */ 221 LEN to the byte length of that character. */
194# define GET_CHAR_AFTER(c, p, len) \ 222#define GET_CHAR_AFTER(c, p, len) \
195 do { \ 223 do { \
196 if (target_multibyte) \ 224 if (target_multibyte) \
197 (c) = STRING_CHAR_AND_LENGTH (p, len); \ 225 (c) = STRING_CHAR_AND_LENGTH (p, len); \
@@ -202,235 +230,66 @@
202 (c) = RE_CHAR_TO_MULTIBYTE (c); \ 230 (c) = RE_CHAR_TO_MULTIBYTE (c); \
203 } \ 231 } \
204 } while (0) 232 } while (0)
205
206#else /* not emacs */
207
208/* If we are not linking with Emacs proper,
209 we can't use the relocating allocator
210 even if config.h says that we can. */
211# undef REL_ALLOC
212
213# include <unistd.h>
214
215/* When used in Emacs's lib-src, we need xmalloc and xrealloc. */
216
217static ATTRIBUTE_MALLOC void *
218xmalloc (size_t size)
219{
220 void *val = malloc (size);
221 if (!val && size)
222 {
223 write (STDERR_FILENO, "virtual memory exhausted\n", 25);
224 exit (1);
225 }
226 return val;
227}
228
229static void *
230xrealloc (void *block, size_t size)
231{
232 void *val;
233 /* We must call malloc explicitly when BLOCK is 0, since some
234 reallocs don't do this. */
235 if (! block)
236 val = malloc (size);
237 else
238 val = realloc (block, size);
239 if (!val && size)
240 {
241 write (STDERR_FILENO, "virtual memory exhausted\n", 25);
242 exit (1);
243 }
244 return val;
245}
246
247# ifdef malloc
248# undef malloc
249# endif
250# define malloc xmalloc
251# ifdef realloc
252# undef realloc
253# endif
254# define realloc xrealloc
255
256# include <stdbool.h>
257# include <string.h>
258
259/* Define the syntax stuff for \<, \>, etc. */
260
261/* Sword must be nonzero for the wordchar pattern commands in re_match_2. */
262enum syntaxcode { Swhitespace = 0, Sword = 1, Ssymbol = 2 };
263
264/* Dummy macros for non-Emacs environments. */
265# define MAX_MULTIBYTE_LENGTH 1
266# define RE_MULTIBYTE_P(x) 0
267# define RE_TARGET_MULTIBYTE_P(x) 0
268# define WORD_BOUNDARY_P(c1, c2) (0)
269# define BYTES_BY_CHAR_HEAD(p) (1)
270# define PREV_CHAR_BOUNDARY(p, limit) ((p)--)
271# define STRING_CHAR(p) (*(p))
272# define RE_STRING_CHAR(p, multibyte) STRING_CHAR (p)
273# define CHAR_STRING(c, s) (*(s) = (c), 1)
274# define STRING_CHAR_AND_LENGTH(p, actual_len) ((actual_len) = 1, *(p))
275# define RE_STRING_CHAR_AND_LENGTH(p, len, multibyte) STRING_CHAR_AND_LENGTH (p, len)
276# define RE_CHAR_TO_MULTIBYTE(c) (c)
277# define RE_CHAR_TO_UNIBYTE(c) (c)
278# define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \
279 (c = ((p) == (str2) ? *((end1) - 1) : *((p) - 1)))
280# define GET_CHAR_AFTER(c, p, len) \
281 (c = *p, len = 1)
282# define CHAR_BYTE8_P(c) (0)
283# define CHAR_LEADING_CODE(c) (c)
284
285#endif /* not emacs */
286
287#ifndef RE_TRANSLATE
288# define RE_TRANSLATE(TBL, C) ((unsigned char)(TBL)[C])
289# define RE_TRANSLATE_P(TBL) (TBL)
290#endif
291 233
292/* Get the interface, including the syntax bits. */
293#include "regex-emacs.h"
294
295/* isalpha etc. are used for the character classes. */ 234/* isalpha etc. are used for the character classes. */
296#include <ctype.h> 235#include <ctype.h>
297 236
298#ifdef emacs
299
300/* 1 if C is an ASCII character. */ 237/* 1 if C is an ASCII character. */
301# define IS_REAL_ASCII(c) ((c) < 0200) 238#define IS_REAL_ASCII(c) ((c) < 0200)
302 239
303/* 1 if C is a unibyte character. */ 240/* 1 if C is a unibyte character. */
304# define ISUNIBYTE(c) (SINGLE_BYTE_CHAR_P ((c))) 241#define ISUNIBYTE(c) (SINGLE_BYTE_CHAR_P ((c)))
305 242
306/* The Emacs definitions should not be directly affected by locales. */ 243/* The Emacs definitions should not be directly affected by locales. */
307 244
308/* In Emacs, these are only used for single-byte characters. */ 245/* In Emacs, these are only used for single-byte characters. */
309# define ISDIGIT(c) ((c) >= '0' && (c) <= '9') 246#define ISDIGIT(c) ((c) >= '0' && (c) <= '9')
310# define ISCNTRL(c) ((c) < ' ') 247#define ISCNTRL(c) ((c) < ' ')
311# define ISXDIGIT(c) (0 <= char_hexdigit (c)) 248#define ISXDIGIT(c) (0 <= char_hexdigit (c))
312 249
313/* The rest must handle multibyte characters. */ 250/* The rest must handle multibyte characters. */
314 251
315# define ISBLANK(c) (IS_REAL_ASCII (c) \ 252#define ISBLANK(c) (IS_REAL_ASCII (c) \
316 ? ((c) == ' ' || (c) == '\t') \ 253 ? ((c) == ' ' || (c) == '\t') \
317 : blankp (c)) 254 : blankp (c))
318 255
319# define ISGRAPH(c) (SINGLE_BYTE_CHAR_P (c) \ 256#define ISGRAPH(c) (SINGLE_BYTE_CHAR_P (c) \
320 ? (c) > ' ' && !((c) >= 0177 && (c) <= 0240) \ 257 ? (c) > ' ' && !((c) >= 0177 && (c) <= 0240) \
321 : graphicp (c)) 258 : graphicp (c))
322 259
323# define ISPRINT(c) (SINGLE_BYTE_CHAR_P (c) \ 260#define ISPRINT(c) (SINGLE_BYTE_CHAR_P (c) \
324 ? (c) >= ' ' && !((c) >= 0177 && (c) <= 0237) \ 261 ? (c) >= ' ' && !((c) >= 0177 && (c) <= 0237) \
325 : printablep (c)) 262 : printablep (c))
326 263
327# define ISALNUM(c) (IS_REAL_ASCII (c) \ 264#define ISALNUM(c) (IS_REAL_ASCII (c) \
328 ? (((c) >= 'a' && (c) <= 'z') \ 265 ? (((c) >= 'a' && (c) <= 'z') \
329 || ((c) >= 'A' && (c) <= 'Z') \ 266 || ((c) >= 'A' && (c) <= 'Z') \
330 || ((c) >= '0' && (c) <= '9')) \ 267 || ((c) >= '0' && (c) <= '9')) \
331 : alphanumericp (c)) 268 : alphanumericp (c))
332 269
333# define ISALPHA(c) (IS_REAL_ASCII (c) \ 270#define ISALPHA(c) (IS_REAL_ASCII (c) \
334 ? (((c) >= 'a' && (c) <= 'z') \ 271 ? (((c) >= 'a' && (c) <= 'z') \
335 || ((c) >= 'A' && (c) <= 'Z')) \ 272 || ((c) >= 'A' && (c) <= 'Z')) \
336 : alphabeticp (c)) 273 : alphabeticp (c))
337 274
338# define ISLOWER(c) lowercasep (c) 275#define ISLOWER(c) lowercasep (c)
339 276
340# define ISPUNCT(c) (IS_REAL_ASCII (c) \ 277#define ISPUNCT(c) (IS_REAL_ASCII (c) \
341 ? ((c) > ' ' && (c) < 0177 \ 278 ? ((c) > ' ' && (c) < 0177 \
342 && !(((c) >= 'a' && (c) <= 'z') \ 279 && !(((c) >= 'a' && (c) <= 'z') \
343 || ((c) >= 'A' && (c) <= 'Z') \ 280 || ((c) >= 'A' && (c) <= 'Z') \
344 || ((c) >= '0' && (c) <= '9'))) \ 281 || ((c) >= '0' && (c) <= '9'))) \
345 : SYNTAX (c) != Sword) 282 : SYNTAX (c) != Sword)
346 283
347# define ISSPACE(c) (SYNTAX (c) == Swhitespace) 284#define ISSPACE(c) (SYNTAX (c) == Swhitespace)
348 285
349# define ISUPPER(c) uppercasep (c) 286#define ISUPPER(c) uppercasep (c)
350
351# define ISWORD(c) (SYNTAX (c) == Sword)
352
353#else /* not emacs */
354
355/* 1 if C is an ASCII character. */
356# define IS_REAL_ASCII(c) ((c) < 0200)
357
358/* This distinction is not meaningful, except in Emacs. */
359# define ISUNIBYTE(c) 1
360
361# ifdef isblank
362# define ISBLANK(c) isblank (c)
363# else
364# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
365# endif
366# ifdef isgraph
367# define ISGRAPH(c) isgraph (c)
368# else
369# define ISGRAPH(c) (isprint (c) && !isspace (c))
370# endif
371
372/* Solaris defines ISPRINT so we must undefine it first. */
373# undef ISPRINT
374# define ISPRINT(c) isprint (c)
375# define ISDIGIT(c) isdigit (c)
376# define ISALNUM(c) isalnum (c)
377# define ISALPHA(c) isalpha (c)
378# define ISCNTRL(c) iscntrl (c)
379# define ISLOWER(c) islower (c)
380# define ISPUNCT(c) ispunct (c)
381# define ISSPACE(c) isspace (c)
382# define ISUPPER(c) isupper (c)
383# define ISXDIGIT(c) isxdigit (c)
384
385# define ISWORD(c) ISALPHA (c)
386
387# ifdef _tolower
388# define TOLOWER(c) _tolower (c)
389# else
390# define TOLOWER(c) tolower (c)
391# endif
392
393/* How many characters in the character set. */
394# define CHAR_SET_SIZE 256
395
396# ifdef SYNTAX_TABLE
397
398extern char *re_syntax_table;
399
400# else /* not SYNTAX_TABLE */
401
402static char re_syntax_table[CHAR_SET_SIZE];
403
404static void
405init_syntax_once (void)
406{
407 register int c;
408 static int done = 0;
409
410 if (done)
411 return;
412
413 memset (re_syntax_table, 0, sizeof re_syntax_table);
414
415 for (c = 0; c < CHAR_SET_SIZE; ++c)
416 if (ISALNUM (c))
417 re_syntax_table[c] = Sword;
418
419 re_syntax_table['_'] = Ssymbol;
420
421 done = 1;
422}
423 287
424# endif /* not SYNTAX_TABLE */ 288#define ISWORD(c) (SYNTAX (c) == Sword)
425
426# define SYNTAX(c) re_syntax_table[(c)]
427
428#endif /* not emacs */
429 289
430#define SIGN_EXTEND_CHAR(c) ((signed char) (c)) 290#define SIGN_EXTEND_CHAR(c) ((signed char) (c))
431 291
432/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we 292/* Use alloca instead of malloc. This is because using malloc in
433 use `alloca' instead of `malloc'. This is because using malloc in
434 re_search* or re_match* could cause memory leaks when C-g is used 293 re_search* or re_match* could cause memory leaks when C-g is used
435 in Emacs (note that SAFE_ALLOCA could also call malloc, but does so 294 in Emacs (note that SAFE_ALLOCA could also call malloc, but does so
436 via `record_xmalloc' which uses `unwind_protect' to ensure the 295 via `record_xmalloc' which uses `unwind_protect' to ensure the
@@ -442,64 +301,17 @@ init_syntax_once (void)
442 not functions -- `alloca'-allocated space disappears at the end of the 301 not functions -- `alloca'-allocated space disappears at the end of the
443 function it is called in. */ 302 function it is called in. */
444 303
445#ifdef REGEX_MALLOC
446
447# define REGEX_ALLOCATE malloc
448# define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
449# define REGEX_FREE free
450
451#else /* not REGEX_MALLOC */
452
453# ifdef emacs
454/* This may be adjusted in main(), if the stack is successfully grown. */ 304/* This may be adjusted in main(), if the stack is successfully grown. */
455ptrdiff_t emacs_re_safe_alloca = MAX_ALLOCA; 305ptrdiff_t emacs_re_safe_alloca = MAX_ALLOCA;
456/* Like USE_SAFE_ALLOCA, but use emacs_re_safe_alloca. */ 306/* Like USE_SAFE_ALLOCA, but use emacs_re_safe_alloca. */
457# define REGEX_USE_SAFE_ALLOCA \ 307#define REGEX_USE_SAFE_ALLOCA \
458 ptrdiff_t sa_avail = emacs_re_safe_alloca; \ 308 USE_SAFE_ALLOCA; sa_avail = emacs_re_safe_alloca
459 ptrdiff_t sa_count = SPECPDL_INDEX ()
460
461# define REGEX_SAFE_FREE() SAFE_FREE ()
462# define REGEX_ALLOCATE SAFE_ALLOCA
463# else
464# include <alloca.h>
465# define REGEX_ALLOCATE alloca
466# endif
467 309
468/* Assumes a `char *destination' variable. */ 310/* Assumes a `char *destination' variable. */
469# define REGEX_REALLOCATE(source, osize, nsize) \ 311#define REGEX_REALLOCATE(source, osize, nsize) \
470 (destination = REGEX_ALLOCATE (nsize), \ 312 (destination = SAFE_ALLOCA (nsize), \
471 memcpy (destination, source, osize)) 313 memcpy (destination, source, osize))
472 314
473/* No need to do anything to free, after alloca. */
474# define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */
475
476#endif /* not REGEX_MALLOC */
477
478#ifndef REGEX_USE_SAFE_ALLOCA
479# define REGEX_USE_SAFE_ALLOCA ((void) 0)
480# define REGEX_SAFE_FREE() ((void) 0)
481#endif
482
483/* Define how to allocate the failure stack. */
484
485#if defined REL_ALLOC && defined REGEX_MALLOC
486
487# define REGEX_ALLOCATE_STACK(size) \
488 r_alloc (&failure_stack_ptr, (size))
489# define REGEX_REALLOCATE_STACK(source, osize, nsize) \
490 r_re_alloc (&failure_stack_ptr, (nsize))
491# define REGEX_FREE_STACK(ptr) \
492 r_alloc_free (&failure_stack_ptr)
493
494#else /* not using relocating allocator */
495
496# define REGEX_ALLOCATE_STACK(size) REGEX_ALLOCATE (size)
497# define REGEX_REALLOCATE_STACK(source, o, n) REGEX_REALLOCATE (source, o, n)
498# define REGEX_FREE_STACK(ptr) REGEX_FREE (ptr)
499
500#endif /* not using relocating allocator */
501
502
503/* True if `size1' is non-NULL and PTR is pointing anywhere inside 315/* True if `size1' is non-NULL and PTR is pointing anywhere inside
504 `string1' or just past its end. This works if PTR is NULL, which is 316 `string1' or just past its end. This works if PTR is NULL, which is
505 a good thing. */ 317 a good thing. */
@@ -507,30 +319,21 @@ ptrdiff_t emacs_re_safe_alloca = MAX_ALLOCA;
507 (size1 && string1 <= (ptr) && (ptr) <= string1 + size1) 319 (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
508 320
509/* (Re)Allocate N items of type T using malloc, or fail. */ 321/* (Re)Allocate N items of type T using malloc, or fail. */
510#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t))) 322#define TALLOC(n, t) ((t *) xmalloc ((n) * sizeof (t)))
511#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t))) 323#define RETALLOC(addr, n, t) ((addr) = (t *) xrealloc (addr, (n) * sizeof (t)))
512#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
513 324
514#define BYTEWIDTH 8 /* In bits. */ 325#define BYTEWIDTH 8 /* In bits. */
515 326
516#ifndef emacs
517# undef max
518# undef min
519# define max(a, b) ((a) > (b) ? (a) : (b))
520# define min(a, b) ((a) < (b) ? (a) : (b))
521#endif
522
523/* Type of source-pattern and string chars. */ 327/* Type of source-pattern and string chars. */
524typedef const unsigned char re_char; 328typedef const unsigned char re_char;
525 329
526typedef char boolean; 330static void re_compile_fastmap (struct re_pattern_buffer *);
527 331static ptrdiff_t re_match_2_internal (struct re_pattern_buffer *bufp,
528static regoff_t re_match_2_internal (struct re_pattern_buffer *bufp,
529 re_char *string1, size_t size1, 332 re_char *string1, size_t size1,
530 re_char *string2, size_t size2, 333 re_char *string2, size_t size2,
531 ssize_t pos, 334 ptrdiff_t pos,
532 struct re_registers *regs, 335 struct re_registers *regs,
533 ssize_t stop); 336 ptrdiff_t stop);
534 337
535/* These are the command codes that appear in compiled regular 338/* These are the command codes that appear in compiled regular
536 expressions. Some opcodes are followed by argument bytes. A 339 expressions. Some opcodes are followed by argument bytes. A
@@ -592,8 +395,7 @@ typedef enum
592 /* Fail unless at end of line. */ 395 /* Fail unless at end of line. */
593 endline, 396 endline,
594 397
595 /* Succeeds if at beginning of buffer (if emacs) or at beginning 398 /* Succeeds if at beginning of buffer. */
596 of string to be matched (if not). */
597 begbuf, 399 begbuf,
598 400
599 /* Analogously, for end of buffer/string. */ 401 /* Analogously, for end of buffer/string. */
@@ -658,10 +460,9 @@ typedef enum
658 syntaxspec, 460 syntaxspec,
659 461
660 /* Matches any character whose syntax is not that specified. */ 462 /* Matches any character whose syntax is not that specified. */
661 notsyntaxspec 463 notsyntaxspec,
662 464
663#ifdef emacs 465 at_dot, /* Succeeds if at point. */
664 , at_dot, /* Succeeds if at point. */
665 466
666 /* Matches any character whose category-set contains the specified 467 /* Matches any character whose category-set contains the specified
667 category. The operator is followed by a byte which contains a 468 category. The operator is followed by a byte which contains a
@@ -672,7 +473,6 @@ typedef enum
672 specified category. The operator is followed by a byte which 473 specified category. The operator is followed by a byte which
673 contains the category code (mnemonic ASCII character). */ 474 contains the category code (mnemonic ASCII character). */
674 notcategoryspec 475 notcategoryspec
675#endif /* emacs */
676} re_opcode_t; 476} re_opcode_t;
677 477
678/* Common operations on the compiled pattern. */ 478/* Common operations on the compiled pattern. */
@@ -760,12 +560,10 @@ extract_number_and_incr (re_char **source)
760 and the 2 bytes of flags at the start of the range table. */ 560 and the 2 bytes of flags at the start of the range table. */
761#define CHARSET_RANGE_TABLE(p) (&(p)[4 + CHARSET_BITMAP_SIZE (p)]) 561#define CHARSET_RANGE_TABLE(p) (&(p)[4 + CHARSET_BITMAP_SIZE (p)])
762 562
763#ifdef emacs
764/* Extract the bit flags that start a range table. */ 563/* Extract the bit flags that start a range table. */
765#define CHARSET_RANGE_TABLE_BITS(p) \ 564#define CHARSET_RANGE_TABLE_BITS(p) \
766 ((p)[2 + CHARSET_BITMAP_SIZE (p)] \ 565 ((p)[2 + CHARSET_BITMAP_SIZE (p)] \
767 + (p)[3 + CHARSET_BITMAP_SIZE (p)] * 0x100) 566 + (p)[3 + CHARSET_BITMAP_SIZE (p)] * 0x100)
768#endif
769 567
770/* Return the address of end of RANGE_TABLE. COUNT is number of 568/* Return the address of end of RANGE_TABLE. COUNT is number of
771 ranges (which is a pair of (start, end)) in the RANGE_TABLE. `* 2' 569 ranges (which is a pair of (start, end)) in the RANGE_TABLE. `* 2'
@@ -774,29 +572,23 @@ extract_number_and_incr (re_char **source)
774#define CHARSET_RANGE_TABLE_END(range_table, count) \ 572#define CHARSET_RANGE_TABLE_END(range_table, count) \
775 ((range_table) + (count) * 2 * 3) 573 ((range_table) + (count) * 2 * 3)
776 574
777/* If DEBUG is defined, Regex prints many voluminous messages about what 575/* If REGEX_EMACS_DEBUG is defined, print many voluminous messages
778 it is doing (if the variable `debug' is nonzero). If linked with the 576 (if the variable regex_emacs_debug is positive). */
779 main program in `iregex.c', you can enter patterns and strings
780 interactively. And if linked with the main program in `main.c' and
781 the other test files, you can run the already-written tests. */
782 577
783#ifdef DEBUG 578#ifdef REGEX_EMACS_DEBUG
784 579
785/* We use standard I/O for debugging. */ 580/* We use standard I/O for debugging. */
786# include <stdio.h> 581# include <stdio.h>
787 582
788/* It is useful to test things that ``must'' be true when debugging. */ 583static int regex_emacs_debug = -100000;
789# include <assert.h>
790
791static int debug = -100000;
792 584
793# define DEBUG_STATEMENT(e) e 585# define DEBUG_STATEMENT(e) e
794# define DEBUG_PRINT(...) if (debug > 0) printf (__VA_ARGS__) 586# define DEBUG_PRINT(...) if (regex_emacs_debug > 0) printf (__VA_ARGS__)
795# define DEBUG_COMPILES_ARGUMENTS 587# define DEBUG_COMPILES_ARGUMENTS
796# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \ 588# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
797 if (debug > 0) print_partial_compiled_pattern (s, e) 589 if (regex_emacs_debug > 0) print_partial_compiled_pattern (s, e)
798# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \ 590# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
799 if (debug > 0) print_double_string (w, s1, sz1, s2, sz2) 591 if (regex_emacs_debug > 0) print_double_string (w, s1, sz1, s2, sz2)
800 592
801 593
802/* Print the fastmap in human-readable form. */ 594/* Print the fastmap in human-readable form. */
@@ -1085,7 +877,7 @@ print_compiled_pattern (struct re_pattern_buffer *bufp)
1085 re_char *buffer = bufp->buffer; 877 re_char *buffer = bufp->buffer;
1086 878
1087 print_partial_compiled_pattern (buffer, buffer + bufp->used); 879 print_partial_compiled_pattern (buffer, buffer + bufp->used);
1088 printf ("%ld bytes used/%ld bytes allocated.\n", 880 printf ("%zu bytes used/%zu bytes allocated.\n",
1089 bufp->used, bufp->allocated); 881 bufp->used, bufp->allocated);
1090 882
1091 if (bufp->fastmap_accurate && bufp->fastmap) 883 if (bufp->fastmap_accurate && bufp->fastmap)
@@ -1131,146 +923,100 @@ print_double_string (re_char *where, re_char *string1, ssize_t size1,
1131 } 923 }
1132} 924}
1133 925
1134#else /* not DEBUG */ 926#else /* not REGEX_EMACS_DEBUG */
1135
1136# undef assert
1137# define assert(e)
1138 927
1139# define DEBUG_STATEMENT(e) 928# define DEBUG_STATEMENT(e)
1140# define DEBUG_PRINT(...) 929# define DEBUG_PRINT(...)
1141# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) 930# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
1142# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) 931# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
1143 932
1144#endif /* not DEBUG */ 933#endif /* not REGEX_EMACS_DEBUG */
1145 934
1146#ifndef emacs 935typedef enum
1147
1148/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
1149 also be assigned to arbitrarily: each pattern buffer stores its own
1150 syntax, so it can be changed between regex compilations. */
1151/* This has no initializer because initialized variables in Emacs
1152 become read-only after dumping. */
1153reg_syntax_t re_syntax_options;
1154
1155
1156/* Specify the precise syntax of regexps for compilation. This provides
1157 for compatibility for various utilities which historically have
1158 different, incompatible syntaxes.
1159
1160 The argument SYNTAX is a bit mask comprised of the various bits
1161 defined in regex-emacs.h. We return the old syntax. */
1162
1163reg_syntax_t
1164re_set_syntax (reg_syntax_t syntax)
1165{ 936{
1166 reg_syntax_t ret = re_syntax_options; 937 REG_NOERROR = 0, /* Success. */
1167 938 REG_NOMATCH, /* Didn't find a match (for regexec). */
1168 re_syntax_options = syntax; 939
1169 return ret; 940 /* POSIX regcomp return error codes. (In the order listed in the
1170} 941 standard.) An older version of this code supported the POSIX
1171WEAK_ALIAS (__re_set_syntax, re_set_syntax) 942 API; this version continues to use these names internally. */
1172 943 REG_BADPAT, /* Invalid pattern. */
1173#endif 944 REG_ECOLLATE, /* Not implemented. */
1174 945 REG_ECTYPE, /* Invalid character class name. */
1175/* This table gives an error message for each of the error codes listed 946 REG_EESCAPE, /* Trailing backslash. */
1176 in regex-emacs.h. Obviously the order here has to be same as there. 947 REG_ESUBREG, /* Invalid back reference. */
1177 POSIX doesn't require that we do anything for REG_NOERROR, 948 REG_EBRACK, /* Unmatched left bracket. */
1178 but why not be nice? */ 949 REG_EPAREN, /* Parenthesis imbalance. */
950 REG_EBRACE, /* Unmatched \{. */
951 REG_BADBR, /* Invalid contents of \{\}. */
952 REG_ERANGE, /* Invalid range end. */
953 REG_ESPACE, /* Ran out of memory. */
954 REG_BADRPT, /* No preceding re for repetition op. */
955
956 /* Error codes we've added. */
957 REG_EEND, /* Premature end. */
958 REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */
959 REG_ERPAREN, /* Unmatched ) or \); not returned from regcomp. */
960 REG_ERANGEX, /* Range striding over charsets. */
961 REG_ESIZEBR /* n or m too big in \{n,m\} */
962} reg_errcode_t;
1179 963
1180static const char *re_error_msgid[] = 964static const char *re_error_msgid[] =
1181 { 965 {
1182 gettext_noop ("Success"), /* REG_NOERROR */ 966 [REG_NOERROR] = "Success",
1183 gettext_noop ("No match"), /* REG_NOMATCH */ 967 [REG_NOMATCH] = "No match",
1184 gettext_noop ("Invalid regular expression"), /* REG_BADPAT */ 968 [REG_BADPAT] = "Invalid regular expression",
1185 gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */ 969 [REG_ECOLLATE] = "Invalid collation character",
1186 gettext_noop ("Invalid character class name"), /* REG_ECTYPE */ 970 [REG_ECTYPE] = "Invalid character class name",
1187 gettext_noop ("Trailing backslash"), /* REG_EESCAPE */ 971 [REG_EESCAPE] = "Trailing backslash",
1188 gettext_noop ("Invalid back reference"), /* REG_ESUBREG */ 972 [REG_ESUBREG] = "Invalid back reference",
1189 gettext_noop ("Unmatched [ or [^"), /* REG_EBRACK */ 973 [REG_EBRACK] = "Unmatched [ or [^",
1190 gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */ 974 [REG_EPAREN] = "Unmatched ( or \\(",
1191 gettext_noop ("Unmatched \\{"), /* REG_EBRACE */ 975 [REG_EBRACE] = "Unmatched \\{",
1192 gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */ 976 [REG_BADBR] = "Invalid content of \\{\\}",
1193 gettext_noop ("Invalid range end"), /* REG_ERANGE */ 977 [REG_ERANGE] = "Invalid range end",
1194 gettext_noop ("Memory exhausted"), /* REG_ESPACE */ 978 [REG_ESPACE] = "Memory exhausted",
1195 gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */ 979 [REG_BADRPT] = "Invalid preceding regular expression",
1196 gettext_noop ("Premature end of regular expression"), /* REG_EEND */ 980 [REG_EEND] = "Premature end of regular expression",
1197 gettext_noop ("Regular expression too big"), /* REG_ESIZE */ 981 [REG_ESIZE] = "Regular expression too big",
1198 gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */ 982 [REG_ERPAREN] = "Unmatched ) or \\)",
1199 gettext_noop ("Range striding over charsets"), /* REG_ERANGEX */ 983 [REG_ERANGEX ] = "Range striding over charsets",
1200 gettext_noop ("Invalid content of \\{\\}, repetitions too big") /* REG_ESIZEBR */ 984 [REG_ESIZEBR ] = "Invalid content of \\{\\}",
1201 }; 985 };
1202
1203/* Whether to allocate memory during matching. */
1204
1205/* Define MATCH_MAY_ALLOCATE to allow the searching and matching
1206 functions allocate memory for the failure stack and registers.
1207 Normally should be defined, because otherwise searching and
1208 matching routines will have much smaller memory resources at their
1209 disposal, and therefore might fail to handle complex regexps.
1210 Therefore undefine MATCH_MAY_ALLOCATE only in the following
1211 exceptional situations:
1212
1213 . When running on a system where memory is at premium.
1214 . When alloca cannot be used at all, perhaps due to bugs in
1215 its implementation, or its being unavailable, or due to a
1216 very small stack size. This requires to define REGEX_MALLOC
1217 to use malloc instead, which in turn could lead to memory
1218 leaks if search is interrupted by a signal. (For these
1219 reasons, defining REGEX_MALLOC when building Emacs
1220 automatically undefines MATCH_MAY_ALLOCATE, but outside
1221 Emacs you may not care about memory leaks.) If you want to
1222 prevent the memory leaks, undefine MATCH_MAY_ALLOCATE.
1223 . When code that calls the searching and matching functions
1224 cannot allow memory allocation, for whatever reasons. */
1225
1226/* Normally, this is fine. */
1227#define MATCH_MAY_ALLOCATE
1228
1229/* The match routines may not allocate if (1) they would do it with malloc
1230 and (2) it's not safe for them to use malloc.
1231 Note that if REL_ALLOC is defined, matching would not use malloc for the
1232 failure stack, but we would still use it for the register vectors;
1233 so REL_ALLOC should not affect this. */
1234#if defined REGEX_MALLOC && defined emacs
1235# undef MATCH_MAY_ALLOCATE
1236#endif
1237 986
1238/* While regex matching of a single compiled pattern isn't reentrant 987/* For 'regs_allocated'. */
1239 (because we compile regexes to bytecode programs, and the bytecode 988enum { REGS_UNALLOCATED, REGS_REALLOCATE, REGS_FIXED };
1240 programs are self-modifying), the regex machinery must nevertheless
1241 be reentrant with respect to _different_ patterns, and we do that
1242 by avoiding global variables and using MATCH_MAY_ALLOCATE. */
1243#if !defined MATCH_MAY_ALLOCATE && defined emacs
1244# error "Emacs requires MATCH_MAY_ALLOCATE"
1245#endif
1246 989
990/* If 'regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
991 're_match_2' returns information about at least this many registers
992 the first time a `regs' structure is passed. */
993enum { RE_NREGS = 30 };
1247 994
995/* The searching and matching functions allocate memory for the
996 failure stack and registers. Otherwise searching and matching
997 routines would have much smaller memory resources at their
998 disposal, and therefore might fail to handle complex regexps. */
999
1248/* Failure stack declarations and macros; both re_compile_fastmap and 1000/* Failure stack declarations and macros; both re_compile_fastmap and
1249 re_match_2 use a failure stack. These have to be macros because of 1001 re_match_2 use a failure stack. These have to be macros because of
1250 REGEX_ALLOCATE_STACK. */ 1002 SAFE_ALLOCA. */
1251 1003
1252 1004
1253/* Approximate number of failure points for which to initially allocate space 1005/* Approximate number of failure points for which to initially allocate space
1254 when matching. If this number is exceeded, we allocate more 1006 when matching. If this number is exceeded, we allocate more
1255 space, so it is not a hard limit. */ 1007 space, so it is not a hard limit. */
1256#ifndef INIT_FAILURE_ALLOC 1008#define INIT_FAILURE_ALLOC 20
1257# define INIT_FAILURE_ALLOC 20
1258#endif
1259 1009
1260/* Roughly the maximum number of failure points on the stack. Would be 1010/* Roughly the maximum number of failure points on the stack. Would be
1261 exactly that if always used TYPICAL_FAILURE_SIZE items each time we failed. 1011 exactly that if always used TYPICAL_FAILURE_SIZE items each time we failed.
1262 This is a variable only so users of regex can assign to it; we never 1012 This is a variable only so users of regex can assign to it; we never
1263 change it ourselves. We always multiply it by TYPICAL_FAILURE_SIZE 1013 change it ourselves. We always multiply it by TYPICAL_FAILURE_SIZE
1264 before using it, so it should probably be a byte-count instead. */ 1014 before using it, so it should probably be a byte-count instead. */
1265# if defined MATCH_MAY_ALLOCATE
1266/* Note that 4400 was enough to cause a crash on Alpha OSF/1, 1015/* Note that 4400 was enough to cause a crash on Alpha OSF/1,
1267 whose default stack limit is 2mb. In order for a larger 1016 whose default stack limit is 2mb. In order for a larger
1268 value to work reliably, you have to try to make it accord 1017 value to work reliably, you have to try to make it accord
1269 with the process stack limit. */ 1018 with the process stack limit. */
1270size_t emacs_re_max_failures = 40000; 1019size_t emacs_re_max_failures = 40000;
1271# else
1272size_t emacs_re_max_failures = 4000;
1273# endif
1274 1020
1275union fail_stack_elt 1021union fail_stack_elt
1276{ 1022{
@@ -1292,33 +1038,17 @@ typedef struct
1292#define FAIL_STACK_EMPTY() (fail_stack.frame == 0) 1038#define FAIL_STACK_EMPTY() (fail_stack.frame == 0)
1293 1039
1294 1040
1295/* Define macros to initialize and free the failure stack. 1041/* Define macros to initialize and free the failure stack. */
1296 Do `return -2' if the alloc fails. */
1297 1042
1298#ifdef MATCH_MAY_ALLOCATE 1043#define INIT_FAIL_STACK() \
1299# define INIT_FAIL_STACK() \
1300 do { \ 1044 do { \
1301 fail_stack.stack = \ 1045 fail_stack.stack = \
1302 REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * TYPICAL_FAILURE_SIZE \ 1046 SAFE_ALLOCA (INIT_FAILURE_ALLOC * TYPICAL_FAILURE_SIZE \
1303 * sizeof (fail_stack_elt_t)); \ 1047 * sizeof (fail_stack_elt_t)); \
1304 \
1305 if (fail_stack.stack == NULL) \
1306 return -2; \
1307 \
1308 fail_stack.size = INIT_FAILURE_ALLOC; \ 1048 fail_stack.size = INIT_FAILURE_ALLOC; \
1309 fail_stack.avail = 0; \ 1049 fail_stack.avail = 0; \
1310 fail_stack.frame = 0; \ 1050 fail_stack.frame = 0; \
1311 } while (0) 1051 } while (0)
1312#else
1313# define INIT_FAIL_STACK() \
1314 do { \
1315 fail_stack.avail = 0; \
1316 fail_stack.frame = 0; \
1317 } while (0)
1318
1319# define RETALLOC_IF(addr, n, t) \
1320 if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
1321#endif
1322 1052
1323 1053
1324/* Double the size of FAIL_STACK, up to a limit 1054/* Double the size of FAIL_STACK, up to a limit
@@ -1327,7 +1057,7 @@ typedef struct
1327 Return 1 if succeeds, and 0 if either ran out of memory 1057 Return 1 if succeeds, and 0 if either ran out of memory
1328 allocating space for it or it was already too large. 1058 allocating space for it or it was already too large.
1329 1059
1330 REGEX_REALLOCATE_STACK requires `destination' be declared. */ 1060 REGEX_REALLOCATE requires `destination' be declared. */
1331 1061
1332/* Factor to increase the failure stack size by 1062/* Factor to increase the failure stack size by
1333 when we increase it. 1063 when we increase it.
@@ -1340,18 +1070,15 @@ typedef struct
1340 (((fail_stack).size >= emacs_re_max_failures * TYPICAL_FAILURE_SIZE) \ 1070 (((fail_stack).size >= emacs_re_max_failures * TYPICAL_FAILURE_SIZE) \
1341 ? 0 \ 1071 ? 0 \
1342 : ((fail_stack).stack \ 1072 : ((fail_stack).stack \
1343 = REGEX_REALLOCATE_STACK ((fail_stack).stack, \ 1073 = REGEX_REALLOCATE ((fail_stack).stack, \
1344 (fail_stack).size * sizeof (fail_stack_elt_t), \ 1074 (fail_stack).size * sizeof (fail_stack_elt_t), \
1345 min (emacs_re_max_failures * TYPICAL_FAILURE_SIZE, \ 1075 min (emacs_re_max_failures * TYPICAL_FAILURE_SIZE, \
1346 ((fail_stack).size * FAIL_STACK_GROWTH_FACTOR)) \ 1076 ((fail_stack).size * FAIL_STACK_GROWTH_FACTOR)) \
1347 * sizeof (fail_stack_elt_t)), \ 1077 * sizeof (fail_stack_elt_t)), \
1348 \ 1078 ((fail_stack).size \
1349 (fail_stack).stack == NULL \ 1079 = (min (emacs_re_max_failures * TYPICAL_FAILURE_SIZE, \
1350 ? 0 \ 1080 ((fail_stack).size * FAIL_STACK_GROWTH_FACTOR)))), \
1351 : ((fail_stack).size \ 1081 1))
1352 = (min (emacs_re_max_failures * TYPICAL_FAILURE_SIZE, \
1353 ((fail_stack).size * FAIL_STACK_GROWTH_FACTOR))), \
1354 1)))
1355 1082
1356 1083
1357/* Push a pointer value onto the failure stack. 1084/* Push a pointer value onto the failure stack.
@@ -1385,8 +1112,8 @@ typedef struct
1385while (REMAINING_AVAIL_SLOTS <= space) { \ 1112while (REMAINING_AVAIL_SLOTS <= space) { \
1386 if (!GROW_FAIL_STACK (fail_stack)) \ 1113 if (!GROW_FAIL_STACK (fail_stack)) \
1387 return -2; \ 1114 return -2; \
1388 DEBUG_PRINT ("\n Doubled stack; size now: %zd\n", (fail_stack).size);\ 1115 DEBUG_PRINT ("\n Doubled stack; size now: %zu\n", (fail_stack).size);\
1389 DEBUG_PRINT (" slots available: %zd\n", REMAINING_AVAIL_SLOTS);\ 1116 DEBUG_PRINT (" slots available: %zu\n", REMAINING_AVAIL_SLOTS);\
1390} 1117}
1391 1118
1392/* Push register NUM onto the stack. */ 1119/* Push register NUM onto the stack. */
@@ -1424,7 +1151,7 @@ do { \
1424 if (pfreg == -1) \ 1151 if (pfreg == -1) \
1425 { \ 1152 { \
1426 /* It's a counter. */ \ 1153 /* It's a counter. */ \
1427 /* Here, we discard `const', making re_match non-reentrant. */ \ 1154 /* Discard 'const', making re_search non-reentrant. */ \
1428 unsigned char *ptr = (unsigned char *) POP_FAILURE_POINTER (); \ 1155 unsigned char *ptr = (unsigned char *) POP_FAILURE_POINTER (); \
1429 pfreg = POP_FAILURE_INT (); \ 1156 pfreg = POP_FAILURE_INT (); \
1430 STORE_NUMBER (ptr, pfreg); \ 1157 STORE_NUMBER (ptr, pfreg); \
@@ -1442,14 +1169,14 @@ do { \
1442/* Check that we are not stuck in an infinite loop. */ 1169/* Check that we are not stuck in an infinite loop. */
1443#define CHECK_INFINITE_LOOP(pat_cur, string_place) \ 1170#define CHECK_INFINITE_LOOP(pat_cur, string_place) \
1444do { \ 1171do { \
1445 ssize_t failure = TOP_FAILURE_HANDLE (); \ 1172 ptrdiff_t failure = TOP_FAILURE_HANDLE (); \
1446 /* Check for infinite matching loops */ \ 1173 /* Check for infinite matching loops */ \
1447 while (failure > 0 \ 1174 while (failure > 0 \
1448 && (FAILURE_STR (failure) == string_place \ 1175 && (FAILURE_STR (failure) == string_place \
1449 || FAILURE_STR (failure) == NULL)) \ 1176 || FAILURE_STR (failure) == NULL)) \
1450 { \ 1177 { \
1451 assert (FAILURE_PAT (failure) >= bufp->buffer \ 1178 eassert (FAILURE_PAT (failure) >= bufp->buffer \
1452 && FAILURE_PAT (failure) <= bufp->buffer + bufp->used); \ 1179 && FAILURE_PAT (failure) <= bufp->buffer + bufp->used); \
1453 if (FAILURE_PAT (failure) == pat_cur) \ 1180 if (FAILURE_PAT (failure) == pat_cur) \
1454 { \ 1181 { \
1455 cycle = 1; \ 1182 cycle = 1; \
@@ -1478,14 +1205,14 @@ do { \
1478 \ 1205 \
1479 DEBUG_STATEMENT (nfailure_points_pushed++); \ 1206 DEBUG_STATEMENT (nfailure_points_pushed++); \
1480 DEBUG_PRINT ("\nPUSH_FAILURE_POINT:\n"); \ 1207 DEBUG_PRINT ("\nPUSH_FAILURE_POINT:\n"); \
1481 DEBUG_PRINT (" Before push, next avail: %zd\n", (fail_stack).avail); \ 1208 DEBUG_PRINT (" Before push, next avail: %zu\n", (fail_stack).avail); \
1482 DEBUG_PRINT (" size: %zd\n", (fail_stack).size);\ 1209 DEBUG_PRINT (" size: %zu\n", (fail_stack).size);\
1483 \ 1210 \
1484 ENSURE_FAIL_STACK (NUM_NONREG_ITEMS); \ 1211 ENSURE_FAIL_STACK (NUM_NONREG_ITEMS); \
1485 \ 1212 \
1486 DEBUG_PRINT ("\n"); \ 1213 DEBUG_PRINT ("\n"); \
1487 \ 1214 \
1488 DEBUG_PRINT (" Push frame index: %zd\n", fail_stack.frame); \ 1215 DEBUG_PRINT (" Push frame index: %zu\n", fail_stack.frame); \
1489 PUSH_FAILURE_INT (fail_stack.frame); \ 1216 PUSH_FAILURE_INT (fail_stack.frame); \
1490 \ 1217 \
1491 DEBUG_PRINT (" Push string %p: \"", string_place); \ 1218 DEBUG_PRINT (" Push string %p: \"", string_place); \
@@ -1523,12 +1250,12 @@ do { \
1523 1250
1524#define POP_FAILURE_POINT(str, pat) \ 1251#define POP_FAILURE_POINT(str, pat) \
1525do { \ 1252do { \
1526 assert (!FAIL_STACK_EMPTY ()); \ 1253 eassert (!FAIL_STACK_EMPTY ()); \
1527 \ 1254 \
1528 /* Remove failure points and point to how many regs pushed. */ \ 1255 /* Remove failure points and point to how many regs pushed. */ \
1529 DEBUG_PRINT ("POP_FAILURE_POINT:\n"); \ 1256 DEBUG_PRINT ("POP_FAILURE_POINT:\n"); \
1530 DEBUG_PRINT (" Before pop, next avail: %zd\n", fail_stack.avail); \ 1257 DEBUG_PRINT (" Before pop, next avail: %zu\n", fail_stack.avail); \
1531 DEBUG_PRINT (" size: %zd\n", fail_stack.size); \ 1258 DEBUG_PRINT (" size: %zu\n", fail_stack.size); \
1532 \ 1259 \
1533 /* Pop the saved registers. */ \ 1260 /* Pop the saved registers. */ \
1534 while (fail_stack.frame < fail_stack.avail) \ 1261 while (fail_stack.frame < fail_stack.avail) \
@@ -1547,10 +1274,10 @@ do { \
1547 DEBUG_PRINT ("\"\n"); \ 1274 DEBUG_PRINT ("\"\n"); \
1548 \ 1275 \
1549 fail_stack.frame = POP_FAILURE_INT (); \ 1276 fail_stack.frame = POP_FAILURE_INT (); \
1550 DEBUG_PRINT (" Popping frame index: %zd\n", fail_stack.frame); \ 1277 DEBUG_PRINT (" Popping frame index: %zu\n", fail_stack.frame); \
1551 \ 1278 \
1552 assert (fail_stack.avail >= 0); \ 1279 eassert (fail_stack.avail >= 0); \
1553 assert (fail_stack.frame <= fail_stack.avail); \ 1280 eassert (fail_stack.frame <= fail_stack.avail); \
1554 \ 1281 \
1555 DEBUG_STATEMENT (nfailure_points_popped++); \ 1282 DEBUG_STATEMENT (nfailure_points_popped++); \
1556} while (0) /* POP_FAILURE_POINT */ 1283} while (0) /* POP_FAILURE_POINT */
@@ -1563,12 +1290,8 @@ do { \
1563/* Subroutine declarations and macros for regex_compile. */ 1290/* Subroutine declarations and macros for regex_compile. */
1564 1291
1565static reg_errcode_t regex_compile (re_char *pattern, size_t size, 1292static reg_errcode_t regex_compile (re_char *pattern, size_t size,
1566#ifdef emacs
1567 bool posix_backtracking, 1293 bool posix_backtracking,
1568 const char *whitespace_regexp, 1294 const char *whitespace_regexp,
1569#else
1570 reg_syntax_t syntax,
1571#endif
1572 struct re_pattern_buffer *bufp); 1295 struct re_pattern_buffer *bufp);
1573static void store_op1 (re_opcode_t op, unsigned char *loc, int arg); 1296static void store_op1 (re_opcode_t op, unsigned char *loc, int arg);
1574static void store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2); 1297static void store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2);
@@ -1576,10 +1299,10 @@ static void insert_op1 (re_opcode_t op, unsigned char *loc,
1576 int arg, unsigned char *end); 1299 int arg, unsigned char *end);
1577static void insert_op2 (re_opcode_t op, unsigned char *loc, 1300static void insert_op2 (re_opcode_t op, unsigned char *loc,
1578 int arg1, int arg2, unsigned char *end); 1301 int arg1, int arg2, unsigned char *end);
1579static boolean at_begline_loc_p (re_char *pattern, re_char *p, 1302static bool at_begline_loc_p (re_char *pattern, re_char *p,
1580 reg_syntax_t syntax); 1303 reg_syntax_t syntax);
1581static boolean at_endline_loc_p (re_char *p, re_char *pend, 1304static bool at_endline_loc_p (re_char *p, re_char *pend,
1582 reg_syntax_t syntax); 1305 reg_syntax_t syntax);
1583static re_char *skip_one_char (re_char *p); 1306static re_char *skip_one_char (re_char *p);
1584static int analyze_first (re_char *p, re_char *pend, 1307static int analyze_first (re_char *p, re_char *pend,
1585 char *fastmap, const int multibyte); 1308 char *fastmap, const int multibyte);
@@ -1595,14 +1318,15 @@ static int analyze_first (re_char *p, re_char *pend,
1595 } while (0) 1318 } while (0)
1596 1319
1597 1320
1598/* If `translate' is non-null, return translate[D], else just D. We 1321#define RE_TRANSLATE(TBL, C) char_table_translate (TBL, C)
1322#define RE_TRANSLATE_P(TBL) (!EQ (TBL, make_number (0)))
1323
1324/* If `translate' is non-zero, return translate[D], else just D. We
1599 cast the subscript to translate because some data is declared as 1325 cast the subscript to translate because some data is declared as
1600 `char *', to avoid warnings when a string constant is passed. But 1326 `char *', to avoid warnings when a string constant is passed. But
1601 when we use a character as a subscript we must make it unsigned. */ 1327 when we use a character as a subscript we must make it unsigned. */
1602#ifndef TRANSLATE 1328#define TRANSLATE(d) \
1603# define TRANSLATE(d) \
1604 (RE_TRANSLATE_P (translate) ? RE_TRANSLATE (translate, (d)) : (d)) 1329 (RE_TRANSLATE_P (translate) ? RE_TRANSLATE (translate, (d)) : (d))
1605#endif
1606 1330
1607 1331
1608/* Macros for outputting the compiled pattern into `buffer'. */ 1332/* Macros for outputting the compiled pattern into `buffer'. */
@@ -1677,8 +1401,6 @@ static int analyze_first (re_char *p, re_char *pend,
1677 if (laststart_set) laststart_off = laststart - old_buffer; \ 1401 if (laststart_set) laststart_off = laststart - old_buffer; \
1678 if (pending_exact_set) pending_exact_off = pending_exact - old_buffer; \ 1402 if (pending_exact_set) pending_exact_off = pending_exact - old_buffer; \
1679 RETALLOC (bufp->buffer, bufp->allocated, unsigned char); \ 1403 RETALLOC (bufp->buffer, bufp->allocated, unsigned char); \
1680 if (bufp->buffer == NULL) \
1681 return REG_ESPACE; \
1682 unsigned char *new_buffer = bufp->buffer; \ 1404 unsigned char *new_buffer = bufp->buffer; \
1683 b = new_buffer + b_off; \ 1405 b = new_buffer + b_off; \
1684 begalt = new_buffer + begalt_off; \ 1406 begalt = new_buffer + begalt_off; \
@@ -1729,12 +1451,6 @@ typedef struct
1729 1451
1730/* The next available element. */ 1452/* The next available element. */
1731#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) 1453#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1732
1733/* Explicit quit checking is needed for Emacs, which uses polling to
1734 process input events. */
1735#ifndef emacs
1736static void maybe_quit (void) {}
1737#endif
1738 1454
1739/* Structure to manage work area for range table. */ 1455/* Structure to manage work area for range table. */
1740struct range_table_work_area 1456struct range_table_work_area
@@ -1745,8 +1461,6 @@ struct range_table_work_area
1745 int bits; /* flag to record character classes */ 1461 int bits; /* flag to record character classes */
1746}; 1462};
1747 1463
1748#ifdef emacs
1749
1750/* Make sure that WORK_AREA can hold more N multibyte characters. 1464/* Make sure that WORK_AREA can hold more N multibyte characters.
1751 This is used only in set_image_of_range and set_image_of_range_1. 1465 This is used only in set_image_of_range and set_image_of_range_1.
1752 It expects WORK_AREA to be a pointer. 1466 It expects WORK_AREA to be a pointer.
@@ -1773,13 +1487,11 @@ struct range_table_work_area
1773 (work_area).table[(work_area).used++] = (range_end); \ 1487 (work_area).table[(work_area).used++] = (range_end); \
1774 } while (0) 1488 } while (0)
1775 1489
1776#endif /* emacs */
1777
1778/* Free allocated memory for WORK_AREA. */ 1490/* Free allocated memory for WORK_AREA. */
1779#define FREE_RANGE_TABLE_WORK_AREA(work_area) \ 1491#define FREE_RANGE_TABLE_WORK_AREA(work_area) \
1780 do { \ 1492 do { \
1781 if ((work_area).table) \ 1493 if ((work_area).table) \
1782 free ((work_area).table); \ 1494 xfree ((work_area).table); \
1783 } while (0) 1495 } while (0)
1784 1496
1785#define CLEAR_RANGE_TABLE_WORK_USED(work_area) ((work_area).used = 0, (work_area).bits = 0) 1497#define CLEAR_RANGE_TABLE_WORK_USED(work_area) ((work_area).used = 0, (work_area).bits = 0)
@@ -1807,8 +1519,6 @@ struct range_table_work_area
1807#define SET_LIST_BIT(c) (b[((c)) / BYTEWIDTH] |= 1 << ((c) % BYTEWIDTH)) 1519#define SET_LIST_BIT(c) (b[((c)) / BYTEWIDTH] |= 1 << ((c) % BYTEWIDTH))
1808 1520
1809 1521
1810#ifdef emacs
1811
1812/* Store characters in the range FROM to TO in the bitmap at B (for 1522/* Store characters in the range FROM to TO in the bitmap at B (for
1813 ASCII and unibyte characters) and WORK_AREA (for multibyte 1523 ASCII and unibyte characters) and WORK_AREA (for multibyte
1814 characters) while translating them and paying attention to the 1524 characters) while translating them and paying attention to the
@@ -1912,8 +1622,6 @@ struct range_table_work_area
1912 } \ 1622 } \
1913 } while (0) 1623 } while (0)
1914 1624
1915#endif /* emacs */
1916
1917/* Get the next unsigned number in the uncompiled pattern. */ 1625/* Get the next unsigned number in the uncompiled pattern. */
1918#define GET_INTERVAL_COUNT(num) \ 1626#define GET_INTERVAL_COUNT(num) \
1919 do { \ 1627 do { \
@@ -1936,8 +1644,6 @@ struct range_table_work_area
1936 } \ 1644 } \
1937 } while (0) 1645 } while (0)
1938 1646
1939#if ! WIDE_CHAR_SUPPORT
1940
1941/* Parse a character class, i.e. string such as "[:name:]". *strp 1647/* Parse a character class, i.e. string such as "[:name:]". *strp
1942 points to the string to be parsed and limit is length, in bytes, of 1648 points to the string to be parsed and limit is length, in bytes, of
1943 that string. 1649 that string.
@@ -2031,7 +1737,7 @@ re_wctype_parse (const unsigned char **strp, unsigned limit)
2031} 1737}
2032 1738
2033/* True if CH is in the char class CC. */ 1739/* True if CH is in the char class CC. */
2034boolean 1740bool
2035re_iswctype (int ch, re_wctype_t cc) 1741re_iswctype (int ch, re_wctype_t cc)
2036{ 1742{
2037 switch (cc) 1743 switch (cc)
@@ -2084,7 +1790,6 @@ re_wctype_to_bit (re_wctype_t cc)
2084 abort (); 1790 abort ();
2085 } 1791 }
2086} 1792}
2087#endif
2088 1793
2089/* Filling in the work area of a range. */ 1794/* Filling in the work area of a range. */
2090 1795
@@ -2094,288 +1799,16 @@ static void
2094extend_range_table_work_area (struct range_table_work_area *work_area) 1799extend_range_table_work_area (struct range_table_work_area *work_area)
2095{ 1800{
2096 work_area->allocated += 16 * sizeof (int); 1801 work_area->allocated += 16 * sizeof (int);
2097 work_area->table = realloc (work_area->table, work_area->allocated); 1802 work_area->table = xrealloc (work_area->table, work_area->allocated);
2098} 1803}
2099
2100#if 0
2101#ifdef emacs
2102
2103/* Carefully find the ranges of codes that are equivalent
2104 under case conversion to the range start..end when passed through
2105 TRANSLATE. Handle the case where non-letters can come in between
2106 two upper-case letters (which happens in Latin-1).
2107 Also handle the case of groups of more than 2 case-equivalent chars.
2108
2109 The basic method is to look at consecutive characters and see
2110 if they can form a run that can be handled as one.
2111
2112 Returns -1 if successful, REG_ESPACE if ran out of space. */
2113
2114static int
2115set_image_of_range_1 (struct range_table_work_area *work_area,
2116 re_wchar_t start, re_wchar_t end,
2117 RE_TRANSLATE_TYPE translate)
2118{
2119 /* `one_case' indicates a character, or a run of characters,
2120 each of which is an isolate (no case-equivalents).
2121 This includes all ASCII non-letters.
2122
2123 `two_case' indicates a character, or a run of characters,
2124 each of which has two case-equivalent forms.
2125 This includes all ASCII letters.
2126
2127 `strange' indicates a character that has more than one
2128 case-equivalent. */
2129
2130 enum case_type {one_case, two_case, strange};
2131
2132 /* Describe the run that is in progress,
2133 which the next character can try to extend.
2134 If run_type is strange, that means there really is no run.
2135 If run_type is one_case, then run_start...run_end is the run.
2136 If run_type is two_case, then the run is run_start...run_end,
2137 and the case-equivalents end at run_eqv_end. */
2138
2139 enum case_type run_type = strange;
2140 int run_start, run_end, run_eqv_end;
2141
2142 Lisp_Object eqv_table;
2143
2144 if (!RE_TRANSLATE_P (translate))
2145 {
2146 EXTEND_RANGE_TABLE (work_area, 2);
2147 work_area->table[work_area->used++] = (start);
2148 work_area->table[work_area->used++] = (end);
2149 return -1;
2150 }
2151
2152 eqv_table = XCHAR_TABLE (translate)->extras[2];
2153
2154 for (; start <= end; start++)
2155 {
2156 enum case_type this_type;
2157 int eqv = RE_TRANSLATE (eqv_table, start);
2158 int minchar, maxchar;
2159
2160 /* Classify this character */
2161 if (eqv == start)
2162 this_type = one_case;
2163 else if (RE_TRANSLATE (eqv_table, eqv) == start)
2164 this_type = two_case;
2165 else
2166 this_type = strange;
2167
2168 if (start < eqv)
2169 minchar = start, maxchar = eqv;
2170 else
2171 minchar = eqv, maxchar = start;
2172
2173 /* Can this character extend the run in progress? */
2174 if (this_type == strange || this_type != run_type
2175 || !(minchar == run_end + 1
2176 && (run_type == two_case
2177 ? maxchar == run_eqv_end + 1 : 1)))
2178 {
2179 /* No, end the run.
2180 Record each of its equivalent ranges. */
2181 if (run_type == one_case)
2182 {
2183 EXTEND_RANGE_TABLE (work_area, 2);
2184 work_area->table[work_area->used++] = run_start;
2185 work_area->table[work_area->used++] = run_end;
2186 }
2187 else if (run_type == two_case)
2188 {
2189 EXTEND_RANGE_TABLE (work_area, 4);
2190 work_area->table[work_area->used++] = run_start;
2191 work_area->table[work_area->used++] = run_end;
2192 work_area->table[work_area->used++]
2193 = RE_TRANSLATE (eqv_table, run_start);
2194 work_area->table[work_area->used++]
2195 = RE_TRANSLATE (eqv_table, run_end);
2196 }
2197 run_type = strange;
2198 }
2199
2200 if (this_type == strange)
2201 {
2202 /* For a strange character, add each of its equivalents, one
2203 by one. Don't start a range. */
2204 do
2205 {
2206 EXTEND_RANGE_TABLE (work_area, 2);
2207 work_area->table[work_area->used++] = eqv;
2208 work_area->table[work_area->used++] = eqv;
2209 eqv = RE_TRANSLATE (eqv_table, eqv);
2210 }
2211 while (eqv != start);
2212 }
2213
2214 /* Add this char to the run, or start a new run. */
2215 else if (run_type == strange)
2216 {
2217 /* Initialize a new range. */
2218 run_type = this_type;
2219 run_start = start;
2220 run_end = start;
2221 run_eqv_end = RE_TRANSLATE (eqv_table, run_end);
2222 }
2223 else
2224 {
2225 /* Extend a running range. */
2226 run_end = minchar;
2227 run_eqv_end = RE_TRANSLATE (eqv_table, run_end);
2228 }
2229 }
2230
2231 /* If a run is still in progress at the end, finish it now
2232 by recording its equivalent ranges. */
2233 if (run_type == one_case)
2234 {
2235 EXTEND_RANGE_TABLE (work_area, 2);
2236 work_area->table[work_area->used++] = run_start;
2237 work_area->table[work_area->used++] = run_end;
2238 }
2239 else if (run_type == two_case)
2240 {
2241 EXTEND_RANGE_TABLE (work_area, 4);
2242 work_area->table[work_area->used++] = run_start;
2243 work_area->table[work_area->used++] = run_end;
2244 work_area->table[work_area->used++]
2245 = RE_TRANSLATE (eqv_table, run_start);
2246 work_area->table[work_area->used++]
2247 = RE_TRANSLATE (eqv_table, run_end);
2248 }
2249
2250 return -1;
2251}
2252
2253#endif /* emacs */
2254
2255/* Record the image of the range start..end when passed through
2256 TRANSLATE. This is not necessarily TRANSLATE(start)..TRANSLATE(end)
2257 and is not even necessarily contiguous.
2258 Normally we approximate it with the smallest contiguous range that contains
2259 all the chars we need. However, for Latin-1 we go to extra effort
2260 to do a better job.
2261
2262 This function is not called for ASCII ranges.
2263
2264 Returns -1 if successful, REG_ESPACE if ran out of space. */
2265
2266static int
2267set_image_of_range (struct range_table_work_area *work_area,
2268 re_wchar_t start, re_wchar_t end,
2269 RE_TRANSLATE_TYPE translate)
2270{
2271 re_wchar_t cmin, cmax;
2272
2273#ifdef emacs
2274 /* For Latin-1 ranges, use set_image_of_range_1
2275 to get proper handling of ranges that include letters and nonletters.
2276 For a range that includes the whole of Latin-1, this is not necessary.
2277 For other character sets, we don't bother to get this right. */
2278 if (RE_TRANSLATE_P (translate) && start < 04400
2279 && !(start < 04200 && end >= 04377))
2280 {
2281 int newend;
2282 int tem;
2283 newend = end;
2284 if (newend > 04377)
2285 newend = 04377;
2286 tem = set_image_of_range_1 (work_area, start, newend, translate);
2287 if (tem > 0)
2288 return tem;
2289
2290 start = 04400;
2291 if (end < 04400)
2292 return -1;
2293 }
2294#endif
2295
2296 EXTEND_RANGE_TABLE (work_area, 2);
2297 work_area->table[work_area->used++] = (start);
2298 work_area->table[work_area->used++] = (end);
2299
2300 cmin = -1, cmax = -1;
2301
2302 if (RE_TRANSLATE_P (translate))
2303 {
2304 int ch;
2305
2306 for (ch = start; ch <= end; ch++)
2307 {
2308 re_wchar_t c = TRANSLATE (ch);
2309 if (! (start <= c && c <= end))
2310 {
2311 if (cmin == -1)
2312 cmin = c, cmax = c;
2313 else
2314 {
2315 cmin = min (cmin, c);
2316 cmax = max (cmax, c);
2317 }
2318 }
2319 }
2320
2321 if (cmin != -1)
2322 {
2323 EXTEND_RANGE_TABLE (work_area, 2);
2324 work_area->table[work_area->used++] = (cmin);
2325 work_area->table[work_area->used++] = (cmax);
2326 }
2327 }
2328
2329 return -1;
2330}
2331#endif /* 0 */
2332
2333#ifndef MATCH_MAY_ALLOCATE
2334
2335/* If we cannot allocate large objects within re_match_2_internal,
2336 we make the fail stack and register vectors global.
2337 The fail stack, we grow to the maximum size when a regexp
2338 is compiled.
2339 The register vectors, we adjust in size each time we
2340 compile a regexp, according to the number of registers it needs. */
2341
2342static fail_stack_type fail_stack;
2343
2344/* Size with which the following vectors are currently allocated.
2345 That is so we can make them bigger as needed,
2346 but never make them smaller. */
2347static int regs_allocated_size;
2348
2349static re_char ** regstart, ** regend;
2350static re_char **best_regstart, **best_regend;
2351
2352/* Make the register vectors big enough for NUM_REGS registers,
2353 but don't make them smaller. */
2354
2355static
2356regex_grow_registers (int num_regs)
2357{
2358 if (num_regs > regs_allocated_size)
2359 {
2360 RETALLOC_IF (regstart, num_regs, re_char *);
2361 RETALLOC_IF (regend, num_regs, re_char *);
2362 RETALLOC_IF (best_regstart, num_regs, re_char *);
2363 RETALLOC_IF (best_regend, num_regs, re_char *);
2364
2365 regs_allocated_size = num_regs;
2366 }
2367}
2368
2369#endif /* not MATCH_MAY_ALLOCATE */
2370 1804
2371static boolean group_in_compile_stack (compile_stack_type compile_stack, 1805static bool group_in_compile_stack (compile_stack_type, regnum_t);
2372 regnum_t regnum);
2373 1806
2374/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX. 1807/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
2375 Returns one of error codes defined in `regex-emacs.h', or zero for success. 1808 Returns one of error codes defined in `regex-emacs.h', or zero for success.
2376 1809
2377 If WHITESPACE_REGEXP is given (only #ifdef emacs), it is used instead of 1810 If WHITESPACE_REGEXP is given, it is used instead of a space
2378 a space character in PATTERN. 1811 character in PATTERN.
2379 1812
2380 Assumes the `allocated' (and perhaps `buffer') and `translate' 1813 Assumes the `allocated' (and perhaps `buffer') and `translate'
2381 fields are set in BUFP on entry. 1814 fields are set in BUFP on entry.
@@ -2404,42 +1837,33 @@ do { \
2404#define FREE_STACK_RETURN(value) \ 1837#define FREE_STACK_RETURN(value) \
2405 do { \ 1838 do { \
2406 FREE_RANGE_TABLE_WORK_AREA (range_table_work); \ 1839 FREE_RANGE_TABLE_WORK_AREA (range_table_work); \
2407 free (compile_stack.stack); \ 1840 xfree (compile_stack.stack); \
2408 return value; \ 1841 return value; \
2409 } while (0) 1842 } while (0)
2410 1843
2411static reg_errcode_t 1844static reg_errcode_t
2412regex_compile (re_char *pattern, size_t size, 1845regex_compile (re_char *pattern, size_t size,
2413#ifdef emacs
2414# define syntax RE_SYNTAX_EMACS
2415 bool posix_backtracking, 1846 bool posix_backtracking,
2416 const char *whitespace_regexp, 1847 const char *whitespace_regexp,
2417#else
2418 reg_syntax_t syntax,
2419# define posix_backtracking (!(syntax & RE_NO_POSIX_BACKTRACKING))
2420#endif
2421 struct re_pattern_buffer *bufp) 1848 struct re_pattern_buffer *bufp)
2422{ 1849{
1850 reg_syntax_t syntax = RE_SYNTAX_EMACS;
1851
2423 /* We fetch characters from PATTERN here. */ 1852 /* We fetch characters from PATTERN here. */
2424 register re_wchar_t c, c1; 1853 int c, c1;
2425 1854
2426 /* Points to the end of the buffer, where we should append. */ 1855 /* Points to the end of the buffer, where we should append. */
2427 register unsigned char *b; 1856 unsigned char *b;
2428 1857
2429 /* Keeps track of unclosed groups. */ 1858 /* Keeps track of unclosed groups. */
2430 compile_stack_type compile_stack; 1859 compile_stack_type compile_stack;
2431 1860
2432 /* Points to the current (ending) position in the pattern. */ 1861 /* Points to the current (ending) position in the pattern. */
2433#ifdef AIX
2434 /* `const' makes AIX compiler fail. */
2435 unsigned char *p = pattern;
2436#else
2437 re_char *p = pattern; 1862 re_char *p = pattern;
2438#endif
2439 re_char *pend = pattern + size; 1863 re_char *pend = pattern + size;
2440 1864
2441 /* How to translate the characters in the pattern. */ 1865 /* How to translate the characters in the pattern. */
2442 RE_TRANSLATE_TYPE translate = bufp->translate; 1866 Lisp_Object translate = bufp->translate;
2443 1867
2444 /* Address of the count-byte of the most recently inserted `exactn' 1868 /* Address of the count-byte of the most recently inserted `exactn'
2445 command. This makes it possible to tell if a new exact-match 1869 command. This makes it possible to tell if a new exact-match
@@ -2468,9 +1892,8 @@ regex_compile (re_char *pattern, size_t size,
2468 struct range_table_work_area range_table_work; 1892 struct range_table_work_area range_table_work;
2469 1893
2470 /* If the object matched can contain multibyte characters. */ 1894 /* If the object matched can contain multibyte characters. */
2471 const boolean multibyte = RE_MULTIBYTE_P (bufp); 1895 bool multibyte = RE_MULTIBYTE_P (bufp);
2472 1896
2473#ifdef emacs
2474 /* Nonzero if we have pushed down into a subpattern. */ 1897 /* Nonzero if we have pushed down into a subpattern. */
2475 int in_subpattern = 0; 1898 int in_subpattern = 0;
2476 1899
@@ -2479,26 +1902,22 @@ regex_compile (re_char *pattern, size_t size,
2479 re_char *main_p; 1902 re_char *main_p;
2480 re_char *main_pattern; 1903 re_char *main_pattern;
2481 re_char *main_pend; 1904 re_char *main_pend;
2482#endif
2483 1905
2484#ifdef DEBUG 1906#ifdef REGEX_EMACS_DEBUG
2485 debug++; 1907 regex_emacs_debug++;
2486 DEBUG_PRINT ("\nCompiling pattern: "); 1908 DEBUG_PRINT ("\nCompiling pattern: ");
2487 if (debug > 0) 1909 if (regex_emacs_debug > 0)
2488 { 1910 {
2489 unsigned debug_count; 1911 size_t debug_count;
2490 1912
2491 for (debug_count = 0; debug_count < size; debug_count++) 1913 for (debug_count = 0; debug_count < size; debug_count++)
2492 putchar (pattern[debug_count]); 1914 putchar (pattern[debug_count]);
2493 putchar ('\n'); 1915 putchar ('\n');
2494 } 1916 }
2495#endif /* DEBUG */ 1917#endif
2496 1918
2497 /* Initialize the compile stack. */ 1919 /* Initialize the compile stack. */
2498 compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t); 1920 compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
2499 if (compile_stack.stack == NULL)
2500 return REG_ESPACE;
2501
2502 compile_stack.size = INIT_COMPILE_STACK_SIZE; 1921 compile_stack.size = INIT_COMPILE_STACK_SIZE;
2503 compile_stack.avail = 0; 1922 compile_stack.avail = 0;
2504 1923
@@ -2506,9 +1925,6 @@ regex_compile (re_char *pattern, size_t size,
2506 range_table_work.allocated = 0; 1925 range_table_work.allocated = 0;
2507 1926
2508 /* Initialize the pattern buffer. */ 1927 /* Initialize the pattern buffer. */
2509#ifndef emacs
2510 bufp->syntax = syntax;
2511#endif
2512 bufp->fastmap_accurate = 0; 1928 bufp->fastmap_accurate = 0;
2513 bufp->not_bol = bufp->not_eol = 0; 1929 bufp->not_bol = bufp->not_eol = 0;
2514 bufp->used_syntax = 0; 1930 bufp->used_syntax = 0;
@@ -2521,11 +1937,6 @@ regex_compile (re_char *pattern, size_t size,
2521 /* Always count groups, whether or not bufp->no_sub is set. */ 1937 /* Always count groups, whether or not bufp->no_sub is set. */
2522 bufp->re_nsub = 0; 1938 bufp->re_nsub = 0;
2523 1939
2524#if !defined emacs && !defined SYNTAX_TABLE
2525 /* Initialize the syntax table. */
2526 init_syntax_once ();
2527#endif
2528
2529 if (bufp->allocated == 0) 1940 if (bufp->allocated == 0)
2530 { 1941 {
2531 if (bufp->buffer) 1942 if (bufp->buffer)
@@ -2538,8 +1949,6 @@ regex_compile (re_char *pattern, size_t size,
2538 { /* Caller did not allocate a buffer. Do it for them. */ 1949 { /* Caller did not allocate a buffer. Do it for them. */
2539 bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char); 1950 bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
2540 } 1951 }
2541 if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
2542
2543 bufp->allocated = INIT_BUF_SIZE; 1952 bufp->allocated = INIT_BUF_SIZE;
2544 } 1953 }
2545 1954
@@ -2550,7 +1959,6 @@ regex_compile (re_char *pattern, size_t size,
2550 { 1959 {
2551 if (p == pend) 1960 if (p == pend)
2552 { 1961 {
2553#ifdef emacs
2554 /* If this is the end of an included regexp, 1962 /* If this is the end of an included regexp,
2555 pop back to the main regexp and try again. */ 1963 pop back to the main regexp and try again. */
2556 if (in_subpattern) 1964 if (in_subpattern)
@@ -2561,7 +1969,6 @@ regex_compile (re_char *pattern, size_t size,
2561 pend = main_pend; 1969 pend = main_pend;
2562 continue; 1970 continue;
2563 } 1971 }
2564#endif
2565 /* If this is the end of the main regexp, we are done. */ 1972 /* If this is the end of the main regexp, we are done. */
2566 break; 1973 break;
2567 } 1974 }
@@ -2570,7 +1977,6 @@ regex_compile (re_char *pattern, size_t size,
2570 1977
2571 switch (c) 1978 switch (c)
2572 { 1979 {
2573#ifdef emacs
2574 case ' ': 1980 case ' ':
2575 { 1981 {
2576 re_char *p1 = p; 1982 re_char *p1 = p;
@@ -2603,7 +2009,6 @@ regex_compile (re_char *pattern, size_t size,
2603 pend = p + strlen (whitespace_regexp); 2009 pend = p + strlen (whitespace_regexp);
2604 break; 2010 break;
2605 } 2011 }
2606#endif
2607 2012
2608 case '^': 2013 case '^':
2609 { 2014 {
@@ -2654,8 +2059,8 @@ regex_compile (re_char *pattern, size_t size,
2654 2059
2655 { 2060 {
2656 /* 1 means zero (many) matches is allowed. */ 2061 /* 1 means zero (many) matches is allowed. */
2657 boolean zero_times_ok = 0, many_times_ok = 0; 2062 bool zero_times_ok = false, many_times_ok = false;
2658 boolean greedy = 1; 2063 bool greedy = true;
2659 2064
2660 /* If there is a sequence of repetition chars, collapse it 2065 /* If there is a sequence of repetition chars, collapse it
2661 down to just one (the right one). We can't combine 2066 down to just one (the right one). We can't combine
@@ -2666,7 +2071,7 @@ regex_compile (re_char *pattern, size_t size,
2666 { 2071 {
2667 if ((syntax & RE_FRUGAL) 2072 if ((syntax & RE_FRUGAL)
2668 && c == '?' && (zero_times_ok || many_times_ok)) 2073 && c == '?' && (zero_times_ok || many_times_ok))
2669 greedy = 0; 2074 greedy = false;
2670 else 2075 else
2671 { 2076 {
2672 zero_times_ok |= c != '+'; 2077 zero_times_ok |= c != '+';
@@ -2705,13 +2110,13 @@ regex_compile (re_char *pattern, size_t size,
2705 { 2110 {
2706 if (many_times_ok) 2111 if (many_times_ok)
2707 { 2112 {
2708 boolean simple = skip_one_char (laststart) == b; 2113 bool simple = skip_one_char (laststart) == b;
2709 size_t startoffset = 0; 2114 size_t startoffset = 0;
2710 re_opcode_t ofj = 2115 re_opcode_t ofj =
2711 /* Check if the loop can match the empty string. */ 2116 /* Check if the loop can match the empty string. */
2712 (simple || !analyze_first (laststart, b, NULL, 0)) 2117 (simple || !analyze_first (laststart, b, NULL, 0))
2713 ? on_failure_jump : on_failure_jump_loop; 2118 ? on_failure_jump : on_failure_jump_loop;
2714 assert (skip_one_char (laststart) <= b); 2119 eassert (skip_one_char (laststart) <= b);
2715 2120
2716 if (!zero_times_ok && simple) 2121 if (!zero_times_ok && simple)
2717 { /* Since simple * loops can be made faster by using 2122 { /* Since simple * loops can be made faster by using
@@ -2744,7 +2149,7 @@ regex_compile (re_char *pattern, size_t size,
2744 else 2149 else
2745 { 2150 {
2746 /* A simple ? pattern. */ 2151 /* A simple ? pattern. */
2747 assert (zero_times_ok); 2152 eassert (zero_times_ok);
2748 GET_BUFFER_SPACE (3); 2153 GET_BUFFER_SPACE (3);
2749 INSERT_JUMP (on_failure_jump, laststart, b + 3); 2154 INSERT_JUMP (on_failure_jump, laststart, b + 3);
2750 b += 3; 2155 b += 3;
@@ -2756,7 +2161,7 @@ regex_compile (re_char *pattern, size_t size,
2756 GET_BUFFER_SPACE (7); /* We might use less. */ 2161 GET_BUFFER_SPACE (7); /* We might use less. */
2757 if (many_times_ok) 2162 if (many_times_ok)
2758 { 2163 {
2759 boolean emptyp = analyze_first (laststart, b, NULL, 0); 2164 bool emptyp = analyze_first (laststart, b, NULL, 0);
2760 2165
2761 /* The non-greedy multiple match looks like 2166 /* The non-greedy multiple match looks like
2762 a repeat..until: we only need a conditional jump 2167 a repeat..until: we only need a conditional jump
@@ -2831,10 +2236,9 @@ regex_compile (re_char *pattern, size_t size,
2831 /* Read in characters and ranges, setting map bits. */ 2236 /* Read in characters and ranges, setting map bits. */
2832 for (;;) 2237 for (;;)
2833 { 2238 {
2834 boolean escaped_char = false;
2835 const unsigned char *p2 = p; 2239 const unsigned char *p2 = p;
2836 re_wctype_t cc; 2240 re_wctype_t cc;
2837 re_wchar_t ch; 2241 int ch;
2838 2242
2839 if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 2243 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2840 2244
@@ -2849,15 +2253,6 @@ regex_compile (re_char *pattern, size_t size,
2849 if (p == pend) 2253 if (p == pend)
2850 FREE_STACK_RETURN (REG_EBRACK); 2254 FREE_STACK_RETURN (REG_EBRACK);
2851 2255
2852#ifndef emacs
2853 for (ch = 0; ch < (1 << BYTEWIDTH); ++ch)
2854 if (re_iswctype (btowc (ch), cc))
2855 {
2856 c = TRANSLATE (ch);
2857 if (c < (1 << BYTEWIDTH))
2858 SET_LIST_BIT (c);
2859 }
2860#else /* emacs */
2861 /* Most character classes in a multibyte match just set 2256 /* Most character classes in a multibyte match just set
2862 a flag. Exceptions are is_blank, is_digit, is_cntrl, and 2257 a flag. Exceptions are is_blank, is_digit, is_cntrl, and
2863 is_xdigit, since they can only match ASCII characters. 2258 is_xdigit, since they can only match ASCII characters.
@@ -2884,7 +2279,7 @@ regex_compile (re_char *pattern, size_t size,
2884 } 2279 }
2885 SET_RANGE_TABLE_WORK_AREA_BIT 2280 SET_RANGE_TABLE_WORK_AREA_BIT
2886 (range_table_work, re_wctype_to_bit (cc)); 2281 (range_table_work, re_wctype_to_bit (cc));
2887#endif /* emacs */ 2282
2888 /* In most cases the matching rule for char classes only 2283 /* In most cases the matching rule for char classes only
2889 uses the syntax table for multibyte chars, so that the 2284 uses the syntax table for multibyte chars, so that the
2890 content of the syntax-table is not hardcoded in the 2285 content of the syntax-table is not hardcoded in the
@@ -2908,7 +2303,6 @@ regex_compile (re_char *pattern, size_t size,
2908 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); 2303 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2909 2304
2910 PATFETCH (c); 2305 PATFETCH (c);
2911 escaped_char = true;
2912 } 2306 }
2913 else 2307 else
2914 { 2308 {
@@ -2927,13 +2321,12 @@ regex_compile (re_char *pattern, size_t size,
2927 2321
2928 /* Fetch the character which ends the range. */ 2322 /* Fetch the character which ends the range. */
2929 PATFETCH (c1); 2323 PATFETCH (c1);
2930#ifdef emacs 2324
2931 if (CHAR_BYTE8_P (c1) 2325 if (CHAR_BYTE8_P (c1)
2932 && ! ASCII_CHAR_P (c) && ! CHAR_BYTE8_P (c)) 2326 && ! ASCII_CHAR_P (c) && ! CHAR_BYTE8_P (c))
2933 /* Treat the range from a multibyte character to 2327 /* Treat the range from a multibyte character to
2934 raw-byte character as empty. */ 2328 raw-byte character as empty. */
2935 c = c1 + 1; 2329 c = c1 + 1;
2936#endif /* emacs */
2937 } 2330 }
2938 else 2331 else
2939 /* Range from C to C. */ 2332 /* Range from C to C. */
@@ -2947,15 +2340,6 @@ regex_compile (re_char *pattern, size_t size,
2947 } 2340 }
2948 else 2341 else
2949 { 2342 {
2950#ifndef emacs
2951 /* Set the range into bitmap */
2952 for (; c <= c1; c++)
2953 {
2954 ch = TRANSLATE (c);
2955 if (ch < (1 << BYTEWIDTH))
2956 SET_LIST_BIT (ch);
2957 }
2958#else /* emacs */
2959 if (c < 128) 2343 if (c < 128)
2960 { 2344 {
2961 ch = min (127, c1); 2345 ch = min (127, c1);
@@ -2982,7 +2366,6 @@ regex_compile (re_char *pattern, size_t size,
2982 SETUP_UNIBYTE_RANGE (range_table_work, c, c1); 2366 SETUP_UNIBYTE_RANGE (range_table_work, c, c1);
2983 } 2367 }
2984 } 2368 }
2985#endif /* emacs */
2986 } 2369 }
2987 } 2370 }
2988 2371
@@ -3007,8 +2390,7 @@ regex_compile (re_char *pattern, size_t size,
3007 /* Indicate the existence of range table. */ 2390 /* Indicate the existence of range table. */
3008 laststart[1] |= 0x80; 2391 laststart[1] |= 0x80;
3009 2392
3010 /* Store the character class flag bits into the range table. 2393 /* Store the character class flag bits into the range table. */
3011 If not in emacs, these flag bits are always 0. */
3012 *b++ = RANGE_TABLE_WORK_BITS (range_table_work) & 0xff; 2394 *b++ = RANGE_TABLE_WORK_BITS (range_table_work) & 0xff;
3013 *b++ = RANGE_TABLE_WORK_BITS (range_table_work) >> 8; 2395 *b++ = RANGE_TABLE_WORK_BITS (range_table_work) >> 8;
3014 2396
@@ -3127,8 +2509,6 @@ regex_compile (re_char *pattern, size_t size,
3127 { 2509 {
3128 RETALLOC (compile_stack.stack, compile_stack.size << 1, 2510 RETALLOC (compile_stack.stack, compile_stack.size << 1,
3129 compile_stack_elt_t); 2511 compile_stack_elt_t);
3130 if (compile_stack.stack == NULL) return REG_ESPACE;
3131
3132 compile_stack.size <<= 1; 2512 compile_stack.size <<= 1;
3133 } 2513 }
3134 2514
@@ -3184,7 +2564,7 @@ regex_compile (re_char *pattern, size_t size,
3184 2564
3185 /* Since we just checked for an empty stack above, this 2565 /* Since we just checked for an empty stack above, this
3186 ``can't happen''. */ 2566 ``can't happen''. */
3187 assert (compile_stack.avail != 0); 2567 eassert (compile_stack.avail != 0);
3188 { 2568 {
3189 /* We don't just want to restore into `regnum', because 2569 /* We don't just want to restore into `regnum', because
3190 later groups should continue to be numbered higher, 2570 later groups should continue to be numbered higher,
@@ -3410,7 +2790,7 @@ regex_compile (re_char *pattern, size_t size,
3410 2790
3411 unfetch_interval: 2791 unfetch_interval:
3412 /* If an invalid interval, match the characters as literals. */ 2792 /* If an invalid interval, match the characters as literals. */
3413 assert (beg_interval); 2793 eassert (beg_interval);
3414 p = beg_interval; 2794 p = beg_interval;
3415 beg_interval = NULL; 2795 beg_interval = NULL;
3416 2796
@@ -3419,13 +2799,12 @@ regex_compile (re_char *pattern, size_t size,
3419 2799
3420 if (!(syntax & RE_NO_BK_BRACES)) 2800 if (!(syntax & RE_NO_BK_BRACES))
3421 { 2801 {
3422 assert (p > pattern && p[-1] == '\\'); 2802 eassert (p > pattern && p[-1] == '\\');
3423 goto normal_backslash; 2803 goto normal_backslash;
3424 } 2804 }
3425 else 2805 else
3426 goto normal_char; 2806 goto normal_char;
3427 2807
3428#ifdef emacs
3429 case '=': 2808 case '=':
3430 laststart = b; 2809 laststart = b;
3431 BUF_PUSH (at_dot); 2810 BUF_PUSH (at_dot);
@@ -3454,8 +2833,6 @@ regex_compile (re_char *pattern, size_t size,
3454 PATFETCH (c); 2833 PATFETCH (c);
3455 BUF_PUSH_2 (notcategoryspec, c); 2834 BUF_PUSH_2 (notcategoryspec, c);
3456 break; 2835 break;
3457#endif /* emacs */
3458
3459 2836
3460 case 'w': 2837 case 'w':
3461 if (syntax & RE_NO_GNU_OPS) 2838 if (syntax & RE_NO_GNU_OPS)
@@ -3607,7 +2984,7 @@ regex_compile (re_char *pattern, size_t size,
3607 c1 = RE_CHAR_TO_MULTIBYTE (c); 2984 c1 = RE_CHAR_TO_MULTIBYTE (c);
3608 if (! CHAR_BYTE8_P (c1)) 2985 if (! CHAR_BYTE8_P (c1))
3609 { 2986 {
3610 re_wchar_t c2 = TRANSLATE (c1); 2987 int c2 = TRANSLATE (c1);
3611 2988
3612 if (c1 != c2 && (c1 = RE_CHAR_TO_UNIBYTE (c2)) >= 0) 2989 if (c1 != c2 && (c1 = RE_CHAR_TO_UNIBYTE (c2)) >= 0)
3613 c = c1; 2990 c = c1;
@@ -3638,41 +3015,18 @@ regex_compile (re_char *pattern, size_t size,
3638 /* We have succeeded; set the length of the buffer. */ 3015 /* We have succeeded; set the length of the buffer. */
3639 bufp->used = b - bufp->buffer; 3016 bufp->used = b - bufp->buffer;
3640 3017
3641#ifdef DEBUG 3018#ifdef REGEX_EMACS_DEBUG
3642 if (debug > 0) 3019 if (regex_emacs_debug > 0)
3643 { 3020 {
3644 re_compile_fastmap (bufp); 3021 re_compile_fastmap (bufp);
3645 DEBUG_PRINT ("\nCompiled pattern: \n"); 3022 DEBUG_PRINT ("\nCompiled pattern: \n");
3646 print_compiled_pattern (bufp); 3023 print_compiled_pattern (bufp);
3647 } 3024 }
3648 debug--; 3025 regex_emacs_debug--;
3649#endif /* DEBUG */ 3026#endif
3650
3651#ifndef MATCH_MAY_ALLOCATE
3652 /* Initialize the failure stack to the largest possible stack. This
3653 isn't necessary unless we're trying to avoid calling alloca in
3654 the search and match routines. */
3655 {
3656 int num_regs = bufp->re_nsub + 1;
3657
3658 if (fail_stack.size < emacs_re_max_failures * TYPICAL_FAILURE_SIZE)
3659 {
3660 fail_stack.size = emacs_re_max_failures * TYPICAL_FAILURE_SIZE;
3661 falk_stack.stack = realloc (fail_stack.stack,
3662 fail_stack.size * sizeof *falk_stack.stack);
3663 }
3664
3665 regex_grow_registers (num_regs);
3666 }
3667#endif /* not MATCH_MAY_ALLOCATE */
3668 3027
3669 FREE_STACK_RETURN (REG_NOERROR); 3028 FREE_STACK_RETURN (REG_NOERROR);
3670 3029
3671#ifdef emacs
3672# undef syntax
3673#else
3674# undef posix_backtracking
3675#endif
3676} /* regex_compile */ 3030} /* regex_compile */
3677 3031
3678/* Subroutines for `regex_compile'. */ 3032/* Subroutines for `regex_compile'. */
@@ -3733,11 +3087,11 @@ insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, unsigned cha
3733 after an alternative or a begin-subexpression. We assume there is at 3087 after an alternative or a begin-subexpression. We assume there is at
3734 least one character before the ^. */ 3088 least one character before the ^. */
3735 3089
3736static boolean 3090static bool
3737at_begline_loc_p (re_char *pattern, re_char *p, reg_syntax_t syntax) 3091at_begline_loc_p (re_char *pattern, re_char *p, reg_syntax_t syntax)
3738{ 3092{
3739 re_char *prev = p - 2; 3093 re_char *prev = p - 2;
3740 boolean odd_backslashes; 3094 bool odd_backslashes;
3741 3095
3742 /* After a subexpression? */ 3096 /* After a subexpression? */
3743 if (*prev == '(') 3097 if (*prev == '(')
@@ -3774,11 +3128,11 @@ at_begline_loc_p (re_char *pattern, re_char *p, reg_syntax_t syntax)
3774/* The dual of at_begline_loc_p. This one is for $. We assume there is 3128/* The dual of at_begline_loc_p. This one is for $. We assume there is
3775 at least one character after the $, i.e., `P < PEND'. */ 3129 at least one character after the $, i.e., `P < PEND'. */
3776 3130
3777static boolean 3131static bool
3778at_endline_loc_p (re_char *p, re_char *pend, reg_syntax_t syntax) 3132at_endline_loc_p (re_char *p, re_char *pend, reg_syntax_t syntax)
3779{ 3133{
3780 re_char *next = p; 3134 re_char *next = p;
3781 boolean next_backslash = *next == '\\'; 3135 bool next_backslash = *next == '\\';
3782 re_char *next_next = p + 1 < pend ? p + 1 : 0; 3136 re_char *next_next = p + 1 < pend ? p + 1 : 0;
3783 3137
3784 return 3138 return
@@ -3794,10 +3148,10 @@ at_endline_loc_p (re_char *p, re_char *pend, reg_syntax_t syntax)
3794/* Returns true if REGNUM is in one of COMPILE_STACK's elements and 3148/* Returns true if REGNUM is in one of COMPILE_STACK's elements and
3795 false if it's not. */ 3149 false if it's not. */
3796 3150
3797static boolean 3151static bool
3798group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum) 3152group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum)
3799{ 3153{
3800 ssize_t this_element; 3154 ptrdiff_t this_element;
3801 3155
3802 for (this_element = compile_stack.avail - 1; 3156 for (this_element = compile_stack.avail - 1;
3803 this_element >= 0; 3157 this_element >= 0;
@@ -3823,13 +3177,13 @@ analyze_first (re_char *p, re_char *pend, char *fastmap,
3823 const int multibyte) 3177 const int multibyte)
3824{ 3178{
3825 int j, k; 3179 int j, k;
3826 boolean not; 3180 bool not;
3827 3181
3828 /* If all elements for base leading-codes in fastmap is set, this 3182 /* If all elements for base leading-codes in fastmap is set, this
3829 flag is set true. */ 3183 flag is set true. */
3830 boolean match_any_multibyte_characters = false; 3184 bool match_any_multibyte_characters = false;
3831 3185
3832 assert (p); 3186 eassert (p);
3833 3187
3834 /* The loop below works as follows: 3188 /* The loop below works as follows:
3835 - It has a working-list kept in the PATTERN_STACK and which basically 3189 - It has a working-list kept in the PATTERN_STACK and which basically
@@ -3920,7 +3274,6 @@ analyze_first (re_char *p, re_char *pend, char *fastmap,
3920 if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not) 3274 if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not)
3921 fastmap[j] = 1; 3275 fastmap[j] = 1;
3922 3276
3923#ifdef emacs
3924 if (/* Any leading code can possibly start a character 3277 if (/* Any leading code can possibly start a character
3925 which doesn't match the specified set of characters. */ 3278 which doesn't match the specified set of characters. */
3926 not 3279 not
@@ -3966,20 +3319,11 @@ analyze_first (re_char *p, re_char *pend, char *fastmap,
3966 fastmap[j] = 1; 3319 fastmap[j] = 1;
3967 } 3320 }
3968 } 3321 }
3969#endif
3970 break; 3322 break;
3971 3323
3972 case syntaxspec: 3324 case syntaxspec:
3973 case notsyntaxspec: 3325 case notsyntaxspec:
3974 if (!fastmap) break; 3326 if (!fastmap) break;
3975#ifndef emacs
3976 not = (re_opcode_t)p[-1] == notsyntaxspec;
3977 k = *p++;
3978 for (j = 0; j < (1 << BYTEWIDTH); j++)
3979 if ((SYNTAX (j) == (enum syntaxcode) k) ^ not)
3980 fastmap[j] = 1;
3981 break;
3982#else /* emacs */
3983 /* This match depends on text properties. These end with 3327 /* This match depends on text properties. These end with
3984 aborting optimizations. */ 3328 aborting optimizations. */
3985 return -1; 3329 return -1;
@@ -4008,7 +3352,6 @@ analyze_first (re_char *p, re_char *pend, char *fastmap,
4008 `continue'. */ 3352 `continue'. */
4009 3353
4010 case at_dot: 3354 case at_dot:
4011#endif /* !emacs */
4012 case no_op: 3355 case no_op:
4013 case begline: 3356 case begline:
4014 case endline: 3357 case endline:
@@ -4066,7 +3409,7 @@ analyze_first (re_char *p, re_char *pend, char *fastmap,
4066 3409
4067 case jump_n: 3410 case jump_n:
4068 /* This code simply does not properly handle forward jump_n. */ 3411 /* This code simply does not properly handle forward jump_n. */
4069 DEBUG_STATEMENT (EXTRACT_NUMBER (j, p); assert (j < 0)); 3412 DEBUG_STATEMENT (EXTRACT_NUMBER (j, p); eassert (j < 0));
4070 p += 4; 3413 p += 4;
4071 /* jump_n can either jump or fall through. The (backward) jump 3414 /* jump_n can either jump or fall through. The (backward) jump
4072 case has already been handled, so we only need to look at the 3415 case has already been handled, so we only need to look at the
@@ -4075,7 +3418,7 @@ analyze_first (re_char *p, re_char *pend, char *fastmap,
4075 3418
4076 case succeed_n: 3419 case succeed_n:
4077 /* If N == 0, it should be an on_failure_jump_loop instead. */ 3420 /* If N == 0, it should be an on_failure_jump_loop instead. */
4078 DEBUG_STATEMENT (EXTRACT_NUMBER (j, p + 2); assert (j > 0)); 3421 DEBUG_STATEMENT (EXTRACT_NUMBER (j, p + 2); eassert (j > 0));
4079 p += 4; 3422 p += 4;
4080 /* We only care about one iteration of the loop, so we don't 3423 /* We only care about one iteration of the loop, so we don't
4081 need to consider the case where this behaves like an 3424 need to consider the case where this behaves like an
@@ -4126,13 +3469,13 @@ analyze_first (re_char *p, re_char *pend, char *fastmap,
4126 3469
4127 Returns 0 if we succeed, -2 if an internal error. */ 3470 Returns 0 if we succeed, -2 if an internal error. */
4128 3471
4129int 3472static void
4130re_compile_fastmap (struct re_pattern_buffer *bufp) 3473re_compile_fastmap (struct re_pattern_buffer *bufp)
4131{ 3474{
4132 char *fastmap = bufp->fastmap; 3475 char *fastmap = bufp->fastmap;
4133 int analysis; 3476 int analysis;
4134 3477
4135 assert (fastmap && bufp->buffer); 3478 eassert (fastmap && bufp->buffer);
4136 3479
4137 memset (fastmap, 0, 1 << BYTEWIDTH); /* Assume nothing's valid. */ 3480 memset (fastmap, 0, 1 << BYTEWIDTH); /* Assume nothing's valid. */
4138 bufp->fastmap_accurate = 1; /* It will be when we're done. */ 3481 bufp->fastmap_accurate = 1; /* It will be when we're done. */
@@ -4140,14 +3483,13 @@ re_compile_fastmap (struct re_pattern_buffer *bufp)
4140 analysis = analyze_first (bufp->buffer, bufp->buffer + bufp->used, 3483 analysis = analyze_first (bufp->buffer, bufp->buffer + bufp->used,
4141 fastmap, RE_MULTIBYTE_P (bufp)); 3484 fastmap, RE_MULTIBYTE_P (bufp));
4142 bufp->can_be_null = (analysis != 0); 3485 bufp->can_be_null = (analysis != 0);
4143 return 0;
4144} /* re_compile_fastmap */ 3486} /* re_compile_fastmap */
4145 3487
4146/* Set REGS to hold NUM_REGS registers, storing them in STARTS and 3488/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
4147 ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use 3489 ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
4148 this memory for recording register information. STARTS and ENDS 3490 this memory for recording register information. STARTS and ENDS
4149 must be allocated using the malloc library routine, and must each 3491 must be allocated using the malloc library routine, and must each
4150 be at least NUM_REGS * sizeof (regoff_t) bytes long. 3492 be at least NUM_REGS * sizeof (ptrdiff_t) bytes long.
4151 3493
4152 If NUM_REGS == 0, then subsequent matches should allocate their own 3494 If NUM_REGS == 0, then subsequent matches should allocate their own
4153 register data. 3495 register data.
@@ -4157,7 +3499,8 @@ re_compile_fastmap (struct re_pattern_buffer *bufp)
4157 freeing the old data. */ 3499 freeing the old data. */
4158 3500
4159void 3501void
4160re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs, unsigned int num_regs, regoff_t *starts, regoff_t *ends) 3502re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs,
3503 unsigned int num_regs, ptrdiff_t *starts, ptrdiff_t *ends)
4161{ 3504{
4162 if (num_regs) 3505 if (num_regs)
4163 { 3506 {
@@ -4173,21 +3516,19 @@ re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs, uns
4173 regs->start = regs->end = 0; 3516 regs->start = regs->end = 0;
4174 } 3517 }
4175} 3518}
4176WEAK_ALIAS (__re_set_registers, re_set_registers)
4177 3519
4178/* Searching routines. */ 3520/* Searching routines. */
4179 3521
4180/* Like re_search_2, below, but only one string is specified, and 3522/* Like re_search_2, below, but only one string is specified, and
4181 doesn't let you say where to stop matching. */ 3523 doesn't let you say where to stop matching. */
4182 3524
4183regoff_t 3525ptrdiff_t
4184re_search (struct re_pattern_buffer *bufp, const char *string, size_t size, 3526re_search (struct re_pattern_buffer *bufp, const char *string, size_t size,
4185 ssize_t startpos, ssize_t range, struct re_registers *regs) 3527 ptrdiff_t startpos, ptrdiff_t range, struct re_registers *regs)
4186{ 3528{
4187 return re_search_2 (bufp, NULL, 0, string, size, startpos, range, 3529 return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
4188 regs, size); 3530 regs, size);
4189} 3531}
4190WEAK_ALIAS (__re_search, re_search)
4191 3532
4192/* Head address of virtual concatenation of string. */ 3533/* Head address of virtual concatenation of string. */
4193#define HEAD_ADDR_VSTRING(P) \ 3534#define HEAD_ADDR_VSTRING(P) \
@@ -4218,21 +3559,21 @@ WEAK_ALIAS (__re_search, re_search)
4218 found, -1 if no match, or -2 if error (such as failure 3559 found, -1 if no match, or -2 if error (such as failure
4219 stack overflow). */ 3560 stack overflow). */
4220 3561
4221regoff_t 3562ptrdiff_t
4222re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1, 3563re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1,
4223 const char *str2, size_t size2, ssize_t startpos, ssize_t range, 3564 const char *str2, size_t size2, ptrdiff_t startpos, ptrdiff_t range,
4224 struct re_registers *regs, ssize_t stop) 3565 struct re_registers *regs, ptrdiff_t stop)
4225{ 3566{
4226 regoff_t val; 3567 ptrdiff_t val;
4227 re_char *string1 = (re_char *) str1; 3568 re_char *string1 = (re_char *) str1;
4228 re_char *string2 = (re_char *) str2; 3569 re_char *string2 = (re_char *) str2;
4229 register char *fastmap = bufp->fastmap; 3570 char *fastmap = bufp->fastmap;
4230 register RE_TRANSLATE_TYPE translate = bufp->translate; 3571 Lisp_Object translate = bufp->translate;
4231 size_t total_size = size1 + size2; 3572 size_t total_size = size1 + size2;
4232 ssize_t endpos = startpos + range; 3573 ptrdiff_t endpos = startpos + range;
4233 boolean anchored_start; 3574 bool anchored_start;
4234 /* Nonzero if we are searching multibyte string. */ 3575 /* Nonzero if we are searching multibyte string. */
4235 const boolean multibyte = RE_TARGET_MULTIBYTE_P (bufp); 3576 bool multibyte = RE_TARGET_MULTIBYTE_P (bufp);
4236 3577
4237 /* Check for out-of-range STARTPOS. */ 3578 /* Check for out-of-range STARTPOS. */
4238 if (startpos < 0 || startpos > total_size) 3579 if (startpos < 0 || startpos > total_size)
@@ -4256,7 +3597,6 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1,
4256 range = 0; 3597 range = 0;
4257 } 3598 }
4258 3599
4259#ifdef emacs
4260 /* In a forward search for something that starts with \=. 3600 /* In a forward search for something that starts with \=.
4261 don't keep searching past point. */ 3601 don't keep searching past point. */
4262 if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0) 3602 if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
@@ -4265,7 +3605,6 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1,
4265 if (range < 0) 3605 if (range < 0)
4266 return -1; 3606 return -1;
4267 } 3607 }
4268#endif /* emacs */
4269 3608
4270 /* Update the fastmap now if not correct already. */ 3609 /* Update the fastmap now if not correct already. */
4271 if (fastmap && !bufp->fastmap_accurate) 3610 if (fastmap && !bufp->fastmap_accurate)
@@ -4274,14 +3613,12 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1,
4274 /* See whether the pattern is anchored. */ 3613 /* See whether the pattern is anchored. */
4275 anchored_start = (bufp->buffer[0] == begline); 3614 anchored_start = (bufp->buffer[0] == begline);
4276 3615
4277#ifdef emacs
4278 gl_state.object = re_match_object; /* Used by SYNTAX_TABLE_BYTE_TO_CHAR. */ 3616 gl_state.object = re_match_object; /* Used by SYNTAX_TABLE_BYTE_TO_CHAR. */
4279 { 3617 {
4280 ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (startpos)); 3618 ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (startpos));
4281 3619
4282 SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1); 3620 SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1);
4283 } 3621 }
4284#endif
4285 3622
4286 /* Loop through the string, looking for a place to start matching. */ 3623 /* Loop through the string, looking for a place to start matching. */
4287 for (;;) 3624 for (;;)
@@ -4304,14 +3641,14 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1,
4304 the first null string. */ 3641 the first null string. */
4305 if (fastmap && startpos < total_size && !bufp->can_be_null) 3642 if (fastmap && startpos < total_size && !bufp->can_be_null)
4306 { 3643 {
4307 register re_char *d; 3644 re_char *d;
4308 register re_wchar_t buf_ch; 3645 int buf_ch;
4309 3646
4310 d = POS_ADDR_VSTRING (startpos); 3647 d = POS_ADDR_VSTRING (startpos);
4311 3648
4312 if (range > 0) /* Searching forwards. */ 3649 if (range > 0) /* Searching forwards. */
4313 { 3650 {
4314 ssize_t irange = range, lim = 0; 3651 ptrdiff_t irange = range, lim = 0;
4315 3652
4316 if (startpos < size1 && startpos + range >= size1) 3653 if (startpos < size1 && startpos + range >= size1)
4317 lim = range - (size1 - startpos); 3654 lim = range - (size1 - startpos);
@@ -4336,11 +3673,9 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1,
4336 else 3673 else
4337 while (range > lim) 3674 while (range > lim)
4338 { 3675 {
4339 register re_wchar_t ch, translated;
4340
4341 buf_ch = *d; 3676 buf_ch = *d;
4342 ch = RE_CHAR_TO_MULTIBYTE (buf_ch); 3677 int ch = RE_CHAR_TO_MULTIBYTE (buf_ch);
4343 translated = RE_TRANSLATE (translate, ch); 3678 int translated = RE_TRANSLATE (translate, ch);
4344 if (translated != ch 3679 if (translated != ch
4345 && (ch = RE_CHAR_TO_UNIBYTE (translated)) >= 0) 3680 && (ch = RE_CHAR_TO_UNIBYTE (translated)) >= 0)
4346 buf_ch = ch; 3681 buf_ch = ch;
@@ -4383,11 +3718,9 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1,
4383 } 3718 }
4384 else 3719 else
4385 { 3720 {
4386 register re_wchar_t ch, translated;
4387
4388 buf_ch = *d; 3721 buf_ch = *d;
4389 ch = RE_CHAR_TO_MULTIBYTE (buf_ch); 3722 int ch = RE_CHAR_TO_MULTIBYTE (buf_ch);
4390 translated = TRANSLATE (ch); 3723 int translated = TRANSLATE (ch);
4391 if (translated != ch 3724 if (translated != ch
4392 && (ch = RE_CHAR_TO_UNIBYTE (translated)) >= 0) 3725 && (ch = RE_CHAR_TO_UNIBYTE (translated)) >= 0)
4393 buf_ch = ch; 3726 buf_ch = ch;
@@ -4457,13 +3790,12 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1,
4457 } 3790 }
4458 return -1; 3791 return -1;
4459} /* re_search_2 */ 3792} /* re_search_2 */
4460WEAK_ALIAS (__re_search_2, re_search_2)
4461 3793
4462/* Declarations and macros for re_match_2. */ 3794/* Declarations and macros for re_match_2. */
4463 3795
4464static int bcmp_translate (re_char *s1, re_char *s2, 3796static int bcmp_translate (re_char *s1, re_char *s2,
4465 register ssize_t len, 3797 ptrdiff_t len,
4466 RE_TRANSLATE_TYPE translate, 3798 Lisp_Object translate,
4467 const int multibyte); 3799 const int multibyte);
4468 3800
4469/* This converts PTR, a pointer into one of the search strings `string1' 3801/* This converts PTR, a pointer into one of the search strings `string1'
@@ -4531,29 +3863,6 @@ static int bcmp_translate (re_char *s1, re_char *s2,
4531 || WORDCHAR_P (d - 1) != WORDCHAR_P (d)) 3863 || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
4532#endif 3864#endif
4533 3865
4534/* Free everything we malloc. */
4535#ifdef MATCH_MAY_ALLOCATE
4536# define FREE_VAR(var) \
4537 do { \
4538 if (var) \
4539 { \
4540 REGEX_FREE (var); \
4541 var = NULL; \
4542 } \
4543 } while (0)
4544# define FREE_VARIABLES() \
4545 do { \
4546 REGEX_FREE_STACK (fail_stack.stack); \
4547 FREE_VAR (regstart); \
4548 FREE_VAR (regend); \
4549 FREE_VAR (best_regstart); \
4550 FREE_VAR (best_regend); \
4551 REGEX_SAFE_FREE (); \
4552 } while (0)
4553#else
4554# define FREE_VARIABLES() ((void)0) /* Do nothing! But inhibit gcc warning. */
4555#endif /* not MATCH_MAY_ALLOCATE */
4556
4557 3866
4558/* Optimization routines. */ 3867/* Optimization routines. */
4559 3868
@@ -4586,10 +3895,8 @@ skip_one_char (re_char *p)
4586 3895
4587 case syntaxspec: 3896 case syntaxspec:
4588 case notsyntaxspec: 3897 case notsyntaxspec:
4589#ifdef emacs
4590 case categoryspec: 3898 case categoryspec:
4591 case notcategoryspec: 3899 case notcategoryspec:
4592#endif /* emacs */
4593 p++; 3900 p++;
4594 break; 3901 break;
4595 3902
@@ -4623,7 +3930,7 @@ skip_noops (re_char *p, re_char *pend)
4623 return p; 3930 return p;
4624 } 3931 }
4625 } 3932 }
4626 assert (p == pend); 3933 eassert (p == pend);
4627 return p; 3934 return p;
4628} 3935}
4629 3936
@@ -4656,11 +3963,10 @@ execute_charset (re_char **pp, unsigned c, unsigned corig, bool unibyte)
4656 && p[2 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) 3963 && p[2 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4657 return !not; 3964 return !not;
4658 } 3965 }
4659#ifdef emacs
4660 else if (rtp) 3966 else if (rtp)
4661 { 3967 {
4662 int class_bits = CHARSET_RANGE_TABLE_BITS (p); 3968 int class_bits = CHARSET_RANGE_TABLE_BITS (p);
4663 re_wchar_t range_start, range_end; 3969 int range_start, range_end;
4664 3970
4665 /* Sort tests by the most commonly used classes with some adjustment to which 3971 /* Sort tests by the most commonly used classes with some adjustment to which
4666 tests are easiest to perform. Take a look at comment in re_wctype_parse 3972 tests are easiest to perform. Take a look at comment in re_wctype_parse
@@ -4691,7 +3997,7 @@ execute_charset (re_char **pp, unsigned c, unsigned corig, bool unibyte)
4691 return !not; 3997 return !not;
4692 } 3998 }
4693 } 3999 }
4694#endif /* emacs */ 4000
4695 return not; 4001 return not;
4696} 4002}
4697 4003
@@ -4701,11 +4007,11 @@ mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1,
4701 re_char *p2) 4007 re_char *p2)
4702{ 4008{
4703 re_opcode_t op2; 4009 re_opcode_t op2;
4704 const boolean multibyte = RE_MULTIBYTE_P (bufp); 4010 bool multibyte = RE_MULTIBYTE_P (bufp);
4705 unsigned char *pend = bufp->buffer + bufp->used; 4011 unsigned char *pend = bufp->buffer + bufp->used;
4706 4012
4707 assert (p1 >= bufp->buffer && p1 < pend 4013 eassert (p1 >= bufp->buffer && p1 < pend
4708 && p2 >= bufp->buffer && p2 <= pend); 4014 && p2 >= bufp->buffer && p2 <= pend);
4709 4015
4710 /* Skip over open/close-group commands. 4016 /* Skip over open/close-group commands.
4711 If what follows this loop is a ...+ construct, 4017 If what follows this loop is a ...+ construct,
@@ -4716,8 +4022,8 @@ mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1,
4716 is only used in the case where p1 is a simple match operator. */ 4022 is only used in the case where p1 is a simple match operator. */
4717 /* p1 = skip_noops (p1, pend); */ 4023 /* p1 = skip_noops (p1, pend); */
4718 4024
4719 assert (p1 >= bufp->buffer && p1 < pend 4025 eassert (p1 >= bufp->buffer && p1 < pend
4720 && p2 >= bufp->buffer && p2 <= pend); 4026 && p2 >= bufp->buffer && p2 <= pend);
4721 4027
4722 op2 = p2 == pend ? succeed : *p2; 4028 op2 = p2 == pend ? succeed : *p2;
4723 4029
@@ -4736,7 +4042,7 @@ mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1,
4736 case endline: 4042 case endline:
4737 case exactn: 4043 case exactn:
4738 { 4044 {
4739 register re_wchar_t c 4045 int c
4740 = (re_opcode_t) *p2 == endline ? '\n' 4046 = (re_opcode_t) *p2 == endline ? '\n'
4741 : RE_STRING_CHAR (p2 + 2, multibyte); 4047 : RE_STRING_CHAR (p2 + 2, multibyte);
4742 4048
@@ -4866,12 +4172,10 @@ mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1,
4866 || (re_opcode_t) *p1 == syntaxspec) 4172 || (re_opcode_t) *p1 == syntaxspec)
4867 && p1[1] == Sword); 4173 && p1[1] == Sword);
4868 4174
4869#ifdef emacs
4870 case categoryspec: 4175 case categoryspec:
4871 return ((re_opcode_t) *p1 == notcategoryspec && p1[1] == p2[1]); 4176 return ((re_opcode_t) *p1 == notcategoryspec && p1[1] == p2[1]);
4872 case notcategoryspec: 4177 case notcategoryspec:
4873 return ((re_opcode_t) *p1 == categoryspec && p1[1] == p2[1]); 4178 return ((re_opcode_t) *p1 == categoryspec && p1[1] == p2[1]);
4874#endif /* emacs */
4875 4179
4876 default: 4180 default:
4877 ; 4181 ;
@@ -4884,20 +4188,6 @@ mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1,
4884 4188
4885/* Matching routines. */ 4189/* Matching routines. */
4886 4190
4887#ifndef emacs /* Emacs never uses this. */
4888/* re_match is like re_match_2 except it takes only a single string. */
4889
4890regoff_t
4891re_match (struct re_pattern_buffer *bufp, const char *string,
4892 size_t size, ssize_t pos, struct re_registers *regs)
4893{
4894 regoff_t result = re_match_2_internal (bufp, NULL, 0, (re_char *) string,
4895 size, pos, regs, size);
4896 return result;
4897}
4898WEAK_ALIAS (__re_match, re_match)
4899#endif /* not emacs */
4900
4901/* re_match_2 matches the compiled pattern in BUFP against the 4191/* re_match_2 matches the compiled pattern in BUFP against the
4902 the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 4192 the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
4903 and SIZE2, respectively). We start matching at POS, and stop 4193 and SIZE2, respectively). We start matching at POS, and stop
@@ -4911,34 +4201,31 @@ WEAK_ALIAS (__re_match, re_match)
4911 failure stack overflowing). Otherwise, we return the length of the 4201 failure stack overflowing). Otherwise, we return the length of the
4912 matched substring. */ 4202 matched substring. */
4913 4203
4914regoff_t 4204ptrdiff_t
4915re_match_2 (struct re_pattern_buffer *bufp, const char *string1, 4205re_match_2 (struct re_pattern_buffer *bufp, const char *string1,
4916 size_t size1, const char *string2, size_t size2, ssize_t pos, 4206 size_t size1, const char *string2, size_t size2, ptrdiff_t pos,
4917 struct re_registers *regs, ssize_t stop) 4207 struct re_registers *regs, ptrdiff_t stop)
4918{ 4208{
4919 regoff_t result; 4209 ptrdiff_t result;
4920 4210
4921#ifdef emacs 4211 ptrdiff_t charpos;
4922 ssize_t charpos;
4923 gl_state.object = re_match_object; /* Used by SYNTAX_TABLE_BYTE_TO_CHAR. */ 4212 gl_state.object = re_match_object; /* Used by SYNTAX_TABLE_BYTE_TO_CHAR. */
4924 charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (pos)); 4213 charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (pos));
4925 SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1); 4214 SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1);
4926#endif
4927 4215
4928 result = re_match_2_internal (bufp, (re_char *) string1, size1, 4216 result = re_match_2_internal (bufp, (re_char *) string1, size1,
4929 (re_char *) string2, size2, 4217 (re_char *) string2, size2,
4930 pos, regs, stop); 4218 pos, regs, stop);
4931 return result; 4219 return result;
4932} 4220}
4933WEAK_ALIAS (__re_match_2, re_match_2)
4934 4221
4935 4222
4936/* This is a separate function so that we can force an alloca cleanup 4223/* This is a separate function so that we can force an alloca cleanup
4937 afterwards. */ 4224 afterwards. */
4938static regoff_t 4225static ptrdiff_t
4939re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, 4226re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
4940 size_t size1, re_char *string2, size_t size2, 4227 size_t size1, re_char *string2, size_t size2,
4941 ssize_t pos, struct re_registers *regs, ssize_t stop) 4228 ptrdiff_t pos, struct re_registers *regs, ptrdiff_t stop)
4942{ 4229{
4943 /* General temporaries. */ 4230 /* General temporaries. */
4944 int mcnt; 4231 int mcnt;
@@ -4965,13 +4252,13 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
4965 re_char *pend = p + bufp->used; 4252 re_char *pend = p + bufp->used;
4966 4253
4967 /* We use this to map every character in the string. */ 4254 /* We use this to map every character in the string. */
4968 RE_TRANSLATE_TYPE translate = bufp->translate; 4255 Lisp_Object translate = bufp->translate;
4969 4256
4970 /* Nonzero if BUFP is setup from a multibyte regex. */ 4257 /* True if BUFP is setup from a multibyte regex. */
4971 const boolean multibyte = RE_MULTIBYTE_P (bufp); 4258 bool multibyte = RE_MULTIBYTE_P (bufp);
4972 4259
4973 /* Nonzero if STRING1/STRING2 are multibyte. */ 4260 /* True if STRING1/STRING2 are multibyte. */
4974 const boolean target_multibyte = RE_TARGET_MULTIBYTE_P (bufp); 4261 bool target_multibyte = RE_TARGET_MULTIBYTE_P (bufp);
4975 4262
4976 /* Failure point stack. Each place that can handle a failure further 4263 /* Failure point stack. Each place that can handle a failure further
4977 down the line pushes a failure point on this stack. It consists of 4264 down the line pushes a failure point on this stack. It consists of
@@ -4980,19 +4267,11 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
4980 registers, and, finally, two char *'s. The first char * is where 4267 registers, and, finally, two char *'s. The first char * is where
4981 to resume scanning the pattern; the second one is where to resume 4268 to resume scanning the pattern; the second one is where to resume
4982 scanning the strings. */ 4269 scanning the strings. */
4983#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */
4984 fail_stack_type fail_stack; 4270 fail_stack_type fail_stack;
4985#endif
4986#ifdef DEBUG_COMPILES_ARGUMENTS 4271#ifdef DEBUG_COMPILES_ARGUMENTS
4987 unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0; 4272 unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
4988#endif 4273#endif
4989 4274
4990#if defined REL_ALLOC && defined REGEX_MALLOC
4991 /* This holds the pointer to the failure stack, when
4992 it is allocated relocatably. */
4993 fail_stack_elt_t *failure_stack_ptr;
4994#endif
4995
4996 /* We fill all the registers internally, independent of what we 4275 /* We fill all the registers internally, independent of what we
4997 return, for use in backreferences. The number here includes 4276 return, for use in backreferences. The number here includes
4998 an element for register zero. */ 4277 an element for register zero. */
@@ -5005,18 +4284,14 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
5005 matching and the regnum-th regend points to right after where we 4284 matching and the regnum-th regend points to right after where we
5006 stopped matching the regnum-th subexpression. (The zeroth register 4285 stopped matching the regnum-th subexpression. (The zeroth register
5007 keeps track of what the whole pattern matches.) */ 4286 keeps track of what the whole pattern matches.) */
5008#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ 4287 re_char **regstart UNINIT, **regend UNINIT;
5009 re_char **regstart, **regend;
5010#endif
5011 4288
5012 /* The following record the register info as found in the above 4289 /* The following record the register info as found in the above
5013 variables when we find a match better than any we've seen before. 4290 variables when we find a match better than any we've seen before.
5014 This happens as we backtrack through the failure points, which in 4291 This happens as we backtrack through the failure points, which in
5015 turn happens only if we have not yet matched the entire string. */ 4292 turn happens only if we have not yet matched the entire string. */
5016 unsigned best_regs_set = false; 4293 unsigned best_regs_set = false;
5017#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ 4294 re_char **best_regstart UNINIT, **best_regend UNINIT;
5018 re_char **best_regstart, **best_regend;
5019#endif
5020 4295
5021 /* Logically, this is `best_regend[0]'. But we don't want to have to 4296 /* Logically, this is `best_regend[0]'. But we don't want to have to
5022 allocate space for that if we're not allocating space for anything 4297 allocate space for that if we're not allocating space for anything
@@ -5039,7 +4314,6 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
5039 4314
5040 INIT_FAIL_STACK (); 4315 INIT_FAIL_STACK ();
5041 4316
5042#ifdef MATCH_MAY_ALLOCATE
5043 /* Do not bother to initialize all the register variables if there are 4317 /* Do not bother to initialize all the register variables if there are
5044 no groups in the pattern, as it takes a fair amount of time. If 4318 no groups in the pattern, as it takes a fair amount of time. If
5045 there are groups, we include space for register 0 (the whole 4319 there are groups, we include space for register 0 (the whole
@@ -5047,29 +4321,16 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
5047 array indexing. We should fix this. */ 4321 array indexing. We should fix this. */
5048 if (bufp->re_nsub) 4322 if (bufp->re_nsub)
5049 { 4323 {
5050 regstart = REGEX_TALLOC (num_regs, re_char *); 4324 regstart = SAFE_ALLOCA (num_regs * 4 * sizeof *regstart);
5051 regend = REGEX_TALLOC (num_regs, re_char *); 4325 regend = regstart + num_regs;
5052 best_regstart = REGEX_TALLOC (num_regs, re_char *); 4326 best_regstart = regend + num_regs;
5053 best_regend = REGEX_TALLOC (num_regs, re_char *); 4327 best_regend = best_regstart + num_regs;
5054
5055 if (!(regstart && regend && best_regstart && best_regend))
5056 {
5057 FREE_VARIABLES ();
5058 return -2;
5059 }
5060 } 4328 }
5061 else
5062 {
5063 /* We must initialize all our variables to NULL, so that
5064 `FREE_VARIABLES' doesn't try to free them. */
5065 regstart = regend = best_regstart = best_regend = NULL;
5066 }
5067#endif /* MATCH_MAY_ALLOCATE */
5068 4329
5069 /* The starting position is bogus. */ 4330 /* The starting position is bogus. */
5070 if (pos < 0 || pos > size1 + size2) 4331 if (pos < 0 || pos > size1 + size2)
5071 { 4332 {
5072 FREE_VARIABLES (); 4333 SAFE_FREE ();
5073 return -1; 4334 return -1;
5074 } 4335 }
5075 4336
@@ -5229,13 +4490,8 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
5229 extra element beyond `num_regs' for the `-1' marker 4490 extra element beyond `num_regs' for the `-1' marker
5230 GNU code uses. */ 4491 GNU code uses. */
5231 regs->num_regs = max (RE_NREGS, num_regs + 1); 4492 regs->num_regs = max (RE_NREGS, num_regs + 1);
5232 regs->start = TALLOC (regs->num_regs, regoff_t); 4493 regs->start = TALLOC (regs->num_regs, ptrdiff_t);
5233 regs->end = TALLOC (regs->num_regs, regoff_t); 4494 regs->end = TALLOC (regs->num_regs, ptrdiff_t);
5234 if (regs->start == NULL || regs->end == NULL)
5235 {
5236 FREE_VARIABLES ();
5237 return -2;
5238 }
5239 bufp->regs_allocated = REGS_REALLOCATE; 4495 bufp->regs_allocated = REGS_REALLOCATE;
5240 } 4496 }
5241 else if (bufp->regs_allocated == REGS_REALLOCATE) 4497 else if (bufp->regs_allocated == REGS_REALLOCATE)
@@ -5245,21 +4501,12 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
5245 if (regs->num_regs < num_regs + 1) 4501 if (regs->num_regs < num_regs + 1)
5246 { 4502 {
5247 regs->num_regs = num_regs + 1; 4503 regs->num_regs = num_regs + 1;
5248 RETALLOC (regs->start, regs->num_regs, regoff_t); 4504 RETALLOC (regs->start, regs->num_regs, ptrdiff_t);
5249 RETALLOC (regs->end, regs->num_regs, regoff_t); 4505 RETALLOC (regs->end, regs->num_regs, ptrdiff_t);
5250 if (regs->start == NULL || regs->end == NULL)
5251 {
5252 FREE_VARIABLES ();
5253 return -2;
5254 }
5255 } 4506 }
5256 } 4507 }
5257 else 4508 else
5258 { 4509 eassert (bufp->regs_allocated == REGS_FIXED);
5259 /* These braces fend off a "empty body in an else-statement"
5260 warning under GCC when assert expands to nothing. */
5261 assert (bufp->regs_allocated == REGS_FIXED);
5262 }
5263 4510
5264 /* Convert the pointer data in `regstart' and `regend' to 4511 /* Convert the pointer data in `regstart' and `regend' to
5265 indices. Register zero has to be set differently, 4512 indices. Register zero has to be set differently,
@@ -5301,7 +4548,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
5301 4548
5302 DEBUG_PRINT ("Returning %td from re_match_2.\n", dcnt); 4549 DEBUG_PRINT ("Returning %td from re_match_2.\n", dcnt);
5303 4550
5304 FREE_VARIABLES (); 4551 SAFE_FREE ();
5305 return dcnt; 4552 return dcnt;
5306 } 4553 }
5307 4554
@@ -5328,33 +4575,6 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
5328 /* Remember the start point to rollback upon failure. */ 4575 /* Remember the start point to rollback upon failure. */
5329 dfail = d; 4576 dfail = d;
5330 4577
5331#ifndef emacs
5332 /* This is written out as an if-else so we don't waste time
5333 testing `translate' inside the loop. */
5334 if (RE_TRANSLATE_P (translate))
5335 do
5336 {
5337 PREFETCH ();
5338 if (RE_TRANSLATE (translate, *d) != *p++)
5339 {
5340 d = dfail;
5341 goto fail;
5342 }
5343 d++;
5344 }
5345 while (--mcnt);
5346 else
5347 do
5348 {
5349 PREFETCH ();
5350 if (*d++ != *p++)
5351 {
5352 d = dfail;
5353 goto fail;
5354 }
5355 }
5356 while (--mcnt);
5357#else /* emacs */
5358 /* The cost of testing `translate' is comparatively small. */ 4578 /* The cost of testing `translate' is comparatively small. */
5359 if (target_multibyte) 4579 if (target_multibyte)
5360 do 4580 do
@@ -5419,7 +4639,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
5419 d++; 4639 d++;
5420 } 4640 }
5421 while (--mcnt); 4641 while (--mcnt);
5422#endif 4642
5423 break; 4643 break;
5424 4644
5425 4645
@@ -5427,7 +4647,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
5427 case anychar: 4647 case anychar:
5428 { 4648 {
5429 int buf_charlen; 4649 int buf_charlen;
5430 re_wchar_t buf_ch; 4650 int buf_ch;
5431 reg_syntax_t syntax; 4651 reg_syntax_t syntax;
5432 4652
5433 DEBUG_PRINT ("EXECUTING anychar.\n"); 4653 DEBUG_PRINT ("EXECUTING anychar.\n");
@@ -5437,11 +4657,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
5437 target_multibyte); 4657 target_multibyte);
5438 buf_ch = TRANSLATE (buf_ch); 4658 buf_ch = TRANSLATE (buf_ch);
5439 4659
5440#ifdef emacs
5441 syntax = RE_SYNTAX_EMACS; 4660 syntax = RE_SYNTAX_EMACS;
5442#else
5443 syntax = bufp->syntax;
5444#endif
5445 4661
5446 if ((!(syntax & RE_DOT_NEWLINE) && buf_ch == '\n') 4662 if ((!(syntax & RE_DOT_NEWLINE) && buf_ch == '\n')
5447 || ((syntax & RE_DOT_NOT_NULL) && buf_ch == '\000')) 4663 || ((syntax & RE_DOT_NOT_NULL) && buf_ch == '\000'))
@@ -5460,7 +4676,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
5460 int len; 4676 int len;
5461 4677
5462 /* Whether matching against a unibyte character. */ 4678 /* Whether matching against a unibyte character. */
5463 boolean unibyte_char = false; 4679 bool unibyte_char = false;
5464 4680
5465 DEBUG_PRINT ("EXECUTING charset%s.\n", 4681 DEBUG_PRINT ("EXECUTING charset%s.\n",
5466 (re_opcode_t) *(p - 1) == charset_not ? "_not" : ""); 4682 (re_opcode_t) *(p - 1) == charset_not ? "_not" : "");
@@ -5530,10 +4746,10 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
5530 case stop_memory: 4746 case stop_memory:
5531 DEBUG_PRINT ("EXECUTING stop_memory %d:\n", *p); 4747 DEBUG_PRINT ("EXECUTING stop_memory %d:\n", *p);
5532 4748
5533 assert (!REG_UNSET (regstart[*p])); 4749 eassert (!REG_UNSET (regstart[*p]));
5534 /* Strictly speaking, there should be code such as: 4750 /* Strictly speaking, there should be code such as:
5535 4751
5536 assert (REG_UNSET (regend[*p])); 4752 eassert (REG_UNSET (regend[*p]));
5537 PUSH_FAILURE_REGSTOP ((unsigned int)*p); 4753 PUSH_FAILURE_REGSTOP ((unsigned int)*p);
5538 4754
5539 But the only info to be pushed is regend[*p] and it is known to 4755 But the only info to be pushed is regend[*p] and it is known to
@@ -5557,7 +4773,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
5557 followed by the numeric value of <digit> as the register number. */ 4773 followed by the numeric value of <digit> as the register number. */
5558 case duplicate: 4774 case duplicate:
5559 { 4775 {
5560 register re_char *d2, *dend2; 4776 re_char *d2, *dend2;
5561 int regno = *p++; /* Get which register to match against. */ 4777 int regno = *p++; /* Get which register to match against. */
5562 DEBUG_PRINT ("EXECUTING duplicate %d.\n", regno); 4778 DEBUG_PRINT ("EXECUTING duplicate %d.\n", regno);
5563 4779
@@ -5719,7 +4935,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
5719 DEBUG_PRINT ("EXECUTING on_failure_jump_nastyloop %d (to %p):\n", 4935 DEBUG_PRINT ("EXECUTING on_failure_jump_nastyloop %d (to %p):\n",
5720 mcnt, p + mcnt); 4936 mcnt, p + mcnt);
5721 4937
5722 assert ((re_opcode_t)p[-4] == no_op); 4938 eassert ((re_opcode_t)p[-4] == no_op);
5723 { 4939 {
5724 int cycle = 0; 4940 int cycle = 0;
5725 CHECK_INFINITE_LOOP (p - 4, d); 4941 CHECK_INFINITE_LOOP (p - 4, d);
@@ -5788,7 +5004,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
5788 mcnt, p + mcnt); 5004 mcnt, p + mcnt);
5789 { 5005 {
5790 re_char *p1 = p; /* Next operation. */ 5006 re_char *p1 = p; /* Next operation. */
5791 /* Here, we discard `const', making re_match non-reentrant. */ 5007 /* Discard 'const', making re_search non-reentrant. */
5792 unsigned char *p2 = (unsigned char *) p + mcnt; /* Jump dest. */ 5008 unsigned char *p2 = (unsigned char *) p + mcnt; /* Jump dest. */
5793 unsigned char *p3 = (unsigned char *) p - 3; /* opcode location. */ 5009 unsigned char *p3 = (unsigned char *) p - 3; /* opcode location. */
5794 5010
@@ -5799,9 +5015,9 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
5799 5015
5800 /* Ensure this is indeed the trivial kind of loop 5016 /* Ensure this is indeed the trivial kind of loop
5801 we are expecting. */ 5017 we are expecting. */
5802 assert (skip_one_char (p1) == p2 - 3); 5018 eassert (skip_one_char (p1) == p2 - 3);
5803 assert ((re_opcode_t) p2[-3] == jump && p2 + mcnt == p); 5019 eassert ((re_opcode_t) p2[-3] == jump && p2 + mcnt == p);
5804 DEBUG_STATEMENT (debug += 2); 5020 DEBUG_STATEMENT (regex_emacs_debug += 2);
5805 if (mutually_exclusive_p (bufp, p1, p2)) 5021 if (mutually_exclusive_p (bufp, p1, p2))
5806 { 5022 {
5807 /* Use a fast `on_failure_keep_string_jump' loop. */ 5023 /* Use a fast `on_failure_keep_string_jump' loop. */
@@ -5815,7 +5031,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
5815 DEBUG_PRINT (" smart default => slow loop.\n"); 5031 DEBUG_PRINT (" smart default => slow loop.\n");
5816 *p3 = (unsigned char) on_failure_jump; 5032 *p3 = (unsigned char) on_failure_jump;
5817 } 5033 }
5818 DEBUG_STATEMENT (debug -= 2); 5034 DEBUG_STATEMENT (regex_emacs_debug -= 2);
5819 } 5035 }
5820 break; 5036 break;
5821 5037
@@ -5840,7 +5056,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
5840 /* Originally, mcnt is how many times we HAVE to succeed. */ 5056 /* Originally, mcnt is how many times we HAVE to succeed. */
5841 if (mcnt != 0) 5057 if (mcnt != 0)
5842 { 5058 {
5843 /* Here, we discard `const', making re_match non-reentrant. */ 5059 /* Discard 'const', making re_search non-reentrant. */
5844 unsigned char *p2 = (unsigned char *) p + 2; /* counter loc. */ 5060 unsigned char *p2 = (unsigned char *) p + 2; /* counter loc. */
5845 mcnt--; 5061 mcnt--;
5846 p += 4; 5062 p += 4;
@@ -5859,7 +5075,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
5859 /* Originally, this is how many times we CAN jump. */ 5075 /* Originally, this is how many times we CAN jump. */
5860 if (mcnt != 0) 5076 if (mcnt != 0)
5861 { 5077 {
5862 /* Here, we discard `const', making re_match non-reentrant. */ 5078 /* Discard 'const', making re_search non-reentrant. */
5863 unsigned char *p2 = (unsigned char *) p + 2; /* counter loc. */ 5079 unsigned char *p2 = (unsigned char *) p + 2; /* counter loc. */
5864 mcnt--; 5080 mcnt--;
5865 PUSH_NUMBER (p2, mcnt); 5081 PUSH_NUMBER (p2, mcnt);
@@ -5876,7 +5092,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
5876 DEBUG_PRINT ("EXECUTING set_number_at.\n"); 5092 DEBUG_PRINT ("EXECUTING set_number_at.\n");
5877 5093
5878 EXTRACT_NUMBER_AND_INCR (mcnt, p); 5094 EXTRACT_NUMBER_AND_INCR (mcnt, p);
5879 /* Here, we discard `const', making re_match non-reentrant. */ 5095 /* Discard 'const', making re_search non-reentrant. */
5880 p2 = (unsigned char *) p + mcnt; 5096 p2 = (unsigned char *) p + mcnt;
5881 /* Signedness doesn't matter since we only copy MCNT's bits. */ 5097 /* Signedness doesn't matter since we only copy MCNT's bits. */
5882 EXTRACT_NUMBER_AND_INCR (mcnt, p); 5098 EXTRACT_NUMBER_AND_INCR (mcnt, p);
@@ -5888,7 +5104,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
5888 case wordbound: 5104 case wordbound:
5889 case notwordbound: 5105 case notwordbound:
5890 { 5106 {
5891 boolean not = (re_opcode_t) *(p - 1) == notwordbound; 5107 bool not = (re_opcode_t) *(p - 1) == notwordbound;
5892 DEBUG_PRINT ("EXECUTING %swordbound.\n", not ? "not" : ""); 5108 DEBUG_PRINT ("EXECUTING %swordbound.\n", not ? "not" : "");
5893 5109
5894 /* We SUCCEED (or FAIL) in one of the following cases: */ 5110 /* We SUCCEED (or FAIL) in one of the following cases: */
@@ -5900,19 +5116,15 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
5900 { 5116 {
5901 /* C1 is the character before D, S1 is the syntax of C1, C2 5117 /* C1 is the character before D, S1 is the syntax of C1, C2
5902 is the character at D, and S2 is the syntax of C2. */ 5118 is the character at D, and S2 is the syntax of C2. */
5903 re_wchar_t c1, c2; 5119 int c1, c2;
5904 int s1, s2; 5120 int s1, s2;
5905 int dummy; 5121 int dummy;
5906#ifdef emacs 5122 ptrdiff_t offset = PTR_TO_OFFSET (d - 1);
5907 ssize_t offset = PTR_TO_OFFSET (d - 1); 5123 ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
5908 ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
5909 UPDATE_SYNTAX_TABLE (charpos); 5124 UPDATE_SYNTAX_TABLE (charpos);
5910#endif
5911 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); 5125 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
5912 s1 = SYNTAX (c1); 5126 s1 = SYNTAX (c1);
5913#ifdef emacs
5914 UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1); 5127 UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
5915#endif
5916 PREFETCH_NOLIMIT (); 5128 PREFETCH_NOLIMIT ();
5917 GET_CHAR_AFTER (c2, d, dummy); 5129 GET_CHAR_AFTER (c2, d, dummy);
5918 s2 = SYNTAX (c2); 5130 s2 = SYNTAX (c2);
@@ -5942,14 +5154,12 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
5942 { 5154 {
5943 /* C1 is the character before D, S1 is the syntax of C1, C2 5155 /* C1 is the character before D, S1 is the syntax of C1, C2
5944 is the character at D, and S2 is the syntax of C2. */ 5156 is the character at D, and S2 is the syntax of C2. */
5945 re_wchar_t c1, c2; 5157 int c1, c2;
5946 int s1, s2; 5158 int s1, s2;
5947 int dummy; 5159 int dummy;
5948#ifdef emacs 5160 ptrdiff_t offset = PTR_TO_OFFSET (d);
5949 ssize_t offset = PTR_TO_OFFSET (d); 5161 ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
5950 ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
5951 UPDATE_SYNTAX_TABLE (charpos); 5162 UPDATE_SYNTAX_TABLE (charpos);
5952#endif
5953 PREFETCH (); 5163 PREFETCH ();
5954 GET_CHAR_AFTER (c2, d, dummy); 5164 GET_CHAR_AFTER (c2, d, dummy);
5955 s2 = SYNTAX (c2); 5165 s2 = SYNTAX (c2);
@@ -5962,9 +5172,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
5962 if (!AT_STRINGS_BEG (d)) 5172 if (!AT_STRINGS_BEG (d))
5963 { 5173 {
5964 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); 5174 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
5965#ifdef emacs
5966 UPDATE_SYNTAX_TABLE_BACKWARD (charpos - 1); 5175 UPDATE_SYNTAX_TABLE_BACKWARD (charpos - 1);
5967#endif
5968 s1 = SYNTAX (c1); 5176 s1 = SYNTAX (c1);
5969 5177
5970 /* ... and S1 is Sword, and WORD_BOUNDARY_P (C1, C2) 5178 /* ... and S1 is Sword, and WORD_BOUNDARY_P (C1, C2)
@@ -5987,14 +5195,12 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
5987 { 5195 {
5988 /* C1 is the character before D, S1 is the syntax of C1, C2 5196 /* C1 is the character before D, S1 is the syntax of C1, C2
5989 is the character at D, and S2 is the syntax of C2. */ 5197 is the character at D, and S2 is the syntax of C2. */
5990 re_wchar_t c1, c2; 5198 int c1, c2;
5991 int s1, s2; 5199 int s1, s2;
5992 int dummy; 5200 int dummy;
5993#ifdef emacs 5201 ptrdiff_t offset = PTR_TO_OFFSET (d) - 1;
5994 ssize_t offset = PTR_TO_OFFSET (d) - 1; 5202 ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
5995 ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
5996 UPDATE_SYNTAX_TABLE (charpos); 5203 UPDATE_SYNTAX_TABLE (charpos);
5997#endif
5998 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); 5204 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
5999 s1 = SYNTAX (c1); 5205 s1 = SYNTAX (c1);
6000 5206
@@ -6007,9 +5213,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
6007 { 5213 {
6008 PREFETCH_NOLIMIT (); 5214 PREFETCH_NOLIMIT ();
6009 GET_CHAR_AFTER (c2, d, dummy); 5215 GET_CHAR_AFTER (c2, d, dummy);
6010#ifdef emacs
6011 UPDATE_SYNTAX_TABLE_FORWARD (charpos); 5216 UPDATE_SYNTAX_TABLE_FORWARD (charpos);
6012#endif
6013 s2 = SYNTAX (c2); 5217 s2 = SYNTAX (c2);
6014 5218
6015 /* ... and S2 is Sword, and WORD_BOUNDARY_P (C1, C2) 5219 /* ... and S2 is Sword, and WORD_BOUNDARY_P (C1, C2)
@@ -6032,13 +5236,11 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
6032 { 5236 {
6033 /* C1 is the character before D, S1 is the syntax of C1, C2 5237 /* C1 is the character before D, S1 is the syntax of C1, C2
6034 is the character at D, and S2 is the syntax of C2. */ 5238 is the character at D, and S2 is the syntax of C2. */
6035 re_wchar_t c1, c2; 5239 int c1, c2;
6036 int s1, s2; 5240 int s1, s2;
6037#ifdef emacs 5241 ptrdiff_t offset = PTR_TO_OFFSET (d);
6038 ssize_t offset = PTR_TO_OFFSET (d); 5242 ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
6039 ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
6040 UPDATE_SYNTAX_TABLE (charpos); 5243 UPDATE_SYNTAX_TABLE (charpos);
6041#endif
6042 PREFETCH (); 5244 PREFETCH ();
6043 c2 = RE_STRING_CHAR (d, target_multibyte); 5245 c2 = RE_STRING_CHAR (d, target_multibyte);
6044 s2 = SYNTAX (c2); 5246 s2 = SYNTAX (c2);
@@ -6051,9 +5253,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
6051 if (!AT_STRINGS_BEG (d)) 5253 if (!AT_STRINGS_BEG (d))
6052 { 5254 {
6053 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); 5255 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
6054#ifdef emacs
6055 UPDATE_SYNTAX_TABLE_BACKWARD (charpos - 1); 5256 UPDATE_SYNTAX_TABLE_BACKWARD (charpos - 1);
6056#endif
6057 s1 = SYNTAX (c1); 5257 s1 = SYNTAX (c1);
6058 5258
6059 /* ... and S1 is Sword or Ssymbol. */ 5259 /* ... and S1 is Sword or Ssymbol. */
@@ -6075,13 +5275,11 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
6075 { 5275 {
6076 /* C1 is the character before D, S1 is the syntax of C1, C2 5276 /* C1 is the character before D, S1 is the syntax of C1, C2
6077 is the character at D, and S2 is the syntax of C2. */ 5277 is the character at D, and S2 is the syntax of C2. */
6078 re_wchar_t c1, c2; 5278 int c1, c2;
6079 int s1, s2; 5279 int s1, s2;
6080#ifdef emacs 5280 ptrdiff_t offset = PTR_TO_OFFSET (d) - 1;
6081 ssize_t offset = PTR_TO_OFFSET (d) - 1; 5281 ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
6082 ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
6083 UPDATE_SYNTAX_TABLE (charpos); 5282 UPDATE_SYNTAX_TABLE (charpos);
6084#endif
6085 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); 5283 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
6086 s1 = SYNTAX (c1); 5284 s1 = SYNTAX (c1);
6087 5285
@@ -6094,9 +5292,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
6094 { 5292 {
6095 PREFETCH_NOLIMIT (); 5293 PREFETCH_NOLIMIT ();
6096 c2 = RE_STRING_CHAR (d, target_multibyte); 5294 c2 = RE_STRING_CHAR (d, target_multibyte);
6097#ifdef emacs
6098 UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1); 5295 UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
6099#endif
6100 s2 = SYNTAX (c2); 5296 s2 = SYNTAX (c2);
6101 5297
6102 /* ... and S2 is Sword or Ssymbol. */ 5298 /* ... and S2 is Sword or Ssymbol. */
@@ -6109,21 +5305,19 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
6109 case syntaxspec: 5305 case syntaxspec:
6110 case notsyntaxspec: 5306 case notsyntaxspec:
6111 { 5307 {
6112 boolean not = (re_opcode_t) *(p - 1) == notsyntaxspec; 5308 bool not = (re_opcode_t) *(p - 1) == notsyntaxspec;
6113 mcnt = *p++; 5309 mcnt = *p++;
6114 DEBUG_PRINT ("EXECUTING %ssyntaxspec %d.\n", not ? "not" : "", 5310 DEBUG_PRINT ("EXECUTING %ssyntaxspec %d.\n", not ? "not" : "",
6115 mcnt); 5311 mcnt);
6116 PREFETCH (); 5312 PREFETCH ();
6117#ifdef emacs
6118 { 5313 {
6119 ssize_t offset = PTR_TO_OFFSET (d); 5314 ptrdiff_t offset = PTR_TO_OFFSET (d);
6120 ssize_t pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (offset); 5315 ptrdiff_t pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
6121 UPDATE_SYNTAX_TABLE (pos1); 5316 UPDATE_SYNTAX_TABLE (pos1);
6122 } 5317 }
6123#endif
6124 { 5318 {
6125 int len; 5319 int len;
6126 re_wchar_t c; 5320 int c;
6127 5321
6128 GET_CHAR_AFTER (c, d, len); 5322 GET_CHAR_AFTER (c, d, len);
6129 if ((SYNTAX (c) != (enum syntaxcode) mcnt) ^ not) 5323 if ((SYNTAX (c) != (enum syntaxcode) mcnt) ^ not)
@@ -6133,7 +5327,6 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
6133 } 5327 }
6134 break; 5328 break;
6135 5329
6136#ifdef emacs
6137 case at_dot: 5330 case at_dot:
6138 DEBUG_PRINT ("EXECUTING at_dot.\n"); 5331 DEBUG_PRINT ("EXECUTING at_dot.\n");
6139 if (PTR_BYTE_POS (d) != PT_BYTE) 5332 if (PTR_BYTE_POS (d) != PT_BYTE)
@@ -6143,7 +5336,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
6143 case categoryspec: 5336 case categoryspec:
6144 case notcategoryspec: 5337 case notcategoryspec:
6145 { 5338 {
6146 boolean not = (re_opcode_t) *(p - 1) == notcategoryspec; 5339 bool not = (re_opcode_t) *(p - 1) == notcategoryspec;
6147 mcnt = *p++; 5340 mcnt = *p++;
6148 DEBUG_PRINT ("EXECUTING %scategoryspec %d.\n", 5341 DEBUG_PRINT ("EXECUTING %scategoryspec %d.\n",
6149 not ? "not" : "", mcnt); 5342 not ? "not" : "", mcnt);
@@ -6151,7 +5344,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
6151 5344
6152 { 5345 {
6153 int len; 5346 int len;
6154 re_wchar_t c; 5347 int c;
6155 GET_CHAR_AFTER (c, d, len); 5348 GET_CHAR_AFTER (c, d, len);
6156 if ((!CHAR_HAS_CATEGORY (c, mcnt)) ^ not) 5349 if ((!CHAR_HAS_CATEGORY (c, mcnt)) ^ not)
6157 goto fail; 5350 goto fail;
@@ -6160,8 +5353,6 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
6160 } 5353 }
6161 break; 5354 break;
6162 5355
6163#endif /* emacs */
6164
6165 default: 5356 default:
6166 abort (); 5357 abort ();
6167 } 5358 }
@@ -6180,11 +5371,11 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
6180 switch (*pat++) 5371 switch (*pat++)
6181 { 5372 {
6182 case on_failure_keep_string_jump: 5373 case on_failure_keep_string_jump:
6183 assert (str == NULL); 5374 eassert (str == NULL);
6184 goto continue_failure_jump; 5375 goto continue_failure_jump;
6185 5376
6186 case on_failure_jump_nastyloop: 5377 case on_failure_jump_nastyloop:
6187 assert ((re_opcode_t)pat[-2] == no_op); 5378 eassert ((re_opcode_t)pat[-2] == no_op);
6188 PUSH_FAILURE_POINT (pat - 2, str); 5379 PUSH_FAILURE_POINT (pat - 2, str);
6189 FALLTHROUGH; 5380 FALLTHROUGH;
6190 case on_failure_jump_loop: 5381 case on_failure_jump_loop:
@@ -6204,7 +5395,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
6204 abort (); 5395 abort ();
6205 } 5396 }
6206 5397
6207 assert (p >= bufp->buffer && p <= pend); 5398 eassert (p >= bufp->buffer && p <= pend);
6208 5399
6209 if (d >= string1 && d <= end1) 5400 if (d >= string1 && d <= end1)
6210 dend = end_match_1; 5401 dend = end_match_1;
@@ -6216,7 +5407,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
6216 if (best_regs_set) 5407 if (best_regs_set)
6217 goto restore_best_regs; 5408 goto restore_best_regs;
6218 5409
6219 FREE_VARIABLES (); 5410 SAFE_FREE ();
6220 5411
6221 return -1; /* Failure to match. */ 5412 return -1; /* Failure to match. */
6222} 5413}
@@ -6227,8 +5418,8 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
6227 bytes; nonzero otherwise. */ 5418 bytes; nonzero otherwise. */
6228 5419
6229static int 5420static int
6230bcmp_translate (re_char *s1, re_char *s2, ssize_t len, 5421bcmp_translate (re_char *s1, re_char *s2, ptrdiff_t len,
6231 RE_TRANSLATE_TYPE translate, const int target_multibyte) 5422 Lisp_Object translate, int target_multibyte)
6232{ 5423{
6233 re_char *p1 = s1, *p2 = s2; 5424 re_char *p1 = s1, *p2 = s2;
6234 re_char *p1_end = s1 + len; 5425 re_char *p1_end = s1 + len;
@@ -6239,7 +5430,7 @@ bcmp_translate (re_char *s1, re_char *s2, ssize_t len,
6239 while (p1 < p1_end && p2 < p2_end) 5430 while (p1 < p1_end && p2 < p2_end)
6240 { 5431 {
6241 int p1_charlen, p2_charlen; 5432 int p1_charlen, p2_charlen;
6242 re_wchar_t p1_ch, p2_ch; 5433 int p1_ch, p2_ch;
6243 5434
6244 GET_CHAR_AFTER (p1_ch, p1, p1_charlen); 5435 GET_CHAR_AFTER (p1_ch, p1, p1_charlen);
6245 GET_CHAR_AFTER (p2_ch, p2, p2_charlen); 5436 GET_CHAR_AFTER (p2_ch, p2, p2_charlen);
@@ -6270,9 +5461,7 @@ bcmp_translate (re_char *s1, re_char *s2, ssize_t len,
6270 5461
6271const char * 5462const char *
6272re_compile_pattern (const char *pattern, size_t length, 5463re_compile_pattern (const char *pattern, size_t length,
6273#ifdef emacs
6274 bool posix_backtracking, const char *whitespace_regexp, 5464 bool posix_backtracking, const char *whitespace_regexp,
6275#endif
6276 struct re_pattern_buffer *bufp) 5465 struct re_pattern_buffer *bufp)
6277{ 5466{
6278 reg_errcode_t ret; 5467 reg_errcode_t ret;
@@ -6282,334 +5471,16 @@ re_compile_pattern (const char *pattern, size_t length,
6282 bufp->regs_allocated = REGS_UNALLOCATED; 5471 bufp->regs_allocated = REGS_UNALLOCATED;
6283 5472
6284 /* And GNU code determines whether or not to get register information 5473 /* And GNU code determines whether or not to get register information
6285 by passing null for the REGS argument to re_match, etc., not by 5474 by passing null for the REGS argument to re_search, etc., not by
6286 setting no_sub. */ 5475 setting no_sub. */
6287 bufp->no_sub = 0; 5476 bufp->no_sub = 0;
6288 5477
6289 ret = regex_compile ((re_char *) pattern, length, 5478 ret = regex_compile ((re_char *) pattern, length,
6290#ifdef emacs
6291 posix_backtracking, 5479 posix_backtracking,
6292 whitespace_regexp, 5480 whitespace_regexp,
6293#else
6294 re_syntax_options,
6295#endif
6296 bufp); 5481 bufp);
6297 5482
6298 if (!ret) 5483 if (!ret)
6299 return NULL; 5484 return NULL;
6300 return gettext (re_error_msgid[(int) ret]); 5485 return re_error_msgid[ret];
6301}
6302WEAK_ALIAS (__re_compile_pattern, re_compile_pattern)
6303
6304/* Entry points compatible with 4.2 BSD regex library. We don't define
6305 them unless specifically requested. */
6306
6307#if defined _REGEX_RE_COMP || defined _LIBC
6308
6309/* BSD has one and only one pattern buffer. */
6310static struct re_pattern_buffer re_comp_buf;
6311
6312char *
6313# ifdef _LIBC
6314/* Make these definitions weak in libc, so POSIX programs can redefine
6315 these names if they don't use our functions, and still use
6316 regcomp/regexec below without link errors. */
6317weak_function
6318# endif
6319re_comp (const char *s)
6320{
6321 reg_errcode_t ret;
6322
6323 if (!s)
6324 {
6325 if (!re_comp_buf.buffer)
6326 /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */
6327 return (char *) gettext ("No previous regular expression");
6328 return 0;
6329 }
6330
6331 if (!re_comp_buf.buffer)
6332 {
6333 re_comp_buf.buffer = malloc (200);
6334 if (re_comp_buf.buffer == NULL)
6335 /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */
6336 return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
6337 re_comp_buf.allocated = 200;
6338
6339 re_comp_buf.fastmap = malloc (1 << BYTEWIDTH);
6340 if (re_comp_buf.fastmap == NULL)
6341 /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */
6342 return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
6343 }
6344
6345 /* Since `re_exec' always passes NULL for the `regs' argument, we
6346 don't need to initialize the pattern buffer fields which affect it. */
6347
6348 ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
6349
6350 if (!ret)
6351 return NULL;
6352
6353 /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */
6354 return (char *) gettext (re_error_msgid[(int) ret]);
6355}
6356
6357
6358int
6359# ifdef _LIBC
6360weak_function
6361# endif
6362re_exec (const char *s)
6363{
6364 const size_t len = strlen (s);
6365 return re_search (&re_comp_buf, s, len, 0, len, 0) >= 0;
6366} 5486}
6367#endif /* _REGEX_RE_COMP */
6368
6369/* POSIX.2 functions. Don't define these for Emacs. */
6370
6371#ifndef emacs
6372
6373/* regcomp takes a regular expression as a string and compiles it.
6374
6375 PREG is a regex_t *. We do not expect any fields to be initialized,
6376 since POSIX says we shouldn't. Thus, we set
6377
6378 `buffer' to the compiled pattern;
6379 `used' to the length of the compiled pattern;
6380 `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
6381 REG_EXTENDED bit in CFLAGS is set; otherwise, to
6382 RE_SYNTAX_POSIX_BASIC;
6383 `fastmap' to an allocated space for the fastmap;
6384 `fastmap_accurate' to zero;
6385 `re_nsub' to the number of subexpressions in PATTERN.
6386
6387 PATTERN is the address of the pattern string.
6388
6389 CFLAGS is a series of bits which affect compilation.
6390
6391 If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
6392 use POSIX basic syntax.
6393
6394 If REG_NEWLINE is set, then . and [^...] don't match newline.
6395 Also, regexec will try a match beginning after every newline.
6396
6397 If REG_ICASE is set, then we considers upper- and lowercase
6398 versions of letters to be equivalent when matching.
6399
6400 If REG_NOSUB is set, then when PREG is passed to regexec, that
6401 routine will report only success or failure, and nothing about the
6402 registers.
6403
6404 It returns 0 if it succeeds, nonzero if it doesn't. (See regex-emacs.h for
6405 the return codes and their meanings.) */
6406
6407reg_errcode_t
6408regcomp (regex_t *_Restrict_ preg, const char *_Restrict_ pattern,
6409 int cflags)
6410{
6411 reg_errcode_t ret;
6412 reg_syntax_t syntax
6413 = (cflags & REG_EXTENDED) ?
6414 RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
6415
6416 /* regex_compile will allocate the space for the compiled pattern. */
6417 preg->buffer = 0;
6418 preg->allocated = 0;
6419 preg->used = 0;
6420
6421 /* Try to allocate space for the fastmap. */
6422 preg->fastmap = malloc (1 << BYTEWIDTH);
6423
6424 if (cflags & REG_ICASE)
6425 {
6426 unsigned i;
6427
6428 preg->translate = malloc (CHAR_SET_SIZE * sizeof *preg->translate);
6429 if (preg->translate == NULL)
6430 return (int) REG_ESPACE;
6431
6432 /* Map uppercase characters to corresponding lowercase ones. */
6433 for (i = 0; i < CHAR_SET_SIZE; i++)
6434 preg->translate[i] = ISUPPER (i) ? TOLOWER (i) : i;
6435 }
6436 else
6437 preg->translate = NULL;
6438
6439 /* If REG_NEWLINE is set, newlines are treated differently. */
6440 if (cflags & REG_NEWLINE)
6441 { /* REG_NEWLINE implies neither . nor [^...] match newline. */
6442 syntax &= ~RE_DOT_NEWLINE;
6443 syntax |= RE_HAT_LISTS_NOT_NEWLINE;
6444 }
6445 else
6446 syntax |= RE_NO_NEWLINE_ANCHOR;
6447
6448 preg->no_sub = !!(cflags & REG_NOSUB);
6449
6450 /* POSIX says a null character in the pattern terminates it, so we
6451 can use strlen here in compiling the pattern. */
6452 ret = regex_compile ((re_char *) pattern, strlen (pattern), syntax, preg);
6453
6454 /* POSIX doesn't distinguish between an unmatched open-group and an
6455 unmatched close-group: both are REG_EPAREN. */
6456 if (ret == REG_ERPAREN)
6457 ret = REG_EPAREN;
6458
6459 if (ret == REG_NOERROR && preg->fastmap)
6460 { /* Compute the fastmap now, since regexec cannot modify the pattern
6461 buffer. */
6462 re_compile_fastmap (preg);
6463 if (preg->can_be_null)
6464 { /* The fastmap can't be used anyway. */
6465 free (preg->fastmap);
6466 preg->fastmap = NULL;
6467 }
6468 }
6469 return ret;
6470}
6471WEAK_ALIAS (__regcomp, regcomp)
6472
6473
6474/* regexec searches for a given pattern, specified by PREG, in the
6475 string STRING.
6476
6477 If NMATCH is zero or REG_NOSUB was set in the cflags argument to
6478 `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
6479 least NMATCH elements, and we set them to the offsets of the
6480 corresponding matched substrings.
6481
6482 EFLAGS specifies `execution flags' which affect matching: if
6483 REG_NOTBOL is set, then ^ does not match at the beginning of the
6484 string; if REG_NOTEOL is set, then $ does not match at the end.
6485
6486 We return 0 if we find a match and REG_NOMATCH if not. */
6487
6488reg_errcode_t
6489regexec (const regex_t *_Restrict_ preg, const char *_Restrict_ string,
6490 size_t nmatch, regmatch_t pmatch[_Restrict_arr_], int eflags)
6491{
6492 regoff_t ret;
6493 struct re_registers regs;
6494 regex_t private_preg;
6495 size_t len = strlen (string);
6496 boolean want_reg_info = !preg->no_sub && nmatch > 0 && pmatch;
6497
6498 private_preg = *preg;
6499
6500 private_preg.not_bol = !!(eflags & REG_NOTBOL);
6501 private_preg.not_eol = !!(eflags & REG_NOTEOL);
6502
6503 /* The user has told us exactly how many registers to return
6504 information about, via `nmatch'. We have to pass that on to the
6505 matching routines. */
6506 private_preg.regs_allocated = REGS_FIXED;
6507
6508 if (want_reg_info)
6509 {
6510 regs.num_regs = nmatch;
6511 regs.start = TALLOC (nmatch * 2, regoff_t);
6512 if (regs.start == NULL)
6513 return REG_NOMATCH;
6514 regs.end = regs.start + nmatch;
6515 }
6516
6517 /* Instead of using not_eol to implement REG_NOTEOL, we could simply
6518 pass (&private_preg, string, len + 1, 0, len, ...) pretending the string
6519 was a little bit longer but still only matching the real part.
6520 This works because the `endline' will check for a '\n' and will find a
6521 '\0', correctly deciding that this is not the end of a line.
6522 But it doesn't work out so nicely for REG_NOTBOL, since we don't have
6523 a convenient '\0' there. For all we know, the string could be preceded
6524 by '\n' which would throw things off. */
6525
6526 /* Perform the searching operation. */
6527 ret = re_search (&private_preg, string, len,
6528 /* start: */ 0, /* range: */ len,
6529 want_reg_info ? &regs : 0);
6530
6531 /* Copy the register information to the POSIX structure. */
6532 if (want_reg_info)
6533 {
6534 if (ret >= 0)
6535 {
6536 unsigned r;
6537
6538 for (r = 0; r < nmatch; r++)
6539 {
6540 pmatch[r].rm_so = regs.start[r];
6541 pmatch[r].rm_eo = regs.end[r];
6542 }
6543 }
6544
6545 /* If we needed the temporary register info, free the space now. */
6546 free (regs.start);
6547 }
6548
6549 /* We want zero return to mean success, unlike `re_search'. */
6550 return ret >= 0 ? REG_NOERROR : REG_NOMATCH;
6551}
6552WEAK_ALIAS (__regexec, regexec)
6553
6554
6555/* Returns a message corresponding to an error code, ERR_CODE, returned
6556 from either regcomp or regexec. We don't use PREG here.
6557
6558 ERR_CODE was previously called ERRCODE, but that name causes an
6559 error with msvc8 compiler. */
6560
6561size_t
6562regerror (int err_code, const regex_t *preg, char *errbuf, size_t errbuf_size)
6563{
6564 const char *msg;
6565 size_t msg_size;
6566
6567 if (err_code < 0
6568 || err_code >= (sizeof (re_error_msgid) / sizeof (re_error_msgid[0])))
6569 /* Only error codes returned by the rest of the code should be passed
6570 to this routine. If we are given anything else, or if other regex
6571 code generates an invalid error code, then the program has a bug.
6572 Dump core so we can fix it. */
6573 abort ();
6574
6575 msg = gettext (re_error_msgid[err_code]);
6576
6577 msg_size = strlen (msg) + 1; /* Includes the null. */
6578
6579 if (errbuf_size != 0)
6580 {
6581 if (msg_size > errbuf_size)
6582 {
6583 memcpy (errbuf, msg, errbuf_size - 1);
6584 errbuf[errbuf_size - 1] = 0;
6585 }
6586 else
6587 strcpy (errbuf, msg);
6588 }
6589
6590 return msg_size;
6591}
6592WEAK_ALIAS (__regerror, regerror)
6593
6594
6595/* Free dynamically allocated space used by PREG. */
6596
6597void
6598regfree (regex_t *preg)
6599{
6600 free (preg->buffer);
6601 preg->buffer = NULL;
6602
6603 preg->allocated = 0;
6604 preg->used = 0;
6605
6606 free (preg->fastmap);
6607 preg->fastmap = NULL;
6608 preg->fastmap_accurate = 0;
6609
6610 free (preg->translate);
6611 preg->translate = NULL;
6612}
6613WEAK_ALIAS (__regfree, regfree)
6614
6615#endif /* not emacs */
diff --git a/src/regex-emacs.h b/src/regex-emacs.h
index 9a6214af98c..159c7dcb9b8 100644
--- a/src/regex-emacs.h
+++ b/src/regex-emacs.h
@@ -17,163 +17,24 @@
17 You should have received a copy of the GNU General Public License 17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <https://www.gnu.org/licenses/>. */ 18 along with this program. If not, see <https://www.gnu.org/licenses/>. */
19 19
20#ifndef _REGEX_H 20#ifndef EMACS_REGEX_H
21#define _REGEX_H 1 21#define EMACS_REGEX_H 1
22 22
23#if defined emacs && (defined _REGEX_RE_COMP || defined _LIBC) 23#include <stddef.h>
24/* We're not defining re_set_syntax and using a different prototype of 24
25 re_compile_pattern when building Emacs so fail compilation early with 25/* This is the structure we store register match data in. See
26 a (somewhat helpful) error message when conflict is detected. */ 26 regex.texinfo for a full description of what registers match.
27# error "_REGEX_RE_COMP nor _LIBC can be defined if emacs is defined." 27 Declare this before including lisp.h, since lisp.h (via thread.h)
28#endif 28 uses struct re_registers. */
29 29struct re_registers
30#include <sys/types.h> 30{
31 31 unsigned num_regs;
32/* Allow the use in C++ code. */ 32 ptrdiff_t *start;
33#ifdef __cplusplus 33 ptrdiff_t *end;
34extern "C" { 34};
35#endif 35
36 36#include "lisp.h"
37#if !defined _POSIX_C_SOURCE && !defined _POSIX_SOURCE && defined VMS 37
38/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
39 should be there. */
40# include <stddef.h>
41#endif
42
43/* The following bits are used to determine the regexp syntax we
44 recognize. The set/not-set meanings were historically chosen so
45 that Emacs syntax had the value 0.
46 The bits are given in alphabetical order, and
47 the definitions shifted by one from the previous bit; thus, when we
48 add or remove a bit, only one other definition need change. */
49typedef unsigned long reg_syntax_t;
50
51/* If this bit is not set, then \ inside a bracket expression is literal.
52 If set, then such a \ quotes the following character. */
53#define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1)
54
55/* If this bit is not set, then + and ? are operators, and \+ and \? are
56 literals.
57 If set, then \+ and \? are operators and + and ? are literals. */
58#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
59
60/* If this bit is set, then character classes are supported. They are:
61 [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:],
62 [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
63 If not set, then character classes are not supported. */
64#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
65
66/* If this bit is set, then ^ and $ are always anchors (outside bracket
67 expressions, of course).
68 If this bit is not set, then it depends:
69 ^ is an anchor if it is at the beginning of a regular
70 expression or after an open-group or an alternation operator;
71 $ is an anchor if it is at the end of a regular expression, or
72 before a close-group or an alternation operator.
73
74 This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
75 POSIX draft 11.2 says that * etc. in leading positions is undefined.
76 We already implemented a previous draft which made those constructs
77 invalid, though, so we haven't changed the code back. */
78#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
79
80/* If this bit is set, then special characters are always special
81 regardless of where they are in the pattern.
82 If this bit is not set, then special characters are special only in
83 some contexts; otherwise they are ordinary. Specifically,
84 * + ? and intervals are only special when not after the beginning,
85 open-group, or alternation operator. */
86#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
87
88/* If this bit is set, then *, +, ?, and { cannot be first in an re or
89 immediately after an alternation or begin-group operator. */
90#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
91
92/* If this bit is set, then . matches newline.
93 If not set, then it doesn't. */
94#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
95
96/* If this bit is set, then . doesn't match NUL.
97 If not set, then it does. */
98#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
99
100/* If this bit is set, nonmatching lists [^...] do not match newline.
101 If not set, they do. */
102#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
103
104/* If this bit is set, either \{...\} or {...} defines an
105 interval, depending on RE_NO_BK_BRACES.
106 If not set, \{, \}, {, and } are literals. */
107#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
108
109/* If this bit is set, +, ? and | aren't recognized as operators.
110 If not set, they are. */
111#define RE_LIMITED_OPS (RE_INTERVALS << 1)
112
113/* If this bit is set, newline is an alternation operator.
114 If not set, newline is literal. */
115#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
116
117/* If this bit is set, then `{...}' defines an interval, and \{ and \}
118 are literals.
119 If not set, then `\{...\}' defines an interval. */
120#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
121
122/* If this bit is set, (...) defines a group, and \( and \) are literals.
123 If not set, \(...\) defines a group, and ( and ) are literals. */
124#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
125
126/* If this bit is set, then \<digit> matches <digit>.
127 If not set, then \<digit> is a back-reference. */
128#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
129
130/* If this bit is set, then | is an alternation operator, and \| is literal.
131 If not set, then \| is an alternation operator, and | is literal. */
132#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
133
134/* If this bit is set, then an ending range point collating lower
135 than the starting range point, as in [z-a], is invalid.
136 If not set, then when the ending range point collates lower than the
137 starting range point, the range is ignored. */
138#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
139
140/* If this bit is set, then an unmatched ) is ordinary.
141 If not set, then an unmatched ) is invalid. */
142#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
143
144/* If this bit is set, succeed as soon as we match the whole pattern,
145 without further backtracking. */
146#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)
147
148/* If this bit is set, do not process the GNU regex operators.
149 If not set, then the GNU regex operators are recognized. */
150#define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)
151
152/* If this bit is set, then *?, +? and ?? match non greedily. */
153#define RE_FRUGAL (RE_NO_GNU_OPS << 1)
154
155/* If this bit is set, then (?:...) is treated as a shy group. */
156#define RE_SHY_GROUPS (RE_FRUGAL << 1)
157
158/* If this bit is set, ^ and $ only match at beg/end of buffer. */
159#define RE_NO_NEWLINE_ANCHOR (RE_SHY_GROUPS << 1)
160
161/* If this bit is set, turn on internal regex debugging.
162 If not set, and debugging was on, turn it off.
163 This only works if regex-emacs.c is compiled -DDEBUG.
164 We define this bit always, so that all that's needed to turn on
165 debugging is to recompile regex-emacs.c; the calling code can always have
166 this bit set, and it won't affect anything in the normal case. */
167#define RE_DEBUG (RE_NO_NEWLINE_ANCHOR << 1)
168
169/* This global variable defines the particular regexp syntax to use (for
170 some interfaces). When a regexp is compiled, the syntax used is
171 stored in the pattern buffer, so changing this does not affect
172 already-compiled regexps. */
173/* extern reg_syntax_t re_syntax_options; */
174
175#ifdef emacs
176# include "lisp.h"
177/* In Emacs, this is the string or buffer in which we are matching. 38/* In Emacs, this is the string or buffer in which we are matching.
178 It is used for looking up syntax properties. 39 It is used for looking up syntax properties.
179 40
@@ -187,187 +48,23 @@ typedef unsigned long reg_syntax_t;
187 and match functions. These functions capture the current value of 48 and match functions. These functions capture the current value of
188 re_match_object into gl_state on entry. 49 re_match_object into gl_state on entry.
189 50
190 TODO: once we get rid of the !emacs case in this code, turn into an 51 TODO: turn into an actual function parameter. */
191 actual function parameter. */
192extern Lisp_Object re_match_object; 52extern Lisp_Object re_match_object;
193#endif
194 53
195/* Roughly the maximum number of failure points on the stack. */ 54/* Roughly the maximum number of failure points on the stack. */
196extern size_t emacs_re_max_failures; 55extern size_t emacs_re_max_failures;
197 56
198#ifdef emacs
199/* Amount of memory that we can safely stack allocate. */ 57/* Amount of memory that we can safely stack allocate. */
200extern ptrdiff_t emacs_re_safe_alloca; 58extern ptrdiff_t emacs_re_safe_alloca;
201#endif
202
203
204/* Define combinations of the above bits for the standard possibilities.
205 (The [[[ comments delimit what gets put into the Texinfo file, so
206 don't delete them!) */
207/* [[[begin syntaxes]]] */
208#define RE_SYNTAX_EMACS \
209 (RE_CHAR_CLASSES | RE_INTERVALS | RE_SHY_GROUPS | RE_FRUGAL)
210
211#define RE_SYNTAX_AWK \
212 (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \
213 | RE_NO_BK_PARENS | RE_NO_BK_REFS \
214 | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \
215 | RE_DOT_NEWLINE | RE_CONTEXT_INDEP_ANCHORS \
216 | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)
217
218#define RE_SYNTAX_GNU_AWK \
219 ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG) \
220 & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS))
221
222#define RE_SYNTAX_POSIX_AWK \
223 (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \
224 | RE_INTERVALS | RE_NO_GNU_OPS)
225
226#define RE_SYNTAX_GREP \
227 (RE_BK_PLUS_QM | RE_CHAR_CLASSES \
228 | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \
229 | RE_NEWLINE_ALT)
230
231#define RE_SYNTAX_EGREP \
232 (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \
233 | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \
234 | RE_NEWLINE_ALT | RE_NO_BK_PARENS \
235 | RE_NO_BK_VBAR)
236
237#define RE_SYNTAX_POSIX_EGREP \
238 (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES)
239
240/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */
241#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
242
243#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
244
245/* Syntax bits common to both basic and extended POSIX regex syntax. */
246#define _RE_SYNTAX_POSIX_COMMON \
247 (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \
248 | RE_INTERVALS | RE_NO_EMPTY_RANGES)
249
250#define RE_SYNTAX_POSIX_BASIC \
251 (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)
252
253/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
254 RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this
255 isn't minimal, since other operators, such as \`, aren't disabled. */
256#define RE_SYNTAX_POSIX_MINIMAL_BASIC \
257 (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
258
259#define RE_SYNTAX_POSIX_EXTENDED \
260 (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
261 | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \
262 | RE_NO_BK_PARENS | RE_NO_BK_VBAR \
263 | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD)
264
265/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is
266 removed and RE_NO_BK_REFS is added. */
267#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \
268 (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
269 | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \
270 | RE_NO_BK_PARENS | RE_NO_BK_REFS \
271 | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD)
272/* [[[end syntaxes]]] */
273 59
274/* Maximum number of duplicates an interval can allow. Some systems
275 (erroneously) define this in other header files, but we want our
276 value, so remove any previous define. */
277#ifdef RE_DUP_MAX
278# undef RE_DUP_MAX
279#endif
280/* Repeat counts are stored in opcodes as 2 byte integers. This was
281 previously limited to 7fff because the parsing code uses signed
282 ints. But Emacs only runs on 32 bit platforms anyway. */
283#define RE_DUP_MAX (0xffff)
284
285
286/* POSIX `cflags' bits (i.e., information for `regcomp'). */
287
288/* If this bit is set, then use extended regular expression syntax.
289 If not set, then use basic regular expression syntax. */
290#define REG_EXTENDED 1
291
292/* If this bit is set, then ignore case when matching.
293 If not set, then case is significant. */
294#define REG_ICASE (REG_EXTENDED << 1)
295
296/* If this bit is set, then anchors do not match at newline
297 characters in the string.
298 If not set, then anchors do match at newlines. */
299#define REG_NEWLINE (REG_ICASE << 1)
300
301/* If this bit is set, then report only success or fail in regexec.
302 If not set, then returns differ between not matching and errors. */
303#define REG_NOSUB (REG_NEWLINE << 1)
304
305
306/* POSIX `eflags' bits (i.e., information for regexec). */
307
308/* If this bit is set, then the beginning-of-line operator doesn't match
309 the beginning of the string (presumably because it's not the
310 beginning of a line).
311 If not set, then the beginning-of-line operator does match the
312 beginning of the string. */
313#define REG_NOTBOL 1
314
315/* Like REG_NOTBOL, except for the end-of-line. */
316#define REG_NOTEOL (1 << 1)
317
318
319/* If any error codes are removed, changed, or added, update the
320 `re_error_msgid' table in regex-emacs.c. */
321typedef enum
322{
323#ifdef _XOPEN_SOURCE
324 REG_ENOSYS = -1, /* This will never happen for this implementation. */
325#endif
326
327 REG_NOERROR = 0, /* Success. */
328 REG_NOMATCH, /* Didn't find a match (for regexec). */
329
330 /* POSIX regcomp return error codes. (In the order listed in the
331 standard.) */
332 REG_BADPAT, /* Invalid pattern. */
333 REG_ECOLLATE, /* Not implemented. */
334 REG_ECTYPE, /* Invalid character class name. */
335 REG_EESCAPE, /* Trailing backslash. */
336 REG_ESUBREG, /* Invalid back reference. */
337 REG_EBRACK, /* Unmatched left bracket. */
338 REG_EPAREN, /* Parenthesis imbalance. */
339 REG_EBRACE, /* Unmatched \{. */
340 REG_BADBR, /* Invalid contents of \{\}. */
341 REG_ERANGE, /* Invalid range end. */
342 REG_ESPACE, /* Ran out of memory. */
343 REG_BADRPT, /* No preceding re for repetition op. */
344
345 /* Error codes we've added. */
346 REG_EEND, /* Premature end. */
347 REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */
348 REG_ERPAREN, /* Unmatched ) or \); not returned from regcomp. */
349 REG_ERANGEX, /* Range striding over charsets. */
350 REG_ESIZEBR /* n or m too big in \{n,m\} */
351} reg_errcode_t;
352
353/* Use a type compatible with Emacs. */
354#define RE_TRANSLATE_TYPE Lisp_Object
355#define RE_TRANSLATE(TBL, C) char_table_translate (TBL, C)
356#define RE_TRANSLATE_P(TBL) (!EQ (TBL, make_number (0)))
357
358/* This data structure represents a compiled pattern. Before calling 60/* This data structure represents a compiled pattern. Before calling
359 the pattern compiler, the fields `buffer', `allocated', `fastmap', 61 the pattern compiler, the fields `buffer', `allocated', `fastmap',
360 `translate', and `no_sub' can be set. After the pattern has been 62 `translate', and `no_sub' can be set. After the pattern has been
361 compiled, the `re_nsub' field is available. All other fields are 63 compiled, the `re_nsub' field is available. All other fields are
362 private to the regex routines. */ 64 private to the regex routines. */
363 65
364#ifndef RE_TRANSLATE_TYPE
365# define RE_TRANSLATE_TYPE char *
366#endif
367
368struct re_pattern_buffer 66struct re_pattern_buffer
369{ 67{
370/* [[[begin pattern_buffer]]] */
371 /* Space that holds the compiled pattern. It is declared as 68 /* Space that holds the compiled pattern. It is declared as
372 `unsigned char *' because its elements are 69 `unsigned char *' because its elements are
373 sometimes used as array indexes. */ 70 sometimes used as array indexes. */
@@ -379,13 +76,9 @@ struct re_pattern_buffer
379 /* Number of bytes actually used in `buffer'. */ 76 /* Number of bytes actually used in `buffer'. */
380 size_t used; 77 size_t used;
381 78
382#ifdef emacs
383 /* Charset of unibyte characters at compiling time. */ 79 /* Charset of unibyte characters at compiling time. */
384 int charset_unibyte; 80 int charset_unibyte;
385#else 81
386 /* Syntax setting with which the pattern was compiled. */
387 reg_syntax_t syntax;
388#endif
389 /* Pointer to a fastmap, if any, otherwise zero. re_search uses 82 /* Pointer to a fastmap, if any, otherwise zero. re_search uses
390 the fastmap, if there is one, to skip over impossible 83 the fastmap, if there is one, to skip over impossible
391 starting points for matches. */ 84 starting points for matches. */
@@ -395,7 +88,7 @@ struct re_pattern_buffer
395 comparing them, or zero for no translation. The translation 88 comparing them, or zero for no translation. The translation
396 is applied to a pattern when it is compiled and to a string 89 is applied to a pattern when it is compiled and to a string
397 when it is matched. */ 90 when it is matched. */
398 RE_TRANSLATE_TYPE translate; 91 Lisp_Object translate;
399 92
400 /* Number of subexpressions found by the compiler. */ 93 /* Number of subexpressions found by the compiler. */
401 size_t re_nsub; 94 size_t re_nsub;
@@ -410,9 +103,6 @@ struct re_pattern_buffer
410 for `max (RE_NREGS, re_nsub + 1)' groups. 103 for `max (RE_NREGS, re_nsub + 1)' groups.
411 If REGS_REALLOCATE, reallocate space if necessary. 104 If REGS_REALLOCATE, reallocate space if necessary.
412 If REGS_FIXED, use what's there. */ 105 If REGS_FIXED, use what's there. */
413#define REGS_UNALLOCATED 0
414#define REGS_REALLOCATE 1
415#define REGS_FIXED 2
416 unsigned regs_allocated : 2; 106 unsigned regs_allocated : 2;
417 107
418 /* Set to zero when `regex_compile' compiles a pattern; set to one 108 /* Set to zero when `regex_compile' compiles a pattern; set to one
@@ -434,7 +124,6 @@ struct re_pattern_buffer
434 so the compiled pattern is only valid for the current syntax table. */ 124 so the compiled pattern is only valid for the current syntax table. */
435 unsigned used_syntax : 1; 125 unsigned used_syntax : 1;
436 126
437#ifdef emacs
438 /* If true, multi-byte form in the regexp pattern should be 127 /* If true, multi-byte form in the regexp pattern should be
439 recognized as a multibyte character. */ 128 recognized as a multibyte character. */
440 unsigned multibyte : 1; 129 unsigned multibyte : 1;
@@ -442,72 +131,17 @@ struct re_pattern_buffer
442 /* If true, multi-byte form in the target of match should be 131 /* If true, multi-byte form in the target of match should be
443 recognized as a multibyte character. */ 132 recognized as a multibyte character. */
444 unsigned target_multibyte : 1; 133 unsigned target_multibyte : 1;
445#endif
446
447/* [[[end pattern_buffer]]] */
448}; 134};
449
450typedef struct re_pattern_buffer regex_t;
451
452/* POSIX 1003.1-2008 requires that regoff_t be at least as wide as
453 ptrdiff_t and ssize_t. We don't know of any hosts where ptrdiff_t
454 is wider than ssize_t, so ssize_t is safe. ptrdiff_t is not
455 necessarily visible here, so use ssize_t. */
456typedef ssize_t regoff_t;
457
458
459/* This is the structure we store register match data in. See
460 regex.texinfo for a full description of what registers match. */
461struct re_registers
462{
463 unsigned num_regs;
464 regoff_t *start;
465 regoff_t *end;
466};
467
468
469/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
470 `re_match_2' returns information about at least this many registers
471 the first time a `regs' structure is passed. */
472#ifndef RE_NREGS
473# define RE_NREGS 30
474#endif
475
476
477/* POSIX specification for registers. Aside from the different names than
478 `re_registers', POSIX uses an array of structures, instead of a
479 structure of arrays. */
480typedef struct
481{
482 regoff_t rm_so; /* Byte offset from string's start to substring's start. */
483 regoff_t rm_eo; /* Byte offset from string's start to substring's end. */
484} regmatch_t;
485 135
486/* Declarations for routines. */ 136/* Declarations for routines. */
487 137
488#ifndef emacs
489
490/* Sets the current default syntax to SYNTAX, and return the old syntax.
491 You can also simply assign to the `re_syntax_options' variable. */
492extern reg_syntax_t re_set_syntax (reg_syntax_t __syntax);
493
494#endif
495
496/* Compile the regular expression PATTERN, with length LENGTH 138/* Compile the regular expression PATTERN, with length LENGTH
497 and syntax given by the global `re_syntax_options', into the buffer 139 and syntax given by the global `re_syntax_options', into the buffer
498 BUFFER. Return NULL if successful, and an error string if not. */ 140 BUFFER. Return NULL if successful, and an error string if not. */
499extern const char *re_compile_pattern (const char *__pattern, size_t __length, 141extern const char *re_compile_pattern (const char *pattern, size_t length,
500#ifdef emacs
501 bool posix_backtracking, 142 bool posix_backtracking,
502 const char *whitespace_regexp, 143 const char *whitespace_regexp,
503#endif 144 struct re_pattern_buffer *buffer);
504 struct re_pattern_buffer *__buffer);
505
506
507/* Compile a fastmap for the compiled pattern in BUFFER; used to
508 accelerate searches. Return 0 if successful and -2 if was an
509 internal error. */
510extern int re_compile_fastmap (struct re_pattern_buffer *__buffer);
511 145
512 146
513/* Search in the string STRING (with length LENGTH) for the pattern 147/* Search in the string STRING (with length LENGTH) for the pattern
@@ -515,42 +149,36 @@ extern int re_compile_fastmap (struct re_pattern_buffer *__buffer);
515 characters. Return the starting position of the match, -1 for no 149 characters. Return the starting position of the match, -1 for no
516 match, or -2 for an internal error. Also return register 150 match, or -2 for an internal error. Also return register
517 information in REGS (if REGS and BUFFER->no_sub are nonzero). */ 151 information in REGS (if REGS and BUFFER->no_sub are nonzero). */
518extern regoff_t re_search (struct re_pattern_buffer *__buffer, 152extern ptrdiff_t re_search (struct re_pattern_buffer *buffer,
519 const char *__string, size_t __length, 153 const char *string, size_t length,
520 ssize_t __start, ssize_t __range, 154 ptrdiff_t start, ptrdiff_t range,
521 struct re_registers *__regs); 155 struct re_registers *regs);
522 156
523 157
524/* Like `re_search', but search in the concatenation of STRING1 and 158/* Like `re_search', but search in the concatenation of STRING1 and
525 STRING2. Also, stop searching at index START + STOP. */ 159 STRING2. Also, stop searching at index START + STOP. */
526extern regoff_t re_search_2 (struct re_pattern_buffer *__buffer, 160extern ptrdiff_t re_search_2 (struct re_pattern_buffer *buffer,
527 const char *__string1, size_t __length1, 161 const char *string1, size_t length1,
528 const char *__string2, size_t __length2, 162 const char *string2, size_t length2,
529 ssize_t __start, ssize_t __range, 163 ptrdiff_t start, ptrdiff_t range,
530 struct re_registers *__regs, 164 struct re_registers *regs,
531 ssize_t __stop); 165 ptrdiff_t stop);
532 166
533 167
534/* Like `re_search', but return how many characters in STRING the regexp 168/* Like 're_search_2', but return how many characters in STRING the regexp
535 in BUFFER matched, starting at position START. */ 169 in BUFFER matched, starting at position START. */
536extern regoff_t re_match (struct re_pattern_buffer *__buffer, 170extern ptrdiff_t re_match_2 (struct re_pattern_buffer *buffer,
537 const char *__string, size_t __length, 171 const char *string1, size_t length1,
538 ssize_t __start, struct re_registers *__regs); 172 const char *string2, size_t length2,
539 173 ptrdiff_t start, struct re_registers *regs,
540 174 ptrdiff_t stop);
541/* Relates to `re_match' as `re_search_2' relates to `re_search'. */
542extern regoff_t re_match_2 (struct re_pattern_buffer *__buffer,
543 const char *__string1, size_t __length1,
544 const char *__string2, size_t __length2,
545 ssize_t __start, struct re_registers *__regs,
546 ssize_t __stop);
547 175
548 176
549/* Set REGS to hold NUM_REGS registers, storing them in STARTS and 177/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
550 ENDS. Subsequent matches using BUFFER and REGS will use this memory 178 ENDS. Subsequent matches using BUFFER and REGS will use this memory
551 for recording register information. STARTS and ENDS must be 179 for recording register information. STARTS and ENDS must be
552 allocated with malloc, and must each be at least `NUM_REGS * sizeof 180 allocated with malloc, and must each be at least `NUM_REGS * sizeof
553 (regoff_t)' bytes long. 181 (ptrdiff_t)' bytes long.
554 182
555 If NUM_REGS == 0, then subsequent matches should allocate their own 183 If NUM_REGS == 0, then subsequent matches should allocate their own
556 register data. 184 register data.
@@ -558,83 +186,10 @@ extern regoff_t re_match_2 (struct re_pattern_buffer *__buffer,
558 Unless this function is called, the first search or match using 186 Unless this function is called, the first search or match using
559 PATTERN_BUFFER will allocate its own register data, without 187 PATTERN_BUFFER will allocate its own register data, without
560 freeing the old data. */ 188 freeing the old data. */
561extern void re_set_registers (struct re_pattern_buffer *__buffer, 189extern void re_set_registers (struct re_pattern_buffer *buffer,
562 struct re_registers *__regs, 190 struct re_registers *regs,
563 unsigned __num_regs, 191 unsigned num_regs,
564 regoff_t *__starts, regoff_t *__ends); 192 ptrdiff_t *starts, ptrdiff_t *ends);
565
566#if defined _REGEX_RE_COMP || defined _LIBC
567# ifndef _CRAY
568/* 4.2 bsd compatibility. */
569extern char *re_comp (const char *);
570extern int re_exec (const char *);
571# endif
572#endif
573
574/* GCC 2.95 and later have "__restrict"; C99 compilers have
575 "restrict", and "configure" may have defined "restrict".
576 Other compilers use __restrict, __restrict__, and _Restrict, and
577 'configure' might #define 'restrict' to those words, so pick a
578 different name. */
579#ifndef _Restrict_
580# if 199901L <= __STDC_VERSION__
581# define _Restrict_ restrict
582# elif 2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__)
583# define _Restrict_ __restrict
584# else
585# define _Restrict_
586# endif
587#endif
588/* gcc 3.1 and up support the [restrict] syntax. Don't trust
589 sys/cdefs.h's definition of __restrict_arr, though, as it
590 mishandles gcc -ansi -pedantic. */
591#ifndef _Restrict_arr_
592# if ((199901L <= __STDC_VERSION__ \
593 || ((3 < __GNUC__ || (3 == __GNUC__ && 1 <= __GNUC_MINOR__)) \
594 && !defined __STRICT_ANSI__)) \
595 && !defined __GNUG__)
596# define _Restrict_arr_ _Restrict_
597# else
598# define _Restrict_arr_
599# endif
600#endif
601
602/* POSIX compatibility. */
603extern reg_errcode_t regcomp (regex_t *_Restrict_ __preg,
604 const char *_Restrict_ __pattern,
605 int __cflags);
606
607extern reg_errcode_t regexec (const regex_t *_Restrict_ __preg,
608 const char *_Restrict_ __string, size_t __nmatch,
609 regmatch_t __pmatch[_Restrict_arr_],
610 int __eflags);
611
612extern size_t regerror (int __errcode, const regex_t * __preg,
613 char *__errbuf, size_t __errbuf_size);
614
615extern void regfree (regex_t *__preg);
616
617
618#ifdef __cplusplus
619}
620#endif /* C++ */
621
622/* For platform which support the ISO C amendment 1 functionality we
623 support user defined character classes. */
624#if WIDE_CHAR_SUPPORT
625/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */
626# include <wchar.h>
627# include <wctype.h>
628
629typedef wctype_t re_wctype_t;
630typedef wchar_t re_wchar_t;
631# define re_wctype wctype
632# define re_iswctype iswctype
633# define re_wctype_to_bit(cc) 0
634#else
635# ifndef emacs
636# define btowc(c) c
637# endif
638 193
639/* Character classes. */ 194/* Character classes. */
640typedef enum { RECC_ERROR = 0, 195typedef enum { RECC_ERROR = 0,
@@ -648,12 +203,8 @@ typedef enum { RECC_ERROR = 0,
648 RECC_ASCII, RECC_UNIBYTE 203 RECC_ASCII, RECC_UNIBYTE
649} re_wctype_t; 204} re_wctype_t;
650 205
651extern char re_iswctype (int ch, re_wctype_t cc); 206extern bool re_iswctype (int ch, re_wctype_t cc);
652extern re_wctype_t re_wctype_parse (const unsigned char **strp, unsigned limit); 207extern re_wctype_t re_wctype_parse (const unsigned char **strp,
653 208 unsigned limit);
654typedef int re_wchar_t;
655
656#endif /* not WIDE_CHAR_SUPPORT */
657 209
658#endif /* regex-emacs.h */ 210#endif /* regex-emacs.h */
659
diff --git a/src/search.c b/src/search.c
index d4b03220412..f758bb9304a 100644
--- a/src/search.c
+++ b/src/search.c
@@ -59,8 +59,8 @@ static struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
59static struct regexp_cache *searchbuf_head; 59static struct regexp_cache *searchbuf_head;
60 60
61 61
62/* Every call to re_match, etc., must pass &search_regs as the regs 62/* Every call to re_search, etc., must pass &search_regs as the regs
63 argument unless you can show it is unnecessary (i.e., if re_match 63 argument unless you can show it is unnecessary (i.e., if re_search
64 is certainly going to be called again before region-around-match 64 is certainly going to be called again before region-around-match
65 can be called). 65 can be called).
66 66
@@ -2189,8 +2189,8 @@ set_search_regs (ptrdiff_t beg_byte, ptrdiff_t nbytes)
2189 the match position. */ 2189 the match position. */
2190 if (search_regs.num_regs == 0) 2190 if (search_regs.num_regs == 0)
2191 { 2191 {
2192 search_regs.start = xmalloc (2 * sizeof (regoff_t)); 2192 search_regs.start = xmalloc (2 * sizeof *search_regs.start);
2193 search_regs.end = xmalloc (2 * sizeof (regoff_t)); 2193 search_regs.end = xmalloc (2 * sizeof *search_regs.end);
2194 search_regs.num_regs = 2; 2194 search_regs.num_regs = 2;
2195 } 2195 }
2196 2196
@@ -3001,9 +3001,9 @@ If optional arg RESEAT is non-nil, make markers on LIST point nowhere. */)
3001 memory_full (SIZE_MAX); 3001 memory_full (SIZE_MAX);
3002 search_regs.start = 3002 search_regs.start =
3003 xpalloc (search_regs.start, &num_regs, length - num_regs, 3003 xpalloc (search_regs.start, &num_regs, length - num_regs,
3004 min (PTRDIFF_MAX, UINT_MAX), sizeof (regoff_t)); 3004 min (PTRDIFF_MAX, UINT_MAX), sizeof *search_regs.start);
3005 search_regs.end = 3005 search_regs.end =
3006 xrealloc (search_regs.end, num_regs * sizeof (regoff_t)); 3006 xrealloc (search_regs.end, num_regs * sizeof *search_regs.end);
3007 3007
3008 for (i = search_regs.num_regs; i < num_regs; i++) 3008 for (i = search_regs.num_regs; i < num_regs; i++)
3009 search_regs.start[i] = -1; 3009 search_regs.start[i] = -1;
@@ -3058,12 +3058,9 @@ If optional arg RESEAT is non-nil, make markers on LIST point nowhere. */)
3058 XSETFASTINT (marker, 0); 3058 XSETFASTINT (marker, 0);
3059 3059
3060 CHECK_NUMBER_COERCE_MARKER (marker); 3060 CHECK_NUMBER_COERCE_MARKER (marker);
3061 if ((XINT (from) < 0 3061 if (PTRDIFF_MIN <= XINT (from) && XINT (from) <= PTRDIFF_MAX
3062 ? TYPE_MINIMUM (regoff_t) <= XINT (from) 3062 && PTRDIFF_MIN <= XINT (marker)
3063 : XINT (from) <= TYPE_MAXIMUM (regoff_t)) 3063 && XINT (marker) <= PTRDIFF_MAX)
3064 && (XINT (marker) < 0
3065 ? TYPE_MINIMUM (regoff_t) <= XINT (marker)
3066 : XINT (marker) <= TYPE_MAXIMUM (regoff_t)))
3067 { 3064 {
3068 search_regs.start[i] = XINT (from); 3065 search_regs.start[i] = XINT (from);
3069 search_regs.end[i] = XINT (marker); 3066 search_regs.end[i] = XINT (marker);
diff --git a/src/thread.h b/src/thread.h
index e1eb40921b4..8ecb00824df 100644
--- a/src/thread.h
+++ b/src/thread.h
@@ -112,8 +112,8 @@ struct thread_state
112 struct buffer *m_current_buffer; 112 struct buffer *m_current_buffer;
113#define current_buffer (current_thread->m_current_buffer) 113#define current_buffer (current_thread->m_current_buffer)
114 114
115 /* Every call to re_match, etc., must pass &search_regs as the regs 115 /* Every call to re_match_2, etc., must pass &search_regs as the regs
116 argument unless you can show it is unnecessary (i.e., if re_match 116 argument unless you can show it is unnecessary (i.e., if re_match_2
117 is certainly going to be called again before region-around-match 117 is certainly going to be called again before region-around-match
118 can be called). 118 can be called).
119 119