aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorStefan Monnier2000-08-28 00:37:50 +0000
committerStefan Monnier2000-08-28 00:37:50 +0000
commit0b32bf0e347f0f16f9f4c1860af1b85f900e64f0 (patch)
tree46c7f81e011c37ecbd887c26ede3a429fc72105f /src
parent28bb2cef0204faf0dbbce096ec6d18810ff273e8 (diff)
downloademacs-0b32bf0e347f0f16f9f4c1860af1b85f900e64f0.tar.gz
emacs-0b32bf0e347f0f16f9f4c1860af1b85f900e64f0.zip
* regex.c: Indent cpp directives and remove parens after `defined'.
(PTR_TO_OFFSET, POS_AS_IN_BUFFER): Move to a better place.
(ISDIGIT, ISCNTRL, ISXDIGIT) [!emacs]: Remove duplicate definition.
(regex_compile): Use RE_FRUGAL instead of RE_ALL_GREEDY.
(re_compile_pattern): Use size_t for length.
(init_syntax_once): Move to a better place.
* regex.h: Merge changes from GNU libc. Indent cpp directives.
(RE_FRUGAL): Replaces RE_ALL_GREEDY (inverted meaning).
Diffstat (limited to 'src')
-rw-r--r--src/ChangeLog9
-rw-r--r--src/regex.c580
-rw-r--r--src/regex.h176
3 files changed, 409 insertions, 356 deletions
diff --git a/src/ChangeLog b/src/ChangeLog
index b9fcac51fc4..f63e037fd41 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,5 +1,14 @@
12000-08-27 Stefan Monnier <monnier@cs.yale.edu> 12000-08-27 Stefan Monnier <monnier@cs.yale.edu>
2 2
3 * regex.c: Indent cpp directives and remove parens after `defined'.
4 (PTR_TO_OFFSET, POS_AS_IN_BUFFER): Move to a better place.
5 (ISDIGIT, ISCNTRL, ISXDIGIT) [!emacs]: Remove duplicate definition.
6 (regex_compile): Use RE_FRUGAL instead of RE_ALL_GREEDY.
7 (re_compile_pattern): Use size_t for length.
8 (init_syntax_once): Move to a better place.
9 * regex.h: Merge changes from GNU libc. Indent cpp directives.
10 (RE_FRUGAL): Replaces RE_ALL_GREEDY (inverted meaning).
11
3 * syntax.c (back_comment): Detect cases where a comment-starter is 12 * syntax.c (back_comment): Detect cases where a comment-starter is
4 actually inside another comment as in: /* a // b */ c // d \n. 13 actually inside another comment as in: /* a // b */ c // d \n.
5 Make it clear that `comstart_pos' is unused for nested comments. 14 Make it clear that `comstart_pos' is unused for nested comments.
diff --git a/src/regex.c b/src/regex.c
index 0ba9d3b8878..c00c8d630e6 100644
--- a/src/regex.c
+++ b/src/regex.c
@@ -1,5 +1,5 @@
1/* Extended regular expression matching and search library, version 1/* Extended regular expression matching and search library, version
2 0.12. (Implements POSIX draft P10003.2/D11.2, except for 2 0.12. (Implements POSIX draft P1003.2/D11.2, except for some of the
3 internationalization features.) 3 internationalization features.)
4 4
5 Copyright (C) 1993,94,95,96,97,98,2000 Free Software Foundation, Inc. 5 Copyright (C) 1993,94,95,96,97,98,2000 Free Software Foundation, Inc.
@@ -28,28 +28,22 @@
28 */ 28 */
29 29
30/* AIX requires this to be the first thing in the file. */ 30/* AIX requires this to be the first thing in the file. */
31#if defined (_AIX) && !defined (REGEX_MALLOC) 31#if defined _AIX && !defined REGEX_MALLOC
32 #pragma alloca 32 #pragma alloca
33#endif 33#endif
34 34
35#undef _GNU_SOURCE 35#undef _GNU_SOURCE
36#define _GNU_SOURCE 36#define _GNU_SOURCE
37 37
38#ifdef emacs
39/* Converts the pointer to the char to BEG-based offset from the start. */
40#define PTR_TO_OFFSET(d) POS_AS_IN_BUFFER (POINTER_TO_OFFSET (d))
41#define POS_AS_IN_BUFFER(p) ((p) + (NILP (re_match_object) || BUFFERP (re_match_object)))
42#endif
43
44#ifdef HAVE_CONFIG_H 38#ifdef HAVE_CONFIG_H
45#include <config.h> 39# include <config.h>
46#endif 40#endif
47 41
48/* We need this for `regex.h', and perhaps for the Emacs include files. */ 42/* We need this for `regex.h', and perhaps for the Emacs include files. */
49#include <sys/types.h> 43#include <sys/types.h>
50 44
51/* This is for other GNU distributions with internationalized messages. */ 45/* This is for other GNU distributions with internationalized messages. */
52#if HAVE_LIBINTL_H || defined (_LIBC) 46#if HAVE_LIBINTL_H || defined _LIBC
53# include <libintl.h> 47# include <libintl.h>
54#else 48#else
55# define gettext(msgid) (msgid) 49# define gettext(msgid) (msgid)
@@ -58,44 +52,48 @@
58#ifndef gettext_noop 52#ifndef gettext_noop
59/* This define is so xgettext can find the internationalizable 53/* This define is so xgettext can find the internationalizable
60 strings. */ 54 strings. */
61#define gettext_noop(String) String 55# define gettext_noop(String) String
62#endif 56#endif
63 57
64/* The `emacs' switch turns on certain matching commands 58/* The `emacs' switch turns on certain matching commands
65 that make sense only in Emacs. */ 59 that make sense only in Emacs. */
66#ifdef emacs 60#ifdef emacs
67 61
68#include "lisp.h" 62# include "lisp.h"
69#include "buffer.h" 63# include "buffer.h"
70 64
71/* Make syntax table lookup grant data in gl_state. */ 65/* Make syntax table lookup grant data in gl_state. */
72#define SYNTAX_ENTRY_VIA_PROPERTY 66# define SYNTAX_ENTRY_VIA_PROPERTY
73 67
74#include "syntax.h" 68# include "syntax.h"
75#include "charset.h" 69# include "charset.h"
76#include "category.h" 70# include "category.h"
77 71
78#define malloc xmalloc 72# define malloc xmalloc
79#define realloc xrealloc 73# define realloc xrealloc
80#define free xfree 74# define free xfree
81 75
82#define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte) 76/* Converts the pointer to the char to BEG-based offset from the start. */
83#define RE_STRING_CHAR(p, s) \ 77# define PTR_TO_OFFSET(d) POS_AS_IN_BUFFER (POINTER_TO_OFFSET (d))
78# define POS_AS_IN_BUFFER(p) ((p) + (NILP (re_match_object) || BUFFERP (re_match_object)))
79
80# define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte)
81# define RE_STRING_CHAR(p, s) \
84 (multibyte ? (STRING_CHAR (p, s)) : (*(p))) 82 (multibyte ? (STRING_CHAR (p, s)) : (*(p)))
85#define RE_STRING_CHAR_AND_LENGTH(p, s, len) \ 83# define RE_STRING_CHAR_AND_LENGTH(p, s, len) \
86 (multibyte ? (STRING_CHAR_AND_LENGTH (p, s, len)) : ((len) = 1, *(p))) 84 (multibyte ? (STRING_CHAR_AND_LENGTH (p, s, len)) : ((len) = 1, *(p)))
87 85
88/* Set C a (possibly multibyte) character before P. P points into a 86/* Set C a (possibly multibyte) character before P. P points into a
89 string which is the virtual concatenation of STR1 (which ends at 87 string which is the virtual concatenation of STR1 (which ends at
90 END1) or STR2 (which ends at END2). */ 88 END1) or STR2 (which ends at END2). */
91#define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \ 89# define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \
92 do { \ 90 do { \
93 if (multibyte) \ 91 if (multibyte) \
94 { \ 92 { \
95 re_char *dtemp = (p) == (str2) ? (end1) : (p); \ 93 re_char *dtemp = (p) == (str2) ? (end1) : (p); \
96 re_char *dlimit = ((p) > (str2) && (p) <= (end2)) ? (str2) : (str1); \ 94 re_char *dlimit = ((p) > (str2) && (p) <= (end2)) ? (str2) : (str1); \
97 while (dtemp-- > dlimit && !CHAR_HEAD_P (*dtemp)); \ 95 while (dtemp-- > dlimit && !CHAR_HEAD_P (*dtemp)); \
98 c = STRING_CHAR (dtemp, (p) - dtemp); \ 96 c = STRING_CHAR (dtemp, (p) - dtemp); \
99 } \ 97 } \
100 else \ 98 else \
101 (c = ((p) == (str2) ? (end1) : (p))[-1]); \ 99 (c = ((p) == (str2) ? (end1) : (p))[-1]); \
@@ -107,120 +105,82 @@
107/* If we are not linking with Emacs proper, 105/* If we are not linking with Emacs proper,
108 we can't use the relocating allocator 106 we can't use the relocating allocator
109 even if config.h says that we can. */ 107 even if config.h says that we can. */
110#undef REL_ALLOC 108# undef REL_ALLOC
111 109
112#if defined (STDC_HEADERS) || defined (_LIBC) 110# if defined STDC_HEADERS || defined _LIBC
113#include <stdlib.h> 111# include <stdlib.h>
114#else 112# else
115char *malloc (); 113char *malloc ();
116char *realloc (); 114char *realloc ();
117#endif 115# endif
118 116
119/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow. 117/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
120 If nothing else has been done, use the method below. */ 118 If nothing else has been done, use the method below. */
121#ifdef INHIBIT_STRING_HEADER 119# ifdef INHIBIT_STRING_HEADER
122#if !(defined (HAVE_BZERO) && defined (HAVE_BCOPY)) 120# if !(defined HAVE_BZERO && defined HAVE_BCOPY)
123#if !defined (bzero) && !defined (bcopy) 121# if !defined bzero && !defined bcopy
124#undef INHIBIT_STRING_HEADER 122# undef INHIBIT_STRING_HEADER
125#endif 123# endif
126#endif 124# endif
127#endif 125# endif
128 126
129/* This is the normal way of making sure we have a bcopy and a bzero. 127/* This is the normal way of making sure we have a bcopy and a bzero.
130 This is used in most programs--a few other programs avoid this 128 This is used in most programs--a few other programs avoid this
131 by defining INHIBIT_STRING_HEADER. */ 129 by defining INHIBIT_STRING_HEADER. */
132#ifndef INHIBIT_STRING_HEADER 130# ifndef INHIBIT_STRING_HEADER
133#if defined (HAVE_STRING_H) || defined (STDC_HEADERS) || defined (_LIBC) 131# if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
134#include <string.h> 132# include <string.h>
135#ifndef bcmp 133# ifndef bcmp
136#define bcmp(s1, s2, n) memcmp ((s1), (s2), (n)) 134# define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
137#endif 135# endif
138#ifndef bcopy 136# ifndef bcopy
139#define bcopy(s, d, n) memcpy ((d), (s), (n)) 137# define bcopy(s, d, n) memcpy ((d), (s), (n))
140#endif 138# endif
141#ifndef bzero 139# ifndef bzero
142#define bzero(s, n) memset ((s), 0, (n)) 140# define bzero(s, n) memset ((s), 0, (n))
143#endif 141# endif
144#else 142# else
145#include <strings.h> 143# include <strings.h>
146#endif 144# endif
147#endif 145# endif
148 146
149/* Define the syntax stuff for \<, \>, etc. */ 147/* Define the syntax stuff for \<, \>, etc. */
150 148
151/* Sword must be nonzero for the wordchar pattern commands in re_match_2. */ 149/* Sword must be nonzero for the wordchar pattern commands in re_match_2. */
152enum syntaxcode { Swhitespace = 0, Sword = 1 }; 150enum syntaxcode { Swhitespace = 0, Sword = 1 };
153 151
154#ifdef SWITCH_ENUM_BUG 152# ifdef SWITCH_ENUM_BUG
155#define SWITCH_ENUM_CAST(x) ((int)(x)) 153# define SWITCH_ENUM_CAST(x) ((int)(x))
156#else 154# else
157#define SWITCH_ENUM_CAST(x) (x) 155# define SWITCH_ENUM_CAST(x) (x)
158#endif 156# endif
159
160#ifdef SYNTAX_TABLE
161
162extern char *re_syntax_table;
163
164#else /* not SYNTAX_TABLE */
165 157
166/* How many characters in the character set. */ 158# define SYNTAX(c) re_syntax_table[c]
167#define CHAR_SET_SIZE 256
168
169static char re_syntax_table[CHAR_SET_SIZE];
170
171static void
172init_syntax_once ()
173{
174 register int c;
175 static int done = 0;
176
177 if (done)
178 return;
179
180 bzero (re_syntax_table, sizeof re_syntax_table);
181
182 for (c = 'a'; c <= 'z'; c++)
183 re_syntax_table[c] = Sword;
184
185 for (c = 'A'; c <= 'Z'; c++)
186 re_syntax_table[c] = Sword;
187
188 for (c = '0'; c <= '9'; c++)
189 re_syntax_table[c] = Sword;
190
191 re_syntax_table['_'] = Sword;
192
193 done = 1;
194}
195
196#endif /* not SYNTAX_TABLE */
197
198#define SYNTAX(c) re_syntax_table[c]
199 159
200/* Dummy macros for non-Emacs environments. */ 160/* Dummy macros for non-Emacs environments. */
201#define BASE_LEADING_CODE_P(c) (0) 161# define BASE_LEADING_CODE_P(c) (0)
202#define CHAR_CHARSET(c) 0 162# define CHAR_CHARSET(c) 0
203#define CHARSET_LEADING_CODE_BASE(c) 0 163# define CHARSET_LEADING_CODE_BASE(c) 0
204#define MAX_MULTIBYTE_LENGTH 1 164# define MAX_MULTIBYTE_LENGTH 1
205#define RE_MULTIBYTE_P(x) 0 165# define RE_MULTIBYTE_P(x) 0
206#define WORD_BOUNDARY_P(c1, c2) (0) 166# define WORD_BOUNDARY_P(c1, c2) (0)
207#define CHAR_HEAD_P(p) (1) 167# define CHAR_HEAD_P(p) (1)
208#define SINGLE_BYTE_CHAR_P(c) (1) 168# define SINGLE_BYTE_CHAR_P(c) (1)
209#define SAME_CHARSET_P(c1, c2) (1) 169# define SAME_CHARSET_P(c1, c2) (1)
210#define MULTIBYTE_FORM_LENGTH(p, s) (1) 170# define MULTIBYTE_FORM_LENGTH(p, s) (1)
211#define STRING_CHAR(p, s) (*(p)) 171# define STRING_CHAR(p, s) (*(p))
212#define RE_STRING_CHAR STRING_CHAR 172# define RE_STRING_CHAR STRING_CHAR
213#define CHAR_STRING(c, s) (*(s) = (c), 1) 173# define CHAR_STRING(c, s) (*(s) = (c), 1)
214#define STRING_CHAR_AND_LENGTH(p, s, actual_len) ((actual_len) = 1, *(p)) 174# define STRING_CHAR_AND_LENGTH(p, s, actual_len) ((actual_len) = 1, *(p))
215#define RE_STRING_CHAR_AND_LENGTH STRING_CHAR_AND_LENGTH 175# define RE_STRING_CHAR_AND_LENGTH STRING_CHAR_AND_LENGTH
216#define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \ 176# define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \
217 (c = ((p) == (str2) ? *((end1) - 1) : *((p) - 1))) 177 (c = ((p) == (str2) ? *((end1) - 1) : *((p) - 1)))
218#define MAKE_CHAR(charset, c1, c2) (c1) 178# define MAKE_CHAR(charset, c1, c2) (c1)
219#endif /* not emacs */ 179#endif /* not emacs */
220 180
221#ifndef RE_TRANSLATE 181#ifndef RE_TRANSLATE
222#define RE_TRANSLATE(TBL, C) ((unsigned char)(TBL)[C]) 182# define RE_TRANSLATE(TBL, C) ((unsigned char)(TBL)[C])
223#define RE_TRANSLATE_P(TBL) (TBL) 183# define RE_TRANSLATE_P(TBL) (TBL)
224#endif 184#endif
225 185
226/* Get the interface, including the syntax bits. */ 186/* Get the interface, including the syntax bits. */
@@ -232,58 +192,58 @@ init_syntax_once ()
232#ifdef emacs 192#ifdef emacs
233 193
234/* 1 if C is an ASCII character. */ 194/* 1 if C is an ASCII character. */
235#define IS_REAL_ASCII(c) ((c) < 0200) 195# define IS_REAL_ASCII(c) ((c) < 0200)
236 196
237/* 1 if C is a unibyte character. */ 197/* 1 if C is a unibyte character. */
238#define ISUNIBYTE(c) (SINGLE_BYTE_CHAR_P ((c))) 198# define ISUNIBYTE(c) (SINGLE_BYTE_CHAR_P ((c)))
239 199
240/* The Emacs definitions should not be directly affected by locales. */ 200/* The Emacs definitions should not be directly affected by locales. */
241 201
242/* In Emacs, these are only used for single-byte characters. */ 202/* In Emacs, these are only used for single-byte characters. */
243#define ISDIGIT(c) ((c) >= '0' && (c) <= '9') 203# define ISDIGIT(c) ((c) >= '0' && (c) <= '9')
244#define ISCNTRL(c) ((c) < ' ') 204# define ISCNTRL(c) ((c) < ' ')
245#define ISXDIGIT(c) (((c) >= '0' && (c) <= '9') \ 205# define ISXDIGIT(c) (((c) >= '0' && (c) <= '9') \
246 || ((c) >= 'a' && (c) <= 'f') \ 206 || ((c) >= 'a' && (c) <= 'f') \
247 || ((c) >= 'A' && (c) <= 'F')) 207 || ((c) >= 'A' && (c) <= 'F'))
248 208
249/* This is only used for single-byte characters. */ 209/* This is only used for single-byte characters. */
250#define ISBLANK(c) ((c) == ' ' || (c) == '\t') 210# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
251 211
252/* The rest must handle multibyte characters. */ 212/* The rest must handle multibyte characters. */
253 213
254#define ISGRAPH(c) (SINGLE_BYTE_CHAR_P (c) \ 214# define ISGRAPH(c) (SINGLE_BYTE_CHAR_P (c) \
255 ? (c) > ' ' && !((c) >= 0177 && (c) <= 0237) \ 215 ? (c) > ' ' && !((c) >= 0177 && (c) <= 0237) \
256 : 1) 216 : 1)
257 217
258#define ISPRINT(c) (SINGLE_BYTE_CHAR_P (c) \ 218# define ISPRINT(c) (SINGLE_BYTE_CHAR_P (c) \
259 ? (c) >= ' ' && !((c) >= 0177 && (c) <= 0237) \ 219 ? (c) >= ' ' && !((c) >= 0177 && (c) <= 0237) \
260 : 1) 220 : 1)
261 221
262#define ISALNUM(c) (IS_REAL_ASCII (c) \ 222# define ISALNUM(c) (IS_REAL_ASCII (c) \
263 ? (((c) >= 'a' && (c) <= 'z') \ 223 ? (((c) >= 'a' && (c) <= 'z') \
264 || ((c) >= 'A' && (c) <= 'Z') \ 224 || ((c) >= 'A' && (c) <= 'Z') \
265 || ((c) >= '0' && (c) <= '9')) \ 225 || ((c) >= '0' && (c) <= '9')) \
266 : SYNTAX (c) == Sword) 226 : SYNTAX (c) == Sword)
267 227
268#define ISALPHA(c) (IS_REAL_ASCII (c) \ 228# define ISALPHA(c) (IS_REAL_ASCII (c) \
269 ? (((c) >= 'a' && (c) <= 'z') \ 229 ? (((c) >= 'a' && (c) <= 'z') \
270 || ((c) >= 'A' && (c) <= 'Z')) \ 230 || ((c) >= 'A' && (c) <= 'Z')) \
271 : SYNTAX (c) == Sword) 231 : SYNTAX (c) == Sword)
272 232
273#define ISLOWER(c) (LOWERCASEP (c)) 233# define ISLOWER(c) (LOWERCASEP (c))
274 234
275#define ISPUNCT(c) (IS_REAL_ASCII (c) \ 235# define ISPUNCT(c) (IS_REAL_ASCII (c) \
276 ? ((c) > ' ' && (c) < 0177 \ 236 ? ((c) > ' ' && (c) < 0177 \
277 && !(((c) >= 'a' && (c) <= 'z') \ 237 && !(((c) >= 'a' && (c) <= 'z') \
278 || ((c) >= 'A' && (c) <= 'Z') \ 238 || ((c) >= 'A' && (c) <= 'Z') \
279 || ((c) >= '0' && (c) <= '9'))) \ 239 || ((c) >= '0' && (c) <= '9'))) \
280 : SYNTAX (c) != Sword) 240 : SYNTAX (c) != Sword)
281 241
282#define ISSPACE(c) (SYNTAX (c) == Swhitespace) 242# define ISSPACE(c) (SYNTAX (c) == Swhitespace)
283 243
284#define ISUPPER(c) (UPPERCASEP (c)) 244# define ISUPPER(c) (UPPERCASEP (c))
285 245
286#define ISWORD(c) (SYNTAX (c) == Sword) 246# define ISWORD(c) (SYNTAX (c) == Sword)
287 247
288#else /* not emacs */ 248#else /* not emacs */
289 249
@@ -298,50 +258,84 @@ init_syntax_once ()
298 Defining isascii to 1 should let any compiler worth its salt 258 Defining isascii to 1 should let any compiler worth its salt
299 eliminate the && through constant folding." */ 259 eliminate the && through constant folding." */
300 260
301#if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII)) 261# if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
302#define ISASCII(c) 1 262# define ISASCII(c) 1
303#else 263# else
304#define ISASCII(c) isascii(c) 264# define ISASCII(c) isascii(c)
305#endif 265# endif
306 266
307/* 1 if C is an ASCII character. */ 267/* 1 if C is an ASCII character. */
308#define IS_REAL_ASCII(c) ((c) < 0200) 268# define IS_REAL_ASCII(c) ((c) < 0200)
309 269
310/* This distinction is not meaningful, except in Emacs. */ 270/* This distinction is not meaningful, except in Emacs. */
311#define ISUNIBYTE(c) 1 271# define ISUNIBYTE(c) 1
272
273# ifdef isblank
274# define ISBLANK(c) (ISASCII (c) && isblank (c))
275# else
276# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
277# endif
278# ifdef isgraph
279# define ISGRAPH(c) (ISASCII (c) && isgraph (c))
280# else
281# define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
282# endif
283
284# define ISPRINT(c) (ISASCII (c) && isprint (c))
285# define ISDIGIT(c) (ISASCII (c) && isdigit (c))
286# define ISALNUM(c) (ISASCII (c) && isalnum (c))
287# define ISALPHA(c) (ISASCII (c) && isalpha (c))
288# define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
289# define ISLOWER(c) (ISASCII (c) && islower (c))
290# define ISPUNCT(c) (ISASCII (c) && ispunct (c))
291# define ISSPACE(c) (ISASCII (c) && isspace (c))
292# define ISUPPER(c) (ISASCII (c) && isupper (c))
293# define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
294
295# define ISWORD(c) ISALPHA(c)
296
297# ifdef SYNTAX_TABLE
312 298
313#define ISDIGIT(c) (ISASCII (c) && isdigit (c)) 299extern char *re_syntax_table;
314#define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
315#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
316 300
317#ifdef isblank 301# else /* not SYNTAX_TABLE */
318#define ISBLANK(c) (ISASCII (c) && isblank (c)) 302
319#else 303/* How many characters in the character set. */
320#define ISBLANK(c) ((c) == ' ' || (c) == '\t') 304# define CHAR_SET_SIZE 256
321#endif 305
322#ifdef isgraph 306static char re_syntax_table[CHAR_SET_SIZE];
323#define ISGRAPH(c) (ISASCII (c) && isgraph (c)) 307
324#else 308static void
325#define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c)) 309init_syntax_once ()
326#endif 310{
311 register int c;
312 static int done = 0;
313
314 if (done)
315 return;
316
317 bzero (re_syntax_table, sizeof re_syntax_table);
318
319 for (c = 'a'; c <= 'z'; c++)
320 re_syntax_table[c] = Sword;
321
322 for (c = 'A'; c <= 'Z'; c++)
323 re_syntax_table[c] = Sword;
324
325 for (c = '0'; c <= '9'; c++)
326 re_syntax_table[c] = Sword;
327 327
328#define ISPRINT(c) (ISASCII (c) && isprint (c)) 328 re_syntax_table['_'] = Sword;
329#define ISDIGIT(c) (ISASCII (c) && isdigit (c))
330#define ISALNUM(c) (ISASCII (c) && isalnum (c))
331#define ISALPHA(c) (ISASCII (c) && isalpha (c))
332#define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
333#define ISLOWER(c) (ISASCII (c) && islower (c))
334#define ISPUNCT(c) (ISASCII (c) && ispunct (c))
335#define ISSPACE(c) (ISASCII (c) && isspace (c))
336#define ISUPPER(c) (ISASCII (c) && isupper (c))
337#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
338 329
339#define ISWORD(c) ISALPHA(c) 330 done = 1;
331}
332
333# endif /* not SYNTAX_TABLE */
340 334
341#endif /* not emacs */ 335#endif /* not emacs */
342 336
343#ifndef NULL 337#ifndef NULL
344#define NULL (void *)0 338# define NULL (void *)0
345#endif 339#endif
346 340
347/* We remove any previous definition of `SIGN_EXTEND_CHAR', 341/* We remove any previous definition of `SIGN_EXTEND_CHAR',
@@ -350,10 +344,10 @@ init_syntax_once ()
350 (Per Bothner suggested the basic approach.) */ 344 (Per Bothner suggested the basic approach.) */
351#undef SIGN_EXTEND_CHAR 345#undef SIGN_EXTEND_CHAR
352#if __STDC__ 346#if __STDC__
353#define SIGN_EXTEND_CHAR(c) ((signed char) (c)) 347# define SIGN_EXTEND_CHAR(c) ((signed char) (c))
354#else /* not __STDC__ */ 348#else /* not __STDC__ */
355/* As in Harbison and Steele. */ 349/* As in Harbison and Steele. */
356#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128) 350# define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
357#endif 351#endif
358 352
359/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we 353/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we
@@ -368,74 +362,68 @@ init_syntax_once ()
368 362
369#ifdef REGEX_MALLOC 363#ifdef REGEX_MALLOC
370 364
371#define REGEX_ALLOCATE malloc 365# define REGEX_ALLOCATE malloc
372#define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize) 366# define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
373#define REGEX_FREE free 367# define REGEX_FREE free
374 368
375#else /* not REGEX_MALLOC */ 369#else /* not REGEX_MALLOC */
376 370
377/* Emacs already defines alloca, sometimes. */ 371/* Emacs already defines alloca, sometimes. */
378#ifndef alloca 372# ifndef alloca
379 373
380/* Make alloca work the best possible way. */ 374/* Make alloca work the best possible way. */
381#ifdef __GNUC__ 375# ifdef __GNUC__
382#define alloca __builtin_alloca 376# define alloca __builtin_alloca
383#else /* not __GNUC__ */ 377# else /* not __GNUC__ */
384#if HAVE_ALLOCA_H 378# if HAVE_ALLOCA_H
385#include <alloca.h> 379# include <alloca.h>
386#else /* not __GNUC__ or HAVE_ALLOCA_H */ 380# endif /* HAVE_ALLOCA_H */
387#if 0 /* It is a bad idea to declare alloca. We always cast the result. */ 381# endif /* not __GNUC__ */
388#ifndef _AIX /* Already did AIX, up at the top. */
389char *alloca ();
390#endif /* not _AIX */
391#endif
392#endif /* not HAVE_ALLOCA_H */
393#endif /* not __GNUC__ */
394 382
395#endif /* not alloca */ 383# endif /* not alloca */
396 384
397#define REGEX_ALLOCATE alloca 385# define REGEX_ALLOCATE alloca
398 386
399/* Assumes a `char *destination' variable. */ 387/* Assumes a `char *destination' variable. */
400#define REGEX_REALLOCATE(source, osize, nsize) \ 388# define REGEX_REALLOCATE(source, osize, nsize) \
401 (destination = (char *) alloca (nsize), \ 389 (destination = (char *) alloca (nsize), \
402 bcopy (source, destination, osize), \ 390 bcopy (source, destination, osize), \
403 destination) 391 destination)
404 392
405/* No need to do anything to free, after alloca. */ 393/* No need to do anything to free, after alloca. */
406#define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */ 394# define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */
407 395
408#endif /* not REGEX_MALLOC */ 396#endif /* not REGEX_MALLOC */
409 397
410/* Define how to allocate the failure stack. */ 398/* Define how to allocate the failure stack. */
411 399
412#if defined (REL_ALLOC) && defined (REGEX_MALLOC) 400#if defined REL_ALLOC && defined REGEX_MALLOC
413 401
414#define REGEX_ALLOCATE_STACK(size) \ 402# define REGEX_ALLOCATE_STACK(size) \
415 r_alloc (&failure_stack_ptr, (size)) 403 r_alloc (&failure_stack_ptr, (size))
416#define REGEX_REALLOCATE_STACK(source, osize, nsize) \ 404# define REGEX_REALLOCATE_STACK(source, osize, nsize) \
417 r_re_alloc (&failure_stack_ptr, (nsize)) 405 r_re_alloc (&failure_stack_ptr, (nsize))
418#define REGEX_FREE_STACK(ptr) \ 406# define REGEX_FREE_STACK(ptr) \
419 r_alloc_free (&failure_stack_ptr) 407 r_alloc_free (&failure_stack_ptr)
420 408
421#else /* not using relocating allocator */ 409#else /* not using relocating allocator */
422 410
423#ifdef REGEX_MALLOC 411# ifdef REGEX_MALLOC
424 412
425#define REGEX_ALLOCATE_STACK malloc 413# define REGEX_ALLOCATE_STACK malloc
426#define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize) 414# define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
427#define REGEX_FREE_STACK free 415# define REGEX_FREE_STACK free
428 416
429#else /* not REGEX_MALLOC */ 417# else /* not REGEX_MALLOC */
430 418
431#define REGEX_ALLOCATE_STACK alloca 419# define REGEX_ALLOCATE_STACK alloca
432 420
433#define REGEX_REALLOCATE_STACK(source, osize, nsize) \ 421# define REGEX_REALLOCATE_STACK(source, osize, nsize) \
434 REGEX_REALLOCATE (source, osize, nsize) 422 REGEX_REALLOCATE (source, osize, nsize)
435/* No need to explicitly free anything. */ 423/* No need to explicitly free anything. */
436#define REGEX_FREE_STACK(arg) ((void)0) 424# define REGEX_FREE_STACK(arg) ((void)0)
437 425
438#endif /* not REGEX_MALLOC */ 426# endif /* not REGEX_MALLOC */
439#endif /* not using relocating allocator */ 427#endif /* not using relocating allocator */
440 428
441 429
@@ -498,10 +486,10 @@ typedef enum
498 If the length byte has the 0x80 bit set, then that stuff 486 If the length byte has the 0x80 bit set, then that stuff
499 is followed by a range table: 487 is followed by a range table:
500 2 bytes of flags for character sets (low 8 bits, high 8 bits) 488 2 bytes of flags for character sets (low 8 bits, high 8 bits)
501 See RANGE_TABLE_WORK_BITS below. 489 See RANGE_TABLE_WORK_BITS below.
502 2 bytes, the number of pairs that follow 490 2 bytes, the number of pairs that follow
503 pairs, each 2 multibyte characters, 491 pairs, each 2 multibyte characters,
504 each multibyte character represented as 3 bytes. */ 492 each multibyte character represented as 3 bytes. */
505 charset, 493 charset,
506 494
507 /* Same parameters as charset, but match any character that is 495 /* Same parameters as charset, but match any character that is
@@ -559,7 +547,7 @@ typedef enum
559 by a `no_op'. */ 547 by a `no_op'. */
560 on_failure_jump_nastyloop, 548 on_failure_jump_nastyloop,
561 549
562 /* A smart `on_failure_jump' used for greedy * and + operators. 550 /* A smart `on_failure_jump' used for greedy * and + operators.
563 It analyses the loop before which it is put and if the 551 It analyses the loop before which it is put and if the
564 loop does not require backtracking, it changes itself to 552 loop does not require backtracking, it changes itself to
565 `on_failure_keep_string_jump' and short-circuits the loop, 553 `on_failure_keep_string_jump' and short-circuits the loop,
@@ -652,10 +640,10 @@ extract_number (dest, source)
652 *dest += temp << 8; 640 *dest += temp << 8;
653} 641}
654 642
655#ifndef EXTRACT_MACROS /* To debug the macros. */ 643# ifndef EXTRACT_MACROS /* To debug the macros. */
656#undef EXTRACT_NUMBER 644# undef EXTRACT_NUMBER
657#define EXTRACT_NUMBER(dest, src) extract_number (&dest, src) 645# define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
658#endif /* not EXTRACT_MACROS */ 646# endif /* not EXTRACT_MACROS */
659 647
660#endif /* DEBUG */ 648#endif /* DEBUG */
661 649
@@ -678,11 +666,11 @@ extract_number_and_incr (destination, source)
678 *source += 2; 666 *source += 2;
679} 667}
680 668
681#ifndef EXTRACT_MACROS 669# ifndef EXTRACT_MACROS
682#undef EXTRACT_NUMBER_AND_INCR 670# undef EXTRACT_NUMBER_AND_INCR
683#define EXTRACT_NUMBER_AND_INCR(dest, src) \ 671# define EXTRACT_NUMBER_AND_INCR(dest, src) \
684 extract_number_and_incr (&dest, &src) 672 extract_number_and_incr (&dest, &src)
685#endif /* not EXTRACT_MACROS */ 673# endif /* not EXTRACT_MACROS */
686 674
687#endif /* DEBUG */ 675#endif /* DEBUG */
688 676
@@ -788,21 +776,21 @@ extract_number_and_incr (destination, source)
788#ifdef DEBUG 776#ifdef DEBUG
789 777
790/* We use standard I/O for debugging. */ 778/* We use standard I/O for debugging. */
791#include <stdio.h> 779# include <stdio.h>
792 780
793/* It is useful to test things that ``must'' be true when debugging. */ 781/* It is useful to test things that ``must'' be true when debugging. */
794#include <assert.h> 782# include <assert.h>
795 783
796static int debug = -100000; 784static int debug = -100000;
797 785
798#define DEBUG_STATEMENT(e) e 786# define DEBUG_STATEMENT(e) e
799#define DEBUG_PRINT1(x) if (debug > 0) printf (x) 787# define DEBUG_PRINT1(x) if (debug > 0) printf (x)
800#define DEBUG_PRINT2(x1, x2) if (debug > 0) printf (x1, x2) 788# define DEBUG_PRINT2(x1, x2) if (debug > 0) printf (x1, x2)
801#define DEBUG_PRINT3(x1, x2, x3) if (debug > 0) printf (x1, x2, x3) 789# define DEBUG_PRINT3(x1, x2, x3) if (debug > 0) printf (x1, x2, x3)
802#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug > 0) printf (x1, x2, x3, x4) 790# define DEBUG_PRINT4(x1, x2, x3, x4) if (debug > 0) printf (x1, x2, x3, x4)
803#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \ 791# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
804 if (debug > 0) print_partial_compiled_pattern (s, e) 792 if (debug > 0) print_partial_compiled_pattern (s, e)
805#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \ 793# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
806 if (debug > 0) print_double_string (w, s1, sz1, s2, sz2) 794 if (debug > 0) print_double_string (w, s1, sz1, s2, sz2)
807 795
808 796
@@ -1036,7 +1024,7 @@ print_partial_compiled_pattern (start, end)
1036 printf ("/%d", mcnt); 1024 printf ("/%d", mcnt);
1037 break; 1025 break;
1038 1026
1039#ifdef emacs 1027# ifdef emacs
1040 case before_dot: 1028 case before_dot:
1041 printf ("/before_dot"); 1029 printf ("/before_dot");
1042 break; 1030 break;
@@ -1060,7 +1048,7 @@ print_partial_compiled_pattern (start, end)
1060 mcnt = *p++; 1048 mcnt = *p++;
1061 printf ("/%d", mcnt); 1049 printf ("/%d", mcnt);
1062 break; 1050 break;
1063#endif /* emacs */ 1051# endif /* emacs */
1064 1052
1065 case begbuf: 1053 case begbuf:
1066 printf ("/begbuf"); 1054 printf ("/begbuf");
@@ -1138,16 +1126,16 @@ print_double_string (where, string1, size1, string2, size2)
1138 1126
1139#else /* not DEBUG */ 1127#else /* not DEBUG */
1140 1128
1141#undef assert 1129# undef assert
1142#define assert(e) 1130# define assert(e)
1143 1131
1144#define DEBUG_STATEMENT(e) 1132# define DEBUG_STATEMENT(e)
1145#define DEBUG_PRINT1(x) 1133# define DEBUG_PRINT1(x)
1146#define DEBUG_PRINT2(x1, x2) 1134# define DEBUG_PRINT2(x1, x2)
1147#define DEBUG_PRINT3(x1, x2, x3) 1135# define DEBUG_PRINT3(x1, x2, x3)
1148#define DEBUG_PRINT4(x1, x2, x3, x4) 1136# define DEBUG_PRINT4(x1, x2, x3, x4)
1149#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) 1137# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
1150#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) 1138# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
1151 1139
1152#endif /* not DEBUG */ 1140#endif /* not DEBUG */
1153 1141
@@ -1227,7 +1215,7 @@ static const char *re_error_msgid[] =
1227/* When using GNU C, we are not REALLY using the C alloca, no matter 1215/* When using GNU C, we are not REALLY using the C alloca, no matter
1228 what config.h may say. So don't take precautions for it. */ 1216 what config.h may say. So don't take precautions for it. */
1229#ifdef __GNUC__ 1217#ifdef __GNUC__
1230#undef C_ALLOCA 1218# undef C_ALLOCA
1231#endif 1219#endif
1232 1220
1233/* The match routines may not allocate if (1) they would do it with malloc 1221/* The match routines may not allocate if (1) they would do it with malloc
@@ -1235,8 +1223,8 @@ static const char *re_error_msgid[] =
1235 Note that if REL_ALLOC is defined, matching would not use malloc for the 1223 Note that if REL_ALLOC is defined, matching would not use malloc for the
1236 failure stack, but we would still use it for the register vectors; 1224 failure stack, but we would still use it for the register vectors;
1237 so REL_ALLOC should not affect this. */ 1225 so REL_ALLOC should not affect this. */
1238#if (defined (C_ALLOCA) || defined (REGEX_MALLOC)) && defined (emacs) 1226#if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs
1239#undef MATCH_MAY_ALLOCATE 1227# undef MATCH_MAY_ALLOCATE
1240#endif 1228#endif
1241 1229
1242 1230
@@ -1249,14 +1237,14 @@ static const char *re_error_msgid[] =
1249 when matching. If this number is exceeded, we allocate more 1237 when matching. If this number is exceeded, we allocate more
1250 space, so it is not a hard limit. */ 1238 space, so it is not a hard limit. */
1251#ifndef INIT_FAILURE_ALLOC 1239#ifndef INIT_FAILURE_ALLOC
1252#define INIT_FAILURE_ALLOC 20 1240# define INIT_FAILURE_ALLOC 20
1253#endif 1241#endif
1254 1242
1255/* Roughly the maximum number of failure points on the stack. Would be 1243/* Roughly the maximum number of failure points on the stack. Would be
1256 exactly that if we always used TYPICAL_FAILURE_SIZE items each time we failed. 1244 exactly that if we always used TYPICAL_FAILURE_SIZE items each time we failed.
1257 This is a variable only so users of regex can assign to it; we never 1245 This is a variable only so users of regex can assign to it; we never
1258 change it ourselves. */ 1246 change it ourselves. */
1259#if defined (MATCH_MAY_ALLOCATE) 1247#if defined MATCH_MAY_ALLOCATE
1260/* Note that 4400 is enough to cause a crash on Alpha OSF/1, 1248/* Note that 4400 is enough to cause a crash on Alpha OSF/1,
1261 whose default stack limit is 2mb. In order for a larger 1249 whose default stack limit is 2mb. In order for a larger
1262 value to work reliably, you have to try to make it accord 1250 value to work reliably, you have to try to make it accord
@@ -1291,7 +1279,7 @@ typedef struct
1291 Do `return -2' if the alloc fails. */ 1279 Do `return -2' if the alloc fails. */
1292 1280
1293#ifdef MATCH_MAY_ALLOCATE 1281#ifdef MATCH_MAY_ALLOCATE
1294#define INIT_FAIL_STACK() \ 1282# define INIT_FAIL_STACK() \
1295 do { \ 1283 do { \
1296 fail_stack.stack = (fail_stack_elt_t *) \ 1284 fail_stack.stack = (fail_stack_elt_t *) \
1297 REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * TYPICAL_FAILURE_SIZE \ 1285 REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * TYPICAL_FAILURE_SIZE \
@@ -1305,15 +1293,15 @@ typedef struct
1305 fail_stack.frame = 0; \ 1293 fail_stack.frame = 0; \
1306 } while (0) 1294 } while (0)
1307 1295
1308#define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack) 1296# define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack)
1309#else 1297#else
1310#define INIT_FAIL_STACK() \ 1298# define INIT_FAIL_STACK() \
1311 do { \ 1299 do { \
1312 fail_stack.avail = 0; \ 1300 fail_stack.avail = 0; \
1313 fail_stack.frame = 0; \ 1301 fail_stack.frame = 0; \
1314 } while (0) 1302 } while (0)
1315 1303
1316#define RESET_FAIL_STACK() ((void)0) 1304# define RESET_FAIL_STACK() ((void)0)
1317#endif 1305#endif
1318 1306
1319 1307
@@ -1617,7 +1605,7 @@ static int analyse_first _RE_ARGS((unsigned char *p, unsigned char *pend,
1617 `char *', to avoid warnings when a string constant is passed. But 1605 `char *', to avoid warnings when a string constant is passed. But
1618 when we use a character as a subscript we must make it unsigned. */ 1606 when we use a character as a subscript we must make it unsigned. */
1619#ifndef TRANSLATE 1607#ifndef TRANSLATE
1620#define TRANSLATE(d) \ 1608# define TRANSLATE(d) \
1621 (RE_TRANSLATE_P (translate) ? RE_TRANSLATE (translate, (d)) : (d)) 1609 (RE_TRANSLATE_P (translate) ? RE_TRANSLATE (translate, (d)) : (d))
1622#endif 1610#endif
1623 1611
@@ -1858,9 +1846,9 @@ struct range_table_work_area
1858 || STREQ (string, "unibyte") || STREQ (string, "multibyte")) 1846 || STREQ (string, "unibyte") || STREQ (string, "multibyte"))
1859 1847
1860/* QUIT is only used on NTemacs. */ 1848/* QUIT is only used on NTemacs. */
1861#if !defined (WINDOWSNT) || !defined (emacs) 1849#if !defined WINDOWSNT || !defined emacs
1862#undef QUIT 1850# undef QUIT
1863#define QUIT 1851# define QUIT
1864#endif 1852#endif
1865 1853
1866#ifndef MATCH_MAY_ALLOCATE 1854#ifndef MATCH_MAY_ALLOCATE
@@ -2045,7 +2033,7 @@ regex_compile (pattern, size, syntax, bufp)
2045 /* Always count groups, whether or not bufp->no_sub is set. */ 2033 /* Always count groups, whether or not bufp->no_sub is set. */
2046 bufp->re_nsub = 0; 2034 bufp->re_nsub = 0;
2047 2035
2048#if !defined (emacs) && !defined (SYNTAX_TABLE) 2036#if !defined emacs && !defined SYNTAX_TABLE
2049 /* Initialize the syntax table. */ 2037 /* Initialize the syntax table. */
2050 init_syntax_once (); 2038 init_syntax_once ();
2051#endif 2039#endif
@@ -2134,7 +2122,7 @@ regex_compile (pattern, size, syntax, bufp)
2134 2122
2135 for (;;) 2123 for (;;)
2136 { 2124 {
2137 if (!(syntax & RE_ALL_GREEDY) 2125 if ((syntax & RE_FRUGAL)
2138 && c == '?' && (zero_times_ok || many_times_ok)) 2126 && c == '?' && (zero_times_ok || many_times_ok))
2139 greedy = 0; 2127 greedy = 0;
2140 else 2128 else
@@ -3322,7 +3310,7 @@ analyse_first (p, pend, fastmap, multibyte)
3322 char *destination; 3310 char *destination;
3323#endif 3311#endif
3324 3312
3325#if defined (REL_ALLOC) && defined (REGEX_MALLOC) 3313#if defined REL_ALLOC && defined REGEX_MALLOC
3326 /* This holds the pointer to the failure stack, when 3314 /* This holds the pointer to the failure stack, when
3327 it is allocated relocatably. */ 3315 it is allocated relocatably. */
3328 fail_stack_elt_t *failure_stack_ptr; 3316 fail_stack_elt_t *failure_stack_ptr;
@@ -3469,7 +3457,7 @@ analyse_first (p, pend, fastmap, multibyte)
3469 int c, count; 3457 int c, count;
3470 3458
3471 /* Make P point to the range table. `+ 2' is to skip flag 3459 /* Make P point to the range table. `+ 2' is to skip flag
3472 bits for a character class. */ 3460 bits for a character class. */
3473 p += CHARSET_BITMAP_SIZE (&p[-2]) + 2; 3461 p += CHARSET_BITMAP_SIZE (&p[-2]) + 2;
3474 3462
3475 /* Extract the number of ranges in range table into COUNT. */ 3463 /* Extract the number of ranges in range table into COUNT. */
@@ -3900,9 +3888,9 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
3900 val = re_match_2_internal (bufp, string1, size1, string2, size2, 3888 val = re_match_2_internal (bufp, string1, size1, string2, size2,
3901 startpos, regs, stop); 3889 startpos, regs, stop);
3902#ifndef REGEX_MALLOC 3890#ifndef REGEX_MALLOC
3903#ifdef C_ALLOCA 3891# ifdef C_ALLOCA
3904 alloca (0); 3892 alloca (0);
3905#endif 3893# endif
3906#endif 3894#endif
3907 3895
3908 if (val >= 0) 3896 if (val >= 0)
@@ -4043,8 +4031,8 @@ static int bcmp_translate _RE_ARGS((re_char *s1, re_char *s2,
4043 4031
4044/* Free everything we malloc. */ 4032/* Free everything we malloc. */
4045#ifdef MATCH_MAY_ALLOCATE 4033#ifdef MATCH_MAY_ALLOCATE
4046#define FREE_VAR(var) if (var) { REGEX_FREE (var); var = NULL; } else 4034# define FREE_VAR(var) if (var) { REGEX_FREE (var); var = NULL; } else
4047#define FREE_VARIABLES() \ 4035# define FREE_VARIABLES() \
4048 do { \ 4036 do { \
4049 REGEX_FREE_STACK (fail_stack.stack); \ 4037 REGEX_FREE_STACK (fail_stack.stack); \
4050 FREE_VAR (regstart); \ 4038 FREE_VAR (regstart); \
@@ -4053,7 +4041,7 @@ static int bcmp_translate _RE_ARGS((re_char *s1, re_char *s2,
4053 FREE_VAR (best_regend); \ 4041 FREE_VAR (best_regend); \
4054 } while (0) 4042 } while (0)
4055#else 4043#else
4056#define FREE_VARIABLES() ((void)0) /* Do nothing! But inhibit gcc warning. */ 4044# define FREE_VARIABLES() ((void)0) /* Do nothing! But inhibit gcc warning. */
4057#endif /* not MATCH_MAY_ALLOCATE */ 4045#endif /* not MATCH_MAY_ALLOCATE */
4058 4046
4059 4047
@@ -4330,9 +4318,9 @@ re_match (bufp, string, size, pos, regs)
4330{ 4318{
4331 int result = re_match_2_internal (bufp, NULL, 0, string, size, 4319 int result = re_match_2_internal (bufp, NULL, 0, string, size,
4332 pos, regs, size); 4320 pos, regs, size);
4333#if defined (C_ALLOCA) && !defined (REGEX_MALLOC) 4321# if defined C_ALLOCA && !defined REGEX_MALLOC
4334 alloca (0); 4322 alloca (0);
4335#endif 4323# endif
4336 return result; 4324 return result;
4337} 4325}
4338#endif /* not emacs */ 4326#endif /* not emacs */
@@ -4376,7 +4364,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
4376 4364
4377 result = re_match_2_internal (bufp, string1, size1, string2, size2, 4365 result = re_match_2_internal (bufp, string1, size1, string2, size2,
4378 pos, regs, stop); 4366 pos, regs, stop);
4379#if defined (C_ALLOCA) && !defined (REGEX_MALLOC) 4367#if defined C_ALLOCA && !defined REGEX_MALLOC
4380 alloca (0); 4368 alloca (0);
4381#endif 4369#endif
4382 return result; 4370 return result;
@@ -4439,7 +4427,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4439 unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0; 4427 unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
4440#endif 4428#endif
4441 4429
4442#if defined (REL_ALLOC) && defined (REGEX_MALLOC) 4430#if defined REL_ALLOC && defined REGEX_MALLOC
4443 /* This holds the pointer to the failure stack, when 4431 /* This holds the pointer to the failure stack, when
4444 it is allocated relocatably. */ 4432 it is allocated relocatably. */
4445 fail_stack_elt_t *failure_stack_ptr; 4433 fail_stack_elt_t *failure_stack_ptr;
@@ -4958,7 +4946,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4958 assert (!REG_UNSET (regstart[*p])); 4946 assert (!REG_UNSET (regstart[*p]));
4959 /* Strictly speaking, there should be code such as: 4947 /* Strictly speaking, there should be code such as:
4960 4948
4961 assert (REG_UNSET (regend[*p])); 4949 assert (REG_UNSET (regend[*p]));
4962 PUSH_FAILURE_REGSTOP ((unsigned int)*p); 4950 PUSH_FAILURE_REGSTOP ((unsigned int)*p);
4963 4951
4964 But the only info to be pushed is regend[*p] and it is known to 4952 But the only info to be pushed is regend[*p] and it is known to
@@ -5475,8 +5463,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5475 5463
5476#endif /* emacs */ 5464#endif /* emacs */
5477 5465
5478 default: 5466 default:
5479 abort (); 5467 abort ();
5480 } 5468 }
5481 continue; /* Successfully executed one pattern command; keep going. */ 5469 continue; /* Successfully executed one pattern command; keep going. */
5482 5470
@@ -5489,8 +5477,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5489 re_char *str; 5477 re_char *str;
5490 unsigned char *pat; 5478 unsigned char *pat;
5491 /* A restart point is known. Restore to that state. */ 5479 /* A restart point is known. Restore to that state. */
5492 DEBUG_PRINT1 ("\nFAIL:\n"); 5480 DEBUG_PRINT1 ("\nFAIL:\n");
5493 POP_FAILURE_POINT (str, pat); 5481 POP_FAILURE_POINT (str, pat);
5494 switch (SWITCH_ENUM_CAST ((re_opcode_t) *pat++)) 5482 switch (SWITCH_ENUM_CAST ((re_opcode_t) *pat++))
5495 { 5483 {
5496 case on_failure_keep_string_jump: 5484 case on_failure_keep_string_jump:
@@ -5521,11 +5509,11 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5521 5509
5522 assert (p >= bufp->buffer && p <= pend); 5510 assert (p >= bufp->buffer && p <= pend);
5523 5511
5524 if (d >= string1 && d <= end1) 5512 if (d >= string1 && d <= end1)
5525 dend = end_match_1; 5513 dend = end_match_1;
5526 } 5514 }
5527 else 5515 else
5528 break; /* Matching at this starting point really fails. */ 5516 break; /* Matching at this starting point really fails. */
5529 } /* for (;;) */ 5517 } /* for (;;) */
5530 5518
5531 if (best_regs_set) 5519 if (best_regs_set)
@@ -5587,7 +5575,7 @@ bcmp_translate (s1, s2, len, translate, multibyte)
5587const char * 5575const char *
5588re_compile_pattern (pattern, length, bufp) 5576re_compile_pattern (pattern, length, bufp)
5589 const char *pattern; 5577 const char *pattern;
5590 int length; 5578 size_t length;
5591 struct re_pattern_buffer *bufp; 5579 struct re_pattern_buffer *bufp;
5592{ 5580{
5593 reg_errcode_t ret; 5581 reg_errcode_t ret;
@@ -5614,18 +5602,18 @@ re_compile_pattern (pattern, length, bufp)
5614/* Entry points compatible with 4.2 BSD regex library. We don't define 5602/* Entry points compatible with 4.2 BSD regex library. We don't define
5615 them unless specifically requested. */ 5603 them unless specifically requested. */
5616 5604
5617#if defined (_REGEX_RE_COMP) || defined (_LIBC) 5605#if defined _REGEX_RE_COMP || defined _LIBC
5618 5606
5619/* BSD has one and only one pattern buffer. */ 5607/* BSD has one and only one pattern buffer. */
5620static struct re_pattern_buffer re_comp_buf; 5608static struct re_pattern_buffer re_comp_buf;
5621 5609
5622char * 5610char *
5623#ifdef _LIBC 5611# ifdef _LIBC
5624/* Make these definitions weak in libc, so POSIX programs can redefine 5612/* Make these definitions weak in libc, so POSIX programs can redefine
5625 these names if they don't use our functions, and still use 5613 these names if they don't use our functions, and still use
5626 regcomp/regexec below without link errors. */ 5614 regcomp/regexec below without link errors. */
5627weak_function 5615weak_function
5628#endif 5616# endif
5629re_comp (s) 5617re_comp (s)
5630 const char *s; 5618 const char *s;
5631{ 5619{
@@ -5634,7 +5622,7 @@ re_comp (s)
5634 if (!s) 5622 if (!s)
5635 { 5623 {
5636 if (!re_comp_buf.buffer) 5624 if (!re_comp_buf.buffer)
5637 /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */ 5625 /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */
5638 return (char *) gettext ("No previous regular expression"); 5626 return (char *) gettext ("No previous regular expression");
5639 return 0; 5627 return 0;
5640 } 5628 }
@@ -5643,8 +5631,8 @@ re_comp (s)
5643 { 5631 {
5644 re_comp_buf.buffer = (unsigned char *) malloc (200); 5632 re_comp_buf.buffer = (unsigned char *) malloc (200);
5645 if (re_comp_buf.buffer == NULL) 5633 if (re_comp_buf.buffer == NULL)
5646 /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */ 5634 /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */
5647 return (char *) gettext (re_error_msgid[(int) REG_ESPACE]); 5635 return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
5648 re_comp_buf.allocated = 200; 5636 re_comp_buf.allocated = 200;
5649 5637
5650 re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH); 5638 re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
@@ -5670,9 +5658,9 @@ re_comp (s)
5670 5658
5671 5659
5672int 5660int
5673#ifdef _LIBC 5661# ifdef _LIBC
5674weak_function 5662weak_function
5675#endif 5663# endif
5676re_exec (s) 5664re_exec (s)
5677 const char *s; 5665 const char *s;
5678{ 5666{
@@ -5750,11 +5738,11 @@ regcomp (preg, pattern, cflags)
5750 = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE 5738 = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
5751 * sizeof (*(RE_TRANSLATE_TYPE)0)); 5739 * sizeof (*(RE_TRANSLATE_TYPE)0));
5752 if (preg->translate == NULL) 5740 if (preg->translate == NULL)
5753 return (int) REG_ESPACE; 5741 return (int) REG_ESPACE;
5754 5742
5755 /* Map uppercase characters to corresponding lowercase ones. */ 5743 /* Map uppercase characters to corresponding lowercase ones. */
5756 for (i = 0; i < CHAR_SET_SIZE; i++) 5744 for (i = 0; i < CHAR_SET_SIZE; i++)
5757 preg->translate[i] = ISUPPER (i) ? tolower (i) : i; 5745 preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
5758 } 5746 }
5759 else 5747 else
5760 preg->translate = NULL; 5748 preg->translate = NULL;
@@ -5828,27 +5816,27 @@ regexec (preg, string, nmatch, pmatch, eflags)
5828 regs.start = TALLOC (nmatch, regoff_t); 5816 regs.start = TALLOC (nmatch, regoff_t);
5829 regs.end = TALLOC (nmatch, regoff_t); 5817 regs.end = TALLOC (nmatch, regoff_t);
5830 if (regs.start == NULL || regs.end == NULL) 5818 if (regs.start == NULL || regs.end == NULL)
5831 return (int) REG_NOMATCH; 5819 return (int) REG_NOMATCH;
5832 } 5820 }
5833 5821
5834 /* Perform the searching operation. */ 5822 /* Perform the searching operation. */
5835 ret = re_search (&private_preg, string, len, 5823 ret = re_search (&private_preg, string, len,
5836 /* start: */ 0, /* range: */ len, 5824 /* start: */ 0, /* range: */ len,
5837 want_reg_info ? &regs : (struct re_registers *) 0); 5825 want_reg_info ? &regs : (struct re_registers *) 0);
5838 5826
5839 /* Copy the register information to the POSIX structure. */ 5827 /* Copy the register information to the POSIX structure. */
5840 if (want_reg_info) 5828 if (want_reg_info)
5841 { 5829 {
5842 if (ret >= 0) 5830 if (ret >= 0)
5843 { 5831 {
5844 unsigned r; 5832 unsigned r;
5845 5833
5846 for (r = 0; r < nmatch; r++) 5834 for (r = 0; r < nmatch; r++)
5847 { 5835 {
5848 pmatch[r].rm_so = regs.start[r]; 5836 pmatch[r].rm_so = regs.start[r];
5849 pmatch[r].rm_eo = regs.end[r]; 5837 pmatch[r].rm_eo = regs.end[r];
5850 } 5838 }
5851 } 5839 }
5852 5840
5853 /* If we needed the temporary register info, free the space now. */ 5841 /* If we needed the temporary register info, free the space now. */
5854 free (regs.start); 5842 free (regs.start);
@@ -5888,12 +5876,12 @@ regerror (errcode, preg, errbuf, errbuf_size)
5888 if (errbuf_size != 0) 5876 if (errbuf_size != 0)
5889 { 5877 {
5890 if (msg_size > errbuf_size) 5878 if (msg_size > errbuf_size)
5891 { 5879 {
5892 strncpy (errbuf, msg, errbuf_size - 1); 5880 strncpy (errbuf, msg, errbuf_size - 1);
5893 errbuf[errbuf_size - 1] = 0; 5881 errbuf[errbuf_size - 1] = 0;
5894 } 5882 }
5895 else 5883 else
5896 strcpy (errbuf, msg); 5884 strcpy (errbuf, msg);
5897 } 5885 }
5898 5886
5899 return msg_size; 5887 return msg_size;
diff --git a/src/regex.h b/src/regex.h
index 2f6860d9ec5..d0c32d50d27 100644
--- a/src/regex.h
+++ b/src/regex.h
@@ -18,36 +18,37 @@
18 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, 18 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
19 USA. */ 19 USA. */
20 20
21#ifndef __REGEXP_LIBRARY_H__ 21#ifndef _REGEX_H
22#define __REGEXP_LIBRARY_H__ 22#define _REGEX_H 1
23
24/* Allow the use in C++ code. */
25#ifdef __cplusplus
26extern "C" {
27#endif
23 28
24/* POSIX says that <sys/types.h> must be included (by the caller) before 29/* POSIX says that <sys/types.h> must be included (by the caller) before
25 <regex.h>. */ 30 <regex.h>. */
26 31
27#if !defined (_POSIX_C_SOURCE) && !defined (_POSIX_SOURCE) && defined (VMS) 32#if !defined _POSIX_C_SOURCE && !defined _POSIX_SOURCE && defined VMS
28/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it 33/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
29 should be there. */ 34 should be there. */
30#include <stddef.h> 35# include <stddef.h>
31#endif 36#endif
32 37
33
34/* The following bits are used to determine the regexp syntax we 38/* The following bits are used to determine the regexp syntax we
35 recognize. The set/not-set meanings were historically chosen so 39 recognize. The set/not-set meanings were historically chosen so
36 that Emacs syntax had the value 0. 40 that Emacs syntax had the value 0.
37 The bits are given in alphabetical order, and 41 The bits are given in alphabetical order, and
38 the definitions shifted by one from the previous bit; thus, when we 42 the definitions shifted by one from the previous bit; thus, when we
39 add or remove a bit, only one other definition need change. */ 43 add or remove a bit, only one other definition need change. */
40typedef unsigned reg_syntax_t; 44typedef unsigned long int reg_syntax_t;
41
42/* If this bit is set, then even *?, +? and ?? match greedily. */
43#define RE_ALL_GREEDY (1)
44 45
45/* If this bit is not set, then \ inside a bracket expression is literal. 46/* If this bit is not set, then \ inside a bracket expression is literal.
46 If set, then such a \ quotes the following character. */ 47 If set, then such a \ quotes the following character. */
47#define RE_BACKSLASH_ESCAPE_IN_LISTS (RE_ALL_GREEDY << 1) 48#define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1)
48 49
49/* If this bit is not set, then + and ? are operators, and \+ and \? are 50/* If this bit is not set, then + and ? are operators, and \+ and \? are
50 literals. 51 literals.
51 If set, then \+ and \? are operators and + and ? are literals. */ 52 If set, then \+ and \? are operators and + and ? are literals. */
52#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1) 53#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
53 54
@@ -63,7 +64,7 @@ typedef unsigned reg_syntax_t;
63 ^ is an anchor if it is at the beginning of a regular 64 ^ is an anchor if it is at the beginning of a regular
64 expression or after an open-group or an alternation operator; 65 expression or after an open-group or an alternation operator;
65 $ is an anchor if it is at the end of a regular expression, or 66 $ is an anchor if it is at the end of a regular expression, or
66 before a close-group or an alternation operator. 67 before a close-group or an alternation operator.
67 68
68 This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because 69 This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
69 POSIX draft 11.2 says that * etc. in leading positions is undefined. 70 POSIX draft 11.2 says that * etc. in leading positions is undefined.
@@ -74,7 +75,7 @@ typedef unsigned reg_syntax_t;
74/* If this bit is set, then special characters are always special 75/* If this bit is set, then special characters are always special
75 regardless of where they are in the pattern. 76 regardless of where they are in the pattern.
76 If this bit is not set, then special characters are special only in 77 If this bit is not set, then special characters are special only in
77 some contexts; otherwise they are ordinary. Specifically, 78 some contexts; otherwise they are ordinary. Specifically,
78 * + ? and intervals are only special when not after the beginning, 79 * + ? and intervals are only special when not after the beginning,
79 open-group, or alternation operator. */ 80 open-group, or alternation operator. */
80#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1) 81#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
@@ -96,7 +97,7 @@ typedef unsigned reg_syntax_t;
96#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1) 97#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
97 98
98/* If this bit is set, either \{...\} or {...} defines an 99/* If this bit is set, either \{...\} or {...} defines an
99 interval, depending on RE_NO_BK_BRACES. 100 interval, depending on RE_NO_BK_BRACES.
100 If not set, \{, \}, {, and } are literals. */ 101 If not set, \{, \}, {, and } are literals. */
101#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1) 102#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
102 103
@@ -121,7 +122,7 @@ typedef unsigned reg_syntax_t;
121 If not set, then \<digit> is a back-reference. */ 122 If not set, then \<digit> is a back-reference. */
122#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1) 123#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
123 124
124/* If this bit is set, then | is an alternation operator, and \| is literal. 125/* If this bit is set, then | is an alternation operator, and \| is literal.
125 If not set, then \| is an alternation operator, and | is literal. */ 126 If not set, then \| is an alternation operator, and | is literal. */
126#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1) 127#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
127 128
@@ -131,16 +132,31 @@ typedef unsigned reg_syntax_t;
131 starting range point, the range is ignored. */ 132 starting range point, the range is ignored. */
132#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1) 133#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
133 134
135/* If this bit is set, then an unmatched ) is ordinary.
136 If not set, then an unmatched ) is invalid. */
137#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
138
134/* If this bit is set, succeed as soon as we match the whole pattern, 139/* If this bit is set, succeed as soon as we match the whole pattern,
135 without further backtracking. */ 140 without further backtracking. */
136#define RE_NO_POSIX_BACKTRACKING (RE_NO_EMPTY_RANGES << 1) 141#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)
137 142
138/* If this bit is set, then (?:...) is treated as a shy group. */ 143/* If this bit is set, do not process the GNU regex operators.
139#define RE_SHY_GROUPS (RE_NO_POSIX_BACKTRACKING << 1) 144 If not set, then the GNU regex operators are recognized. */
145#define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)
140 146
141/* If this bit is set, then an unmatched ) is ordinary. 147/* If this bit is set, turn on internal regex debugging.
142 If not set, then an unmatched ) is invalid. */ 148 If not set, and debugging was on, turn it off.
143#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_SHY_GROUPS << 1) 149 This only works if regex.c is compiled -DDEBUG.
150 We define this bit always, so that all that's needed to turn on
151 debugging is to recompile regex.c; the calling code can always have
152 this bit set, and it won't affect anything in the normal case. */
153#define RE_DEBUG (RE_NO_GNU_OPS << 1)
154
155/* If this bit is set, then *?, +? and ?? match non greedily. */
156#define RE_FRUGAL (RE_DEBUG << 1)
157
158/* If this bit is set, then (?:...) is treated as a shy group. */
159#define RE_SHY_GROUPS (RE_FRUGAL << 1)
144 160
145/* This global variable defines the particular regexp syntax to use (for 161/* This global variable defines the particular regexp syntax to use (for
146 some interfaces). When a regexp is compiled, the syntax used is 162 some interfaces). When a regexp is compiled, the syntax used is
@@ -159,27 +175,34 @@ extern Lisp_Object re_match_object;
159 (The [[[ comments delimit what gets put into the Texinfo file, so 175 (The [[[ comments delimit what gets put into the Texinfo file, so
160 don't delete them!) */ 176 don't delete them!) */
161/* [[[begin syntaxes]]] */ 177/* [[[begin syntaxes]]] */
162#define RE_SYNTAX_EMACS (RE_CHAR_CLASSES | RE_INTERVALS | RE_SHY_GROUPS) 178#define RE_SYNTAX_EMACS \
179 (RE_CHAR_CLASSES | RE_INTERVALS | RE_SHY_GROUPS | RE_FRUGAL)
163 180
164#define RE_SYNTAX_AWK \ 181#define RE_SYNTAX_AWK \
165 (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \ 182 (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \
166 | RE_NO_BK_PARENS | RE_NO_BK_REFS \ 183 | RE_NO_BK_PARENS | RE_NO_BK_REFS \
167 | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \ 184 | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \
168 | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_ALL_GREEDY) 185 | RE_DOT_NEWLINE | RE_CONTEXT_INDEP_ANCHORS \
186 | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)
187
188#define RE_SYNTAX_GNU_AWK \
189 ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG) \
190 & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS))
169 191
170#define RE_SYNTAX_POSIX_AWK \ 192#define RE_SYNTAX_POSIX_AWK \
171 (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS) 193 (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \
194 | RE_INTERVALS | RE_NO_GNU_OPS)
172 195
173#define RE_SYNTAX_GREP \ 196#define RE_SYNTAX_GREP \
174 (RE_BK_PLUS_QM | RE_CHAR_CLASSES \ 197 (RE_BK_PLUS_QM | RE_CHAR_CLASSES \
175 | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \ 198 | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \
176 | RE_NEWLINE_ALT | RE_ALL_GREEDY) 199 | RE_NEWLINE_ALT)
177 200
178#define RE_SYNTAX_EGREP \ 201#define RE_SYNTAX_EGREP \
179 (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \ 202 (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \
180 | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \ 203 | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \
181 | RE_NEWLINE_ALT | RE_NO_BK_PARENS \ 204 | RE_NEWLINE_ALT | RE_NO_BK_PARENS \
182 | RE_NO_BK_VBAR | RE_ALL_GREEDY) 205 | RE_NO_BK_VBAR)
183 206
184#define RE_SYNTAX_POSIX_EGREP \ 207#define RE_SYNTAX_POSIX_EGREP \
185 (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES) 208 (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES)
@@ -192,7 +215,7 @@ extern Lisp_Object re_match_object;
192/* Syntax bits common to both basic and extended POSIX regex syntax. */ 215/* Syntax bits common to both basic and extended POSIX regex syntax. */
193#define _RE_SYNTAX_POSIX_COMMON \ 216#define _RE_SYNTAX_POSIX_COMMON \
194 (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \ 217 (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \
195 | RE_INTERVALS | RE_NO_EMPTY_RANGES | RE_ALL_GREEDY) 218 | RE_INTERVALS | RE_NO_EMPTY_RANGES)
196 219
197#define RE_SYNTAX_POSIX_BASIC \ 220#define RE_SYNTAX_POSIX_BASIC \
198 (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM) 221 (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)
@@ -204,13 +227,13 @@ extern Lisp_Object re_match_object;
204 (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS) 227 (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
205 228
206#define RE_SYNTAX_POSIX_EXTENDED \ 229#define RE_SYNTAX_POSIX_EXTENDED \
207 (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ 230 (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
208 | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \ 231 | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \
209 | RE_NO_BK_PARENS | RE_NO_BK_VBAR \ 232 | RE_NO_BK_PARENS | RE_NO_BK_VBAR \
210 | RE_UNMATCHED_RIGHT_PAREN_ORD) 233 | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD)
211 234
212/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS 235/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is
213 replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */ 236 removed and RE_NO_BK_REFS is added. */
214#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \ 237#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \
215 (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ 238 (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
216 | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \ 239 | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \
@@ -222,9 +245,10 @@ extern Lisp_Object re_match_object;
222 (erroneously) define this in other header files, but we want our 245 (erroneously) define this in other header files, but we want our
223 value, so remove any previous define. */ 246 value, so remove any previous define. */
224#ifdef RE_DUP_MAX 247#ifdef RE_DUP_MAX
225#undef RE_DUP_MAX 248# undef RE_DUP_MAX
226#endif 249#endif
227#define RE_DUP_MAX ((1 << 15) - 1) 250/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows. */
251#define RE_DUP_MAX (0x7fff)
228 252
229 253
230/* POSIX `cflags' bits (i.e., information for `regcomp'). */ 254/* POSIX `cflags' bits (i.e., information for `regcomp'). */
@@ -236,7 +260,7 @@ extern Lisp_Object re_match_object;
236/* If this bit is set, then ignore case when matching. 260/* If this bit is set, then ignore case when matching.
237 If not set, then case is significant. */ 261 If not set, then case is significant. */
238#define REG_ICASE (REG_EXTENDED << 1) 262#define REG_ICASE (REG_EXTENDED << 1)
239 263
240/* If this bit is set, then anchors do not match at newline 264/* If this bit is set, then anchors do not match at newline
241 characters in the string. 265 characters in the string.
242 If not set, then anchors do match at newlines. */ 266 If not set, then anchors do match at newlines. */
@@ -264,6 +288,10 @@ extern Lisp_Object re_match_object;
264 `re_error_msg' table in regex.c. */ 288 `re_error_msg' table in regex.c. */
265typedef enum 289typedef enum
266{ 290{
291#ifdef _XOPEN_SOURCE
292 REG_ENOSYS = -1, /* This will never happen for this implementation. */
293#endif
294
267 REG_NOERROR = 0, /* Success. */ 295 REG_NOERROR = 0, /* Success. */
268 REG_NOMATCH, /* Didn't find a match (for regexec). */ 296 REG_NOMATCH, /* Didn't find a match (for regexec). */
269 297
@@ -275,7 +303,7 @@ typedef enum
275 REG_EESCAPE, /* Trailing backslash. */ 303 REG_EESCAPE, /* Trailing backslash. */
276 REG_ESUBREG, /* Invalid back reference. */ 304 REG_ESUBREG, /* Invalid back reference. */
277 REG_EBRACK, /* Unmatched left bracket. */ 305 REG_EBRACK, /* Unmatched left bracket. */
278 REG_EPAREN, /* Parenthesis imbalance. */ 306 REG_EPAREN, /* Parenthesis imbalance. */
279 REG_EBRACE, /* Unmatched \{. */ 307 REG_EBRACE, /* Unmatched \{. */
280 REG_BADBR, /* Invalid contents of \{\}. */ 308 REG_BADBR, /* Invalid contents of \{\}. */
281 REG_ERANGE, /* Invalid range end. */ 309 REG_ERANGE, /* Invalid range end. */
@@ -295,7 +323,7 @@ typedef enum
295 private to the regex routines. */ 323 private to the regex routines. */
296 324
297#ifndef RE_TRANSLATE_TYPE 325#ifndef RE_TRANSLATE_TYPE
298#define RE_TRANSLATE_TYPE char * 326# define RE_TRANSLATE_TYPE char *
299#endif 327#endif
300 328
301struct re_pattern_buffer 329struct re_pattern_buffer
@@ -307,10 +335,10 @@ struct re_pattern_buffer
307 unsigned char *buffer; 335 unsigned char *buffer;
308 336
309 /* Number of bytes to which `buffer' points. */ 337 /* Number of bytes to which `buffer' points. */
310 unsigned long allocated; 338 unsigned long int allocated;
311 339
312 /* Number of bytes actually used in `buffer'. */ 340 /* Number of bytes actually used in `buffer'. */
313 unsigned long used; 341 unsigned long int used;
314 342
315 /* Syntax setting with which the pattern was compiled. */ 343 /* Syntax setting with which the pattern was compiled. */
316 reg_syntax_t syntax; 344 reg_syntax_t syntax;
@@ -353,7 +381,7 @@ struct re_pattern_buffer
353 unsigned no_sub : 1; 381 unsigned no_sub : 1;
354 382
355 /* If set, a beginning-of-line anchor doesn't match at the 383 /* If set, a beginning-of-line anchor doesn't match at the
356 beginning of the string. */ 384 beginning of the string. */
357 unsigned not_bol : 1; 385 unsigned not_bol : 1;
358 386
359 /* Similarly for an end-of-line anchor. */ 387 /* Similarly for an end-of-line anchor. */
@@ -391,7 +419,7 @@ struct re_registers
391 `re_match_2' returns information about at least this many registers 419 `re_match_2' returns information about at least this many registers
392 the first time a `regs' structure is passed. */ 420 the first time a `regs' structure is passed. */
393#ifndef RE_NREGS 421#ifndef RE_NREGS
394#define RE_NREGS 30 422# define RE_NREGS 30
395#endif 423#endif
396 424
397 425
@@ -414,11 +442,11 @@ typedef struct
414 442
415#if defined __STDC__ || defined PROTOTYPES 443#if defined __STDC__ || defined PROTOTYPES
416 444
417#define _RE_ARGS(args) args 445# define _RE_ARGS(args) args
418 446
419#else /* not __STDC__ || PROTOTYPES */ 447#else /* not __STDC__ || PROTOTYPES */
420 448
421#define _RE_ARGS(args) () 449# define _RE_ARGS(args) ()
422 450
423#endif /* not __STDC__ || PROTOTYPES */ 451#endif /* not __STDC__ || PROTOTYPES */
424 452
@@ -430,7 +458,7 @@ extern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax));
430 and syntax given by the global `re_syntax_options', into the buffer 458 and syntax given by the global `re_syntax_options', into the buffer
431 BUFFER. Return NULL if successful, and an error string if not. */ 459 BUFFER. Return NULL if successful, and an error string if not. */
432extern const char *re_compile_pattern 460extern const char *re_compile_pattern
433 _RE_ARGS ((const char *pattern, int length, 461 _RE_ARGS ((const char *pattern, size_t length,
434 struct re_pattern_buffer *buffer)); 462 struct re_pattern_buffer *buffer));
435 463
436 464
@@ -466,7 +494,7 @@ extern int re_match
466 494
467 495
468/* Relates to `re_match' as `re_search_2' relates to `re_search'. */ 496/* Relates to `re_match' as `re_search_2' relates to `re_search'. */
469extern int re_match_2 497extern int re_match_2
470 _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1, 498 _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
471 int length1, const char *string2, int length2, 499 int length1, const char *string2, int length2,
472 int start, struct re_registers *regs, int stop)); 500 int start, struct re_registers *regs, int stop));
@@ -488,23 +516,51 @@ extern void re_set_registers
488 _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs, 516 _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs,
489 unsigned num_regs, regoff_t *starts, regoff_t *ends)); 517 unsigned num_regs, regoff_t *starts, regoff_t *ends));
490 518
491#ifdef _REGEX_RE_COMP 519#if defined _REGEX_RE_COMP || defined _LIBC
520# ifndef _CRAY
492/* 4.2 bsd compatibility. */ 521/* 4.2 bsd compatibility. */
493extern char *re_comp _RE_ARGS ((const char *)); 522extern char *re_comp _RE_ARGS ((const char *));
494extern int re_exec _RE_ARGS ((const char *)); 523extern int re_exec _RE_ARGS ((const char *));
524# endif
525#endif
526
527/* GCC 2.95 and later have "__restrict"; C99 compilers have
528 "restrict", and "configure" may have defined "restrict". */
529#ifndef __restrict
530# if ! (2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__))
531# if defined restrict || 199901L <= __STDC_VERSION__
532# define __restrict restrict
533# else
534# define __restrict
535# endif
536# endif
495#endif 537#endif
538/* For now unconditionally define __restrict_arr to expand to nothing.
539 Ideally we would have a test for the compiler which allows defining
540 it to restrict. */
541#define __restrict_arr
496 542
497/* POSIX compatibility. */ 543/* POSIX compatibility. */
498extern int regcomp _RE_ARGS ((regex_t *preg, const char *pattern, int cflags)); 544extern int regcomp _RE_ARGS ((regex_t *__restrict __preg,
499extern int regexec 545 const char *__restrict __pattern,
500 _RE_ARGS ((const regex_t *preg, const char *string, size_t nmatch, 546 int __cflags));
501 regmatch_t pmatch[], int eflags)); 547
502extern size_t regerror 548extern int regexec _RE_ARGS ((const regex_t *__restrict __preg,
503 _RE_ARGS ((int errcode, const regex_t *preg, char *errbuf, 549 const char *__restrict __string, size_t __nmatch,
504 size_t errbuf_size)); 550 regmatch_t __pmatch[__restrict_arr],
505extern void regfree _RE_ARGS ((regex_t *preg)); 551 int __eflags));
506 552
507#endif /* not __REGEXP_LIBRARY_H__ */ 553extern size_t regerror _RE_ARGS ((int __errcode, const regex_t *__preg,
554 char *__errbuf, size_t __errbuf_size));
555
556extern void regfree _RE_ARGS ((regex_t *__preg));
557
558
559#ifdef __cplusplus
560}
561#endif /* C++ */
562
563#endif /* regex.h */
508 564
509/* 565/*
510Local variables: 566Local variables: