about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/ChangeLog 22
-rw-r--r-- src/regex.c 339
2 files changed, 219 insertions, 142 deletions
diff --git a/src/ChangeLog b/src/ChangeLog
index 2544112576b..2e3b494a8c9 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,25 @@
1 2000-08-30 Stefan Monnier <monnier@cs.yale.edu>
2
3 * regex.h (struct re_pattern_buffer): Use size_t for used/allocated.
4
5 * regex.c: Merge some changes from GNU libc. Add prototypes.
6 (bcopy, bcmp, REGEX_REALLOCATE, re_match_2_internal):
7 Use memcmp and memcpy instead of bcopy and bcmp.
8 (init_syntax_once): Use ISALNUM.
9 (PUSH_FAILURE_POINT, re_match_2_internal): Remove failure_id.
10 (REG_UNSET_VALUE): Remove. Use NULL instead.
11 (REG_UNSET, re_match_2_internal): Use NULL.
12 (SET_HIGH_BOUND, MOVE_BUFFER_POINTER, ELSE_EXTEND_BUFFER_HIGH_BOUND):
13 New macros.
14 (EXTEND_BUFFER): Use them (to work with BOUNDED_POINTERS).
15 (GET_UNSIGNED_NUMBER): Don't use ISDIGIT.
16 (regex_compile): In handle_interval, return an error rather than try to
17 unfetch the interval if we can't find the closing brace.
18 Obey the RE_NO_GNU_OPS syntax bit.
19 (TOLOWER): New macro.
20 (regcomp): Use it.
21 (regexec): Allocate regs.start and regs.end as one block.
22
1 2000-08-30 Gerd Moellmann <gerd@gnu.org> 23 2000-08-30 Gerd Moellmann <gerd@gnu.org>
2 24
3 * xdisp.c (echo_area_display): Check display_completed instead 25 * xdisp.c (echo_area_display): Check display_completed instead
diff --git a/src/regex.c b/src/regex.c
index c00c8d630e6..71c9dfe4507 100644
--- a/src/regex.c
+++ b/src/regex.c
@@ -2,7 +2,7 @@
2 0.12. (Implements POSIX draft P1003.2/D11.2, except for some of the 2 0.12. (Implements POSIX draft P1003.2/D11.2, except for some of the
3 internationalization features.) 3 internationalization features.)
4 4
5 Copyright (C) 1993,94,95,96,97,98,2000 Free Software Foundation, Inc. 5 Copyright (C) 1993,94,95,96,97,98,99,2000 Free Software Foundation, Inc.
6 6
7 This program is free software; you can redistribute it and/or modify 7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by 8 it under the terms of the GNU General Public License as published by
@@ -25,6 +25,7 @@
25 - replace succeed_n + jump_n with a combined operation so that the counter 25 - replace succeed_n + jump_n with a combined operation so that the counter
26 can simply be decremented when popping the failure_point without having 26 can simply be decremented when popping the failure_point without having
27 to stack up failure_count entries. 27 to stack up failure_count entries.
28 - get rid of `newline_anchor'.
28 */ 29 */
29 30
30/* AIX requires this to be the first thing in the file. */ 31/* AIX requires this to be the first thing in the file. */
@@ -39,10 +40,14 @@
39# include <config.h> 40# include <config.h>
40#endif 41#endif
41 42
42/* We need this for `regex.h', and perhaps for the Emacs include files. */ 43#if defined STDC_HEADERS && !defined emacs
43#include <sys/types.h> 44# include <stddef.h>
45#else
46/* We need this for `regex.h', and perhaps for the Emacs include files. */
47# include <sys/types.h>
48#endif
44 49
45/* This is for other GNU distributions with internationalized messages. */ 50/* This is for other GNU distributions with internationalized messages. */
46#if HAVE_LIBINTL_H || defined _LIBC 51#if HAVE_LIBINTL_H || defined _LIBC
47# include <libintl.h> 52# include <libintl.h>
48#else 53#else
@@ -115,7 +120,7 @@ char *realloc ();
115# endif 120# endif
116 121
117/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow. 122/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
118 If nothing else has been done, use the method below. */ 123 If nothing else has been done, use the method below. */
119# ifdef INHIBIT_STRING_HEADER 124# ifdef INHIBIT_STRING_HEADER
120# if !(defined HAVE_BZERO && defined HAVE_BCOPY) 125# if !(defined HAVE_BZERO && defined HAVE_BCOPY)
121# if !defined bzero && !defined bcopy 126# if !defined bzero && !defined bcopy
@@ -124,23 +129,27 @@ char *realloc ();
124# endif 129# endif
125# endif 130# endif
126 131
127/* This is the normal way of making sure we have a bcopy and a bzero. 132/* This is the normal way of making sure we have memcpy, memcmp and bzero.
128 This is used in most programs--a few other programs avoid this 133 This is used in most programs--a few other programs avoid this
129 by defining INHIBIT_STRING_HEADER. */ 134 by defining INHIBIT_STRING_HEADER. */
130# ifndef INHIBIT_STRING_HEADER 135# ifndef INHIBIT_STRING_HEADER
131# if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC 136# if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
132# include <string.h> 137# include <string.h>
133# ifndef bcmp
134# define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
135# endif
136# ifndef bcopy
137# define bcopy(s, d, n) memcpy ((d), (s), (n))
138# endif
139# ifndef bzero 138# ifndef bzero
140# define bzero(s, n) memset ((s), 0, (n)) 139# ifndef _LIBC
140# define bzero(s, n) (memset (s, '\0', n), (s))
141# else
142# define bzero(s, n) __bzero (s, n)
143# endif
141# endif 144# endif
142# else 145# else
143# include <strings.h> 146# include <strings.h>
147# ifndef memcmp
148# define memcmp(s1, s2, n) bcmp (s1, s2, n)
149# endif
150# ifndef memcpy
151# define memcpy(d, s, n) (bcopy (s, d, n), (d))
152# endif
144# endif 153# endif
145# endif 154# endif
146 155
@@ -155,8 +164,6 @@ enum syntaxcode { Swhitespace = 0, Sword = 1 };
155# define SWITCH_ENUM_CAST(x) (x) 164# define SWITCH_ENUM_CAST(x) (x)
156# endif 165# endif
157 166
158# define SYNTAX(c) re_syntax_table[c]
159
160/* Dummy macros for non-Emacs environments. */ 167/* Dummy macros for non-Emacs environments. */
161# define BASE_LEADING_CODE_P(c) (0) 168# define BASE_LEADING_CODE_P(c) (0)
162# define CHAR_CHARSET(c) 0 169# define CHAR_CHARSET(c) 0
@@ -235,8 +242,8 @@ enum syntaxcode { Swhitespace = 0, Sword = 1 };
235# define ISPUNCT(c) (IS_REAL_ASCII (c) \ 242# define ISPUNCT(c) (IS_REAL_ASCII (c) \
236 ? ((c) > ' ' && (c) < 0177 \ 243 ? ((c) > ' ' && (c) < 0177 \
237 && !(((c) >= 'a' && (c) <= 'z') \ 244 && !(((c) >= 'a' && (c) <= 'z') \
238 || ((c) >= 'A' && (c) <= 'Z') \ 245 || ((c) >= 'A' && (c) <= 'Z') \
239 || ((c) >= '0' && (c) <= '9'))) \ 246 || ((c) >= '0' && (c) <= '9'))) \
240 : SYNTAX (c) != Sword) 247 : SYNTAX (c) != Sword)
241 248
242# define ISSPACE(c) (SYNTAX (c) == Swhitespace) 249# define ISSPACE(c) (SYNTAX (c) == Swhitespace)
@@ -252,12 +259,14 @@ enum syntaxcode { Swhitespace = 0, Sword = 1 };
252 "... Some ctype macros are valid only for character codes that 259 "... Some ctype macros are valid only for character codes that
253 isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when 260 isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
254 using /bin/cc or gcc but without giving an ansi option). So, all 261 using /bin/cc or gcc but without giving an ansi option). So, all
255 ctype uses should be through macros like ISPRINT... If 262 ctype uses should be through macros like ISPRINT... If
256 STDC_HEADERS is defined, then autoconf has verified that the ctype 263 STDC_HEADERS is defined, then autoconf has verified that the ctype
257 macros don't need to be guarded with references to isascii. ... 264 macros don't need to be guarded with references to isascii. ...
258 Defining isascii to 1 should let any compiler worth its salt 265 Defining isascii to 1 should let any compiler worth its salt
259 eliminate the && through constant folding." */ 266 eliminate the && through constant folding."
267 Solaris defines some of these symbols so we must undefine them first. */
260 268
269# undef ISASCII
261# if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII) 270# if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
262# define ISASCII(c) 1 271# define ISASCII(c) 1
263# else 272# else
@@ -281,6 +290,7 @@ enum syntaxcode { Swhitespace = 0, Sword = 1 };
281# define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c)) 290# define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
282# endif 291# endif
283 292
293# undef ISPRINT
284# define ISPRINT(c) (ISASCII (c) && isprint (c)) 294# define ISPRINT(c) (ISASCII (c) && isprint (c))
285# define ISDIGIT(c) (ISASCII (c) && isdigit (c)) 295# define ISDIGIT(c) (ISASCII (c) && isdigit (c))
286# define ISALNUM(c) (ISASCII (c) && isalnum (c)) 296# define ISALNUM(c) (ISASCII (c) && isalnum (c))
@@ -294,15 +304,21 @@ enum syntaxcode { Swhitespace = 0, Sword = 1 };
294 304
295# define ISWORD(c) ISALPHA(c) 305# define ISWORD(c) ISALPHA(c)
296 306
307# ifdef _tolower
308# define TOLOWER(c) _tolower(c)
309# else
310# define TOLOWER(c) tolower(c)
311# endif
312
313/* How many characters in the character set. */
314# define CHAR_SET_SIZE 256
315
297# ifdef SYNTAX_TABLE 316# ifdef SYNTAX_TABLE
298 317
299extern char *re_syntax_table; 318extern char *re_syntax_table;
300 319
301# else /* not SYNTAX_TABLE */ 320# else /* not SYNTAX_TABLE */
302 321
303/* How many characters in the character set. */
304# define CHAR_SET_SIZE 256
305
306static char re_syntax_table[CHAR_SET_SIZE]; 322static char re_syntax_table[CHAR_SET_SIZE];
307 323
308static void 324static void
@@ -316,14 +332,9 @@ init_syntax_once ()
316 332
317 bzero (re_syntax_table, sizeof re_syntax_table); 333 bzero (re_syntax_table, sizeof re_syntax_table);
318 334
319 for (c = 'a'; c <= 'z'; c++) 335 for (c = 0; c < CHAR_SET_SIZE; ++c)
320 re_syntax_table[c] = Sword; 336 if (ISALNUM (c))
321 337 re_syntax_table[c] = Sword;
322 for (c = 'A'; c <= 'Z'; c++)
323 re_syntax_table[c] = Sword;
324
325 for (c = '0'; c <= '9'; c++)
326 re_syntax_table[c] = Sword;
327 338
328 re_syntax_table['_'] = Sword; 339 re_syntax_table['_'] = Sword;
329 340
@@ -332,6 +343,8 @@ init_syntax_once ()
332 343
333# endif /* not SYNTAX_TABLE */ 344# endif /* not SYNTAX_TABLE */
334 345
346# define SYNTAX(c) re_syntax_table[(c)]
347
335#endif /* not emacs */ 348#endif /* not emacs */
336 349
337#ifndef NULL 350#ifndef NULL
@@ -341,7 +354,7 @@ init_syntax_once ()
341/* We remove any previous definition of `SIGN_EXTEND_CHAR', 354/* We remove any previous definition of `SIGN_EXTEND_CHAR',
342 since ours (we hope) works properly with all combinations of 355 since ours (we hope) works properly with all combinations of
343 machines, compilers, `char' and `unsigned char' argument types. 356 machines, compilers, `char' and `unsigned char' argument types.
344 (Per Bothner suggested the basic approach.) */ 357 (Per Bothner suggested the basic approach.) */
345#undef SIGN_EXTEND_CHAR 358#undef SIGN_EXTEND_CHAR
346#if __STDC__ 359#if __STDC__
347# define SIGN_EXTEND_CHAR(c) ((signed char) (c)) 360# define SIGN_EXTEND_CHAR(c) ((signed char) (c))
@@ -387,8 +400,7 @@ init_syntax_once ()
387/* Assumes a `char *destination' variable. */ 400/* Assumes a `char *destination' variable. */
388# define REGEX_REALLOCATE(source, osize, nsize) \ 401# define REGEX_REALLOCATE(source, osize, nsize) \
389 (destination = (char *) alloca (nsize), \ 402 (destination = (char *) alloca (nsize), \
390 bcopy (source, destination, osize), \ 403 memcpy (destination, source, osize))
391 destination)
392 404
393/* No need to do anything to free, after alloca. */ 405/* No need to do anything to free, after alloca. */
394# define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */ 406# define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */
@@ -440,7 +452,7 @@ init_syntax_once ()
440 if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t) 452 if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
441#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t))) 453#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
442 454
443#define BYTEWIDTH 8 /* In bits. */ 455#define BYTEWIDTH 8 /* In bits. */
444 456
445#define STREQ(s1, s2) ((strcmp (s1, s2) == 0)) 457#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
446 458
@@ -456,10 +468,15 @@ typedef char boolean;
456#define false 0 468#define false 0
457#define true 1 469#define true 1
458 470
459static int re_match_2_internal (); 471static int re_match_2_internal _RE_ARGS ((struct re_pattern_buffer *bufp,
472 re_char *string1, int size1,
473 re_char *string2, int size2,
474 int pos,
475 struct re_registers *regs,
476 int stop));
460 477
461/* These are the command codes that appear in compiled regular 478/* These are the command codes that appear in compiled regular
462 expressions. Some opcodes are followed by argument bytes. A 479 expressions. Some opcodes are followed by argument bytes. A
463 command code can specify any interpretation whatsoever for its 480 command code can specify any interpretation whatsoever for its
464 arguments. Zero bytes may appear in the compiled regular expression. */ 481 arguments. Zero bytes may appear in the compiled regular expression. */
465 482
@@ -467,7 +484,7 @@ typedef enum
467{ 484{
468 no_op = 0, 485 no_op = 0,
469 486
470 /* Succeed right away--no more backtracking. */ 487 /* Succeed right away--no more backtracking. */
471 succeed, 488 succeed,
472 489
473 /* Followed by one byte giving n, then by n literal bytes. */ 490 /* Followed by one byte giving n, then by n literal bytes. */
@@ -493,7 +510,7 @@ typedef enum
493 charset, 510 charset,
494 511
495 /* Same parameters as charset, but match any character that is 512 /* Same parameters as charset, but match any character that is
496 not one of those specified. */ 513 not one of those specified. */
497 charset_not, 514 charset_not,
498 515
499 /* Start remembering the text that is matched, for storing in a 516 /* Start remembering the text that is matched, for storing in a
@@ -509,13 +526,13 @@ typedef enum
509 stop_memory, 526 stop_memory,
510 527
511 /* Match a duplicate of something remembered. Followed by one 528 /* Match a duplicate of something remembered. Followed by one
512 byte containing the register number. */ 529 byte containing the register number. */
513 duplicate, 530 duplicate,
514 531
515 /* Fail unless at beginning of line. */ 532 /* Fail unless at beginning of line. */
516 begline, 533 begline,
517 534
518 /* Fail unless at end of line. */ 535 /* Fail unless at end of line. */
519 endline, 536 endline,
520 537
521 /* Succeeds if at beginning of buffer (if emacs) or at beginning 538 /* Succeeds if at beginning of buffer (if emacs) or at beginning
@@ -630,6 +647,7 @@ typedef enum
630 } while (0) 647 } while (0)
631 648
632#ifdef DEBUG 649#ifdef DEBUG
650static void extract_number _RE_ARGS ((int *dest, re_char *source));
633static void 651static void
634extract_number (dest, source) 652extract_number (dest, source)
635 int *dest; 653 int *dest;
@@ -640,7 +658,7 @@ extract_number (dest, source)
640 *dest += temp << 8; 658 *dest += temp << 8;
641} 659}
642 660
643# ifndef EXTRACT_MACROS /* To debug the macros. */ 661# ifndef EXTRACT_MACROS /* To debug the macros. */
644# undef EXTRACT_NUMBER 662# undef EXTRACT_NUMBER
645# define EXTRACT_NUMBER(dest, src) extract_number (&dest, src) 663# define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
646# endif /* not EXTRACT_MACROS */ 664# endif /* not EXTRACT_MACROS */
@@ -657,6 +675,8 @@ extract_number (dest, source)
657 } while (0) 675 } while (0)
658 676
659#ifdef DEBUG 677#ifdef DEBUG
678static void extract_number_and_incr _RE_ARGS ((int *destination,
679 re_char **source));
660static void 680static void
661extract_number_and_incr (destination, source) 681extract_number_and_incr (destination, source)
662 int *destination; 682 int *destination;
@@ -771,7 +791,7 @@ extract_number_and_incr (destination, source)
771 it is doing (if the variable `debug' is nonzero). If linked with the 791 it is doing (if the variable `debug' is nonzero). If linked with the
772 main program in `iregex.c', you can enter patterns and strings 792 main program in `iregex.c', you can enter patterns and strings
773 interactively. And if linked with the main program in `main.c' and 793 interactively. And if linked with the main program in `main.c' and
774 the other test files, you can run the already-written tests. */ 794 the other test files, you can run the already-written tests. */
775 795
776#ifdef DEBUG 796#ifdef DEBUG
777 797
@@ -1076,7 +1096,8 @@ print_compiled_pattern (bufp)
1076 unsigned char *buffer = bufp->buffer; 1096 unsigned char *buffer = bufp->buffer;
1077 1097
1078 print_partial_compiled_pattern (buffer, buffer + bufp->used); 1098 print_partial_compiled_pattern (buffer, buffer + bufp->used);
1079 printf ("%ld bytes used/%ld bytes allocated.\n", bufp->used, bufp->allocated); 1099 printf ("%ld bytes used/%ld bytes allocated.\n",
1100 bufp->used, bufp->allocated);
1080 1101
1081 if (bufp->fastmap_accurate && bufp->fastmap) 1102 if (bufp->fastmap_accurate && bufp->fastmap)
1082 { 1103 {
@@ -1091,7 +1112,7 @@ print_compiled_pattern (bufp)
1091 printf ("no_sub: %d\t", bufp->no_sub); 1112 printf ("no_sub: %d\t", bufp->no_sub);
1092 printf ("not_bol: %d\t", bufp->not_bol); 1113 printf ("not_bol: %d\t", bufp->not_bol);
1093 printf ("not_eol: %d\t", bufp->not_eol); 1114 printf ("not_eol: %d\t", bufp->not_eol);
1094 printf ("syntax: %d\n", bufp->syntax); 1115 printf ("syntax: %lx\n", bufp->syntax);
1095 fflush (stdout); 1116 fflush (stdout);
1096 /* Perhaps we should print the translate table? */ 1117 /* Perhaps we should print the translate table? */
1097} 1118}
@@ -1105,7 +1126,7 @@ print_double_string (where, string1, size1, string2, size2)
1105 int size1; 1126 int size1;
1106 int size2; 1127 int size2;
1107{ 1128{
1108 unsigned this_char; 1129 int this_char;
1109 1130
1110 if (where == NULL) 1131 if (where == NULL)
1111 printf ("(null)"); 1132 printf ("(null)");
@@ -1152,7 +1173,7 @@ reg_syntax_t re_syntax_options;
1152 different, incompatible syntaxes. 1173 different, incompatible syntaxes.
1153 1174
1154 The argument SYNTAX is a bit mask comprised of the various bits 1175 The argument SYNTAX is a bit mask comprised of the various bits
1155 defined in regex.h. We return the old syntax. */ 1176 defined in regex.h. We return the old syntax. */
1156 1177
1157reg_syntax_t 1178reg_syntax_t
1158re_set_syntax (syntax) 1179re_set_syntax (syntax)
@@ -1165,9 +1186,9 @@ re_set_syntax (syntax)
1165} 1186}
1166 1187
1167/* This table gives an error message for each of the error codes listed 1188/* This table gives an error message for each of the error codes listed
1168 in regex.h. Obviously the order here has to be same as there. 1189 in regex.h. Obviously the order here has to be same as there.
1169 POSIX doesn't require that we do anything for REG_NOERROR, 1190 POSIX doesn't require that we do anything for REG_NOERROR,
1170 but why not be nice? */ 1191 but why not be nice? */
1171 1192
1172static const char *re_error_msgid[] = 1193static const char *re_error_msgid[] =
1173 { 1194 {
@@ -1190,7 +1211,7 @@ static const char *re_error_msgid[] =
1190 gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */ 1211 gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */
1191 }; 1212 };
1192 1213
1193/* Avoiding alloca during matching, to placate r_alloc. */ 1214/* Avoiding alloca during matching, to placate r_alloc. */
1194 1215
1195/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the 1216/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
1196 searching and matching functions should not call alloca. On some 1217 searching and matching functions should not call alloca. On some
@@ -1222,7 +1243,7 @@ static const char *re_error_msgid[] =
1222 and (2) it's not safe for them to use malloc. 1243 and (2) it's not safe for them to use malloc.
1223 Note that if REL_ALLOC is defined, matching would not use malloc for the 1244 Note that if REL_ALLOC is defined, matching would not use malloc for the
1224 failure stack, but we would still use it for the register vectors; 1245 failure stack, but we would still use it for the register vectors;
1225 so REL_ALLOC should not affect this. */ 1246 so REL_ALLOC should not affect this. */
1226#if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs 1247#if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs
1227# undef MATCH_MAY_ALLOCATE 1248# undef MATCH_MAY_ALLOCATE
1228#endif 1249#endif
@@ -1311,7 +1332,7 @@ typedef struct
1311 Return 1 if succeeds, and 0 if either ran out of memory 1332 Return 1 if succeeds, and 0 if either ran out of memory
1312 allocating space for it or it was already too large. 1333 allocating space for it or it was already too large.
1313 1334
1314 REGEX_REALLOCATE_STACK requires `destination' be declared. */ 1335 REGEX_REALLOCATE_STACK requires `destination' be declared. */
1315 1336
1316/* Factor to increase the failure stack size by 1337/* Factor to increase the failure stack size by
1317 when we increase it. 1338 when we increase it.
@@ -1355,19 +1376,19 @@ typedef struct
1355 1376
1356/* Push a pointer value onto the failure stack. 1377/* Push a pointer value onto the failure stack.
1357 Assumes the variable `fail_stack'. Probably should only 1378 Assumes the variable `fail_stack'. Probably should only
1358 be called from within `PUSH_FAILURE_POINT'. */ 1379 be called from within `PUSH_FAILURE_POINT'. */
1359#define PUSH_FAILURE_POINTER(item) \ 1380#define PUSH_FAILURE_POINTER(item) \
1360 fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item) 1381 fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)
1361 1382
1362/* This pushes an integer-valued item onto the failure stack. 1383/* This pushes an integer-valued item onto the failure stack.
1363 Assumes the variable `fail_stack'. Probably should only 1384 Assumes the variable `fail_stack'. Probably should only
1364 be called from within `PUSH_FAILURE_POINT'. */ 1385 be called from within `PUSH_FAILURE_POINT'. */
1365#define PUSH_FAILURE_INT(item) \ 1386#define PUSH_FAILURE_INT(item) \
1366 fail_stack.stack[fail_stack.avail++].integer = (item) 1387 fail_stack.stack[fail_stack.avail++].integer = (item)
1367 1388
1368/* Push a fail_stack_elt_t value onto the failure stack. 1389/* Push a fail_stack_elt_t value onto the failure stack.
1369 Assumes the variable `fail_stack'. Probably should only 1390 Assumes the variable `fail_stack'. Probably should only
1370 be called from within `PUSH_FAILURE_POINT'. */ 1391 be called from within `PUSH_FAILURE_POINT'. */
1371#define PUSH_FAILURE_ELT(item) \ 1392#define PUSH_FAILURE_ELT(item) \
1372 fail_stack.stack[fail_stack.avail++] = (item) 1393 fail_stack.stack[fail_stack.avail++] = (item)
1373 1394
@@ -1474,9 +1495,8 @@ do { \
1474 /* Must be int, so when we don't save any registers, the arithmetic \ 1495 /* Must be int, so when we don't save any registers, the arithmetic \
1475 of 0 + -1 isn't done as unsigned. */ \ 1496 of 0 + -1 isn't done as unsigned. */ \
1476 \ 1497 \
1477 DEBUG_STATEMENT (failure_id++); \
1478 DEBUG_STATEMENT (nfailure_points_pushed++); \ 1498 DEBUG_STATEMENT (nfailure_points_pushed++); \
1479 DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \ 1499 DEBUG_PRINT1 ("\nPUSH_FAILURE_POINT:\n"); \
1480 DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail); \ 1500 DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail); \
1481 DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\ 1501 DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\
1482 \ 1502 \
@@ -1557,27 +1577,29 @@ do { \
1557 1577
1558 1578
1559/* Registers are set to a sentinel when they haven't yet matched. */ 1579/* Registers are set to a sentinel when they haven't yet matched. */
1560#define REG_UNSET_VALUE NULL 1580#define REG_UNSET(e) ((e) == NULL)
1561#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
1562 1581
1563/* Subroutine declarations and macros for regex_compile. */ 1582/* Subroutine declarations and macros for regex_compile. */
1564 1583
1565static void store_op1 _RE_ARGS((re_opcode_t op, unsigned char *loc, int arg)); 1584static reg_errcode_t regex_compile _RE_ARGS ((re_char *pattern, size_t size,
1566static void store_op2 _RE_ARGS((re_opcode_t op, unsigned char *loc, 1585 reg_syntax_t syntax,
1567 int arg1, int arg2)); 1586 struct re_pattern_buffer *bufp));
1568static void insert_op1 _RE_ARGS((re_opcode_t op, unsigned char *loc, 1587static void store_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc, int arg));
1569 int arg, unsigned char *end)); 1588static void store_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1570static void insert_op2 _RE_ARGS((re_opcode_t op, unsigned char *loc, 1589 int arg1, int arg2));
1571 int arg1, int arg2, unsigned char *end)); 1590static void insert_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1572static boolean at_begline_loc_p _RE_ARGS((const unsigned char *pattern, 1591 int arg, unsigned char *end));
1573 const unsigned char *p, 1592static void insert_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1574 reg_syntax_t syntax)); 1593 int arg1, int arg2, unsigned char *end));
1575static boolean at_endline_loc_p _RE_ARGS((const unsigned char *p, 1594static boolean at_begline_loc_p _RE_ARGS ((const unsigned char *pattern,
1576 const unsigned char *pend, 1595 const unsigned char *p,
1577 reg_syntax_t syntax)); 1596 reg_syntax_t syntax));
1578static unsigned char *skip_one_char _RE_ARGS((unsigned char *p)); 1597static boolean at_endline_loc_p _RE_ARGS ((const unsigned char *p,
1579static int analyse_first _RE_ARGS((unsigned char *p, unsigned char *pend, 1598 const unsigned char *pend,
1580 char *fastmap, const int multibyte)); 1599 reg_syntax_t syntax));
1600static unsigned char *skip_one_char _RE_ARGS ((unsigned char *p));
1601static int analyse_first _RE_ARGS ((unsigned char *p, unsigned char *pend,
1602 char *fastmap, const int multibyte));
1581 1603
1582/* Fetch the next character in the uncompiled pattern---translating it 1604/* Fetch the next character in the uncompiled pattern---translating it
1583 if necessary. Also cast from a signed character in the constant 1605 if necessary. Also cast from a signed character in the constant
@@ -1590,7 +1612,7 @@ static int analyse_first _RE_ARGS((unsigned char *p, unsigned char *pend,
1590 } while (0) 1612 } while (0)
1591 1613
1592/* Fetch the next character in the uncompiled pattern, with no 1614/* Fetch the next character in the uncompiled pattern, with no
1593 translation. */ 1615 translation. */
1594#define PATFETCH_RAW(c) \ 1616#define PATFETCH_RAW(c) \
1595 do { \ 1617 do { \
1596 int len; \ 1618 int len; \
@@ -1615,9 +1637,9 @@ static int analyse_first _RE_ARGS((unsigned char *p, unsigned char *pend,
1615/* If the buffer isn't allocated when it comes in, use this. */ 1637/* If the buffer isn't allocated when it comes in, use this. */
1616#define INIT_BUF_SIZE 32 1638#define INIT_BUF_SIZE 32
1617 1639
1618/* Make sure we have at least N more bytes of space in buffer. */ 1640/* Make sure we have at least N more bytes of space in buffer. */
1619#define GET_BUFFER_SPACE(n) \ 1641#define GET_BUFFER_SPACE(n) \
1620 while (b - bufp->buffer + (n) > bufp->allocated) \ 1642 while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated) \
1621 EXTEND_BUFFER () 1643 EXTEND_BUFFER ()
1622 1644
1623/* Make sure we have one more byte of buffer space and then add C to it. */ 1645/* Make sure we have one more byte of buffer space and then add C to it. */
@@ -1637,7 +1659,7 @@ static int analyse_first _RE_ARGS((unsigned char *p, unsigned char *pend,
1637 } while (0) 1659 } while (0)
1638 1660
1639 1661
1640/* As with BUF_PUSH_2, except for three bytes. */ 1662/* As with BUF_PUSH_2, except for three bytes. */
1641#define BUF_PUSH_3(c1, c2, c3) \ 1663#define BUF_PUSH_3(c1, c2, c3) \
1642 do { \ 1664 do { \
1643 GET_BUFFER_SPACE (3); \ 1665 GET_BUFFER_SPACE (3); \
@@ -1648,7 +1670,7 @@ static int analyse_first _RE_ARGS((unsigned char *p, unsigned char *pend,
1648 1670
1649 1671
1650/* Store a jump with opcode OP at LOC to location TO. We store a 1672/* Store a jump with opcode OP at LOC to location TO. We store a
1651 relative address offset by the three bytes the jump itself occupies. */ 1673 relative address offset by the three bytes the jump itself occupies. */
1652#define STORE_JUMP(op, loc, to) \ 1674#define STORE_JUMP(op, loc, to) \
1653 store_op1 (op, loc, (to) - (loc) - 3) 1675 store_op1 (op, loc, (to) - (loc) - 3)
1654 1676
@@ -1656,7 +1678,7 @@ static int analyse_first _RE_ARGS((unsigned char *p, unsigned char *pend,
1656#define STORE_JUMP2(op, loc, to, arg) \ 1678#define STORE_JUMP2(op, loc, to, arg) \
1657 store_op2 (op, loc, (to) - (loc) - 3, arg) 1679 store_op2 (op, loc, (to) - (loc) - 3, arg)
1658 1680
1659/* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */ 1681/* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */
1660#define INSERT_JUMP(op, loc, to) \ 1682#define INSERT_JUMP(op, loc, to) \
1661 insert_op1 (op, loc, (to) - (loc) - 3, b) 1683 insert_op1 (op, loc, (to) - (loc) - 3, b)
1662 1684
@@ -1666,15 +1688,44 @@ static int analyse_first _RE_ARGS((unsigned char *p, unsigned char *pend,
1666 1688
1667 1689
1668/* This is not an arbitrary limit: the arguments which represent offsets 1690/* This is not an arbitrary limit: the arguments which represent offsets
1669 into the pattern are two bytes long. So if 2^16 bytes turns out to 1691 into the pattern are two bytes long. So if 2^16 bytes turns out to
1670 be too small, many things would have to change. */ 1692 be too small, many things would have to change. */
1671#define MAX_BUF_SIZE (1L << 16) 1693/* Any other compiler which, like MSC, has allocation limit below 2^16
1672 1694 bytes will have to use approach similar to what was done below for
1695 MSC and drop MAX_BUF_SIZE a bit. Otherwise you may end up
1696 reallocating to 0 bytes. Such thing is not going to work too well.
1697 You have been warned!! */
1698#if defined _MSC_VER && !defined WIN32
1699/* Microsoft C 16-bit versions limit malloc to approx 65512 bytes. */
1700# define MAX_BUF_SIZE 65500L
1701#else
1702# define MAX_BUF_SIZE (1L << 16)
1703#endif
1673 1704
1674/* Extend the buffer by twice its current size via realloc and 1705/* Extend the buffer by twice its current size via realloc and
1675 reset the pointers that pointed into the old block to point to the 1706 reset the pointers that pointed into the old block to point to the
1676 correct places in the new one. If extending the buffer results in it 1707 correct places in the new one. If extending the buffer results in it
1677 being larger than MAX_BUF_SIZE, then flag memory exhausted. */ 1708 being larger than MAX_BUF_SIZE, then flag memory exhausted. */
1709#if __BOUNDED_POINTERS__
1710# define SET_HIGH_BOUND(P) (__ptrhigh (P) = __ptrlow (P) + bufp->allocated)
1711# define MOVE_BUFFER_POINTER(P) \
1712 (__ptrlow (P) += incr, SET_HIGH_BOUND (P), __ptrvalue (P) += incr)
1713# define ELSE_EXTEND_BUFFER_HIGH_BOUND \
1714 else \
1715 { \
1716 SET_HIGH_BOUND (b); \
1717 SET_HIGH_BOUND (begalt); \
1718 if (fixup_alt_jump) \
1719 SET_HIGH_BOUND (fixup_alt_jump); \
1720 if (laststart) \
1721 SET_HIGH_BOUND (laststart); \
1722 if (pending_exact) \
1723 SET_HIGH_BOUND (pending_exact); \
1724 }
1725#else
1726# define MOVE_BUFFER_POINTER(P) (P) += incr
1727# define ELSE_EXTEND_BUFFER_HIGH_BOUND
1728#endif
1678#define EXTEND_BUFFER() \ 1729#define EXTEND_BUFFER() \
1679 do { \ 1730 do { \
1680 unsigned char *old_buffer = bufp->buffer; \ 1731 unsigned char *old_buffer = bufp->buffer; \
@@ -1689,15 +1740,17 @@ static int analyse_first _RE_ARGS((unsigned char *p, unsigned char *pend,
1689 /* If the buffer moved, move all the pointers into it. */ \ 1740 /* If the buffer moved, move all the pointers into it. */ \
1690 if (old_buffer != bufp->buffer) \ 1741 if (old_buffer != bufp->buffer) \
1691 { \ 1742 { \
1692 b = (b - old_buffer) + bufp->buffer; \ 1743 int incr = bufp->buffer - old_buffer; \
1693 begalt = (begalt - old_buffer) + bufp->buffer; \ 1744 MOVE_BUFFER_POINTER (b); \
1745 MOVE_BUFFER_POINTER (begalt); \
1694 if (fixup_alt_jump) \ 1746 if (fixup_alt_jump) \
1695 fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\ 1747 MOVE_BUFFER_POINTER (fixup_alt_jump); \
1696 if (laststart) \ 1748 if (laststart) \
1697 laststart = (laststart - old_buffer) + bufp->buffer; \ 1749 MOVE_BUFFER_POINTER (laststart); \
1698 if (pending_exact) \ 1750 if (pending_exact) \
1699 pending_exact = (pending_exact - old_buffer) + bufp->buffer; \ 1751 MOVE_BUFFER_POINTER (pending_exact); \
1700 } \ 1752 } \
1753 ELSE_EXTEND_BUFFER_HIGH_BOUND \
1701 } while (0) 1754 } while (0)
1702 1755
1703 1756
@@ -1714,8 +1767,9 @@ typedef unsigned regnum_t;
1714/* Macros for the compile stack. */ 1767/* Macros for the compile stack. */
1715 1768
1716/* Since offsets can go either forwards or backwards, this type needs to 1769/* Since offsets can go either forwards or backwards, this type needs to
1717 be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */ 1770 be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */
1718typedef int pattern_offset_t; 1771/* int may not be enough when sizeof(int) == 2. */
1772typedef long pattern_offset_t;
1719 1773
1720typedef struct 1774typedef struct
1721{ 1775{
@@ -1739,7 +1793,7 @@ typedef struct
1739#define COMPILE_STACK_EMPTY (compile_stack.avail == 0) 1793#define COMPILE_STACK_EMPTY (compile_stack.avail == 0)
1740#define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size) 1794#define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size)
1741 1795
1742/* The next available element. */ 1796/* The next available element. */
1743#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) 1797#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1744 1798
1745 1799
@@ -1810,8 +1864,8 @@ struct range_table_work_area
1810 1864
1811 1865
1812/* Set the bit for character C in a list. */ 1866/* Set the bit for character C in a list. */
1813#define SET_LIST_BIT(c) \ 1867#define SET_LIST_BIT(c) \
1814 (b[((unsigned char) (c)) / BYTEWIDTH] \ 1868 (b[((unsigned char) (c)) / BYTEWIDTH] \
1815 |= 1 << (((unsigned char) c) % BYTEWIDTH)) 1869 |= 1 << (((unsigned char) c) % BYTEWIDTH))
1816 1870
1817 1871
@@ -1820,7 +1874,7 @@ struct range_table_work_area
1820 do { if (p != pend) \ 1874 do { if (p != pend) \
1821 { \ 1875 { \
1822 PATFETCH (c); \ 1876 PATFETCH (c); \
1823 while (ISDIGIT (c)) \ 1877 while ('0' <= c && c <= '9') \
1824 { \ 1878 { \
1825 if (num < 0) \ 1879 if (num < 0) \
1826 num = 0; \ 1880 num = 0; \
@@ -1846,7 +1900,7 @@ struct range_table_work_area
1846 || STREQ (string, "unibyte") || STREQ (string, "multibyte")) 1900 || STREQ (string, "unibyte") || STREQ (string, "multibyte"))
1847 1901
1848/* QUIT is only used on NTemacs. */ 1902/* QUIT is only used on NTemacs. */
1849#if !defined WINDOWSNT || !defined emacs 1903#if !defined WINDOWSNT || !defined emacs || !defined QUIT
1850# undef QUIT 1904# undef QUIT
1851# define QUIT 1905# define QUIT
1852#endif 1906#endif
@@ -1864,14 +1918,14 @@ static fail_stack_type fail_stack;
1864 1918
1865/* Size with which the following vectors are currently allocated. 1919/* Size with which the following vectors are currently allocated.
1866 That is so we can make them bigger as needed, 1920 That is so we can make them bigger as needed,
1867 but never make them smaller. */ 1921 but never make them smaller. */
1868static int regs_allocated_size; 1922static int regs_allocated_size;
1869 1923
1870static re_char ** regstart, ** regend; 1924static re_char ** regstart, ** regend;
1871static re_char **best_regstart, **best_regend; 1925static re_char **best_regstart, **best_regend;
1872 1926
1873/* Make the register vectors big enough for NUM_REGS registers, 1927/* Make the register vectors big enough for NUM_REGS registers,
1874 but don't make them smaller. */ 1928 but don't make them smaller. */
1875 1929
1876static 1930static
1877regex_grow_registers (num_regs) 1931regex_grow_registers (num_regs)
@@ -1932,7 +1986,7 @@ do { \
1932static reg_errcode_t 1986static reg_errcode_t
1933regex_compile (pattern, size, syntax, bufp) 1987regex_compile (pattern, size, syntax, bufp)
1934 re_char *pattern; 1988 re_char *pattern;
1935 int size; 1989 size_t size;
1936 reg_syntax_t syntax; 1990 reg_syntax_t syntax;
1937 struct re_pattern_buffer *bufp; 1991 struct re_pattern_buffer *bufp;
1938{ 1992{
@@ -2758,9 +2812,7 @@ regex_compile (pattern, size, syntax, bufp)
2758 if (!(syntax & RE_INTERVALS) 2812 if (!(syntax & RE_INTERVALS)
2759 /* If we're at `\{' and it's not the open-interval 2813 /* If we're at `\{' and it's not the open-interval
2760 operator. */ 2814 operator. */
2761 || (syntax & RE_NO_BK_BRACES) 2815 || (syntax & RE_NO_BK_BRACES))
2762 /* What is that? -sm */
2763 /* || (p - 2 == pattern && p == pend) */)
2764 goto normal_backslash; 2816 goto normal_backslash;
2765 2817
2766 handle_interval: 2818 handle_interval:
@@ -2773,12 +2825,7 @@ regex_compile (pattern, size, syntax, bufp)
2773 beg_interval = p; 2825 beg_interval = p;
2774 2826
2775 if (p == pend) 2827 if (p == pend)
2776 { 2828 FREE_STACK_RETURN (REG_EBRACE);
2777 if (syntax & RE_NO_BK_BRACES)
2778 goto unfetch_interval;
2779 else
2780 FREE_STACK_RETURN (REG_EBRACE);
2781 }
2782 2829
2783 GET_UNSIGNED_NUMBER (lower_bound); 2830 GET_UNSIGNED_NUMBER (lower_bound);
2784 2831
@@ -2790,27 +2837,18 @@ regex_compile (pattern, size, syntax, bufp)
2790 2837
2791 if (lower_bound < 0 || upper_bound > RE_DUP_MAX 2838 if (lower_bound < 0 || upper_bound > RE_DUP_MAX
2792 || (upper_bound >= 0 && lower_bound > upper_bound)) 2839 || (upper_bound >= 0 && lower_bound > upper_bound))
2793 { 2840 FREE_STACK_RETURN (REG_BADBR);
2794 if (syntax & RE_NO_BK_BRACES)
2795 goto unfetch_interval;
2796 else
2797 FREE_STACK_RETURN (REG_BADBR);
2798 }
2799 2841
2800 if (!(syntax & RE_NO_BK_BRACES)) 2842 if (!(syntax & RE_NO_BK_BRACES))
2801 { 2843 {
2802 if (c != '\\') FREE_STACK_RETURN (REG_EBRACE); 2844 if (c != '\\')
2845 FREE_STACK_RETURN (REG_BADBR);
2803 2846
2804 PATFETCH (c); 2847 PATFETCH (c);
2805 } 2848 }
2806 2849
2807 if (c != '}') 2850 if (c != '}')
2808 { 2851 FREE_STACK_RETURN (REG_BADBR);
2809 if (syntax & RE_NO_BK_BRACES)
2810 goto unfetch_interval;
2811 else
2812 FREE_STACK_RETURN (REG_BADBR);
2813 }
2814 2852
2815 /* We just parsed a valid interval. */ 2853 /* We just parsed a valid interval. */
2816 2854
@@ -2974,38 +3012,54 @@ regex_compile (pattern, size, syntax, bufp)
2974 3012
2975 3013
2976 case 'w': 3014 case 'w':
3015 if (syntax & RE_NO_GNU_OPS)
3016 goto normal_char;
2977 laststart = b; 3017 laststart = b;
2978 BUF_PUSH_2 (syntaxspec, Sword); 3018 BUF_PUSH_2 (syntaxspec, Sword);
2979 break; 3019 break;
2980 3020
2981 3021
2982 case 'W': 3022 case 'W':
3023 if (syntax & RE_NO_GNU_OPS)
3024 goto normal_char;
2983 laststart = b; 3025 laststart = b;
2984 BUF_PUSH_2 (notsyntaxspec, Sword); 3026 BUF_PUSH_2 (notsyntaxspec, Sword);
2985 break; 3027 break;
2986 3028
2987 3029
2988 case '<': 3030 case '<':
3031 if (syntax & RE_NO_GNU_OPS)
3032 goto normal_char;
2989 BUF_PUSH (wordbeg); 3033 BUF_PUSH (wordbeg);
2990 break; 3034 break;
2991 3035
2992 case '>': 3036 case '>':
3037 if (syntax & RE_NO_GNU_OPS)
3038 goto normal_char;
2993 BUF_PUSH (wordend); 3039 BUF_PUSH (wordend);
2994 break; 3040 break;
2995 3041
2996 case 'b': 3042 case 'b':
3043 if (syntax & RE_NO_GNU_OPS)
3044 goto normal_char;
2997 BUF_PUSH (wordbound); 3045 BUF_PUSH (wordbound);
2998 break; 3046 break;
2999 3047
3000 case 'B': 3048 case 'B':
3049 if (syntax & RE_NO_GNU_OPS)
3050 goto normal_char;
3001 BUF_PUSH (notwordbound); 3051 BUF_PUSH (notwordbound);
3002 break; 3052 break;
3003 3053
3004 case '`': 3054 case '`':
3055 if (syntax & RE_NO_GNU_OPS)
3056 goto normal_char;
3005 BUF_PUSH (begbuf); 3057 BUF_PUSH (begbuf);
3006 break; 3058 break;
3007 3059
3008 case '\'': 3060 case '\'':
3061 if (syntax & RE_NO_GNU_OPS)
3062 goto normal_char;
3009 BUF_PUSH (endbuf); 3063 BUF_PUSH (endbuf);
3010 break; 3064 break;
3011 3065
@@ -3020,7 +3074,7 @@ regex_compile (pattern, size, syntax, bufp)
3020 FREE_STACK_RETURN (REG_ESUBREG); 3074 FREE_STACK_RETURN (REG_ESUBREG);
3021 3075
3022 /* Can't back reference to a subexpression if inside of it. */ 3076 /* Can't back reference to a subexpression if inside of it. */
3023 if (group_in_compile_stack (compile_stack, c1)) 3077 if (group_in_compile_stack (compile_stack, (regnum_t) c1))
3024 goto normal_char; 3078 goto normal_char;
3025 3079
3026 laststart = b; 3080 laststart = b;
@@ -3039,7 +3093,7 @@ regex_compile (pattern, size, syntax, bufp)
3039 normal_backslash: 3093 normal_backslash:
3040 /* You might think it would be useful for \ to mean 3094 /* You might think it would be useful for \ to mean
3041 not to translate; but if we don't translate it 3095 not to translate; but if we don't translate it
3042 it will never match anything. */ 3096 it will never match anything. */
3043 c = TRANSLATE (c); 3097 c = TRANSLATE (c);
3044 goto normal_char; 3098 goto normal_char;
3045 } 3099 }
@@ -3317,7 +3371,7 @@ analyse_first (p, pend, fastmap, multibyte)
3317#endif 3371#endif
3318 3372
3319 /* Assume that each path through the pattern can be null until 3373 /* Assume that each path through the pattern can be null until
3320 proven otherwise. We set this false at the bottom of switch 3374 proven otherwise. We set this false at the bottom of switch
3321 statement, to which we get only if a particular path doesn't 3375 statement, to which we get only if a particular path doesn't
3322 match the empty string. */ 3376 match the empty string. */
3323 boolean path_can_be_null = true; 3377 boolean path_can_be_null = true;
@@ -3980,7 +4034,7 @@ static int bcmp_translate _RE_ARGS((re_char *s1, re_char *s2,
3980 /* End of string2 => fail. */ \ 4034 /* End of string2 => fail. */ \
3981 if (dend == end_match_2) \ 4035 if (dend == end_match_2) \
3982 goto fail; \ 4036 goto fail; \
3983 /* End of string1 => advance to string2. */ \ 4037 /* End of string1 => advance to string2. */ \
3984 d = string2; \ 4038 d = string2; \
3985 dend = end_match_2; \ 4039 dend = end_match_2; \
3986 } 4040 }
@@ -4316,7 +4370,7 @@ re_match (bufp, string, size, pos, regs)
4316 int size, pos; 4370 int size, pos;
4317 struct re_registers *regs; 4371 struct re_registers *regs;
4318{ 4372{
4319 int result = re_match_2_internal (bufp, NULL, 0, string, size, 4373 int result = re_match_2_internal (bufp, NULL, 0, (re_char*) string, size,
4320 pos, regs, size); 4374 pos, regs, size);
4321# if defined C_ALLOCA && !defined REGEX_MALLOC 4375# if defined C_ALLOCA && !defined REGEX_MALLOC
4322 alloca (0); 4376 alloca (0);
@@ -4362,7 +4416,8 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
4362 SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1); 4416 SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1);
4363#endif 4417#endif
4364 4418
4365 result = re_match_2_internal (bufp, string1, size1, string2, size2, 4419 result = re_match_2_internal (bufp, (re_char*) string1, size1,
4420 (re_char*) string2, size2,
4366 pos, regs, stop); 4421 pos, regs, stop);
4367#if defined C_ALLOCA && !defined REGEX_MALLOC 4422#if defined C_ALLOCA && !defined REGEX_MALLOC
4368 alloca (0); 4423 alloca (0);
@@ -4423,7 +4478,6 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4423 fail_stack_type fail_stack; 4478 fail_stack_type fail_stack;
4424#endif 4479#endif
4425#ifdef DEBUG 4480#ifdef DEBUG
4426 static unsigned failure_id = 0;
4427 unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0; 4481 unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
4428#endif 4482#endif
4429 4483
@@ -4436,7 +4490,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4436 /* We fill all the registers internally, independent of what we 4490 /* We fill all the registers internally, independent of what we
4437 return, for use in backreferences. The number here includes 4491 return, for use in backreferences. The number here includes
4438 an element for register zero. */ 4492 an element for register zero. */
4439 unsigned num_regs = bufp->re_nsub + 1; 4493 size_t num_regs = bufp->re_nsub + 1;
4440 4494
4441 /* Information on the contents of registers. These are pointers into 4495 /* Information on the contents of registers. These are pointers into
4442 the input strings; they record just what was matched (on this 4496 the input strings; they record just what was matched (on this
@@ -4515,7 +4569,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4515 start_memory/stop_memory has been seen for. Also initialize the 4569 start_memory/stop_memory has been seen for. Also initialize the
4516 register information struct. */ 4570 register information struct. */
4517 for (mcnt = 1; mcnt < num_regs; mcnt++) 4571 for (mcnt = 1; mcnt < num_regs; mcnt++)
4518 regstart[mcnt] = regend[mcnt] = REG_UNSET_VALUE; 4572 regstart[mcnt] = regend[mcnt] = NULL;
4519 4573
4520 /* We move `string1' into `string2' if the latter's empty -- but not if 4574 /* We move `string1' into `string2' if the latter's empty -- but not if
4521 `string1' is null. */ 4575 `string1' is null. */
@@ -4930,7 +4984,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4930 PUSH_FAILURE_REG ((unsigned int)*p); 4984 PUSH_FAILURE_REG ((unsigned int)*p);
4931 4985
4932 regstart[*p] = d; 4986 regstart[*p] = d;
4933 regend[*p] = REG_UNSET_VALUE; /* probably unnecessary. -sm */ 4987 regend[*p] = NULL; /* probably unnecessary. -sm */
4934 DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p])); 4988 DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
4935 4989
4936 /* Move past the register number and inner group count. */ 4990 /* Move past the register number and inner group count. */
@@ -5023,7 +5077,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5023 past them. */ 5077 past them. */
5024 if (RE_TRANSLATE_P (translate) 5078 if (RE_TRANSLATE_P (translate)
5025 ? bcmp_translate (d, d2, mcnt, translate, multibyte) 5079 ? bcmp_translate (d, d2, mcnt, translate, multibyte)
5026 : bcmp (d, d2, mcnt)) 5080 : memcmp (d, d2, mcnt))
5027 { 5081 {
5028 d = dfail; 5082 d = dfail;
5029 goto fail; 5083 goto fail;
@@ -5237,8 +5291,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5237 mcnt--; 5291 mcnt--;
5238 p += 2; 5292 p += 2;
5239 PUSH_FAILURE_COUNT (p); 5293 PUSH_FAILURE_COUNT (p);
5240 STORE_NUMBER_AND_INCR (p, mcnt);
5241 DEBUG_PRINT3 (" Setting %p to %d.\n", p, mcnt); 5294 DEBUG_PRINT3 (" Setting %p to %d.\n", p, mcnt);
5295 STORE_NUMBER_AND_INCR (p, mcnt);
5242 } 5296 }
5243 else 5297 else
5244 /* The two bytes encoding mcnt == 0 are two no_op opcodes. */ 5298 /* The two bytes encoding mcnt == 0 are two no_op opcodes. */
@@ -5540,7 +5594,9 @@ bcmp_translate (s1, s2, len, translate, multibyte)
5540 re_char *p1_end = s1 + len; 5594 re_char *p1_end = s1 + len;
5541 re_char *p2_end = s2 + len; 5595 re_char *p2_end = s2 + len;
5542 5596
5543 while (p1 != p1_end && p2 != p2_end) 5597 /* FIXME: Checking both p1 and p2 presumes that the two strings might have
5598 different lengths, but relying on a single `len' would break this. -sm */
5599 while (p1 < p1_end && p2 < p2_end)
5544 { 5600 {
5545 int p1_charlen, p2_charlen; 5601 int p1_charlen, p2_charlen;
5546 int p1_ch, p2_ch; 5602 int p1_ch, p2_ch;
@@ -5592,7 +5648,7 @@ re_compile_pattern (pattern, length, bufp)
5592 /* Match anchors at newline. */ 5648 /* Match anchors at newline. */
5593 bufp->newline_anchor = 1; 5649 bufp->newline_anchor = 1;
5594 5650
5595 ret = regex_compile (pattern, length, re_syntax_options, bufp); 5651 ret = regex_compile ((re_char*) pattern, length, re_syntax_options, bufp);
5596 5652
5597 if (!ret) 5653 if (!ret)
5598 return NULL; 5654 return NULL;
@@ -5715,7 +5771,7 @@ regcomp (preg, pattern, cflags)
5715 int cflags; 5771 int cflags;
5716{ 5772{
5717 reg_errcode_t ret; 5773 reg_errcode_t ret;
5718 unsigned syntax 5774 reg_syntax_t syntax
5719 = (cflags & REG_EXTENDED) ? 5775 = (cflags & REG_EXTENDED) ?
5720 RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC; 5776 RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
5721 5777
@@ -5742,7 +5798,7 @@ regcomp (preg, pattern, cflags)
5742 5798
5743 /* Map uppercase characters to corresponding lowercase ones. */ 5799 /* Map uppercase characters to corresponding lowercase ones. */
5744 for (i = 0; i < CHAR_SET_SIZE; i++) 5800 for (i = 0; i < CHAR_SET_SIZE; i++)
5745 preg->translate[i] = ISUPPER (i) ? tolower (i) : i; 5801 preg->translate[i] = ISUPPER (i) ? TOLOWER (i) : i;
5746 } 5802 }
5747 else 5803 else
5748 preg->translate = NULL; 5804 preg->translate = NULL;
@@ -5762,7 +5818,7 @@ regcomp (preg, pattern, cflags)
5762 5818
5763 /* POSIX says a null character in the pattern terminates it, so we 5819 /* POSIX says a null character in the pattern terminates it, so we
5764 can use strlen here in compiling the pattern. */ 5820 can use strlen here in compiling the pattern. */
5765 ret = regex_compile (pattern, strlen (pattern), syntax, preg); 5821 ret = regex_compile ((re_char*) pattern, strlen (pattern), syntax, preg);
5766 5822
5767 /* POSIX doesn't distinguish between an unmatched open-group and an 5823 /* POSIX doesn't distinguish between an unmatched open-group and an
5768 unmatched close-group: both are REG_EPAREN. */ 5824 unmatched close-group: both are REG_EPAREN. */
@@ -5813,10 +5869,10 @@ regexec (preg, string, nmatch, pmatch, eflags)
5813 if (want_reg_info) 5869 if (want_reg_info)
5814 { 5870 {
5815 regs.num_regs = nmatch; 5871 regs.num_regs = nmatch;
5816 regs.start = TALLOC (nmatch, regoff_t); 5872 regs.start = TALLOC (nmatch * 2, regoff_t);
5817 regs.end = TALLOC (nmatch, regoff_t); 5873 if (regs.start == NULL)
5818 if (regs.start == NULL || regs.end == NULL)
5819 return (int) REG_NOMATCH; 5874 return (int) REG_NOMATCH;
5875 regs.end = regs.start + nmatch;
5820 } 5876 }
5821 5877
5822 /* Perform the searching operation. */ 5878 /* Perform the searching operation. */
@@ -5840,7 +5896,6 @@ regexec (preg, string, nmatch, pmatch, eflags)
5840 5896
5841 /* If we needed the temporary register info, free the space now. */ 5897 /* If we needed the temporary register info, free the space now. */
5842 free (regs.start); 5898 free (regs.start);
5843 free (regs.end);
5844 } 5899 }
5845 5900
5846 /* We want zero return to mean success, unlike `re_search'. */ 5901 /* We want zero return to mean success, unlike `re_search'. */