aboutsummaryrefslogtreecommitdiffstats
path: root/src/regex.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/regex.c')
-rw-r--r--src/regex.c157
1 files changed, 62 insertions, 95 deletions
diff --git a/src/regex.c b/src/regex.c
index 479239897bc..0f9150193ec 100644
--- a/src/regex.c
+++ b/src/regex.c
@@ -2,7 +2,7 @@
2 0.12. (Implements POSIX draft P1003.2/D11.2, except for some of the 2 0.12. (Implements POSIX draft P1003.2/D11.2, except for some of the
3 internationalization features.) 3 internationalization features.)
4 4
5 Copyright (C) 1993-2011 Free Software Foundation, Inc. 5 Copyright (C) 1993-2012 Free Software Foundation, Inc.
6 6
7 This program is free software; you can redistribute it and/or modify 7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by 8 it under the terms of the GNU General Public License as published by
@@ -37,9 +37,9 @@
37# include <config.h> 37# include <config.h>
38#endif 38#endif
39 39
40#if defined STDC_HEADERS && !defined emacs 40#include <stddef.h>
41# include <stddef.h> 41
42#else 42#ifdef emacs
43/* We need this for `regex.h', and perhaps for the Emacs include files. */ 43/* We need this for `regex.h', and perhaps for the Emacs include files. */
44# include <sys/types.h> 44# include <sys/types.h>
45#endif 45#endif
@@ -53,7 +53,7 @@
53 (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC && !emacs) 53 (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC && !emacs)
54#endif 54#endif
55 55
56/* For platform which support the ISO C amendement 1 functionality we 56/* For platform which support the ISO C amendment 1 functionality we
57 support user defined character classes. */ 57 support user defined character classes. */
58#if WIDE_CHAR_SUPPORT 58#if WIDE_CHAR_SUPPORT
59/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */ 59/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */
@@ -67,7 +67,7 @@
67# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef) 67# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
68# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags) 68# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
69# define regerror(err_code, preg, errbuf, errbuf_size) \ 69# define regerror(err_code, preg, errbuf, errbuf_size) \
70 __regerror(err_code, preg, errbuf, errbuf_size) 70 __regerror (err_code, preg, errbuf, errbuf_size)
71# define re_set_registers(bu, re, nu, st, en) \ 71# define re_set_registers(bu, re, nu, st, en) \
72 __re_set_registers (bu, re, nu, st, en) 72 __re_set_registers (bu, re, nu, st, en)
73# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \ 73# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
@@ -238,18 +238,7 @@ xrealloc (void *block, size_t size)
238# endif 238# endif
239# define realloc xrealloc 239# define realloc xrealloc
240 240
241/* This is the normal way of making sure we have memcpy, memcmp and memset. */ 241# include <string.h>
242# if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
243# include <string.h>
244# else
245# include <strings.h>
246# ifndef memcmp
247# define memcmp(s1, s2, n) bcmp (s1, s2, n)
248# endif
249# ifndef memcpy
250# define memcpy(d, s, n) (bcopy (s, d, n), (d))
251# endif
252# endif
253 242
254/* Define the syntax stuff for \<, \>, etc. */ 243/* Define the syntax stuff for \<, \>, etc. */
255 244
@@ -357,25 +346,6 @@ enum syntaxcode { Swhitespace = 0, Sword = 1, Ssymbol = 2 };
357 346
358#else /* not emacs */ 347#else /* not emacs */
359 348
360/* Jim Meyering writes:
361
362 "... Some ctype macros are valid only for character codes that
363 isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
364 using /bin/cc or gcc but without giving an ansi option). So, all
365 ctype uses should be through macros like ISPRINT... If
366 STDC_HEADERS is defined, then autoconf has verified that the ctype
367 macros don't need to be guarded with references to isascii. ...
368 Defining isascii to 1 should let any compiler worth its salt
369 eliminate the && through constant folding."
370 Solaris defines some of these symbols so we must undefine them first. */
371
372# undef ISASCII
373# if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
374# define ISASCII(c) 1
375# else
376# define ISASCII(c) isascii(c)
377# endif
378
379/* 1 if C is an ASCII character. */ 349/* 1 if C is an ASCII character. */
380# define IS_REAL_ASCII(c) ((c) < 0200) 350# define IS_REAL_ASCII(c) ((c) < 0200)
381 351
@@ -383,34 +353,35 @@ enum syntaxcode { Swhitespace = 0, Sword = 1, Ssymbol = 2 };
383# define ISUNIBYTE(c) 1 353# define ISUNIBYTE(c) 1
384 354
385# ifdef isblank 355# ifdef isblank
386# define ISBLANK(c) (ISASCII (c) && isblank (c)) 356# define ISBLANK(c) isblank (c)
387# else 357# else
388# define ISBLANK(c) ((c) == ' ' || (c) == '\t') 358# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
389# endif 359# endif
390# ifdef isgraph 360# ifdef isgraph
391# define ISGRAPH(c) (ISASCII (c) && isgraph (c)) 361# define ISGRAPH(c) isgraph (c)
392# else 362# else
393# define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c)) 363# define ISGRAPH(c) (isprint (c) && !isspace (c))
394# endif 364# endif
395 365
366/* Solaris defines ISPRINT so we must undefine it first. */
396# undef ISPRINT 367# undef ISPRINT
397# define ISPRINT(c) (ISASCII (c) && isprint (c)) 368# define ISPRINT(c) isprint (c)
398# define ISDIGIT(c) (ISASCII (c) && isdigit (c)) 369# define ISDIGIT(c) isdigit (c)
399# define ISALNUM(c) (ISASCII (c) && isalnum (c)) 370# define ISALNUM(c) isalnum (c)
400# define ISALPHA(c) (ISASCII (c) && isalpha (c)) 371# define ISALPHA(c) isalpha (c)
401# define ISCNTRL(c) (ISASCII (c) && iscntrl (c)) 372# define ISCNTRL(c) iscntrl (c)
402# define ISLOWER(c) (ISASCII (c) && islower (c)) 373# define ISLOWER(c) islower (c)
403# define ISPUNCT(c) (ISASCII (c) && ispunct (c)) 374# define ISPUNCT(c) ispunct (c)
404# define ISSPACE(c) (ISASCII (c) && isspace (c)) 375# define ISSPACE(c) isspace (c)
405# define ISUPPER(c) (ISASCII (c) && isupper (c)) 376# define ISUPPER(c) isupper (c)
406# define ISXDIGIT(c) (ISASCII (c) && isxdigit (c)) 377# define ISXDIGIT(c) isxdigit (c)
407 378
408# define ISWORD(c) ISALPHA(c) 379# define ISWORD(c) ISALPHA (c)
409 380
410# ifdef _tolower 381# ifdef _tolower
411# define TOLOWER(c) _tolower(c) 382# define TOLOWER(c) _tolower (c)
412# else 383# else
413# define TOLOWER(c) tolower(c) 384# define TOLOWER(c) tolower (c)
414# endif 385# endif
415 386
416/* How many characters in the character set. */ 387/* How many characters in the character set. */
@@ -450,10 +421,6 @@ init_syntax_once (void)
450 421
451#endif /* not emacs */ 422#endif /* not emacs */
452 423
453#ifndef NULL
454# define NULL (void *)0
455#endif
456
457/* We remove any previous definition of `SIGN_EXTEND_CHAR', 424/* We remove any previous definition of `SIGN_EXTEND_CHAR',
458 since ours (we hope) works properly with all combinations of 425 since ours (we hope) works properly with all combinations of
459 machines, compilers, `char' and `unsigned char' argument types. 426 machines, compilers, `char' and `unsigned char' argument types.
@@ -563,7 +530,11 @@ init_syntax_once (void)
563#define MIN(a, b) ((a) < (b) ? (a) : (b)) 530#define MIN(a, b) ((a) < (b) ? (a) : (b))
564 531
565/* Type of source-pattern and string chars. */ 532/* Type of source-pattern and string chars. */
533#ifdef _MSC_VER
534typedef unsigned char re_char;
535#else
566typedef const unsigned char re_char; 536typedef const unsigned char re_char;
537#endif
567 538
568typedef char boolean; 539typedef char boolean;
569#define false 0 540#define false 0
@@ -666,7 +637,7 @@ typedef enum
666 on_failure_jump_nastyloop, 637 on_failure_jump_nastyloop,
667 638
668 /* A smart `on_failure_jump' used for greedy * and + operators. 639 /* A smart `on_failure_jump' used for greedy * and + operators.
669 It analyses the loop before which it is put and if the 640 It analyzes the loop before which it is put and if the
670 loop does not require backtracking, it changes itself to 641 loop does not require backtracking, it changes itself to
671 `on_failure_keep_string_jump' and short-circuits the loop, 642 `on_failure_keep_string_jump' and short-circuits the loop,
672 else it just defaults to changing itself into `on_failure_jump'. 643 else it just defaults to changing itself into `on_failure_jump'.
@@ -2126,26 +2097,26 @@ re_iswctype (int ch, re_wctype_t cc)
2126{ 2097{
2127 switch (cc) 2098 switch (cc)
2128 { 2099 {
2129 case RECC_ALNUM: return ISALNUM (ch); 2100 case RECC_ALNUM: return ISALNUM (ch) != 0;
2130 case RECC_ALPHA: return ISALPHA (ch); 2101 case RECC_ALPHA: return ISALPHA (ch) != 0;
2131 case RECC_BLANK: return ISBLANK (ch); 2102 case RECC_BLANK: return ISBLANK (ch) != 0;
2132 case RECC_CNTRL: return ISCNTRL (ch); 2103 case RECC_CNTRL: return ISCNTRL (ch) != 0;
2133 case RECC_DIGIT: return ISDIGIT (ch); 2104 case RECC_DIGIT: return ISDIGIT (ch) != 0;
2134 case RECC_GRAPH: return ISGRAPH (ch); 2105 case RECC_GRAPH: return ISGRAPH (ch) != 0;
2135 case RECC_LOWER: return ISLOWER (ch); 2106 case RECC_LOWER: return ISLOWER (ch) != 0;
2136 case RECC_PRINT: return ISPRINT (ch); 2107 case RECC_PRINT: return ISPRINT (ch) != 0;
2137 case RECC_PUNCT: return ISPUNCT (ch); 2108 case RECC_PUNCT: return ISPUNCT (ch) != 0;
2138 case RECC_SPACE: return ISSPACE (ch); 2109 case RECC_SPACE: return ISSPACE (ch) != 0;
2139 case RECC_UPPER: return ISUPPER (ch); 2110 case RECC_UPPER: return ISUPPER (ch) != 0;
2140 case RECC_XDIGIT: return ISXDIGIT (ch); 2111 case RECC_XDIGIT: return ISXDIGIT (ch) != 0;
2141 case RECC_ASCII: return IS_REAL_ASCII (ch); 2112 case RECC_ASCII: return IS_REAL_ASCII (ch) != 0;
2142 case RECC_NONASCII: return !IS_REAL_ASCII (ch); 2113 case RECC_NONASCII: return !IS_REAL_ASCII (ch);
2143 case RECC_UNIBYTE: return ISUNIBYTE (ch); 2114 case RECC_UNIBYTE: return ISUNIBYTE (ch) != 0;
2144 case RECC_MULTIBYTE: return !ISUNIBYTE (ch); 2115 case RECC_MULTIBYTE: return !ISUNIBYTE (ch);
2145 case RECC_WORD: return ISWORD (ch); 2116 case RECC_WORD: return ISWORD (ch) != 0;
2146 case RECC_ERROR: return false; 2117 case RECC_ERROR: return false;
2147 default: 2118 default:
2148 abort(); 2119 abort ();
2149 } 2120 }
2150} 2121}
2151 2122
@@ -2166,7 +2137,7 @@ re_wctype_to_bit (re_wctype_t cc)
2166 case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL: 2137 case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL:
2167 case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0; 2138 case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0;
2168 default: 2139 default:
2169 abort(); 2140 abort ();
2170 } 2141 }
2171} 2142}
2172#endif 2143#endif
@@ -2202,10 +2173,9 @@ extend_range_table_work_area (struct range_table_work_area *work_area)
2202 Returns -1 if successful, REG_ESPACE if ran out of space. */ 2173 Returns -1 if successful, REG_ESPACE if ran out of space. */
2203 2174
2204static int 2175static int
2205set_image_of_range_1 (work_area, start, end, translate) 2176set_image_of_range_1 (struct range_table_work_area *work_area,
2206 RE_TRANSLATE_TYPE translate; 2177 re_wchar_t start, re_wchar_t end,
2207 struct range_table_work_area *work_area; 2178 RE_TRANSLATE_TYPE translate)
2208 re_wchar_t start, end;
2209{ 2179{
2210 /* `one_case' indicates a character, or a run of characters, 2180 /* `one_case' indicates a character, or a run of characters,
2211 each of which is an isolate (no case-equivalents). 2181 each of which is an isolate (no case-equivalents).
@@ -2355,10 +2325,9 @@ set_image_of_range_1 (work_area, start, end, translate)
2355 Returns -1 if successful, REG_ESPACE if ran out of space. */ 2325 Returns -1 if successful, REG_ESPACE if ran out of space. */
2356 2326
2357static int 2327static int
2358set_image_of_range (work_area, start, end, translate) 2328set_image_of_range (struct range_table_work_area *work_area,
2359 RE_TRANSLATE_TYPE translate; 2329 re_wchar_t start, re_wchar_t end,
2360 struct range_table_work_area *work_area; 2330 RE_TRANSLATE_TYPE translate)
2361 re_wchar_t start, end;
2362{ 2331{
2363 re_wchar_t cmin, cmax; 2332 re_wchar_t cmin, cmax;
2364 2333
@@ -2445,8 +2414,7 @@ static re_char **best_regstart, **best_regend;
2445 but don't make them smaller. */ 2414 but don't make them smaller. */
2446 2415
2447static 2416static
2448regex_grow_registers (num_regs) 2417regex_grow_registers (int num_regs)
2449 int num_regs;
2450{ 2418{
2451 if (num_regs > regs_allocated_size) 2419 if (num_regs > regs_allocated_size)
2452 { 2420 {
@@ -4573,10 +4541,10 @@ WEAK_ALIAS (__re_search_2, re_search_2)
4573 4541
4574/* Declarations and macros for re_match_2. */ 4542/* Declarations and macros for re_match_2. */
4575 4543
4576static int bcmp_translate _RE_ARGS((re_char *s1, re_char *s2, 4544static int bcmp_translate _RE_ARGS ((re_char *s1, re_char *s2,
4577 register ssize_t len, 4545 register ssize_t len,
4578 RE_TRANSLATE_TYPE translate, 4546 RE_TRANSLATE_TYPE translate,
4579 const int multibyte)); 4547 const int multibyte));
4580 4548
4581/* This converts PTR, a pointer into one of the search strings `string1' 4549/* This converts PTR, a pointer into one of the search strings `string1'
4582 and `string2' into an offset from the beginning of that string. */ 4550 and `string2' into an offset from the beginning of that string. */
@@ -6312,7 +6280,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, const re_char *string1,
6312 goto fail; 6280 goto fail;
6313 6281
6314 default: 6282 default:
6315 abort(); 6283 abort ();
6316 } 6284 }
6317 6285
6318 assert (p >= bufp->buffer && p <= pend); 6286 assert (p >= bufp->buffer && p <= pend);
@@ -6417,8 +6385,7 @@ char *
6417 regcomp/regexec below without link errors. */ 6385 regcomp/regexec below without link errors. */
6418weak_function 6386weak_function
6419# endif 6387# endif
6420re_comp (s) 6388re_comp (const char *s)
6421 const char *s;
6422{ 6389{
6423 reg_errcode_t ret; 6390 reg_errcode_t ret;
6424 6391
@@ -6457,7 +6424,7 @@ re_comp (s)
6457} 6424}
6458 6425
6459 6426
6460regoff_t 6427int
6461# ifdef _LIBC 6428# ifdef _LIBC
6462weak_function 6429weak_function
6463# endif 6430# endif
@@ -6594,7 +6561,7 @@ reg_errcode_t
6594regexec (const regex_t *__restrict preg, const char *__restrict string, 6561regexec (const regex_t *__restrict preg, const char *__restrict string,
6595 size_t nmatch, regmatch_t pmatch[__restrict_arr], int eflags) 6562 size_t nmatch, regmatch_t pmatch[__restrict_arr], int eflags)
6596{ 6563{
6597 reg_errcode_t ret; 6564 regoff_t ret;
6598 struct re_registers regs; 6565 struct re_registers regs;
6599 regex_t private_preg; 6566 regex_t private_preg;
6600 size_t len = strlen (string); 6567 size_t len = strlen (string);