diff options
Diffstat (limited to 'src/regex.c')
| -rw-r--r-- | src/regex.c | 157 |
1 files changed, 62 insertions, 95 deletions
diff --git a/src/regex.c b/src/regex.c index 479239897bc..0f9150193ec 100644 --- a/src/regex.c +++ b/src/regex.c | |||
| @@ -2,7 +2,7 @@ | |||
| 2 | 0.12. (Implements POSIX draft P1003.2/D11.2, except for some of the | 2 | 0.12. (Implements POSIX draft P1003.2/D11.2, except for some of the |
| 3 | internationalization features.) | 3 | internationalization features.) |
| 4 | 4 | ||
| 5 | Copyright (C) 1993-2011 Free Software Foundation, Inc. | 5 | Copyright (C) 1993-2012 Free Software Foundation, Inc. |
| 6 | 6 | ||
| 7 | This program is free software; you can redistribute it and/or modify | 7 | This program is free software; you can redistribute it and/or modify |
| 8 | it under the terms of the GNU General Public License as published by | 8 | it under the terms of the GNU General Public License as published by |
| @@ -37,9 +37,9 @@ | |||
| 37 | # include <config.h> | 37 | # include <config.h> |
| 38 | #endif | 38 | #endif |
| 39 | 39 | ||
| 40 | #if defined STDC_HEADERS && !defined emacs | 40 | #include <stddef.h> |
| 41 | # include <stddef.h> | 41 | |
| 42 | #else | 42 | #ifdef emacs |
| 43 | /* We need this for `regex.h', and perhaps for the Emacs include files. */ | 43 | /* We need this for `regex.h', and perhaps for the Emacs include files. */ |
| 44 | # include <sys/types.h> | 44 | # include <sys/types.h> |
| 45 | #endif | 45 | #endif |
| @@ -53,7 +53,7 @@ | |||
| 53 | (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC && !emacs) | 53 | (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC && !emacs) |
| 54 | #endif | 54 | #endif |
| 55 | 55 | ||
| 56 | /* For platform which support the ISO C amendement 1 functionality we | 56 | /* For platform which support the ISO C amendment 1 functionality we |
| 57 | support user defined character classes. */ | 57 | support user defined character classes. */ |
| 58 | #if WIDE_CHAR_SUPPORT | 58 | #if WIDE_CHAR_SUPPORT |
| 59 | /* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */ | 59 | /* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */ |
| @@ -67,7 +67,7 @@ | |||
| 67 | # define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef) | 67 | # define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef) |
| 68 | # define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags) | 68 | # define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags) |
| 69 | # define regerror(err_code, preg, errbuf, errbuf_size) \ | 69 | # define regerror(err_code, preg, errbuf, errbuf_size) \ |
| 70 | __regerror(err_code, preg, errbuf, errbuf_size) | 70 | __regerror (err_code, preg, errbuf, errbuf_size) |
| 71 | # define re_set_registers(bu, re, nu, st, en) \ | 71 | # define re_set_registers(bu, re, nu, st, en) \ |
| 72 | __re_set_registers (bu, re, nu, st, en) | 72 | __re_set_registers (bu, re, nu, st, en) |
| 73 | # define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \ | 73 | # define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \ |
| @@ -238,18 +238,7 @@ xrealloc (void *block, size_t size) | |||
| 238 | # endif | 238 | # endif |
| 239 | # define realloc xrealloc | 239 | # define realloc xrealloc |
| 240 | 240 | ||
| 241 | /* This is the normal way of making sure we have memcpy, memcmp and memset. */ | 241 | # include <string.h> |
| 242 | # if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC | ||
| 243 | # include <string.h> | ||
| 244 | # else | ||
| 245 | # include <strings.h> | ||
| 246 | # ifndef memcmp | ||
| 247 | # define memcmp(s1, s2, n) bcmp (s1, s2, n) | ||
| 248 | # endif | ||
| 249 | # ifndef memcpy | ||
| 250 | # define memcpy(d, s, n) (bcopy (s, d, n), (d)) | ||
| 251 | # endif | ||
| 252 | # endif | ||
| 253 | 242 | ||
| 254 | /* Define the syntax stuff for \<, \>, etc. */ | 243 | /* Define the syntax stuff for \<, \>, etc. */ |
| 255 | 244 | ||
| @@ -357,25 +346,6 @@ enum syntaxcode { Swhitespace = 0, Sword = 1, Ssymbol = 2 }; | |||
| 357 | 346 | ||
| 358 | #else /* not emacs */ | 347 | #else /* not emacs */ |
| 359 | 348 | ||
| 360 | /* Jim Meyering writes: | ||
| 361 | |||
| 362 | "... Some ctype macros are valid only for character codes that | ||
| 363 | isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when | ||
| 364 | using /bin/cc or gcc but without giving an ansi option). So, all | ||
| 365 | ctype uses should be through macros like ISPRINT... If | ||
| 366 | STDC_HEADERS is defined, then autoconf has verified that the ctype | ||
| 367 | macros don't need to be guarded with references to isascii. ... | ||
| 368 | Defining isascii to 1 should let any compiler worth its salt | ||
| 369 | eliminate the && through constant folding." | ||
| 370 | Solaris defines some of these symbols so we must undefine them first. */ | ||
| 371 | |||
| 372 | # undef ISASCII | ||
| 373 | # if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII) | ||
| 374 | # define ISASCII(c) 1 | ||
| 375 | # else | ||
| 376 | # define ISASCII(c) isascii(c) | ||
| 377 | # endif | ||
| 378 | |||
| 379 | /* 1 if C is an ASCII character. */ | 349 | /* 1 if C is an ASCII character. */ |
| 380 | # define IS_REAL_ASCII(c) ((c) < 0200) | 350 | # define IS_REAL_ASCII(c) ((c) < 0200) |
| 381 | 351 | ||
| @@ -383,34 +353,35 @@ enum syntaxcode { Swhitespace = 0, Sword = 1, Ssymbol = 2 }; | |||
| 383 | # define ISUNIBYTE(c) 1 | 353 | # define ISUNIBYTE(c) 1 |
| 384 | 354 | ||
| 385 | # ifdef isblank | 355 | # ifdef isblank |
| 386 | # define ISBLANK(c) (ISASCII (c) && isblank (c)) | 356 | # define ISBLANK(c) isblank (c) |
| 387 | # else | 357 | # else |
| 388 | # define ISBLANK(c) ((c) == ' ' || (c) == '\t') | 358 | # define ISBLANK(c) ((c) == ' ' || (c) == '\t') |
| 389 | # endif | 359 | # endif |
| 390 | # ifdef isgraph | 360 | # ifdef isgraph |
| 391 | # define ISGRAPH(c) (ISASCII (c) && isgraph (c)) | 361 | # define ISGRAPH(c) isgraph (c) |
| 392 | # else | 362 | # else |
| 393 | # define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c)) | 363 | # define ISGRAPH(c) (isprint (c) && !isspace (c)) |
| 394 | # endif | 364 | # endif |
| 395 | 365 | ||
| 366 | /* Solaris defines ISPRINT so we must undefine it first. */ | ||
| 396 | # undef ISPRINT | 367 | # undef ISPRINT |
| 397 | # define ISPRINT(c) (ISASCII (c) && isprint (c)) | 368 | # define ISPRINT(c) isprint (c) |
| 398 | # define ISDIGIT(c) (ISASCII (c) && isdigit (c)) | 369 | # define ISDIGIT(c) isdigit (c) |
| 399 | # define ISALNUM(c) (ISASCII (c) && isalnum (c)) | 370 | # define ISALNUM(c) isalnum (c) |
| 400 | # define ISALPHA(c) (ISASCII (c) && isalpha (c)) | 371 | # define ISALPHA(c) isalpha (c) |
| 401 | # define ISCNTRL(c) (ISASCII (c) && iscntrl (c)) | 372 | # define ISCNTRL(c) iscntrl (c) |
| 402 | # define ISLOWER(c) (ISASCII (c) && islower (c)) | 373 | # define ISLOWER(c) islower (c) |
| 403 | # define ISPUNCT(c) (ISASCII (c) && ispunct (c)) | 374 | # define ISPUNCT(c) ispunct (c) |
| 404 | # define ISSPACE(c) (ISASCII (c) && isspace (c)) | 375 | # define ISSPACE(c) isspace (c) |
| 405 | # define ISUPPER(c) (ISASCII (c) && isupper (c)) | 376 | # define ISUPPER(c) isupper (c) |
| 406 | # define ISXDIGIT(c) (ISASCII (c) && isxdigit (c)) | 377 | # define ISXDIGIT(c) isxdigit (c) |
| 407 | 378 | ||
| 408 | # define ISWORD(c) ISALPHA(c) | 379 | # define ISWORD(c) ISALPHA (c) |
| 409 | 380 | ||
| 410 | # ifdef _tolower | 381 | # ifdef _tolower |
| 411 | # define TOLOWER(c) _tolower(c) | 382 | # define TOLOWER(c) _tolower (c) |
| 412 | # else | 383 | # else |
| 413 | # define TOLOWER(c) tolower(c) | 384 | # define TOLOWER(c) tolower (c) |
| 414 | # endif | 385 | # endif |
| 415 | 386 | ||
| 416 | /* How many characters in the character set. */ | 387 | /* How many characters in the character set. */ |
| @@ -450,10 +421,6 @@ init_syntax_once (void) | |||
| 450 | 421 | ||
| 451 | #endif /* not emacs */ | 422 | #endif /* not emacs */ |
| 452 | 423 | ||
| 453 | #ifndef NULL | ||
| 454 | # define NULL (void *)0 | ||
| 455 | #endif | ||
| 456 | |||
| 457 | /* We remove any previous definition of `SIGN_EXTEND_CHAR', | 424 | /* We remove any previous definition of `SIGN_EXTEND_CHAR', |
| 458 | since ours (we hope) works properly with all combinations of | 425 | since ours (we hope) works properly with all combinations of |
| 459 | machines, compilers, `char' and `unsigned char' argument types. | 426 | machines, compilers, `char' and `unsigned char' argument types. |
| @@ -563,7 +530,11 @@ init_syntax_once (void) | |||
| 563 | #define MIN(a, b) ((a) < (b) ? (a) : (b)) | 530 | #define MIN(a, b) ((a) < (b) ? (a) : (b)) |
| 564 | 531 | ||
| 565 | /* Type of source-pattern and string chars. */ | 532 | /* Type of source-pattern and string chars. */ |
| 533 | #ifdef _MSC_VER | ||
| 534 | typedef unsigned char re_char; | ||
| 535 | #else | ||
| 566 | typedef const unsigned char re_char; | 536 | typedef const unsigned char re_char; |
| 537 | #endif | ||
| 567 | 538 | ||
| 568 | typedef char boolean; | 539 | typedef char boolean; |
| 569 | #define false 0 | 540 | #define false 0 |
| @@ -666,7 +637,7 @@ typedef enum | |||
| 666 | on_failure_jump_nastyloop, | 637 | on_failure_jump_nastyloop, |
| 667 | 638 | ||
| 668 | /* A smart `on_failure_jump' used for greedy * and + operators. | 639 | /* A smart `on_failure_jump' used for greedy * and + operators. |
| 669 | It analyses the loop before which it is put and if the | 640 | It analyzes the loop before which it is put and if the |
| 670 | loop does not require backtracking, it changes itself to | 641 | loop does not require backtracking, it changes itself to |
| 671 | `on_failure_keep_string_jump' and short-circuits the loop, | 642 | `on_failure_keep_string_jump' and short-circuits the loop, |
| 672 | else it just defaults to changing itself into `on_failure_jump'. | 643 | else it just defaults to changing itself into `on_failure_jump'. |
| @@ -2126,26 +2097,26 @@ re_iswctype (int ch, re_wctype_t cc) | |||
| 2126 | { | 2097 | { |
| 2127 | switch (cc) | 2098 | switch (cc) |
| 2128 | { | 2099 | { |
| 2129 | case RECC_ALNUM: return ISALNUM (ch); | 2100 | case RECC_ALNUM: return ISALNUM (ch) != 0; |
| 2130 | case RECC_ALPHA: return ISALPHA (ch); | 2101 | case RECC_ALPHA: return ISALPHA (ch) != 0; |
| 2131 | case RECC_BLANK: return ISBLANK (ch); | 2102 | case RECC_BLANK: return ISBLANK (ch) != 0; |
| 2132 | case RECC_CNTRL: return ISCNTRL (ch); | 2103 | case RECC_CNTRL: return ISCNTRL (ch) != 0; |
| 2133 | case RECC_DIGIT: return ISDIGIT (ch); | 2104 | case RECC_DIGIT: return ISDIGIT (ch) != 0; |
| 2134 | case RECC_GRAPH: return ISGRAPH (ch); | 2105 | case RECC_GRAPH: return ISGRAPH (ch) != 0; |
| 2135 | case RECC_LOWER: return ISLOWER (ch); | 2106 | case RECC_LOWER: return ISLOWER (ch) != 0; |
| 2136 | case RECC_PRINT: return ISPRINT (ch); | 2107 | case RECC_PRINT: return ISPRINT (ch) != 0; |
| 2137 | case RECC_PUNCT: return ISPUNCT (ch); | 2108 | case RECC_PUNCT: return ISPUNCT (ch) != 0; |
| 2138 | case RECC_SPACE: return ISSPACE (ch); | 2109 | case RECC_SPACE: return ISSPACE (ch) != 0; |
| 2139 | case RECC_UPPER: return ISUPPER (ch); | 2110 | case RECC_UPPER: return ISUPPER (ch) != 0; |
| 2140 | case RECC_XDIGIT: return ISXDIGIT (ch); | 2111 | case RECC_XDIGIT: return ISXDIGIT (ch) != 0; |
| 2141 | case RECC_ASCII: return IS_REAL_ASCII (ch); | 2112 | case RECC_ASCII: return IS_REAL_ASCII (ch) != 0; |
| 2142 | case RECC_NONASCII: return !IS_REAL_ASCII (ch); | 2113 | case RECC_NONASCII: return !IS_REAL_ASCII (ch); |
| 2143 | case RECC_UNIBYTE: return ISUNIBYTE (ch); | 2114 | case RECC_UNIBYTE: return ISUNIBYTE (ch) != 0; |
| 2144 | case RECC_MULTIBYTE: return !ISUNIBYTE (ch); | 2115 | case RECC_MULTIBYTE: return !ISUNIBYTE (ch); |
| 2145 | case RECC_WORD: return ISWORD (ch); | 2116 | case RECC_WORD: return ISWORD (ch) != 0; |
| 2146 | case RECC_ERROR: return false; | 2117 | case RECC_ERROR: return false; |
| 2147 | default: | 2118 | default: |
| 2148 | abort(); | 2119 | abort (); |
| 2149 | } | 2120 | } |
| 2150 | } | 2121 | } |
| 2151 | 2122 | ||
| @@ -2166,7 +2137,7 @@ re_wctype_to_bit (re_wctype_t cc) | |||
| 2166 | case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL: | 2137 | case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL: |
| 2167 | case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0; | 2138 | case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0; |
| 2168 | default: | 2139 | default: |
| 2169 | abort(); | 2140 | abort (); |
| 2170 | } | 2141 | } |
| 2171 | } | 2142 | } |
| 2172 | #endif | 2143 | #endif |
| @@ -2202,10 +2173,9 @@ extend_range_table_work_area (struct range_table_work_area *work_area) | |||
| 2202 | Returns -1 if successful, REG_ESPACE if ran out of space. */ | 2173 | Returns -1 if successful, REG_ESPACE if ran out of space. */ |
| 2203 | 2174 | ||
| 2204 | static int | 2175 | static int |
| 2205 | set_image_of_range_1 (work_area, start, end, translate) | 2176 | set_image_of_range_1 (struct range_table_work_area *work_area, |
| 2206 | RE_TRANSLATE_TYPE translate; | 2177 | re_wchar_t start, re_wchar_t end, |
| 2207 | struct range_table_work_area *work_area; | 2178 | RE_TRANSLATE_TYPE translate) |
| 2208 | re_wchar_t start, end; | ||
| 2209 | { | 2179 | { |
| 2210 | /* `one_case' indicates a character, or a run of characters, | 2180 | /* `one_case' indicates a character, or a run of characters, |
| 2211 | each of which is an isolate (no case-equivalents). | 2181 | each of which is an isolate (no case-equivalents). |
| @@ -2355,10 +2325,9 @@ set_image_of_range_1 (work_area, start, end, translate) | |||
| 2355 | Returns -1 if successful, REG_ESPACE if ran out of space. */ | 2325 | Returns -1 if successful, REG_ESPACE if ran out of space. */ |
| 2356 | 2326 | ||
| 2357 | static int | 2327 | static int |
| 2358 | set_image_of_range (work_area, start, end, translate) | 2328 | set_image_of_range (struct range_table_work_area *work_area, |
| 2359 | RE_TRANSLATE_TYPE translate; | 2329 | re_wchar_t start, re_wchar_t end, |
| 2360 | struct range_table_work_area *work_area; | 2330 | RE_TRANSLATE_TYPE translate) |
| 2361 | re_wchar_t start, end; | ||
| 2362 | { | 2331 | { |
| 2363 | re_wchar_t cmin, cmax; | 2332 | re_wchar_t cmin, cmax; |
| 2364 | 2333 | ||
| @@ -2445,8 +2414,7 @@ static re_char **best_regstart, **best_regend; | |||
| 2445 | but don't make them smaller. */ | 2414 | but don't make them smaller. */ |
| 2446 | 2415 | ||
| 2447 | static | 2416 | static |
| 2448 | regex_grow_registers (num_regs) | 2417 | regex_grow_registers (int num_regs) |
| 2449 | int num_regs; | ||
| 2450 | { | 2418 | { |
| 2451 | if (num_regs > regs_allocated_size) | 2419 | if (num_regs > regs_allocated_size) |
| 2452 | { | 2420 | { |
| @@ -4573,10 +4541,10 @@ WEAK_ALIAS (__re_search_2, re_search_2) | |||
| 4573 | 4541 | ||
| 4574 | /* Declarations and macros for re_match_2. */ | 4542 | /* Declarations and macros for re_match_2. */ |
| 4575 | 4543 | ||
| 4576 | static int bcmp_translate _RE_ARGS((re_char *s1, re_char *s2, | 4544 | static int bcmp_translate _RE_ARGS ((re_char *s1, re_char *s2, |
| 4577 | register ssize_t len, | 4545 | register ssize_t len, |
| 4578 | RE_TRANSLATE_TYPE translate, | 4546 | RE_TRANSLATE_TYPE translate, |
| 4579 | const int multibyte)); | 4547 | const int multibyte)); |
| 4580 | 4548 | ||
| 4581 | /* This converts PTR, a pointer into one of the search strings `string1' | 4549 | /* This converts PTR, a pointer into one of the search strings `string1' |
| 4582 | and `string2' into an offset from the beginning of that string. */ | 4550 | and `string2' into an offset from the beginning of that string. */ |
| @@ -6312,7 +6280,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, const re_char *string1, | |||
| 6312 | goto fail; | 6280 | goto fail; |
| 6313 | 6281 | ||
| 6314 | default: | 6282 | default: |
| 6315 | abort(); | 6283 | abort (); |
| 6316 | } | 6284 | } |
| 6317 | 6285 | ||
| 6318 | assert (p >= bufp->buffer && p <= pend); | 6286 | assert (p >= bufp->buffer && p <= pend); |
| @@ -6417,8 +6385,7 @@ char * | |||
| 6417 | regcomp/regexec below without link errors. */ | 6385 | regcomp/regexec below without link errors. */ |
| 6418 | weak_function | 6386 | weak_function |
| 6419 | # endif | 6387 | # endif |
| 6420 | re_comp (s) | 6388 | re_comp (const char *s) |
| 6421 | const char *s; | ||
| 6422 | { | 6389 | { |
| 6423 | reg_errcode_t ret; | 6390 | reg_errcode_t ret; |
| 6424 | 6391 | ||
| @@ -6457,7 +6424,7 @@ re_comp (s) | |||
| 6457 | } | 6424 | } |
| 6458 | 6425 | ||
| 6459 | 6426 | ||
| 6460 | regoff_t | 6427 | int |
| 6461 | # ifdef _LIBC | 6428 | # ifdef _LIBC |
| 6462 | weak_function | 6429 | weak_function |
| 6463 | # endif | 6430 | # endif |
| @@ -6594,7 +6561,7 @@ reg_errcode_t | |||
| 6594 | regexec (const regex_t *__restrict preg, const char *__restrict string, | 6561 | regexec (const regex_t *__restrict preg, const char *__restrict string, |
| 6595 | size_t nmatch, regmatch_t pmatch[__restrict_arr], int eflags) | 6562 | size_t nmatch, regmatch_t pmatch[__restrict_arr], int eflags) |
| 6596 | { | 6563 | { |
| 6597 | reg_errcode_t ret; | 6564 | regoff_t ret; |
| 6598 | struct re_registers regs; | 6565 | struct re_registers regs; |
| 6599 | regex_t private_preg; | 6566 | regex_t private_preg; |
| 6600 | size_t len = strlen (string); | 6567 | size_t len = strlen (string); |