diff options
Diffstat (limited to 'src/regex.c')
| -rw-r--r-- | src/regex.c | 255 |
1 files changed, 118 insertions, 137 deletions
diff --git a/src/regex.c b/src/regex.c
index 9284be95ffb..a60ff0ce35c 100644
--- a/src/regex.c
+++ b/src/regex.c
| @@ -340,7 +340,7 @@ enum syntaxcode { Swhitespace = 0, Sword = 1, Ssymbol = 2 }; | |||
| 340 | || ((c) >= 'A' && (c) <= 'Z')) \ | 340 | || ((c) >= 'A' && (c) <= 'Z')) \ |
| 341 | : SYNTAX (c) == Sword) | 341 | : SYNTAX (c) == Sword) |
| 342 | 342 | ||
| 343 | # define ISLOWER(c) (LOWERCASEP (c)) | 343 | # define ISLOWER(c) lowercasep (c) |
| 344 | 344 | ||
| 345 | # define ISPUNCT(c) (IS_REAL_ASCII (c) \ | 345 | # define ISPUNCT(c) (IS_REAL_ASCII (c) \ |
| 346 | ? ((c) > ' ' && (c) < 0177 \ | 346 | ? ((c) > ' ' && (c) < 0177 \ |
| @@ -351,7 +351,7 @@ enum syntaxcode { Swhitespace = 0, Sword = 1, Ssymbol = 2 }; | |||
| 351 | 351 | ||
| 352 | # define ISSPACE(c) (SYNTAX (c) == Swhitespace) | 352 | # define ISSPACE(c) (SYNTAX (c) == Swhitespace) |
| 353 | 353 | ||
| 354 | # define ISUPPER(c) (UPPERCASEP (c)) | 354 | # define ISUPPER(c) uppercasep (c) |
| 355 | 355 | ||
| 356 | # define ISWORD(c) (SYNTAX (c) == Sword) | 356 | # define ISWORD(c) (SYNTAX (c) == Sword) |
| 357 | 357 | ||
| @@ -551,8 +551,6 @@ init_syntax_once (void) | |||
| 551 | /* (Re)Allocate N items of type T using malloc, or fail. */ | 551 | /* (Re)Allocate N items of type T using malloc, or fail. */ |
| 552 | #define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t))) | 552 | #define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t))) |
| 553 | #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t))) | 553 | #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t))) |
| 554 | #define RETALLOC_IF(addr, n, t) \ | ||
| 555 | if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t) | ||
| 556 | #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t))) | 554 | #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t))) |
| 557 | 555 | ||
| 558 | #define BYTEWIDTH 8 /* In bits. */ | 556 | #define BYTEWIDTH 8 /* In bits. */ |
| @@ -843,11 +841,6 @@ extract_number_and_incr (destination, source) | |||
| 843 | ((p)[2 + CHARSET_BITMAP_SIZE (p)] \ | 841 | ((p)[2 + CHARSET_BITMAP_SIZE (p)] \ |
| 844 | + (p)[3 + CHARSET_BITMAP_SIZE (p)] * 0x100) | 842 | + (p)[3 + CHARSET_BITMAP_SIZE (p)] * 0x100) |
| 845 | 843 | ||
| 846 | /* Test if C is listed in the bitmap of charset P. */ | ||
| 847 | #define CHARSET_LOOKUP_BITMAP(p, c) \ | ||
| 848 | ((c) < CHARSET_BITMAP_SIZE (p) * BYTEWIDTH \ | ||
| 849 | && (p)[2 + (c) / BYTEWIDTH] & (1 << ((c) % BYTEWIDTH))) | ||
| 850 | |||
| 851 | /* Return the address of end of RANGE_TABLE. COUNT is number of | 844 | /* Return the address of end of RANGE_TABLE. COUNT is number of |
| 852 | ranges (which is a pair of (start, end)) in the RANGE_TABLE. `* 2' | 845 | ranges (which is a pair of (start, end)) in the RANGE_TABLE. `* 2' |
| 853 | is start of range and end of range. `* 3' is size of each start | 846 | is start of range and end of range. `* 3' is size of each start |
| @@ -861,14 +854,14 @@ extract_number_and_incr (destination, source) | |||
| 861 | do \ | 854 | do \ |
| 862 | { \ | 855 | { \ |
| 863 | re_wchar_t range_start, range_end; \ | 856 | re_wchar_t range_start, range_end; \ |
| 864 | re_char *p; \ | 857 | re_char *rtp; \ |
| 865 | re_char *range_table_end \ | 858 | re_char *range_table_end \ |
| 866 | = CHARSET_RANGE_TABLE_END ((range_table), (count)); \ | 859 | = CHARSET_RANGE_TABLE_END ((range_table), (count)); \ |
| 867 | \ | 860 | \ |
| 868 | for (p = (range_table); p < range_table_end; p += 2 * 3) \ | 861 | for (rtp = (range_table); rtp < range_table_end; rtp += 2 * 3) \ |
| 869 | { \ | 862 | { \ |
| 870 | EXTRACT_CHARACTER (range_start, p); \ | 863 | EXTRACT_CHARACTER (range_start, rtp); \ |
| 871 | EXTRACT_CHARACTER (range_end, p + 3); \ | 864 | EXTRACT_CHARACTER (range_end, rtp + 3); \ |
| 872 | \ | 865 | \ |
| 873 | if (range_start <= (c) && (c) <= range_end) \ | 866 | if (range_start <= (c) && (c) <= range_end) \ |
| 874 | { \ | 867 | { \ |
| @@ -1274,6 +1267,13 @@ print_double_string (where, string1, size1, string2, size2) | |||
| 1274 | 1267 | ||
| 1275 | #endif /* not DEBUG */ | 1268 | #endif /* not DEBUG */ |
| 1276 | 1269 | ||
| 1270 | /* Use this to suppress gcc's `...may be used before initialized' warnings. */ | ||
| 1271 | #ifdef lint | ||
| 1272 | # define IF_LINT(Code) Code | ||
| 1273 | #else | ||
| 1274 | # define IF_LINT(Code) /* empty */ | ||
| 1275 | #endif | ||
| 1276 | |||
| 1277 | /* Set by `re_set_syntax' to the current regexp syntax to recognize. Can | 1277 | /* Set by `re_set_syntax' to the current regexp syntax to recognize. Can |
| 1278 | also be assigned to arbitrarily: each pattern buffer stores its own | 1278 | also be assigned to arbitrarily: each pattern buffer stores its own |
| 1279 | syntax, so it can be changed between regex compilations. */ | 1279 | syntax, so it can be changed between regex compilations. */ |
| @@ -1413,7 +1413,6 @@ typedef struct | |||
| 1413 | } fail_stack_type; | 1413 | } fail_stack_type; |
| 1414 | 1414 | ||
| 1415 | #define FAIL_STACK_EMPTY() (fail_stack.frame == 0) | 1415 | #define FAIL_STACK_EMPTY() (fail_stack.frame == 0) |
| 1416 | #define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size) | ||
| 1417 | 1416 | ||
| 1418 | 1417 | ||
| 1419 | /* Define macros to initialize and free the failure stack. | 1418 | /* Define macros to initialize and free the failure stack. |
| @@ -1433,8 +1432,6 @@ typedef struct | |||
| 1433 | fail_stack.avail = 0; \ | 1432 | fail_stack.avail = 0; \ |
| 1434 | fail_stack.frame = 0; \ | 1433 | fail_stack.frame = 0; \ |
| 1435 | } while (0) | 1434 | } while (0) |
| 1436 | |||
| 1437 | # define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack) | ||
| 1438 | #else | 1435 | #else |
| 1439 | # define INIT_FAIL_STACK() \ | 1436 | # define INIT_FAIL_STACK() \ |
| 1440 | do { \ | 1437 | do { \ |
| @@ -1442,7 +1439,8 @@ typedef struct | |||
| 1442 | fail_stack.frame = 0; \ | 1439 | fail_stack.frame = 0; \ |
| 1443 | } while (0) | 1440 | } while (0) |
| 1444 | 1441 | ||
| 1445 | # define RESET_FAIL_STACK() ((void)0) | 1442 | # define RETALLOC_IF(addr, n, t) \ |
| 1443 | if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t) | ||
| 1446 | #endif | 1444 | #endif |
| 1447 | 1445 | ||
| 1448 | 1446 | ||
| @@ -1495,17 +1493,10 @@ typedef struct | |||
| 1495 | #define PUSH_FAILURE_INT(item) \ | 1493 | #define PUSH_FAILURE_INT(item) \ |
| 1496 | fail_stack.stack[fail_stack.avail++].integer = (item) | 1494 | fail_stack.stack[fail_stack.avail++].integer = (item) |
| 1497 | 1495 | ||
| 1498 | /* Push a fail_stack_elt_t value onto the failure stack. | 1496 | /* These POP... operations complement the PUSH... operations. |
| 1499 | Assumes the variable `fail_stack'. Probably should only | ||
| 1500 | be called from within `PUSH_FAILURE_POINT'. */ | ||
| 1501 | #define PUSH_FAILURE_ELT(item) \ | ||
| 1502 | fail_stack.stack[fail_stack.avail++] = (item) | ||
| 1503 | |||
| 1504 | /* These three POP... operations complement the three PUSH... operations. | ||
| 1505 | All assume that `fail_stack' is nonempty. */ | 1497 | All assume that `fail_stack' is nonempty. */ |
| 1506 | #define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer | 1498 | #define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer |
| 1507 | #define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer | 1499 | #define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer |
| 1508 | #define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail] | ||
| 1509 | 1500 | ||
| 1510 | /* Individual items aside from the registers. */ | 1501 | /* Individual items aside from the registers. */ |
| 1511 | #define NUM_NONREG_ITEMS 3 | 1502 | #define NUM_NONREG_ITEMS 3 |
| @@ -1555,22 +1546,22 @@ do { \ | |||
| 1555 | /* Pop a saved register off the stack. */ | 1546 | /* Pop a saved register off the stack. */ |
| 1556 | #define POP_FAILURE_REG_OR_COUNT() \ | 1547 | #define POP_FAILURE_REG_OR_COUNT() \ |
| 1557 | do { \ | 1548 | do { \ |
| 1558 | int reg = POP_FAILURE_INT (); \ | 1549 | int pfreg = POP_FAILURE_INT (); \ |
| 1559 | if (reg == -1) \ | 1550 | if (pfreg == -1) \ |
| 1560 | { \ | 1551 | { \ |
| 1561 | /* It's a counter. */ \ | 1552 | /* It's a counter. */ \ |
| 1562 | /* Here, we discard `const', making re_match non-reentrant. */ \ | 1553 | /* Here, we discard `const', making re_match non-reentrant. */ \ |
| 1563 | unsigned char *ptr = (unsigned char*) POP_FAILURE_POINTER (); \ | 1554 | unsigned char *ptr = (unsigned char*) POP_FAILURE_POINTER (); \ |
| 1564 | reg = POP_FAILURE_INT (); \ | 1555 | pfreg = POP_FAILURE_INT (); \ |
| 1565 | STORE_NUMBER (ptr, reg); \ | 1556 | STORE_NUMBER (ptr, pfreg); \ |
| 1566 | DEBUG_PRINT3 (" Pop counter %p = %d\n", ptr, reg); \ | 1557 | DEBUG_PRINT3 (" Pop counter %p = %d\n", ptr, pfreg); \ |
| 1567 | } \ | 1558 | } \ |
| 1568 | else \ | 1559 | else \ |
| 1569 | { \ | 1560 | { \ |
| 1570 | regend[reg] = POP_FAILURE_POINTER (); \ | 1561 | regend[pfreg] = POP_FAILURE_POINTER (); \ |
| 1571 | regstart[reg] = POP_FAILURE_POINTER (); \ | 1562 | regstart[pfreg] = POP_FAILURE_POINTER (); \ |
| 1572 | DEBUG_PRINT4 (" Pop reg %d (spanning %p -> %p)\n", \ | 1563 | DEBUG_PRINT4 (" Pop reg %d (spanning %p -> %p)\n", \ |
| 1573 | reg, regstart[reg], regend[reg]); \ | 1564 | pfreg, regstart[pfreg], regend[pfreg]); \ |
| 1574 | } \ | 1565 | } \ |
| 1575 | } while (0) | 1566 | } while (0) |
| 1576 | 1567 | ||
| @@ -1765,16 +1756,6 @@ static int analyse_first _RE_ARGS ((re_char *p, re_char *pend, | |||
| 1765 | } while (0) | 1756 | } while (0) |
| 1766 | 1757 | ||
| 1767 | 1758 | ||
| 1768 | /* As with BUF_PUSH_2, except for three bytes. */ | ||
| 1769 | #define BUF_PUSH_3(c1, c2, c3) \ | ||
| 1770 | do { \ | ||
| 1771 | GET_BUFFER_SPACE (3); \ | ||
| 1772 | *b++ = (unsigned char) (c1); \ | ||
| 1773 | *b++ = (unsigned char) (c2); \ | ||
| 1774 | *b++ = (unsigned char) (c3); \ | ||
| 1775 | } while (0) | ||
| 1776 | |||
| 1777 | |||
| 1778 | /* Store a jump with opcode OP at LOC to location TO. We store a | 1759 | /* Store a jump with opcode OP at LOC to location TO. We store a |
| 1779 | relative address offset by the three bytes the jump itself occupies. */ | 1760 | relative address offset by the three bytes the jump itself occupies. */ |
| 1780 | #define STORE_JUMP(op, loc, to) \ | 1761 | #define STORE_JUMP(op, loc, to) \ |
| @@ -2524,9 +2505,6 @@ regex_compile (const re_char *pattern, size_t size, reg_syntax_t syntax, struct | |||
| 2524 | /* We fetch characters from PATTERN here. */ | 2505 | /* We fetch characters from PATTERN here. */ |
| 2525 | register re_wchar_t c, c1; | 2506 | register re_wchar_t c, c1; |
| 2526 | 2507 | ||
| 2527 | /* A random temporary spot in PATTERN. */ | ||
| 2528 | re_char *p1; | ||
| 2529 | |||
| 2530 | /* Points to the end of the buffer, where we should append. */ | 2508 | /* Points to the end of the buffer, where we should append. */ |
| 2531 | register unsigned char *b; | 2509 | register unsigned char *b; |
| 2532 | 2510 | ||
| @@ -2574,17 +2552,14 @@ regex_compile (const re_char *pattern, size_t size, reg_syntax_t syntax, struct | |||
| 2574 | /* If the object matched can contain multibyte characters. */ | 2552 | /* If the object matched can contain multibyte characters. */ |
| 2575 | const boolean multibyte = RE_MULTIBYTE_P (bufp); | 2553 | const boolean multibyte = RE_MULTIBYTE_P (bufp); |
| 2576 | 2554 | ||
| 2577 | /* If a target of matching can contain multibyte characters. */ | ||
| 2578 | const boolean target_multibyte = RE_TARGET_MULTIBYTE_P (bufp); | ||
| 2579 | |||
| 2580 | /* Nonzero if we have pushed down into a subpattern. */ | 2555 | /* Nonzero if we have pushed down into a subpattern. */ |
| 2581 | int in_subpattern = 0; | 2556 | int in_subpattern = 0; |
| 2582 | 2557 | ||
| 2583 | /* These hold the values of p, pattern, and pend from the main | 2558 | /* These hold the values of p, pattern, and pend from the main |
| 2584 | pattern when we have pushed into a subpattern. */ | 2559 | pattern when we have pushed into a subpattern. */ |
| 2585 | re_char *main_p; | 2560 | re_char *main_p IF_LINT (= NULL); |
| 2586 | re_char *main_pattern; | 2561 | re_char *main_pattern IF_LINT (= NULL); |
| 2587 | re_char *main_pend; | 2562 | re_char *main_pend IF_LINT (= NULL); |
| 2588 | 2563 | ||
| 2589 | #ifdef DEBUG | 2564 | #ifdef DEBUG |
| 2590 | debug++; | 2565 | debug++; |
| @@ -2894,6 +2869,8 @@ regex_compile (const re_char *pattern, size_t size, reg_syntax_t syntax, struct | |||
| 2894 | 2869 | ||
| 2895 | case '[': | 2870 | case '[': |
| 2896 | { | 2871 | { |
| 2872 | re_char *p1; | ||
| 2873 | |||
| 2897 | CLEAR_RANGE_TABLE_WORK_USED (range_table_work); | 2874 | CLEAR_RANGE_TABLE_WORK_USED (range_table_work); |
| 2898 | 2875 | ||
| 2899 | if (p == pend) FREE_STACK_RETURN (REG_EBRACK); | 2876 | if (p == pend) FREE_STACK_RETURN (REG_EBRACK); |
| @@ -2929,7 +2906,7 @@ regex_compile (const re_char *pattern, size_t size, reg_syntax_t syntax, struct | |||
| 2929 | { | 2906 | { |
| 2930 | boolean escaped_char = false; | 2907 | boolean escaped_char = false; |
| 2931 | const unsigned char *p2 = p; | 2908 | const unsigned char *p2 = p; |
| 2932 | re_wchar_t ch, c2; | 2909 | re_wchar_t ch; |
| 2933 | 2910 | ||
| 2934 | if (p == pend) FREE_STACK_RETURN (REG_EBRACK); | 2911 | if (p == pend) FREE_STACK_RETURN (REG_EBRACK); |
| 2935 | 2912 | ||
| @@ -2992,10 +2969,7 @@ regex_compile (const re_char *pattern, size_t size, reg_syntax_t syntax, struct | |||
| 2992 | them). */ | 2969 | them). */ |
| 2993 | if (c == ':' && *p == ']') | 2970 | if (c == ':' && *p == ']') |
| 2994 | { | 2971 | { |
| 2995 | re_wctype_t cc; | 2972 | re_wctype_t cc = re_wctype (str); |
| 2996 | int limit; | ||
| 2997 | |||
| 2998 | cc = re_wctype (str); | ||
| 2999 | 2973 | ||
| 3000 | if (cc == 0) | 2974 | if (cc == 0) |
| 3001 | FREE_STACK_RETURN (REG_ECTYPE); | 2975 | FREE_STACK_RETURN (REG_ECTYPE); |
| @@ -4329,10 +4303,6 @@ WEAK_ALIAS (__re_search, re_search) | |||
| 4329 | #define HEAD_ADDR_VSTRING(P) \ | 4303 | #define HEAD_ADDR_VSTRING(P) \ |
| 4330 | (((P) >= size1 ? string2 : string1)) | 4304 | (((P) >= size1 ? string2 : string1)) |
| 4331 | 4305 | ||
| 4332 | /* End address of virtual concatenation of string. */ | ||
| 4333 | #define STOP_ADDR_VSTRING(P) \ | ||
| 4334 | (((P) >= size1 ? string2 + size2 : string1 + size1)) | ||
| 4335 | |||
| 4336 | /* Address of POS in the concatenation of virtual string. */ | 4306 | /* Address of POS in the concatenation of virtual string. */ |
| 4337 | #define POS_ADDR_VSTRING(POS) \ | 4307 | #define POS_ADDR_VSTRING(POS) \ |
| 4338 | (((POS) >= size1 ? string2 - size1 : string1) + (POS)) | 4308 | (((POS) >= size1 ? string2 - size1 : string1) + (POS)) |
| @@ -4559,7 +4529,6 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, int size1, const | |||
| 4559 | if (multibyte) | 4529 | if (multibyte) |
| 4560 | { | 4530 | { |
| 4561 | re_char *p = POS_ADDR_VSTRING (startpos); | 4531 | re_char *p = POS_ADDR_VSTRING (startpos); |
| 4562 | re_char *pend = STOP_ADDR_VSTRING (startpos); | ||
| 4563 | int len = BYTES_BY_CHAR_HEAD (*p); | 4532 | int len = BYTES_BY_CHAR_HEAD (*p); |
| 4564 | 4533 | ||
| 4565 | range -= len; | 4534 | range -= len; |
| @@ -4644,16 +4613,6 @@ static int bcmp_translate _RE_ARGS((re_char *s1, re_char *s2, | |||
| 4644 | #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2) | 4613 | #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2) |
| 4645 | #define AT_STRINGS_END(d) ((d) == end2) | 4614 | #define AT_STRINGS_END(d) ((d) == end2) |
| 4646 | 4615 | ||
| 4647 | |||
| 4648 | /* Test if D points to a character which is word-constituent. We have | ||
| 4649 | two special cases to check for: if past the end of string1, look at | ||
| 4650 | the first character in string2; and if before the beginning of | ||
| 4651 | string2, look at the last character in string1. */ | ||
| 4652 | #define WORDCHAR_P(d) \ | ||
| 4653 | (SYNTAX ((d) == end1 ? *string2 \ | ||
| 4654 | : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \ | ||
| 4655 | == Sword) | ||
| 4656 | |||
| 4657 | /* Disabled due to a compiler bug -- see comment at case wordbound */ | 4616 | /* Disabled due to a compiler bug -- see comment at case wordbound */ |
| 4658 | 4617 | ||
| 4659 | /* The comment at case wordbound is following one, but we don't use | 4618 | /* The comment at case wordbound is following one, but we don't use |
| @@ -4665,6 +4624,15 @@ static int bcmp_translate _RE_ARGS((re_char *s1, re_char *s2, | |||
| 4665 | macro and introducing temporary variables works around the bug. */ | 4624 | macro and introducing temporary variables works around the bug. */ |
| 4666 | 4625 | ||
| 4667 | #if 0 | 4626 | #if 0 |
| 4627 | /* Test if D points to a character which is word-constituent. We have | ||
| 4628 | two special cases to check for: if past the end of string1, look at | ||
| 4629 | the first character in string2; and if before the beginning of | ||
| 4630 | string2, look at the last character in string1. */ | ||
| 4631 | #define WORDCHAR_P(d) \ | ||
| 4632 | (SYNTAX ((d) == end1 ? *string2 \ | ||
| 4633 | : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \ | ||
| 4634 | == Sword) | ||
| 4635 | |||
| 4668 | /* Test if the character before D and the one at D differ with respect | 4636 | /* Test if the character before D and the one at D differ with respect |
| 4669 | to being word-constituent. */ | 4637 | to being word-constituent. */ |
| 4670 | #define AT_WORD_BOUNDARY(d) \ | 4638 | #define AT_WORD_BOUNDARY(d) \ |
| @@ -4674,7 +4642,14 @@ static int bcmp_translate _RE_ARGS((re_char *s1, re_char *s2, | |||
| 4674 | 4642 | ||
| 4675 | /* Free everything we malloc. */ | 4643 | /* Free everything we malloc. */ |
| 4676 | #ifdef MATCH_MAY_ALLOCATE | 4644 | #ifdef MATCH_MAY_ALLOCATE |
| 4677 | # define FREE_VAR(var) if (var) { REGEX_FREE (var); var = NULL; } else | 4645 | # define FREE_VAR(var) \ |
| 4646 | do { \ | ||
| 4647 | if (var) \ | ||
| 4648 | { \ | ||
| 4649 | REGEX_FREE (var); \ | ||
| 4650 | var = NULL; \ | ||
| 4651 | } \ | ||
| 4652 | } while (0) | ||
| 4678 | # define FREE_VARIABLES() \ | 4653 | # define FREE_VARIABLES() \ |
| 4679 | do { \ | 4654 | do { \ |
| 4680 | REGEX_FREE_STACK (fail_stack.stack); \ | 4655 | REGEX_FREE_STACK (fail_stack.stack); \ |
| @@ -5024,7 +4999,6 @@ re_match_2_internal (struct re_pattern_buffer *bufp, const re_char *string1, int | |||
| 5024 | /* General temporaries. */ | 4999 | /* General temporaries. */ |
| 5025 | int mcnt; | 5000 | int mcnt; |
| 5026 | size_t reg; | 5001 | size_t reg; |
| 5027 | boolean not; | ||
| 5028 | 5002 | ||
| 5029 | /* Just past the end of the corresponding string. */ | 5003 | /* Just past the end of the corresponding string. */ |
| 5030 | re_char *end1, *end2; | 5004 | re_char *end1, *end2; |
| @@ -5464,7 +5438,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, const re_char *string1, int | |||
| 5464 | else | 5438 | else |
| 5465 | do | 5439 | do |
| 5466 | { | 5440 | { |
| 5467 | int pat_charlen, buf_charlen; | 5441 | int pat_charlen; |
| 5468 | int pat_ch, buf_ch; | 5442 | int pat_ch, buf_ch; |
| 5469 | 5443 | ||
| 5470 | PREFETCH (); | 5444 | PREFETCH (); |
| @@ -5535,7 +5509,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, const re_char *string1, int | |||
| 5535 | 5509 | ||
| 5536 | /* Start of actual range_table, or end of bitmap if there is no | 5510 | /* Start of actual range_table, or end of bitmap if there is no |
| 5537 | range table. */ | 5511 | range table. */ |
| 5538 | re_char *range_table; | 5512 | re_char *range_table IF_LINT (= NULL); |
| 5539 | 5513 | ||
| 5540 | /* Nonzero if there is a range table. */ | 5514 | /* Nonzero if there is a range table. */ |
| 5541 | int range_table_exists; | 5515 | int range_table_exists; |
| @@ -5622,8 +5596,8 @@ re_match_2_internal (struct re_pattern_buffer *bufp, const re_char *string1, int | |||
| 5622 | if (!not) goto fail; | 5596 | if (!not) goto fail; |
| 5623 | 5597 | ||
| 5624 | d += len; | 5598 | d += len; |
| 5625 | break; | ||
| 5626 | } | 5599 | } |
| 5600 | break; | ||
| 5627 | 5601 | ||
| 5628 | 5602 | ||
| 5629 | /* The beginning of a group is represented by start_memory. | 5603 | /* The beginning of a group is represented by start_memory. |
| @@ -6005,46 +5979,48 @@ re_match_2_internal (struct re_pattern_buffer *bufp, const re_char *string1, int | |||
| 6005 | 5979 | ||
| 6006 | case wordbound: | 5980 | case wordbound: |
| 6007 | case notwordbound: | 5981 | case notwordbound: |
| 6008 | not = (re_opcode_t) *(p - 1) == notwordbound; | 5982 | { |
| 6009 | DEBUG_PRINT2 ("EXECUTING %swordbound.\n", not?"not":""); | 5983 | boolean not = (re_opcode_t) *(p - 1) == notwordbound; |
| 5984 | DEBUG_PRINT2 ("EXECUTING %swordbound.\n", not?"not":""); | ||
| 6010 | 5985 | ||
| 6011 | /* We SUCCEED (or FAIL) in one of the following cases: */ | 5986 | /* We SUCCEED (or FAIL) in one of the following cases: */ |
| 6012 | 5987 | ||
| 6013 | /* Case 1: D is at the beginning or the end of string. */ | 5988 | /* Case 1: D is at the beginning or the end of string. */ |
| 6014 | if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)) | 5989 | if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)) |
| 6015 | not = !not; | 5990 | not = !not; |
| 6016 | else | 5991 | else |
| 6017 | { | 5992 | { |
| 6018 | /* C1 is the character before D, S1 is the syntax of C1, C2 | 5993 | /* C1 is the character before D, S1 is the syntax of C1, C2 |
| 6019 | is the character at D, and S2 is the syntax of C2. */ | 5994 | is the character at D, and S2 is the syntax of C2. */ |
| 6020 | re_wchar_t c1, c2; | 5995 | re_wchar_t c1, c2; |
| 6021 | int s1, s2; | 5996 | int s1, s2; |
| 6022 | int dummy; | 5997 | int dummy; |
| 6023 | #ifdef emacs | 5998 | #ifdef emacs |
| 6024 | int offset = PTR_TO_OFFSET (d - 1); | 5999 | int offset = PTR_TO_OFFSET (d - 1); |
| 6025 | int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); | 6000 | int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); |
| 6026 | UPDATE_SYNTAX_TABLE (charpos); | 6001 | UPDATE_SYNTAX_TABLE (charpos); |
| 6027 | #endif | 6002 | #endif |
| 6028 | GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); | 6003 | GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); |
| 6029 | s1 = SYNTAX (c1); | 6004 | s1 = SYNTAX (c1); |
| 6030 | #ifdef emacs | 6005 | #ifdef emacs |
| 6031 | UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1); | 6006 | UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1); |
| 6032 | #endif | 6007 | #endif |
| 6033 | PREFETCH_NOLIMIT (); | 6008 | PREFETCH_NOLIMIT (); |
| 6034 | GET_CHAR_AFTER (c2, d, dummy); | 6009 | GET_CHAR_AFTER (c2, d, dummy); |
| 6035 | s2 = SYNTAX (c2); | 6010 | s2 = SYNTAX (c2); |
| 6036 | 6011 | ||
| 6037 | if (/* Case 2: Only one of S1 and S2 is Sword. */ | 6012 | if (/* Case 2: Only one of S1 and S2 is Sword. */ |
| 6038 | ((s1 == Sword) != (s2 == Sword)) | 6013 | ((s1 == Sword) != (s2 == Sword)) |
| 6039 | /* Case 3: Both of S1 and S2 are Sword, and macro | 6014 | /* Case 3: Both of S1 and S2 are Sword, and macro |
| 6040 | WORD_BOUNDARY_P (C1, C2) returns nonzero. */ | 6015 | WORD_BOUNDARY_P (C1, C2) returns nonzero. */ |
| 6041 | || ((s1 == Sword) && WORD_BOUNDARY_P (c1, c2))) | 6016 | || ((s1 == Sword) && WORD_BOUNDARY_P (c1, c2))) |
| 6042 | not = !not; | 6017 | not = !not; |
| 6043 | } | 6018 | } |
| 6044 | if (not) | 6019 | if (not) |
| 6045 | break; | 6020 | break; |
| 6046 | else | 6021 | else |
| 6047 | goto fail; | 6022 | goto fail; |
| 6023 | } | ||
| 6048 | 6024 | ||
| 6049 | case wordbeg: | 6025 | case wordbeg: |
| 6050 | DEBUG_PRINT1 ("EXECUTING wordbeg.\n"); | 6026 | DEBUG_PRINT1 ("EXECUTING wordbeg.\n"); |
| @@ -6224,25 +6200,27 @@ re_match_2_internal (struct re_pattern_buffer *bufp, const re_char *string1, int | |||
| 6224 | 6200 | ||
| 6225 | case syntaxspec: | 6201 | case syntaxspec: |
| 6226 | case notsyntaxspec: | 6202 | case notsyntaxspec: |
| 6227 | not = (re_opcode_t) *(p - 1) == notsyntaxspec; | ||
| 6228 | mcnt = *p++; | ||
| 6229 | DEBUG_PRINT3 ("EXECUTING %ssyntaxspec %d.\n", not?"not":"", mcnt); | ||
| 6230 | PREFETCH (); | ||
| 6231 | #ifdef emacs | ||
| 6232 | { | 6203 | { |
| 6233 | int offset = PTR_TO_OFFSET (d); | 6204 | boolean not = (re_opcode_t) *(p - 1) == notsyntaxspec; |
| 6234 | int pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (offset); | 6205 | mcnt = *p++; |
| 6235 | UPDATE_SYNTAX_TABLE (pos1); | 6206 | DEBUG_PRINT3 ("EXECUTING %ssyntaxspec %d.\n", not?"not":"", mcnt); |
| 6236 | } | 6207 | PREFETCH (); |
| 6208 | #ifdef emacs | ||
| 6209 | { | ||
| 6210 | int offset = PTR_TO_OFFSET (d); | ||
| 6211 | int pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (offset); | ||
| 6212 | UPDATE_SYNTAX_TABLE (pos1); | ||
| 6213 | } | ||
| 6237 | #endif | 6214 | #endif |
| 6238 | { | 6215 | { |
| 6239 | int len; | 6216 | int len; |
| 6240 | re_wchar_t c; | 6217 | re_wchar_t c; |
| 6241 | 6218 | ||
| 6242 | GET_CHAR_AFTER (c, d, len); | 6219 | GET_CHAR_AFTER (c, d, len); |
| 6243 | if ((SYNTAX (c) != (enum syntaxcode) mcnt) ^ not) | 6220 | if ((SYNTAX (c) != (enum syntaxcode) mcnt) ^ not) |
| 6244 | goto fail; | 6221 | goto fail; |
| 6245 | d += len; | 6222 | d += len; |
| 6223 | } | ||
| 6246 | } | 6224 | } |
| 6247 | break; | 6225 | break; |
| 6248 | 6226 | ||
| @@ -6267,18 +6245,21 @@ re_match_2_internal (struct re_pattern_buffer *bufp, const re_char *string1, int | |||
| 6267 | 6245 | ||
| 6268 | case categoryspec: | 6246 | case categoryspec: |
| 6269 | case notcategoryspec: | 6247 | case notcategoryspec: |
| 6270 | not = (re_opcode_t) *(p - 1) == notcategoryspec; | ||
| 6271 | mcnt = *p++; | ||
| 6272 | DEBUG_PRINT3 ("EXECUTING %scategoryspec %d.\n", not?"not":"", mcnt); | ||
| 6273 | PREFETCH (); | ||
| 6274 | { | 6248 | { |
| 6275 | int len; | 6249 | boolean not = (re_opcode_t) *(p - 1) == notcategoryspec; |
| 6276 | re_wchar_t c; | 6250 | mcnt = *p++; |
| 6251 | DEBUG_PRINT3 ("EXECUTING %scategoryspec %d.\n", | ||
| 6252 | not?"not":"", mcnt); | ||
| 6253 | PREFETCH (); | ||
| 6277 | 6254 | ||
| 6278 | GET_CHAR_AFTER (c, d, len); | 6255 | { |
| 6279 | if ((!CHAR_HAS_CATEGORY (c, mcnt)) ^ not) | 6256 | int len; |
| 6280 | goto fail; | 6257 | re_wchar_t c; |
| 6281 | d += len; | 6258 | GET_CHAR_AFTER (c, d, len); |
| 6259 | if ((!CHAR_HAS_CATEGORY (c, mcnt)) ^ not) | ||
| 6260 | goto fail; | ||
| 6261 | d += len; | ||
| 6262 | } | ||
| 6282 | } | 6263 | } |
| 6283 | break; | 6264 | break; |
| 6284 | 6265 | ||