aboutsummaryrefslogtreecommitdiffstats
path: root/src/regex.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/regex.c')
-rw-r--r--src/regex.c255
1 files changed, 118 insertions, 137 deletions
diff --git a/src/regex.c b/src/regex.c
index 9284be95ffb..a60ff0ce35c 100644
--- a/src/regex.c
+++ b/src/regex.c
@@ -340,7 +340,7 @@ enum syntaxcode { Swhitespace = 0, Sword = 1, Ssymbol = 2 };
340 || ((c) >= 'A' && (c) <= 'Z')) \ 340 || ((c) >= 'A' && (c) <= 'Z')) \
341 : SYNTAX (c) == Sword) 341 : SYNTAX (c) == Sword)
342 342
343# define ISLOWER(c) (LOWERCASEP (c)) 343# define ISLOWER(c) lowercasep (c)
344 344
345# define ISPUNCT(c) (IS_REAL_ASCII (c) \ 345# define ISPUNCT(c) (IS_REAL_ASCII (c) \
346 ? ((c) > ' ' && (c) < 0177 \ 346 ? ((c) > ' ' && (c) < 0177 \
@@ -351,7 +351,7 @@ enum syntaxcode { Swhitespace = 0, Sword = 1, Ssymbol = 2 };
351 351
352# define ISSPACE(c) (SYNTAX (c) == Swhitespace) 352# define ISSPACE(c) (SYNTAX (c) == Swhitespace)
353 353
354# define ISUPPER(c) (UPPERCASEP (c)) 354# define ISUPPER(c) uppercasep (c)
355 355
356# define ISWORD(c) (SYNTAX (c) == Sword) 356# define ISWORD(c) (SYNTAX (c) == Sword)
357 357
@@ -551,8 +551,6 @@ init_syntax_once (void)
551/* (Re)Allocate N items of type T using malloc, or fail. */ 551/* (Re)Allocate N items of type T using malloc, or fail. */
552#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t))) 552#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
553#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t))) 553#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
554#define RETALLOC_IF(addr, n, t) \
555 if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
556#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t))) 554#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
557 555
558#define BYTEWIDTH 8 /* In bits. */ 556#define BYTEWIDTH 8 /* In bits. */
@@ -843,11 +841,6 @@ extract_number_and_incr (destination, source)
843 ((p)[2 + CHARSET_BITMAP_SIZE (p)] \ 841 ((p)[2 + CHARSET_BITMAP_SIZE (p)] \
844 + (p)[3 + CHARSET_BITMAP_SIZE (p)] * 0x100) 842 + (p)[3 + CHARSET_BITMAP_SIZE (p)] * 0x100)
845 843
846/* Test if C is listed in the bitmap of charset P. */
847#define CHARSET_LOOKUP_BITMAP(p, c) \
848 ((c) < CHARSET_BITMAP_SIZE (p) * BYTEWIDTH \
849 && (p)[2 + (c) / BYTEWIDTH] & (1 << ((c) % BYTEWIDTH)))
850
851/* Return the address of end of RANGE_TABLE. COUNT is number of 844/* Return the address of end of RANGE_TABLE. COUNT is number of
852 ranges (which is a pair of (start, end)) in the RANGE_TABLE. `* 2' 845 ranges (which is a pair of (start, end)) in the RANGE_TABLE. `* 2'
853 is start of range and end of range. `* 3' is size of each start 846 is start of range and end of range. `* 3' is size of each start
@@ -861,14 +854,14 @@ extract_number_and_incr (destination, source)
861 do \ 854 do \
862 { \ 855 { \
863 re_wchar_t range_start, range_end; \ 856 re_wchar_t range_start, range_end; \
864 re_char *p; \ 857 re_char *rtp; \
865 re_char *range_table_end \ 858 re_char *range_table_end \
866 = CHARSET_RANGE_TABLE_END ((range_table), (count)); \ 859 = CHARSET_RANGE_TABLE_END ((range_table), (count)); \
867 \ 860 \
868 for (p = (range_table); p < range_table_end; p += 2 * 3) \ 861 for (rtp = (range_table); rtp < range_table_end; rtp += 2 * 3) \
869 { \ 862 { \
870 EXTRACT_CHARACTER (range_start, p); \ 863 EXTRACT_CHARACTER (range_start, rtp); \
871 EXTRACT_CHARACTER (range_end, p + 3); \ 864 EXTRACT_CHARACTER (range_end, rtp + 3); \
872 \ 865 \
873 if (range_start <= (c) && (c) <= range_end) \ 866 if (range_start <= (c) && (c) <= range_end) \
874 { \ 867 { \
@@ -1274,6 +1267,13 @@ print_double_string (where, string1, size1, string2, size2)
1274 1267
1275#endif /* not DEBUG */ 1268#endif /* not DEBUG */
1276 1269
1270/* Use this to suppress gcc's `...may be used before initialized' warnings. */
1271#ifdef lint
1272# define IF_LINT(Code) Code
1273#else
1274# define IF_LINT(Code) /* empty */
1275#endif
1276
1277/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can 1277/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
1278 also be assigned to arbitrarily: each pattern buffer stores its own 1278 also be assigned to arbitrarily: each pattern buffer stores its own
1279 syntax, so it can be changed between regex compilations. */ 1279 syntax, so it can be changed between regex compilations. */
@@ -1413,7 +1413,6 @@ typedef struct
1413} fail_stack_type; 1413} fail_stack_type;
1414 1414
1415#define FAIL_STACK_EMPTY() (fail_stack.frame == 0) 1415#define FAIL_STACK_EMPTY() (fail_stack.frame == 0)
1416#define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size)
1417 1416
1418 1417
1419/* Define macros to initialize and free the failure stack. 1418/* Define macros to initialize and free the failure stack.
@@ -1433,8 +1432,6 @@ typedef struct
1433 fail_stack.avail = 0; \ 1432 fail_stack.avail = 0; \
1434 fail_stack.frame = 0; \ 1433 fail_stack.frame = 0; \
1435 } while (0) 1434 } while (0)
1436
1437# define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack)
1438#else 1435#else
1439# define INIT_FAIL_STACK() \ 1436# define INIT_FAIL_STACK() \
1440 do { \ 1437 do { \
@@ -1442,7 +1439,8 @@ typedef struct
1442 fail_stack.frame = 0; \ 1439 fail_stack.frame = 0; \
1443 } while (0) 1440 } while (0)
1444 1441
1445# define RESET_FAIL_STACK() ((void)0) 1442# define RETALLOC_IF(addr, n, t) \
1443 if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
1446#endif 1444#endif
1447 1445
1448 1446
@@ -1495,17 +1493,10 @@ typedef struct
1495#define PUSH_FAILURE_INT(item) \ 1493#define PUSH_FAILURE_INT(item) \
1496 fail_stack.stack[fail_stack.avail++].integer = (item) 1494 fail_stack.stack[fail_stack.avail++].integer = (item)
1497 1495
1498/* Push a fail_stack_elt_t value onto the failure stack. 1496/* These POP... operations complement the PUSH... operations.
1499 Assumes the variable `fail_stack'. Probably should only
1500 be called from within `PUSH_FAILURE_POINT'. */
1501#define PUSH_FAILURE_ELT(item) \
1502 fail_stack.stack[fail_stack.avail++] = (item)
1503
1504/* These three POP... operations complement the three PUSH... operations.
1505 All assume that `fail_stack' is nonempty. */ 1497 All assume that `fail_stack' is nonempty. */
1506#define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer 1498#define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
1507#define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer 1499#define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
1508#define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
1509 1500
1510/* Individual items aside from the registers. */ 1501/* Individual items aside from the registers. */
1511#define NUM_NONREG_ITEMS 3 1502#define NUM_NONREG_ITEMS 3
@@ -1555,22 +1546,22 @@ do { \
1555/* Pop a saved register off the stack. */ 1546/* Pop a saved register off the stack. */
1556#define POP_FAILURE_REG_OR_COUNT() \ 1547#define POP_FAILURE_REG_OR_COUNT() \
1557do { \ 1548do { \
1558 int reg = POP_FAILURE_INT (); \ 1549 int pfreg = POP_FAILURE_INT (); \
1559 if (reg == -1) \ 1550 if (pfreg == -1) \
1560 { \ 1551 { \
1561 /* It's a counter. */ \ 1552 /* It's a counter. */ \
1562 /* Here, we discard `const', making re_match non-reentrant. */ \ 1553 /* Here, we discard `const', making re_match non-reentrant. */ \
1563 unsigned char *ptr = (unsigned char*) POP_FAILURE_POINTER (); \ 1554 unsigned char *ptr = (unsigned char*) POP_FAILURE_POINTER (); \
1564 reg = POP_FAILURE_INT (); \ 1555 pfreg = POP_FAILURE_INT (); \
1565 STORE_NUMBER (ptr, reg); \ 1556 STORE_NUMBER (ptr, pfreg); \
1566 DEBUG_PRINT3 (" Pop counter %p = %d\n", ptr, reg); \ 1557 DEBUG_PRINT3 (" Pop counter %p = %d\n", ptr, pfreg); \
1567 } \ 1558 } \
1568 else \ 1559 else \
1569 { \ 1560 { \
1570 regend[reg] = POP_FAILURE_POINTER (); \ 1561 regend[pfreg] = POP_FAILURE_POINTER (); \
1571 regstart[reg] = POP_FAILURE_POINTER (); \ 1562 regstart[pfreg] = POP_FAILURE_POINTER (); \
1572 DEBUG_PRINT4 (" Pop reg %d (spanning %p -> %p)\n", \ 1563 DEBUG_PRINT4 (" Pop reg %d (spanning %p -> %p)\n", \
1573 reg, regstart[reg], regend[reg]); \ 1564 pfreg, regstart[pfreg], regend[pfreg]); \
1574 } \ 1565 } \
1575} while (0) 1566} while (0)
1576 1567
@@ -1765,16 +1756,6 @@ static int analyse_first _RE_ARGS ((re_char *p, re_char *pend,
1765 } while (0) 1756 } while (0)
1766 1757
1767 1758
1768/* As with BUF_PUSH_2, except for three bytes. */
1769#define BUF_PUSH_3(c1, c2, c3) \
1770 do { \
1771 GET_BUFFER_SPACE (3); \
1772 *b++ = (unsigned char) (c1); \
1773 *b++ = (unsigned char) (c2); \
1774 *b++ = (unsigned char) (c3); \
1775 } while (0)
1776
1777
1778/* Store a jump with opcode OP at LOC to location TO. We store a 1759/* Store a jump with opcode OP at LOC to location TO. We store a
1779 relative address offset by the three bytes the jump itself occupies. */ 1760 relative address offset by the three bytes the jump itself occupies. */
1780#define STORE_JUMP(op, loc, to) \ 1761#define STORE_JUMP(op, loc, to) \
@@ -2524,9 +2505,6 @@ regex_compile (const re_char *pattern, size_t size, reg_syntax_t syntax, struct
2524 /* We fetch characters from PATTERN here. */ 2505 /* We fetch characters from PATTERN here. */
2525 register re_wchar_t c, c1; 2506 register re_wchar_t c, c1;
2526 2507
2527 /* A random temporary spot in PATTERN. */
2528 re_char *p1;
2529
2530 /* Points to the end of the buffer, where we should append. */ 2508 /* Points to the end of the buffer, where we should append. */
2531 register unsigned char *b; 2509 register unsigned char *b;
2532 2510
@@ -2574,17 +2552,14 @@ regex_compile (const re_char *pattern, size_t size, reg_syntax_t syntax, struct
2574 /* If the object matched can contain multibyte characters. */ 2552 /* If the object matched can contain multibyte characters. */
2575 const boolean multibyte = RE_MULTIBYTE_P (bufp); 2553 const boolean multibyte = RE_MULTIBYTE_P (bufp);
2576 2554
2577 /* If a target of matching can contain multibyte characters. */
2578 const boolean target_multibyte = RE_TARGET_MULTIBYTE_P (bufp);
2579
2580 /* Nonzero if we have pushed down into a subpattern. */ 2555 /* Nonzero if we have pushed down into a subpattern. */
2581 int in_subpattern = 0; 2556 int in_subpattern = 0;
2582 2557
2583 /* These hold the values of p, pattern, and pend from the main 2558 /* These hold the values of p, pattern, and pend from the main
2584 pattern when we have pushed into a subpattern. */ 2559 pattern when we have pushed into a subpattern. */
2585 re_char *main_p; 2560 re_char *main_p IF_LINT (= NULL);
2586 re_char *main_pattern; 2561 re_char *main_pattern IF_LINT (= NULL);
2587 re_char *main_pend; 2562 re_char *main_pend IF_LINT (= NULL);
2588 2563
2589#ifdef DEBUG 2564#ifdef DEBUG
2590 debug++; 2565 debug++;
@@ -2894,6 +2869,8 @@ regex_compile (const re_char *pattern, size_t size, reg_syntax_t syntax, struct
2894 2869
2895 case '[': 2870 case '[':
2896 { 2871 {
2872 re_char *p1;
2873
2897 CLEAR_RANGE_TABLE_WORK_USED (range_table_work); 2874 CLEAR_RANGE_TABLE_WORK_USED (range_table_work);
2898 2875
2899 if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 2876 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
@@ -2929,7 +2906,7 @@ regex_compile (const re_char *pattern, size_t size, reg_syntax_t syntax, struct
2929 { 2906 {
2930 boolean escaped_char = false; 2907 boolean escaped_char = false;
2931 const unsigned char *p2 = p; 2908 const unsigned char *p2 = p;
2932 re_wchar_t ch, c2; 2909 re_wchar_t ch;
2933 2910
2934 if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 2911 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2935 2912
@@ -2992,10 +2969,7 @@ regex_compile (const re_char *pattern, size_t size, reg_syntax_t syntax, struct
2992 them). */ 2969 them). */
2993 if (c == ':' && *p == ']') 2970 if (c == ':' && *p == ']')
2994 { 2971 {
2995 re_wctype_t cc; 2972 re_wctype_t cc = re_wctype (str);
2996 int limit;
2997
2998 cc = re_wctype (str);
2999 2973
3000 if (cc == 0) 2974 if (cc == 0)
3001 FREE_STACK_RETURN (REG_ECTYPE); 2975 FREE_STACK_RETURN (REG_ECTYPE);
@@ -4329,10 +4303,6 @@ WEAK_ALIAS (__re_search, re_search)
4329#define HEAD_ADDR_VSTRING(P) \ 4303#define HEAD_ADDR_VSTRING(P) \
4330 (((P) >= size1 ? string2 : string1)) 4304 (((P) >= size1 ? string2 : string1))
4331 4305
4332/* End address of virtual concatenation of string. */
4333#define STOP_ADDR_VSTRING(P) \
4334 (((P) >= size1 ? string2 + size2 : string1 + size1))
4335
4336/* Address of POS in the concatenation of virtual string. */ 4306/* Address of POS in the concatenation of virtual string. */
4337#define POS_ADDR_VSTRING(POS) \ 4307#define POS_ADDR_VSTRING(POS) \
4338 (((POS) >= size1 ? string2 - size1 : string1) + (POS)) 4308 (((POS) >= size1 ? string2 - size1 : string1) + (POS))
@@ -4559,7 +4529,6 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, int size1, const
4559 if (multibyte) 4529 if (multibyte)
4560 { 4530 {
4561 re_char *p = POS_ADDR_VSTRING (startpos); 4531 re_char *p = POS_ADDR_VSTRING (startpos);
4562 re_char *pend = STOP_ADDR_VSTRING (startpos);
4563 int len = BYTES_BY_CHAR_HEAD (*p); 4532 int len = BYTES_BY_CHAR_HEAD (*p);
4564 4533
4565 range -= len; 4534 range -= len;
@@ -4644,16 +4613,6 @@ static int bcmp_translate _RE_ARGS((re_char *s1, re_char *s2,
4644#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2) 4613#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
4645#define AT_STRINGS_END(d) ((d) == end2) 4614#define AT_STRINGS_END(d) ((d) == end2)
4646 4615
4647
4648/* Test if D points to a character which is word-constituent. We have
4649 two special cases to check for: if past the end of string1, look at
4650 the first character in string2; and if before the beginning of
4651 string2, look at the last character in string1. */
4652#define WORDCHAR_P(d) \
4653 (SYNTAX ((d) == end1 ? *string2 \
4654 : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \
4655 == Sword)
4656
4657/* Disabled due to a compiler bug -- see comment at case wordbound */ 4616/* Disabled due to a compiler bug -- see comment at case wordbound */
4658 4617
4659/* The comment at case wordbound is following one, but we don't use 4618/* The comment at case wordbound is following one, but we don't use
@@ -4665,6 +4624,15 @@ static int bcmp_translate _RE_ARGS((re_char *s1, re_char *s2,
4665 macro and introducing temporary variables works around the bug. */ 4624 macro and introducing temporary variables works around the bug. */
4666 4625
4667#if 0 4626#if 0
4627/* Test if D points to a character which is word-constituent. We have
4628 two special cases to check for: if past the end of string1, look at
4629 the first character in string2; and if before the beginning of
4630 string2, look at the last character in string1. */
4631#define WORDCHAR_P(d) \
4632 (SYNTAX ((d) == end1 ? *string2 \
4633 : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \
4634 == Sword)
4635
4668/* Test if the character before D and the one at D differ with respect 4636/* Test if the character before D and the one at D differ with respect
4669 to being word-constituent. */ 4637 to being word-constituent. */
4670#define AT_WORD_BOUNDARY(d) \ 4638#define AT_WORD_BOUNDARY(d) \
@@ -4674,7 +4642,14 @@ static int bcmp_translate _RE_ARGS((re_char *s1, re_char *s2,
4674 4642
4675/* Free everything we malloc. */ 4643/* Free everything we malloc. */
4676#ifdef MATCH_MAY_ALLOCATE 4644#ifdef MATCH_MAY_ALLOCATE
4677# define FREE_VAR(var) if (var) { REGEX_FREE (var); var = NULL; } else 4645# define FREE_VAR(var) \
4646 do { \
4647 if (var) \
4648 { \
4649 REGEX_FREE (var); \
4650 var = NULL; \
4651 } \
4652 } while (0)
4678# define FREE_VARIABLES() \ 4653# define FREE_VARIABLES() \
4679 do { \ 4654 do { \
4680 REGEX_FREE_STACK (fail_stack.stack); \ 4655 REGEX_FREE_STACK (fail_stack.stack); \
@@ -5024,7 +4999,6 @@ re_match_2_internal (struct re_pattern_buffer *bufp, const re_char *string1, int
5024 /* General temporaries. */ 4999 /* General temporaries. */
5025 int mcnt; 5000 int mcnt;
5026 size_t reg; 5001 size_t reg;
5027 boolean not;
5028 5002
5029 /* Just past the end of the corresponding string. */ 5003 /* Just past the end of the corresponding string. */
5030 re_char *end1, *end2; 5004 re_char *end1, *end2;
@@ -5464,7 +5438,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, const re_char *string1, int
5464 else 5438 else
5465 do 5439 do
5466 { 5440 {
5467 int pat_charlen, buf_charlen; 5441 int pat_charlen;
5468 int pat_ch, buf_ch; 5442 int pat_ch, buf_ch;
5469 5443
5470 PREFETCH (); 5444 PREFETCH ();
@@ -5535,7 +5509,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, const re_char *string1, int
5535 5509
5536 /* Start of actual range_table, or end of bitmap if there is no 5510 /* Start of actual range_table, or end of bitmap if there is no
5537 range table. */ 5511 range table. */
5538 re_char *range_table; 5512 re_char *range_table IF_LINT (= NULL);
5539 5513
5540 /* Nonzero if there is a range table. */ 5514 /* Nonzero if there is a range table. */
5541 int range_table_exists; 5515 int range_table_exists;
@@ -5622,8 +5596,8 @@ re_match_2_internal (struct re_pattern_buffer *bufp, const re_char *string1, int
5622 if (!not) goto fail; 5596 if (!not) goto fail;
5623 5597
5624 d += len; 5598 d += len;
5625 break;
5626 } 5599 }
5600 break;
5627 5601
5628 5602
5629 /* The beginning of a group is represented by start_memory. 5603 /* The beginning of a group is represented by start_memory.
@@ -6005,46 +5979,48 @@ re_match_2_internal (struct re_pattern_buffer *bufp, const re_char *string1, int
6005 5979
6006 case wordbound: 5980 case wordbound:
6007 case notwordbound: 5981 case notwordbound:
6008 not = (re_opcode_t) *(p - 1) == notwordbound; 5982 {
6009 DEBUG_PRINT2 ("EXECUTING %swordbound.\n", not?"not":""); 5983 boolean not = (re_opcode_t) *(p - 1) == notwordbound;
5984 DEBUG_PRINT2 ("EXECUTING %swordbound.\n", not?"not":"");
6010 5985
6011 /* We SUCCEED (or FAIL) in one of the following cases: */ 5986 /* We SUCCEED (or FAIL) in one of the following cases: */
6012 5987
6013 /* Case 1: D is at the beginning or the end of string. */ 5988 /* Case 1: D is at the beginning or the end of string. */
6014 if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)) 5989 if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
6015 not = !not; 5990 not = !not;
6016 else 5991 else
6017 { 5992 {
6018 /* C1 is the character before D, S1 is the syntax of C1, C2 5993 /* C1 is the character before D, S1 is the syntax of C1, C2
6019 is the character at D, and S2 is the syntax of C2. */ 5994 is the character at D, and S2 is the syntax of C2. */
6020 re_wchar_t c1, c2; 5995 re_wchar_t c1, c2;
6021 int s1, s2; 5996 int s1, s2;
6022 int dummy; 5997 int dummy;
6023#ifdef emacs 5998#ifdef emacs
6024 int offset = PTR_TO_OFFSET (d - 1); 5999 int offset = PTR_TO_OFFSET (d - 1);
6025 int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); 6000 int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
6026 UPDATE_SYNTAX_TABLE (charpos); 6001 UPDATE_SYNTAX_TABLE (charpos);
6027#endif 6002#endif
6028 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); 6003 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
6029 s1 = SYNTAX (c1); 6004 s1 = SYNTAX (c1);
6030#ifdef emacs 6005#ifdef emacs
6031 UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1); 6006 UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
6032#endif 6007#endif
6033 PREFETCH_NOLIMIT (); 6008 PREFETCH_NOLIMIT ();
6034 GET_CHAR_AFTER (c2, d, dummy); 6009 GET_CHAR_AFTER (c2, d, dummy);
6035 s2 = SYNTAX (c2); 6010 s2 = SYNTAX (c2);
6036 6011
6037 if (/* Case 2: Only one of S1 and S2 is Sword. */ 6012 if (/* Case 2: Only one of S1 and S2 is Sword. */
6038 ((s1 == Sword) != (s2 == Sword)) 6013 ((s1 == Sword) != (s2 == Sword))
6039 /* Case 3: Both of S1 and S2 are Sword, and macro 6014 /* Case 3: Both of S1 and S2 are Sword, and macro
6040 WORD_BOUNDARY_P (C1, C2) returns nonzero. */ 6015 WORD_BOUNDARY_P (C1, C2) returns nonzero. */
6041 || ((s1 == Sword) && WORD_BOUNDARY_P (c1, c2))) 6016 || ((s1 == Sword) && WORD_BOUNDARY_P (c1, c2)))
6042 not = !not; 6017 not = !not;
6043 } 6018 }
6044 if (not) 6019 if (not)
6045 break; 6020 break;
6046 else 6021 else
6047 goto fail; 6022 goto fail;
6023 }
6048 6024
6049 case wordbeg: 6025 case wordbeg:
6050 DEBUG_PRINT1 ("EXECUTING wordbeg.\n"); 6026 DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
@@ -6224,25 +6200,27 @@ re_match_2_internal (struct re_pattern_buffer *bufp, const re_char *string1, int
6224 6200
6225 case syntaxspec: 6201 case syntaxspec:
6226 case notsyntaxspec: 6202 case notsyntaxspec:
6227 not = (re_opcode_t) *(p - 1) == notsyntaxspec;
6228 mcnt = *p++;
6229 DEBUG_PRINT3 ("EXECUTING %ssyntaxspec %d.\n", not?"not":"", mcnt);
6230 PREFETCH ();
6231#ifdef emacs
6232 { 6203 {
6233 int offset = PTR_TO_OFFSET (d); 6204 boolean not = (re_opcode_t) *(p - 1) == notsyntaxspec;
6234 int pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (offset); 6205 mcnt = *p++;
6235 UPDATE_SYNTAX_TABLE (pos1); 6206 DEBUG_PRINT3 ("EXECUTING %ssyntaxspec %d.\n", not?"not":"", mcnt);
6236 } 6207 PREFETCH ();
6208#ifdef emacs
6209 {
6210 int offset = PTR_TO_OFFSET (d);
6211 int pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
6212 UPDATE_SYNTAX_TABLE (pos1);
6213 }
6237#endif 6214#endif
6238 { 6215 {
6239 int len; 6216 int len;
6240 re_wchar_t c; 6217 re_wchar_t c;
6241 6218
6242 GET_CHAR_AFTER (c, d, len); 6219 GET_CHAR_AFTER (c, d, len);
6243 if ((SYNTAX (c) != (enum syntaxcode) mcnt) ^ not) 6220 if ((SYNTAX (c) != (enum syntaxcode) mcnt) ^ not)
6244 goto fail; 6221 goto fail;
6245 d += len; 6222 d += len;
6223 }
6246 } 6224 }
6247 break; 6225 break;
6248 6226
@@ -6267,18 +6245,21 @@ re_match_2_internal (struct re_pattern_buffer *bufp, const re_char *string1, int
6267 6245
6268 case categoryspec: 6246 case categoryspec:
6269 case notcategoryspec: 6247 case notcategoryspec:
6270 not = (re_opcode_t) *(p - 1) == notcategoryspec;
6271 mcnt = *p++;
6272 DEBUG_PRINT3 ("EXECUTING %scategoryspec %d.\n", not?"not":"", mcnt);
6273 PREFETCH ();
6274 { 6248 {
6275 int len; 6249 boolean not = (re_opcode_t) *(p - 1) == notcategoryspec;
6276 re_wchar_t c; 6250 mcnt = *p++;
6251 DEBUG_PRINT3 ("EXECUTING %scategoryspec %d.\n",
6252 not?"not":"", mcnt);
6253 PREFETCH ();
6277 6254
6278 GET_CHAR_AFTER (c, d, len); 6255 {
6279 if ((!CHAR_HAS_CATEGORY (c, mcnt)) ^ not) 6256 int len;
6280 goto fail; 6257 re_wchar_t c;
6281 d += len; 6258 GET_CHAR_AFTER (c, d, len);
6259 if ((!CHAR_HAS_CATEGORY (c, mcnt)) ^ not)
6260 goto fail;
6261 d += len;
6262 }
6282 } 6263 }
6283 break; 6264 break;
6284 6265