aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Stefan Monnier 2000-03-14 00:27:57 +0000
committer Stefan Monnier 2000-03-14 00:27:57 +0000
commit 66f0296e4897de71e31a1663d4b92d580476e2b2 (patch)
tree b9622b13082f428cb56d652782663157a69c3387
parent a735b7e1cff3f1a5bfa20b4388f5d3da41491a53 (diff)
download emacs-66f0296e4897de71e31a1663d4b92d580476e2b2.tar.gz
emacs-66f0296e4897de71e31a1663d4b92d580476e2b2.zip
* regex.c: Declare a new type `re_char' used throughout the code for the
string char type. It's `const unsigned char' to match the rest of Emacs.
Consistently make sure all pointers to strings use it and make sure all
pointers into the pattern use `unsigned char'.
(re_match_2_internal): Use `PREFETCH+STRING_CHAR' instead of
GET_CHAR_AFTER_2.
Also merge wordbound and notwordbound to reduce code duplication.
* charset.h (GET_CHAR_AFTER_2): Remove.
(GET_CHAR_BEFORE_2): Use unsigned chars, like everywhere else.
-rw-r--r-- src/ChangeLog 13
-rw-r--r-- src/charset.h 17
-rw-r--r-- src/regex.c 198
3 files changed, 100 insertions, 128 deletions
diff --git a/src/ChangeLog b/src/ChangeLog
index 6d0f268fe65..552d4cf2f7c 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,16 @@
1 2000-03-13 Stefan Monnier <monnier@cs.yale.edu>
2
3 * regex.c: Declare a new type `re_char' used throughout the code for the
4 string char type. It's `const unsigned char' to match the rest of Emacs.
5 Consistently make sure all pointers to strings use it and make sure all
6 pointers into the pattern use `unsigned char'.
7 (re_match_2_internal): Use `PREFETCH+STRING_CHAR' instead of
8 GET_CHAR_AFTER_2.
9 Also merge wordbound and notwordbound to reduce code duplication.
10
11 * charset.h (GET_CHAR_AFTER_2): Remove.
12 (GET_CHAR_BEFORE_2): Use unsigned chars, like everywhere else.
13
1 2000-03-12 Ken Raeburn <raeburn@gnu.org> 14 2000-03-12 Ken Raeburn <raeburn@gnu.org>
2 15
3 * Makefile.in (temacs): Evaluate ALL_LDFLAGS into a temporary 16 * Makefile.in (temacs): Evaluate ALL_LDFLAGS into a temporary
diff --git a/src/charset.h b/src/charset.h
index 6b2e25b89dd..3acc447c5f8 100644
--- a/src/charset.h
+++ b/src/charset.h
@@ -577,26 +577,15 @@ else
577 ? 1 \ 577 ? 1 \
578 : multibyte_form_length (str, len)) 578 : multibyte_form_length (str, len))
579 579
580/* Set C a (possibly multibyte) character at P. P points into a
581 string which is the virtual concatenation of STR1 (which ends at
582 END1) or STR2 (which ends at END2). */
583
584#define GET_CHAR_AFTER_2(c, p, str1, end1, str2, end2) \
585 do { \
586 const char *dtemp = (p) == (end1) ? (str2) : (p); \
587 const char *dlimit = ((p) >= (str1) && (p) < (end1)) ? (end1) : (end2); \
588 c = STRING_CHAR (dtemp, dlimit - dtemp); \
589 } while (0)
590
591/* Set C a (possibly multibyte) character before P. P points into a 580/* Set C a (possibly multibyte) character before P. P points into a
592 string which is the virtual concatenation of STR1 (which ends at 581 string which is the virtual concatenation of STR1 (which ends at
593 END1) or STR2 (which ends at END2). */ 582 END1) or STR2 (which ends at END2). */
594 583
595#define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \ 584#define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \
596 do { \ 585 do { \
597 const char *dtemp = (p); \ 586 const unsigned char *dtemp = (p); \
598 const char *dlimit = ((p) > (str2) && (p) <= (end2)) ? (str2) : (str1); \ 587 const unsigned char *dlimit = ((p) > (str2) && (p) <= (end2)) ? (str2) : (str1); \
599 while (dtemp-- > dlimit && (unsigned char) *dtemp >= 0xA0); \ 588 while (dtemp-- > dlimit && *dtemp >= 0xA0); \
600 c = STRING_CHAR (dtemp, p - dtemp); \ 589 c = STRING_CHAR (dtemp, p - dtemp); \
601 } while (0) 590 } while (0)
602 591
diff --git a/src/regex.c b/src/regex.c
index 809a7d24219..c118af0fdb6 100644
--- a/src/regex.c
+++ b/src/regex.c
@@ -434,6 +434,9 @@ char *alloca ();
434#define MAX(a, b) ((a) > (b) ? (a) : (b)) 434#define MAX(a, b) ((a) > (b) ? (a) : (b))
435#define MIN(a, b) ((a) < (b) ? (a) : (b)) 435#define MIN(a, b) ((a) < (b) ? (a) : (b))
436 436
437/* Type of source-pattern and string chars. */
438typedef const unsigned char re_char;
439
437typedef char boolean; 440typedef char boolean;
438#define false 0 441#define false 0
439#define true 1 442#define true 1
@@ -1072,9 +1075,9 @@ print_compiled_pattern (bufp)
1072 1075
1073void 1076void
1074print_double_string (where, string1, size1, string2, size2) 1077print_double_string (where, string1, size1, string2, size2)
1075 const char *where; 1078 re_char *where;
1076 const char *string1; 1079 re_char *string1;
1077 const char *string2; 1080 re_char *string2;
1078 int size1; 1081 int size1;
1079 int size2; 1082 int size2;
1080{ 1083{
@@ -1229,7 +1232,7 @@ int re_max_failures = 4000;
1229 1232
1230union fail_stack_elt 1233union fail_stack_elt
1231{ 1234{
1232 unsigned char *pointer; 1235 const unsigned char *pointer;
1233 unsigned int integer; 1236 unsigned int integer;
1234}; 1237};
1235 1238
@@ -1355,7 +1358,7 @@ typedef struct
1355 1358
1356/* Used to examine the stack (to detect infinite loops). */ 1359/* Used to examine the stack (to detect infinite loops). */
1357#define FAILURE_PAT(h) fail_stack.stack[(h) - 1].pointer 1360#define FAILURE_PAT(h) fail_stack.stack[(h) - 1].pointer
1358#define FAILURE_STR(h) ((char*)fail_stack.stack[(h) - 2].pointer) 1361#define FAILURE_STR(h) (fail_stack.stack[(h) - 2].pointer)
1359#define NEXT_FAILURE_HANDLE(h) fail_stack.stack[(h) - 3].integer 1362#define NEXT_FAILURE_HANDLE(h) fail_stack.stack[(h) - 3].integer
1360#define TOP_FAILURE_HANDLE() fail_stack.frame 1363#define TOP_FAILURE_HANDLE() fail_stack.frame
1361 1364
@@ -1400,10 +1403,10 @@ do { \
1400 || FAILURE_STR (failure) == NULL)) \ 1403 || FAILURE_STR (failure) == NULL)) \
1401 { \ 1404 { \
1402 assert (FAILURE_PAT (failure) >= bufp->buffer \ 1405 assert (FAILURE_PAT (failure) >= bufp->buffer \
1403 && FAILURE_PAT (failure) <= bufp->buffer + bufp->used);\ 1406 && FAILURE_PAT (failure) <= bufp->buffer + bufp->used); \
1404 if (FAILURE_PAT (failure) == pat_cur) \ 1407 if (FAILURE_PAT (failure) == pat_cur) \
1405 goto fail; \ 1408 goto fail; \
1406 DEBUG_PRINT2 (" Other pattern: %p\n", FAILURE_PAT (failure));\ 1409 DEBUG_PRINT2 (" Other pattern: %p\n", FAILURE_PAT (failure)); \
1407 failure = NEXT_FAILURE_HANDLE(failure); \ 1410 failure = NEXT_FAILURE_HANDLE(failure); \
1408 } \ 1411 } \
1409 DEBUG_PRINT2 (" Other string: %p\n", FAILURE_STR (failure)); \ 1412 DEBUG_PRINT2 (" Other string: %p\n", FAILURE_STR (failure)); \
@@ -1490,7 +1493,7 @@ do { \
1490 /* If the saved string location is NULL, it came from an \ 1493 /* If the saved string location is NULL, it came from an \
1491 on_failure_keep_string_jump opcode, and we want to throw away the \ 1494 on_failure_keep_string_jump opcode, and we want to throw away the \
1492 saved NULL, thus retaining our current position in the string. */ \ 1495 saved NULL, thus retaining our current position in the string. */ \
1493 str = (char *) POP_FAILURE_POINTER (); \ 1496 str = (re_char *) POP_FAILURE_POINTER (); \
1494 DEBUG_PRINT2 (" Popping string %p: `", str); \ 1497 DEBUG_PRINT2 (" Popping string %p: `", str); \
1495 DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \ 1498 DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
1496 DEBUG_PRINT1 ("'\n"); \ 1499 DEBUG_PRINT1 ("'\n"); \
@@ -1524,7 +1527,7 @@ static boolean group_in_compile_stack ();
1524#ifndef PATFETCH 1527#ifndef PATFETCH
1525#define PATFETCH(c) \ 1528#define PATFETCH(c) \
1526 do {if (p == pend) return REG_EEND; \ 1529 do {if (p == pend) return REG_EEND; \
1527 c = (unsigned char) *p++; \ 1530 c = *p++; \
1528 if (RE_TRANSLATE_P (translate)) c = RE_TRANSLATE (translate, c); \ 1531 if (RE_TRANSLATE_P (translate)) c = RE_TRANSLATE (translate, c); \
1529 } while (0) 1532 } while (0)
1530#endif 1533#endif
@@ -1533,7 +1536,7 @@ static boolean group_in_compile_stack ();
1533 translation. */ 1536 translation. */
1534#define PATFETCH_RAW(c) \ 1537#define PATFETCH_RAW(c) \
1535 do {if (p == pend) return REG_EEND; \ 1538 do {if (p == pend) return REG_EEND; \
1536 c = (unsigned char) *p++; \ 1539 c = *p++; \
1537 } while (0) 1540 } while (0)
1538 1541
1539/* Go backwards one character in the pattern. */ 1542/* Go backwards one character in the pattern. */
@@ -1546,8 +1549,7 @@ static boolean group_in_compile_stack ();
1546 when we use a character as a subscript we must make it unsigned. */ 1549 when we use a character as a subscript we must make it unsigned. */
1547#ifndef TRANSLATE 1550#ifndef TRANSLATE
1548#define TRANSLATE(d) \ 1551#define TRANSLATE(d) \
1549 (RE_TRANSLATE_P (translate) \ 1552 (RE_TRANSLATE_P (translate) ? RE_TRANSLATE (translate, (d)) : (d))
1550 ? (unsigned) RE_TRANSLATE (translate, (unsigned) (d)) : (d))
1551#endif 1553#endif
1552 1554
1553 1555
@@ -1802,8 +1804,8 @@ static fail_stack_type fail_stack;
1802 but never make them smaller. */ 1804 but never make them smaller. */
1803static int regs_allocated_size; 1805static int regs_allocated_size;
1804 1806
1805static const char ** regstart, ** regend; 1807static re_char ** regstart, ** regend;
1806static const char **best_regstart, **best_regend; 1808static re_char **best_regstart, **best_regend;
1807 1809
1808/* Make the register vectors big enough for NUM_REGS registers, 1810/* Make the register vectors big enough for NUM_REGS registers,
1809 but don't make them smaller. */ 1811 but don't make them smaller. */
@@ -1814,10 +1816,10 @@ regex_grow_registers (num_regs)
1814{ 1816{
1815 if (num_regs > regs_allocated_size) 1817 if (num_regs > regs_allocated_size)
1816 { 1818 {
1817 RETALLOC_IF (regstart, num_regs, const char *); 1819 RETALLOC_IF (regstart, num_regs, re_char *);
1818 RETALLOC_IF (regend, num_regs, const char *); 1820 RETALLOC_IF (regend, num_regs, re_char *);
1819 RETALLOC_IF (best_regstart, num_regs, const char *); 1821 RETALLOC_IF (best_regstart, num_regs, re_char *);
1820 RETALLOC_IF (best_regend, num_regs, const char *); 1822 RETALLOC_IF (best_regend, num_regs, re_char *);
1821 1823
1822 regs_allocated_size = num_regs; 1824 regs_allocated_size = num_regs;
1823 } 1825 }
@@ -1862,7 +1864,7 @@ do { \
1862 1864
1863static reg_errcode_t 1865static reg_errcode_t
1864regex_compile (pattern, size, syntax, bufp) 1866regex_compile (pattern, size, syntax, bufp)
1865 const char *pattern; 1867 re_char *pattern;
1866 int size; 1868 int size;
1867 reg_syntax_t syntax; 1869 reg_syntax_t syntax;
1868 struct re_pattern_buffer *bufp; 1870 struct re_pattern_buffer *bufp;
@@ -1873,7 +1875,7 @@ regex_compile (pattern, size, syntax, bufp)
1873 register unsigned int c, c1; 1875 register unsigned int c, c1;
1874 1876
1875 /* A random temporary spot in PATTERN. */ 1877 /* A random temporary spot in PATTERN. */
1876 const char *p1; 1878 re_char *p1;
1877 1879
1878 /* Points to the end of the buffer, where we should append. */ 1880 /* Points to the end of the buffer, where we should append. */
1879 register unsigned char *b; 1881 register unsigned char *b;
@@ -1884,11 +1886,11 @@ regex_compile (pattern, size, syntax, bufp)
1884 /* Points to the current (ending) position in the pattern. */ 1886 /* Points to the current (ending) position in the pattern. */
1885#ifdef AIX 1887#ifdef AIX
1886 /* `const' makes AIX compiler fail. */ 1888 /* `const' makes AIX compiler fail. */
1887 char *p = pattern; 1889 unsigned char *p = pattern;
1888#else 1890#else
1889 const char *p = pattern; 1891 re_char *p = pattern;
1890#endif 1892#endif
1891 const char *pend = pattern + size; 1893 re_char *pend = pattern + size;
1892 1894
1893 /* How to translate the characters in the pattern. */ 1895 /* How to translate the characters in the pattern. */
1894 RE_TRANSLATE_TYPE translate = bufp->translate; 1896 RE_TRANSLATE_TYPE translate = bufp->translate;
@@ -1909,7 +1911,7 @@ regex_compile (pattern, size, syntax, bufp)
1909 1911
1910 /* Place in the uncompiled pattern (i.e., the {) to 1912 /* Place in the uncompiled pattern (i.e., the {) to
1911 which to go back if the interval is invalid. */ 1913 which to go back if the interval is invalid. */
1912 const char *beg_interval; 1914 re_char *beg_interval;
1913 1915
1914 /* Address of the place where a forward jump should go to the end of 1916 /* Address of the place where a forward jump should go to the end of
1915 the containing expression. Each alternative of an `or' -- except the 1917 the containing expression. Each alternative of an `or' -- except the
@@ -2051,8 +2053,8 @@ regex_compile (pattern, size, syntax, bufp)
2051 boolean keep_string_p = false; 2053 boolean keep_string_p = false;
2052 2054
2053 /* 1 means zero (many) matches is allowed. */ 2055 /* 1 means zero (many) matches is allowed. */
2054 char zero_times_ok = 0, many_times_ok = 0; 2056 boolean zero_times_ok = 0, many_times_ok = 0;
2055 char greedy = 1; 2057 boolean greedy = 1;
2056 2058
2057 /* If there is a sequence of repetition chars, collapse it 2059 /* If there is a sequence of repetition chars, collapse it
2058 down to just one (the right one). We can't combine 2060 down to just one (the right one). We can't combine
@@ -2132,10 +2134,10 @@ regex_compile (pattern, size, syntax, bufp)
2132 incremented `p', by the way, to be the character after 2134 incremented `p', by the way, to be the character after
2133 the `*'. Do we have to do something analogous here 2135 the `*'. Do we have to do something analogous here
2134 for null bytes, because of RE_DOT_NOT_NULL? */ 2136 for null bytes, because of RE_DOT_NOT_NULL? */
2135 if (TRANSLATE ((unsigned char)*(p - 2)) == TRANSLATE ('.') 2137 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
2136 && zero_times_ok 2138 && zero_times_ok
2137 && p < pend 2139 && p < pend
2138 && TRANSLATE ((unsigned char)*p) == TRANSLATE ('\n') 2140 && TRANSLATE (*p) == TRANSLATE ('\n')
2139 && !(syntax & RE_DOT_NEWLINE)) 2141 && !(syntax & RE_DOT_NEWLINE))
2140 { /* We have .*\n. */ 2142 { /* We have .*\n. */
2141 STORE_JUMP (jump, b, laststart); 2143 STORE_JUMP (jump, b, laststart);
@@ -3189,10 +3191,10 @@ insert_op2 (op, loc, arg1, arg2, end)
3189 3191
3190static boolean 3192static boolean
3191at_begline_loc_p (pattern, p, syntax) 3193at_begline_loc_p (pattern, p, syntax)
3192 const char *pattern, *p; 3194 re_char *pattern, *p;
3193 reg_syntax_t syntax; 3195 reg_syntax_t syntax;
3194{ 3196{
3195 const char *prev = p - 2; 3197 re_char *prev = p - 2;
3196 boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\'; 3198 boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
3197 3199
3198 return 3200 return
@@ -3208,12 +3210,12 @@ at_begline_loc_p (pattern, p, syntax)
3208 3210
3209static boolean 3211static boolean
3210at_endline_loc_p (p, pend, syntax) 3212at_endline_loc_p (p, pend, syntax)
3211 const char *p, *pend; 3213 re_char *p, *pend;
3212 int syntax; 3214 int syntax;
3213{ 3215{
3214 const char *next = p; 3216 re_char *next = p;
3215 boolean next_backslash = *next == '\\'; 3217 boolean next_backslash = *next == '\\';
3216 const char *next_next = p + 1 < pend ? p + 1 : 0; 3218 re_char *next_next = p + 1 < pend ? p + 1 : 0;
3217 3219
3218 return 3220 return
3219 /* Before a subexpression? */ 3221 /* Before a subexpression? */
@@ -3344,7 +3346,7 @@ re_compile_fastmap (bufp)
3344 /* Reset for next path. */ 3346 /* Reset for next path. */
3345 path_can_be_null = true; 3347 path_can_be_null = true;
3346 3348
3347 p = POP_PATTERN_OP (); 3349 p = (unsigned char*) POP_PATTERN_OP ();
3348 3350
3349 continue; 3351 continue;
3350 } 3352 }
@@ -3814,9 +3816,9 @@ re_search (bufp, string, size, startpos, range, regs)
3814 stack overflow). */ 3816 stack overflow). */
3815 3817
3816int 3818int
3817re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) 3819re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
3818 struct re_pattern_buffer *bufp; 3820 struct re_pattern_buffer *bufp;
3819 const char *string1, *string2; 3821 const char *str1, *str2;
3820 int size1, size2; 3822 int size1, size2;
3821 int startpos; 3823 int startpos;
3822 int range; 3824 int range;
@@ -3824,6 +3826,8 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
3824 int stop; 3826 int stop;
3825{ 3827{
3826 int val; 3828 int val;
3829 re_char *string1 = (re_char*) str1;
3830 re_char *string2 = (re_char*) str2;
3827 register char *fastmap = bufp->fastmap; 3831 register char *fastmap = bufp->fastmap;
3828 register RE_TRANSLATE_TYPE translate = bufp->translate; 3832 register RE_TRANSLATE_TYPE translate = bufp->translate;
3829 int total_size = size1 + size2; 3833 int total_size = size1 + size2;
@@ -3907,7 +3911,7 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
3907 the first null string. */ 3911 the first null string. */
3908 if (fastmap && startpos < total_size && !bufp->can_be_null) 3912 if (fastmap && startpos < total_size && !bufp->can_be_null)
3909 { 3913 {
3910 register const char *d; 3914 register re_char *d;
3911 register unsigned int buf_ch; 3915 register unsigned int buf_ch;
3912 3916
3913 d = POS_ADDR_VSTRING (startpos); 3917 d = POS_ADDR_VSTRING (startpos);
@@ -3942,15 +3946,14 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
3942 } 3946 }
3943 else 3947 else
3944 while (range > lim 3948 while (range > lim
3945 && !fastmap[(unsigned char) 3949 && !fastmap[RE_TRANSLATE (translate, *d)])
3946 RE_TRANSLATE (translate, (unsigned char) *d)])
3947 { 3950 {
3948 d++; 3951 d++;
3949 range--; 3952 range--;
3950 } 3953 }
3951 } 3954 }
3952 else 3955 else
3953 while (range > lim && !fastmap[(unsigned char) *d]) 3956 while (range > lim && !fastmap[*d])
3954 { 3957 {
3955 d++; 3958 d++;
3956 range--; 3959 range--;
@@ -4001,10 +4004,8 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
4001 /* Update STARTPOS to the next character boundary. */ 4004 /* Update STARTPOS to the next character boundary. */
4002 if (multibyte) 4005 if (multibyte)
4003 { 4006 {
4004 const unsigned char *p 4007 re_char *p = POS_ADDR_VSTRING (startpos);
4005 = (const unsigned char *) POS_ADDR_VSTRING (startpos); 4008 re_char *pend = STOP_ADDR_VSTRING (startpos);
4006 const unsigned char *pend
4007 = (const unsigned char *) STOP_ADDR_VSTRING (startpos);
4008 int len = MULTIBYTE_FORM_LENGTH (p, pend - p); 4009 int len = MULTIBYTE_FORM_LENGTH (p, pend - p);
4009 4010
4010 range -= len; 4011 range -= len;
@@ -4026,8 +4027,7 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
4026 /* Update STARTPOS to the previous character boundary. */ 4027 /* Update STARTPOS to the previous character boundary. */
4027 if (multibyte) 4028 if (multibyte)
4028 { 4029 {
4029 const unsigned char *p 4030 re_char *p = POS_ADDR_VSTRING (startpos);
4030 = (const unsigned char *) POS_ADDR_VSTRING (startpos);
4031 int len = 0; 4031 int len = 0;
4032 4032
4033 /* Find the head of multibyte form. */ 4033 /* Find the head of multibyte form. */
@@ -4394,7 +4394,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
4394static int 4394static int
4395re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) 4395re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4396 struct re_pattern_buffer *bufp; 4396 struct re_pattern_buffer *bufp;
4397 const char *string1, *string2; 4397 re_char *string1, *string2;
4398 int size1, size2; 4398 int size1, size2;
4399 int pos; 4399 int pos;
4400 struct re_registers *regs; 4400 struct re_registers *regs;
@@ -4402,17 +4402,18 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4402{ 4402{
4403 /* General temporaries. */ 4403 /* General temporaries. */
4404 int mcnt; 4404 int mcnt;
4405 boolean not;
4405 unsigned char *p1; 4406 unsigned char *p1;
4406 4407
4407 /* Just past the end of the corresponding string. */ 4408 /* Just past the end of the corresponding string. */
4408 const char *end1, *end2; 4409 re_char *end1, *end2;
4409 4410
4410 /* Pointers into string1 and string2, just past the last characters in 4411 /* Pointers into string1 and string2, just past the last characters in
4411 each to consider matching. */ 4412 each to consider matching. */
4412 const char *end_match_1, *end_match_2; 4413 re_char *end_match_1, *end_match_2;
4413 4414
4414 /* Where we are in the data, and the end of the current string. */ 4415 /* Where we are in the data, and the end of the current string. */
4415 const char *d, *dend; 4416 re_char *d, *dend;
4416 4417
4417 /* Where we are in the pattern, and the end of the pattern. */ 4418 /* Where we are in the pattern, and the end of the pattern. */
4418 unsigned char *p = bufp->buffer; 4419 unsigned char *p = bufp->buffer;
@@ -4456,7 +4457,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4456 stopped matching the regnum-th subexpression. (The zeroth register 4457 stopped matching the regnum-th subexpression. (The zeroth register
4457 keeps track of what the whole pattern matches.) */ 4458 keeps track of what the whole pattern matches.) */
4458#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ 4459#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
4459 const char **regstart, **regend; 4460 re_char **regstart, **regend;
4460#endif 4461#endif
4461 4462
4462 /* The following record the register info as found in the above 4463 /* The following record the register info as found in the above
@@ -4465,7 +4466,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4465 turn happens only if we have not yet matched the entire string. */ 4466 turn happens only if we have not yet matched the entire string. */
4466 unsigned best_regs_set = false; 4467 unsigned best_regs_set = false;
4467#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ 4468#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
4468 const char **best_regstart, **best_regend; 4469 re_char **best_regstart, **best_regend;
4469#endif 4470#endif
4470 4471
4471 /* Logically, this is `best_regend[0]'. But we don't want to have to 4472 /* Logically, this is `best_regend[0]'. But we don't want to have to
@@ -4476,7 +4477,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4476 the end of the best match so far in a separate variable. We 4477 the end of the best match so far in a separate variable. We
4477 initialize this to NULL so that when we backtrack the first time 4478 initialize this to NULL so that when we backtrack the first time
4478 and need to test it, it's not garbage. */ 4479 and need to test it, it's not garbage. */
4479 const char *match_end = NULL; 4480 re_char *match_end = NULL;
4480 4481
4481#ifdef DEBUG 4482#ifdef DEBUG
4482 /* Counts the total number of registers pushed. */ 4483 /* Counts the total number of registers pushed. */
@@ -4495,10 +4496,10 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4495 array indexing. We should fix this. */ 4496 array indexing. We should fix this. */
4496 if (bufp->re_nsub) 4497 if (bufp->re_nsub)
4497 { 4498 {
4498 regstart = REGEX_TALLOC (num_regs, const char *); 4499 regstart = REGEX_TALLOC (num_regs, re_char *);
4499 regend = REGEX_TALLOC (num_regs, const char *); 4500 regend = REGEX_TALLOC (num_regs, re_char *);
4500 best_regstart = REGEX_TALLOC (num_regs, const char *); 4501 best_regstart = REGEX_TALLOC (num_regs, re_char *);
4501 best_regend = REGEX_TALLOC (num_regs, const char *); 4502 best_regend = REGEX_TALLOC (num_regs, re_char *);
4502 4503
4503 if (!(regstart && regend && best_regstart && best_regend)) 4504 if (!(regstart && regend && best_regstart && best_regend))
4504 { 4505 {
@@ -4795,8 +4796,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4795 do 4796 do
4796 { 4797 {
4797 PREFETCH (); 4798 PREFETCH ();
4798 if ((unsigned char) RE_TRANSLATE (translate, (unsigned char) *d) 4799 if (RE_TRANSLATE (translate, *d) != *p++)
4799 != (unsigned char) *p++)
4800 goto fail; 4800 goto fail;
4801 d++; 4801 d++;
4802 } 4802 }
@@ -4807,7 +4807,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4807 do 4807 do
4808 { 4808 {
4809 PREFETCH (); 4809 PREFETCH ();
4810 if (*d++ != (char) *p++) goto fail; 4810 if (*d++ != *p++) goto fail;
4811 } 4811 }
4812 while (--mcnt); 4812 while (--mcnt);
4813 } 4813 }
@@ -4830,7 +4830,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4830 else 4830 else
4831#endif /* not emacs */ 4831#endif /* not emacs */
4832 { 4832 {
4833 buf_ch = (unsigned char) *d; 4833 buf_ch = *d;
4834 buf_charlen = 1; 4834 buf_charlen = 1;
4835 } 4835 }
4836 4836
@@ -4869,7 +4869,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4869 DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : ""); 4869 DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
4870 4870
4871 PREFETCH (); 4871 PREFETCH ();
4872 c = (unsigned char) *d; 4872 c = *d;
4873 4873
4874 range_table_exists = CHARSET_RANGE_TABLE_EXISTS_P (&p[-1]); 4874 range_table_exists = CHARSET_RANGE_TABLE_EXISTS_P (&p[-1]);
4875 4875
@@ -4982,7 +4982,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4982 followed by the numeric value of <digit> as the register number. */ 4982 followed by the numeric value of <digit> as the register number. */
4983 case duplicate: 4983 case duplicate:
4984 { 4984 {
4985 register const char *d2, *dend2; 4985 register re_char *d2, *dend2;
4986 int regno = *p++; /* Get which register to match against. */ 4986 int regno = *p++; /* Get which register to match against. */
4987 DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno); 4987 DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
4988 4988
@@ -5282,31 +5282,33 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5282 } 5282 }
5283 5283
5284 case wordbound: 5284 case wordbound:
5285 DEBUG_PRINT1 ("EXECUTING wordbound.\n"); 5285 case notwordbound:
5286 not = (re_opcode_t) *(p - 1) == notwordbound;
5287 DEBUG_PRINT2 ("EXECUTING %swordbound.\n", not?"not":"");
5286 5288
5287 /* We SUCCEED in one of the following cases: */ 5289 /* We SUCCEED in one of the following cases: */
5288 5290
5289 /* Case 1: D is at the beginning or the end of string. */ 5291 /* Case 1: D is at the beginning or the end of string. */
5290 if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)) 5292 if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
5291 break; 5293 not = !not;
5292 else 5294 else
5293 { 5295 {
5294 /* C1 is the character before D, S1 is the syntax of C1, C2 5296 /* C1 is the character before D, S1 is the syntax of C1, C2
5295 is the character at D, and S2 is the syntax of C2. */ 5297 is the character at D, and S2 is the syntax of C2. */
5296 int c1, c2, s1, s2; 5298 int c1, c2, s1, s2;
5297 int pos1 = PTR_TO_OFFSET (d - 1);
5298 int charpos;
5299
5300 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
5301 GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2);
5302#ifdef emacs 5299#ifdef emacs
5303 charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1); 5300 int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (PTR_TO_OFFSET (d - 1));
5304 UPDATE_SYNTAX_TABLE (charpos); 5301 UPDATE_SYNTAX_TABLE (charpos);
5305#endif 5302#endif
5303 /* FIXME: This does a STRING_CHAR even for unibyte buffers. */
5304 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
5306 s1 = SYNTAX (c1); 5305 s1 = SYNTAX (c1);
5307#ifdef emacs 5306#ifdef emacs
5308 UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1); 5307 UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
5309#endif 5308#endif
5309 PREFETCH ();
5310 /* FIXME: This does a STRING_CHAR even for unibyte buffers. */
5311 c2 = STRING_CHAR (d, dend - d);
5310 s2 = SYNTAX (c2); 5312 s2 = SYNTAX (c2);
5311 5313
5312 if (/* Case 2: Only one of S1 and S2 is Sword. */ 5314 if (/* Case 2: Only one of S1 and S2 is Sword. */
@@ -5314,46 +5316,12 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5314 /* Case 3: Both of S1 and S2 are Sword, and macro 5316 /* Case 3: Both of S1 and S2 are Sword, and macro
5315 WORD_BOUNDARY_P (C1, C2) returns nonzero. */ 5317 WORD_BOUNDARY_P (C1, C2) returns nonzero. */
5316 || ((s1 == Sword) && WORD_BOUNDARY_P (c1, c2))) 5318 || ((s1 == Sword) && WORD_BOUNDARY_P (c1, c2)))
5319 not = !not;
5320 }
5321 if (not)
5317 break; 5322 break;
5318 }
5319 goto fail;
5320
5321 case notwordbound:
5322 DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
5323
5324 /* We FAIL in one of the following cases: */
5325
5326 /* Case 1: D is at the beginning or the end of string. */
5327 if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
5328 goto fail;
5329 else 5323 else
5330 {
5331 /* C1 is the character before D, S1 is the syntax of C1, C2
5332 is the character at D, and S2 is the syntax of C2. */
5333 int c1, c2, s1, s2;
5334 int pos1 = PTR_TO_OFFSET (d - 1);
5335 int charpos;
5336
5337 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
5338 GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2);
5339#ifdef emacs
5340 charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1);
5341 UPDATE_SYNTAX_TABLE (charpos);
5342#endif
5343 s1 = SYNTAX (c1);
5344#ifdef emacs
5345 UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
5346#endif
5347 s2 = SYNTAX (c2);
5348
5349 if (/* Case 2: Only one of S1 and S2 is Sword. */
5350 ((s1 == Sword) != (s2 == Sword))
5351 /* Case 3: Both of S1 and S2 are Sword, and macro
5352 WORD_BOUNDARY_P (C1, C2) returns nonzero. */
5353 || ((s1 == Sword) && WORD_BOUNDARY_P (c1, c2)))
5354 goto fail; 5324 goto fail;
5355 }
5356 break;
5357 5325
5358 case wordbeg: 5326 case wordbeg:
5359 DEBUG_PRINT1 ("EXECUTING wordbeg.\n"); 5327 DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
@@ -5371,7 +5339,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5371 int pos1 = PTR_TO_OFFSET (d); 5339 int pos1 = PTR_TO_OFFSET (d);
5372 int charpos; 5340 int charpos;
5373 5341
5374 GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2); 5342 PREFETCH ();
5343 c2 = STRING_CHAR (d, dend - d);
5375#ifdef emacs 5344#ifdef emacs
5376 charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1); 5345 charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1);
5377 UPDATE_SYNTAX_TABLE (charpos); 5346 UPDATE_SYNTAX_TABLE (charpos);
@@ -5429,7 +5398,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5429 /* Case 3: D is not at the end of string ... */ 5398 /* Case 3: D is not at the end of string ... */
5430 if (!AT_STRINGS_END (d)) 5399 if (!AT_STRINGS_END (d))
5431 { 5400 {
5432 GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2); 5401 PREFETCH ();
5402 c2 = STRING_CHAR (d, dend - d);
5433#ifdef emacs 5403#ifdef emacs
5434 UPDATE_SYNTAX_TABLE_FORWARD (charpos); 5404 UPDATE_SYNTAX_TABLE_FORWARD (charpos);
5435#endif 5405#endif
@@ -5446,19 +5416,19 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5446#ifdef emacs 5416#ifdef emacs
5447 case before_dot: 5417 case before_dot:
5448 DEBUG_PRINT1 ("EXECUTING before_dot.\n"); 5418 DEBUG_PRINT1 ("EXECUTING before_dot.\n");
5449 if (PTR_BYTE_POS ((unsigned char *) d) >= PT_BYTE) 5419 if (PTR_BYTE_POS (d) >= PT_BYTE)
5450 goto fail; 5420 goto fail;
5451 break; 5421 break;
5452 5422
5453 case at_dot: 5423 case at_dot:
5454 DEBUG_PRINT1 ("EXECUTING at_dot.\n"); 5424 DEBUG_PRINT1 ("EXECUTING at_dot.\n");
5455 if (PTR_BYTE_POS ((unsigned char *) d) != PT_BYTE) 5425 if (PTR_BYTE_POS (d) != PT_BYTE)
5456 goto fail; 5426 goto fail;
5457 break; 5427 break;
5458 5428
5459 case after_dot: 5429 case after_dot:
5460 DEBUG_PRINT1 ("EXECUTING after_dot.\n"); 5430 DEBUG_PRINT1 ("EXECUTING after_dot.\n");
5461 if (PTR_BYTE_POS ((unsigned char *) d) <= PT_BYTE) 5431 if (PTR_BYTE_POS (d) <= PT_BYTE)
5462 goto fail; 5432 goto fail;
5463 break; 5433 break;
5464 5434
@@ -5592,7 +5562,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5592#endif 5562#endif
5593 if (!FAIL_STACK_EMPTY ()) 5563 if (!FAIL_STACK_EMPTY ())
5594 { 5564 {
5595 char *str; 5565 re_char *str;
5596 unsigned char *pat; 5566 unsigned char *pat;
5597 /* A restart point is known. Restore to that state. */ 5567 /* A restart point is known. Restore to that state. */
5598 DEBUG_PRINT1 ("\nFAIL:\n"); 5568 DEBUG_PRINT1 ("\nFAIL:\n");