diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/ChangeLog | 13 | ||||
| -rw-r--r-- | src/charset.h | 17 | ||||
| -rw-r--r-- | src/regex.c | 198 |
3 files changed, 100 insertions, 128 deletions
diff --git a/src/ChangeLog b/src/ChangeLog
index 6d0f268fe65..552d4cf2f7c 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
| @@ -1,3 +1,16 @@ | |||
| 1 | 2000-03-13 Stefan Monnier <monnier@cs.yale.edu> | ||
| 2 | |||
| 3 | * regex.c: Declare a new type `re_char' used throughout the code for the | ||
| 4 | string char type. It's `const unsigned char' to match the rest of Emacs. | ||
| 5 | Consistently make sure all pointers to strings use it and make sure all | ||
| 6 | pointers into the pattern use `unsigned char'. | ||
| 7 | (re_match_2_internal): Use `PREFETCH+STRING_CHAR' instead of | ||
| 8 | GET_CHAR_AFTER_2. | ||
| 9 | Also merge wordbound and notwordbound to reduce code duplication. | ||
| 10 | |||
| 11 | * charset.h (GET_CHAR_AFTER_2): Remove. | ||
| 12 | (GET_CHAR_BEFORE_2): Use unsigned chars, like everywhere else. | ||
| 13 | |||
| 1 | 2000-03-12 Ken Raeburn <raeburn@gnu.org> | 14 | 2000-03-12 Ken Raeburn <raeburn@gnu.org> |
| 2 | 15 | ||
| 3 | * Makefile.in (temacs): Evaluate ALL_LDFLAGS into a temporary | 16 | * Makefile.in (temacs): Evaluate ALL_LDFLAGS into a temporary |
diff --git a/src/charset.h b/src/charset.h
index 6b2e25b89dd..3acc447c5f8 100644
--- a/src/charset.h
+++ b/src/charset.h
| @@ -577,26 +577,15 @@ else | |||
| 577 | ? 1 \ | 577 | ? 1 \ |
| 578 | : multibyte_form_length (str, len)) | 578 | : multibyte_form_length (str, len)) |
| 579 | 579 | ||
| 580 | /* Set C a (possibly multibyte) character at P. P points into a | ||
| 581 | string which is the virtual concatenation of STR1 (which ends at | ||
| 582 | END1) or STR2 (which ends at END2). */ | ||
| 583 | |||
| 584 | #define GET_CHAR_AFTER_2(c, p, str1, end1, str2, end2) \ | ||
| 585 | do { \ | ||
| 586 | const char *dtemp = (p) == (end1) ? (str2) : (p); \ | ||
| 587 | const char *dlimit = ((p) >= (str1) && (p) < (end1)) ? (end1) : (end2); \ | ||
| 588 | c = STRING_CHAR (dtemp, dlimit - dtemp); \ | ||
| 589 | } while (0) | ||
| 590 | |||
| 591 | /* Set C a (possibly multibyte) character before P. P points into a | 580 | /* Set C a (possibly multibyte) character before P. P points into a |
| 592 | string which is the virtual concatenation of STR1 (which ends at | 581 | string which is the virtual concatenation of STR1 (which ends at |
| 593 | END1) or STR2 (which ends at END2). */ | 582 | END1) or STR2 (which ends at END2). */ |
| 594 | 583 | ||
| 595 | #define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \ | 584 | #define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \ |
| 596 | do { \ | 585 | do { \ |
| 597 | const char *dtemp = (p); \ | 586 | const unsigned char *dtemp = (p); \ |
| 598 | const char *dlimit = ((p) > (str2) && (p) <= (end2)) ? (str2) : (str1); \ | 587 | const unsigned char *dlimit = ((p) > (str2) && (p) <= (end2)) ? (str2) : (str1); \ |
| 599 | while (dtemp-- > dlimit && (unsigned char) *dtemp >= 0xA0); \ | 588 | while (dtemp-- > dlimit && *dtemp >= 0xA0); \ |
| 600 | c = STRING_CHAR (dtemp, p - dtemp); \ | 589 | c = STRING_CHAR (dtemp, p - dtemp); \ |
| 601 | } while (0) | 590 | } while (0) |
| 602 | 591 | ||
diff --git a/src/regex.c b/src/regex.c
index 809a7d24219..c118af0fdb6 100644
--- a/src/regex.c
+++ b/src/regex.c
| @@ -434,6 +434,9 @@ char *alloca (); | |||
| 434 | #define MAX(a, b) ((a) > (b) ? (a) : (b)) | 434 | #define MAX(a, b) ((a) > (b) ? (a) : (b)) |
| 435 | #define MIN(a, b) ((a) < (b) ? (a) : (b)) | 435 | #define MIN(a, b) ((a) < (b) ? (a) : (b)) |
| 436 | 436 | ||
| 437 | /* Type of source-pattern and string chars. */ | ||
| 438 | typedef const unsigned char re_char; | ||
| 439 | |||
| 437 | typedef char boolean; | 440 | typedef char boolean; |
| 438 | #define false 0 | 441 | #define false 0 |
| 439 | #define true 1 | 442 | #define true 1 |
| @@ -1072,9 +1075,9 @@ print_compiled_pattern (bufp) | |||
| 1072 | 1075 | ||
| 1073 | void | 1076 | void |
| 1074 | print_double_string (where, string1, size1, string2, size2) | 1077 | print_double_string (where, string1, size1, string2, size2) |
| 1075 | const char *where; | 1078 | re_char *where; |
| 1076 | const char *string1; | 1079 | re_char *string1; |
| 1077 | const char *string2; | 1080 | re_char *string2; |
| 1078 | int size1; | 1081 | int size1; |
| 1079 | int size2; | 1082 | int size2; |
| 1080 | { | 1083 | { |
| @@ -1229,7 +1232,7 @@ int re_max_failures = 4000; | |||
| 1229 | 1232 | ||
| 1230 | union fail_stack_elt | 1233 | union fail_stack_elt |
| 1231 | { | 1234 | { |
| 1232 | unsigned char *pointer; | 1235 | const unsigned char *pointer; |
| 1233 | unsigned int integer; | 1236 | unsigned int integer; |
| 1234 | }; | 1237 | }; |
| 1235 | 1238 | ||
| @@ -1355,7 +1358,7 @@ typedef struct | |||
| 1355 | 1358 | ||
| 1356 | /* Used to examine the stack (to detect infinite loops). */ | 1359 | /* Used to examine the stack (to detect infinite loops). */ |
| 1357 | #define FAILURE_PAT(h) fail_stack.stack[(h) - 1].pointer | 1360 | #define FAILURE_PAT(h) fail_stack.stack[(h) - 1].pointer |
| 1358 | #define FAILURE_STR(h) ((char*)fail_stack.stack[(h) - 2].pointer) | 1361 | #define FAILURE_STR(h) (fail_stack.stack[(h) - 2].pointer) |
| 1359 | #define NEXT_FAILURE_HANDLE(h) fail_stack.stack[(h) - 3].integer | 1362 | #define NEXT_FAILURE_HANDLE(h) fail_stack.stack[(h) - 3].integer |
| 1360 | #define TOP_FAILURE_HANDLE() fail_stack.frame | 1363 | #define TOP_FAILURE_HANDLE() fail_stack.frame |
| 1361 | 1364 | ||
| @@ -1400,10 +1403,10 @@ do { \ | |||
| 1400 | || FAILURE_STR (failure) == NULL)) \ | 1403 | || FAILURE_STR (failure) == NULL)) \ |
| 1401 | { \ | 1404 | { \ |
| 1402 | assert (FAILURE_PAT (failure) >= bufp->buffer \ | 1405 | assert (FAILURE_PAT (failure) >= bufp->buffer \ |
| 1403 | && FAILURE_PAT (failure) <= bufp->buffer + bufp->used);\ | 1406 | && FAILURE_PAT (failure) <= bufp->buffer + bufp->used); \ |
| 1404 | if (FAILURE_PAT (failure) == pat_cur) \ | 1407 | if (FAILURE_PAT (failure) == pat_cur) \ |
| 1405 | goto fail; \ | 1408 | goto fail; \ |
| 1406 | DEBUG_PRINT2 (" Other pattern: %p\n", FAILURE_PAT (failure));\ | 1409 | DEBUG_PRINT2 (" Other pattern: %p\n", FAILURE_PAT (failure)); \ |
| 1407 | failure = NEXT_FAILURE_HANDLE(failure); \ | 1410 | failure = NEXT_FAILURE_HANDLE(failure); \ |
| 1408 | } \ | 1411 | } \ |
| 1409 | DEBUG_PRINT2 (" Other string: %p\n", FAILURE_STR (failure)); \ | 1412 | DEBUG_PRINT2 (" Other string: %p\n", FAILURE_STR (failure)); \ |
| @@ -1490,7 +1493,7 @@ do { \ | |||
| 1490 | /* If the saved string location is NULL, it came from an \ | 1493 | /* If the saved string location is NULL, it came from an \ |
| 1491 | on_failure_keep_string_jump opcode, and we want to throw away the \ | 1494 | on_failure_keep_string_jump opcode, and we want to throw away the \ |
| 1492 | saved NULL, thus retaining our current position in the string. */ \ | 1495 | saved NULL, thus retaining our current position in the string. */ \ |
| 1493 | str = (char *) POP_FAILURE_POINTER (); \ | 1496 | str = (re_char *) POP_FAILURE_POINTER (); \ |
| 1494 | DEBUG_PRINT2 (" Popping string %p: `", str); \ | 1497 | DEBUG_PRINT2 (" Popping string %p: `", str); \ |
| 1495 | DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \ | 1498 | DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \ |
| 1496 | DEBUG_PRINT1 ("'\n"); \ | 1499 | DEBUG_PRINT1 ("'\n"); \ |
| @@ -1524,7 +1527,7 @@ static boolean group_in_compile_stack (); | |||
| 1524 | #ifndef PATFETCH | 1527 | #ifndef PATFETCH |
| 1525 | #define PATFETCH(c) \ | 1528 | #define PATFETCH(c) \ |
| 1526 | do {if (p == pend) return REG_EEND; \ | 1529 | do {if (p == pend) return REG_EEND; \ |
| 1527 | c = (unsigned char) *p++; \ | 1530 | c = *p++; \ |
| 1528 | if (RE_TRANSLATE_P (translate)) c = RE_TRANSLATE (translate, c); \ | 1531 | if (RE_TRANSLATE_P (translate)) c = RE_TRANSLATE (translate, c); \ |
| 1529 | } while (0) | 1532 | } while (0) |
| 1530 | #endif | 1533 | #endif |
| @@ -1533,7 +1536,7 @@ static boolean group_in_compile_stack (); | |||
| 1533 | translation. */ | 1536 | translation. */ |
| 1534 | #define PATFETCH_RAW(c) \ | 1537 | #define PATFETCH_RAW(c) \ |
| 1535 | do {if (p == pend) return REG_EEND; \ | 1538 | do {if (p == pend) return REG_EEND; \ |
| 1536 | c = (unsigned char) *p++; \ | 1539 | c = *p++; \ |
| 1537 | } while (0) | 1540 | } while (0) |
| 1538 | 1541 | ||
| 1539 | /* Go backwards one character in the pattern. */ | 1542 | /* Go backwards one character in the pattern. */ |
| @@ -1546,8 +1549,7 @@ static boolean group_in_compile_stack (); | |||
| 1546 | when we use a character as a subscript we must make it unsigned. */ | 1549 | when we use a character as a subscript we must make it unsigned. */ |
| 1547 | #ifndef TRANSLATE | 1550 | #ifndef TRANSLATE |
| 1548 | #define TRANSLATE(d) \ | 1551 | #define TRANSLATE(d) \ |
| 1549 | (RE_TRANSLATE_P (translate) \ | 1552 | (RE_TRANSLATE_P (translate) ? RE_TRANSLATE (translate, (d)) : (d)) |
| 1550 | ? (unsigned) RE_TRANSLATE (translate, (unsigned) (d)) : (d)) | ||
| 1551 | #endif | 1553 | #endif |
| 1552 | 1554 | ||
| 1553 | 1555 | ||
| @@ -1802,8 +1804,8 @@ static fail_stack_type fail_stack; | |||
| 1802 | but never make them smaller. */ | 1804 | but never make them smaller. */ |
| 1803 | static int regs_allocated_size; | 1805 | static int regs_allocated_size; |
| 1804 | 1806 | ||
| 1805 | static const char ** regstart, ** regend; | 1807 | static re_char ** regstart, ** regend; |
| 1806 | static const char **best_regstart, **best_regend; | 1808 | static re_char **best_regstart, **best_regend; |
| 1807 | 1809 | ||
| 1808 | /* Make the register vectors big enough for NUM_REGS registers, | 1810 | /* Make the register vectors big enough for NUM_REGS registers, |
| 1809 | but don't make them smaller. */ | 1811 | but don't make them smaller. */ |
| @@ -1814,10 +1816,10 @@ regex_grow_registers (num_regs) | |||
| 1814 | { | 1816 | { |
| 1815 | if (num_regs > regs_allocated_size) | 1817 | if (num_regs > regs_allocated_size) |
| 1816 | { | 1818 | { |
| 1817 | RETALLOC_IF (regstart, num_regs, const char *); | 1819 | RETALLOC_IF (regstart, num_regs, re_char *); |
| 1818 | RETALLOC_IF (regend, num_regs, const char *); | 1820 | RETALLOC_IF (regend, num_regs, re_char *); |
| 1819 | RETALLOC_IF (best_regstart, num_regs, const char *); | 1821 | RETALLOC_IF (best_regstart, num_regs, re_char *); |
| 1820 | RETALLOC_IF (best_regend, num_regs, const char *); | 1822 | RETALLOC_IF (best_regend, num_regs, re_char *); |
| 1821 | 1823 | ||
| 1822 | regs_allocated_size = num_regs; | 1824 | regs_allocated_size = num_regs; |
| 1823 | } | 1825 | } |
| @@ -1862,7 +1864,7 @@ do { \ | |||
| 1862 | 1864 | ||
| 1863 | static reg_errcode_t | 1865 | static reg_errcode_t |
| 1864 | regex_compile (pattern, size, syntax, bufp) | 1866 | regex_compile (pattern, size, syntax, bufp) |
| 1865 | const char *pattern; | 1867 | re_char *pattern; |
| 1866 | int size; | 1868 | int size; |
| 1867 | reg_syntax_t syntax; | 1869 | reg_syntax_t syntax; |
| 1868 | struct re_pattern_buffer *bufp; | 1870 | struct re_pattern_buffer *bufp; |
| @@ -1873,7 +1875,7 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 1873 | register unsigned int c, c1; | 1875 | register unsigned int c, c1; |
| 1874 | 1876 | ||
| 1875 | /* A random temporary spot in PATTERN. */ | 1877 | /* A random temporary spot in PATTERN. */ |
| 1876 | const char *p1; | 1878 | re_char *p1; |
| 1877 | 1879 | ||
| 1878 | /* Points to the end of the buffer, where we should append. */ | 1880 | /* Points to the end of the buffer, where we should append. */ |
| 1879 | register unsigned char *b; | 1881 | register unsigned char *b; |
| @@ -1884,11 +1886,11 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 1884 | /* Points to the current (ending) position in the pattern. */ | 1886 | /* Points to the current (ending) position in the pattern. */ |
| 1885 | #ifdef AIX | 1887 | #ifdef AIX |
| 1886 | /* `const' makes AIX compiler fail. */ | 1888 | /* `const' makes AIX compiler fail. */ |
| 1887 | char *p = pattern; | 1889 | unsigned char *p = pattern; |
| 1888 | #else | 1890 | #else |
| 1889 | const char *p = pattern; | 1891 | re_char *p = pattern; |
| 1890 | #endif | 1892 | #endif |
| 1891 | const char *pend = pattern + size; | 1893 | re_char *pend = pattern + size; |
| 1892 | 1894 | ||
| 1893 | /* How to translate the characters in the pattern. */ | 1895 | /* How to translate the characters in the pattern. */ |
| 1894 | RE_TRANSLATE_TYPE translate = bufp->translate; | 1896 | RE_TRANSLATE_TYPE translate = bufp->translate; |
| @@ -1909,7 +1911,7 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 1909 | 1911 | ||
| 1910 | /* Place in the uncompiled pattern (i.e., the {) to | 1912 | /* Place in the uncompiled pattern (i.e., the {) to |
| 1911 | which to go back if the interval is invalid. */ | 1913 | which to go back if the interval is invalid. */ |
| 1912 | const char *beg_interval; | 1914 | re_char *beg_interval; |
| 1913 | 1915 | ||
| 1914 | /* Address of the place where a forward jump should go to the end of | 1916 | /* Address of the place where a forward jump should go to the end of |
| 1915 | the containing expression. Each alternative of an `or' -- except the | 1917 | the containing expression. Each alternative of an `or' -- except the |
| @@ -2051,8 +2053,8 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2051 | boolean keep_string_p = false; | 2053 | boolean keep_string_p = false; |
| 2052 | 2054 | ||
| 2053 | /* 1 means zero (many) matches is allowed. */ | 2055 | /* 1 means zero (many) matches is allowed. */ |
| 2054 | char zero_times_ok = 0, many_times_ok = 0; | 2056 | boolean zero_times_ok = 0, many_times_ok = 0; |
| 2055 | char greedy = 1; | 2057 | boolean greedy = 1; |
| 2056 | 2058 | ||
| 2057 | /* If there is a sequence of repetition chars, collapse it | 2059 | /* If there is a sequence of repetition chars, collapse it |
| 2058 | down to just one (the right one). We can't combine | 2060 | down to just one (the right one). We can't combine |
| @@ -2132,10 +2134,10 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2132 | incremented `p', by the way, to be the character after | 2134 | incremented `p', by the way, to be the character after |
| 2133 | the `*'. Do we have to do something analogous here | 2135 | the `*'. Do we have to do something analogous here |
| 2134 | for null bytes, because of RE_DOT_NOT_NULL? */ | 2136 | for null bytes, because of RE_DOT_NOT_NULL? */ |
| 2135 | if (TRANSLATE ((unsigned char)*(p - 2)) == TRANSLATE ('.') | 2137 | if (TRANSLATE (*(p - 2)) == TRANSLATE ('.') |
| 2136 | && zero_times_ok | 2138 | && zero_times_ok |
| 2137 | && p < pend | 2139 | && p < pend |
| 2138 | && TRANSLATE ((unsigned char)*p) == TRANSLATE ('\n') | 2140 | && TRANSLATE (*p) == TRANSLATE ('\n') |
| 2139 | && !(syntax & RE_DOT_NEWLINE)) | 2141 | && !(syntax & RE_DOT_NEWLINE)) |
| 2140 | { /* We have .*\n. */ | 2142 | { /* We have .*\n. */ |
| 2141 | STORE_JUMP (jump, b, laststart); | 2143 | STORE_JUMP (jump, b, laststart); |
| @@ -3189,10 +3191,10 @@ insert_op2 (op, loc, arg1, arg2, end) | |||
| 3189 | 3191 | ||
| 3190 | static boolean | 3192 | static boolean |
| 3191 | at_begline_loc_p (pattern, p, syntax) | 3193 | at_begline_loc_p (pattern, p, syntax) |
| 3192 | const char *pattern, *p; | 3194 | re_char *pattern, *p; |
| 3193 | reg_syntax_t syntax; | 3195 | reg_syntax_t syntax; |
| 3194 | { | 3196 | { |
| 3195 | const char *prev = p - 2; | 3197 | re_char *prev = p - 2; |
| 3196 | boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\'; | 3198 | boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\'; |
| 3197 | 3199 | ||
| 3198 | return | 3200 | return |
| @@ -3208,12 +3210,12 @@ at_begline_loc_p (pattern, p, syntax) | |||
| 3208 | 3210 | ||
| 3209 | static boolean | 3211 | static boolean |
| 3210 | at_endline_loc_p (p, pend, syntax) | 3212 | at_endline_loc_p (p, pend, syntax) |
| 3211 | const char *p, *pend; | 3213 | re_char *p, *pend; |
| 3212 | int syntax; | 3214 | int syntax; |
| 3213 | { | 3215 | { |
| 3214 | const char *next = p; | 3216 | re_char *next = p; |
| 3215 | boolean next_backslash = *next == '\\'; | 3217 | boolean next_backslash = *next == '\\'; |
| 3216 | const char *next_next = p + 1 < pend ? p + 1 : 0; | 3218 | re_char *next_next = p + 1 < pend ? p + 1 : 0; |
| 3217 | 3219 | ||
| 3218 | return | 3220 | return |
| 3219 | /* Before a subexpression? */ | 3221 | /* Before a subexpression? */ |
| @@ -3344,7 +3346,7 @@ re_compile_fastmap (bufp) | |||
| 3344 | /* Reset for next path. */ | 3346 | /* Reset for next path. */ |
| 3345 | path_can_be_null = true; | 3347 | path_can_be_null = true; |
| 3346 | 3348 | ||
| 3347 | p = POP_PATTERN_OP (); | 3349 | p = (unsigned char*) POP_PATTERN_OP (); |
| 3348 | 3350 | ||
| 3349 | continue; | 3351 | continue; |
| 3350 | } | 3352 | } |
| @@ -3814,9 +3816,9 @@ re_search (bufp, string, size, startpos, range, regs) | |||
| 3814 | stack overflow). */ | 3816 | stack overflow). */ |
| 3815 | 3817 | ||
| 3816 | int | 3818 | int |
| 3817 | re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) | 3819 | re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop) |
| 3818 | struct re_pattern_buffer *bufp; | 3820 | struct re_pattern_buffer *bufp; |
| 3819 | const char *string1, *string2; | 3821 | const char *str1, *str2; |
| 3820 | int size1, size2; | 3822 | int size1, size2; |
| 3821 | int startpos; | 3823 | int startpos; |
| 3822 | int range; | 3824 | int range; |
| @@ -3824,6 +3826,8 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) | |||
| 3824 | int stop; | 3826 | int stop; |
| 3825 | { | 3827 | { |
| 3826 | int val; | 3828 | int val; |
| 3829 | re_char *string1 = (re_char*) str1; | ||
| 3830 | re_char *string2 = (re_char*) str2; | ||
| 3827 | register char *fastmap = bufp->fastmap; | 3831 | register char *fastmap = bufp->fastmap; |
| 3828 | register RE_TRANSLATE_TYPE translate = bufp->translate; | 3832 | register RE_TRANSLATE_TYPE translate = bufp->translate; |
| 3829 | int total_size = size1 + size2; | 3833 | int total_size = size1 + size2; |
| @@ -3907,7 +3911,7 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) | |||
| 3907 | the first null string. */ | 3911 | the first null string. */ |
| 3908 | if (fastmap && startpos < total_size && !bufp->can_be_null) | 3912 | if (fastmap && startpos < total_size && !bufp->can_be_null) |
| 3909 | { | 3913 | { |
| 3910 | register const char *d; | 3914 | register re_char *d; |
| 3911 | register unsigned int buf_ch; | 3915 | register unsigned int buf_ch; |
| 3912 | 3916 | ||
| 3913 | d = POS_ADDR_VSTRING (startpos); | 3917 | d = POS_ADDR_VSTRING (startpos); |
| @@ -3942,15 +3946,14 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) | |||
| 3942 | } | 3946 | } |
| 3943 | else | 3947 | else |
| 3944 | while (range > lim | 3948 | while (range > lim |
| 3945 | && !fastmap[(unsigned char) | 3949 | && !fastmap[RE_TRANSLATE (translate, *d)]) |
| 3946 | RE_TRANSLATE (translate, (unsigned char) *d)]) | ||
| 3947 | { | 3950 | { |
| 3948 | d++; | 3951 | d++; |
| 3949 | range--; | 3952 | range--; |
| 3950 | } | 3953 | } |
| 3951 | } | 3954 | } |
| 3952 | else | 3955 | else |
| 3953 | while (range > lim && !fastmap[(unsigned char) *d]) | 3956 | while (range > lim && !fastmap[*d]) |
| 3954 | { | 3957 | { |
| 3955 | d++; | 3958 | d++; |
| 3956 | range--; | 3959 | range--; |
| @@ -4001,10 +4004,8 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) | |||
| 4001 | /* Update STARTPOS to the next character boundary. */ | 4004 | /* Update STARTPOS to the next character boundary. */ |
| 4002 | if (multibyte) | 4005 | if (multibyte) |
| 4003 | { | 4006 | { |
| 4004 | const unsigned char *p | 4007 | re_char *p = POS_ADDR_VSTRING (startpos); |
| 4005 | = (const unsigned char *) POS_ADDR_VSTRING (startpos); | 4008 | re_char *pend = STOP_ADDR_VSTRING (startpos); |
| 4006 | const unsigned char *pend | ||
| 4007 | = (const unsigned char *) STOP_ADDR_VSTRING (startpos); | ||
| 4008 | int len = MULTIBYTE_FORM_LENGTH (p, pend - p); | 4009 | int len = MULTIBYTE_FORM_LENGTH (p, pend - p); |
| 4009 | 4010 | ||
| 4010 | range -= len; | 4011 | range -= len; |
| @@ -4026,8 +4027,7 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) | |||
| 4026 | /* Update STARTPOS to the previous character boundary. */ | 4027 | /* Update STARTPOS to the previous character boundary. */ |
| 4027 | if (multibyte) | 4028 | if (multibyte) |
| 4028 | { | 4029 | { |
| 4029 | const unsigned char *p | 4030 | re_char *p = POS_ADDR_VSTRING (startpos); |
| 4030 | = (const unsigned char *) POS_ADDR_VSTRING (startpos); | ||
| 4031 | int len = 0; | 4031 | int len = 0; |
| 4032 | 4032 | ||
| 4033 | /* Find the head of multibyte form. */ | 4033 | /* Find the head of multibyte form. */ |
| @@ -4394,7 +4394,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4394 | static int | 4394 | static int |
| 4395 | re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | 4395 | re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) |
| 4396 | struct re_pattern_buffer *bufp; | 4396 | struct re_pattern_buffer *bufp; |
| 4397 | const char *string1, *string2; | 4397 | re_char *string1, *string2; |
| 4398 | int size1, size2; | 4398 | int size1, size2; |
| 4399 | int pos; | 4399 | int pos; |
| 4400 | struct re_registers *regs; | 4400 | struct re_registers *regs; |
| @@ -4402,17 +4402,18 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4402 | { | 4402 | { |
| 4403 | /* General temporaries. */ | 4403 | /* General temporaries. */ |
| 4404 | int mcnt; | 4404 | int mcnt; |
| 4405 | boolean not; | ||
| 4405 | unsigned char *p1; | 4406 | unsigned char *p1; |
| 4406 | 4407 | ||
| 4407 | /* Just past the end of the corresponding string. */ | 4408 | /* Just past the end of the corresponding string. */ |
| 4408 | const char *end1, *end2; | 4409 | re_char *end1, *end2; |
| 4409 | 4410 | ||
| 4410 | /* Pointers into string1 and string2, just past the last characters in | 4411 | /* Pointers into string1 and string2, just past the last characters in |
| 4411 | each to consider matching. */ | 4412 | each to consider matching. */ |
| 4412 | const char *end_match_1, *end_match_2; | 4413 | re_char *end_match_1, *end_match_2; |
| 4413 | 4414 | ||
| 4414 | /* Where we are in the data, and the end of the current string. */ | 4415 | /* Where we are in the data, and the end of the current string. */ |
| 4415 | const char *d, *dend; | 4416 | re_char *d, *dend; |
| 4416 | 4417 | ||
| 4417 | /* Where we are in the pattern, and the end of the pattern. */ | 4418 | /* Where we are in the pattern, and the end of the pattern. */ |
| 4418 | unsigned char *p = bufp->buffer; | 4419 | unsigned char *p = bufp->buffer; |
| @@ -4456,7 +4457,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4456 | stopped matching the regnum-th subexpression. (The zeroth register | 4457 | stopped matching the regnum-th subexpression. (The zeroth register |
| 4457 | keeps track of what the whole pattern matches.) */ | 4458 | keeps track of what the whole pattern matches.) */ |
| 4458 | #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ | 4459 | #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ |
| 4459 | const char **regstart, **regend; | 4460 | re_char **regstart, **regend; |
| 4460 | #endif | 4461 | #endif |
| 4461 | 4462 | ||
| 4462 | /* The following record the register info as found in the above | 4463 | /* The following record the register info as found in the above |
| @@ -4465,7 +4466,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4465 | turn happens only if we have not yet matched the entire string. */ | 4466 | turn happens only if we have not yet matched the entire string. */ |
| 4466 | unsigned best_regs_set = false; | 4467 | unsigned best_regs_set = false; |
| 4467 | #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ | 4468 | #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ |
| 4468 | const char **best_regstart, **best_regend; | 4469 | re_char **best_regstart, **best_regend; |
| 4469 | #endif | 4470 | #endif |
| 4470 | 4471 | ||
| 4471 | /* Logically, this is `best_regend[0]'. But we don't want to have to | 4472 | /* Logically, this is `best_regend[0]'. But we don't want to have to |
| @@ -4476,7 +4477,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4476 | the end of the best match so far in a separate variable. We | 4477 | the end of the best match so far in a separate variable. We |
| 4477 | initialize this to NULL so that when we backtrack the first time | 4478 | initialize this to NULL so that when we backtrack the first time |
| 4478 | and need to test it, it's not garbage. */ | 4479 | and need to test it, it's not garbage. */ |
| 4479 | const char *match_end = NULL; | 4480 | re_char *match_end = NULL; |
| 4480 | 4481 | ||
| 4481 | #ifdef DEBUG | 4482 | #ifdef DEBUG |
| 4482 | /* Counts the total number of registers pushed. */ | 4483 | /* Counts the total number of registers pushed. */ |
| @@ -4495,10 +4496,10 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4495 | array indexing. We should fix this. */ | 4496 | array indexing. We should fix this. */ |
| 4496 | if (bufp->re_nsub) | 4497 | if (bufp->re_nsub) |
| 4497 | { | 4498 | { |
| 4498 | regstart = REGEX_TALLOC (num_regs, const char *); | 4499 | regstart = REGEX_TALLOC (num_regs, re_char *); |
| 4499 | regend = REGEX_TALLOC (num_regs, const char *); | 4500 | regend = REGEX_TALLOC (num_regs, re_char *); |
| 4500 | best_regstart = REGEX_TALLOC (num_regs, const char *); | 4501 | best_regstart = REGEX_TALLOC (num_regs, re_char *); |
| 4501 | best_regend = REGEX_TALLOC (num_regs, const char *); | 4502 | best_regend = REGEX_TALLOC (num_regs, re_char *); |
| 4502 | 4503 | ||
| 4503 | if (!(regstart && regend && best_regstart && best_regend)) | 4504 | if (!(regstart && regend && best_regstart && best_regend)) |
| 4504 | { | 4505 | { |
| @@ -4795,8 +4796,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4795 | do | 4796 | do |
| 4796 | { | 4797 | { |
| 4797 | PREFETCH (); | 4798 | PREFETCH (); |
| 4798 | if ((unsigned char) RE_TRANSLATE (translate, (unsigned char) *d) | 4799 | if (RE_TRANSLATE (translate, *d) != *p++) |
| 4799 | != (unsigned char) *p++) | ||
| 4800 | goto fail; | 4800 | goto fail; |
| 4801 | d++; | 4801 | d++; |
| 4802 | } | 4802 | } |
| @@ -4807,7 +4807,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4807 | do | 4807 | do |
| 4808 | { | 4808 | { |
| 4809 | PREFETCH (); | 4809 | PREFETCH (); |
| 4810 | if (*d++ != (char) *p++) goto fail; | 4810 | if (*d++ != *p++) goto fail; |
| 4811 | } | 4811 | } |
| 4812 | while (--mcnt); | 4812 | while (--mcnt); |
| 4813 | } | 4813 | } |
| @@ -4830,7 +4830,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4830 | else | 4830 | else |
| 4831 | #endif /* not emacs */ | 4831 | #endif /* not emacs */ |
| 4832 | { | 4832 | { |
| 4833 | buf_ch = (unsigned char) *d; | 4833 | buf_ch = *d; |
| 4834 | buf_charlen = 1; | 4834 | buf_charlen = 1; |
| 4835 | } | 4835 | } |
| 4836 | 4836 | ||
| @@ -4869,7 +4869,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4869 | DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : ""); | 4869 | DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : ""); |
| 4870 | 4870 | ||
| 4871 | PREFETCH (); | 4871 | PREFETCH (); |
| 4872 | c = (unsigned char) *d; | 4872 | c = *d; |
| 4873 | 4873 | ||
| 4874 | range_table_exists = CHARSET_RANGE_TABLE_EXISTS_P (&p[-1]); | 4874 | range_table_exists = CHARSET_RANGE_TABLE_EXISTS_P (&p[-1]); |
| 4875 | 4875 | ||
| @@ -4982,7 +4982,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4982 | followed by the numeric value of <digit> as the register number. */ | 4982 | followed by the numeric value of <digit> as the register number. */ |
| 4983 | case duplicate: | 4983 | case duplicate: |
| 4984 | { | 4984 | { |
| 4985 | register const char *d2, *dend2; | 4985 | register re_char *d2, *dend2; |
| 4986 | int regno = *p++; /* Get which register to match against. */ | 4986 | int regno = *p++; /* Get which register to match against. */ |
| 4987 | DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno); | 4987 | DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno); |
| 4988 | 4988 | ||
| @@ -5282,31 +5282,33 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5282 | } | 5282 | } |
| 5283 | 5283 | ||
| 5284 | case wordbound: | 5284 | case wordbound: |
| 5285 | DEBUG_PRINT1 ("EXECUTING wordbound.\n"); | 5285 | case notwordbound: |
| 5286 | not = (re_opcode_t) *(p - 1) == notwordbound; | ||
| 5287 | DEBUG_PRINT2 ("EXECUTING %swordbound.\n", not?"not":""); | ||
| 5286 | 5288 | ||
| 5287 | /* We SUCCEED in one of the following cases: */ | 5289 | /* We SUCCEED in one of the following cases: */ |
| 5288 | 5290 | ||
| 5289 | /* Case 1: D is at the beginning or the end of string. */ | 5291 | /* Case 1: D is at the beginning or the end of string. */ |
| 5290 | if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)) | 5292 | if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)) |
| 5291 | break; | 5293 | not = !not; |
| 5292 | else | 5294 | else |
| 5293 | { | 5295 | { |
| 5294 | /* C1 is the character before D, S1 is the syntax of C1, C2 | 5296 | /* C1 is the character before D, S1 is the syntax of C1, C2 |
| 5295 | is the character at D, and S2 is the syntax of C2. */ | 5297 | is the character at D, and S2 is the syntax of C2. */ |
| 5296 | int c1, c2, s1, s2; | 5298 | int c1, c2, s1, s2; |
| 5297 | int pos1 = PTR_TO_OFFSET (d - 1); | ||
| 5298 | int charpos; | ||
| 5299 | |||
| 5300 | GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); | ||
| 5301 | GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2); | ||
| 5302 | #ifdef emacs | 5299 | #ifdef emacs |
| 5303 | charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1); | 5300 | int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (PTR_TO_OFFSET (d - 1)); |
| 5304 | UPDATE_SYNTAX_TABLE (charpos); | 5301 | UPDATE_SYNTAX_TABLE (charpos); |
| 5305 | #endif | 5302 | #endif |
| 5303 | /* FIXME: This does a STRING_CHAR even for unibyte buffers. */ | ||
| 5304 | GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); | ||
| 5306 | s1 = SYNTAX (c1); | 5305 | s1 = SYNTAX (c1); |
| 5307 | #ifdef emacs | 5306 | #ifdef emacs |
| 5308 | UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1); | 5307 | UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1); |
| 5309 | #endif | 5308 | #endif |
| 5309 | PREFETCH (); | ||
| 5310 | /* FIXME: This does a STRING_CHAR even for unibyte buffers. */ | ||
| 5311 | c2 = STRING_CHAR (d, dend - d); | ||
| 5310 | s2 = SYNTAX (c2); | 5312 | s2 = SYNTAX (c2); |
| 5311 | 5313 | ||
| 5312 | if (/* Case 2: Only one of S1 and S2 is Sword. */ | 5314 | if (/* Case 2: Only one of S1 and S2 is Sword. */ |
| @@ -5314,46 +5316,12 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5314 | /* Case 3: Both of S1 and S2 are Sword, and macro | 5316 | /* Case 3: Both of S1 and S2 are Sword, and macro |
| 5315 | WORD_BOUNDARY_P (C1, C2) returns nonzero. */ | 5317 | WORD_BOUNDARY_P (C1, C2) returns nonzero. */ |
| 5316 | || ((s1 == Sword) && WORD_BOUNDARY_P (c1, c2))) | 5318 | || ((s1 == Sword) && WORD_BOUNDARY_P (c1, c2))) |
| 5319 | not = !not; | ||
| 5320 | } | ||
| 5321 | if (not) | ||
| 5317 | break; | 5322 | break; |
| 5318 | } | ||
| 5319 | goto fail; | ||
| 5320 | |||
| 5321 | case notwordbound: | ||
| 5322 | DEBUG_PRINT1 ("EXECUTING notwordbound.\n"); | ||
| 5323 | |||
| 5324 | /* We FAIL in one of the following cases: */ | ||
| 5325 | |||
| 5326 | /* Case 1: D is at the beginning or the end of string. */ | ||
| 5327 | if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)) | ||
| 5328 | goto fail; | ||
| 5329 | else | 5323 | else |
| 5330 | { | ||
| 5331 | /* C1 is the character before D, S1 is the syntax of C1, C2 | ||
| 5332 | is the character at D, and S2 is the syntax of C2. */ | ||
| 5333 | int c1, c2, s1, s2; | ||
| 5334 | int pos1 = PTR_TO_OFFSET (d - 1); | ||
| 5335 | int charpos; | ||
| 5336 | |||
| 5337 | GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); | ||
| 5338 | GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2); | ||
| 5339 | #ifdef emacs | ||
| 5340 | charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1); | ||
| 5341 | UPDATE_SYNTAX_TABLE (charpos); | ||
| 5342 | #endif | ||
| 5343 | s1 = SYNTAX (c1); | ||
| 5344 | #ifdef emacs | ||
| 5345 | UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1); | ||
| 5346 | #endif | ||
| 5347 | s2 = SYNTAX (c2); | ||
| 5348 | |||
| 5349 | if (/* Case 2: Only one of S1 and S2 is Sword. */ | ||
| 5350 | ((s1 == Sword) != (s2 == Sword)) | ||
| 5351 | /* Case 3: Both of S1 and S2 are Sword, and macro | ||
| 5352 | WORD_BOUNDARY_P (C1, C2) returns nonzero. */ | ||
| 5353 | || ((s1 == Sword) && WORD_BOUNDARY_P (c1, c2))) | ||
| 5354 | goto fail; | 5324 | goto fail; |
| 5355 | } | ||
| 5356 | break; | ||
| 5357 | 5325 | ||
| 5358 | case wordbeg: | 5326 | case wordbeg: |
| 5359 | DEBUG_PRINT1 ("EXECUTING wordbeg.\n"); | 5327 | DEBUG_PRINT1 ("EXECUTING wordbeg.\n"); |
| @@ -5371,7 +5339,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5371 | int pos1 = PTR_TO_OFFSET (d); | 5339 | int pos1 = PTR_TO_OFFSET (d); |
| 5372 | int charpos; | 5340 | int charpos; |
| 5373 | 5341 | ||
| 5374 | GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2); | 5342 | PREFETCH (); |
| 5343 | c2 = STRING_CHAR (d, dend - d); | ||
| 5375 | #ifdef emacs | 5344 | #ifdef emacs |
| 5376 | charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1); | 5345 | charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1); |
| 5377 | UPDATE_SYNTAX_TABLE (charpos); | 5346 | UPDATE_SYNTAX_TABLE (charpos); |
| @@ -5429,7 +5398,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5429 | /* Case 3: D is not at the end of string ... */ | 5398 | /* Case 3: D is not at the end of string ... */ |
| 5430 | if (!AT_STRINGS_END (d)) | 5399 | if (!AT_STRINGS_END (d)) |
| 5431 | { | 5400 | { |
| 5432 | GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2); | 5401 | PREFETCH (); |
| 5402 | c2 = STRING_CHAR (d, dend - d); | ||
| 5433 | #ifdef emacs | 5403 | #ifdef emacs |
| 5434 | UPDATE_SYNTAX_TABLE_FORWARD (charpos); | 5404 | UPDATE_SYNTAX_TABLE_FORWARD (charpos); |
| 5435 | #endif | 5405 | #endif |
| @@ -5446,19 +5416,19 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5446 | #ifdef emacs | 5416 | #ifdef emacs |
| 5447 | case before_dot: | 5417 | case before_dot: |
| 5448 | DEBUG_PRINT1 ("EXECUTING before_dot.\n"); | 5418 | DEBUG_PRINT1 ("EXECUTING before_dot.\n"); |
| 5449 | if (PTR_BYTE_POS ((unsigned char *) d) >= PT_BYTE) | 5419 | if (PTR_BYTE_POS (d) >= PT_BYTE) |
| 5450 | goto fail; | 5420 | goto fail; |
| 5451 | break; | 5421 | break; |
| 5452 | 5422 | ||
| 5453 | case at_dot: | 5423 | case at_dot: |
| 5454 | DEBUG_PRINT1 ("EXECUTING at_dot.\n"); | 5424 | DEBUG_PRINT1 ("EXECUTING at_dot.\n"); |
| 5455 | if (PTR_BYTE_POS ((unsigned char *) d) != PT_BYTE) | 5425 | if (PTR_BYTE_POS (d) != PT_BYTE) |
| 5456 | goto fail; | 5426 | goto fail; |
| 5457 | break; | 5427 | break; |
| 5458 | 5428 | ||
| 5459 | case after_dot: | 5429 | case after_dot: |
| 5460 | DEBUG_PRINT1 ("EXECUTING after_dot.\n"); | 5430 | DEBUG_PRINT1 ("EXECUTING after_dot.\n"); |
| 5461 | if (PTR_BYTE_POS ((unsigned char *) d) <= PT_BYTE) | 5431 | if (PTR_BYTE_POS (d) <= PT_BYTE) |
| 5462 | goto fail; | 5432 | goto fail; |
| 5463 | break; | 5433 | break; |
| 5464 | 5434 | ||
| @@ -5592,7 +5562,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5592 | #endif | 5562 | #endif |
| 5593 | if (!FAIL_STACK_EMPTY ()) | 5563 | if (!FAIL_STACK_EMPTY ()) |
| 5594 | { | 5564 | { |
| 5595 | char *str; | 5565 | re_char *str; |
| 5596 | unsigned char *pat; | 5566 | unsigned char *pat; |
| 5597 | /* A restart point is known. Restore to that state. */ | 5567 | /* A restart point is known. Restore to that state. */ |
| 5598 | DEBUG_PRINT1 ("\nFAIL:\n"); | 5568 | DEBUG_PRINT1 ("\nFAIL:\n"); |