diff options
| author | Stefan Monnier | 2000-03-16 02:55:33 +0000 |
|---|---|---|
| committer | Stefan Monnier | 2000-03-16 02:55:33 +0000 |
| commit | 99633e97e9550fdf274c64f213de0d59c759bc98 (patch) | |
| tree | 478e695c3809a60d6427ce7af573934c1cc2e607 /src | |
| parent | fd0e837bd18160028ccdc7dcdc6d436b7552d2a0 (diff) | |
| download | emacs-99633e97e9550fdf274c64f213de0d59c759bc98.tar.gz emacs-99633e97e9550fdf274c64f213de0d59c759bc98.zip | |
(re_match_2): Fix string shortening (to fit `stop') to make sure
POINTER_TO_OFFSET gives the same value before and after PREFETCH.
Use `dfail' to guarantee "atomic" matching.
(PTR_TO_OFFSET): Use POINTER_TO_OFFSET.
(debug): Now only active if > 0 rather than if != 0.
(DEBUG_*): Update for the new meaning of `debug'.
(print_partial_compiled_pattern): Add missing `succeed' case.
Use CHARSET_* macros in the charset(_not) branch.
Fix off-by-two bugs in `succeed_n', `jump_n' and `set_number_at'.
(store_op1, store_op2, insert_op1, insert_op2)
(at_begline_loc_p, at_endline_loc_p): Add prototypes.
(group_in_compile_stack): Move to after its arguments' types are declared
and add a prototype.
(PATFETCH): Define in terms of PATFETCH_RAW.
(GET_UNSIGNED_NUMBER): Add the usual `do { ... } while(0)' wrapper.
(QUIT): Redefine as a nop except for NTemacs.
(regex_compile): Handle an interval {,M} as if it were {0,M}.
Fix indentation of the greedy-op and shy-group code.
(at_(beg|end)line_loc_p): Fix arguments' types.
(re_compile_fastmap): Ifdef out failure_stack_ptr to shut up gcc.
(re_search_2): Use POS_AS_IN_BUFFER. Simplify `room' computation.
(MATCHING_IN_FIRST_STRING): Remove.
(re_match_2): Use POS_AS_IN_BUFFER.
Ifdef out failure_stack_ptr to shut up gcc.
Use FIRST_STRING_P and POINTER_TO_OFFSET.
Use QUIT unconditionally.
Diffstat (limited to 'src')
| -rw-r--r-- | src/ChangeLog | 29 | ||||
| -rw-r--r-- | src/regex.c | 394 |
2 files changed, 240 insertions, 183 deletions
diff --git a/src/ChangeLog b/src/ChangeLog index 097e72d13ae..58990675315 100644 --- a/src/ChangeLog +++ b/src/ChangeLog | |||
| @@ -1,3 +1,32 @@ | |||
| 1 | 2000-03-15 Stefan Monnier <monnier@cs.yale.edu> | ||
| 2 | |||
| 3 | * regex.c (re_match_2): Fix string shortening (to fit `stop') to make sure | ||
| 4 | POINTER_TO_OFFSET gives the same value before and after PREFETCH. | ||
| 5 | Use `dfail' to guarantee "atomic" matching. | ||
| 6 | (PTR_TO_OFFSET): Use POINTER_TO_OFFSET. | ||
| 7 | (debug): Now only active if > 0 rather than if != 0. | ||
| 8 | (DEBUG_*): Update for the new meaning of `debug'. | ||
| 9 | (print_partial_compiled_pattern): Add missing `succeed' case. | ||
| 10 | Use CHARSET_* macros in the charset(_not) branch. | ||
| 11 | Fix off-by-two bugs in `succeed_n', `jump_n' and `set_number_at'. | ||
| 12 | (store_op1, store_op2, insert_op1, insert_op2) | ||
| 13 | (at_begline_loc_p, at_endline_loc_p): Add prototype. | ||
| 14 | (group_in_compile_stack): Move to after its arg's types are declared | ||
| 15 | and add a prototype. | ||
| 16 | (PATFETCH): Define in terms of PATFETCH_RAW. | ||
| 17 | (GET_UNSIGNED_NUMBER): Add the usual `do { ... } while(0)' wrapper. | ||
| 18 | (QUIT): Redefine as a nop except for NTemacs. | ||
| 19 | (regex_compile): Handle intervals {,M} as if it was {0,M}. | ||
| 20 | Fix indentation of the greedy-op and shy-group code. | ||
| 21 | (at_(beg|end)line_loc_p): Fix argument's types. | ||
| 22 | (re_compile_fastmap): Ifdef out failure_stack_ptr to shut up gcc. | ||
| 23 | (re_search_2): Use POS_AS_IN_BUFFER. Simplify `room' computation. | ||
| 24 | (MATCHING_IN_FIRST_STRING): Remove. | ||
| 25 | (re_match_2): Use POS_AS_IN_BUFFER. | ||
| 26 | Ifdef out failure_stack_ptr to shut up gcc. | ||
| 27 | Use FIRST_STRING_P and POINTER_TO_OFFSET. | ||
| 28 | Use QUIT unconditionally. | ||
| 29 | |||
| 1 | 2000-03-15 Gerd Moellmann <gerd@gnu.org> | 30 | 2000-03-15 Gerd Moellmann <gerd@gnu.org> |
| 2 | 31 | ||
| 3 | * minibuf.c (Fminibuffer_complete): Set point to ZV if finding | 32 | * minibuf.c (Fminibuffer_complete): Set point to ZV if finding |
diff --git a/src/regex.c b/src/regex.c index c118af0fdb6..82c5d76f4dc 100644 --- a/src/regex.c +++ b/src/regex.c | |||
| @@ -37,9 +37,7 @@ | |||
| 37 | 37 | ||
| 38 | #ifdef emacs | 38 | #ifdef emacs |
| 39 | /* Converts the pointer to the char to BEG-based offset from the start. */ | 39 | /* Converts the pointer to the char to BEG-based offset from the start. */ |
| 40 | #define PTR_TO_OFFSET(d) \ | 40 | #define PTR_TO_OFFSET(d) POS_AS_IN_BUFFER (POINTER_TO_OFFSET (d)) |
| 41 | POS_AS_IN_BUFFER (MATCHING_IN_FIRST_STRING \ | ||
| 42 | ? (d) - string1 : (d) - (string2 - size1)) | ||
| 43 | #define POS_AS_IN_BUFFER(p) ((p) + (NILP (re_match_object) || BUFFERP (re_match_object))) | 41 | #define POS_AS_IN_BUFFER(p) ((p) + (NILP (re_match_object) || BUFFERP (re_match_object))) |
| 44 | #else | 42 | #else |
| 45 | #define PTR_TO_OFFSET(d) 0 | 43 | #define PTR_TO_OFFSET(d) 0 |
| @@ -767,17 +765,17 @@ extract_number_and_incr (destination, source) | |||
| 767 | /* It is useful to test things that ``must'' be true when debugging. */ | 765 | /* It is useful to test things that ``must'' be true when debugging. */ |
| 768 | #include <assert.h> | 766 | #include <assert.h> |
| 769 | 767 | ||
| 770 | static int debug = 0; | 768 | static int debug = -100000; |
| 771 | 769 | ||
| 772 | #define DEBUG_STATEMENT(e) e | 770 | #define DEBUG_STATEMENT(e) e |
| 773 | #define DEBUG_PRINT1(x) if (debug) printf (x) | 771 | #define DEBUG_PRINT1(x) if (debug > 0) printf (x) |
| 774 | #define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2) | 772 | #define DEBUG_PRINT2(x1, x2) if (debug > 0) printf (x1, x2) |
| 775 | #define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3) | 773 | #define DEBUG_PRINT3(x1, x2, x3) if (debug > 0) printf (x1, x2, x3) |
| 776 | #define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4) | 774 | #define DEBUG_PRINT4(x1, x2, x3, x4) if (debug > 0) printf (x1, x2, x3, x4) |
| 777 | #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \ | 775 | #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \ |
| 778 | if (debug) print_partial_compiled_pattern (s, e) | 776 | if (debug > 0) print_partial_compiled_pattern (s, e) |
| 779 | #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \ | 777 | #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \ |
| 780 | if (debug) print_double_string (w, s1, sz1, s2, sz2) | 778 | if (debug > 0) print_double_string (w, s1, sz1, s2, sz2) |
| 781 | 779 | ||
| 782 | 780 | ||
| 783 | /* Print the fastmap in human-readable form. */ | 781 | /* Print the fastmap in human-readable form. */ |
| @@ -840,6 +838,10 @@ print_partial_compiled_pattern (start, end) | |||
| 840 | printf ("/no_op"); | 838 | printf ("/no_op"); |
| 841 | break; | 839 | break; |
| 842 | 840 | ||
| 841 | case succeed: | ||
| 842 | printf ("/succeed"); | ||
| 843 | break; | ||
| 844 | |||
| 843 | case exactn: | 845 | case exactn: |
| 844 | mcnt = *p++; | 846 | mcnt = *p++; |
| 845 | printf ("/exactn/%d", mcnt); | 847 | printf ("/exactn/%d", mcnt); |
| @@ -872,9 +874,8 @@ print_partial_compiled_pattern (start, end) | |||
| 872 | { | 874 | { |
| 873 | register int c, last = -100; | 875 | register int c, last = -100; |
| 874 | register int in_range = 0; | 876 | register int in_range = 0; |
| 875 | int length = *p & 0x7f; | 877 | int length = CHARSET_BITMAP_SIZE (p - 1); |
| 876 | int has_range_table = *p & 0x80; | 878 | int has_range_table = CHARSET_RANGE_TABLE_EXISTS_P (p - 1); |
| 877 | int range_length = p[length + 2] + p[length + 3] * 0x100; | ||
| 878 | 879 | ||
| 879 | printf ("/charset [%s", | 880 | printf ("/charset [%s", |
| 880 | (re_opcode_t) *(p - 1) == charset_not ? "^" : ""); | 881 | (re_opcode_t) *(p - 1) == charset_not ? "^" : ""); |
| @@ -904,20 +905,23 @@ print_partial_compiled_pattern (start, end) | |||
| 904 | last = c; | 905 | last = c; |
| 905 | } | 906 | } |
| 906 | 907 | ||
| 907 | p += 1 + length; | ||
| 908 | |||
| 909 | if (in_range) | 908 | if (in_range) |
| 910 | putchar (last); | 909 | putchar (last); |
| 911 | 910 | ||
| 912 | putchar (']'); | 911 | putchar (']'); |
| 913 | 912 | ||
| 914 | if (has_range_table) | 913 | p += 1 + length; |
| 915 | printf ("has-range-table"); | ||
| 916 | 914 | ||
| 917 | /* ??? Should print the range table; for now, | ||
| 918 | just skip it. */ | ||
| 919 | if (has_range_table) | 915 | if (has_range_table) |
| 920 | p += 4 + 6 * range_length; | 916 | { |
| 917 | int count; | ||
| 918 | printf ("has-range-table"); | ||
| 919 | |||
| 920 | /* ??? Should print the range table; for now, just skip it. */ | ||
| 921 | p += 2; /* skip range table bits */ | ||
| 922 | EXTRACT_NUMBER_AND_INCR (count, p); | ||
| 923 | p = CHARSET_RANGE_TABLE_END (p, count); | ||
| 924 | } | ||
| 921 | } | 925 | } |
| 922 | break; | 926 | break; |
| 923 | 927 | ||
| @@ -962,19 +966,19 @@ print_partial_compiled_pattern (start, end) | |||
| 962 | case succeed_n: | 966 | case succeed_n: |
| 963 | extract_number_and_incr (&mcnt, &p); | 967 | extract_number_and_incr (&mcnt, &p); |
| 964 | extract_number_and_incr (&mcnt2, &p); | 968 | extract_number_and_incr (&mcnt2, &p); |
| 965 | printf ("/succeed_n to %d, %d times", p + mcnt - start, mcnt2); | 969 | printf ("/succeed_n to %d, %d times", p - 2 + mcnt - start, mcnt2); |
| 966 | break; | 970 | break; |
| 967 | 971 | ||
| 968 | case jump_n: | 972 | case jump_n: |
| 969 | extract_number_and_incr (&mcnt, &p); | 973 | extract_number_and_incr (&mcnt, &p); |
| 970 | extract_number_and_incr (&mcnt2, &p); | 974 | extract_number_and_incr (&mcnt2, &p); |
| 971 | printf ("/jump_n to %d, %d times", p + mcnt - start, mcnt2); | 975 | printf ("/jump_n to %d, %d times", p - 2 + mcnt - start, mcnt2); |
| 972 | break; | 976 | break; |
| 973 | 977 | ||
| 974 | case set_number_at: | 978 | case set_number_at: |
| 975 | extract_number_and_incr (&mcnt, &p); | 979 | extract_number_and_incr (&mcnt, &p); |
| 976 | extract_number_and_incr (&mcnt2, &p); | 980 | extract_number_and_incr (&mcnt2, &p); |
| 977 | printf ("/set_number_at location %d to %d", p + mcnt - start, mcnt2); | 981 | printf ("/set_number_at location %d to %d", p - 2 + mcnt - start, mcnt2); |
| 978 | break; | 982 | break; |
| 979 | 983 | ||
| 980 | case wordbound: | 984 | case wordbound: |
| @@ -1515,10 +1519,19 @@ do { \ | |||
| 1515 | 1519 | ||
| 1516 | /* Subroutine declarations and macros for regex_compile. */ | 1520 | /* Subroutine declarations and macros for regex_compile. */ |
| 1517 | 1521 | ||
| 1518 | static void store_op1 (), store_op2 (); | 1522 | static void store_op1 _RE_ARGS((re_opcode_t op, unsigned char *loc, int arg)); |
| 1519 | static void insert_op1 (), insert_op2 (); | 1523 | static void store_op2 _RE_ARGS((re_opcode_t op, unsigned char *loc, |
| 1520 | static boolean at_begline_loc_p (), at_endline_loc_p (); | 1524 | int arg1, int arg2)); |
| 1521 | static boolean group_in_compile_stack (); | 1525 | static void insert_op1 _RE_ARGS((re_opcode_t op, unsigned char *loc, |
| 1526 | int arg, unsigned char *end)); | ||
| 1527 | static void insert_op2 _RE_ARGS((re_opcode_t op, unsigned char *loc, | ||
| 1528 | int arg1, int arg2, unsigned char *end)); | ||
| 1529 | static boolean at_begline_loc_p _RE_ARGS((const unsigned char *pattern, | ||
| 1530 | const unsigned char *p, | ||
| 1531 | reg_syntax_t syntax)); | ||
| 1532 | static boolean at_endline_loc_p _RE_ARGS((const unsigned char *p, | ||
| 1533 | const unsigned char *pend, | ||
| 1534 | reg_syntax_t syntax)); | ||
| 1522 | 1535 | ||
| 1523 | /* Fetch the next character in the uncompiled pattern---translating it | 1536 | /* Fetch the next character in the uncompiled pattern---translating it |
| 1524 | if necessary. Also cast from a signed character in the constant | 1537 | if necessary. Also cast from a signed character in the constant |
| @@ -1526,8 +1539,8 @@ static boolean group_in_compile_stack (); | |||
| 1526 | as an array index (in, e.g., `translate'). */ | 1539 | as an array index (in, e.g., `translate'). */ |
| 1527 | #ifndef PATFETCH | 1540 | #ifndef PATFETCH |
| 1528 | #define PATFETCH(c) \ | 1541 | #define PATFETCH(c) \ |
| 1529 | do {if (p == pend) return REG_EEND; \ | 1542 | do { \ |
| 1530 | c = *p++; \ | 1543 | PATFETCH_RAW (c); \ |
| 1531 | if (RE_TRANSLATE_P (translate)) c = RE_TRANSLATE (translate, c); \ | 1544 | if (RE_TRANSLATE_P (translate)) c = RE_TRANSLATE (translate, c); \ |
| 1532 | } while (0) | 1545 | } while (0) |
| 1533 | #endif | 1546 | #endif |
| @@ -1760,7 +1773,7 @@ struct range_table_work_area | |||
| 1760 | 1773 | ||
| 1761 | /* Get the next unsigned number in the uncompiled pattern. */ | 1774 | /* Get the next unsigned number in the uncompiled pattern. */ |
| 1762 | #define GET_UNSIGNED_NUMBER(num) \ | 1775 | #define GET_UNSIGNED_NUMBER(num) \ |
| 1763 | { if (p != pend) \ | 1776 | do { if (p != pend) \ |
| 1764 | { \ | 1777 | { \ |
| 1765 | PATFETCH (c); \ | 1778 | PATFETCH (c); \ |
| 1766 | while (ISDIGIT (c)) \ | 1779 | while (ISDIGIT (c)) \ |
| @@ -1773,7 +1786,7 @@ struct range_table_work_area | |||
| 1773 | PATFETCH (c); \ | 1786 | PATFETCH (c); \ |
| 1774 | } \ | 1787 | } \ |
| 1775 | } \ | 1788 | } \ |
| 1776 | } | 1789 | } while (0) |
| 1777 | 1790 | ||
| 1778 | #define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */ | 1791 | #define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */ |
| 1779 | 1792 | ||
| @@ -1787,6 +1800,12 @@ struct range_table_work_area | |||
| 1787 | || STREQ (string, "word") \ | 1800 | || STREQ (string, "word") \ |
| 1788 | || STREQ (string, "ascii") || STREQ (string, "nonascii") \ | 1801 | || STREQ (string, "ascii") || STREQ (string, "nonascii") \ |
| 1789 | || STREQ (string, "unibyte") || STREQ (string, "multibyte")) | 1802 | || STREQ (string, "unibyte") || STREQ (string, "multibyte")) |
| 1803 | |||
| 1804 | /* QUIT is only used on NTemacs. */ | ||
| 1805 | #if !defined (WINDOWSNT) || !defined (emacs) | ||
| 1806 | #undef QUIT | ||
| 1807 | #define QUIT | ||
| 1808 | #endif | ||
| 1790 | 1809 | ||
| 1791 | #ifndef MATCH_MAY_ALLOCATE | 1810 | #ifndef MATCH_MAY_ALLOCATE |
| 1792 | 1811 | ||
| @@ -1827,6 +1846,10 @@ regex_grow_registers (num_regs) | |||
| 1827 | 1846 | ||
| 1828 | #endif /* not MATCH_MAY_ALLOCATE */ | 1847 | #endif /* not MATCH_MAY_ALLOCATE */ |
| 1829 | 1848 | ||
| 1849 | static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type | ||
| 1850 | compile_stack, | ||
| 1851 | regnum_t regnum)); | ||
| 1852 | |||
| 1830 | /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX. | 1853 | /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX. |
| 1831 | Returns one of error codes defined in `regex.h', or zero for success. | 1854 | Returns one of error codes defined in `regex.h', or zero for success. |
| 1832 | 1855 | ||
| @@ -1927,9 +1950,9 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 1927 | struct range_table_work_area range_table_work; | 1950 | struct range_table_work_area range_table_work; |
| 1928 | 1951 | ||
| 1929 | #ifdef DEBUG | 1952 | #ifdef DEBUG |
| 1930 | /* debug = 1; */ | 1953 | debug++; |
| 1931 | DEBUG_PRINT1 ("\nCompiling pattern: "); | 1954 | DEBUG_PRINT1 ("\nCompiling pattern: "); |
| 1932 | if (debug) | 1955 | if (debug > 0) |
| 1933 | { | 1956 | { |
| 1934 | unsigned debug_count; | 1957 | unsigned debug_count; |
| 1935 | 1958 | ||
| @@ -2102,7 +2125,7 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2102 | } | 2125 | } |
| 2103 | 2126 | ||
| 2104 | /* If we get here, we found another repeat character. */ | 2127 | /* If we get here, we found another repeat character. */ |
| 2105 | } | 2128 | } |
| 2106 | 2129 | ||
| 2107 | /* Star, etc. applied to an empty pattern is equivalent | 2130 | /* Star, etc. applied to an empty pattern is equivalent |
| 2108 | to an empty pattern. */ | 2131 | to an empty pattern. */ |
| @@ -2113,62 +2136,62 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2113 | and also whether or not two or more matches is allowed. */ | 2136 | and also whether or not two or more matches is allowed. */ |
| 2114 | if (greedy) | 2137 | if (greedy) |
| 2115 | { | 2138 | { |
| 2116 | if (many_times_ok) | 2139 | if (many_times_ok) |
| 2117 | { /* More than one repetition is allowed, so put in at the | 2140 | { /* More than one repetition is allowed, so put in at the |
| 2118 | end a backward relative jump from `b' to before the next | 2141 | end a backward relative jump from `b' to before the next |
| 2119 | jump we're going to put in below (which jumps from | 2142 | jump we're going to put in below (which jumps from |
| 2120 | laststart to after this jump). | 2143 | laststart to after this jump). |
| 2121 | 2144 | ||
| 2122 | But if we are at the `*' in the exact sequence `.*\n', | 2145 | But if we are at the `*' in the exact sequence `.*\n', |
| 2123 | insert an unconditional jump backwards to the ., | 2146 | insert an unconditional jump backwards to the ., |
| 2124 | instead of the beginning of the loop. This way we only | 2147 | instead of the beginning of the loop. This way we only |
| 2125 | push a failure point once, instead of every time | 2148 | push a failure point once, instead of every time |
| 2126 | through the loop. */ | 2149 | through the loop. */ |
| 2127 | assert (p - 1 > pattern); | 2150 | assert (p - 1 > pattern); |
| 2128 | 2151 | ||
| 2129 | /* Allocate the space for the jump. */ | 2152 | /* Allocate the space for the jump. */ |
| 2130 | GET_BUFFER_SPACE (3); | 2153 | GET_BUFFER_SPACE (3); |
| 2131 | 2154 | ||
| 2132 | /* We know we are not at the first character of the pattern, | 2155 | /* We know we are not at the first character of the pattern, |
| 2133 | because laststart was nonzero. And we've already | 2156 | because laststart was nonzero. And we've already |
| 2134 | incremented `p', by the way, to be the character after | 2157 | incremented `p', by the way, to be the character after |
| 2135 | the `*'. Do we have to do something analogous here | 2158 | the `*'. Do we have to do something analogous here |
| 2136 | for null bytes, because of RE_DOT_NOT_NULL? */ | 2159 | for null bytes, because of RE_DOT_NOT_NULL? */ |
| 2137 | if (TRANSLATE (*(p - 2)) == TRANSLATE ('.') | 2160 | if (TRANSLATE (*(p - 2)) == TRANSLATE ('.') |
| 2138 | && zero_times_ok | 2161 | && zero_times_ok |
| 2139 | && p < pend | 2162 | && p < pend |
| 2140 | && TRANSLATE (*p) == TRANSLATE ('\n') | 2163 | && TRANSLATE (*p) == TRANSLATE ('\n') |
| 2141 | && !(syntax & RE_DOT_NEWLINE)) | 2164 | && !(syntax & RE_DOT_NEWLINE)) |
| 2142 | { /* We have .*\n. */ | 2165 | { /* We have .*\n. */ |
| 2143 | STORE_JUMP (jump, b, laststart); | 2166 | STORE_JUMP (jump, b, laststart); |
| 2144 | keep_string_p = true; | 2167 | keep_string_p = true; |
| 2145 | } | 2168 | } |
| 2146 | else | 2169 | else |
| 2147 | STORE_JUMP (jump, b, laststart - 3); | 2170 | STORE_JUMP (jump, b, laststart - 3); |
| 2148 | 2171 | ||
| 2149 | /* We've added more stuff to the buffer. */ | 2172 | /* We've added more stuff to the buffer. */ |
| 2150 | b += 3; | 2173 | b += 3; |
| 2151 | } | 2174 | } |
| 2152 | 2175 | ||
| 2153 | /* On failure, jump from laststart to b + 3, which will be the | 2176 | /* On failure, jump from laststart to b + 3, which will be the |
| 2154 | end of the buffer after this jump is inserted. */ | 2177 | end of the buffer after this jump is inserted. */ |
| 2155 | GET_BUFFER_SPACE (3); | 2178 | GET_BUFFER_SPACE (3); |
| 2156 | if (!zero_times_ok) | 2179 | if (!zero_times_ok) |
| 2157 | { | 2180 | { |
| 2158 | assert (many_times_ok); | 2181 | assert (many_times_ok); |
| 2159 | INSERT_JUMP (on_failure_jump_smart, b - 3, b + 3); | 2182 | INSERT_JUMP (on_failure_jump_smart, b - 3, b + 3); |
| 2160 | pending_exact = 0; | 2183 | pending_exact = 0; |
| 2161 | b += 3; | 2184 | b += 3; |
| 2162 | } | 2185 | } |
| 2163 | else | 2186 | else |
| 2164 | { | 2187 | { |
| 2165 | INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump | 2188 | INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump |
| 2166 | : !many_times_ok ? | 2189 | : !many_times_ok ? |
| 2167 | on_failure_jump : on_failure_jump_smart, | 2190 | on_failure_jump : on_failure_jump_smart, |
| 2168 | laststart, b + 3); | 2191 | laststart, b + 3); |
| 2169 | pending_exact = 0; | 2192 | pending_exact = 0; |
| 2170 | b += 3; | 2193 | b += 3; |
| 2171 | } | 2194 | } |
| 2172 | } | 2195 | } |
| 2173 | else /* not greedy */ | 2196 | else /* not greedy */ |
| 2174 | { /* I wish the greedy and non-greedy cases could be merged. */ | 2197 | { /* I wish the greedy and non-greedy cases could be merged. */ |
| @@ -2589,41 +2612,41 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2589 | regnum++; | 2612 | regnum++; |
| 2590 | } | 2613 | } |
| 2591 | 2614 | ||
| 2592 | if (COMPILE_STACK_FULL) | 2615 | if (COMPILE_STACK_FULL) |
| 2593 | { | 2616 | { |
| 2594 | RETALLOC (compile_stack.stack, compile_stack.size << 1, | 2617 | RETALLOC (compile_stack.stack, compile_stack.size << 1, |
| 2595 | compile_stack_elt_t); | 2618 | compile_stack_elt_t); |
| 2596 | if (compile_stack.stack == NULL) return REG_ESPACE; | 2619 | if (compile_stack.stack == NULL) return REG_ESPACE; |
| 2597 | 2620 | ||
| 2598 | compile_stack.size <<= 1; | 2621 | compile_stack.size <<= 1; |
| 2599 | } | 2622 | } |
| 2600 | 2623 | ||
| 2601 | /* These are the values to restore when we hit end of this | 2624 | /* These are the values to restore when we hit end of this |
| 2602 | group. They are all relative offsets, so that if the | 2625 | group. They are all relative offsets, so that if the |
| 2603 | whole pattern moves because of realloc, they will still | 2626 | whole pattern moves because of realloc, they will still |
| 2604 | be valid. */ | 2627 | be valid. */ |
| 2605 | COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; | 2628 | COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; |
| 2606 | COMPILE_STACK_TOP.fixup_alt_jump | 2629 | COMPILE_STACK_TOP.fixup_alt_jump |
| 2607 | = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; | 2630 | = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; |
| 2608 | COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer; | 2631 | COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer; |
| 2609 | COMPILE_STACK_TOP.regnum = shy ? -regnum : regnum; | 2632 | COMPILE_STACK_TOP.regnum = shy ? -regnum : regnum; |
| 2610 | 2633 | ||
| 2611 | /* Do not push a | 2634 | /* Do not push a |
| 2612 | start_memory for groups beyond the last one we can | 2635 | start_memory for groups beyond the last one we can |
| 2613 | represent in the compiled pattern. */ | 2636 | represent in the compiled pattern. */ |
| 2614 | if (regnum <= MAX_REGNUM && !shy) | 2637 | if (regnum <= MAX_REGNUM && !shy) |
| 2615 | BUF_PUSH_2 (start_memory, regnum); | 2638 | BUF_PUSH_2 (start_memory, regnum); |
| 2616 | 2639 | ||
| 2617 | compile_stack.avail++; | 2640 | compile_stack.avail++; |
| 2618 | 2641 | ||
| 2619 | fixup_alt_jump = 0; | 2642 | fixup_alt_jump = 0; |
| 2620 | laststart = 0; | 2643 | laststart = 0; |
| 2621 | begalt = b; | 2644 | begalt = b; |
| 2622 | /* If we've reached MAX_REGNUM groups, then this open | 2645 | /* If we've reached MAX_REGNUM groups, then this open |
| 2623 | won't actually generate any code, so we'll have to | 2646 | won't actually generate any code, so we'll have to |
| 2624 | clear pending_exact explicitly. */ | 2647 | clear pending_exact explicitly. */ |
| 2625 | pending_exact = 0; | 2648 | pending_exact = 0; |
| 2626 | break; | 2649 | break; |
| 2627 | } | 2650 | } |
| 2628 | 2651 | ||
| 2629 | case ')': | 2652 | case ')': |
| @@ -2737,7 +2760,7 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2737 | /* If got here, then the syntax allows intervals. */ | 2760 | /* If got here, then the syntax allows intervals. */ |
| 2738 | 2761 | ||
| 2739 | /* At least (most) this many matches must be made. */ | 2762 | /* At least (most) this many matches must be made. */ |
| 2740 | int lower_bound = -1, upper_bound = -1; | 2763 | int lower_bound = 0, upper_bound = -1; |
| 2741 | 2764 | ||
| 2742 | beg_interval = p - 1; | 2765 | beg_interval = p - 1; |
| 2743 | 2766 | ||
| @@ -3079,13 +3102,13 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 3079 | bufp->used = b - bufp->buffer; | 3102 | bufp->used = b - bufp->buffer; |
| 3080 | 3103 | ||
| 3081 | #ifdef DEBUG | 3104 | #ifdef DEBUG |
| 3082 | if (debug) | 3105 | if (debug > 0) |
| 3083 | { | 3106 | { |
| 3084 | re_compile_fastmap (bufp); | 3107 | re_compile_fastmap (bufp); |
| 3085 | DEBUG_PRINT1 ("\nCompiled pattern: \n"); | 3108 | DEBUG_PRINT1 ("\nCompiled pattern: \n"); |
| 3086 | print_compiled_pattern (bufp); | 3109 | print_compiled_pattern (bufp); |
| 3087 | /* debug = 0; */ | ||
| 3088 | } | 3110 | } |
| 3111 | debug--; | ||
| 3089 | #endif /* DEBUG */ | 3112 | #endif /* DEBUG */ |
| 3090 | 3113 | ||
| 3091 | #ifndef MATCH_MAY_ALLOCATE | 3114 | #ifndef MATCH_MAY_ALLOCATE |
| @@ -3191,7 +3214,7 @@ insert_op2 (op, loc, arg1, arg2, end) | |||
| 3191 | 3214 | ||
| 3192 | static boolean | 3215 | static boolean |
| 3193 | at_begline_loc_p (pattern, p, syntax) | 3216 | at_begline_loc_p (pattern, p, syntax) |
| 3194 | re_char *pattern, *p; | 3217 | const unsigned char *pattern, *p; |
| 3195 | reg_syntax_t syntax; | 3218 | reg_syntax_t syntax; |
| 3196 | { | 3219 | { |
| 3197 | re_char *prev = p - 2; | 3220 | re_char *prev = p - 2; |
| @@ -3210,8 +3233,8 @@ at_begline_loc_p (pattern, p, syntax) | |||
| 3210 | 3233 | ||
| 3211 | static boolean | 3234 | static boolean |
| 3212 | at_endline_loc_p (p, pend, syntax) | 3235 | at_endline_loc_p (p, pend, syntax) |
| 3213 | re_char *p, *pend; | 3236 | const unsigned char *p, *pend; |
| 3214 | int syntax; | 3237 | reg_syntax_t syntax; |
| 3215 | { | 3238 | { |
| 3216 | re_char *next = p; | 3239 | re_char *next = p; |
| 3217 | boolean next_backslash = *next == '\\'; | 3240 | boolean next_backslash = *next == '\\'; |
| @@ -3281,9 +3304,11 @@ re_compile_fastmap (bufp) | |||
| 3281 | unsigned char *p = pattern; | 3304 | unsigned char *p = pattern; |
| 3282 | register unsigned char *pend = pattern + size; | 3305 | register unsigned char *pend = pattern + size; |
| 3283 | 3306 | ||
| 3307 | #if defined (REL_ALLOC) && defined (REGEX_MALLOC) | ||
| 3284 | /* This holds the pointer to the failure stack, when | 3308 | /* This holds the pointer to the failure stack, when |
| 3285 | it is allocated relocatably. */ | 3309 | it is allocated relocatably. */ |
| 3286 | fail_stack_elt_t *failure_stack_ptr; | 3310 | fail_stack_elt_t *failure_stack_ptr; |
| 3311 | #endif | ||
| 3287 | 3312 | ||
| 3288 | /* Assume that each path through the pattern can be null until | 3313 | /* Assume that each path through the pattern can be null until |
| 3289 | proven otherwise. We set this false at the bottom of switch | 3314 | proven otherwise. We set this false at the bottom of switch |
| @@ -3882,8 +3907,7 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop) | |||
| 3882 | #ifdef emacs | 3907 | #ifdef emacs |
| 3883 | gl_state.object = re_match_object; | 3908 | gl_state.object = re_match_object; |
| 3884 | { | 3909 | { |
| 3885 | int adjpos = NILP (re_match_object) || BUFFERP (re_match_object); | 3910 | int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (startpos)); |
| 3886 | int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (startpos + adjpos); | ||
| 3887 | 3911 | ||
| 3888 | SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1); | 3912 | SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1); |
| 3889 | } | 3913 | } |
| @@ -3963,7 +3987,7 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop) | |||
| 3963 | } | 3987 | } |
| 3964 | else /* Searching backwards. */ | 3988 | else /* Searching backwards. */ |
| 3965 | { | 3989 | { |
| 3966 | int room = (size1 == 0 || startpos >= size1 | 3990 | int room = (startpos >= size1 |
| 3967 | ? size2 + size1 - startpos | 3991 | ? size2 + size1 - startpos |
| 3968 | : size1 - startpos); | 3992 | : size1 - startpos); |
| 3969 | 3993 | ||
| @@ -4064,10 +4088,6 @@ static int bcmp_translate (); | |||
| 4064 | ? ((regoff_t) ((ptr) - string1)) \ | 4088 | ? ((regoff_t) ((ptr) - string1)) \ |
| 4065 | : ((regoff_t) ((ptr) - string2 + size1))) | 4089 | : ((regoff_t) ((ptr) - string2 + size1))) |
| 4066 | 4090 | ||
| 4067 | /* Macros for dealing with the split strings in re_match_2. */ | ||
| 4068 | |||
| 4069 | #define MATCHING_IN_FIRST_STRING (dend == end_match_1) | ||
| 4070 | |||
| 4071 | /* Call before fetching a character with *d. This switches over to | 4091 | /* Call before fetching a character with *d. This switches over to |
| 4072 | string2 if necessary. */ | 4092 | string2 if necessary. */ |
| 4073 | #define PREFETCH() \ | 4093 | #define PREFETCH() \ |
| @@ -4377,9 +4397,8 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4377 | 4397 | ||
| 4378 | #ifdef emacs | 4398 | #ifdef emacs |
| 4379 | int charpos; | 4399 | int charpos; |
| 4380 | int adjpos = NILP (re_match_object) || BUFFERP (re_match_object); | ||
| 4381 | gl_state.object = re_match_object; | 4400 | gl_state.object = re_match_object; |
| 4382 | charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos + adjpos); | 4401 | charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (pos)); |
| 4383 | SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1); | 4402 | SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1); |
| 4384 | #endif | 4403 | #endif |
| 4385 | 4404 | ||
| @@ -4415,6 +4434,12 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4415 | /* Where we are in the data, and the end of the current string. */ | 4434 | /* Where we are in the data, and the end of the current string. */ |
| 4416 | re_char *d, *dend; | 4435 | re_char *d, *dend; |
| 4417 | 4436 | ||
| 4437 | /* Used sometimes to remember where we were before starting matching | ||
| 4438 | an operator so that we can go back in case of failure. This "atomic" | ||
| 4439 | behavior of matching opcodes is indispensable to the correctness | ||
| 4440 | of the on_failure_keep_string_jump optimization. */ | ||
| 4441 | re_char *dfail; | ||
| 4442 | |||
| 4418 | /* Where we are in the pattern, and the end of the pattern. */ | 4443 | /* Where we are in the pattern, and the end of the pattern. */ |
| 4419 | unsigned char *p = bufp->buffer; | 4444 | unsigned char *p = bufp->buffer; |
| 4420 | register unsigned char *pend = p + bufp->used; | 4445 | register unsigned char *pend = p + bufp->used; |
| @@ -4440,9 +4465,11 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4440 | unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0; | 4465 | unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0; |
| 4441 | #endif | 4466 | #endif |
| 4442 | 4467 | ||
| 4468 | #if defined (REL_ALLOC) && defined (REGEX_MALLOC) | ||
| 4443 | /* This holds the pointer to the failure stack, when | 4469 | /* This holds the pointer to the failure stack, when |
| 4444 | it is allocated relocatably. */ | 4470 | it is allocated relocatably. */ |
| 4445 | fail_stack_elt_t *failure_stack_ptr; | 4471 | fail_stack_elt_t *failure_stack_ptr; |
| 4472 | #endif | ||
| 4446 | 4473 | ||
| 4447 | /* We fill all the registers internally, independent of what we | 4474 | /* We fill all the registers internally, independent of what we |
| 4448 | return, for use in backreferences. The number here includes | 4475 | return, for use in backreferences. The number here includes |
| @@ -4526,9 +4553,16 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4526 | start_memory/stop_memory has been seen for. Also initialize the | 4553 | start_memory/stop_memory has been seen for. Also initialize the |
| 4527 | register information struct. */ | 4554 | register information struct. */ |
| 4528 | for (mcnt = 1; mcnt < num_regs; mcnt++) | 4555 | for (mcnt = 1; mcnt < num_regs; mcnt++) |
| 4556 | regstart[mcnt] = regend[mcnt] = REG_UNSET_VALUE; | ||
| 4557 | |||
| 4558 | /* Shorten strings to `stop'. */ | ||
| 4559 | if (stop <= size1) | ||
| 4529 | { | 4560 | { |
| 4530 | regstart[mcnt] = regend[mcnt] = REG_UNSET_VALUE; | 4561 | size1 = stop; |
| 4562 | size2 = 0; | ||
| 4531 | } | 4563 | } |
| 4564 | else if (stop <= size1 + size2) | ||
| 4565 | size2 = stop - size1; | ||
| 4532 | 4566 | ||
| 4533 | /* We move `string1' into `string2' if the latter's empty -- but not if | 4567 | /* We move `string1' into `string2' if the latter's empty -- but not if |
| 4534 | `string1' is null. */ | 4568 | `string1' is null. */ |
| @@ -4543,16 +4577,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4543 | end2 = string2 + size2; | 4577 | end2 = string2 + size2; |
| 4544 | 4578 | ||
| 4545 | /* Compute where to stop matching, within the two strings. */ | 4579 | /* Compute where to stop matching, within the two strings. */ |
| 4546 | if (stop <= size1) | 4580 | end_match_1 = end1; |
| 4547 | { | 4581 | end_match_2 = end2; |
| 4548 | end_match_1 = string1 + stop; | ||
| 4549 | end_match_2 = string2; | ||
| 4550 | } | ||
| 4551 | else | ||
| 4552 | { | ||
| 4553 | end_match_1 = end1; | ||
| 4554 | end_match_2 = string2 + stop - size1; | ||
| 4555 | } | ||
| 4556 | 4582 | ||
| 4557 | /* `p' scans through the pattern as `d' scans through the data. | 4583 | /* `p' scans through the pattern as `d' scans through the data. |
| 4558 | `dend' is the end of the input string that `d' points within. `d' | 4584 | `dend' is the end of the input string that `d' points within. `d' |
| @@ -4595,7 +4621,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4595 | /* 1 if this match ends in the same string (string1 or string2) | 4621 | /* 1 if this match ends in the same string (string1 or string2) |
| 4596 | as the best previous match. */ | 4622 | as the best previous match. */ |
| 4597 | boolean same_str_p = (FIRST_STRING_P (match_end) | 4623 | boolean same_str_p = (FIRST_STRING_P (match_end) |
| 4598 | == MATCHING_IN_FIRST_STRING); | 4624 | == FIRST_STRING_P (d)); |
| 4599 | /* 1 if this match is the best seen so far. */ | 4625 | /* 1 if this match is the best seen so far. */ |
| 4600 | boolean best_match_p; | 4626 | boolean best_match_p; |
| 4601 | 4627 | ||
| @@ -4604,7 +4630,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4604 | if (same_str_p) | 4630 | if (same_str_p) |
| 4605 | best_match_p = d > match_end; | 4631 | best_match_p = d > match_end; |
| 4606 | else | 4632 | else |
| 4607 | best_match_p = !MATCHING_IN_FIRST_STRING; | 4633 | best_match_p = !FIRST_STRING_P (d); |
| 4608 | 4634 | ||
| 4609 | DEBUG_PRINT1 ("backtracking.\n"); | 4635 | DEBUG_PRINT1 ("backtracking.\n"); |
| 4610 | 4636 | ||
| @@ -4703,9 +4729,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4703 | if (regs->num_regs > 0) | 4729 | if (regs->num_regs > 0) |
| 4704 | { | 4730 | { |
| 4705 | regs->start[0] = pos; | 4731 | regs->start[0] = pos; |
| 4706 | regs->end[0] = (MATCHING_IN_FIRST_STRING | 4732 | regs->end[0] = POINTER_TO_OFFSET (d); |
| 4707 | ? ((regoff_t) (d - string1)) | ||
| 4708 | : ((regoff_t) (d - string2 + size1))); | ||
| 4709 | } | 4733 | } |
| 4710 | 4734 | ||
| 4711 | /* Go through the first `min (num_regs, regs->num_regs)' | 4735 | /* Go through the first `min (num_regs, regs->num_regs)' |
| @@ -4737,9 +4761,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4737 | nfailure_points_pushed - nfailure_points_popped); | 4761 | nfailure_points_pushed - nfailure_points_popped); |
| 4738 | DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed); | 4762 | DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed); |
| 4739 | 4763 | ||
| 4740 | mcnt = d - pos - (MATCHING_IN_FIRST_STRING | 4764 | mcnt = POINTER_TO_OFFSET (d) - pos; |
| 4741 | ? string1 | ||
| 4742 | : string2 - size1); | ||
| 4743 | 4765 | ||
| 4744 | DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt); | 4766 | DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt); |
| 4745 | 4767 | ||
| @@ -4767,6 +4789,9 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4767 | mcnt = *p++; | 4789 | mcnt = *p++; |
| 4768 | DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt); | 4790 | DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt); |
| 4769 | 4791 | ||
| 4792 | /* Remember the start point to rollback upon failure. */ | ||
| 4793 | dfail = d; | ||
| 4794 | |||
| 4770 | /* This is written out as an if-else so we don't waste time | 4795 | /* This is written out as an if-else so we don't waste time |
| 4771 | testing `translate' inside the loop. */ | 4796 | testing `translate' inside the loop. */ |
| 4772 | if (RE_TRANSLATE_P (translate)) | 4797 | if (RE_TRANSLATE_P (translate)) |
| @@ -4784,7 +4809,10 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4784 | 4809 | ||
| 4785 | if (RE_TRANSLATE (translate, buf_ch) | 4810 | if (RE_TRANSLATE (translate, buf_ch) |
| 4786 | != pat_ch) | 4811 | != pat_ch) |
| 4787 | goto fail; | 4812 | { |
| 4813 | d = dfail; | ||
| 4814 | goto fail; | ||
| 4815 | } | ||
| 4788 | 4816 | ||
| 4789 | p += pat_charlen; | 4817 | p += pat_charlen; |
| 4790 | d += buf_charlen; | 4818 | d += buf_charlen; |
| @@ -4797,7 +4825,10 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4797 | { | 4825 | { |
| 4798 | PREFETCH (); | 4826 | PREFETCH (); |
| 4799 | if (RE_TRANSLATE (translate, *d) != *p++) | 4827 | if (RE_TRANSLATE (translate, *d) != *p++) |
| 4800 | goto fail; | 4828 | { |
| 4829 | d = dfail; | ||
| 4830 | goto fail; | ||
| 4831 | } | ||
| 4801 | d++; | 4832 | d++; |
| 4802 | } | 4833 | } |
| 4803 | while (--mcnt); | 4834 | while (--mcnt); |
| @@ -4807,7 +4838,11 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4807 | do | 4838 | do |
| 4808 | { | 4839 | { |
| 4809 | PREFETCH (); | 4840 | PREFETCH (); |
| 4810 | if (*d++ != *p++) goto fail; | 4841 | if (*d++ != *p++) |
| 4842 | { | ||
| 4843 | d = dfail; | ||
| 4844 | goto fail; | ||
| 4845 | } | ||
| 4811 | } | 4846 | } |
| 4812 | while (--mcnt); | 4847 | while (--mcnt); |
| 4813 | } | 4848 | } |
| @@ -4993,6 +5028,9 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4993 | /* Where in input to try to start matching. */ | 5028 | /* Where in input to try to start matching. */ |
| 4994 | d2 = regstart[regno]; | 5029 | d2 = regstart[regno]; |
| 4995 | 5030 | ||
| 5031 | /* Remember the start point to rollback upon failure. */ | ||
| 5032 | dfail = d; | ||
| 5033 | |||
| 4996 | /* Where to stop matching; if both the place to start and | 5034 | /* Where to stop matching; if both the place to start and |
| 4997 | the place to stop matching are in the same string, then | 5035 | the place to stop matching are in the same string, then |
| 4998 | set to the place to stop, otherwise, for now have to use | 5036 | set to the place to stop, otherwise, for now have to use |
| @@ -5033,7 +5071,10 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5033 | if (RE_TRANSLATE_P (translate) | 5071 | if (RE_TRANSLATE_P (translate) |
| 5034 | ? bcmp_translate (d, d2, mcnt, translate) | 5072 | ? bcmp_translate (d, d2, mcnt, translate) |
| 5035 | : bcmp (d, d2, mcnt)) | 5073 | : bcmp (d, d2, mcnt)) |
| 5036 | goto fail; | 5074 | { |
| 5075 | d = dfail; | ||
| 5076 | goto fail; | ||
| 5077 | } | ||
| 5037 | d += mcnt, d2 += mcnt; | 5078 | d += mcnt, d2 += mcnt; |
| 5038 | } | 5079 | } |
| 5039 | } | 5080 | } |
| @@ -5166,11 +5207,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5166 | the repetition text and either the following jump or | 5207 | the repetition text and either the following jump or |
| 5167 | pop_failure_jump back to this on_failure_jump. */ | 5208 | pop_failure_jump back to this on_failure_jump. */ |
| 5168 | case on_failure_jump: | 5209 | case on_failure_jump: |
| 5169 | |||
| 5170 | #if defined (WINDOWSNT) && defined (emacs) | ||
| 5171 | QUIT; | 5210 | QUIT; |
| 5172 | #endif | ||
| 5173 | |||
| 5174 | EXTRACT_NUMBER_AND_INCR (mcnt, p); | 5211 | EXTRACT_NUMBER_AND_INCR (mcnt, p); |
| 5175 | DEBUG_PRINT3 ("EXECUTING on_failure_jump %d (to %p):\n", | 5212 | DEBUG_PRINT3 ("EXECUTING on_failure_jump %d (to %p):\n", |
| 5176 | mcnt, p + mcnt); | 5213 | mcnt, p + mcnt); |
| @@ -5186,9 +5223,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5186 | then we can use a non-backtracking loop based on | 5223 | then we can use a non-backtracking loop based on |
| 5187 | on_failure_jump_exclusive instead of on_failure_jump_loop. */ | 5224 | on_failure_jump_exclusive instead of on_failure_jump_loop. */ |
| 5188 | case on_failure_jump_smart: | 5225 | case on_failure_jump_smart: |
| 5189 | #if defined (WINDOWSNT) && defined (emacs) | ||
| 5190 | QUIT; | 5226 | QUIT; |
| 5191 | #endif | ||
| 5192 | EXTRACT_NUMBER_AND_INCR (mcnt, p); | 5227 | EXTRACT_NUMBER_AND_INCR (mcnt, p); |
| 5193 | DEBUG_PRINT3 ("EXECUTING on_failure_jump_smart %d (to %p).\n", | 5228 | DEBUG_PRINT3 ("EXECUTING on_failure_jump_smart %d (to %p).\n", |
| 5194 | mcnt, p + mcnt); | 5229 | mcnt, p + mcnt); |
| @@ -5199,7 +5234,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5199 | p -= 3; /* Reset so that we will re-execute the | 5234 | p -= 3; /* Reset so that we will re-execute the |
| 5200 | instruction once it's been changed. */ | 5235 | instruction once it's been changed. */ |
| 5201 | 5236 | ||
| 5202 | /* DEBUG_STATEMENT (debug = 1); */ | 5237 | DEBUG_STATEMENT (debug += 2); |
| 5203 | if (mutually_exclusive_p (bufp, p1, p2)) | 5238 | if (mutually_exclusive_p (bufp, p1, p2)) |
| 5204 | { | 5239 | { |
| 5205 | /* Use a fast `on_failure_keep_string_jump' loop. */ | 5240 | /* Use a fast `on_failure_keep_string_jump' loop. */ |
| @@ -5212,16 +5247,14 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5212 | DEBUG_PRINT1 (" smart default => slow loop.\n"); | 5247 | DEBUG_PRINT1 (" smart default => slow loop.\n"); |
| 5213 | *p = (unsigned char) on_failure_jump_loop; | 5248 | *p = (unsigned char) on_failure_jump_loop; |
| 5214 | } | 5249 | } |
| 5215 | /* DEBUG_STATEMENT (debug = 0); */ | 5250 | DEBUG_STATEMENT (debug -= 2); |
| 5216 | } | 5251 | } |
| 5217 | break; | 5252 | break; |
| 5218 | 5253 | ||
| 5219 | /* Unconditionally jump (without popping any failure points). */ | 5254 | /* Unconditionally jump (without popping any failure points). */ |
| 5220 | case jump: | 5255 | case jump: |
| 5221 | unconditional_jump: | 5256 | unconditional_jump: |
| 5222 | #if defined (WINDOWSNT) && defined (emacs) | ||
| 5223 | QUIT; | 5257 | QUIT; |
| 5224 | #endif | ||
| 5225 | EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */ | 5258 | EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */ |
| 5226 | DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt); | 5259 | DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt); |
| 5227 | p += mcnt; /* Do the jump. */ | 5260 | p += mcnt; /* Do the jump. */ |
| @@ -5286,7 +5319,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5286 | not = (re_opcode_t) *(p - 1) == notwordbound; | 5319 | not = (re_opcode_t) *(p - 1) == notwordbound; |
| 5287 | DEBUG_PRINT2 ("EXECUTING %swordbound.\n", not?"not":""); | 5320 | DEBUG_PRINT2 ("EXECUTING %swordbound.\n", not?"not":""); |
| 5288 | 5321 | ||
| 5289 | /* We SUCCEED in one of the following cases: */ | 5322 | /* We SUCCEED (or FAIL) in one of the following cases: */ |
| 5290 | 5323 | ||
| 5291 | /* Case 1: D is at the beginning or the end of string. */ | 5324 | /* Case 1: D is at the beginning or the end of string. */ |
| 5292 | if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)) | 5325 | if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)) |
| @@ -5330,21 +5363,19 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5330 | 5363 | ||
| 5331 | /* Case 1: D is at the end of string. */ | 5364 | /* Case 1: D is at the end of string. */ |
| 5332 | if (AT_STRINGS_END (d)) | 5365 | if (AT_STRINGS_END (d)) |
| 5333 | goto fail; | 5366 | goto fail; |
| 5334 | else | 5367 | else |
| 5335 | { | 5368 | { |
| 5336 | /* C1 is the character before D, S1 is the syntax of C1, C2 | 5369 | /* C1 is the character before D, S1 is the syntax of C1, C2 |
| 5337 | is the character at D, and S2 is the syntax of C2. */ | 5370 | is the character at D, and S2 is the syntax of C2. */ |
| 5338 | int c1, c2, s1, s2; | 5371 | int c1, c2, s1, s2; |
| 5339 | int pos1 = PTR_TO_OFFSET (d); | ||
| 5340 | int charpos; | ||
| 5341 | |||
| 5342 | PREFETCH (); | ||
| 5343 | c2 = STRING_CHAR (d, dend - d); | ||
| 5344 | #ifdef emacs | 5372 | #ifdef emacs |
| 5345 | charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1); | 5373 | int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (PTR_TO_OFFSET (d)); |
| 5346 | UPDATE_SYNTAX_TABLE (charpos); | 5374 | UPDATE_SYNTAX_TABLE (charpos); |
| 5347 | #endif | 5375 | #endif |
| 5376 | PREFETCH (); | ||
| 5377 | /* FIXME: This does a STRING_CHAR even for unibyte buffers. */ | ||
| 5378 | c2 = STRING_CHAR (d, dend - d); | ||
| 5348 | s2 = SYNTAX (c2); | 5379 | s2 = SYNTAX (c2); |
| 5349 | 5380 | ||
| 5350 | /* Case 2: S2 is not Sword. */ | 5381 | /* Case 2: S2 is not Sword. */ |
| @@ -5381,14 +5412,11 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5381 | /* C1 is the character before D, S1 is the syntax of C1, C2 | 5412 | /* C1 is the character before D, S1 is the syntax of C1, C2 |
| 5382 | is the character at D, and S2 is the syntax of C2. */ | 5413 | is the character at D, and S2 is the syntax of C2. */ |
| 5383 | int c1, c2, s1, s2; | 5414 | int c1, c2, s1, s2; |
| 5384 | int pos1 = PTR_TO_OFFSET (d); | ||
| 5385 | int charpos; | ||
| 5386 | |||
| 5387 | GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); | ||
| 5388 | #ifdef emacs | 5415 | #ifdef emacs |
| 5389 | charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1 - 1); | 5416 | int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (PTR_TO_OFFSET (d) - 1); |
| 5390 | UPDATE_SYNTAX_TABLE (charpos); | 5417 | UPDATE_SYNTAX_TABLE (charpos); |
| 5391 | #endif | 5418 | #endif |
| 5419 | GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); | ||
| 5392 | s1 = SYNTAX (c1); | 5420 | s1 = SYNTAX (c1); |
| 5393 | 5421 | ||
| 5394 | /* Case 2: S1 is not Sword. */ | 5422 | /* Case 2: S1 is not Sword. */ |
| @@ -5399,6 +5427,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5399 | if (!AT_STRINGS_END (d)) | 5427 | if (!AT_STRINGS_END (d)) |
| 5400 | { | 5428 | { |
| 5401 | PREFETCH (); | 5429 | PREFETCH (); |
| 5430 | /* FIXME: This does a STRING_CHAR even for unibyte buffers. */ | ||
| 5402 | c2 = STRING_CHAR (d, dend - d); | 5431 | c2 = STRING_CHAR (d, dend - d); |
| 5403 | #ifdef emacs | 5432 | #ifdef emacs |
| 5404 | UPDATE_SYNTAX_TABLE_FORWARD (charpos); | 5433 | UPDATE_SYNTAX_TABLE_FORWARD (charpos); |
| @@ -5557,9 +5586,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5557 | 5586 | ||
| 5558 | /* We goto here if a matching operation fails. */ | 5587 | /* We goto here if a matching operation fails. */ |
| 5559 | fail: | 5588 | fail: |
| 5560 | #if defined (WINDOWSNT) && defined (emacs) | ||
| 5561 | QUIT; | 5589 | QUIT; |
| 5562 | #endif | ||
| 5563 | if (!FAIL_STACK_EMPTY ()) | 5590 | if (!FAIL_STACK_EMPTY ()) |
| 5564 | { | 5591 | { |
| 5565 | re_char *str; | 5592 | re_char *str; |
| @@ -5631,6 +5658,7 @@ bcmp_translate (s1, s2, len, translate) | |||
| 5631 | int p1_charlen, p2_charlen; | 5658 | int p1_charlen, p2_charlen; |
| 5632 | int p1_ch, p2_ch; | 5659 | int p1_ch, p2_ch; |
| 5633 | 5660 | ||
| 5661 | /* FIXME: This assumes `multibyte = true'. */ | ||
| 5634 | p1_ch = STRING_CHAR_AND_LENGTH (p1, p1_end - p1, p1_charlen); | 5662 | p1_ch = STRING_CHAR_AND_LENGTH (p1, p1_end - p1, p1_charlen); |
| 5635 | p2_ch = STRING_CHAR_AND_LENGTH (p2, p2_end - p2, p2_charlen); | 5663 | p2_ch = STRING_CHAR_AND_LENGTH (p2, p2_end - p2, p2_charlen); |
| 5636 | 5664 | ||