diff options
| author | Stefan Monnier | 2000-10-26 00:45:01 +0000 |
|---|---|---|
| committer | Stefan Monnier | 2000-10-26 00:45:01 +0000 |
| commit | 0161849810909a9971a78f298e3da2d0dc36d15a (patch) | |
| tree | aa8e96790f762599ed9484746eb03cff173262a4 /src | |
| parent | d97151cb57618491364b74fccef2a4a67fd4d78b (diff) | |
| download | emacs-0161849810909a9971a78f298e3da2d0dc36d15a.tar.gz emacs-0161849810909a9971a78f298e3da2d0dc36d15a.zip | |
More `unsigned char' -> `re_char' changes.
Also change several `int' into `re_wchar_t'.
(PATTERN_STACK_EMPTY, PUSH_PATTERN_OP, POP_PATTERN_OP): Remove.
(PUSH_FAILURE_POINTER): Don't cast any more.
(POP_FAILURE_REG_OR_COUNT): Remove the cast that strips `const'.
We want GCC to complain, since this piece of code makes
re_match non-reentrant, which *should* be fixed.
(GET_BUFFER_SPACE): Use size_t rather than unsigned long.
(EXTEND_BUFFER): Use RETALLOC.
(SET_LIST_BIT): Don't cast.
(re_wchar_t): New type.
(re_iswctype, re_wctype_to_bit): Make it crystal clear to GCC
that those two functions will always properly return.
(IMMEDIATE_QUIT_CHECK): Cast to void.
(analyse_first): Use recursion rather than an explicit stack.
(re_compile_fastmap): Can't fail anymore.
(re_search_2): Don't check re_compile_fastmap for failure.
(PUSH_NUMBER): Renamed from PUSH_FAILURE_COUNT.
Now also sets the new value (passed in a new argument).
(re_match_2_internal): Use it.
Also, use a new var `reg' of type size_t when looping through regs
rather than reuse the inappropriate `mcnt'.
Diffstat (limited to 'src')
| -rw-r--r-- | src/regex.c | 360 |
1 files changed, 165 insertions, 195 deletions
diff --git a/src/regex.c b/src/regex.c index fab989813d5..e10a3565f2f 100644 --- a/src/regex.c +++ b/src/regex.c | |||
| @@ -22,10 +22,9 @@ | |||
| 22 | /* TODO: | 22 | /* TODO: |
| 23 | - structure the opcode space into opcode+flag. | 23 | - structure the opcode space into opcode+flag. |
| 24 | - merge with glibc's regex.[ch]. | 24 | - merge with glibc's regex.[ch]. |
| 25 | - replace succeed_n + jump_n with a combined operation so that the counter | 25 | - replace (succeed_n + jump_n + set_number_at) with something that doesn't |
| 26 | can simply be decremented when popping the failure_point without having | 26 | need to modify the compiled regexp. |
| 27 | to stack up failure_count entries. | 27 | */ |
| 28 | */ | ||
| 29 | 28 | ||
| 30 | /* AIX requires this to be the first thing in the file. */ | 29 | /* AIX requires this to be the first thing in the file. */ |
| 31 | #if defined _AIX && !defined REGEX_MALLOC | 30 | #if defined _AIX && !defined REGEX_MALLOC |
| @@ -553,7 +552,7 @@ typedef enum | |||
| 553 | is followed by a range table: | 552 | is followed by a range table: |
| 554 | 2 bytes of flags for character sets (low 8 bits, high 8 bits) | 553 | 2 bytes of flags for character sets (low 8 bits, high 8 bits) |
| 555 | See RANGE_TABLE_WORK_BITS below. | 554 | See RANGE_TABLE_WORK_BITS below. |
| 556 | 2 bytes, the number of pairs that follow | 555 | 2 bytes, the number of pairs that follow (up to 32767) |
| 557 | pairs, each 2 multibyte characters, | 556 | pairs, each 2 multibyte characters, |
| 558 | each multibyte character represented as 3 bytes. */ | 557 | each multibyte character represented as 3 bytes. */ |
| 559 | charset, | 558 | charset, |
| @@ -700,7 +699,7 @@ static void extract_number _RE_ARGS ((int *dest, re_char *source)); | |||
| 700 | static void | 699 | static void |
| 701 | extract_number (dest, source) | 700 | extract_number (dest, source) |
| 702 | int *dest; | 701 | int *dest; |
| 703 | unsigned char *source; | 702 | re_char *source; |
| 704 | { | 703 | { |
| 705 | int temp = SIGN_EXTEND_CHAR (*(source + 1)); | 704 | int temp = SIGN_EXTEND_CHAR (*(source + 1)); |
| 706 | *dest = *source & 0377; | 705 | *dest = *source & 0377; |
| @@ -729,7 +728,7 @@ static void extract_number_and_incr _RE_ARGS ((int *destination, | |||
| 729 | static void | 728 | static void |
| 730 | extract_number_and_incr (destination, source) | 729 | extract_number_and_incr (destination, source) |
| 731 | int *destination; | 730 | int *destination; |
| 732 | unsigned char **source; | 731 | re_char **source; |
| 733 | { | 732 | { |
| 734 | extract_number (destination, *source); | 733 | extract_number (destination, *source); |
| 735 | *source += 2; | 734 | *source += 2; |
| @@ -803,9 +802,9 @@ extract_number_and_incr (destination, source) | |||
| 803 | #define CHARSET_LOOKUP_RANGE_TABLE_RAW(not, c, range_table, count) \ | 802 | #define CHARSET_LOOKUP_RANGE_TABLE_RAW(not, c, range_table, count) \ |
| 804 | do \ | 803 | do \ |
| 805 | { \ | 804 | { \ |
| 806 | int range_start, range_end; \ | 805 | re_wchar_t range_start, range_end; \ |
| 807 | unsigned char *p; \ | 806 | re_char *p; \ |
| 808 | unsigned char *range_table_end \ | 807 | re_char *range_table_end \ |
| 809 | = CHARSET_RANGE_TABLE_END ((range_table), (count)); \ | 808 | = CHARSET_RANGE_TABLE_END ((range_table), (count)); \ |
| 810 | \ | 809 | \ |
| 811 | for (p = (range_table); p < range_table_end; p += 2 * 3) \ | 810 | for (p = (range_table); p < range_table_end; p += 2 * 3) \ |
| @@ -829,8 +828,8 @@ extract_number_and_incr (destination, source) | |||
| 829 | { \ | 828 | { \ |
| 830 | /* Number of ranges in range table. */ \ | 829 | /* Number of ranges in range table. */ \ |
| 831 | int count; \ | 830 | int count; \ |
| 832 | unsigned char *range_table = CHARSET_RANGE_TABLE (charset); \ | 831 | re_char *range_table = CHARSET_RANGE_TABLE (charset); \ |
| 833 | \ | 832 | \ |
| 834 | EXTRACT_NUMBER_AND_INCR (count, range_table); \ | 833 | EXTRACT_NUMBER_AND_INCR (count, range_table); \ |
| 835 | CHARSET_LOOKUP_RANGE_TABLE_RAW ((not), (c), range_table, count); \ | 834 | CHARSET_LOOKUP_RANGE_TABLE_RAW ((not), (c), range_table, count); \ |
| 836 | } \ | 835 | } \ |
| @@ -899,12 +898,12 @@ print_fastmap (fastmap) | |||
| 899 | 898 | ||
| 900 | void | 899 | void |
| 901 | print_partial_compiled_pattern (start, end) | 900 | print_partial_compiled_pattern (start, end) |
| 902 | unsigned char *start; | 901 | re_char *start; |
| 903 | unsigned char *end; | 902 | re_char *end; |
| 904 | { | 903 | { |
| 905 | int mcnt, mcnt2; | 904 | int mcnt, mcnt2; |
| 906 | unsigned char *p = start; | 905 | re_char *p = start; |
| 907 | unsigned char *pend = end; | 906 | re_char *pend = end; |
| 908 | 907 | ||
| 909 | if (start == NULL) | 908 | if (start == NULL) |
| 910 | { | 909 | { |
| @@ -1142,7 +1141,7 @@ void | |||
| 1142 | print_compiled_pattern (bufp) | 1141 | print_compiled_pattern (bufp) |
| 1143 | struct re_pattern_buffer *bufp; | 1142 | struct re_pattern_buffer *bufp; |
| 1144 | { | 1143 | { |
| 1145 | unsigned char *buffer = bufp->buffer; | 1144 | re_char *buffer = bufp->buffer; |
| 1146 | 1145 | ||
| 1147 | print_partial_compiled_pattern (buffer, buffer + bufp->used); | 1146 | print_partial_compiled_pattern (buffer, buffer + bufp->used); |
| 1148 | printf ("%ld bytes used/%ld bytes allocated.\n", | 1147 | printf ("%ld bytes used/%ld bytes allocated.\n", |
| @@ -1326,7 +1325,7 @@ size_t re_max_failures = 4000; | |||
| 1326 | 1325 | ||
| 1327 | union fail_stack_elt | 1326 | union fail_stack_elt |
| 1328 | { | 1327 | { |
| 1329 | const unsigned char *pointer; | 1328 | re_char *pointer; |
| 1330 | /* This should be the biggest `int' that's no bigger than a pointer. */ | 1329 | /* This should be the biggest `int' that's no bigger than a pointer. */ |
| 1331 | long integer; | 1330 | long integer; |
| 1332 | }; | 1331 | }; |
| @@ -1341,7 +1340,6 @@ typedef struct | |||
| 1341 | size_t frame; /* Offset of the cur constructed frame. */ | 1340 | size_t frame; /* Offset of the cur constructed frame. */ |
| 1342 | } fail_stack_type; | 1341 | } fail_stack_type; |
| 1343 | 1342 | ||
| 1344 | #define PATTERN_STACK_EMPTY() (fail_stack.avail == 0) | ||
| 1345 | #define FAIL_STACK_EMPTY() (fail_stack.frame == 0) | 1343 | #define FAIL_STACK_EMPTY() (fail_stack.frame == 0) |
| 1346 | #define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size) | 1344 | #define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size) |
| 1347 | 1345 | ||
| @@ -1413,22 +1411,11 @@ typedef struct | |||
| 1413 | 1))) | 1411 | 1))) |
| 1414 | 1412 | ||
| 1415 | 1413 | ||
| 1416 | /* Push pointer POINTER on FAIL_STACK. | ||
| 1417 | Return 1 if was able to do so and 0 if ran out of memory allocating | ||
| 1418 | space to do so. */ | ||
| 1419 | #define PUSH_PATTERN_OP(POINTER, FAIL_STACK) \ | ||
| 1420 | ((FAIL_STACK_FULL () \ | ||
| 1421 | && !GROW_FAIL_STACK (FAIL_STACK)) \ | ||
| 1422 | ? 0 \ | ||
| 1423 | : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER, \ | ||
| 1424 | 1)) | ||
| 1425 | #define POP_PATTERN_OP() POP_FAILURE_POINTER () | ||
| 1426 | |||
| 1427 | /* Push a pointer value onto the failure stack. | 1414 | /* Push a pointer value onto the failure stack. |
| 1428 | Assumes the variable `fail_stack'. Probably should only | 1415 | Assumes the variable `fail_stack'. Probably should only |
| 1429 | be called from within `PUSH_FAILURE_POINT'. */ | 1416 | be called from within `PUSH_FAILURE_POINT'. */ |
| 1430 | #define PUSH_FAILURE_POINTER(item) \ | 1417 | #define PUSH_FAILURE_POINTER(item) \ |
| 1431 | fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item) | 1418 | fail_stack.stack[fail_stack.avail++].pointer = (item) |
| 1432 | 1419 | ||
| 1433 | /* This pushes an integer-valued item onto the failure stack. | 1420 | /* This pushes an integer-valued item onto the failure stack. |
| 1434 | Assumes the variable `fail_stack'. Probably should only | 1421 | Assumes the variable `fail_stack'. Probably should only |
| @@ -1478,16 +1465,19 @@ do { \ | |||
| 1478 | PUSH_FAILURE_INT (num); \ | 1465 | PUSH_FAILURE_INT (num); \ |
| 1479 | } while (0) | 1466 | } while (0) |
| 1480 | 1467 | ||
| 1481 | #define PUSH_FAILURE_COUNT(ptr) \ | 1468 | /* Change the counter's value to VAL, but make sure that it will |
| 1469 | be reset when backtracking. */ | ||
| 1470 | #define PUSH_NUMBER(ptr,val) \ | ||
| 1482 | do { \ | 1471 | do { \ |
| 1483 | char *destination; \ | 1472 | char *destination; \ |
| 1484 | int c; \ | 1473 | int c; \ |
| 1485 | ENSURE_FAIL_STACK(3); \ | 1474 | ENSURE_FAIL_STACK(3); \ |
| 1486 | EXTRACT_NUMBER (c, ptr); \ | 1475 | EXTRACT_NUMBER (c, ptr); \ |
| 1487 | DEBUG_PRINT3 (" Push counter %p = %d\n", ptr, c); \ | 1476 | DEBUG_PRINT4 (" Push number %p = %d -> %d\n", ptr, c, val); \ |
| 1488 | PUSH_FAILURE_INT (c); \ | 1477 | PUSH_FAILURE_INT (c); \ |
| 1489 | PUSH_FAILURE_POINTER (ptr); \ | 1478 | PUSH_FAILURE_POINTER (ptr); \ |
| 1490 | PUSH_FAILURE_INT (-1); \ | 1479 | PUSH_FAILURE_INT (-1); \ |
| 1480 | STORE_NUMBER (ptr, val); \ | ||
| 1491 | } while (0) | 1481 | } while (0) |
| 1492 | 1482 | ||
| 1493 | /* Pop a saved register off the stack. */ | 1483 | /* Pop a saved register off the stack. */ |
| @@ -1497,7 +1487,9 @@ do { \ | |||
| 1497 | if (reg == -1) \ | 1487 | if (reg == -1) \ |
| 1498 | { \ | 1488 | { \ |
| 1499 | /* It's a counter. */ \ | 1489 | /* It's a counter. */ \ |
| 1500 | unsigned char *ptr = (unsigned char*) POP_FAILURE_POINTER (); \ | 1490 | /* Here, we discard `const', which makes re_match non-reentrant. \ |
| 1491 | Gcc gives a warning for it, which is good. */ \ | ||
| 1492 | unsigned char *ptr = POP_FAILURE_POINTER (); \ | ||
| 1501 | reg = POP_FAILURE_INT (); \ | 1493 | reg = POP_FAILURE_INT (); \ |
| 1502 | STORE_NUMBER (ptr, reg); \ | 1494 | STORE_NUMBER (ptr, reg); \ |
| 1503 | DEBUG_PRINT3 (" Pop counter %p = %d\n", ptr, reg); \ | 1495 | DEBUG_PRINT3 (" Pop counter %p = %d\n", ptr, reg); \ |
| @@ -1603,14 +1595,14 @@ do { \ | |||
| 1603 | while (fail_stack.frame < fail_stack.avail) \ | 1595 | while (fail_stack.frame < fail_stack.avail) \ |
| 1604 | POP_FAILURE_REG_OR_COUNT (); \ | 1596 | POP_FAILURE_REG_OR_COUNT (); \ |
| 1605 | \ | 1597 | \ |
| 1606 | pat = (unsigned char *) POP_FAILURE_POINTER (); \ | 1598 | pat = POP_FAILURE_POINTER (); \ |
| 1607 | DEBUG_PRINT2 (" Popping pattern %p: ", pat); \ | 1599 | DEBUG_PRINT2 (" Popping pattern %p: ", pat); \ |
| 1608 | DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \ | 1600 | DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \ |
| 1609 | \ | 1601 | \ |
| 1610 | /* If the saved string location is NULL, it came from an \ | 1602 | /* If the saved string location is NULL, it came from an \ |
| 1611 | on_failure_keep_string_jump opcode, and we want to throw away the \ | 1603 | on_failure_keep_string_jump opcode, and we want to throw away the \ |
| 1612 | saved NULL, thus retaining our current position in the string. */ \ | 1604 | saved NULL, thus retaining our current position in the string. */ \ |
| 1613 | str = (re_char *) POP_FAILURE_POINTER (); \ | 1605 | str = POP_FAILURE_POINTER (); \ |
| 1614 | DEBUG_PRINT2 (" Popping string %p: `", str); \ | 1606 | DEBUG_PRINT2 (" Popping string %p: `", str); \ |
| 1615 | DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \ | 1607 | DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \ |
| 1616 | DEBUG_PRINT1 ("'\n"); \ | 1608 | DEBUG_PRINT1 ("'\n"); \ |
| @@ -1641,20 +1633,18 @@ static void insert_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc, | |||
| 1641 | int arg, unsigned char *end)); | 1633 | int arg, unsigned char *end)); |
| 1642 | static void insert_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc, | 1634 | static void insert_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc, |
| 1643 | int arg1, int arg2, unsigned char *end)); | 1635 | int arg1, int arg2, unsigned char *end)); |
| 1644 | static boolean at_begline_loc_p _RE_ARGS ((const unsigned char *pattern, | 1636 | static boolean at_begline_loc_p _RE_ARGS ((re_char *pattern, |
| 1645 | const unsigned char *p, | 1637 | re_char *p, |
| 1646 | reg_syntax_t syntax)); | 1638 | reg_syntax_t syntax)); |
| 1647 | static boolean at_endline_loc_p _RE_ARGS ((const unsigned char *p, | 1639 | static boolean at_endline_loc_p _RE_ARGS ((re_char *p, |
| 1648 | const unsigned char *pend, | 1640 | re_char *pend, |
| 1649 | reg_syntax_t syntax)); | 1641 | reg_syntax_t syntax)); |
| 1650 | static unsigned char *skip_one_char _RE_ARGS ((unsigned char *p)); | 1642 | static re_char *skip_one_char _RE_ARGS ((re_char *p)); |
| 1651 | static int analyse_first _RE_ARGS ((unsigned char *p, unsigned char *pend, | 1643 | static int analyse_first _RE_ARGS ((re_char *p, re_char *pend, |
| 1652 | char *fastmap, const int multibyte)); | 1644 | char *fastmap, const int multibyte)); |
| 1653 | 1645 | ||
| 1654 | /* Fetch the next character in the uncompiled pattern---translating it | 1646 | /* Fetch the next character in the uncompiled pattern---translating it |
| 1655 | if necessary. Also cast from a signed character in the constant | 1647 | if necessary. */ |
| 1656 | string passed to us by the user to an unsigned char that we can use | ||
| 1657 | as an array index (in, e.g., `translate'). */ | ||
| 1658 | #define PATFETCH(c) \ | 1648 | #define PATFETCH(c) \ |
| 1659 | do { \ | 1649 | do { \ |
| 1660 | PATFETCH_RAW (c); \ | 1650 | PATFETCH_RAW (c); \ |
| @@ -1689,7 +1679,7 @@ static int analyse_first _RE_ARGS ((unsigned char *p, unsigned char *pend, | |||
| 1689 | 1679 | ||
| 1690 | /* Make sure we have at least N more bytes of space in buffer. */ | 1680 | /* Make sure we have at least N more bytes of space in buffer. */ |
| 1691 | #define GET_BUFFER_SPACE(n) \ | 1681 | #define GET_BUFFER_SPACE(n) \ |
| 1692 | while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated) \ | 1682 | while ((size_t) (b - bufp->buffer + (n)) > bufp->allocated) \ |
| 1693 | EXTEND_BUFFER () | 1683 | EXTEND_BUFFER () |
| 1694 | 1684 | ||
| 1695 | /* Make sure we have one more byte of buffer space and then add C to it. */ | 1685 | /* Make sure we have one more byte of buffer space and then add C to it. */ |
| @@ -1778,13 +1768,13 @@ static int analyse_first _RE_ARGS ((unsigned char *p, unsigned char *pend, | |||
| 1778 | #endif | 1768 | #endif |
| 1779 | #define EXTEND_BUFFER() \ | 1769 | #define EXTEND_BUFFER() \ |
| 1780 | do { \ | 1770 | do { \ |
| 1781 | unsigned char *old_buffer = bufp->buffer; \ | 1771 | re_char *old_buffer = bufp->buffer; \ |
| 1782 | if (bufp->allocated == MAX_BUF_SIZE) \ | 1772 | if (bufp->allocated == MAX_BUF_SIZE) \ |
| 1783 | return REG_ESIZE; \ | 1773 | return REG_ESIZE; \ |
| 1784 | bufp->allocated <<= 1; \ | 1774 | bufp->allocated <<= 1; \ |
| 1785 | if (bufp->allocated > MAX_BUF_SIZE) \ | 1775 | if (bufp->allocated > MAX_BUF_SIZE) \ |
| 1786 | bufp->allocated = MAX_BUF_SIZE; \ | 1776 | bufp->allocated = MAX_BUF_SIZE; \ |
| 1787 | bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\ | 1777 | RETALLOC (bufp->buffer, bufp->allocated, unsigned char); \ |
| 1788 | if (bufp->buffer == NULL) \ | 1778 | if (bufp->buffer == NULL) \ |
| 1789 | return REG_ESPACE; \ | 1779 | return REG_ESPACE; \ |
| 1790 | /* If the buffer moved, move all the pointers into it. */ \ | 1780 | /* If the buffer moved, move all the pointers into it. */ \ |
| @@ -1907,9 +1897,7 @@ struct range_table_work_area | |||
| 1907 | 1897 | ||
| 1908 | 1898 | ||
| 1909 | /* Set the bit for character C in a list. */ | 1899 | /* Set the bit for character C in a list. */ |
| 1910 | #define SET_LIST_BIT(c) \ | 1900 | #define SET_LIST_BIT(c) (b[((c)) / BYTEWIDTH] |= 1 << ((c) % BYTEWIDTH)) |
| 1911 | (b[((unsigned char) (c)) / BYTEWIDTH] \ | ||
| 1912 | |= 1 << (((unsigned char) c) % BYTEWIDTH)) | ||
| 1913 | 1901 | ||
| 1914 | 1902 | ||
| 1915 | /* Get the next unsigned number in the uncompiled pattern. */ | 1903 | /* Get the next unsigned number in the uncompiled pattern. */ |
| @@ -1940,6 +1928,7 @@ struct range_table_work_area | |||
| 1940 | # define CHAR_CLASS_MAX_LENGTH 256 | 1928 | # define CHAR_CLASS_MAX_LENGTH 256 |
| 1941 | # endif | 1929 | # endif |
| 1942 | typedef wctype_t re_wctype_t; | 1930 | typedef wctype_t re_wctype_t; |
| 1931 | typedef wchar_t re_wchar_t; | ||
| 1943 | # define re_wctype wctype | 1932 | # define re_wctype wctype |
| 1944 | # define re_iswctype iswctype | 1933 | # define re_iswctype iswctype |
| 1945 | # define re_wctype_to_bit(cc) 0 | 1934 | # define re_wctype_to_bit(cc) 0 |
| @@ -1947,7 +1936,7 @@ typedef wctype_t re_wctype_t; | |||
| 1947 | # define CHAR_CLASS_MAX_LENGTH 9 /* Namely, `multibyte'. */ | 1936 | # define CHAR_CLASS_MAX_LENGTH 9 /* Namely, `multibyte'. */ |
| 1948 | # define btowc(c) c | 1937 | # define btowc(c) c |
| 1949 | 1938 | ||
| 1950 | /* Character classes' indices. */ | 1939 | /* Character classes. */ |
| 1951 | typedef enum { RECC_ERROR = 0, | 1940 | typedef enum { RECC_ERROR = 0, |
| 1952 | RECC_ALNUM, RECC_ALPHA, RECC_WORD, | 1941 | RECC_ALNUM, RECC_ALPHA, RECC_WORD, |
| 1953 | RECC_GRAPH, RECC_PRINT, | 1942 | RECC_GRAPH, RECC_PRINT, |
| @@ -1959,10 +1948,12 @@ typedef enum { RECC_ERROR = 0, | |||
| 1959 | RECC_ASCII, RECC_UNIBYTE | 1948 | RECC_ASCII, RECC_UNIBYTE |
| 1960 | } re_wctype_t; | 1949 | } re_wctype_t; |
| 1961 | 1950 | ||
| 1951 | typedef int re_wchar_t; | ||
| 1952 | |||
| 1962 | /* Map a string to the char class it names (if any). */ | 1953 | /* Map a string to the char class it names (if any). */ |
| 1963 | static re_wctype_t | 1954 | static re_wctype_t |
| 1964 | re_wctype (string) | 1955 | re_wctype (string) |
| 1965 | unsigned char *string; | 1956 | re_char *string; |
| 1966 | { | 1957 | { |
| 1967 | if (STREQ (string, "alnum")) return RECC_ALNUM; | 1958 | if (STREQ (string, "alnum")) return RECC_ALNUM; |
| 1968 | else if (STREQ (string, "alpha")) return RECC_ALPHA; | 1959 | else if (STREQ (string, "alpha")) return RECC_ALPHA; |
| @@ -1990,27 +1981,30 @@ re_iswctype (ch, cc) | |||
| 1990 | int ch; | 1981 | int ch; |
| 1991 | re_wctype_t cc; | 1982 | re_wctype_t cc; |
| 1992 | { | 1983 | { |
| 1984 | boolean ret = false; | ||
| 1985 | |||
| 1993 | switch (cc) | 1986 | switch (cc) |
| 1994 | { | 1987 | { |
| 1995 | case RECC_ALNUM: return ISALNUM (ch); | 1988 | case RECC_ALNUM: ret = ISALNUM (ch); |
| 1996 | case RECC_ALPHA: return ISALPHA (ch); | 1989 | case RECC_ALPHA: ret = ISALPHA (ch); |
| 1997 | case RECC_BLANK: return ISBLANK (ch); | 1990 | case RECC_BLANK: ret = ISBLANK (ch); |
| 1998 | case RECC_CNTRL: return ISCNTRL (ch); | 1991 | case RECC_CNTRL: ret = ISCNTRL (ch); |
| 1999 | case RECC_DIGIT: return ISDIGIT (ch); | 1992 | case RECC_DIGIT: ret = ISDIGIT (ch); |
| 2000 | case RECC_GRAPH: return ISGRAPH (ch); | 1993 | case RECC_GRAPH: ret = ISGRAPH (ch); |
| 2001 | case RECC_LOWER: return ISLOWER (ch); | 1994 | case RECC_LOWER: ret = ISLOWER (ch); |
| 2002 | case RECC_PRINT: return ISPRINT (ch); | 1995 | case RECC_PRINT: ret = ISPRINT (ch); |
| 2003 | case RECC_PUNCT: return ISPUNCT (ch); | 1996 | case RECC_PUNCT: ret = ISPUNCT (ch); |
| 2004 | case RECC_SPACE: return ISSPACE (ch); | 1997 | case RECC_SPACE: ret = ISSPACE (ch); |
| 2005 | case RECC_UPPER: return ISUPPER (ch); | 1998 | case RECC_UPPER: ret = ISUPPER (ch); |
| 2006 | case RECC_XDIGIT: return ISXDIGIT (ch); | 1999 | case RECC_XDIGIT: ret = ISXDIGIT (ch); |
| 2007 | case RECC_ASCII: return IS_REAL_ASCII (ch); | 2000 | case RECC_ASCII: ret = IS_REAL_ASCII (ch); |
| 2008 | case RECC_NONASCII: return !IS_REAL_ASCII (ch); | 2001 | case RECC_NONASCII: ret = !IS_REAL_ASCII (ch); |
| 2009 | case RECC_UNIBYTE: return ISUNIBYTE (ch); | 2002 | case RECC_UNIBYTE: ret = ISUNIBYTE (ch); |
| 2010 | case RECC_MULTIBYTE: return !ISUNIBYTE (ch); | 2003 | case RECC_MULTIBYTE: ret = !ISUNIBYTE (ch); |
| 2011 | case RECC_WORD: return ISWORD (ch); | 2004 | case RECC_WORD: ret = ISWORD (ch); |
| 2012 | case RECC_ERROR: return false; | 2005 | case RECC_ERROR: ret = false; |
| 2013 | } | 2006 | } |
| 2007 | return ret; | ||
| 2014 | } | 2008 | } |
| 2015 | 2009 | ||
| 2016 | /* Return a bit-pattern to use in the range-table bits to match multibyte | 2010 | /* Return a bit-pattern to use in the range-table bits to match multibyte |
| @@ -2019,18 +2013,21 @@ static int | |||
| 2019 | re_wctype_to_bit (cc) | 2013 | re_wctype_to_bit (cc) |
| 2020 | re_wctype_t cc; | 2014 | re_wctype_t cc; |
| 2021 | { | 2015 | { |
| 2016 | int ret = 0; | ||
| 2017 | |||
| 2022 | switch (cc) | 2018 | switch (cc) |
| 2023 | { | 2019 | { |
| 2024 | case RECC_NONASCII: case RECC_PRINT: case RECC_GRAPH: | 2020 | case RECC_NONASCII: case RECC_PRINT: case RECC_GRAPH: |
| 2025 | case RECC_MULTIBYTE: return BIT_MULTIBYTE; | 2021 | case RECC_MULTIBYTE: ret = BIT_MULTIBYTE; |
| 2026 | case RECC_ALPHA: case RECC_ALNUM: case RECC_WORD: return BIT_WORD; | 2022 | case RECC_ALPHA: case RECC_ALNUM: case RECC_WORD: ret = BIT_WORD; |
| 2027 | case RECC_LOWER: return BIT_LOWER; | 2023 | case RECC_LOWER: ret = BIT_LOWER; |
| 2028 | case RECC_UPPER: return BIT_UPPER; | 2024 | case RECC_UPPER: ret = BIT_UPPER; |
| 2029 | case RECC_PUNCT: return BIT_PUNCT; | 2025 | case RECC_PUNCT: ret = BIT_PUNCT; |
| 2030 | case RECC_SPACE: return BIT_SPACE; | 2026 | case RECC_SPACE: ret = BIT_SPACE; |
| 2031 | case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL: | 2027 | case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL: |
| 2032 | case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0; | 2028 | case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: ret = 0; |
| 2033 | } | 2029 | } |
| 2030 | return ret; | ||
| 2034 | } | 2031 | } |
| 2035 | #endif | 2032 | #endif |
| 2036 | 2033 | ||
| @@ -2042,7 +2039,7 @@ extern int immediate_quit; | |||
| 2042 | if (immediate_quit) QUIT; \ | 2039 | if (immediate_quit) QUIT; \ |
| 2043 | } while (0) | 2040 | } while (0) |
| 2044 | #else | 2041 | #else |
| 2045 | # define IMMEDIATE_QUIT_CHECK (0) | 2042 | # define IMMEDIATE_QUIT_CHECK ((void)0) |
| 2046 | #endif | 2043 | #endif |
| 2047 | 2044 | ||
| 2048 | #ifndef MATCH_MAY_ALLOCATE | 2045 | #ifndef MATCH_MAY_ALLOCATE |
| @@ -2129,10 +2126,8 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2129 | reg_syntax_t syntax; | 2126 | reg_syntax_t syntax; |
| 2130 | struct re_pattern_buffer *bufp; | 2127 | struct re_pattern_buffer *bufp; |
| 2131 | { | 2128 | { |
| 2132 | /* We fetch characters from PATTERN here. Even though PATTERN is | 2129 | /* We fetch characters from PATTERN here. */ |
| 2133 | `char *' (i.e., signed), we declare these variables as unsigned, so | 2130 | register re_wchar_t c, c1; |
| 2134 | they can be reliably used as array indices. */ | ||
| 2135 | register unsigned int c, c1; | ||
| 2136 | 2131 | ||
| 2137 | /* A random temporary spot in PATTERN. */ | 2132 | /* A random temporary spot in PATTERN. */ |
| 2138 | re_char *p1; | 2133 | re_char *p1; |
| @@ -2359,6 +2354,7 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2359 | boolean simple = skip_one_char (laststart) == b; | 2354 | boolean simple = skip_one_char (laststart) == b; |
| 2360 | unsigned int startoffset = 0; | 2355 | unsigned int startoffset = 0; |
| 2361 | re_opcode_t ofj = | 2356 | re_opcode_t ofj = |
| 2357 | /* Check if the loop can match the empty string. */ | ||
| 2362 | (simple || !analyse_first (laststart, b, NULL, 0)) ? | 2358 | (simple || !analyse_first (laststart, b, NULL, 0)) ? |
| 2363 | on_failure_jump : on_failure_jump_loop; | 2359 | on_failure_jump : on_failure_jump_loop; |
| 2364 | assert (skip_one_char (laststart) <= b); | 2360 | assert (skip_one_char (laststart) <= b); |
| @@ -2629,7 +2625,7 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2629 | if (SINGLE_BYTE_CHAR_P (c)) | 2625 | if (SINGLE_BYTE_CHAR_P (c)) |
| 2630 | /* ... into bitmap. */ | 2626 | /* ... into bitmap. */ |
| 2631 | { | 2627 | { |
| 2632 | unsigned this_char; | 2628 | re_wchar_t this_char; |
| 2633 | int range_start = c, range_end = c1; | 2629 | int range_start = c, range_end = c1; |
| 2634 | 2630 | ||
| 2635 | /* If the start is after the end, the range is empty. */ | 2631 | /* If the start is after the end, the range is empty. */ |
| @@ -3365,10 +3361,10 @@ insert_op2 (op, loc, arg1, arg2, end) | |||
| 3365 | 3361 | ||
| 3366 | static boolean | 3362 | static boolean |
| 3367 | at_begline_loc_p (pattern, p, syntax) | 3363 | at_begline_loc_p (pattern, p, syntax) |
| 3368 | const unsigned char *pattern, *p; | 3364 | re_char *pattern, *p; |
| 3369 | reg_syntax_t syntax; | 3365 | reg_syntax_t syntax; |
| 3370 | { | 3366 | { |
| 3371 | const unsigned char *prev = p - 2; | 3367 | re_char *prev = p - 2; |
| 3372 | boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\'; | 3368 | boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\'; |
| 3373 | 3369 | ||
| 3374 | return | 3370 | return |
| @@ -3389,12 +3385,12 @@ at_begline_loc_p (pattern, p, syntax) | |||
| 3389 | 3385 | ||
| 3390 | static boolean | 3386 | static boolean |
| 3391 | at_endline_loc_p (p, pend, syntax) | 3387 | at_endline_loc_p (p, pend, syntax) |
| 3392 | const unsigned char *p, *pend; | 3388 | re_char *p, *pend; |
| 3393 | reg_syntax_t syntax; | 3389 | reg_syntax_t syntax; |
| 3394 | { | 3390 | { |
| 3395 | const unsigned char *next = p; | 3391 | re_char *next = p; |
| 3396 | boolean next_backslash = *next == '\\'; | 3392 | boolean next_backslash = *next == '\\'; |
| 3397 | const unsigned char *next_next = p + 1 < pend ? p + 1 : 0; | 3393 | re_char *next_next = p + 1 < pend ? p + 1 : 0; |
| 3398 | 3394 | ||
| 3399 | return | 3395 | return |
| 3400 | /* Before a subexpression? */ | 3396 | /* Before a subexpression? */ |
| @@ -3433,36 +3429,16 @@ group_in_compile_stack (compile_stack, regnum) | |||
| 3433 | 3429 | ||
| 3434 | Return 1 if p..pend might match the empty string. | 3430 | Return 1 if p..pend might match the empty string. |
| 3435 | Return 0 if p..pend matches at least one char. | 3431 | Return 0 if p..pend matches at least one char. |
| 3436 | Return -1 if p..pend matches at least one char, but fastmap was not | 3432 | Return -1 if fastmap was not updated accurately. */ |
| 3437 | updated accurately. | ||
| 3438 | Return -2 if an error occurred. */ | ||
| 3439 | 3433 | ||
| 3440 | static int | 3434 | static int |
| 3441 | analyse_first (p, pend, fastmap, multibyte) | 3435 | analyse_first (p, pend, fastmap, multibyte) |
| 3442 | unsigned char *p, *pend; | 3436 | re_char *p, *pend; |
| 3443 | char *fastmap; | 3437 | char *fastmap; |
| 3444 | const int multibyte; | 3438 | const int multibyte; |
| 3445 | { | 3439 | { |
| 3446 | int j, k; | 3440 | int j, k; |
| 3447 | boolean not; | 3441 | boolean not; |
| 3448 | #ifdef MATCH_MAY_ALLOCATE | ||
| 3449 | fail_stack_type fail_stack; | ||
| 3450 | #endif | ||
| 3451 | #ifndef REGEX_MALLOC | ||
| 3452 | char *destination; | ||
| 3453 | #endif | ||
| 3454 | |||
| 3455 | #if defined REL_ALLOC && defined REGEX_MALLOC | ||
| 3456 | /* This holds the pointer to the failure stack, when | ||
| 3457 | it is allocated relocatably. */ | ||
| 3458 | fail_stack_elt_t *failure_stack_ptr; | ||
| 3459 | #endif | ||
| 3460 | |||
| 3461 | /* Assume that each path through the pattern can be null until | ||
| 3462 | proven otherwise. We set this false at the bottom of switch | ||
| 3463 | statement, to which we get only if a particular path doesn't | ||
| 3464 | match the empty string. */ | ||
| 3465 | boolean path_can_be_null = true; | ||
| 3466 | 3442 | ||
| 3467 | /* If all elements for base leading-codes in fastmap are set, this | 3443 | /* If all elements for base leading-codes in fastmap are set, this |
| 3468 | flag is set true. */ | 3444 | flag is set true. */ |
| @@ -3470,8 +3446,6 @@ analyse_first (p, pend, fastmap, multibyte) | |||
| 3470 | 3446 | ||
| 3471 | assert (p); | 3447 | assert (p); |
| 3472 | 3448 | ||
| 3473 | INIT_FAIL_STACK (); | ||
| 3474 | |||
| 3475 | /* The loop below works as follows: | 3449 | /* The loop below works as follows: |
| 3476 | - It has a working-list kept in the PATTERN_STACK and which basically | 3450 | - It has a working-list kept in the PATTERN_STACK and which basically |
| 3477 | starts by only containing a pointer to the first operation. | 3451 | starts by only containing a pointer to the first operation. |
| @@ -3487,7 +3461,7 @@ analyse_first (p, pend, fastmap, multibyte) | |||
| 3487 | so that `p' is monotonically increasing. More to the point, we | 3461 | so that `p' is monotonically increasing. More to the point, we |
| 3488 | never set `p' (or push) anything `<= p1'. */ | 3462 | never set `p' (or push) anything `<= p1'. */ |
| 3489 | 3463 | ||
| 3490 | while (1) | 3464 | while (p < pend) |
| 3491 | { | 3465 | { |
| 3492 | /* `p1' is used as a marker of how far back a `on_failure_jump' | 3466 | /* `p1' is used as a marker of how far back a `on_failure_jump' |
| 3493 | can go without being ignored. It is normally equal to `p' | 3467 | can go without being ignored. It is normally equal to `p' |
| @@ -3497,29 +3471,12 @@ analyse_first (p, pend, fastmap, multibyte) | |||
| 3497 | 3..9: <body> | 3471 | 3..9: <body> |
| 3498 | 10: on_failure_jump 3 | 3472 | 10: on_failure_jump 3 |
| 3499 | as used for the *? operator. */ | 3473 | as used for the *? operator. */ |
| 3500 | unsigned char *p1 = p; | 3474 | re_char *p1 = p; |
| 3501 | |||
| 3502 | if (p >= pend) | ||
| 3503 | { | ||
| 3504 | if (path_can_be_null) | ||
| 3505 | return (RESET_FAIL_STACK (), 1); | ||
| 3506 | |||
| 3507 | /* We have reached the (effective) end of pattern. */ | ||
| 3508 | if (PATTERN_STACK_EMPTY ()) | ||
| 3509 | return (RESET_FAIL_STACK (), 0); | ||
| 3510 | |||
| 3511 | p = (unsigned char*) POP_PATTERN_OP (); | ||
| 3512 | path_can_be_null = true; | ||
| 3513 | continue; | ||
| 3514 | } | ||
| 3515 | |||
| 3516 | /* We should never be about to go beyond the end of the pattern. */ | ||
| 3517 | assert (p < pend); | ||
| 3518 | 3475 | ||
| 3519 | switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++)) | 3476 | switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++)) |
| 3520 | { | 3477 | { |
| 3521 | case succeed: | 3478 | case succeed: |
| 3522 | p = pend; | 3479 | return 1; |
| 3523 | continue; | 3480 | continue; |
| 3524 | 3481 | ||
| 3525 | case duplicate: | 3482 | case duplicate: |
| @@ -3551,7 +3508,7 @@ analyse_first (p, pend, fastmap, multibyte) | |||
| 3551 | /* We could put all the chars except for \n (and maybe \0) | 3508 | /* We could put all the chars except for \n (and maybe \0) |
| 3552 | but we don't bother since it is generally not worth it. */ | 3509 | but we don't bother since it is generally not worth it. */ |
| 3553 | if (!fastmap) break; | 3510 | if (!fastmap) break; |
| 3554 | return (RESET_FAIL_STACK (), -1); | 3511 | return -1; |
| 3555 | 3512 | ||
| 3556 | 3513 | ||
| 3557 | case charset_not: | 3514 | case charset_not: |
| @@ -3626,7 +3583,7 @@ analyse_first (p, pend, fastmap, multibyte) | |||
| 3626 | #else /* emacs */ | 3583 | #else /* emacs */ |
| 3627 | /* This match depends on text properties. These end with | 3584 | /* This match depends on text properties. These end with |
| 3628 | aborting optimizations. */ | 3585 | aborting optimizations. */ |
| 3629 | return (RESET_FAIL_STACK (), -1); | 3586 | return -1; |
| 3630 | 3587 | ||
| 3631 | case categoryspec: | 3588 | case categoryspec: |
| 3632 | case notcategoryspec: | 3589 | case notcategoryspec: |
| @@ -3693,8 +3650,14 @@ analyse_first (p, pend, fastmap, multibyte) | |||
| 3693 | EXTRACT_NUMBER_AND_INCR (j, p); | 3650 | EXTRACT_NUMBER_AND_INCR (j, p); |
| 3694 | if (p + j <= p1) | 3651 | if (p + j <= p1) |
| 3695 | ; /* Backward jump to be ignored. */ | 3652 | ; /* Backward jump to be ignored. */ |
| 3696 | else if (!PUSH_PATTERN_OP (p + j, fail_stack)) | 3653 | else |
| 3697 | return (RESET_FAIL_STACK (), -2); | 3654 | { /* We have to look down both arms. |
| 3655 | We first go down the "straight" path so as to minimize | ||
| 3656 | stack usage when going through alternatives. */ | ||
| 3657 | int r = analyse_first (p, pend, fastmap, multibyte); | ||
| 3658 | if (r) return r; | ||
| 3659 | p += j; | ||
| 3660 | } | ||
| 3698 | continue; | 3661 | continue; |
| 3699 | 3662 | ||
| 3700 | 3663 | ||
| @@ -3734,15 +3697,13 @@ analyse_first (p, pend, fastmap, multibyte) | |||
| 3734 | 3697 | ||
| 3735 | /* Getting here means we have found the possible starting | 3698 | /* Getting here means we have found the possible starting |
| 3736 | characters for one path of the pattern -- and that the empty | 3699 | characters for one path of the pattern -- and that the empty |
| 3737 | string does not match. We need not follow this path further. | 3700 | string does not match. We need not follow this path further. */ |
| 3738 | Instead, look at the next alternative (remembered on the | 3701 | return 0; |
| 3739 | stack), or quit if no more. The test at the top of the loop | ||
| 3740 | does these things. */ | ||
| 3741 | path_can_be_null = false; | ||
| 3742 | p = pend; | ||
| 3743 | } /* while p */ | 3702 | } /* while p */ |
| 3744 | 3703 | ||
| 3745 | return (RESET_FAIL_STACK (), 0); | 3704 | /* We reached the end without matching anything. */ |
| 3705 | return 1; | ||
| 3706 | |||
| 3746 | } /* analyse_first */ | 3707 | } /* analyse_first */ |
| 3747 | 3708 | ||
| 3748 | /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in | 3709 | /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in |
| @@ -3777,8 +3738,6 @@ re_compile_fastmap (bufp) | |||
| 3777 | analysis = analyse_first (bufp->buffer, bufp->buffer + bufp->used, | 3738 | analysis = analyse_first (bufp->buffer, bufp->buffer + bufp->used, |
| 3778 | fastmap, RE_MULTIBYTE_P (bufp)); | 3739 | fastmap, RE_MULTIBYTE_P (bufp)); |
| 3779 | bufp->can_be_null = (analysis != 0); | 3740 | bufp->can_be_null = (analysis != 0); |
| 3780 | if (analysis < -1) | ||
| 3781 | return analysis; | ||
| 3782 | return 0; | 3741 | return 0; |
| 3783 | } /* re_compile_fastmap */ | 3742 | } /* re_compile_fastmap */ |
| 3784 | 3743 | ||
| @@ -3921,8 +3880,7 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop) | |||
| 3921 | 3880 | ||
| 3922 | /* Update the fastmap now if not correct already. */ | 3881 | /* Update the fastmap now if not correct already. */ |
| 3923 | if (fastmap && !bufp->fastmap_accurate) | 3882 | if (fastmap && !bufp->fastmap_accurate) |
| 3924 | if (re_compile_fastmap (bufp) == -2) | 3883 | re_compile_fastmap (bufp); |
| 3925 | return -2; | ||
| 3926 | 3884 | ||
| 3927 | /* See whether the pattern is anchored. */ | 3885 | /* See whether the pattern is anchored. */ |
| 3928 | anchored_start = (bufp->buffer[0] == begline); | 3886 | anchored_start = (bufp->buffer[0] == begline); |
| @@ -3958,7 +3916,7 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop) | |||
| 3958 | if (fastmap && startpos < total_size && !bufp->can_be_null) | 3916 | if (fastmap && startpos < total_size && !bufp->can_be_null) |
| 3959 | { | 3917 | { |
| 3960 | register re_char *d; | 3918 | register re_char *d; |
| 3961 | register unsigned int buf_ch; | 3919 | register re_wchar_t buf_ch; |
| 3962 | 3920 | ||
| 3963 | d = POS_ADDR_VSTRING (startpos); | 3921 | d = POS_ADDR_VSTRING (startpos); |
| 3964 | 3922 | ||
| @@ -4191,9 +4149,9 @@ static int bcmp_translate _RE_ARGS((re_char *s1, re_char *s2, | |||
| 4191 | 4149 | ||
| 4192 | /* If the operation is a match against one or more chars, | 4150 | /* If the operation is a match against one or more chars, |
| 4193 | return a pointer to the next operation, else return NULL. */ | 4151 | return a pointer to the next operation, else return NULL. */ |
| 4194 | static unsigned char * | 4152 | static re_char * |
| 4195 | skip_one_char (p) | 4153 | skip_one_char (p) |
| 4196 | unsigned char *p; | 4154 | re_char *p; |
| 4197 | { | 4155 | { |
| 4198 | switch (SWITCH_ENUM_CAST (*p++)) | 4156 | switch (SWITCH_ENUM_CAST (*p++)) |
| 4199 | { | 4157 | { |
| @@ -4303,7 +4261,7 @@ mutually_exclusive_p (bufp, p1, p2) | |||
| 4303 | case endline: | 4261 | case endline: |
| 4304 | case exactn: | 4262 | case exactn: |
| 4305 | { | 4263 | { |
| 4306 | register unsigned int c | 4264 | register re_wchar_t c |
| 4307 | = (re_opcode_t) *p2 == endline ? '\n' | 4265 | = (re_opcode_t) *p2 == endline ? '\n' |
| 4308 | : RE_STRING_CHAR(p2 + 2, pend - p2 - 2); | 4266 | : RE_STRING_CHAR(p2 + 2, pend - p2 - 2); |
| 4309 | 4267 | ||
| @@ -4525,8 +4483,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4525 | { | 4483 | { |
| 4526 | /* General temporaries. */ | 4484 | /* General temporaries. */ |
| 4527 | int mcnt; | 4485 | int mcnt; |
| 4486 | size_t reg; | ||
| 4528 | boolean not; | 4487 | boolean not; |
| 4529 | unsigned char *p1; | ||
| 4530 | 4488 | ||
| 4531 | /* Just past the end of the corresponding string. */ | 4489 | /* Just past the end of the corresponding string. */ |
| 4532 | re_char *end1, *end2; | 4490 | re_char *end1, *end2; |
| @@ -4545,8 +4503,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4545 | re_char *dfail; | 4503 | re_char *dfail; |
| 4546 | 4504 | ||
| 4547 | /* Where we are in the pattern, and the end of the pattern. */ | 4505 | /* Where we are in the pattern, and the end of the pattern. */ |
| 4548 | unsigned char *p = bufp->buffer; | 4506 | re_char *p = bufp->buffer; |
| 4549 | register unsigned char *pend = p + bufp->used; | 4507 | re_char *pend = p + bufp->used; |
| 4550 | 4508 | ||
| 4551 | /* We use this to map every character in the string. */ | 4509 | /* We use this to map every character in the string. */ |
| 4552 | RE_TRANSLATE_TYPE translate = bufp->translate; | 4510 | RE_TRANSLATE_TYPE translate = bufp->translate; |
| @@ -4655,8 +4613,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4655 | /* Initialize subexpression text positions to -1 to mark ones that no | 4613 | /* Initialize subexpression text positions to -1 to mark ones that no |
| 4656 | start_memory/stop_memory has been seen for. Also initialize the | 4614 | start_memory/stop_memory has been seen for. Also initialize the |
| 4657 | register information struct. */ | 4615 | register information struct. */ |
| 4658 | for (mcnt = 1; mcnt < num_regs; mcnt++) | 4616 | for (reg = 1; reg < num_regs; reg++) |
| 4659 | regstart[mcnt] = regend[mcnt] = NULL; | 4617 | regstart[reg] = regend[reg] = NULL; |
| 4660 | 4618 | ||
| 4661 | /* We move `string1' into `string2' if the latter's empty -- but not if | 4619 | /* We move `string1' into `string2' if the latter's empty -- but not if |
| 4662 | `string1' is null. */ | 4620 | `string1' is null. */ |
| @@ -4758,10 +4716,10 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4758 | 4716 | ||
| 4759 | DEBUG_PRINT1 ("\nSAVING match as best so far.\n"); | 4717 | DEBUG_PRINT1 ("\nSAVING match as best so far.\n"); |
| 4760 | 4718 | ||
| 4761 | for (mcnt = 1; mcnt < num_regs; mcnt++) | 4719 | for (reg = 1; reg < num_regs; reg++) |
| 4762 | { | 4720 | { |
| 4763 | best_regstart[mcnt] = regstart[mcnt]; | 4721 | best_regstart[reg] = regstart[reg]; |
| 4764 | best_regend[mcnt] = regend[mcnt]; | 4722 | best_regend[reg] = regend[reg]; |
| 4765 | } | 4723 | } |
| 4766 | } | 4724 | } |
| 4767 | goto fail; | 4725 | goto fail; |
| @@ -4784,10 +4742,10 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4784 | dend = ((d >= string1 && d <= end1) | 4742 | dend = ((d >= string1 && d <= end1) |
| 4785 | ? end_match_1 : end_match_2); | 4743 | ? end_match_1 : end_match_2); |
| 4786 | 4744 | ||
| 4787 | for (mcnt = 1; mcnt < num_regs; mcnt++) | 4745 | for (reg = 1; reg < num_regs; reg++) |
| 4788 | { | 4746 | { |
| 4789 | regstart[mcnt] = best_regstart[mcnt]; | 4747 | regstart[reg] = best_regstart[reg]; |
| 4790 | regend[mcnt] = best_regend[mcnt]; | 4748 | regend[reg] = best_regend[reg]; |
| 4791 | } | 4749 | } |
| 4792 | } | 4750 | } |
| 4793 | } /* d != end_match_2 */ | 4751 | } /* d != end_match_2 */ |
| @@ -4847,16 +4805,16 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4847 | 4805 | ||
| 4848 | /* Go through the first `min (num_regs, regs->num_regs)' | 4806 | /* Go through the first `min (num_regs, regs->num_regs)' |
| 4849 | registers, since that is all we initialized. */ | 4807 | registers, since that is all we initialized. */ |
| 4850 | for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++) | 4808 | for (reg = 1; reg < MIN (num_regs, regs->num_regs); reg++) |
| 4851 | { | 4809 | { |
| 4852 | if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt])) | 4810 | if (REG_UNSET (regstart[reg]) || REG_UNSET (regend[reg])) |
| 4853 | regs->start[mcnt] = regs->end[mcnt] = -1; | 4811 | regs->start[reg] = regs->end[reg] = -1; |
| 4854 | else | 4812 | else |
| 4855 | { | 4813 | { |
| 4856 | regs->start[mcnt] | 4814 | regs->start[reg] |
| 4857 | = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]); | 4815 | = (regoff_t) POINTER_TO_OFFSET (regstart[reg]); |
| 4858 | regs->end[mcnt] | 4816 | regs->end[reg] |
| 4859 | = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]); | 4817 | = (regoff_t) POINTER_TO_OFFSET (regend[reg]); |
| 4860 | } | 4818 | } |
| 4861 | } | 4819 | } |
| 4862 | 4820 | ||
| @@ -4865,8 +4823,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4865 | we (re)allocated the registers, this is the case, | 4823 | we (re)allocated the registers, this is the case, |
| 4866 | because we always allocate enough to have at least one | 4824 | because we always allocate enough to have at least one |
| 4867 | -1 at the end. */ | 4825 | -1 at the end. */ |
| 4868 | for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++) | 4826 | for (reg = num_regs; reg < regs->num_regs; reg++) |
| 4869 | regs->start[mcnt] = regs->end[mcnt] = -1; | 4827 | regs->start[reg] = regs->end[reg] = -1; |
| 4870 | } /* regs && !bufp->no_sub */ | 4828 | } /* regs && !bufp->no_sub */ |
| 4871 | 4829 | ||
| 4872 | DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", | 4830 | DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", |
| @@ -4964,7 +4922,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4964 | case anychar: | 4922 | case anychar: |
| 4965 | { | 4923 | { |
| 4966 | int buf_charlen; | 4924 | int buf_charlen; |
| 4967 | unsigned int buf_ch; | 4925 | re_wchar_t buf_ch; |
| 4968 | 4926 | ||
| 4969 | DEBUG_PRINT1 ("EXECUTING anychar.\n"); | 4927 | DEBUG_PRINT1 ("EXECUTING anychar.\n"); |
| 4970 | 4928 | ||
| @@ -4993,7 +4951,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4993 | 4951 | ||
| 4994 | /* Start of actual range_table, or end of bitmap if there is no | 4952 | /* Start of actual range_table, or end of bitmap if there is no |
| 4995 | range table. */ | 4953 | range table. */ |
| 4996 | unsigned char *range_table; | 4954 | re_char *range_table; |
| 4997 | 4955 | ||
| 4998 | /* Nonzero if there is a range table. */ | 4956 | /* Nonzero if there is a range table. */ |
| 4999 | int range_table_exists; | 4957 | int range_table_exists; |
| @@ -5317,8 +5275,10 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5317 | DEBUG_PRINT3 ("EXECUTING on_failure_jump_smart %d (to %p).\n", | 5275 | DEBUG_PRINT3 ("EXECUTING on_failure_jump_smart %d (to %p).\n", |
| 5318 | mcnt, p + mcnt); | 5276 | mcnt, p + mcnt); |
| 5319 | { | 5277 | { |
| 5320 | unsigned char *p1 = p; /* Next operation. */ | 5278 | re_char *p1 = p; /* Next operation. */ |
| 5279 | /* Please don't add casts to try and shut up GCC. */ | ||
| 5321 | unsigned char *p2 = p + mcnt; /* Destination of the jump. */ | 5280 | unsigned char *p2 = p + mcnt; /* Destination of the jump. */ |
| 5281 | unsigned char *p3 = p - 3; /* Location of the opcode. */ | ||
| 5322 | 5282 | ||
| 5323 | p -= 3; /* Reset so that we will re-execute the | 5283 | p -= 3; /* Reset so that we will re-execute the |
| 5324 | instruction once it's been changed. */ | 5284 | instruction once it's been changed. */ |
| @@ -5334,14 +5294,14 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5334 | { | 5294 | { |
| 5335 | /* Use a fast `on_failure_keep_string_jump' loop. */ | 5295 | /* Use a fast `on_failure_keep_string_jump' loop. */ |
| 5336 | DEBUG_PRINT1 (" smart exclusive => fast loop.\n"); | 5296 | DEBUG_PRINT1 (" smart exclusive => fast loop.\n"); |
| 5337 | *p = (unsigned char) on_failure_keep_string_jump; | 5297 | *p3 = (unsigned char) on_failure_keep_string_jump; |
| 5338 | STORE_NUMBER (p2 - 2, mcnt + 3); | 5298 | STORE_NUMBER (p2 - 2, mcnt + 3); |
| 5339 | } | 5299 | } |
| 5340 | else | 5300 | else |
| 5341 | { | 5301 | { |
| 5342 | /* Default to a safe `on_failure_jump' loop. */ | 5302 | /* Default to a safe `on_failure_jump' loop. */ |
| 5343 | DEBUG_PRINT1 (" smart default => slow loop.\n"); | 5303 | DEBUG_PRINT1 (" smart default => slow loop.\n"); |
| 5344 | *p = (unsigned char) on_failure_jump; | 5304 | *p3 = (unsigned char) on_failure_jump; |
| 5345 | } | 5305 | } |
| 5346 | DEBUG_STATEMENT (debug -= 2); | 5306 | DEBUG_STATEMENT (debug -= 2); |
| 5347 | } | 5307 | } |
| @@ -5361,17 +5321,18 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5361 | /* Have to succeed matching what follows at least n times. | 5321 | /* Have to succeed matching what follows at least n times. |
| 5362 | After that, handle like `on_failure_jump'. */ | 5322 | After that, handle like `on_failure_jump'. */ |
| 5363 | case succeed_n: | 5323 | case succeed_n: |
| 5324 | /* Signedness doesn't matter since we only compare MCNT to 0. */ | ||
| 5364 | EXTRACT_NUMBER (mcnt, p + 2); | 5325 | EXTRACT_NUMBER (mcnt, p + 2); |
| 5365 | DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt); | 5326 | DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt); |
| 5366 | 5327 | ||
| 5367 | /* Originally, mcnt is how many times we HAVE to succeed. */ | 5328 | /* Originally, mcnt is how many times we HAVE to succeed. */ |
| 5368 | if (mcnt != 0) | 5329 | if (mcnt != 0) |
| 5369 | { | 5330 | { |
| 5331 | /* Please don't add a cast to try and shut up GCC. */ | ||
| 5332 | unsigned char *p2 = p + 2; /* Location of the counter. */ | ||
| 5370 | mcnt--; | 5333 | mcnt--; |
| 5371 | p += 2; | 5334 | p += 4; |
| 5372 | PUSH_FAILURE_COUNT (p); | 5335 | PUSH_NUMBER (p2, mcnt); |
| 5373 | DEBUG_PRINT3 (" Setting %p to %d.\n", p, mcnt); | ||
| 5374 | STORE_NUMBER_AND_INCR (p, mcnt); | ||
| 5375 | } | 5336 | } |
| 5376 | else | 5337 | else |
| 5377 | /* The two bytes encoding mcnt == 0 are two no_op opcodes. */ | 5338 | /* The two bytes encoding mcnt == 0 are two no_op opcodes. */ |
| @@ -5379,15 +5340,17 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5379 | break; | 5340 | break; |
| 5380 | 5341 | ||
| 5381 | case jump_n: | 5342 | case jump_n: |
| 5343 | /* Signedness doesn't matter since we only compare MCNT to 0. */ | ||
| 5382 | EXTRACT_NUMBER (mcnt, p + 2); | 5344 | EXTRACT_NUMBER (mcnt, p + 2); |
| 5383 | DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt); | 5345 | DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt); |
| 5384 | 5346 | ||
| 5385 | /* Originally, this is how many times we CAN jump. */ | 5347 | /* Originally, this is how many times we CAN jump. */ |
| 5386 | if (mcnt != 0) | 5348 | if (mcnt != 0) |
| 5387 | { | 5349 | { |
| 5350 | /* Please don't add a cast to try and shut up GCC. */ | ||
| 5351 | unsigned char *p2 = p + 2; /* Location of the counter. */ | ||
| 5388 | mcnt--; | 5352 | mcnt--; |
| 5389 | PUSH_FAILURE_COUNT (p + 2); | 5353 | PUSH_NUMBER (p2, mcnt); |
| 5390 | STORE_NUMBER (p + 2, mcnt); | ||
| 5391 | goto unconditional_jump; | 5354 | goto unconditional_jump; |
| 5392 | } | 5355 | } |
| 5393 | /* If don't have to jump any more, skip over the rest of command. */ | 5356 | /* If don't have to jump any more, skip over the rest of command. */ |
| @@ -5397,14 +5360,16 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5397 | 5360 | ||
| 5398 | case set_number_at: | 5361 | case set_number_at: |
| 5399 | { | 5362 | { |
| 5363 | unsigned char *p2; /* Location of the counter. */ | ||
| 5400 | DEBUG_PRINT1 ("EXECUTING set_number_at.\n"); | 5364 | DEBUG_PRINT1 ("EXECUTING set_number_at.\n"); |
| 5401 | 5365 | ||
| 5402 | EXTRACT_NUMBER_AND_INCR (mcnt, p); | 5366 | EXTRACT_NUMBER_AND_INCR (mcnt, p); |
| 5403 | p1 = p + mcnt; | 5367 | /* Please don't add a cast to try and shut up GCC. */ |
| 5368 | p2 = p + mcnt; | ||
| 5369 | /* Signedness doesn't matter since we only copy MCNT's bits . */ | ||
| 5404 | EXTRACT_NUMBER_AND_INCR (mcnt, p); | 5370 | EXTRACT_NUMBER_AND_INCR (mcnt, p); |
| 5405 | DEBUG_PRINT3 (" Setting %p to %d.\n", p1, mcnt); | 5371 | DEBUG_PRINT3 (" Setting %p to %d.\n", p2, mcnt); |
| 5406 | PUSH_FAILURE_COUNT (p1); | 5372 | PUSH_NUMBER (p2, mcnt); |
| 5407 | STORE_NUMBER (p1, mcnt); | ||
| 5408 | break; | 5373 | break; |
| 5409 | } | 5374 | } |
| 5410 | 5375 | ||
| @@ -5422,7 +5387,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5422 | { | 5387 | { |
| 5423 | /* C1 is the character before D, S1 is the syntax of C1, C2 | 5388 | /* C1 is the character before D, S1 is the syntax of C1, C2 |
| 5424 | is the character at D, and S2 is the syntax of C2. */ | 5389 | is the character at D, and S2 is the syntax of C2. */ |
| 5425 | int c1, c2, s1, s2; | 5390 | re_wchar_t c1, c2; |
| 5391 | int s1, s2; | ||
| 5426 | #ifdef emacs | 5392 | #ifdef emacs |
| 5427 | int offset = PTR_TO_OFFSET (d - 1); | 5393 | int offset = PTR_TO_OFFSET (d - 1); |
| 5428 | int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); | 5394 | int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); |
| @@ -5461,7 +5427,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5461 | { | 5427 | { |
| 5462 | /* C1 is the character before D, S1 is the syntax of C1, C2 | 5428 | /* C1 is the character before D, S1 is the syntax of C1, C2 |
| 5463 | is the character at D, and S2 is the syntax of C2. */ | 5429 | is the character at D, and S2 is the syntax of C2. */ |
| 5464 | int c1, c2, s1, s2; | 5430 | re_wchar_t c1, c2; |
| 5431 | int s1, s2; | ||
| 5465 | #ifdef emacs | 5432 | #ifdef emacs |
| 5466 | int offset = PTR_TO_OFFSET (d); | 5433 | int offset = PTR_TO_OFFSET (d); |
| 5467 | int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); | 5434 | int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); |
| @@ -5504,7 +5471,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5504 | { | 5471 | { |
| 5505 | /* C1 is the character before D, S1 is the syntax of C1, C2 | 5472 | /* C1 is the character before D, S1 is the syntax of C1, C2 |
| 5506 | is the character at D, and S2 is the syntax of C2. */ | 5473 | is the character at D, and S2 is the syntax of C2. */ |
| 5507 | int c1, c2, s1, s2; | 5474 | re_wchar_t c1, c2; |
| 5475 | int s1, s2; | ||
| 5508 | #ifdef emacs | 5476 | #ifdef emacs |
| 5509 | int offset = PTR_TO_OFFSET (d) - 1; | 5477 | int offset = PTR_TO_OFFSET (d) - 1; |
| 5510 | int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); | 5478 | int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); |
| @@ -5549,7 +5517,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5549 | } | 5517 | } |
| 5550 | #endif | 5518 | #endif |
| 5551 | { | 5519 | { |
| 5552 | int c, len; | 5520 | int len; |
| 5521 | re_wchar_t c; | ||
| 5553 | 5522 | ||
| 5554 | c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len); | 5523 | c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len); |
| 5555 | 5524 | ||
| @@ -5585,7 +5554,9 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5585 | DEBUG_PRINT3 ("EXECUTING %scategoryspec %d.\n", not?"not":"", mcnt); | 5554 | DEBUG_PRINT3 ("EXECUTING %scategoryspec %d.\n", not?"not":"", mcnt); |
| 5586 | PREFETCH (); | 5555 | PREFETCH (); |
| 5587 | { | 5556 | { |
| 5588 | int c, len; | 5557 | int len; |
| 5558 | re_wchar_t c; | ||
| 5559 | |||
| 5589 | c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len); | 5560 | c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len); |
| 5590 | 5561 | ||
| 5591 | if ((!CHAR_HAS_CATEGORY (c, mcnt)) ^ not) | 5562 | if ((!CHAR_HAS_CATEGORY (c, mcnt)) ^ not) |
| @@ -5607,8 +5578,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5607 | IMMEDIATE_QUIT_CHECK; | 5578 | IMMEDIATE_QUIT_CHECK; |
| 5608 | if (!FAIL_STACK_EMPTY ()) | 5579 | if (!FAIL_STACK_EMPTY ()) |
| 5609 | { | 5580 | { |
| 5610 | re_char *str; | 5581 | re_char *str, *pat; |
| 5611 | unsigned char *pat; | ||
| 5612 | /* A restart point is known. Restore to that state. */ | 5582 | /* A restart point is known. Restore to that state. */ |
| 5613 | DEBUG_PRINT1 ("\nFAIL:\n"); | 5583 | DEBUG_PRINT1 ("\nFAIL:\n"); |
| 5614 | POP_FAILURE_POINT (str, pat); | 5584 | POP_FAILURE_POINT (str, pat); |
| @@ -5678,7 +5648,7 @@ bcmp_translate (s1, s2, len, translate, multibyte) | |||
| 5678 | while (p1 < p1_end && p2 < p2_end) | 5648 | while (p1 < p1_end && p2 < p2_end) |
| 5679 | { | 5649 | { |
| 5680 | int p1_charlen, p2_charlen; | 5650 | int p1_charlen, p2_charlen; |
| 5681 | int p1_ch, p2_ch; | 5651 | re_wchar_t p1_ch, p2_ch; |
| 5682 | 5652 | ||
| 5683 | p1_ch = RE_STRING_CHAR_AND_LENGTH (p1, p1_end - p1, p1_charlen); | 5653 | p1_ch = RE_STRING_CHAR_AND_LENGTH (p1, p1_end - p1, p1_charlen); |
| 5684 | p2_ch = RE_STRING_CHAR_AND_LENGTH (p2, p2_end - p2, p2_charlen); | 5654 | p2_ch = RE_STRING_CHAR_AND_LENGTH (p2, p2_end - p2, p2_charlen); |