about | summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author Stefan Monnier 2000-10-26 00:45:01 +0000
committer Stefan Monnier 2000-10-26 00:45:01 +0000
commit 0161849810909a9971a78f298e3da2d0dc36d15a (patch)
tree aa8e96790f762599ed9484746eb03cff173262a4 /src
parent d97151cb57618491364b74fccef2a4a67fd4d78b (diff)
download emacs-0161849810909a9971a78f298e3da2d0dc36d15a.tar.gz
emacs-0161849810909a9971a78f298e3da2d0dc36d15a.zip
More `unsigned char' -> `re_char' changes.
Also change several `int' into `re_wchar_t'.
(PATTERN_STACK_EMPTY, PUSH_PATTERN_OP, POP_PATTERN_OP): Remove.
(PUSH_FAILURE_POINTER): Don't cast any more.
(POP_FAILURE_REG_OR_COUNT): Remove the cast that strips `const'. We want GCC to complain, since this piece of code makes re_match non-reentrant, which *should* be fixed.
(GET_BUFFER_SPACE): Use size_t rather than unsigned long.
(EXTEND_BUFFER): Use RETALLOC.
(SET_LIST_BIT): Don't cast.
(re_wchar_t): New type.
(re_iswctype, re_wctype_to_bit): Make it crystal clear to GCC that those two functions will always properly return.
(IMMEDIATE_QUIT_CHECK): Cast to void.
(analyse_first): Use recursion rather than an explicit stack.
(re_compile_fastmap): Can't fail anymore.
(re_search_2): Don't check re_compile_fastmap for failure.
(PUSH_NUMBER): Renamed from PUSH_FAILURE_COUNT. Now also sets the new value (passed in a new argument).
(re_match_2_internal): Use it. Also, use a new var `reg' of type size_t when looping through regs rather than reuse the inappropriate `mcnt'.
Diffstat (limited to 'src')
-rw-r--r-- src/regex.c 360
1 files changed, 165 insertions, 195 deletions
diff --git a/src/regex.c b/src/regex.c
index fab989813d5..e10a3565f2f 100644
--- a/src/regex.c
+++ b/src/regex.c
@@ -22,10 +22,9 @@
22/* TODO: 22/* TODO:
23 - structure the opcode space into opcode+flag. 23 - structure the opcode space into opcode+flag.
24 - merge with glibc's regex.[ch]. 24 - merge with glibc's regex.[ch].
25 - replace succeed_n + jump_n with a combined operation so that the counter 25 - replace (succeed_n + jump_n + set_number_at) with something that doesn't
26 can simply be decremented when popping the failure_point without having 26 need to modify the compiled regexp.
27 to stack up failure_count entries. 27*/
28 */
29 28
30/* AIX requires this to be the first thing in the file. */ 29/* AIX requires this to be the first thing in the file. */
31#if defined _AIX && !defined REGEX_MALLOC 30#if defined _AIX && !defined REGEX_MALLOC
@@ -553,7 +552,7 @@ typedef enum
553 is followed by a range table: 552 is followed by a range table:
554 2 bytes of flags for character sets (low 8 bits, high 8 bits) 553 2 bytes of flags for character sets (low 8 bits, high 8 bits)
555 See RANGE_TABLE_WORK_BITS below. 554 See RANGE_TABLE_WORK_BITS below.
556 2 bytes, the number of pairs that follow 555 2 bytes, the number of pairs that follow (upto 32767)
557 pairs, each 2 multibyte characters, 556 pairs, each 2 multibyte characters,
558 each multibyte character represented as 3 bytes. */ 557 each multibyte character represented as 3 bytes. */
559 charset, 558 charset,
@@ -700,7 +699,7 @@ static void extract_number _RE_ARGS ((int *dest, re_char *source));
700static void 699static void
701extract_number (dest, source) 700extract_number (dest, source)
702 int *dest; 701 int *dest;
703 unsigned char *source; 702 re_char *source;
704{ 703{
705 int temp = SIGN_EXTEND_CHAR (*(source + 1)); 704 int temp = SIGN_EXTEND_CHAR (*(source + 1));
706 *dest = *source & 0377; 705 *dest = *source & 0377;
@@ -729,7 +728,7 @@ static void extract_number_and_incr _RE_ARGS ((int *destination,
729static void 728static void
730extract_number_and_incr (destination, source) 729extract_number_and_incr (destination, source)
731 int *destination; 730 int *destination;
732 unsigned char **source; 731 re_char **source;
733{ 732{
734 extract_number (destination, *source); 733 extract_number (destination, *source);
735 *source += 2; 734 *source += 2;
@@ -803,9 +802,9 @@ extract_number_and_incr (destination, source)
803#define CHARSET_LOOKUP_RANGE_TABLE_RAW(not, c, range_table, count) \ 802#define CHARSET_LOOKUP_RANGE_TABLE_RAW(not, c, range_table, count) \
804 do \ 803 do \
805 { \ 804 { \
806 int range_start, range_end; \ 805 re_wchar_t range_start, range_end; \
807 unsigned char *p; \ 806 re_char *p; \
808 unsigned char *range_table_end \ 807 re_char *range_table_end \
809 = CHARSET_RANGE_TABLE_END ((range_table), (count)); \ 808 = CHARSET_RANGE_TABLE_END ((range_table), (count)); \
810 \ 809 \
811 for (p = (range_table); p < range_table_end; p += 2 * 3) \ 810 for (p = (range_table); p < range_table_end; p += 2 * 3) \
@@ -829,8 +828,8 @@ extract_number_and_incr (destination, source)
829 { \ 828 { \
830 /* Number of ranges in range table. */ \ 829 /* Number of ranges in range table. */ \
831 int count; \ 830 int count; \
832 unsigned char *range_table = CHARSET_RANGE_TABLE (charset); \ 831 re_char *range_table = CHARSET_RANGE_TABLE (charset); \
833 \ 832 \
834 EXTRACT_NUMBER_AND_INCR (count, range_table); \ 833 EXTRACT_NUMBER_AND_INCR (count, range_table); \
835 CHARSET_LOOKUP_RANGE_TABLE_RAW ((not), (c), range_table, count); \ 834 CHARSET_LOOKUP_RANGE_TABLE_RAW ((not), (c), range_table, count); \
836 } \ 835 } \
@@ -899,12 +898,12 @@ print_fastmap (fastmap)
899 898
900void 899void
901print_partial_compiled_pattern (start, end) 900print_partial_compiled_pattern (start, end)
902 unsigned char *start; 901 re_char *start;
903 unsigned char *end; 902 re_char *end;
904{ 903{
905 int mcnt, mcnt2; 904 int mcnt, mcnt2;
906 unsigned char *p = start; 905 re_char *p = start;
907 unsigned char *pend = end; 906 re_char *pend = end;
908 907
909 if (start == NULL) 908 if (start == NULL)
910 { 909 {
@@ -1142,7 +1141,7 @@ void
1142print_compiled_pattern (bufp) 1141print_compiled_pattern (bufp)
1143 struct re_pattern_buffer *bufp; 1142 struct re_pattern_buffer *bufp;
1144{ 1143{
1145 unsigned char *buffer = bufp->buffer; 1144 re_char *buffer = bufp->buffer;
1146 1145
1147 print_partial_compiled_pattern (buffer, buffer + bufp->used); 1146 print_partial_compiled_pattern (buffer, buffer + bufp->used);
1148 printf ("%ld bytes used/%ld bytes allocated.\n", 1147 printf ("%ld bytes used/%ld bytes allocated.\n",
@@ -1326,7 +1325,7 @@ size_t re_max_failures = 4000;
1326 1325
1327union fail_stack_elt 1326union fail_stack_elt
1328{ 1327{
1329 const unsigned char *pointer; 1328 re_char *pointer;
1330 /* This should be the biggest `int' that's no bigger than a pointer. */ 1329 /* This should be the biggest `int' that's no bigger than a pointer. */
1331 long integer; 1330 long integer;
1332}; 1331};
@@ -1341,7 +1340,6 @@ typedef struct
1341 size_t frame; /* Offset of the cur constructed frame. */ 1340 size_t frame; /* Offset of the cur constructed frame. */
1342} fail_stack_type; 1341} fail_stack_type;
1343 1342
1344#define PATTERN_STACK_EMPTY() (fail_stack.avail == 0)
1345#define FAIL_STACK_EMPTY() (fail_stack.frame == 0) 1343#define FAIL_STACK_EMPTY() (fail_stack.frame == 0)
1346#define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size) 1344#define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size)
1347 1345
@@ -1413,22 +1411,11 @@ typedef struct
1413 1))) 1411 1)))
1414 1412
1415 1413
1416/* Push pointer POINTER on FAIL_STACK.
1417 Return 1 if was able to do so and 0 if ran out of memory allocating
1418 space to do so. */
1419#define PUSH_PATTERN_OP(POINTER, FAIL_STACK) \
1420 ((FAIL_STACK_FULL () \
1421 && !GROW_FAIL_STACK (FAIL_STACK)) \
1422 ? 0 \
1423 : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER, \
1424 1))
1425#define POP_PATTERN_OP() POP_FAILURE_POINTER ()
1426
1427/* Push a pointer value onto the failure stack. 1414/* Push a pointer value onto the failure stack.
1428 Assumes the variable `fail_stack'. Probably should only 1415 Assumes the variable `fail_stack'. Probably should only
1429 be called from within `PUSH_FAILURE_POINT'. */ 1416 be called from within `PUSH_FAILURE_POINT'. */
1430#define PUSH_FAILURE_POINTER(item) \ 1417#define PUSH_FAILURE_POINTER(item) \
1431 fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item) 1418 fail_stack.stack[fail_stack.avail++].pointer = (item)
1432 1419
1433/* This pushes an integer-valued item onto the failure stack. 1420/* This pushes an integer-valued item onto the failure stack.
1434 Assumes the variable `fail_stack'. Probably should only 1421 Assumes the variable `fail_stack'. Probably should only
@@ -1478,16 +1465,19 @@ do { \
1478 PUSH_FAILURE_INT (num); \ 1465 PUSH_FAILURE_INT (num); \
1479} while (0) 1466} while (0)
1480 1467
1481#define PUSH_FAILURE_COUNT(ptr) \ 1468/* Change the counter's value to VAL, but make sure that it will
1469 be reset when backtracking. */
1470#define PUSH_NUMBER(ptr,val) \
1482do { \ 1471do { \
1483 char *destination; \ 1472 char *destination; \
1484 int c; \ 1473 int c; \
1485 ENSURE_FAIL_STACK(3); \ 1474 ENSURE_FAIL_STACK(3); \
1486 EXTRACT_NUMBER (c, ptr); \ 1475 EXTRACT_NUMBER (c, ptr); \
1487 DEBUG_PRINT3 (" Push counter %p = %d\n", ptr, c); \ 1476 DEBUG_PRINT4 (" Push number %p = %d -> %d\n", ptr, c, val); \
1488 PUSH_FAILURE_INT (c); \ 1477 PUSH_FAILURE_INT (c); \
1489 PUSH_FAILURE_POINTER (ptr); \ 1478 PUSH_FAILURE_POINTER (ptr); \
1490 PUSH_FAILURE_INT (-1); \ 1479 PUSH_FAILURE_INT (-1); \
1480 STORE_NUMBER (ptr, val); \
1491} while (0) 1481} while (0)
1492 1482
1493/* Pop a saved register off the stack. */ 1483/* Pop a saved register off the stack. */
@@ -1497,7 +1487,9 @@ do { \
1497 if (reg == -1) \ 1487 if (reg == -1) \
1498 { \ 1488 { \
1499 /* It's a counter. */ \ 1489 /* It's a counter. */ \
1500 unsigned char *ptr = (unsigned char*) POP_FAILURE_POINTER (); \ 1490 /* Here, we discard `const', which makes re_match non-reentrant. \
1491 Gcc gives a warning for it, which is good. */ \
1492 unsigned char *ptr = POP_FAILURE_POINTER (); \
1501 reg = POP_FAILURE_INT (); \ 1493 reg = POP_FAILURE_INT (); \
1502 STORE_NUMBER (ptr, reg); \ 1494 STORE_NUMBER (ptr, reg); \
1503 DEBUG_PRINT3 (" Pop counter %p = %d\n", ptr, reg); \ 1495 DEBUG_PRINT3 (" Pop counter %p = %d\n", ptr, reg); \
@@ -1603,14 +1595,14 @@ do { \
1603 while (fail_stack.frame < fail_stack.avail) \ 1595 while (fail_stack.frame < fail_stack.avail) \
1604 POP_FAILURE_REG_OR_COUNT (); \ 1596 POP_FAILURE_REG_OR_COUNT (); \
1605 \ 1597 \
1606 pat = (unsigned char *) POP_FAILURE_POINTER (); \ 1598 pat = POP_FAILURE_POINTER (); \
1607 DEBUG_PRINT2 (" Popping pattern %p: ", pat); \ 1599 DEBUG_PRINT2 (" Popping pattern %p: ", pat); \
1608 DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \ 1600 DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \
1609 \ 1601 \
1610 /* If the saved string location is NULL, it came from an \ 1602 /* If the saved string location is NULL, it came from an \
1611 on_failure_keep_string_jump opcode, and we want to throw away the \ 1603 on_failure_keep_string_jump opcode, and we want to throw away the \
1612 saved NULL, thus retaining our current position in the string. */ \ 1604 saved NULL, thus retaining our current position in the string. */ \
1613 str = (re_char *) POP_FAILURE_POINTER (); \ 1605 str = POP_FAILURE_POINTER (); \
1614 DEBUG_PRINT2 (" Popping string %p: `", str); \ 1606 DEBUG_PRINT2 (" Popping string %p: `", str); \
1615 DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \ 1607 DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
1616 DEBUG_PRINT1 ("'\n"); \ 1608 DEBUG_PRINT1 ("'\n"); \
@@ -1641,20 +1633,18 @@ static void insert_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1641 int arg, unsigned char *end)); 1633 int arg, unsigned char *end));
1642static void insert_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc, 1634static void insert_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1643 int arg1, int arg2, unsigned char *end)); 1635 int arg1, int arg2, unsigned char *end));
1644static boolean at_begline_loc_p _RE_ARGS ((const unsigned char *pattern, 1636static boolean at_begline_loc_p _RE_ARGS ((re_char *pattern,
1645 const unsigned char *p, 1637 re_char *p,
1646 reg_syntax_t syntax)); 1638 reg_syntax_t syntax));
1647static boolean at_endline_loc_p _RE_ARGS ((const unsigned char *p, 1639static boolean at_endline_loc_p _RE_ARGS ((re_char *p,
1648 const unsigned char *pend, 1640 re_char *pend,
1649 reg_syntax_t syntax)); 1641 reg_syntax_t syntax));
1650static unsigned char *skip_one_char _RE_ARGS ((unsigned char *p)); 1642static re_char *skip_one_char _RE_ARGS ((re_char *p));
1651static int analyse_first _RE_ARGS ((unsigned char *p, unsigned char *pend, 1643static int analyse_first _RE_ARGS ((re_char *p, re_char *pend,
1652 char *fastmap, const int multibyte)); 1644 char *fastmap, const int multibyte));
1653 1645
1654/* Fetch the next character in the uncompiled pattern---translating it 1646/* Fetch the next character in the uncompiled pattern---translating it
1655 if necessary. Also cast from a signed character in the constant 1647 if necessary. */
1656 string passed to us by the user to an unsigned char that we can use
1657 as an array index (in, e.g., `translate'). */
1658#define PATFETCH(c) \ 1648#define PATFETCH(c) \
1659 do { \ 1649 do { \
1660 PATFETCH_RAW (c); \ 1650 PATFETCH_RAW (c); \
@@ -1689,7 +1679,7 @@ static int analyse_first _RE_ARGS ((unsigned char *p, unsigned char *pend,
1689 1679
1690/* Make sure we have at least N more bytes of space in buffer. */ 1680/* Make sure we have at least N more bytes of space in buffer. */
1691#define GET_BUFFER_SPACE(n) \ 1681#define GET_BUFFER_SPACE(n) \
1692 while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated) \ 1682 while ((size_t) (b - bufp->buffer + (n)) > bufp->allocated) \
1693 EXTEND_BUFFER () 1683 EXTEND_BUFFER ()
1694 1684
1695/* Make sure we have one more byte of buffer space and then add C to it. */ 1685/* Make sure we have one more byte of buffer space and then add C to it. */
@@ -1778,13 +1768,13 @@ static int analyse_first _RE_ARGS ((unsigned char *p, unsigned char *pend,
1778#endif 1768#endif
1779#define EXTEND_BUFFER() \ 1769#define EXTEND_BUFFER() \
1780 do { \ 1770 do { \
1781 unsigned char *old_buffer = bufp->buffer; \ 1771 re_char *old_buffer = bufp->buffer; \
1782 if (bufp->allocated == MAX_BUF_SIZE) \ 1772 if (bufp->allocated == MAX_BUF_SIZE) \
1783 return REG_ESIZE; \ 1773 return REG_ESIZE; \
1784 bufp->allocated <<= 1; \ 1774 bufp->allocated <<= 1; \
1785 if (bufp->allocated > MAX_BUF_SIZE) \ 1775 if (bufp->allocated > MAX_BUF_SIZE) \
1786 bufp->allocated = MAX_BUF_SIZE; \ 1776 bufp->allocated = MAX_BUF_SIZE; \
1787 bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\ 1777 RETALLOC (bufp->buffer, bufp->allocated, unsigned char); \
1788 if (bufp->buffer == NULL) \ 1778 if (bufp->buffer == NULL) \
1789 return REG_ESPACE; \ 1779 return REG_ESPACE; \
1790 /* If the buffer moved, move all the pointers into it. */ \ 1780 /* If the buffer moved, move all the pointers into it. */ \
@@ -1907,9 +1897,7 @@ struct range_table_work_area
1907 1897
1908 1898
1909/* Set the bit for character C in a list. */ 1899/* Set the bit for character C in a list. */
1910#define SET_LIST_BIT(c) \ 1900#define SET_LIST_BIT(c) (b[((c)) / BYTEWIDTH] |= 1 << ((c) % BYTEWIDTH))
1911 (b[((unsigned char) (c)) / BYTEWIDTH] \
1912 |= 1 << (((unsigned char) c) % BYTEWIDTH))
1913 1901
1914 1902
1915/* Get the next unsigned number in the uncompiled pattern. */ 1903/* Get the next unsigned number in the uncompiled pattern. */
@@ -1940,6 +1928,7 @@ struct range_table_work_area
1940# define CHAR_CLASS_MAX_LENGTH 256 1928# define CHAR_CLASS_MAX_LENGTH 256
1941# endif 1929# endif
1942typedef wctype_t re_wctype_t; 1930typedef wctype_t re_wctype_t;
1931typedef wchar_t re_wchar_t;
1943# define re_wctype wctype 1932# define re_wctype wctype
1944# define re_iswctype iswctype 1933# define re_iswctype iswctype
1945# define re_wctype_to_bit(cc) 0 1934# define re_wctype_to_bit(cc) 0
@@ -1947,7 +1936,7 @@ typedef wctype_t re_wctype_t;
1947# define CHAR_CLASS_MAX_LENGTH 9 /* Namely, `multibyte'. */ 1936# define CHAR_CLASS_MAX_LENGTH 9 /* Namely, `multibyte'. */
1948# define btowc(c) c 1937# define btowc(c) c
1949 1938
1950/* Character classes' indices. */ 1939/* Character classes. */
1951typedef enum { RECC_ERROR = 0, 1940typedef enum { RECC_ERROR = 0,
1952 RECC_ALNUM, RECC_ALPHA, RECC_WORD, 1941 RECC_ALNUM, RECC_ALPHA, RECC_WORD,
1953 RECC_GRAPH, RECC_PRINT, 1942 RECC_GRAPH, RECC_PRINT,
@@ -1959,10 +1948,12 @@ typedef enum { RECC_ERROR = 0,
1959 RECC_ASCII, RECC_UNIBYTE 1948 RECC_ASCII, RECC_UNIBYTE
1960} re_wctype_t; 1949} re_wctype_t;
1961 1950
1951typedef int re_wchar_t;
1952
1962/* Map a string to the char class it names (if any). */ 1953/* Map a string to the char class it names (if any). */
1963static re_wctype_t 1954static re_wctype_t
1964re_wctype (string) 1955re_wctype (string)
1965 unsigned char *string; 1956 re_char *string;
1966{ 1957{
1967 if (STREQ (string, "alnum")) return RECC_ALNUM; 1958 if (STREQ (string, "alnum")) return RECC_ALNUM;
1968 else if (STREQ (string, "alpha")) return RECC_ALPHA; 1959 else if (STREQ (string, "alpha")) return RECC_ALPHA;
@@ -1990,27 +1981,30 @@ re_iswctype (ch, cc)
1990 int ch; 1981 int ch;
1991 re_wctype_t cc; 1982 re_wctype_t cc;
1992{ 1983{
1984 boolean ret = false;
1985
1993 switch (cc) 1986 switch (cc)
1994 { 1987 {
1995 case RECC_ALNUM: return ISALNUM (ch); 1988 case RECC_ALNUM: ret = ISALNUM (ch);
1996 case RECC_ALPHA: return ISALPHA (ch); 1989 case RECC_ALPHA: ret = ISALPHA (ch);
1997 case RECC_BLANK: return ISBLANK (ch); 1990 case RECC_BLANK: ret = ISBLANK (ch);
1998 case RECC_CNTRL: return ISCNTRL (ch); 1991 case RECC_CNTRL: ret = ISCNTRL (ch);
1999 case RECC_DIGIT: return ISDIGIT (ch); 1992 case RECC_DIGIT: ret = ISDIGIT (ch);
2000 case RECC_GRAPH: return ISGRAPH (ch); 1993 case RECC_GRAPH: ret = ISGRAPH (ch);
2001 case RECC_LOWER: return ISLOWER (ch); 1994 case RECC_LOWER: ret = ISLOWER (ch);
2002 case RECC_PRINT: return ISPRINT (ch); 1995 case RECC_PRINT: ret = ISPRINT (ch);
2003 case RECC_PUNCT: return ISPUNCT (ch); 1996 case RECC_PUNCT: ret = ISPUNCT (ch);
2004 case RECC_SPACE: return ISSPACE (ch); 1997 case RECC_SPACE: ret = ISSPACE (ch);
2005 case RECC_UPPER: return ISUPPER (ch); 1998 case RECC_UPPER: ret = ISUPPER (ch);
2006 case RECC_XDIGIT: return ISXDIGIT (ch); 1999 case RECC_XDIGIT: ret = ISXDIGIT (ch);
2007 case RECC_ASCII: return IS_REAL_ASCII (ch); 2000 case RECC_ASCII: ret = IS_REAL_ASCII (ch);
2008 case RECC_NONASCII: return !IS_REAL_ASCII (ch); 2001 case RECC_NONASCII: ret = !IS_REAL_ASCII (ch);
2009 case RECC_UNIBYTE: return ISUNIBYTE (ch); 2002 case RECC_UNIBYTE: ret = ISUNIBYTE (ch);
2010 case RECC_MULTIBYTE: return !ISUNIBYTE (ch); 2003 case RECC_MULTIBYTE: ret = !ISUNIBYTE (ch);
2011 case RECC_WORD: return ISWORD (ch); 2004 case RECC_WORD: ret = ISWORD (ch);
2012 case RECC_ERROR: return false; 2005 case RECC_ERROR: ret = false;
2013 } 2006 }
2007 return ret;
2014} 2008}
2015 2009
2016/* Return a bit-pattern to use in the range-table bits to match multibyte 2010/* Return a bit-pattern to use in the range-table bits to match multibyte
@@ -2019,18 +2013,21 @@ static int
2019re_wctype_to_bit (cc) 2013re_wctype_to_bit (cc)
2020 re_wctype_t cc; 2014 re_wctype_t cc;
2021{ 2015{
2016 int ret = 0;
2017
2022 switch (cc) 2018 switch (cc)
2023 { 2019 {
2024 case RECC_NONASCII: case RECC_PRINT: case RECC_GRAPH: 2020 case RECC_NONASCII: case RECC_PRINT: case RECC_GRAPH:
2025 case RECC_MULTIBYTE: return BIT_MULTIBYTE; 2021 case RECC_MULTIBYTE: ret = BIT_MULTIBYTE;
2026 case RECC_ALPHA: case RECC_ALNUM: case RECC_WORD: return BIT_WORD; 2022 case RECC_ALPHA: case RECC_ALNUM: case RECC_WORD: ret = BIT_WORD;
2027 case RECC_LOWER: return BIT_LOWER; 2023 case RECC_LOWER: ret = BIT_LOWER;
2028 case RECC_UPPER: return BIT_UPPER; 2024 case RECC_UPPER: ret = BIT_UPPER;
2029 case RECC_PUNCT: return BIT_PUNCT; 2025 case RECC_PUNCT: ret = BIT_PUNCT;
2030 case RECC_SPACE: return BIT_SPACE; 2026 case RECC_SPACE: ret = BIT_SPACE;
2031 case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL: 2027 case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL:
2032 case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0; 2028 case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: ret = 0;
2033 } 2029 }
2030 return ret;
2034} 2031}
2035#endif 2032#endif
2036 2033
@@ -2042,7 +2039,7 @@ extern int immediate_quit;
2042 if (immediate_quit) QUIT; \ 2039 if (immediate_quit) QUIT; \
2043 } while (0) 2040 } while (0)
2044#else 2041#else
2045# define IMMEDIATE_QUIT_CHECK (0) 2042# define IMMEDIATE_QUIT_CHECK ((void)0)
2046#endif 2043#endif
2047 2044
2048#ifndef MATCH_MAY_ALLOCATE 2045#ifndef MATCH_MAY_ALLOCATE
@@ -2129,10 +2126,8 @@ regex_compile (pattern, size, syntax, bufp)
2129 reg_syntax_t syntax; 2126 reg_syntax_t syntax;
2130 struct re_pattern_buffer *bufp; 2127 struct re_pattern_buffer *bufp;
2131{ 2128{
2132 /* We fetch characters from PATTERN here. Even though PATTERN is 2129 /* We fetch characters from PATTERN here. */
2133 `char *' (i.e., signed), we declare these variables as unsigned, so 2130 register re_wchar_t c, c1;
2134 they can be reliably used as array indices. */
2135 register unsigned int c, c1;
2136 2131
2137 /* A random temporary spot in PATTERN. */ 2132 /* A random temporary spot in PATTERN. */
2138 re_char *p1; 2133 re_char *p1;
@@ -2359,6 +2354,7 @@ regex_compile (pattern, size, syntax, bufp)
2359 boolean simple = skip_one_char (laststart) == b; 2354 boolean simple = skip_one_char (laststart) == b;
2360 unsigned int startoffset = 0; 2355 unsigned int startoffset = 0;
2361 re_opcode_t ofj = 2356 re_opcode_t ofj =
2357 /* Check if the loop can match the empty string. */
2362 (simple || !analyse_first (laststart, b, NULL, 0)) ? 2358 (simple || !analyse_first (laststart, b, NULL, 0)) ?
2363 on_failure_jump : on_failure_jump_loop; 2359 on_failure_jump : on_failure_jump_loop;
2364 assert (skip_one_char (laststart) <= b); 2360 assert (skip_one_char (laststart) <= b);
@@ -2629,7 +2625,7 @@ regex_compile (pattern, size, syntax, bufp)
2629 if (SINGLE_BYTE_CHAR_P (c)) 2625 if (SINGLE_BYTE_CHAR_P (c))
2630 /* ... into bitmap. */ 2626 /* ... into bitmap. */
2631 { 2627 {
2632 unsigned this_char; 2628 re_wchar_t this_char;
2633 int range_start = c, range_end = c1; 2629 int range_start = c, range_end = c1;
2634 2630
2635 /* If the start is after the end, the range is empty. */ 2631 /* If the start is after the end, the range is empty. */
@@ -3365,10 +3361,10 @@ insert_op2 (op, loc, arg1, arg2, end)
3365 3361
3366static boolean 3362static boolean
3367at_begline_loc_p (pattern, p, syntax) 3363at_begline_loc_p (pattern, p, syntax)
3368 const unsigned char *pattern, *p; 3364 re_char *pattern, *p;
3369 reg_syntax_t syntax; 3365 reg_syntax_t syntax;
3370{ 3366{
3371 const unsigned char *prev = p - 2; 3367 re_char *prev = p - 2;
3372 boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\'; 3368 boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
3373 3369
3374 return 3370 return
@@ -3389,12 +3385,12 @@ at_begline_loc_p (pattern, p, syntax)
3389 3385
3390static boolean 3386static boolean
3391at_endline_loc_p (p, pend, syntax) 3387at_endline_loc_p (p, pend, syntax)
3392 const unsigned char *p, *pend; 3388 re_char *p, *pend;
3393 reg_syntax_t syntax; 3389 reg_syntax_t syntax;
3394{ 3390{
3395 const unsigned char *next = p; 3391 re_char *next = p;
3396 boolean next_backslash = *next == '\\'; 3392 boolean next_backslash = *next == '\\';
3397 const unsigned char *next_next = p + 1 < pend ? p + 1 : 0; 3393 re_char *next_next = p + 1 < pend ? p + 1 : 0;
3398 3394
3399 return 3395 return
3400 /* Before a subexpression? */ 3396 /* Before a subexpression? */
@@ -3433,36 +3429,16 @@ group_in_compile_stack (compile_stack, regnum)
3433 3429
3434 Return 1 if p..pend might match the empty string. 3430 Return 1 if p..pend might match the empty string.
3435 Return 0 if p..pend matches at least one char. 3431 Return 0 if p..pend matches at least one char.
3436 Return -1 if p..pend matches at least one char, but fastmap was not 3432 Return -1 if fastmap was not updated accurately. */
3437 updated accurately.
3438 Return -2 if an error occurred. */
3439 3433
3440static int 3434static int
3441analyse_first (p, pend, fastmap, multibyte) 3435analyse_first (p, pend, fastmap, multibyte)
3442 unsigned char *p, *pend; 3436 re_char *p, *pend;
3443 char *fastmap; 3437 char *fastmap;
3444 const int multibyte; 3438 const int multibyte;
3445{ 3439{
3446 int j, k; 3440 int j, k;
3447 boolean not; 3441 boolean not;
3448#ifdef MATCH_MAY_ALLOCATE
3449 fail_stack_type fail_stack;
3450#endif
3451#ifndef REGEX_MALLOC
3452 char *destination;
3453#endif
3454
3455#if defined REL_ALLOC && defined REGEX_MALLOC
3456 /* This holds the pointer to the failure stack, when
3457 it is allocated relocatably. */
3458 fail_stack_elt_t *failure_stack_ptr;
3459#endif
3460
3461 /* Assume that each path through the pattern can be null until
3462 proven otherwise. We set this false at the bottom of switch
3463 statement, to which we get only if a particular path doesn't
3464 match the empty string. */
3465 boolean path_can_be_null = true;
3466 3442
3467 /* If all elements for base leading-codes in fastmap is set, this 3443 /* If all elements for base leading-codes in fastmap is set, this
3468 flag is set true. */ 3444 flag is set true. */
@@ -3470,8 +3446,6 @@ analyse_first (p, pend, fastmap, multibyte)
3470 3446
3471 assert (p); 3447 assert (p);
3472 3448
3473 INIT_FAIL_STACK ();
3474
3475 /* The loop below works as follows: 3449 /* The loop below works as follows:
3476 - It has a working-list kept in the PATTERN_STACK and which basically 3450 - It has a working-list kept in the PATTERN_STACK and which basically
3477 starts by only containing a pointer to the first operation. 3451 starts by only containing a pointer to the first operation.
@@ -3487,7 +3461,7 @@ analyse_first (p, pend, fastmap, multibyte)
3487 so that `p' is monotonically increasing. More to the point, we 3461 so that `p' is monotonically increasing. More to the point, we
3488 never set `p' (or push) anything `<= p1'. */ 3462 never set `p' (or push) anything `<= p1'. */
3489 3463
3490 while (1) 3464 while (p < pend)
3491 { 3465 {
3492 /* `p1' is used as a marker of how far back a `on_failure_jump' 3466 /* `p1' is used as a marker of how far back a `on_failure_jump'
3493 can go without being ignored. It is normally equal to `p' 3467 can go without being ignored. It is normally equal to `p'
@@ -3497,29 +3471,12 @@ analyse_first (p, pend, fastmap, multibyte)
3497 3..9: <body> 3471 3..9: <body>
3498 10: on_failure_jump 3 3472 10: on_failure_jump 3
3499 as used for the *? operator. */ 3473 as used for the *? operator. */
3500 unsigned char *p1 = p; 3474 re_char *p1 = p;
3501
3502 if (p >= pend)
3503 {
3504 if (path_can_be_null)
3505 return (RESET_FAIL_STACK (), 1);
3506
3507 /* We have reached the (effective) end of pattern. */
3508 if (PATTERN_STACK_EMPTY ())
3509 return (RESET_FAIL_STACK (), 0);
3510
3511 p = (unsigned char*) POP_PATTERN_OP ();
3512 path_can_be_null = true;
3513 continue;
3514 }
3515
3516 /* We should never be about to go beyond the end of the pattern. */
3517 assert (p < pend);
3518 3475
3519 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++)) 3476 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
3520 { 3477 {
3521 case succeed: 3478 case succeed:
3522 p = pend; 3479 return 1;
3523 continue; 3480 continue;
3524 3481
3525 case duplicate: 3482 case duplicate:
@@ -3551,7 +3508,7 @@ analyse_first (p, pend, fastmap, multibyte)
3551 /* We could put all the chars except for \n (and maybe \0) 3508 /* We could put all the chars except for \n (and maybe \0)
3552 but we don't bother since it is generally not worth it. */ 3509 but we don't bother since it is generally not worth it. */
3553 if (!fastmap) break; 3510 if (!fastmap) break;
3554 return (RESET_FAIL_STACK (), -1); 3511 return -1;
3555 3512
3556 3513
3557 case charset_not: 3514 case charset_not:
@@ -3626,7 +3583,7 @@ analyse_first (p, pend, fastmap, multibyte)
3626#else /* emacs */ 3583#else /* emacs */
3627 /* This match depends on text properties. These end with 3584 /* This match depends on text properties. These end with
3628 aborting optimizations. */ 3585 aborting optimizations. */
3629 return (RESET_FAIL_STACK (), -1); 3586 return -1;
3630 3587
3631 case categoryspec: 3588 case categoryspec:
3632 case notcategoryspec: 3589 case notcategoryspec:
@@ -3693,8 +3650,14 @@ analyse_first (p, pend, fastmap, multibyte)
3693 EXTRACT_NUMBER_AND_INCR (j, p); 3650 EXTRACT_NUMBER_AND_INCR (j, p);
3694 if (p + j <= p1) 3651 if (p + j <= p1)
3695 ; /* Backward jump to be ignored. */ 3652 ; /* Backward jump to be ignored. */
3696 else if (!PUSH_PATTERN_OP (p + j, fail_stack)) 3653 else
3697 return (RESET_FAIL_STACK (), -2); 3654 { /* We have to look down both arms.
3655 We first go down the "straight" path so as to minimize
3656 stack usage when going through alternatives. */
3657 int r = analyse_first (p, pend, fastmap, multibyte);
3658 if (r) return r;
3659 p += j;
3660 }
3698 continue; 3661 continue;
3699 3662
3700 3663
@@ -3734,15 +3697,13 @@ analyse_first (p, pend, fastmap, multibyte)
3734 3697
3735 /* Getting here means we have found the possible starting 3698 /* Getting here means we have found the possible starting
3736 characters for one path of the pattern -- and that the empty 3699 characters for one path of the pattern -- and that the empty
3737 string does not match. We need not follow this path further. 3700 string does not match. We need not follow this path further. */
3738 Instead, look at the next alternative (remembered on the 3701 return 0;
3739 stack), or quit if no more. The test at the top of the loop
3740 does these things. */
3741 path_can_be_null = false;
3742 p = pend;
3743 } /* while p */ 3702 } /* while p */
3744 3703
3745 return (RESET_FAIL_STACK (), 0); 3704 /* We reached the end without matching anything. */
3705 return 1;
3706
3746} /* analyse_first */ 3707} /* analyse_first */
3747 3708
3748/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in 3709/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
@@ -3777,8 +3738,6 @@ re_compile_fastmap (bufp)
3777 analysis = analyse_first (bufp->buffer, bufp->buffer + bufp->used, 3738 analysis = analyse_first (bufp->buffer, bufp->buffer + bufp->used,
3778 fastmap, RE_MULTIBYTE_P (bufp)); 3739 fastmap, RE_MULTIBYTE_P (bufp));
3779 bufp->can_be_null = (analysis != 0); 3740 bufp->can_be_null = (analysis != 0);
3780 if (analysis < -1)
3781 return analysis;
3782 return 0; 3741 return 0;
3783} /* re_compile_fastmap */ 3742} /* re_compile_fastmap */
3784 3743
@@ -3921,8 +3880,7 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
3921 3880
3922 /* Update the fastmap now if not correct already. */ 3881 /* Update the fastmap now if not correct already. */
3923 if (fastmap && !bufp->fastmap_accurate) 3882 if (fastmap && !bufp->fastmap_accurate)
3924 if (re_compile_fastmap (bufp) == -2) 3883 re_compile_fastmap (bufp);
3925 return -2;
3926 3884
3927 /* See whether the pattern is anchored. */ 3885 /* See whether the pattern is anchored. */
3928 anchored_start = (bufp->buffer[0] == begline); 3886 anchored_start = (bufp->buffer[0] == begline);
@@ -3958,7 +3916,7 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
3958 if (fastmap && startpos < total_size && !bufp->can_be_null) 3916 if (fastmap && startpos < total_size && !bufp->can_be_null)
3959 { 3917 {
3960 register re_char *d; 3918 register re_char *d;
3961 register unsigned int buf_ch; 3919 register re_wchar_t buf_ch;
3962 3920
3963 d = POS_ADDR_VSTRING (startpos); 3921 d = POS_ADDR_VSTRING (startpos);
3964 3922
@@ -4191,9 +4149,9 @@ static int bcmp_translate _RE_ARGS((re_char *s1, re_char *s2,
4191 4149
4192/* If the operation is a match against one or more chars, 4150/* If the operation is a match against one or more chars,
4193 return a pointer to the next operation, else return NULL. */ 4151 return a pointer to the next operation, else return NULL. */
4194static unsigned char * 4152static re_char *
4195skip_one_char (p) 4153skip_one_char (p)
4196 unsigned char *p; 4154 re_char *p;
4197{ 4155{
4198 switch (SWITCH_ENUM_CAST (*p++)) 4156 switch (SWITCH_ENUM_CAST (*p++))
4199 { 4157 {
@@ -4303,7 +4261,7 @@ mutually_exclusive_p (bufp, p1, p2)
4303 case endline: 4261 case endline:
4304 case exactn: 4262 case exactn:
4305 { 4263 {
4306 register unsigned int c 4264 register re_wchar_t c
4307 = (re_opcode_t) *p2 == endline ? '\n' 4265 = (re_opcode_t) *p2 == endline ? '\n'
4308 : RE_STRING_CHAR(p2 + 2, pend - p2 - 2); 4266 : RE_STRING_CHAR(p2 + 2, pend - p2 - 2);
4309 4267
@@ -4525,8 +4483,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4525{ 4483{
4526 /* General temporaries. */ 4484 /* General temporaries. */
4527 int mcnt; 4485 int mcnt;
4486 size_t reg;
4528 boolean not; 4487 boolean not;
4529 unsigned char *p1;
4530 4488
4531 /* Just past the end of the corresponding string. */ 4489 /* Just past the end of the corresponding string. */
4532 re_char *end1, *end2; 4490 re_char *end1, *end2;
@@ -4545,8 +4503,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4545 re_char *dfail; 4503 re_char *dfail;
4546 4504
4547 /* Where we are in the pattern, and the end of the pattern. */ 4505 /* Where we are in the pattern, and the end of the pattern. */
4548 unsigned char *p = bufp->buffer; 4506 re_char *p = bufp->buffer;
4549 register unsigned char *pend = p + bufp->used; 4507 re_char *pend = p + bufp->used;
4550 4508
4551 /* We use this to map every character in the string. */ 4509 /* We use this to map every character in the string. */
4552 RE_TRANSLATE_TYPE translate = bufp->translate; 4510 RE_TRANSLATE_TYPE translate = bufp->translate;
@@ -4655,8 +4613,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4655 /* Initialize subexpression text positions to -1 to mark ones that no 4613 /* Initialize subexpression text positions to -1 to mark ones that no
4656 start_memory/stop_memory has been seen for. Also initialize the 4614 start_memory/stop_memory has been seen for. Also initialize the
4657 register information struct. */ 4615 register information struct. */
4658 for (mcnt = 1; mcnt < num_regs; mcnt++) 4616 for (reg = 1; reg < num_regs; reg++)
4659 regstart[mcnt] = regend[mcnt] = NULL; 4617 regstart[reg] = regend[reg] = NULL;
4660 4618
4661 /* We move `string1' into `string2' if the latter's empty -- but not if 4619 /* We move `string1' into `string2' if the latter's empty -- but not if
4662 `string1' is null. */ 4620 `string1' is null. */
@@ -4758,10 +4716,10 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4758 4716
4759 DEBUG_PRINT1 ("\nSAVING match as best so far.\n"); 4717 DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
4760 4718
4761 for (mcnt = 1; mcnt < num_regs; mcnt++) 4719 for (reg = 1; reg < num_regs; reg++)
4762 { 4720 {
4763 best_regstart[mcnt] = regstart[mcnt]; 4721 best_regstart[reg] = regstart[reg];
4764 best_regend[mcnt] = regend[mcnt]; 4722 best_regend[reg] = regend[reg];
4765 } 4723 }
4766 } 4724 }
4767 goto fail; 4725 goto fail;
@@ -4784,10 +4742,10 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4784 dend = ((d >= string1 && d <= end1) 4742 dend = ((d >= string1 && d <= end1)
4785 ? end_match_1 : end_match_2); 4743 ? end_match_1 : end_match_2);
4786 4744
4787 for (mcnt = 1; mcnt < num_regs; mcnt++) 4745 for (reg = 1; reg < num_regs; reg++)
4788 { 4746 {
4789 regstart[mcnt] = best_regstart[mcnt]; 4747 regstart[reg] = best_regstart[reg];
4790 regend[mcnt] = best_regend[mcnt]; 4748 regend[reg] = best_regend[reg];
4791 } 4749 }
4792 } 4750 }
4793 } /* d != end_match_2 */ 4751 } /* d != end_match_2 */
@@ -4847,16 +4805,16 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4847 4805
4848 /* Go through the first `min (num_regs, regs->num_regs)' 4806 /* Go through the first `min (num_regs, regs->num_regs)'
4849 registers, since that is all we initialized. */ 4807 registers, since that is all we initialized. */
4850 for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++) 4808 for (reg = 1; reg < MIN (num_regs, regs->num_regs); reg++)
4851 { 4809 {
4852 if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt])) 4810 if (REG_UNSET (regstart[reg]) || REG_UNSET (regend[reg]))
4853 regs->start[mcnt] = regs->end[mcnt] = -1; 4811 regs->start[reg] = regs->end[reg] = -1;
4854 else 4812 else
4855 { 4813 {
4856 regs->start[mcnt] 4814 regs->start[reg]
4857 = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]); 4815 = (regoff_t) POINTER_TO_OFFSET (regstart[reg]);
4858 regs->end[mcnt] 4816 regs->end[reg]
4859 = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]); 4817 = (regoff_t) POINTER_TO_OFFSET (regend[reg]);
4860 } 4818 }
4861 } 4819 }
4862 4820
@@ -4865,8 +4823,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4865 we (re)allocated the registers, this is the case, 4823 we (re)allocated the registers, this is the case,
4866 because we always allocate enough to have at least one 4824 because we always allocate enough to have at least one
4867 -1 at the end. */ 4825 -1 at the end. */
4868 for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++) 4826 for (reg = num_regs; reg < regs->num_regs; reg++)
4869 regs->start[mcnt] = regs->end[mcnt] = -1; 4827 regs->start[reg] = regs->end[reg] = -1;
4870 } /* regs && !bufp->no_sub */ 4828 } /* regs && !bufp->no_sub */
4871 4829
4872 DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", 4830 DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
@@ -4964,7 +4922,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4964 case anychar: 4922 case anychar:
4965 { 4923 {
4966 int buf_charlen; 4924 int buf_charlen;
4967 unsigned int buf_ch; 4925 re_wchar_t buf_ch;
4968 4926
4969 DEBUG_PRINT1 ("EXECUTING anychar.\n"); 4927 DEBUG_PRINT1 ("EXECUTING anychar.\n");
4970 4928
@@ -4993,7 +4951,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4993 4951
4994 /* Start of actual range_table, or end of bitmap if there is no 4952 /* Start of actual range_table, or end of bitmap if there is no
4995 range table. */ 4953 range table. */
4996 unsigned char *range_table; 4954 re_char *range_table;
4997 4955
4998 /* Nonzero if there is a range table. */ 4956 /* Nonzero if there is a range table. */
4999 int range_table_exists; 4957 int range_table_exists;
@@ -5317,8 +5275,10 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5317 DEBUG_PRINT3 ("EXECUTING on_failure_jump_smart %d (to %p).\n", 5275 DEBUG_PRINT3 ("EXECUTING on_failure_jump_smart %d (to %p).\n",
5318 mcnt, p + mcnt); 5276 mcnt, p + mcnt);
5319 { 5277 {
5320 unsigned char *p1 = p; /* Next operation. */ 5278 re_char *p1 = p; /* Next operation. */
5279 /* Please don't add casts to try and shut up GCC. */
5321 unsigned char *p2 = p + mcnt; /* Destination of the jump. */ 5280 unsigned char *p2 = p + mcnt; /* Destination of the jump. */
5281 unsigned char *p3 = p - 3; /* Location of the opcode. */
5322 5282
5323 p -= 3; /* Reset so that we will re-execute the 5283 p -= 3; /* Reset so that we will re-execute the
5324 instruction once it's been changed. */ 5284 instruction once it's been changed. */
@@ -5334,14 +5294,14 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5334 { 5294 {
5335 /* Use a fast `on_failure_keep_string_jump' loop. */ 5295 /* Use a fast `on_failure_keep_string_jump' loop. */
5336 DEBUG_PRINT1 (" smart exclusive => fast loop.\n"); 5296 DEBUG_PRINT1 (" smart exclusive => fast loop.\n");
5337 *p = (unsigned char) on_failure_keep_string_jump; 5297 *p3 = (unsigned char) on_failure_keep_string_jump;
5338 STORE_NUMBER (p2 - 2, mcnt + 3); 5298 STORE_NUMBER (p2 - 2, mcnt + 3);
5339 } 5299 }
5340 else 5300 else
5341 { 5301 {
5342 /* Default to a safe `on_failure_jump' loop. */ 5302 /* Default to a safe `on_failure_jump' loop. */
5343 DEBUG_PRINT1 (" smart default => slow loop.\n"); 5303 DEBUG_PRINT1 (" smart default => slow loop.\n");
5344 *p = (unsigned char) on_failure_jump; 5304 *p3 = (unsigned char) on_failure_jump;
5345 } 5305 }
5346 DEBUG_STATEMENT (debug -= 2); 5306 DEBUG_STATEMENT (debug -= 2);
5347 } 5307 }
@@ -5361,17 +5321,18 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5361 /* Have to succeed matching what follows at least n times. 5321 /* Have to succeed matching what follows at least n times.
5362 After that, handle like `on_failure_jump'. */ 5322 After that, handle like `on_failure_jump'. */
5363 case succeed_n: 5323 case succeed_n:
5324 /* Signedness doesn't matter since we only compare MCNT to 0. */
5364 EXTRACT_NUMBER (mcnt, p + 2); 5325 EXTRACT_NUMBER (mcnt, p + 2);
5365 DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt); 5326 DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
5366 5327
5367 /* Originally, mcnt is how many times we HAVE to succeed. */ 5328 /* Originally, mcnt is how many times we HAVE to succeed. */
5368 if (mcnt != 0) 5329 if (mcnt != 0)
5369 { 5330 {
5331 /* Please don't add a cast to try and shut up GCC. */
5332 unsigned char *p2 = p + 2; /* Location of the counter. */
5370 mcnt--; 5333 mcnt--;
5371 p += 2; 5334 p += 4;
5372 PUSH_FAILURE_COUNT (p); 5335 PUSH_NUMBER (p2, mcnt);
5373 DEBUG_PRINT3 (" Setting %p to %d.\n", p, mcnt);
5374 STORE_NUMBER_AND_INCR (p, mcnt);
5375 } 5336 }
5376 else 5337 else
5377 /* The two bytes encoding mcnt == 0 are two no_op opcodes. */ 5338 /* The two bytes encoding mcnt == 0 are two no_op opcodes. */
@@ -5379,15 +5340,17 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5379 break; 5340 break;
5380 5341
5381 case jump_n: 5342 case jump_n:
5343 /* Signedness doesn't matter since we only compare MCNT to 0. */
5382 EXTRACT_NUMBER (mcnt, p + 2); 5344 EXTRACT_NUMBER (mcnt, p + 2);
5383 DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt); 5345 DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
5384 5346
5385 /* Originally, this is how many times we CAN jump. */ 5347 /* Originally, this is how many times we CAN jump. */
5386 if (mcnt != 0) 5348 if (mcnt != 0)
5387 { 5349 {
5350 /* Please don't add a cast to try and shut up GCC. */
5351 unsigned char *p2 = p + 2; /* Location of the counter. */
5388 mcnt--; 5352 mcnt--;
5389 PUSH_FAILURE_COUNT (p + 2); 5353 PUSH_NUMBER (p2, mcnt);
5390 STORE_NUMBER (p + 2, mcnt);
5391 goto unconditional_jump; 5354 goto unconditional_jump;
5392 } 5355 }
5393 /* If we don't have to jump any more, skip over the rest of the command. */ 5356 /* If we don't have to jump any more, skip over the rest of the command. */
@@ -5397,14 +5360,16 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5397 5360
5398 case set_number_at: 5361 case set_number_at:
5399 { 5362 {
5363 unsigned char *p2; /* Location of the counter. */
5400 DEBUG_PRINT1 ("EXECUTING set_number_at.\n"); 5364 DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
5401 5365
5402 EXTRACT_NUMBER_AND_INCR (mcnt, p); 5366 EXTRACT_NUMBER_AND_INCR (mcnt, p);
5403 p1 = p + mcnt; 5367 /* Please don't add a cast to try and shut up GCC. */
5368 p2 = p + mcnt;
5369 /* Signedness doesn't matter since we only copy MCNT's bits. */
5404 EXTRACT_NUMBER_AND_INCR (mcnt, p); 5370 EXTRACT_NUMBER_AND_INCR (mcnt, p);
5405 DEBUG_PRINT3 (" Setting %p to %d.\n", p1, mcnt); 5371 DEBUG_PRINT3 (" Setting %p to %d.\n", p2, mcnt);
5406 PUSH_FAILURE_COUNT (p1); 5372 PUSH_NUMBER (p2, mcnt);
5407 STORE_NUMBER (p1, mcnt);
5408 break; 5373 break;
5409 } 5374 }
5410 5375
@@ -5422,7 +5387,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5422 { 5387 {
5423 /* C1 is the character before D, S1 is the syntax of C1, C2 5388 /* C1 is the character before D, S1 is the syntax of C1, C2
5424 is the character at D, and S2 is the syntax of C2. */ 5389 is the character at D, and S2 is the syntax of C2. */
5425 int c1, c2, s1, s2; 5390 re_wchar_t c1, c2;
5391 int s1, s2;
5426#ifdef emacs 5392#ifdef emacs
5427 int offset = PTR_TO_OFFSET (d - 1); 5393 int offset = PTR_TO_OFFSET (d - 1);
5428 int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); 5394 int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
@@ -5461,7 +5427,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5461 { 5427 {
5462 /* C1 is the character before D, S1 is the syntax of C1, C2 5428 /* C1 is the character before D, S1 is the syntax of C1, C2
5463 is the character at D, and S2 is the syntax of C2. */ 5429 is the character at D, and S2 is the syntax of C2. */
5464 int c1, c2, s1, s2; 5430 re_wchar_t c1, c2;
5431 int s1, s2;
5465#ifdef emacs 5432#ifdef emacs
5466 int offset = PTR_TO_OFFSET (d); 5433 int offset = PTR_TO_OFFSET (d);
5467 int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); 5434 int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
@@ -5504,7 +5471,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5504 { 5471 {
5505 /* C1 is the character before D, S1 is the syntax of C1, C2 5472 /* C1 is the character before D, S1 is the syntax of C1, C2
5506 is the character at D, and S2 is the syntax of C2. */ 5473 is the character at D, and S2 is the syntax of C2. */
5507 int c1, c2, s1, s2; 5474 re_wchar_t c1, c2;
5475 int s1, s2;
5508#ifdef emacs 5476#ifdef emacs
5509 int offset = PTR_TO_OFFSET (d) - 1; 5477 int offset = PTR_TO_OFFSET (d) - 1;
5510 int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); 5478 int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
@@ -5549,7 +5517,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5549 } 5517 }
5550#endif 5518#endif
5551 { 5519 {
5552 int c, len; 5520 int len;
5521 re_wchar_t c;
5553 5522
5554 c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len); 5523 c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len);
5555 5524
@@ -5585,7 +5554,9 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5585 DEBUG_PRINT3 ("EXECUTING %scategoryspec %d.\n", not?"not":"", mcnt); 5554 DEBUG_PRINT3 ("EXECUTING %scategoryspec %d.\n", not?"not":"", mcnt);
5586 PREFETCH (); 5555 PREFETCH ();
5587 { 5556 {
5588 int c, len; 5557 int len;
5558 re_wchar_t c;
5559
5589 c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len); 5560 c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len);
5590 5561
5591 if ((!CHAR_HAS_CATEGORY (c, mcnt)) ^ not) 5562 if ((!CHAR_HAS_CATEGORY (c, mcnt)) ^ not)
@@ -5607,8 +5578,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5607 IMMEDIATE_QUIT_CHECK; 5578 IMMEDIATE_QUIT_CHECK;
5608 if (!FAIL_STACK_EMPTY ()) 5579 if (!FAIL_STACK_EMPTY ())
5609 { 5580 {
5610 re_char *str; 5581 re_char *str, *pat;
5611 unsigned char *pat;
5612 /* A restart point is known. Restore to that state. */ 5582 /* A restart point is known. Restore to that state. */
5613 DEBUG_PRINT1 ("\nFAIL:\n"); 5583 DEBUG_PRINT1 ("\nFAIL:\n");
5614 POP_FAILURE_POINT (str, pat); 5584 POP_FAILURE_POINT (str, pat);
@@ -5678,7 +5648,7 @@ bcmp_translate (s1, s2, len, translate, multibyte)
5678 while (p1 < p1_end && p2 < p2_end) 5648 while (p1 < p1_end && p2 < p2_end)
5679 { 5649 {
5680 int p1_charlen, p2_charlen; 5650 int p1_charlen, p2_charlen;
5681 int p1_ch, p2_ch; 5651 re_wchar_t p1_ch, p2_ch;
5682 5652
5683 p1_ch = RE_STRING_CHAR_AND_LENGTH (p1, p1_end - p1, p1_charlen); 5653 p1_ch = RE_STRING_CHAR_AND_LENGTH (p1, p1_end - p1, p1_charlen);
5684 p2_ch = RE_STRING_CHAR_AND_LENGTH (p2, p2_end - p2, p2_charlen); 5654 p2_ch = RE_STRING_CHAR_AND_LENGTH (p2, p2_end - p2, p2_charlen);