diff options
| author | Paul Eggert | 2019-03-25 08:47:57 -0700 |
|---|---|---|
| committer | Paul Eggert | 2019-03-25 09:03:29 -0700 |
| commit | b7a98993789d18bc675798d49038982d5cf41683 (patch) | |
| tree | 664630027dfc0918c3fa61156435250063057e1d /src | |
| parent | 176ababa8cf57564cf9374c15ffdc14fa76be39f (diff) | |
| download | emacs-b7a98993789d18bc675798d49038982d5cf41683.tar.gz emacs-b7a98993789d18bc675798d49038982d5cf41683.zip | |
Fix some integer issues in regex-emacs
Also, remove some duplicate comments related to thread.h.
* src/regex-emacs.h (struct re_registers):
* src/regex-emacs.c (SIGN_EXTEND_CHAR): Remove.
(TALLOC, RETALLOC): Remove. All uses replaced by usual
allocators, which check for integer overflow.
(extract_number): Redo without using ‘unsigned’.
(CHARSET_RANGE_TABLE_EXISTS_P): Clearly return a boolean.
(print_fastmap, print_partial_compiled_pattern, CHECK_INFINITE_LOOP)
(regex_compile, analyze_first, bcmp_translate, mutually_exclusive_p)
(re_match_2_internal):
Use bool for booleans.
(print_fastmap, regex_compile, execute_charset):
Prefer int to unsigned where either will do.
(print_double_string): Prefer ptrdiff_t to ssize_t, since the
latter can in theory be narrower than the former. Use fwrite
instead of repeated putchar.
(emacs_re_max_failures, fail_stack_type, compile_stack_type)
(re_wctype_parse, regex_compile, re_search, re_search_2)
(re_match_2, re_match_2_internal, re_compile_pattern):
Prefer ptrdiff_t to size_t where either will do.
(union fail_stack_elt, PUSH_FAILURE_REG, POP_FAILURE_REG_OR_COUNT):
Make the integer an intptr_t, not long.
(GET_BUFFER_SPACE, EXTEND_BUFFER, regex_compile):
Use xpalloc to simplify allocation.
(regex_compile): Check for integer overflow when calculating
register numbers.
* src/regex-emacs.c (re_set_registers, re_match_2_internal):
* src/regex-emacs.h (struct re_registers, struct re_pattern_buffer):
* src/search.c (Freplace_match):
Prefer ptrdiff_t to unsigned where either will do.
* src/regex-emacs.h (struct re_pattern_buffer):
Prefer bool_bf to unsigned where either will do.
Diffstat (limited to 'src')
| -rw-r--r-- | src/eval.c | 6 | ||||
| -rw-r--r-- | src/lisp.h | 5 | ||||
| -rw-r--r-- | src/regex-emacs.c | 302 | ||||
| -rw-r--r-- | src/regex-emacs.h | 36 | ||||
| -rw-r--r-- | src/search.c | 33 | ||||
| -rw-r--r-- | src/thread.h | 4 |
6 files changed, 161 insertions, 225 deletions
diff --git a/src/eval.c b/src/eval.c index 09e8fdf4c2a..49d6460e6e4 100644 --- a/src/eval.c +++ b/src/eval.c | |||
| @@ -40,10 +40,6 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */ | |||
| 40 | # define CACHEABLE /* empty */ | 40 | # define CACHEABLE /* empty */ |
| 41 | #endif | 41 | #endif |
| 42 | 42 | ||
| 43 | /* Chain of condition and catch handlers currently in effect. */ | ||
| 44 | |||
| 45 | /* struct handler *handlerlist; */ | ||
| 46 | |||
| 47 | /* Non-nil means record all fset's and provide's, to be undone | 43 | /* Non-nil means record all fset's and provide's, to be undone |
| 48 | if the file being autoloaded is not fully loaded. | 44 | if the file being autoloaded is not fully loaded. |
| 49 | They are recorded by being consed onto the front of Vautoload_queue: | 45 | They are recorded by being consed onto the front of Vautoload_queue: |
| @@ -248,8 +244,6 @@ init_eval_once_for_pdumper (void) | |||
| 248 | specpdl = specpdl_ptr = pdlvec + 1; | 244 | specpdl = specpdl_ptr = pdlvec + 1; |
| 249 | } | 245 | } |
| 250 | 246 | ||
| 251 | /* static struct handler handlerlist_sentinel; */ | ||
| 252 | |||
| 253 | void | 247 | void |
| 254 | init_eval (void) | 248 | init_eval (void) |
| 255 | { | 249 | { |
diff --git a/src/lisp.h b/src/lisp.h index 2508e2b804e..178eebed2a5 100644 --- a/src/lisp.h +++ b/src/lisp.h | |||
| @@ -3233,11 +3233,6 @@ union specbinding | |||
| 3233 | } bt; | 3233 | } bt; |
| 3234 | }; | 3234 | }; |
| 3235 | 3235 | ||
| 3236 | /* These 3 are defined as macros in thread.h. */ | ||
| 3237 | /* extern union specbinding *specpdl; */ | ||
| 3238 | /* extern union specbinding *specpdl_ptr; */ | ||
| 3239 | /* extern ptrdiff_t specpdl_size; */ | ||
| 3240 | |||
| 3241 | INLINE ptrdiff_t | 3236 | INLINE ptrdiff_t |
| 3242 | SPECPDL_INDEX (void) | 3237 | SPECPDL_INDEX (void) |
| 3243 | { | 3238 | { |
diff --git a/src/regex-emacs.c b/src/regex-emacs.c index e7849157c85..7629492bcf8 100644 --- a/src/regex-emacs.c +++ b/src/regex-emacs.c | |||
| @@ -151,8 +151,6 @@ | |||
| 151 | 151 | ||
| 152 | #define ISWORD(c) (SYNTAX (c) == Sword) | 152 | #define ISWORD(c) (SYNTAX (c) == Sword) |
| 153 | 153 | ||
| 154 | #define SIGN_EXTEND_CHAR(c) ((signed char) (c)) | ||
| 155 | |||
| 156 | /* Use alloca instead of malloc. This is because using malloc in | 154 | /* Use alloca instead of malloc. This is because using malloc in |
| 157 | re_search* or re_match* could cause memory leaks when C-g is used | 155 | re_search* or re_match* could cause memory leaks when C-g is used |
| 158 | in Emacs (note that SAFE_ALLOCA could also call malloc, but does so | 156 | in Emacs (note that SAFE_ALLOCA could also call malloc, but does so |
| @@ -182,10 +180,6 @@ ptrdiff_t emacs_re_safe_alloca = MAX_ALLOCA; | |||
| 182 | #define FIRST_STRING_P(ptr) \ | 180 | #define FIRST_STRING_P(ptr) \ |
| 183 | (size1 && string1 <= (ptr) && (ptr) <= string1 + size1) | 181 | (size1 && string1 <= (ptr) && (ptr) <= string1 + size1) |
| 184 | 182 | ||
| 185 | /* (Re)Allocate N items of type T using malloc, or fail. */ | ||
| 186 | #define TALLOC(n, t) ((t *) xmalloc ((n) * sizeof (t))) | ||
| 187 | #define RETALLOC(addr, n, t) ((addr) = (t *) xrealloc (addr, (n) * sizeof (t))) | ||
| 188 | |||
| 189 | #define BYTEWIDTH 8 /* In bits. */ | 183 | #define BYTEWIDTH 8 /* In bits. */ |
| 190 | 184 | ||
| 191 | /* Type of source-pattern and string chars. */ | 185 | /* Type of source-pattern and string chars. */ |
| @@ -193,8 +187,8 @@ typedef const unsigned char re_char; | |||
| 193 | 187 | ||
| 194 | static void re_compile_fastmap (struct re_pattern_buffer *); | 188 | static void re_compile_fastmap (struct re_pattern_buffer *); |
| 195 | static ptrdiff_t re_match_2_internal (struct re_pattern_buffer *bufp, | 189 | static ptrdiff_t re_match_2_internal (struct re_pattern_buffer *bufp, |
| 196 | re_char *string1, size_t size1, | 190 | re_char *string1, ptrdiff_t size1, |
| 197 | re_char *string2, size_t size2, | 191 | re_char *string2, ptrdiff_t size2, |
| 198 | ptrdiff_t pos, | 192 | ptrdiff_t pos, |
| 199 | struct re_registers *regs, | 193 | struct re_registers *regs, |
| 200 | ptrdiff_t stop); | 194 | ptrdiff_t stop); |
| @@ -368,8 +362,8 @@ typedef enum | |||
| 368 | static int | 362 | static int |
| 369 | extract_number (re_char *source) | 363 | extract_number (re_char *source) |
| 370 | { | 364 | { |
| 371 | unsigned leading_byte = SIGN_EXTEND_CHAR (source[1]); | 365 | signed char leading_byte = source[1]; |
| 372 | return (leading_byte << 8) + source[0]; | 366 | return leading_byte * 256 + source[0]; |
| 373 | } | 367 | } |
| 374 | 368 | ||
| 375 | /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number. | 369 | /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number. |
| @@ -416,7 +410,7 @@ extract_number_and_incr (re_char **source) | |||
| 416 | #define CHARSET_BITMAP_SIZE(p) ((p)[1] & 0x7F) | 410 | #define CHARSET_BITMAP_SIZE(p) ((p)[1] & 0x7F) |
| 417 | 411 | ||
| 418 | /* Nonzero if charset P has range table. */ | 412 | /* Nonzero if charset P has range table. */ |
| 419 | #define CHARSET_RANGE_TABLE_EXISTS_P(p) ((p)[1] & 0x80) | 413 | #define CHARSET_RANGE_TABLE_EXISTS_P(p) (((p)[1] & 0x80) != 0) |
| 420 | 414 | ||
| 421 | /* Return the address of range table of charset P. But not the start | 415 | /* Return the address of range table of charset P. But not the start |
| 422 | of table itself, but the before where the number of ranges is | 416 | of table itself, but the before where the number of ranges is |
| @@ -460,18 +454,18 @@ static int regex_emacs_debug = -100000; | |||
| 460 | static void | 454 | static void |
| 461 | print_fastmap (char *fastmap) | 455 | print_fastmap (char *fastmap) |
| 462 | { | 456 | { |
| 463 | unsigned was_a_range = 0; | 457 | bool was_a_range = false; |
| 464 | unsigned i = 0; | 458 | int i = 0; |
| 465 | 459 | ||
| 466 | while (i < (1 << BYTEWIDTH)) | 460 | while (i < (1 << BYTEWIDTH)) |
| 467 | { | 461 | { |
| 468 | if (fastmap[i++]) | 462 | if (fastmap[i++]) |
| 469 | { | 463 | { |
| 470 | was_a_range = 0; | 464 | was_a_range = false; |
| 471 | putchar (i - 1); | 465 | putchar (i - 1); |
| 472 | while (i < (1 << BYTEWIDTH) && fastmap[i]) | 466 | while (i < (1 << BYTEWIDTH) && fastmap[i]) |
| 473 | { | 467 | { |
| 474 | was_a_range = 1; | 468 | was_a_range = true; |
| 475 | i++; | 469 | i++; |
| 476 | } | 470 | } |
| 477 | if (was_a_range) | 471 | if (was_a_range) |
| @@ -545,10 +539,10 @@ print_partial_compiled_pattern (re_char *start, re_char *end) | |||
| 545 | case charset: | 539 | case charset: |
| 546 | case charset_not: | 540 | case charset_not: |
| 547 | { | 541 | { |
| 548 | register int c, last = -100; | 542 | int c, last = -100; |
| 549 | register int in_range = 0; | 543 | bool in_range = false; |
| 550 | int length = CHARSET_BITMAP_SIZE (p - 1); | 544 | int length = CHARSET_BITMAP_SIZE (p - 1); |
| 551 | int has_range_table = CHARSET_RANGE_TABLE_EXISTS_P (p - 1); | 545 | bool has_range_table = CHARSET_RANGE_TABLE_EXISTS_P (p - 1); |
| 552 | 546 | ||
| 553 | fprintf (stderr, "/charset [%s", | 547 | fprintf (stderr, "/charset [%s", |
| 554 | (re_opcode_t) *(p - 1) == charset_not ? "^" : ""); | 548 | (re_opcode_t) *(p - 1) == charset_not ? "^" : ""); |
| @@ -564,13 +558,13 @@ print_partial_compiled_pattern (re_char *start, re_char *end) | |||
| 564 | if (last + 1 == c && ! in_range) | 558 | if (last + 1 == c && ! in_range) |
| 565 | { | 559 | { |
| 566 | fprintf (stderr, "-"); | 560 | fprintf (stderr, "-"); |
| 567 | in_range = 1; | 561 | in_range = true; |
| 568 | } | 562 | } |
| 569 | /* Have we broken a range? */ | 563 | /* Have we broken a range? */ |
| 570 | else if (last + 1 != c && in_range) | 564 | else if (last + 1 != c && in_range) |
| 571 | { | 565 | { |
| 572 | fprintf (stderr, "%c", last); | 566 | fprintf (stderr, "%c", last); |
| 573 | in_range = 0; | 567 | in_range = false; |
| 574 | } | 568 | } |
| 575 | 569 | ||
| 576 | if (! in_range) | 570 | if (! in_range) |
| @@ -739,7 +733,7 @@ print_compiled_pattern (struct re_pattern_buffer *bufp) | |||
| 739 | re_char *buffer = bufp->buffer; | 733 | re_char *buffer = bufp->buffer; |
| 740 | 734 | ||
| 741 | print_partial_compiled_pattern (buffer, buffer + bufp->used); | 735 | print_partial_compiled_pattern (buffer, buffer + bufp->used); |
| 742 | printf ("%zu bytes used/%zu bytes allocated.\n", | 736 | printf ("%tu bytes used/%tu bytes allocated.\n", |
| 743 | bufp->used, bufp->allocated); | 737 | bufp->used, bufp->allocated); |
| 744 | 738 | ||
| 745 | if (bufp->fastmap_accurate && bufp->fastmap) | 739 | if (bufp->fastmap_accurate && bufp->fastmap) |
| @@ -748,7 +742,7 @@ print_compiled_pattern (struct re_pattern_buffer *bufp) | |||
| 748 | print_fastmap (bufp->fastmap); | 742 | print_fastmap (bufp->fastmap); |
| 749 | } | 743 | } |
| 750 | 744 | ||
| 751 | printf ("re_nsub: %zu\t", bufp->re_nsub); | 745 | printf ("re_nsub: %tu\t", bufp->re_nsub); |
| 752 | printf ("regs_alloc: %d\t", bufp->regs_allocated); | 746 | printf ("regs_alloc: %d\t", bufp->regs_allocated); |
| 753 | printf ("can_be_null: %d\t", bufp->can_be_null); | 747 | printf ("can_be_null: %d\t", bufp->can_be_null); |
| 754 | fflush (stdout); | 748 | fflush (stdout); |
| @@ -757,25 +751,20 @@ print_compiled_pattern (struct re_pattern_buffer *bufp) | |||
| 757 | 751 | ||
| 758 | 752 | ||
| 759 | static void | 753 | static void |
| 760 | print_double_string (re_char *where, re_char *string1, ssize_t size1, | 754 | print_double_string (re_char *where, re_char *string1, ptrdiff_t size1, |
| 761 | re_char *string2, ssize_t size2) | 755 | re_char *string2, ptrdiff_t size2) |
| 762 | { | 756 | { |
| 763 | ssize_t this_char; | ||
| 764 | |||
| 765 | if (where == NULL) | 757 | if (where == NULL) |
| 766 | printf ("(null)"); | 758 | printf ("(null)"); |
| 767 | else | 759 | else |
| 768 | { | 760 | { |
| 769 | if (FIRST_STRING_P (where)) | 761 | if (FIRST_STRING_P (where)) |
| 770 | { | 762 | { |
| 771 | for (this_char = where - string1; this_char < size1; this_char++) | 763 | fwrite_unlocked (where, 1, string1 + size1 - where, stdout); |
| 772 | putchar (string1[this_char]); | ||
| 773 | |||
| 774 | where = string2; | 764 | where = string2; |
| 775 | } | 765 | } |
| 776 | 766 | ||
| 777 | for (this_char = where - string2; this_char < size2; this_char++) | 767 | fwrite_unlocked (where, 1, string2 + size2 - where, stdout); |
| 778 | putchar (string2[this_char]); | ||
| 779 | } | 768 | } |
| 780 | } | 769 | } |
| 781 | 770 | ||
| @@ -872,13 +861,12 @@ enum { RE_NREGS = 30 }; | |||
| 872 | whose default stack limit is 2mb. In order for a larger | 861 | whose default stack limit is 2mb. In order for a larger |
| 873 | value to work reliably, you have to try to make it accord | 862 | value to work reliably, you have to try to make it accord |
| 874 | with the process stack limit. */ | 863 | with the process stack limit. */ |
| 875 | size_t emacs_re_max_failures = 40000; | 864 | ptrdiff_t emacs_re_max_failures = 40000; |
| 876 | 865 | ||
| 877 | union fail_stack_elt | 866 | union fail_stack_elt |
| 878 | { | 867 | { |
| 879 | re_char *pointer; | 868 | re_char *pointer; |
| 880 | /* This should be the biggest 'int' that's no bigger than a pointer. */ | 869 | intptr_t integer; |
| 881 | long integer; | ||
| 882 | }; | 870 | }; |
| 883 | 871 | ||
| 884 | typedef union fail_stack_elt fail_stack_elt_t; | 872 | typedef union fail_stack_elt fail_stack_elt_t; |
| @@ -886,9 +874,9 @@ typedef union fail_stack_elt fail_stack_elt_t; | |||
| 886 | typedef struct | 874 | typedef struct |
| 887 | { | 875 | { |
| 888 | fail_stack_elt_t *stack; | 876 | fail_stack_elt_t *stack; |
| 889 | size_t size; | 877 | ptrdiff_t size; |
| 890 | size_t avail; /* Offset of next open position. */ | 878 | ptrdiff_t avail; /* Offset of next open position. */ |
| 891 | size_t frame; /* Offset of the cur constructed frame. */ | 879 | ptrdiff_t frame; /* Offset of the cur constructed frame. */ |
| 892 | } fail_stack_type; | 880 | } fail_stack_type; |
| 893 | 881 | ||
| 894 | #define FAIL_STACK_EMPTY() (fail_stack.frame == 0) | 882 | #define FAIL_STACK_EMPTY() (fail_stack.frame == 0) |
| @@ -967,17 +955,17 @@ typedef struct | |||
| 967 | while (REMAINING_AVAIL_SLOTS <= space) { \ | 955 | while (REMAINING_AVAIL_SLOTS <= space) { \ |
| 968 | if (!GROW_FAIL_STACK (fail_stack)) \ | 956 | if (!GROW_FAIL_STACK (fail_stack)) \ |
| 969 | return -2; \ | 957 | return -2; \ |
| 970 | DEBUG_PRINT ("\n Doubled stack; size now: %zu\n", (fail_stack).size);\ | 958 | DEBUG_PRINT ("\n Doubled stack; size now: %tu\n", fail_stack.size); \ |
| 971 | DEBUG_PRINT (" slots available: %zu\n", REMAINING_AVAIL_SLOTS);\ | 959 | DEBUG_PRINT (" slots available: %tu\n", REMAINING_AVAIL_SLOTS);\ |
| 972 | } | 960 | } |
| 973 | 961 | ||
| 974 | /* Push register NUM onto the stack. */ | 962 | /* Push register NUM onto the stack. */ |
| 975 | #define PUSH_FAILURE_REG(num) \ | 963 | #define PUSH_FAILURE_REG(num) \ |
| 976 | do { \ | 964 | do { \ |
| 977 | char *destination; \ | 965 | char *destination; \ |
| 978 | long n = num; \ | 966 | intptr_t n = num; \ |
| 979 | ENSURE_FAIL_STACK(3); \ | 967 | ENSURE_FAIL_STACK(3); \ |
| 980 | DEBUG_PRINT (" Push reg %ld (spanning %p -> %p)\n", \ | 968 | DEBUG_PRINT (" Push reg %"PRIdPTR" (spanning %p -> %p)\n", \ |
| 981 | n, regstart[n], regend[n]); \ | 969 | n, regstart[n], regend[n]); \ |
| 982 | PUSH_FAILURE_POINTER (regstart[n]); \ | 970 | PUSH_FAILURE_POINTER (regstart[n]); \ |
| 983 | PUSH_FAILURE_POINTER (regend[n]); \ | 971 | PUSH_FAILURE_POINTER (regend[n]); \ |
| @@ -1002,7 +990,7 @@ do { \ | |||
| 1002 | /* Pop a saved register off the stack. */ | 990 | /* Pop a saved register off the stack. */ |
| 1003 | #define POP_FAILURE_REG_OR_COUNT() \ | 991 | #define POP_FAILURE_REG_OR_COUNT() \ |
| 1004 | do { \ | 992 | do { \ |
| 1005 | long pfreg = POP_FAILURE_INT (); \ | 993 | intptr_t pfreg = POP_FAILURE_INT (); \ |
| 1006 | if (pfreg == -1) \ | 994 | if (pfreg == -1) \ |
| 1007 | { \ | 995 | { \ |
| 1008 | /* It's a counter. */ \ | 996 | /* It's a counter. */ \ |
| @@ -1010,7 +998,7 @@ do { \ | |||
| 1010 | unsigned char *ptr = (unsigned char *) POP_FAILURE_POINTER (); \ | 998 | unsigned char *ptr = (unsigned char *) POP_FAILURE_POINTER (); \ |
| 1011 | pfreg = POP_FAILURE_INT (); \ | 999 | pfreg = POP_FAILURE_INT (); \ |
| 1012 | STORE_NUMBER (ptr, pfreg); \ | 1000 | STORE_NUMBER (ptr, pfreg); \ |
| 1013 | DEBUG_PRINT (" Pop counter %p = %ld\n", ptr, pfreg); \ | 1001 | DEBUG_PRINT (" Pop counter %p = %"PRIdPTR"\n", ptr, pfreg); \ |
| 1014 | } \ | 1002 | } \ |
| 1015 | else \ | 1003 | else \ |
| 1016 | { \ | 1004 | { \ |
| @@ -1034,7 +1022,7 @@ do { \ | |||
| 1034 | && FAILURE_PAT (failure) <= bufp->buffer + bufp->used); \ | 1022 | && FAILURE_PAT (failure) <= bufp->buffer + bufp->used); \ |
| 1035 | if (FAILURE_PAT (failure) == pat_cur) \ | 1023 | if (FAILURE_PAT (failure) == pat_cur) \ |
| 1036 | { \ | 1024 | { \ |
| 1037 | cycle = 1; \ | 1025 | cycle = true; \ |
| 1038 | break; \ | 1026 | break; \ |
| 1039 | } \ | 1027 | } \ |
| 1040 | DEBUG_PRINT (" Other pattern: %p\n", FAILURE_PAT (failure)); \ | 1028 | DEBUG_PRINT (" Other pattern: %p\n", FAILURE_PAT (failure)); \ |
| @@ -1057,14 +1045,14 @@ do { \ | |||
| 1057 | char *destination; \ | 1045 | char *destination; \ |
| 1058 | DEBUG_STATEMENT (nfailure_points_pushed++); \ | 1046 | DEBUG_STATEMENT (nfailure_points_pushed++); \ |
| 1059 | DEBUG_PRINT ("\nPUSH_FAILURE_POINT:\n"); \ | 1047 | DEBUG_PRINT ("\nPUSH_FAILURE_POINT:\n"); \ |
| 1060 | DEBUG_PRINT (" Before push, next avail: %zu\n", (fail_stack).avail); \ | 1048 | DEBUG_PRINT (" Before push, next avail: %tu\n", fail_stack.avail); \ |
| 1061 | DEBUG_PRINT (" size: %zu\n", (fail_stack).size);\ | 1049 | DEBUG_PRINT (" size: %tu\n", fail_stack.size); \ |
| 1062 | \ | 1050 | \ |
| 1063 | ENSURE_FAIL_STACK (NUM_NONREG_ITEMS); \ | 1051 | ENSURE_FAIL_STACK (NUM_NONREG_ITEMS); \ |
| 1064 | \ | 1052 | \ |
| 1065 | DEBUG_PRINT ("\n"); \ | 1053 | DEBUG_PRINT ("\n"); \ |
| 1066 | \ | 1054 | \ |
| 1067 | DEBUG_PRINT (" Push frame index: %zu\n", fail_stack.frame); \ | 1055 | DEBUG_PRINT (" Push frame index: %tu\n", fail_stack.frame); \ |
| 1068 | PUSH_FAILURE_INT (fail_stack.frame); \ | 1056 | PUSH_FAILURE_INT (fail_stack.frame); \ |
| 1069 | \ | 1057 | \ |
| 1070 | DEBUG_PRINT (" Push string %p: \"", string_place); \ | 1058 | DEBUG_PRINT (" Push string %p: \"", string_place); \ |
| @@ -1106,8 +1094,8 @@ do { \ | |||
| 1106 | \ | 1094 | \ |
| 1107 | /* Remove failure points and point to how many regs pushed. */ \ | 1095 | /* Remove failure points and point to how many regs pushed. */ \ |
| 1108 | DEBUG_PRINT ("POP_FAILURE_POINT:\n"); \ | 1096 | DEBUG_PRINT ("POP_FAILURE_POINT:\n"); \ |
| 1109 | DEBUG_PRINT (" Before pop, next avail: %zu\n", fail_stack.avail); \ | 1097 | DEBUG_PRINT (" Before pop, next avail: %tu\n", fail_stack.avail); \ |
| 1110 | DEBUG_PRINT (" size: %zu\n", fail_stack.size); \ | 1098 | DEBUG_PRINT (" size: %tu\n", fail_stack.size); \ |
| 1111 | \ | 1099 | \ |
| 1112 | /* Pop the saved registers. */ \ | 1100 | /* Pop the saved registers. */ \ |
| 1113 | while (fail_stack.frame < fail_stack.avail) \ | 1101 | while (fail_stack.frame < fail_stack.avail) \ |
| @@ -1141,7 +1129,7 @@ do { \ | |||
| 1141 | 1129 | ||
| 1142 | /* Subroutine declarations and macros for regex_compile. */ | 1130 | /* Subroutine declarations and macros for regex_compile. */ |
| 1143 | 1131 | ||
| 1144 | static reg_errcode_t regex_compile (re_char *pattern, size_t size, | 1132 | static reg_errcode_t regex_compile (re_char *pattern, ptrdiff_t size, |
| 1145 | bool posix_backtracking, | 1133 | bool posix_backtracking, |
| 1146 | const char *whitespace_regexp, | 1134 | const char *whitespace_regexp, |
| 1147 | struct re_pattern_buffer *bufp); | 1135 | struct re_pattern_buffer *bufp); |
| @@ -1155,7 +1143,7 @@ static bool at_begline_loc_p (re_char *pattern, re_char *p); | |||
| 1155 | static bool at_endline_loc_p (re_char *p, re_char *pend); | 1143 | static bool at_endline_loc_p (re_char *p, re_char *pend); |
| 1156 | static re_char *skip_one_char (re_char *p); | 1144 | static re_char *skip_one_char (re_char *p); |
| 1157 | static int analyze_first (re_char *p, re_char *pend, | 1145 | static int analyze_first (re_char *p, re_char *pend, |
| 1158 | char *fastmap, const int multibyte); | 1146 | char *fastmap, bool multibyte); |
| 1159 | 1147 | ||
| 1160 | /* Fetch the next character in the uncompiled pattern, with no | 1148 | /* Fetch the next character in the uncompiled pattern, with no |
| 1161 | translation. */ | 1149 | translation. */ |
| @@ -1178,8 +1166,8 @@ static int analyze_first (re_char *p, re_char *pend, | |||
| 1178 | 1166 | ||
| 1179 | /* Ensure at least N more bytes of space in buffer. */ | 1167 | /* Ensure at least N more bytes of space in buffer. */ |
| 1180 | #define GET_BUFFER_SPACE(n) \ | 1168 | #define GET_BUFFER_SPACE(n) \ |
| 1181 | while ((size_t) (b - bufp->buffer + (n)) > bufp->allocated) \ | 1169 | if (bufp->buffer + bufp->allocated - b < (n)) \ |
| 1182 | EXTEND_BUFFER () | 1170 | EXTEND_BUFFER ((n) - (bufp->buffer + bufp->allocated - b)) |
| 1183 | 1171 | ||
| 1184 | /* Ensure one more byte of buffer space and then add C to it. */ | 1172 | /* Ensure one more byte of buffer space and then add C to it. */ |
| 1185 | #define BUF_PUSH(c) \ | 1173 | #define BUF_PUSH(c) \ |
| @@ -1221,18 +1209,16 @@ static int analyze_first (re_char *p, re_char *pend, | |||
| 1221 | be too small, many things would have to change. */ | 1209 | be too small, many things would have to change. */ |
| 1222 | # define MAX_BUF_SIZE (1 << 15) | 1210 | # define MAX_BUF_SIZE (1 << 15) |
| 1223 | 1211 | ||
| 1224 | /* Extend the buffer by twice its current size via realloc and | 1212 | /* Extend the buffer by at least N bytes via realloc and |
| 1225 | reset the pointers that pointed into the old block to point to the | 1213 | reset the pointers that pointed into the old block to point to the |
| 1226 | correct places in the new one. If extending the buffer results in it | 1214 | correct places in the new one. If extending the buffer results in it |
| 1227 | being larger than MAX_BUF_SIZE, then flag memory exhausted. */ | 1215 | being larger than MAX_BUF_SIZE, then flag memory exhausted. */ |
| 1228 | #define EXTEND_BUFFER() \ | 1216 | #define EXTEND_BUFFER(n) \ |
| 1229 | do { \ | 1217 | do { \ |
| 1218 | ptrdiff_t requested_extension = n; \ | ||
| 1230 | unsigned char *old_buffer = bufp->buffer; \ | 1219 | unsigned char *old_buffer = bufp->buffer; \ |
| 1231 | if (bufp->allocated == MAX_BUF_SIZE) \ | 1220 | if (MAX_BUF_SIZE - bufp->allocated < requested_extension) \ |
| 1232 | return REG_ESIZE; \ | 1221 | return REG_ESIZE; \ |
| 1233 | bufp->allocated <<= 1; \ | ||
| 1234 | if (bufp->allocated > MAX_BUF_SIZE) \ | ||
| 1235 | bufp->allocated = MAX_BUF_SIZE; \ | ||
| 1236 | ptrdiff_t b_off = b - old_buffer; \ | 1222 | ptrdiff_t b_off = b - old_buffer; \ |
| 1237 | ptrdiff_t begalt_off = begalt - old_buffer; \ | 1223 | ptrdiff_t begalt_off = begalt - old_buffer; \ |
| 1238 | bool fixup_alt_jump_set = !!fixup_alt_jump; \ | 1224 | bool fixup_alt_jump_set = !!fixup_alt_jump; \ |
| @@ -1242,7 +1228,8 @@ static int analyze_first (re_char *p, re_char *pend, | |||
| 1242 | if (fixup_alt_jump_set) fixup_alt_jump_off = fixup_alt_jump - old_buffer; \ | 1228 | if (fixup_alt_jump_set) fixup_alt_jump_off = fixup_alt_jump - old_buffer; \ |
| 1243 | if (laststart_set) laststart_off = laststart - old_buffer; \ | 1229 | if (laststart_set) laststart_off = laststart - old_buffer; \ |
| 1244 | if (pending_exact_set) pending_exact_off = pending_exact - old_buffer; \ | 1230 | if (pending_exact_set) pending_exact_off = pending_exact - old_buffer; \ |
| 1245 | RETALLOC (bufp->buffer, bufp->allocated, unsigned char); \ | 1231 | bufp->buffer = xpalloc (bufp->buffer, &bufp->allocated, \ |
| 1232 | requested_extension, MAX_BUF_SIZE, 1); \ | ||
| 1246 | unsigned char *new_buffer = bufp->buffer; \ | 1233 | unsigned char *new_buffer = bufp->buffer; \ |
| 1247 | b = new_buffer + b_off; \ | 1234 | b = new_buffer + b_off; \ |
| 1248 | begalt = new_buffer + begalt_off; \ | 1235 | begalt = new_buffer + begalt_off; \ |
| @@ -1264,9 +1251,8 @@ typedef int regnum_t; | |||
| 1264 | 1251 | ||
| 1265 | /* Macros for the compile stack. */ | 1252 | /* Macros for the compile stack. */ |
| 1266 | 1253 | ||
| 1267 | /* Since offsets can go either forwards or backwards, this type needs to | ||
| 1268 | be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */ | ||
| 1269 | typedef long pattern_offset_t; | 1254 | typedef long pattern_offset_t; |
| 1255 | verify (LONG_MIN <= -(MAX_BUF_SIZE - 1) && MAX_BUF_SIZE - 1 <= LONG_MAX); | ||
| 1270 | 1256 | ||
| 1271 | typedef struct | 1257 | typedef struct |
| 1272 | { | 1258 | { |
| @@ -1280,8 +1266,8 @@ typedef struct | |||
| 1280 | typedef struct | 1266 | typedef struct |
| 1281 | { | 1267 | { |
| 1282 | compile_stack_elt_t *stack; | 1268 | compile_stack_elt_t *stack; |
| 1283 | size_t size; | 1269 | ptrdiff_t size; |
| 1284 | size_t avail; /* Offset of next open position. */ | 1270 | ptrdiff_t avail; /* Offset of next open position. */ |
| 1285 | } compile_stack_type; | 1271 | } compile_stack_type; |
| 1286 | 1272 | ||
| 1287 | 1273 | ||
| @@ -1499,7 +1485,7 @@ struct range_table_work_area | |||
| 1499 | The function can be used on ASCII and multibyte (UTF-8-encoded) strings. | 1485 | The function can be used on ASCII and multibyte (UTF-8-encoded) strings. |
| 1500 | */ | 1486 | */ |
| 1501 | re_wctype_t | 1487 | re_wctype_t |
| 1502 | re_wctype_parse (const unsigned char **strp, unsigned limit) | 1488 | re_wctype_parse (const unsigned char **strp, ptrdiff_t limit) |
| 1503 | { | 1489 | { |
| 1504 | const char *beg = (const char *)*strp, *it; | 1490 | const char *beg = (const char *)*strp, *it; |
| 1505 | 1491 | ||
| @@ -1677,13 +1663,13 @@ do { \ | |||
| 1677 | 'buffer' is the compiled pattern; | 1663 | 'buffer' is the compiled pattern; |
| 1678 | 'syntax' is set to SYNTAX; | 1664 | 'syntax' is set to SYNTAX; |
| 1679 | 'used' is set to the length of the compiled pattern; | 1665 | 'used' is set to the length of the compiled pattern; |
| 1680 | 'fastmap_accurate' is zero; | 1666 | 'fastmap_accurate' is false; |
| 1681 | 're_nsub' is the number of subexpressions in PATTERN; | 1667 | 're_nsub' is the number of subexpressions in PATTERN; |
| 1682 | 1668 | ||
| 1683 | The 'fastmap' field is neither examined nor set. */ | 1669 | The 'fastmap' field is neither examined nor set. */ |
| 1684 | 1670 | ||
| 1685 | static reg_errcode_t | 1671 | static reg_errcode_t |
| 1686 | regex_compile (re_char *pattern, size_t size, | 1672 | regex_compile (re_char *pattern, ptrdiff_t size, |
| 1687 | bool posix_backtracking, | 1673 | bool posix_backtracking, |
| 1688 | const char *whitespace_regexp, | 1674 | const char *whitespace_regexp, |
| 1689 | struct re_pattern_buffer *bufp) | 1675 | struct re_pattern_buffer *bufp) |
| @@ -1747,16 +1733,15 @@ regex_compile (re_char *pattern, size_t size, | |||
| 1747 | DEBUG_PRINT ("\nCompiling pattern: "); | 1733 | DEBUG_PRINT ("\nCompiling pattern: "); |
| 1748 | if (regex_emacs_debug > 0) | 1734 | if (regex_emacs_debug > 0) |
| 1749 | { | 1735 | { |
| 1750 | size_t debug_count; | 1736 | for (ptrdiff_t debug_count = 0; debug_count < size; debug_count++) |
| 1751 | |||
| 1752 | for (debug_count = 0; debug_count < size; debug_count++) | ||
| 1753 | putchar (pattern[debug_count]); | 1737 | putchar (pattern[debug_count]); |
| 1754 | putchar ('\n'); | 1738 | putchar ('\n'); |
| 1755 | } | 1739 | } |
| 1756 | #endif | 1740 | #endif |
| 1757 | 1741 | ||
| 1758 | /* Initialize the compile stack. */ | 1742 | /* Initialize the compile stack. */ |
| 1759 | compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t); | 1743 | compile_stack.stack = xmalloc (INIT_COMPILE_STACK_SIZE |
| 1744 | * sizeof *compile_stack.stack); | ||
| 1760 | compile_stack.size = INIT_COMPILE_STACK_SIZE; | 1745 | compile_stack.size = INIT_COMPILE_STACK_SIZE; |
| 1761 | compile_stack.avail = 0; | 1746 | compile_stack.avail = 0; |
| 1762 | 1747 | ||
| @@ -1764,8 +1749,8 @@ regex_compile (re_char *pattern, size_t size, | |||
| 1764 | range_table_work.allocated = 0; | 1749 | range_table_work.allocated = 0; |
| 1765 | 1750 | ||
| 1766 | /* Initialize the pattern buffer. */ | 1751 | /* Initialize the pattern buffer. */ |
| 1767 | bufp->fastmap_accurate = 0; | 1752 | bufp->fastmap_accurate = false; |
| 1768 | bufp->used_syntax = 0; | 1753 | bufp->used_syntax = false; |
| 1769 | 1754 | ||
| 1770 | /* Set 'used' to zero, so that if we return an error, the pattern | 1755 | /* Set 'used' to zero, so that if we return an error, the pattern |
| 1771 | printer (for debugging) will think there's no pattern. We reset it | 1756 | printer (for debugging) will think there's no pattern. We reset it |
| @@ -1776,16 +1761,9 @@ regex_compile (re_char *pattern, size_t size, | |||
| 1776 | 1761 | ||
| 1777 | if (bufp->allocated == 0) | 1762 | if (bufp->allocated == 0) |
| 1778 | { | 1763 | { |
| 1779 | if (bufp->buffer) | 1764 | /* This loses if BUFP->buffer is bogus, but that is the user's |
| 1780 | { /* If zero allocated, but buffer is non-null, try to realloc | 1765 | responsibility. */ |
| 1781 | enough space. This loses if buffer's address is bogus, but | 1766 | bufp->buffer = xrealloc (bufp->buffer, INIT_BUF_SIZE); |
| 1782 | that is the user's responsibility. */ | ||
| 1783 | RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char); | ||
| 1784 | } | ||
| 1785 | else | ||
| 1786 | { /* Caller did not allocate a buffer. Do it for them. */ | ||
| 1787 | bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char); | ||
| 1788 | } | ||
| 1789 | bufp->allocated = INIT_BUF_SIZE; | 1767 | bufp->allocated = INIT_BUF_SIZE; |
| 1790 | } | 1768 | } |
| 1791 | 1769 | ||
| @@ -1905,10 +1883,10 @@ regex_compile (re_char *pattern, size_t size, | |||
| 1905 | if (many_times_ok) | 1883 | if (many_times_ok) |
| 1906 | { | 1884 | { |
| 1907 | bool simple = skip_one_char (laststart) == b; | 1885 | bool simple = skip_one_char (laststart) == b; |
| 1908 | size_t startoffset = 0; | 1886 | ptrdiff_t startoffset = 0; |
| 1909 | re_opcode_t ofj = | 1887 | re_opcode_t ofj = |
| 1910 | /* Check if the loop can match the empty string. */ | 1888 | /* Check if the loop can match the empty string. */ |
| 1911 | (simple || !analyze_first (laststart, b, NULL, 0)) | 1889 | (simple || !analyze_first (laststart, b, NULL, false)) |
| 1912 | ? on_failure_jump : on_failure_jump_loop; | 1890 | ? on_failure_jump : on_failure_jump_loop; |
| 1913 | eassert (skip_one_char (laststart) <= b); | 1891 | eassert (skip_one_char (laststart) <= b); |
| 1914 | 1892 | ||
| @@ -1955,7 +1933,7 @@ regex_compile (re_char *pattern, size_t size, | |||
| 1955 | GET_BUFFER_SPACE (7); /* We might use less. */ | 1933 | GET_BUFFER_SPACE (7); /* We might use less. */ |
| 1956 | if (many_times_ok) | 1934 | if (many_times_ok) |
| 1957 | { | 1935 | { |
| 1958 | bool emptyp = analyze_first (laststart, b, NULL, 0); | 1936 | bool emptyp = !!analyze_first (laststart, b, NULL, false); |
| 1959 | 1937 | ||
| 1960 | /* The non-greedy multiple match looks like | 1938 | /* The non-greedy multiple match looks like |
| 1961 | a repeat..until: we only need a conditional jump | 1939 | a repeat..until: we only need a conditional jump |
| @@ -2073,7 +2051,7 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2073 | content of the syntax-table is not hardcoded in the | 2051 | content of the syntax-table is not hardcoded in the |
| 2074 | range_table. SPACE and WORD are the two exceptions. */ | 2052 | range_table. SPACE and WORD are the two exceptions. */ |
| 2075 | if ((1 << cc) & ((1 << RECC_SPACE) | (1 << RECC_WORD))) | 2053 | if ((1 << cc) & ((1 << RECC_SPACE) | (1 << RECC_WORD))) |
| 2076 | bufp->used_syntax = 1; | 2054 | bufp->used_syntax = true; |
| 2077 | 2055 | ||
| 2078 | /* Repeat the loop. */ | 2056 | /* Repeat the loop. */ |
| 2079 | continue; | 2057 | continue; |
| @@ -2180,7 +2158,7 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2180 | { | 2158 | { |
| 2181 | case '(': | 2159 | case '(': |
| 2182 | { | 2160 | { |
| 2183 | int shy = 0; | 2161 | bool shy = false; |
| 2184 | regnum_t regnum = 0; | 2162 | regnum_t regnum = 0; |
| 2185 | if (p+1 < pend) | 2163 | if (p+1 < pend) |
| 2186 | { | 2164 | { |
| @@ -2193,7 +2171,7 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2193 | PATFETCH (c); | 2171 | PATFETCH (c); |
| 2194 | switch (c) | 2172 | switch (c) |
| 2195 | { | 2173 | { |
| 2196 | case ':': shy = 1; break; | 2174 | case ':': shy = true; break; |
| 2197 | case '0': | 2175 | case '0': |
| 2198 | /* An explicitly specified regnum must start | 2176 | /* An explicitly specified regnum must start |
| 2199 | with non-0. */ | 2177 | with non-0. */ |
| @@ -2202,7 +2180,11 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2202 | FALLTHROUGH; | 2180 | FALLTHROUGH; |
| 2203 | case '1': case '2': case '3': case '4': | 2181 | case '1': case '2': case '3': case '4': |
| 2204 | case '5': case '6': case '7': case '8': case '9': | 2182 | case '5': case '6': case '7': case '8': case '9': |
| 2205 | regnum = 10*regnum + (c - '0'); break; | 2183 | if (INT_MULTIPLY_WRAPV (regnum, 10, &regnum) |
| 2184 | || INT_ADD_WRAPV (regnum, c - '0', | ||
| 2185 | &regnum)) | ||
| 2186 | FREE_STACK_RETURN (REG_ESIZE); | ||
| 2187 | break; | ||
| 2206 | default: | 2188 | default: |
| 2207 | /* Only (?:...) is supported right now. */ | 2189 | /* Only (?:...) is supported right now. */ |
| 2208 | FREE_STACK_RETURN (REG_BADPAT); | 2190 | FREE_STACK_RETURN (REG_BADPAT); |
| @@ -2215,7 +2197,7 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2215 | regnum = ++bufp->re_nsub; | 2197 | regnum = ++bufp->re_nsub; |
| 2216 | else if (regnum) | 2198 | else if (regnum) |
| 2217 | { /* It's actually not shy, but explicitly numbered. */ | 2199 | { /* It's actually not shy, but explicitly numbered. */ |
| 2218 | shy = 0; | 2200 | shy = false; |
| 2219 | if (regnum > bufp->re_nsub) | 2201 | if (regnum > bufp->re_nsub) |
| 2220 | bufp->re_nsub = regnum; | 2202 | bufp->re_nsub = regnum; |
| 2221 | else if (regnum > bufp->re_nsub | 2203 | else if (regnum > bufp->re_nsub |
| @@ -2232,11 +2214,9 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2232 | regnum = - bufp->re_nsub; | 2214 | regnum = - bufp->re_nsub; |
| 2233 | 2215 | ||
| 2234 | if (COMPILE_STACK_FULL) | 2216 | if (COMPILE_STACK_FULL) |
| 2235 | { | 2217 | compile_stack.stack |
| 2236 | RETALLOC (compile_stack.stack, compile_stack.size << 1, | 2218 | = xpalloc (compile_stack.stack, &compile_stack.size, |
| 2237 | compile_stack_elt_t); | 2219 | 1, -1, sizeof *compile_stack.stack); |
| 2238 | compile_stack.size <<= 1; | ||
| 2239 | } | ||
| 2240 | 2220 | ||
| 2241 | /* These are the values to restore when we hit end of this | 2221 | /* These are the values to restore when we hit end of this |
| 2242 | group. They are all relative offsets, so that if the | 2222 | group. They are all relative offsets, so that if the |
| @@ -2393,9 +2373,8 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2393 | else | 2373 | else |
| 2394 | { /* If the upper bound is > 1, we need to insert | 2374 | { /* If the upper bound is > 1, we need to insert |
| 2395 | more at the end of the loop. */ | 2375 | more at the end of the loop. */ |
| 2396 | unsigned int nbytes = (upper_bound < 0 ? 3 | 2376 | int nbytes = upper_bound < 0 ? 3 : upper_bound > 1 ? 5 : 0; |
| 2397 | : upper_bound > 1 ? 5 : 0); | 2377 | int startoffset = 0; |
| 2398 | unsigned int startoffset = 0; | ||
| 2399 | 2378 | ||
| 2400 | GET_BUFFER_SPACE (20); /* We might use less. */ | 2379 | GET_BUFFER_SPACE (20); /* We might use less. */ |
| 2401 | 2380 | ||
| @@ -2799,8 +2778,7 @@ group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum) | |||
| 2799 | Return -1 if fastmap was not updated accurately. */ | 2778 | Return -1 if fastmap was not updated accurately. */ |
| 2800 | 2779 | ||
| 2801 | static int | 2780 | static int |
| 2802 | analyze_first (re_char *p, re_char *pend, char *fastmap, | 2781 | analyze_first (re_char *p, re_char *pend, char *fastmap, bool multibyte) |
| 2803 | const int multibyte) | ||
| 2804 | { | 2782 | { |
| 2805 | int j, k; | 2783 | int j, k; |
| 2806 | bool not; | 2784 | bool not; |
| @@ -3102,6 +3080,8 @@ re_compile_fastmap (struct re_pattern_buffer *bufp) | |||
| 3102 | eassert (fastmap && bufp->buffer); | 3080 | eassert (fastmap && bufp->buffer); |
| 3103 | 3081 | ||
| 3104 | memset (fastmap, 0, 1 << BYTEWIDTH); /* Assume nothing's valid. */ | 3082 | memset (fastmap, 0, 1 << BYTEWIDTH); /* Assume nothing's valid. */ |
| 3083 | |||
| 3084 | /* FIXME: Is the following assignment correct even when ANALYSIS < 0? */ | ||
| 3105 | bufp->fastmap_accurate = 1; /* It will be when we're done. */ | 3085 | bufp->fastmap_accurate = 1; /* It will be when we're done. */ |
| 3106 | 3086 | ||
| 3107 | analysis = analyze_first (bufp->buffer, bufp->buffer + bufp->used, | 3087 | analysis = analyze_first (bufp->buffer, bufp->buffer + bufp->used, |
| @@ -3124,7 +3104,7 @@ re_compile_fastmap (struct re_pattern_buffer *bufp) | |||
| 3124 | 3104 | ||
| 3125 | void | 3105 | void |
| 3126 | re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs, | 3106 | re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs, |
| 3127 | unsigned int num_regs, ptrdiff_t *starts, ptrdiff_t *ends) | 3107 | ptrdiff_t num_regs, ptrdiff_t *starts, ptrdiff_t *ends) |
| 3128 | { | 3108 | { |
| 3129 | if (num_regs) | 3109 | if (num_regs) |
| 3130 | { | 3110 | { |
| @@ -3147,7 +3127,7 @@ re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs, | |||
| 3147 | doesn't let you say where to stop matching. */ | 3127 | doesn't let you say where to stop matching. */ |
| 3148 | 3128 | ||
| 3149 | ptrdiff_t | 3129 | ptrdiff_t |
| 3150 | re_search (struct re_pattern_buffer *bufp, const char *string, size_t size, | 3130 | re_search (struct re_pattern_buffer *bufp, const char *string, ptrdiff_t size, |
| 3151 | ptrdiff_t startpos, ptrdiff_t range, struct re_registers *regs) | 3131 | ptrdiff_t startpos, ptrdiff_t range, struct re_registers *regs) |
| 3152 | { | 3132 | { |
| 3153 | return re_search_2 (bufp, NULL, 0, string, size, startpos, range, | 3133 | return re_search_2 (bufp, NULL, 0, string, size, startpos, range, |
| @@ -3184,8 +3164,8 @@ re_search (struct re_pattern_buffer *bufp, const char *string, size_t size, | |||
| 3184 | stack overflow). */ | 3164 | stack overflow). */ |
| 3185 | 3165 | ||
| 3186 | ptrdiff_t | 3166 | ptrdiff_t |
| 3187 | re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1, | 3167 | re_search_2 (struct re_pattern_buffer *bufp, const char *str1, ptrdiff_t size1, |
| 3188 | const char *str2, size_t size2, | 3168 | const char *str2, ptrdiff_t size2, |
| 3189 | ptrdiff_t startpos, ptrdiff_t range, | 3169 | ptrdiff_t startpos, ptrdiff_t range, |
| 3190 | struct re_registers *regs, ptrdiff_t stop) | 3170 | struct re_registers *regs, ptrdiff_t stop) |
| 3191 | { | 3171 | { |
| @@ -3194,7 +3174,7 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1, | |||
| 3194 | re_char *string2 = (re_char *) str2; | 3174 | re_char *string2 = (re_char *) str2; |
| 3195 | char *fastmap = bufp->fastmap; | 3175 | char *fastmap = bufp->fastmap; |
| 3196 | Lisp_Object translate = bufp->translate; | 3176 | Lisp_Object translate = bufp->translate; |
| 3197 | size_t total_size = size1 + size2; | 3177 | ptrdiff_t total_size = size1 + size2; |
| 3198 | ptrdiff_t endpos = startpos + range; | 3178 | ptrdiff_t endpos = startpos + range; |
| 3199 | bool anchored_start; | 3179 | bool anchored_start; |
| 3200 | /* Nonzero if we are searching multibyte string. */ | 3180 | /* Nonzero if we are searching multibyte string. */ |
| @@ -3418,10 +3398,8 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1, | |||
| 3418 | 3398 | ||
| 3419 | /* Declarations and macros for re_match_2. */ | 3399 | /* Declarations and macros for re_match_2. */ |
| 3420 | 3400 | ||
| 3421 | static int bcmp_translate (re_char *s1, re_char *s2, | 3401 | static bool bcmp_translate (re_char *, re_char *, ptrdiff_t, |
| 3422 | ptrdiff_t len, | 3402 | Lisp_Object, bool); |
| 3423 | Lisp_Object translate, | ||
| 3424 | const int multibyte); | ||
| 3425 | 3403 | ||
| 3426 | /* This converts PTR, a pointer into one of the search strings 'string1' | 3404 | /* This converts PTR, a pointer into one of the search strings 'string1' |
| 3427 | and 'string2' into an offset from the beginning of that string. */ | 3405 | and 'string2' into an offset from the beginning of that string. */ |
| @@ -3565,8 +3543,9 @@ skip_noops (re_char *p, re_char *pend) | |||
| 3565 | character (i.e. without any translations). UNIBYTE denotes whether c is | 3543 | character (i.e. without any translations). UNIBYTE denotes whether c is |
| 3566 | unibyte or multibyte character. */ | 3544 | unibyte or multibyte character. */ |
| 3567 | static bool | 3545 | static bool |
| 3568 | execute_charset (re_char **pp, unsigned c, unsigned corig, bool unibyte) | 3546 | execute_charset (re_char **pp, int c, int corig, bool unibyte) |
| 3569 | { | 3547 | { |
| 3548 | eassume (0 <= c && 0 <= corig); | ||
| 3570 | re_char *p = *pp, *rtp = NULL; | 3549 | re_char *p = *pp, *rtp = NULL; |
| 3571 | bool not = (re_opcode_t) *p == charset_not; | 3550 | bool not = (re_opcode_t) *p == charset_not; |
| 3572 | 3551 | ||
| @@ -3626,8 +3605,8 @@ execute_charset (re_char **pp, unsigned c, unsigned corig, bool unibyte) | |||
| 3626 | return not; | 3605 | return not; |
| 3627 | } | 3606 | } |
| 3628 | 3607 | ||
| 3629 | /* Non-zero if "p1 matches something" implies "p2 fails". */ | 3608 | /* True if "p1 matches something" implies "p2 fails". */ |
| 3630 | static int | 3609 | static bool |
| 3631 | mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1, | 3610 | mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1, |
| 3632 | re_char *p2) | 3611 | re_char *p2) |
| 3633 | { | 3612 | { |
| @@ -3660,7 +3639,7 @@ mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1, | |||
| 3660 | if (skip_one_char (p1)) | 3639 | if (skip_one_char (p1)) |
| 3661 | { | 3640 | { |
| 3662 | DEBUG_PRINT (" End of pattern: fast loop.\n"); | 3641 | DEBUG_PRINT (" End of pattern: fast loop.\n"); |
| 3663 | return 1; | 3642 | return true; |
| 3664 | } | 3643 | } |
| 3665 | break; | 3644 | break; |
| 3666 | 3645 | ||
| @@ -3676,7 +3655,7 @@ mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1, | |||
| 3676 | if (c != RE_STRING_CHAR (p1 + 2, multibyte)) | 3655 | if (c != RE_STRING_CHAR (p1 + 2, multibyte)) |
| 3677 | { | 3656 | { |
| 3678 | DEBUG_PRINT (" '%c' != '%c' => fast loop.\n", c, p1[2]); | 3657 | DEBUG_PRINT (" '%c' != '%c' => fast loop.\n", c, p1[2]); |
| 3679 | return 1; | 3658 | return true; |
| 3680 | } | 3659 | } |
| 3681 | } | 3660 | } |
| 3682 | 3661 | ||
| @@ -3686,14 +3665,14 @@ mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1, | |||
| 3686 | if (!execute_charset (&p1, c, c, !multibyte || ASCII_CHAR_P (c))) | 3665 | if (!execute_charset (&p1, c, c, !multibyte || ASCII_CHAR_P (c))) |
| 3687 | { | 3666 | { |
| 3688 | DEBUG_PRINT (" No match => fast loop.\n"); | 3667 | DEBUG_PRINT (" No match => fast loop.\n"); |
| 3689 | return 1; | 3668 | return true; |
| 3690 | } | 3669 | } |
| 3691 | } | 3670 | } |
| 3692 | else if ((re_opcode_t) *p1 == anychar | 3671 | else if ((re_opcode_t) *p1 == anychar |
| 3693 | && c == '\n') | 3672 | && c == '\n') |
| 3694 | { | 3673 | { |
| 3695 | DEBUG_PRINT (" . != \\n => fast loop.\n"); | 3674 | DEBUG_PRINT (" . != \\n => fast loop.\n"); |
| 3696 | return 1; | 3675 | return true; |
| 3697 | } | 3676 | } |
| 3698 | } | 3677 | } |
| 3699 | break; | 3678 | break; |
| @@ -3736,7 +3715,7 @@ mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1, | |||
| 3736 | || idx == CHARSET_BITMAP_SIZE (p1)) | 3715 | || idx == CHARSET_BITMAP_SIZE (p1)) |
| 3737 | { | 3716 | { |
| 3738 | DEBUG_PRINT (" No match => fast loop.\n"); | 3717 | DEBUG_PRINT (" No match => fast loop.\n"); |
| 3739 | return 1; | 3718 | return true; |
| 3740 | } | 3719 | } |
| 3741 | } | 3720 | } |
| 3742 | else if ((re_opcode_t) *p1 == charset_not) | 3721 | else if ((re_opcode_t) *p1 == charset_not) |
| @@ -3753,7 +3732,7 @@ mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1, | |||
| 3753 | if (idx == p2[1]) | 3732 | if (idx == p2[1]) |
| 3754 | { | 3733 | { |
| 3755 | DEBUG_PRINT (" No match => fast loop.\n"); | 3734 | DEBUG_PRINT (" No match => fast loop.\n"); |
| 3756 | return 1; | 3735 | return true; |
| 3757 | } | 3736 | } |
| 3758 | } | 3737 | } |
| 3759 | } | 3738 | } |
| @@ -3807,7 +3786,7 @@ mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1, | |||
| 3807 | } | 3786 | } |
| 3808 | 3787 | ||
| 3809 | /* Safe default. */ | 3788 | /* Safe default. */ |
| 3810 | return 0; | 3789 | return false; |
| 3811 | } | 3790 | } |
| 3812 | 3791 | ||
| 3813 | 3792 | ||
| @@ -3826,9 +3805,10 @@ mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1, | |||
| 3826 | matched substring. */ | 3805 | matched substring. */ |
| 3827 | 3806 | ||
| 3828 | ptrdiff_t | 3807 | ptrdiff_t |
| 3829 | re_match_2 (struct re_pattern_buffer *bufp, const char *string1, | 3808 | re_match_2 (struct re_pattern_buffer *bufp, |
| 3830 | size_t size1, const char *string2, size_t size2, ptrdiff_t pos, | 3809 | char const *string1, ptrdiff_t size1, |
| 3831 | struct re_registers *regs, ptrdiff_t stop) | 3810 | char const *string2, ptrdiff_t size2, |
| 3811 | ptrdiff_t pos, struct re_registers *regs, ptrdiff_t stop) | ||
| 3832 | { | 3812 | { |
| 3833 | ptrdiff_t result; | 3813 | ptrdiff_t result; |
| 3834 | 3814 | ||
| @@ -3847,13 +3827,13 @@ re_match_2 (struct re_pattern_buffer *bufp, const char *string1, | |||
| 3847 | /* This is a separate function so that we can force an alloca cleanup | 3827 | /* This is a separate function so that we can force an alloca cleanup |
| 3848 | afterwards. */ | 3828 | afterwards. */ |
| 3849 | static ptrdiff_t | 3829 | static ptrdiff_t |
| 3850 | re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | 3830 | re_match_2_internal (struct re_pattern_buffer *bufp, |
| 3851 | size_t size1, re_char *string2, size_t size2, | 3831 | re_char *string1, ptrdiff_t size1, |
| 3832 | re_char *string2, ptrdiff_t size2, | ||
| 3852 | ptrdiff_t pos, struct re_registers *regs, ptrdiff_t stop) | 3833 | ptrdiff_t pos, struct re_registers *regs, ptrdiff_t stop) |
| 3853 | { | 3834 | { |
| 3854 | /* General temporaries. */ | 3835 | /* General temporaries. */ |
| 3855 | int mcnt; | 3836 | int mcnt; |
| 3856 | size_t reg; | ||
| 3857 | 3837 | ||
| 3858 | /* Just past the end of the corresponding string. */ | 3838 | /* Just past the end of the corresponding string. */ |
| 3859 | re_char *end1, *end2; | 3839 | re_char *end1, *end2; |
| @@ -3893,13 +3873,14 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 3893 | scanning the strings. */ | 3873 | scanning the strings. */ |
| 3894 | fail_stack_type fail_stack; | 3874 | fail_stack_type fail_stack; |
| 3895 | #ifdef DEBUG_COMPILES_ARGUMENTS | 3875 | #ifdef DEBUG_COMPILES_ARGUMENTS |
| 3896 | unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0; | 3876 | ptrdiff_t nfailure_points_pushed = 0, nfailure_points_popped = 0; |
| 3897 | #endif | 3877 | #endif |
| 3898 | 3878 | ||
| 3899 | /* We fill all the registers internally, independent of what we | 3879 | /* We fill all the registers internally, independent of what we |
| 3900 | return, for use in backreferences. The number here includes | 3880 | return, for use in backreferences. The number here includes |
| 3901 | an element for register zero. */ | 3881 | an element for register zero. */ |
| 3902 | size_t num_regs = bufp->re_nsub + 1; | 3882 | ptrdiff_t num_regs = bufp->re_nsub + 1; |
| 3883 | eassume (0 < num_regs); | ||
| 3903 | 3884 | ||
| 3904 | /* Information on the contents of registers. These are pointers into | 3885 | /* Information on the contents of registers. These are pointers into |
| 3905 | the input strings; they record just what was matched (on this | 3886 | the input strings; they record just what was matched (on this |
| @@ -3914,7 +3895,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 3914 | variables when we find a match better than any we've seen before. | 3895 | variables when we find a match better than any we've seen before. |
| 3915 | This happens as we backtrack through the failure points, which in | 3896 | This happens as we backtrack through the failure points, which in |
| 3916 | turn happens only if we have not yet matched the entire string. */ | 3897 | turn happens only if we have not yet matched the entire string. */ |
| 3917 | unsigned best_regs_set = false; | 3898 | bool best_regs_set = false; |
| 3918 | re_char **best_regstart UNINIT, **best_regend UNINIT; | 3899 | re_char **best_regstart UNINIT, **best_regend UNINIT; |
| 3919 | 3900 | ||
| 3920 | /* Logically, this is 'best_regend[0]'. But we don't want to have to | 3901 | /* Logically, this is 'best_regend[0]'. But we don't want to have to |
| @@ -3929,7 +3910,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 3929 | 3910 | ||
| 3930 | #ifdef DEBUG_COMPILES_ARGUMENTS | 3911 | #ifdef DEBUG_COMPILES_ARGUMENTS |
| 3931 | /* Counts the total number of registers pushed. */ | 3912 | /* Counts the total number of registers pushed. */ |
| 3932 | unsigned num_regs_pushed = 0; | 3913 | ptrdiff_t num_regs_pushed = 0; |
| 3933 | #endif | 3914 | #endif |
| 3934 | 3915 | ||
| 3935 | DEBUG_PRINT ("\n\nEntering re_match_2.\n"); | 3916 | DEBUG_PRINT ("\n\nEntering re_match_2.\n"); |
| @@ -3961,7 +3942,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 3961 | /* Initialize subexpression text positions to -1 to mark ones that no | 3942 | /* Initialize subexpression text positions to -1 to mark ones that no |
| 3962 | start_memory/stop_memory has been seen for. Also initialize the | 3943 | start_memory/stop_memory has been seen for. Also initialize the |
| 3963 | register information struct. */ | 3944 | register information struct. */ |
| 3964 | for (reg = 1; reg < num_regs; reg++) | 3945 | for (ptrdiff_t reg = 1; reg < num_regs; reg++) |
| 3965 | regstart[reg] = regend[reg] = NULL; | 3946 | regstart[reg] = regend[reg] = NULL; |
| 3966 | 3947 | ||
| 3967 | /* We move 'string1' into 'string2' if the latter's empty -- but not if | 3948 | /* We move 'string1' into 'string2' if the latter's empty -- but not if |
| @@ -4068,7 +4049,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 4068 | 4049 | ||
| 4069 | DEBUG_PRINT ("\nSAVING match as best so far.\n"); | 4050 | DEBUG_PRINT ("\nSAVING match as best so far.\n"); |
| 4070 | 4051 | ||
| 4071 | for (reg = 1; reg < num_regs; reg++) | 4052 | for (ptrdiff_t reg = 1; reg < num_regs; reg++) |
| 4072 | { | 4053 | { |
| 4073 | best_regstart[reg] = regstart[reg]; | 4054 | best_regstart[reg] = regstart[reg]; |
| 4074 | best_regend[reg] = regend[reg]; | 4055 | best_regend[reg] = regend[reg]; |
| @@ -4094,7 +4075,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 4094 | dend = ((d >= string1 && d <= end1) | 4075 | dend = ((d >= string1 && d <= end1) |
| 4095 | ? end_match_1 : end_match_2); | 4076 | ? end_match_1 : end_match_2); |
| 4096 | 4077 | ||
| 4097 | for (reg = 1; reg < num_regs; reg++) | 4078 | for (ptrdiff_t reg = 1; reg < num_regs; reg++) |
| 4098 | { | 4079 | { |
| 4099 | regstart[reg] = best_regstart[reg]; | 4080 | regstart[reg] = best_regstart[reg]; |
| 4100 | regend[reg] = best_regend[reg]; | 4081 | regend[reg] = best_regend[reg]; |
| @@ -4113,9 +4094,10 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 4113 | { /* No. So allocate them with malloc. We need one | 4094 | { /* No. So allocate them with malloc. We need one |
| 4114 | extra element beyond 'num_regs' for the '-1' marker | 4095 | extra element beyond 'num_regs' for the '-1' marker |
| 4115 | GNU code uses. */ | 4096 | GNU code uses. */ |
| 4116 | regs->num_regs = max (RE_NREGS, num_regs + 1); | 4097 | ptrdiff_t n = max (RE_NREGS, num_regs + 1); |
| 4117 | regs->start = TALLOC (regs->num_regs, ptrdiff_t); | 4098 | regs->start = xnmalloc (n, sizeof *regs->start); |
| 4118 | regs->end = TALLOC (regs->num_regs, ptrdiff_t); | 4099 | regs->end = xnmalloc (n, sizeof *regs->end); |
| 4100 | regs->num_regs = n; | ||
| 4119 | bufp->regs_allocated = REGS_REALLOCATE; | 4101 | bufp->regs_allocated = REGS_REALLOCATE; |
| 4120 | } | 4102 | } |
| 4121 | else if (bufp->regs_allocated == REGS_REALLOCATE) | 4103 | else if (bufp->regs_allocated == REGS_REALLOCATE) |
| @@ -4124,9 +4106,11 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 4124 | leave it alone. */ | 4106 | leave it alone. */ |
| 4125 | if (regs->num_regs < num_regs + 1) | 4107 | if (regs->num_regs < num_regs + 1) |
| 4126 | { | 4108 | { |
| 4127 | regs->num_regs = num_regs + 1; | 4109 | ptrdiff_t n = num_regs + 1; |
| 4128 | RETALLOC (regs->start, regs->num_regs, ptrdiff_t); | 4110 | regs->start |
| 4129 | RETALLOC (regs->end, regs->num_regs, ptrdiff_t); | 4111 | = xnrealloc (regs->start, n, sizeof *regs->start); |
| 4112 | regs->end = xnrealloc (regs->end, n, sizeof *regs->end); | ||
| 4113 | regs->num_regs = n; | ||
| 4130 | } | 4114 | } |
| 4131 | } | 4115 | } |
| 4132 | else | 4116 | else |
| @@ -4141,9 +4125,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 4141 | regs->end[0] = POINTER_TO_OFFSET (d); | 4125 | regs->end[0] = POINTER_TO_OFFSET (d); |
| 4142 | } | 4126 | } |
| 4143 | 4127 | ||
| 4144 | /* Go through the first 'min (num_regs, regs->num_regs)' | 4128 | for (ptrdiff_t reg = 1; reg < num_regs; reg++) |
| 4145 | registers, since that is all we initialized. */ | ||
| 4146 | for (reg = 1; reg < min (num_regs, regs->num_regs); reg++) | ||
| 4147 | { | 4129 | { |
| 4148 | if (REG_UNSET (regstart[reg]) || REG_UNSET (regend[reg])) | 4130 | if (REG_UNSET (regstart[reg]) || REG_UNSET (regend[reg])) |
| 4149 | regs->start[reg] = regs->end[reg] = -1; | 4131 | regs->start[reg] = regs->end[reg] = -1; |
| @@ -4159,14 +4141,14 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 4159 | we (re)allocated the registers, this is the case, | 4141 | we (re)allocated the registers, this is the case, |
| 4160 | because we always allocate enough to have at least one | 4142 | because we always allocate enough to have at least one |
| 4161 | -1 at the end. */ | 4143 | -1 at the end. */ |
| 4162 | for (reg = num_regs; reg < regs->num_regs; reg++) | 4144 | for (ptrdiff_t reg = num_regs; reg < regs->num_regs; reg++) |
| 4163 | regs->start[reg] = regs->end[reg] = -1; | 4145 | regs->start[reg] = regs->end[reg] = -1; |
| 4164 | } | 4146 | } |
| 4165 | 4147 | ||
| 4166 | DEBUG_PRINT ("%u failure points pushed, %u popped (%u remain).\n", | 4148 | DEBUG_PRINT ("%td failure points pushed, %td popped (%td remain).\n", |
| 4167 | nfailure_points_pushed, nfailure_points_popped, | 4149 | nfailure_points_pushed, nfailure_points_popped, |
| 4168 | nfailure_points_pushed - nfailure_points_popped); | 4150 | nfailure_points_pushed - nfailure_points_popped); |
| 4169 | DEBUG_PRINT ("%u registers pushed.\n", num_regs_pushed); | 4151 | DEBUG_PRINT ("%td registers pushed.\n", num_regs_pushed); |
| 4170 | 4152 | ||
| 4171 | ptrdiff_t dcnt = POINTER_TO_OFFSET (d) - pos; | 4153 | ptrdiff_t dcnt = POINTER_TO_OFFSET (d) - pos; |
| 4172 | 4154 | ||
| @@ -4291,9 +4273,6 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 4291 | case charset: | 4273 | case charset: |
| 4292 | case charset_not: | 4274 | case charset_not: |
| 4293 | { | 4275 | { |
| 4294 | register unsigned int c, corig; | ||
| 4295 | int len; | ||
| 4296 | |||
| 4297 | /* Whether matching against a unibyte character. */ | 4276 | /* Whether matching against a unibyte character. */ |
| 4298 | bool unibyte_char = false; | 4277 | bool unibyte_char = false; |
| 4299 | 4278 | ||
| @@ -4301,7 +4280,9 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 4301 | (re_opcode_t) *(p - 1) == charset_not ? "_not" : ""); | 4280 | (re_opcode_t) *(p - 1) == charset_not ? "_not" : ""); |
| 4302 | 4281 | ||
| 4303 | PREFETCH (); | 4282 | PREFETCH (); |
| 4304 | corig = c = RE_STRING_CHAR_AND_LENGTH (d, len, target_multibyte); | 4283 | int len; |
| 4284 | int corig = RE_STRING_CHAR_AND_LENGTH (d, len, target_multibyte); | ||
| 4285 | int c = corig; | ||
| 4305 | if (target_multibyte) | 4286 | if (target_multibyte) |
| 4306 | { | 4287 | { |
| 4307 | int c1; | 4288 | int c1; |
| @@ -4369,7 +4350,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 4369 | /* Strictly speaking, there should be code such as: | 4350 | /* Strictly speaking, there should be code such as: |
| 4370 | 4351 | ||
| 4371 | eassert (REG_UNSET (regend[*p])); | 4352 | eassert (REG_UNSET (regend[*p])); |
| 4372 | PUSH_FAILURE_REGSTOP ((unsigned int)*p); | 4353 | PUSH_FAILURE_REGSTOP (*p); |
| 4373 | 4354 | ||
| 4374 | But the only info to be pushed is regend[*p] and it is known to | 4355 | But the only info to be pushed is regend[*p] and it is known to |
| 4375 | be UNSET, so there really isn't anything to push. | 4356 | be UNSET, so there really isn't anything to push. |
| @@ -4548,7 +4529,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 4548 | 4529 | ||
| 4549 | eassert ((re_opcode_t)p[-4] == no_op); | 4530 | eassert ((re_opcode_t)p[-4] == no_op); |
| 4550 | { | 4531 | { |
| 4551 | int cycle = 0; | 4532 | bool cycle = false; |
| 4552 | CHECK_INFINITE_LOOP (p - 4, d); | 4533 | CHECK_INFINITE_LOOP (p - 4, d); |
| 4553 | if (!cycle) | 4534 | if (!cycle) |
| 4554 | /* If there's a cycle, just continue without pushing | 4535 | /* If there's a cycle, just continue without pushing |
| @@ -4567,7 +4548,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 4567 | DEBUG_PRINT ("EXECUTING on_failure_jump_loop %d (to %p):\n", | 4548 | DEBUG_PRINT ("EXECUTING on_failure_jump_loop %d (to %p):\n", |
| 4568 | mcnt, p + mcnt); | 4549 | mcnt, p + mcnt); |
| 4569 | { | 4550 | { |
| 4570 | int cycle = 0; | 4551 | bool cycle = false; |
| 4571 | CHECK_INFINITE_LOOP (p - 3, d); | 4552 | CHECK_INFINITE_LOOP (p - 3, d); |
| 4572 | if (cycle) | 4553 | if (cycle) |
| 4573 | /* If there's a cycle, get out of the loop, as if the matching | 4554 | /* If there's a cycle, get out of the loop, as if the matching |
| @@ -5025,12 +5006,12 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 5025 | 5006 | ||
| 5026 | /* Subroutine definitions for re_match_2. */ | 5007 | /* Subroutine definitions for re_match_2. */ |
| 5027 | 5008 | ||
| 5028 | /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN | 5009 | /* Return true if TRANSLATE[S1] and TRANSLATE[S2] are not identical |
| 5029 | bytes; nonzero otherwise. */ | 5010 | for LEN bytes. */ |
| 5030 | 5011 | ||
| 5031 | static int | 5012 | static bool |
| 5032 | bcmp_translate (re_char *s1, re_char *s2, ptrdiff_t len, | 5013 | bcmp_translate (re_char *s1, re_char *s2, ptrdiff_t len, |
| 5033 | Lisp_Object translate, int target_multibyte) | 5014 | Lisp_Object translate, bool target_multibyte) |
| 5034 | { | 5015 | { |
| 5035 | re_char *p1 = s1, *p2 = s2; | 5016 | re_char *p1 = s1, *p2 = s2; |
| 5036 | re_char *p1_end = s1 + len; | 5017 | re_char *p1_end = s1 + len; |
| @@ -5048,15 +5029,12 @@ bcmp_translate (re_char *s1, re_char *s2, ptrdiff_t len, | |||
| 5048 | 5029 | ||
| 5049 | if (RE_TRANSLATE (translate, p1_ch) | 5030 | if (RE_TRANSLATE (translate, p1_ch) |
| 5050 | != RE_TRANSLATE (translate, p2_ch)) | 5031 | != RE_TRANSLATE (translate, p2_ch)) |
| 5051 | return 1; | 5032 | return true; |
| 5052 | 5033 | ||
| 5053 | p1 += p1_charlen, p2 += p2_charlen; | 5034 | p1 += p1_charlen, p2 += p2_charlen; |
| 5054 | } | 5035 | } |
| 5055 | 5036 | ||
| 5056 | if (p1 != p1_end || p2 != p2_end) | 5037 | return p1 != p1_end || p2 != p2_end; |
| 5057 | return 1; | ||
| 5058 | |||
| 5059 | return 0; | ||
| 5060 | } | 5038 | } |
| 5061 | 5039 | ||
| 5062 | /* Entry points for GNU code. */ | 5040 | /* Entry points for GNU code. */ |
| @@ -5071,7 +5049,7 @@ bcmp_translate (re_char *s1, re_char *s2, ptrdiff_t len, | |||
| 5071 | We call regex_compile to do the actual compilation. */ | 5049 | We call regex_compile to do the actual compilation. */ |
| 5072 | 5050 | ||
| 5073 | const char * | 5051 | const char * |
| 5074 | re_compile_pattern (const char *pattern, size_t length, | 5052 | re_compile_pattern (const char *pattern, ptrdiff_t length, |
| 5075 | bool posix_backtracking, const char *whitespace_regexp, | 5053 | bool posix_backtracking, const char *whitespace_regexp, |
| 5076 | struct re_pattern_buffer *bufp) | 5054 | struct re_pattern_buffer *bufp) |
| 5077 | { | 5055 | { |
diff --git a/src/regex-emacs.h b/src/regex-emacs.h index 5a52135bcda..95f743dc2fb 100644 --- a/src/regex-emacs.h +++ b/src/regex-emacs.h | |||
| @@ -26,7 +26,7 @@ | |||
| 26 | uses struct re_registers. */ | 26 | uses struct re_registers. */ |
| 27 | struct re_registers | 27 | struct re_registers |
| 28 | { | 28 | { |
| 29 | unsigned num_regs; | 29 | ptrdiff_t num_regs; |
| 30 | ptrdiff_t *start; | 30 | ptrdiff_t *start; |
| 31 | ptrdiff_t *end; | 31 | ptrdiff_t *end; |
| 32 | }; | 32 | }; |
| @@ -50,7 +50,7 @@ struct re_registers | |||
| 50 | extern Lisp_Object re_match_object; | 50 | extern Lisp_Object re_match_object; |
| 51 | 51 | ||
| 52 | /* Roughly the maximum number of failure points on the stack. */ | 52 | /* Roughly the maximum number of failure points on the stack. */ |
| 53 | extern size_t emacs_re_max_failures; | 53 | extern ptrdiff_t emacs_re_max_failures; |
| 54 | 54 | ||
| 55 | /* Amount of memory that we can safely stack allocate. */ | 55 | /* Amount of memory that we can safely stack allocate. */ |
| 56 | extern ptrdiff_t emacs_re_safe_alloca; | 56 | extern ptrdiff_t emacs_re_safe_alloca; |
| @@ -69,10 +69,10 @@ struct re_pattern_buffer | |||
| 69 | unsigned char *buffer; | 69 | unsigned char *buffer; |
| 70 | 70 | ||
| 71 | /* Number of bytes to which 'buffer' points. */ | 71 | /* Number of bytes to which 'buffer' points. */ |
| 72 | size_t allocated; | 72 | ptrdiff_t allocated; |
| 73 | 73 | ||
| 74 | /* Number of bytes actually used in 'buffer'. */ | 74 | /* Number of bytes actually used in 'buffer'. */ |
| 75 | size_t used; | 75 | ptrdiff_t used; |
| 76 | 76 | ||
| 77 | /* Charset of unibyte characters at compiling time. */ | 77 | /* Charset of unibyte characters at compiling time. */ |
| 78 | int charset_unibyte; | 78 | int charset_unibyte; |
| @@ -89,13 +89,13 @@ struct re_pattern_buffer | |||
| 89 | Lisp_Object translate; | 89 | Lisp_Object translate; |
| 90 | 90 | ||
| 91 | /* Number of subexpressions found by the compiler. */ | 91 | /* Number of subexpressions found by the compiler. */ |
| 92 | size_t re_nsub; | 92 | ptrdiff_t re_nsub; |
| 93 | 93 | ||
| 94 | /* True if and only if this pattern can match the empty string. | 94 | /* True if and only if this pattern can match the empty string. |
| 95 | Well, in truth it's used only in 're_search_2', to see | 95 | Well, in truth it's used only in 're_search_2', to see |
| 96 | whether or not we should use the fastmap, so we don't set | 96 | whether or not we should use the fastmap, so we don't set |
| 97 | this absolutely perfectly; see 're_compile_fastmap'. */ | 97 | this absolutely perfectly; see 're_compile_fastmap'. */ |
| 98 | unsigned can_be_null : 1; | 98 | bool_bf can_be_null : 1; |
| 99 | 99 | ||
| 100 | /* If REGS_UNALLOCATED, allocate space in the 'regs' structure | 100 | /* If REGS_UNALLOCATED, allocate space in the 'regs' structure |
| 101 | for 'max (RE_NREGS, re_nsub + 1)' groups. | 101 | for 'max (RE_NREGS, re_nsub + 1)' groups. |
| @@ -105,19 +105,19 @@ struct re_pattern_buffer | |||
| 105 | 105 | ||
| 106 | /* Set to false when 'regex_compile' compiles a pattern; set to true | 106 | /* Set to false when 'regex_compile' compiles a pattern; set to true |
| 107 | by 're_compile_fastmap' if it updates the fastmap. */ | 107 | by 're_compile_fastmap' if it updates the fastmap. */ |
| 108 | unsigned fastmap_accurate : 1; | 108 | bool_bf fastmap_accurate : 1; |
| 109 | 109 | ||
| 110 | /* If true, the compilation of the pattern had to look up the syntax table, | 110 | /* If true, the compilation of the pattern had to look up the syntax table, |
| 111 | so the compiled pattern is valid for the current syntax table only. */ | 111 | so the compiled pattern is valid for the current syntax table only. */ |
| 112 | unsigned used_syntax : 1; | 112 | bool_bf used_syntax : 1; |
| 113 | 113 | ||
| 114 | /* If true, multi-byte form in the regexp pattern should be | 114 | /* If true, multi-byte form in the regexp pattern should be |
| 115 | recognized as a multibyte character. */ | 115 | recognized as a multibyte character. */ |
| 116 | unsigned multibyte : 1; | 116 | bool_bf multibyte : 1; |
| 117 | 117 | ||
| 118 | /* If true, multi-byte form in the target of match should be | 118 | /* If true, multi-byte form in the target of match should be |
| 119 | recognized as a multibyte character. */ | 119 | recognized as a multibyte character. */ |
| 120 | unsigned target_multibyte : 1; | 120 | bool_bf target_multibyte : 1; |
| 121 | }; | 121 | }; |
| 122 | 122 | ||
| 123 | /* Declarations for routines. */ | 123 | /* Declarations for routines. */ |
| @@ -125,7 +125,7 @@ struct re_pattern_buffer | |||
| 125 | /* Compile the regular expression PATTERN, with length LENGTH | 125 | /* Compile the regular expression PATTERN, with length LENGTH |
| 126 | and syntax given by the global 're_syntax_options', into the buffer | 126 | and syntax given by the global 're_syntax_options', into the buffer |
| 127 | BUFFER. Return NULL if successful, and an error string if not. */ | 127 | BUFFER. Return NULL if successful, and an error string if not. */ |
| 128 | extern const char *re_compile_pattern (const char *pattern, size_t length, | 128 | extern const char *re_compile_pattern (const char *pattern, ptrdiff_t length, |
| 129 | bool posix_backtracking, | 129 | bool posix_backtracking, |
| 130 | const char *whitespace_regexp, | 130 | const char *whitespace_regexp, |
| 131 | struct re_pattern_buffer *buffer); | 131 | struct re_pattern_buffer *buffer); |
| @@ -137,7 +137,7 @@ extern const char *re_compile_pattern (const char *pattern, size_t length, | |||
| 137 | match, or -2 for an internal error. Also return register | 137 | match, or -2 for an internal error. Also return register |
| 138 | information in REGS (if REGS is non-null). */ | 138 | information in REGS (if REGS is non-null). */ |
| 139 | extern ptrdiff_t re_search (struct re_pattern_buffer *buffer, | 139 | extern ptrdiff_t re_search (struct re_pattern_buffer *buffer, |
| 140 | const char *string, size_t length, | 140 | const char *string, ptrdiff_t length, |
| 141 | ptrdiff_t start, ptrdiff_t range, | 141 | ptrdiff_t start, ptrdiff_t range, |
| 142 | struct re_registers *regs); | 142 | struct re_registers *regs); |
| 143 | 143 | ||
| @@ -145,8 +145,8 @@ extern ptrdiff_t re_search (struct re_pattern_buffer *buffer, | |||
| 145 | /* Like 're_search', but search in the concatenation of STRING1 and | 145 | /* Like 're_search', but search in the concatenation of STRING1 and |
| 146 | STRING2. Also, stop searching at index START + STOP. */ | 146 | STRING2. Also, stop searching at index START + STOP. */ |
| 147 | extern ptrdiff_t re_search_2 (struct re_pattern_buffer *buffer, | 147 | extern ptrdiff_t re_search_2 (struct re_pattern_buffer *buffer, |
| 148 | const char *string1, size_t length1, | 148 | const char *string1, ptrdiff_t length1, |
| 149 | const char *string2, size_t length2, | 149 | const char *string2, ptrdiff_t length2, |
| 150 | ptrdiff_t start, ptrdiff_t range, | 150 | ptrdiff_t start, ptrdiff_t range, |
| 151 | struct re_registers *regs, | 151 | struct re_registers *regs, |
| 152 | ptrdiff_t stop); | 152 | ptrdiff_t stop); |
| @@ -155,8 +155,8 @@ extern ptrdiff_t re_search_2 (struct re_pattern_buffer *buffer, | |||
| 155 | /* Like 're_search_2', but return how many characters in STRING the regexp | 155 | /* Like 're_search_2', but return how many characters in STRING the regexp |
| 156 | in BUFFER matched, starting at position START. */ | 156 | in BUFFER matched, starting at position START. */ |
| 157 | extern ptrdiff_t re_match_2 (struct re_pattern_buffer *buffer, | 157 | extern ptrdiff_t re_match_2 (struct re_pattern_buffer *buffer, |
| 158 | const char *string1, size_t length1, | 158 | const char *string1, ptrdiff_t length1, |
| 159 | const char *string2, size_t length2, | 159 | const char *string2, ptrdiff_t length2, |
| 160 | ptrdiff_t start, struct re_registers *regs, | 160 | ptrdiff_t start, struct re_registers *regs, |
| 161 | ptrdiff_t stop); | 161 | ptrdiff_t stop); |
| 162 | 162 | ||
| @@ -175,7 +175,7 @@ extern ptrdiff_t re_match_2 (struct re_pattern_buffer *buffer, | |||
| 175 | freeing the old data. */ | 175 | freeing the old data. */ |
| 176 | extern void re_set_registers (struct re_pattern_buffer *buffer, | 176 | extern void re_set_registers (struct re_pattern_buffer *buffer, |
| 177 | struct re_registers *regs, | 177 | struct re_registers *regs, |
| 178 | unsigned num_regs, | 178 | ptrdiff_t num_regs, |
| 179 | ptrdiff_t *starts, ptrdiff_t *ends); | 179 | ptrdiff_t *starts, ptrdiff_t *ends); |
| 180 | 180 | ||
| 181 | /* Character classes. */ | 181 | /* Character classes. */ |
| @@ -192,6 +192,6 @@ typedef enum { RECC_ERROR = 0, | |||
| 192 | 192 | ||
| 193 | extern bool re_iswctype (int ch, re_wctype_t cc); | 193 | extern bool re_iswctype (int ch, re_wctype_t cc); |
| 194 | extern re_wctype_t re_wctype_parse (const unsigned char **strp, | 194 | extern re_wctype_t re_wctype_parse (const unsigned char **strp, |
| 195 | unsigned limit); | 195 | ptrdiff_t limit); |
| 196 | 196 | ||
| 197 | #endif /* EMACS_REGEX_H */ | 197 | #endif /* EMACS_REGEX_H */ |
diff --git a/src/search.c b/src/search.c index a1e0b0976ed..e55aa767f11 100644 --- a/src/search.c +++ b/src/search.c | |||
| @@ -59,31 +59,6 @@ static struct regexp_cache searchbufs[REGEXP_CACHE_SIZE]; | |||
| 59 | /* The head of the linked list; points to the most recently used buffer. */ | 59 | /* The head of the linked list; points to the most recently used buffer. */ |
| 60 | static struct regexp_cache *searchbuf_head; | 60 | static struct regexp_cache *searchbuf_head; |
| 61 | 61 | ||
| 62 | |||
| 63 | /* Every call to re_search, etc., must pass &search_regs as the regs | ||
| 64 | argument unless you can show it is unnecessary (i.e., if re_search | ||
| 65 | is certainly going to be called again before region-around-match | ||
| 66 | can be called). | ||
| 67 | |||
| 68 | Since the registers are now dynamically allocated, we need to make | ||
| 69 | sure not to refer to the Nth register before checking that it has | ||
| 70 | been allocated by checking search_regs.num_regs. | ||
| 71 | |||
| 72 | The regex code keeps track of whether it has allocated the search | ||
| 73 | buffer using bits in the re_pattern_buffer. This means that whenever | ||
| 74 | you compile a new pattern, it completely forgets whether it has | ||
| 75 | allocated any registers, and will allocate new registers the next | ||
| 76 | time you call a searching or matching function. Therefore, we need | ||
| 77 | to call re_set_registers after compiling a new pattern or after | ||
| 78 | setting the match registers, so that the regex functions will be | ||
| 79 | able to free or re-allocate it properly. */ | ||
| 80 | /* static struct re_registers search_regs; */ | ||
| 81 | |||
| 82 | /* The buffer in which the last search was performed, or | ||
| 83 | Qt if the last search was done in a string; | ||
| 84 | Qnil if no searching has been done yet. */ | ||
| 85 | /* static Lisp_Object last_thing_searched; */ | ||
| 86 | |||
| 87 | static void set_search_regs (ptrdiff_t, ptrdiff_t); | 62 | static void set_search_regs (ptrdiff_t, ptrdiff_t); |
| 88 | static void save_search_regs (void); | 63 | static void save_search_regs (void); |
| 89 | static EMACS_INT simple_search (EMACS_INT, unsigned char *, ptrdiff_t, | 64 | static EMACS_INT simple_search (EMACS_INT, unsigned char *, ptrdiff_t, |
| @@ -2763,7 +2738,7 @@ since only regular expressions have distinguished subexpressions. */) | |||
| 2763 | error out since otherwise this will result in confusing bugs. */ | 2738 | error out since otherwise this will result in confusing bugs. */ |
| 2764 | ptrdiff_t sub_start = search_regs.start[sub]; | 2739 | ptrdiff_t sub_start = search_regs.start[sub]; |
| 2765 | ptrdiff_t sub_end = search_regs.end[sub]; | 2740 | ptrdiff_t sub_end = search_regs.end[sub]; |
| 2766 | unsigned num_regs = search_regs.num_regs; | 2741 | ptrdiff_t num_regs = search_regs.num_regs; |
| 2767 | newpoint = search_regs.start[sub] + SCHARS (newtext); | 2742 | newpoint = search_regs.start[sub] + SCHARS (newtext); |
| 2768 | 2743 | ||
| 2769 | /* Replace the old text with the new in the cleanest possible way. */ | 2744 | /* Replace the old text with the new in the cleanest possible way. */ |
| @@ -3079,12 +3054,6 @@ If optional arg RESEAT is non-nil, make markers on LIST point nowhere. */) | |||
| 3079 | return Qnil; | 3054 | return Qnil; |
| 3080 | } | 3055 | } |
| 3081 | 3056 | ||
| 3082 | /* If true the match data have been saved in saved_search_regs | ||
| 3083 | during the execution of a sentinel or filter. */ | ||
| 3084 | /* static bool search_regs_saved; */ | ||
| 3085 | /* static struct re_registers saved_search_regs; */ | ||
| 3086 | /* static Lisp_Object saved_last_thing_searched; */ | ||
| 3087 | |||
| 3088 | /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data | 3057 | /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data |
| 3089 | if asynchronous code (filter or sentinel) is running. */ | 3058 | if asynchronous code (filter or sentinel) is running. */ |
| 3090 | static void | 3059 | static void |
diff --git a/src/thread.h b/src/thread.h index cb7e60f21f8..1856fddf4cd 100644 --- a/src/thread.h +++ b/src/thread.h | |||
| @@ -111,8 +111,8 @@ struct thread_state | |||
| 111 | struct buffer *m_current_buffer; | 111 | struct buffer *m_current_buffer; |
| 112 | #define current_buffer (current_thread->m_current_buffer) | 112 | #define current_buffer (current_thread->m_current_buffer) |
| 113 | 113 | ||
| 114 | /* Every call to re_match_2, etc., must pass &search_regs as the regs | 114 | /* Every call to re_search, etc., must pass &search_regs as the regs |
| 115 | argument unless you can show it is unnecessary (i.e., if re_match_2 | 115 | argument unless you can show it is unnecessary (i.e., if re_search |
| 116 | is certainly going to be called again before region-around-match | 116 | is certainly going to be called again before region-around-match |
| 117 | can be called). | 117 | can be called). |
| 118 | 118 | ||