diff options
Diffstat (limited to 'src/search.c')
| -rw-r--r-- | src/search.c | 355 |
1 files changed, 192 insertions, 163 deletions
diff --git a/src/search.c b/src/search.c index aacdbe33eef..ece346ecd06 100644 --- a/src/search.c +++ b/src/search.c | |||
| @@ -1,7 +1,7 @@ | |||
| 1 | /* String search routines for GNU Emacs. | 1 | /* String search routines for GNU Emacs. |
| 2 | 2 | ||
| 3 | Copyright (C) 1985-1987, 1993-1994, 1997-1999, 2001-2012 | 3 | Copyright (C) 1985-1987, 1993-1994, 1997-1999, 2001-2013 Free Software |
| 4 | Free Software Foundation, Inc. | 4 | Foundation, Inc. |
| 5 | 5 | ||
| 6 | This file is part of GNU Emacs. | 6 | This file is part of GNU Emacs. |
| 7 | 7 | ||
| @@ -49,8 +49,8 @@ struct regexp_cache | |||
| 49 | Lisp_Object syntax_table; | 49 | Lisp_Object syntax_table; |
| 50 | struct re_pattern_buffer buf; | 50 | struct re_pattern_buffer buf; |
| 51 | char fastmap[0400]; | 51 | char fastmap[0400]; |
| 52 | /* Nonzero means regexp was compiled to do full POSIX backtracking. */ | 52 | /* True means regexp was compiled to do full POSIX backtracking. */ |
| 53 | char posix; | 53 | bool posix; |
| 54 | }; | 54 | }; |
| 55 | 55 | ||
| 56 | /* The instances of that struct. */ | 56 | /* The instances of that struct. */ |
| @@ -100,7 +100,7 @@ static EMACS_INT boyer_moore (EMACS_INT, unsigned char *, ptrdiff_t, | |||
| 100 | ptrdiff_t, int); | 100 | ptrdiff_t, int); |
| 101 | static EMACS_INT search_buffer (Lisp_Object, ptrdiff_t, ptrdiff_t, | 101 | static EMACS_INT search_buffer (Lisp_Object, ptrdiff_t, ptrdiff_t, |
| 102 | ptrdiff_t, ptrdiff_t, EMACS_INT, int, | 102 | ptrdiff_t, ptrdiff_t, EMACS_INT, int, |
| 103 | Lisp_Object, Lisp_Object, int); | 103 | Lisp_Object, Lisp_Object, bool); |
| 104 | 104 | ||
| 105 | static _Noreturn void | 105 | static _Noreturn void |
| 106 | matcher_overflow (void) | 106 | matcher_overflow (void) |
| @@ -112,13 +112,14 @@ matcher_overflow (void) | |||
| 112 | PATTERN is the pattern to compile. | 112 | PATTERN is the pattern to compile. |
| 113 | CP is the place to put the result. | 113 | CP is the place to put the result. |
| 114 | TRANSLATE is a translation table for ignoring case, or nil for none. | 114 | TRANSLATE is a translation table for ignoring case, or nil for none. |
| 115 | POSIX is nonzero if we want full backtracking (POSIX style) | 115 | POSIX is true if we want full backtracking (POSIX style) for this pattern. |
| 116 | for this pattern. 0 means backtrack only enough to get a valid match. | 116 | False means backtrack only enough to get a valid match. |
| 117 | 117 | ||
| 118 | The behavior also depends on Vsearch_spaces_regexp. */ | 118 | The behavior also depends on Vsearch_spaces_regexp. */ |
| 119 | 119 | ||
| 120 | static void | 120 | static void |
| 121 | compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern, Lisp_Object translate, int posix) | 121 | compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern, |
| 122 | Lisp_Object translate, bool posix) | ||
| 122 | { | 123 | { |
| 123 | char *val; | 124 | char *val; |
| 124 | reg_syntax_t old; | 125 | reg_syntax_t old; |
| @@ -205,11 +206,12 @@ clear_regexp_cache (void) | |||
| 205 | values that will result from matching this pattern. | 206 | values that will result from matching this pattern. |
| 206 | If it is 0, we should compile the pattern not to record any | 207 | If it is 0, we should compile the pattern not to record any |
| 207 | subexpression bounds. | 208 | subexpression bounds. |
| 208 | POSIX is nonzero if we want full backtracking (POSIX style) | 209 | POSIX is true if we want full backtracking (POSIX style) for this pattern. |
| 209 | for this pattern. 0 means backtrack only enough to get a valid match. */ | 210 | False means backtrack only enough to get a valid match. */ |
| 210 | 211 | ||
| 211 | struct re_pattern_buffer * | 212 | struct re_pattern_buffer * |
| 212 | compile_pattern (Lisp_Object pattern, struct re_registers *regp, Lisp_Object translate, int posix, int multibyte) | 213 | compile_pattern (Lisp_Object pattern, struct re_registers *regp, |
| 214 | Lisp_Object translate, bool posix, bool multibyte) | ||
| 213 | { | 215 | { |
| 214 | struct regexp_cache *cp, **cpp; | 216 | struct regexp_cache *cp, **cpp; |
| 215 | 217 | ||
| @@ -266,7 +268,7 @@ compile_pattern (Lisp_Object pattern, struct re_registers *regp, Lisp_Object tra | |||
| 266 | 268 | ||
| 267 | 269 | ||
| 268 | static Lisp_Object | 270 | static Lisp_Object |
| 269 | looking_at_1 (Lisp_Object string, int posix) | 271 | looking_at_1 (Lisp_Object string, bool posix) |
| 270 | { | 272 | { |
| 271 | Lisp_Object val; | 273 | Lisp_Object val; |
| 272 | unsigned char *p1, *p2; | 274 | unsigned char *p1, *p2; |
| @@ -324,7 +326,7 @@ looking_at_1 (Lisp_Object string, int posix) | |||
| 324 | if (i == -2) | 326 | if (i == -2) |
| 325 | matcher_overflow (); | 327 | matcher_overflow (); |
| 326 | 328 | ||
| 327 | val = (0 <= i ? Qt : Qnil); | 329 | val = (i >= 0 ? Qt : Qnil); |
| 328 | if (NILP (Vinhibit_changing_match_data) && i >= 0) | 330 | if (NILP (Vinhibit_changing_match_data) && i >= 0) |
| 329 | for (i = 0; i < search_regs.num_regs; i++) | 331 | for (i = 0; i < search_regs.num_regs; i++) |
| 330 | if (search_regs.start[i] >= 0) | 332 | if (search_regs.start[i] >= 0) |
| @@ -364,7 +366,8 @@ data if you want to preserve them. */) | |||
| 364 | } | 366 | } |
| 365 | 367 | ||
| 366 | static Lisp_Object | 368 | static Lisp_Object |
| 367 | string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start, int posix) | 369 | string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start, |
| 370 | bool posix) | ||
| 368 | { | 371 | { |
| 369 | ptrdiff_t val; | 372 | ptrdiff_t val; |
| 370 | struct re_pattern_buffer *bufp; | 373 | struct re_pattern_buffer *bufp; |
| @@ -534,9 +537,10 @@ fast_string_match_ignore_case (Lisp_Object regexp, Lisp_Object string) | |||
| 534 | data. */ | 537 | data. */ |
| 535 | 538 | ||
| 536 | ptrdiff_t | 539 | ptrdiff_t |
| 537 | fast_looking_at (Lisp_Object regexp, ptrdiff_t pos, ptrdiff_t pos_byte, ptrdiff_t limit, ptrdiff_t limit_byte, Lisp_Object string) | 540 | fast_looking_at (Lisp_Object regexp, ptrdiff_t pos, ptrdiff_t pos_byte, |
| 541 | ptrdiff_t limit, ptrdiff_t limit_byte, Lisp_Object string) | ||
| 538 | { | 542 | { |
| 539 | int multibyte; | 543 | bool multibyte; |
| 540 | struct re_pattern_buffer *buf; | 544 | struct re_pattern_buffer *buf; |
| 541 | unsigned char *p1, *p2; | 545 | unsigned char *p1, *p2; |
| 542 | ptrdiff_t s1, s2; | 546 | ptrdiff_t s1, s2; |
| @@ -619,7 +623,7 @@ newline_cache_on_off (struct buffer *buf) | |||
| 619 | } | 623 | } |
| 620 | 624 | ||
| 621 | 625 | ||
| 622 | /* Search for COUNT instances of the character TARGET between START and END. | 626 | /* Search for COUNT newlines between START/START_BYTE and END/END_BYTE. |
| 623 | 627 | ||
| 624 | If COUNT is positive, search forwards; END must be >= START. | 628 | If COUNT is positive, search forwards; END must be >= START. |
| 625 | If COUNT is negative, search backwards for the -COUNTth instance; | 629 | If COUNT is negative, search backwards for the -COUNTth instance; |
| @@ -634,14 +638,18 @@ newline_cache_on_off (struct buffer *buf) | |||
| 634 | this is not the same as the usual convention for Emacs motion commands. | 638 | this is not the same as the usual convention for Emacs motion commands. |
| 635 | 639 | ||
| 636 | If we don't find COUNT instances before reaching END, set *SHORTAGE | 640 | If we don't find COUNT instances before reaching END, set *SHORTAGE |
| 637 | to the number of TARGETs left unfound, and return END. | 641 | to the number of newlines left unfound, and return END. |
| 642 | |||
| 643 | If BYTEPOS is not NULL, set *BYTEPOS to the byte position corresponding | ||
| 644 | to the returned character position. | ||
| 638 | 645 | ||
| 639 | If ALLOW_QUIT, set immediate_quit. That's good to do | 646 | If ALLOW_QUIT, set immediate_quit. That's good to do |
| 640 | except when inside redisplay. */ | 647 | except when inside redisplay. */ |
| 641 | 648 | ||
| 642 | ptrdiff_t | 649 | ptrdiff_t |
| 643 | scan_buffer (int target, ptrdiff_t start, ptrdiff_t end, | 650 | find_newline (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end, |
| 644 | ptrdiff_t count, ptrdiff_t *shortage, bool allow_quit) | 651 | ptrdiff_t end_byte, ptrdiff_t count, ptrdiff_t *shortage, |
| 652 | ptrdiff_t *bytepos, bool allow_quit) | ||
| 645 | { | 653 | { |
| 646 | struct region_cache *newline_cache; | 654 | struct region_cache *newline_cache; |
| 647 | int direction; | 655 | int direction; |
| @@ -649,13 +657,17 @@ scan_buffer (int target, ptrdiff_t start, ptrdiff_t end, | |||
| 649 | if (count > 0) | 657 | if (count > 0) |
| 650 | { | 658 | { |
| 651 | direction = 1; | 659 | direction = 1; |
| 652 | if (! end) end = ZV; | 660 | if (!end) |
| 661 | end = ZV, end_byte = ZV_BYTE; | ||
| 653 | } | 662 | } |
| 654 | else | 663 | else |
| 655 | { | 664 | { |
| 656 | direction = -1; | 665 | direction = -1; |
| 657 | if (! end) end = BEGV; | 666 | if (!end) |
| 667 | end = BEGV, end_byte = BEGV_BYTE; | ||
| 658 | } | 668 | } |
| 669 | if (end_byte == -1) | ||
| 670 | end_byte = CHAR_TO_BYTE (end); | ||
| 659 | 671 | ||
| 660 | newline_cache_on_off (current_buffer); | 672 | newline_cache_on_off (current_buffer); |
| 661 | newline_cache = current_buffer->newline_cache; | 673 | newline_cache = current_buffer->newline_cache; |
| @@ -673,13 +685,11 @@ scan_buffer (int target, ptrdiff_t start, ptrdiff_t end, | |||
| 673 | the position of the last character before the next such | 685 | the position of the last character before the next such |
| 674 | obstacle --- the last character the dumb search loop should | 686 | obstacle --- the last character the dumb search loop should |
| 675 | examine. */ | 687 | examine. */ |
| 676 | ptrdiff_t ceiling_byte = CHAR_TO_BYTE (end) - 1; | 688 | ptrdiff_t tem, ceiling_byte = end_byte - 1; |
| 677 | ptrdiff_t start_byte; | ||
| 678 | ptrdiff_t tem; | ||
| 679 | 689 | ||
| 680 | /* If we're looking for a newline, consult the newline cache | 690 | /* If we're looking for a newline, consult the newline cache |
| 681 | to see where we can avoid some scanning. */ | 691 | to see where we can avoid some scanning. */ |
| 682 | if (target == '\n' && newline_cache) | 692 | if (newline_cache) |
| 683 | { | 693 | { |
| 684 | ptrdiff_t next_change; | 694 | ptrdiff_t next_change; |
| 685 | immediate_quit = 0; | 695 | immediate_quit = 0; |
| @@ -698,7 +708,7 @@ scan_buffer (int target, ptrdiff_t start, ptrdiff_t end, | |||
| 698 | next_change is the position of the next known region. */ | 708 | next_change is the position of the next known region. */ |
| 699 | ceiling_byte = min (CHAR_TO_BYTE (next_change) - 1, ceiling_byte); | 709 | ceiling_byte = min (CHAR_TO_BYTE (next_change) - 1, ceiling_byte); |
| 700 | } | 710 | } |
| 701 | else | 711 | else if (start_byte == -1) |
| 702 | start_byte = CHAR_TO_BYTE (start); | 712 | start_byte = CHAR_TO_BYTE (start); |
| 703 | 713 | ||
| 704 | /* The dumb loop can only scan text stored in contiguous | 714 | /* The dumb loop can only scan text stored in contiguous |
| @@ -718,44 +728,45 @@ scan_buffer (int target, ptrdiff_t start, ptrdiff_t end, | |||
| 718 | 728 | ||
| 719 | while (cursor < ceiling_addr) | 729 | while (cursor < ceiling_addr) |
| 720 | { | 730 | { |
| 721 | unsigned char *scan_start = cursor; | ||
| 722 | |||
| 723 | /* The dumb loop. */ | 731 | /* The dumb loop. */ |
| 724 | while (*cursor != target && ++cursor < ceiling_addr) | 732 | unsigned char *nl = memchr (cursor, '\n', ceiling_addr - cursor); |
| 725 | ; | ||
| 726 | 733 | ||
| 727 | /* If we're looking for newlines, cache the fact that | 734 | /* If we're looking for newlines, cache the fact that |
| 728 | the region from start to cursor is free of them. */ | 735 | the region from start to cursor is free of them. */ |
| 729 | if (target == '\n' && newline_cache) | 736 | if (newline_cache) |
| 730 | know_region_cache (current_buffer, newline_cache, | 737 | { |
| 731 | BYTE_TO_CHAR (start_byte + scan_start - base), | 738 | unsigned char *low = cursor; |
| 732 | BYTE_TO_CHAR (start_byte + cursor - base)); | 739 | unsigned char *lim = nl ? nl : ceiling_addr; |
| 733 | 740 | know_region_cache (current_buffer, newline_cache, | |
| 734 | /* Did we find the target character? */ | 741 | BYTE_TO_CHAR (low - base + start_byte), |
| 735 | if (cursor < ceiling_addr) | 742 | BYTE_TO_CHAR (lim - base + start_byte)); |
| 736 | { | 743 | } |
| 737 | if (--count == 0) | 744 | |
| 738 | { | 745 | if (! nl) |
| 739 | immediate_quit = 0; | 746 | break; |
| 740 | return BYTE_TO_CHAR (start_byte + cursor - base + 1); | 747 | |
| 741 | } | 748 | if (--count == 0) |
| 742 | cursor++; | 749 | { |
| 743 | } | 750 | immediate_quit = 0; |
| 751 | if (bytepos) | ||
| 752 | *bytepos = nl + 1 - base + start_byte; | ||
| 753 | return BYTE_TO_CHAR (nl + 1 - base + start_byte); | ||
| 754 | } | ||
| 755 | cursor = nl + 1; | ||
| 744 | } | 756 | } |
| 745 | 757 | ||
| 746 | start = BYTE_TO_CHAR (start_byte + cursor - base); | 758 | start_byte += ceiling_addr - base; |
| 759 | start = BYTE_TO_CHAR (start_byte); | ||
| 747 | } | 760 | } |
| 748 | } | 761 | } |
| 749 | else | 762 | else |
| 750 | while (start > end) | 763 | while (start > end) |
| 751 | { | 764 | { |
| 752 | /* The last character to check before the next obstacle. */ | 765 | /* The last character to check before the next obstacle. */ |
| 753 | ptrdiff_t ceiling_byte = CHAR_TO_BYTE (end); | 766 | ptrdiff_t tem, ceiling_byte = end_byte; |
| 754 | ptrdiff_t start_byte; | ||
| 755 | ptrdiff_t tem; | ||
| 756 | 767 | ||
| 757 | /* Consult the newline cache, if appropriate. */ | 768 | /* Consult the newline cache, if appropriate. */ |
| 758 | if (target == '\n' && newline_cache) | 769 | if (newline_cache) |
| 759 | { | 770 | { |
| 760 | ptrdiff_t next_change; | 771 | ptrdiff_t next_change; |
| 761 | immediate_quit = 0; | 772 | immediate_quit = 0; |
| @@ -774,7 +785,7 @@ scan_buffer (int target, ptrdiff_t start, ptrdiff_t end, | |||
| 774 | next_change is the position of the next known region. */ | 785 | next_change is the position of the next known region. */ |
| 775 | ceiling_byte = max (CHAR_TO_BYTE (next_change), ceiling_byte); | 786 | ceiling_byte = max (CHAR_TO_BYTE (next_change), ceiling_byte); |
| 776 | } | 787 | } |
| 777 | else | 788 | else if (start_byte == -1) |
| 778 | start_byte = CHAR_TO_BYTE (start); | 789 | start_byte = CHAR_TO_BYTE (start); |
| 779 | 790 | ||
| 780 | /* Stop scanning before the gap. */ | 791 | /* Stop scanning before the gap. */ |
| @@ -789,42 +800,50 @@ scan_buffer (int target, ptrdiff_t start, ptrdiff_t end, | |||
| 789 | 800 | ||
| 790 | while (cursor >= ceiling_addr) | 801 | while (cursor >= ceiling_addr) |
| 791 | { | 802 | { |
| 792 | unsigned char *scan_start = cursor; | 803 | unsigned char *nl = memrchr (ceiling_addr, '\n', |
| 793 | 804 | cursor + 1 - ceiling_addr); | |
| 794 | while (*cursor != target && --cursor >= ceiling_addr) | ||
| 795 | ; | ||
| 796 | 805 | ||
| 797 | /* If we're looking for newlines, cache the fact that | 806 | /* If we're looking for newlines, cache the fact that |
| 798 | the region from after the cursor to start is free of them. */ | 807 | the region from after the cursor to start is free of them. */ |
| 799 | if (target == '\n' && newline_cache) | 808 | if (newline_cache) |
| 800 | know_region_cache (current_buffer, newline_cache, | 809 | { |
| 801 | BYTE_TO_CHAR (start_byte + cursor - base), | 810 | unsigned char *low = nl ? nl : ceiling_addr - 1; |
| 802 | BYTE_TO_CHAR (start_byte + scan_start - base)); | 811 | unsigned char *lim = cursor; |
| 803 | 812 | know_region_cache (current_buffer, newline_cache, | |
| 804 | /* Did we find the target character? */ | 813 | BYTE_TO_CHAR (low - base + start_byte), |
| 805 | if (cursor >= ceiling_addr) | 814 | BYTE_TO_CHAR (lim - base + start_byte)); |
| 806 | { | 815 | } |
| 807 | if (++count >= 0) | 816 | |
| 808 | { | 817 | if (! nl) |
| 809 | immediate_quit = 0; | 818 | break; |
| 810 | return BYTE_TO_CHAR (start_byte + cursor - base); | 819 | |
| 811 | } | 820 | if (++count >= 0) |
| 812 | cursor--; | 821 | { |
| 813 | } | 822 | immediate_quit = 0; |
| 823 | if (bytepos) | ||
| 824 | *bytepos = nl - base + start_byte; | ||
| 825 | return BYTE_TO_CHAR (nl - base + start_byte); | ||
| 826 | } | ||
| 827 | cursor = nl - 1; | ||
| 814 | } | 828 | } |
| 815 | 829 | ||
| 816 | start = BYTE_TO_CHAR (start_byte + cursor - base); | 830 | start_byte += ceiling_addr - 1 - base; |
| 831 | start = BYTE_TO_CHAR (start_byte); | ||
| 817 | } | 832 | } |
| 818 | } | 833 | } |
| 819 | 834 | ||
| 820 | immediate_quit = 0; | 835 | immediate_quit = 0; |
| 821 | if (shortage != 0) | 836 | if (shortage) |
| 822 | *shortage = count * direction; | 837 | *shortage = count * direction; |
| 838 | if (bytepos) | ||
| 839 | { | ||
| 840 | *bytepos = start_byte == -1 ? CHAR_TO_BYTE (start) : start_byte; | ||
| 841 | eassert (*bytepos == CHAR_TO_BYTE (start)); | ||
| 842 | } | ||
| 823 | return start; | 843 | return start; |
| 824 | } | 844 | } |
| 825 | 845 | ||
| 826 | /* Search for COUNT instances of a line boundary, which means either a | 846 | /* Search for COUNT instances of a line boundary. |
| 827 | newline or (if selective display enabled) a carriage return. | ||
| 828 | Start at START. If COUNT is negative, search backwards. | 847 | Start at START. If COUNT is negative, search backwards. |
| 829 | 848 | ||
| 830 | We report the resulting position by calling TEMP_SET_PT_BOTH. | 849 | We report the resulting position by calling TEMP_SET_PT_BOTH. |
| @@ -855,14 +874,9 @@ scan_newline (ptrdiff_t start, ptrdiff_t start_byte, | |||
| 855 | 874 | ||
| 856 | bool old_immediate_quit = immediate_quit; | 875 | bool old_immediate_quit = immediate_quit; |
| 857 | 876 | ||
| 858 | /* The code that follows is like scan_buffer | ||
| 859 | but checks for either newline or carriage return. */ | ||
| 860 | |||
| 861 | if (allow_quit) | 877 | if (allow_quit) |
| 862 | immediate_quit++; | 878 | immediate_quit++; |
| 863 | 879 | ||
| 864 | start_byte = CHAR_TO_BYTE (start); | ||
| 865 | |||
| 866 | if (count > 0) | 880 | if (count > 0) |
| 867 | { | 881 | { |
| 868 | while (start_byte < limit_byte) | 882 | while (start_byte < limit_byte) |
| @@ -871,29 +885,25 @@ scan_newline (ptrdiff_t start, ptrdiff_t start_byte, | |||
| 871 | ceiling = min (limit_byte - 1, ceiling); | 885 | ceiling = min (limit_byte - 1, ceiling); |
| 872 | ceiling_addr = BYTE_POS_ADDR (ceiling) + 1; | 886 | ceiling_addr = BYTE_POS_ADDR (ceiling) + 1; |
| 873 | base = (cursor = BYTE_POS_ADDR (start_byte)); | 887 | base = (cursor = BYTE_POS_ADDR (start_byte)); |
| 874 | while (1) | ||
| 875 | { | ||
| 876 | while (*cursor != '\n' && ++cursor != ceiling_addr) | ||
| 877 | ; | ||
| 878 | 888 | ||
| 879 | if (cursor != ceiling_addr) | 889 | do |
| 890 | { | ||
| 891 | unsigned char *nl = memchr (cursor, '\n', ceiling_addr - cursor); | ||
| 892 | if (! nl) | ||
| 893 | break; | ||
| 894 | if (--count == 0) | ||
| 880 | { | 895 | { |
| 881 | if (--count == 0) | 896 | immediate_quit = old_immediate_quit; |
| 882 | { | 897 | start_byte += nl - base + 1; |
| 883 | immediate_quit = old_immediate_quit; | 898 | start = BYTE_TO_CHAR (start_byte); |
| 884 | start_byte = start_byte + cursor - base + 1; | 899 | TEMP_SET_PT_BOTH (start, start_byte); |
| 885 | start = BYTE_TO_CHAR (start_byte); | 900 | return 0; |
| 886 | TEMP_SET_PT_BOTH (start, start_byte); | ||
| 887 | return 0; | ||
| 888 | } | ||
| 889 | else | ||
| 890 | if (++cursor == ceiling_addr) | ||
| 891 | break; | ||
| 892 | } | 901 | } |
| 893 | else | 902 | cursor = nl + 1; |
| 894 | break; | ||
| 895 | } | 903 | } |
| 896 | start_byte += cursor - base; | 904 | while (cursor < ceiling_addr); |
| 905 | |||
| 906 | start_byte += ceiling_addr - base; | ||
| 897 | } | 907 | } |
| 898 | } | 908 | } |
| 899 | else | 909 | else |
| @@ -902,31 +912,28 @@ scan_newline (ptrdiff_t start, ptrdiff_t start_byte, | |||
| 902 | { | 912 | { |
| 903 | ceiling = BUFFER_FLOOR_OF (start_byte - 1); | 913 | ceiling = BUFFER_FLOOR_OF (start_byte - 1); |
| 904 | ceiling = max (limit_byte, ceiling); | 914 | ceiling = max (limit_byte, ceiling); |
| 905 | ceiling_addr = BYTE_POS_ADDR (ceiling) - 1; | 915 | ceiling_addr = BYTE_POS_ADDR (ceiling); |
| 906 | base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1); | 916 | base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1); |
| 907 | while (1) | 917 | while (1) |
| 908 | { | 918 | { |
| 909 | while (--cursor != ceiling_addr && *cursor != '\n') | 919 | unsigned char *nl = memrchr (ceiling_addr, '\n', |
| 910 | ; | 920 | cursor - ceiling_addr); |
| 921 | if (! nl) | ||
| 922 | break; | ||
| 911 | 923 | ||
| 912 | if (cursor != ceiling_addr) | 924 | if (++count == 0) |
| 913 | { | 925 | { |
| 914 | if (++count == 0) | 926 | immediate_quit = old_immediate_quit; |
| 915 | { | 927 | /* Return the position AFTER the match we found. */ |
| 916 | immediate_quit = old_immediate_quit; | 928 | start_byte += nl - base + 1; |
| 917 | /* Return the position AFTER the match we found. */ | 929 | start = BYTE_TO_CHAR (start_byte); |
| 918 | start_byte = start_byte + cursor - base + 1; | 930 | TEMP_SET_PT_BOTH (start, start_byte); |
| 919 | start = BYTE_TO_CHAR (start_byte); | 931 | return 0; |
| 920 | TEMP_SET_PT_BOTH (start, start_byte); | ||
| 921 | return 0; | ||
| 922 | } | ||
| 923 | } | 932 | } |
| 924 | else | 933 | |
| 925 | break; | 934 | cursor = nl; |
| 926 | } | 935 | } |
| 927 | /* Here we add 1 to compensate for the last decrement | 936 | start_byte += ceiling_addr - base; |
| 928 | of CURSOR, which took it past the valid range. */ | ||
| 929 | start_byte += cursor - base + 1; | ||
| 930 | } | 937 | } |
| 931 | } | 938 | } |
| 932 | 939 | ||
| @@ -936,25 +943,33 @@ scan_newline (ptrdiff_t start, ptrdiff_t start_byte, | |||
| 936 | return count * direction; | 943 | return count * direction; |
| 937 | } | 944 | } |
| 938 | 945 | ||
| 946 | /* Like find_newline, but doesn't allow QUITting and doesn't return | ||
| 947 | SHORTAGE. */ | ||
| 939 | ptrdiff_t | 948 | ptrdiff_t |
| 940 | find_next_newline_no_quit (ptrdiff_t from, ptrdiff_t cnt) | 949 | find_newline_no_quit (ptrdiff_t from, ptrdiff_t frombyte, |
| 950 | ptrdiff_t cnt, ptrdiff_t *bytepos) | ||
| 941 | { | 951 | { |
| 942 | return scan_buffer ('\n', from, 0, cnt, (ptrdiff_t *) 0, 0); | 952 | return find_newline (from, frombyte, 0, -1, cnt, NULL, bytepos, 0); |
| 943 | } | 953 | } |
| 944 | 954 | ||
| 945 | /* Like find_next_newline, but returns position before the newline, | 955 | /* Like find_newline, but returns position before the newline, not |
| 946 | not after, and only searches up to TO. This isn't just | 956 | after, and only searches up to TO. |
| 947 | find_next_newline (...)-1, because you might hit TO. */ | 957 | This isn't just find_newline_no_quit (...)-1, because you might hit TO. */ |
| 948 | 958 | ||
| 949 | ptrdiff_t | 959 | ptrdiff_t |
| 950 | find_before_next_newline (ptrdiff_t from, ptrdiff_t to, ptrdiff_t cnt) | 960 | find_before_next_newline (ptrdiff_t from, ptrdiff_t to, |
| 961 | ptrdiff_t cnt, ptrdiff_t *bytepos) | ||
| 951 | { | 962 | { |
| 952 | ptrdiff_t shortage; | 963 | ptrdiff_t shortage; |
| 953 | ptrdiff_t pos = scan_buffer ('\n', from, to, cnt, &shortage, 1); | 964 | ptrdiff_t pos = find_newline (from, -1, to, -1, cnt, &shortage, bytepos, 1); |
| 954 | 965 | ||
| 955 | if (shortage == 0) | 966 | if (shortage == 0) |
| 956 | pos--; | 967 | { |
| 957 | 968 | if (bytepos) | |
| 969 | DEC_BOTH (pos, *bytepos); | ||
| 970 | else | ||
| 971 | pos--; | ||
| 972 | } | ||
| 958 | return pos; | 973 | return pos; |
| 959 | } | 974 | } |
| 960 | 975 | ||
| @@ -962,9 +977,9 @@ find_before_next_newline (ptrdiff_t from, ptrdiff_t to, ptrdiff_t cnt) | |||
| 962 | 977 | ||
| 963 | static Lisp_Object | 978 | static Lisp_Object |
| 964 | search_command (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, | 979 | search_command (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, |
| 965 | Lisp_Object count, int direction, int RE, int posix) | 980 | Lisp_Object count, int direction, int RE, bool posix) |
| 966 | { | 981 | { |
| 967 | register EMACS_INT np; | 982 | EMACS_INT np; |
| 968 | EMACS_INT lim; | 983 | EMACS_INT lim; |
| 969 | ptrdiff_t lim_byte; | 984 | ptrdiff_t lim_byte; |
| 970 | EMACS_INT n = direction; | 985 | EMACS_INT n = direction; |
| @@ -1016,8 +1031,7 @@ search_command (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, | |||
| 1016 | 1031 | ||
| 1017 | if (!EQ (noerror, Qt)) | 1032 | if (!EQ (noerror, Qt)) |
| 1018 | { | 1033 | { |
| 1019 | if (lim < BEGV || lim > ZV) | 1034 | eassert (BEGV <= lim && lim <= ZV); |
| 1020 | emacs_abort (); | ||
| 1021 | SET_PT_BOTH (lim, lim_byte); | 1035 | SET_PT_BOTH (lim, lim_byte); |
| 1022 | return Qnil; | 1036 | return Qnil; |
| 1023 | #if 0 /* This would be clean, but maybe programs depend on | 1037 | #if 0 /* This would be clean, but maybe programs depend on |
| @@ -1029,17 +1043,15 @@ search_command (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, | |||
| 1029 | return Qnil; | 1043 | return Qnil; |
| 1030 | } | 1044 | } |
| 1031 | 1045 | ||
| 1032 | if (np < BEGV || np > ZV) | 1046 | eassert (BEGV <= np && np <= ZV); |
| 1033 | emacs_abort (); | ||
| 1034 | |||
| 1035 | SET_PT (np); | 1047 | SET_PT (np); |
| 1036 | 1048 | ||
| 1037 | return make_number (np); | 1049 | return make_number (np); |
| 1038 | } | 1050 | } |
| 1039 | 1051 | ||
| 1040 | /* Return 1 if REGEXP matches just one constant string. */ | 1052 | /* Return true if REGEXP matches just one constant string. */ |
| 1041 | 1053 | ||
| 1042 | static int | 1054 | static bool |
| 1043 | trivial_regexp_p (Lisp_Object regexp) | 1055 | trivial_regexp_p (Lisp_Object regexp) |
| 1044 | { | 1056 | { |
| 1045 | ptrdiff_t len = SBYTES (regexp); | 1057 | ptrdiff_t len = SBYTES (regexp); |
| @@ -1108,7 +1120,7 @@ static struct re_registers search_regs_1; | |||
| 1108 | static EMACS_INT | 1120 | static EMACS_INT |
| 1109 | search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte, | 1121 | search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte, |
| 1110 | ptrdiff_t lim, ptrdiff_t lim_byte, EMACS_INT n, | 1122 | ptrdiff_t lim, ptrdiff_t lim_byte, EMACS_INT n, |
| 1111 | int RE, Lisp_Object trt, Lisp_Object inverse_trt, int posix) | 1123 | int RE, Lisp_Object trt, Lisp_Object inverse_trt, bool posix) |
| 1112 | { | 1124 | { |
| 1113 | ptrdiff_t len = SCHARS (string); | 1125 | ptrdiff_t len = SCHARS (string); |
| 1114 | ptrdiff_t len_byte = SBYTES (string); | 1126 | ptrdiff_t len_byte = SBYTES (string); |
| @@ -1258,12 +1270,12 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte, | |||
| 1258 | ptrdiff_t raw_pattern_size; | 1270 | ptrdiff_t raw_pattern_size; |
| 1259 | ptrdiff_t raw_pattern_size_byte; | 1271 | ptrdiff_t raw_pattern_size_byte; |
| 1260 | unsigned char *patbuf; | 1272 | unsigned char *patbuf; |
| 1261 | int multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters)); | 1273 | bool multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters)); |
| 1262 | unsigned char *base_pat; | 1274 | unsigned char *base_pat; |
| 1263 | /* Set to positive if we find a non-ASCII char that needs | 1275 | /* Set to positive if we find a non-ASCII char that needs |
| 1264 | translation. Otherwise set to zero later. */ | 1276 | translation. Otherwise set to zero later. */ |
| 1265 | int char_base = -1; | 1277 | int char_base = -1; |
| 1266 | int boyer_moore_ok = 1; | 1278 | bool boyer_moore_ok = 1; |
| 1267 | 1279 | ||
| 1268 | /* MULTIBYTE says whether the text to be searched is multibyte. | 1280 | /* MULTIBYTE says whether the text to be searched is multibyte. |
| 1269 | We must convert PATTERN to match that, or we will not really | 1281 | We must convert PATTERN to match that, or we will not really |
| @@ -1313,8 +1325,11 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte, | |||
| 1313 | non-nil, we can use boyer-moore search only if TRT can be | 1325 | non-nil, we can use boyer-moore search only if TRT can be |
| 1314 | represented by the byte array of 256 elements. For that, | 1326 | represented by the byte array of 256 elements. For that, |
| 1315 | all non-ASCII case-equivalents of all case-sensitive | 1327 | all non-ASCII case-equivalents of all case-sensitive |
| 1316 | characters in STRING must belong to the same charset and | 1328 | characters in STRING must belong to the same character |
| 1317 | row. */ | 1329 | group (two characters belong to the same group iff their |
| 1330 | multibyte forms are the same except for the last byte; | ||
| 1331 | i.e. every 64 characters form a group; U+0000..U+003F, | ||
| 1332 | U+0040..U+007F, U+0080..U+00BF, ...). */ | ||
| 1318 | 1333 | ||
| 1319 | while (--len >= 0) | 1334 | while (--len >= 0) |
| 1320 | { | 1335 | { |
| @@ -1406,7 +1421,7 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte, | |||
| 1406 | char_base = 0; | 1421 | char_base = 0; |
| 1407 | while (--len >= 0) | 1422 | while (--len >= 0) |
| 1408 | { | 1423 | { |
| 1409 | int c, translated; | 1424 | int c, translated, inverse; |
| 1410 | 1425 | ||
| 1411 | /* If we got here and the RE flag is set, it's because we're | 1426 | /* If we got here and the RE flag is set, it's because we're |
| 1412 | dealing with a regexp known to be trivial, so the backslash | 1427 | dealing with a regexp known to be trivial, so the backslash |
| @@ -1420,6 +1435,20 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte, | |||
| 1420 | c = *base_pat++; | 1435 | c = *base_pat++; |
| 1421 | TRANSLATE (translated, trt, c); | 1436 | TRANSLATE (translated, trt, c); |
| 1422 | *pat++ = translated; | 1437 | *pat++ = translated; |
| 1438 | /* Check that none of C's equivalents violates the | ||
| 1439 | assumptions of boyer_moore. */ | ||
| 1440 | TRANSLATE (inverse, inverse_trt, c); | ||
| 1441 | while (1) | ||
| 1442 | { | ||
| 1443 | if (inverse >= 0200) | ||
| 1444 | { | ||
| 1445 | boyer_moore_ok = 0; | ||
| 1446 | break; | ||
| 1447 | } | ||
| 1448 | if (c == inverse) | ||
| 1449 | break; | ||
| 1450 | TRANSLATE (inverse, inverse_trt, inverse); | ||
| 1451 | } | ||
| 1423 | } | 1452 | } |
| 1424 | } | 1453 | } |
| 1425 | 1454 | ||
| @@ -1454,8 +1483,8 @@ simple_search (EMACS_INT n, unsigned char *pat, | |||
| 1454 | ptrdiff_t pos, ptrdiff_t pos_byte, | 1483 | ptrdiff_t pos, ptrdiff_t pos_byte, |
| 1455 | ptrdiff_t lim, ptrdiff_t lim_byte) | 1484 | ptrdiff_t lim, ptrdiff_t lim_byte) |
| 1456 | { | 1485 | { |
| 1457 | int multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters)); | 1486 | bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters)); |
| 1458 | int forward = n > 0; | 1487 | bool forward = n > 0; |
| 1459 | /* Number of buffer bytes matched. Note that this may be different | 1488 | /* Number of buffer bytes matched. Note that this may be different |
| 1460 | from len_byte in a multibyte buffer. */ | 1489 | from len_byte in a multibyte buffer. */ |
| 1461 | ptrdiff_t match_byte = PTRDIFF_MIN; | 1490 | ptrdiff_t match_byte = PTRDIFF_MIN; |
| @@ -1674,7 +1703,7 @@ boyer_moore (EMACS_INT n, unsigned char *base_pat, | |||
| 1674 | register ptrdiff_t i; | 1703 | register ptrdiff_t i; |
| 1675 | register int j; | 1704 | register int j; |
| 1676 | unsigned char *pat, *pat_end; | 1705 | unsigned char *pat, *pat_end; |
| 1677 | int multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters)); | 1706 | bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters)); |
| 1678 | 1707 | ||
| 1679 | unsigned char simple_translate[0400]; | 1708 | unsigned char simple_translate[0400]; |
| 1680 | /* These are set to the preceding bytes of a byte to be translated | 1709 | /* These are set to the preceding bytes of a byte to be translated |
| @@ -2255,12 +2284,12 @@ since only regular expressions have distinguished subexpressions. */) | |||
| 2255 | (Lisp_Object newtext, Lisp_Object fixedcase, Lisp_Object literal, Lisp_Object string, Lisp_Object subexp) | 2284 | (Lisp_Object newtext, Lisp_Object fixedcase, Lisp_Object literal, Lisp_Object string, Lisp_Object subexp) |
| 2256 | { | 2285 | { |
| 2257 | enum { nochange, all_caps, cap_initial } case_action; | 2286 | enum { nochange, all_caps, cap_initial } case_action; |
| 2258 | register ptrdiff_t pos, pos_byte; | 2287 | ptrdiff_t pos, pos_byte; |
| 2259 | int some_multiletter_word; | 2288 | bool some_multiletter_word; |
| 2260 | int some_lowercase; | 2289 | bool some_lowercase; |
| 2261 | int some_uppercase; | 2290 | bool some_uppercase; |
| 2262 | int some_nonuppercase_initial; | 2291 | bool some_nonuppercase_initial; |
| 2263 | register int c, prevc; | 2292 | int c, prevc; |
| 2264 | ptrdiff_t sub; | 2293 | ptrdiff_t sub; |
| 2265 | ptrdiff_t opoint, newpoint; | 2294 | ptrdiff_t opoint, newpoint; |
| 2266 | 2295 | ||
| @@ -2405,7 +2434,7 @@ since only regular expressions have distinguished subexpressions. */) | |||
| 2405 | { | 2434 | { |
| 2406 | ptrdiff_t substart = -1; | 2435 | ptrdiff_t substart = -1; |
| 2407 | ptrdiff_t subend = 0; | 2436 | ptrdiff_t subend = 0; |
| 2408 | int delbackslash = 0; | 2437 | bool delbackslash = 0; |
| 2409 | 2438 | ||
| 2410 | FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte); | 2439 | FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte); |
| 2411 | 2440 | ||
| @@ -2421,7 +2450,7 @@ since only regular expressions have distinguished subexpressions. */) | |||
| 2421 | else if (c >= '1' && c <= '9') | 2450 | else if (c >= '1' && c <= '9') |
| 2422 | { | 2451 | { |
| 2423 | if (c - '0' < search_regs.num_regs | 2452 | if (c - '0' < search_regs.num_regs |
| 2424 | && 0 <= search_regs.start[c - '0']) | 2453 | && search_regs.start[c - '0'] >= 0) |
| 2425 | { | 2454 | { |
| 2426 | substart = search_regs.start[c - '0']; | 2455 | substart = search_regs.start[c - '0']; |
| 2427 | subend = search_regs.end[c - '0']; | 2456 | subend = search_regs.end[c - '0']; |
| @@ -2500,11 +2529,11 @@ since only regular expressions have distinguished subexpressions. */) | |||
| 2500 | ptrdiff_t length = SBYTES (newtext); | 2529 | ptrdiff_t length = SBYTES (newtext); |
| 2501 | unsigned char *substed; | 2530 | unsigned char *substed; |
| 2502 | ptrdiff_t substed_alloc_size, substed_len; | 2531 | ptrdiff_t substed_alloc_size, substed_len; |
| 2503 | int buf_multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters)); | 2532 | bool buf_multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters)); |
| 2504 | int str_multibyte = STRING_MULTIBYTE (newtext); | 2533 | bool str_multibyte = STRING_MULTIBYTE (newtext); |
| 2505 | int really_changed = 0; | 2534 | bool really_changed = 0; |
| 2506 | 2535 | ||
| 2507 | substed_alloc_size = ((STRING_BYTES_BOUND - 100) / 2 < length | 2536 | substed_alloc_size = (length > (STRING_BYTES_BOUND - 100) / 2 |
| 2508 | ? STRING_BYTES_BOUND | 2537 | ? STRING_BYTES_BOUND |
| 2509 | : length * 2 + 100); | 2538 | : length * 2 + 100); |
| 2510 | substed = xmalloc (substed_alloc_size); | 2539 | substed = xmalloc (substed_alloc_size); |
| @@ -2585,7 +2614,7 @@ since only regular expressions have distinguished subexpressions. */) | |||
| 2585 | ptrdiff_t begbyte = CHAR_TO_BYTE (search_regs.start[idx]); | 2614 | ptrdiff_t begbyte = CHAR_TO_BYTE (search_regs.start[idx]); |
| 2586 | add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte; | 2615 | add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte; |
| 2587 | if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx]) | 2616 | if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx]) |
| 2588 | move_gap (search_regs.start[idx]); | 2617 | move_gap_both (search_regs.start[idx], begbyte); |
| 2589 | add_stuff = BYTE_POS_ADDR (begbyte); | 2618 | add_stuff = BYTE_POS_ADDR (begbyte); |
| 2590 | } | 2619 | } |
| 2591 | 2620 | ||
| @@ -2668,7 +2697,7 @@ since only regular expressions have distinguished subexpressions. */) | |||
| 2668 | } | 2697 | } |
| 2669 | 2698 | ||
| 2670 | static Lisp_Object | 2699 | static Lisp_Object |
| 2671 | match_limit (Lisp_Object num, int beginningp) | 2700 | match_limit (Lisp_Object num, bool beginningp) |
| 2672 | { | 2701 | { |
| 2673 | EMACS_INT n; | 2702 | EMACS_INT n; |
| 2674 | 2703 | ||
| @@ -2941,9 +2970,9 @@ If optional arg RESEAT is non-nil, make markers on LIST point nowhere. */) | |||
| 2941 | return Qnil; | 2970 | return Qnil; |
| 2942 | } | 2971 | } |
| 2943 | 2972 | ||
| 2944 | /* If non-zero the match data have been saved in saved_search_regs | 2973 | /* If true the match data have been saved in saved_search_regs |
| 2945 | during the execution of a sentinel or filter. */ | 2974 | during the execution of a sentinel or filter. */ |
| 2946 | static int search_regs_saved; | 2975 | static bool search_regs_saved; |
| 2947 | static struct re_registers saved_search_regs; | 2976 | static struct re_registers saved_search_regs; |
| 2948 | static Lisp_Object saved_last_thing_searched; | 2977 | static Lisp_Object saved_last_thing_searched; |
| 2949 | 2978 | ||
| @@ -3008,9 +3037,9 @@ DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0, | |||
| 3008 | doc: /* Return a regexp string which matches exactly STRING and nothing else. */) | 3037 | doc: /* Return a regexp string which matches exactly STRING and nothing else. */) |
| 3009 | (Lisp_Object string) | 3038 | (Lisp_Object string) |
| 3010 | { | 3039 | { |
| 3011 | register char *in, *out, *end; | 3040 | char *in, *out, *end; |
| 3012 | register char *temp; | 3041 | char *temp; |
| 3013 | int backslashes_added = 0; | 3042 | ptrdiff_t backslashes_added = 0; |
| 3014 | 3043 | ||
| 3015 | CHECK_STRING (string); | 3044 | CHECK_STRING (string); |
| 3016 | 3045 | ||