aboutsummaryrefslogtreecommitdiffstats
path: root/src/search.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/search.c')
-rw-r--r--src/search.c355
1 files changed, 192 insertions, 163 deletions
diff --git a/src/search.c b/src/search.c
index aacdbe33eef..ece346ecd06 100644
--- a/src/search.c
+++ b/src/search.c
@@ -1,7 +1,7 @@
1/* String search routines for GNU Emacs. 1/* String search routines for GNU Emacs.
2 2
3Copyright (C) 1985-1987, 1993-1994, 1997-1999, 2001-2012 3Copyright (C) 1985-1987, 1993-1994, 1997-1999, 2001-2013 Free Software
4 Free Software Foundation, Inc. 4Foundation, Inc.
5 5
6This file is part of GNU Emacs. 6This file is part of GNU Emacs.
7 7
@@ -49,8 +49,8 @@ struct regexp_cache
49 Lisp_Object syntax_table; 49 Lisp_Object syntax_table;
50 struct re_pattern_buffer buf; 50 struct re_pattern_buffer buf;
51 char fastmap[0400]; 51 char fastmap[0400];
52 /* Nonzero means regexp was compiled to do full POSIX backtracking. */ 52 /* True means regexp was compiled to do full POSIX backtracking. */
53 char posix; 53 bool posix;
54}; 54};
55 55
56/* The instances of that struct. */ 56/* The instances of that struct. */
@@ -100,7 +100,7 @@ static EMACS_INT boyer_moore (EMACS_INT, unsigned char *, ptrdiff_t,
100 ptrdiff_t, int); 100 ptrdiff_t, int);
101static EMACS_INT search_buffer (Lisp_Object, ptrdiff_t, ptrdiff_t, 101static EMACS_INT search_buffer (Lisp_Object, ptrdiff_t, ptrdiff_t,
102 ptrdiff_t, ptrdiff_t, EMACS_INT, int, 102 ptrdiff_t, ptrdiff_t, EMACS_INT, int,
103 Lisp_Object, Lisp_Object, int); 103 Lisp_Object, Lisp_Object, bool);
104 104
105static _Noreturn void 105static _Noreturn void
106matcher_overflow (void) 106matcher_overflow (void)
@@ -112,13 +112,14 @@ matcher_overflow (void)
112 PATTERN is the pattern to compile. 112 PATTERN is the pattern to compile.
113 CP is the place to put the result. 113 CP is the place to put the result.
114 TRANSLATE is a translation table for ignoring case, or nil for none. 114 TRANSLATE is a translation table for ignoring case, or nil for none.
115 POSIX is nonzero if we want full backtracking (POSIX style) 115 POSIX is true if we want full backtracking (POSIX style) for this pattern.
116 for this pattern. 0 means backtrack only enough to get a valid match. 116 False means backtrack only enough to get a valid match.
117 117
118 The behavior also depends on Vsearch_spaces_regexp. */ 118 The behavior also depends on Vsearch_spaces_regexp. */
119 119
120static void 120static void
121compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern, Lisp_Object translate, int posix) 121compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern,
122 Lisp_Object translate, bool posix)
122{ 123{
123 char *val; 124 char *val;
124 reg_syntax_t old; 125 reg_syntax_t old;
@@ -205,11 +206,12 @@ clear_regexp_cache (void)
205 values that will result from matching this pattern. 206 values that will result from matching this pattern.
206 If it is 0, we should compile the pattern not to record any 207 If it is 0, we should compile the pattern not to record any
207 subexpression bounds. 208 subexpression bounds.
208 POSIX is nonzero if we want full backtracking (POSIX style) 209 POSIX is true if we want full backtracking (POSIX style) for this pattern.
209 for this pattern. 0 means backtrack only enough to get a valid match. */ 210 False means backtrack only enough to get a valid match. */
210 211
211struct re_pattern_buffer * 212struct re_pattern_buffer *
212compile_pattern (Lisp_Object pattern, struct re_registers *regp, Lisp_Object translate, int posix, int multibyte) 213compile_pattern (Lisp_Object pattern, struct re_registers *regp,
214 Lisp_Object translate, bool posix, bool multibyte)
213{ 215{
214 struct regexp_cache *cp, **cpp; 216 struct regexp_cache *cp, **cpp;
215 217
@@ -266,7 +268,7 @@ compile_pattern (Lisp_Object pattern, struct re_registers *regp, Lisp_Object tra
266 268
267 269
268static Lisp_Object 270static Lisp_Object
269looking_at_1 (Lisp_Object string, int posix) 271looking_at_1 (Lisp_Object string, bool posix)
270{ 272{
271 Lisp_Object val; 273 Lisp_Object val;
272 unsigned char *p1, *p2; 274 unsigned char *p1, *p2;
@@ -324,7 +326,7 @@ looking_at_1 (Lisp_Object string, int posix)
324 if (i == -2) 326 if (i == -2)
325 matcher_overflow (); 327 matcher_overflow ();
326 328
327 val = (0 <= i ? Qt : Qnil); 329 val = (i >= 0 ? Qt : Qnil);
328 if (NILP (Vinhibit_changing_match_data) && i >= 0) 330 if (NILP (Vinhibit_changing_match_data) && i >= 0)
329 for (i = 0; i < search_regs.num_regs; i++) 331 for (i = 0; i < search_regs.num_regs; i++)
330 if (search_regs.start[i] >= 0) 332 if (search_regs.start[i] >= 0)
@@ -364,7 +366,8 @@ data if you want to preserve them. */)
364} 366}
365 367
366static Lisp_Object 368static Lisp_Object
367string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start, int posix) 369string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
370 bool posix)
368{ 371{
369 ptrdiff_t val; 372 ptrdiff_t val;
370 struct re_pattern_buffer *bufp; 373 struct re_pattern_buffer *bufp;
@@ -534,9 +537,10 @@ fast_string_match_ignore_case (Lisp_Object regexp, Lisp_Object string)
534 data. */ 537 data. */
535 538
536ptrdiff_t 539ptrdiff_t
537fast_looking_at (Lisp_Object regexp, ptrdiff_t pos, ptrdiff_t pos_byte, ptrdiff_t limit, ptrdiff_t limit_byte, Lisp_Object string) 540fast_looking_at (Lisp_Object regexp, ptrdiff_t pos, ptrdiff_t pos_byte,
541 ptrdiff_t limit, ptrdiff_t limit_byte, Lisp_Object string)
538{ 542{
539 int multibyte; 543 bool multibyte;
540 struct re_pattern_buffer *buf; 544 struct re_pattern_buffer *buf;
541 unsigned char *p1, *p2; 545 unsigned char *p1, *p2;
542 ptrdiff_t s1, s2; 546 ptrdiff_t s1, s2;
@@ -619,7 +623,7 @@ newline_cache_on_off (struct buffer *buf)
619} 623}
620 624
621 625
622/* Search for COUNT instances of the character TARGET between START and END. 626/* Search for COUNT newlines between START/START_BYTE and END/END_BYTE.
623 627
624 If COUNT is positive, search forwards; END must be >= START. 628 If COUNT is positive, search forwards; END must be >= START.
625 If COUNT is negative, search backwards for the -COUNTth instance; 629 If COUNT is negative, search backwards for the -COUNTth instance;
@@ -634,14 +638,18 @@ newline_cache_on_off (struct buffer *buf)
634 this is not the same as the usual convention for Emacs motion commands. 638 this is not the same as the usual convention for Emacs motion commands.
635 639
636 If we don't find COUNT instances before reaching END, set *SHORTAGE 640 If we don't find COUNT instances before reaching END, set *SHORTAGE
637 to the number of TARGETs left unfound, and return END. 641 to the number of newlines left unfound, and return END.
642
643 If BYTEPOS is not NULL, set *BYTEPOS to the byte position corresponding
644 to the returned character position.
638 645
639 If ALLOW_QUIT, set immediate_quit. That's good to do 646 If ALLOW_QUIT, set immediate_quit. That's good to do
640 except when inside redisplay. */ 647 except when inside redisplay. */
641 648
642ptrdiff_t 649ptrdiff_t
643scan_buffer (int target, ptrdiff_t start, ptrdiff_t end, 650find_newline (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end,
644 ptrdiff_t count, ptrdiff_t *shortage, bool allow_quit) 651 ptrdiff_t end_byte, ptrdiff_t count, ptrdiff_t *shortage,
652 ptrdiff_t *bytepos, bool allow_quit)
645{ 653{
646 struct region_cache *newline_cache; 654 struct region_cache *newline_cache;
647 int direction; 655 int direction;
@@ -649,13 +657,17 @@ scan_buffer (int target, ptrdiff_t start, ptrdiff_t end,
649 if (count > 0) 657 if (count > 0)
650 { 658 {
651 direction = 1; 659 direction = 1;
652 if (! end) end = ZV; 660 if (!end)
661 end = ZV, end_byte = ZV_BYTE;
653 } 662 }
654 else 663 else
655 { 664 {
656 direction = -1; 665 direction = -1;
657 if (! end) end = BEGV; 666 if (!end)
667 end = BEGV, end_byte = BEGV_BYTE;
658 } 668 }
669 if (end_byte == -1)
670 end_byte = CHAR_TO_BYTE (end);
659 671
660 newline_cache_on_off (current_buffer); 672 newline_cache_on_off (current_buffer);
661 newline_cache = current_buffer->newline_cache; 673 newline_cache = current_buffer->newline_cache;
@@ -673,13 +685,11 @@ scan_buffer (int target, ptrdiff_t start, ptrdiff_t end,
673 the position of the last character before the next such 685 the position of the last character before the next such
674 obstacle --- the last character the dumb search loop should 686 obstacle --- the last character the dumb search loop should
675 examine. */ 687 examine. */
676 ptrdiff_t ceiling_byte = CHAR_TO_BYTE (end) - 1; 688 ptrdiff_t tem, ceiling_byte = end_byte - 1;
677 ptrdiff_t start_byte;
678 ptrdiff_t tem;
679 689
680 /* If we're looking for a newline, consult the newline cache 690 /* If we're looking for a newline, consult the newline cache
681 to see where we can avoid some scanning. */ 691 to see where we can avoid some scanning. */
682 if (target == '\n' && newline_cache) 692 if (newline_cache)
683 { 693 {
684 ptrdiff_t next_change; 694 ptrdiff_t next_change;
685 immediate_quit = 0; 695 immediate_quit = 0;
@@ -698,7 +708,7 @@ scan_buffer (int target, ptrdiff_t start, ptrdiff_t end,
698 next_change is the position of the next known region. */ 708 next_change is the position of the next known region. */
699 ceiling_byte = min (CHAR_TO_BYTE (next_change) - 1, ceiling_byte); 709 ceiling_byte = min (CHAR_TO_BYTE (next_change) - 1, ceiling_byte);
700 } 710 }
701 else 711 else if (start_byte == -1)
702 start_byte = CHAR_TO_BYTE (start); 712 start_byte = CHAR_TO_BYTE (start);
703 713
704 /* The dumb loop can only scan text stored in contiguous 714 /* The dumb loop can only scan text stored in contiguous
@@ -718,44 +728,45 @@ scan_buffer (int target, ptrdiff_t start, ptrdiff_t end,
718 728
719 while (cursor < ceiling_addr) 729 while (cursor < ceiling_addr)
720 { 730 {
721 unsigned char *scan_start = cursor;
722
723 /* The dumb loop. */ 731 /* The dumb loop. */
724 while (*cursor != target && ++cursor < ceiling_addr) 732 unsigned char *nl = memchr (cursor, '\n', ceiling_addr - cursor);
725 ;
726 733
727 /* If we're looking for newlines, cache the fact that 734 /* If we're looking for newlines, cache the fact that
728 the region from start to cursor is free of them. */ 735 the region from start to cursor is free of them. */
729 if (target == '\n' && newline_cache) 736 if (newline_cache)
730 know_region_cache (current_buffer, newline_cache, 737 {
731 BYTE_TO_CHAR (start_byte + scan_start - base), 738 unsigned char *low = cursor;
732 BYTE_TO_CHAR (start_byte + cursor - base)); 739 unsigned char *lim = nl ? nl : ceiling_addr;
733 740 know_region_cache (current_buffer, newline_cache,
734 /* Did we find the target character? */ 741 BYTE_TO_CHAR (low - base + start_byte),
735 if (cursor < ceiling_addr) 742 BYTE_TO_CHAR (lim - base + start_byte));
736 { 743 }
737 if (--count == 0) 744
738 { 745 if (! nl)
739 immediate_quit = 0; 746 break;
740 return BYTE_TO_CHAR (start_byte + cursor - base + 1); 747
741 } 748 if (--count == 0)
742 cursor++; 749 {
743 } 750 immediate_quit = 0;
751 if (bytepos)
752 *bytepos = nl + 1 - base + start_byte;
753 return BYTE_TO_CHAR (nl + 1 - base + start_byte);
754 }
755 cursor = nl + 1;
744 } 756 }
745 757
746 start = BYTE_TO_CHAR (start_byte + cursor - base); 758 start_byte += ceiling_addr - base;
759 start = BYTE_TO_CHAR (start_byte);
747 } 760 }
748 } 761 }
749 else 762 else
750 while (start > end) 763 while (start > end)
751 { 764 {
752 /* The last character to check before the next obstacle. */ 765 /* The last character to check before the next obstacle. */
753 ptrdiff_t ceiling_byte = CHAR_TO_BYTE (end); 766 ptrdiff_t tem, ceiling_byte = end_byte;
754 ptrdiff_t start_byte;
755 ptrdiff_t tem;
756 767
757 /* Consult the newline cache, if appropriate. */ 768 /* Consult the newline cache, if appropriate. */
758 if (target == '\n' && newline_cache) 769 if (newline_cache)
759 { 770 {
760 ptrdiff_t next_change; 771 ptrdiff_t next_change;
761 immediate_quit = 0; 772 immediate_quit = 0;
@@ -774,7 +785,7 @@ scan_buffer (int target, ptrdiff_t start, ptrdiff_t end,
774 next_change is the position of the next known region. */ 785 next_change is the position of the next known region. */
775 ceiling_byte = max (CHAR_TO_BYTE (next_change), ceiling_byte); 786 ceiling_byte = max (CHAR_TO_BYTE (next_change), ceiling_byte);
776 } 787 }
777 else 788 else if (start_byte == -1)
778 start_byte = CHAR_TO_BYTE (start); 789 start_byte = CHAR_TO_BYTE (start);
779 790
780 /* Stop scanning before the gap. */ 791 /* Stop scanning before the gap. */
@@ -789,42 +800,50 @@ scan_buffer (int target, ptrdiff_t start, ptrdiff_t end,
789 800
790 while (cursor >= ceiling_addr) 801 while (cursor >= ceiling_addr)
791 { 802 {
792 unsigned char *scan_start = cursor; 803 unsigned char *nl = memrchr (ceiling_addr, '\n',
793 804 cursor + 1 - ceiling_addr);
794 while (*cursor != target && --cursor >= ceiling_addr)
795 ;
796 805
797 /* If we're looking for newlines, cache the fact that 806 /* If we're looking for newlines, cache the fact that
798 the region from after the cursor to start is free of them. */ 807 the region from after the cursor to start is free of them. */
799 if (target == '\n' && newline_cache) 808 if (newline_cache)
800 know_region_cache (current_buffer, newline_cache, 809 {
801 BYTE_TO_CHAR (start_byte + cursor - base), 810 unsigned char *low = nl ? nl : ceiling_addr - 1;
802 BYTE_TO_CHAR (start_byte + scan_start - base)); 811 unsigned char *lim = cursor;
803 812 know_region_cache (current_buffer, newline_cache,
804 /* Did we find the target character? */ 813 BYTE_TO_CHAR (low - base + start_byte),
805 if (cursor >= ceiling_addr) 814 BYTE_TO_CHAR (lim - base + start_byte));
806 { 815 }
807 if (++count >= 0) 816
808 { 817 if (! nl)
809 immediate_quit = 0; 818 break;
810 return BYTE_TO_CHAR (start_byte + cursor - base); 819
811 } 820 if (++count >= 0)
812 cursor--; 821 {
813 } 822 immediate_quit = 0;
823 if (bytepos)
824 *bytepos = nl - base + start_byte;
825 return BYTE_TO_CHAR (nl - base + start_byte);
826 }
827 cursor = nl - 1;
814 } 828 }
815 829
816 start = BYTE_TO_CHAR (start_byte + cursor - base); 830 start_byte += ceiling_addr - 1 - base;
831 start = BYTE_TO_CHAR (start_byte);
817 } 832 }
818 } 833 }
819 834
820 immediate_quit = 0; 835 immediate_quit = 0;
821 if (shortage != 0) 836 if (shortage)
822 *shortage = count * direction; 837 *shortage = count * direction;
838 if (bytepos)
839 {
840 *bytepos = start_byte == -1 ? CHAR_TO_BYTE (start) : start_byte;
841 eassert (*bytepos == CHAR_TO_BYTE (start));
842 }
823 return start; 843 return start;
824} 844}
825 845
826/* Search for COUNT instances of a line boundary, which means either a 846/* Search for COUNT instances of a line boundary.
827 newline or (if selective display enabled) a carriage return.
828 Start at START. If COUNT is negative, search backwards. 847 Start at START. If COUNT is negative, search backwards.
829 848
830 We report the resulting position by calling TEMP_SET_PT_BOTH. 849 We report the resulting position by calling TEMP_SET_PT_BOTH.
@@ -855,14 +874,9 @@ scan_newline (ptrdiff_t start, ptrdiff_t start_byte,
855 874
856 bool old_immediate_quit = immediate_quit; 875 bool old_immediate_quit = immediate_quit;
857 876
858 /* The code that follows is like scan_buffer
859 but checks for either newline or carriage return. */
860
861 if (allow_quit) 877 if (allow_quit)
862 immediate_quit++; 878 immediate_quit++;
863 879
864 start_byte = CHAR_TO_BYTE (start);
865
866 if (count > 0) 880 if (count > 0)
867 { 881 {
868 while (start_byte < limit_byte) 882 while (start_byte < limit_byte)
@@ -871,29 +885,25 @@ scan_newline (ptrdiff_t start, ptrdiff_t start_byte,
871 ceiling = min (limit_byte - 1, ceiling); 885 ceiling = min (limit_byte - 1, ceiling);
872 ceiling_addr = BYTE_POS_ADDR (ceiling) + 1; 886 ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
873 base = (cursor = BYTE_POS_ADDR (start_byte)); 887 base = (cursor = BYTE_POS_ADDR (start_byte));
874 while (1)
875 {
876 while (*cursor != '\n' && ++cursor != ceiling_addr)
877 ;
878 888
879 if (cursor != ceiling_addr) 889 do
890 {
891 unsigned char *nl = memchr (cursor, '\n', ceiling_addr - cursor);
892 if (! nl)
893 break;
894 if (--count == 0)
880 { 895 {
881 if (--count == 0) 896 immediate_quit = old_immediate_quit;
882 { 897 start_byte += nl - base + 1;
883 immediate_quit = old_immediate_quit; 898 start = BYTE_TO_CHAR (start_byte);
884 start_byte = start_byte + cursor - base + 1; 899 TEMP_SET_PT_BOTH (start, start_byte);
885 start = BYTE_TO_CHAR (start_byte); 900 return 0;
886 TEMP_SET_PT_BOTH (start, start_byte);
887 return 0;
888 }
889 else
890 if (++cursor == ceiling_addr)
891 break;
892 } 901 }
893 else 902 cursor = nl + 1;
894 break;
895 } 903 }
896 start_byte += cursor - base; 904 while (cursor < ceiling_addr);
905
906 start_byte += ceiling_addr - base;
897 } 907 }
898 } 908 }
899 else 909 else
@@ -902,31 +912,28 @@ scan_newline (ptrdiff_t start, ptrdiff_t start_byte,
902 { 912 {
903 ceiling = BUFFER_FLOOR_OF (start_byte - 1); 913 ceiling = BUFFER_FLOOR_OF (start_byte - 1);
904 ceiling = max (limit_byte, ceiling); 914 ceiling = max (limit_byte, ceiling);
905 ceiling_addr = BYTE_POS_ADDR (ceiling) - 1; 915 ceiling_addr = BYTE_POS_ADDR (ceiling);
906 base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1); 916 base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
907 while (1) 917 while (1)
908 { 918 {
909 while (--cursor != ceiling_addr && *cursor != '\n') 919 unsigned char *nl = memrchr (ceiling_addr, '\n',
910 ; 920 cursor - ceiling_addr);
921 if (! nl)
922 break;
911 923
912 if (cursor != ceiling_addr) 924 if (++count == 0)
913 { 925 {
914 if (++count == 0) 926 immediate_quit = old_immediate_quit;
915 { 927 /* Return the position AFTER the match we found. */
916 immediate_quit = old_immediate_quit; 928 start_byte += nl - base + 1;
917 /* Return the position AFTER the match we found. */ 929 start = BYTE_TO_CHAR (start_byte);
918 start_byte = start_byte + cursor - base + 1; 930 TEMP_SET_PT_BOTH (start, start_byte);
919 start = BYTE_TO_CHAR (start_byte); 931 return 0;
920 TEMP_SET_PT_BOTH (start, start_byte);
921 return 0;
922 }
923 } 932 }
924 else 933
925 break; 934 cursor = nl;
926 } 935 }
927 /* Here we add 1 to compensate for the last decrement 936 start_byte += ceiling_addr - base;
928 of CURSOR, which took it past the valid range. */
929 start_byte += cursor - base + 1;
930 } 937 }
931 } 938 }
932 939
@@ -936,25 +943,33 @@ scan_newline (ptrdiff_t start, ptrdiff_t start_byte,
936 return count * direction; 943 return count * direction;
937} 944}
938 945
946/* Like find_newline, but doesn't allow QUITting and doesn't return
947 SHORTAGE. */
939ptrdiff_t 948ptrdiff_t
940find_next_newline_no_quit (ptrdiff_t from, ptrdiff_t cnt) 949find_newline_no_quit (ptrdiff_t from, ptrdiff_t frombyte,
950 ptrdiff_t cnt, ptrdiff_t *bytepos)
941{ 951{
942 return scan_buffer ('\n', from, 0, cnt, (ptrdiff_t *) 0, 0); 952 return find_newline (from, frombyte, 0, -1, cnt, NULL, bytepos, 0);
943} 953}
944 954
945/* Like find_next_newline, but returns position before the newline, 955/* Like find_newline, but returns position before the newline, not
946 not after, and only search up to TO. This isn't just 956 after, and only search up to TO.
947 find_next_newline (...)-1, because you might hit TO. */ 957 This isn't just find_newline_no_quit (...)-1, because you might hit TO. */
948 958
949ptrdiff_t 959ptrdiff_t
950find_before_next_newline (ptrdiff_t from, ptrdiff_t to, ptrdiff_t cnt) 960find_before_next_newline (ptrdiff_t from, ptrdiff_t to,
961 ptrdiff_t cnt, ptrdiff_t *bytepos)
951{ 962{
952 ptrdiff_t shortage; 963 ptrdiff_t shortage;
953 ptrdiff_t pos = scan_buffer ('\n', from, to, cnt, &shortage, 1); 964 ptrdiff_t pos = find_newline (from, -1, to, -1, cnt, &shortage, bytepos, 1);
954 965
955 if (shortage == 0) 966 if (shortage == 0)
956 pos--; 967 {
957 968 if (bytepos)
969 DEC_BOTH (pos, *bytepos);
970 else
971 pos--;
972 }
958 return pos; 973 return pos;
959} 974}
960 975
@@ -962,9 +977,9 @@ find_before_next_newline (ptrdiff_t from, ptrdiff_t to, ptrdiff_t cnt)
962 977
963static Lisp_Object 978static Lisp_Object
964search_command (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, 979search_command (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror,
965 Lisp_Object count, int direction, int RE, int posix) 980 Lisp_Object count, int direction, int RE, bool posix)
966{ 981{
967 register EMACS_INT np; 982 EMACS_INT np;
968 EMACS_INT lim; 983 EMACS_INT lim;
969 ptrdiff_t lim_byte; 984 ptrdiff_t lim_byte;
970 EMACS_INT n = direction; 985 EMACS_INT n = direction;
@@ -1016,8 +1031,7 @@ search_command (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror,
1016 1031
1017 if (!EQ (noerror, Qt)) 1032 if (!EQ (noerror, Qt))
1018 { 1033 {
1019 if (lim < BEGV || lim > ZV) 1034 eassert (BEGV <= lim && lim <= ZV);
1020 emacs_abort ();
1021 SET_PT_BOTH (lim, lim_byte); 1035 SET_PT_BOTH (lim, lim_byte);
1022 return Qnil; 1036 return Qnil;
1023#if 0 /* This would be clean, but maybe programs depend on 1037#if 0 /* This would be clean, but maybe programs depend on
@@ -1029,17 +1043,15 @@ search_command (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror,
1029 return Qnil; 1043 return Qnil;
1030 } 1044 }
1031 1045
1032 if (np < BEGV || np > ZV) 1046 eassert (BEGV <= np && np <= ZV);
1033 emacs_abort ();
1034
1035 SET_PT (np); 1047 SET_PT (np);
1036 1048
1037 return make_number (np); 1049 return make_number (np);
1038} 1050}
1039 1051
1040/* Return 1 if REGEXP it matches just one constant string. */ 1052/* Return true if REGEXP it matches just one constant string. */
1041 1053
1042static int 1054static bool
1043trivial_regexp_p (Lisp_Object regexp) 1055trivial_regexp_p (Lisp_Object regexp)
1044{ 1056{
1045 ptrdiff_t len = SBYTES (regexp); 1057 ptrdiff_t len = SBYTES (regexp);
@@ -1108,7 +1120,7 @@ static struct re_registers search_regs_1;
1108static EMACS_INT 1120static EMACS_INT
1109search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte, 1121search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
1110 ptrdiff_t lim, ptrdiff_t lim_byte, EMACS_INT n, 1122 ptrdiff_t lim, ptrdiff_t lim_byte, EMACS_INT n,
1111 int RE, Lisp_Object trt, Lisp_Object inverse_trt, int posix) 1123 int RE, Lisp_Object trt, Lisp_Object inverse_trt, bool posix)
1112{ 1124{
1113 ptrdiff_t len = SCHARS (string); 1125 ptrdiff_t len = SCHARS (string);
1114 ptrdiff_t len_byte = SBYTES (string); 1126 ptrdiff_t len_byte = SBYTES (string);
@@ -1258,12 +1270,12 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
1258 ptrdiff_t raw_pattern_size; 1270 ptrdiff_t raw_pattern_size;
1259 ptrdiff_t raw_pattern_size_byte; 1271 ptrdiff_t raw_pattern_size_byte;
1260 unsigned char *patbuf; 1272 unsigned char *patbuf;
1261 int multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters)); 1273 bool multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
1262 unsigned char *base_pat; 1274 unsigned char *base_pat;
1263 /* Set to positive if we find a non-ASCII char that need 1275 /* Set to positive if we find a non-ASCII char that need
1264 translation. Otherwise set to zero later. */ 1276 translation. Otherwise set to zero later. */
1265 int char_base = -1; 1277 int char_base = -1;
1266 int boyer_moore_ok = 1; 1278 bool boyer_moore_ok = 1;
1267 1279
1268 /* MULTIBYTE says whether the text to be searched is multibyte. 1280 /* MULTIBYTE says whether the text to be searched is multibyte.
1269 We must convert PATTERN to match that, or we will not really 1281 We must convert PATTERN to match that, or we will not really
@@ -1313,8 +1325,11 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
1313 non-nil, we can use boyer-moore search only if TRT can be 1325 non-nil, we can use boyer-moore search only if TRT can be
1314 represented by the byte array of 256 elements. For that, 1326 represented by the byte array of 256 elements. For that,
1315 all non-ASCII case-equivalents of all case-sensitive 1327 all non-ASCII case-equivalents of all case-sensitive
1316 characters in STRING must belong to the same charset and 1328 characters in STRING must belong to the same character
1317 row. */ 1329 group (two characters belong to the same group iff their
1330 multibyte forms are the same except for the last byte;
1331 i.e. every 64 characters form a group; U+0000..U+003F,
1332 U+0040..U+007F, U+0080..U+00BF, ...). */
1318 1333
1319 while (--len >= 0) 1334 while (--len >= 0)
1320 { 1335 {
@@ -1406,7 +1421,7 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
1406 char_base = 0; 1421 char_base = 0;
1407 while (--len >= 0) 1422 while (--len >= 0)
1408 { 1423 {
1409 int c, translated; 1424 int c, translated, inverse;
1410 1425
1411 /* If we got here and the RE flag is set, it's because we're 1426 /* If we got here and the RE flag is set, it's because we're
1412 dealing with a regexp known to be trivial, so the backslash 1427 dealing with a regexp known to be trivial, so the backslash
@@ -1420,6 +1435,20 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
1420 c = *base_pat++; 1435 c = *base_pat++;
1421 TRANSLATE (translated, trt, c); 1436 TRANSLATE (translated, trt, c);
1422 *pat++ = translated; 1437 *pat++ = translated;
1438 /* Check that none of C's equivalents violates the
1439 assumptions of boyer_moore. */
1440 TRANSLATE (inverse, inverse_trt, c);
1441 while (1)
1442 {
1443 if (inverse >= 0200)
1444 {
1445 boyer_moore_ok = 0;
1446 break;
1447 }
1448 if (c == inverse)
1449 break;
1450 TRANSLATE (inverse, inverse_trt, inverse);
1451 }
1423 } 1452 }
1424 } 1453 }
1425 1454
@@ -1454,8 +1483,8 @@ simple_search (EMACS_INT n, unsigned char *pat,
1454 ptrdiff_t pos, ptrdiff_t pos_byte, 1483 ptrdiff_t pos, ptrdiff_t pos_byte,
1455 ptrdiff_t lim, ptrdiff_t lim_byte) 1484 ptrdiff_t lim, ptrdiff_t lim_byte)
1456{ 1485{
1457 int multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters)); 1486 bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
1458 int forward = n > 0; 1487 bool forward = n > 0;
1459 /* Number of buffer bytes matched. Note that this may be different 1488 /* Number of buffer bytes matched. Note that this may be different
1460 from len_byte in a multibyte buffer. */ 1489 from len_byte in a multibyte buffer. */
1461 ptrdiff_t match_byte = PTRDIFF_MIN; 1490 ptrdiff_t match_byte = PTRDIFF_MIN;
@@ -1674,7 +1703,7 @@ boyer_moore (EMACS_INT n, unsigned char *base_pat,
1674 register ptrdiff_t i; 1703 register ptrdiff_t i;
1675 register int j; 1704 register int j;
1676 unsigned char *pat, *pat_end; 1705 unsigned char *pat, *pat_end;
1677 int multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters)); 1706 bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
1678 1707
1679 unsigned char simple_translate[0400]; 1708 unsigned char simple_translate[0400];
1680 /* These are set to the preceding bytes of a byte to be translated 1709 /* These are set to the preceding bytes of a byte to be translated
@@ -2255,12 +2284,12 @@ since only regular expressions have distinguished subexpressions. */)
2255 (Lisp_Object newtext, Lisp_Object fixedcase, Lisp_Object literal, Lisp_Object string, Lisp_Object subexp) 2284 (Lisp_Object newtext, Lisp_Object fixedcase, Lisp_Object literal, Lisp_Object string, Lisp_Object subexp)
2256{ 2285{
2257 enum { nochange, all_caps, cap_initial } case_action; 2286 enum { nochange, all_caps, cap_initial } case_action;
2258 register ptrdiff_t pos, pos_byte; 2287 ptrdiff_t pos, pos_byte;
2259 int some_multiletter_word; 2288 bool some_multiletter_word;
2260 int some_lowercase; 2289 bool some_lowercase;
2261 int some_uppercase; 2290 bool some_uppercase;
2262 int some_nonuppercase_initial; 2291 bool some_nonuppercase_initial;
2263 register int c, prevc; 2292 int c, prevc;
2264 ptrdiff_t sub; 2293 ptrdiff_t sub;
2265 ptrdiff_t opoint, newpoint; 2294 ptrdiff_t opoint, newpoint;
2266 2295
@@ -2405,7 +2434,7 @@ since only regular expressions have distinguished subexpressions. */)
2405 { 2434 {
2406 ptrdiff_t substart = -1; 2435 ptrdiff_t substart = -1;
2407 ptrdiff_t subend = 0; 2436 ptrdiff_t subend = 0;
2408 int delbackslash = 0; 2437 bool delbackslash = 0;
2409 2438
2410 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte); 2439 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2411 2440
@@ -2421,7 +2450,7 @@ since only regular expressions have distinguished subexpressions. */)
2421 else if (c >= '1' && c <= '9') 2450 else if (c >= '1' && c <= '9')
2422 { 2451 {
2423 if (c - '0' < search_regs.num_regs 2452 if (c - '0' < search_regs.num_regs
2424 && 0 <= search_regs.start[c - '0']) 2453 && search_regs.start[c - '0'] >= 0)
2425 { 2454 {
2426 substart = search_regs.start[c - '0']; 2455 substart = search_regs.start[c - '0'];
2427 subend = search_regs.end[c - '0']; 2456 subend = search_regs.end[c - '0'];
@@ -2500,11 +2529,11 @@ since only regular expressions have distinguished subexpressions. */)
2500 ptrdiff_t length = SBYTES (newtext); 2529 ptrdiff_t length = SBYTES (newtext);
2501 unsigned char *substed; 2530 unsigned char *substed;
2502 ptrdiff_t substed_alloc_size, substed_len; 2531 ptrdiff_t substed_alloc_size, substed_len;
2503 int buf_multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters)); 2532 bool buf_multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
2504 int str_multibyte = STRING_MULTIBYTE (newtext); 2533 bool str_multibyte = STRING_MULTIBYTE (newtext);
2505 int really_changed = 0; 2534 bool really_changed = 0;
2506 2535
2507 substed_alloc_size = ((STRING_BYTES_BOUND - 100) / 2 < length 2536 substed_alloc_size = (length > (STRING_BYTES_BOUND - 100) / 2
2508 ? STRING_BYTES_BOUND 2537 ? STRING_BYTES_BOUND
2509 : length * 2 + 100); 2538 : length * 2 + 100);
2510 substed = xmalloc (substed_alloc_size); 2539 substed = xmalloc (substed_alloc_size);
@@ -2585,7 +2614,7 @@ since only regular expressions have distinguished subexpressions. */)
2585 ptrdiff_t begbyte = CHAR_TO_BYTE (search_regs.start[idx]); 2614 ptrdiff_t begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2586 add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte; 2615 add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2587 if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx]) 2616 if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2588 move_gap (search_regs.start[idx]); 2617 move_gap_both (search_regs.start[idx], begbyte);
2589 add_stuff = BYTE_POS_ADDR (begbyte); 2618 add_stuff = BYTE_POS_ADDR (begbyte);
2590 } 2619 }
2591 2620
@@ -2668,7 +2697,7 @@ since only regular expressions have distinguished subexpressions. */)
2668} 2697}
2669 2698
2670static Lisp_Object 2699static Lisp_Object
2671match_limit (Lisp_Object num, int beginningp) 2700match_limit (Lisp_Object num, bool beginningp)
2672{ 2701{
2673 EMACS_INT n; 2702 EMACS_INT n;
2674 2703
@@ -2941,9 +2970,9 @@ If optional arg RESEAT is non-nil, make markers on LIST point nowhere. */)
2941 return Qnil; 2970 return Qnil;
2942} 2971}
2943 2972
2944/* If non-zero the match data have been saved in saved_search_regs 2973/* If true the match data have been saved in saved_search_regs
2945 during the execution of a sentinel or filter. */ 2974 during the execution of a sentinel or filter. */
2946static int search_regs_saved; 2975static bool search_regs_saved;
2947static struct re_registers saved_search_regs; 2976static struct re_registers saved_search_regs;
2948static Lisp_Object saved_last_thing_searched; 2977static Lisp_Object saved_last_thing_searched;
2949 2978
@@ -3008,9 +3037,9 @@ DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3008 doc: /* Return a regexp string which matches exactly STRING and nothing else. */) 3037 doc: /* Return a regexp string which matches exactly STRING and nothing else. */)
3009 (Lisp_Object string) 3038 (Lisp_Object string)
3010{ 3039{
3011 register char *in, *out, *end; 3040 char *in, *out, *end;
3012 register char *temp; 3041 char *temp;
3013 int backslashes_added = 0; 3042 ptrdiff_t backslashes_added = 0;
3014 3043
3015 CHECK_STRING (string); 3044 CHECK_STRING (string);
3016 3045