diff options
| author | Stefan Monnier | 2023-04-12 15:44:58 -0400 |
|---|---|---|
| committer | Stefan Monnier | 2023-04-12 15:44:58 -0400 |
| commit | d53ff9fe28b63dcd9bab2479cb7a517ba7233016 (patch) | |
| tree | 4076a26452f8261226cc5b793d07a3a4e332fa9d /src | |
| parent | 1e6463ad22cd74b1b74b9395dff8c8b1485e202e (diff) | |
| download | emacs-d53ff9fe28b63dcd9bab2479cb7a517ba7233016.tar.gz emacs-d53ff9fe28b63dcd9bab2479cb7a517ba7233016.zip | |
src/regex-emacs.c (POS_AS_IN_BUFFER): Delete macro
That macro added 1 to buffer positions, with the justification:
"Strings are 0-indexed, buffers are 1-indexed"
but in reality this 1 was added to the regexp engine's "byte
offsets", which are not the 1-based byte positions used throughout
the rest of Emacs but BEGV_BYTE-relative offsets, so the two
did not cancel out.
* src/regex-emacs.c (PTR_TO_OFFSET, POS_AS_IN_BUFFER): Delete macros;
use `POINTER_TO_OFFSET` instead.
(re_search_2, re_match_2, re_match_2_internal): Adjust accordingly.
* src/syntax.h (SYNTAX_TABLE_BYTE_TO_CHAR): Don't subtract 1 from buffer
byte offsets now that `POS_AS_IN_BUFFER` doesn't add it any more.
Diffstat (limited to 'src')
| -rw-r--r-- | src/regex-emacs.c | 23 | ||||
| -rw-r--r-- | src/syntax.h | 12 |
2 files changed, 14 insertions, 21 deletions
diff --git a/src/regex-emacs.c b/src/regex-emacs.c index 2571812cb39..969f2ff9464 100644 --- a/src/regex-emacs.c +++ b/src/regex-emacs.c | |||
| @@ -47,13 +47,6 @@ | |||
| 47 | /* Make syntax table lookup grant data in gl_state. */ | 47 | /* Make syntax table lookup grant data in gl_state. */ |
| 48 | #define SYNTAX(c) syntax_property (c, 1) | 48 | #define SYNTAX(c) syntax_property (c, 1) |
| 49 | 49 | ||
| 50 | /* Convert the pointer to the char to BEG-based offset from the start. */ | ||
| 51 | #define PTR_TO_OFFSET(d) POS_AS_IN_BUFFER (POINTER_TO_OFFSET (d)) | ||
| 52 | /* Strings are 0-indexed, buffers are 1-indexed; pun on the boolean | ||
| 53 | result to get the right base index. */ | ||
| 54 | #define POS_AS_IN_BUFFER(p) \ | ||
| 55 | ((p) + (NILP (gl_state.object) || BUFFERP (gl_state.object))) | ||
| 56 | |||
| 57 | #define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte) | 50 | #define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte) |
| 58 | #define RE_TARGET_MULTIBYTE_P(bufp) ((bufp)->target_multibyte) | 51 | #define RE_TARGET_MULTIBYTE_P(bufp) ((bufp)->target_multibyte) |
| 59 | #define RE_STRING_CHAR(p, multibyte) \ | 52 | #define RE_STRING_CHAR(p, multibyte) \ |
| @@ -3260,7 +3253,7 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, ptrdiff_t size1, | |||
| 3260 | 3253 | ||
| 3261 | gl_state.object = re_match_object; /* Used by SYNTAX_TABLE_BYTE_TO_CHAR. */ | 3254 | gl_state.object = re_match_object; /* Used by SYNTAX_TABLE_BYTE_TO_CHAR. */ |
| 3262 | { | 3255 | { |
| 3263 | ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (startpos)); | 3256 | ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (startpos); |
| 3264 | 3257 | ||
| 3265 | SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1); | 3258 | SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1); |
| 3266 | } | 3259 | } |
| @@ -3873,7 +3866,7 @@ re_match_2 (struct re_pattern_buffer *bufp, | |||
| 3873 | 3866 | ||
| 3874 | ptrdiff_t charpos; | 3867 | ptrdiff_t charpos; |
| 3875 | gl_state.object = re_match_object; /* Used by SYNTAX_TABLE_BYTE_TO_CHAR. */ | 3868 | gl_state.object = re_match_object; /* Used by SYNTAX_TABLE_BYTE_TO_CHAR. */ |
| 3876 | charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (pos)); | 3869 | charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos); |
| 3877 | SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1); | 3870 | SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1); |
| 3878 | 3871 | ||
| 3879 | result = re_match_2_internal (bufp, (re_char *) string1, size1, | 3872 | result = re_match_2_internal (bufp, (re_char *) string1, size1, |
| @@ -4806,7 +4799,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, | |||
| 4806 | int c1, c2; | 4799 | int c1, c2; |
| 4807 | int s1, s2; | 4800 | int s1, s2; |
| 4808 | int dummy; | 4801 | int dummy; |
| 4809 | ptrdiff_t offset = PTR_TO_OFFSET (d); | 4802 | ptrdiff_t offset = POINTER_TO_OFFSET (d); |
| 4810 | ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset) - 1; | 4803 | ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset) - 1; |
| 4811 | UPDATE_SYNTAX_TABLE (charpos); | 4804 | UPDATE_SYNTAX_TABLE (charpos); |
| 4812 | GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); | 4805 | GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); |
| @@ -4846,7 +4839,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, | |||
| 4846 | int c1, c2; | 4839 | int c1, c2; |
| 4847 | int s1, s2; | 4840 | int s1, s2; |
| 4848 | int dummy; | 4841 | int dummy; |
| 4849 | ptrdiff_t offset = PTR_TO_OFFSET (d); | 4842 | ptrdiff_t offset = POINTER_TO_OFFSET (d); |
| 4850 | ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); | 4843 | ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); |
| 4851 | UPDATE_SYNTAX_TABLE (charpos); | 4844 | UPDATE_SYNTAX_TABLE (charpos); |
| 4852 | PREFETCH (); | 4845 | PREFETCH (); |
| @@ -4889,7 +4882,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, | |||
| 4889 | int c1, c2; | 4882 | int c1, c2; |
| 4890 | int s1, s2; | 4883 | int s1, s2; |
| 4891 | int dummy; | 4884 | int dummy; |
| 4892 | ptrdiff_t offset = PTR_TO_OFFSET (d); | 4885 | ptrdiff_t offset = POINTER_TO_OFFSET (d); |
| 4893 | ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset) - 1; | 4886 | ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset) - 1; |
| 4894 | UPDATE_SYNTAX_TABLE (charpos); | 4887 | UPDATE_SYNTAX_TABLE (charpos); |
| 4895 | GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); | 4888 | GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); |
| @@ -4931,7 +4924,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, | |||
| 4931 | is the character at D, and S2 is the syntax of C2. */ | 4924 | is the character at D, and S2 is the syntax of C2. */ |
| 4932 | int c1, c2; | 4925 | int c1, c2; |
| 4933 | int s1, s2; | 4926 | int s1, s2; |
| 4934 | ptrdiff_t offset = PTR_TO_OFFSET (d); | 4927 | ptrdiff_t offset = POINTER_TO_OFFSET (d); |
| 4935 | ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); | 4928 | ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); |
| 4936 | UPDATE_SYNTAX_TABLE (charpos); | 4929 | UPDATE_SYNTAX_TABLE (charpos); |
| 4937 | PREFETCH (); | 4930 | PREFETCH (); |
| @@ -4972,7 +4965,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, | |||
| 4972 | is the character at D, and S2 is the syntax of C2. */ | 4965 | is the character at D, and S2 is the syntax of C2. */ |
| 4973 | int c1, c2; | 4966 | int c1, c2; |
| 4974 | int s1, s2; | 4967 | int s1, s2; |
| 4975 | ptrdiff_t offset = PTR_TO_OFFSET (d); | 4968 | ptrdiff_t offset = POINTER_TO_OFFSET (d); |
| 4976 | ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset) - 1; | 4969 | ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset) - 1; |
| 4977 | UPDATE_SYNTAX_TABLE (charpos); | 4970 | UPDATE_SYNTAX_TABLE (charpos); |
| 4978 | GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); | 4971 | GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); |
| @@ -5008,7 +5001,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, | |||
| 5008 | mcnt); | 5001 | mcnt); |
| 5009 | PREFETCH (); | 5002 | PREFETCH (); |
| 5010 | { | 5003 | { |
| 5011 | ptrdiff_t offset = PTR_TO_OFFSET (d); | 5004 | ptrdiff_t offset = POINTER_TO_OFFSET (d); |
| 5012 | ptrdiff_t pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (offset); | 5005 | ptrdiff_t pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (offset); |
| 5013 | UPDATE_SYNTAX_TABLE (pos1); | 5006 | UPDATE_SYNTAX_TABLE (pos1); |
| 5014 | } | 5007 | } |
diff --git a/src/syntax.h b/src/syntax.h index aefe4dafa42..05d58eff05f 100644 --- a/src/syntax.h +++ b/src/syntax.h | |||
| @@ -145,7 +145,7 @@ extern bool syntax_prefix_flag_p (int c); | |||
| 145 | 145 | ||
| 146 | extern unsigned char const syntax_spec_code[0400]; | 146 | extern unsigned char const syntax_spec_code[0400]; |
| 147 | 147 | ||
| 148 | /* Convert the byte offset BYTEPOS into a character position, | 148 | /* Convert the regexp BYTEOFFSET into a character position, |
| 149 | for the object recorded in gl_state with SETUP_SYNTAX_TABLE_FOR_OBJECT. | 149 | for the object recorded in gl_state with SETUP_SYNTAX_TABLE_FOR_OBJECT. |
| 150 | 150 | ||
| 151 | The value is meant for use in code that does nothing when | 151 | The value is meant for use in code that does nothing when |
| @@ -153,19 +153,19 @@ extern unsigned char const syntax_spec_code[0400]; | |||
| 153 | for speed. */ | 153 | for speed. */ |
| 154 | 154 | ||
| 155 | INLINE ptrdiff_t | 155 | INLINE ptrdiff_t |
| 156 | SYNTAX_TABLE_BYTE_TO_CHAR (ptrdiff_t bytepos) | 156 | SYNTAX_TABLE_BYTE_TO_CHAR (ptrdiff_t byteoffset) |
| 157 | { | 157 | { |
| 158 | return (! parse_sexp_lookup_properties | 158 | return (! parse_sexp_lookup_properties |
| 159 | ? 0 | 159 | ? 0 |
| 160 | : STRINGP (gl_state.object) | 160 | : STRINGP (gl_state.object) |
| 161 | ? string_byte_to_char (gl_state.object, bytepos) | 161 | ? string_byte_to_char (gl_state.object, byteoffset) |
| 162 | : BUFFERP (gl_state.object) | 162 | : BUFFERP (gl_state.object) |
| 163 | ? ((buf_bytepos_to_charpos | 163 | ? ((buf_bytepos_to_charpos |
| 164 | (XBUFFER (gl_state.object), | 164 | (XBUFFER (gl_state.object), |
| 165 | (bytepos + BUF_BEGV_BYTE (XBUFFER (gl_state.object)) - 1)))) | 165 | (byteoffset + BUF_BEGV_BYTE (XBUFFER (gl_state.object)))))) |
| 166 | : NILP (gl_state.object) | 166 | : NILP (gl_state.object) |
| 167 | ? BYTE_TO_CHAR (bytepos + BEGV_BYTE - 1) | 167 | ? BYTE_TO_CHAR (byteoffset + BEGV_BYTE) |
| 168 | : bytepos); | 168 | : byteoffset); |
| 169 | } | 169 | } |
| 170 | 170 | ||
| 171 | /* Make syntax table state (gl_state) good for CHARPOS, assuming it is | 171 | /* Make syntax table state (gl_state) good for CHARPOS, assuming it is |