diff options
Diffstat (limited to 'src/regex.c')
| -rw-r--r-- | src/regex.c | 63 |
1 files changed, 55 insertions, 8 deletions
diff --git a/src/regex.c b/src/regex.c index 846c87041b1..ae80ad0cee8 100644 --- a/src/regex.c +++ b/src/regex.c | |||
| @@ -2530,6 +2530,7 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2530 | bufp->syntax = syntax; | 2530 | bufp->syntax = syntax; |
| 2531 | bufp->fastmap_accurate = 0; | 2531 | bufp->fastmap_accurate = 0; |
| 2532 | bufp->not_bol = bufp->not_eol = 0; | 2532 | bufp->not_bol = bufp->not_eol = 0; |
| 2533 | bufp->used_syntax = 0; | ||
| 2533 | 2534 | ||
| 2534 | /* Set `used' to zero, so that if we return an error, the pattern | 2535 | /* Set `used' to zero, so that if we return an error, the pattern |
| 2535 | printer (for debugging) will think there's no pattern. We reset it | 2536 | printer (for debugging) will think there's no pattern. We reset it |
| @@ -2942,6 +2943,14 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2942 | SET_LIST_BIT (translated); | 2943 | SET_LIST_BIT (translated); |
| 2943 | } | 2944 | } |
| 2944 | 2945 | ||
| 2946 | /* In most cases the matching rule for char classes | ||
| 2947 | only uses the syntax table for multibyte chars, | ||
| 2948 | so that the content of the syntax-table is not | ||
| 2949 | hardcoded in the range_table. SPACE and WORD are | ||
| 2950 | the two exceptions. */ | ||
| 2951 | if ((1 << cc) & ((1 << RECC_SPACE) | (1 << RECC_WORD))) | ||
| 2952 | bufp->used_syntax = 1; | ||
| 2953 | |||
| 2945 | /* Repeat the loop. */ | 2954 | /* Repeat the loop. */ |
| 2946 | continue; | 2955 | continue; |
| 2947 | } | 2956 | } |
| @@ -3877,11 +3886,13 @@ analyse_first (p, pend, fastmap, multibyte) | |||
| 3877 | if (fastmap) | 3886 | if (fastmap) |
| 3878 | { | 3887 | { |
| 3879 | int c = RE_STRING_CHAR (p + 1, pend - p); | 3888 | int c = RE_STRING_CHAR (p + 1, pend - p); |
| 3880 | 3889 | /* When fast-scanning, the fastmap can be indexed either with | |
| 3890 | a char (smaller than 256) or with the first byte of | ||
| 3891 | a char's byte sequence. So we have to conservatively add | ||
| 3892 | both to the table. */ | ||
| 3881 | if (SINGLE_BYTE_CHAR_P (c)) | 3893 | if (SINGLE_BYTE_CHAR_P (c)) |
| 3882 | fastmap[c] = 1; | 3894 | fastmap[c] = 1; |
| 3883 | else | 3895 | fastmap[p[1]] = 1; |
| 3884 | fastmap[p[1]] = 1; | ||
| 3885 | } | 3896 | } |
| 3886 | break; | 3897 | break; |
| 3887 | 3898 | ||
| @@ -3899,6 +3910,10 @@ analyse_first (p, pend, fastmap, multibyte) | |||
| 3899 | So any that are not listed in the charset | 3910 | So any that are not listed in the charset |
| 3900 | are possible matches, even in multibyte buffers. */ | 3911 | are possible matches, even in multibyte buffers. */ |
| 3901 | if (!fastmap) break; | 3912 | if (!fastmap) break; |
| 3913 | /* We don't need to mark LEADING_CODE_8_BIT_CONTROL specially | ||
| 3914 | because it will automatically be set when needed by virtue of | ||
| 3915 | being larger than the highest char of its charset (0xbf) but | ||
| 3916 | smaller than (1<<BYTEWIDTH). */ | ||
| 3902 | for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH; | 3917 | for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH; |
| 3903 | j < (1 << BYTEWIDTH); j++) | 3918 | j < (1 << BYTEWIDTH); j++) |
| 3904 | fastmap[j] = 1; | 3919 | fastmap[j] = 1; |
| @@ -3909,7 +3924,13 @@ analyse_first (p, pend, fastmap, multibyte) | |||
| 3909 | for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH - 1, p++; | 3924 | for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH - 1, p++; |
| 3910 | j >= 0; j--) | 3925 | j >= 0; j--) |
| 3911 | if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not) | 3926 | if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not) |
| 3912 | fastmap[j] = 1; | 3927 | { |
| 3928 | fastmap[j] = 1; | ||
| 3929 | #ifdef emacs | ||
| 3930 | if (j >= 0x80 && j < 0xa0) | ||
| 3931 | fastmap[LEADING_CODE_8_BIT_CONTROL] = 1; | ||
| 3932 | #endif | ||
| 3933 | } | ||
| 3913 | 3934 | ||
| 3914 | if ((not && multibyte) | 3935 | if ((not && multibyte) |
| 3915 | /* Any character set can possibly contain a character | 3936 | /* Any character set can possibly contain a character |
| @@ -4352,11 +4373,33 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop) | |||
| 4352 | } | 4373 | } |
| 4353 | } | 4374 | } |
| 4354 | else | 4375 | else |
| 4355 | while (range > lim && !fastmap[*d]) | 4376 | do |
| 4356 | { | 4377 | { |
| 4357 | d++; | 4378 | re_char *d_start = d; |
| 4358 | range--; | 4379 | while (range > lim && !fastmap[*d]) |
| 4359 | } | 4380 | { |
| 4381 | d++; | ||
| 4382 | range--; | ||
| 4383 | } | ||
| 4384 | #ifdef emacs | ||
| 4385 | if (multibyte && range > lim) | ||
| 4386 | { | ||
| 4387 | /* Check that we are at the beginning of a char. */ | ||
| 4388 | int at_boundary; | ||
| 4389 | AT_CHAR_BOUNDARY_P (at_boundary, d, d_start); | ||
| 4390 | if (at_boundary) | ||
| 4391 | break; | ||
| 4392 | else | ||
| 4393 | { /* We have matched an internal byte of a char | ||
| 4394 | rather than the leading byte, so it's a false | ||
| 4395 | positive: we should keep scanning. */ | ||
| 4396 | d++; range--; | ||
| 4397 | } | ||
| 4398 | } | ||
| 4399 | else | ||
| 4400 | #endif | ||
| 4401 | break; | ||
| 4402 | } while (1); | ||
| 4360 | 4403 | ||
| 4361 | startpos += irange - range; | 4404 | startpos += irange - range; |
| 4362 | } | 4405 | } |
| @@ -6197,6 +6240,10 @@ re_compile_pattern (pattern, length, bufp) | |||
| 6197 | { | 6240 | { |
| 6198 | reg_errcode_t ret; | 6241 | reg_errcode_t ret; |
| 6199 | 6242 | ||
| 6243 | #ifdef emacs | ||
| 6244 | gl_state.current_syntax_table = current_buffer->syntax_table; | ||
| 6245 | #endif | ||
| 6246 | |||
| 6200 | /* GNU code is written to assume at least RE_NREGS registers will be set | 6247 | /* GNU code is written to assume at least RE_NREGS registers will be set |
| 6201 | (and at least one extra will be -1). */ | 6248 | (and at least one extra will be -1). */ |
| 6202 | bufp->regs_allocated = REGS_UNALLOCATED; | 6249 | bufp->regs_allocated = REGS_UNALLOCATED; |