aboutsummaryrefslogtreecommitdiffstats
path: root/src/regex.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/regex.c')
-rw-r--r-- src/regex.c 63
1 files changed, 55 insertions, 8 deletions
diff --git a/src/regex.c b/src/regex.c
index 846c87041b1..ae80ad0cee8 100644
--- a/src/regex.c
+++ b/src/regex.c
@@ -2530,6 +2530,7 @@ regex_compile (pattern, size, syntax, bufp)
2530 bufp->syntax = syntax; 2530 bufp->syntax = syntax;
2531 bufp->fastmap_accurate = 0; 2531 bufp->fastmap_accurate = 0;
2532 bufp->not_bol = bufp->not_eol = 0; 2532 bufp->not_bol = bufp->not_eol = 0;
2533 bufp->used_syntax = 0;
2533 2534
2534 /* Set `used' to zero, so that if we return an error, the pattern 2535 /* Set `used' to zero, so that if we return an error, the pattern
2535 printer (for debugging) will think there's no pattern. We reset it 2536 printer (for debugging) will think there's no pattern. We reset it
@@ -2942,6 +2943,14 @@ regex_compile (pattern, size, syntax, bufp)
2942 SET_LIST_BIT (translated); 2943 SET_LIST_BIT (translated);
2943 } 2944 }
2944 2945
2946 /* In most cases the matching rule for char classes
2947 only uses the syntax table for multibyte chars,
2948 so that the content of the syntax-table is not
2949 hardcoded in the range_table. SPACE and WORD are
2950 the two exceptions. */
2951 if ((1 << cc) & ((1 << RECC_SPACE) | (1 << RECC_WORD)))
2952 bufp->used_syntax = 1;
2953
2945 /* Repeat the loop. */ 2954 /* Repeat the loop. */
2946 continue; 2955 continue;
2947 } 2956 }
@@ -3877,11 +3886,13 @@ analyse_first (p, pend, fastmap, multibyte)
3877 if (fastmap) 3886 if (fastmap)
3878 { 3887 {
3879 int c = RE_STRING_CHAR (p + 1, pend - p); 3888 int c = RE_STRING_CHAR (p + 1, pend - p);
3880 3889 /* When fast-scanning, the fastmap can be indexed either with
3890 a char (smaller than 256) or with the first byte of
3891 a char's byte sequence. So we have to conservatively add
3892 both to the table. */
3881 if (SINGLE_BYTE_CHAR_P (c)) 3893 if (SINGLE_BYTE_CHAR_P (c))
3882 fastmap[c] = 1; 3894 fastmap[c] = 1;
3883 else 3895 fastmap[p[1]] = 1;
3884 fastmap[p[1]] = 1;
3885 } 3896 }
3886 break; 3897 break;
3887 3898
@@ -3899,6 +3910,10 @@ analyse_first (p, pend, fastmap, multibyte)
3899 So any that are not listed in the charset 3910 So any that are not listed in the charset
3900 are possible matches, even in multibyte buffers. */ 3911 are possible matches, even in multibyte buffers. */
3901 if (!fastmap) break; 3912 if (!fastmap) break;
3913 /* We don't need to mark LEADING_CODE_8_BIT_CONTROL specially
3914 because it will automatically be set when needed by virtue of
3915 being larger than the highest char of its charset (0xbf) but
3916 smaller than (1<<BYTEWIDTH). */
3902 for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH; 3917 for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH;
3903 j < (1 << BYTEWIDTH); j++) 3918 j < (1 << BYTEWIDTH); j++)
3904 fastmap[j] = 1; 3919 fastmap[j] = 1;
@@ -3909,7 +3924,13 @@ analyse_first (p, pend, fastmap, multibyte)
3909 for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH - 1, p++; 3924 for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH - 1, p++;
3910 j >= 0; j--) 3925 j >= 0; j--)
3911 if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not) 3926 if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not)
3912 fastmap[j] = 1; 3927 {
3928 fastmap[j] = 1;
3929#ifdef emacs
3930 if (j >= 0x80 && j < 0xa0)
3931 fastmap[LEADING_CODE_8_BIT_CONTROL] = 1;
3932#endif
3933 }
3913 3934
3914 if ((not && multibyte) 3935 if ((not && multibyte)
3915 /* Any character set can possibly contain a character 3936 /* Any character set can possibly contain a character
@@ -4352,11 +4373,33 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
4352 } 4373 }
4353 } 4374 }
4354 else 4375 else
4355 while (range > lim && !fastmap[*d]) 4376 do
4356 { 4377 {
4357 d++; 4378 re_char *d_start = d;
4358 range--; 4379 while (range > lim && !fastmap[*d])
4359 } 4380 {
4381 d++;
4382 range--;
4383 }
4384#ifdef emacs
4385 if (multibyte && range > lim)
4386 {
4387 /* Check that we are at the beginning of a char. */
4388 int at_boundary;
4389 AT_CHAR_BOUNDARY_P (at_boundary, d, d_start);
4390 if (at_boundary)
4391 break;
4392 else
4393 { /* We have matched an internal byte of a char
4394 rather than the leading byte, so it's a false
4395 positive: we should keep scanning. */
4396 d++; range--;
4397 }
4398 }
4399 else
4400#endif
4401 break;
4402 } while (1);
4360 4403
4361 startpos += irange - range; 4404 startpos += irange - range;
4362 } 4405 }
@@ -6197,6 +6240,10 @@ re_compile_pattern (pattern, length, bufp)
6197{ 6240{
6198 reg_errcode_t ret; 6241 reg_errcode_t ret;
6199 6242
6243#ifdef emacs
6244 gl_state.current_syntax_table = current_buffer->syntax_table;
6245#endif
6246
6200 /* GNU code is written to assume at least RE_NREGS registers will be set 6247 /* GNU code is written to assume at least RE_NREGS registers will be set
6201 (and at least one extra will be -1). */ 6248 (and at least one extra will be -1). */
6202 bufp->regs_allocated = REGS_UNALLOCATED; 6249 bufp->regs_allocated = REGS_UNALLOCATED;