aboutsummaryrefslogtreecommitdiffstats
path: root/src/regex.c
diff options
context:
space:
mode:
author Stefan Monnier 2006-09-22 17:30:13 +0000
committer Stefan Monnier 2006-09-22 17:30:13 +0000
commit 4560a582d623dbf040f4176bdebb8107c12c2bb8 (patch)
tree 4e9eed296a37bf4d9f85a6a8c96dddd3ff9469ed /src/regex.c
parent 3ffcda547185fe2950f0ffe108604a1a13dd7b8b (diff)
download emacs-4560a582d623dbf040f4176bdebb8107c12c2bb8.tar.gz
emacs-4560a582d623dbf040f4176bdebb8107c12c2bb8.zip
(analyse_first): For eight-bit-control chars, mark both the char's value and its leading byte in the fastmap.
(re_search_2): When fast-scanning without translation, be careful to check that we only match the leading byte of a multibyte char.
Diffstat (limited to 'src/regex.c')
-rw-r--r-- src/regex.c 50
1 file changed, 42 insertions, 8 deletions
diff --git a/src/regex.c b/src/regex.c
index 763b490c906..66e363e731c 100644
--- a/src/regex.c
+++ b/src/regex.c
@@ -3877,11 +3877,13 @@ analyse_first (p, pend, fastmap, multibyte)
3877 if (fastmap) 3877 if (fastmap)
3878 { 3878 {
3879 int c = RE_STRING_CHAR (p + 1, pend - p); 3879 int c = RE_STRING_CHAR (p + 1, pend - p);
3880 3880 /* When fast-scanning, the fastmap can be indexed either with
3881 a char (smaller than 256) or with the first byte of
3882 a char's byte sequence. So we have to conservatively add
3883 both to the table. */
3881 if (SINGLE_BYTE_CHAR_P (c)) 3884 if (SINGLE_BYTE_CHAR_P (c))
3882 fastmap[c] = 1; 3885 fastmap[c] = 1;
3883 else 3886 fastmap[p[1]] = 1;
3884 fastmap[p[1]] = 1;
3885 } 3887 }
3886 break; 3888 break;
3887 3889
@@ -3899,6 +3901,10 @@ analyse_first (p, pend, fastmap, multibyte)
3899 So any that are not listed in the charset 3901 So any that are not listed in the charset
3900 are possible matches, even in multibyte buffers. */ 3902 are possible matches, even in multibyte buffers. */
3901 if (!fastmap) break; 3903 if (!fastmap) break;
3904 /* We don't need to mark LEADING_CODE_8_BIT_CONTROL specially
3905 because it will automatically be set when needed by virtue of
3906 being larger than the highest char of its charset (0xbf) but
3907 smaller than (1<<BYTEWIDTH). */
3902 for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH; 3908 for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH;
3903 j < (1 << BYTEWIDTH); j++) 3909 j < (1 << BYTEWIDTH); j++)
3904 fastmap[j] = 1; 3910 fastmap[j] = 1;
@@ -3909,7 +3915,13 @@ analyse_first (p, pend, fastmap, multibyte)
3909 for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH - 1, p++; 3915 for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH - 1, p++;
3910 j >= 0; j--) 3916 j >= 0; j--)
3911 if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not) 3917 if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not)
3912 fastmap[j] = 1; 3918 {
3919 fastmap[j] = 1;
3920#ifdef emacs
3921 if (j >= 0x80 && j < 0xa0)
3922 fastmap[LEADING_CODE_8_BIT_CONTROL] = 1;
3923#endif
3924 }
3913 3925
3914 if ((not && multibyte) 3926 if ((not && multibyte)
3915 /* Any character set can possibly contain a character 3927 /* Any character set can possibly contain a character
@@ -4352,11 +4364,33 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
4352 } 4364 }
4353 } 4365 }
4354 else 4366 else
4355 while (range > lim && !fastmap[*d]) 4367 do
4356 { 4368 {
4357 d++; 4369 re_char *d_start = d;
4358 range--; 4370 while (range > lim && !fastmap[*d])
4359 } 4371 {
4372 d++;
4373 range--;
4374 }
4375#ifdef emacs
4376 if (multibyte && range > lim)
4377 {
4378 /* Check that we are at the beginning of a char. */
4379 int at_boundary;
4380 AT_CHAR_BOUNDARY_P (at_boundary, d, d_start);
4381 if (at_boundary)
4382 break;
4383 else
4384 { /* We have matched an internal byte of a char
4385 rather than the leading byte, so it's a false
4386 positive: we should keep scanning. */
4387 d++; range--;
4388 }
4389 }
4390 else
4391#endif
4392 break;
4393 } while (1);
4360 4394
4361 startpos += irange - range; 4395 startpos += irange - range;
4362 } 4396 }