diff options
| author | Stefan Monnier | 2006-09-22 17:30:13 +0000 |
|---|---|---|
| committer | Stefan Monnier | 2006-09-22 17:30:13 +0000 |
| commit | 4560a582d623dbf040f4176bdebb8107c12c2bb8 (patch) | |
| tree | 4e9eed296a37bf4d9f85a6a8c96dddd3ff9469ed /src | |
| parent | 3ffcda547185fe2950f0ffe108604a1a13dd7b8b (diff) | |
| download | emacs-4560a582d623dbf040f4176bdebb8107c12c2bb8.tar.gz emacs-4560a582d623dbf040f4176bdebb8107c12c2bb8.zip | |
(analyse_first): For eight-bit-control chars, mark both the
char's value and its leading byte in the fastmap.
(re_search_2): When fast-scanning without translation, be careful to
check that we only match the leading byte of a multibyte char.
Diffstat (limited to 'src')
| -rw-r--r-- | src/ChangeLog | 13 | ||||
| -rw-r--r-- | src/regex.c | 50 |
2 files changed, 54 insertions, 9 deletions
diff --git a/src/ChangeLog b/src/ChangeLog index b4ddda11f88..cec46b21557 100644 --- a/src/ChangeLog +++ b/src/ChangeLog | |||
| @@ -1,3 +1,14 @@ | |||
| 1 | 2006-09-22 Stefan Monnier <monnier@iro.umontreal.ca> | ||
| 2 | |||
| 3 | * regex.c (analyse_first): For eight-bit-control chars, mark both the | ||
| 4 | char's value and its leading byte in the fastmap. | ||
| 5 | (re_search_2): When fast-scanning without translation, be careful to | ||
| 6 | check that we only match the leading byte of a multibyte char. | ||
| 7 | |||
| 8 | * charset.h (PREV_CHAR_BOUNDARY): Make it work from within a char's | ||
| 9 | byte sequence. | ||
| 10 | (AT_CHAR_BOUNDARY_P): New macro. | ||
| 11 | |||
| 1 | 2006-09-22 Kenichi Handa <handa@m17n.org> | 12 | 2006-09-22 Kenichi Handa <handa@m17n.org> |
| 2 | 13 | ||
| 3 | * fns.c (optimize_sub_char_table): Don't optimize a sub-char-table | 14 | * fns.c (optimize_sub_char_table): Don't optimize a sub-char-table |
| @@ -271,7 +282,7 @@ | |||
| 271 | 282 | ||
| 272 | 2006-08-27 Martin Rudalics <rudalics@gmx.at> | 283 | 2006-08-27 Martin Rudalics <rudalics@gmx.at> |
| 273 | 284 | ||
| 274 | * xdisp.c (mouse_autoselect_window): Removed. | 285 | * xdisp.c (mouse_autoselect_window): Remove. |
| 275 | (Vmouse_autoselect_window): New variable. DEFVAR_LISP it. | 286 | (Vmouse_autoselect_window): New variable. DEFVAR_LISP it. |
| 276 | 287 | ||
| 277 | * dispextern.h (mouse_autoselect_window): Remove extern. | 288 | * dispextern.h (mouse_autoselect_window): Remove extern. |
diff --git a/src/regex.c b/src/regex.c index 763b490c906..66e363e731c 100644 --- a/src/regex.c +++ b/src/regex.c | |||
| @@ -3877,11 +3877,13 @@ analyse_first (p, pend, fastmap, multibyte) | |||
| 3877 | if (fastmap) | 3877 | if (fastmap) |
| 3878 | { | 3878 | { |
| 3879 | int c = RE_STRING_CHAR (p + 1, pend - p); | 3879 | int c = RE_STRING_CHAR (p + 1, pend - p); |
| 3880 | 3880 | /* When fast-scanning, the fastmap can be indexed either with | |
| 3881 | a char (smaller than 256) or with the first byte of | ||
| 3882 | a char's byte sequence. So we have to conservatively add | ||
| 3883 | both to the table. */ | ||
| 3881 | if (SINGLE_BYTE_CHAR_P (c)) | 3884 | if (SINGLE_BYTE_CHAR_P (c)) |
| 3882 | fastmap[c] = 1; | 3885 | fastmap[c] = 1; |
| 3883 | else | 3886 | fastmap[p[1]] = 1; |
| 3884 | fastmap[p[1]] = 1; | ||
| 3885 | } | 3887 | } |
| 3886 | break; | 3888 | break; |
| 3887 | 3889 | ||
| @@ -3899,6 +3901,10 @@ analyse_first (p, pend, fastmap, multibyte) | |||
| 3899 | So any that are not listed in the charset | 3901 | So any that are not listed in the charset |
| 3900 | are possible matches, even in multibyte buffers. */ | 3902 | are possible matches, even in multibyte buffers. */ |
| 3901 | if (!fastmap) break; | 3903 | if (!fastmap) break; |
| 3904 | /* We don't need to mark LEADING_CODE_8_BIT_CONTROL specially | ||
| 3905 | because it will automatically be set when needed by virtue of | ||
| 3906 | being larger than the highest char of its charset (0xbf) but | ||
| 3907 | smaller than (1<<BYTEWIDTH). */ | ||
| 3902 | for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH; | 3908 | for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH; |
| 3903 | j < (1 << BYTEWIDTH); j++) | 3909 | j < (1 << BYTEWIDTH); j++) |
| 3904 | fastmap[j] = 1; | 3910 | fastmap[j] = 1; |
| @@ -3909,7 +3915,13 @@ analyse_first (p, pend, fastmap, multibyte) | |||
| 3909 | for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH - 1, p++; | 3915 | for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH - 1, p++; |
| 3910 | j >= 0; j--) | 3916 | j >= 0; j--) |
| 3911 | if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not) | 3917 | if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not) |
| 3912 | fastmap[j] = 1; | 3918 | { |
| 3919 | fastmap[j] = 1; | ||
| 3920 | #ifdef emacs | ||
| 3921 | if (j >= 0x80 && j < 0xa0) | ||
| 3922 | fastmap[LEADING_CODE_8_BIT_CONTROL] = 1; | ||
| 3923 | #endif | ||
| 3924 | } | ||
| 3913 | 3925 | ||
| 3914 | if ((not && multibyte) | 3926 | if ((not && multibyte) |
| 3915 | /* Any character set can possibly contain a character | 3927 | /* Any character set can possibly contain a character |
| @@ -4352,11 +4364,33 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop) | |||
| 4352 | } | 4364 | } |
| 4353 | } | 4365 | } |
| 4354 | else | 4366 | else |
| 4355 | while (range > lim && !fastmap[*d]) | 4367 | do |
| 4356 | { | 4368 | { |
| 4357 | d++; | 4369 | re_char *d_start = d; |
| 4358 | range--; | 4370 | while (range > lim && !fastmap[*d]) |
| 4359 | } | 4371 | { |
| 4372 | d++; | ||
| 4373 | range--; | ||
| 4374 | } | ||
| 4375 | #ifdef emacs | ||
| 4376 | if (multibyte && range > lim) | ||
| 4377 | { | ||
| 4378 | /* Check that we are at the beginning of a char. */ | ||
| 4379 | int at_boundary; | ||
| 4380 | AT_CHAR_BOUNDARY_P (at_boundary, d, d_start); | ||
| 4381 | if (at_boundary) | ||
| 4382 | break; | ||
| 4383 | else | ||
| 4384 | { /* We have matched an internal byte of a char | ||
| 4385 | rather than the leading byte, so it's a false | ||
| 4386 | positive: we should keep scanning. */ | ||
| 4387 | d++; range--; | ||
| 4388 | } | ||
| 4389 | } | ||
| 4390 | else | ||
| 4391 | #endif | ||
| 4392 | break; | ||
| 4393 | } while (1); | ||
| 4360 | 4394 | ||
| 4361 | startpos += irange - range; | 4395 | startpos += irange - range; |
| 4362 | } | 4396 | } |