diff options
| author | Kenichi Handa | 2003-05-30 07:00:29 +0000 |
|---|---|---|
| committer | Kenichi Handa | 2003-05-30 07:00:29 +0000 |
| commit | 6fdd04b0986362911b55e6dc5308f951debf21a2 (patch) | |
| tree | d4b75740d30704d398c682e8d08c72e4ce035e89 /src | |
| parent | 09d1b24e07787b770002ea01b034bc4c4cf62699 (diff) | |
| download | emacs-6fdd04b0986362911b55e6dc5308f951debf21a2.tar.gz emacs-6fdd04b0986362911b55e6dc5308f951debf21a2.zip | |
(GET_CHAR_BEFORE_2): Check multibyte, not
target_multibyte. Even in a unibyte case, return a converted
multibyte char.
(GET_CHAR_AFTER): New macro.
(PATFETCH): Translate via multibyte char.
(HANDLE_UNIBYTE_RANGE): Delete this macro.
(SETUP_MULTIBYTE_RANGE): New macro.
(regex_compile): Set up compiled code so that its multibyteness
matches that of a target. Fix the handling of "[X-YZ]" using
SETUP_MULTIBYTE_RANGE.
(analyse_first) <charset>: For filling fastmap for all multibyte
characters, don't check by BASE_LEADING_CODE_P.
(re_search_2): Don't check RE_TARGET_MULTIBYTE_P (bufp). It is
the same as RE_MULTIBYTE_P (bufp) now.
(mutually_exclusive_p): Check by (! multibyte ||
IS_REAL_ASCII (c)).
(TARGET_CHAR_AND_LENGTH): Delete this macro.
(TRANSLATE_VIA_MULTIBYTE): New macro.
(re_match_2_internal): Don't check RE_TARGET_MULTIBYTE_P (bufp).
It is the same as RE_MULTIBYTE_P (bufp) now.
<exactn>: Translate via multibyte.
<anychar>: Fetch a character by RE_STRING_CHAR_AND_LENGTH. Don't
translate it.
<charset, charset_not>: Fetch a character by
RE_STRING_CHAR_AND_LENGTH. Translate via multibyte.
<duplicate>: Call bcmp_translate with the last arg `multibyte'.
<wordbound, notwordbound, wordbeg, wordend, syntaxspec,
notsyntaxspec, categoryspec, notcategoryspec>: Fetch a character
by GET_CHAR_AFTER.
(bcmp_translate): Likewise.
Diffstat (limited to 'src')
| -rw-r--r-- | src/regex.c | 490 |
1 files changed, 227 insertions, 263 deletions
diff --git a/src/regex.c b/src/regex.c index 8cbc5f7949a..bea8433153d 100644 --- a/src/regex.c +++ b/src/regex.c | |||
| @@ -152,12 +152,12 @@ | |||
| 152 | # define RE_STRING_CHAR_AND_LENGTH(p, s, len) \ | 152 | # define RE_STRING_CHAR_AND_LENGTH(p, s, len) \ |
| 153 | (multibyte ? (STRING_CHAR_AND_LENGTH (p, s, len)) : ((len) = 1, *(p))) | 153 | (multibyte ? (STRING_CHAR_AND_LENGTH (p, s, len)) : ((len) = 1, *(p))) |
| 154 | 154 | ||
| 155 | /* Set C a (possibly multibyte) character before P. P points into a | 155 | /* Set C a (possibly converted to multibyte) character before P. P |
| 156 | string which is the virtual concatenation of STR1 (which ends at | 156 | points into a string which is the virtual concatenation of STR1 |
| 157 | END1) or STR2 (which ends at END2). */ | 157 | (which ends at END1) or STR2 (which ends at END2). */ |
| 158 | # define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \ | 158 | # define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \ |
| 159 | do { \ | 159 | do { \ |
| 160 | if (target_multibyte) \ | 160 | if (multibyte) \ |
| 161 | { \ | 161 | { \ |
| 162 | re_char *dtemp = (p) == (str2) ? (end1) : (p); \ | 162 | re_char *dtemp = (p) == (str2) ? (end1) : (p); \ |
| 163 | re_char *dlimit = ((p) > (str2) && (p) <= (end2)) ? (str2) : (str1); \ | 163 | re_char *dlimit = ((p) > (str2) && (p) <= (end2)) ? (str2) : (str1); \ |
| @@ -167,11 +167,24 @@ | |||
| 167 | else \ | 167 | else \ |
| 168 | { \ | 168 | { \ |
| 169 | (c = ((p) == (str2) ? (end1) : (p))[-1]); \ | 169 | (c = ((p) == (str2) ? (end1) : (p))[-1]); \ |
| 170 | if (multibyte) \ | 170 | MAKE_CHAR_MULTIBYTE (c); \ |
| 171 | MAKE_CHAR_MULTIBYTE (c); \ | ||
| 172 | } \ | 171 | } \ |
| 173 | } while (0) | 172 | } while (0) |
| 174 | 173 | ||
| 174 | /* Set C a (possibly converted to multibyte) character at P, and set | ||
| 175 | LEN to the byte length of that character. */ | ||
| 176 | # define GET_CHAR_AFTER(c, p, len) \ | ||
| 177 | do { \ | ||
| 178 | if (multibyte) \ | ||
| 179 | c = STRING_CHAR_AND_LENGTH (p, 0, len); \ | ||
| 180 | else \ | ||
| 181 | { \ | ||
| 182 | c = *p; \ | ||
| 183 | len = 1; \ | ||
| 184 | MAKE_CHAR_MULTIBYTE (c); \ | ||
| 185 | } \ | ||
| 186 | } while (0) | ||
| 187 | |||
| 175 | 188 | ||
| 176 | #else /* not emacs */ | 189 | #else /* not emacs */ |
| 177 | 190 | ||
| @@ -251,10 +264,13 @@ enum syntaxcode { Swhitespace = 0, Sword = 1 }; | |||
| 251 | # define RE_STRING_CHAR_AND_LENGTH STRING_CHAR_AND_LENGTH | 264 | # define RE_STRING_CHAR_AND_LENGTH STRING_CHAR_AND_LENGTH |
| 252 | # define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \ | 265 | # define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \ |
| 253 | (c = ((p) == (str2) ? *((end1) - 1) : *((p) - 1))) | 266 | (c = ((p) == (str2) ? *((end1) - 1) : *((p) - 1))) |
| 267 | # define GET_CHAR_AFTER(c, p, len) \ | ||
| 268 | (c = *p, len = 1) | ||
| 254 | # define MAKE_CHAR(charset, c1, c2) (c1) | 269 | # define MAKE_CHAR(charset, c1, c2) (c1) |
| 255 | # define BYTE8_TO_CHAR(c) (c) | 270 | # define BYTE8_TO_CHAR(c) (c) |
| 256 | # define CHAR_BYTE8_P(c) (0) | 271 | # define CHAR_BYTE8_P(c) (0) |
| 257 | # define MAKE_CHAR_MULTIBYTE(c) 0 | 272 | # define MAKE_CHAR_MULTIBYTE(c) (c) |
| 273 | # define MAKE_CHAR_UNIBYTE(c) (c) | ||
| 258 | # define CHAR_LEADING_CODE(c) (c) | 274 | # define CHAR_LEADING_CODE(c) (c) |
| 259 | #endif /* not emacs */ | 275 | #endif /* not emacs */ |
| 260 | 276 | ||
| @@ -1676,6 +1692,8 @@ static int analyse_first _RE_ARGS ((re_char *p, re_char *pend, | |||
| 1676 | if (! multibyte) \ | 1692 | if (! multibyte) \ |
| 1677 | MAKE_CHAR_MULTIBYTE (c); \ | 1693 | MAKE_CHAR_MULTIBYTE (c); \ |
| 1678 | c = TRANSLATE (c); \ | 1694 | c = TRANSLATE (c); \ |
| 1695 | if (! target_multibyte) \ | ||
| 1696 | MAKE_CHAR_UNIBYTE (c); \ | ||
| 1679 | } while (0) | 1697 | } while (0) |
| 1680 | 1698 | ||
| 1681 | /* Fetch the next character in the uncompiled pattern, with no | 1699 | /* Fetch the next character in the uncompiled pattern, with no |
| @@ -1933,46 +1951,27 @@ struct range_table_work_area | |||
| 1933 | not that easy because macros called within it assumes various | 1951 | not that easy because macros called within it assumes various |
| 1934 | variables being defined. */ | 1952 | variables being defined. */ |
| 1935 | 1953 | ||
| 1936 | #define HANDLE_UNIBYTE_RANGE(work_area, c1, c2) \ | 1954 | #define SETUP_MULTIBYTE_RANGE(work_area, c0, c1) \ |
| 1937 | do { \ | 1955 | do { \ |
| 1938 | int char_table[257]; \ | 1956 | re_wchar_t c, t, t_last; \ |
| 1939 | int i, j, c; \ | 1957 | int n; \ |
| 1940 | \ | 1958 | \ |
| 1941 | char_table[(c1) - 1] = -2; /* head sentinel */ \ | 1959 | c = (c0); \ |
| 1942 | for (i = (c1); i <= (c2); i++) \ | 1960 | t_last = multibyte ? TRANSLATE (c) : TRANSLATE (MAKE_CHAR_MULTIBYTE (c)); \ |
| 1943 | char_table[i] = TRANSLATE (unibyte_char_to_multibyte (i)); \ | 1961 | for (c++, n = 1; c <= (c1); c++, n++) \ |
| 1944 | char_table[i] = MAX_CHAR + 2; /* tail sentinel */ \ | 1962 | { \ |
| 1945 | \ | 1963 | t = multibyte ? TRANSLATE (c) : TRANSLATE (MAKE_CHAR_MULTIBYTE (c)); \ |
| 1946 | /* As the number of data is small (at most 128) and we can expect \ | 1964 | if (t_last + n == t) \ |
| 1947 | that data in char_table are mostly sorted, we use fairly simple \ | 1965 | continue; \ |
| 1948 | `insertion sort'. */ \ | 1966 | SET_RANGE_TABLE_WORK_AREA ((work_area), t_last, t_last + n - 1); \ |
| 1949 | for (i = (c1) + 1; i <= (c2); i++) \ | 1967 | t_last = t; \ |
| 1950 | { \ | 1968 | n = 1; \ |
| 1951 | c = char_table[i]; \ | 1969 | } \ |
| 1952 | j = i; \ | 1970 | if (n > 0) \ |
| 1953 | while (char_table[j - 1] > c) \ | 1971 | SET_RANGE_TABLE_WORK_AREA ((work_area), t_last, t_last + n - 1); \ |
| 1954 | char_table[j] = char_table[j - 1], j--; \ | ||
| 1955 | char_table[j] = c; \ | ||
| 1956 | } \ | ||
| 1957 | \ | ||
| 1958 | for (i = (c1); i <= (c2); i++) \ | ||
| 1959 | { \ | ||
| 1960 | c = char_table[i]; \ | ||
| 1961 | if (! IS_REAL_ASCII (c)) \ | ||
| 1962 | break; \ | ||
| 1963 | SET_LIST_BIT (c); \ | ||
| 1964 | } \ | ||
| 1965 | while (i <= (c2)) \ | ||
| 1966 | { \ | ||
| 1967 | c = char_table[i]; \ | ||
| 1968 | for (j = i + 1; j <= (c2); j++) \ | ||
| 1969 | if (char_table[j] - c != j - i) \ | ||
| 1970 | break; \ | ||
| 1971 | SET_RANGE_TABLE_WORK_AREA ((work_area), c, char_table[j - 1]); \ | ||
| 1972 | i = j; \ | ||
| 1973 | } \ | ||
| 1974 | } while (0) | 1972 | } while (0) |
| 1975 | 1973 | ||
| 1974 | |||
| 1976 | #endif /* emacs */ | 1975 | #endif /* emacs */ |
| 1977 | 1976 | ||
| 1978 | /* Get the next unsigned number in the uncompiled pattern. */ | 1977 | /* Get the next unsigned number in the uncompiled pattern. */ |
| @@ -2258,6 +2257,9 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2258 | /* If the object matched can contain multibyte characters. */ | 2257 | /* If the object matched can contain multibyte characters. */ |
| 2259 | const boolean multibyte = RE_MULTIBYTE_P (bufp); | 2258 | const boolean multibyte = RE_MULTIBYTE_P (bufp); |
| 2260 | 2259 | ||
| 2260 | /* If a target can contain multibyte characters. */ | ||
| 2261 | const boolean target_multibyte = RE_TARGET_MULTIBYTE_P (bufp); | ||
| 2262 | |||
| 2261 | #ifdef DEBUG | 2263 | #ifdef DEBUG |
| 2262 | debug++; | 2264 | debug++; |
| 2263 | DEBUG_PRINT1 ("\nCompiling pattern: "); | 2265 | DEBUG_PRINT1 ("\nCompiling pattern: "); |
| @@ -2572,10 +2574,6 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2572 | break; | 2574 | break; |
| 2573 | } | 2575 | } |
| 2574 | 2576 | ||
| 2575 | /* What should we do for the character which is | ||
| 2576 | greater than 0x7F, but not BASE_LEADING_CODE_P? | ||
| 2577 | XXX */ | ||
| 2578 | |||
| 2579 | /* See if we're at the beginning of a possible character | 2577 | /* See if we're at the beginning of a possible character |
| 2580 | class. */ | 2578 | class. */ |
| 2581 | 2579 | ||
| @@ -2671,65 +2669,41 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2671 | 2669 | ||
| 2672 | /* Fetch the character which ends the range. */ | 2670 | /* Fetch the character which ends the range. */ |
| 2673 | PATFETCH_RAW (c1); | 2671 | PATFETCH_RAW (c1); |
| 2674 | #ifdef emacs | 2672 | if (c > c1) |
| 2675 | if (multibyte) | ||
| 2676 | { | ||
| 2677 | c = TRANSLATE (c); | ||
| 2678 | c1 = TRANSLATE (c1); | ||
| 2679 | if (! IS_REAL_ASCII (c1)) | ||
| 2680 | { | ||
| 2681 | SET_RANGE_TABLE_WORK_AREA (range_table_work, | ||
| 2682 | c, c1); | ||
| 2683 | c1 = 127; | ||
| 2684 | } | ||
| 2685 | } | ||
| 2686 | else | ||
| 2687 | { | 2673 | { |
| 2688 | if (! IS_REAL_ASCII (c1)) | 2674 | if (syntax & RE_NO_EMPTY_RANGES) |
| 2689 | { | 2675 | FREE_STACK_RETURN (REG_ERANGE); |
| 2690 | int c2 = MAX (c, 128); | 2676 | /* Else, repeat the loop. */ |
| 2691 | |||
| 2692 | HANDLE_UNIBYTE_RANGE (range_table_work, c2, c1); | ||
| 2693 | c1 = 127; | ||
| 2694 | } | ||
| 2695 | } | 2677 | } |
| 2696 | #endif | ||
| 2697 | } | 2678 | } |
| 2698 | else | 2679 | else |
| 2680 | c1 = c; | ||
| 2681 | #ifndef emacs | ||
| 2682 | c = TRANSLATE (c); | ||
| 2683 | c1 = TRANSLATE (c1); | ||
| 2684 | #else /* not emacs */ | ||
| 2685 | if (target_multibyte) | ||
| 2699 | { | 2686 | { |
| 2700 | /* Range from C to C. */ | 2687 | if (! IS_REAL_ASCII (c1)) |
| 2701 | if (! multibyte) | ||
| 2702 | MAKE_CHAR_MULTIBYTE (c); | ||
| 2703 | c = TRANSLATE (c); | ||
| 2704 | if (IS_REAL_ASCII (c)) | ||
| 2705 | c1 = c; | ||
| 2706 | else | ||
| 2707 | { | 2688 | { |
| 2708 | SET_RANGE_TABLE_WORK_AREA (range_table_work, c, c); | 2689 | re_wchar_t c0 = MAX (c, 128); |
| 2709 | c = -1; /* Suppress setting bitmap. */ | 2690 | |
| 2691 | SETUP_MULTIBYTE_RANGE (range_table_work, c0, c1); | ||
| 2692 | c1 = MIN (127, c1); | ||
| 2710 | } | 2693 | } |
| 2711 | } | 2694 | } |
| 2712 | 2695 | else | |
| 2713 | /* Set the range into bitmap */ | ||
| 2714 | if (c >= 0) | ||
| 2715 | { | 2696 | { |
| 2716 | re_wchar_t this_char; | 2697 | if (multibyte) |
| 2717 | int range_start = c, range_end = c1; | ||
| 2718 | |||
| 2719 | /* If the start is after the end, the range is empty. */ | ||
| 2720 | if (range_start > range_end) | ||
| 2721 | { | ||
| 2722 | if (syntax & RE_NO_EMPTY_RANGES) | ||
| 2723 | FREE_STACK_RETURN (REG_ERANGE); | ||
| 2724 | /* Else, repeat the loop. */ | ||
| 2725 | } | ||
| 2726 | else | ||
| 2727 | { | 2698 | { |
| 2728 | for (this_char = range_start; this_char <= range_end; | 2699 | MAKE_CHAR_UNIBYTE (c); |
| 2729 | this_char++) | 2700 | MAKE_CHAR_UNIBYTE (c1); |
| 2730 | SET_LIST_BIT (TRANSLATE (this_char)); | ||
| 2731 | } | 2701 | } |
| 2732 | } | 2702 | } |
| 2703 | #endif /* not emacs */ | ||
| 2704 | /* Set the range into bitmap */ | ||
| 2705 | for (; c <= c1; c++) | ||
| 2706 | SET_LIST_BIT (TRANSLATE (c)); | ||
| 2733 | } | 2707 | } |
| 2734 | 2708 | ||
| 2735 | /* Discard any (non)matching list bytes that are all 0 at the | 2709 | /* Discard any (non)matching list bytes that are all 0 at the |
| @@ -3264,7 +3238,11 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 3264 | /* You might think it would be useful for \ to mean | 3238 | /* You might think it would be useful for \ to mean |
| 3265 | not to translate; but if we don't translate it | 3239 | not to translate; but if we don't translate it |
| 3266 | it will never match anything. */ | 3240 | it will never match anything. */ |
| 3241 | /* Actually we don't have to translate it now, because | ||
| 3242 | it is anyway translated later. */ | ||
| 3243 | #if 0 | ||
| 3267 | c = TRANSLATE (c); | 3244 | c = TRANSLATE (c); |
| 3245 | #endif | ||
| 3268 | goto normal_char; | 3246 | goto normal_char; |
| 3269 | } | 3247 | } |
| 3270 | break; | 3248 | break; |
| @@ -3307,8 +3285,17 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 3307 | if (! multibyte) | 3285 | if (! multibyte) |
| 3308 | MAKE_CHAR_MULTIBYTE (c); | 3286 | MAKE_CHAR_MULTIBYTE (c); |
| 3309 | c = TRANSLATE (c); | 3287 | c = TRANSLATE (c); |
| 3310 | len = CHAR_STRING (c, b); | 3288 | if (target_multibyte) |
| 3311 | b += len; | 3289 | { |
| 3290 | len = CHAR_STRING (c, b); | ||
| 3291 | b += len; | ||
| 3292 | } | ||
| 3293 | else | ||
| 3294 | { | ||
| 3295 | MAKE_CHAR_UNIBYTE (c); | ||
| 3296 | *b++ = c; | ||
| 3297 | len = 1; | ||
| 3298 | } | ||
| 3312 | (*pending_exact) += len; | 3299 | (*pending_exact) += len; |
| 3313 | } | 3300 | } |
| 3314 | 3301 | ||
| @@ -3334,6 +3321,11 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 3334 | /* We have succeeded; set the length of the buffer. */ | 3321 | /* We have succeeded; set the length of the buffer. */ |
| 3335 | bufp->used = b - bufp->buffer; | 3322 | bufp->used = b - bufp->buffer; |
| 3336 | 3323 | ||
| 3324 | #ifdef emacs | ||
| 3325 | /* Now the buffer is adjusted for the multibyteness of a target. */ | ||
| 3326 | bufp->multibyte = bufp->target_multibyte; | ||
| 3327 | #endif | ||
| 3328 | |||
| 3337 | #ifdef DEBUG | 3329 | #ifdef DEBUG |
| 3338 | if (debug > 0) | 3330 | if (debug > 0) |
| 3339 | { | 3331 | { |
| @@ -3513,8 +3505,6 @@ group_in_compile_stack (compile_stack, regnum) | |||
| 3513 | bother filling it up (obviously) and only return whether the | 3505 | bother filling it up (obviously) and only return whether the |
| 3514 | pattern could potentially match the empty string. | 3506 | pattern could potentially match the empty string. |
| 3515 | 3507 | ||
| 3516 | MULTIBYTE is always 1 for Emacs, and 0 otherwise. | ||
| 3517 | |||
| 3518 | Return 1 if p..pend might match the empty string. | 3508 | Return 1 if p..pend might match the empty string. |
| 3519 | Return 0 if p..pend matches at least one char. | 3509 | Return 0 if p..pend matches at least one char. |
| 3520 | Return -1 if fastmap was not updated accurately. */ | 3510 | Return -1 if fastmap was not updated accurately. */ |
| @@ -3600,7 +3590,8 @@ analyse_first (p, pend, fastmap, multibyte) | |||
| 3600 | if (!fastmap) break; | 3590 | if (!fastmap) break; |
| 3601 | { | 3591 | { |
| 3602 | /* Chars beyond end of bitmap are possible matches. */ | 3592 | /* Chars beyond end of bitmap are possible matches. */ |
| 3603 | /* Emacs uses the bitmap only for ASCII characters. */ | 3593 | /* In a multibyte case, the bitmap is used only for ASCII |
| 3594 | characters. */ | ||
| 3604 | int limit = multibyte ? 128 : (1 << BYTEWIDTH); | 3595 | int limit = multibyte ? 128 : (1 << BYTEWIDTH); |
| 3605 | 3596 | ||
| 3606 | for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH; | 3597 | for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH; |
| @@ -3623,14 +3614,12 @@ analyse_first (p, pend, fastmap, multibyte) | |||
| 3623 | || (CHARSET_RANGE_TABLE_EXISTS_P (&p[-2]) | 3614 | || (CHARSET_RANGE_TABLE_EXISTS_P (&p[-2]) |
| 3624 | && CHARSET_RANGE_TABLE_BITS (&p[-2]) != 0)) | 3615 | && CHARSET_RANGE_TABLE_BITS (&p[-2]) != 0)) |
| 3625 | /* If we can match a character class, we can match | 3616 | /* If we can match a character class, we can match |
| 3626 | any character set. */ | 3617 | any multibyte characters. */ |
| 3627 | { | 3618 | { |
| 3628 | set_fastmap_for_multibyte_characters: | ||
| 3629 | if (match_any_multibyte_characters == false) | 3619 | if (match_any_multibyte_characters == false) |
| 3630 | { | 3620 | { |
| 3631 | for (j = 0x80; j < 0x100; j++) /* XXX */ | 3621 | for (j = 0x80; j < (1 << BYTEWIDTH); j++) |
| 3632 | if (BASE_LEADING_CODE_P (j)) | 3622 | fastmap[j] = 1; |
| 3633 | fastmap[j] = 1; | ||
| 3634 | match_any_multibyte_characters = true; | 3623 | match_any_multibyte_characters = true; |
| 3635 | } | 3624 | } |
| 3636 | } | 3625 | } |
| @@ -3688,9 +3677,16 @@ analyse_first (p, pend, fastmap, multibyte) | |||
| 3688 | fastmap[j] = 1; | 3677 | fastmap[j] = 1; |
| 3689 | 3678 | ||
| 3690 | if (multibyte) | 3679 | if (multibyte) |
| 3691 | /* Any character set can possibly contain a character | 3680 | { |
| 3692 | whose category is K (or not). */ | 3681 | /* Any character set can possibly contain a character |
| 3693 | goto set_fastmap_for_multibyte_characters; | 3682 | whose category is K (or not). */ |
| 3683 | if (match_any_multibyte_characters == false) | ||
| 3684 | { | ||
| 3685 | for (j = 0x80; j < (1 << BYTEWIDTH); j++) | ||
| 3686 | fastmap[j] = 1; | ||
| 3687 | match_any_multibyte_characters = true; | ||
| 3688 | } | ||
| 3689 | } | ||
| 3694 | break; | 3690 | break; |
| 3695 | 3691 | ||
| 3696 | /* All cases after this match the empty string. These end with | 3692 | /* All cases after this match the empty string. These end with |
| @@ -3942,15 +3938,9 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop) | |||
| 3942 | int total_size = size1 + size2; | 3938 | int total_size = size1 + size2; |
| 3943 | int endpos = startpos + range; | 3939 | int endpos = startpos + range; |
| 3944 | boolean anchored_start; | 3940 | boolean anchored_start; |
| 3945 | 3941 | /* Nonzero if BUFP is setup for multibyte characters. We are sure | |
| 3946 | /* Nonzero if BUFP is setup for multibyte characters. */ | 3942 | that it is the same as RE_TARGET_MULTIBYTE_P (bufp). */ |
| 3947 | #ifdef emacs | 3943 | const boolean multibyte = RE_MULTIBYTE_P (bufp); |
| 3948 | const boolean multibyte = 1; | ||
| 3949 | #else | ||
| 3950 | const boolean multibyte = 0; | ||
| 3951 | #endif | ||
| 3952 | /* Nonzero if STR1 and STR2 contains multibyte characters. */ | ||
| 3953 | const boolean target_multibyte = RE_TARGET_MULTIBYTE_P (bufp); | ||
| 3954 | 3944 | ||
| 3955 | /* Check for out-of-range STARTPOS. */ | 3945 | /* Check for out-of-range STARTPOS. */ |
| 3956 | if (startpos < 0 || startpos > total_size) | 3946 | if (startpos < 0 || startpos > total_size) |
| @@ -4039,59 +4029,57 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop) | |||
| 4039 | inside the loop. */ | 4029 | inside the loop. */ |
| 4040 | if (RE_TRANSLATE_P (translate)) | 4030 | if (RE_TRANSLATE_P (translate)) |
| 4041 | { | 4031 | { |
| 4042 | if (target_multibyte) | 4032 | if (multibyte) |
| 4043 | while (range > lim) | 4033 | while (range > lim) |
| 4044 | { | 4034 | { |
| 4045 | int buf_charlen; | 4035 | int buf_charlen; |
| 4046 | 4036 | ||
| 4047 | buf_ch = STRING_CHAR_AND_LENGTH (d, range - lim, | 4037 | buf_ch = STRING_CHAR_AND_LENGTH (d, range - lim, |
| 4048 | buf_charlen); | 4038 | buf_charlen); |
| 4049 | |||
| 4050 | buf_ch = RE_TRANSLATE (translate, buf_ch); | 4039 | buf_ch = RE_TRANSLATE (translate, buf_ch); |
| 4051 | if (fastmap[CHAR_LEADING_CODE (buf_ch)]) | 4040 | if (fastmap[CHAR_LEADING_CODE (buf_ch)]) |
| 4052 | break; | 4041 | break; |
| 4053 | |||
| 4054 | range -= buf_charlen; | 4042 | range -= buf_charlen; |
| 4055 | d += buf_charlen; | 4043 | d += buf_charlen; |
| 4056 | } | 4044 | } |
| 4057 | else if (multibyte) | 4045 | else |
| 4058 | while (range > lim) | 4046 | while (range > lim) |
| 4059 | { | 4047 | { |
| 4060 | buf_ch = *d; | 4048 | buf_ch = *d; |
| 4049 | #ifdef emacs | ||
| 4061 | MAKE_CHAR_MULTIBYTE (buf_ch); | 4050 | MAKE_CHAR_MULTIBYTE (buf_ch); |
| 4062 | buf_ch = RE_TRANSLATE (translate, buf_ch); | 4051 | #endif |
| 4063 | if (fastmap[CHAR_LEADING_CODE (buf_ch)]) | 4052 | buf_ch = RE_TRANSLATE (buf_ch); |
| 4053 | #ifdef emacs | ||
| 4054 | MAKE_CHAR_UNIBYTE (buf_ch); | ||
| 4055 | #endif | ||
| 4056 | if (fastmap[buf_ch]) | ||
| 4064 | break; | 4057 | break; |
| 4065 | |||
| 4066 | d++; | 4058 | d++; |
| 4067 | range--; | 4059 | range--; |
| 4068 | } | 4060 | } |
| 4061 | } | ||
| 4062 | else | ||
| 4063 | { | ||
| 4064 | if (multibyte) | ||
| 4065 | while (range > lim) | ||
| 4066 | { | ||
| 4067 | int buf_charlen; | ||
| 4068 | |||
| 4069 | buf_ch = STRING_CHAR_AND_LENGTH (d, range - lim, | ||
| 4070 | buf_charlen); | ||
| 4071 | if (fastmap[CHAR_LEADING_CODE (buf_ch)]) | ||
| 4072 | break; | ||
| 4073 | range -= buf_charlen; | ||
| 4074 | d += buf_charlen; | ||
| 4075 | } | ||
| 4069 | else | 4076 | else |
| 4070 | while (range > lim | 4077 | while (range > lim && !fastmap[*d]) |
| 4071 | && !fastmap[RE_TRANSLATE (translate, *d)]) | ||
| 4072 | { | 4078 | { |
| 4073 | d++; | 4079 | d++; |
| 4074 | range--; | 4080 | range--; |
| 4075 | } | 4081 | } |
| 4076 | } | 4082 | } |
| 4077 | else if (multibyte && ! target_multibyte) | ||
| 4078 | while (range > lim) | ||
| 4079 | { | ||
| 4080 | buf_ch = *d; | ||
| 4081 | MAKE_CHAR_MULTIBYTE (buf_ch); | ||
| 4082 | if (fastmap[CHAR_LEADING_CODE (buf_ch)]) | ||
| 4083 | break; | ||
| 4084 | |||
| 4085 | d++; | ||
| 4086 | range--; | ||
| 4087 | } | ||
| 4088 | else | ||
| 4089 | while (range > lim && !fastmap[*d]) | ||
| 4090 | { | ||
| 4091 | d++; | ||
| 4092 | range--; | ||
| 4093 | } | ||
| 4094 | |||
| 4095 | startpos += irange - range; | 4083 | startpos += irange - range; |
| 4096 | } | 4084 | } |
| 4097 | else /* Searching backwards. */ | 4085 | else /* Searching backwards. */ |
| @@ -4102,14 +4090,7 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop) | |||
| 4102 | 4090 | ||
| 4103 | if (multibyte) | 4091 | if (multibyte) |
| 4104 | { | 4092 | { |
| 4105 | /* Case of Emacs. */ | 4093 | buf_ch = STRING_CHAR (d, room); |
| 4106 | if (target_multibyte) | ||
| 4107 | buf_ch = RE_STRING_CHAR (d, room); | ||
| 4108 | else | ||
| 4109 | { | ||
| 4110 | buf_ch = *d; | ||
| 4111 | MAKE_CHAR_MULTIBYTE (buf_ch); | ||
| 4112 | } | ||
| 4113 | buf_ch = TRANSLATE (buf_ch); | 4094 | buf_ch = TRANSLATE (buf_ch); |
| 4114 | if (! fastmap[CHAR_LEADING_CODE (buf_ch)]) | 4095 | if (! fastmap[CHAR_LEADING_CODE (buf_ch)]) |
| 4115 | goto advance; | 4096 | goto advance; |
| @@ -4147,7 +4128,7 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop) | |||
| 4147 | else if (range > 0) | 4128 | else if (range > 0) |
| 4148 | { | 4129 | { |
| 4149 | /* Update STARTPOS to the next character boundary. */ | 4130 | /* Update STARTPOS to the next character boundary. */ |
| 4150 | if (target_multibyte) | 4131 | if (multibyte) |
| 4151 | { | 4132 | { |
| 4152 | re_char *p = POS_ADDR_VSTRING (startpos); | 4133 | re_char *p = POS_ADDR_VSTRING (startpos); |
| 4153 | re_char *pend = STOP_ADDR_VSTRING (startpos); | 4134 | re_char *pend = STOP_ADDR_VSTRING (startpos); |
| @@ -4170,7 +4151,7 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop) | |||
| 4170 | startpos--; | 4151 | startpos--; |
| 4171 | 4152 | ||
| 4172 | /* Update STARTPOS to the previous character boundary. */ | 4153 | /* Update STARTPOS to the previous character boundary. */ |
| 4173 | if (target_multibyte) | 4154 | if (multibyte) |
| 4174 | { | 4155 | { |
| 4175 | re_char *p = POS_ADDR_VSTRING (startpos); | 4156 | re_char *p = POS_ADDR_VSTRING (startpos); |
| 4176 | int len = 0; | 4157 | int len = 0; |
| @@ -4178,20 +4159,10 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop) | |||
| 4178 | /* Find the head of multibyte form. */ | 4159 | /* Find the head of multibyte form. */ |
| 4179 | while (!CHAR_HEAD_P (*p)) | 4160 | while (!CHAR_HEAD_P (*p)) |
| 4180 | p--, len++; | 4161 | p--, len++; |
| 4181 | 4162 | range += len; | |
| 4182 | /* Adjust it. */ | 4163 | if (range > 0) |
| 4183 | #if 0 /* XXX */ | 4164 | break; |
| 4184 | if (MULTIBYTE_FORM_LENGTH (p, len + 1) != (len + 1)) | 4165 | startpos -= len; |
| 4185 | ; | ||
| 4186 | else | ||
| 4187 | #endif | ||
| 4188 | { | ||
| 4189 | range += len; | ||
| 4190 | if (range > 0) | ||
| 4191 | break; | ||
| 4192 | |||
| 4193 | startpos -= len; | ||
| 4194 | } | ||
| 4195 | } | 4166 | } |
| 4196 | } | 4167 | } |
| 4197 | } | 4168 | } |
| @@ -4424,7 +4395,7 @@ mutually_exclusive_p (bufp, p1, p2) | |||
| 4424 | 4395 | ||
| 4425 | /* Test if C is listed in charset (or charset_not) | 4396 | /* Test if C is listed in charset (or charset_not) |
| 4426 | at `p1'. */ | 4397 | at `p1'. */ |
| 4427 | if (SINGLE_BYTE_CHAR_P (c)) | 4398 | if (! multibyte || IS_REAL_ASCII (c)) |
| 4428 | { | 4399 | { |
| 4429 | if (c < CHARSET_BITMAP_SIZE (p1) * BYTEWIDTH | 4400 | if (c < CHARSET_BITMAP_SIZE (p1) * BYTEWIDTH |
| 4430 | && p1[2 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) | 4401 | && p1[2 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) |
| @@ -4467,9 +4438,10 @@ mutually_exclusive_p (bufp, p1, p2) | |||
| 4467 | size of bitmap table of P1 is extracted by | 4438 | size of bitmap table of P1 is extracted by |
| 4468 | using macro `CHARSET_BITMAP_SIZE'. | 4439 | using macro `CHARSET_BITMAP_SIZE'. |
| 4469 | 4440 | ||
| 4470 | Since we know that all the character listed in | 4441 | In a multibyte case, we know that all the character |
| 4471 | P2 is ASCII, it is enough to test only bitmap | 4442 | listed in P2 is ASCII. In a unibyte case, P1 has only a |
| 4472 | table of P1. */ | 4443 | bitmap table. So, in both cases, it is enough to test |
| 4444 | only the bitmap table of P1. */ | ||
| 4473 | 4445 | ||
| 4474 | if ((re_opcode_t) *p1 == charset) | 4446 | if ((re_opcode_t) *p1 == charset) |
| 4475 | { | 4447 | { |
| @@ -4628,13 +4600,20 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4628 | WEAK_ALIAS (__re_match_2, re_match_2) | 4600 | WEAK_ALIAS (__re_match_2, re_match_2) |
| 4629 | 4601 | ||
| 4630 | #ifdef emacs | 4602 | #ifdef emacs |
| 4631 | #define TARGET_CHAR_AND_LENGTH(d, len, actual_len) \ | 4603 | #define TRANSLATE_VIA_MULTIBYTE(c) \ |
| 4632 | (target_multibyte \ | 4604 | do { \ |
| 4633 | ? STRING_CHAR_AND_LENGTH (d, len, actual_len) \ | 4605 | if (multibyte) \ |
| 4634 | : (actual_len = 1, unibyte_char_to_multibyte (*d))) | 4606 | (c) = TRANSLATE (c); \ |
| 4607 | else \ | ||
| 4608 | { \ | ||
| 4609 | MAKE_CHAR_MULTIBYTE (c); \ | ||
| 4610 | (c) = TRANSLATE (c); \ | ||
| 4611 | MAKE_CHAR_UNIBYTE (c); \ | ||
| 4612 | } \ | ||
| 4613 | } while (0) | ||
| 4614 | |||
| 4635 | #else | 4615 | #else |
| 4636 | #define TARGET_CHAR_AND_LENGTH(d, len, actual_len) \ | 4616 | #define TRANSLATE_VIA_MULTIBYTE(c) ((c) = TRANSLATE (c)) |
| 4637 | (actual_len = 1, *d) | ||
| 4638 | #endif | 4617 | #endif |
| 4639 | 4618 | ||
| 4640 | 4619 | ||
| @@ -4677,14 +4656,9 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4677 | /* We use this to map every character in the string. */ | 4656 | /* We use this to map every character in the string. */ |
| 4678 | RE_TRANSLATE_TYPE translate = bufp->translate; | 4657 | RE_TRANSLATE_TYPE translate = bufp->translate; |
| 4679 | 4658 | ||
| 4680 | /* Nonzero if BUFP is setup for multibyte characters. */ | 4659 | /* Nonzero if BUFP is setup for multibyte characters. We are sure |
| 4681 | #ifdef emacs | 4660 | that it is the same as RE_TARGET_MULTIBYTE_P (bufp). */ |
| 4682 | const boolean multibyte = 1; | 4661 | const boolean multibyte = RE_MULTIBYTE_P (bufp); |
| 4683 | #else | ||
| 4684 | const boolean multibyte = 0; | ||
| 4685 | #endif | ||
| 4686 | /* Nonzero if STR1 and STR2 contains multibyte characters. */ | ||
| 4687 | const boolean target_multibyte = RE_TARGET_MULTIBYTE_P (bufp); | ||
| 4688 | 4662 | ||
| 4689 | /* Failure point stack. Each place that can handle a failure further | 4663 | /* Failure point stack. Each place that can handle a failure further |
| 4690 | down the line pushes a failure point on this stack. It consists of | 4664 | down the line pushes a failure point on this stack. It consists of |
| @@ -5037,82 +5011,73 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5037 | /* Remember the start point to rollback upon failure. */ | 5011 | /* Remember the start point to rollback upon failure. */ |
| 5038 | dfail = d; | 5012 | dfail = d; |
| 5039 | 5013 | ||
| 5014 | #ifndef emacs | ||
| 5040 | /* This is written out as an if-else so we don't waste time | 5015 | /* This is written out as an if-else so we don't waste time |
| 5041 | testing `translate' inside the loop. */ | 5016 | testing `translate' inside the loop. */ |
| 5042 | if (RE_TRANSLATE_P (translate)) | 5017 | if (RE_TRANSLATE_P (translate)) |
| 5043 | { | 5018 | do |
| 5044 | if (multibyte) | 5019 | { |
| 5045 | do | 5020 | PREFETCH (); |
| 5046 | { | 5021 | if (RE_TRANSLATE (translate, *d) != *p++) |
| 5047 | int pat_charlen, buf_charlen; | ||
| 5048 | unsigned int pat_ch, buf_ch; | ||
| 5049 | |||
| 5050 | PREFETCH (); | ||
| 5051 | pat_ch = STRING_CHAR_AND_LENGTH (p, pend - p, pat_charlen); | ||
| 5052 | buf_ch = TARGET_CHAR_AND_LENGTH (d, dend - d, buf_charlen); | ||
| 5053 | |||
| 5054 | if (RE_TRANSLATE (translate, buf_ch) | ||
| 5055 | != pat_ch) | ||
| 5056 | { | ||
| 5057 | d = dfail; | ||
| 5058 | goto fail; | ||
| 5059 | } | ||
| 5060 | |||
| 5061 | p += pat_charlen; | ||
| 5062 | d += buf_charlen; | ||
| 5063 | mcnt -= pat_charlen; | ||
| 5064 | } | ||
| 5065 | while (mcnt > 0); | ||
| 5066 | else | ||
| 5067 | do | ||
| 5068 | { | 5022 | { |
| 5069 | PREFETCH (); | 5023 | d = dfail; |
| 5070 | if (RE_TRANSLATE (translate, *d) != *p++) | 5024 | goto fail; |
| 5071 | { | ||
| 5072 | d = dfail; | ||
| 5073 | goto fail; | ||
| 5074 | } | ||
| 5075 | d++; | ||
| 5076 | } | 5025 | } |
| 5077 | while (--mcnt); | 5026 | d++; |
| 5078 | } | 5027 | } |
| 5028 | while (--mcnt); | ||
| 5079 | else | 5029 | else |
| 5080 | { | 5030 | do |
| 5081 | if (multibyte == target_multibyte) | 5031 | { |
| 5082 | do | 5032 | PREFETCH (); |
| 5033 | if (*d++ != *p++) | ||
| 5083 | { | 5034 | { |
| 5084 | PREFETCH (); | 5035 | d = dfail; |
| 5085 | if (*d++ != *p++) | 5036 | goto fail; |
| 5086 | { | ||
| 5087 | d = dfail; | ||
| 5088 | goto fail; | ||
| 5089 | } | ||
| 5090 | } | 5037 | } |
| 5091 | while (--mcnt); | 5038 | } |
| 5092 | else /* i.e. multibyte && ! target_multibyte */ | 5039 | while (--mcnt); |
| 5093 | do | 5040 | #else /* emacs */ |
| 5041 | /* The cost of testing `translate' is comparatively small. */ | ||
| 5042 | if (multibyte) | ||
| 5043 | do | ||
| 5044 | { | ||
| 5045 | int pat_charlen, buf_charlen; | ||
| 5046 | unsigned int pat_ch, buf_ch; | ||
| 5047 | |||
| 5048 | PREFETCH (); | ||
| 5049 | pat_ch = STRING_CHAR_AND_LENGTH (p, pend - p, pat_charlen); | ||
| 5050 | buf_ch = STRING_CHAR_AND_LENGTH (d, dend - d, buf_charlen); | ||
| 5051 | |||
| 5052 | if (TRANSLATE (buf_ch) != pat_ch) | ||
| 5094 | { | 5053 | { |
| 5095 | int pat_charlen, buf_charlen; | 5054 | d = dfail; |
| 5096 | unsigned int pat_ch, buf_ch; | 5055 | goto fail; |
| 5056 | } | ||
| 5097 | 5057 | ||
| 5098 | PREFETCH (); | 5058 | p += pat_charlen; |
| 5099 | pat_ch = STRING_CHAR_AND_LENGTH (p, pend - p, pat_charlen); | 5059 | d += buf_charlen; |
| 5100 | buf_ch = TARGET_CHAR_AND_LENGTH (d, dend - d, buf_charlen); | 5060 | mcnt -= pat_charlen; |
| 5061 | } | ||
| 5062 | while (mcnt > 0); | ||
| 5063 | else | ||
| 5064 | do | ||
| 5065 | { | ||
| 5066 | unsigned int buf_ch; | ||
| 5101 | 5067 | ||
| 5102 | if (pat_ch != buf_ch) | 5068 | PREFETCH (); |
| 5103 | { | 5069 | buf_ch = *d++; |
| 5104 | d = dfail; | 5070 | TRANSLATE_VIA_MULTIBYTE (buf_ch); |
| 5105 | goto fail; | 5071 | if (buf_ch != *p++) |
| 5106 | } | 5072 | { |
| 5107 | p += pat_charlen; | 5073 | d = dfail; |
| 5108 | d += buf_charlen; | 5074 | goto fail; |
| 5109 | mcnt -= pat_charlen; | ||
| 5110 | } | 5075 | } |
| 5111 | while (mcnt > 0); | 5076 | } |
| 5112 | } | 5077 | while (--mcnt); |
| 5078 | #endif | ||
| 5113 | break; | 5079 | break; |
| 5114 | 5080 | ||
| 5115 | |||
| 5116 | /* Match any character except possibly a newline or a null. */ | 5081 | /* Match any character except possibly a newline or a null. */ |
| 5117 | case anychar: | 5082 | case anychar: |
| 5118 | { | 5083 | { |
| @@ -5122,8 +5087,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5122 | DEBUG_PRINT1 ("EXECUTING anychar.\n"); | 5087 | DEBUG_PRINT1 ("EXECUTING anychar.\n"); |
| 5123 | 5088 | ||
| 5124 | PREFETCH (); | 5089 | PREFETCH (); |
| 5125 | buf_ch = TARGET_CHAR_AND_LENGTH (d, dend - d, buf_charlen); | 5090 | buf_ch = RE_STRING_CHAR_AND_LENGTH (d, dend - d, buf_charlen); |
| 5126 | buf_ch = TRANSLATE (buf_ch); | ||
| 5127 | 5091 | ||
| 5128 | if ((!(bufp->syntax & RE_DOT_NEWLINE) | 5092 | if ((!(bufp->syntax & RE_DOT_NEWLINE) |
| 5129 | && buf_ch == '\n') | 5093 | && buf_ch == '\n') |
| @@ -5166,8 +5130,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5166 | } | 5130 | } |
| 5167 | 5131 | ||
| 5168 | PREFETCH (); | 5132 | PREFETCH (); |
| 5169 | c = TARGET_CHAR_AND_LENGTH (d, dend - d, len); | 5133 | c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len); |
| 5170 | c = TRANSLATE (c); /* The character to match. */ | 5134 | TRANSLATE_VIA_MULTIBYTE (c); /* The character to match. */ |
| 5171 | 5135 | ||
| 5172 | if (! multibyte || IS_REAL_ASCII (c)) | 5136 | if (! multibyte || IS_REAL_ASCII (c)) |
| 5173 | { /* Lookup bitmap. */ | 5137 | { /* Lookup bitmap. */ |
| @@ -5309,7 +5273,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5309 | /* Compare that many; failure if mismatch, else move | 5273 | /* Compare that many; failure if mismatch, else move |
| 5310 | past them. */ | 5274 | past them. */ |
| 5311 | if (RE_TRANSLATE_P (translate) | 5275 | if (RE_TRANSLATE_P (translate) |
| 5312 | ? bcmp_translate (d, d2, mcnt, translate, target_multibyte) | 5276 | ? bcmp_translate (d, d2, mcnt, translate, multibyte) |
| 5313 | : memcmp (d, d2, mcnt)) | 5277 | : memcmp (d, d2, mcnt)) |
| 5314 | { | 5278 | { |
| 5315 | d = dfail; | 5279 | d = dfail; |
| @@ -5596,7 +5560,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5596 | UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1); | 5560 | UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1); |
| 5597 | #endif | 5561 | #endif |
| 5598 | PREFETCH_NOLIMIT (); | 5562 | PREFETCH_NOLIMIT (); |
| 5599 | c2 = TARGET_CHAR_AND_LENGTH (d, dend - d, dummy); | 5563 | GET_CHAR_AFTER (c2, d, dummy); |
| 5600 | s2 = SYNTAX (c2); | 5564 | s2 = SYNTAX (c2); |
| 5601 | 5565 | ||
| 5602 | if (/* Case 2: Only one of S1 and S2 is Sword. */ | 5566 | if (/* Case 2: Only one of S1 and S2 is Sword. */ |
| @@ -5632,7 +5596,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5632 | UPDATE_SYNTAX_TABLE (charpos); | 5596 | UPDATE_SYNTAX_TABLE (charpos); |
| 5633 | #endif | 5597 | #endif |
| 5634 | PREFETCH (); | 5598 | PREFETCH (); |
| 5635 | c2 = TARGET_CHAR_AND_LENGTH (d, dend - d, dummy); | 5599 | GET_CHAR_AFTER (c2, d, dummy); |
| 5636 | s2 = SYNTAX (c2); | 5600 | s2 = SYNTAX (c2); |
| 5637 | 5601 | ||
| 5638 | /* Case 2: S2 is not Sword. */ | 5602 | /* Case 2: S2 is not Sword. */ |
| @@ -5687,7 +5651,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5687 | if (!AT_STRINGS_END (d)) | 5651 | if (!AT_STRINGS_END (d)) |
| 5688 | { | 5652 | { |
| 5689 | PREFETCH_NOLIMIT (); | 5653 | PREFETCH_NOLIMIT (); |
| 5690 | c2 = TARGET_CHAR_AND_LENGTH (d, dend - d, dummy); | 5654 | GET_CHAR_AFTER (c2, d, dummy); |
| 5691 | #ifdef emacs | 5655 | #ifdef emacs |
| 5692 | UPDATE_SYNTAX_TABLE_FORWARD (charpos); | 5656 | UPDATE_SYNTAX_TABLE_FORWARD (charpos); |
| 5693 | #endif | 5657 | #endif |
| @@ -5718,7 +5682,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5718 | int len; | 5682 | int len; |
| 5719 | re_wchar_t c; | 5683 | re_wchar_t c; |
| 5720 | 5684 | ||
| 5721 | c = TARGET_CHAR_AND_LENGTH (d, dend - d, len); | 5685 | GET_CHAR_AFTER (c, d, len); |
| 5722 | if ((SYNTAX (c) != (enum syntaxcode) mcnt) ^ not) | 5686 | if ((SYNTAX (c) != (enum syntaxcode) mcnt) ^ not) |
| 5723 | goto fail; | 5687 | goto fail; |
| 5724 | d += len; | 5688 | d += len; |
| @@ -5754,7 +5718,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5754 | int len; | 5718 | int len; |
| 5755 | re_wchar_t c; | 5719 | re_wchar_t c; |
| 5756 | 5720 | ||
| 5757 | c = TARGET_CHAR_AND_LENGTH (d, dend - d, len); | 5721 | GET_CHAR_AFTER (c, d, len); |
| 5758 | 5722 | ||
| 5759 | if ((!CHAR_HAS_CATEGORY (c, mcnt)) ^ not) | 5723 | if ((!CHAR_HAS_CATEGORY (c, mcnt)) ^ not) |
| 5760 | goto fail; | 5724 | goto fail; |
| @@ -5830,11 +5794,11 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5830 | bytes; nonzero otherwise. */ | 5794 | bytes; nonzero otherwise. */ |
| 5831 | 5795 | ||
| 5832 | static int | 5796 | static int |
| 5833 | bcmp_translate (s1, s2, len, translate, target_multibyte) | 5797 | bcmp_translate (s1, s2, len, translate, multibyte) |
| 5834 | re_char *s1, *s2; | 5798 | re_char *s1, *s2; |
| 5835 | register int len; | 5799 | register int len; |
| 5836 | RE_TRANSLATE_TYPE translate; | 5800 | RE_TRANSLATE_TYPE translate; |
| 5837 | const int target_multibyte; | 5801 | const int multibyte; |
| 5838 | { | 5802 | { |
| 5839 | register re_char *p1 = s1, *p2 = s2; | 5803 | register re_char *p1 = s1, *p2 = s2; |
| 5840 | re_char *p1_end = s1 + len; | 5804 | re_char *p1_end = s1 + len; |
| @@ -5847,8 +5811,8 @@ bcmp_translate (s1, s2, len, translate, target_multibyte) | |||
| 5847 | int p1_charlen, p2_charlen; | 5811 | int p1_charlen, p2_charlen; |
| 5848 | re_wchar_t p1_ch, p2_ch; | 5812 | re_wchar_t p1_ch, p2_ch; |
| 5849 | 5813 | ||
| 5850 | p1_ch = TARGET_CHAR_AND_LENGTH (p1, p1_end - p1, p1_charlen); | 5814 | GET_CHAR_AFTER (p1_ch, p1, p1_charlen); |
| 5851 | p2_ch = TARGET_CHAR_AND_LENGTH (p2, p2_end - p2, p2_charlen); | 5815 | GET_CHAR_AFTER (p2_ch, p2, p2_charlen); |
| 5852 | 5816 | ||
| 5853 | if (RE_TRANSLATE (translate, p1_ch) | 5817 | if (RE_TRANSLATE (translate, p1_ch) |
| 5854 | != RE_TRANSLATE (translate, p2_ch)) | 5818 | != RE_TRANSLATE (translate, p2_ch)) |