diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/regex.c | 389 |
1 files changed, 270 insertions, 119 deletions
diff --git a/src/regex.c b/src/regex.c index 9974b2d41ec..ee190497e5c 100644 --- a/src/regex.c +++ b/src/regex.c | |||
| @@ -146,6 +146,7 @@ | |||
| 146 | # define POS_AS_IN_BUFFER(p) ((p) + (NILP (re_match_object) || BUFFERP (re_match_object))) | 146 | # define POS_AS_IN_BUFFER(p) ((p) + (NILP (re_match_object) || BUFFERP (re_match_object))) |
| 147 | 147 | ||
| 148 | # define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte) | 148 | # define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte) |
| 149 | # define RE_TARGET_MULTIBYTE_P(bufp) ((bufp)->target_multibyte) | ||
| 149 | # define RE_STRING_CHAR(p, s) \ | 150 | # define RE_STRING_CHAR(p, s) \ |
| 150 | (multibyte ? (STRING_CHAR (p, s)) : (*(p))) | 151 | (multibyte ? (STRING_CHAR (p, s)) : (*(p))) |
| 151 | # define RE_STRING_CHAR_AND_LENGTH(p, s, len) \ | 152 | # define RE_STRING_CHAR_AND_LENGTH(p, s, len) \ |
| @@ -154,17 +155,21 @@ | |||
| 154 | /* Set C a (possibly multibyte) character before P. P points into a | 155 | /* Set C a (possibly multibyte) character before P. P points into a |
| 155 | string which is the virtual concatenation of STR1 (which ends at | 156 | string which is the virtual concatenation of STR1 (which ends at |
| 156 | END1) or STR2 (which ends at END2). */ | 157 | END1) or STR2 (which ends at END2). */ |
| 157 | # define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \ | 158 | # define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \ |
| 158 | do { \ | 159 | do { \ |
| 159 | if (multibyte) \ | 160 | if (target_multibyte) \ |
| 160 | { \ | 161 | { \ |
| 161 | re_char *dtemp = (p) == (str2) ? (end1) : (p); \ | 162 | re_char *dtemp = (p) == (str2) ? (end1) : (p); \ |
| 162 | re_char *dlimit = ((p) > (str2) && (p) <= (end2)) ? (str2) : (str1); \ | 163 | re_char *dlimit = ((p) > (str2) && (p) <= (end2)) ? (str2) : (str1); \ |
| 163 | while (dtemp-- > dlimit && !CHAR_HEAD_P (*dtemp)); \ | 164 | while (dtemp-- > dlimit && !CHAR_HEAD_P (*dtemp)); \ |
| 164 | c = STRING_CHAR (dtemp, (p) - dtemp); \ | 165 | c = STRING_CHAR (dtemp, (p) - dtemp); \ |
| 165 | } \ | 166 | } \ |
| 166 | else \ | 167 | else \ |
| 167 | (c = ((p) == (str2) ? (end1) : (p))[-1]); \ | 168 | { \ |
| 169 | (c = ((p) == (str2) ? (end1) : (p))[-1]); \ | ||
| 170 | if (multibyte) \ | ||
| 171 | MAKE_CHAR_MULTIBYTE (c); \ | ||
| 172 | } \ | ||
| 168 | } while (0) | 173 | } while (0) |
| 169 | 174 | ||
| 170 | 175 | ||
| @@ -233,6 +238,7 @@ enum syntaxcode { Swhitespace = 0, Sword = 1 }; | |||
| 233 | # define CHARSET_LEADING_CODE_BASE(c) 0 | 238 | # define CHARSET_LEADING_CODE_BASE(c) 0 |
| 234 | # define MAX_MULTIBYTE_LENGTH 1 | 239 | # define MAX_MULTIBYTE_LENGTH 1 |
| 235 | # define RE_MULTIBYTE_P(x) 0 | 240 | # define RE_MULTIBYTE_P(x) 0 |
| 241 | # define RE_TARGET_MULTIBYTE_P(x) 0 | ||
| 236 | # define WORD_BOUNDARY_P(c1, c2) (0) | 242 | # define WORD_BOUNDARY_P(c1, c2) (0) |
| 237 | # define CHAR_HEAD_P(p) (1) | 243 | # define CHAR_HEAD_P(p) (1) |
| 238 | # define SINGLE_BYTE_CHAR_P(c) (1) | 244 | # define SINGLE_BYTE_CHAR_P(c) (1) |
| @@ -248,6 +254,8 @@ enum syntaxcode { Swhitespace = 0, Sword = 1 }; | |||
| 248 | # define MAKE_CHAR(charset, c1, c2) (c1) | 254 | # define MAKE_CHAR(charset, c1, c2) (c1) |
| 249 | # define BYTE8_TO_CHAR(c) (c) | 255 | # define BYTE8_TO_CHAR(c) (c) |
| 250 | # define CHAR_BYTE8_P(c) (0) | 256 | # define CHAR_BYTE8_P(c) (0) |
| 257 | # define MAKE_CHAR_MULTIBYTE(c) 0 | ||
| 258 | # define CHAR_LEADING_CODE(c) (c) | ||
| 251 | #endif /* not emacs */ | 259 | #endif /* not emacs */ |
| 252 | 260 | ||
| 253 | #ifndef RE_TRANSLATE | 261 | #ifndef RE_TRANSLATE |
| @@ -1665,6 +1673,8 @@ static int analyse_first _RE_ARGS ((re_char *p, re_char *pend, | |||
| 1665 | #define PATFETCH(c) \ | 1673 | #define PATFETCH(c) \ |
| 1666 | do { \ | 1674 | do { \ |
| 1667 | PATFETCH_RAW (c); \ | 1675 | PATFETCH_RAW (c); \ |
| 1676 | if (! multibyte) \ | ||
| 1677 | MAKE_CHAR_MULTIBYTE (c); \ | ||
| 1668 | c = TRANSLATE (c); \ | 1678 | c = TRANSLATE (c); \ |
| 1669 | } while (0) | 1679 | } while (0) |
| 1670 | 1680 | ||
| @@ -1917,6 +1927,54 @@ struct range_table_work_area | |||
| 1917 | #define SET_LIST_BIT(c) (b[((c)) / BYTEWIDTH] |= 1 << ((c) % BYTEWIDTH)) | 1927 | #define SET_LIST_BIT(c) (b[((c)) / BYTEWIDTH] |= 1 << ((c) % BYTEWIDTH)) |
| 1918 | 1928 | ||
| 1919 | 1929 | ||
| 1930 | #ifdef emacs | ||
| 1931 | |||
| 1932 | /* It is better to implement this jumbo macro by a function, but it's | ||
| 1933 | not that easy because macros called within it assumes various | ||
| 1934 | variables being defined. */ | ||
| 1935 | |||
| 1936 | #define HANDLE_UNIBYTE_RANGE(work_area, c1, c2) \ | ||
| 1937 | do { \ | ||
| 1938 | int char_table[257]; \ | ||
| 1939 | int i, j, c; \ | ||
| 1940 | \ | ||
| 1941 | char_table[(c1) - 1] = -2; /* head sentinel */ \ | ||
| 1942 | for (i = (c1); i <= (c2); i++) \ | ||
| 1943 | char_table[i] = TRANSLATE (unibyte_char_to_multibyte (i)); \ | ||
| 1944 | char_table[i] = MAX_CHAR + 2; /* tail sentinel */ \ | ||
| 1945 | \ | ||
| 1946 | /* As the number of data is small (at most 128) and we can expect \ | ||
| 1947 | that data in char_table are mostly sorted, we use fairly simple \ | ||
| 1948 | `insertion sort'. */ \ | ||
| 1949 | for (i = (c1) + 1; i <= (c2); i++) \ | ||
| 1950 | { \ | ||
| 1951 | c = char_table[i]; \ | ||
| 1952 | j = i; \ | ||
| 1953 | while (char_table[j - 1] > c) \ | ||
| 1954 | char_table[j] = char_table[j - 1], j--; \ | ||
| 1955 | char_table[j] = c; \ | ||
| 1956 | } \ | ||
| 1957 | \ | ||
| 1958 | for (i = (c1); i <= (c2); i++) \ | ||
| 1959 | { \ | ||
| 1960 | c = char_table[i]; \ | ||
| 1961 | if (! IS_REAL_ASCII (c)) \ | ||
| 1962 | break; \ | ||
| 1963 | SET_LIST_BIT (c); \ | ||
| 1964 | } \ | ||
| 1965 | while (i <= (c2)) \ | ||
| 1966 | { \ | ||
| 1967 | c = char_table[i]; \ | ||
| 1968 | for (j = i + 1; j <= (c2); j++) \ | ||
| 1969 | if (char_table[j] - c != j - i) \ | ||
| 1970 | break; \ | ||
| 1971 | SET_RANGE_TABLE_WORK_AREA ((work_area), c, char_table[j - 1]); \ | ||
| 1972 | i = j; \ | ||
| 1973 | } \ | ||
| 1974 | } while (0) | ||
| 1975 | |||
| 1976 | #endif /* emacs */ | ||
| 1977 | |||
| 1920 | /* Get the next unsigned number in the uncompiled pattern. */ | 1978 | /* Get the next unsigned number in the uncompiled pattern. */ |
| 1921 | #define GET_UNSIGNED_NUMBER(num) \ | 1979 | #define GET_UNSIGNED_NUMBER(num) \ |
| 1922 | do { if (p != pend) \ | 1980 | do { if (p != pend) \ |
| @@ -2264,7 +2322,7 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2264 | /* Loop through the uncompiled pattern until we're at the end. */ | 2322 | /* Loop through the uncompiled pattern until we're at the end. */ |
| 2265 | while (p != pend) | 2323 | while (p != pend) |
| 2266 | { | 2324 | { |
| 2267 | PATFETCH (c); | 2325 | PATFETCH_RAW (c); |
| 2268 | 2326 | ||
| 2269 | switch (c) | 2327 | switch (c) |
| 2270 | { | 2328 | { |
| @@ -2346,15 +2404,15 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2346 | if (p+1 == pend) | 2404 | if (p+1 == pend) |
| 2347 | FREE_STACK_RETURN (REG_EESCAPE); | 2405 | FREE_STACK_RETURN (REG_EESCAPE); |
| 2348 | if (p[1] == '+' || p[1] == '?') | 2406 | if (p[1] == '+' || p[1] == '?') |
| 2349 | PATFETCH (c); /* Gobble up the backslash. */ | 2407 | PATFETCH_RAW (c); /* Gobble up the backslash. */ |
| 2350 | else | 2408 | else |
| 2351 | break; | 2409 | break; |
| 2352 | } | 2410 | } |
| 2353 | else | 2411 | else |
| 2354 | break; | 2412 | break; |
| 2355 | /* If we get here, we found another repeat character. */ | 2413 | /* If we get here, we found another repeat character. */ |
| 2356 | PATFETCH (c); | 2414 | PATFETCH_RAW (c); |
| 2357 | } | 2415 | } |
| 2358 | 2416 | ||
| 2359 | /* Star, etc. applied to an empty pattern is equivalent | 2417 | /* Star, etc. applied to an empty pattern is equivalent |
| 2360 | to an empty pattern. */ | 2418 | to an empty pattern. */ |
| @@ -2495,14 +2553,14 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2495 | 2553 | ||
| 2496 | if (p == pend) FREE_STACK_RETURN (REG_EBRACK); | 2554 | if (p == pend) FREE_STACK_RETURN (REG_EBRACK); |
| 2497 | 2555 | ||
| 2498 | PATFETCH (c); | 2556 | PATFETCH_RAW (c); |
| 2499 | 2557 | ||
| 2500 | /* \ might escape characters inside [...] and [^...]. */ | 2558 | /* \ might escape characters inside [...] and [^...]. */ |
| 2501 | if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') | 2559 | if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') |
| 2502 | { | 2560 | { |
| 2503 | if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); | 2561 | if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); |
| 2504 | 2562 | ||
| 2505 | PATFETCH (c); | 2563 | PATFETCH_RAW (c); |
| 2506 | escaped_char = true; | 2564 | escaped_char = true; |
| 2507 | } | 2565 | } |
| 2508 | else | 2566 | else |
| @@ -2528,7 +2586,7 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2528 | unsigned char str[CHAR_CLASS_MAX_LENGTH + 1]; | 2586 | unsigned char str[CHAR_CLASS_MAX_LENGTH + 1]; |
| 2529 | const unsigned char *class_beg; | 2587 | const unsigned char *class_beg; |
| 2530 | 2588 | ||
| 2531 | PATFETCH (c); | 2589 | PATFETCH_RAW (c); |
| 2532 | c1 = 0; | 2590 | c1 = 0; |
| 2533 | class_beg = p; | 2591 | class_beg = p; |
| 2534 | 2592 | ||
| @@ -2537,7 +2595,7 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2537 | 2595 | ||
| 2538 | for (;;) | 2596 | for (;;) |
| 2539 | { | 2597 | { |
| 2540 | PATFETCH (c); | 2598 | PATFETCH_RAW (c); |
| 2541 | if ((c == ':' && *p == ']') || p == pend) | 2599 | if ((c == ':' && *p == ']') || p == pend) |
| 2542 | break; | 2600 | break; |
| 2543 | if (c1 < CHAR_CLASS_MAX_LENGTH) | 2601 | if (c1 < CHAR_CLASS_MAX_LENGTH) |
| @@ -2564,7 +2622,7 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2564 | 2622 | ||
| 2565 | /* Throw away the ] at the end of the character | 2623 | /* Throw away the ] at the end of the character |
| 2566 | class. */ | 2624 | class. */ |
| 2567 | PATFETCH (c); | 2625 | PATFETCH_RAW (c); |
| 2568 | 2626 | ||
| 2569 | if (p == pend) FREE_STACK_RETURN (REG_EBRACK); | 2627 | if (p == pend) FREE_STACK_RETURN (REG_EBRACK); |
| 2570 | 2628 | ||
| @@ -2573,17 +2631,20 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2573 | is_digit, is_cntrl, and is_xdigit, since | 2631 | is_digit, is_cntrl, and is_xdigit, since |
| 2574 | they can only match ASCII characters. We | 2632 | they can only match ASCII characters. We |
| 2575 | don't need to handle them for multibyte. | 2633 | don't need to handle them for multibyte. |
| 2576 | They are distinguished by a negative wctype. */ | 2634 | They are distinguished by a negative wctype. |
| 2577 | 2635 | We need this only for Emacs. */ | |
| 2578 | if (multibyte) | 2636 | #ifdef emacs |
| 2579 | SET_RANGE_TABLE_WORK_AREA_BIT (range_table_work, | 2637 | SET_RANGE_TABLE_WORK_AREA_BIT (range_table_work, |
| 2580 | re_wctype_to_bit (cc)); | 2638 | re_wctype_to_bit (cc)); |
| 2639 | #endif | ||
| 2581 | 2640 | ||
| 2582 | for (ch = 0; ch < 1 << BYTEWIDTH; ++ch) | 2641 | for (ch = 0; ch < 1 << BYTEWIDTH; ++ch) |
| 2583 | { | 2642 | { |
| 2584 | int translated = TRANSLATE (ch); | 2643 | MAKE_CHAR_MULTIBYTE (ch); |
| 2585 | if (re_iswctype (btowc (ch), cc)) | 2644 | ch = TRANSLATE (ch); |
| 2586 | SET_LIST_BIT (translated); | 2645 | if (IS_REAL_ASCII (ch) |
| 2646 | & re_iswctype (btowc (ch), cc)) | ||
| 2647 | SET_LIST_BIT (ch); | ||
| 2587 | } | 2648 | } |
| 2588 | 2649 | ||
| 2589 | /* Repeat the loop. */ | 2650 | /* Repeat the loop. */ |
| @@ -2606,35 +2667,51 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2606 | { | 2667 | { |
| 2607 | 2668 | ||
| 2608 | /* Discard the `-'. */ | 2669 | /* Discard the `-'. */ |
| 2609 | PATFETCH (c1); | 2670 | PATFETCH_RAW (c1); |
| 2610 | 2671 | ||
| 2611 | /* Fetch the character which ends the range. */ | 2672 | /* Fetch the character which ends the range. */ |
| 2612 | PATFETCH (c1); | 2673 | PATFETCH_RAW (c1); |
| 2613 | 2674 | #ifdef emacs | |
| 2614 | if (SINGLE_BYTE_CHAR_P (c) | 2675 | if (multibyte) |
| 2615 | && ! SINGLE_BYTE_CHAR_P (c1)) | 2676 | { |
| 2677 | c = TRANSLATE (c); | ||
| 2678 | c1 = TRANSLATE (c1); | ||
| 2679 | if (! IS_REAL_ASCII (c1)) | ||
| 2680 | { | ||
| 2681 | SET_RANGE_TABLE_WORK_AREA (range_table_work, | ||
| 2682 | c, c1); | ||
| 2683 | c1 = 127; | ||
| 2684 | } | ||
| 2685 | } | ||
| 2686 | else | ||
| 2616 | { | 2687 | { |
| 2617 | /* Handle a range starting with a character | 2688 | if (! IS_REAL_ASCII (c1)) |
| 2618 | fitting in a bitmap to a character not | 2689 | { |
| 2619 | fitting in a bitmap (thus require range | 2690 | int c2 = MAX (c, 128); |
| 2620 | table). We use both a bitmap (for the | 2691 | |
| 2621 | range from C to 255) and a range table (for | 2692 | HANDLE_UNIBYTE_RANGE (range_table_work, c2, c1); |
| 2622 | the remaining range). Here, we setup only | 2693 | c1 = 127; |
| 2623 | a range table. A bitmap is setup later. */ | 2694 | } |
| 2624 | re_wchar_t c2 | ||
| 2625 | = CHAR_BYTE8_P (c1) ? BYTE8_TO_CHAR (0x80) : 256; | ||
| 2626 | |||
| 2627 | SET_RANGE_TABLE_WORK_AREA (range_table_work, c2, c1); | ||
| 2628 | c1 = 255; | ||
| 2629 | } | 2695 | } |
| 2696 | #endif | ||
| 2630 | } | 2697 | } |
| 2631 | else | 2698 | else |
| 2632 | /* Range from C to C. */ | 2699 | { |
| 2633 | c1 = c; | 2700 | /* Range from C to C. */ |
| 2701 | if (! multibyte) | ||
| 2702 | MAKE_CHAR_MULTIBYTE (c); | ||
| 2703 | c = TRANSLATE (c); | ||
| 2704 | if (IS_REAL_ASCII (c)) | ||
| 2705 | c1 = c; | ||
| 2706 | else | ||
| 2707 | { | ||
| 2708 | SET_RANGE_TABLE_WORK_AREA (range_table_work, c, c); | ||
| 2709 | c = -1; /* Suppress setting bitmap. */ | ||
| 2710 | } | ||
| 2711 | } | ||
| 2634 | 2712 | ||
| 2635 | /* Set the range ... */ | 2713 | /* Set the range into bitmap */ |
| 2636 | if (SINGLE_BYTE_CHAR_P (c)) | 2714 | if (c >= 0) |
| 2637 | /* ... into bitmap. */ | ||
| 2638 | { | 2715 | { |
| 2639 | re_wchar_t this_char; | 2716 | re_wchar_t this_char; |
| 2640 | int range_start = c, range_end = c1; | 2717 | int range_start = c, range_end = c1; |
| @@ -2653,9 +2730,6 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2653 | SET_LIST_BIT (TRANSLATE (this_char)); | 2730 | SET_LIST_BIT (TRANSLATE (this_char)); |
| 2654 | } | 2731 | } |
| 2655 | } | 2732 | } |
| 2656 | else | ||
| 2657 | /* ... into range table. */ | ||
| 2658 | SET_RANGE_TABLE_WORK_AREA (range_table_work, c, c1); | ||
| 2659 | } | 2733 | } |
| 2660 | 2734 | ||
| 2661 | /* Discard any (non)matching list bytes that are all 0 at the | 2735 | /* Discard any (non)matching list bytes that are all 0 at the |
| @@ -2750,7 +2824,7 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2750 | /* Look for a special (?...) construct */ | 2824 | /* Look for a special (?...) construct */ |
| 2751 | if ((syntax & RE_SHY_GROUPS) && *p == '?') | 2825 | if ((syntax & RE_SHY_GROUPS) && *p == '?') |
| 2752 | { | 2826 | { |
| 2753 | PATFETCH (c); /* Gobble up the '?'. */ | 2827 | PATFETCH_RAW (c); /* Gobble up the '?'. */ |
| 2754 | PATFETCH (c); | 2828 | PATFETCH (c); |
| 2755 | switch (c) | 2829 | switch (c) |
| 2756 | { | 2830 | { |
| @@ -3230,10 +3304,10 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 3230 | { | 3304 | { |
| 3231 | int len; | 3305 | int len; |
| 3232 | 3306 | ||
| 3233 | if (multibyte) | 3307 | if (! multibyte) |
| 3234 | len = CHAR_STRING (c, b); | 3308 | MAKE_CHAR_MULTIBYTE (c); |
| 3235 | else | 3309 | c = TRANSLATE (c); |
| 3236 | *b = c, len = 1; | 3310 | len = CHAR_STRING (c, b); |
| 3237 | b += len; | 3311 | b += len; |
| 3238 | (*pending_exact) += len; | 3312 | (*pending_exact) += len; |
| 3239 | } | 3313 | } |
| @@ -3439,6 +3513,8 @@ group_in_compile_stack (compile_stack, regnum) | |||
| 3439 | bother filling it up (obviously) and only return whether the | 3513 | bother filling it up (obviously) and only return whether the |
| 3440 | pattern could potentially match the empty string. | 3514 | pattern could potentially match the empty string. |
| 3441 | 3515 | ||
| 3516 | MULTIBYTE is always 1 for Emacs, and 0 otherwise. | ||
| 3517 | |||
| 3442 | Return 1 if p..pend might match the empty string. | 3518 | Return 1 if p..pend might match the empty string. |
| 3443 | Return 0 if p..pend matches at least one char. | 3519 | Return 0 if p..pend matches at least one char. |
| 3444 | Return -1 if fastmap was not updated accurately. */ | 3520 | Return -1 if fastmap was not updated accurately. */ |
| @@ -3505,14 +3581,11 @@ analyse_first (p, pend, fastmap, multibyte) | |||
| 3505 | 3581 | ||
| 3506 | case exactn: | 3582 | case exactn: |
| 3507 | if (fastmap) | 3583 | if (fastmap) |
| 3508 | { | 3584 | /* If multibyte is nonzero, the first byte of each |
| 3509 | int c = RE_STRING_CHAR (p + 1, pend - p); | 3585 | character is an ASCII or a leading code. Otherwise, |
| 3510 | 3586 | each byte is a character. Thus, this works in both | |
| 3511 | if (SINGLE_BYTE_CHAR_P (c)) | 3587 | cases. */ |
| 3512 | fastmap[c] = 1; | 3588 | fastmap[p[1]] = 1; |
| 3513 | else | ||
| 3514 | fastmap[p[1]] = 1; | ||
| 3515 | } | ||
| 3516 | break; | 3589 | break; |
| 3517 | 3590 | ||
| 3518 | 3591 | ||
| @@ -3524,14 +3597,17 @@ analyse_first (p, pend, fastmap, multibyte) | |||
| 3524 | 3597 | ||
| 3525 | 3598 | ||
| 3526 | case charset_not: | 3599 | case charset_not: |
| 3527 | /* Chars beyond end of bitmap are possible matches. | ||
| 3528 | All the single-byte codes can occur in multibyte buffers. | ||
| 3529 | So any that are not listed in the charset | ||
| 3530 | are possible matches, even in multibyte buffers. */ | ||
| 3531 | if (!fastmap) break; | 3600 | if (!fastmap) break; |
| 3532 | for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH; | 3601 | { |
| 3533 | j < (1 << BYTEWIDTH); j++) | 3602 | /* Chars beyond end of bitmap are possible matches. */ |
| 3534 | fastmap[j] = 1; | 3603 | /* Emacs uses the bitmap only for ASCII characters. */ |
| 3604 | int limit = multibyte ? 128 : (1 << BYTEWIDTH); | ||
| 3605 | |||
| 3606 | for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH; | ||
| 3607 | j < limit; j++) | ||
| 3608 | fastmap[j] = 1; | ||
| 3609 | } | ||
| 3610 | |||
| 3535 | /* Fallthrough */ | 3611 | /* Fallthrough */ |
| 3536 | case charset: | 3612 | case charset: |
| 3537 | if (!fastmap) break; | 3613 | if (!fastmap) break; |
| @@ -3542,7 +3618,7 @@ analyse_first (p, pend, fastmap, multibyte) | |||
| 3542 | fastmap[j] = 1; | 3618 | fastmap[j] = 1; |
| 3543 | 3619 | ||
| 3544 | if ((not && multibyte) | 3620 | if ((not && multibyte) |
| 3545 | /* Any character set can possibly contain a character | 3621 | /* Any leading code can possibly start a character |
| 3546 | which doesn't match the specified set of characters. */ | 3622 | which doesn't match the specified set of characters. */ |
| 3547 | || (CHARSET_RANGE_TABLE_EXISTS_P (&p[-2]) | 3623 | || (CHARSET_RANGE_TABLE_EXISTS_P (&p[-2]) |
| 3548 | && CHARSET_RANGE_TABLE_BITS (&p[-2]) != 0)) | 3624 | && CHARSET_RANGE_TABLE_BITS (&p[-2]) != 0)) |
| @@ -3562,11 +3638,10 @@ analyse_first (p, pend, fastmap, multibyte) | |||
| 3562 | else if (!not && CHARSET_RANGE_TABLE_EXISTS_P (&p[-2]) | 3638 | else if (!not && CHARSET_RANGE_TABLE_EXISTS_P (&p[-2]) |
| 3563 | && match_any_multibyte_characters == false) | 3639 | && match_any_multibyte_characters == false) |
| 3564 | { | 3640 | { |
| 3565 | /* Set fastmap[I] to 1 where I is a base leading code of each | 3641 | /* Set fastmap[I] to 1 where I is a leading code of each |
| 3566 | multibyte characer in the range table. */ | 3642 | multibyte characer in the range table. */ |
| 3567 | int c, count; | 3643 | int c, count; |
| 3568 | unsigned char buf1[MAX_MULTIBYTE_LENGTH]; | 3644 | unsigned char lc1, lc2; |
| 3569 | unsigned char buf2[MAX_MULTIBYTE_LENGTH]; | ||
| 3570 | 3645 | ||
| 3571 | /* Make P points the range table. `+ 2' is to skip flag | 3646 | /* Make P points the range table. `+ 2' is to skip flag |
| 3572 | bits for a character class. */ | 3647 | bits for a character class. */ |
| @@ -3578,11 +3653,11 @@ analyse_first (p, pend, fastmap, multibyte) | |||
| 3578 | { | 3653 | { |
| 3579 | /* Extract the start and end of each range. */ | 3654 | /* Extract the start and end of each range. */ |
| 3580 | EXTRACT_CHARACTER (c, p); | 3655 | EXTRACT_CHARACTER (c, p); |
| 3581 | CHAR_STRING (c, buf1); | 3656 | lc1 = CHAR_LEADING_CODE (c); |
| 3582 | p += 3; | 3657 | p += 3; |
| 3583 | EXTRACT_CHARACTER (c, p); | 3658 | EXTRACT_CHARACTER (c, p); |
| 3584 | CHAR_STRING (c, buf2); | 3659 | lc2 = CHAR_LEADING_CODE (c); |
| 3585 | for (j = buf1[0]; j <= buf2[0]; j++) | 3660 | for (j = lc1; j <= lc2; j++) |
| 3586 | fastmap[j] = 1; | 3661 | fastmap[j] = 1; |
| 3587 | } | 3662 | } |
| 3588 | } | 3663 | } |
| @@ -3608,7 +3683,7 @@ analyse_first (p, pend, fastmap, multibyte) | |||
| 3608 | if (!fastmap) break; | 3683 | if (!fastmap) break; |
| 3609 | not = (re_opcode_t)p[-1] == notcategoryspec; | 3684 | not = (re_opcode_t)p[-1] == notcategoryspec; |
| 3610 | k = *p++; | 3685 | k = *p++; |
| 3611 | for (j = 0; j < (1 << BYTEWIDTH); j++) | 3686 | for (j = (multibyte ? 127 : (1 << BYTEWIDTH)); j >= 0; j--) |
| 3612 | if ((CHAR_HAS_CATEGORY (j, k)) ^ not) | 3687 | if ((CHAR_HAS_CATEGORY (j, k)) ^ not) |
| 3613 | fastmap[j] = 1; | 3688 | fastmap[j] = 1; |
| 3614 | 3689 | ||
| @@ -3754,7 +3829,15 @@ re_compile_fastmap (bufp) | |||
| 3754 | bufp->fastmap_accurate = 1; /* It will be when we're done. */ | 3829 | bufp->fastmap_accurate = 1; /* It will be when we're done. */ |
| 3755 | 3830 | ||
| 3756 | analysis = analyse_first (bufp->buffer, bufp->buffer + bufp->used, | 3831 | analysis = analyse_first (bufp->buffer, bufp->buffer + bufp->used, |
| 3757 | fastmap, RE_MULTIBYTE_P (bufp)); | 3832 | fastmap, |
| 3833 | #ifdef emacs | ||
| 3834 | /* The compiled pattern buffer is always | ||
| 3835 | setup for multibyte characters. */ | ||
| 3836 | 1 | ||
| 3837 | #else | ||
| 3838 | 0 | ||
| 3839 | #endif | ||
| 3840 | ); | ||
| 3758 | bufp->can_be_null = (analysis != 0); | 3841 | bufp->can_be_null = (analysis != 0); |
| 3759 | return 0; | 3842 | return 0; |
| 3760 | } /* re_compile_fastmap */ | 3843 | } /* re_compile_fastmap */ |
| @@ -3860,8 +3943,14 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop) | |||
| 3860 | int endpos = startpos + range; | 3943 | int endpos = startpos + range; |
| 3861 | boolean anchored_start; | 3944 | boolean anchored_start; |
| 3862 | 3945 | ||
| 3863 | /* Nonzero if we have to concern multibyte character. */ | 3946 | /* Nonzero if BUFP is setup for multibyte characters. */ |
| 3864 | const boolean multibyte = RE_MULTIBYTE_P (bufp); | 3947 | #ifdef emacs |
| 3948 | const boolean multibyte = 1; | ||
| 3949 | #else | ||
| 3950 | const boolean multibyte = 0; | ||
| 3951 | #endif | ||
| 3952 | /* Nonzero if STR1 and STR2 contains multibyte characters. */ | ||
| 3953 | const boolean target_multibyte = RE_TARGET_MULTIBYTE_P (bufp); | ||
| 3865 | 3954 | ||
| 3866 | /* Check for out-of-range STARTPOS. */ | 3955 | /* Check for out-of-range STARTPOS. */ |
| 3867 | if (startpos < 0 || startpos > total_size) | 3956 | if (startpos < 0 || startpos > total_size) |
| @@ -3950,7 +4039,7 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop) | |||
| 3950 | inside the loop. */ | 4039 | inside the loop. */ |
| 3951 | if (RE_TRANSLATE_P (translate)) | 4040 | if (RE_TRANSLATE_P (translate)) |
| 3952 | { | 4041 | { |
| 3953 | if (multibyte) | 4042 | if (target_multibyte) |
| 3954 | while (range > lim) | 4043 | while (range > lim) |
| 3955 | { | 4044 | { |
| 3956 | int buf_charlen; | 4045 | int buf_charlen; |
| @@ -3959,13 +4048,24 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop) | |||
| 3959 | buf_charlen); | 4048 | buf_charlen); |
| 3960 | 4049 | ||
| 3961 | buf_ch = RE_TRANSLATE (translate, buf_ch); | 4050 | buf_ch = RE_TRANSLATE (translate, buf_ch); |
| 3962 | if (buf_ch >= 0400 | 4051 | if (fastmap[CHAR_LEADING_CODE (buf_ch)]) |
| 3963 | || fastmap[buf_ch]) | ||
| 3964 | break; | 4052 | break; |
| 3965 | 4053 | ||
| 3966 | range -= buf_charlen; | 4054 | range -= buf_charlen; |
| 3967 | d += buf_charlen; | 4055 | d += buf_charlen; |
| 3968 | } | 4056 | } |
| 4057 | else if (multibyte) | ||
| 4058 | while (range > lim) | ||
| 4059 | { | ||
| 4060 | buf_ch = *d; | ||
| 4061 | MAKE_CHAR_MULTIBYTE (buf_ch); | ||
| 4062 | buf_ch = RE_TRANSLATE (translate, buf_ch); | ||
| 4063 | if (fastmap[CHAR_LEADING_CODE (buf_ch)]) | ||
| 4064 | break; | ||
| 4065 | |||
| 4066 | d++; | ||
| 4067 | range--; | ||
| 4068 | } | ||
| 3969 | else | 4069 | else |
| 3970 | while (range > lim | 4070 | while (range > lim |
| 3971 | && !fastmap[RE_TRANSLATE (translate, *d)]) | 4071 | && !fastmap[RE_TRANSLATE (translate, *d)]) |
| @@ -3974,6 +4074,16 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop) | |||
| 3974 | range--; | 4074 | range--; |
| 3975 | } | 4075 | } |
| 3976 | } | 4076 | } |
| 4077 | else if (multibyte && ! target_multibyte) | ||
| 4078 | { | ||
| 4079 | buf_ch = *d; | ||
| 4080 | MAKE_CHAR_MULTIBYTE (buf_ch); | ||
| 4081 | if (fastmap[CHAR_LEADING_CODE (buf_ch)]) | ||
| 4082 | break; | ||
| 4083 | |||
| 4084 | d++; | ||
| 4085 | range--; | ||
| 4086 | } | ||
| 3977 | else | 4087 | else |
| 3978 | while (range > lim && !fastmap[*d]) | 4088 | while (range > lim && !fastmap[*d]) |
| 3979 | { | 4089 | { |
| @@ -3989,10 +4099,11 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop) | |||
| 3989 | ? size2 + size1 - startpos | 4099 | ? size2 + size1 - startpos |
| 3990 | : size1 - startpos); | 4100 | : size1 - startpos); |
| 3991 | buf_ch = RE_STRING_CHAR (d, room); | 4101 | buf_ch = RE_STRING_CHAR (d, room); |
| 4102 | if (! target_multibyte) | ||
| 4103 | MAKE_CHAR_MULTIBYTE (buf_ch); | ||
| 3992 | buf_ch = TRANSLATE (buf_ch); | 4104 | buf_ch = TRANSLATE (buf_ch); |
| 3993 | 4105 | ||
| 3994 | if (! (buf_ch >= 0400 | 4106 | if (! fastmap[CHAR_LEADING_CODE (buf_ch)]) |
| 3995 | || fastmap[buf_ch])) | ||
| 3996 | goto advance; | 4107 | goto advance; |
| 3997 | } | 4108 | } |
| 3998 | } | 4109 | } |
| @@ -4022,7 +4133,7 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop) | |||
| 4022 | else if (range > 0) | 4133 | else if (range > 0) |
| 4023 | { | 4134 | { |
| 4024 | /* Update STARTPOS to the next character boundary. */ | 4135 | /* Update STARTPOS to the next character boundary. */ |
| 4025 | if (multibyte) | 4136 | if (target_multibyte) |
| 4026 | { | 4137 | { |
| 4027 | re_char *p = POS_ADDR_VSTRING (startpos); | 4138 | re_char *p = POS_ADDR_VSTRING (startpos); |
| 4028 | re_char *pend = STOP_ADDR_VSTRING (startpos); | 4139 | re_char *pend = STOP_ADDR_VSTRING (startpos); |
| @@ -4045,7 +4156,7 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop) | |||
| 4045 | startpos--; | 4156 | startpos--; |
| 4046 | 4157 | ||
| 4047 | /* Update STARTPOS to the previous character boundary. */ | 4158 | /* Update STARTPOS to the previous character boundary. */ |
| 4048 | if (multibyte) | 4159 | if (target_multibyte) |
| 4049 | { | 4160 | { |
| 4050 | re_char *p = POS_ADDR_VSTRING (startpos); | 4161 | re_char *p = POS_ADDR_VSTRING (startpos); |
| 4051 | int len = 0; | 4162 | int len = 0; |
| @@ -4502,6 +4613,17 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4502 | } | 4613 | } |
| 4503 | WEAK_ALIAS (__re_match_2, re_match_2) | 4614 | WEAK_ALIAS (__re_match_2, re_match_2) |
| 4504 | 4615 | ||
| 4616 | #ifdef emacs | ||
| 4617 | #define TARGET_CHAR_AND_LENGTH(d, len, actual_len) \ | ||
| 4618 | (target_multibyte \ | ||
| 4619 | ? STRING_CHAR_AND_LENGTH (d, len, actual_len) \ | ||
| 4620 | : (actual_len = 1, unibyte_char_to_multibyte (*d))) | ||
| 4621 | #else | ||
| 4622 | #define TARGET_CHAR_AND_LENGTH(d, len, actual_len) \ | ||
| 4623 | (actual_len = 1, *d) | ||
| 4624 | #endif | ||
| 4625 | |||
| 4626 | |||
| 4505 | /* This is a separate function so that we can force an alloca cleanup | 4627 | /* This is a separate function so that we can force an alloca cleanup |
| 4506 | afterwards. */ | 4628 | afterwards. */ |
| 4507 | static int | 4629 | static int |
| @@ -4541,8 +4663,14 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4541 | /* We use this to map every character in the string. */ | 4663 | /* We use this to map every character in the string. */ |
| 4542 | RE_TRANSLATE_TYPE translate = bufp->translate; | 4664 | RE_TRANSLATE_TYPE translate = bufp->translate; |
| 4543 | 4665 | ||
| 4544 | /* Nonzero if we have to concern multibyte character. */ | 4666 | /* Nonzero if BUFP is setup for multibyte characters. */ |
| 4545 | const boolean multibyte = RE_MULTIBYTE_P (bufp); | 4667 | #ifdef emacs |
| 4668 | const boolean multibyte = 1; | ||
| 4669 | #else | ||
| 4670 | const boolean multibyte = 0; | ||
| 4671 | #endif | ||
| 4672 | /* Nonzero if STR1 and STR2 contains multibyte characters. */ | ||
| 4673 | const boolean target_multibyte = RE_TARGET_MULTIBYTE_P (bufp); | ||
| 4546 | 4674 | ||
| 4547 | /* Failure point stack. Each place that can handle a failure further | 4675 | /* Failure point stack. Each place that can handle a failure further |
| 4548 | down the line pushes a failure point on this stack. It consists of | 4676 | down the line pushes a failure point on this stack. It consists of |
| @@ -4907,7 +5035,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4907 | 5035 | ||
| 4908 | PREFETCH (); | 5036 | PREFETCH (); |
| 4909 | pat_ch = STRING_CHAR_AND_LENGTH (p, pend - p, pat_charlen); | 5037 | pat_ch = STRING_CHAR_AND_LENGTH (p, pend - p, pat_charlen); |
| 4910 | buf_ch = STRING_CHAR_AND_LENGTH (d, dend - d, buf_charlen); | 5038 | buf_ch = TARGET_CHAR_AND_LENGTH (d, dend - d, buf_charlen); |
| 4911 | 5039 | ||
| 4912 | if (RE_TRANSLATE (translate, buf_ch) | 5040 | if (RE_TRANSLATE (translate, buf_ch) |
| 4913 | != pat_ch) | 5041 | != pat_ch) |
| @@ -4936,16 +5064,37 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4936 | } | 5064 | } |
| 4937 | else | 5065 | else |
| 4938 | { | 5066 | { |
| 4939 | do | 5067 | if (multibyte == target_multibyte) |
| 4940 | { | 5068 | do |
| 4941 | PREFETCH (); | 5069 | { |
| 4942 | if (*d++ != *p++) | 5070 | PREFETCH (); |
| 4943 | { | 5071 | if (*d++ != *p++) |
| 4944 | d = dfail; | 5072 | { |
| 4945 | goto fail; | 5073 | d = dfail; |
| 4946 | } | 5074 | goto fail; |
| 4947 | } | 5075 | } |
| 4948 | while (--mcnt); | 5076 | } |
| 5077 | while (--mcnt); | ||
| 5078 | else /* i.e. multibyte && ! target_multibyte */ | ||
| 5079 | do | ||
| 5080 | { | ||
| 5081 | int pat_charlen, buf_charlen; | ||
| 5082 | unsigned int pat_ch, buf_ch; | ||
| 5083 | |||
| 5084 | PREFETCH (); | ||
| 5085 | pat_ch = STRING_CHAR_AND_LENGTH (p, pend - p, pat_charlen); | ||
| 5086 | buf_ch = TARGET_CHAR_AND_LENGTH (d, dend - d, buf_charlen); | ||
| 5087 | |||
| 5088 | if (pat_ch != buf_ch) | ||
| 5089 | { | ||
| 5090 | d = dfail; | ||
| 5091 | goto fail; | ||
| 5092 | } | ||
| 5093 | p += pat_charlen; | ||
| 5094 | d += buf_charlen; | ||
| 5095 | mcnt -= pat_charlen; | ||
| 5096 | } | ||
| 5097 | while (mcnt > 0); | ||
| 4949 | } | 5098 | } |
| 4950 | break; | 5099 | break; |
| 4951 | 5100 | ||
| @@ -4959,7 +5108,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 4959 | DEBUG_PRINT1 ("EXECUTING anychar.\n"); | 5108 | DEBUG_PRINT1 ("EXECUTING anychar.\n"); |
| 4960 | 5109 | ||
| 4961 | PREFETCH (); | 5110 | PREFETCH (); |
| 4962 | buf_ch = RE_STRING_CHAR_AND_LENGTH (d, dend - d, buf_charlen); | 5111 | buf_ch = TARGET_CHAR_AND_LENGTH (d, dend - d, buf_charlen); |
| 4963 | buf_ch = TRANSLATE (buf_ch); | 5112 | buf_ch = TRANSLATE (buf_ch); |
| 4964 | 5113 | ||
| 4965 | if ((!(bufp->syntax & RE_DOT_NEWLINE) | 5114 | if ((!(bufp->syntax & RE_DOT_NEWLINE) |
| @@ -5003,10 +5152,10 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5003 | } | 5152 | } |
| 5004 | 5153 | ||
| 5005 | PREFETCH (); | 5154 | PREFETCH (); |
| 5006 | c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len); | 5155 | c = TARGET_CHAR_AND_LENGTH (d, dend - d, len); |
| 5007 | c = TRANSLATE (c); /* The character to match. */ | 5156 | c = TRANSLATE (c); /* The character to match. */ |
| 5008 | 5157 | ||
| 5009 | if (SINGLE_BYTE_CHAR_P (c)) | 5158 | if (! multibyte || IS_REAL_ASCII (c)) |
| 5010 | { /* Lookup bitmap. */ | 5159 | { /* Lookup bitmap. */ |
| 5011 | /* Cast to `unsigned' instead of `unsigned char' in | 5160 | /* Cast to `unsigned' instead of `unsigned char' in |
| 5012 | case the bit list is a full 32 bytes long. */ | 5161 | case the bit list is a full 32 bytes long. */ |
| @@ -5146,7 +5295,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5146 | /* Compare that many; failure if mismatch, else move | 5295 | /* Compare that many; failure if mismatch, else move |
| 5147 | past them. */ | 5296 | past them. */ |
| 5148 | if (RE_TRANSLATE_P (translate) | 5297 | if (RE_TRANSLATE_P (translate) |
| 5149 | ? bcmp_translate (d, d2, mcnt, translate, multibyte) | 5298 | ? bcmp_translate (d, d2, mcnt, translate, target_multibyte) |
| 5150 | : memcmp (d, d2, mcnt)) | 5299 | : memcmp (d, d2, mcnt)) |
| 5151 | { | 5300 | { |
| 5152 | d = dfail; | 5301 | d = dfail; |
| @@ -5169,7 +5318,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5169 | } | 5318 | } |
| 5170 | else | 5319 | else |
| 5171 | { | 5320 | { |
| 5172 | unsigned char c; | 5321 | unsigned c; |
| 5173 | GET_CHAR_BEFORE_2 (c, d, string1, end1, string2, end2); | 5322 | GET_CHAR_BEFORE_2 (c, d, string1, end1, string2, end2); |
| 5174 | if (c == '\n') | 5323 | if (c == '\n') |
| 5175 | break; | 5324 | break; |
| @@ -5421,6 +5570,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5421 | is the character at D, and S2 is the syntax of C2. */ | 5570 | is the character at D, and S2 is the syntax of C2. */ |
| 5422 | re_wchar_t c1, c2; | 5571 | re_wchar_t c1, c2; |
| 5423 | int s1, s2; | 5572 | int s1, s2; |
| 5573 | int dummy; | ||
| 5424 | #ifdef emacs | 5574 | #ifdef emacs |
| 5425 | int offset = PTR_TO_OFFSET (d - 1); | 5575 | int offset = PTR_TO_OFFSET (d - 1); |
| 5426 | int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); | 5576 | int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); |
| @@ -5432,7 +5582,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5432 | UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1); | 5582 | UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1); |
| 5433 | #endif | 5583 | #endif |
| 5434 | PREFETCH_NOLIMIT (); | 5584 | PREFETCH_NOLIMIT (); |
| 5435 | c2 = RE_STRING_CHAR (d, dend - d); | 5585 | c2 = TARGET_CHAR_AND_LENGTH (d, dend - d, dummy); |
| 5436 | s2 = SYNTAX (c2); | 5586 | s2 = SYNTAX (c2); |
| 5437 | 5587 | ||
| 5438 | if (/* Case 2: Only one of S1 and S2 is Sword. */ | 5588 | if (/* Case 2: Only one of S1 and S2 is Sword. */ |
| @@ -5461,13 +5611,14 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5461 | is the character at D, and S2 is the syntax of C2. */ | 5611 | is the character at D, and S2 is the syntax of C2. */ |
| 5462 | re_wchar_t c1, c2; | 5612 | re_wchar_t c1, c2; |
| 5463 | int s1, s2; | 5613 | int s1, s2; |
| 5614 | int dummy; | ||
| 5464 | #ifdef emacs | 5615 | #ifdef emacs |
| 5465 | int offset = PTR_TO_OFFSET (d); | 5616 | int offset = PTR_TO_OFFSET (d); |
| 5466 | int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); | 5617 | int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); |
| 5467 | UPDATE_SYNTAX_TABLE (charpos); | 5618 | UPDATE_SYNTAX_TABLE (charpos); |
| 5468 | #endif | 5619 | #endif |
| 5469 | PREFETCH (); | 5620 | PREFETCH (); |
| 5470 | c2 = RE_STRING_CHAR (d, dend - d); | 5621 | c2 = TARGET_CHAR_AND_LENGTH (d, dend - d, dummy); |
| 5471 | s2 = SYNTAX (c2); | 5622 | s2 = SYNTAX (c2); |
| 5472 | 5623 | ||
| 5473 | /* Case 2: S2 is not Sword. */ | 5624 | /* Case 2: S2 is not Sword. */ |
| @@ -5505,6 +5656,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5505 | is the character at D, and S2 is the syntax of C2. */ | 5656 | is the character at D, and S2 is the syntax of C2. */ |
| 5506 | re_wchar_t c1, c2; | 5657 | re_wchar_t c1, c2; |
| 5507 | int s1, s2; | 5658 | int s1, s2; |
| 5659 | int dummy; | ||
| 5508 | #ifdef emacs | 5660 | #ifdef emacs |
| 5509 | int offset = PTR_TO_OFFSET (d) - 1; | 5661 | int offset = PTR_TO_OFFSET (d) - 1; |
| 5510 | int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); | 5662 | int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); |
| @@ -5521,7 +5673,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5521 | if (!AT_STRINGS_END (d)) | 5673 | if (!AT_STRINGS_END (d)) |
| 5522 | { | 5674 | { |
| 5523 | PREFETCH_NOLIMIT (); | 5675 | PREFETCH_NOLIMIT (); |
| 5524 | c2 = RE_STRING_CHAR (d, dend - d); | 5676 | c2 = TARGET_CHAR_AND_LENGTH (d, dend - d, dummy); |
| 5525 | #ifdef emacs | 5677 | #ifdef emacs |
| 5526 | UPDATE_SYNTAX_TABLE_FORWARD (charpos); | 5678 | UPDATE_SYNTAX_TABLE_FORWARD (charpos); |
| 5527 | #endif | 5679 | #endif |
| @@ -5552,8 +5704,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5552 | int len; | 5704 | int len; |
| 5553 | re_wchar_t c; | 5705 | re_wchar_t c; |
| 5554 | 5706 | ||
| 5555 | c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len); | 5707 | c = TARGET_CHAR_AND_LENGTH (d, dend - d, len); |
| 5556 | |||
| 5557 | if ((SYNTAX (c) != (enum syntaxcode) mcnt) ^ not) | 5708 | if ((SYNTAX (c) != (enum syntaxcode) mcnt) ^ not) |
| 5558 | goto fail; | 5709 | goto fail; |
| 5559 | d += len; | 5710 | d += len; |
| @@ -5589,7 +5740,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5589 | int len; | 5740 | int len; |
| 5590 | re_wchar_t c; | 5741 | re_wchar_t c; |
| 5591 | 5742 | ||
| 5592 | c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len); | 5743 | c = TARGET_CHAR_AND_LENGTH (d, dend - d, len); |
| 5593 | 5744 | ||
| 5594 | if ((!CHAR_HAS_CATEGORY (c, mcnt)) ^ not) | 5745 | if ((!CHAR_HAS_CATEGORY (c, mcnt)) ^ not) |
| 5595 | goto fail; | 5746 | goto fail; |
| @@ -5665,11 +5816,11 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5665 | bytes; nonzero otherwise. */ | 5816 | bytes; nonzero otherwise. */ |
| 5666 | 5817 | ||
| 5667 | static int | 5818 | static int |
| 5668 | bcmp_translate (s1, s2, len, translate, multibyte) | 5819 | bcmp_translate (s1, s2, len, translate, target_multibyte) |
| 5669 | re_char *s1, *s2; | 5820 | re_char *s1, *s2; |
| 5670 | register int len; | 5821 | register int len; |
| 5671 | RE_TRANSLATE_TYPE translate; | 5822 | RE_TRANSLATE_TYPE translate; |
| 5672 | const int multibyte; | 5823 | const int target_multibyte; |
| 5673 | { | 5824 | { |
| 5674 | register re_char *p1 = s1, *p2 = s2; | 5825 | register re_char *p1 = s1, *p2 = s2; |
| 5675 | re_char *p1_end = s1 + len; | 5826 | re_char *p1_end = s1 + len; |
| @@ -5682,8 +5833,8 @@ bcmp_translate (s1, s2, len, translate, multibyte) | |||
| 5682 | int p1_charlen, p2_charlen; | 5833 | int p1_charlen, p2_charlen; |
| 5683 | re_wchar_t p1_ch, p2_ch; | 5834 | re_wchar_t p1_ch, p2_ch; |
| 5684 | 5835 | ||
| 5685 | p1_ch = RE_STRING_CHAR_AND_LENGTH (p1, p1_end - p1, p1_charlen); | 5836 | p1_ch = TARGET_CHAR_AND_LENGTH (p1, p1_end - p1, p1_charlen); |
| 5686 | p2_ch = RE_STRING_CHAR_AND_LENGTH (p2, p2_end - p2, p2_charlen); | 5837 | p2_ch = TARGET_CHAR_AND_LENGTH (p2, p2_end - p2, p2_charlen); |
| 5687 | 5838 | ||
| 5688 | if (RE_TRANSLATE (translate, p1_ch) | 5839 | if (RE_TRANSLATE (translate, p1_ch) |
| 5689 | != RE_TRANSLATE (translate, p2_ch)) | 5840 | != RE_TRANSLATE (translate, p2_ch)) |