diff options
| author | Paul Eggert | 2021-12-18 16:12:38 -0800 |
|---|---|---|
| committer | Paul Eggert | 2021-12-18 16:15:49 -0800 |
| commit | f05a93e8232e6f56458ac16d733b03e96a63e930 (patch) | |
| tree | b6b95198cf4b5e0b2969b7a427db1c5146a7d856 /lib/regexec.c | |
| parent | 35da3ed05212e0222841becf614c109011f9ad80 (diff) | |
| download | emacs-f05a93e8232e6f56458ac16d733b03e96a63e930.tar.gz emacs-f05a93e8232e6f56458ac16d733b03e96a63e930.zip | |
Update from gnulib
Make the following changes by hand, and run ‘admin/merge-gnulib’.
* configure.ac (AM_CONDITIONAL): Adjust to new Gnulib convention.
Diffstat (limited to 'lib/regexec.c')
| -rw-r--r-- | lib/regexec.c | 84 |
1 file changed, 20 insertions, 64 deletions
diff --git a/lib/regexec.c b/lib/regexec.c
index 83e9aaf8cad..3196708373f 100644
--- a/lib/regexec.c
+++ b/lib/regexec.c
| @@ -67,11 +67,9 @@ static reg_errcode_t set_regs (const regex_t *preg, | |||
| 67 | bool fl_backtrack); | 67 | bool fl_backtrack); |
| 68 | static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs); | 68 | static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs); |
| 69 | 69 | ||
| 70 | #ifdef RE_ENABLE_I18N | ||
| 71 | static int sift_states_iter_mb (const re_match_context_t *mctx, | 70 | static int sift_states_iter_mb (const re_match_context_t *mctx, |
| 72 | re_sift_context_t *sctx, | 71 | re_sift_context_t *sctx, |
| 73 | Idx node_idx, Idx str_idx, Idx max_str_idx); | 72 | Idx node_idx, Idx str_idx, Idx max_str_idx); |
| 74 | #endif /* RE_ENABLE_I18N */ | ||
| 75 | static reg_errcode_t sift_states_backward (const re_match_context_t *mctx, | 73 | static reg_errcode_t sift_states_backward (const re_match_context_t *mctx, |
| 76 | re_sift_context_t *sctx); | 74 | re_sift_context_t *sctx); |
| 77 | static reg_errcode_t build_sifted_states (const re_match_context_t *mctx, | 75 | static reg_errcode_t build_sifted_states (const re_match_context_t *mctx, |
| @@ -123,10 +121,8 @@ static re_dfastate_t *transit_state_sb (reg_errcode_t *err, | |||
| 123 | re_match_context_t *mctx, | 121 | re_match_context_t *mctx, |
| 124 | re_dfastate_t *pstate); | 122 | re_dfastate_t *pstate); |
| 125 | #endif | 123 | #endif |
| 126 | #ifdef RE_ENABLE_I18N | ||
| 127 | static reg_errcode_t transit_state_mb (re_match_context_t *mctx, | 124 | static reg_errcode_t transit_state_mb (re_match_context_t *mctx, |
| 128 | re_dfastate_t *pstate); | 125 | re_dfastate_t *pstate); |
| 129 | #endif /* RE_ENABLE_I18N */ | ||
| 130 | static reg_errcode_t transit_state_bkref (re_match_context_t *mctx, | 126 | static reg_errcode_t transit_state_bkref (re_match_context_t *mctx, |
| 131 | const re_node_set *nodes); | 127 | const re_node_set *nodes); |
| 132 | static reg_errcode_t get_subexp (re_match_context_t *mctx, | 128 | static reg_errcode_t get_subexp (re_match_context_t *mctx, |
| @@ -156,14 +152,12 @@ static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx, | |||
| 156 | re_node_set *cur_nodes, Idx cur_str, | 152 | re_node_set *cur_nodes, Idx cur_str, |
| 157 | Idx subexp_num, int type); | 153 | Idx subexp_num, int type); |
| 158 | static bool build_trtable (const re_dfa_t *dfa, re_dfastate_t *state); | 154 | static bool build_trtable (const re_dfa_t *dfa, re_dfastate_t *state); |
| 159 | #ifdef RE_ENABLE_I18N | ||
| 160 | static int check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, | 155 | static int check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, |
| 161 | const re_string_t *input, Idx idx); | 156 | const re_string_t *input, Idx idx); |
| 162 | # ifdef _LIBC | 157 | #ifdef _LIBC |
| 163 | static unsigned int find_collation_sequence_value (const unsigned char *mbs, | 158 | static unsigned int find_collation_sequence_value (const unsigned char *mbs, |
| 164 | size_t name_len); | 159 | size_t name_len); |
| 165 | # endif /* _LIBC */ | 160 | #endif |
| 166 | #endif /* RE_ENABLE_I18N */ | ||
| 167 | static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa, | 161 | static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa, |
| 168 | const re_dfastate_t *state, | 162 | const re_dfastate_t *state, |
| 169 | re_node_set *states_node, | 163 | re_node_set *states_node, |
| @@ -758,10 +752,9 @@ re_search_internal (const regex_t *preg, const char *string, Idx length, | |||
| 758 | 752 | ||
| 759 | offset = match_first - mctx.input.raw_mbs_idx; | 753 | offset = match_first - mctx.input.raw_mbs_idx; |
| 760 | } | 754 | } |
| 761 | /* If MATCH_FIRST is out of the buffer, leave it as '\0'. | 755 | /* Use buffer byte if OFFSET is in buffer, otherwise '\0'. */ |
| 762 | Note that MATCH_FIRST must not be smaller than 0. */ | 756 | ch = (offset < mctx.input.valid_len |
| 763 | ch = (match_first >= length | 757 | ? re_string_byte_at (&mctx.input, offset) : 0); |
| 764 | ? 0 : re_string_byte_at (&mctx.input, offset)); | ||
| 765 | if (fastmap[ch]) | 758 | if (fastmap[ch]) |
| 766 | break; | 759 | break; |
| 767 | match_first += incr; | 760 | match_first += incr; |
| @@ -780,12 +773,10 @@ re_search_internal (const regex_t *preg, const char *string, Idx length, | |||
| 780 | if (__glibc_unlikely (err != REG_NOERROR)) | 773 | if (__glibc_unlikely (err != REG_NOERROR)) |
| 781 | goto free_return; | 774 | goto free_return; |
| 782 | 775 | ||
| 783 | #ifdef RE_ENABLE_I18N | 776 | /* Don't consider this char as a possible match start if it part, |
| 784 | /* Don't consider this char as a possible match start if it part, | 777 | yet isn't the head, of a multibyte character. */ |
| 785 | yet isn't the head, of a multibyte character. */ | ||
| 786 | if (!sb && !re_string_first_byte (&mctx.input, 0)) | 778 | if (!sb && !re_string_first_byte (&mctx.input, 0)) |
| 787 | continue; | 779 | continue; |
| 788 | #endif | ||
| 789 | 780 | ||
| 790 | /* It seems to be appropriate one, then use the matcher. */ | 781 | /* It seems to be appropriate one, then use the matcher. */ |
| 791 | /* We assume that the matching starts from 0. */ | 782 | /* We assume that the matching starts from 0. */ |
| @@ -859,7 +850,6 @@ re_search_internal (const regex_t *preg, const char *string, Idx length, | |||
| 859 | for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) | 850 | for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) |
| 860 | if (pmatch[reg_idx].rm_so != -1) | 851 | if (pmatch[reg_idx].rm_so != -1) |
| 861 | { | 852 | { |
| 862 | #ifdef RE_ENABLE_I18N | ||
| 863 | if (__glibc_unlikely (mctx.input.offsets_needed != 0)) | 853 | if (__glibc_unlikely (mctx.input.offsets_needed != 0)) |
| 864 | { | 854 | { |
| 865 | pmatch[reg_idx].rm_so = | 855 | pmatch[reg_idx].rm_so = |
| @@ -871,9 +861,6 @@ re_search_internal (const regex_t *preg, const char *string, Idx length, | |||
| 871 | ? mctx.input.valid_raw_len | 861 | ? mctx.input.valid_raw_len |
| 872 | : mctx.input.offsets[pmatch[reg_idx].rm_eo]); | 862 | : mctx.input.offsets[pmatch[reg_idx].rm_eo]); |
| 873 | } | 863 | } |
| 874 | #else | ||
| 875 | DEBUG_ASSERT (mctx.input.offsets_needed == 0); | ||
| 876 | #endif | ||
| 877 | pmatch[reg_idx].rm_so += match_first; | 864 | pmatch[reg_idx].rm_so += match_first; |
| 878 | pmatch[reg_idx].rm_eo += match_first; | 865 | pmatch[reg_idx].rm_eo += match_first; |
| 879 | } | 866 | } |
| @@ -997,8 +984,7 @@ prune_impossible_nodes (re_match_context_t *mctx) | |||
| 997 | We must select appropriate initial state depending on the context, | 984 | We must select appropriate initial state depending on the context, |
| 998 | since initial states may have constraints like "\<", "^", etc.. */ | 985 | since initial states may have constraints like "\<", "^", etc.. */ |
| 999 | 986 | ||
| 1000 | static inline re_dfastate_t * | 987 | static __always_inline re_dfastate_t * |
| 1001 | __attribute__ ((always_inline)) | ||
| 1002 | acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx, | 988 | acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx, |
| 1003 | Idx idx) | 989 | Idx idx) |
| 1004 | { | 990 | { |
| @@ -1262,12 +1248,9 @@ proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs, | |||
| 1262 | Idx naccepted = 0; | 1248 | Idx naccepted = 0; |
| 1263 | re_token_type_t type = dfa->nodes[node].type; | 1249 | re_token_type_t type = dfa->nodes[node].type; |
| 1264 | 1250 | ||
| 1265 | #ifdef RE_ENABLE_I18N | ||
| 1266 | if (dfa->nodes[node].accept_mb) | 1251 | if (dfa->nodes[node].accept_mb) |
| 1267 | naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx); | 1252 | naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx); |
| 1268 | else | 1253 | else if (type == OP_BACK_REF) |
| 1269 | #endif /* RE_ENABLE_I18N */ | ||
| 1270 | if (type == OP_BACK_REF) | ||
| 1271 | { | 1254 | { |
| 1272 | Idx subexp_idx = dfa->nodes[node].opr.idx + 1; | 1255 | Idx subexp_idx = dfa->nodes[node].opr.idx + 1; |
| 1273 | if (subexp_idx < nregs) | 1256 | if (subexp_idx < nregs) |
| @@ -1635,12 +1618,10 @@ build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx, | |||
| 1635 | bool ok; | 1618 | bool ok; |
| 1636 | DEBUG_ASSERT (!IS_EPSILON_NODE (dfa->nodes[prev_node].type)); | 1619 | DEBUG_ASSERT (!IS_EPSILON_NODE (dfa->nodes[prev_node].type)); |
| 1637 | 1620 | ||
| 1638 | #ifdef RE_ENABLE_I18N | ||
| 1639 | /* If the node may accept "multi byte". */ | 1621 | /* If the node may accept "multi byte". */ |
| 1640 | if (dfa->nodes[prev_node].accept_mb) | 1622 | if (dfa->nodes[prev_node].accept_mb) |
| 1641 | naccepted = sift_states_iter_mb (mctx, sctx, prev_node, | 1623 | naccepted = sift_states_iter_mb (mctx, sctx, prev_node, |
| 1642 | str_idx, sctx->last_str_idx); | 1624 | str_idx, sctx->last_str_idx); |
| 1643 | #endif /* RE_ENABLE_I18N */ | ||
| 1644 | 1625 | ||
| 1645 | /* We don't check backreferences here. | 1626 | /* We don't check backreferences here. |
| 1646 | See update_cur_sifted_state(). */ | 1627 | See update_cur_sifted_state(). */ |
| @@ -1689,6 +1670,7 @@ clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx) | |||
| 1689 | 1670 | ||
| 1690 | if (top < next_state_log_idx) | 1671 | if (top < next_state_log_idx) |
| 1691 | { | 1672 | { |
| 1673 | DEBUG_ASSERT (mctx->state_log != NULL); | ||
| 1692 | memset (mctx->state_log + top + 1, '\0', | 1674 | memset (mctx->state_log + top + 1, '\0', |
| 1693 | sizeof (re_dfastate_t *) * (next_state_log_idx - top)); | 1675 | sizeof (re_dfastate_t *) * (next_state_log_idx - top)); |
| 1694 | mctx->state_log_top = next_state_log_idx; | 1676 | mctx->state_log_top = next_state_log_idx; |
| @@ -2177,7 +2159,6 @@ sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx, | |||
| 2177 | } | 2159 | } |
| 2178 | 2160 | ||
| 2179 | 2161 | ||
| 2180 | #ifdef RE_ENABLE_I18N | ||
| 2181 | static int | 2162 | static int |
| 2182 | sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx, | 2163 | sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx, |
| 2183 | Idx node_idx, Idx str_idx, Idx max_str_idx) | 2164 | Idx node_idx, Idx str_idx, Idx max_str_idx) |
| @@ -2197,8 +2178,6 @@ sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx, | |||
| 2197 | 'naccepted' bytes input. */ | 2178 | 'naccepted' bytes input. */ |
| 2198 | return naccepted; | 2179 | return naccepted; |
| 2199 | } | 2180 | } |
| 2200 | #endif /* RE_ENABLE_I18N */ | ||
| 2201 | |||
| 2202 | 2181 | ||
| 2203 | /* Functions for state transition. */ | 2182 | /* Functions for state transition. */ |
| 2204 | 2183 | ||
| @@ -2216,7 +2195,6 @@ transit_state (reg_errcode_t *err, re_match_context_t *mctx, | |||
| 2216 | re_dfastate_t **trtable; | 2195 | re_dfastate_t **trtable; |
| 2217 | unsigned char ch; | 2196 | unsigned char ch; |
| 2218 | 2197 | ||
| 2219 | #ifdef RE_ENABLE_I18N | ||
| 2220 | /* If the current state can accept multibyte. */ | 2198 | /* If the current state can accept multibyte. */ |
| 2221 | if (__glibc_unlikely (state->accept_mb)) | 2199 | if (__glibc_unlikely (state->accept_mb)) |
| 2222 | { | 2200 | { |
| @@ -2224,7 +2202,6 @@ transit_state (reg_errcode_t *err, re_match_context_t *mctx, | |||
| 2224 | if (__glibc_unlikely (*err != REG_NOERROR)) | 2202 | if (__glibc_unlikely (*err != REG_NOERROR)) |
| 2225 | return NULL; | 2203 | return NULL; |
| 2226 | } | 2204 | } |
| 2227 | #endif /* RE_ENABLE_I18N */ | ||
| 2228 | 2205 | ||
| 2229 | /* Then decide the next state with the single byte. */ | 2206 | /* Then decide the next state with the single byte. */ |
| 2230 | #if 0 | 2207 | #if 0 |
| @@ -2445,7 +2422,6 @@ transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx, | |||
| 2445 | } | 2422 | } |
| 2446 | #endif | 2423 | #endif |
| 2447 | 2424 | ||
| 2448 | #ifdef RE_ENABLE_I18N | ||
| 2449 | static reg_errcode_t | 2425 | static reg_errcode_t |
| 2450 | transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate) | 2426 | transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate) |
| 2451 | { | 2427 | { |
| @@ -2513,7 +2489,6 @@ transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate) | |||
| 2513 | } | 2489 | } |
| 2514 | return REG_NOERROR; | 2490 | return REG_NOERROR; |
| 2515 | } | 2491 | } |
| 2516 | #endif /* RE_ENABLE_I18N */ | ||
| 2517 | 2492 | ||
| 2518 | static reg_errcode_t | 2493 | static reg_errcode_t |
| 2519 | transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes) | 2494 | transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes) |
| @@ -3003,9 +2978,7 @@ check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx, | |||
| 3003 | const re_dfa_t *const dfa = mctx->dfa; | 2978 | const re_dfa_t *const dfa = mctx->dfa; |
| 3004 | bool ok; | 2979 | bool ok; |
| 3005 | Idx cur_idx; | 2980 | Idx cur_idx; |
| 3006 | #ifdef RE_ENABLE_I18N | ||
| 3007 | reg_errcode_t err = REG_NOERROR; | 2981 | reg_errcode_t err = REG_NOERROR; |
| 3008 | #endif | ||
| 3009 | re_node_set union_set; | 2982 | re_node_set union_set; |
| 3010 | re_node_set_init_empty (&union_set); | 2983 | re_node_set_init_empty (&union_set); |
| 3011 | for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx) | 2984 | for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx) |
| @@ -3014,7 +2987,6 @@ check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx, | |||
| 3014 | Idx cur_node = cur_nodes->elems[cur_idx]; | 2987 | Idx cur_node = cur_nodes->elems[cur_idx]; |
| 3015 | DEBUG_ASSERT (!IS_EPSILON_NODE (dfa->nodes[cur_node].type)); | 2988 | DEBUG_ASSERT (!IS_EPSILON_NODE (dfa->nodes[cur_node].type)); |
| 3016 | 2989 | ||
| 3017 | #ifdef RE_ENABLE_I18N | ||
| 3018 | /* If the node may accept "multi byte". */ | 2990 | /* If the node may accept "multi byte". */ |
| 3019 | if (dfa->nodes[cur_node].accept_mb) | 2991 | if (dfa->nodes[cur_node].accept_mb) |
| 3020 | { | 2992 | { |
| @@ -3052,7 +3024,7 @@ check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx, | |||
| 3052 | } | 3024 | } |
| 3053 | } | 3025 | } |
| 3054 | } | 3026 | } |
| 3055 | #endif /* RE_ENABLE_I18N */ | 3027 | |
| 3056 | if (naccepted | 3028 | if (naccepted |
| 3057 | || check_node_accept (mctx, dfa->nodes + cur_node, str_idx)) | 3029 | || check_node_accept (mctx, dfa->nodes + cur_node, str_idx)) |
| 3058 | { | 3030 | { |
| @@ -3476,18 +3448,15 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, | |||
| 3476 | } | 3448 | } |
| 3477 | else if (type == OP_PERIOD) | 3449 | else if (type == OP_PERIOD) |
| 3478 | { | 3450 | { |
| 3479 | #ifdef RE_ENABLE_I18N | ||
| 3480 | if (dfa->mb_cur_max > 1) | 3451 | if (dfa->mb_cur_max > 1) |
| 3481 | bitset_merge (accepts, dfa->sb_char); | 3452 | bitset_merge (accepts, dfa->sb_char); |
| 3482 | else | 3453 | else |
| 3483 | #endif | ||
| 3484 | bitset_set_all (accepts); | 3454 | bitset_set_all (accepts); |
| 3485 | if (!(dfa->syntax & RE_DOT_NEWLINE)) | 3455 | if (!(dfa->syntax & RE_DOT_NEWLINE)) |
| 3486 | bitset_clear (accepts, '\n'); | 3456 | bitset_clear (accepts, '\n'); |
| 3487 | if (dfa->syntax & RE_DOT_NOT_NULL) | 3457 | if (dfa->syntax & RE_DOT_NOT_NULL) |
| 3488 | bitset_clear (accepts, '\0'); | 3458 | bitset_clear (accepts, '\0'); |
| 3489 | } | 3459 | } |
| 3490 | #ifdef RE_ENABLE_I18N | ||
| 3491 | else if (type == OP_UTF8_PERIOD) | 3460 | else if (type == OP_UTF8_PERIOD) |
| 3492 | { | 3461 | { |
| 3493 | if (ASCII_CHARS % BITSET_WORD_BITS == 0) | 3462 | if (ASCII_CHARS % BITSET_WORD_BITS == 0) |
| @@ -3499,7 +3468,6 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, | |||
| 3499 | if (dfa->syntax & RE_DOT_NOT_NULL) | 3468 | if (dfa->syntax & RE_DOT_NOT_NULL) |
| 3500 | bitset_clear (accepts, '\0'); | 3469 | bitset_clear (accepts, '\0'); |
| 3501 | } | 3470 | } |
| 3502 | #endif | ||
| 3503 | else | 3471 | else |
| 3504 | continue; | 3472 | continue; |
| 3505 | 3473 | ||
| @@ -3530,12 +3498,10 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, | |||
| 3530 | bitset_empty (accepts); | 3498 | bitset_empty (accepts); |
| 3531 | continue; | 3499 | continue; |
| 3532 | } | 3500 | } |
| 3533 | #ifdef RE_ENABLE_I18N | ||
| 3534 | if (dfa->mb_cur_max > 1) | 3501 | if (dfa->mb_cur_max > 1) |
| 3535 | for (j = 0; j < BITSET_WORDS; ++j) | 3502 | for (j = 0; j < BITSET_WORDS; ++j) |
| 3536 | any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j])); | 3503 | any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j])); |
| 3537 | else | 3504 | else |
| 3538 | #endif | ||
| 3539 | for (j = 0; j < BITSET_WORDS; ++j) | 3505 | for (j = 0; j < BITSET_WORDS; ++j) |
| 3540 | any_set |= (accepts[j] &= dfa->word_char[j]); | 3506 | any_set |= (accepts[j] &= dfa->word_char[j]); |
| 3541 | if (!any_set) | 3507 | if (!any_set) |
| @@ -3549,12 +3515,10 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, | |||
| 3549 | bitset_empty (accepts); | 3515 | bitset_empty (accepts); |
| 3550 | continue; | 3516 | continue; |
| 3551 | } | 3517 | } |
| 3552 | #ifdef RE_ENABLE_I18N | ||
| 3553 | if (dfa->mb_cur_max > 1) | 3518 | if (dfa->mb_cur_max > 1) |
| 3554 | for (j = 0; j < BITSET_WORDS; ++j) | 3519 | for (j = 0; j < BITSET_WORDS; ++j) |
| 3555 | any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j])); | 3520 | any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j])); |
| 3556 | else | 3521 | else |
| 3557 | #endif | ||
| 3558 | for (j = 0; j < BITSET_WORDS; ++j) | 3522 | for (j = 0; j < BITSET_WORDS; ++j) |
| 3559 | any_set |= (accepts[j] &= ~dfa->word_char[j]); | 3523 | any_set |= (accepts[j] &= ~dfa->word_char[j]); |
| 3560 | if (!any_set) | 3524 | if (!any_set) |
| @@ -3631,7 +3595,6 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, | |||
| 3631 | return -1; | 3595 | return -1; |
| 3632 | } | 3596 | } |
| 3633 | 3597 | ||
| 3634 | #ifdef RE_ENABLE_I18N | ||
| 3635 | /* Check how many bytes the node 'dfa->nodes[node_idx]' accepts. | 3598 | /* Check how many bytes the node 'dfa->nodes[node_idx]' accepts. |
| 3636 | Return the number of the bytes the node accepts. | 3599 | Return the number of the bytes the node accepts. |
| 3637 | STR_IDX is the current index of the input string. | 3600 | STR_IDX is the current index of the input string. |
| @@ -3640,9 +3603,9 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, | |||
| 3640 | one collating element like '.', '[a-z]', opposite to the other nodes | 3603 | one collating element like '.', '[a-z]', opposite to the other nodes |
| 3641 | can only accept one byte. */ | 3604 | can only accept one byte. */ |
| 3642 | 3605 | ||
| 3643 | # ifdef _LIBC | 3606 | #ifdef _LIBC |
| 3644 | # include <locale/weight.h> | 3607 | # include <locale/weight.h> |
| 3645 | # endif | 3608 | #endif |
| 3646 | 3609 | ||
| 3647 | static int | 3610 | static int |
| 3648 | check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, | 3611 | check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, |
| @@ -3726,12 +3689,12 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, | |||
| 3726 | if (node->type == COMPLEX_BRACKET) | 3689 | if (node->type == COMPLEX_BRACKET) |
| 3727 | { | 3690 | { |
| 3728 | const re_charset_t *cset = node->opr.mbcset; | 3691 | const re_charset_t *cset = node->opr.mbcset; |
| 3729 | # ifdef _LIBC | 3692 | #ifdef _LIBC |
| 3730 | const unsigned char *pin | 3693 | const unsigned char *pin |
| 3731 | = ((const unsigned char *) re_string_get_buffer (input) + str_idx); | 3694 | = ((const unsigned char *) re_string_get_buffer (input) + str_idx); |
| 3732 | Idx j; | 3695 | Idx j; |
| 3733 | uint32_t nrules; | 3696 | uint32_t nrules; |
| 3734 | # endif /* _LIBC */ | 3697 | #endif |
| 3735 | int match_len = 0; | 3698 | int match_len = 0; |
| 3736 | wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars) | 3699 | wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars) |
| 3737 | ? re_string_wchar_at (input, str_idx) : 0); | 3700 | ? re_string_wchar_at (input, str_idx) : 0); |
| @@ -3754,7 +3717,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, | |||
| 3754 | } | 3717 | } |
| 3755 | } | 3718 | } |
| 3756 | 3719 | ||
| 3757 | # ifdef _LIBC | 3720 | #ifdef _LIBC |
| 3758 | nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); | 3721 | nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); |
| 3759 | if (nrules != 0) | 3722 | if (nrules != 0) |
| 3760 | { | 3723 | { |
| @@ -3843,7 +3806,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, | |||
| 3843 | } | 3806 | } |
| 3844 | } | 3807 | } |
| 3845 | else | 3808 | else |
| 3846 | # endif /* _LIBC */ | 3809 | #endif /* _LIBC */ |
| 3847 | { | 3810 | { |
| 3848 | /* match with range expression? */ | 3811 | /* match with range expression? */ |
| 3849 | for (i = 0; i < cset->nranges; ++i) | 3812 | for (i = 0; i < cset->nranges; ++i) |
| @@ -3869,7 +3832,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, | |||
| 3869 | return 0; | 3832 | return 0; |
| 3870 | } | 3833 | } |
| 3871 | 3834 | ||
| 3872 | # ifdef _LIBC | 3835 | #ifdef _LIBC |
| 3873 | static unsigned int | 3836 | static unsigned int |
| 3874 | find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len) | 3837 | find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len) |
| 3875 | { | 3838 | { |
| @@ -3927,8 +3890,7 @@ find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len) | |||
| 3927 | return UINT_MAX; | 3890 | return UINT_MAX; |
| 3928 | } | 3891 | } |
| 3929 | } | 3892 | } |
| 3930 | # endif /* _LIBC */ | 3893 | #endif /* _LIBC */ |
| 3931 | #endif /* RE_ENABLE_I18N */ | ||
| 3932 | 3894 | ||
| 3933 | /* Check whether the node accepts the byte which is IDX-th | 3895 | /* Check whether the node accepts the byte which is IDX-th |
| 3934 | byte of the INPUT. */ | 3896 | byte of the INPUT. */ |
| @@ -3951,12 +3913,10 @@ check_node_accept (const re_match_context_t *mctx, const re_token_t *node, | |||
| 3951 | return false; | 3913 | return false; |
| 3952 | break; | 3914 | break; |
| 3953 | 3915 | ||
| 3954 | #ifdef RE_ENABLE_I18N | ||
| 3955 | case OP_UTF8_PERIOD: | 3916 | case OP_UTF8_PERIOD: |
| 3956 | if (ch >= ASCII_CHARS) | 3917 | if (ch >= ASCII_CHARS) |
| 3957 | return false; | 3918 | return false; |
| 3958 | FALLTHROUGH; | 3919 | FALLTHROUGH; |
| 3959 | #endif | ||
| 3960 | case OP_PERIOD: | 3920 | case OP_PERIOD: |
| 3961 | if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE)) | 3921 | if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE)) |
| 3962 | || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL))) | 3922 | || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL))) |
| @@ -4017,7 +3977,6 @@ extend_buffers (re_match_context_t *mctx, int min_len) | |||
| 4017 | /* Then reconstruct the buffers. */ | 3977 | /* Then reconstruct the buffers. */ |
| 4018 | if (pstr->icase) | 3978 | if (pstr->icase) |
| 4019 | { | 3979 | { |
| 4020 | #ifdef RE_ENABLE_I18N | ||
| 4021 | if (pstr->mb_cur_max > 1) | 3980 | if (pstr->mb_cur_max > 1) |
| 4022 | { | 3981 | { |
| 4023 | ret = build_wcs_upper_buffer (pstr); | 3982 | ret = build_wcs_upper_buffer (pstr); |
| @@ -4025,16 +3984,13 @@ extend_buffers (re_match_context_t *mctx, int min_len) | |||
| 4025 | return ret; | 3984 | return ret; |
| 4026 | } | 3985 | } |
| 4027 | else | 3986 | else |
| 4028 | #endif /* RE_ENABLE_I18N */ | ||
| 4029 | build_upper_buffer (pstr); | 3987 | build_upper_buffer (pstr); |
| 4030 | } | 3988 | } |
| 4031 | else | 3989 | else |
| 4032 | { | 3990 | { |
| 4033 | #ifdef RE_ENABLE_I18N | ||
| 4034 | if (pstr->mb_cur_max > 1) | 3991 | if (pstr->mb_cur_max > 1) |
| 4035 | build_wcs_buffer (pstr); | 3992 | build_wcs_buffer (pstr); |
| 4036 | else | 3993 | else |
| 4037 | #endif /* RE_ENABLE_I18N */ | ||
| 4038 | { | 3994 | { |
| 4039 | if (pstr->trans != NULL) | 3995 | if (pstr->trans != NULL) |
| 4040 | re_string_translate_buffer (pstr); | 3996 | re_string_translate_buffer (pstr); |