aboutsummaryrefslogtreecommitdiffstats
path: root/lib/regexec.c
diff options
context:
space:
mode:
authorPaul Eggert2021-10-04 12:11:39 -0700
committerPaul Eggert2021-10-04 15:21:31 -0700
commit68a256c89270ef9e97bca6097967a9ed2b050f4a (patch)
tree7a3cca947c133bf7def967083f1054dfa4239322 /lib/regexec.c
parent63cb65dccecb1146cdad7134e4b62ea3e1433880 (diff)
downloademacs-68a256c89270ef9e97bca6097967a9ed2b050f4a.tar.gz
emacs-68a256c89270ef9e97bca6097967a9ed2b050f4a.zip
Update from Gnulib
Make the following changes by hand, and run 'admin/merge-gnulib'. * .gitignore: Add lib/malloc/*.gl.h. * admin/merge-gnulib: Copy lib/af_alg.h and lib/save-cwd.h directly from Gnulib, without worrying about Gnulib modules, as these files are special cases. (AVOIDED_MODULES): Remove malloc-posix. * lib/malloc.c, lib/realloc.c, m4/malloc.m4, m4/realloc.m4: * m4/year2038.m4: New files, copied from Gnulib. * lib/malloca.c, lib/malloca.h: * m4/close-stream.m4, m4/glibc21.m4, m4/malloca.m4: Remove. These are either no longer present in Gnulib, or are no longer needed by modules that Emacs uses. * oldXMenu/AddPane.c, oldXmenu/Addsel.c: Include XmenuInt.h first; needed for new Gnulib. * src/xmenu.c: Call emacs_abort, not abort.
Diffstat (limited to 'lib/regexec.c')
-rw-r--r--lib/regexec.c109
1 files changed, 61 insertions, 48 deletions
diff --git a/lib/regexec.c b/lib/regexec.c
index 15dc57bd0e6..83e9aaf8cad 100644
--- a/lib/regexec.c
+++ b/lib/regexec.c
@@ -4,16 +4,16 @@
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. 4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5 5
6 The GNU C Library is free software; you can redistribute it and/or 6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU General Public 7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either 8 License as published by the Free Software Foundation; either
9 version 3 of the License, or (at your option) any later version. 9 version 2.1 of the License, or (at your option) any later version.
10 10
11 The GNU C Library is distributed in the hope that it will be useful, 11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details. 14 Lesser General Public License for more details.
15 15
16 You should have received a copy of the GNU General Public 16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see 17 License along with the GNU C Library; if not, see
18 <https://www.gnu.org/licenses/>. */ 18 <https://www.gnu.org/licenses/>. */
19 19
@@ -59,7 +59,7 @@ static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
59 Idx cur_idx, Idx nmatch); 59 Idx cur_idx, Idx nmatch);
60static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs, 60static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
61 Idx str_idx, Idx dest_node, Idx nregs, 61 Idx str_idx, Idx dest_node, Idx nregs,
62 regmatch_t *regs, 62 regmatch_t *regs, regmatch_t *prevregs,
63 re_node_set *eps_via_nodes); 63 re_node_set *eps_via_nodes);
64static reg_errcode_t set_regs (const regex_t *preg, 64static reg_errcode_t set_regs (const regex_t *preg,
65 const re_match_context_t *mctx, 65 const re_match_context_t *mctx,
@@ -186,11 +186,12 @@ static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len);
186 REG_NOTBOL is set, then ^ does not match at the beginning of the 186 REG_NOTBOL is set, then ^ does not match at the beginning of the
187 string; if REG_NOTEOL is set, then $ does not match at the end. 187 string; if REG_NOTEOL is set, then $ does not match at the end.
188 188
189 We return 0 if we find a match and REG_NOMATCH if not. */ 189 Return 0 if a match is found, REG_NOMATCH if not, REG_BADPAT if
190 EFLAGS is invalid. */
190 191
191int 192int
192regexec (const regex_t *__restrict preg, const char *__restrict string, 193regexec (const regex_t *__restrict preg, const char *__restrict string,
193 size_t nmatch, regmatch_t pmatch[], int eflags) 194 size_t nmatch, regmatch_t pmatch[_REGEX_NELTS (nmatch)], int eflags)
194{ 195{
195 reg_errcode_t err; 196 reg_errcode_t err;
196 Idx start, length; 197 Idx start, length;
@@ -234,7 +235,7 @@ int
234attribute_compat_text_section 235attribute_compat_text_section
235__compat_regexec (const regex_t *__restrict preg, 236__compat_regexec (const regex_t *__restrict preg,
236 const char *__restrict string, size_t nmatch, 237 const char *__restrict string, size_t nmatch,
237 regmatch_t pmatch[], int eflags) 238 regmatch_t pmatch[_REGEX_NELTS (nmatch)], int eflags)
238{ 239{
239 return regexec (preg, string, nmatch, pmatch, 240 return regexec (preg, string, nmatch, pmatch,
240 eflags & (REG_NOTBOL | REG_NOTEOL)); 241 eflags & (REG_NOTBOL | REG_NOTEOL));
@@ -269,8 +270,8 @@ compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
269 strings.) 270 strings.)
270 271
271 On success, re_match* functions return the length of the match, re_search* 272 On success, re_match* functions return the length of the match, re_search*
272 return the position of the start of the match. Return value -1 means no 273 return the position of the start of the match. They return -1 on
273 match was found and -2 indicates an internal error. */ 274 match failure, -2 on error. */
274 275
275regoff_t 276regoff_t
276re_match (struct re_pattern_buffer *bufp, const char *string, Idx length, 277re_match (struct re_pattern_buffer *bufp, const char *string, Idx length,
@@ -1206,27 +1207,30 @@ check_halt_state_context (const re_match_context_t *mctx,
1206/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA 1207/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
1207 corresponding to the DFA). 1208 corresponding to the DFA).
1208 Return the destination node, and update EPS_VIA_NODES; 1209 Return the destination node, and update EPS_VIA_NODES;
1209 return -1 in case of errors. */ 1210 return -1 on match failure, -2 on error. */
1210 1211
1211static Idx 1212static Idx
1212proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs, 1213proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
1214 regmatch_t *prevregs,
1213 Idx *pidx, Idx node, re_node_set *eps_via_nodes, 1215 Idx *pidx, Idx node, re_node_set *eps_via_nodes,
1214 struct re_fail_stack_t *fs) 1216 struct re_fail_stack_t *fs)
1215{ 1217{
1216 const re_dfa_t *const dfa = mctx->dfa; 1218 const re_dfa_t *const dfa = mctx->dfa;
1217 Idx i;
1218 bool ok;
1219 if (IS_EPSILON_NODE (dfa->nodes[node].type)) 1219 if (IS_EPSILON_NODE (dfa->nodes[node].type))
1220 { 1220 {
1221 re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes; 1221 re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
1222 re_node_set *edests = &dfa->edests[node]; 1222 re_node_set *edests = &dfa->edests[node];
1223 Idx dest_node; 1223
1224 ok = re_node_set_insert (eps_via_nodes, node); 1224 if (! re_node_set_contains (eps_via_nodes, node))
1225 if (__glibc_unlikely (! ok)) 1225 {
1226 return -2; 1226 bool ok = re_node_set_insert (eps_via_nodes, node);
1227 /* Pick up a valid destination, or return -1 if none 1227 if (__glibc_unlikely (! ok))
1228 is found. */ 1228 return -2;
1229 for (dest_node = -1, i = 0; i < edests->nelem; ++i) 1229 }
1230
1231 /* Pick a valid destination, or return -1 if none is found. */
1232 Idx dest_node = -1;
1233 for (Idx i = 0; i < edests->nelem; i++)
1230 { 1234 {
1231 Idx candidate = edests->elems[i]; 1235 Idx candidate = edests->elems[i];
1232 if (!re_node_set_contains (cur_nodes, candidate)) 1236 if (!re_node_set_contains (cur_nodes, candidate))
@@ -1244,7 +1248,7 @@ proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
1244 /* Otherwise, push the second epsilon-transition on the fail stack. */ 1248 /* Otherwise, push the second epsilon-transition on the fail stack. */
1245 else if (fs != NULL 1249 else if (fs != NULL
1246 && push_fail_stack (fs, *pidx, candidate, nregs, regs, 1250 && push_fail_stack (fs, *pidx, candidate, nregs, regs,
1247 eps_via_nodes)) 1251 prevregs, eps_via_nodes))
1248 return -2; 1252 return -2;
1249 1253
1250 /* We know we are going to exit. */ 1254 /* We know we are going to exit. */
@@ -1288,7 +1292,7 @@ proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
1288 if (naccepted == 0) 1292 if (naccepted == 0)
1289 { 1293 {
1290 Idx dest_node; 1294 Idx dest_node;
1291 ok = re_node_set_insert (eps_via_nodes, node); 1295 bool ok = re_node_set_insert (eps_via_nodes, node);
1292 if (__glibc_unlikely (! ok)) 1296 if (__glibc_unlikely (! ok))
1293 return -2; 1297 return -2;
1294 dest_node = dfa->edests[node].elems[0]; 1298 dest_node = dfa->edests[node].elems[0];
@@ -1317,7 +1321,8 @@ proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
1317static reg_errcode_t 1321static reg_errcode_t
1318__attribute_warn_unused_result__ 1322__attribute_warn_unused_result__
1319push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node, 1323push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,
1320 Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes) 1324 Idx nregs, regmatch_t *regs, regmatch_t *prevregs,
1325 re_node_set *eps_via_nodes)
1321{ 1326{
1322 reg_errcode_t err; 1327 reg_errcode_t err;
1323 Idx num = fs->num++; 1328 Idx num = fs->num++;
@@ -1333,25 +1338,30 @@ push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,
1333 } 1338 }
1334 fs->stack[num].idx = str_idx; 1339 fs->stack[num].idx = str_idx;
1335 fs->stack[num].node = dest_node; 1340 fs->stack[num].node = dest_node;
1336 fs->stack[num].regs = re_malloc (regmatch_t, nregs); 1341 fs->stack[num].regs = re_malloc (regmatch_t, 2 * nregs);
1337 if (fs->stack[num].regs == NULL) 1342 if (fs->stack[num].regs == NULL)
1338 return REG_ESPACE; 1343 return REG_ESPACE;
1339 memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs); 1344 memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1345 memcpy (fs->stack[num].regs + nregs, prevregs, sizeof (regmatch_t) * nregs);
1340 err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes); 1346 err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1341 return err; 1347 return err;
1342} 1348}
1343 1349
1344static Idx 1350static Idx
1345pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs, 1351pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs,
1346 regmatch_t *regs, re_node_set *eps_via_nodes) 1352 regmatch_t *regs, regmatch_t *prevregs,
1353 re_node_set *eps_via_nodes)
1347{ 1354{
1355 if (fs == NULL || fs->num == 0)
1356 return -1;
1348 Idx num = --fs->num; 1357 Idx num = --fs->num;
1349 DEBUG_ASSERT (num >= 0);
1350 *pidx = fs->stack[num].idx; 1358 *pidx = fs->stack[num].idx;
1351 memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs); 1359 memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1360 memcpy (prevregs, fs->stack[num].regs + nregs, sizeof (regmatch_t) * nregs);
1352 re_node_set_free (eps_via_nodes); 1361 re_node_set_free (eps_via_nodes);
1353 re_free (fs->stack[num].regs); 1362 re_free (fs->stack[num].regs);
1354 *eps_via_nodes = fs->stack[num].eps_via_nodes; 1363 *eps_via_nodes = fs->stack[num].eps_via_nodes;
1364 DEBUG_ASSERT (0 <= fs->stack[num].node);
1355 return fs->stack[num].node; 1365 return fs->stack[num].node;
1356} 1366}
1357 1367
@@ -1407,33 +1417,32 @@ set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
1407 { 1417 {
1408 update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch); 1418 update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
1409 1419
1410 if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node) 1420 if ((idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
1421 || (fs && re_node_set_contains (&eps_via_nodes, cur_node)))
1411 { 1422 {
1412 Idx reg_idx; 1423 Idx reg_idx;
1424 cur_node = -1;
1413 if (fs) 1425 if (fs)
1414 { 1426 {
1415 for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) 1427 for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
1416 if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1) 1428 if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
1417 break; 1429 {
1418 if (reg_idx == nmatch) 1430 cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1419 { 1431 prev_idx_match, &eps_via_nodes);
1420 re_node_set_free (&eps_via_nodes); 1432 break;
1421 regmatch_list_free (&prev_match); 1433 }
1422 return free_fail_stack_return (fs);
1423 }
1424 cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1425 &eps_via_nodes);
1426 } 1434 }
1427 else 1435 if (cur_node < 0)
1428 { 1436 {
1429 re_node_set_free (&eps_via_nodes); 1437 re_node_set_free (&eps_via_nodes);
1430 regmatch_list_free (&prev_match); 1438 regmatch_list_free (&prev_match);
1431 return REG_NOERROR; 1439 return free_fail_stack_return (fs);
1432 } 1440 }
1433 } 1441 }
1434 1442
1435 /* Proceed to next node. */ 1443 /* Proceed to next node. */
1436 cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node, 1444 cur_node = proceed_next_node (mctx, nmatch, pmatch, prev_idx_match,
1445 &idx, cur_node,
1437 &eps_via_nodes, fs); 1446 &eps_via_nodes, fs);
1438 1447
1439 if (__glibc_unlikely (cur_node < 0)) 1448 if (__glibc_unlikely (cur_node < 0))
@@ -1445,13 +1454,13 @@ set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
1445 free_fail_stack_return (fs); 1454 free_fail_stack_return (fs);
1446 return REG_ESPACE; 1455 return REG_ESPACE;
1447 } 1456 }
1448 if (fs) 1457 cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1449 cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, 1458 prev_idx_match, &eps_via_nodes);
1450 &eps_via_nodes); 1459 if (cur_node < 0)
1451 else
1452 { 1460 {
1453 re_node_set_free (&eps_via_nodes); 1461 re_node_set_free (&eps_via_nodes);
1454 regmatch_list_free (&prev_match); 1462 regmatch_list_free (&prev_match);
1463 free_fail_stack_return (fs);
1455 return REG_NOMATCH; 1464 return REG_NOMATCH;
1456 } 1465 }
1457 } 1466 }
@@ -1495,10 +1504,10 @@ update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
1495 } 1504 }
1496 else if (type == OP_CLOSE_SUBEXP) 1505 else if (type == OP_CLOSE_SUBEXP)
1497 { 1506 {
1507 /* We are at the last node of this sub expression. */
1498 Idx reg_num = dfa->nodes[cur_node].opr.idx + 1; 1508 Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
1499 if (reg_num < nmatch) 1509 if (reg_num < nmatch)
1500 { 1510 {
1501 /* We are at the last node of this sub expression. */
1502 if (pmatch[reg_num].rm_so < cur_idx) 1511 if (pmatch[reg_num].rm_so < cur_idx)
1503 { 1512 {
1504 pmatch[reg_num].rm_eo = cur_idx; 1513 pmatch[reg_num].rm_eo = cur_idx;
@@ -2195,6 +2204,7 @@ sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
2195 2204
2196/* Return the next state to which the current state STATE will transit by 2205/* Return the next state to which the current state STATE will transit by
2197 accepting the current input byte, and update STATE_LOG if necessary. 2206 accepting the current input byte, and update STATE_LOG if necessary.
2207 Return NULL on failure.
2198 If STATE can accept a multibyte char/collating element/back reference 2208 If STATE can accept a multibyte char/collating element/back reference
2199 update the destination of STATE_LOG. */ 2209 update the destination of STATE_LOG. */
2200 2210
@@ -2395,7 +2405,7 @@ check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
2395 2405
2396#if 0 2406#if 0
2397/* Return the next state to which the current state STATE will transit by 2407/* Return the next state to which the current state STATE will transit by
2398 accepting the current input byte. */ 2408 accepting the current input byte. Return NULL on failure. */
2399 2409
2400static re_dfastate_t * 2410static re_dfastate_t *
2401transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx, 2411transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
@@ -2817,7 +2827,8 @@ find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
2817/* Check whether the node TOP_NODE at TOP_STR can arrive to the node 2827/* Check whether the node TOP_NODE at TOP_STR can arrive to the node
2818 LAST_NODE at LAST_STR. We record the path onto PATH since it will be 2828 LAST_NODE at LAST_STR. We record the path onto PATH since it will be
2819 heavily reused. 2829 heavily reused.
2820 Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */ 2830 Return REG_NOERROR if it can arrive, REG_NOMATCH if it cannot,
2831 REG_ESPACE if memory is exhausted. */
2821 2832
2822static reg_errcode_t 2833static reg_errcode_t
2823__attribute_warn_unused_result__ 2834__attribute_warn_unused_result__
@@ -3433,7 +3444,8 @@ build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
3433/* Group all nodes belonging to STATE into several destinations. 3444/* Group all nodes belonging to STATE into several destinations.
3434 Then for all destinations, set the nodes belonging to the destination 3445 Then for all destinations, set the nodes belonging to the destination
3435 to DESTS_NODE[i] and set the characters accepted by the destination 3446 to DESTS_NODE[i] and set the characters accepted by the destination
3436 to DEST_CH[i]. This function return the number of destinations. */ 3447 to DEST_CH[i]. Return the number of destinations if successful,
3448 -1 on internal error. */
3437 3449
3438static Idx 3450static Idx
3439group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, 3451group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
@@ -4211,7 +4223,8 @@ match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx)
4211} 4223}
4212 4224
4213/* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches 4225/* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
4214 at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. */ 4226 at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP.
4227 Return the new entry if successful, NULL if memory is exhausted. */
4215 4228
4216static re_sub_match_last_t * 4229static re_sub_match_last_t *
4217match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx) 4230match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx)