aboutsummaryrefslogtreecommitdiffstats
path: root/src/regex.c
diff options
context:
space:
mode:
author: Noam Postavsky, 2016-10-19 20:23:50 -0400
committer: Noam Postavsky, 2016-10-21 22:24:54 -0400
commit: ad66b3fadb7ae22a4cbb82bb1507c39ceadf3897 (patch)
tree: bc3857bb1d0eeccfd16a0fb3e4d8cb44a9ebec56 /src/regex.c
parent: 5a26c9b0e1b0d9a2de35e0a8b0a803017e70def0 (diff)
download: emacs-ad66b3fadb7ae22a4cbb82bb1507c39ceadf3897.tar.gz
emacs-ad66b3fadb7ae22a4cbb82bb1507c39ceadf3897.zip
Fix handling of allocation in regex matching
`re_match_2_internal' uses pointers to the Lisp objects that it searches. Since it may call malloc when growing the "fail stack", these pointers may be invalidated while searching, resulting in memory corruption (Bug #24358). To fix this, we check the pointer that the Lisp object (as specified by re_match_object) points to before and after growing the stack, and update existing pointers accordingly. * src/regex.c (STR_BASE_PTR): New macro. (ENSURE_FAIL_STACK, re_search_2): Use it to convert pointers into offsets before possible malloc call, and back into pointers again afterwards. (POS_AS_IN_BUFFER): Add explanatory comment about punning trick. * src/search.c (search_buffer): Instead of storing search locations as pointers, store them as offsets and recompute the corresponding addresses for each call to `re_search_2'. (string_match_1, fast_string_match_internal, fast_looking_at): * src/dired.c (directory_files_internal): Set `re_match_object' to Qnil after calling `re_search' or `re_match_2'. * src/regex.h (re_match_object): Mention new usage in commentary.
Diffstat (limited to 'src/regex.c')
-rw-r--r-- src/regex.c 76
1 files changed, 73 insertions, 3 deletions
diff --git a/src/regex.c b/src/regex.c
index 164eb4612ae..1346ef401cb 100644
--- a/src/regex.c
+++ b/src/regex.c
@@ -152,6 +152,8 @@
152 152
153/* Converts the pointer to the char to BEG-based offset from the start. */ 153/* Converts the pointer to the char to BEG-based offset from the start. */
154# define PTR_TO_OFFSET(d) POS_AS_IN_BUFFER (POINTER_TO_OFFSET (d)) 154# define PTR_TO_OFFSET(d) POS_AS_IN_BUFFER (POINTER_TO_OFFSET (d))
155/* Strings are 0-indexed, buffers are 1-indexed; we pun on the boolean
156 result to get the right base index. */
155# define POS_AS_IN_BUFFER(p) ((p) + (NILP (re_match_object) || BUFFERP (re_match_object))) 157# define POS_AS_IN_BUFFER(p) ((p) + (NILP (re_match_object) || BUFFERP (re_match_object)))
156 158
157# define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte) 159# define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte)
@@ -1436,11 +1438,62 @@ typedef struct
1436#define NEXT_FAILURE_HANDLE(h) fail_stack.stack[(h) - 3].integer 1438#define NEXT_FAILURE_HANDLE(h) fail_stack.stack[(h) - 3].integer
1437#define TOP_FAILURE_HANDLE() fail_stack.frame 1439#define TOP_FAILURE_HANDLE() fail_stack.frame
1438 1440
1441#ifdef emacs
1442#define STR_BASE_PTR(obj) \
1443 (NILP (obj) ? current_buffer->text->beg : \
1444 STRINGP (obj) ? SDATA (obj) : \
1445 NULL)
1446#else
1447#define STR_BASE_PTR(obj) NULL
1448#endif
1439 1449
1440#define ENSURE_FAIL_STACK(space) \ 1450#define ENSURE_FAIL_STACK(space) \
1441while (REMAINING_AVAIL_SLOTS <= space) { \ 1451while (REMAINING_AVAIL_SLOTS <= space) { \
1452 re_char* orig_base = STR_BASE_PTR (re_match_object); \
1453 ptrdiff_t string1_off, end1_off, end_match_1_off; \
1454 ptrdiff_t string2_off, end2_off, end_match_2_off; \
1455 ptrdiff_t d_off, dend_off, dfail_off; \
1456 if (orig_base) \
1457 { \
1458 if (string1) \
1459 { \
1460 string1_off = string1 - orig_base; \
1461 end1_off = end1 - orig_base; \
1462 end_match_1_off = end_match_1 - orig_base; \
1463 } \
1464 if (string2) \
1465 { \
1466 string2_off = string2 - orig_base; \
1467 end2_off = end2 - orig_base; \
1468 end_match_2_off = end_match_2 - orig_base; \
1469 } \
1470 d_off = d - orig_base; \
1471 dend_off = dend - orig_base; \
1472 dfail_off = dfail - orig_base; \
1473 } \
1442 if (!GROW_FAIL_STACK (fail_stack)) \ 1474 if (!GROW_FAIL_STACK (fail_stack)) \
1443 return -2; \ 1475 return -2; \
1476 /* GROW_FAIL_STACK may call malloc and relocate the string */ \
1477 /* pointers. */ \
1478 re_char* new_base = STR_BASE_PTR (re_match_object); \
1479 if (new_base && new_base != orig_base) \
1480 { \
1481 if (string1) \
1482 { \
1483 string1 = new_base + string1_off; \
1484 end1 = new_base + end1_off; \
1485 end_match_1 = new_base + end_match_1_off; \
1486 } \
1487 if (string2) \
1488 { \
1489 string2 = new_base + string2_off; \
1490 end2 = new_base + end2_off; \
1491 end_match_2 = new_base + end_match_2_off; \
1492 } \
1493 d = new_base + d_off; \
1494 dend = new_base + dend_off; \
1495 dfail = new_base + dfail_off; \
1496 } \
1444 DEBUG_PRINT ("\n Doubled stack; size now: %zd\n", (fail_stack).size);\ 1497 DEBUG_PRINT ("\n Doubled stack; size now: %zd\n", (fail_stack).size);\
1445 DEBUG_PRINT (" slots available: %zd\n", REMAINING_AVAIL_SLOTS);\ 1498 DEBUG_PRINT (" slots available: %zd\n", REMAINING_AVAIL_SLOTS);\
1446} 1499}
@@ -4443,6 +4496,16 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1,
4443 && !bufp->can_be_null) 4496 && !bufp->can_be_null)
4444 return -1; 4497 return -1;
4445 4498
4499 /* re_match_2_internal may allocate, causing a relocation of the
4500 lisp text object that we're searching. */
4501 ptrdiff_t offset1, offset2;
4502 re_char *orig_base = STR_BASE_PTR (re_match_object);
4503 if (orig_base)
4504 {
4505 if (string1) offset1 = string1 - orig_base;
4506 if (string2) offset2 = string2 - orig_base;
4507 }
4508
4446 val = re_match_2_internal (bufp, string1, size1, string2, size2, 4509 val = re_match_2_internal (bufp, string1, size1, string2, size2,
4447 startpos, regs, stop); 4510 startpos, regs, stop);
4448 4511
@@ -4452,6 +4515,13 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1,
4452 if (val == -2) 4515 if (val == -2)
4453 return -2; 4516 return -2;
4454 4517
4518 re_char *new_base = STR_BASE_PTR (re_match_object);
4519 if (new_base && new_base != orig_base)
4520 {
4521 if (string1) string1 = offset1 + new_base;
4522 if (string2) string2 = offset2 + new_base;
4523 }
4524
4455 advance: 4525 advance:
4456 if (!range) 4526 if (!range)
4457 break; 4527 break;
@@ -4887,8 +4957,8 @@ WEAK_ALIAS (__re_match, re_match)
4887#endif /* not emacs */ 4957#endif /* not emacs */
4888 4958
4889#ifdef emacs 4959#ifdef emacs
4890/* In Emacs, this is the string or buffer in which we 4960/* In Emacs, this is the string or buffer in which we are matching.
4891 are matching. It is used for looking up syntax properties. */ 4961 See the declaration in regex.h for details. */
4892Lisp_Object re_match_object; 4962Lisp_Object re_match_object;
4893#endif 4963#endif
4894 4964