aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNoam Postavsky2016-10-19 20:23:50 -0400
committerNoam Postavsky2016-10-21 22:24:54 -0400
commitad66b3fadb7ae22a4cbb82bb1507c39ceadf3897 (patch)
treebc3857bb1d0eeccfd16a0fb3e4d8cb44a9ebec56
parent5a26c9b0e1b0d9a2de35e0a8b0a803017e70def0 (diff)
downloademacs-ad66b3fadb7ae22a4cbb82bb1507c39ceadf3897.tar.gz
emacs-ad66b3fadb7ae22a4cbb82bb1507c39ceadf3897.zip
Fix handling of allocation in regex matching
`re_match_2_internal' uses pointers to the lisp objects that it searches. Since it may call malloc when growing the "fail stack", these pointers may be invalidated while searching, resulting in memory corruption (Bug #24358). To fix this, we check the pointer that the lisp object (as specified by re_match_object) points to before and after growing the stack, and update existing pointers accordingly. * src/regex.c (STR_BASE_PTR): New macro. (ENSURE_FAIL_STACK, re_search_2): Use it to convert pointers into offsets before possible malloc call, and back into pointers again afterwards. (POS_AS_IN_BUFFER): Add explanatory comment about punning trick. * src/search.c (search_buffer): Instead of storing search locations as pointers, store them as offsets and recompute the corresponding address for each call to `re_search_2'. (string_match_1, fast_string_match_internal, fast_looking_at): * src/dired.c (directory_files_internal): Set `re_match_object' to Qnil after calling `re_search' or `re_match_2'. * src/regex.h (re_match_object): Mention new usage in commentary.
-rw-r--r--src/dired.c4
-rw-r--r--src/regex.c76
-rw-r--r--src/regex.h4
-rw-r--r--src/search.c36
4 files changed, 103 insertions, 17 deletions
diff --git a/src/dired.c b/src/dired.c
index dba575ce4c2..006f74c834d 100644
--- a/src/dired.c
+++ b/src/dired.c
@@ -259,9 +259,11 @@ directory_files_internal (Lisp_Object directory, Lisp_Object full,
259 QUIT; 259 QUIT;
260 260
261 bool wanted = (NILP (match) 261 bool wanted = (NILP (match)
262 || re_search (bufp, SSDATA (name), len, 0, len, 0) >= 0); 262 || (re_match_object = name,
263 re_search (bufp, SSDATA (name), len, 0, len, 0) >= 0));
263 264
264 immediate_quit = 0; 265 immediate_quit = 0;
266 re_match_object = Qnil; /* Stop protecting name from GC. */
265 267
266 if (wanted) 268 if (wanted)
267 { 269 {
diff --git a/src/regex.c b/src/regex.c
index 164eb4612ae..1346ef401cb 100644
--- a/src/regex.c
+++ b/src/regex.c
@@ -152,6 +152,8 @@
152 152
153/* Converts the pointer to the char to BEG-based offset from the start. */ 153/* Converts the pointer to the char to BEG-based offset from the start. */
154# define PTR_TO_OFFSET(d) POS_AS_IN_BUFFER (POINTER_TO_OFFSET (d)) 154# define PTR_TO_OFFSET(d) POS_AS_IN_BUFFER (POINTER_TO_OFFSET (d))
155/* Strings are 0-indexed, buffers are 1-indexed; we pun on the boolean
156 result to get the right base index. */
155# define POS_AS_IN_BUFFER(p) ((p) + (NILP (re_match_object) || BUFFERP (re_match_object))) 157# define POS_AS_IN_BUFFER(p) ((p) + (NILP (re_match_object) || BUFFERP (re_match_object)))
156 158
157# define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte) 159# define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte)
@@ -1436,11 +1438,62 @@ typedef struct
1436#define NEXT_FAILURE_HANDLE(h) fail_stack.stack[(h) - 3].integer 1438#define NEXT_FAILURE_HANDLE(h) fail_stack.stack[(h) - 3].integer
1437#define TOP_FAILURE_HANDLE() fail_stack.frame 1439#define TOP_FAILURE_HANDLE() fail_stack.frame
1438 1440
1441#ifdef emacs
1442#define STR_BASE_PTR(obj) \
1443 (NILP (obj) ? current_buffer->text->beg : \
1444 STRINGP (obj) ? SDATA (obj) : \
1445 NULL)
1446#else
1447#define STR_BASE_PTR(obj) NULL
1448#endif
1439 1449
1440#define ENSURE_FAIL_STACK(space) \ 1450#define ENSURE_FAIL_STACK(space) \
1441while (REMAINING_AVAIL_SLOTS <= space) { \ 1451while (REMAINING_AVAIL_SLOTS <= space) { \
1452 re_char* orig_base = STR_BASE_PTR (re_match_object); \
1453 ptrdiff_t string1_off, end1_off, end_match_1_off; \
1454 ptrdiff_t string2_off, end2_off, end_match_2_off; \
1455 ptrdiff_t d_off, dend_off, dfail_off; \
1456 if (orig_base) \
1457 { \
1458 if (string1) \
1459 { \
1460 string1_off = string1 - orig_base; \
1461 end1_off = end1 - orig_base; \
1462 end_match_1_off = end_match_1 - orig_base; \
1463 } \
1464 if (string2) \
1465 { \
1466 string2_off = string2 - orig_base; \
1467 end2_off = end2 - orig_base; \
1468 end_match_2_off = end_match_2 - orig_base; \
1469 } \
1470 d_off = d - orig_base; \
1471 dend_off = dend - orig_base; \
1472 dfail_off = dfail - orig_base; \
1473 } \
1442 if (!GROW_FAIL_STACK (fail_stack)) \ 1474 if (!GROW_FAIL_STACK (fail_stack)) \
1443 return -2; \ 1475 return -2; \
1476 /* GROW_FAIL_STACK may call malloc and relocate the string */ \
1477 /* pointers. */ \
1478 re_char* new_base = STR_BASE_PTR (re_match_object); \
1479 if (new_base && new_base != orig_base) \
1480 { \
1481 if (string1) \
1482 { \
1483 string1 = new_base + string1_off; \
1484 end1 = new_base + end1_off; \
1485 end_match_1 = new_base + end_match_1_off; \
1486 } \
1487 if (string2) \
1488 { \
1489 string2 = new_base + string2_off; \
1490 end2 = new_base + end2_off; \
1491 end_match_2 = new_base + end_match_2_off; \
1492 } \
1493 d = new_base + d_off; \
1494 dend = new_base + dend_off; \
1495 dfail = new_base + dfail_off; \
1496 } \
1444 DEBUG_PRINT ("\n Doubled stack; size now: %zd\n", (fail_stack).size);\ 1497 DEBUG_PRINT ("\n Doubled stack; size now: %zd\n", (fail_stack).size);\
1445 DEBUG_PRINT (" slots available: %zd\n", REMAINING_AVAIL_SLOTS);\ 1498 DEBUG_PRINT (" slots available: %zd\n", REMAINING_AVAIL_SLOTS);\
1446} 1499}
@@ -4443,6 +4496,16 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1,
4443 && !bufp->can_be_null) 4496 && !bufp->can_be_null)
4444 return -1; 4497 return -1;
4445 4498
4499 /* re_match_2_internal may allocate, causing a relocation of the
4500 lisp text object that we're searching. */
4501 ptrdiff_t offset1, offset2;
4502 re_char *orig_base = STR_BASE_PTR (re_match_object);
4503 if (orig_base)
4504 {
4505 if (string1) offset1 = string1 - orig_base;
4506 if (string2) offset2 = string2 - orig_base;
4507 }
4508
4446 val = re_match_2_internal (bufp, string1, size1, string2, size2, 4509 val = re_match_2_internal (bufp, string1, size1, string2, size2,
4447 startpos, regs, stop); 4510 startpos, regs, stop);
4448 4511
@@ -4452,6 +4515,13 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1,
4452 if (val == -2) 4515 if (val == -2)
4453 return -2; 4516 return -2;
4454 4517
4518 re_char *new_base = STR_BASE_PTR (re_match_object);
4519 if (new_base && new_base != orig_base)
4520 {
4521 if (string1) string1 = offset1 + new_base;
4522 if (string2) string2 = offset2 + new_base;
4523 }
4524
4455 advance: 4525 advance:
4456 if (!range) 4526 if (!range)
4457 break; 4527 break;
@@ -4887,8 +4957,8 @@ WEAK_ALIAS (__re_match, re_match)
4887#endif /* not emacs */ 4957#endif /* not emacs */
4888 4958
4889#ifdef emacs 4959#ifdef emacs
4890/* In Emacs, this is the string or buffer in which we 4960/* In Emacs, this is the string or buffer in which we are matching.
4891 are matching. It is used for looking up syntax properties. */ 4961 See the declaration in regex.h for details. */
4892Lisp_Object re_match_object; 4962Lisp_Object re_match_object;
4893#endif 4963#endif
4894 4964
diff --git a/src/regex.h b/src/regex.h
index 51f4424ce94..61c771c045f 100644
--- a/src/regex.h
+++ b/src/regex.h
@@ -169,7 +169,9 @@ extern reg_syntax_t re_syntax_options;
169#ifdef emacs 169#ifdef emacs
170# include "lisp.h" 170# include "lisp.h"
171/* In Emacs, this is the string or buffer in which we are matching. 171/* In Emacs, this is the string or buffer in which we are matching.
172 It is used for looking up syntax properties. 172 It is used for looking up syntax properties, and also to recompute
173 pointers in case the object is relocated as a side effect of
174 calling malloc (if it calls r_alloc_sbrk in ralloc.c).
173 175
174 If the value is a Lisp string object, we are matching text in that 176 If the value is a Lisp string object, we are matching text in that
175 string; if it's nil, we are matching text in the current buffer; if 177 string; if it's nil, we are matching text in the current buffer; if
diff --git a/src/search.c b/src/search.c
index dc7e2d88603..ec5a1d7733f 100644
--- a/src/search.c
+++ b/src/search.c
@@ -287,8 +287,10 @@ looking_at_1 (Lisp_Object string, bool posix)
287 immediate_quit = 1; 287 immediate_quit = 1;
288 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */ 288 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
289 289
290 /* Get pointers and sizes of the two strings 290 /* Get pointers and sizes of the two strings that make up the
291 that make up the visible portion of the buffer. */ 291 visible portion of the buffer. Note that we can use pointers
292 here, unlike in search_buffer, because we only call re_match_2
293 once, after which we never use the pointers again. */
292 294
293 p1 = BEGV_ADDR; 295 p1 = BEGV_ADDR;
294 s1 = GPT_BYTE - BEGV_BYTE; 296 s1 = GPT_BYTE - BEGV_BYTE;
@@ -407,6 +409,7 @@ string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
407 (NILP (Vinhibit_changing_match_data) 409 (NILP (Vinhibit_changing_match_data)
408 ? &search_regs : NULL)); 410 ? &search_regs : NULL));
409 immediate_quit = 0; 411 immediate_quit = 0;
412 re_match_object = Qnil; /* Stop protecting string from GC. */
410 413
411 /* Set last_thing_searched only when match data is changed. */ 414 /* Set last_thing_searched only when match data is changed. */
412 if (NILP (Vinhibit_changing_match_data)) 415 if (NILP (Vinhibit_changing_match_data))
@@ -477,6 +480,7 @@ fast_string_match_internal (Lisp_Object regexp, Lisp_Object string,
477 SBYTES (string), 0, 480 SBYTES (string), 0,
478 SBYTES (string), 0); 481 SBYTES (string), 0);
479 immediate_quit = 0; 482 immediate_quit = 0;
483 re_match_object = Qnil; /* Stop protecting string from GC. */
480 return val; 484 return val;
481} 485}
482 486
@@ -564,6 +568,7 @@ fast_looking_at (Lisp_Object regexp, ptrdiff_t pos, ptrdiff_t pos_byte,
564 len = re_match_2 (buf, (char *) p1, s1, (char *) p2, s2, 568 len = re_match_2 (buf, (char *) p1, s1, (char *) p2, s2,
565 pos_byte, NULL, limit_byte); 569 pos_byte, NULL, limit_byte);
566 immediate_quit = 0; 570 immediate_quit = 0;
571 re_match_object = Qnil; /* Stop protecting string from GC. */
567 572
568 return len; 573 return len;
569} 574}
@@ -1178,8 +1183,8 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
1178 1183
1179 if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp))) 1184 if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1180 { 1185 {
1181 unsigned char *p1, *p2; 1186 unsigned char *base;
1182 ptrdiff_t s1, s2; 1187 ptrdiff_t off1, off2, s1, s2;
1183 struct re_pattern_buffer *bufp; 1188 struct re_pattern_buffer *bufp;
1184 1189
1185 bufp = compile_pattern (string, 1190 bufp = compile_pattern (string,
@@ -1193,16 +1198,19 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
1193 can take too long. */ 1198 can take too long. */
1194 QUIT; /* Do a pending quit right away, 1199 QUIT; /* Do a pending quit right away,
1195 to avoid paradoxical behavior */ 1200 to avoid paradoxical behavior */
1196 /* Get pointers and sizes of the two strings 1201 /* Get offsets and sizes of the two strings that make up the
1197 that make up the visible portion of the buffer. */ 1202 visible portion of the buffer. We compute offsets instead of
1203 pointers because re_search_2 may call malloc and therefore
1204 change the buffer text address. */
1198 1205
1199 p1 = BEGV_ADDR; 1206 base = current_buffer->text->beg;
1207 off1 = BEGV_ADDR - base;
1200 s1 = GPT_BYTE - BEGV_BYTE; 1208 s1 = GPT_BYTE - BEGV_BYTE;
1201 p2 = GAP_END_ADDR; 1209 off2 = GAP_END_ADDR - base;
1202 s2 = ZV_BYTE - GPT_BYTE; 1210 s2 = ZV_BYTE - GPT_BYTE;
1203 if (s1 < 0) 1211 if (s1 < 0)
1204 { 1212 {
1205 p2 = p1; 1213 off2 = off1;
1206 s2 = ZV_BYTE - BEGV_BYTE; 1214 s2 = ZV_BYTE - BEGV_BYTE;
1207 s1 = 0; 1215 s1 = 0;
1208 } 1216 }
@@ -1217,7 +1225,9 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
1217 { 1225 {
1218 ptrdiff_t val; 1226 ptrdiff_t val;
1219 1227
1220 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2, 1228 val = re_search_2 (bufp,
1229 (char*) (base + off1), s1,
1230 (char*) (base + off2), s2,
1221 pos_byte - BEGV_BYTE, lim_byte - pos_byte, 1231 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1222 (NILP (Vinhibit_changing_match_data) 1232 (NILP (Vinhibit_changing_match_data)
1223 ? &search_regs : &search_regs_1), 1233 ? &search_regs : &search_regs_1),
@@ -1262,8 +1272,10 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
1262 { 1272 {
1263 ptrdiff_t val; 1273 ptrdiff_t val;
1264 1274
1265 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2, 1275 val = re_search_2 (bufp,
1266 pos_byte - BEGV_BYTE, lim_byte - pos_byte, 1276 (char*) (base + off1), s1,
1277 (char*) (base + off2), s2,
1278 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1267 (NILP (Vinhibit_changing_match_data) 1279 (NILP (Vinhibit_changing_match_data)
1268 ? &search_regs : &search_regs_1), 1280 ? &search_regs : &search_regs_1),
1269 lim_byte - BEGV_BYTE); 1281 lim_byte - BEGV_BYTE);