aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author: Lars Ingebrigtsen, 2020-09-27 02:01:03 +0200
committer: Lars Ingebrigtsen, 2020-09-27 02:01:03 +0200
commit: 8c569683f2ee5d14040f5605fd0570b2eb009c05 (patch)
tree: 913a8be96d0b7057797ab7b394fba184cdd1b2f8 /src
parent: 8a148c5976e3fad53d540ce5aa52a36c6b658f85 (diff)
download: emacs-8c569683f2ee5d14040f5605fd0570b2eb009c05.tar.gz
emacs-8c569683f2ee5d14040f5605fd0570b2eb009c05.zip
Fix searching for multibyte needles in unibyte haystacks
* src/fns.c (Fstring_search): Make this work better when searching unibyte haystacks for multibyte needles (bug#43598).
Diffstat (limited to 'src')
-rw-r--r-- src/fns.c 45
1 files changed, 41 insertions, 4 deletions
diff --git a/src/fns.c b/src/fns.c
index 2fcc282dcb3..0f768711544 100644
--- a/src/fns.c
+++ b/src/fns.c
@@ -5454,6 +5454,21 @@ It should not be used for anything security-related. See
5454 return make_digest_string (digest, SHA1_DIGEST_SIZE); 5454 return make_digest_string (digest, SHA1_DIGEST_SIZE);
5455} 5455}
5456 5456
/* Return true if STRING contains only ASCII characters (no byte or
   character above 127), false otherwise.

   For a multibyte string the answer is obtained without scanning, by
   comparing the byte count with the character count.  For a unibyte
   string every byte is inspected.  */
5457static bool
5458string_ascii_p (Lisp_Object string)
5459{
5460 if (STRING_MULTIBYTE (string))
     /* Equal byte and character counts mean one byte per character;
        presumably any non-ASCII character would take more than one
        byte in the multibyte representation, making the counts
        differ.  */
5461 return SBYTES (string) == SCHARS (string);
5462 else
5463 {
5464 ptrdiff_t nbytes = SBYTES (string);
     /* Unibyte: look for any byte outside the 7-bit ASCII range.  */
5465 for (ptrdiff_t i = 0; i < nbytes; i++)
5466 if (SREF (string, i) > 127)
5467 return false;
5468 return true;
5469 }
5470}
5471
5457DEFUN ("string-search", Fstring_search, Sstring_search, 2, 3, 0, 5472DEFUN ("string-search", Fstring_search, Sstring_search, 2, 3, 0,
5458 doc: /* Search for the string NEEDLE in the string HAYSTACK. 5473 doc: /* Search for the string NEEDLE in the string HAYSTACK.
5459The return value is the position of the first occurrence of NEEDLE in 5474The return value is the position of the first occurrence of NEEDLE in
@@ -5490,7 +5505,9 @@ Case is always significant and text properties are ignored. */)
5490 haystart = SSDATA (haystack) + start_byte; 5505 haystart = SSDATA (haystack) + start_byte;
5491 haybytes = SBYTES (haystack) - start_byte; 5506 haybytes = SBYTES (haystack) - start_byte;
5492 5507
5493 if (STRING_MULTIBYTE (haystack) == STRING_MULTIBYTE (needle)) 5508 if (STRING_MULTIBYTE (haystack) == STRING_MULTIBYTE (needle)
5509 || string_ascii_p (needle)
5510 || string_ascii_p (haystack))
5494 res = memmem (haystart, haybytes, 5511 res = memmem (haystart, haybytes,
5495 SSDATA (needle), SBYTES (needle)); 5512 SSDATA (needle), SBYTES (needle));
5496 else if (STRING_MULTIBYTE (haystack)) /* unibyte needle */ 5513 else if (STRING_MULTIBYTE (haystack)) /* unibyte needle */
@@ -5501,9 +5518,29 @@ Case is always significant and text properties are ignored. */)
5501 } 5518 }
5502 else /* unibyte haystack, multibyte needle */ 5519 else /* unibyte haystack, multibyte needle */
5503 { 5520 {
5504 Lisp_Object uni_needle = Fstring_as_unibyte (needle); 5521 /* The only possible way we can find the multibyte needle in the
5505 res = memmem (haystart, haybytes, 5522 unibyte stack (since we know that neither are pure-ASCII) is
5506 SSDATA (uni_needle), SBYTES (uni_needle)); 5523 if they contain "raw bytes" (and no other non-ASCII chars.) */
5524 ptrdiff_t chars = SCHARS (needle);
5525 const unsigned char *src = SDATA (needle);
5526
5527 for (ptrdiff_t i = 0; i < chars; i++)
5528 {
5529 int c = string_char_advance (&src);
5530
5531 if (!CHAR_BYTE8_P (c)
5532 && !ASCII_CHAR_P (c))
5533 /* Found a char that can't be in the haystack. */
5534 return Qnil;
5535 }
5536
5537 {
5538 /* "Raw bytes" (aka eighth-bit) are represented differently in
5539 multibyte and unibyte strings. */
5540 Lisp_Object uni_needle = Fstring_to_unibyte (needle);
5541 res = memmem (haystart, haybytes,
5542 SSDATA (uni_needle), SBYTES (uni_needle));
5543 }
5507 } 5544 }
5508 5545
5509 if (! res) 5546 if (! res)