aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMattias Engdegård2020-09-27 14:28:07 +0200
committerMattias Engdegård2020-09-27 14:28:07 +0200
commit8bd233a7eb6bc4709d0adf0577d30aaf167e75bf (patch)
tree4b5ba2e1390afd42c033b977a110a28a4ff5df1a
parent583cb264ad386a3767a05233f0e50b788bcf31ac (diff)
downloademacs-8bd233a7eb6bc4709d0adf0577d30aaf167e75bf.tar.gz
emacs-8bd233a7eb6bc4709d0adf0577d30aaf167e75bf.zip
Minor string-search optimisations (bug#43598)
* src/fns.c (Fstring_search): Perform cheap all-ASCII checks before more expensive ones. Use a faster loop when searching for non-ASCII non-raw bytes. * test/src/fns-tests.el (string-search): Add more test cases.
-rw-r--r--src/fns.c59
-rw-r--r--test/src/fns-tests.el22
2 files changed, 47 insertions, 34 deletions
diff --git a/src/fns.c b/src/fns.c
index 0f768711544..f626fe11b20 100644
--- a/src/fns.c
+++ b/src/fns.c
@@ -5457,16 +5457,11 @@ It should not be used for anything security-related. See
5457static bool 5457static bool
5458string_ascii_p (Lisp_Object string) 5458string_ascii_p (Lisp_Object string)
5459{ 5459{
5460 if (STRING_MULTIBYTE (string)) 5460 ptrdiff_t nbytes = SBYTES (string);
5461 return SBYTES (string) == SCHARS (string); 5461 for (ptrdiff_t i = 0; i < nbytes; i++)
5462 else 5462 if (SREF (string, i) > 127)
5463 { 5463 return false;
5464 ptrdiff_t nbytes = SBYTES (string); 5464 return true;
5465 for (ptrdiff_t i = 0; i < nbytes; i++)
5466 if (SREF (string, i) > 127)
5467 return false;
5468 return true;
5469 }
5470} 5465}
5471 5466
5472DEFUN ("string-search", Fstring_search, Sstring_search, 2, 3, 0, 5467DEFUN ("string-search", Fstring_search, Sstring_search, 2, 3, 0,
@@ -5505,9 +5500,14 @@ Case is always significant and text properties are ignored. */)
5505 haystart = SSDATA (haystack) + start_byte; 5500 haystart = SSDATA (haystack) + start_byte;
5506 haybytes = SBYTES (haystack) - start_byte; 5501 haybytes = SBYTES (haystack) - start_byte;
5507 5502
5508 if (STRING_MULTIBYTE (haystack) == STRING_MULTIBYTE (needle) 5503 /* We can do a direct byte-string search if both strings have the
5509 || string_ascii_p (needle) 5504 same multibyteness, or if at least one of them consists of ASCII
5510 || string_ascii_p (haystack)) 5505 characters only. */
5506 if (STRING_MULTIBYTE (haystack)
5507 ? (STRING_MULTIBYTE (needle)
5508 || SCHARS (haystack) == SBYTES (haystack) || string_ascii_p (needle))
5509 : (!STRING_MULTIBYTE (needle)
5510 || SCHARS (needle) == SBYTES (needle) || string_ascii_p (haystack)))
5511 res = memmem (haystart, haybytes, 5511 res = memmem (haystart, haybytes,
5512 SSDATA (needle), SBYTES (needle)); 5512 SSDATA (needle), SBYTES (needle));
5513 else if (STRING_MULTIBYTE (haystack)) /* unibyte needle */ 5513 else if (STRING_MULTIBYTE (haystack)) /* unibyte needle */
@@ -5521,26 +5521,21 @@ Case is always significant and text properties are ignored. */)
5521 /* The only possible way we can find the multibyte needle in the 5521 /* The only possible way we can find the multibyte needle in the
5522 unibyte stack (since we know that neither are pure-ASCII) is 5522 unibyte stack (since we know that neither are pure-ASCII) is
5523 if they contain "raw bytes" (and no other non-ASCII chars.) */ 5523 if they contain "raw bytes" (and no other non-ASCII chars.) */
5524 ptrdiff_t chars = SCHARS (needle); 5524 ptrdiff_t nbytes = SBYTES (needle);
5525 const unsigned char *src = SDATA (needle); 5525 for (ptrdiff_t i = 0; i < nbytes; i++)
5526 5526 {
5527 for (ptrdiff_t i = 0; i < chars; i++) 5527 int c = SREF (needle, i);
5528 { 5528 if (CHAR_BYTE8_HEAD_P (c))
5529 int c = string_char_advance (&src); 5529 i++; /* Skip raw byte. */
5530 5530 else if (!ASCII_CHAR_P (c))
5531 if (!CHAR_BYTE8_P (c) 5531 return Qnil; /* Found a char that can't be in the haystack. */
5532 && !ASCII_CHAR_P (c)) 5532 }
5533 /* Found a char that can't be in the haystack. */
5534 return Qnil;
5535 }
5536 5533
5537 { 5534 /* "Raw bytes" (aka eighth-bit) are represented differently in
5538 /* "Raw bytes" (aka eighth-bit) are represented differently in 5535 multibyte and unibyte strings. */
5539 multibyte and unibyte strings. */ 5536 Lisp_Object uni_needle = Fstring_to_unibyte (needle);
5540 Lisp_Object uni_needle = Fstring_to_unibyte (needle); 5537 res = memmem (haystart, haybytes,
5541 res = memmem (haystart, haybytes, 5538 SSDATA (uni_needle), SBYTES (uni_needle));
5542 SSDATA (uni_needle), SBYTES (uni_needle));
5543 }
5544 } 5539 }
5545 5540
5546 if (! res) 5541 if (! res)
diff --git a/test/src/fns-tests.el b/test/src/fns-tests.el
index 41969f2af2c..d3c22f966e6 100644
--- a/test/src/fns-tests.el
+++ b/test/src/fns-tests.el
@@ -913,6 +913,7 @@
913 (should (equal (string-search "ab\0" "ab") nil)) 913 (should (equal (string-search "ab\0" "ab") nil))
914 (should (equal (string-search "ab" "abababab" 3) 4)) 914 (should (equal (string-search "ab" "abababab" 3) 4))
915 (should (equal (string-search "ab" "ababac" 3) nil)) 915 (should (equal (string-search "ab" "ababac" 3) nil))
916 (should (equal (string-search "aaa" "aa") nil))
916 (let ((case-fold-search t)) 917 (let ((case-fold-search t))
917 (should (equal (string-search "ab" "AB") nil))) 918 (should (equal (string-search "ab" "AB") nil)))
918 919
@@ -936,14 +937,16 @@
936 (should (equal (string-search (string-to-multibyte "\377") "ab\377c") 2)) 937 (should (equal (string-search (string-to-multibyte "\377") "ab\377c") 2))
937 (should (equal (string-search "\303" "aøb") nil)) 938 (should (equal (string-search "\303" "aøb") nil))
938 (should (equal (string-search "\270" "aøb") nil)) 939 (should (equal (string-search "\270" "aøb") nil))
939 ;; This test currently fails, but it shouldn't! 940 (should (equal (string-search "ø" "\303\270") nil))
940 ;;(should (equal (string-search "ø" "\303\270") nil)) 941
942 (should (equal (string-search "a\U00010f98z" "a\U00010f98a\U00010f98z") 2))
941 943
942 (should-error (string-search "a" "abc" -1)) 944 (should-error (string-search "a" "abc" -1))
943 (should-error (string-search "a" "abc" 4)) 945 (should-error (string-search "a" "abc" 4))
944 (should-error (string-search "a" "abc" 100000000000)) 946 (should-error (string-search "a" "abc" 100000000000))
945 947
946 (should (equal (string-search "a" "aaa" 3) nil)) 948 (should (equal (string-search "a" "aaa" 3) nil))
949 (should (equal (string-search "aa" "aa" 1) nil))
947 (should (equal (string-search "\0" "") nil)) 950 (should (equal (string-search "\0" "") nil))
948 951
949 (should (equal (string-search "" "") 0)) 952 (should (equal (string-search "" "") 0))
@@ -955,6 +958,21 @@
955 (should-error (string-search "" "abc" -1)) 958 (should-error (string-search "" "abc" -1))
956 959
957 (should-not (string-search "ø" "foo\303\270")) 960 (should-not (string-search "ø" "foo\303\270"))
961 (should-not (string-search "\303\270" "ø"))
962 (should-not (string-search "\370" "ø"))
963 (should-not (string-search (string-to-multibyte "\370") "ø"))
964 (should-not (string-search "ø" "\370"))
965 (should-not (string-search "ø" (string-to-multibyte "\370")))
966 (should-not (string-search "\303\270" "\370"))
967 (should-not (string-search (string-to-multibyte "\303\270") "\370"))
968 (should-not (string-search "\303\270" (string-to-multibyte "\370")))
969 (should-not (string-search (string-to-multibyte "\303\270")
970 (string-to-multibyte "\370")))
971 (should-not (string-search "\370" "\303\270"))
972 (should-not (string-search (string-to-multibyte "\370") "\303\270"))
973 (should-not (string-search "\370" (string-to-multibyte "\303\270")))
974 (should-not (string-search (string-to-multibyte "\370")
975 (string-to-multibyte "\303\270")))
958 (should (equal (string-search (string-to-multibyte "o\303\270") "foo\303\270") 976 (should (equal (string-search (string-to-multibyte "o\303\270") "foo\303\270")
959 2)) 977 2))
960 (should (equal (string-search "\303\270" "foo\303\270") 3))) 978 (should (equal (string-search "\303\270" "foo\303\270") 3)))