diff options
| author | Mattias Engdegård | 2020-09-27 14:28:07 +0200 |
|---|---|---|
| committer | Mattias Engdegård | 2020-09-27 14:28:07 +0200 |
| commit | 8bd233a7eb6bc4709d0adf0577d30aaf167e75bf (patch) | |
| tree | 4b5ba2e1390afd42c033b977a110a28a4ff5df1a | |
| parent | 583cb264ad386a3767a05233f0e50b788bcf31ac (diff) | |
| download | emacs-8bd233a7eb6bc4709d0adf0577d30aaf167e75bf.tar.gz emacs-8bd233a7eb6bc4709d0adf0577d30aaf167e75bf.zip | |
Minor string-search optimisations (bug#43598)
* src/fns.c (Fstring_search): Perform cheap all-ASCII checks before more
expensive ones. Use a faster loop when searching for non-ASCII
non-raw bytes.
* test/src/fns-tests.el (string-search): Add more test cases.
| -rw-r--r-- | src/fns.c | 59 | ||||
| -rw-r--r-- | test/src/fns-tests.el | 22 |
2 files changed, 47 insertions, 34 deletions
| @@ -5457,16 +5457,11 @@ It should not be used for anything security-related. See | |||
| 5457 | static bool | 5457 | static bool |
| 5458 | string_ascii_p (Lisp_Object string) | 5458 | string_ascii_p (Lisp_Object string) |
| 5459 | { | 5459 | { |
| 5460 | if (STRING_MULTIBYTE (string)) | 5460 | ptrdiff_t nbytes = SBYTES (string); |
| 5461 | return SBYTES (string) == SCHARS (string); | 5461 | for (ptrdiff_t i = 0; i < nbytes; i++) |
| 5462 | else | 5462 | if (SREF (string, i) > 127) |
| 5463 | { | 5463 | return false; |
| 5464 | ptrdiff_t nbytes = SBYTES (string); | 5464 | return true; |
| 5465 | for (ptrdiff_t i = 0; i < nbytes; i++) | ||
| 5466 | if (SREF (string, i) > 127) | ||
| 5467 | return false; | ||
| 5468 | return true; | ||
| 5469 | } | ||
| 5470 | } | 5465 | } |
| 5471 | 5466 | ||
| 5472 | DEFUN ("string-search", Fstring_search, Sstring_search, 2, 3, 0, | 5467 | DEFUN ("string-search", Fstring_search, Sstring_search, 2, 3, 0, |
| @@ -5505,9 +5500,14 @@ Case is always significant and text properties are ignored. */) | |||
| 5505 | haystart = SSDATA (haystack) + start_byte; | 5500 | haystart = SSDATA (haystack) + start_byte; |
| 5506 | haybytes = SBYTES (haystack) - start_byte; | 5501 | haybytes = SBYTES (haystack) - start_byte; |
| 5507 | 5502 | ||
| 5508 | if (STRING_MULTIBYTE (haystack) == STRING_MULTIBYTE (needle) | 5503 | /* We can do a direct byte-string search if both strings have the |
| 5509 | || string_ascii_p (needle) | 5504 | same multibyteness, or if at least one of them consists of ASCII |
| 5510 | || string_ascii_p (haystack)) | 5505 | characters only. */ |
| 5506 | if (STRING_MULTIBYTE (haystack) | ||
| 5507 | ? (STRING_MULTIBYTE (needle) | ||
| 5508 | || SCHARS (haystack) == SBYTES (haystack) || string_ascii_p (needle)) | ||
| 5509 | : (!STRING_MULTIBYTE (needle) | ||
| 5510 | || SCHARS (needle) == SBYTES (needle) || string_ascii_p (haystack))) | ||
| 5511 | res = memmem (haystart, haybytes, | 5511 | res = memmem (haystart, haybytes, |
| 5512 | SSDATA (needle), SBYTES (needle)); | 5512 | SSDATA (needle), SBYTES (needle)); |
| 5513 | else if (STRING_MULTIBYTE (haystack)) /* unibyte needle */ | 5513 | else if (STRING_MULTIBYTE (haystack)) /* unibyte needle */ |
| @@ -5521,26 +5521,21 @@ Case is always significant and text properties are ignored. */) | |||
| 5521 | /* The only possible way we can find the multibyte needle in the | 5521 | /* The only possible way we can find the multibyte needle in the |
| 5522 | unibyte stack (since we know that neither are pure-ASCII) is | 5522 | unibyte stack (since we know that neither are pure-ASCII) is |
| 5523 | if they contain "raw bytes" (and no other non-ASCII chars.) */ | 5523 | if they contain "raw bytes" (and no other non-ASCII chars.) */ |
| 5524 | ptrdiff_t chars = SCHARS (needle); | 5524 | ptrdiff_t nbytes = SBYTES (needle); |
| 5525 | const unsigned char *src = SDATA (needle); | 5525 | for (ptrdiff_t i = 0; i < nbytes; i++) |
| 5526 | 5526 | { | |
| 5527 | for (ptrdiff_t i = 0; i < chars; i++) | 5527 | int c = SREF (needle, i); |
| 5528 | { | 5528 | if (CHAR_BYTE8_HEAD_P (c)) |
| 5529 | int c = string_char_advance (&src); | 5529 | i++; /* Skip raw byte. */ |
| 5530 | 5530 | else if (!ASCII_CHAR_P (c)) | |
| 5531 | if (!CHAR_BYTE8_P (c) | 5531 | return Qnil; /* Found a char that can't be in the haystack. */ |
| 5532 | && !ASCII_CHAR_P (c)) | 5532 | } |
| 5533 | /* Found a char that can't be in the haystack. */ | ||
| 5534 | return Qnil; | ||
| 5535 | } | ||
| 5536 | 5533 | ||
| 5537 | { | 5534 | /* "Raw bytes" (aka eighth-bit) are represented differently in |
| 5538 | /* "Raw bytes" (aka eighth-bit) are represented differently in | 5535 | multibyte and unibyte strings. */ |
| 5539 | multibyte and unibyte strings. */ | 5536 | Lisp_Object uni_needle = Fstring_to_unibyte (needle); |
| 5540 | Lisp_Object uni_needle = Fstring_to_unibyte (needle); | 5537 | res = memmem (haystart, haybytes, |
| 5541 | res = memmem (haystart, haybytes, | 5538 | SSDATA (uni_needle), SBYTES (uni_needle)); |
| 5542 | SSDATA (uni_needle), SBYTES (uni_needle)); | ||
| 5543 | } | ||
| 5544 | } | 5539 | } |
| 5545 | 5540 | ||
| 5546 | if (! res) | 5541 | if (! res) |
diff --git a/test/src/fns-tests.el b/test/src/fns-tests.el index 41969f2af2c..d3c22f966e6 100644 --- a/test/src/fns-tests.el +++ b/test/src/fns-tests.el | |||
| @@ -913,6 +913,7 @@ | |||
| 913 | (should (equal (string-search "ab\0" "ab") nil)) | 913 | (should (equal (string-search "ab\0" "ab") nil)) |
| 914 | (should (equal (string-search "ab" "abababab" 3) 4)) | 914 | (should (equal (string-search "ab" "abababab" 3) 4)) |
| 915 | (should (equal (string-search "ab" "ababac" 3) nil)) | 915 | (should (equal (string-search "ab" "ababac" 3) nil)) |
| 916 | (should (equal (string-search "aaa" "aa") nil)) | ||
| 916 | (let ((case-fold-search t)) | 917 | (let ((case-fold-search t)) |
| 917 | (should (equal (string-search "ab" "AB") nil))) | 918 | (should (equal (string-search "ab" "AB") nil))) |
| 918 | 919 | ||
| @@ -936,14 +937,16 @@ | |||
| 936 | (should (equal (string-search (string-to-multibyte "\377") "ab\377c") 2)) | 937 | (should (equal (string-search (string-to-multibyte "\377") "ab\377c") 2)) |
| 937 | (should (equal (string-search "\303" "aøb") nil)) | 938 | (should (equal (string-search "\303" "aøb") nil)) |
| 938 | (should (equal (string-search "\270" "aøb") nil)) | 939 | (should (equal (string-search "\270" "aøb") nil)) |
| 939 | ;; This test currently fails, but it shouldn't! | 940 | (should (equal (string-search "ø" "\303\270") nil)) |
| 940 | ;;(should (equal (string-search "ø" "\303\270") nil)) | 941 | |
| 942 | (should (equal (string-search "a\U00010f98z" "a\U00010f98a\U00010f98z") 2)) | ||
| 941 | 943 | ||
| 942 | (should-error (string-search "a" "abc" -1)) | 944 | (should-error (string-search "a" "abc" -1)) |
| 943 | (should-error (string-search "a" "abc" 4)) | 945 | (should-error (string-search "a" "abc" 4)) |
| 944 | (should-error (string-search "a" "abc" 100000000000)) | 946 | (should-error (string-search "a" "abc" 100000000000)) |
| 945 | 947 | ||
| 946 | (should (equal (string-search "a" "aaa" 3) nil)) | 948 | (should (equal (string-search "a" "aaa" 3) nil)) |
| 949 | (should (equal (string-search "aa" "aa" 1) nil)) | ||
| 947 | (should (equal (string-search "\0" "") nil)) | 950 | (should (equal (string-search "\0" "") nil)) |
| 948 | 951 | ||
| 949 | (should (equal (string-search "" "") 0)) | 952 | (should (equal (string-search "" "") 0)) |
| @@ -955,6 +958,21 @@ | |||
| 955 | (should-error (string-search "" "abc" -1)) | 958 | (should-error (string-search "" "abc" -1)) |
| 956 | 959 | ||
| 957 | (should-not (string-search "ø" "foo\303\270")) | 960 | (should-not (string-search "ø" "foo\303\270")) |
| 961 | (should-not (string-search "\303\270" "ø")) | ||
| 962 | (should-not (string-search "\370" "ø")) | ||
| 963 | (should-not (string-search (string-to-multibyte "\370") "ø")) | ||
| 964 | (should-not (string-search "ø" "\370")) | ||
| 965 | (should-not (string-search "ø" (string-to-multibyte "\370"))) | ||
| 966 | (should-not (string-search "\303\270" "\370")) | ||
| 967 | (should-not (string-search (string-to-multibyte "\303\270") "\370")) | ||
| 968 | (should-not (string-search "\303\270" (string-to-multibyte "\370"))) | ||
| 969 | (should-not (string-search (string-to-multibyte "\303\270") | ||
| 970 | (string-to-multibyte "\370"))) | ||
| 971 | (should-not (string-search "\370" "\303\270")) | ||
| 972 | (should-not (string-search (string-to-multibyte "\370") "\303\270")) | ||
| 973 | (should-not (string-search "\370" (string-to-multibyte "\303\270"))) | ||
| 974 | (should-not (string-search (string-to-multibyte "\370") | ||
| 975 | (string-to-multibyte "\303\270"))) | ||
| 958 | (should (equal (string-search (string-to-multibyte "o\303\270") "foo\303\270") | 976 | (should (equal (string-search (string-to-multibyte "o\303\270") "foo\303\270") |
| 959 | 2)) | 977 | 2)) |
| 960 | (should (equal (string-search "\303\270" "foo\303\270") 3))) | 978 | (should (equal (string-search "\303\270" "foo\303\270") 3))) |