aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChen Bin2018-04-20 00:38:29 +1000
committerEli Zaretskii2018-04-28 09:56:14 +0300
commitc6e6503900534d939dd94b812563c27f22c49b7d (patch)
tree4fd31046412e32e6475e43497bf94040ef79975e
parent4bc74dac281ff2a502fc89e76f6210dc711cfed1 (diff)
downloademacs-c6e6503900534d939dd94b812563c27f22c49b7d.tar.gz
emacs-c6e6503900534d939dd94b812563c27f22c49b7d.zip
New function 'string-distance'
* src/fns.c (Fstring_distance): New primitive. (syms_of_fns): Defsubr it. * test/lisp/subr-tests.el (subr-tests--string-distance): New test. * etc/NEWS: Mention 'string-distance'.
-rw-r--r--etc/NEWS3
-rw-r--r--src/fns.c62
-rw-r--r--test/lisp/subr-tests.el18
3 files changed, 83 insertions, 0 deletions
diff --git a/etc/NEWS b/etc/NEWS
index ca467c0ad49..d40f7816b86 100644
--- a/etc/NEWS
+++ b/etc/NEWS
@@ -534,6 +534,9 @@ manual for more details.
534+++ 534+++
535** New function assoc-delete-all. 535** New function assoc-delete-all.
536 536
537** New function string-distance to calculate Levenshtein distance
538between two strings.
539
537** 'print-quoted' now defaults to t, so if you want to see 540** 'print-quoted' now defaults to t, so if you want to see
538(quote x) instead of 'x you will have to bind it to nil where applicable. 541(quote x) instead of 'x you will have to bind it to nil where applicable.
539 542
diff --git a/src/fns.c b/src/fns.c
index 94b9d984f0d..6e851c8555a 100644
--- a/src/fns.c
+++ b/src/fns.c
@@ -153,6 +153,67 @@ If STRING is multibyte, this may be greater than the length of STRING. */)
153 return make_number (SBYTES (string)); 153 return make_number (SBYTES (string));
154} 154}
155 155
156DEFUN ("string-distance", Fstring_distance, Sstring_distance, 2, 3, 0,
157 doc: /* Return Levenshtein distance between STRING1 and STRING2.
158If BYTECOMPARE is nil, compare character of strings.
159If BYTECOMPARE is t, compare byte of strings.
160Case is significant, but text properties are ignored. */)
161 (Lisp_Object string1, Lisp_Object string2, Lisp_Object bytecompare)
162
163{
164 CHECK_STRING (string1);
165 CHECK_STRING (string2);
166
167 bool use_byte_compare = !NILP (bytecompare)
168 || (!STRING_MULTIBYTE (string1) && !STRING_MULTIBYTE (string2));
169 ptrdiff_t len1 = use_byte_compare? SBYTES (string1) : SCHARS (string1);
170 ptrdiff_t len2 = use_byte_compare? SBYTES (string2) : SCHARS (string2);
171 ptrdiff_t x, y, lastdiag, olddiag;
172
173 USE_SAFE_ALLOCA;
174 ptrdiff_t *column = SAFE_ALLOCA ((len1 + 1) * sizeof (ptrdiff_t));
175 for (y = 1; y <= len1; y++)
176 column[y] = y;
177
178 if (use_byte_compare)
179 {
180 char *s1 = SSDATA (string1);
181 char *s2 = SSDATA (string2);
182
183 for (x = 1; x <= len2; x++)
184 {
185 column[0] = x;
186 for (y = 1, lastdiag = x - 1; y <= len1; y++)
187 {
188 olddiag = column[y];
189 column[y] = min (min (column[y] + 1, column[y-1] + 1), lastdiag + (s1[y-1] == s2[x-1]? 0 : 1));
190 lastdiag = olddiag;
191 }
192 }
193 }
194 else
195 {
196 int c1, c2;
197 ptrdiff_t i1, i1_byte, i2 = 0, i2_byte = 0;
198 for (x = 1; x <= len2; x++)
199 {
200 column[0] = x;
201 FETCH_STRING_CHAR_ADVANCE (c2, string2, i2, i2_byte);
202 i1 = i1_byte = 0;
203 for (y = 1, lastdiag = x - 1; y <= len1; y++)
204 {
205 olddiag = column[y];
206 FETCH_STRING_CHAR_ADVANCE (c1, string1, i1, i1_byte);
207 column[y] = min (min (column[y] + 1, column[y-1] + 1), lastdiag + (c1 == c2? 0 : 1));
208 lastdiag = olddiag;
209 }
210 }
211 }
212
213 SAFE_FREE ();
214 return make_number (column[len1]);
215}
216
156DEFUN ("string-equal", Fstring_equal, Sstring_equal, 2, 2, 0, 217DEFUN ("string-equal", Fstring_equal, Sstring_equal, 2, 2, 0,
157 doc: /* Return t if two strings have identical contents. 218 doc: /* Return t if two strings have identical contents.
158Case is significant, but text properties are ignored. 219Case is significant, but text properties are ignored.
@@ -5226,6 +5287,7 @@ this variable. */);
5226 defsubr (&Slength); 5287 defsubr (&Slength);
5227 defsubr (&Ssafe_length); 5288 defsubr (&Ssafe_length);
5228 defsubr (&Sstring_bytes); 5289 defsubr (&Sstring_bytes);
5290 defsubr (&Sstring_distance);
5229 defsubr (&Sstring_equal); 5291 defsubr (&Sstring_equal);
5230 defsubr (&Scompare_strings); 5292 defsubr (&Scompare_strings);
5231 defsubr (&Sstring_lessp); 5293 defsubr (&Sstring_lessp);
diff --git a/test/lisp/subr-tests.el b/test/lisp/subr-tests.el
index 52b61d9fb97..6b80c743a05 100644
--- a/test/lisp/subr-tests.el
+++ b/test/lisp/subr-tests.el
@@ -281,6 +281,24 @@ indirectly `mapbacktrace'."
281 (should (equal (string-match-p "\\`[[:blank:]]\\'" "\u3000") 0)) 281 (should (equal (string-match-p "\\`[[:blank:]]\\'" "\u3000") 0))
282 (should-not (string-match-p "\\`[[:blank:]]\\'" "\N{LINE SEPARATOR}"))) 282 (should-not (string-match-p "\\`[[:blank:]]\\'" "\N{LINE SEPARATOR}")))
283 283
284(ert-deftest subr-tests--string-distance ()
285 "Test `string-distance' behavior."
286 ;; ASCII characters are always fine
287 (should (equal 1 (string-distance "heelo" "hello")))
288 (should (equal 2 (string-distance "aeelo" "hello")))
289 (should (equal 0 (string-distance "ab" "ab" t)))
290 (should (equal 1 (string-distance "ab" "abc" t)))
291
292 ;; string containing hanzi character, compare by byte
293 (should (equal 6 (string-distance "ab" "ab我她" t)))
294 (should (equal 3 (string-distance "ab" "a我b" t)))
295 (should (equal 3 (string-distance "我" "她" t)))
296
297 ;; string containing hanzi character, compare by character
298 (should (equal 2 (string-distance "ab" "ab我她")))
299 (should (equal 1 (string-distance "ab" "a我b")))
300 (should (equal 1 (string-distance "我" "她"))))
301
284(ert-deftest subr-tests--dolist--wrong-number-of-args () 302(ert-deftest subr-tests--dolist--wrong-number-of-args ()
285 "Test that `dolist' doesn't accept wrong types or length of SPEC, 303 "Test that `dolist' doesn't accept wrong types or length of SPEC,
286cf. Bug#25477." 304cf. Bug#25477."