aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--doc/lispref/strings.texi13
-rw-r--r--etc/NEWS6
-rw-r--r--src/fns.c98
-rw-r--r--test/src/fns-tests.el17
4 files changed, 134 insertions, 0 deletions
diff --git a/doc/lispref/strings.texi b/doc/lispref/strings.texi
index 9d6613c522c..a3efbf2f223 100644
--- a/doc/lispref/strings.texi
+++ b/doc/lispref/strings.texi
@@ -633,6 +633,19 @@ If your system does not support a locale environment, this function
633behaves like @code{string-lessp}. 633behaves like @code{string-lessp}.
634@end defun 634@end defun
635 635
636@defun string-numerical-lessp strin1 string2
637This function behaves like @code{string-lessp} for stretches of
638consecutive non-numerical characters, but compares sequences of
639numerical characters as if they comprised a base-ten number, and then
640compares the numbers. So @samp{foo2.png} is ``smaller'' than
641@samp{foo12.png} according to this predicate, even if @samp{12} is
642lexicographically ``smaller'' than @samp{2}.
643
644If one string has a number in a position in the string, and the other
645doesn't, then lexicograpic comparison is done at that point, so
646@samp{foo.png} is ``smaller'' than @samp{foo2.png}.
647@end defun
648
636@defun string-prefix-p string1 string2 &optional ignore-case 649@defun string-prefix-p string1 string2 &optional ignore-case
637This function returns non-@code{nil} if @var{string1} is a prefix of 650This function returns non-@code{nil} if @var{string1} is a prefix of
638@var{string2}; i.e., if @var{string2} starts with @var{string1}. If 651@var{string2}; i.e., if @var{string2} starts with @var{string1}. If
diff --git a/etc/NEWS b/etc/NEWS
index 33c1b136ebc..9f0fb8d6941 100644
--- a/etc/NEWS
+++ b/etc/NEWS
@@ -1720,6 +1720,12 @@ environment. For the time being this is implemented for modern POSIX
1720systems and for MS-Windows, for other systems they fall back to their 1720systems and for MS-Windows, for other systems they fall back to their
1721counterparts `string-lessp' and `string-equal'. 1721counterparts `string-lessp' and `string-equal'.
1722 1722
1723+++
1724** The new function `string-numeric-lessp' compares strings by
1725interpreting consecutive runs of numerical characters as numbers, and
1726compares their numerical values. According to this predicate,
1727"foo2.png" is smaller than "foo12.png".
1728
1723--- 1729---
1724*** The ls-lisp package uses `string-collate-lessp' to sort file names. 1730*** The ls-lisp package uses `string-collate-lessp' to sort file names.
1725The effect is that, on systems that use ls-lisp for Dired, the default 1731The effect is that, on systems that use ls-lisp for Dired, the default
diff --git a/src/fns.c b/src/fns.c
index d1808440966..927fcdac02d 100644
--- a/src/fns.c
+++ b/src/fns.c
@@ -331,6 +331,103 @@ Symbols are also allowed; their print names are used instead. */)
331 return i1 < SCHARS (string2) ? Qt : Qnil; 331 return i1 < SCHARS (string2) ? Qt : Qnil;
332} 332}
333 333
334/* Return the numerical value of a consecutive run of numerical
335 characters from STRING. The ISP and ISP_BYTE address pointer
336 pointers are increased and left at the next character after the
337 numerical characters. */
338static size_t
339gather_number_from_string (int c, Lisp_Object string,
340 ptrdiff_t *isp, ptrdiff_t *isp_byte)
341{
342 size_t number = c - '0';
343 unsigned char *chp;
344 int chlen;
345
346 do
347 {
348 if (STRING_MULTIBYTE (string))
349 {
350 chp = &SDATA (string)[*isp_byte];
351 c = STRING_CHAR_AND_LENGTH (chp, chlen);
352 }
353 else
354 {
355 c = SREF (string, *isp_byte);
356 chlen = 1;
357 }
358
359 /* If we're still in a number, add it to the sum and continue. */
360 /* FIXME: Integer overflow? */
361 if (c >= '0' && c <= '9')
362 {
363 number = number * 10;
364 number += c - '0';
365 (*isp)++;
366 (*isp_byte) += chlen;
367 }
368 else
369 break;
370 }
371 /* Stop when we get to the end of the string anyway. */
372 while (c != 0);
373
374 return number;
375}
376
377DEFUN ("string-numeric-lessp", Fstring_numeric_lessp,
378 Sstring_numeric_lessp, 2, 2, 0,
379 doc: /* Return non-nil if STRING1 is less than STRING2 in 'numeric' order.
380Sequences of non-numerical characters are compared lexicographically,
381while sequences of numerical characters are converted into numbers,
382and then the numbers are compared. This means that \"foo2.png\" is
383less than \"foo12.png\" according to this predicate.
384Case is significant.
385Symbols are also allowed; their print names are used instead. */)
386 (register Lisp_Object string1, Lisp_Object string2)
387{
388 ptrdiff_t end;
389 ptrdiff_t i1, i1_byte, i2, i2_byte;
390 size_t num1, num2;
391
392 if (SYMBOLP (string1))
393 string1 = SYMBOL_NAME (string1);
394 if (SYMBOLP (string2))
395 string2 = SYMBOL_NAME (string2);
396 CHECK_STRING (string1);
397 CHECK_STRING (string2);
398
399 i1 = i1_byte = i2 = i2_byte = 0;
400
401 end = SCHARS (string1);
402 if (end > SCHARS (string2))
403 end = SCHARS (string2);
404
405 while (i1 < end)
406 {
407 /* When we find a mismatch, we must compare the
408 characters, not just the bytes. */
409 int c1, c2;
410
411 FETCH_STRING_CHAR_ADVANCE (c1, string1, i1, i1_byte);
412 FETCH_STRING_CHAR_ADVANCE (c2, string2, i2, i2_byte);
413
414 if (c1 >= '0' && c1 <= '9' &&
415 c2 >= '0' && c2 <= '9')
416 /* Both strings are numbers, so compare them. */
417 {
418 num1 = gather_number_from_string (c1, string1, &i1, &i1_byte);
419 num2 = gather_number_from_string (c2, string2, &i2, &i2_byte);
420 if (num1 < num2)
421 return Qt;
422 else if (num1 > num2)
423 return Qnil;
424 }
425 else if (c1 != c2)
426 return c1 < c2 ? Qt : Qnil;
427 }
428 return i1 < SCHARS (string2) ? Qt : Qnil;
429}
430
334DEFUN ("string-collate-lessp", Fstring_collate_lessp, Sstring_collate_lessp, 2, 4, 0, 431DEFUN ("string-collate-lessp", Fstring_collate_lessp, Sstring_collate_lessp, 2, 4, 0,
335 doc: /* Return t if first arg string is less than second in collation order. 432 doc: /* Return t if first arg string is less than second in collation order.
336Symbols are also allowed; their print names are used instead. 433Symbols are also allowed; their print names are used instead.
@@ -5049,6 +5146,7 @@ this variable. */);
5049 defsubr (&Sstring_equal); 5146 defsubr (&Sstring_equal);
5050 defsubr (&Scompare_strings); 5147 defsubr (&Scompare_strings);
5051 defsubr (&Sstring_lessp); 5148 defsubr (&Sstring_lessp);
5149 defsubr (&Sstring_numeric_lessp);
5052 defsubr (&Sstring_collate_lessp); 5150 defsubr (&Sstring_collate_lessp);
5053 defsubr (&Sstring_collate_equalp); 5151 defsubr (&Sstring_collate_equalp);
5054 defsubr (&Sappend); 5152 defsubr (&Sappend);
diff --git a/test/src/fns-tests.el b/test/src/fns-tests.el
index 762f7bdd94f..0c6edb89252 100644
--- a/test/src/fns-tests.el
+++ b/test/src/fns-tests.el
@@ -191,3 +191,20 @@
191 (string-collate-lessp 191 (string-collate-lessp
192 a b (if (eq system-type 'windows-nt) "enu_USA" "en_US.UTF-8"))))) 192 a b (if (eq system-type 'windows-nt) "enu_USA" "en_US.UTF-8")))))
193 '("Adrian" "Ævar" "Agustín" "Eli")))) 193 '("Adrian" "Ævar" "Agustín" "Eli"))))
194
195(ert-deftest fns-tests-string-numeric-lessp ()
196 (should (string-numeric-lessp "foo2.png" "foo12.png"))
197 (should (not (string-numeric-lessp "foo12.png" "foo2.png")))
198 (should (string-numeric-lessp "foo12.png" "foo20000.png"))
199 (should (not (string-numeric-lessp "foo20000.png" "foo12.png")))
200 (should (string-numeric-lessp "foo.png" "foo2.png"))
201 (should (not (string-numeric-lessp "foo2.png" "foo.png")))
202 (should (equal (sort '("foo12.png" "foo2.png" "foo1.png")
203 'string-numeric-lessp)
204 '("foo1.png" "foo2.png" "foo12.png")))
205 (should (string-numeric-lessp "foo2" "foo1234"))
206 (should (not (string-numeric-lessp "foo1234" "foo2")))
207 (should (string-numeric-lessp "foo.png" "foo2"))
208 (should (string-numeric-lessp "foo1.25.5.png" "foo1.125.5"))
209 (should (string-numeric-lessp "2" "1245"))
210 (should (not (string-numeric-lessp "1245" "2"))))