diff options
| author | Lars Ingebrigtsen | 2016-02-21 15:32:45 +1100 |
|---|---|---|
| committer | Lars Ingebrigtsen | 2016-02-21 15:32:45 +1100 |
| commit | 71783e90a46ca913ea2c334cdc8cb24cd74055f8 (patch) | |
| tree | 3c35b883caea4392789d6c991a08bb74475407ad | |
| parent | 1ba50a0d8cbef6686ecf752583832e7bbb9137ef (diff) | |
| download | emacs-71783e90a46ca913ea2c334cdc8cb24cd74055f8.tar.gz emacs-71783e90a46ca913ea2c334cdc8cb24cd74055f8.zip | |
Add the string-numeric-lessp function
* doc/lispref/strings.texi (Text Comparison): Document
`string-numerical-lessp'.
* src/fns.c (Fstring_numeric_lessp): New function.
(gather_number_from_string): Helper function for that function.
* test/src/fns-tests.el (fns-tests-string-numeric-lessp): Add tests.
| -rw-r--r-- | doc/lispref/strings.texi | 13 | ||||
| -rw-r--r-- | etc/NEWS | 6 | ||||
| -rw-r--r-- | src/fns.c | 98 | ||||
| -rw-r--r-- | test/src/fns-tests.el | 17 |
4 files changed, 134 insertions, 0 deletions
diff --git a/doc/lispref/strings.texi b/doc/lispref/strings.texi index 9d6613c522c..a3efbf2f223 100644 --- a/doc/lispref/strings.texi +++ b/doc/lispref/strings.texi | |||
| @@ -633,6 +633,19 @@ If your system does not support a locale environment, this function | |||
| 633 | behaves like @code{string-lessp}. | 633 | behaves like @code{string-lessp}. |
| 634 | @end defun | 634 | @end defun |
| 635 | 635 | ||
| 636 | @defun string-numerical-lessp strin1 string2 | ||
| 637 | This function behaves like @code{string-lessp} for stretches of | ||
| 638 | consecutive non-numerical characters, but compares sequences of | ||
| 639 | numerical characters as if they comprised a base-ten number, and then | ||
| 640 | compares the numbers. So @samp{foo2.png} is ``smaller'' than | ||
| 641 | @samp{foo12.png} according to this predicate, even if @samp{12} is | ||
| 642 | lexicographically ``smaller'' than @samp{2}. | ||
| 643 | |||
| 644 | If one string has a number in a position in the string, and the other | ||
| 645 | doesn't, then lexicograpic comparison is done at that point, so | ||
| 646 | @samp{foo.png} is ``smaller'' than @samp{foo2.png}. | ||
| 647 | @end defun | ||
| 648 | |||
| 636 | @defun string-prefix-p string1 string2 &optional ignore-case | 649 | @defun string-prefix-p string1 string2 &optional ignore-case |
| 637 | This function returns non-@code{nil} if @var{string1} is a prefix of | 650 | This function returns non-@code{nil} if @var{string1} is a prefix of |
| 638 | @var{string2}; i.e., if @var{string2} starts with @var{string1}. If | 651 | @var{string2}; i.e., if @var{string2} starts with @var{string1}. If |
| @@ -1720,6 +1720,12 @@ environment. For the time being this is implemented for modern POSIX | |||
| 1720 | systems and for MS-Windows, for other systems they fall back to their | 1720 | systems and for MS-Windows, for other systems they fall back to their |
| 1721 | counterparts `string-lessp' and `string-equal'. | 1721 | counterparts `string-lessp' and `string-equal'. |
| 1722 | 1722 | ||
| 1723 | +++ | ||
| 1724 | ** The new function `string-numeric-lessp' compares strings by | ||
| 1725 | interpreting consecutive runs of numerical characters as numbers, and | ||
| 1726 | compares their numerical values. According to this predicate, | ||
| 1727 | "foo2.png" is smaller than "foo12.png". | ||
| 1728 | |||
| 1723 | --- | 1729 | --- |
| 1724 | *** The ls-lisp package uses `string-collate-lessp' to sort file names. | 1730 | *** The ls-lisp package uses `string-collate-lessp' to sort file names. |
| 1725 | The effect is that, on systems that use ls-lisp for Dired, the default | 1731 | The effect is that, on systems that use ls-lisp for Dired, the default |
| @@ -331,6 +331,103 @@ Symbols are also allowed; their print names are used instead. */) | |||
| 331 | return i1 < SCHARS (string2) ? Qt : Qnil; | 331 | return i1 < SCHARS (string2) ? Qt : Qnil; |
| 332 | } | 332 | } |
| 333 | 333 | ||
| 334 | /* Return the numerical value of a consecutive run of numerical | ||
| 335 | characters from STRING. The ISP and ISP_BYTE address pointer | ||
| 336 | pointers are increased and left at the next character after the | ||
| 337 | numerical characters. */ | ||
| 338 | static size_t | ||
| 339 | gather_number_from_string (int c, Lisp_Object string, | ||
| 340 | ptrdiff_t *isp, ptrdiff_t *isp_byte) | ||
| 341 | { | ||
| 342 | size_t number = c - '0'; | ||
| 343 | unsigned char *chp; | ||
| 344 | int chlen; | ||
| 345 | |||
| 346 | do | ||
| 347 | { | ||
| 348 | if (STRING_MULTIBYTE (string)) | ||
| 349 | { | ||
| 350 | chp = &SDATA (string)[*isp_byte]; | ||
| 351 | c = STRING_CHAR_AND_LENGTH (chp, chlen); | ||
| 352 | } | ||
| 353 | else | ||
| 354 | { | ||
| 355 | c = SREF (string, *isp_byte); | ||
| 356 | chlen = 1; | ||
| 357 | } | ||
| 358 | |||
| 359 | /* If we're still in a number, add it to the sum and continue. */ | ||
| 360 | /* FIXME: Integer overflow? */ | ||
| 361 | if (c >= '0' && c <= '9') | ||
| 362 | { | ||
| 363 | number = number * 10; | ||
| 364 | number += c - '0'; | ||
| 365 | (*isp)++; | ||
| 366 | (*isp_byte) += chlen; | ||
| 367 | } | ||
| 368 | else | ||
| 369 | break; | ||
| 370 | } | ||
| 371 | /* Stop when we get to the end of the string anyway. */ | ||
| 372 | while (c != 0); | ||
| 373 | |||
| 374 | return number; | ||
| 375 | } | ||
| 376 | |||
| 377 | DEFUN ("string-numeric-lessp", Fstring_numeric_lessp, | ||
| 378 | Sstring_numeric_lessp, 2, 2, 0, | ||
| 379 | doc: /* Return non-nil if STRING1 is less than STRING2 in 'numeric' order. | ||
| 380 | Sequences of non-numerical characters are compared lexicographically, | ||
| 381 | while sequences of numerical characters are converted into numbers, | ||
| 382 | and then the numbers are compared. This means that \"foo2.png\" is | ||
| 383 | less than \"foo12.png\" according to this predicate. | ||
| 384 | Case is significant. | ||
| 385 | Symbols are also allowed; their print names are used instead. */) | ||
| 386 | (register Lisp_Object string1, Lisp_Object string2) | ||
| 387 | { | ||
| 388 | ptrdiff_t end; | ||
| 389 | ptrdiff_t i1, i1_byte, i2, i2_byte; | ||
| 390 | size_t num1, num2; | ||
| 391 | |||
| 392 | if (SYMBOLP (string1)) | ||
| 393 | string1 = SYMBOL_NAME (string1); | ||
| 394 | if (SYMBOLP (string2)) | ||
| 395 | string2 = SYMBOL_NAME (string2); | ||
| 396 | CHECK_STRING (string1); | ||
| 397 | CHECK_STRING (string2); | ||
| 398 | |||
| 399 | i1 = i1_byte = i2 = i2_byte = 0; | ||
| 400 | |||
| 401 | end = SCHARS (string1); | ||
| 402 | if (end > SCHARS (string2)) | ||
| 403 | end = SCHARS (string2); | ||
| 404 | |||
| 405 | while (i1 < end) | ||
| 406 | { | ||
| 407 | /* When we find a mismatch, we must compare the | ||
| 408 | characters, not just the bytes. */ | ||
| 409 | int c1, c2; | ||
| 410 | |||
| 411 | FETCH_STRING_CHAR_ADVANCE (c1, string1, i1, i1_byte); | ||
| 412 | FETCH_STRING_CHAR_ADVANCE (c2, string2, i2, i2_byte); | ||
| 413 | |||
| 414 | if (c1 >= '0' && c1 <= '9' && | ||
| 415 | c2 >= '0' && c2 <= '9') | ||
| 416 | /* Both strings are numbers, so compare them. */ | ||
| 417 | { | ||
| 418 | num1 = gather_number_from_string (c1, string1, &i1, &i1_byte); | ||
| 419 | num2 = gather_number_from_string (c2, string2, &i2, &i2_byte); | ||
| 420 | if (num1 < num2) | ||
| 421 | return Qt; | ||
| 422 | else if (num1 > num2) | ||
| 423 | return Qnil; | ||
| 424 | } | ||
| 425 | else if (c1 != c2) | ||
| 426 | return c1 < c2 ? Qt : Qnil; | ||
| 427 | } | ||
| 428 | return i1 < SCHARS (string2) ? Qt : Qnil; | ||
| 429 | } | ||
| 430 | |||
| 334 | DEFUN ("string-collate-lessp", Fstring_collate_lessp, Sstring_collate_lessp, 2, 4, 0, | 431 | DEFUN ("string-collate-lessp", Fstring_collate_lessp, Sstring_collate_lessp, 2, 4, 0, |
| 335 | doc: /* Return t if first arg string is less than second in collation order. | 432 | doc: /* Return t if first arg string is less than second in collation order. |
| 336 | Symbols are also allowed; their print names are used instead. | 433 | Symbols are also allowed; their print names are used instead. |
| @@ -5049,6 +5146,7 @@ this variable. */); | |||
| 5049 | defsubr (&Sstring_equal); | 5146 | defsubr (&Sstring_equal); |
| 5050 | defsubr (&Scompare_strings); | 5147 | defsubr (&Scompare_strings); |
| 5051 | defsubr (&Sstring_lessp); | 5148 | defsubr (&Sstring_lessp); |
| 5149 | defsubr (&Sstring_numeric_lessp); | ||
| 5052 | defsubr (&Sstring_collate_lessp); | 5150 | defsubr (&Sstring_collate_lessp); |
| 5053 | defsubr (&Sstring_collate_equalp); | 5151 | defsubr (&Sstring_collate_equalp); |
| 5054 | defsubr (&Sappend); | 5152 | defsubr (&Sappend); |
diff --git a/test/src/fns-tests.el b/test/src/fns-tests.el index 762f7bdd94f..0c6edb89252 100644 --- a/test/src/fns-tests.el +++ b/test/src/fns-tests.el | |||
| @@ -191,3 +191,20 @@ | |||
| 191 | (string-collate-lessp | 191 | (string-collate-lessp |
| 192 | a b (if (eq system-type 'windows-nt) "enu_USA" "en_US.UTF-8"))))) | 192 | a b (if (eq system-type 'windows-nt) "enu_USA" "en_US.UTF-8"))))) |
| 193 | '("Adrian" "Ævar" "Agustín" "Eli")))) | 193 | '("Adrian" "Ævar" "Agustín" "Eli")))) |
| 194 | |||
| 195 | (ert-deftest fns-tests-string-numeric-lessp () | ||
| 196 | (should (string-numeric-lessp "foo2.png" "foo12.png")) | ||
| 197 | (should (not (string-numeric-lessp "foo12.png" "foo2.png"))) | ||
| 198 | (should (string-numeric-lessp "foo12.png" "foo20000.png")) | ||
| 199 | (should (not (string-numeric-lessp "foo20000.png" "foo12.png"))) | ||
| 200 | (should (string-numeric-lessp "foo.png" "foo2.png")) | ||
| 201 | (should (not (string-numeric-lessp "foo2.png" "foo.png"))) | ||
| 202 | (should (equal (sort '("foo12.png" "foo2.png" "foo1.png") | ||
| 203 | 'string-numeric-lessp) | ||
| 204 | '("foo1.png" "foo2.png" "foo12.png"))) | ||
| 205 | (should (string-numeric-lessp "foo2" "foo1234")) | ||
| 206 | (should (not (string-numeric-lessp "foo1234" "foo2"))) | ||
| 207 | (should (string-numeric-lessp "foo.png" "foo2")) | ||
| 208 | (should (string-numeric-lessp "foo1.25.5.png" "foo1.125.5")) | ||
| 209 | (should (string-numeric-lessp "2" "1245")) | ||
| 210 | (should (not (string-numeric-lessp "1245" "2")))) | ||