diff options
| author | Paul Eggert | 2016-02-21 13:25:24 -0800 |
|---|---|---|
| committer | Paul Eggert | 2016-02-21 13:27:40 -0800 |
| commit | 1f7feecaee0ed3fb79758fe60020aefb30d9ff01 (patch) | |
| tree | b0476096aabea32658be041242caa74e7a5232cd | |
| parent | 3e67708d7239cde24b0988d4d1288bc75585cfea (diff) | |
| download | emacs-1f7feecaee0ed3fb79758fe60020aefb30d9ff01.tar.gz emacs-1f7feecaee0ed3fb79758fe60020aefb30d9ff01.zip | |
Use Gnulib filevercmp for version comparison
* admin/merge-gnulib (GNULIB_MODULES): Add filevercmp.
* doc/lispref/strings.texi (Text Comparison):
* etc/NEWS, src/fns.c:
* test/src/fns-tests.el (fns-tests-string-version-lessp):
Rename newly-introduced function to string-version-lessp, by
analogy with strverscmp.
* lib/filevercmp.c, lib/filevercmp.h: New files, copied from gnulib.
* lib/gnulib.mk, m4/gnulib-comp.m4: Regenerate.
* src/fns.c: Include <filevercmp.h>.
(gather_number_from_string): Remove.
(Fstring_version_lessp): Reimplement via filevercmp.
| -rwxr-xr-x | admin/merge-gnulib | 2 | ||||
| -rw-r--r-- | doc/lispref/strings.texi | 20 | ||||
| -rw-r--r-- | etc/NEWS | 2 | ||||
| -rw-r--r-- | lib/filevercmp.c | 181 | ||||
| -rw-r--r-- | lib/filevercmp.h | 42 | ||||
| -rw-r--r-- | lib/gnulib.mk | 10 | ||||
| -rw-r--r-- | m4/gnulib-comp.m4 | 3 | ||||
| -rw-r--r-- | src/fns.c | 129 | ||||
| -rw-r--r-- | test/src/fns-tests.el | 28 |
9 files changed, 287 insertions, 130 deletions
diff --git a/admin/merge-gnulib b/admin/merge-gnulib index 5463d1b667b..5d6512760d9 100755 --- a/admin/merge-gnulib +++ b/admin/merge-gnulib | |||
| @@ -30,7 +30,7 @@ GNULIB_MODULES=' | |||
| 30 | careadlinkat close-stream count-one-bits count-trailing-zeros | 30 | careadlinkat close-stream count-one-bits count-trailing-zeros |
| 31 | crypto/md5 crypto/sha1 crypto/sha256 crypto/sha512 | 31 | crypto/md5 crypto/sha1 crypto/sha256 crypto/sha512 |
| 32 | dtoastr dtotimespec dup2 environ execinfo faccessat | 32 | dtoastr dtotimespec dup2 environ execinfo faccessat |
| 33 | fcntl fcntl-h fdatasync fdopendir filemode fstatat fsync | 33 | fcntl fcntl-h fdatasync fdopendir filemode filevercmp fstatat fsync |
| 34 | getloadavg getopt-gnu gettime gettimeofday gitlog-to-changelog | 34 | getloadavg getopt-gnu gettime gettimeofday gitlog-to-changelog |
| 35 | ignore-value intprops largefile lstat | 35 | ignore-value intprops largefile lstat |
| 36 | manywarnings memrchr mkostemp mktime | 36 | manywarnings memrchr mkostemp mktime |
diff --git a/doc/lispref/strings.texi b/doc/lispref/strings.texi index 19301de06fa..ce629aa8259 100644 --- a/doc/lispref/strings.texi +++ b/doc/lispref/strings.texi | |||
| @@ -633,20 +633,12 @@ If your system does not support a locale environment, this function | |||
| 633 | behaves like @code{string-lessp}. | 633 | behaves like @code{string-lessp}. |
| 634 | @end defun | 634 | @end defun |
| 635 | 635 | ||
| 636 | @defun string-numerical-lessp strin1 string2 | 636 | @defun string-version-lessp string1 string2 |
| 637 | This function behaves like @code{string-lessp} for stretches of | 637 | This function compares strings lexicographically, except it treats |
| 638 | consecutive non-numerical characters, but compares sequences of | 638 | sequences of numerical characters as if they comprised a base-ten |
| 639 | numerical characters as if they comprised a base-ten number, and then | 639 | number, and then compares the numbers. So @samp{foo2.png} is |
| 640 | compares the numbers. So @samp{foo2.png} is ``smaller'' than | 640 | ``smaller'' than @samp{foo12.png} according to this predicate, even if |
| 641 | @samp{foo12.png} according to this predicate, even if @samp{12} is | 641 | @samp{12} is lexicographically ``smaller'' than @samp{2}. |
| 642 | lexicographically ``smaller'' than @samp{2}. | ||
| 643 | |||
| 644 | If one string has a number in a position in the string, and the other | ||
| 645 | doesn't, then lexicograpic comparison is done at that point, so | ||
| 646 | @samp{foo.png} is ``smaller'' than @samp{foo2.png}. If any of the | ||
| 647 | numbers in the strings are larger than can be represented as an | ||
| 648 | integer number, the entire string is compared using | ||
| 649 | @code{string-less}. | ||
| 650 | @end defun | 642 | @end defun |
| 651 | 643 | ||
| 652 | @defun string-prefix-p string1 string2 &optional ignore-case | 644 | @defun string-prefix-p string1 string2 &optional ignore-case |
| @@ -1726,7 +1726,7 @@ systems and for MS-Windows, for other systems they fall back to their | |||
| 1726 | counterparts `string-lessp' and `string-equal'. | 1726 | counterparts `string-lessp' and `string-equal'. |
| 1727 | 1727 | ||
| 1728 | +++ | 1728 | +++ |
| 1729 | ** The new function `string-numeric-lessp' compares strings by | 1729 | ** The new function `string-version-lessp' compares strings by |
| 1730 | interpreting consecutive runs of numerical characters as numbers, and | 1730 | interpreting consecutive runs of numerical characters as numbers, and |
| 1731 | compares their numerical values. According to this predicate, | 1731 | compares their numerical values. According to this predicate, |
| 1732 | "foo2.png" is smaller than "foo12.png". | 1732 | "foo2.png" is smaller than "foo12.png". |
diff --git a/lib/filevercmp.c b/lib/filevercmp.c new file mode 100644 index 00000000000..a75c9468e31 --- /dev/null +++ b/lib/filevercmp.c | |||
| @@ -0,0 +1,181 @@ | |||
| 1 | /* | ||
| 2 | Copyright (C) 1995 Ian Jackson <iwj10@cus.cam.ac.uk> | ||
| 3 | Copyright (C) 2001 Anthony Towns <aj@azure.humbug.org.au> | ||
| 4 | Copyright (C) 2008-2016 Free Software Foundation, Inc. | ||
| 5 | |||
| 6 | This program is free software: you can redistribute it and/or modify | ||
| 7 | it under the terms of the GNU General Public License as published by | ||
| 8 | the Free Software Foundation, either version 3 of the License, or | ||
| 9 | (at your option) any later version. | ||
| 10 | |||
| 11 | This program is distributed in the hope that it will be useful, | ||
| 12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 14 | GNU General Public License for more details. | ||
| 15 | |||
| 16 | You should have received a copy of the GNU General Public License | ||
| 17 | along with this program. If not, see <http://www.gnu.org/licenses/>. */ | ||
| 18 | |||
| 19 | #include <config.h> | ||
| 20 | #include "filevercmp.h" | ||
| 21 | |||
| 22 | #include <sys/types.h> | ||
| 23 | #include <stdlib.h> | ||
| 24 | #include <stdbool.h> | ||
| 25 | #include <string.h> | ||
| 26 | #include <c-ctype.h> | ||
| 27 | #include <limits.h> | ||
| 28 | |||
| 29 | /* Match a file suffix defined by this regular expression: | ||
| 30 | /(\.[A-Za-z~][A-Za-z0-9~]*)*$/ | ||
| 31 | Scan the string *STR up to its end and return a pointer to the start of the matching suffix (the '.' that begins it), or | ||
| 32 | NULL if not found. Upon return, *STR has been advanced so that it points to the terminating NUL. */ | ||
| 33 | static const char * | ||
| 34 | match_suffix (const char **str) | ||
| 35 | { | ||
| 36 | const char *match = NULL; /* start of the current candidate suffix, if any */ | ||
| 37 | bool read_alpha = false; /* set right after a '.': the next character must be a letter or '~' */ | ||
| 38 | while (**str) | ||
| 39 | { | ||
| 40 | if (read_alpha) | ||
| 41 | { | ||
| 42 | read_alpha = false; | ||
| 43 | if (!c_isalpha (**str) && '~' != **str) | ||
| 44 | match = NULL; /* the '.' was not followed by a letter or '~': drop the candidate */ | ||
| 45 | } | ||
| 46 | else if ('.' == **str) | ||
| 47 | { | ||
| 48 | read_alpha = true; | ||
| 49 | if (!match) /* keep the earliest '.' when several suffix components follow one another */ | ||
| 50 | match = *str; | ||
| 51 | } | ||
| 52 | else if (!c_isalnum (**str) && '~' != **str) | ||
| 53 | match = NULL; /* character that the suffix pattern does not allow: drop the candidate */ | ||
| 54 | (*str)++; | ||
| 55 | } | ||
| 56 | return match; | ||
| 57 | } | ||
| 58 | |||
| 59 | /* verrevcmp helper function: return the sort weight of C. Digits weigh 0, '~' weighs -1 (less than every other weight), letters weigh their own character code, and any other character weighs its code plus UCHAR_MAX + 1, which places it after every letter. */ | ||
| 60 | static int | ||
| 61 | order (unsigned char c) | ||
| 62 | { | ||
| 63 | if (c_isdigit (c)) | ||
| 64 | return 0; | ||
| 65 | else if (c_isalpha (c)) | ||
| 66 | return c; | ||
| 67 | else if (c == '~') | ||
| 68 | return -1; | ||
| 69 | else | ||
| 70 | return (int) c + UCHAR_MAX + 1; | ||
| 71 | } | ||
| 72 | |||
| 73 | /* Slightly modified verrevcmp function from dpkg. | ||
| 74 | S1, S2 - the strings to compare | ||
| 75 | S1_LEN, S2_LEN - number of leading bytes of S1 and S2 to be scanned | ||
| 76 | Returns a negative value, zero, or a positive value when the scanned part of S1 sorts before, equal to, or after that of S2. | ||
| 77 | This implements the algorithm for comparison of version strings | ||
| 78 | specified by Debian and now widely adopted. The detailed | ||
| 79 | specification can be found in the Debian Policy Manual in the | ||
| 80 | section on the 'Version' control field. This version of the code | ||
| 81 | implements that from s5.6.12 of Debian Policy v3.8.0.1 | ||
| 82 | http://www.debian.org/doc/debian-policy/ch-controlfields.html#s-f-Version */ | ||
| 83 | static int _GL_ATTRIBUTE_PURE | ||
| 84 | verrevcmp (const char *s1, size_t s1_len, const char *s2, size_t s2_len) | ||
| 85 | { | ||
| 86 | size_t s1_pos = 0; | ||
| 87 | size_t s2_pos = 0; | ||
| 88 | while (s1_pos < s1_len || s2_pos < s2_len) | ||
| 89 | { | ||
| 90 | int first_diff = 0; /* difference of the first unequal digit pair in the current digit runs */ | ||
| 91 | while ((s1_pos < s1_len && !c_isdigit (s1[s1_pos])) /* compare the non-digit stretches by weight */ | ||
| 92 | || (s2_pos < s2_len && !c_isdigit (s2[s2_pos]))) | ||
| 93 | { | ||
| 94 | int s1_c = (s1_pos == s1_len) ? 0 : order (s1[s1_pos]); /* an exhausted string weighs 0, the same as a digit */ | ||
| 95 | int s2_c = (s2_pos == s2_len) ? 0 : order (s2[s2_pos]); | ||
| 96 | if (s1_c != s2_c) | ||
| 97 | return s1_c - s2_c; | ||
| 98 | s1_pos++; | ||
| 99 | s2_pos++; | ||
| 100 | } | ||
| 101 | while (s1[s1_pos] == '0') /* skip leading zeros; NOTE(review): neither this loop nor the digit loops below check s1_pos/s2_pos against s1_len/s2_len -- presumably they rely on a non-digit byte at s1[s1_len] and s2[s2_len]; confirm for every caller */ | ||
| 102 | s1_pos++; | ||
| 103 | while (s2[s2_pos] == '0') | ||
| 104 | s2_pos++; | ||
| 105 | while (c_isdigit (s1[s1_pos]) && c_isdigit (s2[s2_pos])) | ||
| 106 | { | ||
| 107 | if (!first_diff) | ||
| 108 | first_diff = s1[s1_pos] - s2[s2_pos]; | ||
| 109 | s1_pos++; | ||
| 110 | s2_pos++; | ||
| 111 | } | ||
| 112 | if (c_isdigit (s1[s1_pos])) /* S1 has digits left, so its number (without leading zeros) is longer and therefore larger */ | ||
| 113 | return 1; | ||
| 114 | if (c_isdigit (s2[s2_pos])) | ||
| 115 | return -1; | ||
| 116 | if (first_diff) /* digit runs of equal length: the first differing digit decides */ | ||
| 117 | return first_diff; | ||
| 118 | } | ||
| 119 | return 0; | ||
| 120 | } | ||
| 121 | |||
| 122 | /* Compare version strings S1 and S2. Returns 0 only when the strings are identical, otherwise a negative or positive value giving their order. | ||
| 123 | See filevercmp.h for function description. */ | ||
| 124 | int | ||
| 125 | filevercmp (const char *s1, const char *s2) | ||
| 126 | { | ||
| 127 | const char *s1_pos; | ||
| 128 | const char *s2_pos; | ||
| 129 | const char *s1_suffix, *s2_suffix; | ||
| 130 | size_t s1_len, s2_len; | ||
| 131 | int result; | ||
| 132 | |||
| 133 | /* easy comparison to see if strings are identical; the result is also the final tie-breaker below */ | ||
| 134 | int simple_cmp = strcmp (s1, s2); | ||
| 135 | if (simple_cmp == 0) | ||
| 136 | return 0; | ||
| 137 | |||
| 138 | /* special handling for "", "." and "..": they sort before everything else, in that order */ | ||
| 139 | if (!*s1) | ||
| 140 | return -1; | ||
| 141 | if (!*s2) | ||
| 142 | return 1; | ||
| 143 | if (0 == strcmp (".", s1)) | ||
| 144 | return -1; | ||
| 145 | if (0 == strcmp (".", s2)) | ||
| 146 | return 1; | ||
| 147 | if (0 == strcmp ("..", s1)) | ||
| 148 | return -1; | ||
| 149 | if (0 == strcmp ("..", s2)) | ||
| 150 | return 1; | ||
| 151 | |||
| 152 | /* special handling for other hidden files: a name starting with '.' sorts before one that does not; when both start with '.', skip that character */ | ||
| 153 | if (*s1 == '.' && *s2 != '.') | ||
| 154 | return -1; | ||
| 155 | if (*s1 != '.' && *s2 == '.') | ||
| 156 | return 1; | ||
| 157 | if (*s1 == '.' && *s2 == '.') | ||
| 158 | { | ||
| 159 | s1++; | ||
| 160 | s2++; | ||
| 161 | } | ||
| 162 | |||
| 163 | /* "cut" file suffixes: each length covers the name up to the suffix found by match_suffix, or the whole name when no suffix matched */ | ||
| 164 | s1_pos = s1; | ||
| 165 | s2_pos = s2; | ||
| 166 | s1_suffix = match_suffix (&s1_pos); | ||
| 167 | s2_suffix = match_suffix (&s2_pos); | ||
| 168 | s1_len = (s1_suffix ? s1_suffix : s1_pos) - s1; | ||
| 169 | s2_len = (s2_suffix ? s2_suffix : s2_pos) - s2; | ||
| 170 | |||
| 171 | /* restore file suffixes if strings are identical after "cut", so that the suffixes themselves get compared */ | ||
| 172 | if ((s1_suffix || s2_suffix) && (s1_len == s2_len) | ||
| 173 | && 0 == strncmp (s1, s2, s1_len)) | ||
| 174 | { | ||
| 175 | s1_len = s1_pos - s1; | ||
| 176 | s2_len = s2_pos - s2; | ||
| 177 | } | ||
| 178 | |||
| 179 | result = verrevcmp (s1, s1_len, s2, s2_len); | ||
| 180 | return result == 0 ? simple_cmp : result; /* simple_cmp is nonzero here, so distinct strings never compare equal */ | ||
| 181 | } | ||
diff --git a/lib/filevercmp.h b/lib/filevercmp.h new file mode 100644 index 00000000000..220b71b5790 --- /dev/null +++ b/lib/filevercmp.h | |||
| @@ -0,0 +1,42 @@ | |||
| 1 | /* | ||
| 2 | Copyright (C) 1995 Ian Jackson <iwj10@cus.cam.ac.uk> | ||
| 3 | Copyright (C) 2001 Anthony Towns <aj@azure.humbug.org.au> | ||
| 4 | Copyright (C) 2008-2016 Free Software Foundation, Inc. | ||
| 5 | |||
| 6 | This program is free software: you can redistribute it and/or modify | ||
| 7 | it under the terms of the GNU General Public License as published by | ||
| 8 | the Free Software Foundation, either version 3 of the License, or | ||
| 9 | (at your option) any later version. | ||
| 10 | |||
| 11 | This program is distributed in the hope that it will be useful, | ||
| 12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 14 | GNU General Public License for more details. | ||
| 15 | |||
| 16 | You should have received a copy of the GNU General Public License | ||
| 17 | along with this program. If not, see <http://www.gnu.org/licenses/>. */ | ||
| 18 | |||
| 19 | #ifndef FILEVERCMP_H | ||
| 20 | #define FILEVERCMP_H | ||
| 21 | |||
| 22 | /* Compare version strings: | ||
| 23 | |||
| 24 | This function compares strings S1 and S2: | ||
| 25 | 1) By PREFIX in the same way as strcmp. | ||
| 26 | 2) Then by VERSION (much like the version comparison of Debian's dpkg). | ||
| 27 | Leading zeros in version numbers are ignored. | ||
| 28 | 3) If both (PREFIX and VERSION) are equal, the strcmp function is used for | ||
| 29 | comparison. So this function can return 0 if (and only if) strings S1 | ||
| 30 | and S2 are identical. | ||
| 31 | |||
| 32 | It returns a number >0 for S1 > S2, 0 for S1 == S2 and a number <0 for S1 < S2. | ||
| 33 | |||
| 34 | This function compares strings in such a way that if VER1 and VER2 are version | ||
| 35 | numbers and PREFIX and SUFFIX (SUFFIX defined as (\.[A-Za-z~][A-Za-z0-9~]*)*) | ||
| 36 | are strings then VER1 < VER2 implies filevercmp (PREFIX VER1 SUFFIX, | ||
| 37 | PREFIX VER2 SUFFIX) < 0. | ||
| 38 | |||
| 39 | This function is intended to be a replacement for strverscmp. */ | ||
| 40 | int filevercmp (const char *s1, const char *s2) _GL_ATTRIBUTE_PURE; | ||
| 41 | |||
| 42 | #endif /* FILEVERCMP_H */ | ||
diff --git a/lib/gnulib.mk b/lib/gnulib.mk index b1edd86f92c..cc8429658e1 100644 --- a/lib/gnulib.mk +++ b/lib/gnulib.mk | |||
| @@ -21,7 +21,7 @@ | |||
| 21 | # the same distribution terms as the rest of that program. | 21 | # the same distribution terms as the rest of that program. |
| 22 | # | 22 | # |
| 23 | # Generated by gnulib-tool. | 23 | # Generated by gnulib-tool. |
| 24 | # Reproduce by: gnulib-tool --import --lib=libgnu --source-base=lib --m4-base=m4 --doc-base=doc --tests-base=tests --aux-dir=build-aux --avoid=close --avoid=dup --avoid=fchdir --avoid=flexmember --avoid=fstat --avoid=malloc-posix --avoid=msvc-inval --avoid=msvc-nothrow --avoid=open --avoid=openat-die --avoid=opendir --avoid=raise --avoid=save-cwd --avoid=select --avoid=setenv --avoid=sigprocmask --avoid=stdarg --avoid=stdbool --avoid=threadlib --avoid=unsetenv --makefile-name=gnulib.mk --conditional-dependencies --no-libtool --macro-prefix=gl --no-vc-files alloca-opt binary-io byteswap c-ctype c-strcase careadlinkat close-stream count-one-bits count-trailing-zeros crypto/md5 crypto/sha1 crypto/sha256 crypto/sha512 dtoastr dtotimespec dup2 environ execinfo faccessat fcntl fcntl-h fdatasync fdopendir filemode fstatat fsync getloadavg getopt-gnu gettime gettimeofday gitlog-to-changelog ignore-value intprops largefile lstat manywarnings memrchr mkostemp mktime pipe2 pselect pthread_sigmask putenv qcopy-acl readlink readlinkat sig2str socklen stat-time std-gnu11 stdalign stddef stdio stpcpy strftime strtoimax strtoumax symlink sys_stat sys_time time time_r time_rz timegm timer-time timespec-add timespec-sub unsetenv update-copyright utimens vla warnings | 24 | # Reproduce by: gnulib-tool --import --lib=libgnu --source-base=lib --m4-base=m4 --doc-base=doc --tests-base=tests --aux-dir=build-aux --avoid=close --avoid=dup --avoid=fchdir --avoid=flexmember --avoid=fstat --avoid=malloc-posix --avoid=msvc-inval --avoid=msvc-nothrow --avoid=open --avoid=openat-die --avoid=opendir --avoid=raise --avoid=save-cwd --avoid=select --avoid=setenv --avoid=sigprocmask --avoid=stdarg --avoid=stdbool --avoid=threadlib --avoid=unsetenv --makefile-name=gnulib.mk --conditional-dependencies --no-libtool --macro-prefix=gl --no-vc-files alloca-opt binary-io byteswap c-ctype c-strcase careadlinkat close-stream count-one-bits count-trailing-zeros crypto/md5 crypto/sha1 crypto/sha256 
crypto/sha512 dtoastr dtotimespec dup2 environ execinfo faccessat fcntl fcntl-h fdatasync fdopendir filemode filevercmp fstatat fsync getloadavg getopt-gnu gettime gettimeofday gitlog-to-changelog ignore-value intprops largefile lstat manywarnings memrchr mkostemp mktime pipe2 pselect pthread_sigmask putenv qcopy-acl readlink readlinkat sig2str socklen stat-time std-gnu11 stdalign stddef stdio stpcpy strftime strtoimax strtoumax symlink sys_stat sys_time time time_r time_rz timegm timer-time timespec-add timespec-sub unsetenv update-copyright utimens vla warnings |
| 25 | 25 | ||
| 26 | 26 | ||
| 27 | MOSTLYCLEANFILES += core *.stackdump | 27 | MOSTLYCLEANFILES += core *.stackdump |
| @@ -441,6 +441,14 @@ EXTRA_DIST += filemode.h | |||
| 441 | 441 | ||
| 442 | ## end gnulib module filemode | 442 | ## end gnulib module filemode |
| 443 | 443 | ||
| 444 | ## begin gnulib module filevercmp | ||
| 445 | |||
| 446 | libgnu_a_SOURCES += filevercmp.c | ||
| 447 | |||
| 448 | EXTRA_DIST += filevercmp.h | ||
| 449 | |||
| 450 | ## end gnulib module filevercmp | ||
| 451 | |||
| 444 | ## begin gnulib module fpending | 452 | ## begin gnulib module fpending |
| 445 | 453 | ||
| 446 | 454 | ||
diff --git a/m4/gnulib-comp.m4 b/m4/gnulib-comp.m4 index 831bb4c4f4b..5a3fc98597c 100644 --- a/m4/gnulib-comp.m4 +++ b/m4/gnulib-comp.m4 | |||
| @@ -78,6 +78,7 @@ AC_DEFUN([gl_EARLY], | |||
| 78 | # Code from module fdatasync: | 78 | # Code from module fdatasync: |
| 79 | # Code from module fdopendir: | 79 | # Code from module fdopendir: |
| 80 | # Code from module filemode: | 80 | # Code from module filemode: |
| 81 | # Code from module filevercmp: | ||
| 81 | # Code from module fpending: | 82 | # Code from module fpending: |
| 82 | # Code from module fstatat: | 83 | # Code from module fstatat: |
| 83 | # Code from module fsync: | 84 | # Code from module fsync: |
| @@ -889,6 +890,8 @@ AC_DEFUN([gl_FILE_LIST], [ | |||
| 889 | lib/fdopendir.c | 890 | lib/fdopendir.c |
| 890 | lib/filemode.c | 891 | lib/filemode.c |
| 891 | lib/filemode.h | 892 | lib/filemode.h |
| 893 | lib/filevercmp.c | ||
| 894 | lib/filevercmp.h | ||
| 892 | lib/fpending.c | 895 | lib/fpending.c |
| 893 | lib/fpending.h | 896 | lib/fpending.h |
| 894 | lib/fstatat.c | 897 | lib/fstatat.c |
| @@ -21,6 +21,7 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */ | |||
| 21 | #include <config.h> | 21 | #include <config.h> |
| 22 | 22 | ||
| 23 | #include <unistd.h> | 23 | #include <unistd.h> |
| 24 | #include <filevercmp.h> | ||
| 24 | #include <intprops.h> | 25 | #include <intprops.h> |
| 25 | #include <vla.h> | 26 | #include <vla.h> |
| 26 | #include <errno.h> | 27 | #include <errno.h> |
| @@ -332,50 +333,21 @@ Symbols are also allowed; their print names are used instead. */) | |||
| 332 | return i1 < SCHARS (string2) ? Qt : Qnil; | 333 | return i1 < SCHARS (string2) ? Qt : Qnil; |
| 333 | } | 334 | } |
| 334 | 335 | ||
| 335 | /* Return the numerical value of a consecutive run of numerical | 336 | DEFUN ("string-version-lessp", Fstring_version_lessp, |
| 336 | characters from STRING. The ISP and ISP_BYTE address pointer | 337 | Sstring_version_lessp, 2, 2, 0, |
| 337 | pointers are increased and left at the next character after the | 338 | doc: /* Return non-nil if S1 is less than S2, as version strings. |
| 338 | numerical characters. */ | 339 | |
| 339 | static size_t | 340 | This function compares version strings S1 and S2: |
| 340 | gather_number_from_string (Lisp_Object string, | 341 | 1) By prefix lexicographically. |
| 341 | ptrdiff_t *isp, ptrdiff_t *isp_byte) | 342 | 2) Then by version (similarly to version comparison of Debian's dpkg). |
| 342 | { | 343 | Leading zeros in version numbers are ignored. |
| 343 | size_t number = 0; | 344 | 3) If both prefix and version are equal, compare as ordinary strings. |
| 344 | char *s = SSDATA (string); | ||
| 345 | char *end; | ||
| 346 | |||
| 347 | errno = 0; | ||
| 348 | number = strtoumax (s + *isp_byte, &end, 10); | ||
| 349 | if (errno == ERANGE) | ||
| 350 | /* If we have an integer overflow, then we fall back on lexical | ||
| 351 | comparison. */ | ||
| 352 | return -1; | ||
| 353 | else | ||
| 354 | { | ||
| 355 | size_t diff = end - (s + *isp_byte); | ||
| 356 | (*isp) += diff; | ||
| 357 | (*isp_byte) += diff; | ||
| 358 | return number; | ||
| 359 | } | ||
| 360 | } | ||
| 361 | 345 | ||
| 362 | DEFUN ("string-numeric-lessp", Fstring_numeric_lessp, | 346 | For example, \"foo2.png\" compares less than \"foo12.png\". |
| 363 | Sstring_numeric_lessp, 2, 2, 0, | ||
| 364 | doc: /* Return non-nil if STRING1 is less than STRING2 in 'numeric' order. | ||
| 365 | Sequences of non-numerical characters are compared lexicographically, | ||
| 366 | while sequences of numerical characters are converted into numbers, | ||
| 367 | and then the numbers are compared. This means that \"foo2.png\" is | ||
| 368 | less than \"foo12.png\" according to this predicate. | ||
| 369 | Case is significant. | 347 | Case is significant. |
| 370 | Symbols are also allowed; their print names are used instead. */) | 348 | Symbols are also allowed; their print names are used instead. */) |
| 371 | (register Lisp_Object string1, Lisp_Object string2) | 349 | (Lisp_Object string1, Lisp_Object string2) |
| 372 | { | 350 | { |
| 373 | ptrdiff_t end; | ||
| 374 | ptrdiff_t i1, i1_byte, i2, i2_byte; | ||
| 375 | size_t num1, num2; | ||
| 376 | unsigned char *chp; | ||
| 377 | int chlen1, chlen2; | ||
| 378 | |||
| 379 | if (SYMBOLP (string1)) | 351 | if (SYMBOLP (string1)) |
| 380 | string1 = SYMBOL_NAME (string1); | 352 | string1 = SYMBOL_NAME (string1); |
| 381 | if (SYMBOLP (string2)) | 353 | if (SYMBOLP (string2)) |
| @@ -383,67 +355,26 @@ Symbols are also allowed; their print names are used instead. */) | |||
| 383 | CHECK_STRING (string1); | 355 | CHECK_STRING (string1); |
| 384 | CHECK_STRING (string2); | 356 | CHECK_STRING (string2); |
| 385 | 357 | ||
| 386 | i1 = i1_byte = i2 = i2_byte = 0; | 358 | char *p1 = SSDATA (string1); |
| 359 | char *p2 = SSDATA (string2); | ||
| 360 | char *lim1 = p1 + SBYTES (string1); | ||
| 361 | char *lim2 = p2 + SBYTES (string2); | ||
| 362 | int cmp; | ||
| 387 | 363 | ||
| 388 | end = SCHARS (string1); | 364 | while ((cmp = filevercmp (p1, p2)) == 0) |
| 389 | if (end > SCHARS (string2)) | ||
| 390 | end = SCHARS (string2); | ||
| 391 | |||
| 392 | while (i1 < end) | ||
| 393 | { | 365 | { |
| 394 | /* When we find a mismatch, we must compare the | 366 | /* If the strings are identical through their first null bytes, |
| 395 | characters, not just the bytes. */ | 367 | skip past identical prefixes and try again. */ |
| 396 | int c1, c2; | 368 | ptrdiff_t size = strlen (p1) + 1; |
| 397 | 369 | p1 += size; | |
| 398 | if (STRING_MULTIBYTE (string1)) | 370 | p2 += size; |
| 399 | { | 371 | if (lim1 < p1) |
| 400 | chp = &SDATA (string1)[i1_byte]; | 372 | return lim2 < p2 ? Qnil : Qt; |
| 401 | c1 = STRING_CHAR_AND_LENGTH (chp, chlen1); | 373 | if (lim2 < p2) |
| 402 | } | 374 | return Qnil; |
| 403 | else | ||
| 404 | { | ||
| 405 | c1 = SREF (string1, i1_byte); | ||
| 406 | chlen1 = 1; | ||
| 407 | } | ||
| 408 | |||
| 409 | if (STRING_MULTIBYTE (string2)) | ||
| 410 | { | ||
| 411 | chp = &SDATA (string1)[i2_byte]; | ||
| 412 | c2 = STRING_CHAR_AND_LENGTH (chp, chlen2); | ||
| 413 | } | ||
| 414 | else | ||
| 415 | { | ||
| 416 | c2 = SREF (string2, i2_byte); | ||
| 417 | chlen2 = 1; | ||
| 418 | } | ||
| 419 | |||
| 420 | if (c1 >= '0' && c1 <= '9' && | ||
| 421 | c2 >= '0' && c2 <= '9') | ||
| 422 | /* Both strings are numbers, so compare them. */ | ||
| 423 | { | ||
| 424 | num1 = gather_number_from_string (string1, &i1, &i1_byte); | ||
| 425 | num2 = gather_number_from_string (string2, &i2, &i2_byte); | ||
| 426 | /* If we have an integer overflow, then resort to sorting | ||
| 427 | the entire string lexicographically. */ | ||
| 428 | if (num1 == -1 || num2 == -1) | ||
| 429 | return Fstring_lessp (string1, string2); | ||
| 430 | else if (num1 < num2) | ||
| 431 | return Qt; | ||
| 432 | else if (num1 > num2) | ||
| 433 | return Qnil; | ||
| 434 | } | ||
| 435 | else | ||
| 436 | { | ||
| 437 | if (c1 != c2) | ||
| 438 | return c1 < c2 ? Qt : Qnil; | ||
| 439 | |||
| 440 | i1++; | ||
| 441 | i2++; | ||
| 442 | i1_byte += chlen1; | ||
| 443 | i2_byte += chlen2; | ||
| 444 | } | ||
| 445 | } | 375 | } |
| 446 | return i1 < SCHARS (string2) ? Qt : Qnil; | 376 | |
| 377 | return cmp < 0 ? Qt : Qnil; | ||
| 447 | } | 378 | } |
| 448 | 379 | ||
| 449 | DEFUN ("string-collate-lessp", Fstring_collate_lessp, Sstring_collate_lessp, 2, 4, 0, | 380 | DEFUN ("string-collate-lessp", Fstring_collate_lessp, Sstring_collate_lessp, 2, 4, 0, |
| @@ -5164,7 +5095,7 @@ this variable. */); | |||
| 5164 | defsubr (&Sstring_equal); | 5095 | defsubr (&Sstring_equal); |
| 5165 | defsubr (&Scompare_strings); | 5096 | defsubr (&Scompare_strings); |
| 5166 | defsubr (&Sstring_lessp); | 5097 | defsubr (&Sstring_lessp); |
| 5167 | defsubr (&Sstring_numeric_lessp); | 5098 | defsubr (&Sstring_version_lessp); |
| 5168 | defsubr (&Sstring_collate_lessp); | 5099 | defsubr (&Sstring_collate_lessp); |
| 5169 | defsubr (&Sstring_collate_equalp); | 5100 | defsubr (&Sstring_collate_equalp); |
| 5170 | defsubr (&Sappend); | 5101 | defsubr (&Sappend); |
diff --git a/test/src/fns-tests.el b/test/src/fns-tests.el index 0c6edb89252..861736995f4 100644 --- a/test/src/fns-tests.el +++ b/test/src/fns-tests.el | |||
| @@ -192,19 +192,19 @@ | |||
| 192 | a b (if (eq system-type 'windows-nt) "enu_USA" "en_US.UTF-8"))))) | 192 | a b (if (eq system-type 'windows-nt) "enu_USA" "en_US.UTF-8"))))) |
| 193 | '("Adrian" "Ævar" "Agustín" "Eli")))) | 193 | '("Adrian" "Ævar" "Agustín" "Eli")))) |
| 194 | 194 | ||
| 195 | (ert-deftest fns-tests-string-numeric-lessp () | 195 | (ert-deftest fns-tests-string-version-lessp () |
| 196 | (should (string-numeric-lessp "foo2.png" "foo12.png")) | 196 | (should (string-version-lessp "foo2.png" "foo12.png")) |
| 197 | (should (not (string-numeric-lessp "foo12.png" "foo2.png"))) | 197 | (should (not (string-version-lessp "foo12.png" "foo2.png"))) |
| 198 | (should (string-numeric-lessp "foo12.png" "foo20000.png")) | 198 | (should (string-version-lessp "foo12.png" "foo20000.png")) |
| 199 | (should (not (string-numeric-lessp "foo20000.png" "foo12.png"))) | 199 | (should (not (string-version-lessp "foo20000.png" "foo12.png"))) |
| 200 | (should (string-numeric-lessp "foo.png" "foo2.png")) | 200 | (should (string-version-lessp "foo.png" "foo2.png")) |
| 201 | (should (not (string-numeric-lessp "foo2.png" "foo.png"))) | 201 | (should (not (string-version-lessp "foo2.png" "foo.png"))) |
| 202 | (should (equal (sort '("foo12.png" "foo2.png" "foo1.png") | 202 | (should (equal (sort '("foo12.png" "foo2.png" "foo1.png") |
| 203 | 'string-numeric-lessp) | 203 | 'string-version-lessp) |
| 204 | '("foo1.png" "foo2.png" "foo12.png"))) | 204 | '("foo1.png" "foo2.png" "foo12.png"))) |
| 205 | (should (string-numeric-lessp "foo2" "foo1234")) | 205 | (should (string-version-lessp "foo2" "foo1234")) |
| 206 | (should (not (string-numeric-lessp "foo1234" "foo2"))) | 206 | (should (not (string-version-lessp "foo1234" "foo2"))) |
| 207 | (should (string-numeric-lessp "foo.png" "foo2")) | 207 | (should (string-version-lessp "foo.png" "foo2")) |
| 208 | (should (string-numeric-lessp "foo1.25.5.png" "foo1.125.5")) | 208 | (should (string-version-lessp "foo1.25.5.png" "foo1.125.5")) |
| 209 | (should (string-numeric-lessp "2" "1245")) | 209 | (should (string-version-lessp "2" "1245")) |
| 210 | (should (not (string-numeric-lessp "1245" "2")))) | 210 | (should (not (string-version-lessp "1245" "2")))) |