about | summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author: Eli Zaretskii, 2014-08-29 22:18:06 +0300
committer: Eli Zaretskii, 2014-08-29 22:18:06 +0300
commit: 21ba51de76390907ca86b1e7715f472dd740fbc3 (patch)
tree: 0b3418f0563a5da979cacf6894120840b56b8456 /src
parent: 2ae366c73e27dc695b6bc1cd03d93f48b3db76d4 (diff)
download: emacs-21ba51de76390907ca86b1e7715f472dd740fbc3.tar.gz
          emacs-21ba51de76390907ca86b1e7715f472dd740fbc3.zip
Implement case-insensitive and Unicode-compliant collation on MS-Windows.
src/fns.c (Fstring_collate_lessp, Fstring_collate_equalp): Doc fix. src/w32proc.c (w32_compare_strings): Accept additional argument IGNORE_CASE. Set up the flags for CompareStringW to ignore case if requested. If w32-collate-ignore-punctuation is non-nil, add NORM_IGNORESYMBOLS to the flags. (LINGUISTIC_IGNORECASE): Define if not already defined. (syms_of_ntproc) <Vw32_collate_ignore_punctuation>: New variable. src/sysdep.c (str_collate) [WINDOWSNT]: Adapt to the interface change. src/w32.h: Adjust prototype of w32_compare_strings. etc/NEWS: Mention w32-collate-ignore-punctuation. Fixes: debbugs:18051
Diffstat (limited to 'src')
-rw-r--r-- src/ChangeLog 14
-rw-r--r-- src/fns.c 23
-rw-r--r-- src/sysdep.c 2
-rw-r--r-- src/w32.h 2
-rw-r--r-- src/w32proc.c 42
5 files changed, 69 insertions, 14 deletions
diff --git a/src/ChangeLog b/src/ChangeLog
index 66588bc3e67..181a43d058f 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,17 @@
1 2014-08-29 Eli Zaretskii <eliz@gnu.org>
2
3 * fns.c (Fstring_collate_lessp, Fstring_collate_equalp): Doc fix.
4
5 * w32proc.c (w32_compare_strings): Accept additional argument
6 IGNORE_CASE. Set up the flags for CompareStringW to ignore case
7 if requested. If w32-collate-ignore-punctuation is non-nil, add
8 NORM_IGNORESYMBOLS to the flags.
9 (LINGUISTIC_IGNORECASE): Define if not already defined.
10 (syms_of_ntproc) <Vw32_collate_ignore_punctuation>: New variable.
11
12 * sysdep.c (str_collate) [WINDOWSNT]: Adapt to the interface
13 change.
14
1 2014-08-29 Michael Albinus <michael.albinus@gmx.de> 15 2014-08-29 Michael Albinus <michael.albinus@gmx.de>
2 16
3 * sysdep.c (LC_CTYPE, LC_CTYPE_MASK, towlower_l): 17 * sysdep.c (LC_CTYPE, LC_CTYPE_MASK, towlower_l):
diff --git a/src/fns.c b/src/fns.c
index 3cca40df50f..f838599230b 100644
--- a/src/fns.c
+++ b/src/fns.c
@@ -350,7 +350,7 @@ Symbols are also allowed; their print names are used instead.
350 350
351This function obeys the conventions for collation order in your 351This function obeys the conventions for collation order in your
352locale settings. For example, punctuation and whitespace characters 352locale settings. For example, punctuation and whitespace characters
353are considered less significant for sorting: 353might be considered less significant for sorting:
354 354
355\(sort '\("11" "12" "1 1" "1 2" "1.1" "1.2") 'string-collate-lessp) 355\(sort '\("11" "12" "1 1" "1 2" "1.1" "1.2") 'string-collate-lessp)
356 => \("11" "1 1" "1.1" "12" "1 2" "1.2") 356 => \("11" "1 1" "1.1" "12" "1 2" "1.2")
@@ -358,11 +358,15 @@ are considered less significant for sorting:
358The optional argument LOCALE, a string, overrides the setting of your 358The optional argument LOCALE, a string, overrides the setting of your
359current locale identifier for collation. The value is system 359current locale identifier for collation. The value is system
360dependent; a LOCALE \"en_US.UTF-8\" is applicable on POSIX systems, 360dependent; a LOCALE \"en_US.UTF-8\" is applicable on POSIX systems,
361while it would be \"English_USA.1252\" on MS Windows systems. 361while it would be, e.g., \"enu_USA.1252\" on MS-Windows systems.
362 362
363If IGNORE-CASE is non-nil, characters are converted to lower-case 363If IGNORE-CASE is non-nil, characters are converted to lower-case
364before comparing them. 364before comparing them.
365 365
366To emulate Unicode-compliant collation on MS-Windows systems,
367bind `w32-collate-ignore-punctuation' to a non-nil value, since
368the codeset part of the locale cannot be \"UTF-8\" on MS-Windows.
369
366If your system does not support a locale environment, this function 370If your system does not support a locale environment, this function
367behaves like `string-lessp'. */) 371behaves like `string-lessp'. */)
368 (Lisp_Object s1, Lisp_Object s2, Lisp_Object locale, Lisp_Object ignore_case) 372 (Lisp_Object s1, Lisp_Object s2, Lisp_Object locale, Lisp_Object ignore_case)
@@ -391,8 +395,8 @@ Symbols are also allowed; their print names are used instead.
391 395
392This function obeys the conventions for collation order in your locale 396This function obeys the conventions for collation order in your locale
393settings. For example, characters with different coding points but 397settings. For example, characters with different coding points but
394the same meaning are considered as equal, like different grave accent 398the same meaning might be considered as equal, like different grave
395unicode characters: 399accent Unicode characters:
396 400
397\(string-collate-equalp \(string ?\\uFF40) \(string ?\\u1FEF)) 401\(string-collate-equalp \(string ?\\uFF40) \(string ?\\u1FEF))
398 => t 402 => t
@@ -400,13 +404,20 @@ unicode characters:
400The optional argument LOCALE, a string, overrides the setting of your 404The optional argument LOCALE, a string, overrides the setting of your
401current locale identifier for collation. The value is system 405current locale identifier for collation. The value is system
402dependent; a LOCALE \"en_US.UTF-8\" is applicable on POSIX systems, 406dependent; a LOCALE \"en_US.UTF-8\" is applicable on POSIX systems,
403while it would be \"English_USA.1252\" on MS Windows systems. 407while it would be, e.g., \"enu_USA.1252\" on MS-Windows systems.
404 408
405If IGNORE-CASE is non-nil, characters are converted to lower-case 409If IGNORE-CASE is non-nil, characters are converted to lower-case
406before comparing them. 410before comparing them.
407 411
412To emulate Unicode-compliant collation on MS-Windows systems,
413bind `w32-collate-ignore-punctuation' to a non-nil value, since
414the codeset part of the locale cannot be \"UTF-8\" on MS-Windows.
415
408If your system does not support a locale environment, this function 416If your system does not support a locale environment, this function
409behaves like `string-equal'. */) 417behaves like `string-equal'.
418
419Do NOT use this function to compare file names for equality, only
420for sorting them. */)
410 (Lisp_Object s1, Lisp_Object s2, Lisp_Object locale, Lisp_Object ignore_case) 421 (Lisp_Object s1, Lisp_Object s2, Lisp_Object locale, Lisp_Object ignore_case)
411{ 422{
412#if defined __STDC_ISO_10646__ || defined WINDOWSNT 423#if defined __STDC_ISO_10646__ || defined WINDOWSNT
diff --git a/src/sysdep.c b/src/sysdep.c
index 7993a59e721..52a72385f46 100644
--- a/src/sysdep.c
+++ b/src/sysdep.c
@@ -3796,6 +3796,6 @@ str_collate (Lisp_Object s1, Lisp_Object s2,
3796 3796
3797 char *loc = STRINGP (locale) ? SSDATA (locale) : NULL; 3797 char *loc = STRINGP (locale) ? SSDATA (locale) : NULL;
3798 3798
3799 return w32_compare_strings (SDATA (s1), SDATA (s2), loc); 3799 return w32_compare_strings (SDATA (s1), SDATA (s2), loc, !NILP (ignore_case));
3800} 3800}
3801#endif /* WINDOWSNT */ 3801#endif /* WINDOWSNT */
diff --git a/src/w32.h b/src/w32.h
index 68ee14c70e3..2cc179a0c36 100644
--- a/src/w32.h
+++ b/src/w32.h
@@ -211,7 +211,7 @@ extern int w32_memory_info (unsigned long long *, unsigned long long *,
211 unsigned long long *, unsigned long long *); 211 unsigned long long *, unsigned long long *);
212 212
213/* Compare 2 UTF-8 strings in locale-dependent fashion. */ 213/* Compare 2 UTF-8 strings in locale-dependent fashion. */
214extern int w32_compare_strings (const char *, const char *, char *); 214extern int w32_compare_strings (const char *, const char *, char *, int);
215 215
216#ifdef HAVE_GNUTLS 216#ifdef HAVE_GNUTLS
217#include <gnutls/gnutls.h> 217#include <gnutls/gnutls.h>
diff --git a/src/w32proc.c b/src/w32proc.c
index ed62de02433..0b441d45186 100644
--- a/src/w32proc.c
+++ b/src/w32proc.c
@@ -3213,15 +3213,20 @@ get_lcid (const char *locale_name)
3213#ifndef _NSLCMPERROR 3213#ifndef _NSLCMPERROR
3214# define _NSLCMPERROR INT_MAX 3214# define _NSLCMPERROR INT_MAX
3215#endif 3215#endif
3216#ifndef LINGUISTIC_IGNORECASE
3217# define LINGUISTIC_IGNORECASE 0x00000010
3218#endif
3216 3219
3217int 3220int
3218w32_compare_strings (const char *s1, const char *s2, char *locname) 3221w32_compare_strings (const char *s1, const char *s2, char *locname,
3222 int ignore_case)
3219{ 3223{
3220 LCID lcid = GetThreadLocale (); 3224 LCID lcid = GetThreadLocale ();
3221 wchar_t *string1_w, *string2_w; 3225 wchar_t *string1_w, *string2_w;
3222 int val, needed; 3226 int val, needed;
3223 extern BOOL g_b_init_compare_string_w; 3227 extern BOOL g_b_init_compare_string_w;
3224 static int (WINAPI *pCompareStringW)(LCID, DWORD, LPCWSTR, int, LPCWSTR, int); 3228 static int (WINAPI *pCompareStringW)(LCID, DWORD, LPCWSTR, int, LPCWSTR, int);
3229 DWORD flags = 0;
3225 3230
3226 USE_SAFE_ALLOCA; 3231 USE_SAFE_ALLOCA;
3227 3232
@@ -3284,11 +3289,22 @@ w32_compare_strings (const char *s1, const char *s2, char *locname)
3284 lcid = new_lcid; 3289 lcid = new_lcid;
3285 } 3290 }
3286 3291
3287 /* FIXME: Need a way to control the FLAGS argument, perhaps via the 3292 if (ignore_case)
3288 CODESET part of LOCNAME. In particular, ls-lisp will want 3293 {
3289 NORM_IGNORESYMBOLS and sometimes LINGUISTIC_IGNORECASE or 3294 /* NORM_IGNORECASE ignores any tertiary distinction, not just
3290 NORM_IGNORECASE. */ 3295 case variants. LINGUISTIC_IGNORECASE is more selective, and
3291 val = pCompareStringW (lcid, 0, string1_w, -1, string2_w, -1); 3296 is sensitive to the locale's language, but it is not
3297 available before Vista. */
3298 if (w32_major_version >= 6)
3299 flags |= LINGUISTIC_IGNORECASE;
3300 else
3301 flags |= NORM_IGNORECASE;
3302 }
3303 /* This approximates what glibc collation functions do when the
3304 locale's codeset is UTF-8. */
3305 if (!NILP (Vw32_collate_ignore_punctuation))
3306 flags |= NORM_IGNORESYMBOLS;
3307 val = pCompareStringW (lcid, flags, string1_w, -1, string2_w, -1);
3292 SAFE_FREE (); 3308 SAFE_FREE ();
3293 if (!val) 3309 if (!val)
3294 { 3310 {
@@ -3408,6 +3424,20 @@ Any other non-nil value means do this even on remote and removable drives
3408where the performance impact may be noticeable even on modern hardware. */); 3424where the performance impact may be noticeable even on modern hardware. */);
3409 Vw32_get_true_file_attributes = Qlocal; 3425 Vw32_get_true_file_attributes = Qlocal;
3410 3426
3427 DEFVAR_LISP ("w32-collate-ignore-punctuation",
3428 Vw32_collate_ignore_punctuation,
3429 doc: /* Non-nil causes string collation functions to ignore punctuation on MS-Windows.
3430On Posix platforms, `string-collate-lessp' and `string-collate-equalp'
3431ignore punctuation characters when they compare strings, if the
3432locale's codeset is UTF-8, as in \"en_US.UTF-8\". Binding this option
3433to a non-nil value will achieve a similar effect on MS-Windows, where
3434locales with UTF-8 codeset are not supported.
3435
3436Note that setting this to non-nil will also ignore blanks and symbols
3437in the strings. So do NOT use this option when comparing file names
3438for equality, only when you need to sort them. */);
3439 Vw32_collate_ignore_punctuation = Qnil;
3440
3411 staticpro (&Vw32_valid_locale_ids); 3441 staticpro (&Vw32_valid_locale_ids);
3412 staticpro (&Vw32_valid_codepages); 3442 staticpro (&Vw32_valid_codepages);
3413} 3443}