diff options
| author | Eli Zaretskii | 2014-08-29 22:18:06 +0300 |
|---|---|---|
| committer | Eli Zaretskii | 2014-08-29 22:18:06 +0300 |
| commit | 21ba51de76390907ca86b1e7715f472dd740fbc3 (patch) | |
| tree | 0b3418f0563a5da979cacf6894120840b56b8456 /src | |
| parent | 2ae366c73e27dc695b6bc1cd03d93f48b3db76d4 (diff) | |
| download | emacs-21ba51de76390907ca86b1e7715f472dd740fbc3.tar.gz emacs-21ba51de76390907ca86b1e7715f472dd740fbc3.zip | |
Implement case-insensitive and Unicode-compliant collation on MS-Windows.
src/fns.c (Fstring_collate_lessp, Fstring_collate_equalp): Doc fix.
src/w32proc.c (w32_compare_strings): Accept additional argument
IGNORE_CASE. Set up the flags for CompareStringW to ignore case
if requested. If w32-collate-ignore-punctuation is non-nil, add
NORM_IGNORESYMBOLS to the flags.
(LINGUISTIC_IGNORECASE): Define if not already defined.
(syms_of_ntproc) <Vw32_collate_ignore_punctuation>: New variable.
src/sysdep.c (str_collate) [WINDOWSNT]: Adapt to the interface
change.
src/w32.h: Adjust prototype of w32_compare_strings.
etc/NEWS: Mention w32-collate-ignore-punctuation.
Fixes: debbugs:18051
Diffstat (limited to 'src')
| -rw-r--r-- | src/ChangeLog | 14 | ||||
| -rw-r--r-- | src/fns.c | 23 | ||||
| -rw-r--r-- | src/sysdep.c | 2 | ||||
| -rw-r--r-- | src/w32.h | 2 | ||||
| -rw-r--r-- | src/w32proc.c | 42 |
5 files changed, 69 insertions, 14 deletions
diff --git a/src/ChangeLog b/src/ChangeLog index 66588bc3e67..181a43d058f 100644 --- a/src/ChangeLog +++ b/src/ChangeLog | |||
| @@ -1,3 +1,17 @@ | |||
| 1 | 2014-08-29 Eli Zaretskii <eliz@gnu.org> | ||
| 2 | |||
| 3 | * fns.c (Fstring_collate_lessp, Fstring_collate_equalp): Doc fix. | ||
| 4 | |||
| 5 | * w32proc.c (w32_compare_strings): Accept additional argument | ||
| 6 | IGNORE_CASE. Set up the flags for CompareStringW to ignore case | ||
| 7 | if requested. If w32-collate-ignore-punctuation is non-nil, add | ||
| 8 | NORM_IGNORESYMBOLS to the flags. | ||
| 9 | (LINGUISTIC_IGNORECASE): Define if not already defined. | ||
| 10 | (syms_of_ntproc) <Vw32_collate_ignore_punctuation>: New variable. | ||
| 11 | |||
| 12 | * sysdep.c (str_collate) [WINDOWSNT]: Adapt to the interface | ||
| 13 | change. | ||
| 14 | |||
| 1 | 2014-08-29 Michael Albinus <michael.albinus@gmx.de> | 15 | 2014-08-29 Michael Albinus <michael.albinus@gmx.de> |
| 2 | 16 | ||
| 3 | * sysdep.c (LC_CTYPE, LC_CTYPE_MASK, towlower_l): | 17 | * sysdep.c (LC_CTYPE, LC_CTYPE_MASK, towlower_l): |
| @@ -350,7 +350,7 @@ Symbols are also allowed; their print names are used instead. | |||
| 350 | 350 | ||
| 351 | This function obeys the conventions for collation order in your | 351 | This function obeys the conventions for collation order in your |
| 352 | locale settings. For example, punctuation and whitespace characters | 352 | locale settings. For example, punctuation and whitespace characters |
| 353 | are considered less significant for sorting: | 353 | might be considered less significant for sorting: |
| 354 | 354 | ||
| 355 | \(sort '\("11" "12" "1 1" "1 2" "1.1" "1.2") 'string-collate-lessp) | 355 | \(sort '\("11" "12" "1 1" "1 2" "1.1" "1.2") 'string-collate-lessp) |
| 356 | => \("11" "1 1" "1.1" "12" "1 2" "1.2") | 356 | => \("11" "1 1" "1.1" "12" "1 2" "1.2") |
| @@ -358,11 +358,15 @@ are considered less significant for sorting: | |||
| 358 | The optional argument LOCALE, a string, overrides the setting of your | 358 | The optional argument LOCALE, a string, overrides the setting of your |
| 359 | current locale identifier for collation. The value is system | 359 | current locale identifier for collation. The value is system |
| 360 | dependent; a LOCALE \"en_US.UTF-8\" is applicable on POSIX systems, | 360 | dependent; a LOCALE \"en_US.UTF-8\" is applicable on POSIX systems, |
| 361 | while it would be \"English_USA.1252\" on MS Windows systems. | 361 | while it would be, e.g., \"enu_USA.1252\" on MS-Windows systems. |
| 362 | 362 | ||
| 363 | If IGNORE-CASE is non-nil, characters are converted to lower-case | 363 | If IGNORE-CASE is non-nil, characters are converted to lower-case |
| 364 | before comparing them. | 364 | before comparing them. |
| 365 | 365 | ||
| 366 | To emulate Unicode-compliant collation on MS-Windows systems, | ||
| 367 | bind `w32-collate-ignore-punctuation' to a non-nil value, since | ||
| 368 | the codeset part of the locale cannot be \"UTF-8\" on MS-Windows. | ||
| 369 | |||
| 366 | If your system does not support a locale environment, this function | 370 | If your system does not support a locale environment, this function |
| 367 | behaves like `string-lessp'. */) | 371 | behaves like `string-lessp'. */) |
| 368 | (Lisp_Object s1, Lisp_Object s2, Lisp_Object locale, Lisp_Object ignore_case) | 372 | (Lisp_Object s1, Lisp_Object s2, Lisp_Object locale, Lisp_Object ignore_case) |
| @@ -391,8 +395,8 @@ Symbols are also allowed; their print names are used instead. | |||
| 391 | 395 | ||
| 392 | This function obeys the conventions for collation order in your locale | 396 | This function obeys the conventions for collation order in your locale |
| 393 | settings. For example, characters with different code points but | 397 | settings. For example, characters with different code points but |
| 394 | the same meaning are considered as equal, like different grave accent | 398 | the same meaning might be considered as equal, like different grave |
| 395 | unicode characters: | 399 | accent Unicode characters: |
| 396 | 400 | ||
| 397 | \(string-collate-equalp \(string ?\\uFF40) \(string ?\\u1FEF)) | 401 | \(string-collate-equalp \(string ?\\uFF40) \(string ?\\u1FEF)) |
| 398 | => t | 402 | => t |
| @@ -400,13 +404,20 @@ unicode characters: | |||
| 400 | The optional argument LOCALE, a string, overrides the setting of your | 404 | The optional argument LOCALE, a string, overrides the setting of your |
| 401 | current locale identifier for collation. The value is system | 405 | current locale identifier for collation. The value is system |
| 402 | dependent; a LOCALE \"en_US.UTF-8\" is applicable on POSIX systems, | 406 | dependent; a LOCALE \"en_US.UTF-8\" is applicable on POSIX systems, |
| 403 | while it would be \"English_USA.1252\" on MS Windows systems. | 407 | while it would be, e.g., \"enu_USA.1252\" on MS-Windows systems. |
| 404 | 408 | ||
| 405 | If IGNORE-CASE is non-nil, characters are converted to lower-case | 409 | If IGNORE-CASE is non-nil, characters are converted to lower-case |
| 406 | before comparing them. | 410 | before comparing them. |
| 407 | 411 | ||
| 412 | To emulate Unicode-compliant collation on MS-Windows systems, | ||
| 413 | bind `w32-collate-ignore-punctuation' to a non-nil value, since | ||
| 414 | the codeset part of the locale cannot be \"UTF-8\" on MS-Windows. | ||
| 415 | |||
| 408 | If your system does not support a locale environment, this function | 416 | If your system does not support a locale environment, this function |
| 409 | behaves like `string-equal'. */) | 417 | behaves like `string-equal'. |
| 418 | |||
| 419 | Do NOT use this function to compare file names for equality, only | ||
| 420 | for sorting them. */) | ||
| 410 | (Lisp_Object s1, Lisp_Object s2, Lisp_Object locale, Lisp_Object ignore_case) | 421 | (Lisp_Object s1, Lisp_Object s2, Lisp_Object locale, Lisp_Object ignore_case) |
| 411 | { | 422 | { |
| 412 | #if defined __STDC_ISO_10646__ || defined WINDOWSNT | 423 | #if defined __STDC_ISO_10646__ || defined WINDOWSNT |
diff --git a/src/sysdep.c b/src/sysdep.c index 7993a59e721..52a72385f46 100644 --- a/src/sysdep.c +++ b/src/sysdep.c | |||
| @@ -3796,6 +3796,6 @@ str_collate (Lisp_Object s1, Lisp_Object s2, | |||
| 3796 | 3796 | ||
| 3797 | char *loc = STRINGP (locale) ? SSDATA (locale) : NULL; | 3797 | char *loc = STRINGP (locale) ? SSDATA (locale) : NULL; |
| 3798 | 3798 | ||
| 3799 | return w32_compare_strings (SDATA (s1), SDATA (s2), loc); | 3799 | return w32_compare_strings (SDATA (s1), SDATA (s2), loc, !NILP (ignore_case)); |
| 3800 | } | 3800 | } |
| 3801 | #endif /* WINDOWSNT */ | 3801 | #endif /* WINDOWSNT */ |
| @@ -211,7 +211,7 @@ extern int w32_memory_info (unsigned long long *, unsigned long long *, | |||
| 211 | unsigned long long *, unsigned long long *); | 211 | unsigned long long *, unsigned long long *); |
| 212 | 212 | ||
| 213 | /* Compare 2 UTF-8 strings in locale-dependent fashion. */ | 213 | /* Compare 2 UTF-8 strings in locale-dependent fashion. */ |
| 214 | extern int w32_compare_strings (const char *, const char *, char *); | 214 | extern int w32_compare_strings (const char *, const char *, char *, int); |
| 215 | 215 | ||
| 216 | #ifdef HAVE_GNUTLS | 216 | #ifdef HAVE_GNUTLS |
| 217 | #include <gnutls/gnutls.h> | 217 | #include <gnutls/gnutls.h> |
diff --git a/src/w32proc.c b/src/w32proc.c index ed62de02433..0b441d45186 100644 --- a/src/w32proc.c +++ b/src/w32proc.c | |||
| @@ -3213,15 +3213,20 @@ get_lcid (const char *locale_name) | |||
| 3213 | #ifndef _NSLCMPERROR | 3213 | #ifndef _NSLCMPERROR |
| 3214 | # define _NSLCMPERROR INT_MAX | 3214 | # define _NSLCMPERROR INT_MAX |
| 3215 | #endif | 3215 | #endif |
| 3216 | #ifndef LINGUISTIC_IGNORECASE | ||
| 3217 | # define LINGUISTIC_IGNORECASE 0x00000010 | ||
| 3218 | #endif | ||
| 3216 | 3219 | ||
| 3217 | int | 3220 | int |
| 3218 | w32_compare_strings (const char *s1, const char *s2, char *locname) | 3221 | w32_compare_strings (const char *s1, const char *s2, char *locname, |
| 3222 | int ignore_case) | ||
| 3219 | { | 3223 | { |
| 3220 | LCID lcid = GetThreadLocale (); | 3224 | LCID lcid = GetThreadLocale (); |
| 3221 | wchar_t *string1_w, *string2_w; | 3225 | wchar_t *string1_w, *string2_w; |
| 3222 | int val, needed; | 3226 | int val, needed; |
| 3223 | extern BOOL g_b_init_compare_string_w; | 3227 | extern BOOL g_b_init_compare_string_w; |
| 3224 | static int (WINAPI *pCompareStringW)(LCID, DWORD, LPCWSTR, int, LPCWSTR, int); | 3228 | static int (WINAPI *pCompareStringW)(LCID, DWORD, LPCWSTR, int, LPCWSTR, int); |
| 3229 | DWORD flags = 0; | ||
| 3225 | 3230 | ||
| 3226 | USE_SAFE_ALLOCA; | 3231 | USE_SAFE_ALLOCA; |
| 3227 | 3232 | ||
| @@ -3284,11 +3289,22 @@ w32_compare_strings (const char *s1, const char *s2, char *locname) | |||
| 3284 | lcid = new_lcid; | 3289 | lcid = new_lcid; |
| 3285 | } | 3290 | } |
| 3286 | 3291 | ||
| 3287 | /* FIXME: Need a way to control the FLAGS argument, perhaps via the | 3292 | if (ignore_case) |
| 3288 | CODESET part of LOCNAME. In particular, ls-lisp will want | 3293 | { |
| 3289 | NORM_IGNORESYMBOLS and sometimes LINGUISTIC_IGNORECASE or | 3294 | /* NORM_IGNORECASE ignores any tertiary distinction, not just |
| 3290 | NORM_IGNORECASE. */ | 3295 | case variants. LINGUISTIC_IGNORECASE is more selective, and |
| 3291 | val = pCompareStringW (lcid, 0, string1_w, -1, string2_w, -1); | 3296 | is sensitive to the locale's language, but it is not |
| 3297 | available before Vista. */ | ||
| 3298 | if (w32_major_version >= 6) | ||
| 3299 | flags |= LINGUISTIC_IGNORECASE; | ||
| 3300 | else | ||
| 3301 | flags |= NORM_IGNORECASE; | ||
| 3302 | } | ||
| 3303 | /* This approximates what glibc collation functions do when the | ||
| 3304 | locale's codeset is UTF-8. */ | ||
| 3305 | if (!NILP (Vw32_collate_ignore_punctuation)) | ||
| 3306 | flags |= NORM_IGNORESYMBOLS; | ||
| 3307 | val = pCompareStringW (lcid, flags, string1_w, -1, string2_w, -1); | ||
| 3292 | SAFE_FREE (); | 3308 | SAFE_FREE (); |
| 3293 | if (!val) | 3309 | if (!val) |
| 3294 | { | 3310 | { |
| @@ -3408,6 +3424,20 @@ Any other non-nil value means do this even on remote and removable drives | |||
| 3408 | where the performance impact may be noticeable even on modern hardware. */); | 3424 | where the performance impact may be noticeable even on modern hardware. */); |
| 3409 | Vw32_get_true_file_attributes = Qlocal; | 3425 | Vw32_get_true_file_attributes = Qlocal; |
| 3410 | 3426 | ||
| 3427 | DEFVAR_LISP ("w32-collate-ignore-punctuation", | ||
| 3428 | Vw32_collate_ignore_punctuation, | ||
| 3429 | doc: /* Non-nil causes string collation functions to ignore punctuation on MS-Windows. | ||
| 3430 | On Posix platforms, `string-collate-lessp' and `string-collate-equalp' | ||
| 3431 | ignore punctuation characters when they compare strings, if the | ||
| 3432 | locale's codeset is UTF-8, as in \"en_US.UTF-8\". Binding this option | ||
| 3433 | to a non-nil value will achieve a similar effect on MS-Windows, where | ||
| 3434 | locales with UTF-8 codeset are not supported. | ||
| 3435 | |||
| 3436 | Note that setting this to non-nil will also ignore blanks and symbols | ||
| 3437 | in the strings. So do NOT use this option when comparing file names | ||
| 3438 | for equality, only when you need to sort them. */); | ||
| 3439 | Vw32_collate_ignore_punctuation = Qnil; | ||
| 3440 | |||
| 3411 | staticpro (&Vw32_valid_locale_ids); | 3441 | staticpro (&Vw32_valid_locale_ids); |
| 3412 | staticpro (&Vw32_valid_codepages); | 3442 | staticpro (&Vw32_valid_codepages); |
| 3413 | } | 3443 | } |