aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Eli Zaretskii, 2013-01-23 18:11:04 +0200
committer: Eli Zaretskii, 2013-01-23 18:11:04 +0200
commit: 6d2851de77e7828218bec1bd57779277fa80e129 (patch)
tree: e458f57efaff94b64dbdb44293c7246f4ae56698
parent: 4d4ddaa78c15716a93f790b11e9879d73db5e7d9 (diff)
download: emacs-6d2851de77e7828218bec1bd57779277fa80e129.tar.gz
emacs-6d2851de77e7828218bec1bd57779277fa80e129.zip
Fix bug #13515 with processing DBCS file names on MS-Windows.
src/w32.c (max_filename_mbslen): New function. (normalize_filename, readdir): Use it to detect locales where ANSI encoding of file names uses a double-byte character set (DBCS). If a DBCS encoding is used, advance by characters using CharNextExA, instead of incrementing a 'char *' pointer. Use _mbslwr instead of _strlwr.
-rw-r--r-- src/ChangeLog 9
-rw-r--r-- src/w32.c 106
2 files changed, 104 insertions, 11 deletions
diff --git a/src/ChangeLog b/src/ChangeLog
index e7ab13311e1..e9c4fe0c50e 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,12 @@
12013-01-23 Eli Zaretskii <eliz@gnu.org>
2
3 * w32.c (max_filename_mbslen): New function.
4 (normalize_filename, readdir): Use it to detect locales where ANSI
5 encoding of file names uses a double-byte character set (DBCS).
6 If a DBCS encoding is used, advance by characters using
7 CharNextExA, instead of incrementing a 'char *' pointer. Use
8 _mbslwr instead of _strlwr. (Bug#13515)
9
12013-01-22 Eli Zaretskii <eliz@gnu.org> 102013-01-22 Eli Zaretskii <eliz@gnu.org>
2 11
3 * w32heap.c (allocate_heap) [!_WIN64]: Decrease the initial 12 * w32heap.c (allocate_heap) [!_WIN64]: Decrease the initial
diff --git a/src/w32.c b/src/w32.c
index 469dfcf3b6c..51e304af1e9 100644
--- a/src/w32.c
+++ b/src/w32.c
@@ -37,7 +37,7 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
37/* must include CRT headers *before* config.h */ 37/* must include CRT headers *before* config.h */
38 38
39#include <config.h> 39#include <config.h>
40#include <mbstring.h> /* for _mbspbrk */ 40#include <mbstring.h> /* for _mbspbrk and _mbslwr */
41 41
42#undef access 42#undef access
43#undef chdir 43#undef chdir
@@ -1304,6 +1304,67 @@ srandom (int seed)
1304 srand (seed); 1304 srand (seed);
1305} 1305}
1306 1306
1307/* Current codepage for encoding file names. */
1308static int file_name_codepage;
1309
1310/* Return the maximum length in bytes of a multibyte character
1311 sequence encoded in the current ANSI codepage. This is required to
1312 correctly walk the encoded file names one character at a time. */
1313static int
1314max_filename_mbslen (void)
1315{
1316 /* A simple cache to avoid calling GetCPInfo every time we need to
1317 normalize a file name. The file-name encoding is not supposed to
1318 be changed too frequently, if ever. */
1319 static Lisp_Object last_file_name_encoding;
1320 static int last_max_mbslen;
1321 Lisp_Object current_encoding;
1322
1323 current_encoding = Vfile_name_coding_system;
1324 if (NILP (current_encoding))
1325 current_encoding = Vdefault_file_name_coding_system;
1326
1327 if (!EQ (last_file_name_encoding, current_encoding))
1328 {
1329 CPINFO cp_info;
1330
1331 last_file_name_encoding = current_encoding;
1332 /* Default to the current ANSI codepage. */
1333 file_name_codepage = w32_ansi_code_page;
1334 if (!NILP (current_encoding))
1335 {
1336 char *cpname = SDATA (SYMBOL_NAME (current_encoding));
1337 char *cp = NULL, *end;
1338 int cpnum;
1339
1340 if (strncmp (cpname, "cp", 2) == 0)
1341 cp = cpname + 2;
1342 else if (strncmp (cpname, "windows-", 8) == 0)
1343 cp = cpname + 8;
1344
1345 if (cp)
1346 {
1347 end = cp;
1348 cpnum = strtol (cp, &end, 10);
1349 if (cpnum && *end == '\0' && end - cp >= 2)
1350 file_name_codepage = cpnum;
1351 }
1352 }
1353
1354 if (!file_name_codepage)
1355 file_name_codepage = CP_ACP; /* CP_ACP = 0, but let's not assume that */
1356
1357 if (!GetCPInfo (file_name_codepage, &cp_info))
1358 {
1359 file_name_codepage = CP_ACP;
1360 if (!GetCPInfo (file_name_codepage, &cp_info))
1361 emacs_abort ();
1362 }
1363 last_max_mbslen = cp_info.MaxCharSize;
1364 }
1365
1366 return last_max_mbslen;
1367}
1307 1368
1308/* Normalize filename by converting all path separators to 1369/* Normalize filename by converting all path separators to
1309 the specified separator. Also conditionally convert upper 1370 the specified separator. Also conditionally convert upper
@@ -1313,14 +1374,20 @@ static void
1313normalize_filename (register char *fp, char path_sep) 1374normalize_filename (register char *fp, char path_sep)
1314{ 1375{
1315 char sep; 1376 char sep;
1316 char *elem; 1377 char *elem, *p2;
1378 int dbcs_p = max_filename_mbslen () > 1;
1317 1379
1318 /* Always lower-case drive letters a-z, even if the filesystem 1380 /* Always lower-case drive letters a-z, even if the filesystem
1319 preserves case in filenames. 1381 preserves case in filenames.
1320 This is so filenames can be compared by string comparison 1382 This is so filenames can be compared by string comparison
1321 functions that are case-sensitive. Even case-preserving filesystems 1383 functions that are case-sensitive. Even case-preserving filesystems
1322 do not distinguish case in drive letters. */ 1384 do not distinguish case in drive letters. */
1323 if (fp[1] == ':' && *fp >= 'A' && *fp <= 'Z') 1385 if (dbcs_p)
1386 p2 = CharNextExA (file_name_codepage, fp, 0);
1387 else
1388 p2 = fp + 1;
1389
1390 if (*p2 == ':' && *fp >= 'A' && *fp <= 'Z')
1324 { 1391 {
1325 *fp += 'a' - 'A'; 1392 *fp += 'a' - 'A';
1326 fp += 2; 1393 fp += 2;
@@ -1332,7 +1399,10 @@ normalize_filename (register char *fp, char path_sep)
1332 { 1399 {
1333 if (*fp == '/' || *fp == '\\') 1400 if (*fp == '/' || *fp == '\\')
1334 *fp = path_sep; 1401 *fp = path_sep;
1335 fp++; 1402 if (!dbcs_p)
1403 fp++;
1404 else
1405 fp = CharNextExA (file_name_codepage, fp, 0);
1336 } 1406 }
1337 return; 1407 return;
1338 } 1408 }
@@ -1355,13 +1425,20 @@ normalize_filename (register char *fp, char path_sep)
1355 if (elem && elem != fp) 1425 if (elem && elem != fp)
1356 { 1426 {
1357 *fp = 0; /* temporary end of string */ 1427 *fp = 0; /* temporary end of string */
1358 _strlwr (elem); /* while we convert to lower case */ 1428 _mbslwr (elem); /* while we convert to lower case */
1359 } 1429 }
1360 *fp = sep; /* convert (or restore) path separator */ 1430 *fp = sep; /* convert (or restore) path separator */
1361 elem = fp + 1; /* next element starts after separator */ 1431 elem = fp + 1; /* next element starts after separator */
1362 sep = path_sep; 1432 sep = path_sep;
1363 } 1433 }
1364 } while (*fp++); 1434 if (*fp)
1435 {
1436 if (!dbcs_p)
1437 fp++;
1438 else
1439 fp = CharNextExA (file_name_codepage, fp, 0);
1440 }
1441 } while (*fp);
1365} 1442}
1366 1443
1367/* Destructively turn backslashes into slashes. */ 1444/* Destructively turn backslashes into slashes. */
@@ -2588,15 +2665,22 @@ readdir (DIR *dirp)
2588 strcpy (dir_static.d_name, dir_find_data.cFileName); 2665 strcpy (dir_static.d_name, dir_find_data.cFileName);
2589 dir_static.d_namlen = strlen (dir_static.d_name); 2666 dir_static.d_namlen = strlen (dir_static.d_name);
2590 if (dir_is_fat) 2667 if (dir_is_fat)
2591 _strlwr (dir_static.d_name); 2668 _mbslwr (dir_static.d_name);
2592 else if (downcase) 2669 else if (downcase)
2593 { 2670 {
2594 register char *p; 2671 register char *p;
2595 for (p = dir_static.d_name; *p; p++) 2672 int dbcs_p = max_filename_mbslen () > 1;
2596 if (*p >= 'a' && *p <= 'z') 2673 for (p = dir_static.d_name; *p; )
2597 break; 2674 {
2675 if (*p >= 'a' && *p <= 'z')
2676 break;
2677 if (dbcs_p)
2678 p = CharNextExA (file_name_codepage, p, 0);
2679 else
2680 p++;
2681 }
2598 if (!*p) 2682 if (!*p)
2599 _strlwr (dir_static.d_name); 2683 _mbslwr (dir_static.d_name);
2600 } 2684 }
2601 2685
2602 return &dir_static; 2686 return &dir_static;