diff options
| author | Eli Zaretskii | 2018-05-18 16:34:19 +0300 |
|---|---|---|
| committer | Eli Zaretskii | 2018-06-02 12:37:19 +0300 |
| commit | 6e0ff4cc1f261def00f9f9dd581ba6ef72703f0c (patch) | |
| tree | 406a66a7ed14dca5884fb5001473c6a9a624e71e /src | |
| parent | 35c1ab1419174f72010c745d963a55b6c183443c (diff) | |
| download | emacs-6e0ff4cc1f261def00f9f9dd581ba6ef72703f0c.tar.gz emacs-6e0ff4cc1f261def00f9f9dd581ba6ef72703f0c.zip | |
Fix decoding of directories when "~" includes non-ASCII chars
* src/fileio.c (Fexpand_file_name): Don't build multibyte strings
from unibyte non-ASCII strings when NAME and DEFAULT_DIRECTORY
have different multibyteness, as this adds bytes to the byte
sequence, and in some situations, e.g., when the home directory
includes non-ASCII characters, can cause file APIs to fail. (Bug#30755)
* lisp/startup.el (normal-top-level): Make sure default-directory
is set to a multibyte string when decoded on MS-Windows.
(cherry picked from commit 3aab8626ba5080bb04d0fdae52d99c850a842a52)
Diffstat (limited to 'src')
| -rw-r--r-- | src/fileio.c | 75 |
1 files changed, 60 insertions, 15 deletions
diff --git a/src/fileio.c b/src/fileio.c index c4a10000bc3..9dbe3ad788e 100644 --- a/src/fileio.c +++ b/src/fileio.c | |||
| @@ -864,33 +864,78 @@ the root directory. */) | |||
| 864 | } | 864 | } |
| 865 | } | 865 | } |
| 866 | multibyte = STRING_MULTIBYTE (name); | 866 | multibyte = STRING_MULTIBYTE (name); |
| 867 | if (multibyte != STRING_MULTIBYTE (default_directory)) | 867 | bool defdir_multibyte = STRING_MULTIBYTE (default_directory); |
| 868 | if (multibyte != defdir_multibyte) | ||
| 868 | { | 869 | { |
| 870 | /* We want to make both NAME and DEFAULT_DIRECTORY have the same | ||
| 871 | multibyteness. Strategy: | ||
| 872 | . If either NAME or DEFAULT_DIRECTORY is pure-ASCII, they | ||
| 873 | can be converted to the multibyteness of the other one | ||
| 874 | while keeping the same byte sequence. | ||
| 875 | . If both are non-ASCII, the only safe conversion is to | ||
| 876 | convert the multibyte one to be unibyte, because the | ||
| 877 | reverse conversion potentially adds bytes while raw bytes | ||
| 878 | are converted to their multibyte forms, which we will be | ||
| 879 | unable to account for, since the information about the | ||
| 880 | original multibyteness is lost. If those additional bytes | ||
| 881 | later leak to system APIs because they are not encoded or | ||
| 882 | because they are converted to unibyte strings by keeping | ||
| 883 | the data, file APIs will fail. | ||
| 884 | |||
| 885 | Note: One could argue that if we see a multibyte string, it | ||
| 886 | is evidence that file-name decoding was already set up, and | ||
| 887 | we could convert unibyte strings to multibyte using | ||
| 888 | DECODE_FILE. However, this is risky, because the likes of | ||
| 889 | string_to_multibyte are capable of creating multibyte strings | ||
| 890 | without any decoding. */ | ||
| 869 | if (multibyte) | 891 | if (multibyte) |
| 870 | { | 892 | { |
| 871 | unsigned char *p = SDATA (name); | 893 | bool name_ascii_p = SCHARS (name) == SBYTES (name); |
| 894 | unsigned char *p = SDATA (default_directory); | ||
| 872 | 895 | ||
| 873 | while (*p && ASCII_CHAR_P (*p)) | 896 | if (!name_ascii_p) |
| 874 | p++; | 897 | while (*p && ASCII_CHAR_P (*p)) |
| 875 | if (*p == '\0') | 898 | p++; |
| 899 | if (name_ascii_p || *p != '\0') | ||
| 876 | { | 900 | { |
| 877 | /* NAME is a pure ASCII string, and DEFAULT_DIRECTORY is | 901 | /* DEFAULT_DIRECTORY is unibyte and possibly non-ASCII. |
| 878 | unibyte. Do not convert DEFAULT_DIRECTORY to | 902 | Make a unibyte string out of NAME, and arrange for |
| 879 | multibyte; instead, convert NAME to a unibyte string, | 903 | the result of this function to be a unibyte string. |
| 880 | so that the result of this function is also a unibyte | 904 | This is needed during bootstrapping and dumping, when |
| 881 | string. This is needed during bootstrapping and | 905 | Emacs cannot decode file names, because the locale |
| 882 | dumping, when Emacs cannot decode file names, because | 906 | environment is not set up. */ |
| 883 | the locale environment is not set up. */ | ||
| 884 | name = make_unibyte_string (SSDATA (name), SBYTES (name)); | 907 | name = make_unibyte_string (SSDATA (name), SBYTES (name)); |
| 885 | multibyte = 0; | 908 | multibyte = 0; |
| 886 | } | 909 | } |
| 887 | else | 910 | else |
| 888 | default_directory = string_to_multibyte (default_directory); | 911 | { |
| 912 | /* NAME is non-ASCII and multibyte, and | ||
| 913 | DEFAULT_DIRECTORY is unibyte and pure-ASCII: make a | ||
| 914 | multibyte string out of DEFAULT_DIRECTORY's data. */ | ||
| 915 | default_directory = | ||
| 916 | make_multibyte_string (SSDATA (default_directory), | ||
| 917 | SCHARS (default_directory), | ||
| 918 | SCHARS (default_directory)); | ||
| 919 | } | ||
| 889 | } | 920 | } |
| 890 | else | 921 | else |
| 891 | { | 922 | { |
| 892 | name = string_to_multibyte (name); | 923 | unsigned char *p = SDATA (name); |
| 893 | multibyte = 1; | 924 | |
| 925 | while (*p && ASCII_CHAR_P (*p)) | ||
| 926 | p++; | ||
| 927 | if (*p == '\0') | ||
| 928 | { | ||
| 929 | /* DEFAULT_DIRECTORY is multibyte and NAME is unibyte | ||
| 930 | and pure-ASCII. Make a multibyte string out of | ||
| 931 | NAME's data. */ | ||
| 932 | name = make_multibyte_string (SSDATA (name), | ||
| 933 | SCHARS (name), SCHARS (name)); | ||
| 934 | multibyte = 1; | ||
| 935 | } | ||
| 936 | else | ||
| 937 | default_directory = make_unibyte_string (SSDATA (default_directory), | ||
| 938 | SBYTES (default_directory)); | ||
| 894 | } | 939 | } |
| 895 | } | 940 | } |
| 896 | 941 | ||