diff options
| author | Eli Zaretskii | 2018-05-18 16:34:19 +0300 |
|---|---|---|
| committer | Eli Zaretskii | 2018-05-18 16:34:19 +0300 |
| commit | 3aab8626ba5080bb04d0fdae52d99c850a842a52 (patch) | |
| tree | 938f2a9a24bde22329770c08e7019c2294a88f2b /src | |
| parent | 60ff8101449eea3a5ca4961299501efd83d011bd (diff) | |
| download | emacs-3aab8626ba5080bb04d0fdae52d99c850a842a52.tar.gz emacs-3aab8626ba5080bb04d0fdae52d99c850a842a52.zip | |
Fix decoding of directories when "~" includes non-ASCII chars
* src/fileio.c (Fexpand_file_name): Don't build multibyte strings
from unibyte non-ASCII strings when NAME and DEFAULT_DIRECTORY
have different multibyteness, as this adds bytes to the byte
sequence, and in some situations, e.g., when the home directory
includes non-ASCII characters, can cause file APIs to fail. (Bug#30755)
* lisp/startup.el (normal-top-level): Make sure default-directory
is set to a multibyte string when decoded on MS-Windows.
Diffstat (limited to 'src')
| -rw-r--r-- | src/fileio.c | 75 |
1 files changed, 60 insertions, 15 deletions
diff --git a/src/fileio.c b/src/fileio.c index 2f8358f01b5..e8d966e1631 100644 --- a/src/fileio.c +++ b/src/fileio.c | |||
| @@ -867,33 +867,78 @@ the root directory. */) | |||
| 867 | } | 867 | } |
| 868 | } | 868 | } |
| 869 | multibyte = STRING_MULTIBYTE (name); | 869 | multibyte = STRING_MULTIBYTE (name); |
| 870 | if (multibyte != STRING_MULTIBYTE (default_directory)) | 870 | bool defdir_multibyte = STRING_MULTIBYTE (default_directory); |
| 871 | if (multibyte != defdir_multibyte) | ||
| 871 | { | 872 | { |
| 873 | /* We want to make both NAME and DEFAULT_DIRECTORY have the same | ||
| 874 | multibyteness. Strategy: | ||
| 875 | . If either NAME or DEFAULT_DIRECTORY is pure-ASCII, they | ||
| 876 | can be converted to the multibyteness of the other one | ||
| 877 | while keeping the same byte sequence. | ||
| 878 | . If both are non-ASCII, the only safe conversion is to | ||
| 879 | convert the multibyte one to be unibyte, because the | ||
| 880 | reverse conversion potentially adds bytes while raw bytes | ||
| 881 | are converted to their multibyte forms, which we will be | ||
| 882 | unable to account for, since the information about the | ||
| 883 | original multibyteness is lost. If those additional bytes | ||
| 884 | later leak to system APIs because they are not encoded or | ||
| 885 | because they are converted to unibyte strings by keeping | ||
| 886 | the data, file APIs will fail. | ||
| 887 | |||
| 888 | Note: One could argue that if we see a multibyte string, it | ||
| 889 | is evidence that file-name decoding was already set up, and | ||
| 890 | we could convert unibyte strings to multibyte using | ||
| 891 | DECODE_FILE. However, this is risky, because the likes of | ||
| 892 | string_to_multibyte are capable of creating multibyte strings | ||
| 893 | without any decoding. */ | ||
| 872 | if (multibyte) | 894 | if (multibyte) |
| 873 | { | 895 | { |
| 874 | unsigned char *p = SDATA (name); | 896 | bool name_ascii_p = SCHARS (name) == SBYTES (name); |
| 897 | unsigned char *p = SDATA (default_directory); | ||
| 875 | 898 | ||
| 876 | while (*p && ASCII_CHAR_P (*p)) | 899 | if (!name_ascii_p) |
| 877 | p++; | 900 | while (*p && ASCII_CHAR_P (*p)) |
| 878 | if (*p == '\0') | 901 | p++; |
| 902 | if (name_ascii_p || *p != '\0') | ||
| 879 | { | 903 | { |
| 880 | /* NAME is a pure ASCII string, and DEFAULT_DIRECTORY is | 904 | /* DEFAULT_DIRECTORY is unibyte and possibly non-ASCII. |
| 881 | unibyte. Do not convert DEFAULT_DIRECTORY to | 905 | Make a unibyte string out of NAME, and arrange for |
| 882 | multibyte; instead, convert NAME to a unibyte string, | 906 | the result of this function to be a unibyte string. |
| 883 | so that the result of this function is also a unibyte | 907 | This is needed during bootstrapping and dumping, when |
| 884 | string. This is needed during bootstrapping and | 908 | Emacs cannot decode file names, because the locale |
| 885 | dumping, when Emacs cannot decode file names, because | 909 | environment is not set up. */ |
| 886 | the locale environment is not set up. */ | ||
| 887 | name = make_unibyte_string (SSDATA (name), SBYTES (name)); | 910 | name = make_unibyte_string (SSDATA (name), SBYTES (name)); |
| 888 | multibyte = 0; | 911 | multibyte = 0; |
| 889 | } | 912 | } |
| 890 | else | 913 | else |
| 891 | default_directory = string_to_multibyte (default_directory); | 914 | { |
| 915 | /* NAME is non-ASCII and multibyte, and | ||
| 916 | DEFAULT_DIRECTORY is unibyte and pure-ASCII: make a | ||
| 917 | multibyte string out of DEFAULT_DIRECTORY's data. */ | ||
| 918 | default_directory = | ||
| 919 | make_multibyte_string (SSDATA (default_directory), | ||
| 920 | SCHARS (default_directory), | ||
| 921 | SCHARS (default_directory)); | ||
| 922 | } | ||
| 892 | } | 923 | } |
| 893 | else | 924 | else |
| 894 | { | 925 | { |
| 895 | name = string_to_multibyte (name); | 926 | unsigned char *p = SDATA (name); |
| 896 | multibyte = 1; | 927 | |
| 928 | while (*p && ASCII_CHAR_P (*p)) | ||
| 929 | p++; | ||
| 930 | if (*p == '\0') | ||
| 931 | { | ||
| 932 | /* DEFAULT_DIRECTORY is multibyte and NAME is unibyte | ||
| 933 | and pure-ASCII. Make a multibyte string out of | ||
| 934 | NAME's data. */ | ||
| 935 | name = make_multibyte_string (SSDATA (name), | ||
| 936 | SCHARS (name), SCHARS (name)); | ||
| 937 | multibyte = 1; | ||
| 938 | } | ||
| 939 | else | ||
| 940 | default_directory = make_unibyte_string (SSDATA (default_directory), | ||
| 941 | SBYTES (default_directory)); | ||
| 897 | } | 942 | } |
| 898 | } | 943 | } |
| 899 | 944 | ||