aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author Eli Zaretskii 2018-05-18 16:34:19 +0300
committer Eli Zaretskii 2018-06-02 12:37:19 +0300
commit 6e0ff4cc1f261def00f9f9dd581ba6ef72703f0c (patch)
tree 406a66a7ed14dca5884fb5001473c6a9a624e71e /src
parent 35c1ab1419174f72010c745d963a55b6c183443c (diff)
download emacs-6e0ff4cc1f261def00f9f9dd581ba6ef72703f0c.tar.gz
emacs-6e0ff4cc1f261def00f9f9dd581ba6ef72703f0c.zip
Fix decoding of directories when "~" includes non-ASCII chars
* src/fileio.c (Fexpand_file_name): Don't build multibyte strings from unibyte non-ASCII strings when NAME and DEFAULT_DIRECTORY have different multibyteness, as this adds bytes to the byte sequence, and in some situations, e.g., when the home directory includes non-ASCII characters, can fail file APIs. (Bug#30755)
* lisp/startup.el (normal-top-level): Make sure default-directory is set to a multibyte string when decoded on MS-Windows.
(cherry picked from commit 3aab8626ba5080bb04d0fdae52d99c850a842a52)
Diffstat (limited to 'src')
-rw-r--r-- src/fileio.c 75
1 file changed, 60 insertions, 15 deletions
diff --git a/src/fileio.c b/src/fileio.c
index c4a10000bc3..9dbe3ad788e 100644
--- a/src/fileio.c
+++ b/src/fileio.c
@@ -864,33 +864,78 @@ the root directory. */)
864 } 864 }
865 } 865 }
866 multibyte = STRING_MULTIBYTE (name); 866 multibyte = STRING_MULTIBYTE (name);
867 if (multibyte != STRING_MULTIBYTE (default_directory)) 867 bool defdir_multibyte = STRING_MULTIBYTE (default_directory);
868 if (multibyte != defdir_multibyte)
868 { 869 {
870 /* We want to make both NAME and DEFAULT_DIRECTORY have the same
871 multibyteness. Strategy:
872 . If either NAME or DEFAULT_DIRECTORY is pure-ASCII, they
873 can be converted to the multibyteness of the other one
874 while keeping the same byte sequence.
875 . If both are non-ASCII, the only safe conversion is to
876 convert the multibyte one to be unibyte, because the
877 reverse conversion potentially adds bytes while raw bytes
878 are converted to their multibyte forms, which we will be
879 unable to account for, since the information about the
880 original multibyteness is lost. If those additional bytes
881 later leak to system APIs because they are not encoded or
882 because they are converted to unibyte strings by keeping
883 the data, file APIs will fail.
884
885 Note: One could argue that if we see a multibyte string, it
886 is evidence that file-name decoding was already set up, and
887 we could convert unibyte strings to multibyte using
888 DECODE_FILE. However, this is risky, because the likes of
889 string_to_multibyte are capable of creating multibyte strings
890 without any decoding. */
869 if (multibyte) 891 if (multibyte)
870 { 892 {
871 unsigned char *p = SDATA (name); 893 bool name_ascii_p = SCHARS (name) == SBYTES (name);
894 unsigned char *p = SDATA (default_directory);
872 895
873 while (*p && ASCII_CHAR_P (*p)) 896 if (!name_ascii_p)
874 p++; 897 while (*p && ASCII_CHAR_P (*p))
875 if (*p == '\0') 898 p++;
899 if (name_ascii_p || *p != '\0')
876 { 900 {
877 /* NAME is a pure ASCII string, and DEFAULT_DIRECTORY is 901 /* DEFAULT_DIRECTORY is unibyte and possibly non-ASCII.
878 unibyte. Do not convert DEFAULT_DIRECTORY to 902 Make a unibyte string out of NAME, and arrange for
879 multibyte; instead, convert NAME to a unibyte string, 903 the result of this function to be a unibyte string.
880 so that the result of this function is also a unibyte 904 This is needed during bootstrapping and dumping, when
881 string. This is needed during bootstrapping and 905 Emacs cannot decode file names, because the locale
882 dumping, when Emacs cannot decode file names, because 906 environment is not set up. */
883 the locale environment is not set up. */
884 name = make_unibyte_string (SSDATA (name), SBYTES (name)); 907 name = make_unibyte_string (SSDATA (name), SBYTES (name));
885 multibyte = 0; 908 multibyte = 0;
886 } 909 }
887 else 910 else
888 default_directory = string_to_multibyte (default_directory); 911 {
912 /* NAME is non-ASCII and multibyte, and
913 DEFAULT_DIRECTORY is unibyte and pure-ASCII: make a
914 multibyte string out of DEFAULT_DIRECTORY's data. */
915 default_directory =
916 make_multibyte_string (SSDATA (default_directory),
917 SCHARS (default_directory),
918 SCHARS (default_directory));
919 }
889 } 920 }
890 else 921 else
891 { 922 {
892 name = string_to_multibyte (name); 923 unsigned char *p = SDATA (name);
893 multibyte = 1; 924
925 while (*p && ASCII_CHAR_P (*p))
926 p++;
927 if (*p == '\0')
928 {
929 /* DEFAULT_DIRECTORY is multibyte and NAME is unibyte
930 and pure-ASCII. Make a multibyte string out of
931 NAME's data. */
932 name = make_multibyte_string (SSDATA (name),
933 SCHARS (name), SCHARS (name));
934 multibyte = 1;
935 }
936 else
937 default_directory = make_unibyte_string (SSDATA (default_directory),
938 SBYTES (default_directory));
894 } 939 }
895 } 940 }
896 941