aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author Eli Zaretskii 2018-05-18 16:34:19 +0300
committer Eli Zaretskii 2018-05-18 16:34:19 +0300
commit 3aab8626ba5080bb04d0fdae52d99c850a842a52 (patch)
tree 938f2a9a24bde22329770c08e7019c2294a88f2b /src
parent 60ff8101449eea3a5ca4961299501efd83d011bd (diff)
download emacs-3aab8626ba5080bb04d0fdae52d99c850a842a52.tar.gz
emacs-3aab8626ba5080bb04d0fdae52d99c850a842a52.zip
Fix decoding of directories when "~" includes non-ASCII chars
* src/fileio.c (Fexpand_file_name): Don't build multibyte strings from unibyte non-ASCII strings when NAME and DEFAULT_DIRECTORY have different multibyteness, as this adds bytes to the byte sequence, and in some situations, e.g., when the home directory includes non-ASCII characters, can cause file APIs to fail. (Bug#30755)
* lisp/startup.el (normal-top-level): Make sure default-directory is set to a multibyte string when decoded on MS-Windows.
Diffstat (limited to 'src')
-rw-r--r-- src/fileio.c 75
1 files changed, 60 insertions, 15 deletions
diff --git a/src/fileio.c b/src/fileio.c
index 2f8358f01b5..e8d966e1631 100644
--- a/src/fileio.c
+++ b/src/fileio.c
@@ -867,33 +867,78 @@ the root directory. */)
867 } 867 }
868 } 868 }
869 multibyte = STRING_MULTIBYTE (name); 869 multibyte = STRING_MULTIBYTE (name);
870 if (multibyte != STRING_MULTIBYTE (default_directory)) 870 bool defdir_multibyte = STRING_MULTIBYTE (default_directory);
871 if (multibyte != defdir_multibyte)
871 { 872 {
873 /* We want to make both NAME and DEFAULT_DIRECTORY have the same
874 multibyteness. Strategy:
875 . If either NAME or DEFAULT_DIRECTORY is pure-ASCII, they
876 can be converted to the multibyteness of the other one
877 while keeping the same byte sequence.
878 . If both are non-ASCII, the only safe conversion is to
879 convert the multibyte one to be unibyte, because the
880 reverse conversion potentially adds bytes while raw bytes
881 are converted to their multibyte forms, which we will be
882 unable to account for, since the information about the
883 original multibyteness is lost. If those additional bytes
884 later leak to system APIs because they are not encoded or
885 because they are converted to unibyte strings by keeping
886 the data, file APIs will fail.
887
888 Note: One could argue that if we see a multibyte string, it
889 is evidence that file-name decoding was already set up, and
890 we could convert unibyte strings to multibyte using
891 DECODE_FILE. However, this is risky, because the likes of
892 string_to_multibyte are capable of creating multibyte strings
893 without any decoding. */
872 if (multibyte) 894 if (multibyte)
873 { 895 {
874 unsigned char *p = SDATA (name); 896 bool name_ascii_p = SCHARS (name) == SBYTES (name);
897 unsigned char *p = SDATA (default_directory);
875 898
876 while (*p && ASCII_CHAR_P (*p)) 899 if (!name_ascii_p)
877 p++; 900 while (*p && ASCII_CHAR_P (*p))
878 if (*p == '\0') 901 p++;
902 if (name_ascii_p || *p != '\0')
879 { 903 {
880 /* NAME is a pure ASCII string, and DEFAULT_DIRECTORY is 904 /* DEFAULT_DIRECTORY is unibyte and possibly non-ASCII.
881 unibyte. Do not convert DEFAULT_DIRECTORY to 905 Make a unibyte string out of NAME, and arrange for
882 multibyte; instead, convert NAME to a unibyte string, 906 the result of this function to be a unibyte string.
883 so that the result of this function is also a unibyte 907 This is needed during bootstrapping and dumping, when
884 string. This is needed during bootstrapping and 908 Emacs cannot decode file names, because the locale
885 dumping, when Emacs cannot decode file names, because 909 environment is not set up. */
886 the locale environment is not set up. */
887 name = make_unibyte_string (SSDATA (name), SBYTES (name)); 910 name = make_unibyte_string (SSDATA (name), SBYTES (name));
888 multibyte = 0; 911 multibyte = 0;
889 } 912 }
890 else 913 else
891 default_directory = string_to_multibyte (default_directory); 914 {
915 /* NAME is non-ASCII and multibyte, and
916 DEFAULT_DIRECTORY is unibyte and pure-ASCII: make a
917 multibyte string out of DEFAULT_DIRECTORY's data. */
918 default_directory =
919 make_multibyte_string (SSDATA (default_directory),
920 SCHARS (default_directory),
921 SCHARS (default_directory));
922 }
892 } 923 }
893 else 924 else
894 { 925 {
895 name = string_to_multibyte (name); 926 unsigned char *p = SDATA (name);
896 multibyte = 1; 927
928 while (*p && ASCII_CHAR_P (*p))
929 p++;
930 if (*p == '\0')
931 {
932 /* DEFAULT_DIRECTORY is multibyte and NAME is unibyte
933 and pure-ASCII. Make a multibyte string out of
934 NAME's data. */
935 name = make_multibyte_string (SSDATA (name),
936 SCHARS (name), SCHARS (name));
937 multibyte = 1;
938 }
939 else
940 default_directory = make_unibyte_string (SSDATA (default_directory),
941 SBYTES (default_directory));
897 } 942 }
898 } 943 }
899 944