diff options
| author | Po Lu | 2024-03-24 10:59:54 +0800 |
|---|---|---|
| committer | Po Lu | 2024-03-24 11:00:47 +0800 |
| commit | 2be41da38ef5432b6038058fcb0c284164fcb370 (patch) | |
| tree | 1b81cf7498ce1611e5bf250fed191e04711469a9 /java | |
| parent | a496378c94176930583e63ef5c95477f092a872b (diff) | |
| download | emacs-2be41da38ef5432b6038058fcb0c284164fcb370.tar.gz emacs-2be41da38ef5432b6038058fcb0c284164fcb370.zip | |
Improve consistency of content file name handling
* java/org/gnu/emacs/EmacsService.java (getDisplayNameHash):
Always encode file names as modified UTF-8, as insurance against
future changes to undocumented behavior of the JVM.
Diffstat (limited to 'java')
| -rw-r--r-- | java/org/gnu/emacs/EmacsService.java | 48 |
1 files changed, 39 insertions, 9 deletions
diff --git a/java/org/gnu/emacs/EmacsService.java b/java/org/gnu/emacs/EmacsService.java index 785163c713c..07bfb525be9 100644 --- a/java/org/gnu/emacs/EmacsService.java +++ b/java/org/gnu/emacs/EmacsService.java | |||
| @@ -19,6 +19,7 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */ | |||
| 19 | 19 | ||
| 20 | package org.gnu.emacs; | 20 | package org.gnu.emacs; |
| 21 | 21 | ||
| 22 | import java.io.ByteArrayOutputStream; | ||
| 22 | import java.io.FileNotFoundException; | 23 | import java.io.FileNotFoundException; |
| 23 | import java.io.IOException; | 24 | import java.io.IOException; |
| 24 | import java.io.UnsupportedEncodingException; | 25 | import java.io.UnsupportedEncodingException; |
| @@ -1041,17 +1042,46 @@ public final class EmacsService extends Service | |||
| 1041 | getDisplayNameHash (String string) | 1042 | getDisplayNameHash (String string) |
| 1042 | { | 1043 | { |
| 1043 | byte[] encoded; | 1044 | byte[] encoded; |
| 1044 | 1045 | ByteArrayOutputStream stream; | |
| 1045 | try | 1046 | int i, ch; |
| 1047 | |||
| 1048 | /* Much of the VFS code expects file names to be encoded as modified | ||
| 1049 | UTF-8 data, but Android's JNI implementation produces (while not | ||
| 1050 | accepting!) regular UTF-8 sequences for all characters, even | ||
| 1051 | non-Emoji ones. With no documentation to this effect, save for | ||
| 1052 | two comments nestled in the source code of the Java virtual | ||
| 1053 | machine, it is not sound to assume that this behavior will not be | ||
| 1054 | revised in future or modified releases of Android, and as such, | ||
| 1055 | encode STRING into modified UTF-8 by hand, to protect against | ||
| 1056 | future changes in this respect. */ | ||
| 1057 | |||
| 1058 | stream = new ByteArrayOutputStream (); | ||
| 1059 | |||
| 1060 | for (i = 0; i < string.length (); ++i) | ||
| 1046 | { | 1061 | { |
| 1047 | encoded = string.getBytes ("UTF-8"); | 1062 | ch = string.charAt (i); |
| 1048 | return EmacsNative.displayNameHash (encoded); | 1063 | |
| 1049 | } | 1064 | if (ch != 0 && ch <= 127) |
| 1050 | catch (UnsupportedEncodingException exception) | 1065 | stream.write (ch); |
| 1051 | { | 1066 | else if (ch <= 2047) |
| 1052 | /* This should be impossible. */ | 1067 | { |
| 1053 | return "error"; | 1068 | stream.write (0xc0 | (0x1f & (ch >> 6))); |
| 1069 | stream.write (0x80 | (0x3f & ch)); | ||
| 1070 | } | ||
| 1071 | else | ||
| 1072 | { | ||
| 1073 | stream.write (0xe0 | (0x0f & (ch >> 12))); | ||
| 1074 | stream.write (0x80 | (0x3f & (ch >> 6))); | ||
| 1075 | stream.write (0x80 | (0x3f & ch)); | ||
| 1076 | } | ||
| 1054 | } | 1077 | } |
| 1078 | |||
| 1079 | encoded = stream.toByteArray (); | ||
| 1080 | |||
| 1081 | /* Closing a ByteArrayOutputStream has no effect. | ||
| 1082 | stream.close (); */ | ||
| 1083 | |||
| 1084 | return EmacsNative.displayNameHash (encoded); | ||
| 1055 | } | 1085 | } |
| 1056 | 1086 | ||
| 1057 | /* Build a content file name for URI. | 1087 | /* Build a content file name for URI. |