diff options
| author | Po Lu | 2023-08-04 14:29:55 +0800 |
|---|---|---|
| committer | Po Lu | 2023-08-04 14:29:55 +0800 |
| commit | bfbdf4eb892935536fc665d6cc986fd669364263 (patch) | |
| tree | f82fdd0cafacdd9133356f5c264514a726018a18 /src/android.c | |
| parent | 709195fea6a082e3512c14fe16c4f9ea2f99824c (diff) | |
| download | emacs-bfbdf4eb892935536fc665d6cc986fd669364263.tar.gz emacs-bfbdf4eb892935536fc665d6cc986fd669364263.zip | |
Optimize creation of multibyte menu items on Android
* src/androidvfs.c (android_verify_jni_string): Move to
android.c.
* src/android.c (android_verify_jni_string): New function.
(android_build_string): Forgo encoding menu text if TEXT is a
multibyte string that's also a valid JNI string.
* src/android.h: Update prototypes.
Diffstat (limited to 'src/android.c')
| -rw-r--r-- | src/android.c | 81 |
1 files changed, 75 insertions, 6 deletions
diff --git a/src/android.c b/src/android.c index c30d7b58979..bd19107f53a 100644 --- a/src/android.c +++ b/src/android.c | |||
| @@ -5480,6 +5480,69 @@ android_check_string (Lisp_Object text) | |||
| 5480 | return true; | 5480 | return true; |
| 5481 | } | 5481 | } |
| 5482 | 5482 | ||
| 5483 | /* Verify that the NUL-terminated string NAME is a valid JNI | ||
| 5484 | ``UTF-8'' (Modified UTF-8) string. Return 0 if so, 1 otherwise. | ||
| 5485 | |||
| 5486 | Do not perform GC, enabling NAME to be a direct reference to string | ||
| 5487 | data. | ||
| 5488 | |||
| 5489 | The native coding system used by the JVM to store strings derives | ||
| 5490 | from UTF-8, but deviates from it in two aspects in an attempt to | ||
| 5491 | better represent the UTF-16 based Java String format, and to let | ||
| 5492 | strings contain NUL characters while remaining valid C strings: | ||
| 5493 | NUL characters are encoded as two-byte sequences, and characters | ||
| 5494 | above the BMP are encoded as surrogate pairs, each surrogate being | ||
| 5495 | a three-byte sequence, rather than as four-byte sequences. */ | ||
| 5496 | |||
| 5497 | int | ||
| 5498 | android_verify_jni_string (const char *name) | ||
| 5499 | { | ||
| 5500 | const unsigned char *chars; | ||
| 5501 | |||
| 5502 | chars = (unsigned char *) name; | ||
| 5503 | while (*chars) | ||
| 5504 | { | ||
| 5505 | /* Dispatch on the high 4 bits of the lead byte, then step past it. */ | ||
| 5506 | |||
| 5507 | switch (*chars++ >> 4) | ||
| 5508 | { | ||
| 5509 | case 0 ... 7: | ||
| 5510 | /* The high bit is clear, so this is a plain single-byte | ||
| 5511 | (ASCII) character. */ | ||
| 5512 | break; | ||
| 5513 | |||
| 5514 | case 8 ... 0xb: | ||
| 5515 | /* A continuation byte (10xxxxxx) cannot start a sequence. */ | ||
| 5516 | return 1; | ||
| 5517 | |||
| 5518 | case 0xf: | ||
| 5519 | /* The start of a four-byte sequence. These aren't allowed | ||
| 5520 | in Java. */ | ||
| 5521 | return 1; | ||
| 5522 | |||
| 5523 | case 0xe: | ||
| 5524 | /* The start of a three-byte sequence. Verify its first | ||
| 5525 | continuation byte here; the case below checks the second. */ | ||
| 5526 | |||
| 5527 | if ((*chars++ & 0xc0) != 0x80) | ||
| 5528 | return 1; | ||
| 5529 | |||
| 5530 | FALLTHROUGH; | ||
| 5531 | |||
| 5532 | case 0xc ... 0xd: | ||
| 5533 | /* The start of a two-byte sequence. Verify that the next byte is | ||
| 5534 | a continuation byte (10xxxxxx); a terminating NUL fails this. */ | ||
| 5535 | |||
| 5536 | if ((*chars++ & 0xc0) != 0x80) | ||
| 5537 | return 1; | ||
| 5538 | |||
| 5539 | break; | ||
| 5540 | } | ||
| 5541 | } | ||
| 5542 | |||
| 5543 | return 0; | ||
| 5544 | } | ||
| 5545 | |||
| 5483 | /* Given a Lisp string TEXT, return a local reference to an equivalent | 5546 | /* Given a Lisp string TEXT, return a local reference to an equivalent |
| 5484 | Java string. */ | 5547 | Java string. */ |
| 5485 | 5548 | ||
| @@ -5492,12 +5555,18 @@ android_build_string (Lisp_Object text) | |||
| 5492 | jchar *characters; | 5555 | jchar *characters; |
| 5493 | USE_SAFE_ALLOCA; | 5556 | USE_SAFE_ALLOCA; |
| 5494 | 5557 | ||
| 5495 | /* Directly encode TEXT if it contains no multibyte | 5558 | /* Directly encode TEXT if it contains no non-ASCII characters, or |
| 5496 | characters. This is okay because the Java extended UTF | 5559 | is multibyte and a valid Modified UTF-8 string. This is okay |
| 5497 | format is compatible with ASCII. */ | 5560 | because the Java extended UTF format is compatible with |
| 5498 | 5561 | ASCII. */ | |
| 5499 | if (SBYTES (text) == SCHARS (text) | 5562 | |
| 5500 | && android_check_string (text)) | 5563 | if ((SBYTES (text) == SCHARS (text) |
| 5564 | && android_check_string (text)) | ||
| 5565 | /* If TEXT is a multibyte string, then it's using Emacs's | ||
| 5566 | internal UTF-8 coding system, a significant subset of which | ||
| 5567 | is compatible with JNI. */ | ||
| 5568 | || (STRING_MULTIBYTE (text) | ||
| 5569 | && !android_verify_jni_string (SSDATA (text)))) | ||
| 5501 | { | 5570 | { |
| 5502 | string = (*android_java_env)->NewStringUTF (android_java_env, | 5571 | string = (*android_java_env)->NewStringUTF (android_java_env, |
| 5503 | SSDATA (text)); | 5572 | SSDATA (text)); |