aboutsummaryrefslogtreecommitdiffstats
path: root/src/android.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/android.c')
-rw-r--r--src/android.c81
1 files changed, 75 insertions, 6 deletions
diff --git a/src/android.c b/src/android.c
index c30d7b58979..bd19107f53a 100644
--- a/src/android.c
+++ b/src/android.c
@@ -5480,6 +5480,69 @@ android_check_string (Lisp_Object text)
5480 return true; 5480 return true;
5481} 5481}
5482 5482
/* Check whether the NUL-terminated string NAME is structurally valid
   in the ``Modified UTF-8'' encoding that JNI string functions
   expect.  Return 0 if it is, and 1 otherwise.

   This function calls nothing and thus never performs GC, so NAME
   may point directly at Lisp string data.

   Modified UTF-8 differs from standard UTF-8 in two respects, both
   intended to mirror Java's UTF-16 based String representation while
   keeping encoded strings valid C strings: the NUL character is
   encoded as a two-byte sequence rather than a single zero byte, and
   each character above the BMP is encoded as a surrogate pair, i.e. a
   pair of three-byte sequences, instead of one four-byte sequence.

   Only the arrangement of leading and continuation bytes is
   examined; the values decoded from multi-byte sequences are not
   otherwise checked.  */

int
android_verify_jni_string (const char *name)
{
  const unsigned char *p;
  unsigned int lead;
  int trailing;

  for (p = (const unsigned char *) name; *p;)
    {
      lead = *p++;

      /* The high bit is clear: a plain single-byte character.  */
      if (lead < 0x80)
        continue;

      /* 0x80 to 0xbf are continuation bytes and cannot begin a
         sequence, whereas 0xf0 and above would begin a sequence of
         four or more bytes, which Java does not permit.  */
      if (lead < 0xc0 || lead >= 0xf0)
        return 1;

      /* 0xc0 to 0xdf introduce a two-byte sequence and 0xe0 to 0xef a
         three-byte sequence.  */
      trailing = (lead >= 0xe0) ? 2 : 1;

      /* Every remaining byte of the sequence must be of the form
         10xxxxxx.  The terminating NUL is not of that form, so a
         truncated sequence is rejected here before the scan can run
         past the end of the string.  */
      while (trailing-- > 0)
        {
          if ((*p++ & 0xc0) != 0x80)
            return 1;
        }
    }

  return 0;
}
5545
5483/* Given a Lisp string TEXT, return a local reference to an equivalent 5546/* Given a Lisp string TEXT, return a local reference to an equivalent
5484 Java string. */ 5547 Java string. */
5485 5548
@@ -5492,12 +5555,18 @@ android_build_string (Lisp_Object text)
5492 jchar *characters; 5555 jchar *characters;
5493 USE_SAFE_ALLOCA; 5556 USE_SAFE_ALLOCA;
5494 5557
5495 /* Directly encode TEXT if it contains no multibyte 5558 /* Directly encode TEXT if it contains no non-ASCII characters, or
5496 characters. This is okay because the Java extended UTF 5559 is multibyte and a valid Modified UTF-8 string. This is okay
5497 format is compatible with ASCII. */ 5560 because the Java extended UTF format is compatible with
5498 5561 ASCII. */
5499 if (SBYTES (text) == SCHARS (text) 5562
5500 && android_check_string (text)) 5563 if ((SBYTES (text) == SCHARS (text)
5564 && android_check_string (text))
5565 /* If TEXT is a multibyte string, then it's using Emacs's
5566 internal UTF-8 coding system, a significant subset of which
5567 is compatible with JNI. */
5568 || (STRING_MULTIBYTE (text)
5569 && !android_verify_jni_string (SSDATA (text))))
5501 { 5570 {
5502 string = (*android_java_env)->NewStringUTF (android_java_env, 5571 string = (*android_java_env)->NewStringUTF (android_java_env,
5503 SSDATA (text)); 5572 SSDATA (text));