aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author: Philipp Stephani, 2017-12-23 17:56:36 +0100
committer: Philipp Stephani, 2017-12-30 23:16:51 +0100
commit: a5835dfee139322de7aa071f1c87ef015acbecad (patch)
tree: 992b5222049f2aa2a124f8c3484bbd074eaecfd1 /src
parent: 30ffc256abe7443a02b44490c518baf9a122b4c8 (diff)
download: emacs-a5835dfee139322de7aa071f1c87ef015acbecad.tar.gz
download: emacs-a5835dfee139322de7aa071f1c87ef015acbecad.zip
Improve error reporting when serializing non-Unicode strings to JSON
* src/coding.c (utf8_string_p): New helper function.
(syms_of_coding) <utf-8-unix>: Move from json.c.
* src/json.c (json_check_utf8): New helper function.
(lisp_to_json_toplevel_1, lisp_to_json): Use it. To save a bit of time, check for invalid UTF-8 strings only after encountering an error, since Jansson already rejects them.
* test/src/json-tests.el (json-serialize/invalid-unicode): Adapt expected error symbol.
Diffstat (limited to 'src')
-rw-r--r-- src/coding.c | 22
-rw-r--r-- src/coding.h | 1
-rw-r--r-- src/json.c | 33
3 files changed, 48 insertions, 8 deletions
diff --git a/src/coding.c b/src/coding.c
index 1705838ffad..5ea1e395f20 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -6360,6 +6360,27 @@ check_utf_8 (struct coding_system *coding)
6360} 6360}
6361 6361
6362 6362
6363/* Return whether STRING is a valid UTF-8 string. STRING must be a
6364 unibyte string; this function asserts that with eassert.
     The validation itself is delegated to check_utf_8, which is run
     over a local coding system set up for utf-8-unix with STRING as
     its source object.
     NOTE(review): check_utf_8 is presumed to return -1 when it meets
     an invalid byte sequence -- confirm against its definition. */
6365
6366bool
6367utf8_string_p (Lisp_Object string)
6368{
6369 eassert (!STRING_MULTIBYTE (string));
6370 struct coding_system coding;
6371 setup_coding_system (Qutf_8_unix, &coding);
6372 /* We initialize only the fields that check_utf_8 accesses. */
6373 coding.head_ascii = -1;
     /* Cover the whole of STRING: start at position 0 and span all of
        its characters and bytes. */
6374 coding.src_pos = 0;
6375 coding.src_pos_byte = 0;
6376 coding.src_chars = SCHARS (string);
6377 coding.src_bytes = SBYTES (string);
6378 coding.src_object = string;
6379 coding.eol_seen = EOL_SEEN_NONE;
     /* Any result other than -1 is reported as valid. */
6380 return check_utf_8 (&coding) != -1;
6381}
6382
6383
6363/* Detect how end-of-line of a text of length SRC_BYTES pointed by 6384/* Detect how end-of-line of a text of length SRC_BYTES pointed by
6364 SOURCE is encoded. If CATEGORY is one of 6385 SOURCE is encoded. If CATEGORY is one of
6365 coding_category_utf_16_XXXX, assume that CR and LF are encoded by 6386 coding_category_utf_16_XXXX, assume that CR and LF are encoded by
@@ -10846,6 +10867,7 @@ syms_of_coding (void)
10846 DEFSYM (Qiso_2022, "iso-2022"); 10867 DEFSYM (Qiso_2022, "iso-2022");
10847 10868
10848 DEFSYM (Qutf_8, "utf-8"); 10869 DEFSYM (Qutf_8, "utf-8");
10870 DEFSYM (Qutf_8_unix, "utf-8-unix");
10849 DEFSYM (Qutf_8_emacs, "utf-8-emacs"); 10871 DEFSYM (Qutf_8_emacs, "utf-8-emacs");
10850 10872
10851#if defined (WINDOWSNT) || defined (CYGWIN) 10873#if defined (WINDOWSNT) || defined (CYGWIN)
diff --git a/src/coding.h b/src/coding.h
index 66d125b07e6..bc4ef52e1ed 100644
--- a/src/coding.h
+++ b/src/coding.h
@@ -665,6 +665,7 @@ struct coding_system
665/* Extern declarations. */ 665/* Extern declarations. */
666extern Lisp_Object code_conversion_save (bool, bool); 666extern Lisp_Object code_conversion_save (bool, bool);
667extern bool encode_coding_utf_8 (struct coding_system *); 667extern bool encode_coding_utf_8 (struct coding_system *);
668extern bool utf8_string_p (Lisp_Object);
668extern void setup_coding_system (Lisp_Object, struct coding_system *); 669extern void setup_coding_system (Lisp_Object, struct coding_system *);
669extern Lisp_Object coding_charset_list (struct coding_system *); 670extern Lisp_Object coding_charset_list (struct coding_system *);
670extern Lisp_Object coding_system_charset_list (Lisp_Object); 671extern Lisp_Object coding_system_charset_list (Lisp_Object);
diff --git a/src/json.c b/src/json.c
index 88db86ad2e3..93dcc730dae 100644
--- a/src/json.c
+++ b/src/json.c
@@ -316,6 +316,15 @@ json_check (json_t *object)
316 return object; 316 return object;
317} 317}
318 318
319/* If STRING is not a valid UTF-8 string, signal an error of type
320 `wrong-type-argument'. STRING must be a unibyte string.
    Validity is decided by utf8_string_p, and the predicate symbol
    passed to CHECK_TYPE is Qutf_8_string_p (`utf-8-string-p').
    The callers in this file invoke this only after a Jansson call
    has already failed, to tell an invalid string apart from low
    memory before reporting out-of-memory. */
321
322static void
323json_check_utf8 (Lisp_Object string)
324{
325 CHECK_TYPE (utf8_string_p (string), Qutf_8_string_p, string);
326}
327
319static json_t *lisp_to_json (Lisp_Object); 328static json_t *lisp_to_json (Lisp_Object);
320 329
321/* Convert a Lisp object to a toplevel JSON object (array or object). 330/* Convert a Lisp object to a toplevel JSON object (array or object).
@@ -363,9 +372,12 @@ lisp_to_json_toplevel_1 (Lisp_Object lisp, json_t **json)
363 int status = json_object_set_new (*json, key_str, 372 int status = json_object_set_new (*json, key_str,
364 lisp_to_json (HASH_VALUE (h, i))); 373 lisp_to_json (HASH_VALUE (h, i)));
365 if (status == -1) 374 if (status == -1)
366 /* FIXME: A failure here might also indicate that the 375 {
367 key is not a valid Unicode string. */ 376 /* A failure can be caused either by an invalid key or
368 json_out_of_memory (); 377 by low memory. */
378 json_check_utf8 (key);
379 json_out_of_memory ();
380 }
369 } 381 }
370 clear_unwind_protect (count); 382 clear_unwind_protect (count);
371 return unbind_to (count, Qnil); 383 return unbind_to (count, Qnil);
@@ -447,9 +459,15 @@ lisp_to_json (Lisp_Object lisp)
447 else if (STRINGP (lisp)) 459 else if (STRINGP (lisp))
448 { 460 {
449 Lisp_Object encoded = json_encode (lisp); 461 Lisp_Object encoded = json_encode (lisp);
450 /* FIXME: We might throw an out-of-memory error here if the 462 json_t *json = json_stringn (SSDATA (encoded), SBYTES (encoded));
451 string is not valid Unicode. */ 463 if (json == NULL)
452 return json_check (json_stringn (SSDATA (encoded), SBYTES (encoded))); 464 {
465 /* A failure can be caused either by an invalid string or by
466 low memory. */
467 json_check_utf8 (encoded);
468 json_out_of_memory ();
469 }
470 return json;
453 } 471 }
454 472
455 /* LISP now must be a vector, hashtable, or alist. */ 473 /* LISP now must be a vector, hashtable, or alist. */
@@ -863,8 +881,7 @@ syms_of_json (void)
863 881
864 DEFSYM (Qstring_without_embedded_nulls_p, "string-without-embedded-nulls-p"); 882 DEFSYM (Qstring_without_embedded_nulls_p, "string-without-embedded-nulls-p");
865 DEFSYM (Qjson_value_p, "json-value-p"); 883 DEFSYM (Qjson_value_p, "json-value-p");
866 884 DEFSYM (Qutf_8_string_p, "utf-8-string-p");
867 DEFSYM (Qutf_8_unix, "utf-8-unix");
868 885
869 DEFSYM (Qjson_error, "json-error"); 886 DEFSYM (Qjson_error, "json-error");
870 DEFSYM (Qjson_out_of_memory, "json-out-of-memory"); 887 DEFSYM (Qjson_out_of_memory, "json-out-of-memory");