aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMattias Engdegård2024-03-31 15:00:00 +0200
committerMattias Engdegård2024-04-01 10:41:46 +0200
commit734bd005aa0fa955cf1a46d3a60a4d6ef5e7e3d1 (patch)
tree28e48c30e5916247065e8e66d9a80d9991ec1c2b
parentf178a6d8006f1e8afe06bb71d0a413622d73f131 (diff)
downloademacs-734bd005aa0fa955cf1a46d3a60a4d6ef5e7e3d1.tar.gz
emacs-734bd005aa0fa955cf1a46d3a60a4d6ef5e7e3d1.zip
Faster JSON parsing
Speed up JSON parsing substantially by only UTF-8-parsing string literals and only exactly once. Previously, json-parse-string always first parsed the entire input and copied it to a new string, and then validated each string literal twice. We no longer create an extra new string when interning an alist key, nor do we garble plist keys with Unicode characters. * src/lread.c (intern_c_multibyte): New. * src/json.c (json_encode): Remove. (utf8_error): New. (json_parse_string): Faster and more careful UTF-8 decoding. Create and return a new multibyte string or symbol without extra decoding. All callers adapted. (Fjson_parse_string): Skip expensive input pre-decoding. * test/src/json-tests.el (json-parse-string/object-unicode-keys) (json-parse-string/short): New. (json-parse-string/string, json-parse-string/invalid-unicode): Adapt tests. * etc/NEWS: Mentioned change in errors.
-rw-r--r--etc/NEWS5
-rw-r--r--src/json.c187
-rw-r--r--src/lisp.h2
-rw-r--r--src/lread.c12
-rw-r--r--test/src/json-tests.el69
5 files changed, 137 insertions, 138 deletions
diff --git a/etc/NEWS b/etc/NEWS
index 903c60ac97e..11bca90314a 100644
--- a/etc/NEWS
+++ b/etc/NEWS
@@ -1721,6 +1721,11 @@ Use a float value for the first argument instead.
1721Instead, use 'eshell-process-wait-time', which supports floating-point 1721Instead, use 'eshell-process-wait-time', which supports floating-point
1722values. 1722values.
1723 1723
1724---
1725** The JSON parser sometimes signals different types of errors.
1726It will now signal 'json-utf8-decode-error' for inputs that are not
1727correctly UTF-8 encoded.
1728
1724 1729
1725* Lisp Changes in Emacs 30.1 1730* Lisp Changes in Emacs 30.1
1726 1731
diff --git a/src/json.c b/src/json.c
index 908db022c50..8749009a24b 100644
--- a/src/json.c
+++ b/src/json.c
@@ -699,24 +699,6 @@ usage: (json-insert OBJECT &rest ARGS) */)
699} 699}
700 700
701 701
702/* Note that all callers of make_string_from_utf8 and build_string_from_utf8
703 below either pass only value UTF-8 strings or use the function for
704 formatting error messages; in the latter case correctness isn't
705 critical. */
706
707/* Return a unibyte string containing the sequence of UTF-8 encoding
708 units of the UTF-8 representation of STRING. If STRING does not
709 represent a sequence of Unicode scalar values, return a string with
710 unspecified contents. */
711
712static Lisp_Object
713json_encode (Lisp_Object string)
714{
715 /* FIXME: Raise an error if STRING is not a scalar value
716 sequence. */
717 return encode_string_utf_8 (string, Qnil, false, Qt, Qt);
718}
719
720#define JSON_PARSER_INTERNAL_OBJECT_WORKSPACE_SIZE 64 702#define JSON_PARSER_INTERNAL_OBJECT_WORKSPACE_SIZE 64
721#define JSON_PARSER_INTERNAL_BYTE_WORKSPACE_SIZE 512 703#define JSON_PARSER_INTERNAL_BYTE_WORKSPACE_SIZE 512
722 704
@@ -1081,52 +1063,21 @@ json_parse_unicode (struct json_parser *parser)
1081 return v[0] << 12 | v[1] << 8 | v[2] << 4 | v[3]; 1063 return v[0] << 12 | v[1] << 8 | v[2] << 4 | v[3];
1082} 1064}
1083 1065
1084/* Parses an utf-8 code-point encoding (except the first byte), and 1066static AVOID
1085 returns the numeric value of the code-point (without considering 1067utf8_error (struct json_parser *parser)
1086 the first byte) */
1087static int
1088json_handle_utf8_tail_bytes (struct json_parser *parser, int n)
1089{ 1068{
1090 int v = 0; 1069 json_signal_error (parser, Qjson_utf8_decode_error);
1091 for (int i = 0; i < n; i++)
1092 {
1093 int c = json_input_get (parser);
1094 json_byte_workspace_put (parser, c);
1095 if ((c & 0xc0) != 0x80)
1096 json_signal_error (parser, Qjson_utf8_decode_error);
1097 v = (v << 6) | (c & 0x3f);
1098 }
1099 return v;
1100} 1070}
1101 1071
1102/* Reads a JSON string, and puts the result into the byte workspace */ 1072/* Parse a string literal. Optionally prepend a ':'.
1103static void 1073 Return the string or an interned symbol. */
1104json_parse_string (struct json_parser *parser) 1074static Lisp_Object
1105{ 1075json_parse_string (struct json_parser *parser, bool intern, bool leading_colon)
1106 /* a single_uninteresting byte can be simply copied from the input 1076{
1107 to output, it doesn't need any extra care. This means all the 1077 json_byte_workspace_reset (parser);
1108 characters between [0x20;0x7f], except the double quote and 1078 if (leading_colon)
1109 the backslash */ 1079 json_byte_workspace_put (parser, ':');
1110 static const char is_single_uninteresting[256] = { 1080 ptrdiff_t chars_delta = 0; /* nchars - nbytes */
1111 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
1112 /* 0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1113 /* 1 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1114 /* 2 */ 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1115 /* 3 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1116 /* 4 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1117 /* 5 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
1118 /* 6 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1119 /* 7 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1120 /* 8 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1121 /* 9 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1122 /* a */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1123 /* b */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1124 /* c */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1125 /* d */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1126 /* e */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1127 /* f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1128 };
1129
1130 for (;;) 1081 for (;;)
1131 { 1082 {
1132 /* This if is only here for a possible speedup. If there are 4 1083 /* This if is only here for a possible speedup. If there are 4
@@ -1138,10 +1089,10 @@ json_parse_string (struct json_parser *parser)
1138 int c1 = parser->input_current[1]; 1089 int c1 = parser->input_current[1];
1139 int c2 = parser->input_current[2]; 1090 int c2 = parser->input_current[2];
1140 int c3 = parser->input_current[3]; 1091 int c3 = parser->input_current[3];
1141 bool v0 = is_single_uninteresting[c0]; 1092 bool v0 = json_plain_char[c0];
1142 bool v1 = is_single_uninteresting[c1]; 1093 bool v1 = json_plain_char[c1];
1143 bool v2 = is_single_uninteresting[c2]; 1094 bool v2 = json_plain_char[c2];
1144 bool v3 = is_single_uninteresting[c3]; 1095 bool v3 = json_plain_char[c3];
1145 if (v0 && v1 && v2 && v3) 1096 if (v0 && v1 && v2 && v3)
1146 { 1097 {
1147 json_byte_workspace_put (parser, c0); 1098 json_byte_workspace_put (parser, c0);
@@ -1156,43 +1107,62 @@ json_parse_string (struct json_parser *parser)
1156 1107
1157 int c = json_input_get (parser); 1108 int c = json_input_get (parser);
1158 parser->current_column++; 1109 parser->current_column++;
1159 if (is_single_uninteresting[c]) 1110 if (json_plain_char[c])
1160 { 1111 {
1161 json_byte_workspace_put (parser, c); 1112 json_byte_workspace_put (parser, c);
1162 continue; 1113 continue;
1163 } 1114 }
1164 1115
1165 if (c == '"') 1116 if (c == '"')
1166 return;
1167 else if (c & 0x80)
1168 { 1117 {
1169 /* Handle utf-8 encoding */ 1118 ptrdiff_t nbytes
1119 = parser->byte_workspace_current - parser->byte_workspace;
1120 ptrdiff_t nchars = nbytes - chars_delta;
1121 const char *str = (const char *)parser->byte_workspace;
1122 return intern ? intern_c_multibyte (str, nchars, nbytes)
1123 : make_multibyte_string (str, nchars, nbytes);
1124 }
1125
1126 if (c & 0x80)
1127 {
1128 /* Parse UTF-8, strictly. This is the correct thing to do
1129 whether or not the input is a unibyte or multibyte string. */
1170 json_byte_workspace_put (parser, c); 1130 json_byte_workspace_put (parser, c);
1171 if (c < 0xc0) 1131 unsigned char c1 = json_input_get (parser);
1172 json_signal_error (parser, Qjson_utf8_decode_error); 1132 if ((c1 & 0xc0) != 0x80)
1173 else if (c < 0xe0) 1133 utf8_error (parser);
1134 json_byte_workspace_put (parser, c1);
1135 if (c <= 0xc1)
1136 utf8_error (parser);
1137 else if (c <= 0xdf)
1138 chars_delta += 1;
1139 else if (c <= 0xef)
1174 { 1140 {
1175 int n = ((c & 0x1f) << 6 1141 unsigned char c2 = json_input_get (parser);
1176 | json_handle_utf8_tail_bytes (parser, 1)); 1142 if ((c2 & 0xc0) != 0x80)
1177 if (n < 0x80) 1143 utf8_error (parser);
1178 json_signal_error (parser, Qjson_utf8_decode_error); 1144 int v = ((c & 0x0f) << 12) + ((c1 & 0x3f) << 6) + (c2 & 0x3f);
1179 } 1145 if (v < 0x800 || (v >= 0xd800 && v <= 0xdfff))
1180 else if (c < 0xf0) 1146 utf8_error (parser);
1181 { 1147 json_byte_workspace_put (parser, c2);
1182 int n = ((c & 0xf) << 12 1148 chars_delta += 2;
1183 | json_handle_utf8_tail_bytes (parser, 2));
1184 if (n < 0x800 || (n >= 0xd800 && n < 0xe000))
1185 json_signal_error (parser, Qjson_utf8_decode_error);
1186 } 1149 }
1187 else if (c < 0xf8) 1150 else if (c <= 0xf7)
1188 { 1151 {
1189 int n = ((c & 0x7) << 18 1152 unsigned char c2 = json_input_get (parser);
1190 | json_handle_utf8_tail_bytes (parser, 3)); 1153 unsigned char c3 = json_input_get (parser);
1191 if (n < 0x10000 || n > 0x10ffff) 1154 if ((c2 & 0xc0) != 0x80 || (c3 & 0xc0) != 0x80)
1192 json_signal_error (parser, Qjson_utf8_decode_error); 1155 utf8_error (parser);
1156 int v = (((c & 0x07) << 18) + ((c1 & 0x3f) << 12)
1157 + ((c2 & 0x3f) << 6) + (c3 & 0x3f));
1158 if (v < 0x10000 || v > 0x10ffff)
1159 utf8_error (parser);
1160 json_byte_workspace_put (parser, c2);
1161 json_byte_workspace_put (parser, c3);
1162 chars_delta += 3;
1193 } 1163 }
1194 else 1164 else
1195 json_signal_error (parser, Qjson_utf8_decode_error); 1165 utf8_error (parser);
1196 } 1166 }
1197 else if (c == '\\') 1167 else if (c == '\\')
1198 { 1168 {
@@ -1249,6 +1219,7 @@ json_parse_string (struct json_parser *parser)
1249 json_byte_workspace_put (parser, 0xc0 | num >> 6); 1219 json_byte_workspace_put (parser, 0xc0 | num >> 6);
1250 json_byte_workspace_put (parser, 1220 json_byte_workspace_put (parser,
1251 0x80 | (num & 0x3f)); 1221 0x80 | (num & 0x3f));
1222 chars_delta += 1;
1252 } 1223 }
1253 else if (num < 0x10000) 1224 else if (num < 0x10000)
1254 { 1225 {
@@ -1258,6 +1229,7 @@ json_parse_string (struct json_parser *parser)
1258 | ((num >> 6) & 0x3f))); 1229 | ((num >> 6) & 0x3f)));
1259 json_byte_workspace_put (parser, 1230 json_byte_workspace_put (parser,
1260 0x80 | (num & 0x3f)); 1231 0x80 | (num & 0x3f));
1232 chars_delta += 2;
1261 } 1233 }
1262 else 1234 else
1263 { 1235 {
@@ -1270,6 +1242,7 @@ json_parse_string (struct json_parser *parser)
1270 | ((num >> 6) & 0x3f))); 1242 | ((num >> 6) & 0x3f)));
1271 json_byte_workspace_put (parser, 1243 json_byte_workspace_put (parser,
1272 0x80 | (num & 0x3f)); 1244 0x80 | (num & 0x3f));
1245 chars_delta += 3;
1273 } 1246 }
1274 } 1247 }
1275 else 1248 else
@@ -1566,16 +1539,11 @@ json_parse_object (struct json_parser *parser)
1566 if (c != '"') 1539 if (c != '"')
1567 json_signal_error (parser, Qjson_parse_error); 1540 json_signal_error (parser, Qjson_parse_error);
1568 1541
1569 json_byte_workspace_reset (parser);
1570 switch (parser->conf.object_type) 1542 switch (parser->conf.object_type)
1571 { 1543 {
1572 case json_object_hashtable: 1544 case json_object_hashtable:
1573 { 1545 {
1574 json_parse_string (parser); 1546 Lisp_Object key = json_parse_string (parser, false, false);
1575 Lisp_Object key
1576 = make_string_from_utf8 ((char *) parser->byte_workspace,
1577 (parser->byte_workspace_current
1578 - parser->byte_workspace));
1579 Lisp_Object value = json_parse_object_member_value (parser); 1547 Lisp_Object value = json_parse_object_member_value (parser);
1580 json_make_object_workspace_for (parser, 2); 1548 json_make_object_workspace_for (parser, 2);
1581 parser->object_workspace[parser->object_workspace_current] = key; 1549 parser->object_workspace[parser->object_workspace_current] = key;
@@ -1586,13 +1554,7 @@ json_parse_object (struct json_parser *parser)
1586 } 1554 }
1587 case json_object_alist: 1555 case json_object_alist:
1588 { 1556 {
1589 json_parse_string (parser); 1557 Lisp_Object key = json_parse_string (parser, true, false);
1590 char *workspace = (char *) parser->byte_workspace;
1591 ptrdiff_t nbytes
1592 = parser->byte_workspace_current - parser->byte_workspace;
1593 Lisp_Object key = Fintern (make_string_from_utf8 (workspace,
1594 nbytes),
1595 Qnil);
1596 Lisp_Object value = json_parse_object_member_value (parser); 1558 Lisp_Object value = json_parse_object_member_value (parser);
1597 Lisp_Object nc = Fcons (Fcons (key, value), Qnil); 1559 Lisp_Object nc = Fcons (Fcons (key, value), Qnil);
1598 *cdr = nc; 1560 *cdr = nc;
@@ -1601,11 +1563,7 @@ json_parse_object (struct json_parser *parser)
1601 } 1563 }
1602 case json_object_plist: 1564 case json_object_plist:
1603 { 1565 {
1604 json_byte_workspace_put (parser, ':'); 1566 Lisp_Object key = json_parse_string (parser, true, true);
1605 json_parse_string (parser);
1606 Lisp_Object key = intern_1 ((char *) parser->byte_workspace,
1607 (parser->byte_workspace_current
1608 - parser->byte_workspace));
1609 Lisp_Object value = json_parse_object_member_value (parser); 1567 Lisp_Object value = json_parse_object_member_value (parser);
1610 Lisp_Object nc = Fcons (key, Qnil); 1568 Lisp_Object nc = Fcons (key, Qnil);
1611 *cdr = nc; 1569 *cdr = nc;
@@ -1692,15 +1650,7 @@ json_parse_value (struct json_parser *parser, int c)
1692 else if (c == '[') 1650 else if (c == '[')
1693 return json_parse_array (parser); 1651 return json_parse_array (parser);
1694 else if (c == '"') 1652 else if (c == '"')
1695 { 1653 return json_parse_string (parser, false, false);
1696 json_byte_workspace_reset (parser);
1697 json_parse_string (parser);
1698 Lisp_Object result
1699 = make_string_from_utf8 ((const char *) parser->byte_workspace,
1700 (parser->byte_workspace_current
1701 - parser->byte_workspace));
1702 return result;
1703 }
1704 else if ((c >= '0' && c <= '9') || (c == '-')) 1654 else if ((c >= '0' && c <= '9') || (c == '-'))
1705 return json_parse_number (parser, c); 1655 return json_parse_number (parser, c);
1706 else 1656 else
@@ -1816,14 +1766,13 @@ usage: (json-parse-string STRING &rest ARGS) */)
1816 1766
1817 Lisp_Object string = args[0]; 1767 Lisp_Object string = args[0];
1818 CHECK_STRING (string); 1768 CHECK_STRING (string);
1819 Lisp_Object encoded = json_encode (string);
1820 struct json_configuration conf 1769 struct json_configuration conf
1821 = { json_object_hashtable, json_array_array, QCnull, QCfalse }; 1770 = { json_object_hashtable, json_array_array, QCnull, QCfalse };
1822 json_parse_args (nargs - 1, args + 1, &conf, true); 1771 json_parse_args (nargs - 1, args + 1, &conf, true);
1823 1772
1824 struct json_parser p; 1773 struct json_parser p;
1825 const unsigned char *begin = (const unsigned char *) SSDATA (encoded); 1774 const unsigned char *begin = SDATA (string);
1826 json_parser_init (&p, conf, begin, begin + SBYTES (encoded), NULL, NULL); 1775 json_parser_init (&p, conf, begin, begin + SBYTES (string), NULL, NULL);
1827 record_unwind_protect_ptr (json_parser_done, &p); 1776 record_unwind_protect_ptr (json_parser_done, &p);
1828 1777
1829 return unbind_to (count, 1778 return unbind_to (count,
diff --git a/src/lisp.h b/src/lisp.h
index 43a29489a25..3cb4361e75e 100644
--- a/src/lisp.h
+++ b/src/lisp.h
@@ -4744,6 +4744,8 @@ extern ptrdiff_t evxprintf (char **, ptrdiff_t *, char *, ptrdiff_t,
4744extern Lisp_Object intern_1 (const char *, ptrdiff_t); 4744extern Lisp_Object intern_1 (const char *, ptrdiff_t);
4745extern Lisp_Object intern_c_string_1 (const char *, ptrdiff_t); 4745extern Lisp_Object intern_c_string_1 (const char *, ptrdiff_t);
4746extern Lisp_Object intern_driver (Lisp_Object, Lisp_Object, Lisp_Object); 4746extern Lisp_Object intern_driver (Lisp_Object, Lisp_Object, Lisp_Object);
4747extern Lisp_Object intern_c_multibyte (const char *str,
4748 ptrdiff_t nchars, ptrdiff_t nbytes);
4747extern void init_symbol (Lisp_Object, Lisp_Object); 4749extern void init_symbol (Lisp_Object, Lisp_Object);
4748extern Lisp_Object oblookup (Lisp_Object, const char *, ptrdiff_t, ptrdiff_t); 4750extern Lisp_Object oblookup (Lisp_Object, const char *, ptrdiff_t, ptrdiff_t);
4749INLINE void 4751INLINE void
diff --git a/src/lread.c b/src/lread.c
index 1cb941e84fc..09a5589fd0c 100644
--- a/src/lread.c
+++ b/src/lread.c
@@ -4993,6 +4993,18 @@ intern_c_string_1 (const char *str, ptrdiff_t len)
4993 return tem; 4993 return tem;
4994} 4994}
4995 4995
4996/* Intern STR of NBYTES bytes and NCHARS characters in the default obarray. */
4997Lisp_Object
4998intern_c_multibyte (const char *str, ptrdiff_t nchars, ptrdiff_t nbytes)
4999{
5000 Lisp_Object obarray = check_obarray (Vobarray);
5001 Lisp_Object sym = oblookup (obarray, str, nchars, nbytes);
5002 if (BARE_SYMBOL_P (sym))
5003 return sym;
5004 return intern_driver (make_multibyte_string (str, nchars, nbytes),
5005 obarray, sym);
5006}
5007
4996static void 5008static void
4997define_symbol (Lisp_Object sym, char const *str) 5009define_symbol (Lisp_Object sym, char const *str)
4998{ 5010{
diff --git a/test/src/json-tests.el b/test/src/json-tests.el
index fb2384d4a8d..a1bafadaa87 100644
--- a/test/src/json-tests.el
+++ b/test/src/json-tests.el
@@ -25,6 +25,7 @@
25 25
26(require 'cl-lib) 26(require 'cl-lib)
27(require 'map) 27(require 'map)
28(require 'subr-x)
28 29
29(declare-function json-serialize "json.c" (object &rest args)) 30(declare-function json-serialize "json.c" (object &rest args))
30(declare-function json-insert "json.c" (object &rest args)) 31(declare-function json-insert "json.c" (object &rest args))
@@ -155,6 +156,9 @@
155 ) 156 )
156 157
157(ert-deftest json-parse-string/object () 158(ert-deftest json-parse-string/object ()
159 :expected-result :failed
160 ;; FIXME: This currently fails. Should the parser deduplicate keys?
161 ;; Never, always, or for alist and plist only?
158 (let ((input 162 (let ((input
159 "{ \"abc\" : [1, 2, true], \"def\" : null, \"abc\" : [9, false] }\n")) 163 "{ \"abc\" : [1, 2, true], \"def\" : null, \"abc\" : [9, false] }\n"))
160 (let ((actual (json-parse-string input))) 164 (let ((actual (json-parse-string input)))
@@ -167,6 +171,15 @@
167 (should (equal (json-parse-string input :object-type 'plist) 171 (should (equal (json-parse-string input :object-type 'plist)
168 '(:abc [9 :false] :def :null))))) 172 '(:abc [9 :false] :def :null)))))
169 173
174(ert-deftest json-parse-string/object-unicode-keys ()
175 (let ((input "{\"é\":1,\"☃\":2,\"𐌐\":3}"))
176 (let ((actual (json-parse-string input)))
177 (should (equal (sort (hash-table-keys actual)) '("é" "☃" "𐌐"))))
178 (should (equal (json-parse-string input :object-type 'alist)
179 '((é . 1) (☃ . 2) (𐌐 . 3))))
180 (should (equal (json-parse-string input :object-type 'plist)
181 '(:é 1 :☃ 2 :𐌐 3)))))
182
170(ert-deftest json-parse-string/array () 183(ert-deftest json-parse-string/array ()
171 (let ((input "[\"a\", 1, [\"b\", 2]]")) 184 (let ((input "[\"a\", 1, [\"b\", 2]]"))
172 (should (equal (json-parse-string input) 185 (should (equal (json-parse-string input)
@@ -182,8 +195,8 @@
182 ["\nasdфывfgh\t"])) 195 ["\nasdфывfgh\t"]))
183 (should (equal (json-parse-string "[\"\\uD834\\uDD1E\"]") ["\U0001D11E"])) 196 (should (equal (json-parse-string "[\"\\uD834\\uDD1E\"]") ["\U0001D11E"]))
184 (should-error (json-parse-string "foo") :type 'json-parse-error) 197 (should-error (json-parse-string "foo") :type 'json-parse-error)
185 ;; FIXME: Is this the right behavior? 198 (should-error (json-parse-string "[\"\u00C4\xC3\x84\"]")
186 (should (equal (json-parse-string "[\"\u00C4\xC3\x84\"]") ["\u00C4\u00C4"]))) 199 :type 'json-utf8-decode-error))
187 200
188(ert-deftest json-serialize/string () 201(ert-deftest json-serialize/string ()
189 (should (equal (json-serialize ["foo"]) "[\"foo\"]")) 202 (should (equal (json-serialize ["foo"]) "[\"foo\"]"))
@@ -201,9 +214,23 @@
201 (should-error (json-serialize ["u\xCCv"]) :type 'wrong-type-argument) 214 (should-error (json-serialize ["u\xCCv"]) :type 'wrong-type-argument)
202 (should-error (json-serialize ["u\u00C4\xCCv"]) :type 'wrong-type-argument)) 215 (should-error (json-serialize ["u\u00C4\xCCv"]) :type 'wrong-type-argument))
203 216
217(ert-deftest json-parse-string/short ()
218 :expected-result :failed
219 (should-error (json-parse-string "") :type 'json-end-of-file)
220 (should-error (json-parse-string " ") :type 'json-end-of-file)
221 ;; BUG: currently results in `json-end-of-file' for short non-empty inputs.
222 (dolist (s '("a" "ab" "abc" "abcd"
223 "t" "tr" "tru" "truE" "truee"
224 "n" "nu" "nul" "nulL" "nulll"
225 "f" "fa" "fal" "fals" "falsE" "falsee"))
226 (condition-case err
227 (json-parse-string s)
228 (error
229 (should (eq (car err) 'json-parse-error)))
230 (:success (error "parsing %S should fail" s)))))
231
204(ert-deftest json-parse-string/null () 232(ert-deftest json-parse-string/null ()
205 (should-error (json-parse-string "\x00") :type 'wrong-type-argument) 233 (should (equal (json-parse-string "[\"a\\u0000b\"]") ["a\0b"]))
206 (should (json-parse-string "[\"a\\u0000b\"]"))
207 (let* ((string "{\"foo\":\"this is a string including a literal \\u0000\"}") 234 (let* ((string "{\"foo\":\"this is a string including a literal \\u0000\"}")
208 (data (json-parse-string string))) 235 (data (json-parse-string string)))
209 (should (hash-table-p data)) 236 (should (hash-table-p data))
@@ -214,30 +241,34 @@
214https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt. 241https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt.
215Test with both unibyte and multibyte strings." 242Test with both unibyte and multibyte strings."
216 ;; Invalid UTF-8 code unit sequences. 243 ;; Invalid UTF-8 code unit sequences.
217 (should-error (json-parse-string "[\"\x80\"]") :type 'json-parse-error) 244 (should-error (json-parse-string "[\"\x80\"]") :type 'json-utf8-decode-error)
218 (should-error (json-parse-string "[\"\u00C4\x80\"]") :type 'json-parse-error) 245 (should-error (json-parse-string "[\"\u00C4\x80\"]")
219 (should-error (json-parse-string "[\"\xBF\"]") :type 'json-parse-error) 246 :type 'json-utf8-decode-error)
220 (should-error (json-parse-string "[\"\u00C4\xBF\"]") :type 'json-parse-error) 247 (should-error (json-parse-string "[\"\xBF\"]") :type 'json-utf8-decode-error)
221 (should-error (json-parse-string "[\"\xFE\"]") :type 'json-parse-error) 248 (should-error (json-parse-string "[\"\u00C4\xBF\"]")
222 (should-error (json-parse-string "[\"\u00C4\xFE\"]") :type 'json-parse-error) 249 :type 'json-utf8-decode-error)
223 (should-error (json-parse-string "[\"\xC0\xAF\"]") :type 'json-parse-error) 250 (should-error (json-parse-string "[\"\xFE\"]") :type 'json-utf8-decode-error)
251 (should-error (json-parse-string "[\"\u00C4\xFE\"]")
252 :type 'json-utf8-decode-error)
253 (should-error (json-parse-string "[\"\xC0\xAF\"]")
254 :type 'json-utf8-decode-error)
224 (should-error (json-parse-string "[\"\u00C4\xC0\xAF\"]") 255 (should-error (json-parse-string "[\"\u00C4\xC0\xAF\"]")
225 :type 'json-parse-error) 256 :type 'json-utf8-decode-error)
226 (should-error (json-parse-string "[\"\u00C4\xC0\x80\"]") 257 (should-error (json-parse-string "[\"\u00C4\xC0\x80\"]")
227 :type 'json-parse-error) 258 :type 'json-utf8-decode-error)
228 ;; Surrogates. 259 ;; Surrogates.
229 (should-error (json-parse-string "[\"\uDB7F\"]") 260 (should-error (json-parse-string "[\"\uDB7F\"]")
230 :type 'json-parse-error) 261 :type 'json-utf8-decode-error)
231 (should-error (json-parse-string "[\"\xED\xAD\xBF\"]") 262 (should-error (json-parse-string "[\"\xED\xAD\xBF\"]")
232 :type 'json-parse-error) 263 :type 'json-utf8-decode-error)
233 (should-error (json-parse-string "[\"\u00C4\xED\xAD\xBF\"]") 264 (should-error (json-parse-string "[\"\u00C4\xED\xAD\xBF\"]")
234 :type 'json-parse-error) 265 :type 'json-utf8-decode-error)
235 (should-error (json-parse-string "[\"\uDB7F\uDFFF\"]") 266 (should-error (json-parse-string "[\"\uDB7F\uDFFF\"]")
236 :type 'json-parse-error) 267 :type 'json-utf8-decode-error)
237 (should-error (json-parse-string "[\"\xED\xAD\xBF\xED\xBF\xBF\"]") 268 (should-error (json-parse-string "[\"\xED\xAD\xBF\xED\xBF\xBF\"]")
238 :type 'json-parse-error) 269 :type 'json-utf8-decode-error)
239 (should-error (json-parse-string "[\"\u00C4\xED\xAD\xBF\xED\xBF\xBF\"]") 270 (should-error (json-parse-string "[\"\u00C4\xED\xAD\xBF\xED\xBF\xBF\"]")
240 :type 'json-parse-error)) 271 :type 'json-utf8-decode-error))
241 272
242(ert-deftest json-parse-string/incomplete () 273(ert-deftest json-parse-string/incomplete ()
243 (should-error (json-parse-string "[123") :type 'json-end-of-file)) 274 (should-error (json-parse-string "[123") :type 'json-end-of-file))