aboutsummaryrefslogtreecommitdiffstats
path: root/test/src
diff options
context:
space:
mode:
author Mattias Engdegård 2024-03-31 15:00:00 +0200
committer Mattias Engdegård 2024-04-01 10:41:46 +0200
commit 734bd005aa0fa955cf1a46d3a60a4d6ef5e7e3d1 (patch)
tree 28e48c30e5916247065e8e66d9a80d9991ec1c2b /test/src
parent f178a6d8006f1e8afe06bb71d0a413622d73f131 (diff)
download emacs-734bd005aa0fa955cf1a46d3a60a4d6ef5e7e3d1.tar.gz
emacs-734bd005aa0fa955cf1a46d3a60a4d6ef5e7e3d1.zip
Faster JSON parsing
Speed up JSON parsing substantially by only UTF-8-parsing string literals and only exactly once. Previously, json-parse-string always first parsed the entire input and copied it to a new string, and then validated each string literal twice. We no longer create an extra new string when interning an alist key, nor do we garble plist keys with Unicode characters.

* src/lread.c (intern_c_multibyte): New.
* src/json.c (json_encode): Remove.
(utf8_error): New.
(json_parse_string): Faster and more careful UTF-8 decoding. Create and return a new multibyte string or symbol without extra decoding. All callers adapted.
(Fjson_parse_string): Skip expensive input pre-decoding.
* test/src/json-tests.el (json-parse-string/object-unicode-keys)
(json-parse-string/short): New.
(json-parse-string/string, json-parse-string/invalid-unicode): Adapt tests.
* etc/NEWS: Mentioned change in errors.
Diffstat (limited to 'test/src')
-rw-r--r-- test/src/json-tests.el 69
1 files changed, 50 insertions, 19 deletions
diff --git a/test/src/json-tests.el b/test/src/json-tests.el
index fb2384d4a8d..a1bafadaa87 100644
--- a/test/src/json-tests.el
+++ b/test/src/json-tests.el
@@ -25,6 +25,7 @@
25 25
26(require 'cl-lib) 26(require 'cl-lib)
27(require 'map) 27(require 'map)
28(require 'subr-x)
28 29
29(declare-function json-serialize "json.c" (object &rest args)) 30(declare-function json-serialize "json.c" (object &rest args))
30(declare-function json-insert "json.c" (object &rest args)) 31(declare-function json-insert "json.c" (object &rest args))
@@ -155,6 +156,9 @@
155 ) 156 )
156 157
157(ert-deftest json-parse-string/object () 158(ert-deftest json-parse-string/object ()
159 :expected-result :failed
160 ;; FIXME: This currently fails. Should the parser deduplicate keys?
161 ;; Never, always, or for alist and plist only?
158 (let ((input 162 (let ((input
159 "{ \"abc\" : [1, 2, true], \"def\" : null, \"abc\" : [9, false] }\n")) 163 "{ \"abc\" : [1, 2, true], \"def\" : null, \"abc\" : [9, false] }\n"))
160 (let ((actual (json-parse-string input))) 164 (let ((actual (json-parse-string input)))
@@ -167,6 +171,15 @@
167 (should (equal (json-parse-string input :object-type 'plist) 171 (should (equal (json-parse-string input :object-type 'plist)
168 '(:abc [9 :false] :def :null))))) 172 '(:abc [9 :false] :def :null)))))
169 173
174(ert-deftest json-parse-string/object-unicode-keys ()
175 (let ((input "{\"é\":1,\"☃\":2,\"𐌐\":3}"))
176 (let ((actual (json-parse-string input)))
177 (should (equal (sort (hash-table-keys actual)) '("é" "☃" "𐌐"))))
178 (should (equal (json-parse-string input :object-type 'alist)
179 '((é . 1) (☃ . 2) (𐌐 . 3))))
180 (should (equal (json-parse-string input :object-type 'plist)
181 '(:é 1 :☃ 2 :𐌐 3)))))
182
170(ert-deftest json-parse-string/array () 183(ert-deftest json-parse-string/array ()
171 (let ((input "[\"a\", 1, [\"b\", 2]]")) 184 (let ((input "[\"a\", 1, [\"b\", 2]]"))
172 (should (equal (json-parse-string input) 185 (should (equal (json-parse-string input)
@@ -182,8 +195,8 @@
182 ["\nasdфывfgh\t"])) 195 ["\nasdфывfgh\t"]))
183 (should (equal (json-parse-string "[\"\\uD834\\uDD1E\"]") ["\U0001D11E"])) 196 (should (equal (json-parse-string "[\"\\uD834\\uDD1E\"]") ["\U0001D11E"]))
184 (should-error (json-parse-string "foo") :type 'json-parse-error) 197 (should-error (json-parse-string "foo") :type 'json-parse-error)
185 ;; FIXME: Is this the right behavior? 198 (should-error (json-parse-string "[\"\u00C4\xC3\x84\"]")
186 (should (equal (json-parse-string "[\"\u00C4\xC3\x84\"]") ["\u00C4\u00C4"]))) 199 :type 'json-utf8-decode-error))
187 200
188(ert-deftest json-serialize/string () 201(ert-deftest json-serialize/string ()
189 (should (equal (json-serialize ["foo"]) "[\"foo\"]")) 202 (should (equal (json-serialize ["foo"]) "[\"foo\"]"))
@@ -201,9 +214,23 @@
201 (should-error (json-serialize ["u\xCCv"]) :type 'wrong-type-argument) 214 (should-error (json-serialize ["u\xCCv"]) :type 'wrong-type-argument)
202 (should-error (json-serialize ["u\u00C4\xCCv"]) :type 'wrong-type-argument)) 215 (should-error (json-serialize ["u\u00C4\xCCv"]) :type 'wrong-type-argument))
203 216
217(ert-deftest json-parse-string/short ()
218 :expected-result :failed
219 (should-error (json-parse-string "") :type 'json-end-of-file)
220 (should-error (json-parse-string " ") :type 'json-end-of-file)
221 ;; BUG: currently results in `json-end-of-file' for short non-empty inputs.
222 (dolist (s '("a" "ab" "abc" "abcd"
223 "t" "tr" "tru" "truE" "truee"
224 "n" "nu" "nul" "nulL" "nulll"
225 "f" "fa" "fal" "fals" "falsE" "falsee"))
226 (condition-case err
227 (json-parse-string s)
228 (error
229 (should (eq (car err) 'json-parse-error)))
230 (:success (error "parsing %S should fail" s)))))
231
204(ert-deftest json-parse-string/null () 232(ert-deftest json-parse-string/null ()
205 (should-error (json-parse-string "\x00") :type 'wrong-type-argument) 233 (should (equal (json-parse-string "[\"a\\u0000b\"]") ["a\0b"]))
206 (should (json-parse-string "[\"a\\u0000b\"]"))
207 (let* ((string "{\"foo\":\"this is a string including a literal \\u0000\"}") 234 (let* ((string "{\"foo\":\"this is a string including a literal \\u0000\"}")
208 (data (json-parse-string string))) 235 (data (json-parse-string string)))
209 (should (hash-table-p data)) 236 (should (hash-table-p data))
@@ -214,30 +241,34 @@
214https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt. 241https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt.
215Test with both unibyte and multibyte strings." 242Test with both unibyte and multibyte strings."
216 ;; Invalid UTF-8 code unit sequences. 243 ;; Invalid UTF-8 code unit sequences.
217 (should-error (json-parse-string "[\"\x80\"]") :type 'json-parse-error) 244 (should-error (json-parse-string "[\"\x80\"]") :type 'json-utf8-decode-error)
218 (should-error (json-parse-string "[\"\u00C4\x80\"]") :type 'json-parse-error) 245 (should-error (json-parse-string "[\"\u00C4\x80\"]")
219 (should-error (json-parse-string "[\"\xBF\"]") :type 'json-parse-error) 246 :type 'json-utf8-decode-error)
220 (should-error (json-parse-string "[\"\u00C4\xBF\"]") :type 'json-parse-error) 247 (should-error (json-parse-string "[\"\xBF\"]") :type 'json-utf8-decode-error)
221 (should-error (json-parse-string "[\"\xFE\"]") :type 'json-parse-error) 248 (should-error (json-parse-string "[\"\u00C4\xBF\"]")
222 (should-error (json-parse-string "[\"\u00C4\xFE\"]") :type 'json-parse-error) 249 :type 'json-utf8-decode-error)
223 (should-error (json-parse-string "[\"\xC0\xAF\"]") :type 'json-parse-error) 250 (should-error (json-parse-string "[\"\xFE\"]") :type 'json-utf8-decode-error)
251 (should-error (json-parse-string "[\"\u00C4\xFE\"]")
252 :type 'json-utf8-decode-error)
253 (should-error (json-parse-string "[\"\xC0\xAF\"]")
254 :type 'json-utf8-decode-error)
224 (should-error (json-parse-string "[\"\u00C4\xC0\xAF\"]") 255 (should-error (json-parse-string "[\"\u00C4\xC0\xAF\"]")
225 :type 'json-parse-error) 256 :type 'json-utf8-decode-error)
226 (should-error (json-parse-string "[\"\u00C4\xC0\x80\"]") 257 (should-error (json-parse-string "[\"\u00C4\xC0\x80\"]")
227 :type 'json-parse-error) 258 :type 'json-utf8-decode-error)
228 ;; Surrogates. 259 ;; Surrogates.
229 (should-error (json-parse-string "[\"\uDB7F\"]") 260 (should-error (json-parse-string "[\"\uDB7F\"]")
230 :type 'json-parse-error) 261 :type 'json-utf8-decode-error)
231 (should-error (json-parse-string "[\"\xED\xAD\xBF\"]") 262 (should-error (json-parse-string "[\"\xED\xAD\xBF\"]")
232 :type 'json-parse-error) 263 :type 'json-utf8-decode-error)
233 (should-error (json-parse-string "[\"\u00C4\xED\xAD\xBF\"]") 264 (should-error (json-parse-string "[\"\u00C4\xED\xAD\xBF\"]")
234 :type 'json-parse-error) 265 :type 'json-utf8-decode-error)
235 (should-error (json-parse-string "[\"\uDB7F\uDFFF\"]") 266 (should-error (json-parse-string "[\"\uDB7F\uDFFF\"]")
236 :type 'json-parse-error) 267 :type 'json-utf8-decode-error)
237 (should-error (json-parse-string "[\"\xED\xAD\xBF\xED\xBF\xBF\"]") 268 (should-error (json-parse-string "[\"\xED\xAD\xBF\xED\xBF\xBF\"]")
238 :type 'json-parse-error) 269 :type 'json-utf8-decode-error)
239 (should-error (json-parse-string "[\"\u00C4\xED\xAD\xBF\xED\xBF\xBF\"]") 270 (should-error (json-parse-string "[\"\u00C4\xED\xAD\xBF\xED\xBF\xBF\"]")
240 :type 'json-parse-error)) 271 :type 'json-utf8-decode-error))
241 272
242(ert-deftest json-parse-string/incomplete () 273(ert-deftest json-parse-string/incomplete ()
243 (should-error (json-parse-string "[123") :type 'json-end-of-file)) 274 (should-error (json-parse-string "[123") :type 'json-end-of-file))