diff options
| author | Mattias Engdegård | 2024-03-31 15:00:00 +0200 |
|---|---|---|
| committer | Mattias Engdegård | 2024-04-01 10:41:46 +0200 |
| commit | 734bd005aa0fa955cf1a46d3a60a4d6ef5e7e3d1 (patch) | |
| tree | 28e48c30e5916247065e8e66d9a80d9991ec1c2b /test/src | |
| parent | f178a6d8006f1e8afe06bb71d0a413622d73f131 (diff) | |
| download | emacs-734bd005aa0fa955cf1a46d3a60a4d6ef5e7e3d1.tar.gz emacs-734bd005aa0fa955cf1a46d3a60a4d6ef5e7e3d1.zip | |
Faster JSON parsing
Speed up JSON parsing substantially by UTF-8-decoding only string
literals, and each of them exactly once. Previously, json-parse-string always
first parsed the entire input and copied it to a new string, and then
validated each string literal twice.
We no longer create an extra new string when interning an alist key,
nor do we garble plist keys with Unicode characters.
* src/lread.c (intern_c_multibyte): New.
* src/json.c (json_encode): Remove.
(utf8_error): New.
(json_parse_string): Faster and more careful UTF-8 decoding.
Create and return a new multibyte string or symbol without extra
decoding. All callers adapted.
(Fjson_parse_string): Skip expensive input pre-decoding.
* test/src/json-tests.el (json-parse-string/object-unicode-keys)
(json-parse-string/short): New.
(json-parse-string/string, json-parse-string/invalid-unicode):
Adapt tests.
* etc/NEWS: Mention change in errors.
Diffstat (limited to 'test/src')
| -rw-r--r-- | test/src/json-tests.el | 69 |
1 files changed, 50 insertions, 19 deletions
diff --git a/test/src/json-tests.el b/test/src/json-tests.el index fb2384d4a8d..a1bafadaa87 100644 --- a/test/src/json-tests.el +++ b/test/src/json-tests.el | |||
| @@ -25,6 +25,7 @@ | |||
| 25 | 25 | ||
| 26 | (require 'cl-lib) | 26 | (require 'cl-lib) |
| 27 | (require 'map) | 27 | (require 'map) |
| 28 | (require 'subr-x) | ||
| 28 | 29 | ||
| 29 | (declare-function json-serialize "json.c" (object &rest args)) | 30 | (declare-function json-serialize "json.c" (object &rest args)) |
| 30 | (declare-function json-insert "json.c" (object &rest args)) | 31 | (declare-function json-insert "json.c" (object &rest args)) |
| @@ -155,6 +156,9 @@ | |||
| 155 | ) | 156 | ) |
| 156 | 157 | ||
| 157 | (ert-deftest json-parse-string/object () | 158 | (ert-deftest json-parse-string/object () |
| 159 | :expected-result :failed | ||
| 160 | ;; FIXME: This currently fails. Should the parser deduplicate keys? | ||
| 161 | ;; Never, always, or for alist and plist only? | ||
| 158 | (let ((input | 162 | (let ((input |
| 159 | "{ \"abc\" : [1, 2, true], \"def\" : null, \"abc\" : [9, false] }\n")) | 163 | "{ \"abc\" : [1, 2, true], \"def\" : null, \"abc\" : [9, false] }\n")) |
| 160 | (let ((actual (json-parse-string input))) | 164 | (let ((actual (json-parse-string input))) |
| @@ -167,6 +171,15 @@ | |||
| 167 | (should (equal (json-parse-string input :object-type 'plist) | 171 | (should (equal (json-parse-string input :object-type 'plist) |
| 168 | '(:abc [9 :false] :def :null))))) | 172 | '(:abc [9 :false] :def :null))))) |
| 169 | 173 | ||
| 174 | (ert-deftest json-parse-string/object-unicode-keys () | ||
| 175 | (let ((input "{\"é\":1,\"☃\":2,\"𐌐\":3}")) | ||
| 176 | (let ((actual (json-parse-string input))) | ||
| 177 | (should (equal (sort (hash-table-keys actual)) '("é" "☃" "𐌐")))) | ||
| 178 | (should (equal (json-parse-string input :object-type 'alist) | ||
| 179 | '((é . 1) (☃ . 2) (𐌐 . 3)))) | ||
| 180 | (should (equal (json-parse-string input :object-type 'plist) | ||
| 181 | '(:é 1 :☃ 2 :𐌐 3))))) | ||
| 182 | |||
| 170 | (ert-deftest json-parse-string/array () | 183 | (ert-deftest json-parse-string/array () |
| 171 | (let ((input "[\"a\", 1, [\"b\", 2]]")) | 184 | (let ((input "[\"a\", 1, [\"b\", 2]]")) |
| 172 | (should (equal (json-parse-string input) | 185 | (should (equal (json-parse-string input) |
| @@ -182,8 +195,8 @@ | |||
| 182 | ["\nasdфывfgh\t"])) | 195 | ["\nasdфывfgh\t"])) |
| 183 | (should (equal (json-parse-string "[\"\\uD834\\uDD1E\"]") ["\U0001D11E"])) | 196 | (should (equal (json-parse-string "[\"\\uD834\\uDD1E\"]") ["\U0001D11E"])) |
| 184 | (should-error (json-parse-string "foo") :type 'json-parse-error) | 197 | (should-error (json-parse-string "foo") :type 'json-parse-error) |
| 185 | ;; FIXME: Is this the right behavior? | 198 | (should-error (json-parse-string "[\"\u00C4\xC3\x84\"]") |
| 186 | (should (equal (json-parse-string "[\"\u00C4\xC3\x84\"]") ["\u00C4\u00C4"]))) | 199 | :type 'json-utf8-decode-error)) |
| 187 | 200 | ||
| 188 | (ert-deftest json-serialize/string () | 201 | (ert-deftest json-serialize/string () |
| 189 | (should (equal (json-serialize ["foo"]) "[\"foo\"]")) | 202 | (should (equal (json-serialize ["foo"]) "[\"foo\"]")) |
| @@ -201,9 +214,23 @@ | |||
| 201 | (should-error (json-serialize ["u\xCCv"]) :type 'wrong-type-argument) | 214 | (should-error (json-serialize ["u\xCCv"]) :type 'wrong-type-argument) |
| 202 | (should-error (json-serialize ["u\u00C4\xCCv"]) :type 'wrong-type-argument)) | 215 | (should-error (json-serialize ["u\u00C4\xCCv"]) :type 'wrong-type-argument)) |
| 203 | 216 | ||
| 217 | (ert-deftest json-parse-string/short () | ||
| 218 | :expected-result :failed | ||
| 219 | (should-error (json-parse-string "") :type 'json-end-of-file) | ||
| 220 | (should-error (json-parse-string " ") :type 'json-end-of-file) | ||
| 221 | ;; BUG: currently results in `json-end-of-file' for short non-empty inputs. | ||
| 222 | (dolist (s '("a" "ab" "abc" "abcd" | ||
| 223 | "t" "tr" "tru" "truE" "truee" | ||
| 224 | "n" "nu" "nul" "nulL" "nulll" | ||
| 225 | "f" "fa" "fal" "fals" "falsE" "falsee")) | ||
| 226 | (condition-case err | ||
| 227 | (json-parse-string s) | ||
| 228 | (error | ||
| 229 | (should (eq (car err) 'json-parse-error))) | ||
| 230 | (:success (error "parsing %S should fail" s))))) | ||
| 231 | |||
| 204 | (ert-deftest json-parse-string/null () | 232 | (ert-deftest json-parse-string/null () |
| 205 | (should-error (json-parse-string "\x00") :type 'wrong-type-argument) | 233 | (should (equal (json-parse-string "[\"a\\u0000b\"]") ["a\0b"])) |
| 206 | (should (json-parse-string "[\"a\\u0000b\"]")) | ||
| 207 | (let* ((string "{\"foo\":\"this is a string including a literal \\u0000\"}") | 234 | (let* ((string "{\"foo\":\"this is a string including a literal \\u0000\"}") |
| 208 | (data (json-parse-string string))) | 235 | (data (json-parse-string string))) |
| 209 | (should (hash-table-p data)) | 236 | (should (hash-table-p data)) |
| @@ -214,30 +241,34 @@ | |||
| 214 | https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt. | 241 | https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt. |
| 215 | Test with both unibyte and multibyte strings." | 242 | Test with both unibyte and multibyte strings." |
| 216 | ;; Invalid UTF-8 code unit sequences. | 243 | ;; Invalid UTF-8 code unit sequences. |
| 217 | (should-error (json-parse-string "[\"\x80\"]") :type 'json-parse-error) | 244 | (should-error (json-parse-string "[\"\x80\"]") :type 'json-utf8-decode-error) |
| 218 | (should-error (json-parse-string "[\"\u00C4\x80\"]") :type 'json-parse-error) | 245 | (should-error (json-parse-string "[\"\u00C4\x80\"]") |
| 219 | (should-error (json-parse-string "[\"\xBF\"]") :type 'json-parse-error) | 246 | :type 'json-utf8-decode-error) |
| 220 | (should-error (json-parse-string "[\"\u00C4\xBF\"]") :type 'json-parse-error) | 247 | (should-error (json-parse-string "[\"\xBF\"]") :type 'json-utf8-decode-error) |
| 221 | (should-error (json-parse-string "[\"\xFE\"]") :type 'json-parse-error) | 248 | (should-error (json-parse-string "[\"\u00C4\xBF\"]") |
| 222 | (should-error (json-parse-string "[\"\u00C4\xFE\"]") :type 'json-parse-error) | 249 | :type 'json-utf8-decode-error) |
| 223 | (should-error (json-parse-string "[\"\xC0\xAF\"]") :type 'json-parse-error) | 250 | (should-error (json-parse-string "[\"\xFE\"]") :type 'json-utf8-decode-error) |
| 251 | (should-error (json-parse-string "[\"\u00C4\xFE\"]") | ||
| 252 | :type 'json-utf8-decode-error) | ||
| 253 | (should-error (json-parse-string "[\"\xC0\xAF\"]") | ||
| 254 | :type 'json-utf8-decode-error) | ||
| 224 | (should-error (json-parse-string "[\"\u00C4\xC0\xAF\"]") | 255 | (should-error (json-parse-string "[\"\u00C4\xC0\xAF\"]") |
| 225 | :type 'json-parse-error) | 256 | :type 'json-utf8-decode-error) |
| 226 | (should-error (json-parse-string "[\"\u00C4\xC0\x80\"]") | 257 | (should-error (json-parse-string "[\"\u00C4\xC0\x80\"]") |
| 227 | :type 'json-parse-error) | 258 | :type 'json-utf8-decode-error) |
| 228 | ;; Surrogates. | 259 | ;; Surrogates. |
| 229 | (should-error (json-parse-string "[\"\uDB7F\"]") | 260 | (should-error (json-parse-string "[\"\uDB7F\"]") |
| 230 | :type 'json-parse-error) | 261 | :type 'json-utf8-decode-error) |
| 231 | (should-error (json-parse-string "[\"\xED\xAD\xBF\"]") | 262 | (should-error (json-parse-string "[\"\xED\xAD\xBF\"]") |
| 232 | :type 'json-parse-error) | 263 | :type 'json-utf8-decode-error) |
| 233 | (should-error (json-parse-string "[\"\u00C4\xED\xAD\xBF\"]") | 264 | (should-error (json-parse-string "[\"\u00C4\xED\xAD\xBF\"]") |
| 234 | :type 'json-parse-error) | 265 | :type 'json-utf8-decode-error) |
| 235 | (should-error (json-parse-string "[\"\uDB7F\uDFFF\"]") | 266 | (should-error (json-parse-string "[\"\uDB7F\uDFFF\"]") |
| 236 | :type 'json-parse-error) | 267 | :type 'json-utf8-decode-error) |
| 237 | (should-error (json-parse-string "[\"\xED\xAD\xBF\xED\xBF\xBF\"]") | 268 | (should-error (json-parse-string "[\"\xED\xAD\xBF\xED\xBF\xBF\"]") |
| 238 | :type 'json-parse-error) | 269 | :type 'json-utf8-decode-error) |
| 239 | (should-error (json-parse-string "[\"\u00C4\xED\xAD\xBF\xED\xBF\xBF\"]") | 270 | (should-error (json-parse-string "[\"\u00C4\xED\xAD\xBF\xED\xBF\xBF\"]") |
| 240 | :type 'json-parse-error)) | 271 | :type 'json-utf8-decode-error)) |
| 241 | 272 | ||
| 242 | (ert-deftest json-parse-string/incomplete () | 273 | (ert-deftest json-parse-string/incomplete () |
| 243 | (should-error (json-parse-string "[123") :type 'json-end-of-file)) | 274 | (should-error (json-parse-string "[123") :type 'json-end-of-file)) |