diff options
| author | Philipp Stephani | 2016-10-24 21:54:51 +0200 |
|---|---|---|
| committer | Philipp Stephani | 2017-01-01 13:24:14 +0100 |
| commit | 93be35e038bbb19e8d64d3c1f9d1be76a9083d09 (patch) | |
| tree | 7a9074fa83d51aa8136f57be267c83016c4f3978 /lisp | |
| parent | baa370f255d2f9d3f662fac0de98eaadd3242aa6 (diff) | |
| download | emacs-93be35e038bbb19e8d64d3c1f9d1be76a9083d09.tar.gz emacs-93be35e038bbb19e8d64d3c1f9d1be76a9083d09.zip | |
Fix decoding of JSON surrogate pairs
JSON requires that two consecutive \uXXXX escapes forming a high/low
surrogate pair be decoded as one UTF-16 surrogate pair, i.e. a single
code point, not as two individual code points; cf. Bug #24784.
* lisp/json.el (json-read-escaped-char): Fix decoding of surrogate
pairs.
(json--decode-utf-16-surrogates): New defun.
* test/lisp/json-tests.el (test-json-read-string): Add test for
surrogate pairs.
Diffstat (limited to 'lisp')
| -rw-r--r-- | lisp/json.el | 15 |
1 file changed, 15 insertions, 0 deletions
diff --git a/lisp/json.el b/lisp/json.el index 38f828e8fbb..b2ac356641b 100644 --- a/lisp/json.el +++ b/lisp/json.el | |||
| @@ -363,6 +363,10 @@ representation will be parsed correctly." | |||
| 363 | 363 | ||
| 364 | ;; String parsing | 364 | ;; String parsing |
| 365 | 365 | ||
| 366 | (defun json--decode-utf-16-surrogates (high low) | ||
| 367 | "Return the code point encoded by UTF-16 surrogates HIGH (#xD800-#xDBFF) and LOW (#xDC00-#xDFFF)." | ||
| 368 | (+ (ash (- high #xD800) 10) (- low #xDC00) #x10000)) | ||
| 369 | |||
| 366 | (defun json-read-escaped-char () | 370 | (defun json-read-escaped-char () |
| 367 | "Read the JSON string escaped character at point." | 371 | "Read the JSON string escaped character at point." |
| 368 | ;; Skip over the '\' | 372 | ;; Skip over the '\' |
| @@ -372,6 +376,17 @@ representation will be parsed correctly." | |||
| 372 | (cond | 376 | (cond |
| 373 | (special (cdr special)) | 377 | (special (cdr special)) |
| 374 | ((not (eq char ?u)) char) | 378 | ((not (eq char ?u)) char) |
| 379 | ;; Special-case UTF-16 surrogate pairs, | ||
| 380 | ;; cf. https://tools.ietf.org/html/rfc7159#section-7. Note that | ||
| 381 | ;; this clause overlaps with the next one and therefore has to | ||
| 382 | ;; come first. | ||
| 383 | ((looking-at | ||
| 384 | (rx (group (any "Dd") (any "89ABab") (= 2 (any "0-9A-Fa-f"))) | ||
| 385 | "\\u" (group (any "Dd") (any "C-Fc-f") (= 2 (any "0-9A-Fa-f"))))) | ||
| 386 | (json-advance 10) | ||
| 387 | (json--decode-utf-16-surrogates | ||
| 388 | (string-to-number (match-string 1) 16) | ||
| 389 | (string-to-number (match-string 2) 16))) | ||
| 375 | ((looking-at "[0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f]") | 390 | ((looking-at "[0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f]") |
| 376 | (let ((hex (match-string 0))) | 391 | (let ((hex (match-string 0))) |
| 377 | (json-advance 4) | 392 | (json-advance 4) |