diff options
| author | Noam Postavsky | 2017-07-08 13:20:17 -0400 |
|---|---|---|
| committer | Noam Postavsky | 2017-07-08 14:31:27 -0400 |
| commit | 06ff34cd2a86bde6ecc0baa613550bd7ed96f411 (patch) | |
| tree | a2e7dba9cfb458493a02bad9dc151440a6417f4a /test | |
| parent | a163391845c7fcc3287d4ef0b641ee0d178e6b9a (diff) | |
| download | emacs-06ff34cd2a86bde6ecc0baa613550bd7ed96f411.tar.gz emacs-06ff34cd2a86bde6ecc0baa613550bd7ed96f411.zip | |
Optimize UCS normalization tests
Brings the the time for `ucs-normalize-part1' from 200s down to 130s.
* test/lisp/international/ucs-normalize-tests.el
(ucs-normalize-tests--parse-column): Use character instead of string
of length 1 for terminator. Convert return value into string since
all callers need that form anyway.
(ucs-normalize-tests--normalization-equal-p): Rename from
ucs-normalize-tests--normalize. Use dedicated buffer instead of
messing with narrowing. Take string to compare against and insert it
into buffer so that compare-buffer-substrings can be used instead of
allocating a new string from buffer contents.
(ucs-normalize-tests--normalization-chareq-p): New macro, specialized
for comparing single character.
(ucs-normalize-tests--rule1-holds-p)
(ucs-normalize-tests--rule2-holds-p): Turn into defsubst.
(ucs-normalize-tests--rule1-failing-for-partX): Use `eq' instead of
`='.
Diffstat (limited to 'test')
| -rw-r--r-- | test/lisp/international/ucs-normalize-tests.el | 117 |
1 files changed, 66 insertions, 51 deletions
diff --git a/test/lisp/international/ucs-normalize-tests.el b/test/lisp/international/ucs-normalize-tests.el index fbf6aa307ec..02a4bba7a5f 100644 --- a/test/lisp/international/ucs-normalize-tests.el +++ b/test/lisp/international/ucs-normalize-tests.el | |||
| @@ -42,81 +42,96 @@ | |||
| 42 | (defun ucs-normalize-tests--parse-column () | 42 | (defun ucs-normalize-tests--parse-column () |
| 43 | (let ((chars nil) | 43 | (let ((chars nil) |
| 44 | (term nil)) | 44 | (term nil)) |
| 45 | (while (and (not (equal term ";")) | 45 | (while (and (not (eq term ?\;)) |
| 46 | (looking-at "\\([[:xdigit:]]\\{4,6\\}\\)\\([; ]\\)")) | 46 | (looking-at "\\([[:xdigit:]]\\{4,6\\}\\)\\([; ]\\)")) |
| 47 | (let ((code-point (match-string 1))) | 47 | (let ((code-point (match-string-no-properties 1))) |
| 48 | (setq term (match-string 2)) | 48 | (setq term (char-after (match-beginning 2))) |
| 49 | (goto-char (match-end 0)) | 49 | (goto-char (match-end 0)) |
| 50 | (push (string-to-number code-point 16) chars))) | 50 | (push (string-to-number code-point 16) chars))) |
| 51 | (nreverse chars))) | 51 | (apply #'string (nreverse chars)))) |
| 52 | 52 | ||
| 53 | (defmacro ucs-normalize-tests--normalize (norm str) | 53 | (defconst ucs-normalize-tests--norm-buf (generate-new-buffer " *ucs-normalizing-buffer*")) |
| 54 | |||
| 55 | (defmacro ucs-normalize-tests--normalization-equal-p (norm str equal-to) | ||
| 54 | "Like `ucs-normalize-string' but reuse current buffer for efficiency. | 56 | "Like `ucs-normalize-string' but reuse current buffer for efficiency. |
| 55 | And NORM is one of the symbols `NFC', `NFD', `NFKC', `NFKD' for brevity." | 57 | And NORM is one of the symbols `NFC', `NFD', `NFKC', `NFKD' for brevity." |
| 56 | (let ((norm-alist '((NFC . ucs-normalize-NFC-region) | 58 | (let ((norm-alist '((NFC . ucs-normalize-NFC-region) |
| 57 | (NFD . ucs-normalize-NFD-region) | 59 | (NFD . ucs-normalize-NFD-region) |
| 58 | (NFKC . ucs-normalize-NFKC-region) | 60 | (NFKC . ucs-normalize-NFKC-region) |
| 59 | (NFKD . ucs-normalize-NFKD-region)))) | 61 | (NFKD . ucs-normalize-NFKD-region)))) |
| 60 | `(save-restriction | 62 | `(with-current-buffer ucs-normalize-tests--norm-buf |
| 61 | (narrow-to-region (point) (point)) | 63 | (erase-buffer) |
| 62 | (insert ,str) | 64 | (insert ,str) |
| 63 | (funcall #',(cdr (assq norm norm-alist)) (point-min) (point-max)) | 65 | (,(cdr (assq norm norm-alist)) (point-min) (point-max)) |
| 64 | (delete-and-extract-region (point-min) (point-max))))) | 66 | (goto-char (point-min)) |
| 67 | (insert ,equal-to) | ||
| 68 | (eq (compare-buffer-substrings nil nil (point) nil (point) nil) 0)))) | ||
| 69 | |||
| 70 | (defmacro ucs-normalize-tests--normalization-chareq-p (norm char char-eq-to) | ||
| 71 | "Like `ucs-normalize-string' but reuse current buffer for efficiency. | ||
| 72 | And NORM is one of the symbols `NFC', `NFD', `NFKC', `NFKD' for brevity." | ||
| 73 | (let ((norm-alist '((NFC . ucs-normalize-NFC-region) | ||
| 74 | (NFD . ucs-normalize-NFD-region) | ||
| 75 | (NFKC . ucs-normalize-NFKC-region) | ||
| 76 | (NFKD . ucs-normalize-NFKD-region)))) | ||
| 77 | `(with-current-buffer ucs-normalize-tests--norm-buf | ||
| 78 | (erase-buffer) | ||
| 79 | (insert ,char) | ||
| 80 | (,(cdr (assq norm norm-alist)) (point-min) (point-max)) | ||
| 81 | (and (eq (buffer-size) 1) | ||
| 82 | (eq (char-after (point-min)) ,char-eq-to))))) | ||
| 65 | 83 | ||
| 66 | (defvar ucs-normalize-tests--chars-part1 nil) | 84 | (defvar ucs-normalize-tests--chars-part1 nil) |
| 67 | 85 | ||
| 68 | (defun ucs-normalize-tests--rule1-holds-p (&rest columns) | 86 | (defsubst ucs-normalize-tests--rule1-holds-p (source nfc nfd nfkc nfkd) |
| 69 | "Check 1st conformance rule. | 87 | "Check 1st conformance rule. |
| 70 | The following invariants must be true for all conformant implementations..." | 88 | The following invariants must be true for all conformant implementations..." |
| 71 | (when ucs-normalize-tests--chars-part1 | 89 | (when ucs-normalize-tests--chars-part1 |
| 72 | ;; See `ucs-normalize-tests--rule2-holds-p'. | 90 | ;; See `ucs-normalize-tests--rule2-holds-p'. |
| 73 | (aset ucs-normalize-tests--chars-part1 | 91 | (aset ucs-normalize-tests--chars-part1 |
| 74 | (caar columns) 1)) | 92 | (aref source 0) 1)) |
| 75 | (cl-destructuring-bind (source nfc nfd nfkc nfkd) | 93 | (and |
| 76 | (mapcar (lambda (c) (apply #'string c)) columns) | 94 | ;; c2 == toNFC(c1) == toNFC(c2) == toNFC(c3) |
| 77 | (and | 95 | (ucs-normalize-tests--normalization-equal-p NFC source nfc) |
| 78 | ;; c2 == toNFC(c1) == toNFC(c2) == toNFC(c3) | 96 | (ucs-normalize-tests--normalization-equal-p NFC nfc nfc) |
| 79 | (equal nfc (ucs-normalize-tests--normalize NFC source)) | 97 | (ucs-normalize-tests--normalization-equal-p NFC nfd nfc) |
| 80 | (equal nfc (ucs-normalize-tests--normalize NFC nfc)) | 98 | ;; c4 == toNFC(c4) == toNFC(c5) |
| 81 | (equal nfc (ucs-normalize-tests--normalize NFC nfd)) | 99 | (ucs-normalize-tests--normalization-equal-p NFC nfkc nfkc) |
| 82 | ;; c4 == toNFC(c4) == toNFC(c5) | 100 | (ucs-normalize-tests--normalization-equal-p NFC nfkd nfkc) |
| 83 | (equal nfkc (ucs-normalize-tests--normalize NFC nfkc)) | 101 | |
| 84 | (equal nfkc (ucs-normalize-tests--normalize NFC nfkd)) | 102 | ;; c3 == toNFD(c1) == toNFD(c2) == toNFD(c3) |
| 85 | 103 | (ucs-normalize-tests--normalization-equal-p NFD source nfd) | |
| 86 | ;; c3 == toNFD(c1) == toNFD(c2) == toNFD(c3) | 104 | (ucs-normalize-tests--normalization-equal-p NFD nfc nfd) |
| 87 | (equal nfd (ucs-normalize-tests--normalize NFD source)) | 105 | (ucs-normalize-tests--normalization-equal-p NFD nfd nfd) |
| 88 | (equal nfd (ucs-normalize-tests--normalize NFD nfc)) | 106 | ;; c5 == toNFD(c4) == toNFD(c5) |
| 89 | (equal nfd (ucs-normalize-tests--normalize NFD nfd)) | 107 | (ucs-normalize-tests--normalization-equal-p NFD nfkc nfkd) |
| 90 | ;; c5 == toNFD(c4) == toNFD(c5) | 108 | (ucs-normalize-tests--normalization-equal-p NFD nfkd nfkd) |
| 91 | (equal nfkd (ucs-normalize-tests--normalize NFD nfkc)) | 109 | |
| 92 | (equal nfkd (ucs-normalize-tests--normalize NFD nfkd)) | 110 | ;; c4 == toNFKC(c1) == toNFKC(c2) == toNFKC(c3) == toNFKC(c4) == toNFKC(c5) |
| 93 | 111 | (ucs-normalize-tests--normalization-equal-p NFKC source nfkc) | |
| 94 | ;; c4 == toNFKC(c1) == toNFKC(c2) == toNFKC(c3) == toNFKC(c4) == toNFKC(c5) | 112 | (ucs-normalize-tests--normalization-equal-p NFKC nfc nfkc) |
| 95 | (equal nfkc (ucs-normalize-tests--normalize NFKC source)) | 113 | (ucs-normalize-tests--normalization-equal-p NFKC nfd nfkc) |
| 96 | (equal nfkc (ucs-normalize-tests--normalize NFKC nfc)) | 114 | (ucs-normalize-tests--normalization-equal-p NFKC nfkc nfkc) |
| 97 | (equal nfkc (ucs-normalize-tests--normalize NFKC nfd)) | 115 | (ucs-normalize-tests--normalization-equal-p NFKC nfkd nfkc) |
| 98 | (equal nfkc (ucs-normalize-tests--normalize NFKC nfkc)) | 116 | |
| 99 | (equal nfkc (ucs-normalize-tests--normalize NFKC nfkd)) | 117 | ;; c5 == toNFKD(c1) == toNFKD(c2) == toNFKD(c3) == toNFKD(c4) == toNFKD(c5) |
| 100 | 118 | (ucs-normalize-tests--normalization-equal-p NFKD source nfkd) | |
| 101 | ;; c5 == toNFKD(c1) == toNFKD(c2) == toNFKD(c3) == toNFKD(c4) == toNFKD(c5) | 119 | (ucs-normalize-tests--normalization-equal-p NFKD nfc nfkd) |
| 102 | (equal nfkd (ucs-normalize-tests--normalize NFKD source)) | 120 | (ucs-normalize-tests--normalization-equal-p NFKD nfd nfkd) |
| 103 | (equal nfkd (ucs-normalize-tests--normalize NFKD nfc)) | 121 | (ucs-normalize-tests--normalization-equal-p NFKD nfkc nfkd) |
| 104 | (equal nfkd (ucs-normalize-tests--normalize NFKD nfd)) | 122 | (ucs-normalize-tests--normalization-equal-p NFKD nfkd nfkd))) |
| 105 | (equal nfkd (ucs-normalize-tests--normalize NFKD nfkc)) | 123 | |
| 106 | (equal nfkd (ucs-normalize-tests--normalize NFKD nfkd))))) | 124 | (defsubst ucs-normalize-tests--rule2-holds-p (X) |
| 107 | |||
| 108 | (defun ucs-normalize-tests--rule2-holds-p (char) | ||
| 109 | "Check 2nd conformance rule. | 125 | "Check 2nd conformance rule. |
| 110 | For every code point X assigned in this version of Unicode that is not specifically | 126 | For every code point X assigned in this version of Unicode that is not specifically |
| 111 | listed in Part 1, the following invariants must be true for all conformant | 127 | listed in Part 1, the following invariants must be true for all conformant |
| 112 | implementations: | 128 | implementations: |
| 113 | 129 | ||
| 114 | X == toNFC(X) == toNFD(X) == toNFKC(X) == toNFKD(X)" | 130 | X == toNFC(X) == toNFD(X) == toNFKC(X) == toNFKD(X)" |
| 115 | (let ((X (string char))) | 131 | (and (ucs-normalize-tests--normalization-chareq-p NFC X X) |
| 116 | (and (equal X (ucs-normalize-tests--normalize NFC X)) | 132 | (ucs-normalize-tests--normalization-chareq-p NFD X X) |
| 117 | (equal X (ucs-normalize-tests--normalize NFD X)) | 133 | (ucs-normalize-tests--normalization-chareq-p NFKC X X) |
| 118 | (equal X (ucs-normalize-tests--normalize NFKC X)) | 134 | (ucs-normalize-tests--normalization-chareq-p NFKD X X))) |
| 119 | (equal X (ucs-normalize-tests--normalize NFKD X))))) | ||
| 120 | 135 | ||
| 121 | (cl-defun ucs-normalize-tests--rule1-failing-for-partX (part &optional skip-lines &key progress-str) | 136 | (cl-defun ucs-normalize-tests--rule1-failing-for-partX (part &optional skip-lines &key progress-str) |
| 122 | "Returns a list of failed line numbers." | 137 | "Returns a list of failed line numbers." |
| @@ -134,7 +149,7 @@ implementations: | |||
| 134 | progress-str beg-line end-line | 149 | progress-str beg-line end-line |
| 135 | 0 nil 0.5)) | 150 | 0 nil 0.5)) |
| 136 | for line from beg-line to (1- end-line) | 151 | for line from beg-line to (1- end-line) |
| 137 | unless (or (= (following-char) ?#) | 152 | unless (or (eq (following-char) ?#) |
| 138 | (ucs-normalize-tests--rule1-holds-p | 153 | (ucs-normalize-tests--rule1-holds-p |
| 139 | (ucs-normalize-tests--parse-column) | 154 | (ucs-normalize-tests--parse-column) |
| 140 | (ucs-normalize-tests--parse-column) | 155 | (ucs-normalize-tests--parse-column) |