diff options
| author | Eli Zaretskii | 2016-09-24 12:18:54 +0300 |
|---|---|---|
| committer | Eli Zaretskii | 2016-09-24 12:18:54 +0300 |
| commit | 2f4776bf321bcd9c92a0f979f5a0544b76f3cba5 (patch) | |
| tree | 9fffcf4fca42eb3dbac5724bc16f82b297bc0aa8 /test/src/coding-tests.el | |
| parent | 3facefd162c7fdd20cbac97d2f6d34e74216eb35 (diff) | |
| download | emacs-2f4776bf321bcd9c92a0f979f5a0544b76f3cba5.tar.gz emacs-2f4776bf321bcd9c92a0f979f5a0544b76f3cba5.zip | |
Move coding-tests.el and decoder-test.el to their places
* test/src/coding-tests.el: Added all the tests from
test/lisp/legacy/decoder-tests.el.
* test/lisp/legacy/decoder-tests.el: File deleted.
Diffstat (limited to 'test/src/coding-tests.el')
| -rw-r--r-- | test/src/coding-tests.el | 327 |
1 files changed, 326 insertions, 1 deletions
diff --git a/test/src/coding-tests.el b/test/src/coding-tests.el index 772c8735519..bd494bc26f8 100644 --- a/test/src/coding-tests.el +++ b/test/src/coding-tests.el | |||
| @@ -1,8 +1,9 @@ | |||
| 1 | ;;; coding-tests.el --- tests for text encoding and decoding | 1 | ;;; coding-tests.el --- tests for text encoding and decoding |
| 2 | 2 | ||
| 3 | ;; Copyright (C) 2015-2016 Free Software Foundation, Inc. | 3 | ;; Copyright (C) 2013-2016 Free Software Foundation, Inc. |
| 4 | 4 | ||
| 5 | ;; Author: Eli Zaretskii <eliz@gnu.org> | 5 | ;; Author: Eli Zaretskii <eliz@gnu.org> |
| 6 | ;; Author: Kenichi Handa <handa@gnu.org> | ||
| 6 | 7 | ||
| 7 | ;; This file is part of GNU Emacs. | 8 | ;; This file is part of GNU Emacs. |
| 8 | 9 | ||
| @@ -56,3 +57,327 @@ | |||
| 56 | (insert (encode-coding-string "あ" 'euc-jp) "\xd" "\n") | 57 | (insert (encode-coding-string "あ" 'euc-jp) "\xd" "\n") |
| 57 | (decode-coding-region (point-min) (point-max) 'euc-jp-dos) | 58 | (decode-coding-region (point-min) (point-max) 'euc-jp-dos) |
| 58 | (should-not (string-match-p "\^M" (buffer-string))))) | 59 | (should-not (string-match-p "\^M" (buffer-string))))) |
| 60 | |||
| 61 | ;; Return the contents (specified by CONTENT-TYPE; ascii, latin, or | ||
| 62 | ;; binary) of a test file. | ||
| 63 | (defun coding-tests-file-contents (content-type) | ||
| 64 | (let* ((ascii "ABCDEFGHIJKLMNOPQRSTUVWXYZ\n") | ||
| 65 | (latin (concat ascii "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏ\n")) | ||
| 66 | (binary (string-to-multibyte | ||
| 67 | (concat (string-as-unibyte latin) | ||
| 68 | (unibyte-string #xC0 #xC1 ?\n))))) | ||
| 69 | (cond ((eq content-type 'ascii) ascii) | ||
| 70 | ((eq content-type 'latin) latin) | ||
| 71 | ((eq content-type 'binary) binary) | ||
| 72 | (t | ||
| 73 | (error "Invalid file content type: %s" content-type))))) | ||
| 74 | |||
| 75 | ;; Generate FILE with CONTENTS encoded by CODING-SYSTEM. | ||
| 76 | ;; whose encoding specified by CODING-SYSTEM. | ||
| 77 | (defun coding-tests-gen-file (file contents coding-system) | ||
| 78 | (or (file-directory-p coding-tests-workdir) | ||
| 79 | (mkdir coding-tests-workdir t)) | ||
| 80 | (setq file (expand-file-name file coding-tests-workdir)) | ||
| 81 | (with-temp-file file | ||
| 82 | (set-buffer-file-coding-system coding-system) | ||
| 83 | (insert contents)) | ||
| 84 | file) | ||
| 85 | |||
| 86 | ;;; The following three functions are filters for contents of a test | ||
| 87 | ;;; file. | ||
| 88 | |||
| 89 | ;; Convert all LFs to CR LF sequences in the string STR. | ||
| 90 | (defun coding-tests-lf-to-crlf (str) | ||
| 91 | (with-temp-buffer | ||
| 92 | (insert str) | ||
| 93 | (goto-char (point-min)) | ||
| 94 | (while (search-forward "\n" nil t) | ||
| 95 | (delete-char -1) | ||
| 96 | (insert "\r\n")) | ||
| 97 | (buffer-string))) | ||
| 98 | |||
| 99 | ;; Convert all LFs to CRs in the string STR. | ||
| 100 | (defun coding-tests-lf-to-cr (str) | ||
| 101 | (with-temp-buffer | ||
| 102 | (insert str) | ||
| 103 | (subst-char-in-region (point-min) (point-max) ?\n ?\r) | ||
| 104 | (buffer-string))) | ||
| 105 | |||
| 106 | ;; Convert all LFs to LF LF sequences in the string STR. | ||
| 107 | (defun coding-tests-lf-to-lflf (str) | ||
| 108 | (with-temp-buffer | ||
| 109 | (insert str) | ||
| 110 | (goto-char (point-min)) | ||
| 111 | (while (search-forward "\n" nil t) | ||
| 112 | (insert "\n")) | ||
| 113 | (buffer-string))) | ||
| 114 | |||
| 115 | ;; Prepend the UTF-8 BOM to STR. | ||
| 116 | (defun coding-tests-add-bom (str) | ||
| 117 | (concat "\xfeff" str)) | ||
| 118 | |||
| 119 | ;; Return the name of test file whose contents specified by | ||
| 120 | ;; CONTENT-TYPE and whose encoding specified by CODING-SYSTEM. | ||
| 121 | (defun coding-tests-filename (content-type coding-system &optional ext) | ||
| 122 | (if ext | ||
| 123 | (expand-file-name (format "%s-%s.%s" content-type coding-system ext) | ||
| 124 | coding-tests-workdir) | ||
| 125 | (expand-file-name (format "%s-%s" content-type coding-system) | ||
| 126 | coding-tests-workdir))) | ||
| 127 | |||
| 128 | |||
| 129 | ;;; Check ASCII optimizing decoder | ||
| 130 | |||
| 131 | ;; Generate a test file whose contents specified by CONTENT-TYPE and | ||
| 132 | ;; whose encoding specified by CODING-SYSTEM. | ||
| 133 | (defun coding-tests-ao-gen-file (content-type coding-system) | ||
| 134 | (let ((file (coding-tests-filename content-type coding-system))) | ||
| 135 | (coding-tests-gen-file file | ||
| 136 | (coding-tests-file-contents content-type) | ||
| 137 | coding-system))) | ||
| 138 | |||
| 139 | ;; Test the decoding of a file whose contents and encoding are | ||
| 140 | ;; specified by CONTENT-TYPE and WRITE-CODING. The test passes if the | ||
| 141 | ;; file is read by READ-CODING and detected as DETECTED-CODING and the | ||
| 142 | ;; contents is correctly decoded. | ||
| 143 | ;; Optional 5th arg TRANSLATOR is a function to translate the original | ||
| 144 | ;; file contents to match with the expected result of decoding. For | ||
| 145 | ;; instance, when a file of dos eol-type is read by unix eol-type, | ||
| 146 | ;; `decode-test-lf-to-crlf' must be specified. | ||
| 147 | |||
| 148 | (defun coding-tests (content-type write-coding read-coding detected-coding | ||
| 149 | &optional translator) | ||
| 150 | (prefer-coding-system 'utf-8-auto) | ||
| 151 | (let ((filename (coding-tests-filename content-type write-coding))) | ||
| 152 | (with-temp-buffer | ||
| 153 | (let ((coding-system-for-read read-coding) | ||
| 154 | (contents (coding-tests-file-contents content-type)) | ||
| 155 | (disable-ascii-optimization nil)) | ||
| 156 | (if translator | ||
| 157 | (setq contents (funcall translator contents))) | ||
| 158 | (insert-file-contents filename) | ||
| 159 | (if (and (coding-system-equal buffer-file-coding-system detected-coding) | ||
| 160 | (string= (buffer-string) contents)) | ||
| 161 | nil | ||
| 162 | (list buffer-file-coding-system | ||
| 163 | (string-to-list (buffer-string)) | ||
| 164 | (string-to-list contents))))))) | ||
| 165 | |||
| 166 | (ert-deftest ert-test-coding-ascii () | ||
| 167 | (unwind-protect | ||
| 168 | (progn | ||
| 169 | (dolist (eol-type '(unix dos mac)) | ||
| 170 | (coding-tests-ao-gen-file 'ascii eol-type)) | ||
| 171 | (should-not (coding-tests 'ascii 'unix 'undecided 'unix)) | ||
| 172 | (should-not (coding-tests 'ascii 'dos 'undecided 'dos)) | ||
| 173 | (should-not (coding-tests 'ascii 'dos 'dos 'dos)) | ||
| 174 | (should-not (coding-tests 'ascii 'mac 'undecided 'mac)) | ||
| 175 | (should-not (coding-tests 'ascii 'mac 'mac 'mac)) | ||
| 176 | (should-not (coding-tests 'ascii 'dos 'utf-8 'utf-8-dos)) | ||
| 177 | (should-not (coding-tests 'ascii 'dos 'unix 'unix | ||
| 178 | 'coding-tests-lf-to-crlf)) | ||
| 179 | (should-not (coding-tests 'ascii 'mac 'dos 'dos | ||
| 180 | 'coding-tests-lf-to-cr)) | ||
| 181 | (should-not (coding-tests 'ascii 'dos 'mac 'mac | ||
| 182 | 'coding-tests-lf-to-lflf))) | ||
| 183 | (coding-tests-remove-files))) | ||
| 184 | |||
| 185 | (ert-deftest ert-test-coding-latin () | ||
| 186 | (unwind-protect | ||
| 187 | (progn | ||
| 188 | (dolist (coding '("utf-8" "utf-8-with-signature")) | ||
| 189 | (dolist (eol-type '("unix" "dos" "mac")) | ||
| 190 | (coding-tests-ao-gen-file 'latin | ||
| 191 | (intern (concat coding "-" eol-type))))) | ||
| 192 | (should-not (coding-tests 'latin 'utf-8-unix 'undecided 'utf-8-unix)) | ||
| 193 | (should-not (coding-tests 'latin 'utf-8-unix 'utf-8-unix 'utf-8-unix)) | ||
| 194 | (should-not (coding-tests 'latin 'utf-8-dos 'undecided 'utf-8-dos)) | ||
| 195 | (should-not (coding-tests 'latin 'utf-8-dos 'utf-8-dos 'utf-8-dos)) | ||
| 196 | (should-not (coding-tests 'latin 'utf-8-mac 'undecided 'utf-8-mac)) | ||
| 197 | (should-not (coding-tests 'latin 'utf-8-mac 'utf-8-mac 'utf-8-mac)) | ||
| 198 | (should-not (coding-tests 'latin 'utf-8-dos 'unix 'utf-8-unix | ||
| 199 | 'coding-tests-lf-to-crlf)) | ||
| 200 | (should-not (coding-tests 'latin 'utf-8-mac 'dos 'utf-8-dos | ||
| 201 | 'coding-tests-lf-to-cr)) | ||
| 202 | (should-not (coding-tests 'latin 'utf-8-dos 'mac 'utf-8-mac | ||
| 203 | 'coding-tests-lf-to-lflf)) | ||
| 204 | (should-not (coding-tests 'latin 'utf-8-with-signature-unix 'undecided | ||
| 205 | 'utf-8-with-signature-unix)) | ||
| 206 | (should-not (coding-tests 'latin 'utf-8-with-signature-unix 'utf-8-auto | ||
| 207 | 'utf-8-with-signature-unix)) | ||
| 208 | (should-not (coding-tests 'latin 'utf-8-with-signature-dos 'undecided | ||
| 209 | 'utf-8-with-signature-dos)) | ||
| 210 | (should-not (coding-tests 'latin 'utf-8-with-signature-unix 'utf-8 | ||
| 211 | 'utf-8-unix 'coding-tests-add-bom)) | ||
| 212 | (should-not (coding-tests 'latin 'utf-8-with-signature-unix 'utf-8 | ||
| 213 | 'utf-8-unix 'coding-tests-add-bom))) | ||
| 214 | (coding-tests-remove-files))) | ||
| 215 | |||
| 216 | (ert-deftest ert-test-coding-binary () | ||
| 217 | (unwind-protect | ||
| 218 | (progn | ||
| 219 | (dolist (eol-type '("unix" "dos" "mac")) | ||
| 220 | (coding-tests-ao-gen-file 'binary | ||
| 221 | (intern (concat "raw-text" "-" eol-type)))) | ||
| 222 | (should-not (coding-tests 'binary 'raw-text-unix 'undecided | ||
| 223 | 'raw-text-unix)) | ||
| 224 | (should-not (coding-tests 'binary 'raw-text-dos 'undecided | ||
| 225 | 'raw-text-dos)) | ||
| 226 | (should-not (coding-tests 'binary 'raw-text-mac 'undecided | ||
| 227 | 'raw-text-mac)) | ||
| 228 | (should-not (coding-tests 'binary 'raw-text-dos 'unix | ||
| 229 | 'raw-text-unix 'coding-tests-lf-to-crlf)) | ||
| 230 | (should-not (coding-tests 'binary 'raw-text-mac 'dos | ||
| 231 | 'raw-text-dos 'coding-tests-lf-to-cr)) | ||
| 232 | (should-not (coding-tests 'binary 'raw-text-dos 'mac | ||
| 233 | 'raw-text-mac 'coding-tests-lf-to-lflf))) | ||
| 234 | (coding-tests-remove-files))) | ||
| 235 | |||
| 236 | |||
| 237 | ;;; Check the coding system `prefer-utf-8'. | ||
| 238 | |||
| 239 | ;; Read FILE. Check if the encoding was detected as DETECT. If | ||
| 240 | ;; PREFER is non-nil, prefer that coding system before reading. | ||
| 241 | |||
| 242 | (defun coding-tests-prefer-utf-8-read (file detect prefer) | ||
| 243 | (with-temp-buffer | ||
| 244 | (with-coding-priority (if prefer (list prefer)) | ||
| 245 | (insert-file-contents file)) | ||
| 246 | (if (eq buffer-file-coding-system detect) | ||
| 247 | nil | ||
| 248 | (format "Invalid detection: %s" buffer-file-coding-system)))) | ||
| 249 | |||
| 250 | ;; Read FILE, modify it, and write it. Check if the coding system | ||
| 251 | ;; used for writing was CODING. If CODING-TAG is non-nil, insert | ||
| 252 | ;; coding tag with it before writing. If STR is non-nil, insert it | ||
| 253 | ;; before writing. | ||
| 254 | |||
| 255 | (defun coding-tests-prefer-utf-8-write (file coding-tag coding | ||
| 256 | &optional str) | ||
| 257 | (with-temp-buffer | ||
| 258 | (insert-file-contents file) | ||
| 259 | (goto-char (point-min)) | ||
| 260 | (if coding-tag | ||
| 261 | (insert (format ";; -*- coding: %s; -*-\n" coding-tag)) | ||
| 262 | (insert ";;\n")) | ||
| 263 | (if str | ||
| 264 | (insert str)) | ||
| 265 | (write-file (coding-tests-filename 'test 'test "el")) | ||
| 266 | (if (coding-system-equal buffer-file-coding-system coding) | ||
| 267 | nil | ||
| 268 | (format "Incorrect encoding: %s" last-coding-system-used)))) | ||
| 269 | |||
| 270 | (ert-deftest ert-test-coding-prefer-utf-8 () | ||
| 271 | (unwind-protect | ||
| 272 | (let ((ascii (coding-tests-gen-file "ascii.el" | ||
| 273 | (coding-tests-file-contents 'ascii) | ||
| 274 | 'unix)) | ||
| 275 | (latin (coding-tests-gen-file "utf-8.el" | ||
| 276 | (coding-tests-file-contents 'latin) | ||
| 277 | 'utf-8-unix))) | ||
| 278 | (should-not (coding-tests-prefer-utf-8-read | ||
| 279 | ascii 'prefer-utf-8-unix nil)) | ||
| 280 | (should-not (coding-tests-prefer-utf-8-read | ||
| 281 | latin 'utf-8-unix nil)) | ||
| 282 | (should-not (coding-tests-prefer-utf-8-read | ||
| 283 | latin 'utf-8-unix 'iso-8859-1)) | ||
| 284 | (should-not (coding-tests-prefer-utf-8-read | ||
| 285 | latin 'utf-8-unix 'sjis)) | ||
| 286 | (should-not (coding-tests-prefer-utf-8-write | ||
| 287 | ascii nil 'prefer-utf-8-unix)) | ||
| 288 | (should-not (coding-tests-prefer-utf-8-write | ||
| 289 | ascii 'iso-8859-1 'iso-8859-1-unix)) | ||
| 290 | (should-not (coding-tests-prefer-utf-8-write | ||
| 291 | ascii nil 'utf-8-unix "À"))) | ||
| 292 | (coding-tests-remove-files))) | ||
| 293 | |||
| 294 | |||
| 295 | ;;; The following is for benchmark testing of the new optimized | ||
| 296 | ;;; decoder, not for regression testing. | ||
| 297 | |||
| 298 | (defun generate-ascii-file () | ||
| 299 | (dotimes (i 100000) | ||
| 300 | (insert-char ?a 80) | ||
| 301 | (insert "\n"))) | ||
| 302 | |||
| 303 | (defun generate-rarely-nonascii-file () | ||
| 304 | (dotimes (i 100000) | ||
| 305 | (if (/= i 50000) | ||
| 306 | (insert-char ?a 80) | ||
| 307 | (insert ?À) | ||
| 308 | (insert-char ?a 79)) | ||
| 309 | (insert "\n"))) | ||
| 310 | |||
| 311 | (defun generate-mostly-nonascii-file () | ||
| 312 | (dotimes (i 30000) | ||
| 313 | (insert-char ?a 80) | ||
| 314 | (insert "\n")) | ||
| 315 | (dotimes (i 20000) | ||
| 316 | (insert-char ?À 80) | ||
| 317 | (insert "\n")) | ||
| 318 | (dotimes (i 10000) | ||
| 319 | (insert-char ?あ 80) | ||
| 320 | (insert "\n"))) | ||
| 321 | |||
| 322 | |||
| 323 | (defvar test-file-list | ||
| 324 | '((generate-ascii-file | ||
| 325 | ("~/ascii-tag-utf-8-unix.unix" ";; -*- coding: utf-8-unix; -*-" unix) | ||
| 326 | ("~/ascii-tag-utf-8.unix" ";; -*- coding: utf-8; -*-" unix) | ||
| 327 | ("~/ascii-tag-none.unix" "" unix) | ||
| 328 | ("~/ascii-tag-utf-8-dos.dos" ";; -*- coding: utf-8-dos; -*-" dos) | ||
| 329 | ("~/ascii-tag-utf-8.dos" ";; -*- coding: utf-8; -*-" dos) | ||
| 330 | ("~/ascii-tag-none.dos" "" dos)) | ||
| 331 | (generate-rarely-nonascii-file | ||
| 332 | ("~/utf-8-r-tag-utf-8-unix.unix" ";; -*- coding: utf-8-unix; -*-" utf-8-unix) | ||
| 333 | ("~/utf-8-r-tag-utf-8.unix" ";; -*- coding: utf-8; -*-" utf-8-unix) | ||
| 334 | ("~/utf-8-r-tag-none.unix" "" utf-8-unix) | ||
| 335 | ("~/utf-8-r-tag-utf-8-dos.dos" ";; -*- coding: utf-8-dos; -*-" utf-8-dos) | ||
| 336 | ("~/utf-8-r-tag-utf-8.dos" ";; -*- coding: utf-8; -*-" utf-8-dos) | ||
| 337 | ("~/utf-8-r-tag-none.dos" "" utf-8-dos)) | ||
| 338 | (generate-mostly-nonascii-file | ||
| 339 | ("~/utf-8-m-tag-utf-8-unix.unix" ";; -*- coding: utf-8-unix; -*-" utf-8-unix) | ||
| 340 | ("~/utf-8-m-tag-utf-8.unix" ";; -*- coding: utf-8; -*-" utf-8-unix) | ||
| 341 | ("~/utf-8-m-tag-none.unix" "" utf-8-unix) | ||
| 342 | ("~/utf-8-m-tag-utf-8-dos.dos" ";; -*- coding: utf-8-dos; -*-" utf-8-dos) | ||
| 343 | ("~/utf-8-m-tag-utf-8.dos" ";; -*- coding: utf-8; -*-" utf-8-dos) | ||
| 344 | ("~/utf-8-m-tag-none.dos" "" utf-8-dos)))) | ||
| 345 | |||
| 346 | (defun generate-benchmark-test-file () | ||
| 347 | (interactive) | ||
| 348 | (with-temp-buffer | ||
| 349 | (message "Generating data...") | ||
| 350 | (dolist (files test-file-list) | ||
| 351 | (delete-region (point-min) (point-max)) | ||
| 352 | (funcall (car files)) | ||
| 353 | (dolist (file (cdr files)) | ||
| 354 | (message "Writing %s..." (car file)) | ||
| 355 | (goto-char (point-min)) | ||
| 356 | (insert (nth 1 file) "\n") | ||
| 357 | (let ((coding-system-for-write (nth 2 file))) | ||
| 358 | (write-region (point-min) (point-max) (car file))) | ||
| 359 | (delete-region (point-min) (point)))))) | ||
| 360 | |||
| 361 | (defun benchmark-decoder () | ||
| 362 | (let ((gc-cons-threshold 4000000)) | ||
| 363 | (insert "Without optimization:\n") | ||
| 364 | (dolist (files test-file-list) | ||
| 365 | (dolist (file (cdr files)) | ||
| 366 | (let* ((disable-ascii-optimization t) | ||
| 367 | (result (benchmark-run 10 | ||
| 368 | (with-temp-buffer (insert-file-contents (car file)))))) | ||
| 369 | (insert (format "%s: %s\n" (car file) result))))) | ||
| 370 | (insert "With optimization:\n") | ||
| 371 | (dolist (files test-file-list) | ||
| 372 | (dolist (file (cdr files)) | ||
| 373 | (let* ((disable-ascii-optimization nil) | ||
| 374 | (result (benchmark-run 10 | ||
| 375 | (with-temp-buffer (insert-file-contents (car file)))))) | ||
| 376 | (insert (format "%s: %s\n" (car file) result))))))) | ||
| 377 | |||
| 378 | ;; Local Variables: | ||
| 379 | ;; byte-compile-warnings: (not obsolete) | ||
| 380 | ;; End: | ||
| 381 | |||
| 382 | (provide 'coding-tests) | ||
| 383 | ;; coding-tests.el ends here | ||