aboutsummaryrefslogtreecommitdiffstats
path: root/test/src/coding-tests.el
diff options
context:
space:
mode:
authorEli Zaretskii2016-09-24 12:18:54 +0300
committerEli Zaretskii2016-09-24 12:18:54 +0300
commit2f4776bf321bcd9c92a0f979f5a0544b76f3cba5 (patch)
tree9fffcf4fca42eb3dbac5724bc16f82b297bc0aa8 /test/src/coding-tests.el
parent3facefd162c7fdd20cbac97d2f6d34e74216eb35 (diff)
downloademacs-2f4776bf321bcd9c92a0f979f5a0544b76f3cba5.tar.gz
emacs-2f4776bf321bcd9c92a0f979f5a0544b76f3cba5.zip
Move coding-tests.el and decoder-test.el to their places
* test/src/coding-tests.el: Added all the tests from test/lisp/legacy/decoder-tests.el. * test/lisp/legacy/decoder-tests.el: File deleted.
Diffstat (limited to 'test/src/coding-tests.el')
-rw-r--r--test/src/coding-tests.el327
1 files changed, 326 insertions, 1 deletions
diff --git a/test/src/coding-tests.el b/test/src/coding-tests.el
index 772c8735519..bd494bc26f8 100644
--- a/test/src/coding-tests.el
+++ b/test/src/coding-tests.el
@@ -1,8 +1,9 @@
1;;; coding-tests.el --- tests for text encoding and decoding 1;;; coding-tests.el --- tests for text encoding and decoding
2 2
3;; Copyright (C) 2015-2016 Free Software Foundation, Inc. 3;; Copyright (C) 2013-2016 Free Software Foundation, Inc.
4 4
5;; Author: Eli Zaretskii <eliz@gnu.org> 5;; Author: Eli Zaretskii <eliz@gnu.org>
6;; Author: Kenichi Handa <handa@gnu.org>
6 7
7;; This file is part of GNU Emacs. 8;; This file is part of GNU Emacs.
8 9
@@ -56,3 +57,327 @@
56 (insert (encode-coding-string "あ" 'euc-jp) "\xd" "\n") 57 (insert (encode-coding-string "あ" 'euc-jp) "\xd" "\n")
57 (decode-coding-region (point-min) (point-max) 'euc-jp-dos) 58 (decode-coding-region (point-min) (point-max) 'euc-jp-dos)
58 (should-not (string-match-p "\^M" (buffer-string))))) 59 (should-not (string-match-p "\^M" (buffer-string)))))
60
61;; Return the contents (specified by CONTENT-TYPE; ascii, latin, or
62;; binary) of a test file.
63(defun coding-tests-file-contents (content-type)
64 (let* ((ascii "ABCDEFGHIJKLMNOPQRSTUVWXYZ\n")
65 (latin (concat ascii "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏ\n"))
66 (binary (string-to-multibyte
67 (concat (string-as-unibyte latin)
68 (unibyte-string #xC0 #xC1 ?\n)))))
69 (cond ((eq content-type 'ascii) ascii)
70 ((eq content-type 'latin) latin)
71 ((eq content-type 'binary) binary)
72 (t
73 (error "Invalid file content type: %s" content-type)))))
74
75;; Generate FILE with CONTENTS encoded by CODING-SYSTEM.
76;; whose encoding specified by CODING-SYSTEM.
77(defun coding-tests-gen-file (file contents coding-system)
78 (or (file-directory-p coding-tests-workdir)
79 (mkdir coding-tests-workdir t))
80 (setq file (expand-file-name file coding-tests-workdir))
81 (with-temp-file file
82 (set-buffer-file-coding-system coding-system)
83 (insert contents))
84 file)
85
86;;; The following three functions are filters for contents of a test
87;;; file.
88
89;; Convert all LFs to CR LF sequences in the string STR.
90(defun coding-tests-lf-to-crlf (str)
91 (with-temp-buffer
92 (insert str)
93 (goto-char (point-min))
94 (while (search-forward "\n" nil t)
95 (delete-char -1)
96 (insert "\r\n"))
97 (buffer-string)))
98
99;; Convert all LFs to CRs in the string STR.
100(defun coding-tests-lf-to-cr (str)
101 (with-temp-buffer
102 (insert str)
103 (subst-char-in-region (point-min) (point-max) ?\n ?\r)
104 (buffer-string)))
105
106;; Convert all LFs to LF LF sequences in the string STR.
107(defun coding-tests-lf-to-lflf (str)
108 (with-temp-buffer
109 (insert str)
110 (goto-char (point-min))
111 (while (search-forward "\n" nil t)
112 (insert "\n"))
113 (buffer-string)))
114
115;; Prepend the UTF-8 BOM to STR.
116(defun coding-tests-add-bom (str)
117 (concat "\xfeff" str))
118
119;; Return the name of test file whose contents specified by
120;; CONTENT-TYPE and whose encoding specified by CODING-SYSTEM.
121(defun coding-tests-filename (content-type coding-system &optional ext)
122 (if ext
123 (expand-file-name (format "%s-%s.%s" content-type coding-system ext)
124 coding-tests-workdir)
125 (expand-file-name (format "%s-%s" content-type coding-system)
126 coding-tests-workdir)))
127
128
129;;; Check ASCII optimizing decoder
130
131;; Generate a test file whose contents specified by CONTENT-TYPE and
132;; whose encoding specified by CODING-SYSTEM.
133(defun coding-tests-ao-gen-file (content-type coding-system)
134 (let ((file (coding-tests-filename content-type coding-system)))
135 (coding-tests-gen-file file
136 (coding-tests-file-contents content-type)
137 coding-system)))
138
139;; Test the decoding of a file whose contents and encoding are
140;; specified by CONTENT-TYPE and WRITE-CODING. The test passes if the
141;; file is read by READ-CODING and detected as DETECTED-CODING and the
142;; contents is correctly decoded.
143;; Optional 5th arg TRANSLATOR is a function to translate the original
144;; file contents to match with the expected result of decoding. For
145;; instance, when a file of dos eol-type is read by unix eol-type,
146;; `decode-test-lf-to-crlf' must be specified.
147
148(defun coding-tests (content-type write-coding read-coding detected-coding
149 &optional translator)
150 (prefer-coding-system 'utf-8-auto)
151 (let ((filename (coding-tests-filename content-type write-coding)))
152 (with-temp-buffer
153 (let ((coding-system-for-read read-coding)
154 (contents (coding-tests-file-contents content-type))
155 (disable-ascii-optimization nil))
156 (if translator
157 (setq contents (funcall translator contents)))
158 (insert-file-contents filename)
159 (if (and (coding-system-equal buffer-file-coding-system detected-coding)
160 (string= (buffer-string) contents))
161 nil
162 (list buffer-file-coding-system
163 (string-to-list (buffer-string))
164 (string-to-list contents)))))))
165
166(ert-deftest ert-test-coding-ascii ()
167 (unwind-protect
168 (progn
169 (dolist (eol-type '(unix dos mac))
170 (coding-tests-ao-gen-file 'ascii eol-type))
171 (should-not (coding-tests 'ascii 'unix 'undecided 'unix))
172 (should-not (coding-tests 'ascii 'dos 'undecided 'dos))
173 (should-not (coding-tests 'ascii 'dos 'dos 'dos))
174 (should-not (coding-tests 'ascii 'mac 'undecided 'mac))
175 (should-not (coding-tests 'ascii 'mac 'mac 'mac))
176 (should-not (coding-tests 'ascii 'dos 'utf-8 'utf-8-dos))
177 (should-not (coding-tests 'ascii 'dos 'unix 'unix
178 'coding-tests-lf-to-crlf))
179 (should-not (coding-tests 'ascii 'mac 'dos 'dos
180 'coding-tests-lf-to-cr))
181 (should-not (coding-tests 'ascii 'dos 'mac 'mac
182 'coding-tests-lf-to-lflf)))
183 (coding-tests-remove-files)))
184
185(ert-deftest ert-test-coding-latin ()
186 (unwind-protect
187 (progn
188 (dolist (coding '("utf-8" "utf-8-with-signature"))
189 (dolist (eol-type '("unix" "dos" "mac"))
190 (coding-tests-ao-gen-file 'latin
191 (intern (concat coding "-" eol-type)))))
192 (should-not (coding-tests 'latin 'utf-8-unix 'undecided 'utf-8-unix))
193 (should-not (coding-tests 'latin 'utf-8-unix 'utf-8-unix 'utf-8-unix))
194 (should-not (coding-tests 'latin 'utf-8-dos 'undecided 'utf-8-dos))
195 (should-not (coding-tests 'latin 'utf-8-dos 'utf-8-dos 'utf-8-dos))
196 (should-not (coding-tests 'latin 'utf-8-mac 'undecided 'utf-8-mac))
197 (should-not (coding-tests 'latin 'utf-8-mac 'utf-8-mac 'utf-8-mac))
198 (should-not (coding-tests 'latin 'utf-8-dos 'unix 'utf-8-unix
199 'coding-tests-lf-to-crlf))
200 (should-not (coding-tests 'latin 'utf-8-mac 'dos 'utf-8-dos
201 'coding-tests-lf-to-cr))
202 (should-not (coding-tests 'latin 'utf-8-dos 'mac 'utf-8-mac
203 'coding-tests-lf-to-lflf))
204 (should-not (coding-tests 'latin 'utf-8-with-signature-unix 'undecided
205 'utf-8-with-signature-unix))
206 (should-not (coding-tests 'latin 'utf-8-with-signature-unix 'utf-8-auto
207 'utf-8-with-signature-unix))
208 (should-not (coding-tests 'latin 'utf-8-with-signature-dos 'undecided
209 'utf-8-with-signature-dos))
210 (should-not (coding-tests 'latin 'utf-8-with-signature-unix 'utf-8
211 'utf-8-unix 'coding-tests-add-bom))
212 (should-not (coding-tests 'latin 'utf-8-with-signature-unix 'utf-8
213 'utf-8-unix 'coding-tests-add-bom)))
214 (coding-tests-remove-files)))
215
216(ert-deftest ert-test-coding-binary ()
217 (unwind-protect
218 (progn
219 (dolist (eol-type '("unix" "dos" "mac"))
220 (coding-tests-ao-gen-file 'binary
221 (intern (concat "raw-text" "-" eol-type))))
222 (should-not (coding-tests 'binary 'raw-text-unix 'undecided
223 'raw-text-unix))
224 (should-not (coding-tests 'binary 'raw-text-dos 'undecided
225 'raw-text-dos))
226 (should-not (coding-tests 'binary 'raw-text-mac 'undecided
227 'raw-text-mac))
228 (should-not (coding-tests 'binary 'raw-text-dos 'unix
229 'raw-text-unix 'coding-tests-lf-to-crlf))
230 (should-not (coding-tests 'binary 'raw-text-mac 'dos
231 'raw-text-dos 'coding-tests-lf-to-cr))
232 (should-not (coding-tests 'binary 'raw-text-dos 'mac
233 'raw-text-mac 'coding-tests-lf-to-lflf)))
234 (coding-tests-remove-files)))
235
236
237;;; Check the coding system `prefer-utf-8'.
238
239;; Read FILE. Check if the encoding was detected as DETECT. If
240;; PREFER is non-nil, prefer that coding system before reading.
241
242(defun coding-tests-prefer-utf-8-read (file detect prefer)
243 (with-temp-buffer
244 (with-coding-priority (if prefer (list prefer))
245 (insert-file-contents file))
246 (if (eq buffer-file-coding-system detect)
247 nil
248 (format "Invalid detection: %s" buffer-file-coding-system))))
249
250;; Read FILE, modify it, and write it. Check if the coding system
251;; used for writing was CODING. If CODING-TAG is non-nil, insert
252;; coding tag with it before writing. If STR is non-nil, insert it
253;; before writing.
254
255(defun coding-tests-prefer-utf-8-write (file coding-tag coding
256 &optional str)
257 (with-temp-buffer
258 (insert-file-contents file)
259 (goto-char (point-min))
260 (if coding-tag
261 (insert (format ";; -*- coding: %s; -*-\n" coding-tag))
262 (insert ";;\n"))
263 (if str
264 (insert str))
265 (write-file (coding-tests-filename 'test 'test "el"))
266 (if (coding-system-equal buffer-file-coding-system coding)
267 nil
268 (format "Incorrect encoding: %s" last-coding-system-used))))
269
270(ert-deftest ert-test-coding-prefer-utf-8 ()
271 (unwind-protect
272 (let ((ascii (coding-tests-gen-file "ascii.el"
273 (coding-tests-file-contents 'ascii)
274 'unix))
275 (latin (coding-tests-gen-file "utf-8.el"
276 (coding-tests-file-contents 'latin)
277 'utf-8-unix)))
278 (should-not (coding-tests-prefer-utf-8-read
279 ascii 'prefer-utf-8-unix nil))
280 (should-not (coding-tests-prefer-utf-8-read
281 latin 'utf-8-unix nil))
282 (should-not (coding-tests-prefer-utf-8-read
283 latin 'utf-8-unix 'iso-8859-1))
284 (should-not (coding-tests-prefer-utf-8-read
285 latin 'utf-8-unix 'sjis))
286 (should-not (coding-tests-prefer-utf-8-write
287 ascii nil 'prefer-utf-8-unix))
288 (should-not (coding-tests-prefer-utf-8-write
289 ascii 'iso-8859-1 'iso-8859-1-unix))
290 (should-not (coding-tests-prefer-utf-8-write
291 ascii nil 'utf-8-unix "À")))
292 (coding-tests-remove-files)))
293
294
295;;; The following is for benchmark testing of the new optimized
296;;; decoder, not for regression testing.
297
298(defun generate-ascii-file ()
299 (dotimes (i 100000)
300 (insert-char ?a 80)
301 (insert "\n")))
302
303(defun generate-rarely-nonascii-file ()
304 (dotimes (i 100000)
305 (if (/= i 50000)
306 (insert-char ?a 80)
307 (insert ?À)
308 (insert-char ?a 79))
309 (insert "\n")))
310
311(defun generate-mostly-nonascii-file ()
312 (dotimes (i 30000)
313 (insert-char ?a 80)
314 (insert "\n"))
315 (dotimes (i 20000)
316 (insert-char ?À 80)
317 (insert "\n"))
318 (dotimes (i 10000)
319 (insert-char ?あ 80)
320 (insert "\n")))
321
322
323(defvar test-file-list
324 '((generate-ascii-file
325 ("~/ascii-tag-utf-8-unix.unix" ";; -*- coding: utf-8-unix; -*-" unix)
326 ("~/ascii-tag-utf-8.unix" ";; -*- coding: utf-8; -*-" unix)
327 ("~/ascii-tag-none.unix" "" unix)
328 ("~/ascii-tag-utf-8-dos.dos" ";; -*- coding: utf-8-dos; -*-" dos)
329 ("~/ascii-tag-utf-8.dos" ";; -*- coding: utf-8; -*-" dos)
330 ("~/ascii-tag-none.dos" "" dos))
331 (generate-rarely-nonascii-file
332 ("~/utf-8-r-tag-utf-8-unix.unix" ";; -*- coding: utf-8-unix; -*-" utf-8-unix)
333 ("~/utf-8-r-tag-utf-8.unix" ";; -*- coding: utf-8; -*-" utf-8-unix)
334 ("~/utf-8-r-tag-none.unix" "" utf-8-unix)
335 ("~/utf-8-r-tag-utf-8-dos.dos" ";; -*- coding: utf-8-dos; -*-" utf-8-dos)
336 ("~/utf-8-r-tag-utf-8.dos" ";; -*- coding: utf-8; -*-" utf-8-dos)
337 ("~/utf-8-r-tag-none.dos" "" utf-8-dos))
338 (generate-mostly-nonascii-file
339 ("~/utf-8-m-tag-utf-8-unix.unix" ";; -*- coding: utf-8-unix; -*-" utf-8-unix)
340 ("~/utf-8-m-tag-utf-8.unix" ";; -*- coding: utf-8; -*-" utf-8-unix)
341 ("~/utf-8-m-tag-none.unix" "" utf-8-unix)
342 ("~/utf-8-m-tag-utf-8-dos.dos" ";; -*- coding: utf-8-dos; -*-" utf-8-dos)
343 ("~/utf-8-m-tag-utf-8.dos" ";; -*- coding: utf-8; -*-" utf-8-dos)
344 ("~/utf-8-m-tag-none.dos" "" utf-8-dos))))
345
346(defun generate-benchmark-test-file ()
347 (interactive)
348 (with-temp-buffer
349 (message "Generating data...")
350 (dolist (files test-file-list)
351 (delete-region (point-min) (point-max))
352 (funcall (car files))
353 (dolist (file (cdr files))
354 (message "Writing %s..." (car file))
355 (goto-char (point-min))
356 (insert (nth 1 file) "\n")
357 (let ((coding-system-for-write (nth 2 file)))
358 (write-region (point-min) (point-max) (car file)))
359 (delete-region (point-min) (point))))))
360
361(defun benchmark-decoder ()
362 (let ((gc-cons-threshold 4000000))
363 (insert "Without optimization:\n")
364 (dolist (files test-file-list)
365 (dolist (file (cdr files))
366 (let* ((disable-ascii-optimization t)
367 (result (benchmark-run 10
368 (with-temp-buffer (insert-file-contents (car file))))))
369 (insert (format "%s: %s\n" (car file) result)))))
370 (insert "With optimization:\n")
371 (dolist (files test-file-list)
372 (dolist (file (cdr files))
373 (let* ((disable-ascii-optimization nil)
374 (result (benchmark-run 10
375 (with-temp-buffer (insert-file-contents (car file))))))
376 (insert (format "%s: %s\n" (car file) result)))))))
377
378;; Local Variables:
379;; byte-compile-warnings: (not obsolete)
380;; End:
381
382(provide 'coding-tests)
383;; coding-tests.el ends here