aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDima Kogan2016-02-27 18:06:35 -0800
committerMichal Nazarewicz2016-08-02 15:39:10 +0200
commit91d53091950d7aae6f16cf47d4cce754b5eacfe5 (patch)
treec36839bd06e505cca4aee99dad7ba1cec6df07a5
parent6db72771cc08e719a08f56db8abd7cefef9c8c99 (diff)
downloademacs-91d53091950d7aae6f16cf47d4cce754b5eacfe5.tar.gz
emacs-91d53091950d7aae6f16cf47d4cce754b5eacfe5.zip
Added driver for the regex tests
* test/src/regex-tests.el (regex-tests): Test executing glibc tests cases. [mina86@mina86.com: merged test with existing file]
-rw-r--r--test/src/regex-tests.el572
1 files changed, 572 insertions, 0 deletions
diff --git a/test/src/regex-tests.el b/test/src/regex-tests.el
index 00165ab0512..13a9f86e00f 100644
--- a/test/src/regex-tests.el
+++ b/test/src/regex-tests.el
@@ -20,6 +20,7 @@
20;;; Code: 20;;; Code:
21 21
22(require 'ert) 22(require 'ert)
23(require 'cl)
23 24
24(ert-deftest regex-word-cc-fallback-test () 25(ert-deftest regex-word-cc-fallback-test ()
25 "Test that ‘[[:cc:]]*x’ matches ‘x’ (bug#24020). 26 "Test that ‘[[:cc:]]*x’ matches ‘x’ (bug#24020).
@@ -89,4 +90,575 @@ match. The test is done using `string-match-p' as well as
89 (regex--test-cc "unibyte" "abcABC012 \t\n\1" "łą\u2622") 90 (regex--test-cc "unibyte" "abcABC012 \t\n\1" "łą\u2622")
90 (regex--test-cc "multibyte" "łą\u2622" "abcABC012 \t\n\1"))) 91 (regex--test-cc "multibyte" "łą\u2622" "abcABC012 \t\n\1")))
91 92
93
94(defmacro regex-tests-generic-line (comment-char test-file whitelist &rest body)
95 "Reads a line of the test file TEST-FILE, skipping
96comments (defined by COMMENT-CHAR), and evaluates the tests in
97this line as defined in the BODY. Line numbers in the WHITELIST
98are known failures, and are skipped."
99
100 `(with-temp-buffer
101 (modify-syntax-entry ?_ "w;; ") ; tests expect _ to be a word
102 (insert-file-contents ,(concat (file-name-directory (buffer-file-name)) test-file))
103
104 (let ((case-fold-search nil)
105 (line-number 1)
106 (whitelist-idx 0))
107
108 (goto-char (point-min))
109
110 (while (not (eobp))
111 (let ((start (point)))
112 (end-of-line)
113 (narrow-to-region start (point))
114
115 (goto-char (point-min))
116
117 (when
118 (and
119 ;; ignore comments
120 (save-excursion
121 (re-search-forward ,(concat "^[^" (string comment-char) "]") nil t))
122
123 ;; skip lines in the whitelist
124 (let ((whitelist-next
125 (condition-case nil
126 (aref ,whitelist whitelist-idx) (args-out-of-range nil))))
127 (cond
128 ;; whitelist exhausted. do process this line
129 ((null whitelist-next) t)
130
131 ;; we're not yet at the next whitelist element. do
132 ;; process this line
133 ((< line-number whitelist-next) t)
134
135 ;; we're past the next whitelist element. This
136 ;; shouldn't happen
137 ((> line-number whitelist-next)
138 (error
139 (format
140 "We somehow skipped the next whitelist element: line %d" whitelist-next)))
141
142 ;; we're at the next whitelist element. Skip this
143 ;; line, and advance the whitelist index
144 (t
145 (setq whitelist-idx (1+ whitelist-idx)) nil))))
146 ,@body)
147
148 (widen)
149 (forward-line)
150 (beginning-of-line)
151 (setq line-number (1+ line-number)))))))
152
153(defun regex-tests-compare (string what-failed bounds-ref &optional substring-ref)
154 "I just ran a search, looking at STRING. WHAT-FAILED describes
155what failed, if anything; valid values are 'search-failed,
156'compilation-failed and nil. I compare the beginning/end of each
157group with their expected values. This is done with either
158BOUNDS-REF or SUBSTRING-REF; one of those should be non-nil.
159BOUNDS-REF is a sequence \[start-ref0 end-ref0 start-ref1
160end-ref1 ....] while SUBSTRING-REF is the expected substring
161obtained by indexing the input string by start/end-ref.
162
163If the search was supposed to fail then start-ref0/substring-ref0
164is 'search-failed. If the search wasn't even supposed to compile
165successfully, then start-ref0/substring-ref0 is
166'compilation-failed. If I only care about a match succeeding,
167this can be set to t.
168
169This function returns a string that describes the failure, or nil
170on success"
171
172 (when (or
173 (and bounds-ref substring-ref)
174 (not (or bounds-ref substring-ref)))
175 (error "Exactly one of bounds-ref and bounds-ref should be non-nil"))
176
177 (let ((what-failed-ref (car (or bounds-ref substring-ref))))
178
179 (cond
180 ((eq what-failed 'search-failed)
181 (cond
182 ((eq what-failed-ref 'search-failed)
183 nil)
184 ((eq what-failed-ref 'compilation-failed)
185 "Expected pattern failure; but no match")
186 (t
187 "Expected match; but no match")))
188
189 ((eq what-failed 'compilation-failed)
190 (cond
191 ((eq what-failed-ref 'search-failed)
192 "Expected no match; but pattern failure")
193 ((eq what-failed-ref 'compilation-failed)
194 nil)
195 (t
196 "Expected match; but pattern failure")))
197
198 ;; The regex match succeeded
199 ((eq what-failed-ref 'search-failed)
200 "Expected no match; but match")
201 ((eq what-failed-ref 'compilation-failed)
202 "Expected pattern failure; but match")
203
204 ;; The regex match succeeded, as expected. I now check all the
205 ;; bounds
206 (t
207 (let ((idx 0)
208 msg
209 ref next-ref-function compare-ref-function mismatched-ref-function)
210
211 (if bounds-ref
212 (setq ref bounds-ref
213 next-ref-function (lambda (x) (cddr x))
214 compare-ref-function (lambda (ref start-pos end-pos)
215 (or (eq (car ref) t)
216 (and (eq start-pos (car ref))
217 (eq end-pos (cadr ref)))))
218 mismatched-ref-function (lambda (ref start-pos end-pos)
219 (format
220 "beginning/end positions: %d/%s and %d/%s"
221 start-pos (car ref) end-pos (cadr ref))))
222 (setq ref substring-ref
223 next-ref-function (lambda (x) (cdr x))
224 compare-ref-function (lambda (ref start-pos end-pos)
225 (or (eq (car ref) t)
226 (string= (substring string start-pos end-pos) (car ref))))
227 mismatched-ref-function (lambda (ref start-pos end-pos)
228 (format
229 "beginning/end positions: %d/%s and %d/%s"
230 start-pos (car ref) end-pos (cadr ref)))))
231
232 (while (not (or (null ref) msg))
233
234 (let ((start (match-beginning idx))
235 (end (match-end idx)))
236
237 (when (not (funcall compare-ref-function ref start end))
238 (setq msg
239 (format
240 "Have expected match, but mismatch in group %d: %s" idx (funcall mismatched-ref-function ref start end))))
241
242 (setq ref (funcall next-ref-function ref)
243 idx (1+ idx))))
244
245 (or msg
246 nil))))))
247
248
249
250(defun regex-tests-match (pattern string bounds-ref &optional substring-ref)
251 "I match the given STRING against PATTERN. I compare the
252beginning/end of each group with their expected values.
253BOUNDS-REF is a sequence [start-ref0 end-ref0 start-ref1 end-ref1
254....].
255
256If the search was supposed to fail then start-ref0 is
257'search-failed. If the search wasn't even supposed to compile
258successfully, then start-ref0 is 'compilation-failed.
259
260This function returns a string that describes the failure, or nil
261on success"
262
263 (if (string-match "\\[\\([\\.=]\\)..?\\1\\]" pattern)
264 ;; Skipping test: [.x.] and [=x=] forms not supported by emacs
265 nil
266
267 (regex-tests-compare
268 string
269 (condition-case nil
270 (if (string-match pattern string) nil 'search-failed)
271 ('invalid-regexp 'compilation-failed))
272 bounds-ref substring-ref)))
273
274
275(defconst regex-tests-re-even-escapes
276 "\\(?:^\\|[^\\\\]\\)\\(?:\\\\\\\\\\)*"
277 "Regex that matches an even number of \\ characters")
278
279(defconst regex-tests-re-odd-escapes
280 (concat regex-tests-re-even-escapes "\\\\")
281 "Regex that matches an odd number of \\ characters")
282
283
284(defun regex-tests-unextend (pattern)
285 "Basic conversion from extended regexen to emacs ones. This is
286mostly a hack that adds \\ to () and | and {}, and removes it if
287it already exists. We also change \\S (and \\s) to \\S- (and
288\\s-) because extended regexen see the former as whitespace, but
289emacs requires an extra symbol character"
290
291 (with-temp-buffer
292 (insert pattern)
293 (goto-char (point-min))
294
295 (while (re-search-forward "[()|{}]" nil t)
296 ;; point is past special character. If it is escaped, unescape
297 ;; it
298
299 (if (save-excursion
300 (re-search-backward (concat regex-tests-re-odd-escapes ".\\=") nil t))
301
302 ;; This special character is preceded by an odd number of \,
303 ;; so I unescape it by removing the last one
304 (progn
305 (forward-char -2)
306 (delete-char 1)
307 (forward-char 1))
308
309 ;; This special character is preceded by an even (possibly 0)
310 ;; number of \. I add an escape
311 (forward-char -1)
312 (insert "\\")
313 (forward-char 1)))
314
315 ;; convert \s to \s-
316 (goto-char (point-min))
317 (while (re-search-forward (concat regex-tests-re-odd-escapes "[Ss]") nil t)
318 (insert "-"))
319
320 (buffer-string)))
321
322(defun regex-tests-BOOST-frob-escapes (s ispattern)
323 "Mangle \\ the way it is done in frob_escapes() in
324regex-tests-BOOST.c in glibc: \\t, \\n, \\r are interpreted;
325\\\\, \\^, \{, \\|, \} are unescaped for the string (not
326pattern)"
327
328 ;; this is all similar to (regex-tests-unextend)
329 (with-temp-buffer
330 (insert s)
331
332 (let ((interpret-list (list "t" "n" "r")))
333 (while interpret-list
334 (goto-char (point-min))
335 (while (re-search-forward
336 (concat "\\(" regex-tests-re-even-escapes "\\)"
337 "\\\\" (car interpret-list))
338 nil t)
339 (replace-match (concat "\\1" (car (read-from-string
340 (concat "\"\\" (car interpret-list) "\""))))))
341
342 (setq interpret-list (cdr interpret-list))))
343
344 (when (not ispattern)
345 ;; unescape \\, \^, \{, \|, \}
346 (let ((unescape-list (list "\\\\" "^" "{" "|" "}")))
347 (while unescape-list
348 (goto-char (point-min))
349 (while (re-search-forward
350 (concat "\\(" regex-tests-re-even-escapes "\\)"
351 "\\\\" (car unescape-list))
352 nil t)
353 (replace-match (concat "\\1" (car unescape-list))))
354
355 (setq unescape-list (cdr unescape-list))))
356 )
357 (buffer-string)))
358
359
360
361
362(defconst regex-tests-BOOST-whitelist
363 [
364 ;; emacs is more stringent with regexen involving unbalanced )
365 63 65 69
366
367 ;; in emacs, regex . doesn't match \n
368 91
369
370 ;; emacs is more forgiving with * and ? that don't apply to
371 ;; characters
372 107 108 109 122 123 124 140 141 142
373
374 ;; emacs accepts regexen with {}
375 161
376
377 ;; emacs doesn't fail on bogus ranges such as [3-1] or [1-3-5]
378 222 223
379
380 ;; emacs doesn't match (ab*)[ab]*\1 greedily: only 4 chars of
381 ;; ababaaa match
382 284 294
383
384 ;; ambiguous groupings are ambiguous
385 443 444 445 446 448 449 450
386
387 ;; emacs doesn't know how to handle weird ranges such as [a-Z] and
388 ;; [[:alpha:]-a]
389 539 580 581
390
391 ;; emacs matches non-greedy regex ab.*? non-greedily
392 639 677 712
393 ]
394 "Line numbers in the boost test that should be skipped. These
395are false-positive test failures that represent known/benign
396differences in behavior.")
397
398;; - Format
399;; - Comments are lines starting with ;
400;; - Lines starting with - set options passed to regcomp() and regexec():
401;; - if no "REG_BASIC" is found, with have an extended regex
402;; - These set a flag:
403;; - REG_ICASE
404;; - REG_NEWLINE
405;; - REG_NOTBOL
406;; - REG_NOTEOL
407;;
408;; - Test lines are
409;; pattern string start0 end0 start1 end1 ...
410;;
411;; - pattern, string can have escapes
412;; - string can have whitespace if enclosed in ""
413;; - if string is "!", then the pattern is supposed to fail compilation
414;; - start/end are of group0, group1, etc. group 0 is the full match
415;; - start<0 indicates "no match"
416;; - start is the 0-based index of the first character
417;; - end is the 0-based index of the first character past the group
418(defun regex-tests-BOOST ()
419 (let (failures
420 basic icase newline notbol noteol)
421 (regex-tests-generic-line
422 ?; "regex-resources/BOOST.tests" regex-tests-BOOST-whitelist
423 (if (save-excursion (re-search-forward "^-" nil t))
424 (setq basic (save-excursion (re-search-forward "REG_BASIC" nil t))
425 icase (save-excursion (re-search-forward "REG_ICASE" nil t))
426 newline (save-excursion (re-search-forward "REG_NEWLINE" nil t))
427 notbol (save-excursion (re-search-forward "REG_NOTBOL" nil t))
428 noteol (save-excursion (re-search-forward "REG_NOTEOL" nil t)))
429
430 (save-excursion
431 (or (re-search-forward "\\(\\S-+\\)\\s-+\"\\(.*\\)\"\\s-+?\\(.+\\)" nil t)
432 (re-search-forward "\\(\\S-+\\)\\s-+\\(\\S-+\\)\\s-+?\\(.+\\)" nil t)
433 (re-search-forward "\\(\\S-+\\)\\s-+\\(!\\)" nil t)))
434
435 (let* ((pattern-raw (match-string 1))
436 (string-raw (match-string 2))
437 (positions-raw (match-string 3))
438 (pattern (regex-tests-BOOST-frob-escapes pattern-raw t))
439 (string (regex-tests-BOOST-frob-escapes string-raw nil))
440 (positions
441 (if (string= string "!")
442 (list 'compilation-failed 0)
443 (mapcar
444 (lambda (x)
445 (let ((x (string-to-number x)))
446 (if (< x 0) nil x)))
447 (split-string positions-raw)))))
448
449 (when (null (car positions))
450 (setcar positions 'search-failed))
451
452 (when (not basic)
453 (setq pattern (regex-tests-unextend pattern)))
454
455 ;; great. I now have all the data parsed. Let's use it to do
456 ;; stuff
457 (let* ((case-fold-search icase)
458 (msg (regex-tests-match pattern string positions)))
459
460 (if (and
461 ;; Skipping test: notbol/noteol not supported
462 (not notbol) (not noteol)
463
464 msg)
465
466 ;; store failure
467 (setq failures
468 (cons (format "line number %d: Regex '%s': %s"
469 line-number pattern msg)
470 failures)))))))
471
472 failures))
473
474(defconst regex-tests-PCRE-whitelist
475 [
476 ;; ambiguous groupings are ambiguous
477 610 611 1154 1157 1160 1168 1171 1176 1179 1182 1185 1188 1193 1196 1203
478 ]
479 "Line numbers in the PCRE test that should be skipped. These
480are false-positive test failures that represent known/benign
481differences in behavior.")
482
483;; - Format
484;;
485;; regex
486;; input_string
487;; group_num: group_match | "No match"
488;; input_string
489;; group_num: group_match | "No match"
490;; input_string
491;; group_num: group_match | "No match"
492;; input_string
493;; group_num: group_match | "No match"
494;; ...
495(defun regex-tests-PCRE ()
496 (let (failures
497 pattern icase string what-failed matches-observed)
498 (regex-tests-generic-line
499 ?# "regex-resources/PCRE.tests" regex-tests-PCRE-whitelist
500
501 (cond
502
503 ;; pattern
504 ((save-excursion (re-search-forward "^/\\(.*\\)/\\(.*i?\\)$" nil t))
505 (setq icase (string= "i" (match-string 2))
506 pattern (regex-tests-unextend (match-string 1))))
507
508 ;; string. read it in, match against pattern, and save all the results
509 ((save-excursion (re-search-forward "^ \\(.*\\)" nil t))
510 (let ((case-fold-search icase))
511 (setq string (match-string 1)
512
513 ;; the regex match under test
514 what-failed
515 (condition-case nil
516 (if (string-match pattern string) nil 'search-failed)
517 ('invalid-regexp 'compilation-failed))
518
519 matches-observed
520 (loop for x from 0 to 20
521 collect (and (not what-failed)
522 (or (match-string x string) "<unset>")))))
523 nil)
524
525 ;; verification line: failed match
526 ((save-excursion (re-search-forward "^No match" nil t))
527 (unless what-failed
528 (setq failures
529 (cons (format "line number %d: Regex '%s': Expected no match; but match"
530 line-number pattern)
531 failures))))
532
533 ;; verification line: succeeded match
534 ((save-excursion (re-search-forward "^ *\\([0-9]+\\): \\(.*\\)" nil t))
535 (let* ((match-ref (match-string 2))
536 (idx (string-to-number (match-string 1))))
537
538 (if what-failed
539 "Expected match; but no match"
540 (unless (string= match-ref (elt matches-observed idx))
541 (setq failures
542 (cons (format "line number %d: Regex '%s': Have expected match, but group %d is wrong: '%s'/'%s'"
543 line-number pattern
544 idx match-ref (elt matches-observed idx))
545 failures))))))
546
547 ;; reset
548 (t (setq pattern nil) nil)))
549
550 failures))
551
552(defconst regex-tests-PTESTS-whitelist
553 [
554 ;; emacs doesn't barf on weird ranges such as [b-a], but simply
555 ;; fails to match
556 138
557
558 ;; emacs doesn't see DEL (0x78) as a [:cntrl:] character
559 168
560 ]
561 "Line numbers in the PTESTS test that should be skipped. These
562are false-positive test failures that represent known/benign
563differences in behavior.")
564
565;; - Format
566;; - fields separated by ¦ (note: this is not a |)
567;; - start¦end¦pattern¦string
568;; - start is the 1-based index of the first character
569;; - end is the 1-based index of the last character
570(defun regex-tests-PTESTS ()
571 (let (failures)
572 (regex-tests-generic-line
573 ?# "regex-resources/PTESTS" regex-tests-PTESTS-whitelist
574 (let* ((fields (split-string (buffer-string) "¦"))
575
576 ;; string has 1-based index of first char in the
577 ;; match. -1 means "no match". -2 means "invalid
578 ;; regex".
579 ;;
580 ;; start-ref is 0-based index of first char in the
581 ;; match
582 ;;
583 ;; string==0 is a special case, and I have to treat
584 ;; it as start-ref = 0
585 (start-ref (let ((raw (string-to-number (elt fields 0))))
586 (cond
587 ((= raw -2) 'compilation-failed)
588 ((= raw -1) 'search-failed)
589 ((= raw 0) 0)
590 (t (1- raw)))))
591
592 ;; string has 1-based index of last char in the
593 ;; match. end-ref is 0-based index of first char past
594 ;; the match
595 (end-ref (string-to-number (elt fields 1)))
596 (pattern (elt fields 2))
597 (string (elt fields 3)))
598
599 (let ((msg (regex-tests-match pattern string (list start-ref end-ref))))
600 (when msg
601 (setq failures
602 (cons (format "line number %d: Regex '%s': %s"
603 line-number pattern msg)
604 failures))))))
605 failures))
606
607(defconst regex-tests-TESTS-whitelist
608 [
609 ;; emacs doesn't barf on weird ranges such as [b-a], but simply
610 ;; fails to match
611 42
612
613 ;; emacs is more forgiving with * and ? that don't apply to
614 ;; characters
615 57 58 59 60
616
617 ;; emacs is more stringent with regexen involving unbalanced )
618 67
619 ]
620 "Line numbers in the TESTS test that should be skipped. These
621are false-positive test failures that represent known/benign
622differences in behavior.")
623
624;; - Format
625;; - fields separated by :. Watch for [\[:xxx:]]
626;; - expected:pattern:string
627;;
628;; expected:
629;; | 0 | successful match |
630;; | 1 | failed match |
631;; | 2 | regcomp() should fail |
632(defun regex-tests-TESTS ()
633 (let (failures)
634 (regex-tests-generic-line
635 ?# "regex-resources/TESTS" regex-tests-TESTS-whitelist
636 (if (save-excursion (re-search-forward "^\\([^:]+\\):\\(.*\\):\\([^:]*\\)$" nil t))
637 (let* ((what-failed
638 (let ((raw (string-to-number (match-string 1))))
639 (cond
640 ((= raw 2) 'compilation-failed)
641 ((= raw 1) 'search-failed)
642 (t t))))
643 (string (match-string 3))
644 (pattern (regex-tests-unextend (match-string 2))))
645
646 (let ((msg (regex-tests-match pattern string nil (list what-failed))))
647 (when msg
648 (setq failures
649 (cons (format "line number %d: Regex '%s': %s"
650 line-number pattern msg)
651 failures)))))
652
653 (error "Error parsing TESTS file line: '%s'" (buffer-string))))
654 failures))
655
656(ert-deftest regex-tests ()
657 "Tests of the regular expression engine. This evaluates the
658BOOST, PCRE, PTESTS and TESTS test cases from glibc."
659 (should-not (regex-tests-BOOST))
660 (should-not (regex-tests-PCRE))
661 (should-not (regex-tests-PTESTS))
662 (should-not (regex-tests-TESTS)))
663
92;;; regex-tests.el ends here 664;;; regex-tests.el ends here