1 files changed, 92 insertions, 0 deletions
diff --git a/test/src/regex-tests.el b/test/src/regex-tests.el
new file mode 100644
index 00000000000..00165ab0512
--- /dev/null
+++ b/test/src/regex-tests.el
@@ -0,0 +1,92 @@
+;;; regex-tests.el --- tests for regex.c functions -*- lexical-binding: t -*-
+;; Copyright (C) 2015-2016 Free Software Foundation, Inc.
+;; This file is part of GNU Emacs.
+;; GNU Emacs is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+;; GNU Emacs is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+;; You should have received a copy of the GNU General Public License
+;; along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.
+;;; Code:
+(require 'ert)
+(ert-deftest regex-word-cc-fallback-test ()
+  "Test that ‘[[:cc:]]*x’ matches ‘x’ (bug#24020).
+Test that a regex of the form \"[[:cc:]]*x\" where CC is
+a character class which matches a multibyte character X, matches
+string \"x\".
+For example, ‘[[:word:]]*\u2620’ regex (note: \u2620 is a word
+character) must match a string \"\u2620\".
+Both the bracket expression and the ‘\\sw’ syntax class are tried,
+each with ‘*’ and ‘+’ repetition, against subjects built from an
+optional \"foo\" prefix followed by the suffix."
+  (dolist (class '("[[:word:]]" "\\sw"))
+    (dolist (repeat '("*" "+"))
+      (dolist (suffix '("" "b" "bar" "\u2620"))
+        (dolist (string '("" "foo"))
+          ;; "+" needs at least one word character before SUFFIX, so
+          ;; the empty STRING is only a valid subject for "*".
+          (unless (and (string-equal repeat "+")
+                       (string-equal string ""))
+            (should (string-match (concat "^" class repeat suffix "$")
+                                  (concat string suffix)))))))))
+(defun regex--test-cc (name matching not-matching)
+  "Check character class NAME against two sample strings.
+Assert that the class matches every character of MATCHING and no
+character of NOT-MATCHING, first with `string-match-p' on bracket
+expressions built from NAME and then with `skip-chars-forward' in
+a temporary buffer holding MATCHING followed by NOT-MATCHING."
+  (let* ((cc (concat "[:" name ":]"))
+         (in-class (concat "[" cc "]"))
+         (not-in-class (concat "[^" cc "]")))
+    ;; Regexp side: anchored whole-string matches, alone and with a
+    ;; trailing U+2622 appended to both the pattern and the subject.
+    (should (string-match-p (concat "^" in-class "*$") matching))
+    (should (string-match-p (concat "^" in-class "*?\u2622$")
+                            (concat matching "\u2622")))
+    (should (string-match-p (concat "^" not-in-class "*$") not-matching))
+    (should (string-match-p (concat "^" not-in-class "*\u2622$")
+                            (concat not-matching "\u2622")))
+    ;; Buffer side: BOUNDARY is the position between the two samples.
+    (with-temp-buffer
+      (insert matching)
+      (let ((boundary (point)))
+        (insert not-matching)
+        (goto-char (point-min))
+        (skip-chars-forward cc)
+        (should (equal (point) boundary))
+        (skip-chars-forward (concat "^" cc))
+        (should (equal (point) (point-max)))
+        (goto-char (point-min))
+        (skip-chars-forward (concat cc "\u2622"))
+        (should (or (equal (point) boundary)
+                    (equal (point) (1+ boundary))))))))
+(ert-deftest regex-character-classes ()
+  "Perform sanity test of regexes using character classes.
+Walk a table of character classes; each row holds a class name,
+a string made only of characters in that class and a string made
+only of characters outside it.  Every row is handed in order to
+`regex--test-cc', which checks the class and its inversion using
+`string-match-p' as well as `skip-chars-forward'."
+  ;; Bind `case-fold-search' to nil for the duration of the checks.
+  (let ((case-fold-search nil))
+    (dolist (row '(("alnum" "abcABC012łąka" "-, \t\n")
+                   ("alpha" "abcABCłąka" "-,012 \t\n")
+                   ("digit" "012" "abcABCłąka-, \t\n")
+                   ("xdigit" "0123aBc" "łąk-, \t\n")
+                   ("upper" "ABCŁĄKA" "abc012-, \t\n")
+                   ("lower" "abcłąka" "ABC012-, \t\n")
+                   ("word" "abcABC012\u2620" "-, \t\n")
+                   ("punct" ".,-" "abcABC012\u2620 \t\n")
+                   ("cntrl" "\1\2\t\n" ".,-abcABC012\u2620 ")
+                   ("graph" "abcłąka\u2620-," " \t\n\1")
+                   ("print" "abcłąka\u2620-, " "\t\n\1")
+                   ("space" " \t\n\u2001" "abcABCł0123")
+                   ("blank" " \t" "\n\u2001")
+                   ("ascii" "abcABC012 \t\n\1" "łą\u2620")
+                   ("nonascii" "łą\u2622" "abcABC012 \t\n\1")
+                   ("unibyte" "abcABC012 \t\n\1" "łą\u2622")
+                   ("multibyte" "łą\u2622" "abcABC012 \t\n\1")))
+      (apply #'regex--test-cc row))))
+;;; regex-tests.el ends here

diff --git a/test/src/regex-tests.el b/test/src/regex-tests.el new file mode 100644 index 00000000000..00165ab0512 --- /dev/null +++ b/test/src/regex-tests.el
@@ -0,0 +1,92 @@
	1	;;; regex-tests.el --- tests for regex.c functions -*- lexical-binding: t -*-
	2
	3	;; Copyright (C) 2015-2016 Free Software Foundation, Inc.
	4
	5	;; This file is part of GNU Emacs.
	6
	7	;; GNU Emacs is free software: you can redistribute it and/or modify
	8	;; it under the terms of the GNU General Public License as published by
	9	;; the Free Software Foundation, either version 3 of the License, or
	10	;; (at your option) any later version.
	11
	12	;; GNU Emacs is distributed in the hope that it will be useful,
	13	;; but WITHOUT ANY WARRANTY; without even the implied warranty of
	14	;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	15	;; GNU General Public License for more details.
	16
	17	;; You should have received a copy of the GNU General Public License
	18	;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
	19
	20	;;; Code:
	21
	22	(require 'ert)
	23
	24	(ert-deftest regex-word-cc-fallback-test ()
	25	  "Test that ‘[[:cc:]]*x’ matches ‘x’ (bug#24020).
	26
	27	Test that a regex of the form \"[[:cc:]]*x\" where CC is
	28	a character class which matches a multibyte character X, matches
	29	string \"x\".
	30
	31	For example, ‘[[:word:]]*\u2620’ regex (note: \u2620 is a word
	32	character) must match a string \"\u2620\"."
	33	  (dolist (class '("[[:word:]]" "\\sw"))
	34	    (dolist (repeat '("*" "+"))
	35	      (dolist (suffix '("" "b" "bar" "\u2620"))
	36	        (dolist (string '("" "foo"))
	37	          (unless (and (string-equal repeat "+") ; "+" needs a word char before SUFFIX
	38	                       (string-equal string ""))
	39	            (should (string-match (concat "^" class repeat suffix "$")
	40	                                  (concat string suffix)))))))))
	41
	42	(defun regex--test-cc (name matching not-matching) ; NAME is spliced into "[:NAME:]" below
	43	(should (string-match-p (concat "^[[:" name ":]]*$") matching)) ; class must cover all of MATCHING
	44	(should (string-match-p (concat "^[[:" name ":]]*?\u2622$") ; non-greedy repeat, then a literal U+2622
	45	(concat matching "\u2622")))
	46	(should (string-match-p (concat "^[^[:" name ":]]*$") not-matching)) ; negated class must cover all of NOT-MATCHING
	47	(should (string-match-p (concat "^[^[:" name ":]]*\u2622$") ; negated class, then a literal U+2622
	48	(concat not-matching "\u2622")))
	49	(with-temp-buffer ; repeat the checks with `skip-chars-forward'
	50	(insert matching)
	51	(let ((p (point))) ; P is the boundary between MATCHING and NOT-MATCHING
	52	(insert not-matching)
	53	(goto-char (point-min))
	54	(skip-chars-forward (concat "[:" name ":]")) ; skip over characters in the class
	55	(should (equal (point) p)) ; must stop exactly at the boundary
	56	(skip-chars-forward (concat "^[:" name ":]")) ; skip over characters outside the class
	57	(should (equal (point) (point-max))) ; must reach the end of the buffer
	58	(goto-char (point-min))
	59	(skip-chars-forward (concat "[:" name ":]\u2622")) ; class plus the extra character U+2622
	60	(should (or (equal (point) p) (equal (point) (1+ p))))))) ; stopping at P or one past it is accepted
	61
	62	(ert-deftest regex-character-classes ()
	63	"Perform sanity test of regexes using character classes.
	64
	65	Go over all the supported character classes and test whether the
	66	classes and their inversions match what they are supposed to
	67	match. The test is done using `string-match-p' as well as
	68	`skip-chars-forward'."
	69	(let (case-fold-search) ; binds `case-fold-search' to nil for the calls below
	70	(regex--test-cc "alnum" "abcABC012łąka" "-, \t\n") ; args: class name, chars in the class, chars outside it
	71	(regex--test-cc "alpha" "abcABCłąka" "-,012 \t\n")
	72	(regex--test-cc "digit" "012" "abcABCłąka-, \t\n")
	73	(regex--test-cc "xdigit" "0123aBc" "łąk-, \t\n")
	74	(regex--test-cc "upper" "ABCŁĄKA" "abc012-, \t\n")
	75	(regex--test-cc "lower" "abcłąka" "ABC012-, \t\n")
	76
	77	(regex--test-cc "word" "abcABC012\u2620" "-, \t\n") ; U+2620 is listed as a word character
	78
	79	(regex--test-cc "punct" ".,-" "abcABC012\u2620 \t\n")
	80	(regex--test-cc "cntrl" "\1\2\t\n" ".,-abcABC012\u2620 ")
	81	(regex--test-cc "graph" "abcłąka\u2620-," " \t\n\1")
	82	(regex--test-cc "print" "abcłąka\u2620-, " "\t\n\1") ; unlike "graph", the space is listed as matching
	83
	84	(regex--test-cc "space" " \t\n\u2001" "abcABCł0123")
	85	(regex--test-cc "blank" " \t" "\n\u2001") ; newline and U+2001 are listed as outside "blank"
	86
	87	(regex--test-cc "ascii" "abcABC012 \t\n\1" "łą\u2620")
	88	(regex--test-cc "nonascii" "łą\u2622" "abcABC012 \t\n\1")
	89	(regex--test-cc "unibyte" "abcABC012 \t\n\1" "łą\u2622") ; same samples as "ascii" apart from the last character
	90	(regex--test-cc "multibyte" "łą\u2622" "abcABC012 \t\n\1"))) ; same samples as "nonascii"
	91
	92	;;; regex-tests.el ends here