aboutsummaryrefslogtreecommitdiffstats
path: root/test/src
diff options
context:
space:
mode:
authorMattias Engdegård2019-06-28 10:20:55 +0200
committerMattias Engdegård2019-06-28 17:30:18 +0200
commita1f76adfb03c23bb4242928e8efe6193c301f0c1 (patch)
tree7e2a5c58656ffbe78d34dc58639d7cd5bf8f943a /test/src
parentaae5bf4438712c9fe761c5e4b5a871192852cd97 (diff)
downloademacs-a1f76adfb03c23bb4242928e8efe6193c301f0c1.tar.gz
emacs-a1f76adfb03c23bb4242928e8efe6193c301f0c1.zip
Correct regexp matching of raw bytes
Make regexp matching of raw bytes work in all combinations of unibyte and multibyte patterns and targets, as exact strings and in character alternatives (bug#3687). * src/regex-emacs.c (analyze_first): Include raw byte in fastmap when pattern is a multibyte exact string. Include leading byte in fastmap for raw bytes in character alternatives. (re_match_2_internal): Decrement the byte count by the number of bytes in the pattern character, not 1. * test/src/regex-emacs-tests.el (regexp-unibyte-unibyte) (regexp-multibyte-unibyte, regexp-unibyte-multibyte) (regexp-multibyte-multibyte): New tests.
Diffstat (limited to 'test/src')
-rw-r--r--test/src/regex-emacs-tests.el120
1 files changed, 120 insertions, 0 deletions
diff --git a/test/src/regex-emacs-tests.el b/test/src/regex-emacs-tests.el
index 0ae50c94d4c..50ed3e870a5 100644
--- a/test/src/regex-emacs-tests.el
+++ b/test/src/regex-emacs-tests.el
@@ -683,4 +683,124 @@ This evaluates the TESTS test cases from glibc."
683 (should-not (string-match "\\`x\\{65535\\}" (make-string 65534 ?x))) 683 (should-not (string-match "\\`x\\{65535\\}" (make-string 65534 ?x)))
684 (should-error (string-match "\\`x\\{65536\\}" "X") :type 'invalid-regexp)) 684 (should-error (string-match "\\`x\\{65536\\}" "X") :type 'invalid-regexp))
685 685
(ert-deftest regexp-unibyte-unibyte ()
  "Check unibyte regexps against unibyte targets.
Exercises ASCII text, raw bytes as exact strings and inside
character alternatives, and ranges whose upper bound is a raw byte."
  ;; Precondition: literals of this kind must not be multibyte.
  (dolist (literal '("ab" "\xff"))
    (should-not (multibyte-string-p literal)))
  ;; Each entry is (REGEXP TARGETS-THAT-MATCH TARGETS-THAT-DO-NOT).
  ;; Entries and targets are visited in the order listed.
  (dolist (entry '(;; ASCII
                   ("a[b]" ("ab") ())
                   ;; Raw
                   ("\xf1" ("\xf1") ("\xc1\xb1"))
                   ;; Raw, char alt
                   ("[\xf1]" ("\xf1") ("\xc1\xb1"))
                   ;; Raw range
                   ("[\x82-\xd3]" ("\xbb") ("a" "\x81" "\xd4"))
                   ;; ASCII-raw range
                   ("[f-\xd3]" ("q" "\xbb") ("e" "\xd4"))))
    (let ((regexp (car entry)))
      (dolist (target (nth 1 entry))
        (should (string-match regexp target)))
      (dolist (target (nth 2 entry))
        (should-not (string-match regexp target))))))
709
(ert-deftest regexp-multibyte-multibyte ()
  "Test matching a multibyte regexp against a multibyte string.
Every regexp and every target in this test is meant to be
multibyte: it either contains a literal non-ASCII character or is
converted explicitly with `string-to-multibyte'.  Mixed
unibyte/multibyte combinations belong in the sibling tests."
  ;; Sanity check
  (should (multibyte-string-p "åü"))
  ;; ASCII
  (should (string-match (string-to-multibyte "a[b]")
                        (string-to-multibyte "ab")))
  ;; Unicode
  (should (string-match "å[ü]z" "åüz"))
  (should-not (string-match "ü" (string-to-multibyte "\xc3\xbc")))
  ;; Raw
  (should (string-match (string-to-multibyte "\xf1")
                        (string-to-multibyte "\xf1")))
  (should-not (string-match (string-to-multibyte "\xf1")
                            (string-to-multibyte "\xc1\xb1")))
  (should-not (string-match (string-to-multibyte "\xc1\xb1")
                            (string-to-multibyte "\xf1")))
  ;; Raw, char alt
  (should (string-match (string-to-multibyte "[\xf1]")
                        (string-to-multibyte "\xf1")))
  ;; Raw range.  Targets written with only ASCII or hex escapes would
  ;; be unibyte, so they are converted to keep this test multibyte on
  ;; both sides.
  (should (string-match (string-to-multibyte "[\x82-\xd3]")
                        (string-to-multibyte "\xbb")))
  (should-not (string-match (string-to-multibyte "[\x82-\xd3]")
                            (string-to-multibyte "a")))
  (should-not (string-match (string-to-multibyte "[\x82-\xd3]") "Å"))
  (should-not (string-match (string-to-multibyte "[\x82-\xd3]") "ü"))
  (should-not (string-match (string-to-multibyte "[\x82-\xd3]")
                            (string-to-multibyte "\x81")))
  (should-not (string-match (string-to-multibyte "[\x82-\xd3]")
                            (string-to-multibyte "\xd4")))
  ;; ASCII-raw range: should exclude U+0100..U+10FFFF
  (should (string-match (string-to-multibyte "[f-\xd3]")
                        (string-to-multibyte "q")))
  (should (string-match (string-to-multibyte "[f-\xd3]")
                        (string-to-multibyte "\xbb")))
  (should-not (string-match (string-to-multibyte "[f-\xd3]")
                            (string-to-multibyte "e")))
  (should-not (string-match (string-to-multibyte "[f-\xd3]") "Å"))
  (should-not (string-match (string-to-multibyte "[f-\xd3]") "ü"))
  (should-not (string-match (string-to-multibyte "[f-\xd3]")
                            (string-to-multibyte "\xd4")))
  ;; Unicode-raw range: should be empty
  (should-not (string-match "[å-\xd3]" "å"))
  (should-not (string-match "[å-\xd3]" (string-to-multibyte "\xd3")))
  (should-not (string-match "[å-\xd3]" (string-to-multibyte "\xbb")))
  (should-not (string-match "[å-\xd3]" "ü"))
  ;; No equivalence between raw bytes and latin-1.  The raw-byte
  ;; patterns are converted explicitly: a bare "\xe5" literal is a
  ;; unibyte string, which would make these unibyte-regexp cases.
  (should-not (string-match "å" (string-to-multibyte "\xe5")))
  (should-not (string-match "[å]" (string-to-multibyte "\xe5")))
  (should-not (string-match (string-to-multibyte "\xe5") "å"))
  (should-not (string-match (string-to-multibyte "[\xe5]") "å")))
757
(ert-deftest regexp-unibyte-multibyte ()
  "Test matching a unibyte regexp against a multibyte string.
Every regexp here is a unibyte literal.  Every target is meant to
be multibyte: it either contains a literal non-ASCII character or
is converted explicitly with `string-to-multibyte'."
  ;; ASCII
  (should (string-match "a[b]" (string-to-multibyte "ab")))
  ;; Unicode
  (should (string-match "a.[^b]c" (string-to-multibyte "aåüc")))
  ;; Raw
  (should (string-match "\xf1" (string-to-multibyte "\xf1")))
  (should-not (string-match "\xc1\xb1" (string-to-multibyte "\xf1")))
  ;; Raw, char alt
  (should (string-match "[\xf1]" (string-to-multibyte "\xf1")))
  (should-not (string-match "[\xc1][\xb1]" (string-to-multibyte "\xf1")))
  ;; ASCII-raw range: should exclude U+0100..U+10FFFF.
  ;; Targets written with only ASCII or hex escapes would be unibyte,
  ;; so they are converted to keep the target side multibyte.
  (should (string-match "[f-\xd3]" (string-to-multibyte "q")))
  (should (string-match "[f-\xd3]" (string-to-multibyte "\xbb")))
  (should-not (string-match "[f-\xd3]" (string-to-multibyte "e")))
  (should-not (string-match "[f-\xd3]" "Å"))
  (should-not (string-match "[f-\xd3]" "ü"))
  (should-not (string-match "[f-\xd3]" (string-to-multibyte "\xd4")))
  ;; No equivalence between raw bytes and latin-1
  (should-not (string-match "\xe5" "å"))
  (should-not (string-match "[\xe5]" "å")))
780
(ert-deftest regexp-multibyte-unibyte ()
  "Check multibyte regexps against unibyte targets.
Patterns written with only ASCII or hex escapes are converted
with `string-to-multibyte'; each converted pattern is built once
and reused by the assertions that need it."
  (let ((ascii-alt (string-to-multibyte "a[b]"))
        (raw-exact (string-to-multibyte "\xf1"))
        (raw-alt (string-to-multibyte "[\xf1]"))
        (ascii-raw-range (string-to-multibyte "[f-\xd3]"))
        (unicode-raw-range "[å-\xd3]"))
    ;; ASCII
    (should (string-match ascii-alt "ab"))
    ;; Unicode
    (should (string-match "a[^ü]c" "abc"))
    (should-not (string-match "ü" "\xc3\xbc"))
    ;; Raw
    (should (string-match raw-exact "\xf1"))
    (should-not (string-match raw-exact "\xc1\xb1"))
    ;; Raw, char alt
    (should (string-match raw-alt "\xf1"))
    (should-not (string-match raw-alt "\xc1\xb1"))
    ;; ASCII-raw range: should exclude U+0100..U+10FFFF
    (should (string-match ascii-raw-range "q"))
    (should (string-match ascii-raw-range "\xbb"))
    (should-not (string-match ascii-raw-range "e"))
    (should-not (string-match ascii-raw-range "\xd4"))
    ;; Unicode-raw range: should be empty
    (should-not (string-match unicode-raw-range "\xd3"))
    (should-not (string-match unicode-raw-range "\xbb"))
    ;; No equivalence between raw bytes and latin-1
    (should-not (string-match "å" "\xe5"))
    (should-not (string-match "[å]" "\xe5"))))
805
686;;; regex-emacs-tests.el ends here 806;;; regex-emacs-tests.el ends here