diff options
| author | Mattias Engdegård | 2019-06-28 10:20:55 +0200 |
|---|---|---|
| committer | Mattias Engdegård | 2019-06-28 17:30:18 +0200 |
| commit | a1f76adfb03c23bb4242928e8efe6193c301f0c1 (patch) | |
| tree | 7e2a5c58656ffbe78d34dc58639d7cd5bf8f943a /test/src | |
| parent | aae5bf4438712c9fe761c5e4b5a871192852cd97 (diff) | |
| download | emacs-a1f76adfb03c23bb4242928e8efe6193c301f0c1.tar.gz emacs-a1f76adfb03c23bb4242928e8efe6193c301f0c1.zip | |
Correct regexp matching of raw bytes
Make regexp matching of raw bytes work in all combinations of unibyte
and multibyte patterns and targets, as exact strings and in character
alternatives (bug#3687).
* src/regex-emacs.c (analyze_first):
Include raw byte in fastmap when pattern is a multibyte exact string.
Include leading byte in fastmap for raw bytes in character alternatives.
(re_match_2_internal):
Decrement the byte count by the number of bytes in the pattern character,
not 1.
* test/src/regex-emacs-tests.el (regexp-unibyte-unibyte)
(regexp-multibyte-unibyte, regexp-unibyte-multibyte)
(regexp-multibyte-multibyte): New tests.
Diffstat (limited to 'test/src')
| -rw-r--r-- | test/src/regex-emacs-tests.el | 120 |
1 files changed, 120 insertions, 0 deletions
diff --git a/test/src/regex-emacs-tests.el b/test/src/regex-emacs-tests.el index 0ae50c94d4c..50ed3e870a5 100644 --- a/test/src/regex-emacs-tests.el +++ b/test/src/regex-emacs-tests.el | |||
| @@ -683,4 +683,124 @@ This evaluates the TESTS test cases from glibc." | |||
| 683 | (should-not (string-match "\\`x\\{65535\\}" (make-string 65534 ?x))) | 683 | (should-not (string-match "\\`x\\{65535\\}" (make-string 65534 ?x))) |
| 684 | (should-error (string-match "\\`x\\{65536\\}" "X") :type 'invalid-regexp)) | 684 | (should-error (string-match "\\`x\\{65536\\}" "X") :type 'invalid-regexp)) |
| 685 | 685 | ||
| 686 | (ert-deftest regexp-unibyte-unibyte () | ||
| 687 | "Test matching a unibyte regexp against a unibyte string." | ||
| 688 | ;; Sanity check: the literals used below must read as unibyte strings | ||
| 689 | (should-not (multibyte-string-p "ab")) | ||
| 690 | (should-not (multibyte-string-p "\xff")) | ||
| 691 | ;; ASCII exact char followed by a one-element character alternative | ||
| 692 | (should (string-match "a[b]" "ab")) | ||
| 693 | ;; Raw byte as an exact string; a different two-byte target must not match | ||
| 694 | (should (string-match "\xf1" "\xf1")) | ||
| 695 | (should-not (string-match "\xf1" "\xc1\xb1")) | ||
| 696 | ;; Raw byte inside a character alternative, same two targets as above | ||
| 697 | (should (string-match "[\xf1]" "\xf1")) | ||
| 698 | (should-not (string-match "[\xf1]" "\xc1\xb1")) | ||
| 699 | ;; Range between two raw bytes: accept an inner byte, reject ASCII and the bytes just outside each bound | ||
| 700 | (should (string-match "[\x82-\xd3]" "\xbb")) | ||
| 701 | (should-not (string-match "[\x82-\xd3]" "a")) | ||
| 702 | (should-not (string-match "[\x82-\xd3]" "\x81")) | ||
| 703 | (should-not (string-match "[\x82-\xd3]" "\xd4")) | ||
| 704 | ;; Range from an ASCII char to a raw byte: accept an ASCII and a raw member, reject the values just outside each bound | ||
| 705 | (should (string-match "[f-\xd3]" "q")) | ||
| 706 | (should (string-match "[f-\xd3]" "\xbb")) | ||
| 707 | (should-not (string-match "[f-\xd3]" "e")) | ||
| 708 | (should-not (string-match "[f-\xd3]" "\xd4"))) | ||
| 709 | |||
| 710 | (ert-deftest regexp-multibyte-multibyte () | ||
| 711 | "Test matching a multibyte regexp against a multibyte string." | ||
| 712 | ;; Sanity check: a literal containing non-ASCII chars must read as multibyte | ||
| 713 | (should (multibyte-string-p "åü")) | ||
| 714 | ;; ASCII-only strings are unibyte literals, so force both sides to multibyte | ||
| 715 | (should (string-match (string-to-multibyte "a[b]") | ||
| 716 | (string-to-multibyte "ab"))) | ||
| 717 | ;; Unicode chars match themselves but not the raw bytes \xc3\xbc | ||
| 718 | (should (string-match "å[ü]z" "åüz")) | ||
| 719 | (should-not (string-match "ü" (string-to-multibyte "\xc3\xbc"))) | ||
| 720 | ;; Raw byte as an exact string, checked in both directions against a two-byte sequence | ||
| 721 | (should (string-match (string-to-multibyte "\xf1") | ||
| 722 | (string-to-multibyte "\xf1"))) | ||
| 723 | (should-not (string-match (string-to-multibyte "\xf1") | ||
| 724 | (string-to-multibyte "\xc1\xb1"))) | ||
| 725 | (should-not (string-match (string-to-multibyte "\xc1\xb1") | ||
| 726 | (string-to-multibyte "\xf1"))) | ||
| 727 | ;; Raw byte inside a character alternative | ||
| 728 | (should (string-match (string-to-multibyte "[\xf1]") | ||
| 729 | (string-to-multibyte "\xf1"))) | ||
| 730 | ;; Raw range; ASCII and raw-byte targets are converted so they really are multibyte | ||
| 731 | (should (string-match (string-to-multibyte "[\x82-\xd3]") | ||
| 732 | (string-to-multibyte "\xbb"))) | ||
| 733 | (should-not (string-match (string-to-multibyte "[\x82-\xd3]") (string-to-multibyte "a"))) | ||
| 734 | (should-not (string-match (string-to-multibyte "[\x82-\xd3]") "Å")) | ||
| 735 | (should-not (string-match (string-to-multibyte "[\x82-\xd3]") "ü")) | ||
| 736 | (should-not (string-match (string-to-multibyte "[\x82-\xd3]") (string-to-multibyte "\x81"))) | ||
| 737 | (should-not (string-match (string-to-multibyte "[\x82-\xd3]") (string-to-multibyte "\xd4"))) | ||
| 738 | ;; ASCII-raw range: should exclude U+0100..U+10FFFF | ||
| 739 | (should (string-match (string-to-multibyte "[f-\xd3]") | ||
| 740 | (string-to-multibyte "q"))) | ||
| 741 | (should (string-match (string-to-multibyte "[f-\xd3]") | ||
| 742 | (string-to-multibyte "\xbb"))) | ||
| 743 | (should-not (string-match (string-to-multibyte "[f-\xd3]") (string-to-multibyte "e"))) | ||
| 744 | (should-not (string-match (string-to-multibyte "[f-\xd3]") "Å")) | ||
| 745 | (should-not (string-match (string-to-multibyte "[f-\xd3]") "ü")) | ||
| 746 | (should-not (string-match (string-to-multibyte "[f-\xd3]") (string-to-multibyte "\xd4"))) | ||
| 747 | ;; Unicode-raw range: should be empty | ||
| 748 | (should-not (string-match "[å-\xd3]" "å")) | ||
| 749 | (should-not (string-match "[å-\xd3]" (string-to-multibyte "\xd3"))) | ||
| 750 | (should-not (string-match "[å-\xd3]" (string-to-multibyte "\xbb"))) | ||
| 751 | (should-not (string-match "[å-\xd3]" "ü")) | ||
| 752 | ;; No equivalence between raw bytes and latin-1; raw-byte patterns are converted so the pattern side is multibyte too | ||
| 753 | (should-not (string-match "å" (string-to-multibyte "\xe5"))) | ||
| 754 | (should-not (string-match "[å]" (string-to-multibyte "\xe5"))) | ||
| 755 | (should-not (string-match (string-to-multibyte "\xe5") "å")) | ||
| 756 | (should-not (string-match (string-to-multibyte "[\xe5]") "å"))) | ||
| 757 | |||
| 758 | (ert-deftest regexp-unibyte-multibyte () | ||
| 759 | "Test matching a unibyte regexp against a multibyte string." | ||
| 760 | ;; ASCII pattern against an ASCII target forced to multibyte | ||
| 761 | (should (string-match "a[b]" (string-to-multibyte "ab"))) | ||
| 762 | ;; ASCII-only pattern whose wildcard and negated set consume non-ASCII chars | ||
| 763 | (should (string-match "a.[^b]c" (string-to-multibyte "aåüc"))) | ||
| 764 | ;; Raw byte as an exact string; a two-byte pattern must not match a single raw byte | ||
| 765 | (should (string-match "\xf1" (string-to-multibyte "\xf1"))) | ||
| 766 | (should-not (string-match "\xc1\xb1" (string-to-multibyte "\xf1"))) | ||
| 767 | ;; Raw byte inside a character alternative | ||
| 768 | (should (string-match "[\xf1]" (string-to-multibyte "\xf1"))) | ||
| 769 | (should-not (string-match "[\xc1][\xb1]" (string-to-multibyte "\xf1"))) | ||
| 770 | ;; ASCII-raw range: should exclude U+0100..U+10FFFF; ASCII and raw-byte targets are converted so they are multibyte | ||
| 771 | (should (string-match "[f-\xd3]" (string-to-multibyte "q"))) | ||
| 772 | (should (string-match "[f-\xd3]" (string-to-multibyte "\xbb"))) | ||
| 773 | (should-not (string-match "[f-\xd3]" (string-to-multibyte "e"))) | ||
| 774 | (should-not (string-match "[f-\xd3]" "Å")) | ||
| 775 | (should-not (string-match "[f-\xd3]" "ü")) | ||
| 776 | (should-not (string-match "[f-\xd3]" (string-to-multibyte "\xd4"))) | ||
| 777 | ;; No equivalence between raw bytes and latin-1 | ||
| 778 | (should-not (string-match "\xe5" "å")) | ||
| 779 | (should-not (string-match "[\xe5]" "å"))) | ||
| 780 | |||
| 781 | (ert-deftest regexp-multibyte-unibyte () | ||
| 782 | "Test matching a multibyte regexp against a unibyte string." | ||
| 783 | ;; ASCII pattern forced to multibyte, plain unibyte target | ||
| 784 | (should (string-match (string-to-multibyte "a[b]") "ab")) | ||
| 785 | ;; Non-ASCII char in the pattern: a negated set still accepts ASCII, and the bytes \xc3\xbc are not taken as that char | ||
| 786 | (should (string-match "a[^ü]c" "abc")) | ||
| 787 | (should-not (string-match "ü" "\xc3\xbc")) | ||
| 788 | ;; Raw byte as an exact string in a multibyte pattern | ||
| 789 | (should (string-match (string-to-multibyte "\xf1") "\xf1")) | ||
| 790 | (should-not (string-match (string-to-multibyte "\xf1") "\xc1\xb1")) | ||
| 791 | ;; Raw byte inside a character alternative in a multibyte pattern | ||
| 792 | (should (string-match (string-to-multibyte "[\xf1]") "\xf1")) | ||
| 793 | (should-not (string-match (string-to-multibyte "[\xf1]") "\xc1\xb1")) | ||
| 794 | ;; ASCII-raw range: should exclude U+0100..U+10FFFF | ||
| 795 | (should (string-match (string-to-multibyte "[f-\xd3]") "q")) | ||
| 796 | (should (string-match (string-to-multibyte "[f-\xd3]") "\xbb")) | ||
| 797 | (should-not (string-match (string-to-multibyte "[f-\xd3]") "e")) | ||
| 798 | (should-not (string-match (string-to-multibyte "[f-\xd3]") "\xd4")) | ||
| 799 | ;; Unicode-raw range: should be empty, so neither its upper bound nor a smaller raw byte matches | ||
| 800 | (should-not (string-match "[å-\xd3]" "\xd3")) | ||
| 801 | (should-not (string-match "[å-\xd3]" "\xbb")) | ||
| 802 | ;; No equivalence between raw bytes and latin-1: the char in the pattern must not match the unibyte byte \xe5 | ||
| 803 | (should-not (string-match "å" "\xe5")) | ||
| 804 | (should-not (string-match "[å]" "\xe5"))) | ||
| 805 | |||
| 686 | ;;; regex-emacs-tests.el ends here | 806 | ;;; regex-emacs-tests.el ends here |