aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author: Stefan Monnier, 2023-09-15 14:16:48 -0400
committer: Stefan Monnier, 2023-09-15 14:16:48 -0400
commit: 9610aaeb9e5f3d572616f0742fca2f5e5abc141d (patch)
tree: 6a34b9f6a866ca8cc9db49a183b39f5d582aeb4e /src
parent: 1d952078c0c45fc095795294351a4a2ee7e6c253 (diff)
download: emacs-9610aaeb9e5f3d572616f0742fca2f5e5abc141d.tar.gz
emacs-9610aaeb9e5f3d572616f0742fca2f5e5abc141d.zip
* src/regex-emacs.c (mutually_exclusive_p): Refactor
Minor refactoring to avoid swapping p1/p2.

* src/regex-emacs.c (mutually_exclusive_exactn)
(mutually_exclusive_charset): New functions, extracted from
`mutually_exclusive_p`.
(mutually_exclusive_p): Use them.
Diffstat (limited to 'src')
-rw-r--r-- src/regex-emacs.c 196
1 file changed, 105 insertions, 91 deletions
diff --git a/src/regex-emacs.c b/src/regex-emacs.c
index 394ba22e9b0..52f240bdaf6 100644
--- a/src/regex-emacs.c
+++ b/src/regex-emacs.c
@@ -3643,13 +3643,111 @@ execute_charset (re_char **pp, int c, int corig, bool unibyte,
3643 return not; 3643 return not;
3644} 3644}
3645 3645
3646/* Case where `p2` points to an `exactn`. */
3647static bool
3648mutually_exclusive_exactn (struct re_pattern_buffer *bufp, re_char *p1,
3649 re_char *p2)
3650{
3651 bool multibyte = RE_MULTIBYTE_P (bufp);
3652 int c
3653 = (re_opcode_t) *p2 == endline ? '\n'
3654 : RE_STRING_CHAR (p2 + 2, multibyte);
3655
3656 if ((re_opcode_t) *p1 == exactn)
3657 {
3658 if (c != RE_STRING_CHAR (p1 + 2, multibyte))
3659 {
3660 DEBUG_PRINT (" '%c' != '%c' => fast loop.\n", c, p1[2]);
3661 return true;
3662 }
3663 }
3664
3665 else if ((re_opcode_t) *p1 == charset
3666 || (re_opcode_t) *p1 == charset_not)
3667 {
3668 if (!execute_charset (&p1, c, c, !multibyte || ASCII_CHAR_P (c),
3669 Qnil))
3670 {
3671 DEBUG_PRINT (" No match => fast loop.\n");
3672 return true;
3673 }
3674 }
3675 else if ((re_opcode_t) *p1 == anychar
3676 && c == '\n')
3677 {
3678 DEBUG_PRINT (" . != \\n => fast loop.\n");
3679 return true;
3680 }
3681 return false;
3682}
3683
3684/* Case where `p2` points to an `charset`. */
3685static bool
3686mutually_exclusive_charset (struct re_pattern_buffer *bufp, re_char *p1,
3687 re_char *p2)
3688{
3689 /* It is hard to list up all the character in charset
3690 P2 if it includes multibyte character. Give up in
3691 such case. */
3692 if (!RE_MULTIBYTE_P (bufp) || !CHARSET_RANGE_TABLE_EXISTS_P (p2))
3693 {
3694 /* Now, we are sure that P2 has no range table.
3695 So, for the size of bitmap in P2, 'p2[1]' is
3696 enough. But P1 may have range table, so the
3697 size of bitmap table of P1 is extracted by
3698 using macro 'CHARSET_BITMAP_SIZE'.
3699
3700 In a multibyte case, we know that all the character
3701 listed in P2 is ASCII. In a unibyte case, P1 has only a
3702 bitmap table. So, in both cases, it is enough to test
3703 only the bitmap table of P1. */
3704
3705 if ((re_opcode_t) *p1 == charset)
3706 {
3707 int idx;
3708 /* We win if the charset inside the loop
3709 has no overlap with the one after the loop. */
3710 for (idx = 0;
3711 (idx < (int) p2[1]
3712 && idx < CHARSET_BITMAP_SIZE (p1));
3713 idx++)
3714 if ((p2[2 + idx] & p1[2 + idx]) != 0)
3715 break;
3716
3717 if (idx == p2[1]
3718 || idx == CHARSET_BITMAP_SIZE (p1))
3719 {
3720 DEBUG_PRINT (" No match => fast loop.\n");
3721 return true;
3722 }
3723 }
3724 else if ((re_opcode_t) *p1 == charset_not)
3725 {
3726 int idx;
3727 /* We win if the charset_not inside the loop lists
3728 every character listed in the charset after. */
3729 for (idx = 0; idx < (int) p2[1]; idx++)
3730 if (! (p2[2 + idx] == 0
3731 || (idx < CHARSET_BITMAP_SIZE (p1)
3732 && ((p2[2 + idx] & ~ p1[2 + idx]) == 0))))
3733 break;
3734
3735 if (idx == p2[1])
3736 {
3737 DEBUG_PRINT (" No match => fast loop.\n");
3738 return true;
3739 }
3740 }
3741 }
3742 return false;
3743}
3744
3646/* True if "p1 matches something" implies "p2 fails". */ 3745/* True if "p1 matches something" implies "p2 fails". */
3647static bool 3746static bool
3648mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1, 3747mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1,
3649 re_char *p2) 3748 re_char *p2)
3650{ 3749{
3651 re_opcode_t op2; 3750 re_opcode_t op2;
3652 bool multibyte = RE_MULTIBYTE_P (bufp);
3653 unsigned char *pend = bufp->buffer + bufp->used; 3751 unsigned char *pend = bufp->buffer + bufp->used;
3654 re_char *p2_orig = p2; 3752 re_char *p2_orig = p2;
3655 3753
@@ -3684,98 +3782,14 @@ mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1,
3684 3782
3685 case endline: 3783 case endline:
3686 case exactn: 3784 case exactn:
3687 { 3785 return mutually_exclusive_exactn (bufp, p1, p2);
3688 int c
3689 = (re_opcode_t) *p2 == endline ? '\n'
3690 : RE_STRING_CHAR (p2 + 2, multibyte);
3691
3692 if ((re_opcode_t) *p1 == exactn)
3693 {
3694 if (c != RE_STRING_CHAR (p1 + 2, multibyte))
3695 {
3696 DEBUG_PRINT (" '%c' != '%c' => fast loop.\n", c, p1[2]);
3697 return true;
3698 }
3699 }
3700
3701 else if ((re_opcode_t) *p1 == charset
3702 || (re_opcode_t) *p1 == charset_not)
3703 {
3704 if (!execute_charset (&p1, c, c, !multibyte || ASCII_CHAR_P (c),
3705 Qnil))
3706 {
3707 DEBUG_PRINT (" No match => fast loop.\n");
3708 return true;
3709 }
3710 }
3711 else if ((re_opcode_t) *p1 == anychar
3712 && c == '\n')
3713 {
3714 DEBUG_PRINT (" . != \\n => fast loop.\n");
3715 return true;
3716 }
3717 }
3718 break;
3719 3786
3720 case charset: 3787 case charset:
3721 { 3788 {
3722 if ((re_opcode_t) *p1 == exactn) 3789 if ((re_opcode_t) *p1 == exactn)
3723 /* Reuse the code above. */ 3790 return mutually_exclusive_exactn (bufp, p2, p1);
3724 return mutually_exclusive_p (bufp, p2, p1); 3791 else
3725 3792 return mutually_exclusive_charset (bufp, p1, p2);
3726 /* It is hard to list up all the character in charset
3727 P2 if it includes multibyte character. Give up in
3728 such case. */
3729 else if (!multibyte || !CHARSET_RANGE_TABLE_EXISTS_P (p2))
3730 {
3731 /* Now, we are sure that P2 has no range table.
3732 So, for the size of bitmap in P2, 'p2[1]' is
3733 enough. But P1 may have range table, so the
3734 size of bitmap table of P1 is extracted by
3735 using macro 'CHARSET_BITMAP_SIZE'.
3736
3737 In a multibyte case, we know that all the character
3738 listed in P2 is ASCII. In a unibyte case, P1 has only a
3739 bitmap table. So, in both cases, it is enough to test
3740 only the bitmap table of P1. */
3741
3742 if ((re_opcode_t) *p1 == charset)
3743 {
3744 int idx;
3745 /* We win if the charset inside the loop
3746 has no overlap with the one after the loop. */
3747 for (idx = 0;
3748 (idx < (int) p2[1]
3749 && idx < CHARSET_BITMAP_SIZE (p1));
3750 idx++)
3751 if ((p2[2 + idx] & p1[2 + idx]) != 0)
3752 break;
3753
3754 if (idx == p2[1]
3755 || idx == CHARSET_BITMAP_SIZE (p1))
3756 {
3757 DEBUG_PRINT (" No match => fast loop.\n");
3758 return true;
3759 }
3760 }
3761 else if ((re_opcode_t) *p1 == charset_not)
3762 {
3763 int idx;
3764 /* We win if the charset_not inside the loop lists
3765 every character listed in the charset after. */
3766 for (idx = 0; idx < (int) p2[1]; idx++)
3767 if (! (p2[2 + idx] == 0
3768 || (idx < CHARSET_BITMAP_SIZE (p1)
3769 && ((p2[2 + idx] & ~ p1[2 + idx]) == 0))))
3770 break;
3771
3772 if (idx == p2[1])
3773 {
3774 DEBUG_PRINT (" No match => fast loop.\n");
3775 return true;
3776 }
3777 }
3778 }
3779 } 3793 }
3780 break; 3794 break;
3781 3795
@@ -3783,9 +3797,9 @@ mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1,
3783 switch (*p1) 3797 switch (*p1)
3784 { 3798 {
3785 case exactn: 3799 case exactn:
3800 return mutually_exclusive_exactn (bufp, p2, p1);
3786 case charset: 3801 case charset:
3787 /* Reuse the code above. */ 3802 return mutually_exclusive_charset (bufp, p2, p1);
3788 return mutually_exclusive_p (bufp, p2, p1);
3789 case charset_not: 3803 case charset_not:
3790 /* When we have two charset_not, it's very unlikely that 3804 /* When we have two charset_not, it's very unlikely that
3791 they don't overlap. The union of the two sets of excluded 3805 they don't overlap. The union of the two sets of excluded