diff options
| author | Stefan Monnier | 2023-09-15 14:16:48 -0400 |
|---|---|---|
| committer | Stefan Monnier | 2023-09-15 14:16:48 -0400 |
| commit | 9610aaeb9e5f3d572616f0742fca2f5e5abc141d (patch) | |
| tree | 6a34b9f6a866ca8cc9db49a183b39f5d582aeb4e /src | |
| parent | 1d952078c0c45fc095795294351a4a2ee7e6c253 (diff) | |
| download | emacs-9610aaeb9e5f3d572616f0742fca2f5e5abc141d.tar.gz emacs-9610aaeb9e5f3d572616f0742fca2f5e5abc141d.zip | |
* src/regex-emacs.c (mutually_exclusive_p): Refactor
Minor refactoring to avoid swapping p1/p2.
* src/regex-emacs.c (mutually_exclusive_exactn)
(mutually_exclusive_charset): New functions, extracted from
`mutually_exclusive_p`.
(mutually_exclusive_p): Use them.
Diffstat (limited to 'src')
| -rw-r--r-- | src/regex-emacs.c | 196 |
1 files changed, 105 insertions, 91 deletions
diff --git a/src/regex-emacs.c b/src/regex-emacs.c index 394ba22e9b0..52f240bdaf6 100644 --- a/src/regex-emacs.c +++ b/src/regex-emacs.c | |||
| @@ -3643,13 +3643,111 @@ execute_charset (re_char **pp, int c, int corig, bool unibyte, | |||
| 3643 | return not; | 3643 | return not; |
| 3644 | } | 3644 | } |
| 3645 | 3645 | ||
| 3646 | /* Case where `p2` points to an `exactn` or an `endline`. */ | ||
| 3647 | static bool | ||
| 3648 | mutually_exclusive_exactn (struct re_pattern_buffer *bufp, re_char *p1, | ||
| 3649 | re_char *p2) | ||
| 3650 | { | ||
| 3651 | bool multibyte = RE_MULTIBYTE_P (bufp); | ||
| 3652 | int c | ||
| 3653 | = (re_opcode_t) *p2 == endline ? '\n' | ||
| 3654 | : RE_STRING_CHAR (p2 + 2, multibyte); | ||
| 3655 | |||
| 3656 | if ((re_opcode_t) *p1 == exactn) | ||
| 3657 | { | ||
| 3658 | if (c != RE_STRING_CHAR (p1 + 2, multibyte)) | ||
| 3659 | { | ||
| 3660 | DEBUG_PRINT (" '%c' != '%c' => fast loop.\n", c, p1[2]); | ||
| 3661 | return true; | ||
| 3662 | } | ||
| 3663 | } | ||
| 3664 | |||
| 3665 | else if ((re_opcode_t) *p1 == charset | ||
| 3666 | || (re_opcode_t) *p1 == charset_not) | ||
| 3667 | { | ||
| 3668 | if (!execute_charset (&p1, c, c, !multibyte || ASCII_CHAR_P (c), | ||
| 3669 | Qnil)) | ||
| 3670 | { | ||
| 3671 | DEBUG_PRINT (" No match => fast loop.\n"); | ||
| 3672 | return true; | ||
| 3673 | } | ||
| 3674 | } | ||
| 3675 | else if ((re_opcode_t) *p1 == anychar | ||
| 3676 | && c == '\n') | ||
| 3677 | { | ||
| 3678 | DEBUG_PRINT (" . != \\n => fast loop.\n"); | ||
| 3679 | return true; | ||
| 3680 | } | ||
| 3681 | return false; | ||
| 3682 | } | ||
| 3683 | |||
| 3684 | /* Case where `p2` points to a `charset`. */ | ||
| 3685 | static bool | ||
| 3686 | mutually_exclusive_charset (struct re_pattern_buffer *bufp, re_char *p1, | ||
| 3687 | re_char *p2) | ||
| 3688 | { | ||
| 3689 | /* It is hard to enumerate all the characters in charset | ||
| 3690 | P2 if it includes multibyte characters. Give up in | ||
| 3691 | such a case. */ | ||
| 3692 | if (!RE_MULTIBYTE_P (bufp) || !CHARSET_RANGE_TABLE_EXISTS_P (p2)) | ||
| 3693 | { | ||
| 3694 | /* Now, we are sure that P2 has no range table. | ||
| 3695 | So, for the size of bitmap in P2, 'p2[1]' is | ||
| 3696 | enough. But P1 may have range table, so the | ||
| 3697 | size of bitmap table of P1 is extracted by | ||
| 3698 | using macro 'CHARSET_BITMAP_SIZE'. | ||
| 3699 | |||
| 3700 | In a multibyte case, we know that all the characters | ||
| 3701 | listed in P2 are ASCII. In a unibyte case, P1 has only a | ||
| 3702 | bitmap table. So, in both cases, it is enough to test | ||
| 3703 | only the bitmap table of P1. */ | ||
| 3704 | |||
| 3705 | if ((re_opcode_t) *p1 == charset) | ||
| 3706 | { | ||
| 3707 | int idx; | ||
| 3708 | /* We win if the charset inside the loop | ||
| 3709 | has no overlap with the one after the loop. */ | ||
| 3710 | for (idx = 0; | ||
| 3711 | (idx < (int) p2[1] | ||
| 3712 | && idx < CHARSET_BITMAP_SIZE (p1)); | ||
| 3713 | idx++) | ||
| 3714 | if ((p2[2 + idx] & p1[2 + idx]) != 0) | ||
| 3715 | break; | ||
| 3716 | |||
| 3717 | if (idx == p2[1] | ||
| 3718 | || idx == CHARSET_BITMAP_SIZE (p1)) | ||
| 3719 | { | ||
| 3720 | DEBUG_PRINT (" No match => fast loop.\n"); | ||
| 3721 | return true; | ||
| 3722 | } | ||
| 3723 | } | ||
| 3724 | else if ((re_opcode_t) *p1 == charset_not) | ||
| 3725 | { | ||
| 3726 | int idx; | ||
| 3727 | /* We win if the charset_not inside the loop lists | ||
| 3728 | every character listed in the charset after. */ | ||
| 3729 | for (idx = 0; idx < (int) p2[1]; idx++) | ||
| 3730 | if (! (p2[2 + idx] == 0 | ||
| 3731 | || (idx < CHARSET_BITMAP_SIZE (p1) | ||
| 3732 | && ((p2[2 + idx] & ~ p1[2 + idx]) == 0)))) | ||
| 3733 | break; | ||
| 3734 | |||
| 3735 | if (idx == p2[1]) | ||
| 3736 | { | ||
| 3737 | DEBUG_PRINT (" No match => fast loop.\n"); | ||
| 3738 | return true; | ||
| 3739 | } | ||
| 3740 | } | ||
| 3741 | } | ||
| 3742 | return false; | ||
| 3743 | } | ||
| 3744 | |||
| 3646 | /* True if "p1 matches something" implies "p2 fails". */ | 3745 | /* True if "p1 matches something" implies "p2 fails". */ |
| 3647 | static bool | 3746 | static bool |
| 3648 | mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1, | 3747 | mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1, |
| 3649 | re_char *p2) | 3748 | re_char *p2) |
| 3650 | { | 3749 | { |
| 3651 | re_opcode_t op2; | 3750 | re_opcode_t op2; |
| 3652 | bool multibyte = RE_MULTIBYTE_P (bufp); | ||
| 3653 | unsigned char *pend = bufp->buffer + bufp->used; | 3751 | unsigned char *pend = bufp->buffer + bufp->used; |
| 3654 | re_char *p2_orig = p2; | 3752 | re_char *p2_orig = p2; |
| 3655 | 3753 | ||
| @@ -3684,98 +3782,14 @@ mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1, | |||
| 3684 | 3782 | ||
| 3685 | case endline: | 3783 | case endline: |
| 3686 | case exactn: | 3784 | case exactn: |
| 3687 | { | 3785 | return mutually_exclusive_exactn (bufp, p1, p2); |
| 3688 | int c | ||
| 3689 | = (re_opcode_t) *p2 == endline ? '\n' | ||
| 3690 | : RE_STRING_CHAR (p2 + 2, multibyte); | ||
| 3691 | |||
| 3692 | if ((re_opcode_t) *p1 == exactn) | ||
| 3693 | { | ||
| 3694 | if (c != RE_STRING_CHAR (p1 + 2, multibyte)) | ||
| 3695 | { | ||
| 3696 | DEBUG_PRINT (" '%c' != '%c' => fast loop.\n", c, p1[2]); | ||
| 3697 | return true; | ||
| 3698 | } | ||
| 3699 | } | ||
| 3700 | |||
| 3701 | else if ((re_opcode_t) *p1 == charset | ||
| 3702 | || (re_opcode_t) *p1 == charset_not) | ||
| 3703 | { | ||
| 3704 | if (!execute_charset (&p1, c, c, !multibyte || ASCII_CHAR_P (c), | ||
| 3705 | Qnil)) | ||
| 3706 | { | ||
| 3707 | DEBUG_PRINT (" No match => fast loop.\n"); | ||
| 3708 | return true; | ||
| 3709 | } | ||
| 3710 | } | ||
| 3711 | else if ((re_opcode_t) *p1 == anychar | ||
| 3712 | && c == '\n') | ||
| 3713 | { | ||
| 3714 | DEBUG_PRINT (" . != \\n => fast loop.\n"); | ||
| 3715 | return true; | ||
| 3716 | } | ||
| 3717 | } | ||
| 3718 | break; | ||
| 3719 | 3786 | ||
| 3720 | case charset: | 3787 | case charset: |
| 3721 | { | 3788 | { |
| 3722 | if ((re_opcode_t) *p1 == exactn) | 3789 | if ((re_opcode_t) *p1 == exactn) |
| 3723 | /* Reuse the code above. */ | 3790 | return mutually_exclusive_exactn (bufp, p2, p1); |
| 3724 | return mutually_exclusive_p (bufp, p2, p1); | 3791 | else |
| 3725 | 3792 | return mutually_exclusive_charset (bufp, p1, p2); | |
| 3726 | /* It is hard to list up all the character in charset | ||
| 3727 | P2 if it includes multibyte character. Give up in | ||
| 3728 | such case. */ | ||
| 3729 | else if (!multibyte || !CHARSET_RANGE_TABLE_EXISTS_P (p2)) | ||
| 3730 | { | ||
| 3731 | /* Now, we are sure that P2 has no range table. | ||
| 3732 | So, for the size of bitmap in P2, 'p2[1]' is | ||
| 3733 | enough. But P1 may have range table, so the | ||
| 3734 | size of bitmap table of P1 is extracted by | ||
| 3735 | using macro 'CHARSET_BITMAP_SIZE'. | ||
| 3736 | |||
| 3737 | In a multibyte case, we know that all the character | ||
| 3738 | listed in P2 is ASCII. In a unibyte case, P1 has only a | ||
| 3739 | bitmap table. So, in both cases, it is enough to test | ||
| 3740 | only the bitmap table of P1. */ | ||
| 3741 | |||
| 3742 | if ((re_opcode_t) *p1 == charset) | ||
| 3743 | { | ||
| 3744 | int idx; | ||
| 3745 | /* We win if the charset inside the loop | ||
| 3746 | has no overlap with the one after the loop. */ | ||
| 3747 | for (idx = 0; | ||
| 3748 | (idx < (int) p2[1] | ||
| 3749 | && idx < CHARSET_BITMAP_SIZE (p1)); | ||
| 3750 | idx++) | ||
| 3751 | if ((p2[2 + idx] & p1[2 + idx]) != 0) | ||
| 3752 | break; | ||
| 3753 | |||
| 3754 | if (idx == p2[1] | ||
| 3755 | || idx == CHARSET_BITMAP_SIZE (p1)) | ||
| 3756 | { | ||
| 3757 | DEBUG_PRINT (" No match => fast loop.\n"); | ||
| 3758 | return true; | ||
| 3759 | } | ||
| 3760 | } | ||
| 3761 | else if ((re_opcode_t) *p1 == charset_not) | ||
| 3762 | { | ||
| 3763 | int idx; | ||
| 3764 | /* We win if the charset_not inside the loop lists | ||
| 3765 | every character listed in the charset after. */ | ||
| 3766 | for (idx = 0; idx < (int) p2[1]; idx++) | ||
| 3767 | if (! (p2[2 + idx] == 0 | ||
| 3768 | || (idx < CHARSET_BITMAP_SIZE (p1) | ||
| 3769 | && ((p2[2 + idx] & ~ p1[2 + idx]) == 0)))) | ||
| 3770 | break; | ||
| 3771 | |||
| 3772 | if (idx == p2[1]) | ||
| 3773 | { | ||
| 3774 | DEBUG_PRINT (" No match => fast loop.\n"); | ||
| 3775 | return true; | ||
| 3776 | } | ||
| 3777 | } | ||
| 3778 | } | ||
| 3779 | } | 3793 | } |
| 3780 | break; | 3794 | break; |
| 3781 | 3795 | ||
| @@ -3783,9 +3797,9 @@ mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1, | |||
| 3783 | switch (*p1) | 3797 | switch (*p1) |
| 3784 | { | 3798 | { |
| 3785 | case exactn: | 3799 | case exactn: |
| 3800 | return mutually_exclusive_exactn (bufp, p2, p1); | ||
| 3786 | case charset: | 3801 | case charset: |
| 3787 | /* Reuse the code above. */ | 3802 | return mutually_exclusive_charset (bufp, p2, p1); |
| 3788 | return mutually_exclusive_p (bufp, p2, p1); | ||
| 3789 | case charset_not: | 3803 | case charset_not: |
| 3790 | /* When we have two charset_not, it's very unlikely that | 3804 | /* When we have two charset_not, it's very unlikely that |
| 3791 | they don't overlap. The union of the two sets of excluded | 3805 | they don't overlap. The union of the two sets of excluded |