aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorStefan Monnier2023-09-29 17:39:10 -0400
committerStefan Monnier2023-09-29 17:39:10 -0400
commite61a03984335b4ffb164280b2df80668b2a92c23 (patch)
tree561cddddaf41c7ea3f44466f7881b5f927f69173
parent6e44d6e18438ea2665ae6252a6ec090963dd7e42 (diff)
downloademacs-e61a03984335b4ffb164280b2df80668b2a92c23.tar.gz
emacs-e61a03984335b4ffb164280b2df80668b2a92c23.zip
regex.c: Consolidate the two analysis functions
We currently have two functions that analyze the bytecode to try and apply optimizations: `analyze_first` and `mutually_exclusive_p`. Extract the common code between them into a new function `forall_firstchar`, and then rewrite the old ones on top of that one. Along the way, we get slightly better analyses that reverts the recent de-optimizations but without re-introducing the corresponding bugs. * src/regex-emacs.c (forall_firstchar_1, forall_firstchar): New functions. (analyze_first_old): Rename from `analyze_first`. (struct anafirst_data): New struct. (analyze_first_fastmap, analyze_first_null): New functions. (analyze_first): Rewrite to use `forall_firstchar` with those two functions. Take a `bufp` rather than a `multibyte` arg. (regex_compile, re_compile_fastmap): Adjust calls accordingly. (struct mutexcl_data): New struct. (mutually_exclusive_one): New function. (mutually_exclusive_p): Rewrite to use `forall_firstchar` with that function.
-rw-r--r--src/regex-emacs.c591
1 files changed, 580 insertions, 11 deletions
diff --git a/src/regex-emacs.c b/src/regex-emacs.c
index 33bdad5c46b..dbd5bcb8953 100644
--- a/src/regex-emacs.c
+++ b/src/regex-emacs.c
@@ -1179,8 +1179,8 @@ static void insert_op2 (re_opcode_t op, unsigned char *loc,
1179static bool at_begline_loc_p (re_char *pattern, re_char *p); 1179static bool at_begline_loc_p (re_char *pattern, re_char *p);
1180static bool at_endline_loc_p (re_char *p, re_char *pend); 1180static bool at_endline_loc_p (re_char *p, re_char *pend);
1181static re_char *skip_one_char (re_char *p); 1181static re_char *skip_one_char (re_char *p);
1182static bool analyze_first (re_char *p, re_char *pend, 1182static bool analyze_first (struct re_pattern_buffer *bufp,
1183 char *fastmap, bool multibyte); 1183 re_char *p, re_char *pend, char *fastmap);
1184 1184
1185/* Fetch the next character in the uncompiled pattern, with no 1185/* Fetch the next character in the uncompiled pattern, with no
1186 translation. */ 1186 translation. */
@@ -1930,7 +1930,7 @@ regex_compile (re_char *pattern, ptrdiff_t size,
1930 ptrdiff_t startoffset = 0; 1930 ptrdiff_t startoffset = 0;
1931 re_opcode_t ofj = 1931 re_opcode_t ofj =
1932 /* Check if the loop can match the empty string. */ 1932 /* Check if the loop can match the empty string. */
1933 (simple || !analyze_first (laststart, b, NULL, false)) 1933 (simple || !analyze_first (bufp, laststart, b, NULL))
1934 ? on_failure_jump : on_failure_jump_loop; 1934 ? on_failure_jump : on_failure_jump_loop;
1935 eassert (skip_one_char (laststart) <= b); 1935 eassert (skip_one_char (laststart) <= b);
1936 1936
@@ -1987,7 +1987,7 @@ regex_compile (re_char *pattern, ptrdiff_t size,
1987 GET_BUFFER_SPACE (7); /* We might use less. */ 1987 GET_BUFFER_SPACE (7); /* We might use less. */
1988 if (many_times_ok) 1988 if (many_times_ok)
1989 { 1989 {
1990 bool emptyp = analyze_first (laststart, b, NULL, false); 1990 bool emptyp = analyze_first (bufp, laststart, b, NULL);
1991 1991
1992 /* The non-greedy multiple match looks like 1992 /* The non-greedy multiple match looks like
1993 a repeat..until: we only need a conditional jump 1993 a repeat..until: we only need a conditional jump
@@ -2822,7 +2822,229 @@ group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum)
2822 return false; 2822 return false;
2823} 2823}
2824 2824
2825/* analyze_first. 2825/* Iterate through all the char-matching operations directly reachable from P.
2826 This is the inner loop of `forall_firstchar`, which see.
2827 LOOP_BEG..LOOP_END delimit the currentl "block" of code (we assume
2828 the code is made of syntactically nested loops).
2829 LOOP_END is blindly assumed to be "safe".
2830 To guarantee termination, at each iteration, either LOOP_BEG should
2831 get bigger, or it should stay the same and P should get bigger. */
2832static bool
2833forall_firstchar_1 (re_char *p, re_char *pend,
2834 re_char *loop_beg, re_char *loop_end,
2835 bool f (const re_char *p, void *arg), void *arg)
2836{
2837 eassert (p >= loop_beg);
2838 eassert (p <= loop_end);
2839
2840 while (true)
2841 {
2842 re_char *newp1, *newp2, *tmp;
2843 re_char *p_orig = p;
2844
2845 if (p == pend)
2846 return false;
2847 else if (p == loop_end)
2848 return true;
2849 else if (p > loop_end)
2850 {
2851#if ENABLE_CHECKING
2852 fprintf (stderr, "FORALL_FIRSTCHAR: Broken assumption1!!\n");
2853#endif
2854 return false; /* FIXME: Broken assumption about the code shape. */
2855 }
2856 else
2857 switch (*p)
2858 {
2859 /* Cases which stop the iteration. */
2860 case succeed:
2861 case exactn:
2862 case charset:
2863 case charset_not:
2864 case anychar:
2865 case syntaxspec:
2866 case notsyntaxspec:
2867 case categoryspec:
2868 case notcategoryspec:
2869 return f (p, arg);
2870
2871 /* Cases which may match the empty string. */
2872 case at_dot:
2873 case begbuf:
2874 case no_op:
2875 case wordbound:
2876 case notwordbound:
2877 case begline:
2878 p++;
2879 continue;
2880
2881 /* Cases which may match the empty string and may
2882 tell us something about the next char. */
2883 case endline:
2884 case endbuf:
2885 case wordbeg:
2886 case wordend:
2887 case symbeg:
2888 case symend:
2889 if (f (p, arg))
2890 return true;
2891 p++;
2892 continue;
2893
2894 case jump:
2895 case jump_n:
2896 newp1 = extract_address (p + 1);
2897 if (newp1 > p)
2898 { /* Forward jump, boring. */
2899 p = newp1;
2900 continue;
2901 }
2902 switch (*newp1)
2903 {
2904 case on_failure_jump:
2905 case on_failure_keep_string_jump:
2906 case on_failure_jump_nastyloop:
2907 case on_failure_jump_loop:
2908 case on_failure_jump_smart:
2909 case succeed_n:
2910 newp2 = extract_address (newp1 + 1);
2911 goto do_twoway_jump;
2912 default:
2913 newp2 = loop_end; /* "Safe" choice. */
2914 goto do_jump;
2915 }
2916
2917 case on_failure_jump:
2918 case on_failure_keep_string_jump:
2919 case on_failure_jump_nastyloop:
2920 case on_failure_jump_loop:
2921 case on_failure_jump_smart:
2922 newp1 = extract_address (p + 1);
2923 newp2 = p + 3;
2924 /* For `+` loops, we often have an `on_failure_jump` that skips
2925 forward over a subsequent `jump`. Recognize this pattern
2926 since that subsequent `jump` is the one that jumps to the
2927 loop-entry. */
2928 newp2 = ((re_opcode_t) *newp2 == jump)
2929 ? extract_address (newp2 + 1) : newp2;
2930
2931 do_twoway_jump:
2932 /* We have to check that both destinations are safe.
2933 Arrange for `newp1` to be the smaller of the two. */
2934 if (newp1 > newp2)
2935 (tmp = newp1, newp1 = newp2, newp2 = tmp);
2936
2937 if (newp2 <= p_orig) /* Both destinations go backward! */
2938 {
2939#if ENABLE_CHECKING
2940 fprintf (stderr, "FORALL_FIRSTCHAR: Broken assumption2!!\n");
2941#endif
2942 return false;
2943 }
2944
2945 if (!forall_firstchar_1 (newp2, pend, loop_beg, loop_end, f, arg))
2946 return false;
2947
2948 do_jump:
2949 eassert (newp2 <= loop_end);
2950 if (newp1 <= p_orig)
2951 {
2952 if (newp1 < loop_beg)
2953 {
2954#if ENABLE_CHECKING
2955 fprintf (stderr, "FORALL_FIRSTCHAR: Broken assumption3!!\n");
2956#endif
2957 return false;
2958 }
2959 else if (newp1 == loop_beg)
2960 /* If we jump backward to the entry point of the current loop
2961 it means it's a zero-length cycle through that loop;
2962 this cycle itself does not break safety. */
2963 return true;
2964 else
2965 /* We jump backward to a new loop, nested within the current
2966 one. `newp1` is the entry point and `newp2` the exit of
2967 that inner loop. */
2968 /* `p` gets smaller, but termination is still ensured because
2969 `loop_beg` gets bigger. */
2970 (loop_beg = newp1, loop_end = newp2);
2971 }
2972 p = newp1;
2973 continue;
2974
2975 case succeed_n:
2976 newp1 = extract_address (p + 1);
2977 newp2 = p + 5; /* Skip the two bytes containing the count. */
2978 goto do_twoway_jump;
2979
2980 case set_number_at:
2981 int offset = extract_number (p + 1);
2982 DEBUG_STATEMENT (eassert (extract_number (p + 3)));
2983 /* If we're setting the counter of an immediately following
2984 `succeed_n`, then this next execution of `succeed_n` will do
2985 nothing but decrement its counter and "fall through".
2986 So we do the fall through here to avoid considering the
2987 "on failure" part of the `succeed_n` which should only be
2988 considered when coming from the `jump(_n)` at the end of
2989 the loop. */
2990 p += (offset == 5 && p[5] == succeed_n) ? 10 : 5;
2991 continue;
2992
2993 case start_memory:
2994 case stop_memory:
2995 p += 2;
2996 continue;
2997
2998 /* This could match the empty string, so we may need to continue,
2999 but in most cases, this can match "anything", so we should
3000 return `false` unless told otherwise. */
3001 case duplicate:
3002 if (!f (p, arg))
3003 return false;
3004 p += 2;
3005 continue;
3006
3007 default:
3008 abort (); /* We have listed all the cases. */
3009 }
3010 }
3011}
3012
3013/* Iterate through all the char-matching operations directly reachable from P.
3014 Return true if P is "safe", meaning that PEND cannot be reached directly
3015 from P and all calls to F returned true.
3016 Return false if PEND *may* be directly reachable from P or if one of
3017 the calls to F returned false.
3018 PEND can be NULL (and hence never reachable).
3019
3020 Call `F (POS, ARG)` for every POS directly reachable from P,
3021 before reaching PEND, where POS is the position of a char-matching
3022 operation (`exactn`, `charset`, ...).
3023
3024 For operations that match the empty string (`wordbeg`, ...), if F
3025 returns true we stop going down that path immediately but if it returns
3026 false we don't consider it as a failure and we simply look for the
3027 next char-matching operations on that path.
3028 For `duplicate`, it is the reverse: a false is an immediate failure
3029 whereas a true just lets the analysis continue with the rest of the path.
3030
3031 This function can be used while building the bytecode (in which case
3032 you should pass NULL for bufp), but if so, P and PEND need to delimit
3033 a valid block such that there is not jump to a location outside
3034 of [P...PEND]. */
3035static bool
3036forall_firstchar (struct re_pattern_buffer *bufp, re_char *p, re_char *pend,
3037 bool f (re_char *p, void *arg), void *arg)
3038{
3039 eassert (!bufp || bufp->used);
3040 eassert (pend || bufp->used);
3041 return forall_firstchar_1 (p, pend,
3042 bufp ? bufp->buffer - 1 : p,
3043 bufp ? bufp->buffer + bufp->used + 1 : pend,
3044 f, arg);
3045}
3046
3047/* analyze_first_old.
2826 If fastmap is non-NULL, go through the pattern and fill fastmap 3048 If fastmap is non-NULL, go through the pattern and fill fastmap
2827 with all the possible leading chars. If fastmap is NULL, don't 3049 with all the possible leading chars. If fastmap is NULL, don't
2828 bother filling it up (obviously) and only return whether the 3050 bother filling it up (obviously) and only return whether the
@@ -2833,7 +3055,7 @@ group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum)
2833 or if fastmap was not updated accurately. */ 3055 or if fastmap was not updated accurately. */
2834 3056
2835static bool 3057static bool
2836analyze_first (re_char *p, re_char *pend, char *fastmap, bool multibyte) 3058analyze_first_old (re_char *p, re_char *pend, char *fastmap, bool multibyte)
2837{ 3059{
2838 int j, k; 3060 int j, k;
2839 int nbits; 3061 int nbits;
@@ -3079,7 +3301,7 @@ analyze_first (re_char *p, re_char *pend, char *fastmap, bool multibyte)
3079 { /* We have to look down both arms. 3301 { /* We have to look down both arms.
3080 We first go down the "straight" path so as to minimize 3302 We first go down the "straight" path so as to minimize
3081 stack usage when going through alternatives. */ 3303 stack usage when going through alternatives. */
3082 bool r = analyze_first (p, pend, fastmap, multibyte); 3304 bool r = analyze_first_old (p, pend, fastmap, multibyte);
3083 if (r) return r; 3305 if (r) return r;
3084 p += j; 3306 p += j;
3085 } 3307 }
@@ -3133,7 +3355,263 @@ analyze_first (re_char *p, re_char *pend, char *fastmap, bool multibyte)
3133 /* We reached the end without matching anything. */ 3355 /* We reached the end without matching anything. */
3134 return true; 3356 return true;
3135 3357
3136} /* analyze_first */ 3358} /* analyze_first_old */
3359
3360struct anafirst_data {
3361 bool multibyte;
3362 char *fastmap;
3363 bool match_any_multibyte_characters;
3364};
3365
3366static bool
3367analyze_first_fastmap (const re_char *p, void *arg)
3368{
3369 struct anafirst_data *data = arg;
3370
3371 int j, k;
3372 int nbits;
3373 bool not;
3374
3375 switch (*p)
3376 {
3377 case succeed:
3378 return false;
3379
3380 case duplicate:
3381 /* If the first character has to match a backreference, that means
3382 that the group was empty (since it already matched). Since this
3383 is the only case that interests us here, we can assume that the
3384 backreference must match the empty string and we need to
3385 build the fastmap from the rest of the path. */
3386 return true;
3387
3388 /* Following are the cases which match a character. These end
3389 with 'break'. */
3390
3391 case exactn:
3392 p++;
3393 /* If multibyte is nonzero, the first byte of each
3394 character is an ASCII or a leading code. Otherwise,
3395 each byte is a character. Thus, this works in both
3396 cases. */
3397 data->fastmap[p[1]] = 1;
3398 if (data->multibyte)
3399 {
3400 /* Cover the case of matching a raw char in a
3401 multibyte regexp against unibyte. */
3402 if (CHAR_BYTE8_HEAD_P (p[1]))
3403 data->fastmap[CHAR_TO_BYTE8 (STRING_CHAR (p + 1))] = 1;
3404 }
3405 else
3406 {
3407 /* For the case of matching this unibyte regex
3408 against multibyte, we must set a leading code of
3409 the corresponding multibyte character. */
3410 int c = RE_CHAR_TO_MULTIBYTE (p[1]);
3411
3412 data->fastmap[CHAR_LEADING_CODE (c)] = 1;
3413 }
3414 return true;
3415
3416 case anychar:
3417 /* We could put all the chars except for \n (and maybe \0)
3418 but we don't bother since it is generally not worth it. */
3419 return false;
3420
3421 case charset_not:
3422 {
3423 /* Chars beyond end of bitmap are possible matches. */
3424 for (j = CHARSET_BITMAP_SIZE (p) * BYTEWIDTH;
3425 j < (1 << BYTEWIDTH); j++)
3426 data->fastmap[j] = 1;
3427 }
3428 FALLTHROUGH;
3429 case charset:
3430 not = (re_opcode_t) *(p) == charset_not;
3431 nbits = CHARSET_BITMAP_SIZE (p) * BYTEWIDTH;
3432 p += 2;
3433 for (j = 0; j < nbits; j++)
3434 if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not)
3435 data->fastmap[j] = 1;
3436
3437 /* To match raw bytes (in the 80..ff range) against multibyte
3438 strings, add their leading bytes to the fastmap. */
3439 for (j = 0x80; j < nbits; j++)
3440 if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not)
3441 data->fastmap[CHAR_LEADING_CODE (BYTE8_TO_CHAR (j))] = 1;
3442
3443 if (/* Any leading code can possibly start a character
3444 which doesn't match the specified set of characters. */
3445 not
3446 ||
3447 /* If we can match a character class, we can match any
3448 multibyte characters. */
3449 (CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
3450 && CHARSET_RANGE_TABLE_BITS (&p[-2]) != 0))
3451
3452 {
3453 if (!data->match_any_multibyte_characters)
3454 {
3455 for (j = MIN_MULTIBYTE_LEADING_CODE;
3456 j <= MAX_MULTIBYTE_LEADING_CODE; j++)
3457 data->fastmap[j] = 1;
3458 data->match_any_multibyte_characters = true;
3459 }
3460 }
3461
3462 else if (!not && CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
3463 && data->match_any_multibyte_characters == false)
3464 {
3465 /* Set fastmap[I] to 1 where I is a leading code of each
3466 multibyte character in the range table. */
3467 int c, count;
3468 unsigned char lc1, lc2;
3469
3470 /* Make P points the range table. '+ 2' is to skip flag
3471 bits for a character class. */
3472 p += CHARSET_BITMAP_SIZE (&p[-2]) + 2;
3473
3474 /* Extract the number of ranges in range table into COUNT. */
3475 EXTRACT_NUMBER_AND_INCR (count, p);
3476 for (; count > 0; count--, p += 3)
3477 {
3478 /* Extract the start and end of each range. */
3479 EXTRACT_CHARACTER (c, p);
3480 lc1 = CHAR_LEADING_CODE (c);
3481 p += 3;
3482 EXTRACT_CHARACTER (c, p);
3483 lc2 = CHAR_LEADING_CODE (c);
3484 for (j = lc1; j <= lc2; j++)
3485 data->fastmap[j] = 1;
3486 }
3487 }
3488 return true;
3489
3490 case syntaxspec:
3491 case notsyntaxspec:
3492 /* This match depends on text properties. These end with
3493 aborting optimizations. */
3494 return false;
3495
3496 case categoryspec:
3497 case notcategoryspec:
3498 not = (re_opcode_t)p[0] == notcategoryspec;
3499 p++;
3500 k = *p++;
3501 for (j = (1 << BYTEWIDTH); j >= 0; j--)
3502 if ((CHAR_HAS_CATEGORY (j, k)) ^ not)
3503 data->fastmap[j] = 1;
3504
3505 /* Any leading code can possibly start a character which
3506 has or doesn't has the_malloc_fn specified category. */
3507 if (!data->match_any_multibyte_characters)
3508 {
3509 for (j = MIN_MULTIBYTE_LEADING_CODE;
3510 j <= MAX_MULTIBYTE_LEADING_CODE; j++)
3511 data->fastmap[j] = 1;
3512 data->match_any_multibyte_characters = true;
3513 }
3514 return true;
3515
3516 case endline:
3517 case endbuf:
3518 case wordbeg:
3519 case wordend:
3520 case symbeg:
3521 case symend:
3522 /* This false doesn't mean failure but rather "not succeeded yet". */
3523 return false;
3524
3525 default:
3526#if ENABLE_CHECKING
3527 abort (); /* We have listed all the cases. */
3528#endif
3529 return false;
3530 }
3531}
3532
3533static bool
3534analyze_first_null (const re_char *p, void *arg)
3535{
3536 switch (*p)
3537 {
3538 case succeed:
3539 /* This is safe: we can't reach `pend` at all from here. */
3540 return true;
3541
3542 case duplicate:
3543 /* Either `duplicate` ends up matching a non-empty string, in which
3544 case we're good, or it matches the empty string, in which case we
3545 need to continue checking the rest of this path, which is exactly
3546 what returning `true` does, here. */
3547 return true;
3548
3549 case exactn:
3550 case anychar:
3551 case charset_not:
3552 case charset:
3553 case syntaxspec:
3554 case notsyntaxspec:
3555 case categoryspec:
3556 case notcategoryspec:
3557 return true;
3558
3559 case endline:
3560 case endbuf:
3561 case wordbeg:
3562 case wordend:
3563 case symbeg:
3564 case symend:
3565 /* This false doesn't mean failure but rather "not succeeded yet". */
3566 return false;
3567
3568 default:
3569#if ENABLE_CHECKING
3570 abort (); /* We have listed all the cases. */
3571#endif
3572 return false;
3573 }
3574}
3575
3576/* analyze_first.
3577 If fastmap is non-NULL, go through the pattern and fill fastmap
3578 with all the possible leading chars. If fastmap is NULL, don't
3579 bother filling it up (obviously) and only return whether the
3580 pattern could potentially match the empty string.
3581
3582 Return false if p matches at least one char before reaching pend.
3583 Return true if p..pend might match the empty string
3584 or if fastmap was not updated accurately. */
3585
3586static bool
3587analyze_first (struct re_pattern_buffer *bufp,
3588 re_char *p, re_char *pend, char *fastmap)
3589{
3590 eassert (pend);
3591 struct anafirst_data data = { bufp ? bufp->multibyte : false,
3592 fastmap, false };
3593 bool safe = forall_firstchar (bufp->used ? bufp : NULL, p, pend,
3594 fastmap ? analyze_first_fastmap
3595 : analyze_first_null,
3596 &data);
3597#if ENABLE_CHECKING
3598 bool old = !!analyze_first_old (p, pend, fastmap, data.multibyte);
3599 if (old && safe)
3600 {
3601 fprintf (stderr, "ANALYZE_FIRST: New optimization (fastmap=%d)!\n",
3602 fastmap ? 1 : 0);
3603 print_partial_compiled_pattern (stderr, p, pend);
3604 }
3605 else if (!old && !safe)
3606 {
3607 fprintf (stderr, "ANALYZE_FIRST: Lost an optimization (fastmap=%d)!\n",
3608 fastmap ? 1 : 0);
3609 print_partial_compiled_pattern (stderr, p, pend);
3610 }
3611#endif
3612 return !safe;
3613}
3614
3137 3615
3138/* Compute a fastmap for the compiled pattern in BUFP. 3616/* Compute a fastmap for the compiled pattern in BUFP.
3139 A fastmap records which of the (1 << BYTEWIDTH) possible 3617 A fastmap records which of the (1 << BYTEWIDTH) possible
@@ -3162,8 +3640,8 @@ re_compile_fastmap (struct re_pattern_buffer *bufp)
3162 /* FIXME: Is the following assignment correct even when ANALYSIS < 0? */ 3640 /* FIXME: Is the following assignment correct even when ANALYSIS < 0? */
3163 bufp->fastmap_accurate = 1; /* It will be when we're done. */ 3641 bufp->fastmap_accurate = 1; /* It will be when we're done. */
3164 3642
3165 bufp->can_be_null = analyze_first (bufp->buffer, bufp->buffer + bufp->used, 3643 bufp->can_be_null = analyze_first (bufp, bufp->buffer,
3166 fastmap, RE_MULTIBYTE_P (bufp)); 3644 bufp->buffer + bufp->used, fastmap);
3167} /* re_compile_fastmap */ 3645} /* re_compile_fastmap */
3168 3646
3169/* Set REGS to hold NUM_REGS registers, storing them in STARTS and 3647/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
@@ -3962,11 +4440,102 @@ mutually_exclusive_aux (struct re_pattern_buffer *bufp, re_char *p1,
3962 return false; 4440 return false;
3963} 4441}
3964 4442
4443struct mutexcl_data {
4444 struct re_pattern_buffer *bufp;
4445 re_char *p1;
4446};
4447
4448static bool
4449mutually_exclusive_one (re_char *p2, void *arg)
4450{
4451 struct mutexcl_data *data = arg;
4452 switch (*p2)
4453 {
4454 case endline:
4455 case exactn:
4456 return mutually_exclusive_exactn (data->bufp, data->p1, p2);
4457 case charset:
4458 {
4459 if (*data->p1 == exactn)
4460 return mutually_exclusive_exactn (data->bufp, p2, data->p1);
4461 else
4462 return mutually_exclusive_charset (data->bufp, data->p1, p2);
4463 }
4464
4465 case charset_not:
4466 switch (*data->p1)
4467 {
4468 case exactn:
4469 return mutually_exclusive_exactn (data->bufp, p2, data->p1);
4470 case charset:
4471 return mutually_exclusive_charset (data->bufp, p2, data->p1);
4472 case charset_not:
4473 /* When we have two charset_not, it's very unlikely that
4474 they don't overlap. The union of the two sets of excluded
4475 chars should cover all possible chars, which, as a matter of
4476 fact, is virtually impossible in multibyte buffers. */
4477 return false;
4478 }
4479 return false;
4480 case anychar:
4481 return false; /* FIXME: exactn \n ? */
4482 case syntaxspec:
4483 return (*data->p1 == notsyntaxspec && data->p1[1] == p2[1]);
4484 case notsyntaxspec:
4485 return (*data->p1 == syntaxspec && data->p1[1] == p2[1]);
4486 case categoryspec:
4487 return (*data->p1 == notcategoryspec && data->p1[1] == p2[1]);
4488 case notcategoryspec:
4489 return (*data->p1 == categoryspec && data->p1[1] == p2[1]);
4490
4491 case endbuf:
4492 case succeed:
4493 return true;
4494 case wordbeg:
4495 return (*data->p1 == notsyntaxspec && data->p1[1] == Sword);
4496 case wordend:
4497 return (*data->p1 == syntaxspec && data->p1[1] == Sword);
4498 case symbeg:
4499 return (*data->p1 == notsyntaxspec
4500 && (data->p1[1] == Ssymbol || data->p1[1] == Sword));
4501 case symend:
4502 return (*data->p1 == syntaxspec
4503 && (data->p1[1] == Ssymbol || data->p1[1] == Sword));
4504
4505 case duplicate:
4506 /* At this point, we know nothing about what this can match, sadly. */
4507 return false;
4508
4509 default:
4510#if ENABLE_CHECKING
4511 abort (); /* We have listed all the cases. */
4512#endif
4513 return false;
4514 }
4515}
4516
3965static bool 4517static bool
3966mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1, 4518mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1,
3967 re_char *p2) 4519 re_char *p2)
3968{ 4520{
3969 return mutually_exclusive_aux (bufp, p1, p2, bufp->buffer - 1, NULL); 4521 struct mutexcl_data data = { bufp, p1 };
4522 bool new = forall_firstchar (bufp, p2, NULL, mutually_exclusive_one, &data);
4523#if ENABLE_CHECKING
4524 bool old = mutually_exclusive_aux (bufp, p1, p2, bufp->buffer - 1, NULL);
4525 if (old && !new)
4526 {
4527 fprintf (stderr, "MUTUALLY_EXCLUSIVE_P: Lost an optimization between %d and %d!\n",
4528 (int)(p1 - bufp->buffer), (int)(p2 - bufp->buffer));
4529 print_partial_compiled_pattern (stderr, bufp->buffer, bufp->buffer + bufp->used);
4530 }
4531 else if (!old && new)
4532 {
4533 fprintf (stderr, "MUTUALLY_EXCLUSIVE_P: New optimization between %d and %d!\n",
4534 (int)(p1 - bufp->buffer), (int)(p2 - bufp->buffer));
4535 print_partial_compiled_pattern (stderr, bufp->buffer, bufp->buffer + bufp->used);
4536 }
4537#endif
4538 return new;
3970} 4539}
3971 4540
3972/* Matching routines. */ 4541/* Matching routines. */