1 files changed, 3 insertions, 562 deletions
diff --git a/src/regex-emacs.c b/src/regex-emacs.c
index dbd5bcb8953..79e9f2c105a 100644
--- a/src/regex-emacs.c
+++ b/src/regex-emacs.c
@@ -3044,319 +3044,6 @@ forall_firstchar (struct re_pattern_buffer *bufp, re_char *p, re_char *pend,
                             f, arg);
 }
-/* analyze_first_old.
-   If fastmap is non-NULL, go through the pattern and fill fastmap
-   with all the possible leading chars.  If fastmap is NULL, don't
-   bother filling it up (obviously) and only return whether the
-   pattern could potentially match the empty string.
-   Return false if p..pend matches at least one char.
-   Return true  if p..pend might match the empty string
-                or if fastmap was not updated accurately.  */
-static bool
-analyze_first_old (re_char *p, re_char *pend, char *fastmap, bool multibyte)
-{
-  int j, k;
-  int nbits;
-  bool not;
-  /* If all elements for base leading-codes in fastmap is set, this
-     flag is set true.  */
-  bool match_any_multibyte_characters = false;
-  eassert (p);
-  /* The loop below works as follows:
-     - It has a working-list kept in the PATTERN_STACK and which basically
-       starts by only containing a pointer to the first operation.
-     - If the opcode we're looking at is a match against some set of
-       chars, then we add those chars to the fastmap and go on to the
-       next work element from the worklist (done via 'break').
-     - If the opcode is a control operator on the other hand, we either
-       ignore it (if it's meaningless at this point, such as 'start_memory')
-       or execute it (if it's a jump).  If the jump has several destinations
-       (i.e. 'on_failure_jump'), then we push the other destination onto the
-       worklist.
-     We guarantee termination by ignoring backward jumps (more or less),
-     so that P is monotonically increasing.  More to the point, we
-     never set P (or push) anything '<= p1'.  */
-  while (p < pend)
-    {
-      /* P1 is used as a marker of how far back a 'on_failure_jump'
-         can go without being ignored.  It is normally equal to P
-         (which prevents any backward 'on_failure_jump') except right
-         after a plain 'jump', to allow patterns such as:
-            0: jump 10
-            3..9: <body>
-            10: on_failure_jump 3
-         as used for the *? operator.  */
-      re_char *p1 = p;
-      switch (*p++)
-        {
-        case succeed:
-          return true;
-        case duplicate:
-          /* If the first character has to match a backreference, that means
-             that the group was empty (since it already matched).  Since this
-             is the only case that interests us here, we can assume that the
-             backreference must match the empty string.  */
-          /* Note: Only true if we started from bufp->buffer (e.g. when
-             fastmap is non-NULL), but when fastmap is NULL we're only
-             trying to figure out if the body of a loop can possibly match
-             the empty string so it's safe (tho sometimes pessimistic)
-             to presume that `duplicate` can.  */
-          p++;
-          continue;
-      /* Following are the cases which match a character.  These end
-         with 'break'.  */
-        case exactn:
-          if (fastmap)
-            {
-              /* If multibyte is nonzero, the first byte of each
-                 character is an ASCII or a leading code.  Otherwise,
-                 each byte is a character.  Thus, this works in both
-                 cases. */
-              fastmap[p[1]] = 1;
-              if (multibyte)
-                {
-                  /* Cover the case of matching a raw char in a
-                     multibyte regexp against unibyte.  */
-                  if (CHAR_BYTE8_HEAD_P (p[1]))
-                    fastmap[CHAR_TO_BYTE8 (STRING_CHAR (p + 1))] = 1;
-                }
-              else
-                {
-                  /* For the case of matching this unibyte regex
-                     against multibyte, we must set a leading code of
-                     the corresponding multibyte character.  */
-                  int c = RE_CHAR_TO_MULTIBYTE (p[1]);
-                  fastmap[CHAR_LEADING_CODE (c)] = 1;
-                }
-            }
-          break;
-        case anychar:
-          /* We could put all the chars except for \n (and maybe \0)
-             but we don't bother since it is generally not worth it.  */
-          if (!fastmap) break;
-          return true;
-        case charset_not:
-          if (!fastmap) break;
-          {
-            /* Chars beyond end of bitmap are possible matches.  */
-            for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH;
-                 j < (1 << BYTEWIDTH); j++)
-              fastmap[j] = 1;
-          }
-          FALLTHROUGH;
-        case charset:
-          if (!fastmap) break;
-          not = (re_opcode_t) *(p - 1) == charset_not;
-          nbits = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH;
-          p++;
-          for (j = 0; j < nbits; j++)
-            if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not)
-              fastmap[j] = 1;
-          /* To match raw bytes (in the 80..ff range) against multibyte
-             strings, add their leading bytes to the fastmap.  */
-          for (j = 0x80; j < nbits; j++)
-            if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not)
-              fastmap[CHAR_LEADING_CODE (BYTE8_TO_CHAR (j))] = 1;
-          if (/* Any leading code can possibly start a character
-                 which doesn't match the specified set of characters.  */
-              not
-              ||
-              /* If we can match a character class, we can match any
-                 multibyte characters.  */
-              (CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
-               && CHARSET_RANGE_TABLE_BITS (&p[-2]) != 0))
-            {
-              if (match_any_multibyte_characters == false)
-                {
-                  for (j = MIN_MULTIBYTE_LEADING_CODE;
-                       j <= MAX_MULTIBYTE_LEADING_CODE; j++)
-                    fastmap[j] = 1;
-                  match_any_multibyte_characters = true;
-                }
-            }
-          else if (!not && CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
-                   && match_any_multibyte_characters == false)
-            {
-              /* Set fastmap[I] to 1 where I is a leading code of each
-                 multibyte character in the range table. */
-              int c, count;
-              unsigned char lc1, lc2;
-              /* Make P points the range table.  '+ 2' is to skip flag
-                 bits for a character class.  */
-              p += CHARSET_BITMAP_SIZE (&p[-2]) + 2;
-              /* Extract the number of ranges in range table into COUNT.  */
-              EXTRACT_NUMBER_AND_INCR (count, p);
-              for (; count > 0; count--, p += 3)
-                {
-                  /* Extract the start and end of each range.  */
-                  EXTRACT_CHARACTER (c, p);
-                  lc1 = CHAR_LEADING_CODE (c);
-                  p += 3;
-                  EXTRACT_CHARACTER (c, p);
-                  lc2 = CHAR_LEADING_CODE (c);
-                  for (j = lc1; j <= lc2; j++)
-                    fastmap[j] = 1;
-                }
-            }
-          break;
-        case syntaxspec:
-        case notsyntaxspec:
-          if (!fastmap) break;
-          /* This match depends on text properties.  These end with
-             aborting optimizations.  */
-          return true;
-        case categoryspec:
-        case notcategoryspec:
-          if (!fastmap) break;
-          not = (re_opcode_t)p[-1] == notcategoryspec;
-          k = *p++;
-          for (j = (1 << BYTEWIDTH); j >= 0; j--)
-            if ((CHAR_HAS_CATEGORY (j, k)) ^ not)
-              fastmap[j] = 1;
-          /* Any leading code can possibly start a character which
-             has or doesn't has the specified category.  */
-          if (match_any_multibyte_characters == false)
-            {
-              for (j = MIN_MULTIBYTE_LEADING_CODE;
-                   j <= MAX_MULTIBYTE_LEADING_CODE; j++)
-                fastmap[j] = 1;
-              match_any_multibyte_characters = true;
-            }
-          break;
-      /* All cases after this match the empty string.  These end with
-         'continue'.  */
-        case at_dot:
-        case no_op:
-        case begline:
-        case endline:
-        case begbuf:
-        case endbuf:
-        case wordbound:
-        case notwordbound:
-        case wordbeg:
-        case wordend:
-        case symbeg:
-        case symend:
-          continue;
-        case jump:
-          EXTRACT_NUMBER_AND_INCR (j, p);
-          if (j < 0)
-            /* Backward jumps can only go back to code that we've already
-               visited.  're_compile' should make sure this is true.  */
-            break;
-          p += j;
-          switch (*p)
-            {
-            case on_failure_jump:
-            case on_failure_keep_string_jump:
-            case on_failure_jump_loop:
-            case on_failure_jump_nastyloop:
-            case on_failure_jump_smart:
-              p++;
-              break;
-            default:
-              continue;
-            };
-          /* Keep P1 to allow the 'on_failure_jump' we are jumping to
-             to jump back to "just after here".  */
-          FALLTHROUGH;
-        case on_failure_jump:
-        case on_failure_keep_string_jump:
-        case on_failure_jump_nastyloop:
-        case on_failure_jump_loop:
-        case on_failure_jump_smart:
-          EXTRACT_NUMBER_AND_INCR (j, p);
-          if (p + j <= p1)
-            ; /* Backward jump to be ignored.  */
-          else
-            { /* We have to look down both arms.
-                 We first go down the "straight" path so as to minimize
-                 stack usage when going through alternatives.  */
-              bool r = analyze_first_old (p, pend, fastmap, multibyte);
-              if (r) return r;
-              p += j;
-            }
-          continue;
-        case jump_n:
-          /* This code simply does not properly handle forward jump_n.  */
-          DEBUG_STATEMENT (EXTRACT_NUMBER (j, p); eassert (j < 0));
-          p += 4;
-          /* jump_n can either jump or fall through.  The (backward) jump
-             case has already been handled, so we only need to look at the
-             fallthrough case.  */
-          continue;
-        case succeed_n:
-          /* If N == 0, it should be an on_failure_jump_loop instead.
-             `j` can be negative because `EXTRACT_NUMBER` extracts a
-             signed number whereas `succeed_n` treats it as unsigned.  */
-          DEBUG_STATEMENT (EXTRACT_NUMBER (j, p + 2); eassert (j != 0));
-          p += 4;
-          /* We only care about one iteration of the loop, so we don't
-             need to consider the case where this behaves like an
-             on_failure_jump.  */
-          /* FIXME: Sadly, the above is not true when the loop's body
-             can match the empty string :-(  */
-          /* continue; */
-          return true;
-        case set_number_at:
-          p += 4;
-          continue;
-        case start_memory:
-        case stop_memory:
-          p += 1;
-          continue;
-        default:
-          abort (); /* We have listed all the cases.  */
-        } /* switch *p++ */
-      /* Getting here means we have found the possible starting
-         characters for one path of the pattern -- and that the empty
-         string does not match.  We need not follow this path further.  */
-      return false;
-    } /* while p */
-  /* We reached the end without matching anything.  */
-  return true;
-} /* analyze_first_old */
 struct anafirst_data {
  bool multibyte;
  char *fastmap;
@@ -3594,21 +3281,6 @@ analyze_first (struct re_pattern_buffer *bufp,
                                fastmap ? analyze_first_fastmap
                                : analyze_first_null,
                                &data);
-#if ENABLE_CHECKING
-  bool old = !!analyze_first_old (p, pend, fastmap, data.multibyte);
-  if (old && safe)
-    {
-      fprintf (stderr, "ANALYZE_FIRST: New optimization (fastmap=%d)!\n",
-               fastmap ? 1 : 0);
-      print_partial_compiled_pattern (stderr, p, pend);
-    }
-  else if (!old && !safe)
-    {
-      fprintf (stderr, "ANALYZE_FIRST: Lost an optimization (fastmap=%d)!\n",
-               fastmap ? 1 : 0);
-      print_partial_compiled_pattern (stderr, p, pend);
-    }
-#endif
  return !safe;
 }
@@ -4056,30 +3728,6 @@ skip_one_char (re_char *p)
 }
-/* Jump over non-matching operations.  */
-static re_char *
-skip_noops (re_char *p, re_char *pend)
-{
-  while (p < pend)
-    {
-      switch (*p)
-        {
-        case start_memory:
-        case stop_memory:
-          p += 2; break;
-        case no_op:
-          p += 1; break;
-        case jump:
-          p = extract_address (p + 1);
-          break;
-        default:
-          return p;
-        }
-    }
-  eassert (p == pend);
-  return p;
-}
 /* Test if C matches charset op.  *PP points to the charset or charset_not
   opcode.  When the function finishes, *PP will be advanced past that opcode.
   C is character to test (possibly after translations) and CORIG is original
@@ -4247,199 +3895,6 @@ mutually_exclusive_charset (struct re_pattern_buffer *bufp, re_char *p1,
  return false;
 }
-/* True if "p1 matches something" implies "p2 fails".  */
-/* We're trying to follow all paths reachable from `p2`, but since some
-   loops can match the empty string, this can loop back to `p2`.
-   To avoid inf-looping, we take advantage of the fact that
-   the bytecode we generate is made of syntactically nested loops, more
-   specifically, every loop has a single entry point and single exit point.
-   The function takes 2 more arguments (`loop_entry` and `loop_exit`).
-   `loop_entry` points to the sole entry point of the current loop and
-   `loop_exit` points to its sole exit point (when non-NULL).
-   Jumps outside of `loop_entry..exit` should not occur.
-   The function can assume that `loop_exit` is "mutually exclusive".
-   The same holds for `loop_entry` except when `p2 == loop_entry`.
-   To guarantee termination, recursive calls should make sure that either
-   `loop_entry` is larger, or it's unchanged but `p2` is larger.
-   FIXME: This is failsafe (can't return true when it shouldn't)
-   but it could be too conservative if we start generating bytecode
-   with a different shape, so maybe we should bite the bullet and
-   replace done_beg/end with an actual list of positions we've
-   already processed.  */
-static bool
-mutually_exclusive_aux (struct re_pattern_buffer *bufp, re_char *p1,
-                        re_char *p2, re_char *loop_entry, re_char *loop_exit)
-{
-  re_opcode_t op2;
-  unsigned char *pend = bufp->buffer + bufp->used;
-  re_char *p2_orig = p2;
-  eassert (p1 >= bufp->buffer && p1 < pend
-           && p2 >= bufp->buffer && p2 <= pend);
-  if (p2 == loop_exit)
-    return true;          /* Presumably already checked elsewhere.  */
-  eassert (loop_entry && p2 >= loop_entry);
-  if (p2 < loop_entry || (loop_exit && p2 > loop_exit))
-    { /* The assumptions about the shape of the code aren't true :-(  */
-#ifdef ENABLE_CHECKING
-      error ("Broken assumption in regex.c:mutually_exclusive_aux");
-#endif
-      return false;
-    }
-  /* Skip over open/close-group commands.
-     If what follows this loop is a ...+ construct,
-     look at what begins its body, since we will have to
-     match at least one of that.  */
-  p2 = skip_noops (p2, pend);
-  /* The same skip can be done for p1, except that this function
-     is only used in the case where p1 is a simple match operator.  */
-  /* p1 = skip_noops (p1, pend); */
-  eassert (p1 >= bufp->buffer && p1 < pend
-           && p2 >= bufp->buffer && p2 <= pend);
-  op2 = p2 == pend ? succeed : *p2;
-  switch (op2)
-    {
-    case succeed:
-    case endbuf:
-      /* If we're at the end of the pattern, we can change.  */
-      if (skip_one_char (p1))
-        {
-          DEBUG_PRINT ("  End of pattern: fast loop.\n");
-          return true;
-        }
-      break;
-    case endline:
-    case exactn:
-      return mutually_exclusive_exactn (bufp, p1, p2);
-    case charset:
-      {
-        if ((re_opcode_t) *p1 == exactn)
-          return mutually_exclusive_exactn (bufp, p2, p1);
-        else
-          return mutually_exclusive_charset (bufp, p1, p2);
-      }
-      break;
-    case charset_not:
-      switch (*p1)
-        {
-        case exactn:
-          return mutually_exclusive_exactn (bufp, p2, p1);
-        case charset:
-          return mutually_exclusive_charset (bufp, p2, p1);
-        case charset_not:
-          /* When we have two charset_not, it's very unlikely that
-             they don't overlap.  The union of the two sets of excluded
-             chars should cover all possible chars, which, as a matter of
-             fact, is virtually impossible in multibyte buffers.  */
-          break;
-        }
-      break;
-    case wordend:
-      return ((re_opcode_t) *p1 == syntaxspec && p1[1] == Sword);
-    case symend:
-      return ((re_opcode_t) *p1 == syntaxspec
-              && (p1[1] == Ssymbol || p1[1] == Sword));
-    case notsyntaxspec:
-      return ((re_opcode_t) *p1 == syntaxspec && p1[1] == p2[1]);
-    case wordbeg:
-      return ((re_opcode_t) *p1 == notsyntaxspec && p1[1] == Sword);
-    case symbeg:
-      return ((re_opcode_t) *p1 == notsyntaxspec
-              && (p1[1] == Ssymbol || p1[1] == Sword));
-    case syntaxspec:
-      return ((re_opcode_t) *p1 == notsyntaxspec && p1[1] == p2[1]);
-    case wordbound:
-      /* FIXME: This optimization seems correct after the first iteration
-         of the loop, but not for the very first :-(
-         IOW we'd need to pull out the first iteration and do:
-            syntaxspec w
-            on_failure_keep_string_jump end
-          loop:
-            syntaxspec w
-            goto loop
-          end:
-            wordbound
-      return (((re_opcode_t) *p1 == notsyntaxspec
-               || (re_opcode_t) *p1 == syntaxspec)
-              && p1[1] == Sword);  */
-      return false;
-    case categoryspec:
-      return ((re_opcode_t) *p1 == notcategoryspec && p1[1] == p2[1]);
-    case notcategoryspec:
-      return ((re_opcode_t) *p1 == categoryspec && p1[1] == p2[1]);
-    case on_failure_jump_nastyloop:
-    case on_failure_jump_smart:
-    case on_failure_jump_loop:
-    case on_failure_keep_string_jump:
-    case on_failure_jump:
-      {
-        int mcnt;
-        p2++;
-        EXTRACT_NUMBER_AND_INCR (mcnt, p2);
-        re_char *p2_other = p2 + mcnt, *tmp;
-        /* For `+` loops, we often have an `on_failure_jump` that skips forward
-           over a subsequent `jump` for lack of an `on_failure_dont_jump`
-           kind of thing.  Recognize this pattern since that subsequent
-           `jump` is the one that jumps to the loop-entry.  */
-        if ((re_opcode_t) p2[0] == jump && mcnt == 3)
-          p2 = extract_address (p2 + 1);
-        /* We have to check that both destinations are safe.
-           Arrange for `p2` to be the smaller of the two.  */
-        if (p2 > p2_other)
-          (tmp = p2, p2 = p2_other, p2_other = tmp);
-        if (p2_other <= p2_orig /* Both destinations go backward!  */
-            || !mutually_exclusive_aux (bufp, p1, p2_other,
-                                        loop_entry, loop_exit))
-          return false;
-        /* Now that we know that `p2_other` is a safe (i.e. mutually-exclusive)
-           position, let's check `p2`.  */
-        if (p2 == loop_entry)
-          /* If we jump backward to the entry point of the current loop
-             it means it's a zero-length cycle through that loop, so
-             this cycle itself does not break mutual-exclusion.  */
-          return true;
-        else if (p2 > p2_orig)
-          /* Boring forward jump.  */
-          return mutually_exclusive_aux (bufp, p1, p2, loop_entry, loop_exit);
-        else if (loop_entry < p2 && p2 < p2_orig)
-          /* We jump backward to a new loop, nested within the current one.
-             `p2` is the entry point and `p2_other` the exit of that inner.  */
-          return mutually_exclusive_aux (bufp, p1, p2, p2, p2_other);
-        else
-          return false;
-      }
-    default:
-      ;
-    }
-  /* Safe default.  */
-  return false;
-}
 struct mutexcl_data {
  struct re_pattern_buffer *bufp;
  re_char *p1;
@@ -4514,28 +3969,14 @@ mutually_exclusive_one (re_char *p2, void *arg)
    }
 }
+/* True if "p1 matches something" implies "p2 fails".  */
 static bool
 mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1,
                      re_char *p2)
 {
  struct mutexcl_data data = { bufp, p1 };
-  bool new = forall_firstchar (bufp, p2, NULL, mutually_exclusive_one, &data);
+  return forall_firstchar (bufp, p2, NULL, mutually_exclusive_one, &data);
-#if ENABLE_CHECKING
-  bool old = mutually_exclusive_aux (bufp, p1, p2, bufp->buffer - 1, NULL);
-  if (old && !new)
-    {
-      fprintf (stderr, "MUTUALLY_EXCLUSIVE_P: Lost an optimization between %d and %d!\n",
-               (int)(p1 - bufp->buffer), (int)(p2 - bufp->buffer));
-      print_partial_compiled_pattern (stderr, bufp->buffer, bufp->buffer + bufp->used);
-    }
-  else if (!old && new)
-    {
-      fprintf (stderr, "MUTUALLY_EXCLUSIVE_P: New optimization between %d and %d!\n",
-               (int)(p1 - bufp->buffer), (int)(p2 - bufp->buffer));
-      print_partial_compiled_pattern (stderr, bufp->buffer, bufp->buffer + bufp->used);
-    }
-#endif
-  return new;
 }
 /* Matching routines.  */

diff --git a/src/regex-emacs.c b/src/regex-emacs.c index dbd5bcb8953..79e9f2c105a 100644 --- a/src/regex-emacs.c +++ b/src/regex-emacs.c
@@ -3044,319 +3044,6 @@ forall_firstchar (struct re_pattern_buffer bufp, re_char p, re_char *pend,
3044	f, arg);	3044	f, arg);
3045	}	3045	}
3046		3046
3047	/* analyze_first_old.
3048	If fastmap is non-NULL, go through the pattern and fill fastmap
3049	with all the possible leading chars. If fastmap is NULL, don't
3050	bother filling it up (obviously) and only return whether the
3051	pattern could potentially match the empty string.
3052
3053	Return false if p..pend matches at least one char.
3054	Return true if p..pend might match the empty string
3055	or if fastmap was not updated accurately. */
3056
3057	static bool
3058	analyze_first_old (re_char p, re_char pend, char *fastmap, bool multibyte)
3059	{
3060	int j, k;
3061	int nbits;
3062	bool not;
3063
3064	/* If all elements for base leading-codes in fastmap is set, this
3065	flag is set true. */
3066	bool match_any_multibyte_characters = false;
3067
3068	eassert (p);
3069
3070	/* The loop below works as follows:
3071	- It has a working-list kept in the PATTERN_STACK and which basically
3072	starts by only containing a pointer to the first operation.
3073	- If the opcode we're looking at is a match against some set of
3074	chars, then we add those chars to the fastmap and go on to the
3075	next work element from the worklist (done via 'break').
3076	- If the opcode is a control operator on the other hand, we either
3077	ignore it (if it's meaningless at this point, such as 'start_memory')
3078	or execute it (if it's a jump). If the jump has several destinations
3079	(i.e. 'on_failure_jump'), then we push the other destination onto the
3080	worklist.
3081	We guarantee termination by ignoring backward jumps (more or less),
3082	so that P is monotonically increasing. More to the point, we
3083	never set P (or push) anything '<= p1'. */
3084
3085	while (p < pend)
3086	{
3087	/* P1 is used as a marker of how far back a 'on_failure_jump'
3088	can go without being ignored. It is normally equal to P
3089	(which prevents any backward 'on_failure_jump') except right
3090	after a plain 'jump', to allow patterns such as:
3091	0: jump 10
3092	3..9: <body>
3093	10: on_failure_jump 3
3094	as used for the ? operator. /
3095	re_char *p1 = p;
3096
3097	switch (*p++)
3098	{
3099	case succeed:
3100	return true;
3101
3102	case duplicate:
3103	/* If the first character has to match a backreference, that means
3104	that the group was empty (since it already matched). Since this
3105	is the only case that interests us here, we can assume that the
3106	backreference must match the empty string. */
3107	/* Note: Only true if we started from bufp->buffer (e.g. when
3108	fastmap is non-NULL), but when fastmap is NULL we're only
3109	trying to figure out if the body of a loop can possibly match
3110	the empty string so it's safe (tho sometimes pessimistic)
3111	to presume that `duplicate` can. */
3112	p++;
3113	continue;
3114
3115
3116	/* Following are the cases which match a character. These end
3117	with 'break'. */
3118
3119	case exactn:
3120	if (fastmap)
3121	{
3122	/* If multibyte is nonzero, the first byte of each
3123	character is an ASCII or a leading code. Otherwise,
3124	each byte is a character. Thus, this works in both
3125	cases. */
3126	fastmap[p[1]] = 1;
3127	if (multibyte)
3128	{
3129	/* Cover the case of matching a raw char in a
3130	multibyte regexp against unibyte. */
3131	if (CHAR_BYTE8_HEAD_P (p[1]))
3132	fastmap[CHAR_TO_BYTE8 (STRING_CHAR (p + 1))] = 1;
3133	}
3134	else
3135	{
3136	/* For the case of matching this unibyte regex
3137	against multibyte, we must set a leading code of
3138	the corresponding multibyte character. */
3139	int c = RE_CHAR_TO_MULTIBYTE (p[1]);
3140
3141	fastmap[CHAR_LEADING_CODE (c)] = 1;
3142	}
3143	}
3144	break;
3145
3146
3147	case anychar:
3148	/* We could put all the chars except for \n (and maybe \0)
3149	but we don't bother since it is generally not worth it. */
3150	if (!fastmap) break;
3151	return true;
3152
3153
3154	case charset_not:
3155	if (!fastmap) break;
3156	{
3157	/* Chars beyond end of bitmap are possible matches. */
3158	for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH;
3159	j < (1 << BYTEWIDTH); j++)
3160	fastmap[j] = 1;
3161	}
3162	FALLTHROUGH;
3163	case charset:
3164	if (!fastmap) break;
3165	not = (re_opcode_t) *(p - 1) == charset_not;
3166	nbits = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH;
3167	p++;
3168	for (j = 0; j < nbits; j++)
3169	if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not)
3170	fastmap[j] = 1;
3171
3172	/* To match raw bytes (in the 80..ff range) against multibyte
3173	strings, add their leading bytes to the fastmap. */
3174	for (j = 0x80; j < nbits; j++)
3175	if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not)
3176	fastmap[CHAR_LEADING_CODE (BYTE8_TO_CHAR (j))] = 1;
3177
3178	if (/* Any leading code can possibly start a character
3179	which doesn't match the specified set of characters. */
3180	not
3181	\|\|
3182	/* If we can match a character class, we can match any
3183	multibyte characters. */
3184	(CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
3185	&& CHARSET_RANGE_TABLE_BITS (&p[-2]) != 0))
3186
3187	{
3188	if (match_any_multibyte_characters == false)
3189	{
3190	for (j = MIN_MULTIBYTE_LEADING_CODE;
3191	j <= MAX_MULTIBYTE_LEADING_CODE; j++)
3192	fastmap[j] = 1;
3193	match_any_multibyte_characters = true;
3194	}
3195	}
3196
3197	else if (!not && CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
3198	&& match_any_multibyte_characters == false)
3199	{
3200	/* Set fastmap[I] to 1 where I is a leading code of each
3201	multibyte character in the range table. */
3202	int c, count;
3203	unsigned char lc1, lc2;
3204
3205	/* Make P points the range table. '+ 2' is to skip flag
3206	bits for a character class. */
3207	p += CHARSET_BITMAP_SIZE (&p[-2]) + 2;
3208
3209	/* Extract the number of ranges in range table into COUNT. */
3210	EXTRACT_NUMBER_AND_INCR (count, p);
3211	for (; count > 0; count--, p += 3)
3212	{
3213	/* Extract the start and end of each range. */
3214	EXTRACT_CHARACTER (c, p);
3215	lc1 = CHAR_LEADING_CODE (c);
3216	p += 3;
3217	EXTRACT_CHARACTER (c, p);
3218	lc2 = CHAR_LEADING_CODE (c);
3219	for (j = lc1; j <= lc2; j++)
3220	fastmap[j] = 1;
3221	}
3222	}
3223	break;
3224
3225	case syntaxspec:
3226	case notsyntaxspec:
3227	if (!fastmap) break;
3228	/* This match depends on text properties. These end with
3229	aborting optimizations. */
3230	return true;
3231
3232	case categoryspec:
3233	case notcategoryspec:
3234	if (!fastmap) break;
3235	not = (re_opcode_t)p[-1] == notcategoryspec;
3236	k = *p++;
3237	for (j = (1 << BYTEWIDTH); j >= 0; j--)
3238	if ((CHAR_HAS_CATEGORY (j, k)) ^ not)
3239	fastmap[j] = 1;
3240
3241	/* Any leading code can possibly start a character which
3242	has or doesn't has the specified category. */
3243	if (match_any_multibyte_characters == false)
3244	{
3245	for (j = MIN_MULTIBYTE_LEADING_CODE;
3246	j <= MAX_MULTIBYTE_LEADING_CODE; j++)
3247	fastmap[j] = 1;
3248	match_any_multibyte_characters = true;
3249	}
3250	break;
3251
3252	/* All cases after this match the empty string. These end with
3253	'continue'. */
3254
3255	case at_dot:
3256	case no_op:
3257	case begline:
3258	case endline:
3259	case begbuf:
3260	case endbuf:
3261	case wordbound:
3262	case notwordbound:
3263	case wordbeg:
3264	case wordend:
3265	case symbeg:
3266	case symend:
3267	continue;
3268
3269
3270	case jump:
3271	EXTRACT_NUMBER_AND_INCR (j, p);
3272	if (j < 0)
3273	/* Backward jumps can only go back to code that we've already
3274	visited. 're_compile' should make sure this is true. */
3275	break;
3276	p += j;
3277	switch (*p)
3278	{
3279	case on_failure_jump:
3280	case on_failure_keep_string_jump:
3281	case on_failure_jump_loop:
3282	case on_failure_jump_nastyloop:
3283	case on_failure_jump_smart:
3284	p++;
3285	break;
3286	default:
3287	continue;
3288	};
3289	/* Keep P1 to allow the 'on_failure_jump' we are jumping to
3290	to jump back to "just after here". */
3291	FALLTHROUGH;
3292	case on_failure_jump:
3293	case on_failure_keep_string_jump:
3294	case on_failure_jump_nastyloop:
3295	case on_failure_jump_loop:
3296	case on_failure_jump_smart:
3297	EXTRACT_NUMBER_AND_INCR (j, p);
3298	if (p + j <= p1)
3299	; /* Backward jump to be ignored. */
3300	else
3301	{ /* We have to look down both arms.
3302	We first go down the "straight" path so as to minimize
3303	stack usage when going through alternatives. */
3304	bool r = analyze_first_old (p, pend, fastmap, multibyte);
3305	if (r) return r;
3306	p += j;
3307	}
3308	continue;
3309
3310
3311	case jump_n:
3312	/* This code simply does not properly handle forward jump_n. */
3313	DEBUG_STATEMENT (EXTRACT_NUMBER (j, p); eassert (j < 0));
3314	p += 4;
3315	/* jump_n can either jump or fall through. The (backward) jump
3316	case has already been handled, so we only need to look at the
3317	fallthrough case. */
3318	continue;
3319
3320	case succeed_n:
3321	/* If N == 0, it should be an on_failure_jump_loop instead.
3322	`j` can be negative because `EXTRACT_NUMBER` extracts a
3323	signed number whereas `succeed_n` treats it as unsigned. */
3324	DEBUG_STATEMENT (EXTRACT_NUMBER (j, p + 2); eassert (j != 0));
3325	p += 4;
3326	/* We only care about one iteration of the loop, so we don't
3327	need to consider the case where this behaves like an
3328	on_failure_jump. */
3329	/* FIXME: Sadly, the above is not true when the loop's body
3330	can match the empty string :-( */
3331	/* continue; */
3332	return true;
3333
3334	case set_number_at:
3335	p += 4;
3336	continue;
3337
3338
3339	case start_memory:
3340	case stop_memory:
3341	p += 1;
3342	continue;
3343
3344
3345	default:
3346	abort (); /* We have listed all the cases. */
3347	} /* switch p++ /
3348
3349	/* Getting here means we have found the possible starting
3350	characters for one path of the pattern -- and that the empty
3351	string does not match. We need not follow this path further. */
3352	return false;
3353	} /* while p */
3354
3355	/* We reached the end without matching anything. */
3356	return true;
3357
3358	} /* analyze_first_old */
3359
3360	struct anafirst_data {	3047	struct anafirst_data {
3361	bool multibyte;	3048	bool multibyte;
3362	char *fastmap;	3049	char *fastmap;
@@ -3594,21 +3281,6 @@ analyze_first (struct re_pattern_buffer *bufp,
3594	fastmap ? analyze_first_fastmap	3281	fastmap ? analyze_first_fastmap
3595	: analyze_first_null,	3282	: analyze_first_null,
3596	&data);	3283	&data);
3597	#if ENABLE_CHECKING
3598	bool old = !!analyze_first_old (p, pend, fastmap, data.multibyte);
3599	if (old && safe)
3600	{
3601	fprintf (stderr, "ANALYZE_FIRST: New optimization (fastmap=%d)!\n",
3602	fastmap ? 1 : 0);
3603	print_partial_compiled_pattern (stderr, p, pend);
3604	}
3605	else if (!old && !safe)
3606	{
3607	fprintf (stderr, "ANALYZE_FIRST: Lost an optimization (fastmap=%d)!\n",
3608	fastmap ? 1 : 0);
3609	print_partial_compiled_pattern (stderr, p, pend);
3610	}
3611	#endif
3612	return !safe;	3284	return !safe;
3613	}	3285	}
3614		3286
@@ -4056,30 +3728,6 @@ skip_one_char (re_char *p)
4056	}	3728	}
4057		3729
4058		3730
4059	/* Jump over non-matching operations. */
4060	static re_char *
4061	skip_noops (re_char p, re_char pend)
4062	{
4063	while (p < pend)
4064	{
4065	switch (*p)
4066	{
4067	case start_memory:
4068	case stop_memory:
4069	p += 2; break;
4070	case no_op:
4071	p += 1; break;
4072	case jump:
4073	p = extract_address (p + 1);
4074	break;
4075	default:
4076	return p;
4077	}
4078	}
4079	eassert (p == pend);
4080	return p;
4081	}
4082
4083	/* Test if C matches charset op. *PP points to the charset or charset_not	3731	/* Test if C matches charset op. *PP points to the charset or charset_not
4084	opcode. When the function finishes, *PP will be advanced past that opcode.	3732	opcode. When the function finishes, *PP will be advanced past that opcode.
4085	C is character to test (possibly after translations) and CORIG is original	3733	C is character to test (possibly after translations) and CORIG is original
@@ -4247,199 +3895,6 @@ mutually_exclusive_charset (struct re_pattern_buffer bufp, re_char p1,
4247	return false;	3895	return false;
4248	}	3896	}
4249		3897
4250	/* True if "p1 matches something" implies "p2 fails". */
4251	/* We're trying to follow all paths reachable from `p2`, but since some
4252	loops can match the empty string, this can loop back to `p2`.
4253
4254	To avoid inf-looping, we take advantage of the fact that
4255	the bytecode we generate is made of syntactically nested loops, more
4256	specifically, every loop has a single entry point and single exit point.
4257
4258	The function takes 2 more arguments (`loop_entry` and `loop_exit`).
4259	`loop_entry` points to the sole entry point of the current loop and
4260	`loop_exit` points to its sole exit point (when non-NULL).
4261
4262	Jumps outside of `loop_entry..exit` should not occur.
4263	The function can assume that `loop_exit` is "mutually exclusive".
4264	The same holds for `loop_entry` except when `p2 == loop_entry`.
4265
4266	To guarantee termination, recursive calls should make sure that either
4267	`loop_entry` is larger, or it's unchanged but `p2` is larger.
4268
4269	FIXME: This is failsafe (can't return true when it shouldn't)
4270	but it could be too conservative if we start generating bytecode
4271	with a different shape, so maybe we should bite the bullet and
4272	replace done_beg/end with an actual list of positions we've
4273	already processed. */
4274	static bool
4275	mutually_exclusive_aux (struct re_pattern_buffer bufp, re_char p1,
4276	re_char p2, re_char loop_entry, re_char *loop_exit)
4277	{
4278	re_opcode_t op2;
4279	unsigned char *pend = bufp->buffer + bufp->used;
4280	re_char *p2_orig = p2;
4281
4282	eassert (p1 >= bufp->buffer && p1 < pend
4283	&& p2 >= bufp->buffer && p2 <= pend);
4284
4285	if (p2 == loop_exit)
4286	return true; /* Presumably already checked elsewhere. */
4287	eassert (loop_entry && p2 >= loop_entry);
4288	if (p2 < loop_entry \|\| (loop_exit && p2 > loop_exit))
4289	{ /* The assumptions about the shape of the code aren't true :-( */
4290	#ifdef ENABLE_CHECKING
4291	error ("Broken assumption in regex.c:mutually_exclusive_aux");
4292	#endif
4293	return false;
4294	}
4295
4296	/* Skip over open/close-group commands.
4297	If what follows this loop is a ...+ construct,
4298	look at what begins its body, since we will have to
4299	match at least one of that. */
4300	p2 = skip_noops (p2, pend);
4301	/* The same skip can be done for p1, except that this function
4302	is only used in the case where p1 is a simple match operator. */
4303	/* p1 = skip_noops (p1, pend); */
4304
4305	eassert (p1 >= bufp->buffer && p1 < pend
4306	&& p2 >= bufp->buffer && p2 <= pend);
4307
4308	op2 = p2 == pend ? succeed : *p2;
4309
4310	switch (op2)
4311	{
4312	case succeed:
4313	case endbuf:
4314	/* If we're at the end of the pattern, we can change. */
4315	if (skip_one_char (p1))
4316	{
4317	DEBUG_PRINT (" End of pattern: fast loop.\n");
4318	return true;
4319	}
4320	break;
4321
4322	case endline:
4323	case exactn:
4324	return mutually_exclusive_exactn (bufp, p1, p2);
4325
4326	case charset:
4327	{
4328	if ((re_opcode_t) *p1 == exactn)
4329	return mutually_exclusive_exactn (bufp, p2, p1);
4330	else
4331	return mutually_exclusive_charset (bufp, p1, p2);
4332	}
4333	break;
4334
4335	case charset_not:
4336	switch (*p1)
4337	{
4338	case exactn:
4339	return mutually_exclusive_exactn (bufp, p2, p1);
4340	case charset:
4341	return mutually_exclusive_charset (bufp, p2, p1);
4342	case charset_not:
4343	/* When we have two charset_not, it's very unlikely that
4344	they don't overlap. The union of the two sets of excluded
4345	chars should cover all possible chars, which, as a matter of
4346	fact, is virtually impossible in multibyte buffers. */
4347	break;
4348	}
4349	break;
4350
4351	case wordend:
4352	return ((re_opcode_t) *p1 == syntaxspec && p1[1] == Sword);
4353	case symend:
4354	return ((re_opcode_t) *p1 == syntaxspec
4355	&& (p1[1] == Ssymbol \|\| p1[1] == Sword));
4356	case notsyntaxspec:
4357	return ((re_opcode_t) *p1 == syntaxspec && p1[1] == p2[1]);
4358
4359	case wordbeg:
4360	return ((re_opcode_t) *p1 == notsyntaxspec && p1[1] == Sword);
4361	case symbeg:
4362	return ((re_opcode_t) *p1 == notsyntaxspec
4363	&& (p1[1] == Ssymbol \|\| p1[1] == Sword));
4364	case syntaxspec:
4365	return ((re_opcode_t) *p1 == notsyntaxspec && p1[1] == p2[1]);
4366
4367	case wordbound:
4368	/* FIXME: This optimization seems correct after the first iteration
4369	of the loop, but not for the very first :-(
4370	IOW we'd need to pull out the first iteration and do:
4371
4372	syntaxspec w
4373	on_failure_keep_string_jump end
4374	loop:
4375	syntaxspec w
4376	goto loop
4377	end:
4378	wordbound
4379
4380	return (((re_opcode_t) *p1 == notsyntaxspec
4381	\|\| (re_opcode_t) *p1 == syntaxspec)
4382	&& p1[1] == Sword); */
4383	return false;
4384
4385	case categoryspec:
4386	return ((re_opcode_t) *p1 == notcategoryspec && p1[1] == p2[1]);
4387	case notcategoryspec:
4388	return ((re_opcode_t) *p1 == categoryspec && p1[1] == p2[1]);
4389
4390	case on_failure_jump_nastyloop:
4391	case on_failure_jump_smart:
4392	case on_failure_jump_loop:
4393	case on_failure_keep_string_jump:
4394	case on_failure_jump:
4395	{
4396	int mcnt;
4397	p2++;
4398	EXTRACT_NUMBER_AND_INCR (mcnt, p2);
4399	re_char p2_other = p2 + mcnt, tmp;
4400	/* For `+` loops, we often have an `on_failure_jump` that skips forward
4401	over a subsequent `jump` for lack of an `on_failure_dont_jump`
4402	kind of thing. Recognize this pattern since that subsequent
4403	`jump` is the one that jumps to the loop-entry. */
4404	if ((re_opcode_t) p2[0] == jump && mcnt == 3)
4405	p2 = extract_address (p2 + 1);
4406
4407	/* We have to check that both destinations are safe.
4408	Arrange for `p2` to be the smaller of the two. */
4409	if (p2 > p2_other)
4410	(tmp = p2, p2 = p2_other, p2_other = tmp);
4411
4412	if (p2_other <= p2_orig /* Both destinations go backward! */
4413	\|\| !mutually_exclusive_aux (bufp, p1, p2_other,
4414	loop_entry, loop_exit))
4415	return false;
4416
4417	/* Now that we know that `p2_other` is a safe (i.e. mutually-exclusive)
4418	position, let's check `p2`. */
4419	if (p2 == loop_entry)
4420	/* If we jump backward to the entry point of the current loop
4421	it means it's a zero-length cycle through that loop, so
4422	this cycle itself does not break mutual-exclusion. */
4423	return true;
4424	else if (p2 > p2_orig)
4425	/* Boring forward jump. */
4426	return mutually_exclusive_aux (bufp, p1, p2, loop_entry, loop_exit);
4427	else if (loop_entry < p2 && p2 < p2_orig)
4428	/* We jump backward to a new loop, nested within the current one.
4429	`p2` is the entry point and `p2_other` the exit of that inner. */
4430	return mutually_exclusive_aux (bufp, p1, p2, p2, p2_other);
4431	else
4432	return false;
4433	}
4434
4435	default:
4436	;
4437	}
4438
4439	/* Safe default. */
4440	return false;
4441	}
4442
4443	struct mutexcl_data {	3898	struct mutexcl_data {
4444	struct re_pattern_buffer *bufp;	3899	struct re_pattern_buffer *bufp;
4445	re_char *p1;	3900	re_char *p1;
@@ -4514,28 +3969,14 @@ mutually_exclusive_one (re_char p2, void arg)
4514	}	3969	}
4515	}	3970	}
4516		3971
		3972	/* True if "p1 matches something" implies "p2 fails". */
		3973
4517	static bool	3974	static bool
4518	mutually_exclusive_p (struct re_pattern_buffer bufp, re_char p1,	3975	mutually_exclusive_p (struct re_pattern_buffer bufp, re_char p1,
4519	re_char *p2)	3976	re_char *p2)
4520	{	3977	{
4521	struct mutexcl_data data = { bufp, p1 };	3978	struct mutexcl_data data = { bufp, p1 };
4522	bool new = forall_firstchar (bufp, p2, NULL, mutually_exclusive_one, &data);	3979	return forall_firstchar (bufp, p2, NULL, mutually_exclusive_one, &data);
4523	#if ENABLE_CHECKING
4524	bool old = mutually_exclusive_aux (bufp, p1, p2, bufp->buffer - 1, NULL);
4525	if (old && !new)
4526	{
4527	fprintf (stderr, "MUTUALLY_EXCLUSIVE_P: Lost an optimization between %d and %d!\n",
4528	(int)(p1 - bufp->buffer), (int)(p2 - bufp->buffer));
4529	print_partial_compiled_pattern (stderr, bufp->buffer, bufp->buffer + bufp->used);
4530	}
4531	else if (!old && new)
4532	{
4533	fprintf (stderr, "MUTUALLY_EXCLUSIVE_P: New optimization between %d and %d!\n",
4534	(int)(p1 - bufp->buffer), (int)(p2 - bufp->buffer));
4535	print_partial_compiled_pattern (stderr, bufp->buffer, bufp->buffer + bufp->used);
4536	}
4537	#endif
4538	return new;
4539	}	3980	}
4540		3981
4541	/* Matching routines. */	3982	/* Matching routines. */