* regex.c (PTR_TO_OFFSET) [!emacs]: Remove.

(RE_MULTIBYTE_P, RE_STRING_CHAR_AND_LENGTH): New macros. (GET_CHAR_BEFORE_2): Moved from charset.h plus fixed minor bug when we are between str1 and str2. (MAX_MULTIBYTE_LENGTH, CHAR_STRING) [!emacs]: Provide trivial default. (PATFETCH): Use `TRANSLATE'. (PATFETCH_RAW): Fetch multibyte char if applicable. (PATUNFETCH): Remove. (regex_compile): Rely on PATFETCH to do most of the multibyte magic. When writing a char, write it directly into the pattern buffer rather than going needlessly through a temp char-array. (re_match_2_internal): Similarly, rely on RE_STRING_CHAR to do the multibyte magic and remove the useless `#ifdef emacs'. (bcmp_translate): Don't compare as multibyte chars when in a unibyte buffer. * regex.h (struct re_pattern_buffer): Make field `multibyte' conditional on `emacs'. * charset.h (GET_CHAR_BEFORE_2): Moved to regex.c.
author: Stefan Monnier 2000-04-02 23:56:46 +0000
committer: Stefan Monnier 2000-04-02 23:56:46 +0000
commit: 2d1675e45c46d97aec4c6af28a0719778f79b8da (patch)
tree: 11c6492fbe96211f75cc8f4ac37e7e0cde1538bb /src
parent: 096540869a2be7676946ef9c4ee52e5cedb5c28a (diff)
download: emacs-2d1675e45c46d97aec4c6af28a0719778f79b8da.tar.gz
emacs-2d1675e45c46d97aec4c6af28a0719778f79b8da.zip
4 files changed, 110 insertions, 143 deletions
diff --git a/src/ChangeLog b/src/ChangeLog
index de883b830a4..9f3f20bbb3e 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,26 @@
+2000-04-02  Stefan Monnier  <monnier@cs.yale.edu>
+        * regex.c (PTR_TO_OFFSET) [!emacs]: Remove.
+        (RE_MULTIBYTE_P, RE_STRING_CHAR_AND_LENGTH): New macros.
+        (GET_CHAR_BEFORE_2): Moved from charset.h plus fixed minor bug when
+        we are between str1 and str2.
+        (MAX_MULTIBYTE_LENGTH, CHAR_STRING) [!emacs]: Provide trivial default.
+        (PATFETCH): Use `TRANSLATE'.
+        (PATFETCH_RAW): Fetch multibyte char if applicable.
+        (PATUNFETCH): Remove.
+        (regex_compile): Rely on PATFETCH to do most of the multibyte magic.
+        When writing a char, write it directly into the pattern buffer rather
+        than going needlessly through a temp char-array.
+        (re_match_2_internal): Similarly, rely on RE_STRING_CHAR to do the
+        multibyte magic and remove the useless `#ifdef emacs'.
+        (bcmp_translate): Don't compare as multibyte chars when in a unibyte
+        buffer.
+        * regex.h (struct re_pattern_buffer): Make field `multibyte'
+        conditional on `emacs'.
+        * charset.h (GET_CHAR_BEFORE_2): Moved to regex.c.
 2000-04-01  Ken Raeburn  <raeburn@gnu.org>
        * alloc.c (MARK_STRING, UNMARK_STRING, STRING_MARKED_P): Expand
diff --git a/src/charset.h b/src/charset.h
index 3acc447c5f8..d9257b8955a 100644
--- a/src/charset.h
+++ b/src/charset.h
@@ -577,18 +577,6 @@ else
   ? 1                                                  \
   : multibyte_form_length (str, len))
-/* Set C a (possibly multibyte) character before P.  P points into a
-   string which is the virtual concatenation of STR1 (which ends at
-   END1) or STR2 (which ends at END2).  */
-#define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2)                     \
-  do {                                                                      \
-    const unsigned char *dtemp = (p);                                       \
-    const unsigned char *dlimit = ((p) > (str2) && (p) <= (end2)) ? (str2) : (str1); \
-    while (dtemp-- > dlimit && *dtemp >= 0xA0);             \
-    c = STRING_CHAR (dtemp, p - dtemp);                                     \
-  } while (0)
 #ifdef emacs
 /* Increase the buffer byte position POS_BYTE of the current buffer to
diff --git a/src/regex.c b/src/regex.c
index 911daed209d..9a56db728e5 100644
--- a/src/regex.c
+++ b/src/regex.c
@@ -20,7 +20,6 @@
   USA.  */
 /* TODO:
-   - clean up multibyte issues
   - structure the opcode space into opcode+flag.
   - merge with glibc's regex.[ch]
 */
@@ -37,8 +36,6 @@
 /* Converts the pointer to the char to BEG-based offset from the start.  */
 #define PTR_TO_OFFSET(d) POS_AS_IN_BUFFER (POINTER_TO_OFFSET (d))
 #define POS_AS_IN_BUFFER(p) ((p) + (NILP (re_match_object) || BUFFERP (re_match_object)))
-#else
-#define PTR_TO_OFFSET(d) 0
 #endif
 #ifdef HAVE_CONFIG_H
@@ -79,8 +76,28 @@
 #define realloc xrealloc
 #define free xfree
+#define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte)
 #define RE_STRING_CHAR(p, s) \
  (multibyte ? (STRING_CHAR (p, s)) : (*(p)))
+#define RE_STRING_CHAR_AND_LENGTH(p, s, len) \
+  (multibyte ? (STRING_CHAR_AND_LENGTH (p, s, len)) : ((len) = 1, *(p)))
+/* Set C to the (possibly multibyte) character before P.  P points into
+   a string which is the virtual concatenation of STR1 (which ends at
+   END1) and STR2 (which ends at END2).  */
+#define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2)                 \
+  do {                                                                  \
+    if (multibyte)                                                      \
+       {                                                                \
+         re_char *dtemp = (p) == (str2) ? (end1) : (p);                 \
+         re_char *dlimit = ((p) > (str2) && (p) <= (end2)) ? (str2) : (str1); \
+         while (dtemp-- > dlimit && !CHAR_HEAD_P (*dtemp));             \
+         c = STRING_CHAR (dtemp, (p) - dtemp);                          \
+       }                                                                \
+     else                                                               \
+       (c = ((p) == (str2) ? (end1) : (p))[-1]);                        \
+  } while (0)
 #else  /* not emacs */
@@ -181,6 +198,8 @@ init_syntax_once ()
 #define BASE_LEADING_CODE_P(c) (0)
 #define CHAR_CHARSET(c) 0
 #define CHARSET_LEADING_CODE_BASE(c) 0
+#define MAX_MULTIBYTE_LENGTH 1
+#define RE_MULTIBYTE_P(x) 0
 #define WORD_BOUNDARY_P(c1, c2) (0)
 #define CHAR_HEAD_P(p) (1)
 #define SINGLE_BYTE_CHAR_P(c) (1)
@@ -188,7 +207,9 @@ init_syntax_once ()
 #define MULTIBYTE_FORM_LENGTH(p, s) (1)
 #define STRING_CHAR(p, s) (*(p))
 #define RE_STRING_CHAR STRING_CHAR
+#define CHAR_STRING(c, s) (*(s) = (c), 1)
 #define STRING_CHAR_AND_LENGTH(p, s, actual_len) ((actual_len) = 1, *(p))
+#define RE_STRING_CHAR_AND_LENGTH STRING_CHAR_AND_LENGTH
 #define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \
  (c = ((p) == (str2) ? *((end1) - 1) : *((p) - 1)))
 #endif /* not emacs */
@@ -1550,19 +1571,19 @@ static int analyse_first _RE_ARGS((unsigned char *p, unsigned char *pend,
 #define PATFETCH(c)                                                     \
  do {                                                                  \
    PATFETCH_RAW (c);                                                   \
-    if (RE_TRANSLATE_P (translate)) c = RE_TRANSLATE (translate, c);    \
+    c = TRANSLATE (c);                                                  \
  } while (0)
 /* Fetch the next character in the uncompiled pattern, with no
   translation.  */
 #define PATFETCH_RAW(c)                                                 \
-  do {if (p == pend) return REG_EEND;                                   \
+  do {                                                                  \
-    c = *p++;                                                           \
+    int len;                                                            \
+    if (p == pend) return REG_EEND;                                     \
+    c = RE_STRING_CHAR_AND_LENGTH (p, pend - p, len);                   \
+    p += len;                                                           \
  } while (0)
-/* Go backwards one character in the pattern.  */
-#define PATUNFETCH p--
 /* If `translate' is non-null, return translate[D], else just D.  We
   cast the subscript to translate because some data is declared as
@@ -1957,6 +1978,9 @@ regex_compile (pattern, size, syntax, bufp)
  /* Work area for range table of charset.  */
  struct range_table_work_area range_table_work;
+  /* Nonzero if the object matched can contain multibyte characters.  */
+  const boolean multibyte = RE_MULTIBYTE_P (bufp);
 #ifdef DEBUG
  debug++;
  DEBUG_PRINT1 ("\nCompiling pattern: ");
@@ -1994,14 +2018,6 @@ regex_compile (pattern, size, syntax, bufp)
  /* Always count groups, whether or not bufp->no_sub is set.  */
  bufp->re_nsub = 0;
-#ifdef emacs
-  /* bufp->multibyte is set before regex_compile is called, so don't alter
-     it. */
-#else  /* not emacs */
-  /* Nothing is recognized as a multibyte character.  */
-  bufp->multibyte = 0;
-#endif
 #if !defined (emacs) && !defined (SYNTAX_TABLE)
  /* Initialize the syntax table.  */
   init_syntax_once ();
@@ -2254,8 +2270,8 @@ regex_compile (pattern, size, syntax, bufp)
            /* Read in characters and ranges, setting map bits.  */
            for (;;)
              {
-                int len;
                boolean escaped_char = false;
+                const unsigned char *p2 = p;
                if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
@@ -2274,19 +2290,10 @@ regex_compile (pattern, size, syntax, bufp)
                    /* Could be the end of the bracket expression.      If it's
                       not (i.e., when the bracket expression is `[]' so
                       far), the ']' character bit gets set way below.  */
-                    if (c == ']' && p != p1 + 1)
+                    if (c == ']' && p2 != p1)
                      break;
                  }
-                /* If C indicates start of multibyte char, get the
-                   actual character code in C, and set the pattern
-                   pointer P to the next character boundary.  */
-                if (bufp->multibyte && BASE_LEADING_CODE_P (c))
-                  {
-                    PATUNFETCH;
-                    c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
-                    p += len;
-                  }
                /* What should we do for the character which is
                   greater than 0x7F, but not BASE_LEADING_CODE_P?
                   XXX */
@@ -2294,8 +2301,8 @@ regex_compile (pattern, size, syntax, bufp)
                /* See if we're at the beginning of a possible character
                   class.  */
-                else if (!escaped_char &&
+                if (!escaped_char &&
-                         syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
+                    syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
                  {
                    /* Leave room for the null.  */
                    char str[CHAR_CLASS_MAX_LENGTH + 1];
@@ -2358,7 +2365,7 @@ regex_compile (pattern, size, syntax, bufp)
                           they can only match ASCII characters.  We
                           don't need to handle them for multibyte.  */
-                        if (bufp->multibyte)
+                        if (multibyte)
                          {
                            int bit = 0;
@@ -2435,12 +2442,6 @@ regex_compile (pattern, size, syntax, bufp)
                    /* Fetch the character which ends the range. */
                    PATFETCH (c1);
-                    if (bufp->multibyte && BASE_LEADING_CODE_P (c1))
-                      {
-                        PATUNFETCH;
-                        c1 = STRING_CHAR_AND_LENGTH (p, pend - p, len);
-                        p += len;
-                      }
                    if (SINGLE_BYTE_CHAR_P (c)
                        && ! SINGLE_BYTE_CHAR_P (c1))
@@ -3028,16 +3029,6 @@ regex_compile (pattern, size, syntax, bufp)
        default:
        /* Expects the character in `c'.  */
        normal_char:
-          p1 = p - 1;           /* P1 points the head of C.  */
-#ifdef emacs
-          if (bufp->multibyte)
-            {
-              c = STRING_CHAR (p1, pend - p1);
-              c = TRANSLATE (c);
-              /* Set P to the next character boundary.  */
-              p += MULTIBYTE_FORM_LENGTH (p1, pend - p1) - 1;
-            }
-#endif
              /* If no exactn currently being built.  */
          if (!pending_exact
@@ -3045,7 +3036,7 @@ regex_compile (pattern, size, syntax, bufp)
              || pending_exact + *pending_exact + 1 != b
              /* We have only one byte following the exactn for the count.  */
-              || *pending_exact >= (1 << BYTEWIDTH) - (p - p1)
+              || *pending_exact >= (1 << BYTEWIDTH) - MAX_MULTIBYTE_LENGTH
              /* If followed by a repetition operator.  */
              || (p != pend && (*p == '*' || *p == '^'))
@@ -3065,24 +3056,13 @@ regex_compile (pattern, size, syntax, bufp)
              pending_exact = b - 1;
            }
-#ifdef emacs
+          GET_BUFFER_SPACE (MAX_MULTIBYTE_LENGTH);
-          if (! SINGLE_BYTE_CHAR_P (c))
+          {
-            {
+            int len = CHAR_STRING (c, b);
-              unsigned char str[MAX_MULTIBYTE_LENGTH];
+            b += len;
-              int i = CHAR_STRING (c, str);
+            (*pending_exact) += len;
-              int j;
+          }
-              for (j = 0; j < i; j++)
-                {
-                  BUF_PUSH (str[j]);
-                  (*pending_exact)++;
-                }
-            }
-          else
-#endif
-            {
-              BUF_PUSH (c);
-              (*pending_exact)++;
-            }
          break;
        } /* switch (c) */
    } /* while p != pend */
@@ -3616,7 +3596,7 @@ re_compile_fastmap (bufp)
  bufp->fastmap_accurate = 1;       /* It will be when we're done.  */
  analysis = analyse_first (bufp->buffer, bufp->buffer + bufp->used,
-                            fastmap, bufp->multibyte);
+                            fastmap, RE_MULTIBYTE_P (bufp));
  if (analysis < -1)
    return analysis;
  bufp->can_be_null = (analysis != 0);
@@ -3723,7 +3703,7 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
  int anchored_start = 0;
  /* Nonzero if we have to concern multibyte character.  */
-  const boolean multibyte = bufp->multibyte;
+  const boolean multibyte = RE_MULTIBYTE_P (bufp);
  /* Check for out-of-range STARTPOS.  */
  if (startpos < 0 || startpos > total_size)
@@ -3850,11 +3830,11 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
            }
          else                          /* Searching backwards.  */
            {
-              buf_ch = STRING_CHAR (d, (startpos >= size1
+              int room = (startpos >= size1
-                                        ? size2 + size1 - startpos
+                          ? size2 + size1 - startpos
-                                        : size1 - startpos));
+                          : size1 - startpos);
-              if (RE_TRANSLATE_P (translate))
+              buf_ch = RE_STRING_CHAR (d, room);
-                buf_ch = RE_TRANSLATE (translate, buf_ch);
+              buf_ch = TRANSLATE (buf_ch);
              if (! (buf_ch >= 0400
                     || fastmap[buf_ch]))
@@ -3940,7 +3920,10 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
 /* Declarations and macros for re_match_2.  */
-static int bcmp_translate ();
+static int bcmp_translate _RE_ARGS((re_char *s1, re_char *s2,
+                                    register int len,
+                                    RE_TRANSLATE_TYPE translate,
+                                    const int multibyte));
 /* This converts PTR, a pointer into one of the search strings `string1'
   and `string2' into an offset from the beginning of that string.  */
@@ -4093,7 +4076,7 @@ mutually_exclusive_p (bufp, p1, p2)
     unsigned char *p1, *p2;
 {
  re_opcode_t op2;
-  const boolean multibyte = bufp->multibyte;
+  const boolean multibyte = RE_MULTIBYTE_P (bufp);
  unsigned char *pend = bufp->buffer + bufp->used;
  assert (p1 >= bufp->buffer && p1 < pend
@@ -4373,7 +4356,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
  RE_TRANSLATE_TYPE translate = bufp->translate;
  /* Nonzero if we have to concern multibyte character.  */
-  const boolean multibyte = bufp->multibyte;
+  const boolean multibyte = RE_MULTIBYTE_P (bufp);
  /* Failure point stack.  Each place that can handle a failure further
     down the line pushes a failure point on this stack.  It consists of
@@ -4721,7 +4704,6 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
             testing `translate' inside the loop.  */
          if (RE_TRANSLATE_P (translate))
            {
-#ifdef emacs
              if (multibyte)
                do
                  {
@@ -4745,7 +4727,6 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
                  }
                while (mcnt > 0);
              else
-#endif /* not emacs */
                do
                  {
                    PREFETCH ();
@@ -4783,17 +4764,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
            DEBUG_PRINT1 ("EXECUTING anychar.\n");
            PREFETCH ();
+            buf_ch = RE_STRING_CHAR_AND_LENGTH (d, dend - d, buf_charlen);
-#ifdef emacs
-            if (multibyte)
-              buf_ch = STRING_CHAR_AND_LENGTH (d, dend - d, buf_charlen);
-            else
-#endif /* not emacs */
-              {
-                buf_ch = *d;
-                buf_charlen = 1;
-              }
            buf_ch = TRANSLATE (buf_ch);
            if ((!(bufp->syntax & RE_DOT_NEWLINE)
@@ -4828,27 +4799,20 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
            DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
-            PREFETCH ();
-            c = *d;
            range_table_exists = CHARSET_RANGE_TABLE_EXISTS_P (&p[-1]);
-#ifdef emacs
            if (range_table_exists)
              {
                range_table = CHARSET_RANGE_TABLE (&p[-1]); /* Past the bitmap.  */
                EXTRACT_NUMBER_AND_INCR (count, range_table);
              }
-            if (multibyte && BASE_LEADING_CODE_P (c))
+            PREFETCH ();
-              c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
+            c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len);
-#endif /* emacs */
+            c = TRANSLATE (c); /* The character to match.  */
            if (SINGLE_BYTE_CHAR_P (c))
              {                 /* Lookup bitmap.  */
-                c = TRANSLATE (c); /* The character to match.  */
-                len = 1;
                /* Cast to `unsigned' instead of `unsigned char' in
                   case the bit list is a full 32 bytes long.  */
                if (c < (unsigned) (CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH)
@@ -4994,7 +4958,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
                /* Compare that many; failure if mismatch, else move
                   past them.  */
                if (RE_TRANSLATE_P (translate)
-                    ? bcmp_translate (d, d2, mcnt, translate)
+                    ? bcmp_translate (d, d2, mcnt, translate, multibyte)
                    : bcmp (d, d2, mcnt))
                  {
                    d = dfail;
@@ -5263,18 +5227,17 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
                 is the character at D, and S2 is the syntax of C2.  */
              int c1, c2, s1, s2;
 #ifdef emacs
-              int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (PTR_TO_OFFSET (d - 1));
+              int offset = PTR_TO_OFFSET (d - 1);
+              int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
              UPDATE_SYNTAX_TABLE (charpos);
 #endif
-              /* FIXME: This does a STRING_CHAR even for unibyte buffers.  */
              GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
              s1 = SYNTAX (c1);
 #ifdef emacs
              UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
 #endif
              PREFETCH ();
-              /* FIXME: This does a STRING_CHAR even for unibyte buffers.  */
+              c2 = RE_STRING_CHAR (d, dend - d);
-              c2 = STRING_CHAR (d, dend - d);
              s2 = SYNTAX (c2);
              if (/* Case 2: Only one of S1 and S2 is Sword.  */
@@ -5303,12 +5266,12 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
                 is the character at D, and S2 is the syntax of C2.  */
              int c1, c2, s1, s2;
 #ifdef emacs
-              int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (PTR_TO_OFFSET (d));
+              int offset = PTR_TO_OFFSET (d);
+              int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
              UPDATE_SYNTAX_TABLE (charpos);
 #endif
              PREFETCH ();
-              /* FIXME: This does a STRING_CHAR even for unibyte buffers.  */
+              c2 = RE_STRING_CHAR (d, dend - d);
-              c2 = STRING_CHAR (d, dend - d);
              s2 = SYNTAX (c2);
        
              /* Case 2: S2 is not Sword. */
@@ -5346,7 +5309,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
                 is the character at D, and S2 is the syntax of C2.  */
              int c1, c2, s1, s2;
 #ifdef emacs
-              int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (PTR_TO_OFFSET (d) - 1);
+              int offset = PTR_TO_OFFSET (d) - 1;
+              int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
              UPDATE_SYNTAX_TABLE (charpos);
 #endif
              GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
@@ -5360,8 +5324,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
              if (!AT_STRINGS_END (d))
                {
                  PREFETCH ();
-                  /* FIXME: This does a STRING_CHAR even for unibyte buffers.  */
+                  c2 = RE_STRING_CHAR (d, dend - d);
-                  c2 = STRING_CHAR (d, dend - d);
 #ifdef emacs
                  UPDATE_SYNTAX_TABLE_FORWARD (charpos);
 #endif
@@ -5383,20 +5346,15 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
          PREFETCH ();
 #ifdef emacs
          {
-            int pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (PTR_TO_OFFSET (d));
+            int offset = PTR_TO_OFFSET (d);
+            int pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
            UPDATE_SYNTAX_TABLE (pos1);
          }
 #endif
          {
            int c, len;
-            if (multibyte)
+            c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len);
-              /* we must concern about multibyte form, ... */
-              c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
-            else
-              /* everything should be handled as ASCII, even though it
-                 looks like multibyte form.  */
-              c = *d, len = 1;
            if ((SYNTAX (c) != (enum syntaxcode) mcnt) ^ not)
              goto fail;
@@ -5431,11 +5389,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
          PREFETCH ();
          {
            int c, len;
+            c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len);
-            if (multibyte)
-              c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
-            else
-              c = *d, len = 1;
            if ((!CHAR_HAS_CATEGORY (c, mcnt)) ^ not)
              goto fail;
@@ -5512,23 +5466,23 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
   bytes; nonzero otherwise.  */
 static int
-bcmp_translate (s1, s2, len, translate)
+bcmp_translate (s1, s2, len, translate, multibyte)
-     unsigned char *s1, *s2;
+     re_char *s1, *s2;
     register int len;
     RE_TRANSLATE_TYPE translate;
+     const int multibyte;
 {
-  register unsigned char *p1 = s1, *p2 = s2;
+  register re_char *p1 = s1, *p2 = s2;
-  unsigned char *p1_end = s1 + len;
+  re_char *p1_end = s1 + len;
-  unsigned char *p2_end = s2 + len;
+  re_char *p2_end = s2 + len;
  while (p1 != p1_end && p2 != p2_end)
    {
      int p1_charlen, p2_charlen;
      int p1_ch, p2_ch;
-      /* FIXME: This assumes `multibyte = true'.  */
+      p1_ch = RE_STRING_CHAR_AND_LENGTH (p1, p1_end - p1, p1_charlen);
-      p1_ch = STRING_CHAR_AND_LENGTH (p1, p1_end - p1, p1_charlen);
+      p2_ch = RE_STRING_CHAR_AND_LENGTH (p2, p2_end - p2, p2_charlen);
-      p2_ch = STRING_CHAR_AND_LENGTH (p2, p2_end - p2, p2_charlen);
      if (RE_TRANSLATE (translate, p1_ch)
          != RE_TRANSLATE (translate, p2_ch))
diff --git a/src/regex.h b/src/regex.h
index 9ebc4e0bf22..9ee2060a1ed 100644
--- a/src/regex.h
+++ b/src/regex.h
@@ -362,9 +362,11 @@ struct re_pattern_buffer
        /* If true, an anchor at a newline matches.  */
  unsigned newline_anchor : 1;
+#ifdef emacs
  /* If true, multi-byte form in the `buffer' should be recognized as a
     multibyte character. */
  unsigned multibyte : 1;
+#endif
 /* [[[end pattern_buffer]]] */
 };
author	Stefan Monnier	2000-04-02 23:56:46 +0000
committer	Stefan Monnier	2000-04-02 23:56:46 +0000
commit	2d1675e45c46d97aec4c6af28a0719778f79b8da (patch)
tree	11c6492fbe96211f75cc8f4ac37e7e0cde1538bb /src
parent	096540869a2be7676946ef9c4ee52e5cedb5c28a (diff)
download	emacs-2d1675e45c46d97aec4c6af28a0719778f79b8da.tar.gz emacs-2d1675e45c46d97aec4c6af28a0719778f79b8da.zip