1 files changed, 129 insertions, 94 deletions
diff --git a/src/search.c b/src/search.c
index 7e990f2bfd7..d86a7cca7b2 100644
--- a/src/search.c
+++ b/src/search.c
@@ -293,7 +293,7 @@ looking_at_1 (string, posix)
  CHECK_STRING (string);
  bufp = compile_pattern (string, &search_regs,
                          (!NILP (current_buffer->case_fold_search)
-                           ? DOWNCASE_TABLE : Qnil),
+                           ? current_buffer->case_canon_table : Qnil),
                          posix,
                          !NILP (current_buffer->enable_multibyte_characters));
@@ -399,7 +399,7 @@ string_match_1 (regexp, string, start, posix)
  bufp = compile_pattern (regexp, &search_regs,
                          (!NILP (current_buffer->case_fold_search)
-                           ? DOWNCASE_TABLE : Qnil),
+                           ? current_buffer->case_canon_table : Qnil),
                          posix,
                          STRING_MULTIBYTE (string));
  immediate_quit = 1;
@@ -499,7 +499,7 @@ fast_c_string_match_ignore_case (regexp, string)
  regexp = string_make_unibyte (regexp);
  re_match_object = Qt;
  bufp = compile_pattern (regexp, 0,
-                          Vascii_downcase_table, 0,
+                          Vascii_canon_table, 0,
                          0);
  immediate_quit = 1;
  val = re_search (bufp, string, len, 0, len, 0);
@@ -516,7 +516,7 @@ fast_string_match_ignore_case (regexp, string)
  int val;
  struct re_pattern_buffer *bufp;
-  bufp = compile_pattern (regexp, 0, Vascii_downcase_table,
+  bufp = compile_pattern (regexp, 0, Vascii_canon_table,
                          0, STRING_MULTIBYTE (string));
  immediate_quit = 1;
  re_match_object = string;
@@ -1175,7 +1175,9 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n,
      unsigned char *patbuf;
      int multibyte = !NILP (current_buffer->enable_multibyte_characters);
      unsigned char *base_pat = SDATA (string);
-      int charset_base = -1;
+      /* Set to nozero if we find a non-ASCII char that need
+         translation.  */
+      int charset_base = 0;
      int boyer_moore_ok = 1;
      /* MULTIBYTE says whether the text to be searched is multibyte.
@@ -1221,9 +1223,17 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n,
      base_pat = raw_pattern;
      if (multibyte)
        {
+          /* Fill patbuf by translated characters in STRING while
+             checking if we can use boyer-moore search.  If TRT is
+             non-nil, we can use boyer-moore search only if TRT can be
+             represented by the byte array of 256 elements.  For that,
+             all non-ASCII case-equivalents of all case-senstive
+             characters in STRING must belong to the same charset and
+             row.  */
          while (--len >= 0)
            {
-              unsigned char str[MAX_MULTIBYTE_LENGTH];
+              unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
              int c, translated, inverse;
              int in_charlen, charlen;
@@ -1233,50 +1243,62 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n,
              if (RE && *base_pat == '\\')
                {
                  len--;
+                  raw_pattern_size--;
                  len_byte--;
                  base_pat++;
                }
              c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
-              /* Translate the character, if requested.  */
+              if (NILP (trt))
-              TRANSLATE (translated, trt, c);
-              /* If translation changed the byte-length, go back
-                 to the original character.  */
-              charlen = CHAR_STRING (translated, str);
-              if (in_charlen != charlen)
                {
-                  translated = c;
+                  str = base_pat;
-                  charlen = CHAR_STRING (c, str);
+                  charlen = in_charlen;
                }
+              else
-              /* If we are searching for something strange,
-                 an invalid multibyte code, don't use boyer-moore.  */
-              if (! ASCII_BYTE_P (translated)
-                  && (charlen == 1 /* 8bit code */
-                      || charlen != in_charlen /* invalid multibyte code */
-                      ))
-                boyer_moore_ok = 0;
-              TRANSLATE (inverse, inverse_trt, c);
-              /* Did this char actually get translated?
-                 Would any other char get translated into it?  */
-              if (translated != c || inverse != c)
                {
-                  /* Keep track of which character set row
+                  /* Translate the character.  */
-                     contains the characters that need translation.  */
+                  TRANSLATE (translated, trt, c);
-                  int charset_base_code = c & ~CHAR_FIELD3_MASK;
+                  charlen = CHAR_STRING (translated, str_base);
-                  int inverse_charset_base = inverse & ~CHAR_FIELD3_MASK;
+                  str = str_base;
-                  if (charset_base_code != inverse_charset_base)
+                  /* Check if C has any other case-equivalents.  */
-                    boyer_moore_ok = 0;
+                  TRANSLATE (inverse, inverse_trt, c);
-                  else if (charset_base == -1)
+                  /* If so, check if we can use boyer-moore.  */
-                    charset_base = charset_base_code;
+                  if (c != inverse && boyer_moore_ok)
-                  else if (charset_base != charset_base_code)
+                    {
-                    /* If two different rows appear, needing translation,
+                      /* Check if all equivalents belong to the same
-                       then we cannot use boyer_moore search.  */
+                         charset & row.  Note that the check of C
-                    boyer_moore_ok = 0;
+                         itself is done by the last iteration.  Note
+                         also that we don't have to check ASCII
+                         characters because boyer-moore search can
+                         always handle their translation.  */
+                      while (1)
+                        {
+                          if (! ASCII_BYTE_P (inverse))
+                            {
+                              if (SINGLE_BYTE_CHAR_P (inverse))
+                                {
+                                  /* Boyer-moore search can't handle a
+                                     translation of an eight-bit
+                                     character.  */
+                                  boyer_moore_ok = 0;
+                                  break;
+                                }
+                              else if (charset_base == 0)
+                                charset_base = inverse & ~CHAR_FIELD3_MASK;
+                              else if ((inverse & ~CHAR_FIELD3_MASK)
+                                       != charset_base)
+                                {
+                                  boyer_moore_ok = 0;
+                                  break;
+                                }
+                            }
+                          if (c == inverse)
+                            break;
+                          TRANSLATE (inverse, inverse_trt, inverse);
+                        }
+                    }
                }
              /* Store this character into the translated pattern.  */
@@ -1300,6 +1322,7 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n,
              if (RE && *base_pat == '\\')
                {
                  len--;
+                  raw_pattern_size--;
                  base_pat++;
                }
              c = *base_pat++;
@@ -1533,16 +1556,18 @@ simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
    return n;
 }
-/* Do Boyer-Moore search N times for the string PAT,
+/* Do Boyer-Moore search N times for the string BASE_PAT,
   whose length is LEN/LEN_BYTE,
   from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
   DIRECTION says which direction we search in.
   TRT and INVERSE_TRT are translation tables.
+   Characters in PAT are already translated by TRT.
-   This kind of search works if all the characters in PAT that have
+   This kind of search works if all the characters in BASE_PAT that
-   nontrivial translation are the same aside from the last byte.  This
+   have nontrivial translation are the same aside from the last byte.
-   makes it possible to translate just the last byte of a character,
+   This makes it possible to translate just the last byte of a
-   and do so after just a simple test of the context.
+   character, and do so after just a simple test of the context.
+   CHARSET_BASE is nonzero iff there is such a non-ASCII character.
   If that criterion is not satisfied, do not call this function.  */
@@ -1569,8 +1594,13 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
  int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
  unsigned char simple_translate[0400];
-  int translate_prev_byte = 0;
+  /* These are set to the preceding bytes of a byte to be translated
-  int translate_anteprev_byte = 0;
+     if charset_base is nonzero.  As the maximum byte length of a
+     multibyte character is 4, we have to check at most three previous
+     bytes.  */
+  int translate_prev_byte1 = 0;
+  int translate_prev_byte2 = 0;
+  int translate_prev_byte3 = 0;
 #ifdef C_ALLOCA
  int BM_tab_space[0400];
@@ -1636,6 +1666,23 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
  for (i = 0; i < 0400; i++)
    simple_translate[i] = i;
+  if (charset_base)
+    {
+      /* Setup translate_prev_byte1/2/3 from CHARSET_BASE.  Only a
+         byte following them are the target of translation.  */
+      int sample_char = charset_base | 0x20;
+      unsigned char str[MAX_MULTIBYTE_LENGTH];
+      int len = CHAR_STRING (sample_char, str);
+      translate_prev_byte1 = str[len - 2];
+      if (len > 2)
+        {
+          translate_prev_byte2 = str[len - 3];
+          if (len > 3)
+            translate_prev_byte3 = str[len - 4];
+        }
+    }
  i = 0;
  while (i != infinity)
    {
@@ -1645,57 +1692,37 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
        i = infinity;
      if (! NILP (trt))
        {
-          int ch;
+          /* If the byte currently looking at is a head of a character
-          int untranslated;
+             to check case-equivalents, set CH to that character.  An
-          int this_translated = 1;
+             ASCII character and a non-ASCII character matching with
+             CHARSET_BASE are to be checked.  */
-          if (multibyte
+          int ch = -1;
-              /* Is *PTR the last byte of a character?  */
-              && (pat_end - ptr == 1 || CHAR_HEAD_P (ptr[1])))
+          if (ASCII_BYTE_P (*ptr) || ! multibyte)
+            ch = *ptr;
+          else if (charset_base && CHAR_HEAD_P (*ptr))
            {
-              unsigned char *charstart = ptr;
+              ch = STRING_CHAR (ptr, pat_end - ptr);
-              while (! CHAR_HEAD_P (*charstart))
+              if (charset_base != (ch & ~CHAR_FIELD3_MASK))
-                charstart--;
+                ch = -1;
-              untranslated = STRING_CHAR (charstart, ptr - charstart + 1);
-              if (charset_base == (untranslated & ~CHAR_FIELD3_MASK))
-                {
-                  TRANSLATE (ch, trt, untranslated);
-                  if (! CHAR_HEAD_P (*ptr))
-                    {
-                      translate_prev_byte = ptr[-1];
-                      if (! CHAR_HEAD_P (translate_prev_byte))
-                        translate_anteprev_byte = ptr[-2];
-                    }
-                }
-              else
-                {
-                  this_translated = 0;
-                  ch = *ptr;
-                }
            }
-          else if (!multibyte)
-            TRANSLATE (ch, trt, *ptr);
-          else
-            {
-              ch = *ptr;
-              this_translated = 0;
-            }
-          if (ch > 0400)
-            j = ((unsigned char) ch) | 0200;
-          else
-            j = (unsigned char) ch;
+          j = *ptr;
          if (i == infinity)
            stride_for_teases = BM_tab[j];
          BM_tab[j] = dirlen - i;
          /* A translation table is accompanied by its inverse -- see */
          /* comment following downcase_table for details */
-          if (this_translated)
+          if (ch >= 0)
            {
              int starting_ch = ch;
-              int starting_j = j;
+              int starting_j;
+              if (ch > 0400)
+                starting_j = ((unsigned char) ch) | 0200;
+              else
+                starting_j = (unsigned char) ch;
              while (1)
                {
                  TRANSLATE (ch, inverse_trt, ch);
@@ -1821,9 +1848,13 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
                          || ((cursor == tail_end_ptr
                               || CHAR_HEAD_P (cursor[1]))
                              && (CHAR_HEAD_P (cursor[0])
-                                  || (translate_prev_byte == cursor[-1]
+                                  /* Check if this is the last byte of
-                                      && (CHAR_HEAD_P (translate_prev_byte)
+                                     a translable character.  */
-                                          || translate_anteprev_byte == cursor[-2])))))
+                                  || (translate_prev_byte1 == cursor[-1]
+                                      && (CHAR_HEAD_P (translate_prev_byte1)
+                                          || (translate_prev_byte2 == cursor[-2]
+                                              && (CHAR_HEAD_P (translate_prev_byte2)
+                                                  || (translate_prev_byte3 == cursor[-3]))))))))
                        ch = simple_translate[*cursor];
                      else
                        ch = *cursor;
@@ -1901,9 +1932,13 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
                      || ((ptr == tail_end_ptr
                           || CHAR_HEAD_P (ptr[1]))
                          && (CHAR_HEAD_P (ptr[0])
-                              || (translate_prev_byte == ptr[-1]
+                              /* Check if this is the last byte of a
-                                  && (CHAR_HEAD_P (translate_prev_byte)
+                                 translable character.  */
-                                      || translate_anteprev_byte == ptr[-2])))))
+                              || (translate_prev_byte1 == ptr[-1]
+                                  && (CHAR_HEAD_P (translate_prev_byte1)
+                                      || (translate_prev_byte2 == ptr[-2]
+                                          && (CHAR_HEAD_P (translate_prev_byte2)
+                                              || translate_prev_byte3 == ptr[-3])))))))
                    ch = simple_translate[*ptr];
                  else
                    ch = *ptr;

diff --git a/src/search.c b/src/search.c index 7e990f2bfd7..d86a7cca7b2 100644 --- a/src/search.c +++ b/src/search.c
@@ -293,7 +293,7 @@ looking_at_1 (string, posix)
293	CHECK_STRING (string);	293	CHECK_STRING (string);
294	bufp = compile_pattern (string, &search_regs,	294	bufp = compile_pattern (string, &search_regs,
295	(!NILP (current_buffer->case_fold_search)	295	(!NILP (current_buffer->case_fold_search)
296	? DOWNCASE_TABLE : Qnil),	296	? current_buffer->case_canon_table : Qnil),
297	posix,	297	posix,
298	!NILP (current_buffer->enable_multibyte_characters));	298	!NILP (current_buffer->enable_multibyte_characters));
299		299
@@ -399,7 +399,7 @@ string_match_1 (regexp, string, start, posix)
399		399
400	bufp = compile_pattern (regexp, &search_regs,	400	bufp = compile_pattern (regexp, &search_regs,
401	(!NILP (current_buffer->case_fold_search)	401	(!NILP (current_buffer->case_fold_search)
402	? DOWNCASE_TABLE : Qnil),	402	? current_buffer->case_canon_table : Qnil),
403	posix,	403	posix,
404	STRING_MULTIBYTE (string));	404	STRING_MULTIBYTE (string));
405	immediate_quit = 1;	405	immediate_quit = 1;
@@ -499,7 +499,7 @@ fast_c_string_match_ignore_case (regexp, string)
499	regexp = string_make_unibyte (regexp);	499	regexp = string_make_unibyte (regexp);
500	re_match_object = Qt;	500	re_match_object = Qt;
501	bufp = compile_pattern (regexp, 0,	501	bufp = compile_pattern (regexp, 0,
502	Vascii_downcase_table, 0,	502	Vascii_canon_table, 0,
503	0);	503	0);
504	immediate_quit = 1;	504	immediate_quit = 1;
505	val = re_search (bufp, string, len, 0, len, 0);	505	val = re_search (bufp, string, len, 0, len, 0);
@@ -516,7 +516,7 @@ fast_string_match_ignore_case (regexp, string)
516	int val;	516	int val;
517	struct re_pattern_buffer *bufp;	517	struct re_pattern_buffer *bufp;
518		518
519	bufp = compile_pattern (regexp, 0, Vascii_downcase_table,	519	bufp = compile_pattern (regexp, 0, Vascii_canon_table,
520	0, STRING_MULTIBYTE (string));	520	0, STRING_MULTIBYTE (string));
521	immediate_quit = 1;	521	immediate_quit = 1;
522	re_match_object = string;	522	re_match_object = string;
@@ -1175,7 +1175,9 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1175	unsigned char *patbuf;	1175	unsigned char *patbuf;
1176	int multibyte = !NILP (current_buffer->enable_multibyte_characters);	1176	int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1177	unsigned char *base_pat = SDATA (string);	1177	unsigned char *base_pat = SDATA (string);
1178	int charset_base = -1;	1178	/* Set to nozero if we find a non-ASCII char that need
		1179	translation. */
		1180	int charset_base = 0;
1179	int boyer_moore_ok = 1;	1181	int boyer_moore_ok = 1;
1180		1182
1181	/* MULTIBYTE says whether the text to be searched is multibyte.	1183	/* MULTIBYTE says whether the text to be searched is multibyte.
@@ -1221,9 +1223,17 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1221	base_pat = raw_pattern;	1223	base_pat = raw_pattern;
1222	if (multibyte)	1224	if (multibyte)
1223	{	1225	{
		1226	/* Fill patbuf by translated characters in STRING while
		1227	checking if we can use boyer-moore search. If TRT is
		1228	non-nil, we can use boyer-moore search only if TRT can be
		1229	represented by the byte array of 256 elements. For that,
		1230	all non-ASCII case-equivalents of all case-senstive
		1231	characters in STRING must belong to the same charset and
		1232	row. */
		1233
1224	while (--len >= 0)	1234	while (--len >= 0)
1225	{	1235	{
1226	unsigned char str[MAX_MULTIBYTE_LENGTH];	1236	unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1227	int c, translated, inverse;	1237	int c, translated, inverse;
1228	int in_charlen, charlen;	1238	int in_charlen, charlen;
1229		1239
@@ -1233,50 +1243,62 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1233	if (RE && *base_pat == '\\')	1243	if (RE && *base_pat == '\\')
1234	{	1244	{
1235	len--;	1245	len--;
		1246	raw_pattern_size--;
1236	len_byte--;	1247	len_byte--;
1237	base_pat++;	1248	base_pat++;
1238	}	1249	}
1239		1250
1240	c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);	1251	c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1241		1252
1242	/* Translate the character, if requested. */	1253	if (NILP (trt))
1243	TRANSLATE (translated, trt, c);
1244	/* If translation changed the byte-length, go back
1245	to the original character. */
1246	charlen = CHAR_STRING (translated, str);
1247	if (in_charlen != charlen)
1248	{	1254	{
1249	translated = c;	1255	str = base_pat;
1250	charlen = CHAR_STRING (c, str);	1256	charlen = in_charlen;
1251	}	1257	}
1252		1258	else
1253	/* If we are searching for something strange,
1254	an invalid multibyte code, don't use boyer-moore. */
1255	if (! ASCII_BYTE_P (translated)
1256	&& (charlen == 1 /* 8bit code */
1257	\|\| charlen != in_charlen /* invalid multibyte code */
1258	))
1259	boyer_moore_ok = 0;
1260
1261	TRANSLATE (inverse, inverse_trt, c);
1262
1263	/* Did this char actually get translated?
1264	Would any other char get translated into it? */
1265	if (translated != c \|\| inverse != c)
1266	{	1259	{
1267	/* Keep track of which character set row	1260	/* Translate the character. */
1268	contains the characters that need translation. */	1261	TRANSLATE (translated, trt, c);
1269	int charset_base_code = c & ~CHAR_FIELD3_MASK;	1262	charlen = CHAR_STRING (translated, str_base);
1270	int inverse_charset_base = inverse & ~CHAR_FIELD3_MASK;	1263	str = str_base;
1271		1264
1272	if (charset_base_code != inverse_charset_base)	1265	/* Check if C has any other case-equivalents. */
1273	boyer_moore_ok = 0;	1266	TRANSLATE (inverse, inverse_trt, c);
1274	else if (charset_base == -1)	1267	/* If so, check if we can use boyer-moore. */
1275	charset_base = charset_base_code;	1268	if (c != inverse && boyer_moore_ok)
1276	else if (charset_base != charset_base_code)	1269	{
1277	/* If two different rows appear, needing translation,	1270	/* Check if all equivalents belong to the same
1278	then we cannot use boyer_moore search. */	1271	charset & row. Note that the check of C
1279	boyer_moore_ok = 0;	1272	itself is done by the last iteration. Note
		1273	also that we don't have to check ASCII
		1274	characters because boyer-moore search can
		1275	always handle their translation. */
		1276	while (1)
		1277	{
		1278	if (! ASCII_BYTE_P (inverse))
		1279	{
		1280	if (SINGLE_BYTE_CHAR_P (inverse))
		1281	{
		1282	/* Boyer-moore search can't handle a
		1283	translation of an eight-bit
		1284	character. */
		1285	boyer_moore_ok = 0;
		1286	break;
		1287	}
		1288	else if (charset_base == 0)
		1289	charset_base = inverse & ~CHAR_FIELD3_MASK;
		1290	else if ((inverse & ~CHAR_FIELD3_MASK)
		1291	!= charset_base)
		1292	{
		1293	boyer_moore_ok = 0;
		1294	break;
		1295	}
		1296	}
		1297	if (c == inverse)
		1298	break;
		1299	TRANSLATE (inverse, inverse_trt, inverse);
		1300	}
		1301	}
1280	}	1302	}
1281		1303
1282	/* Store this character into the translated pattern. */	1304	/* Store this character into the translated pattern. */
@@ -1300,6 +1322,7 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1300	if (RE && *base_pat == '\\')	1322	if (RE && *base_pat == '\\')
1301	{	1323	{
1302	len--;	1324	len--;
		1325	raw_pattern_size--;
1303	base_pat++;	1326	base_pat++;
1304	}	1327	}
1305	c = *base_pat++;	1328	c = *base_pat++;
@@ -1533,16 +1556,18 @@ simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1533	return n;	1556	return n;
1534	}	1557	}
1535		1558
1536	/* Do Boyer-Moore search N times for the string PAT,	1559	/* Do Boyer-Moore search N times for the string BASE_PAT,
1537	whose length is LEN/LEN_BYTE,	1560	whose length is LEN/LEN_BYTE,
1538	from buffer position POS/POS_BYTE until LIM/LIM_BYTE.	1561	from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1539	DIRECTION says which direction we search in.	1562	DIRECTION says which direction we search in.
1540	TRT and INVERSE_TRT are translation tables.	1563	TRT and INVERSE_TRT are translation tables.
		1564	Characters in PAT are already translated by TRT.
1541		1565
1542	This kind of search works if all the characters in PAT that have	1566	This kind of search works if all the characters in BASE_PAT that
1543	nontrivial translation are the same aside from the last byte. This	1567	have nontrivial translation are the same aside from the last byte.
1544	makes it possible to translate just the last byte of a character,	1568	This makes it possible to translate just the last byte of a
1545	and do so after just a simple test of the context.	1569	character, and do so after just a simple test of the context.
		1570	CHARSET_BASE is nonzero iff there is such a non-ASCII character.
1546		1571
1547	If that criterion is not satisfied, do not call this function. */	1572	If that criterion is not satisfied, do not call this function. */
1548		1573
@@ -1569,8 +1594,13 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1569	int multibyte = ! NILP (current_buffer->enable_multibyte_characters);	1594	int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1570		1595
1571	unsigned char simple_translate[0400];	1596	unsigned char simple_translate[0400];
1572	int translate_prev_byte = 0;	1597	/* These are set to the preceding bytes of a byte to be translated
1573	int translate_anteprev_byte = 0;	1598	if charset_base is nonzero. As the maximum byte length of a
		1599	multibyte character is 4, we have to check at most three previous
		1600	bytes. */
		1601	int translate_prev_byte1 = 0;
		1602	int translate_prev_byte2 = 0;
		1603	int translate_prev_byte3 = 0;
1574		1604
1575	#ifdef C_ALLOCA	1605	#ifdef C_ALLOCA
1576	int BM_tab_space[0400];	1606	int BM_tab_space[0400];
@@ -1636,6 +1666,23 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1636	for (i = 0; i < 0400; i++)	1666	for (i = 0; i < 0400; i++)
1637	simple_translate[i] = i;	1667	simple_translate[i] = i;
1638		1668
		1669	if (charset_base)
		1670	{
		1671	/* Setup translate_prev_byte1/2/3 from CHARSET_BASE. Only a
		1672	byte following them are the target of translation. */
		1673	int sample_char = charset_base \| 0x20;
		1674	unsigned char str[MAX_MULTIBYTE_LENGTH];
		1675	int len = CHAR_STRING (sample_char, str);
		1676
		1677	translate_prev_byte1 = str[len - 2];
		1678	if (len > 2)
		1679	{
		1680	translate_prev_byte2 = str[len - 3];
		1681	if (len > 3)
		1682	translate_prev_byte3 = str[len - 4];
		1683	}
		1684	}
		1685
1639	i = 0;	1686	i = 0;
1640	while (i != infinity)	1687	while (i != infinity)
1641	{	1688	{
@@ -1645,57 +1692,37 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1645	i = infinity;	1692	i = infinity;
1646	if (! NILP (trt))	1693	if (! NILP (trt))
1647	{	1694	{
1648	int ch;	1695	/* If the byte currently looking at is a head of a character
1649	int untranslated;	1696	to check case-equivalents, set CH to that character. An
1650	int this_translated = 1;	1697	ASCII character and a non-ASCII character matching with
1651		1698	CHARSET_BASE are to be checked. */
1652	if (multibyte	1699	int ch = -1;
1653	/* Is PTR the last byte of a character? /	1700
1654	&& (pat_end - ptr == 1 \|\| CHAR_HEAD_P (ptr[1])))	1701	if (ASCII_BYTE_P (*ptr) \|\| ! multibyte)
		1702	ch = *ptr;
		1703	else if (charset_base && CHAR_HEAD_P (*ptr))
1655	{	1704	{
1656	unsigned char *charstart = ptr;	1705	ch = STRING_CHAR (ptr, pat_end - ptr);
1657	while (! CHAR_HEAD_P (*charstart))	1706	if (charset_base != (ch & ~CHAR_FIELD3_MASK))
1658	charstart--;	1707	ch = -1;
1659	untranslated = STRING_CHAR (charstart, ptr - charstart + 1);
1660	if (charset_base == (untranslated & ~CHAR_FIELD3_MASK))
1661	{
1662	TRANSLATE (ch, trt, untranslated);
1663	if (! CHAR_HEAD_P (*ptr))
1664	{
1665	translate_prev_byte = ptr[-1];
1666	if (! CHAR_HEAD_P (translate_prev_byte))
1667	translate_anteprev_byte = ptr[-2];
1668	}
1669	}
1670	else
1671	{
1672	this_translated = 0;
1673	ch = *ptr;
1674	}
1675	}	1708	}
1676	else if (!multibyte)
1677	TRANSLATE (ch, trt, *ptr);
1678	else
1679	{
1680	ch = *ptr;
1681	this_translated = 0;
1682	}
1683
1684	if (ch > 0400)
1685	j = ((unsigned char) ch) \| 0200;
1686	else
1687	j = (unsigned char) ch;
1688		1709
		1710	j = *ptr;
1689	if (i == infinity)	1711	if (i == infinity)
1690	stride_for_teases = BM_tab[j];	1712	stride_for_teases = BM_tab[j];
1691		1713
1692	BM_tab[j] = dirlen - i;	1714	BM_tab[j] = dirlen - i;
1693	/* A translation table is accompanied by its inverse -- see */	1715	/* A translation table is accompanied by its inverse -- see */
1694	/* comment following downcase_table for details */	1716	/* comment following downcase_table for details */
1695	if (this_translated)	1717	if (ch >= 0)
1696	{	1718	{
1697	int starting_ch = ch;	1719	int starting_ch = ch;
1698	int starting_j = j;	1720	int starting_j;
		1721
		1722	if (ch > 0400)
		1723	starting_j = ((unsigned char) ch) \| 0200;
		1724	else
		1725	starting_j = (unsigned char) ch;
1699	while (1)	1726	while (1)
1700	{	1727	{
1701	TRANSLATE (ch, inverse_trt, ch);	1728	TRANSLATE (ch, inverse_trt, ch);
@@ -1821,9 +1848,13 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1821	\|\| ((cursor == tail_end_ptr	1848	\|\| ((cursor == tail_end_ptr
1822	\|\| CHAR_HEAD_P (cursor[1]))	1849	\|\| CHAR_HEAD_P (cursor[1]))
1823	&& (CHAR_HEAD_P (cursor[0])	1850	&& (CHAR_HEAD_P (cursor[0])
1824	\|\| (translate_prev_byte == cursor[-1]	1851	/* Check if this is the last byte of
1825	&& (CHAR_HEAD_P (translate_prev_byte)	1852	a translable character. */
1826	\|\| translate_anteprev_byte == cursor[-2])))))	1853	\|\| (translate_prev_byte1 == cursor[-1]
		1854	&& (CHAR_HEAD_P (translate_prev_byte1)
		1855	\|\| (translate_prev_byte2 == cursor[-2]
		1856	&& (CHAR_HEAD_P (translate_prev_byte2)
		1857	\|\| (translate_prev_byte3 == cursor[-3]))))))))
1827	ch = simple_translate[*cursor];	1858	ch = simple_translate[*cursor];
1828	else	1859	else
1829	ch = *cursor;	1860	ch = *cursor;
@@ -1901,9 +1932,13 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1901	\|\| ((ptr == tail_end_ptr	1932	\|\| ((ptr == tail_end_ptr
1902	\|\| CHAR_HEAD_P (ptr[1]))	1933	\|\| CHAR_HEAD_P (ptr[1]))
1903	&& (CHAR_HEAD_P (ptr[0])	1934	&& (CHAR_HEAD_P (ptr[0])
1904	\|\| (translate_prev_byte == ptr[-1]	1935	/* Check if this is the last byte of a
1905	&& (CHAR_HEAD_P (translate_prev_byte)	1936	translable character. */
1906	\|\| translate_anteprev_byte == ptr[-2])))))	1937	\|\| (translate_prev_byte1 == ptr[-1]
		1938	&& (CHAR_HEAD_P (translate_prev_byte1)
		1939	\|\| (translate_prev_byte2 == ptr[-2]
		1940	&& (CHAR_HEAD_P (translate_prev_byte2)
		1941	\|\| translate_prev_byte3 == ptr[-3])))))))
1907	ch = simple_translate[*ptr];	1942	ch = simple_translate[*ptr];
1908	else	1943	else
1909	ch = *ptr;	1944	ch = *ptr;