about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Stefan Monnier 2000-04-02 23:56:46 +0000
committer Stefan Monnier 2000-04-02 23:56:46 +0000
commit 2d1675e45c46d97aec4c6af28a0719778f79b8da (patch)
tree 11c6492fbe96211f75cc8f4ac37e7e0cde1538bb /src
parent 096540869a2be7676946ef9c4ee52e5cedb5c28a (diff)
download emacs-2d1675e45c46d97aec4c6af28a0719778f79b8da.tar.gz
download emacs-2d1675e45c46d97aec4c6af28a0719778f79b8da.zip
* regex.c (PTR_TO_OFFSET) [!emacs]: Remove.
(RE_MULTIBYTE_P, RE_STRING_CHAR_AND_LENGTH): New macros.
(GET_CHAR_BEFORE_2): Moved from charset.h plus fixed minor bug when
we are between str1 and str2.
(MAX_MULTIBYTE_LENGTH, CHAR_STRING) [!emacs]: Provide trivial default.
(PATFETCH): Use `TRANSLATE'.
(PATFETCH_RAW): Fetch multibyte char if applicable.
(PATUNFETCH): Remove.
(regex_compile): Rely on PATFETCH to do most of the multibyte magic.
When writing a char, write it directly into the pattern buffer rather
than going needlessly through a temp char-array.
(re_match_2_internal): Similarly, rely on RE_STRING_CHAR to do the
multibyte magic and remove the useless `#ifdef emacs'.
(bcmp_translate): Don't compare as multibyte chars when in a unibyte
buffer.

* regex.h (struct re_pattern_buffer): Make field `multibyte'
conditional on `emacs'.

* charset.h (GET_CHAR_BEFORE_2): Moved to regex.c.
Diffstat (limited to 'src')
-rw-r--r-- src/ChangeLog 23
-rw-r--r-- src/charset.h 12
-rw-r--r-- src/regex.c 216
-rw-r--r-- src/regex.h 2
4 files changed, 110 insertions, 143 deletions
diff --git a/src/ChangeLog b/src/ChangeLog
index de883b830a4..9f3f20bbb3e 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,26 @@
12000-04-02 Stefan Monnier <monnier@cs.yale.edu>
2
3 * regex.c (PTR_TO_OFFSET) [!emacs]: Remove.
4 (RE_MULTIBYTE_P, RE_STRING_CHAR_AND_LENGTH): New macros.
5 (GET_CHAR_BEFORE_2): Moved from charset.h plus fixed minor bug when
6 we are between str1 and str2.
7 (MAX_MULTIBYTE_LENGTH, CHAR_STRING) [!emacs]: Provide trivial default.
8 (PATFETCH): Use `TRANSLATE'.
9 (PATFETCH_RAW): Fetch multibyte char if applicable.
10 (PATUNFETCH): Remove.
11 (regex_compile): Rely on PATFETCH to do most of the multibyte magic.
12 When writing a char, write it directly into the pattern buffer rather
13 than going needlessly through a temp char-array.
14 (re_match_2_internal): Similarly, rely on RE_STRING_CHAR to do the
15 multibyte magic and remove the useless `#ifdef emacs'.
16 (bcmp_translate): Don't compare as multibyte chars when in a unibyte
17 buffer.
18
19 * regex.h (struct re_pattern_buffer): Make field `multibyte'
20 conditional on `emacs'.
21
22 * charset.h (GET_CHAR_BEFORE_2): Moved to regex.c.
23
12000-04-01 Ken Raeburn <raeburn@gnu.org> 242000-04-01 Ken Raeburn <raeburn@gnu.org>
2 25
3 * alloc.c (MARK_STRING, UNMARK_STRING, STRING_MARKED_P): Expand 26 * alloc.c (MARK_STRING, UNMARK_STRING, STRING_MARKED_P): Expand
diff --git a/src/charset.h b/src/charset.h
index 3acc447c5f8..d9257b8955a 100644
--- a/src/charset.h
+++ b/src/charset.h
@@ -577,18 +577,6 @@ else
577 ? 1 \ 577 ? 1 \
578 : multibyte_form_length (str, len)) 578 : multibyte_form_length (str, len))
579 579
580/* Set C a (possibly multibyte) character before P. P points into a
581 string which is the virtual concatenation of STR1 (which ends at
582 END1) or STR2 (which ends at END2). */
583
584#define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \
585 do { \
586 const unsigned char *dtemp = (p); \
587 const unsigned char *dlimit = ((p) > (str2) && (p) <= (end2)) ? (str2) : (str1); \
588 while (dtemp-- > dlimit && *dtemp >= 0xA0); \
589 c = STRING_CHAR (dtemp, p - dtemp); \
590 } while (0)
591
592#ifdef emacs 580#ifdef emacs
593 581
594/* Increase the buffer byte position POS_BYTE of the current buffer to 582/* Increase the buffer byte position POS_BYTE of the current buffer to
diff --git a/src/regex.c b/src/regex.c
index 911daed209d..9a56db728e5 100644
--- a/src/regex.c
+++ b/src/regex.c
@@ -20,7 +20,6 @@
20 USA. */ 20 USA. */
21 21
22/* TODO: 22/* TODO:
23 - clean up multibyte issues
24 - structure the opcode space into opcode+flag. 23 - structure the opcode space into opcode+flag.
25 - merge with glibc's regex.[ch] 24 - merge with glibc's regex.[ch]
26 */ 25 */
@@ -37,8 +36,6 @@
37/* Converts the pointer to the char to BEG-based offset from the start. */ 36/* Converts the pointer to the char to BEG-based offset from the start. */
38#define PTR_TO_OFFSET(d) POS_AS_IN_BUFFER (POINTER_TO_OFFSET (d)) 37#define PTR_TO_OFFSET(d) POS_AS_IN_BUFFER (POINTER_TO_OFFSET (d))
39#define POS_AS_IN_BUFFER(p) ((p) + (NILP (re_match_object) || BUFFERP (re_match_object))) 38#define POS_AS_IN_BUFFER(p) ((p) + (NILP (re_match_object) || BUFFERP (re_match_object)))
40#else
41#define PTR_TO_OFFSET(d) 0
42#endif 39#endif
43 40
44#ifdef HAVE_CONFIG_H 41#ifdef HAVE_CONFIG_H
@@ -79,8 +76,28 @@
79#define realloc xrealloc 76#define realloc xrealloc
80#define free xfree 77#define free xfree
81 78
79#define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte)
82#define RE_STRING_CHAR(p, s) \ 80#define RE_STRING_CHAR(p, s) \
83 (multibyte ? (STRING_CHAR (p, s)) : (*(p))) 81 (multibyte ? (STRING_CHAR (p, s)) : (*(p)))
82#define RE_STRING_CHAR_AND_LENGTH(p, s, len) \
83 (multibyte ? (STRING_CHAR_AND_LENGTH (p, s, len)) : ((len) = 1, *(p)))
84
85/* Set C a (possibly multibyte) character before P. P points into a
86 string which is the virtual concatenation of STR1 (which ends at
87 END1) or STR2 (which ends at END2). */
88#define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \
89 do { \
90 if (multibyte) \
91 { \
92 re_char *dtemp = (p) == (str2) ? (end1) : (p); \
93 re_char *dlimit = ((p) > (str2) && (p) <= (end2)) ? (str2) : (str1); \
94 while (dtemp-- > dlimit && !CHAR_HEAD_P (*dtemp)); \
95 c = STRING_CHAR (dtemp, (p) - dtemp); \
96 } \
97 else \
98 (c = ((p) == (str2) ? (end1) : (p))[-1]); \
99 } while (0)
100
84 101
85#else /* not emacs */ 102#else /* not emacs */
86 103
@@ -181,6 +198,8 @@ init_syntax_once ()
181#define BASE_LEADING_CODE_P(c) (0) 198#define BASE_LEADING_CODE_P(c) (0)
182#define CHAR_CHARSET(c) 0 199#define CHAR_CHARSET(c) 0
183#define CHARSET_LEADING_CODE_BASE(c) 0 200#define CHARSET_LEADING_CODE_BASE(c) 0
201#define MAX_MULTIBYTE_LENGTH 1
202#define RE_MULTIBYTE_P(x) 0
184#define WORD_BOUNDARY_P(c1, c2) (0) 203#define WORD_BOUNDARY_P(c1, c2) (0)
185#define CHAR_HEAD_P(p) (1) 204#define CHAR_HEAD_P(p) (1)
186#define SINGLE_BYTE_CHAR_P(c) (1) 205#define SINGLE_BYTE_CHAR_P(c) (1)
@@ -188,7 +207,9 @@ init_syntax_once ()
188#define MULTIBYTE_FORM_LENGTH(p, s) (1) 207#define MULTIBYTE_FORM_LENGTH(p, s) (1)
189#define STRING_CHAR(p, s) (*(p)) 208#define STRING_CHAR(p, s) (*(p))
190#define RE_STRING_CHAR STRING_CHAR 209#define RE_STRING_CHAR STRING_CHAR
210#define CHAR_STRING(c, s) (*(s) = (c), 1)
191#define STRING_CHAR_AND_LENGTH(p, s, actual_len) ((actual_len) = 1, *(p)) 211#define STRING_CHAR_AND_LENGTH(p, s, actual_len) ((actual_len) = 1, *(p))
212#define RE_STRING_CHAR_AND_LENGTH STRING_CHAR_AND_LENGTH
192#define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \ 213#define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \
193 (c = ((p) == (str2) ? *((end1) - 1) : *((p) - 1))) 214 (c = ((p) == (str2) ? *((end1) - 1) : *((p) - 1)))
194#endif /* not emacs */ 215#endif /* not emacs */
@@ -1550,19 +1571,19 @@ static int analyse_first _RE_ARGS((unsigned char *p, unsigned char *pend,
1550#define PATFETCH(c) \ 1571#define PATFETCH(c) \
1551 do { \ 1572 do { \
1552 PATFETCH_RAW (c); \ 1573 PATFETCH_RAW (c); \
1553 if (RE_TRANSLATE_P (translate)) c = RE_TRANSLATE (translate, c); \ 1574 c = TRANSLATE (c); \
1554 } while (0) 1575 } while (0)
1555 1576
1556/* Fetch the next character in the uncompiled pattern, with no 1577/* Fetch the next character in the uncompiled pattern, with no
1557 translation. */ 1578 translation. */
1558#define PATFETCH_RAW(c) \ 1579#define PATFETCH_RAW(c) \
1559 do {if (p == pend) return REG_EEND; \ 1580 do { \
1560 c = *p++; \ 1581 int len; \
1582 if (p == pend) return REG_EEND; \
1583 c = RE_STRING_CHAR_AND_LENGTH (p, pend - p, len); \
1584 p += len; \
1561 } while (0) 1585 } while (0)
1562 1586
1563/* Go backwards one character in the pattern. */
1564#define PATUNFETCH p--
1565
1566 1587
1567/* If `translate' is non-null, return translate[D], else just D. We 1588/* If `translate' is non-null, return translate[D], else just D. We
1568 cast the subscript to translate because some data is declared as 1589 cast the subscript to translate because some data is declared as
@@ -1957,6 +1978,9 @@ regex_compile (pattern, size, syntax, bufp)
1957 /* Work area for range table of charset. */ 1978 /* Work area for range table of charset. */
1958 struct range_table_work_area range_table_work; 1979 struct range_table_work_area range_table_work;
1959 1980
1981 /* If the object matched can contain multibyte characters. */
1982 const boolean multibyte = RE_MULTIBYTE_P (bufp);
1983
1960#ifdef DEBUG 1984#ifdef DEBUG
1961 debug++; 1985 debug++;
1962 DEBUG_PRINT1 ("\nCompiling pattern: "); 1986 DEBUG_PRINT1 ("\nCompiling pattern: ");
@@ -1994,14 +2018,6 @@ regex_compile (pattern, size, syntax, bufp)
1994 /* Always count groups, whether or not bufp->no_sub is set. */ 2018 /* Always count groups, whether or not bufp->no_sub is set. */
1995 bufp->re_nsub = 0; 2019 bufp->re_nsub = 0;
1996 2020
1997#ifdef emacs
1998 /* bufp->multibyte is set before regex_compile is called, so don't alter
1999 it. */
2000#else /* not emacs */
2001 /* Nothing is recognized as a multibyte character. */
2002 bufp->multibyte = 0;
2003#endif
2004
2005#if !defined (emacs) && !defined (SYNTAX_TABLE) 2021#if !defined (emacs) && !defined (SYNTAX_TABLE)
2006 /* Initialize the syntax table. */ 2022 /* Initialize the syntax table. */
2007 init_syntax_once (); 2023 init_syntax_once ();
@@ -2254,8 +2270,8 @@ regex_compile (pattern, size, syntax, bufp)
2254 /* Read in characters and ranges, setting map bits. */ 2270 /* Read in characters and ranges, setting map bits. */
2255 for (;;) 2271 for (;;)
2256 { 2272 {
2257 int len;
2258 boolean escaped_char = false; 2273 boolean escaped_char = false;
2274 const unsigned char *p2 = p;
2259 2275
2260 if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 2276 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2261 2277
@@ -2274,19 +2290,10 @@ regex_compile (pattern, size, syntax, bufp)
2274 /* Could be the end of the bracket expression. If it's 2290 /* Could be the end of the bracket expression. If it's
2275 not (i.e., when the bracket expression is `[]' so 2291 not (i.e., when the bracket expression is `[]' so
2276 far), the ']' character bit gets set way below. */ 2292 far), the ']' character bit gets set way below. */
2277 if (c == ']' && p != p1 + 1) 2293 if (c == ']' && p2 != p1)
2278 break; 2294 break;
2279 } 2295 }
2280 2296
2281 /* If C indicates start of multibyte char, get the
2282 actual character code in C, and set the pattern
2283 pointer P to the next character boundary. */
2284 if (bufp->multibyte && BASE_LEADING_CODE_P (c))
2285 {
2286 PATUNFETCH;
2287 c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
2288 p += len;
2289 }
2290 /* What should we do for the character which is 2297 /* What should we do for the character which is
2291 greater than 0x7F, but not BASE_LEADING_CODE_P? 2298 greater than 0x7F, but not BASE_LEADING_CODE_P?
2292 XXX */ 2299 XXX */
@@ -2294,8 +2301,8 @@ regex_compile (pattern, size, syntax, bufp)
2294 /* See if we're at the beginning of a possible character 2301 /* See if we're at the beginning of a possible character
2295 class. */ 2302 class. */
2296 2303
2297 else if (!escaped_char && 2304 if (!escaped_char &&
2298 syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') 2305 syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
2299 { 2306 {
2300 /* Leave room for the null. */ 2307 /* Leave room for the null. */
2301 char str[CHAR_CLASS_MAX_LENGTH + 1]; 2308 char str[CHAR_CLASS_MAX_LENGTH + 1];
@@ -2358,7 +2365,7 @@ regex_compile (pattern, size, syntax, bufp)
2358 they can only match ASCII characters. We 2365 they can only match ASCII characters. We
2359 don't need to handle them for multibyte. */ 2366 don't need to handle them for multibyte. */
2360 2367
2361 if (bufp->multibyte) 2368 if (multibyte)
2362 { 2369 {
2363 int bit = 0; 2370 int bit = 0;
2364 2371
@@ -2435,12 +2442,6 @@ regex_compile (pattern, size, syntax, bufp)
2435 2442
2436 /* Fetch the character which ends the range. */ 2443 /* Fetch the character which ends the range. */
2437 PATFETCH (c1); 2444 PATFETCH (c1);
2438 if (bufp->multibyte && BASE_LEADING_CODE_P (c1))
2439 {
2440 PATUNFETCH;
2441 c1 = STRING_CHAR_AND_LENGTH (p, pend - p, len);
2442 p += len;
2443 }
2444 2445
2445 if (SINGLE_BYTE_CHAR_P (c) 2446 if (SINGLE_BYTE_CHAR_P (c)
2446 && ! SINGLE_BYTE_CHAR_P (c1)) 2447 && ! SINGLE_BYTE_CHAR_P (c1))
@@ -3028,16 +3029,6 @@ regex_compile (pattern, size, syntax, bufp)
3028 default: 3029 default:
3029 /* Expects the character in `c'. */ 3030 /* Expects the character in `c'. */
3030 normal_char: 3031 normal_char:
3031 p1 = p - 1; /* P1 points the head of C. */
3032#ifdef emacs
3033 if (bufp->multibyte)
3034 {
3035 c = STRING_CHAR (p1, pend - p1);
3036 c = TRANSLATE (c);
3037 /* Set P to the next character boundary. */
3038 p += MULTIBYTE_FORM_LENGTH (p1, pend - p1) - 1;
3039 }
3040#endif
3041 /* If no exactn currently being built. */ 3032 /* If no exactn currently being built. */
3042 if (!pending_exact 3033 if (!pending_exact
3043 3034
@@ -3045,7 +3036,7 @@ regex_compile (pattern, size, syntax, bufp)
3045 || pending_exact + *pending_exact + 1 != b 3036 || pending_exact + *pending_exact + 1 != b
3046 3037
3047 /* We have only one byte following the exactn for the count. */ 3038 /* We have only one byte following the exactn for the count. */
3048 || *pending_exact >= (1 << BYTEWIDTH) - (p - p1) 3039 || *pending_exact >= (1 << BYTEWIDTH) - MAX_MULTIBYTE_LENGTH
3049 3040
3050 /* If followed by a repetition operator. */ 3041 /* If followed by a repetition operator. */
3051 || (p != pend && (*p == '*' || *p == '^')) 3042 || (p != pend && (*p == '*' || *p == '^'))
@@ -3065,24 +3056,13 @@ regex_compile (pattern, size, syntax, bufp)
3065 pending_exact = b - 1; 3056 pending_exact = b - 1;
3066 } 3057 }
3067 3058
3068#ifdef emacs 3059 GET_BUFFER_SPACE (MAX_MULTIBYTE_LENGTH);
3069 if (! SINGLE_BYTE_CHAR_P (c)) 3060 {
3070 { 3061 int len = CHAR_STRING (c, b);
3071 unsigned char str[MAX_MULTIBYTE_LENGTH]; 3062 b += len;
3072 int i = CHAR_STRING (c, str); 3063 (*pending_exact) += len;
3073 int j; 3064 }
3074 for (j = 0; j < i; j++) 3065
3075 {
3076 BUF_PUSH (str[j]);
3077 (*pending_exact)++;
3078 }
3079 }
3080 else
3081#endif
3082 {
3083 BUF_PUSH (c);
3084 (*pending_exact)++;
3085 }
3086 break; 3066 break;
3087 } /* switch (c) */ 3067 } /* switch (c) */
3088 } /* while p != pend */ 3068 } /* while p != pend */
@@ -3616,7 +3596,7 @@ re_compile_fastmap (bufp)
3616 bufp->fastmap_accurate = 1; /* It will be when we're done. */ 3596 bufp->fastmap_accurate = 1; /* It will be when we're done. */
3617 3597
3618 analysis = analyse_first (bufp->buffer, bufp->buffer + bufp->used, 3598 analysis = analyse_first (bufp->buffer, bufp->buffer + bufp->used,
3619 fastmap, bufp->multibyte); 3599 fastmap, RE_MULTIBYTE_P (bufp));
3620 if (analysis < -1) 3600 if (analysis < -1)
3621 return analysis; 3601 return analysis;
3622 bufp->can_be_null = (analysis != 0); 3602 bufp->can_be_null = (analysis != 0);
@@ -3723,7 +3703,7 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
3723 int anchored_start = 0; 3703 int anchored_start = 0;
3724 3704
3725 /* Nonzero if we have to concern multibyte character. */ 3705 /* Nonzero if we have to concern multibyte character. */
3726 const boolean multibyte = bufp->multibyte; 3706 const boolean multibyte = RE_MULTIBYTE_P (bufp);
3727 3707
3728 /* Check for out-of-range STARTPOS. */ 3708 /* Check for out-of-range STARTPOS. */
3729 if (startpos < 0 || startpos > total_size) 3709 if (startpos < 0 || startpos > total_size)
@@ -3850,11 +3830,11 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
3850 } 3830 }
3851 else /* Searching backwards. */ 3831 else /* Searching backwards. */
3852 { 3832 {
3853 buf_ch = STRING_CHAR (d, (startpos >= size1 3833 int room = (startpos >= size1
3854 ? size2 + size1 - startpos 3834 ? size2 + size1 - startpos
3855 : size1 - startpos)); 3835 : size1 - startpos);
3856 if (RE_TRANSLATE_P (translate)) 3836 buf_ch = RE_STRING_CHAR (d, room);
3857 buf_ch = RE_TRANSLATE (translate, buf_ch); 3837 buf_ch = TRANSLATE (buf_ch);
3858 3838
3859 if (! (buf_ch >= 0400 3839 if (! (buf_ch >= 0400
3860 || fastmap[buf_ch])) 3840 || fastmap[buf_ch]))
@@ -3940,7 +3920,10 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
3940 3920
3941/* Declarations and macros for re_match_2. */ 3921/* Declarations and macros for re_match_2. */
3942 3922
3943static int bcmp_translate (); 3923static int bcmp_translate _RE_ARGS((re_char *s1, re_char *s2,
3924 register int len,
3925 RE_TRANSLATE_TYPE translate,
3926 const int multibyte));
3944 3927
3945/* This converts PTR, a pointer into one of the search strings `string1' 3928/* This converts PTR, a pointer into one of the search strings `string1'
3946 and `string2' into an offset from the beginning of that string. */ 3929 and `string2' into an offset from the beginning of that string. */
@@ -4093,7 +4076,7 @@ mutually_exclusive_p (bufp, p1, p2)
4093 unsigned char *p1, *p2; 4076 unsigned char *p1, *p2;
4094{ 4077{
4095 re_opcode_t op2; 4078 re_opcode_t op2;
4096 const boolean multibyte = bufp->multibyte; 4079 const boolean multibyte = RE_MULTIBYTE_P (bufp);
4097 unsigned char *pend = bufp->buffer + bufp->used; 4080 unsigned char *pend = bufp->buffer + bufp->used;
4098 4081
4099 assert (p1 >= bufp->buffer && p1 < pend 4082 assert (p1 >= bufp->buffer && p1 < pend
@@ -4373,7 +4356,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4373 RE_TRANSLATE_TYPE translate = bufp->translate; 4356 RE_TRANSLATE_TYPE translate = bufp->translate;
4374 4357
4375 /* Nonzero if we have to concern multibyte character. */ 4358 /* Nonzero if we have to concern multibyte character. */
4376 const boolean multibyte = bufp->multibyte; 4359 const boolean multibyte = RE_MULTIBYTE_P (bufp);
4377 4360
4378 /* Failure point stack. Each place that can handle a failure further 4361 /* Failure point stack. Each place that can handle a failure further
4379 down the line pushes a failure point on this stack. It consists of 4362 down the line pushes a failure point on this stack. It consists of
@@ -4721,7 +4704,6 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4721 testing `translate' inside the loop. */ 4704 testing `translate' inside the loop. */
4722 if (RE_TRANSLATE_P (translate)) 4705 if (RE_TRANSLATE_P (translate))
4723 { 4706 {
4724#ifdef emacs
4725 if (multibyte) 4707 if (multibyte)
4726 do 4708 do
4727 { 4709 {
@@ -4745,7 +4727,6 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4745 } 4727 }
4746 while (mcnt > 0); 4728 while (mcnt > 0);
4747 else 4729 else
4748#endif /* not emacs */
4749 do 4730 do
4750 { 4731 {
4751 PREFETCH (); 4732 PREFETCH ();
@@ -4783,17 +4764,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4783 DEBUG_PRINT1 ("EXECUTING anychar.\n"); 4764 DEBUG_PRINT1 ("EXECUTING anychar.\n");
4784 4765
4785 PREFETCH (); 4766 PREFETCH ();
4786 4767 buf_ch = RE_STRING_CHAR_AND_LENGTH (d, dend - d, buf_charlen);
4787#ifdef emacs
4788 if (multibyte)
4789 buf_ch = STRING_CHAR_AND_LENGTH (d, dend - d, buf_charlen);
4790 else
4791#endif /* not emacs */
4792 {
4793 buf_ch = *d;
4794 buf_charlen = 1;
4795 }
4796
4797 buf_ch = TRANSLATE (buf_ch); 4768 buf_ch = TRANSLATE (buf_ch);
4798 4769
4799 if ((!(bufp->syntax & RE_DOT_NEWLINE) 4770 if ((!(bufp->syntax & RE_DOT_NEWLINE)
@@ -4828,27 +4799,20 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4828 4799
4829 DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : ""); 4800 DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
4830 4801
4831 PREFETCH ();
4832 c = *d;
4833
4834 range_table_exists = CHARSET_RANGE_TABLE_EXISTS_P (&p[-1]); 4802 range_table_exists = CHARSET_RANGE_TABLE_EXISTS_P (&p[-1]);
4835 4803
4836#ifdef emacs
4837 if (range_table_exists) 4804 if (range_table_exists)
4838 { 4805 {
4839 range_table = CHARSET_RANGE_TABLE (&p[-1]); /* Past the bitmap. */ 4806 range_table = CHARSET_RANGE_TABLE (&p[-1]); /* Past the bitmap. */
4840 EXTRACT_NUMBER_AND_INCR (count, range_table); 4807 EXTRACT_NUMBER_AND_INCR (count, range_table);
4841 } 4808 }
4842 4809
4843 if (multibyte && BASE_LEADING_CODE_P (c)) 4810 PREFETCH ();
4844 c = STRING_CHAR_AND_LENGTH (d, dend - d, len); 4811 c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len);
4845#endif /* emacs */ 4812 c = TRANSLATE (c); /* The character to match. */
4846 4813
4847 if (SINGLE_BYTE_CHAR_P (c)) 4814 if (SINGLE_BYTE_CHAR_P (c))
4848 { /* Lookup bitmap. */ 4815 { /* Lookup bitmap. */
4849 c = TRANSLATE (c); /* The character to match. */
4850 len = 1;
4851
4852 /* Cast to `unsigned' instead of `unsigned char' in 4816 /* Cast to `unsigned' instead of `unsigned char' in
4853 case the bit list is a full 32 bytes long. */ 4817 case the bit list is a full 32 bytes long. */
4854 if (c < (unsigned) (CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH) 4818 if (c < (unsigned) (CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH)
@@ -4994,7 +4958,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4994 /* Compare that many; failure if mismatch, else move 4958 /* Compare that many; failure if mismatch, else move
4995 past them. */ 4959 past them. */
4996 if (RE_TRANSLATE_P (translate) 4960 if (RE_TRANSLATE_P (translate)
4997 ? bcmp_translate (d, d2, mcnt, translate) 4961 ? bcmp_translate (d, d2, mcnt, translate, multibyte)
4998 : bcmp (d, d2, mcnt)) 4962 : bcmp (d, d2, mcnt))
4999 { 4963 {
5000 d = dfail; 4964 d = dfail;
@@ -5263,18 +5227,17 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5263 is the character at D, and S2 is the syntax of C2. */ 5227 is the character at D, and S2 is the syntax of C2. */
5264 int c1, c2, s1, s2; 5228 int c1, c2, s1, s2;
5265#ifdef emacs 5229#ifdef emacs
5266 int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (PTR_TO_OFFSET (d - 1)); 5230 int offset = PTR_TO_OFFSET (d - 1);
5231 int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
5267 UPDATE_SYNTAX_TABLE (charpos); 5232 UPDATE_SYNTAX_TABLE (charpos);
5268#endif 5233#endif
5269 /* FIXME: This does a STRING_CHAR even for unibyte buffers. */
5270 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); 5234 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
5271 s1 = SYNTAX (c1); 5235 s1 = SYNTAX (c1);
5272#ifdef emacs 5236#ifdef emacs
5273 UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1); 5237 UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
5274#endif 5238#endif
5275 PREFETCH (); 5239 PREFETCH ();
5276 /* FIXME: This does a STRING_CHAR even for unibyte buffers. */ 5240 c2 = RE_STRING_CHAR (d, dend - d);
5277 c2 = STRING_CHAR (d, dend - d);
5278 s2 = SYNTAX (c2); 5241 s2 = SYNTAX (c2);
5279 5242
5280 if (/* Case 2: Only one of S1 and S2 is Sword. */ 5243 if (/* Case 2: Only one of S1 and S2 is Sword. */
@@ -5303,12 +5266,12 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5303 is the character at D, and S2 is the syntax of C2. */ 5266 is the character at D, and S2 is the syntax of C2. */
5304 int c1, c2, s1, s2; 5267 int c1, c2, s1, s2;
5305#ifdef emacs 5268#ifdef emacs
5306 int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (PTR_TO_OFFSET (d)); 5269 int offset = PTR_TO_OFFSET (d);
5270 int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
5307 UPDATE_SYNTAX_TABLE (charpos); 5271 UPDATE_SYNTAX_TABLE (charpos);
5308#endif 5272#endif
5309 PREFETCH (); 5273 PREFETCH ();
5310 /* FIXME: This does a STRING_CHAR even for unibyte buffers. */ 5274 c2 = RE_STRING_CHAR (d, dend - d);
5311 c2 = STRING_CHAR (d, dend - d);
5312 s2 = SYNTAX (c2); 5275 s2 = SYNTAX (c2);
5313 5276
5314 /* Case 2: S2 is not Sword. */ 5277 /* Case 2: S2 is not Sword. */
@@ -5346,7 +5309,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5346 is the character at D, and S2 is the syntax of C2. */ 5309 is the character at D, and S2 is the syntax of C2. */
5347 int c1, c2, s1, s2; 5310 int c1, c2, s1, s2;
5348#ifdef emacs 5311#ifdef emacs
5349 int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (PTR_TO_OFFSET (d) - 1); 5312 int offset = PTR_TO_OFFSET (d) - 1;
5313 int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
5350 UPDATE_SYNTAX_TABLE (charpos); 5314 UPDATE_SYNTAX_TABLE (charpos);
5351#endif 5315#endif
5352 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); 5316 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
@@ -5360,8 +5324,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5360 if (!AT_STRINGS_END (d)) 5324 if (!AT_STRINGS_END (d))
5361 { 5325 {
5362 PREFETCH (); 5326 PREFETCH ();
5363 /* FIXME: This does a STRING_CHAR even for unibyte buffers. */ 5327 c2 = RE_STRING_CHAR (d, dend - d);
5364 c2 = STRING_CHAR (d, dend - d);
5365#ifdef emacs 5328#ifdef emacs
5366 UPDATE_SYNTAX_TABLE_FORWARD (charpos); 5329 UPDATE_SYNTAX_TABLE_FORWARD (charpos);
5367#endif 5330#endif
@@ -5383,20 +5346,15 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5383 PREFETCH (); 5346 PREFETCH ();
5384#ifdef emacs 5347#ifdef emacs
5385 { 5348 {
5386 int pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (PTR_TO_OFFSET (d)); 5349 int offset = PTR_TO_OFFSET (d);
5350 int pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
5387 UPDATE_SYNTAX_TABLE (pos1); 5351 UPDATE_SYNTAX_TABLE (pos1);
5388 } 5352 }
5389#endif 5353#endif
5390 { 5354 {
5391 int c, len; 5355 int c, len;
5392 5356
5393 if (multibyte) 5357 c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len);
5394 /* we must concern about multibyte form, ... */
5395 c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
5396 else
5397 /* everything should be handled as ASCII, even though it
5398 looks like multibyte form. */
5399 c = *d, len = 1;
5400 5358
5401 if ((SYNTAX (c) != (enum syntaxcode) mcnt) ^ not) 5359 if ((SYNTAX (c) != (enum syntaxcode) mcnt) ^ not)
5402 goto fail; 5360 goto fail;
@@ -5431,11 +5389,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5431 PREFETCH (); 5389 PREFETCH ();
5432 { 5390 {
5433 int c, len; 5391 int c, len;
5434 5392 c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len);
5435 if (multibyte)
5436 c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
5437 else
5438 c = *d, len = 1;
5439 5393
5440 if ((!CHAR_HAS_CATEGORY (c, mcnt)) ^ not) 5394 if ((!CHAR_HAS_CATEGORY (c, mcnt)) ^ not)
5441 goto fail; 5395 goto fail;
@@ -5512,23 +5466,23 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5512 bytes; nonzero otherwise. */ 5466 bytes; nonzero otherwise. */
5513 5467
5514static int 5468static int
5515bcmp_translate (s1, s2, len, translate) 5469bcmp_translate (s1, s2, len, translate, multibyte)
5516 unsigned char *s1, *s2; 5470 re_char *s1, *s2;
5517 register int len; 5471 register int len;
5518 RE_TRANSLATE_TYPE translate; 5472 RE_TRANSLATE_TYPE translate;
5473 const int multibyte;
5519{ 5474{
5520 register unsigned char *p1 = s1, *p2 = s2; 5475 register re_char *p1 = s1, *p2 = s2;
5521 unsigned char *p1_end = s1 + len; 5476 re_char *p1_end = s1 + len;
5522 unsigned char *p2_end = s2 + len; 5477 re_char *p2_end = s2 + len;
5523 5478
5524 while (p1 != p1_end && p2 != p2_end) 5479 while (p1 != p1_end && p2 != p2_end)
5525 { 5480 {
5526 int p1_charlen, p2_charlen; 5481 int p1_charlen, p2_charlen;
5527 int p1_ch, p2_ch; 5482 int p1_ch, p2_ch;
5528 5483
5529 /* FIXME: This assumes `multibyte = true'. */ 5484 p1_ch = RE_STRING_CHAR_AND_LENGTH (p1, p1_end - p1, p1_charlen);
5530 p1_ch = STRING_CHAR_AND_LENGTH (p1, p1_end - p1, p1_charlen); 5485 p2_ch = RE_STRING_CHAR_AND_LENGTH (p2, p2_end - p2, p2_charlen);
5531 p2_ch = STRING_CHAR_AND_LENGTH (p2, p2_end - p2, p2_charlen);
5532 5486
5533 if (RE_TRANSLATE (translate, p1_ch) 5487 if (RE_TRANSLATE (translate, p1_ch)
5534 != RE_TRANSLATE (translate, p2_ch)) 5488 != RE_TRANSLATE (translate, p2_ch))
diff --git a/src/regex.h b/src/regex.h
index 9ebc4e0bf22..9ee2060a1ed 100644
--- a/src/regex.h
+++ b/src/regex.h
@@ -362,9 +362,11 @@ struct re_pattern_buffer
362 /* If true, an anchor at a newline matches. */ 362 /* If true, an anchor at a newline matches. */
363 unsigned newline_anchor : 1; 363 unsigned newline_anchor : 1;
364 364
365#ifdef emacs
365 /* If true, multi-byte form in the `buffer' should be recognized as a 366 /* If true, multi-byte form in the `buffer' should be recognized as a
366 multibyte character. */ 367 multibyte character. */
367 unsigned multibyte : 1; 368 unsigned multibyte : 1;
369#endif
368 370
369/* [[[end pattern_buffer]]] */ 371/* [[[end pattern_buffer]]] */
370}; 372};