about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/regex.c 230
1 files changed, 133 insertions, 97 deletions
diff --git a/src/regex.c b/src/regex.c
index a997402a15d..a26c0f57a65 100644
--- a/src/regex.c
+++ b/src/regex.c
@@ -168,7 +168,7 @@ init_syntax_once ()
168 168
169#define SYNTAX(c) re_syntax_table[c] 169#define SYNTAX(c) re_syntax_table[c]
170 170
171/* Dummy macro for non emacs environments. */ 171/* Dummy macros for non-Emacs environments. */
172#define BASE_LEADING_CODE_P(c) (0) 172#define BASE_LEADING_CODE_P(c) (0)
173#define WORD_BOUNDARY_P(c1, c2) (0) 173#define WORD_BOUNDARY_P(c1, c2) (0)
174#define CHAR_HEAD_P(p) (1) 174#define CHAR_HEAD_P(p) (1)
@@ -1539,7 +1539,7 @@ static reg_errcode_t compile_range ();
1539#define PATFETCH(c) \ 1539#define PATFETCH(c) \
1540 do {if (p == pend) return REG_EEND; \ 1540 do {if (p == pend) return REG_EEND; \
1541 c = (unsigned char) *p++; \ 1541 c = (unsigned char) *p++; \
1542 if (translate) c = (unsigned char) translate[c]; \ 1542 if (translate) c = RE_TRANSLATE (translate, c); \
1543 } while (0) 1543 } while (0)
1544#endif 1544#endif
1545 1545
@@ -1560,7 +1560,7 @@ static reg_errcode_t compile_range ();
1560 when we use a character as a subscript we must make it unsigned. */ 1560 when we use a character as a subscript we must make it unsigned. */
1561#ifndef TRANSLATE 1561#ifndef TRANSLATE
1562#define TRANSLATE(d) \ 1562#define TRANSLATE(d) \
1563 (translate ? (unsigned char) RE_TRANSLATE (translate, (unsigned char) (d)) : (d)) 1563 (translate ? (unsigned) RE_TRANSLATE (translate, (unsigned) (d)) : (d))
1564#endif 1564#endif
1565 1565
1566 1566
@@ -2107,9 +2107,10 @@ regex_compile (pattern, size, syntax, bufp)
2107 incremented `p', by the way, to be the character after 2107 incremented `p', by the way, to be the character after
2108 the `*'. Do we have to do something analogous here 2108 the `*'. Do we have to do something analogous here
2109 for null bytes, because of RE_DOT_NOT_NULL? */ 2109 for null bytes, because of RE_DOT_NOT_NULL? */
2110 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.') 2110 if (TRANSLATE ((unsigned char)*(p - 2)) == TRANSLATE ('.')
2111 && zero_times_ok 2111 && zero_times_ok
2112 && p < pend && TRANSLATE (*p) == TRANSLATE ('\n') 2112 && p < pend
2113 && TRANSLATE ((unsigned char)*p) == TRANSLATE ('\n')
2113 && !(syntax & RE_DOT_NEWLINE)) 2114 && !(syntax & RE_DOT_NEWLINE))
2114 { /* We have .*\n. */ 2115 { /* We have .*\n. */
2115 STORE_JUMP (jump, b, laststart); 2116 STORE_JUMP (jump, b, laststart);
@@ -2333,7 +2334,18 @@ regex_compile (pattern, size, syntax, bufp)
2333 p += len; 2334 p += len;
2334 } 2335 }
2335 2336
2336 if (!SAME_CHARSET_P (c, c1)) 2337 if (SINGLE_BYTE_CHAR_P (c)
2338 && ! SINGLE_BYTE_CHAR_P (c1))
2339 {
2340 /* Handle a range such as \177-\377 in multibyte mode.
2341 Split that into two ranges,
2342 the low one ending at 0237, and the high one
2343 starting at ...040. */
2344 int c1_base = (c1 & ~0177) | 040;
2345 SET_RANGE_TABLE_WORK_AREA (range_table_work, c, c1);
2346 c1 = 0237;
2347 }
2348 else if (!SAME_CHARSET_P (c, c1))
2337 FREE_STACK_RETURN (REG_ERANGE); 2349 FREE_STACK_RETURN (REG_ERANGE);
2338 } 2350 }
2339 else 2351 else
@@ -2359,8 +2371,8 @@ regex_compile (pattern, size, syntax, bufp)
2359 for (this_char = range_start; this_char <= range_end; 2371 for (this_char = range_start; this_char <= range_end;
2360 this_char++) 2372 this_char++)
2361 SET_LIST_BIT (TRANSLATE (this_char)); 2373 SET_LIST_BIT (TRANSLATE (this_char));
2374 }
2362 } 2375 }
2363 }
2364 else 2376 else
2365 /* ... into range table. */ 2377 /* ... into range table. */
2366 SET_RANGE_TABLE_WORK_AREA (range_table_work, c, c1); 2378 SET_RANGE_TABLE_WORK_AREA (range_table_work, c, c1);
@@ -2913,8 +2925,8 @@ regex_compile (pattern, size, syntax, bufp)
2913 /* Here, C may have been translated, therefore C may not be equal to *P1. */ 2925 /* Here, C may have been translated, therefore C may not be equal to *P1. */
2914 while (1) 2926 while (1)
2915 { 2927 {
2916 BUF_PUSH (c); 2928 BUF_PUSH (c);
2917 (*pending_exact)++; 2929 (*pending_exact)++;
2918 if (++p1 == p) 2930 if (++p1 == p)
2919 break; 2931 break;
2920 2932
@@ -3121,64 +3133,6 @@ group_in_compile_stack (compile_stack, regnum)
3121 3133
3122 return false; 3134 return false;
3123} 3135}
3124
3125
3126/* Read the ending character of a range (in a bracket expression) from the
3127 uncompiled pattern *P_PTR (which ends at PEND). We assume the
3128 starting character is in `P[-2]'. (`P[-1]' is the character `-'.)
3129 Then we set the translation of all bits between the starting and
3130 ending characters (inclusive) in the compiled pattern B.
3131
3132 Return an error code.
3133
3134 We use these short variable names so we can use the same macros as
3135 `regex_compile' itself. */
3136
3137static reg_errcode_t
3138compile_range (p_ptr, pend, translate, syntax, b)
3139 const char **p_ptr, *pend;
3140 RE_TRANSLATE_TYPE translate;
3141 reg_syntax_t syntax;
3142 unsigned char *b;
3143{
3144 unsigned this_char;
3145
3146 const char *p = *p_ptr;
3147 int range_start, range_end;
3148
3149 if (p == pend)
3150 return REG_ERANGE;
3151
3152 /* Even though the pattern is a signed `char *', we need to fetch
3153 with unsigned char *'s; if the high bit of the pattern character
3154 is set, the range endpoints will be negative if we fetch using a
3155 signed char *.
3156
3157 We also want to fetch the endpoints without translating them; the
3158 appropriate translation is done in the bit-setting loop below. */
3159 /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *. */
3160 range_start = ((const unsigned char *) p)[-2];
3161 range_end = ((const unsigned char *) p)[0];
3162
3163 /* Have to increment the pointer into the pattern string, so the
3164 caller isn't still at the ending character. */
3165 (*p_ptr)++;
3166
3167 /* If the start is after the end, the range is empty. */
3168 if (range_start > range_end)
3169 return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
3170
3171 /* Here we see why `this_char' has to be larger than an `unsigned
3172 char' -- the range is inclusive, so if `range_end' == 0xff
3173 (assuming 8-bit characters), we would otherwise go into an infinite
3174 loop, since all characters <= 0xff. */
3175 for (this_char = range_start; this_char <= range_end; this_char++)
3176 {
3177 SET_LIST_BIT (TRANSLATE (this_char));
3178 }
3179
3180 return REG_NOERROR;
3181}
3182 3136
3183/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in 3137/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
3184 BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible 3138 BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible
@@ -3812,24 +3766,45 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
3812 the first null string. */ 3766 the first null string. */
3813 if (fastmap && startpos < total_size && !bufp->can_be_null) 3767 if (fastmap && startpos < total_size && !bufp->can_be_null)
3814 { 3768 {
3769 register const char *d;
3770 register unsigned int buf_ch;
3771
3772 d = POS_ADDR_VSTRING (startpos);
3773
3815 if (range > 0) /* Searching forwards. */ 3774 if (range > 0) /* Searching forwards. */
3816 { 3775 {
3817 register const char *d;
3818 register int lim = 0; 3776 register int lim = 0;
3819 int irange = range; 3777 int irange = range;
3820 3778
3821 if (startpos < size1 && startpos + range >= size1) 3779 if (startpos < size1 && startpos + range >= size1)
3822 lim = range - (size1 - startpos); 3780 lim = range - (size1 - startpos);
3823 3781
3824 d = POS_ADDR_VSTRING (startpos);
3825
3826 /* Written out as an if-else to avoid testing `translate' 3782 /* Written out as an if-else to avoid testing `translate'
3827 inside the loop. */ 3783 inside the loop. */
3828 if (translate) 3784 if (translate)
3829 while (range > lim 3785 {
3830 && !fastmap[(unsigned char) 3786 if (multibyte)
3831 RE_TRANSLATE (translate, (unsigned char) *d++)]) 3787 while (range > lim)
3832 range--; 3788 {
3789 int buf_charlen;
3790
3791 buf_ch = STRING_CHAR_AND_LENGTH (d, range - lim,
3792 buf_charlen);
3793
3794 buf_ch = RE_TRANSLATE (translate, buf_ch);
3795 if (buf_ch >= 0400
3796 || fastmap[buf_ch])
3797 break;
3798
3799 range -= buf_charlen;
3800 d += buf_charlen;
3801 }
3802 else
3803 while (range > lim
3804 && !fastmap[(unsigned char)
3805 RE_TRANSLATE (translate, (unsigned char) *d++)])
3806 range--;
3807 }
3833 else 3808 else
3834 while (range > lim && !fastmap[(unsigned char) *d++]) 3809 while (range > lim && !fastmap[(unsigned char) *d++])
3835 range--; 3810 range--;
@@ -3838,11 +3813,16 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
3838 } 3813 }
3839 else /* Searching backwards. */ 3814 else /* Searching backwards. */
3840 { 3815 {
3841 register char c = (size1 == 0 || startpos >= size1 3816 int room = (size1 == 0 || startpos >= size1
3842 ? string2[startpos - size1] 3817 ? size2 + size1 - startpos
3843 : string1[startpos]); 3818 : size1 - startpos);
3819
3820 buf_ch = STRING_CHAR (d, room);
3821 if (translate)
3822 buf_ch = RE_TRANSLATE (translate, buf_ch);
3844 3823
3845 if (!fastmap[(unsigned char) TRANSLATE (c)]) 3824 if (! (buf_ch >= 0400
3825 || fastmap[buf_ch]))
3846 goto advance; 3826 goto advance;
3847 } 3827 }
3848 } 3828 }
@@ -4515,14 +4495,36 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4515 testing `translate' inside the loop. */ 4495 testing `translate' inside the loop. */
4516 if (translate) 4496 if (translate)
4517 { 4497 {
4518 do 4498#ifdef emacs
4519 { 4499 if (multibyte)
4520 PREFETCH (); 4500 do
4521 if ((unsigned char) RE_TRANSLATE (translate, (unsigned char) *d++) 4501 {
4522 != (unsigned char) *p++) 4502 int pat_charlen, buf_charlen;
4523 goto fail; 4503 int pat_ch, buf_ch;
4524 } 4504
4525 while (--mcnt); 4505 PREFETCH ();
4506 pat_ch = STRING_CHAR_AND_LENGTH (p, pend - p, pat_charlen);
4507 buf_ch = STRING_CHAR_AND_LENGTH (d, dend - d, buf_charlen);
4508
4509 if (RE_TRANSLATE (translate, buf_ch)
4510 != pat_ch)
4511 goto fail;
4512
4513 p += pat_charlen;
4514 d += buf_charlen;
4515 mcnt -= pat_charlen;
4516 }
4517 while (mcnt > 0);
4518 else
4519#endif /* emacs */
4520 do
4521 {
4522 PREFETCH ();
4523 if ((unsigned char) RE_TRANSLATE (translate, (unsigned char) *d++)
4524 != (unsigned char) *p++)
4525 goto fail;
4526 }
4527 while (--mcnt);
4526 } 4528 }
4527 else 4529 else
4528 { 4530 {
@@ -4539,17 +4541,36 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4539 4541
4540 /* Match any character except possibly a newline or a null. */ 4542 /* Match any character except possibly a newline or a null. */
4541 case anychar: 4543 case anychar:
4542 DEBUG_PRINT1 ("EXECUTING anychar.\n"); 4544 {
4545 int buf_charlen;
4546 int buf_ch;
4543 4547
4544 PREFETCH (); 4548 DEBUG_PRINT1 ("EXECUTING anychar.\n");
4545 4549
4546 if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n') 4550 PREFETCH ();
4547 || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
4548 goto fail;
4549 4551
4550 SET_REGS_MATCHED (); 4552#ifdef emacs
4551 DEBUG_PRINT2 (" Matched `%d'.\n", *d); 4553 if (multibyte)
4552 d += multibyte ? MULTIBYTE_FORM_LENGTH (d, dend - d) : 1; 4554 buf_ch = STRING_CHAR_AND_LENGTH (d, dend - d, buf_charlen);
4555 else
4556#endif /* emacs */
4557 {
4558 buf_ch = *d;
4559 buf_charlen = 1;
4560 }
4561
4562 buf_ch = TRANSLATE (buf_ch);
4563
4564 if ((!(bufp->syntax & RE_DOT_NEWLINE)
4565 && buf_ch == '\n')
4566 || ((bufp->syntax & RE_DOT_NOT_NULL)
4567 && buf_ch == '\000'))
4568 goto fail;
4569
4570 SET_REGS_MATCHED ();
4571 DEBUG_PRINT2 (" Matched `%d'.\n", *d);
4572 d += buf_charlen;
4573 }
4553 break; 4574 break;
4554 4575
4555 4576
@@ -5926,12 +5947,27 @@ bcmp_translate (s1, s2, len, translate)
5926 RE_TRANSLATE_TYPE translate; 5947 RE_TRANSLATE_TYPE translate;
5927{ 5948{
5928 register unsigned char *p1 = s1, *p2 = s2; 5949 register unsigned char *p1 = s1, *p2 = s2;
5929 while (len) 5950 unsigned char *p1_end = s1 + len;
5951 unsigned char *p2_end = s2 + len;
5952
5953 while (p1 != p1_end && p2 != p2_end)
5930 { 5954 {
5931 if (RE_TRANSLATE (translate, *p1++) != RE_TRANSLATE (translate, *p2++)) 5955 int p1_charlen, p2_charlen;
5956 int p1_ch, p2_ch;
5957
5958 p1_ch = STRING_CHAR_AND_LENGTH (p1, p1_end - p1, p1_charlen);
5959 p2_ch = STRING_CHAR_AND_LENGTH (p2, p2_end - p2, p2_charlen);
5960
5961 if (RE_TRANSLATE (translate, p1_ch)
5962 != RE_TRANSLATE (translate, p2_ch))
5932 return 1; 5963 return 1;
5933 len--; 5964
5965 p1 += p1_charlen, p2 += p2_charlen;
5934 } 5966 }
5967
5968 if (p1 != p1_end || p2 != p2_end)
5969 return 1;
5970
5935 return 0; 5971 return 0;
5936} 5972}
5937 5973