aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorStefan Monnier2002-08-23 22:21:51 +0000
committerStefan Monnier2002-08-23 22:21:51 +0000
commit365958144ea38255d543a4232b926ca81e849fa9 (patch)
tree43beeeefed478bcbfac634c44348351456decaff /src
parentd846a776e1043ad6d23a71a8daf42cc8b197c4f9 (diff)
downloademacs-365958144ea38255d543a4232b926ca81e849fa9.tar.gz
emacs-365958144ea38255d543a4232b926ca81e849fa9.zip
(PATFETCH): Remove the translating fetch.
(PATFETCH_RAW): Rename to PATFETCH. (set_image_of_range): New fun. (SET_RANGE_TABLE_WORK_AREA): Use it. (regex_compile): Don't translate the pattern chars so eagerly. Only do it when inserting an `exactn' bytecode or when handling a char-range. (mutually_exclusive_p): Avoid empty statement.
Diffstat (limited to 'src')
-rw-r--r--src/ChangeLog22
-rw-r--r--src/regex.c76
2 files changed, 66 insertions, 32 deletions
diff --git a/src/ChangeLog b/src/ChangeLog
index 6dcc95b7f8d..c6180468193 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,14 @@
12002-08-23 Stefan Monnier <monnier@cs.yale.edu>
2
3 * regex.c (PATFETCH): Remove the translating fetch.
4 (PATFETCH_RAW): Rename to PATFETCH.
5 (set_image_of_range): New fun.
6 (SET_RANGE_TABLE_WORK_AREA): Use it.
7 (regex_compile): Don't translate the pattern chars so eagerly.
8 Only do it when inserting an `exactn' bytecode or when handling
9 a char-range.
10 (mutually_exclusive_p): Avoid empty statement.
11
12002-08-22 Kim F. Storm <storm@cua.dk> 122002-08-22 Kim F. Storm <storm@cua.dk>
2 13
3 * xdisp.c (redisplay_window): Do not `goto try_to_scroll' when we 14 * xdisp.c (redisplay_window): Do not `goto try_to_scroll' when we
@@ -511,11 +522,10 @@
511 (parse_solitary_modifier, Fexecute_extended_command): Likewise. 522 (parse_solitary_modifier, Fexecute_extended_command): Likewise.
512 * textprop.c (validate_interval_range, interval_of): Likewise. 523 * textprop.c (validate_interval_range, interval_of): Likewise.
513 524
514 * fontset.c (Fset_fontset_font): Use SDATA instead of 525 * fontset.c (Fset_fontset_font): Use SDATA instead of XSTRING()->data.
515 XSTRING()->data.
516 526
517 * charset.h (FETCH_STRING_CHAR_ADVANCE, 527 * charset.h (FETCH_STRING_CHAR_ADVANCE)
518 FETCH_STRING_CHAR_ADVANCE_NO_CHECK): Use SBYTES instead of 528 (FETCH_STRING_CHAR_ADVANCE_NO_CHECK): Use SBYTES instead of
519 XSTRING()->size_byte. 529 XSTRING()->size_byte.
520 530
521 * lisp.h (SDATA, SREF): Produce rvalue. 531 * lisp.h (SDATA, SREF): Produce rvalue.
@@ -524,8 +534,8 @@
524 * buffer.c (Fother_buffer): Use SREF when retrieving a byte from 534 * buffer.c (Fother_buffer): Use SREF when retrieving a byte from
525 a string. 535 a string.
526 * casefiddle.c (casify_object): Use SSET. 536 * casefiddle.c (casify_object): Use SSET.
527 * charset.h (FETCH_STRING_CHAR_ADVANCE, 537 * charset.h (FETCH_STRING_CHAR_ADVANCE)
528 FETCH_STRING_CHAR_ADVANCE_NO_CHECK): Use SDATA when getting 538 (FETCH_STRING_CHAR_ADVANCE_NO_CHECK): Use SDATA when getting
529 address of string contents. 539 address of string contents.
530 * data.c (Faref): Use SDATA. 540 * data.c (Faref): Use SDATA.
531 (Faset): Use SDATA, SSET. 541 (Faset): Use SDATA, SSET.
diff --git a/src/regex.c b/src/regex.c
index 591d6f14e12..e01259cc85a 100644
--- a/src/regex.c
+++ b/src/regex.c
@@ -19,7 +19,9 @@
19 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, 19 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
20 USA. */ 20 USA. */
21 21
22/* TODO: 22/* BUGS:
23 - (x?)*y\1z should match both xxxxyxz and xxxyz.
24 TODO:
23 - structure the opcode space into opcode+flag. 25 - structure the opcode space into opcode+flag.
24 - merge with glibc's regex.[ch]. 26 - merge with glibc's regex.[ch].
25 - replace (succeed_n + jump_n + set_number_at) with something that doesn't 27 - replace (succeed_n + jump_n + set_number_at) with something that doesn't
@@ -1682,17 +1684,9 @@ static re_char *skip_one_char _RE_ARGS ((re_char *p));
1682static int analyse_first _RE_ARGS ((re_char *p, re_char *pend, 1684static int analyse_first _RE_ARGS ((re_char *p, re_char *pend,
1683 char *fastmap, const int multibyte)); 1685 char *fastmap, const int multibyte));
1684 1686
1685/* Fetch the next character in the uncompiled pattern---translating it
1686 if necessary. */
1687#define PATFETCH(c) \
1688 do { \
1689 PATFETCH_RAW (c); \
1690 c = TRANSLATE (c); \
1691 } while (0)
1692
1693/* Fetch the next character in the uncompiled pattern, with no 1687/* Fetch the next character in the uncompiled pattern, with no
1694 translation. */ 1688 translation. */
1695#define PATFETCH_RAW(c) \ 1689#define PATFETCH(c) \
1696 do { \ 1690 do { \
1697 int len; \ 1691 int len; \
1698 if (p == pend) return REG_EEND; \ 1692 if (p == pend) return REG_EEND; \
@@ -1914,12 +1908,13 @@ struct range_table_work_area
1914#define BIT_UPPER 0x10 1908#define BIT_UPPER 0x10
1915#define BIT_MULTIBYTE 0x20 1909#define BIT_MULTIBYTE 0x20
1916 1910
1917/* Set a range (RANGE_START, RANGE_END) to WORK_AREA. */ 1911/* Set a range START..END to WORK_AREA.
1918#define SET_RANGE_TABLE_WORK_AREA(work_area, range_start, range_end) \ 1912 The range is passed through TRANSLATE, so START and END
1919 do { \ 1913 should be untranslated. */
1920 EXTEND_RANGE_TABLE_WORK_AREA ((work_area), 2); \ 1914#define SET_RANGE_TABLE_WORK_AREA(work_area, start, end) \
1921 (work_area).table[(work_area).used++] = (range_start); \ 1915 do { \
1922 (work_area).table[(work_area).used++] = (range_end); \ 1916 EXTEND_RANGE_TABLE_WORK_AREA ((work_area), 2); \
1917 set_image_of_range (&work_area, start, end, translate); \
1923 } while (0) 1918 } while (0)
1924 1919
1925/* Free allocated memory for WORK_AREA. */ 1920/* Free allocated memory for WORK_AREA. */
@@ -2077,6 +2072,31 @@ re_wctype_to_bit (cc)
2077} 2072}
2078#endif 2073#endif
2079 2074
2075
2076
2077/* Append to WORK_AREA the image of the range START..END when passed through
2078 TRANSLATE. This is not necessarily TRANSLATE(start)..TRANSLATE(end)
2079 and is not even necessarily contiguous.
2080 We approximate it with the smallest contiguous range CMIN..CMAX that contains
2081 all the chars we need. Two table slots are written without growing the table here; SET_RANGE_TABLE_WORK_AREA calls EXTEND_RANGE_TABLE_WORK_AREA first. */
2082static void
2083set_image_of_range (work_area, start, end, translate)
2084 RE_TRANSLATE_TYPE translate; /* Tested with RE_TRANSLATE_P. NOTE(review): presumably also read implicitly by the TRANSLATE macro -- confirm. */
2085 struct range_table_work_area *work_area; /* Receives the pair CMIN, CMAX. */
2086 re_wchar_t start, end; /* Untranslated bounds of the range. */
2087{
2088 re_wchar_t cmin = TRANSLATE (start), cmax = TRANSLATE (end); /* Seed the bounds with the translated endpoints. */
2089 if (RE_TRANSLATE_P (translate)) /* Scan the range only when RE_TRANSLATE_P holds for TRANSLATE. */
2090 for (; start <= end; start++) /* Visits every char of START..END; START is consumed as the loop counter. */
2091 {
2092 re_wchar_t c = TRANSLATE (start);
2093 cmin = MIN (cmin, c); /* Widen the lower bound. */
2094 cmax = MAX (cmax, c); /* Widen the upper bound. */
2095 }
2096 work_area->table[work_area->used++] = (cmin); /* Store the range start... */
2097 work_area->table[work_area->used++] = (cmax); /* ...then the range end; USED advances by two in total. */
2098}
2099
2080/* Explicit quit checking is only used on NTemacs. */ 2100/* Explicit quit checking is only used on NTemacs. */
2081#if defined WINDOWSNT && defined emacs && defined QUIT 2101#if defined WINDOWSNT && defined emacs && defined QUIT
2082extern int immediate_quit; 2102extern int immediate_quit;
@@ -2525,6 +2545,10 @@ regex_compile (pattern, size, syntax, bufp)
2525 2545
2526 if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 2546 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2527 2547
2548 /* Don't translate yet. The range TRANSLATE(X..Y) cannot
2549 always be determined from TRANSLATE(X) and TRANSLATE(Y).
2550 So the translation is done later in a loop. Example:
2551 (let ((case-fold-search t)) (string-match "[A-_]" "A")) */
2528 PATFETCH (c); 2552 PATFETCH (c);
2529 2553
2530 /* \ might escape characters inside [...] and [^...]. */ 2554 /* \ might escape characters inside [...] and [^...]. */
@@ -2584,7 +2608,7 @@ regex_compile (pattern, size, syntax, bufp)
2584 them). */ 2608 them). */
2585 if (c == ':' && *p == ']') 2609 if (c == ':' && *p == ']')
2586 { 2610 {
2587 int ch; 2611 re_wchar_t ch;
2588 re_wctype_t cc; 2612 re_wctype_t cc;
2589 2613
2590 cc = re_wctype (str); 2614 cc = re_wctype (str);
@@ -2653,8 +2677,8 @@ regex_compile (pattern, size, syntax, bufp)
2653 starting at the smallest character in 2677 starting at the smallest character in
2654 the charset of C1 and ending at C1. */ 2678 the charset of C1 and ending at C1. */
2655 int charset = CHAR_CHARSET (c1); 2679 int charset = CHAR_CHARSET (c1);
2656 int c2 = MAKE_CHAR (charset, 0, 0); 2680 re_wchar_t c2 = MAKE_CHAR (charset, 0, 0);
2657 2681
2658 SET_RANGE_TABLE_WORK_AREA (range_table_work, 2682 SET_RANGE_TABLE_WORK_AREA (range_table_work,
2659 c2, c1); 2683 c2, c1);
2660 c1 = 0377; 2684 c1 = 0377;
@@ -2672,7 +2696,7 @@ regex_compile (pattern, size, syntax, bufp)
2672 /* ... into bitmap. */ 2696 /* ... into bitmap. */
2673 { 2697 {
2674 re_wchar_t this_char; 2698 re_wchar_t this_char;
2675 int range_start = c, range_end = c1; 2699 re_wchar_t range_start = c, range_end = c1;
2676 2700
2677 /* If the start is after the end, the range is empty. */ 2701 /* If the start is after the end, the range is empty. */
2678 if (range_start > range_end) 2702 if (range_start > range_end)
@@ -2769,7 +2793,7 @@ regex_compile (pattern, size, syntax, bufp)
2769 /* Do not translate the character after the \, so that we can 2793 /* Do not translate the character after the \, so that we can
2770 distinguish, e.g., \B from \b, even if we normally would 2794 distinguish, e.g., \B from \b, even if we normally would
2771 translate, e.g., B to b. */ 2795 translate, e.g., B to b. */
2772 PATFETCH_RAW (c); 2796 PATFETCH (c);
2773 2797
2774 switch (c) 2798 switch (c)
2775 { 2799 {
@@ -3129,13 +3153,13 @@ regex_compile (pattern, size, syntax, bufp)
3129 3153
3130 case 'c': 3154 case 'c':
3131 laststart = b; 3155 laststart = b;
3132 PATFETCH_RAW (c); 3156 PATFETCH (c);
3133 BUF_PUSH_2 (categoryspec, c); 3157 BUF_PUSH_2 (categoryspec, c);
3134 break; 3158 break;
3135 3159
3136 case 'C': 3160 case 'C':
3137 laststart = b; 3161 laststart = b;
3138 PATFETCH_RAW (c); 3162 PATFETCH (c);
3139 BUF_PUSH_2 (notcategoryspec, c); 3163 BUF_PUSH_2 (notcategoryspec, c);
3140 break; 3164 break;
3141#endif /* emacs */ 3165#endif /* emacs */
@@ -3225,7 +3249,6 @@ regex_compile (pattern, size, syntax, bufp)
3225 /* You might think it would be useful for \ to mean 3249 /* You might think it would be useful for \ to mean
3226 not to translate; but if we don't translate it 3250 not to translate; but if we don't translate it
3227 it will never match anything. */ 3251 it will never match anything. */
3228 c = TRANSLATE (c);
3229 goto normal_char; 3252 goto normal_char;
3230 } 3253 }
3231 break; 3254 break;
@@ -3234,7 +3257,7 @@ regex_compile (pattern, size, syntax, bufp)
3234 default: 3257 default:
3235 /* Expects the character in `c'. */ 3258 /* Expects the character in `c'. */
3236 normal_char: 3259 normal_char:
3237 /* If no exactn currently being built. */ 3260 /* If no exactn currently being built. */
3238 if (!pending_exact 3261 if (!pending_exact
3239 3262
3240 /* If last exactn not at current position. */ 3263 /* If last exactn not at current position. */
@@ -3265,6 +3288,7 @@ regex_compile (pattern, size, syntax, bufp)
3265 { 3288 {
3266 int len; 3289 int len;
3267 3290
3291 c = TRANSLATE (c);
3268 if (multibyte) 3292 if (multibyte)
3269 len = CHAR_STRING (c, b); 3293 len = CHAR_STRING (c, b);
3270 else 3294 else
@@ -4427,7 +4451,7 @@ mutually_exclusive_p (bufp, p1, p2)
4427 they don't overlap. The union of the two sets of excluded 4451 they don't overlap. The union of the two sets of excluded
4428 chars should cover all possible chars, which, as a matter of 4452 chars should cover all possible chars, which, as a matter of
4429 fact, is virtually impossible in multibyte buffers. */ 4453 fact, is virtually impossible in multibyte buffers. */
4430 ; 4454 break;
4431 } 4455 }
4432 break; 4456 break;
4433 4457