diff options
| author | Paul Eggert | 2018-08-05 18:41:20 -0700 |
|---|---|---|
| committer | Paul Eggert | 2018-08-05 19:36:09 -0700 |
| commit | 3a6abe65c1324361bf0efcb65df61d22a39cfaaf (patch) | |
| tree | 90ecb27f9ecbb8a0f8d9b24cf67a809b52b0b32d | |
| parent | d904cc83f3036db96107a3976cee1a0112547de6 (diff) | |
| download | emacs-3a6abe65c1324361bf0efcb65df61d22a39cfaaf.tar.gz emacs-3a6abe65c1324361bf0efcb65df61d22a39cfaaf.zip | |
Simplify regex-emacs code by assuming Emacs
* src/regex-emacs.c: Omit no-longer-needed AIX code.
Don’t ignore GCC warnings.
Include regex-emacs.h immediately after config.h,
to test that it’s independent.
Omit the "#ifndef emacs", "#ifdef REGEX_MALLOC",
"#if WIDE_CHAR_SUPPORT" and "#ifdef _REGEX_RE_COMP"
code, as we are no longer interested in compiling outside
Emacs (with or without debugging or native wide char support)
or in avoiding alloca.
(REGEX_EMACS_DEBUG, regex_emacs_debug): Rename from DEBUG and debug,
to avoid collision with other DEBUGS. All uses changed.
In debugging output, change %ld and %zd to %zu when appropriate.
No need to include stddef.h, stdlib.h, sys/types.h, wchar.h,
wctype.h, locale/localeinfo.h, locale/elem-hash.h, langinfo.h,
libintl.h, unistd.h, stdbool.h, string.h, stdio.h, assert.h.
All uses of assert changed to eassert.
(RE_DUP_MAX, reg_syntax_t, RE_BACKSLASH_ESCAPE_IN_LISTS)
(RE_BK_PLUS_QM, RE_CHAR_CLASSES, RE_CONTEXT_INDEP_ANCHORS)
(RE_CONTEXT_INDEP_OPS, RE_CONTEXT_INVALID_OPS, RE_DOT_NEWLINE)
(RE_DOT_NOT_NULL, RE_HAT_LISTS_NOT_NEWLINE, RE_INTERVALS)
(RE_LIMITED_OPS, RE_NEWLINE_ALT, RE_NO_BK_BRACES)
(RE_NO_BK_PARENS, RE_NO_BK_REFS, RE_NO_BK_VBAR)
(RE_NO_EMPTY_RANGES, RE_UNMATCHED_RIGHT_PAREN_ORD)
(RE_NO_POSIX_BACKTRACKING, RE_NO_GNU_OPS, RE_FRUGAL)
(RE_SHY_GROUPS, RE_NO_NEWLINE_ANCHOR, RE_SYNTAX_EMACS)
(REG_NOERROR, REG_NOMATCH, REG_BADPAT, REG_ECOLLATE)
(REG_ECTYPE, REG_EESCAPE, REG_ESUBREG, REG_EBRACK, REG_EPAREN)
(REG_EBRACE, REG_BADBR, REG_ERANGE, REG_ESPACE, REG_BADRPT)
(REG_EEND, REG_ESIZE, REG_ERPAREN, REG_ERANGEX, REG_ESIZEBR)
(reg_errcode_t, REGS_UNALLOCATED, REGS_REALLOCATE, REGS_FIXED)
(RE_NREGS, RE_TRANSLATE, RE_TRANSLATE_P):
Move here from regex-emacs.h.
(RE_NREGS): Define unconditionally.
(boolean): Remove. All uses replaced by bool.
(WIDE_CHAR_SUPPORT, regfree, regexec, regcomp, regerror)
(re_set_syntax, re_syntax_options, WEAK_ALIAS, gettext, gettext_noop):
Remove. All uses removed.
(malloc, realloc, free): Do not redefine. Adjust all callers
to use xmalloc, xrealloc, xfree instead.
(re_error_msgid): Use C99 to avoid need to keep in same order
as reg_errcode_t.
(REGEX_USE_SAFE_ALLOCA): Simplify by using USE_SAFE_ALLOCA.
(REGEX_ALLOCATE, REGEX_REALLOCATE, REGEX_FREE, REGEX_ALLOCATE_STACK)
(REGEX_REALLOCATE_STACK, REGEX_FREE_STACK): Remove.
All callers changed to use the non-REGEX_MALLOC version.
(REGEX_TALLOC): Remove. All callers changed to use SAFE_ALLOCA.
(re_set_syntax): Remove; unused.
(MATCH_MAY_ALLOCATE): Remove; now always true. All uses simplified.
(INIT_FAILURE_ALLOC): Define unconditionally.
(re_compile_fastmap): Now static.
(re_compile_pattern): Avoid unnecessary cast.
* src/regex-emacs.h (EMACS_REGEX_H): Rename from _REGEX_H to
avoid possible collision with glibc.
Don’t include sys/types.h. All uses of ssize_t changed to ptrdiff_t.
Don’t worry about C++ or VMS.
Assume emacs is defined and that _REGEX_RE_COMP and WIDE_CHAR_SUPPORT
are not.
Define struct re_registers before including lisp.h.
(REG_ENOSYS, RE_TRANSLATE_TYPE): Remove; all uses replaced by
Lisp_Object.
(regoff_t): Remove. All uses replaced with ptrdiff_t.
(re_match, regcomp, regexec, regerror, regfree):
Remove decl of nonexistent functions.
(RE_DEBUG, RE_SYNTAX_AWK, RE_SYNTAX_GNU_AWK)
(RE_SYNTAX_POSIX_AWK, RE_SYNTAX_GREP, RE_SYNTAX_EGREP)
(RE_SYNTAX_POSIX_EGREP, RE_SYNTAX_ED, RE_SYNTAX_SED)
(_RE_SYNTAX_POSIX_COMMON, RE_SYNTAX_POSIX_BASIC)
(RE_SYNTAX_POSIX_MINIMAL_BASIC, RE_SYNTAX_POSIX_EXTENDED)
(RE_SYNTAX_POSIX_MINIMAL_EXTENDED, REG_EXTENDED, REG_ICASE)
(REG_NEWLINE, REG_NOSUB, REG_NOTBOL, REG_NOTEOL, regmatch_t):
Remove; unused.
* src/search.c (Fset_match_data): Simplify range test now that
we know it’s ptrdiff_t.
| -rw-r--r-- | src/regex-emacs.c | 2013 | ||||
| -rw-r--r-- | src/regex-emacs.h | 543 | ||||
| -rw-r--r-- | src/search.c | 21 | ||||
| -rw-r--r-- | src/thread.h | 4 |
4 files changed, 500 insertions, 2081 deletions
diff --git a/src/regex-emacs.c b/src/regex-emacs.c index 08fc8c67f1c..eb5970ffcf1 100644 --- a/src/regex-emacs.c +++ b/src/regex-emacs.c | |||
| @@ -21,159 +21,187 @@ | |||
| 21 | - structure the opcode space into opcode+flag. | 21 | - structure the opcode space into opcode+flag. |
| 22 | - merge with glibc's regex.[ch]. | 22 | - merge with glibc's regex.[ch]. |
| 23 | - replace (succeed_n + jump_n + set_number_at) with something that doesn't | 23 | - replace (succeed_n + jump_n + set_number_at) with something that doesn't |
| 24 | need to modify the compiled regexp so that re_match can be reentrant. | 24 | need to modify the compiled regexp so that re_search can be reentrant. |
| 25 | - get rid of on_failure_jump_smart by doing the optimization in re_comp | 25 | - get rid of on_failure_jump_smart by doing the optimization in re_comp |
| 26 | rather than at run-time, so that re_match can be reentrant. | 26 | rather than at run-time, so that re_search can be reentrant. |
| 27 | */ | 27 | */ |
| 28 | 28 | ||
| 29 | /* AIX requires this to be the first thing in the file. */ | ||
| 30 | #if defined _AIX && !defined REGEX_MALLOC | ||
| 31 | #pragma alloca | ||
| 32 | #endif | ||
| 33 | |||
| 34 | /* Ignore some GCC warnings for now. This section should go away | ||
| 35 | once the Emacs and Gnulib regex code is merged. */ | ||
| 36 | #if 4 < __GNUC__ + (5 <= __GNUC_MINOR__) || defined __clang__ | ||
| 37 | # pragma GCC diagnostic ignored "-Wstrict-overflow" | ||
| 38 | # ifndef emacs | ||
| 39 | # pragma GCC diagnostic ignored "-Wunused-function" | ||
| 40 | # pragma GCC diagnostic ignored "-Wunused-macros" | ||
| 41 | # pragma GCC diagnostic ignored "-Wunused-result" | ||
| 42 | # pragma GCC diagnostic ignored "-Wunused-variable" | ||
| 43 | # endif | ||
| 44 | #endif | ||
| 45 | |||
| 46 | #if 4 < __GNUC__ + (6 <= __GNUC_MINOR__) && ! defined __clang__ | ||
| 47 | # pragma GCC diagnostic ignored "-Wunused-but-set-variable" | ||
| 48 | #endif | ||
| 49 | |||
| 50 | #include <config.h> | 29 | #include <config.h> |
| 51 | 30 | ||
| 52 | #include <stddef.h> | 31 | /* Get the interface, including the syntax bits. */ |
| 53 | #include <stdlib.h> | 32 | #include "regex-emacs.h" |
| 54 | |||
| 55 | #ifdef emacs | ||
| 56 | /* We need this for `regex-emacs.h', and perhaps for the Emacs include | ||
| 57 | files. */ | ||
| 58 | # include <sys/types.h> | ||
| 59 | #endif | ||
| 60 | |||
| 61 | /* Whether to use ISO C Amendment 1 wide char functions. | ||
| 62 | Those should not be used for Emacs since it uses its own. */ | ||
| 63 | #if defined _LIBC | ||
| 64 | #define WIDE_CHAR_SUPPORT 1 | ||
| 65 | #else | ||
| 66 | #define WIDE_CHAR_SUPPORT \ | ||
| 67 | (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC && !emacs) | ||
| 68 | #endif | ||
| 69 | 33 | ||
| 70 | /* For platform which support the ISO C amendment 1 functionality we | 34 | #include <stdlib.h> |
| 71 | support user defined character classes. */ | ||
| 72 | #if WIDE_CHAR_SUPPORT | ||
| 73 | /* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */ | ||
| 74 | # include <wchar.h> | ||
| 75 | # include <wctype.h> | ||
| 76 | #endif | ||
| 77 | 35 | ||
| 78 | #ifdef _LIBC | 36 | #include "character.h" |
| 79 | /* We have to keep the namespace clean. */ | 37 | #include "buffer.h" |
| 80 | # define regfree(preg) __regfree (preg) | ||
| 81 | # define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef) | ||
| 82 | # define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags) | ||
| 83 | # define regerror(err_code, preg, errbuf, errbuf_size) \ | ||
| 84 | __regerror (err_code, preg, errbuf, errbuf_size) | ||
| 85 | # define re_set_registers(bu, re, nu, st, en) \ | ||
| 86 | __re_set_registers (bu, re, nu, st, en) | ||
| 87 | # define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \ | ||
| 88 | __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) | ||
| 89 | # define re_match(bufp, string, size, pos, regs) \ | ||
| 90 | __re_match (bufp, string, size, pos, regs) | ||
| 91 | # define re_search(bufp, string, size, startpos, range, regs) \ | ||
| 92 | __re_search (bufp, string, size, startpos, range, regs) | ||
| 93 | # define re_compile_pattern(pattern, length, bufp) \ | ||
| 94 | __re_compile_pattern (pattern, length, bufp) | ||
| 95 | # define re_set_syntax(syntax) __re_set_syntax (syntax) | ||
| 96 | # define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \ | ||
| 97 | __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop) | ||
| 98 | # define re_compile_fastmap(bufp) __re_compile_fastmap (bufp) | ||
| 99 | |||
| 100 | /* Make sure we call libc's function even if the user overrides them. */ | ||
| 101 | # define btowc __btowc | ||
| 102 | # define iswctype __iswctype | ||
| 103 | # define wctype __wctype | ||
| 104 | |||
| 105 | # define WEAK_ALIAS(a,b) weak_alias (a, b) | ||
| 106 | |||
| 107 | /* We are also using some library internals. */ | ||
| 108 | # include <locale/localeinfo.h> | ||
| 109 | # include <locale/elem-hash.h> | ||
| 110 | # include <langinfo.h> | ||
| 111 | #else | ||
| 112 | # define WEAK_ALIAS(a,b) | ||
| 113 | #endif | ||
| 114 | 38 | ||
| 115 | /* This is for other GNU distributions with internationalized messages. */ | 39 | #include "syntax.h" |
| 116 | #if HAVE_LIBINTL_H || defined _LIBC | 40 | #include "category.h" |
| 117 | # include <libintl.h> | ||
| 118 | #else | ||
| 119 | # define gettext(msgid) (msgid) | ||
| 120 | #endif | ||
| 121 | 41 | ||
| 122 | #ifndef gettext_noop | 42 | /* Maximum number of duplicates an interval can allow. Some systems |
| 123 | /* This define is so xgettext can find the internationalizable | 43 | define this in other header files, but we want our |
| 124 | strings. */ | 44 | value, so remove any previous define. */ |
| 125 | # define gettext_noop(String) String | 45 | #ifdef RE_DUP_MAX |
| 46 | # undef RE_DUP_MAX | ||
| 126 | #endif | 47 | #endif |
| 127 | 48 | /* Repeat counts are stored in opcodes as 2 byte integers. This was | |
| 128 | /* The `emacs' switch turns on certain matching commands | 49 | previously limited to 7fff because the parsing code uses signed |
| 129 | that make sense only in Emacs. */ | 54 | recognize. The set/not-set meanings were historically chosen so |
| 130 | #ifdef emacs | 51 | #define RE_DUP_MAX (0xffff) |
| 131 | 52 | ||
| 132 | # include "lisp.h" | 53 | /* The following bits are used to determine the regexp syntax we |
| 133 | # include "character.h" | 54 | recognize. The set/not-set meanings where historically chosen so |
| 134 | # include "buffer.h" | 55 | that Emacs syntax had the value 0. |
| 135 | 56 | The bits are given in alphabetical order, and | |
| 136 | # include "syntax.h" | 57 | the definitions shifted by one from the previous bit; thus, when we |
| 137 | # include "category.h" | 58 | add or remove a bit, only one other definition need change. */ |
| 59 | typedef unsigned long reg_syntax_t; | ||
| 60 | |||
| 61 | /* If this bit is not set, then \ inside a bracket expression is literal. | ||
| 62 | If set, then such a \ quotes the following character. */ | ||
| 63 | #define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1) | ||
| 64 | |||
| 65 | /* If this bit is not set, then + and ? are operators, and \+ and \? are | ||
| 66 | literals. | ||
| 67 | If set, then \+ and \? are operators and + and ? are literals. */ | ||
| 68 | #define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1) | ||
| 69 | |||
| 70 | /* If this bit is set, then character classes are supported. They are: | ||
| 71 | [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:], | ||
| 72 | [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:]. | ||
| 73 | If not set, then character classes are not supported. */ | ||
| 74 | #define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1) | ||
| 75 | |||
| 76 | /* If this bit is set, then ^ and $ are always anchors (outside bracket | ||
| 77 | expressions, of course). | ||
| 78 | If this bit is not set, then it depends: | ||
| 79 | ^ is an anchor if it is at the beginning of a regular | ||
| 80 | expression or after an open-group or an alternation operator; | ||
| 81 | $ is an anchor if it is at the end of a regular expression, or | ||
| 82 | before a close-group or an alternation operator. | ||
| 83 | |||
| 84 | This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because | ||
| 85 | POSIX draft 11.2 says that * etc. in leading positions is undefined. | ||
| 86 | We already implemented a previous draft which made those constructs | ||
| 87 | invalid, though, so we haven't changed the code back. */ | ||
| 88 | #define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1) | ||
| 89 | |||
| 90 | /* If this bit is set, then special characters are always special | ||
| 91 | regardless of where they are in the pattern. | ||
| 92 | If this bit is not set, then special characters are special only in | ||
| 93 | some contexts; otherwise they are ordinary. Specifically, | ||
| 94 | * + ? and intervals are only special when not after the beginning, | ||
| 95 | open-group, or alternation operator. */ | ||
| 96 | #define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1) | ||
| 97 | |||
| 98 | /* If this bit is set, then *, +, ?, and { cannot be first in an re or | ||
| 99 | immediately after an alternation or begin-group operator. */ | ||
| 100 | #define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1) | ||
| 101 | |||
| 102 | /* If this bit is set, then . matches newline. | ||
| 103 | If not set, then it doesn't. */ | ||
| 104 | #define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1) | ||
| 105 | |||
| 106 | /* If this bit is set, then . doesn't match NUL. | ||
| 107 | If not set, then it does. */ | ||
| 108 | #define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1) | ||
| 109 | |||
| 110 | /* If this bit is set, nonmatching lists [^...] do not match newline. | ||
| 111 | If not set, they do. */ | ||
| 112 | #define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1) | ||
| 113 | |||
| 114 | /* If this bit is set, either \{...\} or {...} defines an | ||
| 115 | interval, depending on RE_NO_BK_BRACES. | ||
| 116 | If not set, \{, \}, {, and } are literals. */ | ||
| 117 | #define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1) | ||
| 118 | |||
| 119 | /* If this bit is set, +, ? and | aren't recognized as operators. | ||
| 120 | If not set, they are. */ | ||
| 121 | #define RE_LIMITED_OPS (RE_INTERVALS << 1) | ||
| 122 | |||
| 123 | /* If this bit is set, newline is an alternation operator. | ||
| 124 | If not set, newline is literal. */ | ||
| 125 | #define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1) | ||
| 126 | |||
| 127 | /* If this bit is set, then `{...}' defines an interval, and \{ and \} | ||
| 128 | are literals. | ||
| 129 | If not set, then `\{...\}' defines an interval. */ | ||
| 130 | #define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1) | ||
| 131 | |||
| 132 | /* If this bit is set, (...) defines a group, and \( and \) are literals. | ||
| 133 | If not set, \(...\) defines a group, and ( and ) are literals. */ | ||
| 134 | #define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1) | ||
| 135 | |||
| 136 | /* If this bit is set, then \<digit> matches <digit>. | ||
| 137 | If not set, then \<digit> is a back-reference. */ | ||
| 138 | #define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1) | ||
| 139 | |||
| 140 | /* If this bit is set, then | is an alternation operator, and \| is literal. | ||
| 141 | If not set, then \| is an alternation operator, and | is literal. */ | ||
| 142 | #define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1) | ||
| 143 | |||
| 144 | /* If this bit is set, then an ending range point collating higher | ||
| 145 | than the starting range point, as in [z-a], is invalid. | ||
| 146 | If not set, then when ending range point collates higher than the | ||
| 147 | starting range point, the range is ignored. */ | ||
| 148 | #define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1) | ||
| 149 | |||
| 150 | /* If this bit is set, then an unmatched ) is ordinary. | ||
| 151 | If not set, then an unmatched ) is invalid. */ | ||
| 152 | #define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1) | ||
| 153 | |||
| 154 | /* If this bit is set, succeed as soon as we match the whole pattern, | ||
| 155 | without further backtracking. */ | ||
| 156 | #define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1) | ||
| 157 | |||
| 158 | /* If this bit is set, do not process the GNU regex operators. | ||
| 159 | If not set, then the GNU regex operators are recognized. */ | ||
| 160 | #define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1) | ||
| 161 | |||
| 162 | /* If this bit is set, then *?, +? and ?? match non greedily. */ | ||
| 163 | #define RE_FRUGAL (RE_NO_GNU_OPS << 1) | ||
| 164 | |||
| 165 | /* If this bit is set, then (?:...) is treated as a shy group. */ | ||
| 166 | #define RE_SHY_GROUPS (RE_FRUGAL << 1) | ||
| 167 | |||
| 168 | /* If this bit is set, ^ and $ only match at beg/end of buffer. */ | ||
| 169 | #define RE_NO_NEWLINE_ANCHOR (RE_SHY_GROUPS << 1) | ||
| 170 | |||
| 171 | /* This global variable defines the particular regexp syntax to use (for | ||
| 172 | some interfaces). When a regexp is compiled, the syntax used is | ||
| 173 | stored in the pattern buffer, so changing this does not affect | ||
| 174 | already-compiled regexps. */ | ||
| 175 | /* extern reg_syntax_t re_syntax_options; */ | ||
| 176 | /* Define combinations of the above bits for the standard possibilities. */ | ||
| 177 | #define RE_SYNTAX_EMACS \ | ||
| 178 | (RE_CHAR_CLASSES | RE_INTERVALS | RE_SHY_GROUPS | RE_FRUGAL) | ||
| 138 | 179 | ||
| 139 | /* Make syntax table lookup grant data in gl_state. */ | 180 | /* Make syntax table lookup grant data in gl_state. */ |
| 140 | # define SYNTAX(c) syntax_property (c, 1) | 181 | #define SYNTAX(c) syntax_property (c, 1) |
| 141 | |||
| 142 | # ifdef malloc | ||
| 143 | # undef malloc | ||
| 144 | # endif | ||
| 145 | # define malloc xmalloc | ||
| 146 | # ifdef realloc | ||
| 147 | # undef realloc | ||
| 148 | # endif | ||
| 149 | # define realloc xrealloc | ||
| 150 | # ifdef free | ||
| 151 | # undef free | ||
| 152 | # endif | ||
| 153 | # define free xfree | ||
| 154 | 182 | ||
| 155 | /* Converts the pointer to the char to BEG-based offset from the start. */ | 183 | /* Converts the pointer to the char to BEG-based offset from the start. */ |
| 156 | # define PTR_TO_OFFSET(d) POS_AS_IN_BUFFER (POINTER_TO_OFFSET (d)) | 184 | #define PTR_TO_OFFSET(d) POS_AS_IN_BUFFER (POINTER_TO_OFFSET (d)) |
| 157 | /* Strings are 0-indexed, buffers are 1-indexed; we pun on the boolean | 185 | /* Strings are 0-indexed, buffers are 1-indexed; we pun on the boolean |
| 158 | result to get the right base index. */ | 186 | result to get the right base index. */ |
| 159 | # define POS_AS_IN_BUFFER(p) \ | 187 | #define POS_AS_IN_BUFFER(p) \ |
| 160 | ((p) + (NILP (gl_state.object) || BUFFERP (gl_state.object))) | 188 | ((p) + (NILP (gl_state.object) || BUFFERP (gl_state.object))) |
| 161 | 189 | ||
| 162 | # define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte) | 190 | #define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte) |
| 163 | # define RE_TARGET_MULTIBYTE_P(bufp) ((bufp)->target_multibyte) | 191 | #define RE_TARGET_MULTIBYTE_P(bufp) ((bufp)->target_multibyte) |
| 164 | # define RE_STRING_CHAR(p, multibyte) \ | 192 | #define RE_STRING_CHAR(p, multibyte) \ |
| 165 | (multibyte ? (STRING_CHAR (p)) : (*(p))) | 193 | (multibyte ? (STRING_CHAR (p)) : (*(p))) |
| 166 | # define RE_STRING_CHAR_AND_LENGTH(p, len, multibyte) \ | 194 | #define RE_STRING_CHAR_AND_LENGTH(p, len, multibyte) \ |
| 167 | (multibyte ? (STRING_CHAR_AND_LENGTH (p, len)) : ((len) = 1, *(p))) | 195 | (multibyte ? (STRING_CHAR_AND_LENGTH (p, len)) : ((len) = 1, *(p))) |
| 168 | 196 | ||
| 169 | # define RE_CHAR_TO_MULTIBYTE(c) UNIBYTE_TO_CHAR (c) | 197 | #define RE_CHAR_TO_MULTIBYTE(c) UNIBYTE_TO_CHAR (c) |
| 170 | 198 | ||
| 171 | # define RE_CHAR_TO_UNIBYTE(c) CHAR_TO_BYTE_SAFE (c) | 199 | #define RE_CHAR_TO_UNIBYTE(c) CHAR_TO_BYTE_SAFE (c) |
| 172 | 200 | ||
| 173 | /* Set C a (possibly converted to multibyte) character before P. P | 201 | /* Set C a (possibly converted to multibyte) character before P. P |
| 174 | points into a string which is the virtual concatenation of STR1 | 202 | points into a string which is the virtual concatenation of STR1 |
| 175 | (which ends at END1) or STR2 (which ends at END2). */ | 203 | (which ends at END1) or STR2 (which ends at END2). */ |
| 176 | # define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \ | 204 | #define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \ |
| 177 | do { \ | 205 | do { \ |
| 178 | if (target_multibyte) \ | 206 | if (target_multibyte) \ |
| 179 | { \ | 207 | { \ |
| @@ -191,7 +219,7 @@ | |||
| 191 | 219 | ||
| 192 | /* Set C a (possibly converted to multibyte) character at P, and set | 220 | /* Set C a (possibly converted to multibyte) character at P, and set |
| 193 | LEN to the byte length of that character. */ | 221 | LEN to the byte length of that character. */ |
| 194 | # define GET_CHAR_AFTER(c, p, len) \ | 222 | #define GET_CHAR_AFTER(c, p, len) \ |
| 195 | do { \ | 223 | do { \ |
| 196 | if (target_multibyte) \ | 224 | if (target_multibyte) \ |
| 197 | (c) = STRING_CHAR_AND_LENGTH (p, len); \ | 225 | (c) = STRING_CHAR_AND_LENGTH (p, len); \ |
| @@ -202,235 +230,66 @@ | |||
| 202 | (c) = RE_CHAR_TO_MULTIBYTE (c); \ | 230 | (c) = RE_CHAR_TO_MULTIBYTE (c); \ |
| 203 | } \ | 231 | } \ |
| 204 | } while (0) | 232 | } while (0) |
| 205 | |||
| 206 | #else /* not emacs */ | ||
| 207 | |||
| 208 | /* If we are not linking with Emacs proper, | ||
| 209 | we can't use the relocating allocator | ||
| 210 | even if config.h says that we can. */ | ||
| 211 | # undef REL_ALLOC | ||
| 212 | |||
| 213 | # include <unistd.h> | ||
| 214 | |||
| 215 | /* When used in Emacs's lib-src, we need xmalloc and xrealloc. */ | ||
| 216 | |||
| 217 | static ATTRIBUTE_MALLOC void * | ||
| 218 | xmalloc (size_t size) | ||
| 219 | { | ||
| 220 | void *val = malloc (size); | ||
| 221 | if (!val && size) | ||
| 222 | { | ||
| 223 | write (STDERR_FILENO, "virtual memory exhausted\n", 25); | ||
| 224 | exit (1); | ||
| 225 | } | ||
| 226 | return val; | ||
| 227 | } | ||
| 228 | |||
| 229 | static void * | ||
| 230 | xrealloc (void *block, size_t size) | ||
| 231 | { | ||
| 232 | void *val; | ||
| 233 | /* We must call malloc explicitly when BLOCK is 0, since some | ||
| 234 | reallocs don't do this. */ | ||
| 235 | if (! block) | ||
| 236 | val = malloc (size); | ||
| 237 | else | ||
| 238 | val = realloc (block, size); | ||
| 239 | if (!val && size) | ||
| 240 | { | ||
| 241 | write (STDERR_FILENO, "virtual memory exhausted\n", 25); | ||
| 242 | exit (1); | ||
| 243 | } | ||
| 244 | return val; | ||
| 245 | } | ||
| 246 | |||
| 247 | # ifdef malloc | ||
| 248 | # undef malloc | ||
| 249 | # endif | ||
| 250 | # define malloc xmalloc | ||
| 251 | # ifdef realloc | ||
| 252 | # undef realloc | ||
| 253 | # endif | ||
| 254 | # define realloc xrealloc | ||
| 255 | |||
| 256 | # include <stdbool.h> | ||
| 257 | # include <string.h> | ||
| 258 | |||
| 259 | /* Define the syntax stuff for \<, \>, etc. */ | ||
| 260 | |||
| 261 | /* Sword must be nonzero for the wordchar pattern commands in re_match_2. */ | ||
| 262 | enum syntaxcode { Swhitespace = 0, Sword = 1, Ssymbol = 2 }; | ||
| 263 | |||
| 264 | /* Dummy macros for non-Emacs environments. */ | ||
| 265 | # define MAX_MULTIBYTE_LENGTH 1 | ||
| 266 | # define RE_MULTIBYTE_P(x) 0 | ||
| 267 | # define RE_TARGET_MULTIBYTE_P(x) 0 | ||
| 268 | # define WORD_BOUNDARY_P(c1, c2) (0) | ||
| 269 | # define BYTES_BY_CHAR_HEAD(p) (1) | ||
| 270 | # define PREV_CHAR_BOUNDARY(p, limit) ((p)--) | ||
| 271 | # define STRING_CHAR(p) (*(p)) | ||
| 272 | # define RE_STRING_CHAR(p, multibyte) STRING_CHAR (p) | ||
| 273 | # define CHAR_STRING(c, s) (*(s) = (c), 1) | ||
| 274 | # define STRING_CHAR_AND_LENGTH(p, actual_len) ((actual_len) = 1, *(p)) | ||
| 275 | # define RE_STRING_CHAR_AND_LENGTH(p, len, multibyte) STRING_CHAR_AND_LENGTH (p, len) | ||
| 276 | # define RE_CHAR_TO_MULTIBYTE(c) (c) | ||
| 277 | # define RE_CHAR_TO_UNIBYTE(c) (c) | ||
| 278 | # define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \ | ||
| 279 | (c = ((p) == (str2) ? *((end1) - 1) : *((p) - 1))) | ||
| 280 | # define GET_CHAR_AFTER(c, p, len) \ | ||
| 281 | (c = *p, len = 1) | ||
| 282 | # define CHAR_BYTE8_P(c) (0) | ||
| 283 | # define CHAR_LEADING_CODE(c) (c) | ||
| 284 | |||
| 285 | #endif /* not emacs */ | ||
| 286 | |||
| 287 | #ifndef RE_TRANSLATE | ||
| 288 | # define RE_TRANSLATE(TBL, C) ((unsigned char)(TBL)[C]) | ||
| 289 | # define RE_TRANSLATE_P(TBL) (TBL) | ||
| 290 | #endif | ||
| 291 | 233 | ||
| 292 | /* Get the interface, including the syntax bits. */ | ||
| 293 | #include "regex-emacs.h" | ||
| 294 | |||
| 295 | /* isalpha etc. are used for the character classes. */ | 234 | /* isalpha etc. are used for the character classes. */ |
| 296 | #include <ctype.h> | 235 | #include <ctype.h> |
| 297 | 236 | ||
| 298 | #ifdef emacs | ||
| 299 | |||
| 300 | /* 1 if C is an ASCII character. */ | 237 | /* 1 if C is an ASCII character. */ |
| 301 | # define IS_REAL_ASCII(c) ((c) < 0200) | 238 | #define IS_REAL_ASCII(c) ((c) < 0200) |
| 302 | 239 | ||
| 303 | /* 1 if C is a unibyte character. */ | 240 | /* 1 if C is a unibyte character. */ |
| 304 | # define ISUNIBYTE(c) (SINGLE_BYTE_CHAR_P ((c))) | 241 | #define ISUNIBYTE(c) (SINGLE_BYTE_CHAR_P ((c))) |
| 305 | 242 | ||
| 306 | /* The Emacs definitions should not be directly affected by locales. */ | 243 | /* The Emacs definitions should not be directly affected by locales. */ |
| 307 | 244 | ||
| 308 | /* In Emacs, these are only used for single-byte characters. */ | 245 | /* In Emacs, these are only used for single-byte characters. */ |
| 309 | # define ISDIGIT(c) ((c) >= '0' && (c) <= '9') | 246 | #define ISDIGIT(c) ((c) >= '0' && (c) <= '9') |
| 310 | # define ISCNTRL(c) ((c) < ' ') | 247 | #define ISCNTRL(c) ((c) < ' ') |
| 311 | # define ISXDIGIT(c) (0 <= char_hexdigit (c)) | 248 | #define ISXDIGIT(c) (0 <= char_hexdigit (c)) |
| 312 | 249 | ||
| 313 | /* The rest must handle multibyte characters. */ | 250 | /* The rest must handle multibyte characters. */ |
| 314 | 251 | ||
| 315 | # define ISBLANK(c) (IS_REAL_ASCII (c) \ | 252 | #define ISBLANK(c) (IS_REAL_ASCII (c) \ |
| 316 | ? ((c) == ' ' || (c) == '\t') \ | 253 | ? ((c) == ' ' || (c) == '\t') \ |
| 317 | : blankp (c)) | 254 | : blankp (c)) |
| 318 | 255 | ||
| 319 | # define ISGRAPH(c) (SINGLE_BYTE_CHAR_P (c) \ | 256 | #define ISGRAPH(c) (SINGLE_BYTE_CHAR_P (c) \ |
| 320 | ? (c) > ' ' && !((c) >= 0177 && (c) <= 0240) \ | 257 | ? (c) > ' ' && !((c) >= 0177 && (c) <= 0240) \ |
| 321 | : graphicp (c)) | 258 | : graphicp (c)) |
| 322 | 259 | ||
| 323 | # define ISPRINT(c) (SINGLE_BYTE_CHAR_P (c) \ | 260 | #define ISPRINT(c) (SINGLE_BYTE_CHAR_P (c) \ |
| 324 | ? (c) >= ' ' && !((c) >= 0177 && (c) <= 0237) \ | 261 | ? (c) >= ' ' && !((c) >= 0177 && (c) <= 0237) \ |
| 325 | : printablep (c)) | 262 | : printablep (c)) |
| 326 | 263 | ||
| 327 | # define ISALNUM(c) (IS_REAL_ASCII (c) \ | 264 | #define ISALNUM(c) (IS_REAL_ASCII (c) \ |
| 328 | ? (((c) >= 'a' && (c) <= 'z') \ | 265 | ? (((c) >= 'a' && (c) <= 'z') \ |
| 329 | || ((c) >= 'A' && (c) <= 'Z') \ | 266 | || ((c) >= 'A' && (c) <= 'Z') \ |
| 330 | || ((c) >= '0' && (c) <= '9')) \ | 267 | || ((c) >= '0' && (c) <= '9')) \ |
| 331 | : alphanumericp (c)) | 268 | : alphanumericp (c)) |
| 332 | 269 | ||
| 333 | # define ISALPHA(c) (IS_REAL_ASCII (c) \ | 270 | #define ISALPHA(c) (IS_REAL_ASCII (c) \ |
| 334 | ? (((c) >= 'a' && (c) <= 'z') \ | 271 | ? (((c) >= 'a' && (c) <= 'z') \ |
| 335 | || ((c) >= 'A' && (c) <= 'Z')) \ | 272 | || ((c) >= 'A' && (c) <= 'Z')) \ |
| 336 | : alphabeticp (c)) | 273 | : alphabeticp (c)) |
| 337 | 274 | ||
| 338 | # define ISLOWER(c) lowercasep (c) | 275 | #define ISLOWER(c) lowercasep (c) |
| 339 | 276 | ||
| 340 | # define ISPUNCT(c) (IS_REAL_ASCII (c) \ | 277 | #define ISPUNCT(c) (IS_REAL_ASCII (c) \ |
| 341 | ? ((c) > ' ' && (c) < 0177 \ | 278 | ? ((c) > ' ' && (c) < 0177 \ |
| 342 | && !(((c) >= 'a' && (c) <= 'z') \ | 279 | && !(((c) >= 'a' && (c) <= 'z') \ |
| 343 | || ((c) >= 'A' && (c) <= 'Z') \ | 280 | || ((c) >= 'A' && (c) <= 'Z') \ |
| 344 | || ((c) >= '0' && (c) <= '9'))) \ | 281 | || ((c) >= '0' && (c) <= '9'))) \ |
| 345 | : SYNTAX (c) != Sword) | 282 | : SYNTAX (c) != Sword) |
| 346 | 283 | ||
| 347 | # define ISSPACE(c) (SYNTAX (c) == Swhitespace) | 284 | #define ISSPACE(c) (SYNTAX (c) == Swhitespace) |
| 348 | 285 | ||
| 349 | # define ISUPPER(c) uppercasep (c) | 286 | #define ISUPPER(c) uppercasep (c) |
| 350 | |||
| 351 | # define ISWORD(c) (SYNTAX (c) == Sword) | ||
| 352 | |||
| 353 | #else /* not emacs */ | ||
| 354 | |||
| 355 | /* 1 if C is an ASCII character. */ | ||
| 356 | # define IS_REAL_ASCII(c) ((c) < 0200) | ||
| 357 | |||
| 358 | /* This distinction is not meaningful, except in Emacs. */ | ||
| 359 | # define ISUNIBYTE(c) 1 | ||
| 360 | |||
| 361 | # ifdef isblank | ||
| 362 | # define ISBLANK(c) isblank (c) | ||
| 363 | # else | ||
| 364 | # define ISBLANK(c) ((c) == ' ' || (c) == '\t') | ||
| 365 | # endif | ||
| 366 | # ifdef isgraph | ||
| 367 | # define ISGRAPH(c) isgraph (c) | ||
| 368 | # else | ||
| 369 | # define ISGRAPH(c) (isprint (c) && !isspace (c)) | ||
| 370 | # endif | ||
| 371 | |||
| 372 | /* Solaris defines ISPRINT so we must undefine it first. */ | ||
| 373 | # undef ISPRINT | ||
| 374 | # define ISPRINT(c) isprint (c) | ||
| 375 | # define ISDIGIT(c) isdigit (c) | ||
| 376 | # define ISALNUM(c) isalnum (c) | ||
| 377 | # define ISALPHA(c) isalpha (c) | ||
| 378 | # define ISCNTRL(c) iscntrl (c) | ||
| 379 | # define ISLOWER(c) islower (c) | ||
| 380 | # define ISPUNCT(c) ispunct (c) | ||
| 381 | # define ISSPACE(c) isspace (c) | ||
| 382 | # define ISUPPER(c) isupper (c) | ||
| 383 | # define ISXDIGIT(c) isxdigit (c) | ||
| 384 | |||
| 385 | # define ISWORD(c) ISALPHA (c) | ||
| 386 | |||
| 387 | # ifdef _tolower | ||
| 388 | # define TOLOWER(c) _tolower (c) | ||
| 389 | # else | ||
| 390 | # define TOLOWER(c) tolower (c) | ||
| 391 | # endif | ||
| 392 | |||
| 393 | /* How many characters in the character set. */ | ||
| 394 | # define CHAR_SET_SIZE 256 | ||
| 395 | |||
| 396 | # ifdef SYNTAX_TABLE | ||
| 397 | |||
| 398 | extern char *re_syntax_table; | ||
| 399 | |||
| 400 | # else /* not SYNTAX_TABLE */ | ||
| 401 | |||
| 402 | static char re_syntax_table[CHAR_SET_SIZE]; | ||
| 403 | |||
| 404 | static void | ||
| 405 | init_syntax_once (void) | ||
| 406 | { | ||
| 407 | register int c; | ||
| 408 | static int done = 0; | ||
| 409 | |||
| 410 | if (done) | ||
| 411 | return; | ||
| 412 | |||
| 413 | memset (re_syntax_table, 0, sizeof re_syntax_table); | ||
| 414 | |||
| 415 | for (c = 0; c < CHAR_SET_SIZE; ++c) | ||
| 416 | if (ISALNUM (c)) | ||
| 417 | re_syntax_table[c] = Sword; | ||
| 418 | |||
| 419 | re_syntax_table['_'] = Ssymbol; | ||
| 420 | |||
| 421 | done = 1; | ||
| 422 | } | ||
| 423 | 287 | ||
| 424 | # endif /* not SYNTAX_TABLE */ | 288 | #define ISWORD(c) (SYNTAX (c) == Sword) |
| 425 | |||
| 426 | # define SYNTAX(c) re_syntax_table[(c)] | ||
| 427 | |||
| 428 | #endif /* not emacs */ | ||
| 429 | 289 | ||
| 430 | #define SIGN_EXTEND_CHAR(c) ((signed char) (c)) | 290 | #define SIGN_EXTEND_CHAR(c) ((signed char) (c)) |
| 431 | 291 | ||
| 432 | /* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we | 292 | /* Use alloca instead of malloc. This is because using malloc in |
| 433 | use `alloca' instead of `malloc'. This is because using malloc in | ||
| 434 | re_search* or re_match* could cause memory leaks when C-g is used | 293 | re_search* or re_match* could cause memory leaks when C-g is used |
| 435 | in Emacs (note that SAFE_ALLOCA could also call malloc, but does so | 294 | in Emacs (note that SAFE_ALLOCA could also call malloc, but does so |
| 436 | via `record_xmalloc' which uses `unwind_protect' to ensure the | 295 | via `record_xmalloc' which uses `unwind_protect' to ensure the |
| @@ -442,64 +301,17 @@ init_syntax_once (void) | |||
| 442 | not functions -- `alloca'-allocated space disappears at the end of the | 301 | not functions -- `alloca'-allocated space disappears at the end of the |
| 443 | function it is called in. */ | 302 | function it is called in. */ |
| 444 | 303 | ||
| 445 | #ifdef REGEX_MALLOC | ||
| 446 | |||
| 447 | # define REGEX_ALLOCATE malloc | ||
| 448 | # define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize) | ||
| 449 | # define REGEX_FREE free | ||
| 450 | |||
| 451 | #else /* not REGEX_MALLOC */ | ||
| 452 | |||
| 453 | # ifdef emacs | ||
| 454 | /* This may be adjusted in main(), if the stack is successfully grown. */ | 304 | /* This may be adjusted in main(), if the stack is successfully grown. */ |
| 455 | ptrdiff_t emacs_re_safe_alloca = MAX_ALLOCA; | 305 | ptrdiff_t emacs_re_safe_alloca = MAX_ALLOCA; |
| 456 | /* Like USE_SAFE_ALLOCA, but use emacs_re_safe_alloca. */ | 306 | /* Like USE_SAFE_ALLOCA, but use emacs_re_safe_alloca. */ |
| 457 | # define REGEX_USE_SAFE_ALLOCA \ | 307 | #define REGEX_USE_SAFE_ALLOCA \ |
| 458 | ptrdiff_t sa_avail = emacs_re_safe_alloca; \ | 308 | USE_SAFE_ALLOCA; sa_avail = emacs_re_safe_alloca |
| 459 | ptrdiff_t sa_count = SPECPDL_INDEX () | ||
| 460 | |||
| 461 | # define REGEX_SAFE_FREE() SAFE_FREE () | ||
| 462 | # define REGEX_ALLOCATE SAFE_ALLOCA | ||
| 463 | # else | ||
| 464 | # include <alloca.h> | ||
| 465 | # define REGEX_ALLOCATE alloca | ||
| 466 | # endif | ||
| 467 | 309 | ||
| 468 | /* Assumes a `char *destination' variable. */ | 310 | /* Assumes a `char *destination' variable. */ |
| 469 | # define REGEX_REALLOCATE(source, osize, nsize) \ | 311 | #define REGEX_REALLOCATE(source, osize, nsize) \ |
| 470 | (destination = REGEX_ALLOCATE (nsize), \ | 312 | (destination = SAFE_ALLOCA (nsize), \ |
| 471 | memcpy (destination, source, osize)) | 313 | memcpy (destination, source, osize)) |
| 472 | 314 | ||
| 473 | /* No need to do anything to free, after alloca. */ | ||
| 474 | # define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */ | ||
| 475 | |||
| 476 | #endif /* not REGEX_MALLOC */ | ||
| 477 | |||
| 478 | #ifndef REGEX_USE_SAFE_ALLOCA | ||
| 479 | # define REGEX_USE_SAFE_ALLOCA ((void) 0) | ||
| 480 | # define REGEX_SAFE_FREE() ((void) 0) | ||
| 481 | #endif | ||
| 482 | |||
| 483 | /* Define how to allocate the failure stack. */ | ||
| 484 | |||
| 485 | #if defined REL_ALLOC && defined REGEX_MALLOC | ||
| 486 | |||
| 487 | # define REGEX_ALLOCATE_STACK(size) \ | ||
| 488 | r_alloc (&failure_stack_ptr, (size)) | ||
| 489 | # define REGEX_REALLOCATE_STACK(source, osize, nsize) \ | ||
| 490 | r_re_alloc (&failure_stack_ptr, (nsize)) | ||
| 491 | # define REGEX_FREE_STACK(ptr) \ | ||
| 492 | r_alloc_free (&failure_stack_ptr) | ||
| 493 | |||
| 494 | #else /* not using relocating allocator */ | ||
| 495 | |||
| 496 | # define REGEX_ALLOCATE_STACK(size) REGEX_ALLOCATE (size) | ||
| 497 | # define REGEX_REALLOCATE_STACK(source, o, n) REGEX_REALLOCATE (source, o, n) | ||
| 498 | # define REGEX_FREE_STACK(ptr) REGEX_FREE (ptr) | ||
| 499 | |||
| 500 | #endif /* not using relocating allocator */ | ||
| 501 | |||
| 502 | |||
| 503 | /* True if `size1' is non-NULL and PTR is pointing anywhere inside | 315 | /* True if `size1' is non-NULL and PTR is pointing anywhere inside |
| 504 | `string1' or just past its end. This works if PTR is NULL, which is | 316 | `string1' or just past its end. This works if PTR is NULL, which is |
| 505 | a good thing. */ | 317 | a good thing. */ |
| @@ -507,30 +319,21 @@ ptrdiff_t emacs_re_safe_alloca = MAX_ALLOCA; | |||
| 507 | (size1 && string1 <= (ptr) && (ptr) <= string1 + size1) | 319 | (size1 && string1 <= (ptr) && (ptr) <= string1 + size1) |
| 508 | 320 | ||
| 509 | /* (Re)Allocate N items of type T using malloc, or fail. */ | 321 | /* (Re)Allocate N items of type T using malloc, or fail. */ |
| 510 | #define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t))) | 322 | #define TALLOC(n, t) ((t *) xmalloc ((n) * sizeof (t))) |
| 511 | #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t))) | 323 | #define RETALLOC(addr, n, t) ((addr) = (t *) xrealloc (addr, (n) * sizeof (t))) |
| 512 | #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t))) | ||
| 513 | 324 | ||
| 514 | #define BYTEWIDTH 8 /* In bits. */ | 325 | #define BYTEWIDTH 8 /* In bits. */ |
| 515 | 326 | ||
| 516 | #ifndef emacs | ||
| 517 | # undef max | ||
| 518 | # undef min | ||
| 519 | # define max(a, b) ((a) > (b) ? (a) : (b)) | ||
| 520 | # define min(a, b) ((a) < (b) ? (a) : (b)) | ||
| 521 | #endif | ||
| 522 | |||
| 523 | /* Type of source-pattern and string chars. */ | 327 | /* Type of source-pattern and string chars. */ |
| 524 | typedef const unsigned char re_char; | 328 | typedef const unsigned char re_char; |
| 525 | 329 | ||
| 526 | typedef char boolean; | 330 | static void re_compile_fastmap (struct re_pattern_buffer *); |
| 527 | 331 | static ptrdiff_t re_match_2_internal (struct re_pattern_buffer *bufp, | |
| 528 | static regoff_t re_match_2_internal (struct re_pattern_buffer *bufp, | ||
| 529 | re_char *string1, size_t size1, | 332 | re_char *string1, size_t size1, |
| 530 | re_char *string2, size_t size2, | 333 | re_char *string2, size_t size2, |
| 531 | ssize_t pos, | 334 | ptrdiff_t pos, |
| 532 | struct re_registers *regs, | 335 | struct re_registers *regs, |
| 533 | ssize_t stop); | 336 | ptrdiff_t stop); |
| 534 | 337 | ||
| 535 | /* These are the command codes that appear in compiled regular | 338 | /* These are the command codes that appear in compiled regular |
| 536 | expressions. Some opcodes are followed by argument bytes. A | 339 | expressions. Some opcodes are followed by argument bytes. A |
| @@ -592,8 +395,7 @@ typedef enum | |||
| 592 | /* Fail unless at end of line. */ | 395 | /* Fail unless at end of line. */ |
| 593 | endline, | 396 | endline, |
| 594 | 397 | ||
| 595 | /* Succeeds if at beginning of buffer (if emacs) or at beginning | 398 | /* Succeeds if at beginning of buffer. */ |
| 596 | of string to be matched (if not). */ | ||
| 597 | begbuf, | 399 | begbuf, |
| 598 | 400 | ||
| 599 | /* Analogously, for end of buffer/string. */ | 401 | /* Analogously, for end of buffer/string. */ |
| @@ -658,10 +460,9 @@ typedef enum | |||
| 658 | syntaxspec, | 460 | syntaxspec, |
| 659 | 461 | ||
| 660 | /* Matches any character whose syntax is not that specified. */ | 462 | /* Matches any character whose syntax is not that specified. */ |
| 661 | notsyntaxspec | 463 | notsyntaxspec, |
| 662 | 464 | ||
| 663 | #ifdef emacs | 465 | at_dot, /* Succeeds if at point. */ |
| 664 | , at_dot, /* Succeeds if at point. */ | ||
| 665 | 466 | ||
| 666 | /* Matches any character whose category-set contains the specified | 467 | /* Matches any character whose category-set contains the specified |
| 667 | category. The operator is followed by a byte which contains a | 468 | category. The operator is followed by a byte which contains a |
| @@ -672,7 +473,6 @@ typedef enum | |||
| 672 | specified category. The operator is followed by a byte which | 473 | specified category. The operator is followed by a byte which |
| 673 | contains the category code (mnemonic ASCII character). */ | 474 | contains the category code (mnemonic ASCII character). */ |
| 674 | notcategoryspec | 475 | notcategoryspec |
| 675 | #endif /* emacs */ | ||
| 676 | } re_opcode_t; | 476 | } re_opcode_t; |
| 677 | 477 | ||
| 678 | /* Common operations on the compiled pattern. */ | 478 | /* Common operations on the compiled pattern. */ |
| @@ -760,12 +560,10 @@ extract_number_and_incr (re_char **source) | |||
| 760 | and the 2 bytes of flags at the start of the range table. */ | 560 | and the 2 bytes of flags at the start of the range table. */ |
| 761 | #define CHARSET_RANGE_TABLE(p) (&(p)[4 + CHARSET_BITMAP_SIZE (p)]) | 561 | #define CHARSET_RANGE_TABLE(p) (&(p)[4 + CHARSET_BITMAP_SIZE (p)]) |
| 762 | 562 | ||
| 763 | #ifdef emacs | ||
| 764 | /* Extract the bit flags that start a range table. */ | 563 | /* Extract the bit flags that start a range table. */ |
| 765 | #define CHARSET_RANGE_TABLE_BITS(p) \ | 564 | #define CHARSET_RANGE_TABLE_BITS(p) \ |
| 766 | ((p)[2 + CHARSET_BITMAP_SIZE (p)] \ | 565 | ((p)[2 + CHARSET_BITMAP_SIZE (p)] \ |
| 767 | + (p)[3 + CHARSET_BITMAP_SIZE (p)] * 0x100) | 566 | + (p)[3 + CHARSET_BITMAP_SIZE (p)] * 0x100) |
| 768 | #endif | ||
| 769 | 567 | ||
| 770 | /* Return the address of end of RANGE_TABLE. COUNT is number of | 568 | /* Return the address of end of RANGE_TABLE. COUNT is number of |
| 771 | ranges (which is a pair of (start, end)) in the RANGE_TABLE. `* 2' | 569 | ranges (which is a pair of (start, end)) in the RANGE_TABLE. `* 2' |
| @@ -774,29 +572,23 @@ extract_number_and_incr (re_char **source) | |||
| 774 | #define CHARSET_RANGE_TABLE_END(range_table, count) \ | 572 | #define CHARSET_RANGE_TABLE_END(range_table, count) \ |
| 775 | ((range_table) + (count) * 2 * 3) | 573 | ((range_table) + (count) * 2 * 3) |
| 776 | 574 | ||
| 777 | /* If DEBUG is defined, Regex prints many voluminous messages about what | 575 | /* If REGEX_EMACS_DEBUG is defined, print many voluminous messages |
| 778 | it is doing (if the variable `debug' is nonzero). If linked with the | 576 | (if the variable regex_emacs_debug is positive). */ |
| 779 | main program in `iregex.c', you can enter patterns and strings | ||
| 780 | interactively. And if linked with the main program in `main.c' and | ||
| 781 | the other test files, you can run the already-written tests. */ | ||
| 782 | 577 | ||
| 783 | #ifdef DEBUG | 578 | #ifdef REGEX_EMACS_DEBUG |
| 784 | 579 | ||
| 785 | /* We use standard I/O for debugging. */ | 580 | /* We use standard I/O for debugging. */ |
| 786 | # include <stdio.h> | 581 | # include <stdio.h> |
| 787 | 582 | ||
| 788 | /* It is useful to test things that ``must'' be true when debugging. */ | 583 | static int regex_emacs_debug = -100000; |
| 789 | # include <assert.h> | ||
| 790 | |||
| 791 | static int debug = -100000; | ||
| 792 | 584 | ||
| 793 | # define DEBUG_STATEMENT(e) e | 585 | # define DEBUG_STATEMENT(e) e |
| 794 | # define DEBUG_PRINT(...) if (debug > 0) printf (__VA_ARGS__) | 586 | # define DEBUG_PRINT(...) if (regex_emacs_debug > 0) printf (__VA_ARGS__) |
| 795 | # define DEBUG_COMPILES_ARGUMENTS | 587 | # define DEBUG_COMPILES_ARGUMENTS |
| 796 | # define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \ | 588 | # define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \ |
| 797 | if (debug > 0) print_partial_compiled_pattern (s, e) | 589 | if (regex_emacs_debug > 0) print_partial_compiled_pattern (s, e) |
| 798 | # define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \ | 590 | # define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \ |
| 799 | if (debug > 0) print_double_string (w, s1, sz1, s2, sz2) | 591 | if (regex_emacs_debug > 0) print_double_string (w, s1, sz1, s2, sz2) |
| 800 | 592 | ||
| 801 | 593 | ||
| 802 | /* Print the fastmap in human-readable form. */ | 594 | /* Print the fastmap in human-readable form. */ |
| @@ -1085,7 +877,7 @@ print_compiled_pattern (struct re_pattern_buffer *bufp) | |||
| 1085 | re_char *buffer = bufp->buffer; | 877 | re_char *buffer = bufp->buffer; |
| 1086 | 878 | ||
| 1087 | print_partial_compiled_pattern (buffer, buffer + bufp->used); | 879 | print_partial_compiled_pattern (buffer, buffer + bufp->used); |
| 1088 | printf ("%ld bytes used/%ld bytes allocated.\n", | 880 | printf ("%zu bytes used/%zu bytes allocated.\n", |
| 1089 | bufp->used, bufp->allocated); | 881 | bufp->used, bufp->allocated); |
| 1090 | 882 | ||
| 1091 | if (bufp->fastmap_accurate && bufp->fastmap) | 883 | if (bufp->fastmap_accurate && bufp->fastmap) |
| @@ -1131,146 +923,100 @@ print_double_string (re_char *where, re_char *string1, ssize_t size1, | |||
| 1131 | } | 923 | } |
| 1132 | } | 924 | } |
| 1133 | 925 | ||
| 1134 | #else /* not DEBUG */ | 926 | #else /* not REGEX_EMACS_DEBUG */ |
| 1135 | |||
| 1136 | # undef assert | ||
| 1137 | # define assert(e) | ||
| 1138 | 927 | ||
| 1139 | # define DEBUG_STATEMENT(e) | 928 | # define DEBUG_STATEMENT(e) |
| 1140 | # define DEBUG_PRINT(...) | 929 | # define DEBUG_PRINT(...) |
| 1141 | # define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) | 930 | # define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) |
| 1142 | # define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) | 931 | # define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) |
| 1143 | 932 | ||
| 1144 | #endif /* not DEBUG */ | 933 | #endif /* not REGEX_EMACS_DEBUG */ |
| 1145 | 934 | ||
| 1146 | #ifndef emacs | 935 | typedef enum |
| 1147 | |||
| 1148 | /* Set by `re_set_syntax' to the current regexp syntax to recognize. Can | ||
| 1149 | also be assigned to arbitrarily: each pattern buffer stores its own | ||
| 1150 | syntax, so it can be changed between regex compilations. */ | ||
| 1151 | /* This has no initializer because initialized variables in Emacs | ||
| 1152 | become read-only after dumping. */ | ||
| 1153 | reg_syntax_t re_syntax_options; | ||
| 1154 | |||
| 1155 | |||
| 1156 | /* Specify the precise syntax of regexps for compilation. This provides | ||
| 1157 | for compatibility for various utilities which historically have | ||
| 1158 | different, incompatible syntaxes. | ||
| 1159 | |||
| 1160 | The argument SYNTAX is a bit mask comprised of the various bits | ||
| 1161 | defined in regex-emacs.h. We return the old syntax. */ | ||
| 1162 | |||
| 1163 | reg_syntax_t | ||
| 1164 | re_set_syntax (reg_syntax_t syntax) | ||
| 1165 | { | 936 | { |
| 1166 | reg_syntax_t ret = re_syntax_options; | 937 | REG_NOERROR = 0, /* Success. */ |
| 1167 | 938 | REG_NOMATCH, /* Didn't find a match (for regexec). */ | |
| 1168 | re_syntax_options = syntax; | 939 | |
| 1169 | return ret; | 940 | /* POSIX regcomp return error codes. (In the order listed in the |
| 1170 | } | 941 | standard.) An older version of this code supported the POSIX |
| 1171 | WEAK_ALIAS (__re_set_syntax, re_set_syntax) | 942 | API; this version continues to use these names internally. */ |
| 1172 | 943 | REG_BADPAT, /* Invalid pattern. */ | |
| 1173 | #endif | 944 | REG_ECOLLATE, /* Not implemented. */ |
| 1174 | 945 | REG_ECTYPE, /* Invalid character class name. */ | |
| 1175 | /* This table gives an error message for each of the error codes listed | 946 | REG_EESCAPE, /* Trailing backslash. */ |
| 1176 | in regex-emacs.h. Obviously the order here has to be same as there. | 947 | REG_ESUBREG, /* Invalid back reference. */ |
| 1177 | POSIX doesn't require that we do anything for REG_NOERROR, | 948 | REG_EBRACK, /* Unmatched left bracket. */ |
| 1178 | but why not be nice? */ | 949 | REG_EPAREN, /* Parenthesis imbalance. */ |
| 950 | REG_EBRACE, /* Unmatched \{. */ | ||
| 951 | REG_BADBR, /* Invalid contents of \{\}. */ | ||
| 952 | REG_ERANGE, /* Invalid range end. */ | ||
| 953 | REG_ESPACE, /* Ran out of memory. */ | ||
| 954 | REG_BADRPT, /* No preceding re for repetition op. */ | ||
| 955 | |||
| 956 | /* Error codes we've added. */ | ||
| 957 | REG_EEND, /* Premature end. */ | ||
| 958 | REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */ | ||
| 959 | REG_ERPAREN, /* Unmatched ) or \); not returned from regcomp. */ | ||
| 960 | REG_ERANGEX, /* Range striding over charsets. */ | ||
| 961 | REG_ESIZEBR /* n or m too big in \{n,m\} */ | ||
| 962 | } reg_errcode_t; | ||
| 1179 | 963 | ||
| 1180 | static const char *re_error_msgid[] = | 964 | static const char *re_error_msgid[] = |
| 1181 | { | 965 | { |
| 1182 | gettext_noop ("Success"), /* REG_NOERROR */ | 966 | [REG_NOERROR] = "Success", |
| 1183 | gettext_noop ("No match"), /* REG_NOMATCH */ | 967 | [REG_NOMATCH] = "No match", |
| 1184 | gettext_noop ("Invalid regular expression"), /* REG_BADPAT */ | 968 | [REG_BADPAT] = "Invalid regular expression", |
| 1185 | gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */ | 969 | [REG_ECOLLATE] = "Invalid collation character", |
| 1186 | gettext_noop ("Invalid character class name"), /* REG_ECTYPE */ | 970 | [REG_ECTYPE] = "Invalid character class name", |
| 1187 | gettext_noop ("Trailing backslash"), /* REG_EESCAPE */ | 971 | [REG_EESCAPE] = "Trailing backslash", |
| 1188 | gettext_noop ("Invalid back reference"), /* REG_ESUBREG */ | 972 | [REG_ESUBREG] = "Invalid back reference", |
| 1189 | gettext_noop ("Unmatched [ or [^"), /* REG_EBRACK */ | 973 | [REG_EBRACK] = "Unmatched [ or [^", |
| 1190 | gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */ | 974 | [REG_EPAREN] = "Unmatched ( or \\(", |
| 1191 | gettext_noop ("Unmatched \\{"), /* REG_EBRACE */ | 975 | [REG_EBRACE] = "Unmatched \\{", |
| 1192 | gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */ | 976 | [REG_BADBR] = "Invalid content of \\{\\}", |
| 1193 | gettext_noop ("Invalid range end"), /* REG_ERANGE */ | 977 | [REG_ERANGE] = "Invalid range end", |
| 1194 | gettext_noop ("Memory exhausted"), /* REG_ESPACE */ | 978 | [REG_ESPACE] = "Memory exhausted", |
| 1195 | gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */ | 979 | [REG_BADRPT] = "Invalid preceding regular expression", |
| 1196 | gettext_noop ("Premature end of regular expression"), /* REG_EEND */ | 980 | [REG_EEND] = "Premature end of regular expression", |
| 1197 | gettext_noop ("Regular expression too big"), /* REG_ESIZE */ | 981 | [REG_ESIZE] = "Regular expression too big", |
| 1198 | gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */ | 982 | [REG_ERPAREN] = "Unmatched ) or \\)", |
| 1199 | gettext_noop ("Range striding over charsets"), /* REG_ERANGEX */ | 983 | [REG_ERANGEX ] = "Range striding over charsets", |
| 1200 | gettext_noop ("Invalid content of \\{\\}, repetitions too big") /* REG_ESIZEBR */ | 984 | [REG_ESIZEBR ] = "Invalid content of \\{\\}", |
| 1201 | }; | 985 | }; |
| 1202 | |||
| 1203 | /* Whether to allocate memory during matching. */ | ||
| 1204 | |||
| 1205 | /* Define MATCH_MAY_ALLOCATE to allow the searching and matching | ||
| 1206 | functions allocate memory for the failure stack and registers. | ||
| 1207 | Normally should be defined, because otherwise searching and | ||
| 1208 | matching routines will have much smaller memory resources at their | ||
| 1209 | disposal, and therefore might fail to handle complex regexps. | ||
| 1210 | Therefore undefine MATCH_MAY_ALLOCATE only in the following | ||
| 1211 | exceptional situations: | ||
| 1212 | |||
| 1213 | . When running on a system where memory is at premium. | ||
| 1214 | . When alloca cannot be used at all, perhaps due to bugs in | ||
| 1215 | its implementation, or its being unavailable, or due to a | ||
| 1216 | very small stack size. This requires to define REGEX_MALLOC | ||
| 1217 | to use malloc instead, which in turn could lead to memory | ||
| 1218 | leaks if search is interrupted by a signal. (For these | ||
| 1219 | reasons, defining REGEX_MALLOC when building Emacs | ||
| 1220 | automatically undefines MATCH_MAY_ALLOCATE, but outside | ||
| 1221 | Emacs you may not care about memory leaks.) If you want to | ||
| 1222 | prevent the memory leaks, undefine MATCH_MAY_ALLOCATE. | ||
| 1223 | . When code that calls the searching and matching functions | ||
| 1224 | cannot allow memory allocation, for whatever reasons. */ | ||
| 1225 | |||
| 1226 | /* Normally, this is fine. */ | ||
| 1227 | #define MATCH_MAY_ALLOCATE | ||
| 1228 | |||
| 1229 | /* The match routines may not allocate if (1) they would do it with malloc | ||
| 1230 | and (2) it's not safe for them to use malloc. | ||
| 1231 | Note that if REL_ALLOC is defined, matching would not use malloc for the | ||
| 1232 | failure stack, but we would still use it for the register vectors; | ||
| 1233 | so REL_ALLOC should not affect this. */ | ||
| 1234 | #if defined REGEX_MALLOC && defined emacs | ||
| 1235 | # undef MATCH_MAY_ALLOCATE | ||
| 1236 | #endif | ||
| 1237 | 986 | ||
| 1238 | /* While regex matching of a single compiled pattern isn't reentrant | 987 | /* For 'regs_allocated'. */ |
| 1239 | (because we compile regexes to bytecode programs, and the bytecode | 988 | enum { REGS_UNALLOCATED, REGS_REALLOCATE, REGS_FIXED }; |
| 1240 | programs are self-modifying), the regex machinery must nevertheless | ||
| 1241 | be reentrant with respect to _different_ patterns, and we do that | ||
| 1242 | by avoiding global variables and using MATCH_MAY_ALLOCATE. */ | ||
| 1243 | #if !defined MATCH_MAY_ALLOCATE && defined emacs | ||
| 1244 | # error "Emacs requires MATCH_MAY_ALLOCATE" | ||
| 1245 | #endif | ||
| 1246 | 989 | ||
| 990 | /* If 'regs_allocated' is REGS_UNALLOCATED in the pattern buffer, | ||
| 991 | 're_match_2' returns information about at least this many registers | ||
| 992 | the first time a `regs' structure is passed. */ | ||
| 993 | enum { RE_NREGS = 30 }; | ||
| 1247 | 994 | ||
| 995 | /* The searching and matching functions allocate memory for the | ||
| 996 | failure stack and registers. Otherwise searching and matching | ||
| 997 | routines would have much smaller memory resources at their | ||
| 998 | disposal, and therefore might fail to handle complex regexps. */ | ||
| 999 | |||
| 1248 | /* Failure stack declarations and macros; both re_compile_fastmap and | 1000 | /* Failure stack declarations and macros; both re_compile_fastmap and |
| 1249 | re_match_2 use a failure stack. These have to be macros because of | 1001 | re_match_2 use a failure stack. These have to be macros because of |
| 1250 | REGEX_ALLOCATE_STACK. */ | 1002 | SAFE_ALLOCA. */ |
| 1251 | 1003 | ||
| 1252 | 1004 | ||
| 1253 | /* Approximate number of failure points for which to initially allocate space | 1005 | /* Approximate number of failure points for which to initially allocate space |
| 1254 | when matching. If this number is exceeded, we allocate more | 1006 | when matching. If this number is exceeded, we allocate more |
| 1255 | space, so it is not a hard limit. */ | 1007 | space, so it is not a hard limit. */ |
| 1256 | #ifndef INIT_FAILURE_ALLOC | 1008 | #define INIT_FAILURE_ALLOC 20 |
| 1257 | # define INIT_FAILURE_ALLOC 20 | ||
| 1258 | #endif | ||
| 1259 | 1009 | ||
| 1260 | /* Roughly the maximum number of failure points on the stack. Would be | 1010 | /* Roughly the maximum number of failure points on the stack. Would be |
| 1261 | exactly that if always used TYPICAL_FAILURE_SIZE items each time we failed. | 1011 | exactly that if always used TYPICAL_FAILURE_SIZE items each time we failed. |
| 1262 | This is a variable only so users of regex can assign to it; we never | 1012 | This is a variable only so users of regex can assign to it; we never |
| 1263 | change it ourselves. We always multiply it by TYPICAL_FAILURE_SIZE | 1013 | change it ourselves. We always multiply it by TYPICAL_FAILURE_SIZE |
| 1264 | before using it, so it should probably be a byte-count instead. */ | 1014 | before using it, so it should probably be a byte-count instead. */ |
| 1265 | # if defined MATCH_MAY_ALLOCATE | ||
| 1266 | /* Note that 4400 was enough to cause a crash on Alpha OSF/1, | 1015 | /* Note that 4400 was enough to cause a crash on Alpha OSF/1, |
| 1267 | whose default stack limit is 2mb. In order for a larger | 1016 | whose default stack limit is 2mb. In order for a larger |
| 1268 | value to work reliably, you have to try to make it accord | 1017 | value to work reliably, you have to try to make it accord |
| 1269 | with the process stack limit. */ | 1018 | with the process stack limit. */ |
| 1270 | size_t emacs_re_max_failures = 40000; | 1019 | size_t emacs_re_max_failures = 40000; |
| 1271 | # else | ||
| 1272 | size_t emacs_re_max_failures = 4000; | ||
| 1273 | # endif | ||
| 1274 | 1020 | ||
| 1275 | union fail_stack_elt | 1021 | union fail_stack_elt |
| 1276 | { | 1022 | { |
| @@ -1292,33 +1038,17 @@ typedef struct | |||
| 1292 | #define FAIL_STACK_EMPTY() (fail_stack.frame == 0) | 1038 | #define FAIL_STACK_EMPTY() (fail_stack.frame == 0) |
| 1293 | 1039 | ||
| 1294 | 1040 | ||
| 1295 | /* Define macros to initialize and free the failure stack. | 1041 | /* Define macros to initialize and free the failure stack. */ |
| 1296 | Do `return -2' if the alloc fails. */ | ||
| 1297 | 1042 | ||
| 1298 | #ifdef MATCH_MAY_ALLOCATE | 1043 | #define INIT_FAIL_STACK() \ |
| 1299 | # define INIT_FAIL_STACK() \ | ||
| 1300 | do { \ | 1044 | do { \ |
| 1301 | fail_stack.stack = \ | 1045 | fail_stack.stack = \ |
| 1302 | REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * TYPICAL_FAILURE_SIZE \ | 1046 | SAFE_ALLOCA (INIT_FAILURE_ALLOC * TYPICAL_FAILURE_SIZE \ |
| 1303 | * sizeof (fail_stack_elt_t)); \ | 1047 | * sizeof (fail_stack_elt_t)); \ |
| 1304 | \ | ||
| 1305 | if (fail_stack.stack == NULL) \ | ||
| 1306 | return -2; \ | ||
| 1307 | \ | ||
| 1308 | fail_stack.size = INIT_FAILURE_ALLOC; \ | 1048 | fail_stack.size = INIT_FAILURE_ALLOC; \ |
| 1309 | fail_stack.avail = 0; \ | 1049 | fail_stack.avail = 0; \ |
| 1310 | fail_stack.frame = 0; \ | 1050 | fail_stack.frame = 0; \ |
| 1311 | } while (0) | 1051 | } while (0) |
| 1312 | #else | ||
| 1313 | # define INIT_FAIL_STACK() \ | ||
| 1314 | do { \ | ||
| 1315 | fail_stack.avail = 0; \ | ||
| 1316 | fail_stack.frame = 0; \ | ||
| 1317 | } while (0) | ||
| 1318 | |||
| 1319 | # define RETALLOC_IF(addr, n, t) \ | ||
| 1320 | if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t) | ||
| 1321 | #endif | ||
| 1322 | 1052 | ||
| 1323 | 1053 | ||
| 1324 | /* Double the size of FAIL_STACK, up to a limit | 1054 | /* Double the size of FAIL_STACK, up to a limit |
| @@ -1327,7 +1057,7 @@ typedef struct | |||
| 1327 | Return 1 if succeeds, and 0 if either ran out of memory | 1057 | Return 1 if succeeds, and 0 if either ran out of memory |
| 1328 | allocating space for it or it was already too large. | 1058 | allocating space for it or it was already too large. |
| 1329 | 1059 | ||
| 1330 | REGEX_REALLOCATE_STACK requires `destination' be declared. */ | 1060 | REGEX_REALLOCATE requires `destination' be declared. */ |
| 1331 | 1061 | ||
| 1332 | /* Factor to increase the failure stack size by | 1062 | /* Factor to increase the failure stack size by |
| 1333 | when we increase it. | 1063 | when we increase it. |
| @@ -1340,18 +1070,15 @@ typedef struct | |||
| 1340 | (((fail_stack).size >= emacs_re_max_failures * TYPICAL_FAILURE_SIZE) \ | 1070 | (((fail_stack).size >= emacs_re_max_failures * TYPICAL_FAILURE_SIZE) \ |
| 1341 | ? 0 \ | 1071 | ? 0 \ |
| 1342 | : ((fail_stack).stack \ | 1072 | : ((fail_stack).stack \ |
| 1343 | = REGEX_REALLOCATE_STACK ((fail_stack).stack, \ | 1073 | = REGEX_REALLOCATE ((fail_stack).stack, \ |
| 1344 | (fail_stack).size * sizeof (fail_stack_elt_t), \ | 1074 | (fail_stack).size * sizeof (fail_stack_elt_t), \ |
| 1345 | min (emacs_re_max_failures * TYPICAL_FAILURE_SIZE, \ | 1075 | min (emacs_re_max_failures * TYPICAL_FAILURE_SIZE, \ |
| 1346 | ((fail_stack).size * FAIL_STACK_GROWTH_FACTOR)) \ | 1076 | ((fail_stack).size * FAIL_STACK_GROWTH_FACTOR)) \ |
| 1347 | * sizeof (fail_stack_elt_t)), \ | 1077 | * sizeof (fail_stack_elt_t)), \ |
| 1348 | \ | 1078 | ((fail_stack).size \ |
| 1349 | (fail_stack).stack == NULL \ | 1079 | = (min (emacs_re_max_failures * TYPICAL_FAILURE_SIZE, \ |
| 1350 | ? 0 \ | 1080 | ((fail_stack).size * FAIL_STACK_GROWTH_FACTOR)))), \ |
| 1351 | : ((fail_stack).size \ | 1081 | 1)) |
| 1352 | = (min (emacs_re_max_failures * TYPICAL_FAILURE_SIZE, \ | ||
| 1353 | ((fail_stack).size * FAIL_STACK_GROWTH_FACTOR))), \ | ||
| 1354 | 1))) | ||
| 1355 | 1082 | ||
| 1356 | 1083 | ||
| 1357 | /* Push a pointer value onto the failure stack. | 1084 | /* Push a pointer value onto the failure stack. |
| @@ -1385,8 +1112,8 @@ typedef struct | |||
| 1385 | while (REMAINING_AVAIL_SLOTS <= space) { \ | 1112 | while (REMAINING_AVAIL_SLOTS <= space) { \ |
| 1386 | if (!GROW_FAIL_STACK (fail_stack)) \ | 1113 | if (!GROW_FAIL_STACK (fail_stack)) \ |
| 1387 | return -2; \ | 1114 | return -2; \ |
| 1388 | DEBUG_PRINT ("\n Doubled stack; size now: %zd\n", (fail_stack).size);\ | 1115 | DEBUG_PRINT ("\n Doubled stack; size now: %zu\n", (fail_stack).size);\ |
| 1389 | DEBUG_PRINT (" slots available: %zd\n", REMAINING_AVAIL_SLOTS);\ | 1116 | DEBUG_PRINT (" slots available: %zu\n", REMAINING_AVAIL_SLOTS);\ |
| 1390 | } | 1117 | } |
| 1391 | 1118 | ||
| 1392 | /* Push register NUM onto the stack. */ | 1119 | /* Push register NUM onto the stack. */ |
| @@ -1424,7 +1151,7 @@ do { \ | |||
| 1424 | if (pfreg == -1) \ | 1151 | if (pfreg == -1) \ |
| 1425 | { \ | 1152 | { \ |
| 1426 | /* It's a counter. */ \ | 1153 | /* It's a counter. */ \ |
| 1427 | /* Here, we discard `const', making re_match non-reentrant. */ \ | 1154 | /* Discard 'const', making re_search non-reentrant. */ \ |
| 1428 | unsigned char *ptr = (unsigned char *) POP_FAILURE_POINTER (); \ | 1155 | unsigned char *ptr = (unsigned char *) POP_FAILURE_POINTER (); \ |
| 1429 | pfreg = POP_FAILURE_INT (); \ | 1156 | pfreg = POP_FAILURE_INT (); \ |
| 1430 | STORE_NUMBER (ptr, pfreg); \ | 1157 | STORE_NUMBER (ptr, pfreg); \ |
| @@ -1442,14 +1169,14 @@ do { \ | |||
| 1442 | /* Check that we are not stuck in an infinite loop. */ | 1169 | /* Check that we are not stuck in an infinite loop. */ |
| 1443 | #define CHECK_INFINITE_LOOP(pat_cur, string_place) \ | 1170 | #define CHECK_INFINITE_LOOP(pat_cur, string_place) \ |
| 1444 | do { \ | 1171 | do { \ |
| 1445 | ssize_t failure = TOP_FAILURE_HANDLE (); \ | 1172 | ptrdiff_t failure = TOP_FAILURE_HANDLE (); \ |
| 1446 | /* Check for infinite matching loops */ \ | 1173 | /* Check for infinite matching loops */ \ |
| 1447 | while (failure > 0 \ | 1174 | while (failure > 0 \ |
| 1448 | && (FAILURE_STR (failure) == string_place \ | 1175 | && (FAILURE_STR (failure) == string_place \ |
| 1449 | || FAILURE_STR (failure) == NULL)) \ | 1176 | || FAILURE_STR (failure) == NULL)) \ |
| 1450 | { \ | 1177 | { \ |
| 1451 | assert (FAILURE_PAT (failure) >= bufp->buffer \ | 1178 | eassert (FAILURE_PAT (failure) >= bufp->buffer \ |
| 1452 | && FAILURE_PAT (failure) <= bufp->buffer + bufp->used); \ | 1179 | && FAILURE_PAT (failure) <= bufp->buffer + bufp->used); \ |
| 1453 | if (FAILURE_PAT (failure) == pat_cur) \ | 1180 | if (FAILURE_PAT (failure) == pat_cur) \ |
| 1454 | { \ | 1181 | { \ |
| 1455 | cycle = 1; \ | 1182 | cycle = 1; \ |
| @@ -1478,14 +1205,14 @@ do { \ | |||
| 1478 | \ | 1205 | \ |
| 1479 | DEBUG_STATEMENT (nfailure_points_pushed++); \ | 1206 | DEBUG_STATEMENT (nfailure_points_pushed++); \ |
| 1480 | DEBUG_PRINT ("\nPUSH_FAILURE_POINT:\n"); \ | 1207 | DEBUG_PRINT ("\nPUSH_FAILURE_POINT:\n"); \ |
| 1481 | DEBUG_PRINT (" Before push, next avail: %zd\n", (fail_stack).avail); \ | 1208 | DEBUG_PRINT (" Before push, next avail: %zu\n", (fail_stack).avail); \ |
| 1482 | DEBUG_PRINT (" size: %zd\n", (fail_stack).size);\ | 1209 | DEBUG_PRINT (" size: %zu\n", (fail_stack).size);\ |
| 1483 | \ | 1210 | \ |
| 1484 | ENSURE_FAIL_STACK (NUM_NONREG_ITEMS); \ | 1211 | ENSURE_FAIL_STACK (NUM_NONREG_ITEMS); \ |
| 1485 | \ | 1212 | \ |
| 1486 | DEBUG_PRINT ("\n"); \ | 1213 | DEBUG_PRINT ("\n"); \ |
| 1487 | \ | 1214 | \ |
| 1488 | DEBUG_PRINT (" Push frame index: %zd\n", fail_stack.frame); \ | 1215 | DEBUG_PRINT (" Push frame index: %zu\n", fail_stack.frame); \ |
| 1489 | PUSH_FAILURE_INT (fail_stack.frame); \ | 1216 | PUSH_FAILURE_INT (fail_stack.frame); \ |
| 1490 | \ | 1217 | \ |
| 1491 | DEBUG_PRINT (" Push string %p: \"", string_place); \ | 1218 | DEBUG_PRINT (" Push string %p: \"", string_place); \ |
| @@ -1523,12 +1250,12 @@ do { \ | |||
| 1523 | 1250 | ||
| 1524 | #define POP_FAILURE_POINT(str, pat) \ | 1251 | #define POP_FAILURE_POINT(str, pat) \ |
| 1525 | do { \ | 1252 | do { \ |
| 1526 | assert (!FAIL_STACK_EMPTY ()); \ | 1253 | eassert (!FAIL_STACK_EMPTY ()); \ |
| 1527 | \ | 1254 | \ |
| 1528 | /* Remove failure points and point to how many regs pushed. */ \ | 1255 | /* Remove failure points and point to how many regs pushed. */ \ |
| 1529 | DEBUG_PRINT ("POP_FAILURE_POINT:\n"); \ | 1256 | DEBUG_PRINT ("POP_FAILURE_POINT:\n"); \ |
| 1530 | DEBUG_PRINT (" Before pop, next avail: %zd\n", fail_stack.avail); \ | 1257 | DEBUG_PRINT (" Before pop, next avail: %zu\n", fail_stack.avail); \ |
| 1531 | DEBUG_PRINT (" size: %zd\n", fail_stack.size); \ | 1258 | DEBUG_PRINT (" size: %zu\n", fail_stack.size); \ |
| 1532 | \ | 1259 | \ |
| 1533 | /* Pop the saved registers. */ \ | 1260 | /* Pop the saved registers. */ \ |
| 1534 | while (fail_stack.frame < fail_stack.avail) \ | 1261 | while (fail_stack.frame < fail_stack.avail) \ |
| @@ -1547,10 +1274,10 @@ do { \ | |||
| 1547 | DEBUG_PRINT ("\"\n"); \ | 1274 | DEBUG_PRINT ("\"\n"); \ |
| 1548 | \ | 1275 | \ |
| 1549 | fail_stack.frame = POP_FAILURE_INT (); \ | 1276 | fail_stack.frame = POP_FAILURE_INT (); \ |
| 1550 | DEBUG_PRINT (" Popping frame index: %zd\n", fail_stack.frame); \ | 1277 | DEBUG_PRINT (" Popping frame index: %zu\n", fail_stack.frame); \ |
| 1551 | \ | 1278 | \ |
| 1552 | assert (fail_stack.avail >= 0); \ | 1279 | eassert (fail_stack.avail >= 0); \ |
| 1553 | assert (fail_stack.frame <= fail_stack.avail); \ | 1280 | eassert (fail_stack.frame <= fail_stack.avail); \ |
| 1554 | \ | 1281 | \ |
| 1555 | DEBUG_STATEMENT (nfailure_points_popped++); \ | 1282 | DEBUG_STATEMENT (nfailure_points_popped++); \ |
| 1556 | } while (0) /* POP_FAILURE_POINT */ | 1283 | } while (0) /* POP_FAILURE_POINT */ |
| @@ -1563,12 +1290,8 @@ do { \ | |||
| 1563 | /* Subroutine declarations and macros for regex_compile. */ | 1290 | /* Subroutine declarations and macros for regex_compile. */ |
| 1564 | 1291 | ||
| 1565 | static reg_errcode_t regex_compile (re_char *pattern, size_t size, | 1292 | static reg_errcode_t regex_compile (re_char *pattern, size_t size, |
| 1566 | #ifdef emacs | ||
| 1567 | bool posix_backtracking, | 1293 | bool posix_backtracking, |
| 1568 | const char *whitespace_regexp, | 1294 | const char *whitespace_regexp, |
| 1569 | #else | ||
| 1570 | reg_syntax_t syntax, | ||
| 1571 | #endif | ||
| 1572 | struct re_pattern_buffer *bufp); | 1295 | struct re_pattern_buffer *bufp); |
| 1573 | static void store_op1 (re_opcode_t op, unsigned char *loc, int arg); | 1296 | static void store_op1 (re_opcode_t op, unsigned char *loc, int arg); |
| 1574 | static void store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2); | 1297 | static void store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2); |
| @@ -1576,10 +1299,10 @@ static void insert_op1 (re_opcode_t op, unsigned char *loc, | |||
| 1576 | int arg, unsigned char *end); | 1299 | int arg, unsigned char *end); |
| 1577 | static void insert_op2 (re_opcode_t op, unsigned char *loc, | 1300 | static void insert_op2 (re_opcode_t op, unsigned char *loc, |
| 1578 | int arg1, int arg2, unsigned char *end); | 1301 | int arg1, int arg2, unsigned char *end); |
| 1579 | static boolean at_begline_loc_p (re_char *pattern, re_char *p, | 1302 | static bool at_begline_loc_p (re_char *pattern, re_char *p, |
| 1580 | reg_syntax_t syntax); | 1303 | reg_syntax_t syntax); |
| 1581 | static boolean at_endline_loc_p (re_char *p, re_char *pend, | 1304 | static bool at_endline_loc_p (re_char *p, re_char *pend, |
| 1582 | reg_syntax_t syntax); | 1305 | reg_syntax_t syntax); |
| 1583 | static re_char *skip_one_char (re_char *p); | 1306 | static re_char *skip_one_char (re_char *p); |
| 1584 | static int analyze_first (re_char *p, re_char *pend, | 1307 | static int analyze_first (re_char *p, re_char *pend, |
| 1585 | char *fastmap, const int multibyte); | 1308 | char *fastmap, const int multibyte); |
| @@ -1595,14 +1318,15 @@ static int analyze_first (re_char *p, re_char *pend, | |||
| 1595 | } while (0) | 1318 | } while (0) |
| 1596 | 1319 | ||
| 1597 | 1320 | ||
| 1598 | /* If `translate' is non-null, return translate[D], else just D. We | 1321 | #define RE_TRANSLATE(TBL, C) char_table_translate (TBL, C) |
| 1322 | #define RE_TRANSLATE_P(TBL) (!EQ (TBL, make_number (0))) | ||
| 1323 | |||
| 1324 | /* If `translate' is non-zero, return translate[D], else just D. We | ||
| 1599 | cast the subscript to translate because some data is declared as | 1325 | cast the subscript to translate because some data is declared as |
| 1600 | `char *', to avoid warnings when a string constant is passed. But | 1326 | `char *', to avoid warnings when a string constant is passed. But |
| 1601 | when we use a character as a subscript we must make it unsigned. */ | 1327 | when we use a character as a subscript we must make it unsigned. */ |
| 1602 | #ifndef TRANSLATE | 1328 | #define TRANSLATE(d) \ |
| 1603 | # define TRANSLATE(d) \ | ||
| 1604 | (RE_TRANSLATE_P (translate) ? RE_TRANSLATE (translate, (d)) : (d)) | 1329 | (RE_TRANSLATE_P (translate) ? RE_TRANSLATE (translate, (d)) : (d)) |
| 1605 | #endif | ||
| 1606 | 1330 | ||
| 1607 | 1331 | ||
| 1608 | /* Macros for outputting the compiled pattern into `buffer'. */ | 1332 | /* Macros for outputting the compiled pattern into `buffer'. */ |
| @@ -1677,8 +1401,6 @@ static int analyze_first (re_char *p, re_char *pend, | |||
| 1677 | if (laststart_set) laststart_off = laststart - old_buffer; \ | 1401 | if (laststart_set) laststart_off = laststart - old_buffer; \ |
| 1678 | if (pending_exact_set) pending_exact_off = pending_exact - old_buffer; \ | 1402 | if (pending_exact_set) pending_exact_off = pending_exact - old_buffer; \ |
| 1679 | RETALLOC (bufp->buffer, bufp->allocated, unsigned char); \ | 1403 | RETALLOC (bufp->buffer, bufp->allocated, unsigned char); \ |
| 1680 | if (bufp->buffer == NULL) \ | ||
| 1681 | return REG_ESPACE; \ | ||
| 1682 | unsigned char *new_buffer = bufp->buffer; \ | 1404 | unsigned char *new_buffer = bufp->buffer; \ |
| 1683 | b = new_buffer + b_off; \ | 1405 | b = new_buffer + b_off; \ |
| 1684 | begalt = new_buffer + begalt_off; \ | 1406 | begalt = new_buffer + begalt_off; \ |
| @@ -1729,12 +1451,6 @@ typedef struct | |||
| 1729 | 1451 | ||
| 1730 | /* The next available element. */ | 1452 | /* The next available element. */ |
| 1731 | #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) | 1453 | #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) |
| 1732 | |||
| 1733 | /* Explicit quit checking is needed for Emacs, which uses polling to | ||
| 1734 | process input events. */ | ||
| 1735 | #ifndef emacs | ||
| 1736 | static void maybe_quit (void) {} | ||
| 1737 | #endif | ||
| 1738 | 1454 | ||
| 1739 | /* Structure to manage work area for range table. */ | 1455 | /* Structure to manage work area for range table. */ |
| 1740 | struct range_table_work_area | 1456 | struct range_table_work_area |
| @@ -1745,8 +1461,6 @@ struct range_table_work_area | |||
| 1745 | int bits; /* flag to record character classes */ | 1461 | int bits; /* flag to record character classes */ |
| 1746 | }; | 1462 | }; |
| 1747 | 1463 | ||
| 1748 | #ifdef emacs | ||
| 1749 | |||
| 1750 | /* Make sure that WORK_AREA can hold more N multibyte characters. | 1464 | /* Make sure that WORK_AREA can hold more N multibyte characters. |
| 1751 | This is used only in set_image_of_range and set_image_of_range_1. | 1465 | This is used only in set_image_of_range and set_image_of_range_1. |
| 1752 | It expects WORK_AREA to be a pointer. | 1466 | It expects WORK_AREA to be a pointer. |
| @@ -1773,13 +1487,11 @@ struct range_table_work_area | |||
| 1773 | (work_area).table[(work_area).used++] = (range_end); \ | 1487 | (work_area).table[(work_area).used++] = (range_end); \ |
| 1774 | } while (0) | 1488 | } while (0) |
| 1775 | 1489 | ||
| 1776 | #endif /* emacs */ | ||
| 1777 | |||
| 1778 | /* Free allocated memory for WORK_AREA. */ | 1490 | /* Free allocated memory for WORK_AREA. */ |
| 1779 | #define FREE_RANGE_TABLE_WORK_AREA(work_area) \ | 1491 | #define FREE_RANGE_TABLE_WORK_AREA(work_area) \ |
| 1780 | do { \ | 1492 | do { \ |
| 1781 | if ((work_area).table) \ | 1493 | if ((work_area).table) \ |
| 1782 | free ((work_area).table); \ | 1494 | xfree ((work_area).table); \ |
| 1783 | } while (0) | 1495 | } while (0) |
| 1784 | 1496 | ||
| 1785 | #define CLEAR_RANGE_TABLE_WORK_USED(work_area) ((work_area).used = 0, (work_area).bits = 0) | 1497 | #define CLEAR_RANGE_TABLE_WORK_USED(work_area) ((work_area).used = 0, (work_area).bits = 0) |
| @@ -1807,8 +1519,6 @@ struct range_table_work_area | |||
| 1807 | #define SET_LIST_BIT(c) (b[((c)) / BYTEWIDTH] |= 1 << ((c) % BYTEWIDTH)) | 1519 | #define SET_LIST_BIT(c) (b[((c)) / BYTEWIDTH] |= 1 << ((c) % BYTEWIDTH)) |
| 1808 | 1520 | ||
| 1809 | 1521 | ||
| 1810 | #ifdef emacs | ||
| 1811 | |||
| 1812 | /* Store characters in the range FROM to TO in the bitmap at B (for | 1522 | /* Store characters in the range FROM to TO in the bitmap at B (for |
| 1813 | ASCII and unibyte characters) and WORK_AREA (for multibyte | 1523 | ASCII and unibyte characters) and WORK_AREA (for multibyte |
| 1814 | characters) while translating them and paying attention to the | 1524 | characters) while translating them and paying attention to the |
| @@ -1912,8 +1622,6 @@ struct range_table_work_area | |||
| 1912 | } \ | 1622 | } \ |
| 1913 | } while (0) | 1623 | } while (0) |
| 1914 | 1624 | ||
| 1915 | #endif /* emacs */ | ||
| 1916 | |||
| 1917 | /* Get the next unsigned number in the uncompiled pattern. */ | 1625 | /* Get the next unsigned number in the uncompiled pattern. */ |
| 1918 | #define GET_INTERVAL_COUNT(num) \ | 1626 | #define GET_INTERVAL_COUNT(num) \ |
| 1919 | do { \ | 1627 | do { \ |
| @@ -1936,8 +1644,6 @@ struct range_table_work_area | |||
| 1936 | } \ | 1644 | } \ |
| 1937 | } while (0) | 1645 | } while (0) |
| 1938 | 1646 | ||
| 1939 | #if ! WIDE_CHAR_SUPPORT | ||
| 1940 | |||
| 1941 | /* Parse a character class, i.e. string such as "[:name:]". *strp | 1647 | /* Parse a character class, i.e. string such as "[:name:]". *strp |
| 1942 | points to the string to be parsed and limit is length, in bytes, of | 1648 | points to the string to be parsed and limit is length, in bytes, of |
| 1943 | that string. | 1649 | that string. |
| @@ -2031,7 +1737,7 @@ re_wctype_parse (const unsigned char **strp, unsigned limit) | |||
| 2031 | } | 1737 | } |
| 2032 | 1738 | ||
| 2033 | /* True if CH is in the char class CC. */ | 1739 | /* True if CH is in the char class CC. */ |
| 2034 | boolean | 1740 | bool |
| 2035 | re_iswctype (int ch, re_wctype_t cc) | 1741 | re_iswctype (int ch, re_wctype_t cc) |
| 2036 | { | 1742 | { |
| 2037 | switch (cc) | 1743 | switch (cc) |
| @@ -2084,7 +1790,6 @@ re_wctype_to_bit (re_wctype_t cc) | |||
| 2084 | abort (); | 1790 | abort (); |
| 2085 | } | 1791 | } |
| 2086 | } | 1792 | } |
| 2087 | #endif | ||
| 2088 | 1793 | ||
| 2089 | /* Filling in the work area of a range. */ | 1794 | /* Filling in the work area of a range. */ |
| 2090 | 1795 | ||
| @@ -2094,288 +1799,16 @@ static void | |||
| 2094 | extend_range_table_work_area (struct range_table_work_area *work_area) | 1799 | extend_range_table_work_area (struct range_table_work_area *work_area) |
| 2095 | { | 1800 | { |
| 2096 | work_area->allocated += 16 * sizeof (int); | 1801 | work_area->allocated += 16 * sizeof (int); |
| 2097 | work_area->table = realloc (work_area->table, work_area->allocated); | 1802 | work_area->table = xrealloc (work_area->table, work_area->allocated); |
| 2098 | } | 1803 | } |
| 2099 | |||
| 2100 | #if 0 | ||
| 2101 | #ifdef emacs | ||
| 2102 | |||
| 2103 | /* Carefully find the ranges of codes that are equivalent | ||
| 2104 | under case conversion to the range start..end when passed through | ||
| 2105 | TRANSLATE. Handle the case where non-letters can come in between | ||
| 2106 | two upper-case letters (which happens in Latin-1). | ||
| 2107 | Also handle the case of groups of more than 2 case-equivalent chars. | ||
| 2108 | |||
| 2109 | The basic method is to look at consecutive characters and see | ||
| 2110 | if they can form a run that can be handled as one. | ||
| 2111 | |||
| 2112 | Returns -1 if successful, REG_ESPACE if ran out of space. */ | ||
| 2113 | |||
| 2114 | static int | ||
| 2115 | set_image_of_range_1 (struct range_table_work_area *work_area, | ||
| 2116 | re_wchar_t start, re_wchar_t end, | ||
| 2117 | RE_TRANSLATE_TYPE translate) | ||
| 2118 | { | ||
| 2119 | /* `one_case' indicates a character, or a run of characters, | ||
| 2120 | each of which is an isolate (no case-equivalents). | ||
| 2121 | This includes all ASCII non-letters. | ||
| 2122 | |||
| 2123 | `two_case' indicates a character, or a run of characters, | ||
| 2124 | each of which has two case-equivalent forms. | ||
| 2125 | This includes all ASCII letters. | ||
| 2126 | |||
| 2127 | `strange' indicates a character that has more than one | ||
| 2128 | case-equivalent. */ | ||
| 2129 | |||
| 2130 | enum case_type {one_case, two_case, strange}; | ||
| 2131 | |||
| 2132 | /* Describe the run that is in progress, | ||
| 2133 | which the next character can try to extend. | ||
| 2134 | If run_type is strange, that means there really is no run. | ||
| 2135 | If run_type is one_case, then run_start...run_end is the run. | ||
| 2136 | If run_type is two_case, then the run is run_start...run_end, | ||
| 2137 | and the case-equivalents end at run_eqv_end. */ | ||
| 2138 | |||
| 2139 | enum case_type run_type = strange; | ||
| 2140 | int run_start, run_end, run_eqv_end; | ||
| 2141 | |||
| 2142 | Lisp_Object eqv_table; | ||
| 2143 | |||
| 2144 | if (!RE_TRANSLATE_P (translate)) | ||
| 2145 | { | ||
| 2146 | EXTEND_RANGE_TABLE (work_area, 2); | ||
| 2147 | work_area->table[work_area->used++] = (start); | ||
| 2148 | work_area->table[work_area->used++] = (end); | ||
| 2149 | return -1; | ||
| 2150 | } | ||
| 2151 | |||
| 2152 | eqv_table = XCHAR_TABLE (translate)->extras[2]; | ||
| 2153 | |||
| 2154 | for (; start <= end; start++) | ||
| 2155 | { | ||
| 2156 | enum case_type this_type; | ||
| 2157 | int eqv = RE_TRANSLATE (eqv_table, start); | ||
| 2158 | int minchar, maxchar; | ||
| 2159 | |||
| 2160 | /* Classify this character */ | ||
| 2161 | if (eqv == start) | ||
| 2162 | this_type = one_case; | ||
| 2163 | else if (RE_TRANSLATE (eqv_table, eqv) == start) | ||
| 2164 | this_type = two_case; | ||
| 2165 | else | ||
| 2166 | this_type = strange; | ||
| 2167 | |||
| 2168 | if (start < eqv) | ||
| 2169 | minchar = start, maxchar = eqv; | ||
| 2170 | else | ||
| 2171 | minchar = eqv, maxchar = start; | ||
| 2172 | |||
| 2173 | /* Can this character extend the run in progress? */ | ||
| 2174 | if (this_type == strange || this_type != run_type | ||
| 2175 | || !(minchar == run_end + 1 | ||
| 2176 | && (run_type == two_case | ||
| 2177 | ? maxchar == run_eqv_end + 1 : 1))) | ||
| 2178 | { | ||
| 2179 | /* No, end the run. | ||
| 2180 | Record each of its equivalent ranges. */ | ||
| 2181 | if (run_type == one_case) | ||
| 2182 | { | ||
| 2183 | EXTEND_RANGE_TABLE (work_area, 2); | ||
| 2184 | work_area->table[work_area->used++] = run_start; | ||
| 2185 | work_area->table[work_area->used++] = run_end; | ||
| 2186 | } | ||
| 2187 | else if (run_type == two_case) | ||
| 2188 | { | ||
| 2189 | EXTEND_RANGE_TABLE (work_area, 4); | ||
| 2190 | work_area->table[work_area->used++] = run_start; | ||
| 2191 | work_area->table[work_area->used++] = run_end; | ||
| 2192 | work_area->table[work_area->used++] | ||
| 2193 | = RE_TRANSLATE (eqv_table, run_start); | ||
| 2194 | work_area->table[work_area->used++] | ||
| 2195 | = RE_TRANSLATE (eqv_table, run_end); | ||
| 2196 | } | ||
| 2197 | run_type = strange; | ||
| 2198 | } | ||
| 2199 | |||
| 2200 | if (this_type == strange) | ||
| 2201 | { | ||
| 2202 | /* For a strange character, add each of its equivalents, one | ||
| 2203 | by one. Don't start a range. */ | ||
| 2204 | do | ||
| 2205 | { | ||
| 2206 | EXTEND_RANGE_TABLE (work_area, 2); | ||
| 2207 | work_area->table[work_area->used++] = eqv; | ||
| 2208 | work_area->table[work_area->used++] = eqv; | ||
| 2209 | eqv = RE_TRANSLATE (eqv_table, eqv); | ||
| 2210 | } | ||
| 2211 | while (eqv != start); | ||
| 2212 | } | ||
| 2213 | |||
| 2214 | /* Add this char to the run, or start a new run. */ | ||
| 2215 | else if (run_type == strange) | ||
| 2216 | { | ||
| 2217 | /* Initialize a new range. */ | ||
| 2218 | run_type = this_type; | ||
| 2219 | run_start = start; | ||
| 2220 | run_end = start; | ||
| 2221 | run_eqv_end = RE_TRANSLATE (eqv_table, run_end); | ||
| 2222 | } | ||
| 2223 | else | ||
| 2224 | { | ||
| 2225 | /* Extend a running range. */ | ||
| 2226 | run_end = minchar; | ||
| 2227 | run_eqv_end = RE_TRANSLATE (eqv_table, run_end); | ||
| 2228 | } | ||
| 2229 | } | ||
| 2230 | |||
| 2231 | /* If a run is still in progress at the end, finish it now | ||
| 2232 | by recording its equivalent ranges. */ | ||
| 2233 | if (run_type == one_case) | ||
| 2234 | { | ||
| 2235 | EXTEND_RANGE_TABLE (work_area, 2); | ||
| 2236 | work_area->table[work_area->used++] = run_start; | ||
| 2237 | work_area->table[work_area->used++] = run_end; | ||
| 2238 | } | ||
| 2239 | else if (run_type == two_case) | ||
| 2240 | { | ||
| 2241 | EXTEND_RANGE_TABLE (work_area, 4); | ||
| 2242 | work_area->table[work_area->used++] = run_start; | ||
| 2243 | work_area->table[work_area->used++] = run_end; | ||
| 2244 | work_area->table[work_area->used++] | ||
| 2245 | = RE_TRANSLATE (eqv_table, run_start); | ||
| 2246 | work_area->table[work_area->used++] | ||
| 2247 | = RE_TRANSLATE (eqv_table, run_end); | ||
| 2248 | } | ||
| 2249 | |||
| 2250 | return -1; | ||
| 2251 | } | ||
| 2252 | |||
| 2253 | #endif /* emacs */ | ||
| 2254 | |||
| 2255 | /* Record the image of the range start..end when passed through | ||
| 2256 | TRANSLATE. This is not necessarily TRANSLATE(start)..TRANSLATE(end) | ||
| 2257 | and is not even necessarily contiguous. | ||
| 2258 | Normally we approximate it with the smallest contiguous range that contains | ||
| 2259 | all the chars we need. However, for Latin-1 we go to extra effort | ||
| 2260 | to do a better job. | ||
| 2261 | |||
| 2262 | This function is not called for ASCII ranges. | ||
| 2263 | |||
| 2264 | Returns -1 if successful, REG_ESPACE if ran out of space. */ | ||
| 2265 | |||
| 2266 | static int | ||
| 2267 | set_image_of_range (struct range_table_work_area *work_area, | ||
| 2268 | re_wchar_t start, re_wchar_t end, | ||
| 2269 | RE_TRANSLATE_TYPE translate) | ||
| 2270 | { | ||
| 2271 | re_wchar_t cmin, cmax; | ||
| 2272 | |||
| 2273 | #ifdef emacs | ||
| 2274 | /* For Latin-1 ranges, use set_image_of_range_1 | ||
| 2275 | to get proper handling of ranges that include letters and nonletters. | ||
| 2276 | For a range that includes the whole of Latin-1, this is not necessary. | ||
| 2277 | For other character sets, we don't bother to get this right. */ | ||
| 2278 | if (RE_TRANSLATE_P (translate) && start < 04400 | ||
| 2279 | && !(start < 04200 && end >= 04377)) | ||
| 2280 | { | ||
| 2281 | int newend; | ||
| 2282 | int tem; | ||
| 2283 | newend = end; | ||
| 2284 | if (newend > 04377) | ||
| 2285 | newend = 04377; | ||
| 2286 | tem = set_image_of_range_1 (work_area, start, newend, translate); | ||
| 2287 | if (tem > 0) | ||
| 2288 | return tem; | ||
| 2289 | |||
| 2290 | start = 04400; | ||
| 2291 | if (end < 04400) | ||
| 2292 | return -1; | ||
| 2293 | } | ||
| 2294 | #endif | ||
| 2295 | |||
| 2296 | EXTEND_RANGE_TABLE (work_area, 2); | ||
| 2297 | work_area->table[work_area->used++] = (start); | ||
| 2298 | work_area->table[work_area->used++] = (end); | ||
| 2299 | |||
| 2300 | cmin = -1, cmax = -1; | ||
| 2301 | |||
| 2302 | if (RE_TRANSLATE_P (translate)) | ||
| 2303 | { | ||
| 2304 | int ch; | ||
| 2305 | |||
| 2306 | for (ch = start; ch <= end; ch++) | ||
| 2307 | { | ||
| 2308 | re_wchar_t c = TRANSLATE (ch); | ||
| 2309 | if (! (start <= c && c <= end)) | ||
| 2310 | { | ||
| 2311 | if (cmin == -1) | ||
| 2312 | cmin = c, cmax = c; | ||
| 2313 | else | ||
| 2314 | { | ||
| 2315 | cmin = min (cmin, c); | ||
| 2316 | cmax = max (cmax, c); | ||
| 2317 | } | ||
| 2318 | } | ||
| 2319 | } | ||
| 2320 | |||
| 2321 | if (cmin != -1) | ||
| 2322 | { | ||
| 2323 | EXTEND_RANGE_TABLE (work_area, 2); | ||
| 2324 | work_area->table[work_area->used++] = (cmin); | ||
| 2325 | work_area->table[work_area->used++] = (cmax); | ||
| 2326 | } | ||
| 2327 | } | ||
| 2328 | |||
| 2329 | return -1; | ||
| 2330 | } | ||
| 2331 | #endif /* 0 */ | ||
| 2332 | |||
| 2333 | #ifndef MATCH_MAY_ALLOCATE | ||
| 2334 | |||
| 2335 | /* If we cannot allocate large objects within re_match_2_internal, | ||
| 2336 | we make the fail stack and register vectors global. | ||
| 2337 | The fail stack, we grow to the maximum size when a regexp | ||
| 2338 | is compiled. | ||
| 2339 | The register vectors, we adjust in size each time we | ||
| 2340 | compile a regexp, according to the number of registers it needs. */ | ||
| 2341 | |||
| 2342 | static fail_stack_type fail_stack; | ||
| 2343 | |||
| 2344 | /* Size with which the following vectors are currently allocated. | ||
| 2345 | That is so we can make them bigger as needed, | ||
| 2346 | but never make them smaller. */ | ||
| 2347 | static int regs_allocated_size; | ||
| 2348 | |||
| 2349 | static re_char ** regstart, ** regend; | ||
| 2350 | static re_char **best_regstart, **best_regend; | ||
| 2351 | |||
| 2352 | /* Make the register vectors big enough for NUM_REGS registers, | ||
| 2353 | but don't make them smaller. */ | ||
| 2354 | |||
| 2355 | static | ||
| 2356 | regex_grow_registers (int num_regs) | ||
| 2357 | { | ||
| 2358 | if (num_regs > regs_allocated_size) | ||
| 2359 | { | ||
| 2360 | RETALLOC_IF (regstart, num_regs, re_char *); | ||
| 2361 | RETALLOC_IF (regend, num_regs, re_char *); | ||
| 2362 | RETALLOC_IF (best_regstart, num_regs, re_char *); | ||
| 2363 | RETALLOC_IF (best_regend, num_regs, re_char *); | ||
| 2364 | |||
| 2365 | regs_allocated_size = num_regs; | ||
| 2366 | } | ||
| 2367 | } | ||
| 2368 | |||
| 2369 | #endif /* not MATCH_MAY_ALLOCATE */ | ||
| 2370 | 1804 | ||
| 2371 | static boolean group_in_compile_stack (compile_stack_type compile_stack, | 1805 | static bool group_in_compile_stack (compile_stack_type, regnum_t); |
| 2372 | regnum_t regnum); | ||
| 2373 | 1806 | ||
| 2374 | /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX. | 1807 | /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX. |
| 2375 | Returns one of error codes defined in `regex-emacs.h', or zero for success. | 1808 | Returns one of error codes defined in `regex-emacs.h', or zero for success. |
| 2376 | 1809 | ||
| 2377 | If WHITESPACE_REGEXP is given (only #ifdef emacs), it is used instead of | 1810 | If WHITESPACE_REGEXP is given, it is used instead of a space |
| 2378 | a space character in PATTERN. | 1811 | character in PATTERN. |
| 2379 | 1812 | ||
| 2380 | Assumes the `allocated' (and perhaps `buffer') and `translate' | 1813 | Assumes the `allocated' (and perhaps `buffer') and `translate' |
| 2381 | fields are set in BUFP on entry. | 1814 | fields are set in BUFP on entry. |
| @@ -2404,42 +1837,33 @@ do { \ | |||
| 2404 | #define FREE_STACK_RETURN(value) \ | 1837 | #define FREE_STACK_RETURN(value) \ |
| 2405 | do { \ | 1838 | do { \ |
| 2406 | FREE_RANGE_TABLE_WORK_AREA (range_table_work); \ | 1839 | FREE_RANGE_TABLE_WORK_AREA (range_table_work); \ |
| 2407 | free (compile_stack.stack); \ | 1840 | xfree (compile_stack.stack); \ |
| 2408 | return value; \ | 1841 | return value; \ |
| 2409 | } while (0) | 1842 | } while (0) |
| 2410 | 1843 | ||
| 2411 | static reg_errcode_t | 1844 | static reg_errcode_t |
| 2412 | regex_compile (re_char *pattern, size_t size, | 1845 | regex_compile (re_char *pattern, size_t size, |
| 2413 | #ifdef emacs | ||
| 2414 | # define syntax RE_SYNTAX_EMACS | ||
| 2415 | bool posix_backtracking, | 1846 | bool posix_backtracking, |
| 2416 | const char *whitespace_regexp, | 1847 | const char *whitespace_regexp, |
| 2417 | #else | ||
| 2418 | reg_syntax_t syntax, | ||
| 2419 | # define posix_backtracking (!(syntax & RE_NO_POSIX_BACKTRACKING)) | ||
| 2420 | #endif | ||
| 2421 | struct re_pattern_buffer *bufp) | 1848 | struct re_pattern_buffer *bufp) |
| 2422 | { | 1849 | { |
| 1850 | reg_syntax_t syntax = RE_SYNTAX_EMACS; | ||
| 1851 | |||
| 2423 | /* We fetch characters from PATTERN here. */ | 1852 | /* We fetch characters from PATTERN here. */ |
| 2424 | register re_wchar_t c, c1; | 1853 | int c, c1; |
| 2425 | 1854 | ||
| 2426 | /* Points to the end of the buffer, where we should append. */ | 1855 | /* Points to the end of the buffer, where we should append. */ |
| 2427 | register unsigned char *b; | 1856 | unsigned char *b; |
| 2428 | 1857 | ||
| 2429 | /* Keeps track of unclosed groups. */ | 1858 | /* Keeps track of unclosed groups. */ |
| 2430 | compile_stack_type compile_stack; | 1859 | compile_stack_type compile_stack; |
| 2431 | 1860 | ||
| 2432 | /* Points to the current (ending) position in the pattern. */ | 1861 | /* Points to the current (ending) position in the pattern. */ |
| 2433 | #ifdef AIX | ||
| 2434 | /* `const' makes AIX compiler fail. */ | ||
| 2435 | unsigned char *p = pattern; | ||
| 2436 | #else | ||
| 2437 | re_char *p = pattern; | 1862 | re_char *p = pattern; |
| 2438 | #endif | ||
| 2439 | re_char *pend = pattern + size; | 1863 | re_char *pend = pattern + size; |
| 2440 | 1864 | ||
| 2441 | /* How to translate the characters in the pattern. */ | 1865 | /* How to translate the characters in the pattern. */ |
| 2442 | RE_TRANSLATE_TYPE translate = bufp->translate; | 1866 | Lisp_Object translate = bufp->translate; |
| 2443 | 1867 | ||
| 2444 | /* Address of the count-byte of the most recently inserted `exactn' | 1868 | /* Address of the count-byte of the most recently inserted `exactn' |
| 2445 | command. This makes it possible to tell if a new exact-match | 1869 | command. This makes it possible to tell if a new exact-match |
| @@ -2468,9 +1892,8 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2468 | struct range_table_work_area range_table_work; | 1892 | struct range_table_work_area range_table_work; |
| 2469 | 1893 | ||
| 2470 | /* If the object matched can contain multibyte characters. */ | 1894 | /* If the object matched can contain multibyte characters. */ |
| 2471 | const boolean multibyte = RE_MULTIBYTE_P (bufp); | 1895 | bool multibyte = RE_MULTIBYTE_P (bufp); |
| 2472 | 1896 | ||
| 2473 | #ifdef emacs | ||
| 2474 | /* Nonzero if we have pushed down into a subpattern. */ | 1897 | /* Nonzero if we have pushed down into a subpattern. */ |
| 2475 | int in_subpattern = 0; | 1898 | int in_subpattern = 0; |
| 2476 | 1899 | ||
| @@ -2479,26 +1902,22 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2479 | re_char *main_p; | 1902 | re_char *main_p; |
| 2480 | re_char *main_pattern; | 1903 | re_char *main_pattern; |
| 2481 | re_char *main_pend; | 1904 | re_char *main_pend; |
| 2482 | #endif | ||
| 2483 | 1905 | ||
| 2484 | #ifdef DEBUG | 1906 | #ifdef REGEX_EMACS_DEBUG |
| 2485 | debug++; | 1907 | regex_emacs_debug++; |
| 2486 | DEBUG_PRINT ("\nCompiling pattern: "); | 1908 | DEBUG_PRINT ("\nCompiling pattern: "); |
| 2487 | if (debug > 0) | 1909 | if (regex_emacs_debug > 0) |
| 2488 | { | 1910 | { |
| 2489 | unsigned debug_count; | 1911 | size_t debug_count; |
| 2490 | 1912 | ||
| 2491 | for (debug_count = 0; debug_count < size; debug_count++) | 1913 | for (debug_count = 0; debug_count < size; debug_count++) |
| 2492 | putchar (pattern[debug_count]); | 1914 | putchar (pattern[debug_count]); |
| 2493 | putchar ('\n'); | 1915 | putchar ('\n'); |
| 2494 | } | 1916 | } |
| 2495 | #endif /* DEBUG */ | 1917 | #endif |
| 2496 | 1918 | ||
| 2497 | /* Initialize the compile stack. */ | 1919 | /* Initialize the compile stack. */ |
| 2498 | compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t); | 1920 | compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t); |
| 2499 | if (compile_stack.stack == NULL) | ||
| 2500 | return REG_ESPACE; | ||
| 2501 | |||
| 2502 | compile_stack.size = INIT_COMPILE_STACK_SIZE; | 1921 | compile_stack.size = INIT_COMPILE_STACK_SIZE; |
| 2503 | compile_stack.avail = 0; | 1922 | compile_stack.avail = 0; |
| 2504 | 1923 | ||
| @@ -2506,9 +1925,6 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2506 | range_table_work.allocated = 0; | 1925 | range_table_work.allocated = 0; |
| 2507 | 1926 | ||
| 2508 | /* Initialize the pattern buffer. */ | 1927 | /* Initialize the pattern buffer. */ |
| 2509 | #ifndef emacs | ||
| 2510 | bufp->syntax = syntax; | ||
| 2511 | #endif | ||
| 2512 | bufp->fastmap_accurate = 0; | 1928 | bufp->fastmap_accurate = 0; |
| 2513 | bufp->not_bol = bufp->not_eol = 0; | 1929 | bufp->not_bol = bufp->not_eol = 0; |
| 2514 | bufp->used_syntax = 0; | 1930 | bufp->used_syntax = 0; |
| @@ -2521,11 +1937,6 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2521 | /* Always count groups, whether or not bufp->no_sub is set. */ | 1937 | /* Always count groups, whether or not bufp->no_sub is set. */ |
| 2522 | bufp->re_nsub = 0; | 1938 | bufp->re_nsub = 0; |
| 2523 | 1939 | ||
| 2524 | #if !defined emacs && !defined SYNTAX_TABLE | ||
| 2525 | /* Initialize the syntax table. */ | ||
| 2526 | init_syntax_once (); | ||
| 2527 | #endif | ||
| 2528 | |||
| 2529 | if (bufp->allocated == 0) | 1940 | if (bufp->allocated == 0) |
| 2530 | { | 1941 | { |
| 2531 | if (bufp->buffer) | 1942 | if (bufp->buffer) |
| @@ -2538,8 +1949,6 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2538 | { /* Caller did not allocate a buffer. Do it for them. */ | 1949 | { /* Caller did not allocate a buffer. Do it for them. */ |
| 2539 | bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char); | 1950 | bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char); |
| 2540 | } | 1951 | } |
| 2541 | if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE); | ||
| 2542 | |||
| 2543 | bufp->allocated = INIT_BUF_SIZE; | 1952 | bufp->allocated = INIT_BUF_SIZE; |
| 2544 | } | 1953 | } |
| 2545 | 1954 | ||
| @@ -2550,7 +1959,6 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2550 | { | 1959 | { |
| 2551 | if (p == pend) | 1960 | if (p == pend) |
| 2552 | { | 1961 | { |
| 2553 | #ifdef emacs | ||
| 2554 | /* If this is the end of an included regexp, | 1962 | /* If this is the end of an included regexp, |
| 2555 | pop back to the main regexp and try again. */ | 1963 | pop back to the main regexp and try again. */ |
| 2556 | if (in_subpattern) | 1964 | if (in_subpattern) |
| @@ -2561,7 +1969,6 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2561 | pend = main_pend; | 1969 | pend = main_pend; |
| 2562 | continue; | 1970 | continue; |
| 2563 | } | 1971 | } |
| 2564 | #endif | ||
| 2565 | /* If this is the end of the main regexp, we are done. */ | 1972 | /* If this is the end of the main regexp, we are done. */ |
| 2566 | break; | 1973 | break; |
| 2567 | } | 1974 | } |
| @@ -2570,7 +1977,6 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2570 | 1977 | ||
| 2571 | switch (c) | 1978 | switch (c) |
| 2572 | { | 1979 | { |
| 2573 | #ifdef emacs | ||
| 2574 | case ' ': | 1980 | case ' ': |
| 2575 | { | 1981 | { |
| 2576 | re_char *p1 = p; | 1982 | re_char *p1 = p; |
| @@ -2603,7 +2009,6 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2603 | pend = p + strlen (whitespace_regexp); | 2009 | pend = p + strlen (whitespace_regexp); |
| 2604 | break; | 2010 | break; |
| 2605 | } | 2011 | } |
| 2606 | #endif | ||
| 2607 | 2012 | ||
| 2608 | case '^': | 2013 | case '^': |
| 2609 | { | 2014 | { |
| @@ -2654,8 +2059,8 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2654 | 2059 | ||
| 2655 | { | 2060 | { |
| 2656 | /* 1 means zero (many) matches is allowed. */ | 2061 | /* 1 means zero (many) matches is allowed. */ |
| 2657 | boolean zero_times_ok = 0, many_times_ok = 0; | 2062 | bool zero_times_ok = false, many_times_ok = false; |
| 2658 | boolean greedy = 1; | 2063 | bool greedy = true; |
| 2659 | 2064 | ||
| 2660 | /* If there is a sequence of repetition chars, collapse it | 2065 | /* If there is a sequence of repetition chars, collapse it |
| 2661 | down to just one (the right one). We can't combine | 2066 | down to just one (the right one). We can't combine |
| @@ -2666,7 +2071,7 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2666 | { | 2071 | { |
| 2667 | if ((syntax & RE_FRUGAL) | 2072 | if ((syntax & RE_FRUGAL) |
| 2668 | && c == '?' && (zero_times_ok || many_times_ok)) | 2073 | && c == '?' && (zero_times_ok || many_times_ok)) |
| 2669 | greedy = 0; | 2074 | greedy = false; |
| 2670 | else | 2075 | else |
| 2671 | { | 2076 | { |
| 2672 | zero_times_ok |= c != '+'; | 2077 | zero_times_ok |= c != '+'; |
| @@ -2705,13 +2110,13 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2705 | { | 2110 | { |
| 2706 | if (many_times_ok) | 2111 | if (many_times_ok) |
| 2707 | { | 2112 | { |
| 2708 | boolean simple = skip_one_char (laststart) == b; | 2113 | bool simple = skip_one_char (laststart) == b; |
| 2709 | size_t startoffset = 0; | 2114 | size_t startoffset = 0; |
| 2710 | re_opcode_t ofj = | 2115 | re_opcode_t ofj = |
| 2711 | /* Check if the loop can match the empty string. */ | 2116 | /* Check if the loop can match the empty string. */ |
| 2712 | (simple || !analyze_first (laststart, b, NULL, 0)) | 2117 | (simple || !analyze_first (laststart, b, NULL, 0)) |
| 2713 | ? on_failure_jump : on_failure_jump_loop; | 2118 | ? on_failure_jump : on_failure_jump_loop; |
| 2714 | assert (skip_one_char (laststart) <= b); | 2119 | eassert (skip_one_char (laststart) <= b); |
| 2715 | 2120 | ||
| 2716 | if (!zero_times_ok && simple) | 2121 | if (!zero_times_ok && simple) |
| 2717 | { /* Since simple * loops can be made faster by using | 2122 | { /* Since simple * loops can be made faster by using |
| @@ -2744,7 +2149,7 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2744 | else | 2149 | else |
| 2745 | { | 2150 | { |
| 2746 | /* A simple ? pattern. */ | 2151 | /* A simple ? pattern. */ |
| 2747 | assert (zero_times_ok); | 2152 | eassert (zero_times_ok); |
| 2748 | GET_BUFFER_SPACE (3); | 2153 | GET_BUFFER_SPACE (3); |
| 2749 | INSERT_JUMP (on_failure_jump, laststart, b + 3); | 2154 | INSERT_JUMP (on_failure_jump, laststart, b + 3); |
| 2750 | b += 3; | 2155 | b += 3; |
| @@ -2756,7 +2161,7 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2756 | GET_BUFFER_SPACE (7); /* We might use less. */ | 2161 | GET_BUFFER_SPACE (7); /* We might use less. */ |
| 2757 | if (many_times_ok) | 2162 | if (many_times_ok) |
| 2758 | { | 2163 | { |
| 2759 | boolean emptyp = analyze_first (laststart, b, NULL, 0); | 2164 | bool emptyp = analyze_first (laststart, b, NULL, 0); |
| 2760 | 2165 | ||
| 2761 | /* The non-greedy multiple match looks like | 2166 | /* The non-greedy multiple match looks like |
| 2762 | a repeat..until: we only need a conditional jump | 2167 | a repeat..until: we only need a conditional jump |
| @@ -2831,10 +2236,9 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2831 | /* Read in characters and ranges, setting map bits. */ | 2236 | /* Read in characters and ranges, setting map bits. */ |
| 2832 | for (;;) | 2237 | for (;;) |
| 2833 | { | 2238 | { |
| 2834 | boolean escaped_char = false; | ||
| 2835 | const unsigned char *p2 = p; | 2239 | const unsigned char *p2 = p; |
| 2836 | re_wctype_t cc; | 2240 | re_wctype_t cc; |
| 2837 | re_wchar_t ch; | 2241 | int ch; |
| 2838 | 2242 | ||
| 2839 | if (p == pend) FREE_STACK_RETURN (REG_EBRACK); | 2243 | if (p == pend) FREE_STACK_RETURN (REG_EBRACK); |
| 2840 | 2244 | ||
| @@ -2849,15 +2253,6 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2849 | if (p == pend) | 2253 | if (p == pend) |
| 2850 | FREE_STACK_RETURN (REG_EBRACK); | 2254 | FREE_STACK_RETURN (REG_EBRACK); |
| 2851 | 2255 | ||
| 2852 | #ifndef emacs | ||
| 2853 | for (ch = 0; ch < (1 << BYTEWIDTH); ++ch) | ||
| 2854 | if (re_iswctype (btowc (ch), cc)) | ||
| 2855 | { | ||
| 2856 | c = TRANSLATE (ch); | ||
| 2857 | if (c < (1 << BYTEWIDTH)) | ||
| 2858 | SET_LIST_BIT (c); | ||
| 2859 | } | ||
| 2860 | #else /* emacs */ | ||
| 2861 | /* Most character classes in a multibyte match just set | 2256 | /* Most character classes in a multibyte match just set |
| 2862 | a flag. Exceptions are is_blank, is_digit, is_cntrl, and | 2257 | a flag. Exceptions are is_blank, is_digit, is_cntrl, and |
| 2863 | is_xdigit, since they can only match ASCII characters. | 2258 | is_xdigit, since they can only match ASCII characters. |
| @@ -2884,7 +2279,7 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2884 | } | 2279 | } |
| 2885 | SET_RANGE_TABLE_WORK_AREA_BIT | 2280 | SET_RANGE_TABLE_WORK_AREA_BIT |
| 2886 | (range_table_work, re_wctype_to_bit (cc)); | 2281 | (range_table_work, re_wctype_to_bit (cc)); |
| 2887 | #endif /* emacs */ | 2282 | |
| 2888 | /* In most cases the matching rule for char classes only | 2283 | /* In most cases the matching rule for char classes only |
| 2889 | uses the syntax table for multibyte chars, so that the | 2284 | uses the syntax table for multibyte chars, so that the |
| 2890 | content of the syntax-table is not hardcoded in the | 2285 | content of the syntax-table is not hardcoded in the |
| @@ -2908,7 +2303,6 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2908 | if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); | 2303 | if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); |
| 2909 | 2304 | ||
| 2910 | PATFETCH (c); | 2305 | PATFETCH (c); |
| 2911 | escaped_char = true; | ||
| 2912 | } | 2306 | } |
| 2913 | else | 2307 | else |
| 2914 | { | 2308 | { |
| @@ -2927,13 +2321,12 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2927 | 2321 | ||
| 2928 | /* Fetch the character which ends the range. */ | 2322 | /* Fetch the character which ends the range. */ |
| 2929 | PATFETCH (c1); | 2323 | PATFETCH (c1); |
| 2930 | #ifdef emacs | 2324 | |
| 2931 | if (CHAR_BYTE8_P (c1) | 2325 | if (CHAR_BYTE8_P (c1) |
| 2932 | && ! ASCII_CHAR_P (c) && ! CHAR_BYTE8_P (c)) | 2326 | && ! ASCII_CHAR_P (c) && ! CHAR_BYTE8_P (c)) |
| 2933 | /* Treat the range from a multibyte character to | 2327 | /* Treat the range from a multibyte character to |
| 2934 | raw-byte character as empty. */ | 2328 | raw-byte character as empty. */ |
| 2935 | c = c1 + 1; | 2329 | c = c1 + 1; |
| 2936 | #endif /* emacs */ | ||
| 2937 | } | 2330 | } |
| 2938 | else | 2331 | else |
| 2939 | /* Range from C to C. */ | 2332 | /* Range from C to C. */ |
| @@ -2947,15 +2340,6 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2947 | } | 2340 | } |
| 2948 | else | 2341 | else |
| 2949 | { | 2342 | { |
| 2950 | #ifndef emacs | ||
| 2951 | /* Set the range into bitmap */ | ||
| 2952 | for (; c <= c1; c++) | ||
| 2953 | { | ||
| 2954 | ch = TRANSLATE (c); | ||
| 2955 | if (ch < (1 << BYTEWIDTH)) | ||
| 2956 | SET_LIST_BIT (ch); | ||
| 2957 | } | ||
| 2958 | #else /* emacs */ | ||
| 2959 | if (c < 128) | 2343 | if (c < 128) |
| 2960 | { | 2344 | { |
| 2961 | ch = min (127, c1); | 2345 | ch = min (127, c1); |
| @@ -2982,7 +2366,6 @@ regex_compile (re_char *pattern, size_t size, | |||
| 2982 | SETUP_UNIBYTE_RANGE (range_table_work, c, c1); | 2366 | SETUP_UNIBYTE_RANGE (range_table_work, c, c1); |
| 2983 | } | 2367 | } |
| 2984 | } | 2368 | } |
| 2985 | #endif /* emacs */ | ||
| 2986 | } | 2369 | } |
| 2987 | } | 2370 | } |
| 2988 | 2371 | ||
| @@ -3007,8 +2390,7 @@ regex_compile (re_char *pattern, size_t size, | |||
| 3007 | /* Indicate the existence of range table. */ | 2390 | /* Indicate the existence of range table. */ |
| 3008 | laststart[1] |= 0x80; | 2391 | laststart[1] |= 0x80; |
| 3009 | 2392 | ||
| 3010 | /* Store the character class flag bits into the range table. | 2393 | /* Store the character class flag bits into the range table. */ |
| 3011 | If not in emacs, these flag bits are always 0. */ | ||
| 3012 | *b++ = RANGE_TABLE_WORK_BITS (range_table_work) & 0xff; | 2394 | *b++ = RANGE_TABLE_WORK_BITS (range_table_work) & 0xff; |
| 3013 | *b++ = RANGE_TABLE_WORK_BITS (range_table_work) >> 8; | 2395 | *b++ = RANGE_TABLE_WORK_BITS (range_table_work) >> 8; |
| 3014 | 2396 | ||
| @@ -3127,8 +2509,6 @@ regex_compile (re_char *pattern, size_t size, | |||
| 3127 | { | 2509 | { |
| 3128 | RETALLOC (compile_stack.stack, compile_stack.size << 1, | 2510 | RETALLOC (compile_stack.stack, compile_stack.size << 1, |
| 3129 | compile_stack_elt_t); | 2511 | compile_stack_elt_t); |
| 3130 | if (compile_stack.stack == NULL) return REG_ESPACE; | ||
| 3131 | |||
| 3132 | compile_stack.size <<= 1; | 2512 | compile_stack.size <<= 1; |
| 3133 | } | 2513 | } |
| 3134 | 2514 | ||
| @@ -3184,7 +2564,7 @@ regex_compile (re_char *pattern, size_t size, | |||
| 3184 | 2564 | ||
| 3185 | /* Since we just checked for an empty stack above, this | 2565 | /* Since we just checked for an empty stack above, this |
| 3186 | ``can't happen''. */ | 2566 | ``can't happen''. */ |
| 3187 | assert (compile_stack.avail != 0); | 2567 | eassert (compile_stack.avail != 0); |
| 3188 | { | 2568 | { |
| 3189 | /* We don't just want to restore into `regnum', because | 2569 | /* We don't just want to restore into `regnum', because |
| 3190 | later groups should continue to be numbered higher, | 2570 | later groups should continue to be numbered higher, |
| @@ -3410,7 +2790,7 @@ regex_compile (re_char *pattern, size_t size, | |||
| 3410 | 2790 | ||
| 3411 | unfetch_interval: | 2791 | unfetch_interval: |
| 3412 | /* If an invalid interval, match the characters as literals. */ | 2792 | /* If an invalid interval, match the characters as literals. */ |
| 3413 | assert (beg_interval); | 2793 | eassert (beg_interval); |
| 3414 | p = beg_interval; | 2794 | p = beg_interval; |
| 3415 | beg_interval = NULL; | 2795 | beg_interval = NULL; |
| 3416 | 2796 | ||
| @@ -3419,13 +2799,12 @@ regex_compile (re_char *pattern, size_t size, | |||
| 3419 | 2799 | ||
| 3420 | if (!(syntax & RE_NO_BK_BRACES)) | 2800 | if (!(syntax & RE_NO_BK_BRACES)) |
| 3421 | { | 2801 | { |
| 3422 | assert (p > pattern && p[-1] == '\\'); | 2802 | eassert (p > pattern && p[-1] == '\\'); |
| 3423 | goto normal_backslash; | 2803 | goto normal_backslash; |
| 3424 | } | 2804 | } |
| 3425 | else | 2805 | else |
| 3426 | goto normal_char; | 2806 | goto normal_char; |
| 3427 | 2807 | ||
| 3428 | #ifdef emacs | ||
| 3429 | case '=': | 2808 | case '=': |
| 3430 | laststart = b; | 2809 | laststart = b; |
| 3431 | BUF_PUSH (at_dot); | 2810 | BUF_PUSH (at_dot); |
| @@ -3454,8 +2833,6 @@ regex_compile (re_char *pattern, size_t size, | |||
| 3454 | PATFETCH (c); | 2833 | PATFETCH (c); |
| 3455 | BUF_PUSH_2 (notcategoryspec, c); | 2834 | BUF_PUSH_2 (notcategoryspec, c); |
| 3456 | break; | 2835 | break; |
| 3457 | #endif /* emacs */ | ||
| 3458 | |||
| 3459 | 2836 | ||
| 3460 | case 'w': | 2837 | case 'w': |
| 3461 | if (syntax & RE_NO_GNU_OPS) | 2838 | if (syntax & RE_NO_GNU_OPS) |
| @@ -3607,7 +2984,7 @@ regex_compile (re_char *pattern, size_t size, | |||
| 3607 | c1 = RE_CHAR_TO_MULTIBYTE (c); | 2984 | c1 = RE_CHAR_TO_MULTIBYTE (c); |
| 3608 | if (! CHAR_BYTE8_P (c1)) | 2985 | if (! CHAR_BYTE8_P (c1)) |
| 3609 | { | 2986 | { |
| 3610 | re_wchar_t c2 = TRANSLATE (c1); | 2987 | int c2 = TRANSLATE (c1); |
| 3611 | 2988 | ||
| 3612 | if (c1 != c2 && (c1 = RE_CHAR_TO_UNIBYTE (c2)) >= 0) | 2989 | if (c1 != c2 && (c1 = RE_CHAR_TO_UNIBYTE (c2)) >= 0) |
| 3613 | c = c1; | 2990 | c = c1; |
| @@ -3638,41 +3015,18 @@ regex_compile (re_char *pattern, size_t size, | |||
| 3638 | /* We have succeeded; set the length of the buffer. */ | 3015 | /* We have succeeded; set the length of the buffer. */ |
| 3639 | bufp->used = b - bufp->buffer; | 3016 | bufp->used = b - bufp->buffer; |
| 3640 | 3017 | ||
| 3641 | #ifdef DEBUG | 3018 | #ifdef REGEX_EMACS_DEBUG |
| 3642 | if (debug > 0) | 3019 | if (regex_emacs_debug > 0) |
| 3643 | { | 3020 | { |
| 3644 | re_compile_fastmap (bufp); | 3021 | re_compile_fastmap (bufp); |
| 3645 | DEBUG_PRINT ("\nCompiled pattern: \n"); | 3022 | DEBUG_PRINT ("\nCompiled pattern: \n"); |
| 3646 | print_compiled_pattern (bufp); | 3023 | print_compiled_pattern (bufp); |
| 3647 | } | 3024 | } |
| 3648 | debug--; | 3025 | regex_emacs_debug--; |
| 3649 | #endif /* DEBUG */ | 3026 | #endif |
| 3650 | |||
| 3651 | #ifndef MATCH_MAY_ALLOCATE | ||
| 3652 | /* Initialize the failure stack to the largest possible stack. This | ||
| 3653 | isn't necessary unless we're trying to avoid calling alloca in | ||
| 3654 | the search and match routines. */ | ||
| 3655 | { | ||
| 3656 | int num_regs = bufp->re_nsub + 1; | ||
| 3657 | |||
| 3658 | if (fail_stack.size < emacs_re_max_failures * TYPICAL_FAILURE_SIZE) | ||
| 3659 | { | ||
| 3660 | fail_stack.size = emacs_re_max_failures * TYPICAL_FAILURE_SIZE; | ||
| 3661 | falk_stack.stack = realloc (fail_stack.stack, | ||
| 3662 | fail_stack.size * sizeof *falk_stack.stack); | ||
| 3663 | } | ||
| 3664 | |||
| 3665 | regex_grow_registers (num_regs); | ||
| 3666 | } | ||
| 3667 | #endif /* not MATCH_MAY_ALLOCATE */ | ||
| 3668 | 3027 | ||
| 3669 | FREE_STACK_RETURN (REG_NOERROR); | 3028 | FREE_STACK_RETURN (REG_NOERROR); |
| 3670 | 3029 | ||
| 3671 | #ifdef emacs | ||
| 3672 | # undef syntax | ||
| 3673 | #else | ||
| 3674 | # undef posix_backtracking | ||
| 3675 | #endif | ||
| 3676 | } /* regex_compile */ | 3030 | } /* regex_compile */ |
| 3677 | 3031 | ||
| 3678 | /* Subroutines for `regex_compile'. */ | 3032 | /* Subroutines for `regex_compile'. */ |
| @@ -3733,11 +3087,11 @@ insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, unsigned cha | |||
| 3733 | after an alternative or a begin-subexpression. We assume there is at | 3087 | after an alternative or a begin-subexpression. We assume there is at |
| 3734 | least one character before the ^. */ | 3088 | least one character before the ^. */ |
| 3735 | 3089 | ||
| 3736 | static boolean | 3090 | static bool |
| 3737 | at_begline_loc_p (re_char *pattern, re_char *p, reg_syntax_t syntax) | 3091 | at_begline_loc_p (re_char *pattern, re_char *p, reg_syntax_t syntax) |
| 3738 | { | 3092 | { |
| 3739 | re_char *prev = p - 2; | 3093 | re_char *prev = p - 2; |
| 3740 | boolean odd_backslashes; | 3094 | bool odd_backslashes; |
| 3741 | 3095 | ||
| 3742 | /* After a subexpression? */ | 3096 | /* After a subexpression? */ |
| 3743 | if (*prev == '(') | 3097 | if (*prev == '(') |
| @@ -3774,11 +3128,11 @@ at_begline_loc_p (re_char *pattern, re_char *p, reg_syntax_t syntax) | |||
| 3774 | /* The dual of at_begline_loc_p. This one is for $. We assume there is | 3128 | /* The dual of at_begline_loc_p. This one is for $. We assume there is |
| 3775 | at least one character after the $, i.e., `P < PEND'. */ | 3129 | at least one character after the $, i.e., `P < PEND'. */ |
| 3776 | 3130 | ||
| 3777 | static boolean | 3131 | static bool |
| 3778 | at_endline_loc_p (re_char *p, re_char *pend, reg_syntax_t syntax) | 3132 | at_endline_loc_p (re_char *p, re_char *pend, reg_syntax_t syntax) |
| 3779 | { | 3133 | { |
| 3780 | re_char *next = p; | 3134 | re_char *next = p; |
| 3781 | boolean next_backslash = *next == '\\'; | 3135 | bool next_backslash = *next == '\\'; |
| 3782 | re_char *next_next = p + 1 < pend ? p + 1 : 0; | 3136 | re_char *next_next = p + 1 < pend ? p + 1 : 0; |
| 3783 | 3137 | ||
| 3784 | return | 3138 | return |
| @@ -3794,10 +3148,10 @@ at_endline_loc_p (re_char *p, re_char *pend, reg_syntax_t syntax) | |||
| 3794 | /* Returns true if REGNUM is in one of COMPILE_STACK's elements and | 3148 | /* Returns true if REGNUM is in one of COMPILE_STACK's elements and |
| 3795 | false if it's not. */ | 3149 | false if it's not. */ |
| 3796 | 3150 | ||
| 3797 | static boolean | 3151 | static bool |
| 3798 | group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum) | 3152 | group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum) |
| 3799 | { | 3153 | { |
| 3800 | ssize_t this_element; | 3154 | ptrdiff_t this_element; |
| 3801 | 3155 | ||
| 3802 | for (this_element = compile_stack.avail - 1; | 3156 | for (this_element = compile_stack.avail - 1; |
| 3803 | this_element >= 0; | 3157 | this_element >= 0; |
| @@ -3823,13 +3177,13 @@ analyze_first (re_char *p, re_char *pend, char *fastmap, | |||
| 3823 | const int multibyte) | 3177 | const int multibyte) |
| 3824 | { | 3178 | { |
| 3825 | int j, k; | 3179 | int j, k; |
| 3826 | boolean not; | 3180 | bool not; |
| 3827 | 3181 | ||
| 3828 | /* If all elements for base leading-codes in fastmap is set, this | 3182 | /* If all elements for base leading-codes in fastmap is set, this |
| 3829 | flag is set true. */ | 3183 | flag is set true. */ |
| 3830 | boolean match_any_multibyte_characters = false; | 3184 | bool match_any_multibyte_characters = false; |
| 3831 | 3185 | ||
| 3832 | assert (p); | 3186 | eassert (p); |
| 3833 | 3187 | ||
| 3834 | /* The loop below works as follows: | 3188 | /* The loop below works as follows: |
| 3835 | - It has a working-list kept in the PATTERN_STACK and which basically | 3189 | - It has a working-list kept in the PATTERN_STACK and which basically |
| @@ -3920,7 +3274,6 @@ analyze_first (re_char *p, re_char *pend, char *fastmap, | |||
| 3920 | if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not) | 3274 | if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not) |
| 3921 | fastmap[j] = 1; | 3275 | fastmap[j] = 1; |
| 3922 | 3276 | ||
| 3923 | #ifdef emacs | ||
| 3924 | if (/* Any leading code can possibly start a character | 3277 | if (/* Any leading code can possibly start a character |
| 3925 | which doesn't match the specified set of characters. */ | 3278 | which doesn't match the specified set of characters. */ |
| 3926 | not | 3279 | not |
| @@ -3966,20 +3319,11 @@ analyze_first (re_char *p, re_char *pend, char *fastmap, | |||
| 3966 | fastmap[j] = 1; | 3319 | fastmap[j] = 1; |
| 3967 | } | 3320 | } |
| 3968 | } | 3321 | } |
| 3969 | #endif | ||
| 3970 | break; | 3322 | break; |
| 3971 | 3323 | ||
| 3972 | case syntaxspec: | 3324 | case syntaxspec: |
| 3973 | case notsyntaxspec: | 3325 | case notsyntaxspec: |
| 3974 | if (!fastmap) break; | 3326 | if (!fastmap) break; |
| 3975 | #ifndef emacs | ||
| 3976 | not = (re_opcode_t)p[-1] == notsyntaxspec; | ||
| 3977 | k = *p++; | ||
| 3978 | for (j = 0; j < (1 << BYTEWIDTH); j++) | ||
| 3979 | if ((SYNTAX (j) == (enum syntaxcode) k) ^ not) | ||
| 3980 | fastmap[j] = 1; | ||
| 3981 | break; | ||
| 3982 | #else /* emacs */ | ||
| 3983 | /* This match depends on text properties. These end with | 3327 | /* This match depends on text properties. These end with |
| 3984 | aborting optimizations. */ | 3328 | aborting optimizations. */ |
| 3985 | return -1; | 3329 | return -1; |
| @@ -4008,7 +3352,6 @@ analyze_first (re_char *p, re_char *pend, char *fastmap, | |||
| 4008 | `continue'. */ | 3352 | `continue'. */ |
| 4009 | 3353 | ||
| 4010 | case at_dot: | 3354 | case at_dot: |
| 4011 | #endif /* !emacs */ | ||
| 4012 | case no_op: | 3355 | case no_op: |
| 4013 | case begline: | 3356 | case begline: |
| 4014 | case endline: | 3357 | case endline: |
| @@ -4066,7 +3409,7 @@ analyze_first (re_char *p, re_char *pend, char *fastmap, | |||
| 4066 | 3409 | ||
| 4067 | case jump_n: | 3410 | case jump_n: |
| 4068 | /* This code simply does not properly handle forward jump_n. */ | 3411 | /* This code simply does not properly handle forward jump_n. */ |
| 4069 | DEBUG_STATEMENT (EXTRACT_NUMBER (j, p); assert (j < 0)); | 3412 | DEBUG_STATEMENT (EXTRACT_NUMBER (j, p); eassert (j < 0)); |
| 4070 | p += 4; | 3413 | p += 4; |
| 4071 | /* jump_n can either jump or fall through. The (backward) jump | 3414 | /* jump_n can either jump or fall through. The (backward) jump |
| 4072 | case has already been handled, so we only need to look at the | 3415 | case has already been handled, so we only need to look at the |
| @@ -4075,7 +3418,7 @@ analyze_first (re_char *p, re_char *pend, char *fastmap, | |||
| 4075 | 3418 | ||
| 4076 | case succeed_n: | 3419 | case succeed_n: |
| 4077 | /* If N == 0, it should be an on_failure_jump_loop instead. */ | 3420 | /* If N == 0, it should be an on_failure_jump_loop instead. */ |
| 4078 | DEBUG_STATEMENT (EXTRACT_NUMBER (j, p + 2); assert (j > 0)); | 3421 | DEBUG_STATEMENT (EXTRACT_NUMBER (j, p + 2); eassert (j > 0)); |
| 4079 | p += 4; | 3422 | p += 4; |
| 4080 | /* We only care about one iteration of the loop, so we don't | 3423 | /* We only care about one iteration of the loop, so we don't |
| 4081 | need to consider the case where this behaves like an | 3424 | need to consider the case where this behaves like an |
| @@ -4126,13 +3469,13 @@ analyze_first (re_char *p, re_char *pend, char *fastmap, | |||
| 4126 | 3469 | ||
| 4127 | Returns 0 if we succeed, -2 if an internal error. */ | 3470 | Returns 0 if we succeed, -2 if an internal error. */ |
| 4128 | 3471 | ||
| 4129 | int | 3472 | static void |
| 4130 | re_compile_fastmap (struct re_pattern_buffer *bufp) | 3473 | re_compile_fastmap (struct re_pattern_buffer *bufp) |
| 4131 | { | 3474 | { |
| 4132 | char *fastmap = bufp->fastmap; | 3475 | char *fastmap = bufp->fastmap; |
| 4133 | int analysis; | 3476 | int analysis; |
| 4134 | 3477 | ||
| 4135 | assert (fastmap && bufp->buffer); | 3478 | eassert (fastmap && bufp->buffer); |
| 4136 | 3479 | ||
| 4137 | memset (fastmap, 0, 1 << BYTEWIDTH); /* Assume nothing's valid. */ | 3480 | memset (fastmap, 0, 1 << BYTEWIDTH); /* Assume nothing's valid. */ |
| 4138 | bufp->fastmap_accurate = 1; /* It will be when we're done. */ | 3481 | bufp->fastmap_accurate = 1; /* It will be when we're done. */ |
| @@ -4140,14 +3483,13 @@ re_compile_fastmap (struct re_pattern_buffer *bufp) | |||
| 4140 | analysis = analyze_first (bufp->buffer, bufp->buffer + bufp->used, | 3483 | analysis = analyze_first (bufp->buffer, bufp->buffer + bufp->used, |
| 4141 | fastmap, RE_MULTIBYTE_P (bufp)); | 3484 | fastmap, RE_MULTIBYTE_P (bufp)); |
| 4142 | bufp->can_be_null = (analysis != 0); | 3485 | bufp->can_be_null = (analysis != 0); |
| 4143 | return 0; | ||
| 4144 | } /* re_compile_fastmap */ | 3486 | } /* re_compile_fastmap */ |
| 4145 | 3487 | ||
| 4146 | /* Set REGS to hold NUM_REGS registers, storing them in STARTS and | 3488 | /* Set REGS to hold NUM_REGS registers, storing them in STARTS and |
| 4147 | ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use | 3489 | ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use |
| 4148 | this memory for recording register information. STARTS and ENDS | 3490 | this memory for recording register information. STARTS and ENDS |
| 4149 | must be allocated using the malloc library routine, and must each | 3491 | must be allocated using the malloc library routine, and must each |
| 4150 | be at least NUM_REGS * sizeof (regoff_t) bytes long. | 3492 | be at least NUM_REGS * sizeof (ptrdiff_t) bytes long. |
| 4151 | 3493 | ||
| 4152 | If NUM_REGS == 0, then subsequent matches should allocate their own | 3494 | If NUM_REGS == 0, then subsequent matches should allocate their own |
| 4153 | register data. | 3495 | register data. |
| @@ -4157,7 +3499,8 @@ re_compile_fastmap (struct re_pattern_buffer *bufp) | |||
| 4157 | freeing the old data. */ | 3499 | freeing the old data. */ |
| 4158 | 3500 | ||
| 4159 | void | 3501 | void |
| 4160 | re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs, unsigned int num_regs, regoff_t *starts, regoff_t *ends) | 3502 | re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs, |
| 3503 | unsigned int num_regs, ptrdiff_t *starts, ptrdiff_t *ends) | ||
| 4161 | { | 3504 | { |
| 4162 | if (num_regs) | 3505 | if (num_regs) |
| 4163 | { | 3506 | { |
| @@ -4173,21 +3516,19 @@ re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs, uns | |||
| 4173 | regs->start = regs->end = 0; | 3516 | regs->start = regs->end = 0; |
| 4174 | } | 3517 | } |
| 4175 | } | 3518 | } |
| 4176 | WEAK_ALIAS (__re_set_registers, re_set_registers) | ||
| 4177 | 3519 | ||
| 4178 | /* Searching routines. */ | 3520 | /* Searching routines. */ |
| 4179 | 3521 | ||
| 4180 | /* Like re_search_2, below, but only one string is specified, and | 3522 | /* Like re_search_2, below, but only one string is specified, and |
| 4181 | doesn't let you say where to stop matching. */ | 3523 | doesn't let you say where to stop matching. */ |
| 4182 | 3524 | ||
| 4183 | regoff_t | 3525 | ptrdiff_t |
| 4184 | re_search (struct re_pattern_buffer *bufp, const char *string, size_t size, | 3526 | re_search (struct re_pattern_buffer *bufp, const char *string, size_t size, |
| 4185 | ssize_t startpos, ssize_t range, struct re_registers *regs) | 3527 | ptrdiff_t startpos, ptrdiff_t range, struct re_registers *regs) |
| 4186 | { | 3528 | { |
| 4187 | return re_search_2 (bufp, NULL, 0, string, size, startpos, range, | 3529 | return re_search_2 (bufp, NULL, 0, string, size, startpos, range, |
| 4188 | regs, size); | 3530 | regs, size); |
| 4189 | } | 3531 | } |
| 4190 | WEAK_ALIAS (__re_search, re_search) | ||
| 4191 | 3532 | ||
| 4192 | /* Head address of virtual concatenation of string. */ | 3533 | /* Head address of virtual concatenation of string. */ |
| 4193 | #define HEAD_ADDR_VSTRING(P) \ | 3534 | #define HEAD_ADDR_VSTRING(P) \ |
| @@ -4218,21 +3559,21 @@ WEAK_ALIAS (__re_search, re_search) | |||
| 4218 | found, -1 if no match, or -2 if error (such as failure | 3559 | found, -1 if no match, or -2 if error (such as failure |
| 4219 | stack overflow). */ | 3560 | stack overflow). */ |
| 4220 | 3561 | ||
| 4221 | regoff_t | 3562 | ptrdiff_t |
| 4222 | re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1, | 3563 | re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1, |
| 4223 | const char *str2, size_t size2, ssize_t startpos, ssize_t range, | 3564 | const char *str2, size_t size2, ptrdiff_t startpos, ptrdiff_t range, |
| 4224 | struct re_registers *regs, ssize_t stop) | 3565 | struct re_registers *regs, ptrdiff_t stop) |
| 4225 | { | 3566 | { |
| 4226 | regoff_t val; | 3567 | ptrdiff_t val; |
| 4227 | re_char *string1 = (re_char *) str1; | 3568 | re_char *string1 = (re_char *) str1; |
| 4228 | re_char *string2 = (re_char *) str2; | 3569 | re_char *string2 = (re_char *) str2; |
| 4229 | register char *fastmap = bufp->fastmap; | 3570 | char *fastmap = bufp->fastmap; |
| 4230 | register RE_TRANSLATE_TYPE translate = bufp->translate; | 3571 | Lisp_Object translate = bufp->translate; |
| 4231 | size_t total_size = size1 + size2; | 3572 | size_t total_size = size1 + size2; |
| 4232 | ssize_t endpos = startpos + range; | 3573 | ptrdiff_t endpos = startpos + range; |
| 4233 | boolean anchored_start; | 3574 | bool anchored_start; |
| 4234 | /* Nonzero if we are searching multibyte string. */ | 3575 | /* Nonzero if we are searching multibyte string. */ |
| 4235 | const boolean multibyte = RE_TARGET_MULTIBYTE_P (bufp); | 3576 | bool multibyte = RE_TARGET_MULTIBYTE_P (bufp); |
| 4236 | 3577 | ||
| 4237 | /* Check for out-of-range STARTPOS. */ | 3578 | /* Check for out-of-range STARTPOS. */ |
| 4238 | if (startpos < 0 || startpos > total_size) | 3579 | if (startpos < 0 || startpos > total_size) |
| @@ -4256,7 +3597,6 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1, | |||
| 4256 | range = 0; | 3597 | range = 0; |
| 4257 | } | 3598 | } |
| 4258 | 3599 | ||
| 4259 | #ifdef emacs | ||
| 4260 | /* In a forward search for something that starts with \=. | 3600 | /* In a forward search for something that starts with \=. |
| 4261 | don't keep searching past point. */ | 3601 | don't keep searching past point. */ |
| 4262 | if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0) | 3602 | if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0) |
| @@ -4265,7 +3605,6 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1, | |||
| 4265 | if (range < 0) | 3605 | if (range < 0) |
| 4266 | return -1; | 3606 | return -1; |
| 4267 | } | 3607 | } |
| 4268 | #endif /* emacs */ | ||
| 4269 | 3608 | ||
| 4270 | /* Update the fastmap now if not correct already. */ | 3609 | /* Update the fastmap now if not correct already. */ |
| 4271 | if (fastmap && !bufp->fastmap_accurate) | 3610 | if (fastmap && !bufp->fastmap_accurate) |
| @@ -4274,14 +3613,12 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1, | |||
| 4274 | /* See whether the pattern is anchored. */ | 3613 | /* See whether the pattern is anchored. */ |
| 4275 | anchored_start = (bufp->buffer[0] == begline); | 3614 | anchored_start = (bufp->buffer[0] == begline); |
| 4276 | 3615 | ||
| 4277 | #ifdef emacs | ||
| 4278 | gl_state.object = re_match_object; /* Used by SYNTAX_TABLE_BYTE_TO_CHAR. */ | 3616 | gl_state.object = re_match_object; /* Used by SYNTAX_TABLE_BYTE_TO_CHAR. */ |
| 4279 | { | 3617 | { |
| 4280 | ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (startpos)); | 3618 | ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (startpos)); |
| 4281 | 3619 | ||
| 4282 | SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1); | 3620 | SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1); |
| 4283 | } | 3621 | } |
| 4284 | #endif | ||
| 4285 | 3622 | ||
| 4286 | /* Loop through the string, looking for a place to start matching. */ | 3623 | /* Loop through the string, looking for a place to start matching. */ |
| 4287 | for (;;) | 3624 | for (;;) |
| @@ -4304,14 +3641,14 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1, | |||
| 4304 | the first null string. */ | 3641 | the first null string. */ |
| 4305 | if (fastmap && startpos < total_size && !bufp->can_be_null) | 3642 | if (fastmap && startpos < total_size && !bufp->can_be_null) |
| 4306 | { | 3643 | { |
| 4307 | register re_char *d; | 3644 | re_char *d; |
| 4308 | register re_wchar_t buf_ch; | 3645 | int buf_ch; |
| 4309 | 3646 | ||
| 4310 | d = POS_ADDR_VSTRING (startpos); | 3647 | d = POS_ADDR_VSTRING (startpos); |
| 4311 | 3648 | ||
| 4312 | if (range > 0) /* Searching forwards. */ | 3649 | if (range > 0) /* Searching forwards. */ |
| 4313 | { | 3650 | { |
| 4314 | ssize_t irange = range, lim = 0; | 3651 | ptrdiff_t irange = range, lim = 0; |
| 4315 | 3652 | ||
| 4316 | if (startpos < size1 && startpos + range >= size1) | 3653 | if (startpos < size1 && startpos + range >= size1) |
| 4317 | lim = range - (size1 - startpos); | 3654 | lim = range - (size1 - startpos); |
| @@ -4336,11 +3673,9 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1, | |||
| 4336 | else | 3673 | else |
| 4337 | while (range > lim) | 3674 | while (range > lim) |
| 4338 | { | 3675 | { |
| 4339 | register re_wchar_t ch, translated; | ||
| 4340 | |||
| 4341 | buf_ch = *d; | 3676 | buf_ch = *d; |
| 4342 | ch = RE_CHAR_TO_MULTIBYTE (buf_ch); | 3677 | int ch = RE_CHAR_TO_MULTIBYTE (buf_ch); |
| 4343 | translated = RE_TRANSLATE (translate, ch); | 3678 | int translated = RE_TRANSLATE (translate, ch); |
| 4344 | if (translated != ch | 3679 | if (translated != ch |
| 4345 | && (ch = RE_CHAR_TO_UNIBYTE (translated)) >= 0) | 3680 | && (ch = RE_CHAR_TO_UNIBYTE (translated)) >= 0) |
| 4346 | buf_ch = ch; | 3681 | buf_ch = ch; |
| @@ -4383,11 +3718,9 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1, | |||
| 4383 | } | 3718 | } |
| 4384 | else | 3719 | else |
| 4385 | { | 3720 | { |
| 4386 | register re_wchar_t ch, translated; | ||
| 4387 | |||
| 4388 | buf_ch = *d; | 3721 | buf_ch = *d; |
| 4389 | ch = RE_CHAR_TO_MULTIBYTE (buf_ch); | 3722 | int ch = RE_CHAR_TO_MULTIBYTE (buf_ch); |
| 4390 | translated = TRANSLATE (ch); | 3723 | int translated = TRANSLATE (ch); |
| 4391 | if (translated != ch | 3724 | if (translated != ch |
| 4392 | && (ch = RE_CHAR_TO_UNIBYTE (translated)) >= 0) | 3725 | && (ch = RE_CHAR_TO_UNIBYTE (translated)) >= 0) |
| 4393 | buf_ch = ch; | 3726 | buf_ch = ch; |
| @@ -4457,13 +3790,12 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1, | |||
| 4457 | } | 3790 | } |
| 4458 | return -1; | 3791 | return -1; |
| 4459 | } /* re_search_2 */ | 3792 | } /* re_search_2 */ |
| 4460 | WEAK_ALIAS (__re_search_2, re_search_2) | ||
| 4461 | 3793 | ||
| 4462 | /* Declarations and macros for re_match_2. */ | 3794 | /* Declarations and macros for re_match_2. */ |
| 4463 | 3795 | ||
| 4464 | static int bcmp_translate (re_char *s1, re_char *s2, | 3796 | static int bcmp_translate (re_char *s1, re_char *s2, |
| 4465 | register ssize_t len, | 3797 | ptrdiff_t len, |
| 4466 | RE_TRANSLATE_TYPE translate, | 3798 | Lisp_Object translate, |
| 4467 | const int multibyte); | 3799 | const int multibyte); |
| 4468 | 3800 | ||
| 4469 | /* This converts PTR, a pointer into one of the search strings `string1' | 3801 | /* This converts PTR, a pointer into one of the search strings `string1' |
| @@ -4531,29 +3863,6 @@ static int bcmp_translate (re_char *s1, re_char *s2, | |||
| 4531 | || WORDCHAR_P (d - 1) != WORDCHAR_P (d)) | 3863 | || WORDCHAR_P (d - 1) != WORDCHAR_P (d)) |
| 4532 | #endif | 3864 | #endif |
| 4533 | 3865 | ||
| 4534 | /* Free everything we malloc. */ | ||
| 4535 | #ifdef MATCH_MAY_ALLOCATE | ||
| 4536 | # define FREE_VAR(var) \ | ||
| 4537 | do { \ | ||
| 4538 | if (var) \ | ||
| 4539 | { \ | ||
| 4540 | REGEX_FREE (var); \ | ||
| 4541 | var = NULL; \ | ||
| 4542 | } \ | ||
| 4543 | } while (0) | ||
| 4544 | # define FREE_VARIABLES() \ | ||
| 4545 | do { \ | ||
| 4546 | REGEX_FREE_STACK (fail_stack.stack); \ | ||
| 4547 | FREE_VAR (regstart); \ | ||
| 4548 | FREE_VAR (regend); \ | ||
| 4549 | FREE_VAR (best_regstart); \ | ||
| 4550 | FREE_VAR (best_regend); \ | ||
| 4551 | REGEX_SAFE_FREE (); \ | ||
| 4552 | } while (0) | ||
| 4553 | #else | ||
| 4554 | # define FREE_VARIABLES() ((void)0) /* Do nothing! But inhibit gcc warning. */ | ||
| 4555 | #endif /* not MATCH_MAY_ALLOCATE */ | ||
| 4556 | |||
| 4557 | 3866 | ||
| 4558 | /* Optimization routines. */ | 3867 | /* Optimization routines. */ |
| 4559 | 3868 | ||
| @@ -4586,10 +3895,8 @@ skip_one_char (re_char *p) | |||
| 4586 | 3895 | ||
| 4587 | case syntaxspec: | 3896 | case syntaxspec: |
| 4588 | case notsyntaxspec: | 3897 | case notsyntaxspec: |
| 4589 | #ifdef emacs | ||
| 4590 | case categoryspec: | 3898 | case categoryspec: |
| 4591 | case notcategoryspec: | 3899 | case notcategoryspec: |
| 4592 | #endif /* emacs */ | ||
| 4593 | p++; | 3900 | p++; |
| 4594 | break; | 3901 | break; |
| 4595 | 3902 | ||
| @@ -4623,7 +3930,7 @@ skip_noops (re_char *p, re_char *pend) | |||
| 4623 | return p; | 3930 | return p; |
| 4624 | } | 3931 | } |
| 4625 | } | 3932 | } |
| 4626 | assert (p == pend); | 3933 | eassert (p == pend); |
| 4627 | return p; | 3934 | return p; |
| 4628 | } | 3935 | } |
| 4629 | 3936 | ||
| @@ -4656,11 +3963,10 @@ execute_charset (re_char **pp, unsigned c, unsigned corig, bool unibyte) | |||
| 4656 | && p[2 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) | 3963 | && p[2 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) |
| 4657 | return !not; | 3964 | return !not; |
| 4658 | } | 3965 | } |
| 4659 | #ifdef emacs | ||
| 4660 | else if (rtp) | 3966 | else if (rtp) |
| 4661 | { | 3967 | { |
| 4662 | int class_bits = CHARSET_RANGE_TABLE_BITS (p); | 3968 | int class_bits = CHARSET_RANGE_TABLE_BITS (p); |
| 4663 | re_wchar_t range_start, range_end; | 3969 | int range_start, range_end; |
| 4664 | 3970 | ||
| 4665 | /* Sort tests by the most commonly used classes with some adjustment to which | 3971 | /* Sort tests by the most commonly used classes with some adjustment to which |
| 4666 | tests are easiest to perform. Take a look at comment in re_wctype_parse | 3972 | tests are easiest to perform. Take a look at comment in re_wctype_parse |
| @@ -4691,7 +3997,7 @@ execute_charset (re_char **pp, unsigned c, unsigned corig, bool unibyte) | |||
| 4691 | return !not; | 3997 | return !not; |
| 4692 | } | 3998 | } |
| 4693 | } | 3999 | } |
| 4694 | #endif /* emacs */ | 4000 | |
| 4695 | return not; | 4001 | return not; |
| 4696 | } | 4002 | } |
| 4697 | 4003 | ||
| @@ -4701,11 +4007,11 @@ mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1, | |||
| 4701 | re_char *p2) | 4007 | re_char *p2) |
| 4702 | { | 4008 | { |
| 4703 | re_opcode_t op2; | 4009 | re_opcode_t op2; |
| 4704 | const boolean multibyte = RE_MULTIBYTE_P (bufp); | 4010 | bool multibyte = RE_MULTIBYTE_P (bufp); |
| 4705 | unsigned char *pend = bufp->buffer + bufp->used; | 4011 | unsigned char *pend = bufp->buffer + bufp->used; |
| 4706 | 4012 | ||
| 4707 | assert (p1 >= bufp->buffer && p1 < pend | 4013 | eassert (p1 >= bufp->buffer && p1 < pend |
| 4708 | && p2 >= bufp->buffer && p2 <= pend); | 4014 | && p2 >= bufp->buffer && p2 <= pend); |
| 4709 | 4015 | ||
| 4710 | /* Skip over open/close-group commands. | 4016 | /* Skip over open/close-group commands. |
| 4711 | If what follows this loop is a ...+ construct, | 4017 | If what follows this loop is a ...+ construct, |
| @@ -4716,8 +4022,8 @@ mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1, | |||
| 4716 | is only used in the case where p1 is a simple match operator. */ | 4022 | is only used in the case where p1 is a simple match operator. */ |
| 4717 | /* p1 = skip_noops (p1, pend); */ | 4023 | /* p1 = skip_noops (p1, pend); */ |
| 4718 | 4024 | ||
| 4719 | assert (p1 >= bufp->buffer && p1 < pend | 4025 | eassert (p1 >= bufp->buffer && p1 < pend |
| 4720 | && p2 >= bufp->buffer && p2 <= pend); | 4026 | && p2 >= bufp->buffer && p2 <= pend); |
| 4721 | 4027 | ||
| 4722 | op2 = p2 == pend ? succeed : *p2; | 4028 | op2 = p2 == pend ? succeed : *p2; |
| 4723 | 4029 | ||
| @@ -4736,7 +4042,7 @@ mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1, | |||
| 4736 | case endline: | 4042 | case endline: |
| 4737 | case exactn: | 4043 | case exactn: |
| 4738 | { | 4044 | { |
| 4739 | register re_wchar_t c | 4045 | int c |
| 4740 | = (re_opcode_t) *p2 == endline ? '\n' | 4046 | = (re_opcode_t) *p2 == endline ? '\n' |
| 4741 | : RE_STRING_CHAR (p2 + 2, multibyte); | 4047 | : RE_STRING_CHAR (p2 + 2, multibyte); |
| 4742 | 4048 | ||
| @@ -4866,12 +4172,10 @@ mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1, | |||
| 4866 | || (re_opcode_t) *p1 == syntaxspec) | 4172 | || (re_opcode_t) *p1 == syntaxspec) |
| 4867 | && p1[1] == Sword); | 4173 | && p1[1] == Sword); |
| 4868 | 4174 | ||
| 4869 | #ifdef emacs | ||
| 4870 | case categoryspec: | 4175 | case categoryspec: |
| 4871 | return ((re_opcode_t) *p1 == notcategoryspec && p1[1] == p2[1]); | 4176 | return ((re_opcode_t) *p1 == notcategoryspec && p1[1] == p2[1]); |
| 4872 | case notcategoryspec: | 4177 | case notcategoryspec: |
| 4873 | return ((re_opcode_t) *p1 == categoryspec && p1[1] == p2[1]); | 4178 | return ((re_opcode_t) *p1 == categoryspec && p1[1] == p2[1]); |
| 4874 | #endif /* emacs */ | ||
| 4875 | 4179 | ||
| 4876 | default: | 4180 | default: |
| 4877 | ; | 4181 | ; |
| @@ -4884,20 +4188,6 @@ mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1, | |||
| 4884 | 4188 | ||
| 4885 | /* Matching routines. */ | 4189 | /* Matching routines. */ |
| 4886 | 4190 | ||
| 4887 | #ifndef emacs /* Emacs never uses this. */ | ||
| 4888 | /* re_match is like re_match_2 except it takes only a single string. */ | ||
| 4889 | |||
| 4890 | regoff_t | ||
| 4891 | re_match (struct re_pattern_buffer *bufp, const char *string, | ||
| 4892 | size_t size, ssize_t pos, struct re_registers *regs) | ||
| 4893 | { | ||
| 4894 | regoff_t result = re_match_2_internal (bufp, NULL, 0, (re_char *) string, | ||
| 4895 | size, pos, regs, size); | ||
| 4896 | return result; | ||
| 4897 | } | ||
| 4898 | WEAK_ALIAS (__re_match, re_match) | ||
| 4899 | #endif /* not emacs */ | ||
| 4900 | |||
| 4901 | /* re_match_2 matches the compiled pattern in BUFP against the | 4191 | /* re_match_2 matches the compiled pattern in BUFP against the |
| 4902 | the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 | 4192 | the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 |
| 4903 | and SIZE2, respectively). We start matching at POS, and stop | 4193 | and SIZE2, respectively). We start matching at POS, and stop |
| @@ -4911,34 +4201,31 @@ WEAK_ALIAS (__re_match, re_match) | |||
| 4911 | failure stack overflowing). Otherwise, we return the length of the | 4201 | failure stack overflowing). Otherwise, we return the length of the |
| 4912 | matched substring. */ | 4202 | matched substring. */ |
| 4913 | 4203 | ||
| 4914 | regoff_t | 4204 | ptrdiff_t |
| 4915 | re_match_2 (struct re_pattern_buffer *bufp, const char *string1, | 4205 | re_match_2 (struct re_pattern_buffer *bufp, const char *string1, |
| 4916 | size_t size1, const char *string2, size_t size2, ssize_t pos, | 4206 | size_t size1, const char *string2, size_t size2, ptrdiff_t pos, |
| 4917 | struct re_registers *regs, ssize_t stop) | 4207 | struct re_registers *regs, ptrdiff_t stop) |
| 4918 | { | 4208 | { |
| 4919 | regoff_t result; | 4209 | ptrdiff_t result; |
| 4920 | 4210 | ||
| 4921 | #ifdef emacs | 4211 | ptrdiff_t charpos; |
| 4922 | ssize_t charpos; | ||
| 4923 | gl_state.object = re_match_object; /* Used by SYNTAX_TABLE_BYTE_TO_CHAR. */ | 4212 | gl_state.object = re_match_object; /* Used by SYNTAX_TABLE_BYTE_TO_CHAR. */ |
| 4924 | charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (pos)); | 4213 | charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (pos)); |
| 4925 | SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1); | 4214 | SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1); |
| 4926 | #endif | ||
| 4927 | 4215 | ||
| 4928 | result = re_match_2_internal (bufp, (re_char *) string1, size1, | 4216 | result = re_match_2_internal (bufp, (re_char *) string1, size1, |
| 4929 | (re_char *) string2, size2, | 4217 | (re_char *) string2, size2, |
| 4930 | pos, regs, stop); | 4218 | pos, regs, stop); |
| 4931 | return result; | 4219 | return result; |
| 4932 | } | 4220 | } |
| 4933 | WEAK_ALIAS (__re_match_2, re_match_2) | ||
| 4934 | 4221 | ||
| 4935 | 4222 | ||
| 4936 | /* This is a separate function so that we can force an alloca cleanup | 4223 | /* This is a separate function so that we can force an alloca cleanup |
| 4937 | afterwards. */ | 4224 | afterwards. */ |
| 4938 | static regoff_t | 4225 | static ptrdiff_t |
| 4939 | re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | 4226 | re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, |
| 4940 | size_t size1, re_char *string2, size_t size2, | 4227 | size_t size1, re_char *string2, size_t size2, |
| 4941 | ssize_t pos, struct re_registers *regs, ssize_t stop) | 4228 | ptrdiff_t pos, struct re_registers *regs, ptrdiff_t stop) |
| 4942 | { | 4229 | { |
| 4943 | /* General temporaries. */ | 4230 | /* General temporaries. */ |
| 4944 | int mcnt; | 4231 | int mcnt; |
| @@ -4965,13 +4252,13 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 4965 | re_char *pend = p + bufp->used; | 4252 | re_char *pend = p + bufp->used; |
| 4966 | 4253 | ||
| 4967 | /* We use this to map every character in the string. */ | 4254 | /* We use this to map every character in the string. */ |
| 4968 | RE_TRANSLATE_TYPE translate = bufp->translate; | 4255 | Lisp_Object translate = bufp->translate; |
| 4969 | 4256 | ||
| 4970 | /* Nonzero if BUFP is setup from a multibyte regex. */ | 4257 | /* True if BUFP is setup from a multibyte regex. */ |
| 4971 | const boolean multibyte = RE_MULTIBYTE_P (bufp); | 4258 | bool multibyte = RE_MULTIBYTE_P (bufp); |
| 4972 | 4259 | ||
| 4973 | /* Nonzero if STRING1/STRING2 are multibyte. */ | 4260 | /* True if STRING1/STRING2 are multibyte. */ |
| 4974 | const boolean target_multibyte = RE_TARGET_MULTIBYTE_P (bufp); | 4261 | bool target_multibyte = RE_TARGET_MULTIBYTE_P (bufp); |
| 4975 | 4262 | ||
| 4976 | /* Failure point stack. Each place that can handle a failure further | 4263 | /* Failure point stack. Each place that can handle a failure further |
| 4977 | down the line pushes a failure point on this stack. It consists of | 4264 | down the line pushes a failure point on this stack. It consists of |
| @@ -4980,19 +4267,11 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 4980 | registers, and, finally, two char *'s. The first char * is where | 4267 | registers, and, finally, two char *'s. The first char * is where |
| 4981 | to resume scanning the pattern; the second one is where to resume | 4268 | to resume scanning the pattern; the second one is where to resume |
| 4982 | scanning the strings. */ | 4269 | scanning the strings. */ |
| 4983 | #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ | ||
| 4984 | fail_stack_type fail_stack; | 4270 | fail_stack_type fail_stack; |
| 4985 | #endif | ||
| 4986 | #ifdef DEBUG_COMPILES_ARGUMENTS | 4271 | #ifdef DEBUG_COMPILES_ARGUMENTS |
| 4987 | unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0; | 4272 | unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0; |
| 4988 | #endif | 4273 | #endif |
| 4989 | 4274 | ||
| 4990 | #if defined REL_ALLOC && defined REGEX_MALLOC | ||
| 4991 | /* This holds the pointer to the failure stack, when | ||
| 4992 | it is allocated relocatably. */ | ||
| 4993 | fail_stack_elt_t *failure_stack_ptr; | ||
| 4994 | #endif | ||
| 4995 | |||
| 4996 | /* We fill all the registers internally, independent of what we | 4275 | /* We fill all the registers internally, independent of what we |
| 4997 | return, for use in backreferences. The number here includes | 4276 | return, for use in backreferences. The number here includes |
| 4998 | an element for register zero. */ | 4277 | an element for register zero. */ |
| @@ -5005,18 +4284,14 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 5005 | matching and the regnum-th regend points to right after where we | 4284 | matching and the regnum-th regend points to right after where we |
| 5006 | stopped matching the regnum-th subexpression. (The zeroth register | 4285 | stopped matching the regnum-th subexpression. (The zeroth register |
| 5007 | keeps track of what the whole pattern matches.) */ | 4286 | keeps track of what the whole pattern matches.) */ |
| 5008 | #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ | 4287 | re_char **regstart UNINIT, **regend UNINIT; |
| 5009 | re_char **regstart, **regend; | ||
| 5010 | #endif | ||
| 5011 | 4288 | ||
| 5012 | /* The following record the register info as found in the above | 4289 | /* The following record the register info as found in the above |
| 5013 | variables when we find a match better than any we've seen before. | 4290 | variables when we find a match better than any we've seen before. |
| 5014 | This happens as we backtrack through the failure points, which in | 4291 | This happens as we backtrack through the failure points, which in |
| 5015 | turn happens only if we have not yet matched the entire string. */ | 4292 | turn happens only if we have not yet matched the entire string. */ |
| 5016 | unsigned best_regs_set = false; | 4293 | unsigned best_regs_set = false; |
| 5017 | #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ | 4294 | re_char **best_regstart UNINIT, **best_regend UNINIT; |
| 5018 | re_char **best_regstart, **best_regend; | ||
| 5019 | #endif | ||
| 5020 | 4295 | ||
| 5021 | /* Logically, this is `best_regend[0]'. But we don't want to have to | 4296 | /* Logically, this is `best_regend[0]'. But we don't want to have to |
| 5022 | allocate space for that if we're not allocating space for anything | 4297 | allocate space for that if we're not allocating space for anything |
| @@ -5039,7 +4314,6 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 5039 | 4314 | ||
| 5040 | INIT_FAIL_STACK (); | 4315 | INIT_FAIL_STACK (); |
| 5041 | 4316 | ||
| 5042 | #ifdef MATCH_MAY_ALLOCATE | ||
| 5043 | /* Do not bother to initialize all the register variables if there are | 4317 | /* Do not bother to initialize all the register variables if there are |
| 5044 | no groups in the pattern, as it takes a fair amount of time. If | 4318 | no groups in the pattern, as it takes a fair amount of time. If |
| 5045 | there are groups, we include space for register 0 (the whole | 4319 | there are groups, we include space for register 0 (the whole |
| @@ -5047,29 +4321,16 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 5047 | array indexing. We should fix this. */ | 4321 | array indexing. We should fix this. */ |
| 5048 | if (bufp->re_nsub) | 4322 | if (bufp->re_nsub) |
| 5049 | { | 4323 | { |
| 5050 | regstart = REGEX_TALLOC (num_regs, re_char *); | 4324 | regstart = SAFE_ALLOCA (num_regs * 4 * sizeof *regstart); |
| 5051 | regend = REGEX_TALLOC (num_regs, re_char *); | 4325 | regend = regstart + num_regs; |
| 5052 | best_regstart = REGEX_TALLOC (num_regs, re_char *); | 4326 | best_regstart = regend + num_regs; |
| 5053 | best_regend = REGEX_TALLOC (num_regs, re_char *); | 4327 | best_regend = best_regstart + num_regs; |
| 5054 | |||
| 5055 | if (!(regstart && regend && best_regstart && best_regend)) | ||
| 5056 | { | ||
| 5057 | FREE_VARIABLES (); | ||
| 5058 | return -2; | ||
| 5059 | } | ||
| 5060 | } | 4328 | } |
| 5061 | else | ||
| 5062 | { | ||
| 5063 | /* We must initialize all our variables to NULL, so that | ||
| 5064 | `FREE_VARIABLES' doesn't try to free them. */ | ||
| 5065 | regstart = regend = best_regstart = best_regend = NULL; | ||
| 5066 | } | ||
| 5067 | #endif /* MATCH_MAY_ALLOCATE */ | ||
| 5068 | 4329 | ||
| 5069 | /* The starting position is bogus. */ | 4330 | /* The starting position is bogus. */ |
| 5070 | if (pos < 0 || pos > size1 + size2) | 4331 | if (pos < 0 || pos > size1 + size2) |
| 5071 | { | 4332 | { |
| 5072 | FREE_VARIABLES (); | 4333 | SAFE_FREE (); |
| 5073 | return -1; | 4334 | return -1; |
| 5074 | } | 4335 | } |
| 5075 | 4336 | ||
| @@ -5229,13 +4490,8 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 5229 | extra element beyond `num_regs' for the `-1' marker | 4490 | extra element beyond `num_regs' for the `-1' marker |
| 5230 | GNU code uses. */ | 4491 | GNU code uses. */ |
| 5231 | regs->num_regs = max (RE_NREGS, num_regs + 1); | 4492 | regs->num_regs = max (RE_NREGS, num_regs + 1); |
| 5232 | regs->start = TALLOC (regs->num_regs, regoff_t); | 4493 | regs->start = TALLOC (regs->num_regs, ptrdiff_t); |
| 5233 | regs->end = TALLOC (regs->num_regs, regoff_t); | 4494 | regs->end = TALLOC (regs->num_regs, ptrdiff_t); |
| 5234 | if (regs->start == NULL || regs->end == NULL) | ||
| 5235 | { | ||
| 5236 | FREE_VARIABLES (); | ||
| 5237 | return -2; | ||
| 5238 | } | ||
| 5239 | bufp->regs_allocated = REGS_REALLOCATE; | 4495 | bufp->regs_allocated = REGS_REALLOCATE; |
| 5240 | } | 4496 | } |
| 5241 | else if (bufp->regs_allocated == REGS_REALLOCATE) | 4497 | else if (bufp->regs_allocated == REGS_REALLOCATE) |
| @@ -5245,21 +4501,12 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 5245 | if (regs->num_regs < num_regs + 1) | 4501 | if (regs->num_regs < num_regs + 1) |
| 5246 | { | 4502 | { |
| 5247 | regs->num_regs = num_regs + 1; | 4503 | regs->num_regs = num_regs + 1; |
| 5248 | RETALLOC (regs->start, regs->num_regs, regoff_t); | 4504 | RETALLOC (regs->start, regs->num_regs, ptrdiff_t); |
| 5249 | RETALLOC (regs->end, regs->num_regs, regoff_t); | 4505 | RETALLOC (regs->end, regs->num_regs, ptrdiff_t); |
| 5250 | if (regs->start == NULL || regs->end == NULL) | ||
| 5251 | { | ||
| 5252 | FREE_VARIABLES (); | ||
| 5253 | return -2; | ||
| 5254 | } | ||
| 5255 | } | 4506 | } |
| 5256 | } | 4507 | } |
| 5257 | else | 4508 | else |
| 5258 | { | 4509 | eassert (bufp->regs_allocated == REGS_FIXED); |
| 5259 | /* These braces fend off a "empty body in an else-statement" | ||
| 5260 | warning under GCC when assert expands to nothing. */ | ||
| 5261 | assert (bufp->regs_allocated == REGS_FIXED); | ||
| 5262 | } | ||
| 5263 | 4510 | ||
| 5264 | /* Convert the pointer data in `regstart' and `regend' to | 4511 | /* Convert the pointer data in `regstart' and `regend' to |
| 5265 | indices. Register zero has to be set differently, | 4512 | indices. Register zero has to be set differently, |
| @@ -5301,7 +4548,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 5301 | 4548 | ||
| 5302 | DEBUG_PRINT ("Returning %td from re_match_2.\n", dcnt); | 4549 | DEBUG_PRINT ("Returning %td from re_match_2.\n", dcnt); |
| 5303 | 4550 | ||
| 5304 | FREE_VARIABLES (); | 4551 | SAFE_FREE (); |
| 5305 | return dcnt; | 4552 | return dcnt; |
| 5306 | } | 4553 | } |
| 5307 | 4554 | ||
| @@ -5328,33 +4575,6 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 5328 | /* Remember the start point to rollback upon failure. */ | 4575 | /* Remember the start point to rollback upon failure. */ |
| 5329 | dfail = d; | 4576 | dfail = d; |
| 5330 | 4577 | ||
| 5331 | #ifndef emacs | ||
| 5332 | /* This is written out as an if-else so we don't waste time | ||
| 5333 | testing `translate' inside the loop. */ | ||
| 5334 | if (RE_TRANSLATE_P (translate)) | ||
| 5335 | do | ||
| 5336 | { | ||
| 5337 | PREFETCH (); | ||
| 5338 | if (RE_TRANSLATE (translate, *d) != *p++) | ||
| 5339 | { | ||
| 5340 | d = dfail; | ||
| 5341 | goto fail; | ||
| 5342 | } | ||
| 5343 | d++; | ||
| 5344 | } | ||
| 5345 | while (--mcnt); | ||
| 5346 | else | ||
| 5347 | do | ||
| 5348 | { | ||
| 5349 | PREFETCH (); | ||
| 5350 | if (*d++ != *p++) | ||
| 5351 | { | ||
| 5352 | d = dfail; | ||
| 5353 | goto fail; | ||
| 5354 | } | ||
| 5355 | } | ||
| 5356 | while (--mcnt); | ||
| 5357 | #else /* emacs */ | ||
| 5358 | /* The cost of testing `translate' is comparatively small. */ | 4578 | /* The cost of testing `translate' is comparatively small. */ |
| 5359 | if (target_multibyte) | 4579 | if (target_multibyte) |
| 5360 | do | 4580 | do |
| @@ -5419,7 +4639,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 5419 | d++; | 4639 | d++; |
| 5420 | } | 4640 | } |
| 5421 | while (--mcnt); | 4641 | while (--mcnt); |
| 5422 | #endif | 4642 | |
| 5423 | break; | 4643 | break; |
| 5424 | 4644 | ||
| 5425 | 4645 | ||
| @@ -5427,7 +4647,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 5427 | case anychar: | 4647 | case anychar: |
| 5428 | { | 4648 | { |
| 5429 | int buf_charlen; | 4649 | int buf_charlen; |
| 5430 | re_wchar_t buf_ch; | 4650 | int buf_ch; |
| 5431 | reg_syntax_t syntax; | 4651 | reg_syntax_t syntax; |
| 5432 | 4652 | ||
| 5433 | DEBUG_PRINT ("EXECUTING anychar.\n"); | 4653 | DEBUG_PRINT ("EXECUTING anychar.\n"); |
| @@ -5437,11 +4657,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 5437 | target_multibyte); | 4657 | target_multibyte); |
| 5438 | buf_ch = TRANSLATE (buf_ch); | 4658 | buf_ch = TRANSLATE (buf_ch); |
| 5439 | 4659 | ||
| 5440 | #ifdef emacs | ||
| 5441 | syntax = RE_SYNTAX_EMACS; | 4660 | syntax = RE_SYNTAX_EMACS; |
| 5442 | #else | ||
| 5443 | syntax = bufp->syntax; | ||
| 5444 | #endif | ||
| 5445 | 4661 | ||
| 5446 | if ((!(syntax & RE_DOT_NEWLINE) && buf_ch == '\n') | 4662 | if ((!(syntax & RE_DOT_NEWLINE) && buf_ch == '\n') |
| 5447 | || ((syntax & RE_DOT_NOT_NULL) && buf_ch == '\000')) | 4663 | || ((syntax & RE_DOT_NOT_NULL) && buf_ch == '\000')) |
| @@ -5460,7 +4676,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 5460 | int len; | 4676 | int len; |
| 5461 | 4677 | ||
| 5462 | /* Whether matching against a unibyte character. */ | 4678 | /* Whether matching against a unibyte character. */ |
| 5463 | boolean unibyte_char = false; | 4679 | bool unibyte_char = false; |
| 5464 | 4680 | ||
| 5465 | DEBUG_PRINT ("EXECUTING charset%s.\n", | 4681 | DEBUG_PRINT ("EXECUTING charset%s.\n", |
| 5466 | (re_opcode_t) *(p - 1) == charset_not ? "_not" : ""); | 4682 | (re_opcode_t) *(p - 1) == charset_not ? "_not" : ""); |
| @@ -5530,10 +4746,10 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 5530 | case stop_memory: | 4746 | case stop_memory: |
| 5531 | DEBUG_PRINT ("EXECUTING stop_memory %d:\n", *p); | 4747 | DEBUG_PRINT ("EXECUTING stop_memory %d:\n", *p); |
| 5532 | 4748 | ||
| 5533 | assert (!REG_UNSET (regstart[*p])); | 4749 | eassert (!REG_UNSET (regstart[*p])); |
| 5534 | /* Strictly speaking, there should be code such as: | 4750 | /* Strictly speaking, there should be code such as: |
| 5535 | 4751 | ||
| 5536 | assert (REG_UNSET (regend[*p])); | 4752 | eassert (REG_UNSET (regend[*p])); |
| 5537 | PUSH_FAILURE_REGSTOP ((unsigned int)*p); | 4753 | PUSH_FAILURE_REGSTOP ((unsigned int)*p); |
| 5538 | 4754 | ||
| 5539 | But the only info to be pushed is regend[*p] and it is known to | 4755 | But the only info to be pushed is regend[*p] and it is known to |
| @@ -5557,7 +4773,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 5557 | followed by the numeric value of <digit> as the register number. */ | 4773 | followed by the numeric value of <digit> as the register number. */ |
| 5558 | case duplicate: | 4774 | case duplicate: |
| 5559 | { | 4775 | { |
| 5560 | register re_char *d2, *dend2; | 4776 | re_char *d2, *dend2; |
| 5561 | int regno = *p++; /* Get which register to match against. */ | 4777 | int regno = *p++; /* Get which register to match against. */ |
| 5562 | DEBUG_PRINT ("EXECUTING duplicate %d.\n", regno); | 4778 | DEBUG_PRINT ("EXECUTING duplicate %d.\n", regno); |
| 5563 | 4779 | ||
| @@ -5719,7 +4935,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 5719 | DEBUG_PRINT ("EXECUTING on_failure_jump_nastyloop %d (to %p):\n", | 4935 | DEBUG_PRINT ("EXECUTING on_failure_jump_nastyloop %d (to %p):\n", |
| 5720 | mcnt, p + mcnt); | 4936 | mcnt, p + mcnt); |
| 5721 | 4937 | ||
| 5722 | assert ((re_opcode_t)p[-4] == no_op); | 4938 | eassert ((re_opcode_t)p[-4] == no_op); |
| 5723 | { | 4939 | { |
| 5724 | int cycle = 0; | 4940 | int cycle = 0; |
| 5725 | CHECK_INFINITE_LOOP (p - 4, d); | 4941 | CHECK_INFINITE_LOOP (p - 4, d); |
| @@ -5788,7 +5004,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 5788 | mcnt, p + mcnt); | 5004 | mcnt, p + mcnt); |
| 5789 | { | 5005 | { |
| 5790 | re_char *p1 = p; /* Next operation. */ | 5006 | re_char *p1 = p; /* Next operation. */ |
| 5791 | /* Here, we discard `const', making re_match non-reentrant. */ | 5007 | /* Discard 'const', making re_search non-reentrant. */ |
| 5792 | unsigned char *p2 = (unsigned char *) p + mcnt; /* Jump dest. */ | 5008 | unsigned char *p2 = (unsigned char *) p + mcnt; /* Jump dest. */ |
| 5793 | unsigned char *p3 = (unsigned char *) p - 3; /* opcode location. */ | 5009 | unsigned char *p3 = (unsigned char *) p - 3; /* opcode location. */ |
| 5794 | 5010 | ||
| @@ -5799,9 +5015,9 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 5799 | 5015 | ||
| 5800 | /* Ensure this is indeed the trivial kind of loop | 5016 | /* Ensure this is indeed the trivial kind of loop |
| 5801 | we are expecting. */ | 5017 | we are expecting. */ |
| 5802 | assert (skip_one_char (p1) == p2 - 3); | 5018 | eassert (skip_one_char (p1) == p2 - 3); |
| 5803 | assert ((re_opcode_t) p2[-3] == jump && p2 + mcnt == p); | 5019 | eassert ((re_opcode_t) p2[-3] == jump && p2 + mcnt == p); |
| 5804 | DEBUG_STATEMENT (debug += 2); | 5020 | DEBUG_STATEMENT (regex_emacs_debug += 2); |
| 5805 | if (mutually_exclusive_p (bufp, p1, p2)) | 5021 | if (mutually_exclusive_p (bufp, p1, p2)) |
| 5806 | { | 5022 | { |
| 5807 | /* Use a fast `on_failure_keep_string_jump' loop. */ | 5023 | /* Use a fast `on_failure_keep_string_jump' loop. */ |
| @@ -5815,7 +5031,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 5815 | DEBUG_PRINT (" smart default => slow loop.\n"); | 5031 | DEBUG_PRINT (" smart default => slow loop.\n"); |
| 5816 | *p3 = (unsigned char) on_failure_jump; | 5032 | *p3 = (unsigned char) on_failure_jump; |
| 5817 | } | 5033 | } |
| 5818 | DEBUG_STATEMENT (debug -= 2); | 5034 | DEBUG_STATEMENT (regex_emacs_debug -= 2); |
| 5819 | } | 5035 | } |
| 5820 | break; | 5036 | break; |
| 5821 | 5037 | ||
| @@ -5840,7 +5056,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 5840 | /* Originally, mcnt is how many times we HAVE to succeed. */ | 5056 | /* Originally, mcnt is how many times we HAVE to succeed. */ |
| 5841 | if (mcnt != 0) | 5057 | if (mcnt != 0) |
| 5842 | { | 5058 | { |
| 5843 | /* Here, we discard `const', making re_match non-reentrant. */ | 5059 | /* Discard 'const', making re_search non-reentrant. */ |
| 5844 | unsigned char *p2 = (unsigned char *) p + 2; /* counter loc. */ | 5060 | unsigned char *p2 = (unsigned char *) p + 2; /* counter loc. */ |
| 5845 | mcnt--; | 5061 | mcnt--; |
| 5846 | p += 4; | 5062 | p += 4; |
| @@ -5859,7 +5075,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 5859 | /* Originally, this is how many times we CAN jump. */ | 5075 | /* Originally, this is how many times we CAN jump. */ |
| 5860 | if (mcnt != 0) | 5076 | if (mcnt != 0) |
| 5861 | { | 5077 | { |
| 5862 | /* Here, we discard `const', making re_match non-reentrant. */ | 5078 | /* Discard 'const', making re_search non-reentrant. */ |
| 5863 | unsigned char *p2 = (unsigned char *) p + 2; /* counter loc. */ | 5079 | unsigned char *p2 = (unsigned char *) p + 2; /* counter loc. */ |
| 5864 | mcnt--; | 5080 | mcnt--; |
| 5865 | PUSH_NUMBER (p2, mcnt); | 5081 | PUSH_NUMBER (p2, mcnt); |
| @@ -5876,7 +5092,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 5876 | DEBUG_PRINT ("EXECUTING set_number_at.\n"); | 5092 | DEBUG_PRINT ("EXECUTING set_number_at.\n"); |
| 5877 | 5093 | ||
| 5878 | EXTRACT_NUMBER_AND_INCR (mcnt, p); | 5094 | EXTRACT_NUMBER_AND_INCR (mcnt, p); |
| 5879 | /* Here, we discard `const', making re_match non-reentrant. */ | 5095 | /* Discard 'const', making re_search non-reentrant. */ |
| 5880 | p2 = (unsigned char *) p + mcnt; | 5096 | p2 = (unsigned char *) p + mcnt; |
| 5881 | /* Signedness doesn't matter since we only copy MCNT's bits. */ | 5097 | /* Signedness doesn't matter since we only copy MCNT's bits. */ |
| 5882 | EXTRACT_NUMBER_AND_INCR (mcnt, p); | 5098 | EXTRACT_NUMBER_AND_INCR (mcnt, p); |
| @@ -5888,7 +5104,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 5888 | case wordbound: | 5104 | case wordbound: |
| 5889 | case notwordbound: | 5105 | case notwordbound: |
| 5890 | { | 5106 | { |
| 5891 | boolean not = (re_opcode_t) *(p - 1) == notwordbound; | 5107 | bool not = (re_opcode_t) *(p - 1) == notwordbound; |
| 5892 | DEBUG_PRINT ("EXECUTING %swordbound.\n", not ? "not" : ""); | 5108 | DEBUG_PRINT ("EXECUTING %swordbound.\n", not ? "not" : ""); |
| 5893 | 5109 | ||
| 5894 | /* We SUCCEED (or FAIL) in one of the following cases: */ | 5110 | /* We SUCCEED (or FAIL) in one of the following cases: */ |
| @@ -5900,19 +5116,15 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 5900 | { | 5116 | { |
| 5901 | /* C1 is the character before D, S1 is the syntax of C1, C2 | 5117 | /* C1 is the character before D, S1 is the syntax of C1, C2 |
| 5902 | is the character at D, and S2 is the syntax of C2. */ | 5118 | is the character at D, and S2 is the syntax of C2. */ |
| 5903 | re_wchar_t c1, c2; | 5119 | int c1, c2; |
| 5904 | int s1, s2; | 5120 | int s1, s2; |
| 5905 | int dummy; | 5121 | int dummy; |
| 5906 | #ifdef emacs | 5122 | ptrdiff_t offset = PTR_TO_OFFSET (d - 1); |
| 5907 | ssize_t offset = PTR_TO_OFFSET (d - 1); | 5123 | ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); |
| 5908 | ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); | ||
| 5909 | UPDATE_SYNTAX_TABLE (charpos); | 5124 | UPDATE_SYNTAX_TABLE (charpos); |
| 5910 | #endif | ||
| 5911 | GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); | 5125 | GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); |
| 5912 | s1 = SYNTAX (c1); | 5126 | s1 = SYNTAX (c1); |
| 5913 | #ifdef emacs | ||
| 5914 | UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1); | 5127 | UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1); |
| 5915 | #endif | ||
| 5916 | PREFETCH_NOLIMIT (); | 5128 | PREFETCH_NOLIMIT (); |
| 5917 | GET_CHAR_AFTER (c2, d, dummy); | 5129 | GET_CHAR_AFTER (c2, d, dummy); |
| 5918 | s2 = SYNTAX (c2); | 5130 | s2 = SYNTAX (c2); |
| @@ -5942,14 +5154,12 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 5942 | { | 5154 | { |
| 5943 | /* C1 is the character before D, S1 is the syntax of C1, C2 | 5155 | /* C1 is the character before D, S1 is the syntax of C1, C2 |
| 5944 | is the character at D, and S2 is the syntax of C2. */ | 5156 | is the character at D, and S2 is the syntax of C2. */ |
| 5945 | re_wchar_t c1, c2; | 5157 | int c1, c2; |
| 5946 | int s1, s2; | 5158 | int s1, s2; |
| 5947 | int dummy; | 5159 | int dummy; |
| 5948 | #ifdef emacs | 5160 | ptrdiff_t offset = PTR_TO_OFFSET (d); |
| 5949 | ssize_t offset = PTR_TO_OFFSET (d); | 5161 | ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); |
| 5950 | ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); | ||
| 5951 | UPDATE_SYNTAX_TABLE (charpos); | 5162 | UPDATE_SYNTAX_TABLE (charpos); |
| 5952 | #endif | ||
| 5953 | PREFETCH (); | 5163 | PREFETCH (); |
| 5954 | GET_CHAR_AFTER (c2, d, dummy); | 5164 | GET_CHAR_AFTER (c2, d, dummy); |
| 5955 | s2 = SYNTAX (c2); | 5165 | s2 = SYNTAX (c2); |
| @@ -5962,9 +5172,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 5962 | if (!AT_STRINGS_BEG (d)) | 5172 | if (!AT_STRINGS_BEG (d)) |
| 5963 | { | 5173 | { |
| 5964 | GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); | 5174 | GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); |
| 5965 | #ifdef emacs | ||
| 5966 | UPDATE_SYNTAX_TABLE_BACKWARD (charpos - 1); | 5175 | UPDATE_SYNTAX_TABLE_BACKWARD (charpos - 1); |
| 5967 | #endif | ||
| 5968 | s1 = SYNTAX (c1); | 5176 | s1 = SYNTAX (c1); |
| 5969 | 5177 | ||
| 5970 | /* ... and S1 is Sword, and WORD_BOUNDARY_P (C1, C2) | 5178 | /* ... and S1 is Sword, and WORD_BOUNDARY_P (C1, C2) |
| @@ -5987,14 +5195,12 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 5987 | { | 5195 | { |
| 5988 | /* C1 is the character before D, S1 is the syntax of C1, C2 | 5196 | /* C1 is the character before D, S1 is the syntax of C1, C2 |
| 5989 | is the character at D, and S2 is the syntax of C2. */ | 5197 | is the character at D, and S2 is the syntax of C2. */ |
| 5990 | re_wchar_t c1, c2; | 5198 | int c1, c2; |
| 5991 | int s1, s2; | 5199 | int s1, s2; |
| 5992 | int dummy; | 5200 | int dummy; |
| 5993 | #ifdef emacs | 5201 | ptrdiff_t offset = PTR_TO_OFFSET (d) - 1; |
| 5994 | ssize_t offset = PTR_TO_OFFSET (d) - 1; | 5202 | ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); |
| 5995 | ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); | ||
| 5996 | UPDATE_SYNTAX_TABLE (charpos); | 5203 | UPDATE_SYNTAX_TABLE (charpos); |
| 5997 | #endif | ||
| 5998 | GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); | 5204 | GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); |
| 5999 | s1 = SYNTAX (c1); | 5205 | s1 = SYNTAX (c1); |
| 6000 | 5206 | ||
| @@ -6007,9 +5213,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 6007 | { | 5213 | { |
| 6008 | PREFETCH_NOLIMIT (); | 5214 | PREFETCH_NOLIMIT (); |
| 6009 | GET_CHAR_AFTER (c2, d, dummy); | 5215 | GET_CHAR_AFTER (c2, d, dummy); |
| 6010 | #ifdef emacs | ||
| 6011 | UPDATE_SYNTAX_TABLE_FORWARD (charpos); | 5216 | UPDATE_SYNTAX_TABLE_FORWARD (charpos); |
| 6012 | #endif | ||
| 6013 | s2 = SYNTAX (c2); | 5217 | s2 = SYNTAX (c2); |
| 6014 | 5218 | ||
| 6015 | /* ... and S2 is Sword, and WORD_BOUNDARY_P (C1, C2) | 5219 | /* ... and S2 is Sword, and WORD_BOUNDARY_P (C1, C2) |
| @@ -6032,13 +5236,11 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 6032 | { | 5236 | { |
| 6033 | /* C1 is the character before D, S1 is the syntax of C1, C2 | 5237 | /* C1 is the character before D, S1 is the syntax of C1, C2 |
| 6034 | is the character at D, and S2 is the syntax of C2. */ | 5238 | is the character at D, and S2 is the syntax of C2. */ |
| 6035 | re_wchar_t c1, c2; | 5239 | int c1, c2; |
| 6036 | int s1, s2; | 5240 | int s1, s2; |
| 6037 | #ifdef emacs | 5241 | ptrdiff_t offset = PTR_TO_OFFSET (d); |
| 6038 | ssize_t offset = PTR_TO_OFFSET (d); | 5242 | ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); |
| 6039 | ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); | ||
| 6040 | UPDATE_SYNTAX_TABLE (charpos); | 5243 | UPDATE_SYNTAX_TABLE (charpos); |
| 6041 | #endif | ||
| 6042 | PREFETCH (); | 5244 | PREFETCH (); |
| 6043 | c2 = RE_STRING_CHAR (d, target_multibyte); | 5245 | c2 = RE_STRING_CHAR (d, target_multibyte); |
| 6044 | s2 = SYNTAX (c2); | 5246 | s2 = SYNTAX (c2); |
| @@ -6051,9 +5253,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 6051 | if (!AT_STRINGS_BEG (d)) | 5253 | if (!AT_STRINGS_BEG (d)) |
| 6052 | { | 5254 | { |
| 6053 | GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); | 5255 | GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); |
| 6054 | #ifdef emacs | ||
| 6055 | UPDATE_SYNTAX_TABLE_BACKWARD (charpos - 1); | 5256 | UPDATE_SYNTAX_TABLE_BACKWARD (charpos - 1); |
| 6056 | #endif | ||
| 6057 | s1 = SYNTAX (c1); | 5257 | s1 = SYNTAX (c1); |
| 6058 | 5258 | ||
| 6059 | /* ... and S1 is Sword or Ssymbol. */ | 5259 | /* ... and S1 is Sword or Ssymbol. */ |
| @@ -6075,13 +5275,11 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 6075 | { | 5275 | { |
| 6076 | /* C1 is the character before D, S1 is the syntax of C1, C2 | 5276 | /* C1 is the character before D, S1 is the syntax of C1, C2 |
| 6077 | is the character at D, and S2 is the syntax of C2. */ | 5277 | is the character at D, and S2 is the syntax of C2. */ |
| 6078 | re_wchar_t c1, c2; | 5278 | int c1, c2; |
| 6079 | int s1, s2; | 5279 | int s1, s2; |
| 6080 | #ifdef emacs | 5280 | ptrdiff_t offset = PTR_TO_OFFSET (d) - 1; |
| 6081 | ssize_t offset = PTR_TO_OFFSET (d) - 1; | 5281 | ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); |
| 6082 | ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); | ||
| 6083 | UPDATE_SYNTAX_TABLE (charpos); | 5282 | UPDATE_SYNTAX_TABLE (charpos); |
| 6084 | #endif | ||
| 6085 | GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); | 5283 | GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); |
| 6086 | s1 = SYNTAX (c1); | 5284 | s1 = SYNTAX (c1); |
| 6087 | 5285 | ||
| @@ -6094,9 +5292,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 6094 | { | 5292 | { |
| 6095 | PREFETCH_NOLIMIT (); | 5293 | PREFETCH_NOLIMIT (); |
| 6096 | c2 = RE_STRING_CHAR (d, target_multibyte); | 5294 | c2 = RE_STRING_CHAR (d, target_multibyte); |
| 6097 | #ifdef emacs | ||
| 6098 | UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1); | 5295 | UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1); |
| 6099 | #endif | ||
| 6100 | s2 = SYNTAX (c2); | 5296 | s2 = SYNTAX (c2); |
| 6101 | 5297 | ||
| 6102 | /* ... and S2 is Sword or Ssymbol. */ | 5298 | /* ... and S2 is Sword or Ssymbol. */ |
| @@ -6109,21 +5305,19 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 6109 | case syntaxspec: | 5305 | case syntaxspec: |
| 6110 | case notsyntaxspec: | 5306 | case notsyntaxspec: |
| 6111 | { | 5307 | { |
| 6112 | boolean not = (re_opcode_t) *(p - 1) == notsyntaxspec; | 5308 | bool not = (re_opcode_t) *(p - 1) == notsyntaxspec; |
| 6113 | mcnt = *p++; | 5309 | mcnt = *p++; |
| 6114 | DEBUG_PRINT ("EXECUTING %ssyntaxspec %d.\n", not ? "not" : "", | 5310 | DEBUG_PRINT ("EXECUTING %ssyntaxspec %d.\n", not ? "not" : "", |
| 6115 | mcnt); | 5311 | mcnt); |
| 6116 | PREFETCH (); | 5312 | PREFETCH (); |
| 6117 | #ifdef emacs | ||
| 6118 | { | 5313 | { |
| 6119 | ssize_t offset = PTR_TO_OFFSET (d); | 5314 | ptrdiff_t offset = PTR_TO_OFFSET (d); |
| 6120 | ssize_t pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (offset); | 5315 | ptrdiff_t pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (offset); |
| 6121 | UPDATE_SYNTAX_TABLE (pos1); | 5316 | UPDATE_SYNTAX_TABLE (pos1); |
| 6122 | } | 5317 | } |
| 6123 | #endif | ||
| 6124 | { | 5318 | { |
| 6125 | int len; | 5319 | int len; |
| 6126 | re_wchar_t c; | 5320 | int c; |
| 6127 | 5321 | ||
| 6128 | GET_CHAR_AFTER (c, d, len); | 5322 | GET_CHAR_AFTER (c, d, len); |
| 6129 | if ((SYNTAX (c) != (enum syntaxcode) mcnt) ^ not) | 5323 | if ((SYNTAX (c) != (enum syntaxcode) mcnt) ^ not) |
| @@ -6133,7 +5327,6 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 6133 | } | 5327 | } |
| 6134 | break; | 5328 | break; |
| 6135 | 5329 | ||
| 6136 | #ifdef emacs | ||
| 6137 | case at_dot: | 5330 | case at_dot: |
| 6138 | DEBUG_PRINT ("EXECUTING at_dot.\n"); | 5331 | DEBUG_PRINT ("EXECUTING at_dot.\n"); |
| 6139 | if (PTR_BYTE_POS (d) != PT_BYTE) | 5332 | if (PTR_BYTE_POS (d) != PT_BYTE) |
| @@ -6143,7 +5336,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 6143 | case categoryspec: | 5336 | case categoryspec: |
| 6144 | case notcategoryspec: | 5337 | case notcategoryspec: |
| 6145 | { | 5338 | { |
| 6146 | boolean not = (re_opcode_t) *(p - 1) == notcategoryspec; | 5339 | bool not = (re_opcode_t) *(p - 1) == notcategoryspec; |
| 6147 | mcnt = *p++; | 5340 | mcnt = *p++; |
| 6148 | DEBUG_PRINT ("EXECUTING %scategoryspec %d.\n", | 5341 | DEBUG_PRINT ("EXECUTING %scategoryspec %d.\n", |
| 6149 | not ? "not" : "", mcnt); | 5342 | not ? "not" : "", mcnt); |
| @@ -6151,7 +5344,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 6151 | 5344 | ||
| 6152 | { | 5345 | { |
| 6153 | int len; | 5346 | int len; |
| 6154 | re_wchar_t c; | 5347 | int c; |
| 6155 | GET_CHAR_AFTER (c, d, len); | 5348 | GET_CHAR_AFTER (c, d, len); |
| 6156 | if ((!CHAR_HAS_CATEGORY (c, mcnt)) ^ not) | 5349 | if ((!CHAR_HAS_CATEGORY (c, mcnt)) ^ not) |
| 6157 | goto fail; | 5350 | goto fail; |
| @@ -6160,8 +5353,6 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 6160 | } | 5353 | } |
| 6161 | break; | 5354 | break; |
| 6162 | 5355 | ||
| 6163 | #endif /* emacs */ | ||
| 6164 | |||
| 6165 | default: | 5356 | default: |
| 6166 | abort (); | 5357 | abort (); |
| 6167 | } | 5358 | } |
| @@ -6180,11 +5371,11 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 6180 | switch (*pat++) | 5371 | switch (*pat++) |
| 6181 | { | 5372 | { |
| 6182 | case on_failure_keep_string_jump: | 5373 | case on_failure_keep_string_jump: |
| 6183 | assert (str == NULL); | 5374 | eassert (str == NULL); |
| 6184 | goto continue_failure_jump; | 5375 | goto continue_failure_jump; |
| 6185 | 5376 | ||
| 6186 | case on_failure_jump_nastyloop: | 5377 | case on_failure_jump_nastyloop: |
| 6187 | assert ((re_opcode_t)pat[-2] == no_op); | 5378 | eassert ((re_opcode_t)pat[-2] == no_op); |
| 6188 | PUSH_FAILURE_POINT (pat - 2, str); | 5379 | PUSH_FAILURE_POINT (pat - 2, str); |
| 6189 | FALLTHROUGH; | 5380 | FALLTHROUGH; |
| 6190 | case on_failure_jump_loop: | 5381 | case on_failure_jump_loop: |
| @@ -6204,7 +5395,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 6204 | abort (); | 5395 | abort (); |
| 6205 | } | 5396 | } |
| 6206 | 5397 | ||
| 6207 | assert (p >= bufp->buffer && p <= pend); | 5398 | eassert (p >= bufp->buffer && p <= pend); |
| 6208 | 5399 | ||
| 6209 | if (d >= string1 && d <= end1) | 5400 | if (d >= string1 && d <= end1) |
| 6210 | dend = end_match_1; | 5401 | dend = end_match_1; |
| @@ -6216,7 +5407,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 6216 | if (best_regs_set) | 5407 | if (best_regs_set) |
| 6217 | goto restore_best_regs; | 5408 | goto restore_best_regs; |
| 6218 | 5409 | ||
| 6219 | FREE_VARIABLES (); | 5410 | SAFE_FREE (); |
| 6220 | 5411 | ||
| 6221 | return -1; /* Failure to match. */ | 5412 | return -1; /* Failure to match. */ |
| 6222 | } | 5413 | } |
| @@ -6227,8 +5418,8 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, | |||
| 6227 | bytes; nonzero otherwise. */ | 5418 | bytes; nonzero otherwise. */ |
| 6228 | 5419 | ||
| 6229 | static int | 5420 | static int |
| 6230 | bcmp_translate (re_char *s1, re_char *s2, ssize_t len, | 5421 | bcmp_translate (re_char *s1, re_char *s2, ptrdiff_t len, |
| 6231 | RE_TRANSLATE_TYPE translate, const int target_multibyte) | 5422 | Lisp_Object translate, int target_multibyte) |
| 6232 | { | 5423 | { |
| 6233 | re_char *p1 = s1, *p2 = s2; | 5424 | re_char *p1 = s1, *p2 = s2; |
| 6234 | re_char *p1_end = s1 + len; | 5425 | re_char *p1_end = s1 + len; |
| @@ -6239,7 +5430,7 @@ bcmp_translate (re_char *s1, re_char *s2, ssize_t len, | |||
| 6239 | while (p1 < p1_end && p2 < p2_end) | 5430 | while (p1 < p1_end && p2 < p2_end) |
| 6240 | { | 5431 | { |
| 6241 | int p1_charlen, p2_charlen; | 5432 | int p1_charlen, p2_charlen; |
| 6242 | re_wchar_t p1_ch, p2_ch; | 5433 | int p1_ch, p2_ch; |
| 6243 | 5434 | ||
| 6244 | GET_CHAR_AFTER (p1_ch, p1, p1_charlen); | 5435 | GET_CHAR_AFTER (p1_ch, p1, p1_charlen); |
| 6245 | GET_CHAR_AFTER (p2_ch, p2, p2_charlen); | 5436 | GET_CHAR_AFTER (p2_ch, p2, p2_charlen); |
| @@ -6270,9 +5461,7 @@ bcmp_translate (re_char *s1, re_char *s2, ssize_t len, | |||
| 6270 | 5461 | ||
| 6271 | const char * | 5462 | const char * |
| 6272 | re_compile_pattern (const char *pattern, size_t length, | 5463 | re_compile_pattern (const char *pattern, size_t length, |
| 6273 | #ifdef emacs | ||
| 6274 | bool posix_backtracking, const char *whitespace_regexp, | 5464 | bool posix_backtracking, const char *whitespace_regexp, |
| 6275 | #endif | ||
| 6276 | struct re_pattern_buffer *bufp) | 5465 | struct re_pattern_buffer *bufp) |
| 6277 | { | 5466 | { |
| 6278 | reg_errcode_t ret; | 5467 | reg_errcode_t ret; |
| @@ -6282,334 +5471,16 @@ re_compile_pattern (const char *pattern, size_t length, | |||
| 6282 | bufp->regs_allocated = REGS_UNALLOCATED; | 5471 | bufp->regs_allocated = REGS_UNALLOCATED; |
| 6283 | 5472 | ||
| 6284 | /* And GNU code determines whether or not to get register information | 5473 | /* And GNU code determines whether or not to get register information |
| 6285 | by passing null for the REGS argument to re_match, etc., not by | 5474 | by passing null for the REGS argument to re_search, etc., not by |
| 6286 | setting no_sub. */ | 5475 | setting no_sub. */ |
| 6287 | bufp->no_sub = 0; | 5476 | bufp->no_sub = 0; |
| 6288 | 5477 | ||
| 6289 | ret = regex_compile ((re_char *) pattern, length, | 5478 | ret = regex_compile ((re_char *) pattern, length, |
| 6290 | #ifdef emacs | ||
| 6291 | posix_backtracking, | 5479 | posix_backtracking, |
| 6292 | whitespace_regexp, | 5480 | whitespace_regexp, |
| 6293 | #else | ||
| 6294 | re_syntax_options, | ||
| 6295 | #endif | ||
| 6296 | bufp); | 5481 | bufp); |
| 6297 | 5482 | ||
| 6298 | if (!ret) | 5483 | if (!ret) |
| 6299 | return NULL; | 5484 | return NULL; |
| 6300 | return gettext (re_error_msgid[(int) ret]); | 5485 | return re_error_msgid[ret]; |
| 6301 | } | ||
| 6302 | WEAK_ALIAS (__re_compile_pattern, re_compile_pattern) | ||
| 6303 | |||
| 6304 | /* Entry points compatible with 4.2 BSD regex library. We don't define | ||
| 6305 | them unless specifically requested. */ | ||
| 6306 | |||
| 6307 | #if defined _REGEX_RE_COMP || defined _LIBC | ||
| 6308 | |||
| 6309 | /* BSD has one and only one pattern buffer. */ | ||
| 6310 | static struct re_pattern_buffer re_comp_buf; | ||
| 6311 | |||
| 6312 | char * | ||
| 6313 | # ifdef _LIBC | ||
| 6314 | /* Make these definitions weak in libc, so POSIX programs can redefine | ||
| 6315 | these names if they don't use our functions, and still use | ||
| 6316 | regcomp/regexec below without link errors. */ | ||
| 6317 | weak_function | ||
| 6318 | # endif | ||
| 6319 | re_comp (const char *s) | ||
| 6320 | { | ||
| 6321 | reg_errcode_t ret; | ||
| 6322 | |||
| 6323 | if (!s) | ||
| 6324 | { | ||
| 6325 | if (!re_comp_buf.buffer) | ||
| 6326 | /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */ | ||
| 6327 | return (char *) gettext ("No previous regular expression"); | ||
| 6328 | return 0; | ||
| 6329 | } | ||
| 6330 | |||
| 6331 | if (!re_comp_buf.buffer) | ||
| 6332 | { | ||
| 6333 | re_comp_buf.buffer = malloc (200); | ||
| 6334 | if (re_comp_buf.buffer == NULL) | ||
| 6335 | /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */ | ||
| 6336 | return (char *) gettext (re_error_msgid[(int) REG_ESPACE]); | ||
| 6337 | re_comp_buf.allocated = 200; | ||
| 6338 | |||
| 6339 | re_comp_buf.fastmap = malloc (1 << BYTEWIDTH); | ||
| 6340 | if (re_comp_buf.fastmap == NULL) | ||
| 6341 | /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */ | ||
| 6342 | return (char *) gettext (re_error_msgid[(int) REG_ESPACE]); | ||
| 6343 | } | ||
| 6344 | |||
| 6345 | /* Since `re_exec' always passes NULL for the `regs' argument, we | ||
| 6346 | don't need to initialize the pattern buffer fields which affect it. */ | ||
| 6347 | |||
| 6348 | ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf); | ||
| 6349 | |||
| 6350 | if (!ret) | ||
| 6351 | return NULL; | ||
| 6352 | |||
| 6353 | /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */ | ||
| 6354 | return (char *) gettext (re_error_msgid[(int) ret]); | ||
| 6355 | } | ||
| 6356 | |||
| 6357 | |||
| 6358 | int | ||
| 6359 | # ifdef _LIBC | ||
| 6360 | weak_function | ||
| 6361 | # endif | ||
| 6362 | re_exec (const char *s) | ||
| 6363 | { | ||
| 6364 | const size_t len = strlen (s); | ||
| 6365 | return re_search (&re_comp_buf, s, len, 0, len, 0) >= 0; | ||
| 6366 | } | 5486 | } |
| 6367 | #endif /* _REGEX_RE_COMP */ | ||
| 6368 | |||
| 6369 | /* POSIX.2 functions. Don't define these for Emacs. */ | ||
| 6370 | |||
| 6371 | #ifndef emacs | ||
| 6372 | |||
| 6373 | /* regcomp takes a regular expression as a string and compiles it. | ||
| 6374 | |||
| 6375 | PREG is a regex_t *. We do not expect any fields to be initialized, | ||
| 6376 | since POSIX says we shouldn't. Thus, we set | ||
| 6377 | |||
| 6378 | `buffer' to the compiled pattern; | ||
| 6379 | `used' to the length of the compiled pattern; | ||
| 6380 | `syntax' to RE_SYNTAX_POSIX_EXTENDED if the | ||
| 6381 | REG_EXTENDED bit in CFLAGS is set; otherwise, to | ||
| 6382 | RE_SYNTAX_POSIX_BASIC; | ||
| 6383 | `fastmap' to an allocated space for the fastmap; | ||
| 6384 | `fastmap_accurate' to zero; | ||
| 6385 | `re_nsub' to the number of subexpressions in PATTERN. | ||
| 6386 | |||
| 6387 | PATTERN is the address of the pattern string. | ||
| 6388 | |||
| 6389 | CFLAGS is a series of bits which affect compilation. | ||
| 6390 | |||
| 6391 | If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we | ||
| 6392 | use POSIX basic syntax. | ||
| 6393 | |||
| 6394 | If REG_NEWLINE is set, then . and [^...] don't match newline. | ||
| 6395 | Also, regexec will try a match beginning after every newline. | ||
| 6396 | |||
| 6397 | If REG_ICASE is set, then we considers upper- and lowercase | ||
| 6398 | versions of letters to be equivalent when matching. | ||
| 6399 | |||
| 6400 | If REG_NOSUB is set, then when PREG is passed to regexec, that | ||
| 6401 | routine will report only success or failure, and nothing about the | ||
| 6402 | registers. | ||
| 6403 | |||
| 6404 | It returns 0 if it succeeds, nonzero if it doesn't. (See regex-emacs.h for | ||
| 6405 | the return codes and their meanings.) */ | ||
| 6406 | |||
| 6407 | reg_errcode_t | ||
| 6408 | regcomp (regex_t *_Restrict_ preg, const char *_Restrict_ pattern, | ||
| 6409 | int cflags) | ||
| 6410 | { | ||
| 6411 | reg_errcode_t ret; | ||
| 6412 | reg_syntax_t syntax | ||
| 6413 | = (cflags & REG_EXTENDED) ? | ||
| 6414 | RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC; | ||
| 6415 | |||
| 6416 | /* regex_compile will allocate the space for the compiled pattern. */ | ||
| 6417 | preg->buffer = 0; | ||
| 6418 | preg->allocated = 0; | ||
| 6419 | preg->used = 0; | ||
| 6420 | |||
| 6421 | /* Try to allocate space for the fastmap. */ | ||
| 6422 | preg->fastmap = malloc (1 << BYTEWIDTH); | ||
| 6423 | |||
| 6424 | if (cflags & REG_ICASE) | ||
| 6425 | { | ||
| 6426 | unsigned i; | ||
| 6427 | |||
| 6428 | preg->translate = malloc (CHAR_SET_SIZE * sizeof *preg->translate); | ||
| 6429 | if (preg->translate == NULL) | ||
| 6430 | return (int) REG_ESPACE; | ||
| 6431 | |||
| 6432 | /* Map uppercase characters to corresponding lowercase ones. */ | ||
| 6433 | for (i = 0; i < CHAR_SET_SIZE; i++) | ||
| 6434 | preg->translate[i] = ISUPPER (i) ? TOLOWER (i) : i; | ||
| 6435 | } | ||
| 6436 | else | ||
| 6437 | preg->translate = NULL; | ||
| 6438 | |||
| 6439 | /* If REG_NEWLINE is set, newlines are treated differently. */ | ||
| 6440 | if (cflags & REG_NEWLINE) | ||
| 6441 | { /* REG_NEWLINE implies neither . nor [^...] match newline. */ | ||
| 6442 | syntax &= ~RE_DOT_NEWLINE; | ||
| 6443 | syntax |= RE_HAT_LISTS_NOT_NEWLINE; | ||
| 6444 | } | ||
| 6445 | else | ||
| 6446 | syntax |= RE_NO_NEWLINE_ANCHOR; | ||
| 6447 | |||
| 6448 | preg->no_sub = !!(cflags & REG_NOSUB); | ||
| 6449 | |||
| 6450 | /* POSIX says a null character in the pattern terminates it, so we | ||
| 6451 | can use strlen here in compiling the pattern. */ | ||
| 6452 | ret = regex_compile ((re_char *) pattern, strlen (pattern), syntax, preg); | ||
| 6453 | |||
| 6454 | /* POSIX doesn't distinguish between an unmatched open-group and an | ||
| 6455 | unmatched close-group: both are REG_EPAREN. */ | ||
| 6456 | if (ret == REG_ERPAREN) | ||
| 6457 | ret = REG_EPAREN; | ||
| 6458 | |||
| 6459 | if (ret == REG_NOERROR && preg->fastmap) | ||
| 6460 | { /* Compute the fastmap now, since regexec cannot modify the pattern | ||
| 6461 | buffer. */ | ||
| 6462 | re_compile_fastmap (preg); | ||
| 6463 | if (preg->can_be_null) | ||
| 6464 | { /* The fastmap can't be used anyway. */ | ||
| 6465 | free (preg->fastmap); | ||
| 6466 | preg->fastmap = NULL; | ||
| 6467 | } | ||
| 6468 | } | ||
| 6469 | return ret; | ||
| 6470 | } | ||
| 6471 | WEAK_ALIAS (__regcomp, regcomp) | ||
| 6472 | |||
| 6473 | |||
| 6474 | /* regexec searches for a given pattern, specified by PREG, in the | ||
| 6475 | string STRING. | ||
| 6476 | |||
| 6477 | If NMATCH is zero or REG_NOSUB was set in the cflags argument to | ||
| 6478 | `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at | ||
| 6479 | least NMATCH elements, and we set them to the offsets of the | ||
| 6480 | corresponding matched substrings. | ||
| 6481 | |||
| 6482 | EFLAGS specifies `execution flags' which affect matching: if | ||
| 6483 | REG_NOTBOL is set, then ^ does not match at the beginning of the | ||
| 6484 | string; if REG_NOTEOL is set, then $ does not match at the end. | ||
| 6485 | |||
| 6486 | We return 0 if we find a match and REG_NOMATCH if not. */ | ||
| 6487 | |||
| 6488 | reg_errcode_t | ||
| 6489 | regexec (const regex_t *_Restrict_ preg, const char *_Restrict_ string, | ||
| 6490 | size_t nmatch, regmatch_t pmatch[_Restrict_arr_], int eflags) | ||
| 6491 | { | ||
| 6492 | regoff_t ret; | ||
| 6493 | struct re_registers regs; | ||
| 6494 | regex_t private_preg; | ||
| 6495 | size_t len = strlen (string); | ||
| 6496 | boolean want_reg_info = !preg->no_sub && nmatch > 0 && pmatch; | ||
| 6497 | |||
| 6498 | private_preg = *preg; | ||
| 6499 | |||
| 6500 | private_preg.not_bol = !!(eflags & REG_NOTBOL); | ||
| 6501 | private_preg.not_eol = !!(eflags & REG_NOTEOL); | ||
| 6502 | |||
| 6503 | /* The user has told us exactly how many registers to return | ||
| 6504 | information about, via `nmatch'. We have to pass that on to the | ||
| 6505 | matching routines. */ | ||
| 6506 | private_preg.regs_allocated = REGS_FIXED; | ||
| 6507 | |||
| 6508 | if (want_reg_info) | ||
| 6509 | { | ||
| 6510 | regs.num_regs = nmatch; | ||
| 6511 | regs.start = TALLOC (nmatch * 2, regoff_t); | ||
| 6512 | if (regs.start == NULL) | ||
| 6513 | return REG_NOMATCH; | ||
| 6514 | regs.end = regs.start + nmatch; | ||
| 6515 | } | ||
| 6516 | |||
| 6517 | /* Instead of using not_eol to implement REG_NOTEOL, we could simply | ||
| 6518 | pass (&private_preg, string, len + 1, 0, len, ...) pretending the string | ||
| 6519 | was a little bit longer but still only matching the real part. | ||
| 6520 | This works because the `endline' will check for a '\n' and will find a | ||
| 6521 | '\0', correctly deciding that this is not the end of a line. | ||
| 6522 | But it doesn't work out so nicely for REG_NOTBOL, since we don't have | ||
| 6523 | a convenient '\0' there. For all we know, the string could be preceded | ||
| 6524 | by '\n' which would throw things off. */ | ||
| 6525 | |||
| 6526 | /* Perform the searching operation. */ | ||
| 6527 | ret = re_search (&private_preg, string, len, | ||
| 6528 | /* start: */ 0, /* range: */ len, | ||
| 6529 | want_reg_info ? &regs : 0); | ||
| 6530 | |||
| 6531 | /* Copy the register information to the POSIX structure. */ | ||
| 6532 | if (want_reg_info) | ||
| 6533 | { | ||
| 6534 | if (ret >= 0) | ||
| 6535 | { | ||
| 6536 | unsigned r; | ||
| 6537 | |||
| 6538 | for (r = 0; r < nmatch; r++) | ||
| 6539 | { | ||
| 6540 | pmatch[r].rm_so = regs.start[r]; | ||
| 6541 | pmatch[r].rm_eo = regs.end[r]; | ||
| 6542 | } | ||
| 6543 | } | ||
| 6544 | |||
| 6545 | /* If we needed the temporary register info, free the space now. */ | ||
| 6546 | free (regs.start); | ||
| 6547 | } | ||
| 6548 | |||
| 6549 | /* We want zero return to mean success, unlike `re_search'. */ | ||
| 6550 | return ret >= 0 ? REG_NOERROR : REG_NOMATCH; | ||
| 6551 | } | ||
| 6552 | WEAK_ALIAS (__regexec, regexec) | ||
| 6553 | |||
| 6554 | |||
| 6555 | /* Returns a message corresponding to an error code, ERR_CODE, returned | ||
| 6556 | from either regcomp or regexec. We don't use PREG here. | ||
| 6557 | |||
| 6558 | ERR_CODE was previously called ERRCODE, but that name causes an | ||
| 6559 | error with msvc8 compiler. */ | ||
| 6560 | |||
| 6561 | size_t | ||
| 6562 | regerror (int err_code, const regex_t *preg, char *errbuf, size_t errbuf_size) | ||
| 6563 | { | ||
| 6564 | const char *msg; | ||
| 6565 | size_t msg_size; | ||
| 6566 | |||
| 6567 | if (err_code < 0 | ||
| 6568 | || err_code >= (sizeof (re_error_msgid) / sizeof (re_error_msgid[0]))) | ||
| 6569 | /* Only error codes returned by the rest of the code should be passed | ||
| 6570 | to this routine. If we are given anything else, or if other regex | ||
| 6571 | code generates an invalid error code, then the program has a bug. | ||
| 6572 | Dump core so we can fix it. */ | ||
| 6573 | abort (); | ||
| 6574 | |||
| 6575 | msg = gettext (re_error_msgid[err_code]); | ||
| 6576 | |||
| 6577 | msg_size = strlen (msg) + 1; /* Includes the null. */ | ||
| 6578 | |||
| 6579 | if (errbuf_size != 0) | ||
| 6580 | { | ||
| 6581 | if (msg_size > errbuf_size) | ||
| 6582 | { | ||
| 6583 | memcpy (errbuf, msg, errbuf_size - 1); | ||
| 6584 | errbuf[errbuf_size - 1] = 0; | ||
| 6585 | } | ||
| 6586 | else | ||
| 6587 | strcpy (errbuf, msg); | ||
| 6588 | } | ||
| 6589 | |||
| 6590 | return msg_size; | ||
| 6591 | } | ||
| 6592 | WEAK_ALIAS (__regerror, regerror) | ||
| 6593 | |||
| 6594 | |||
| 6595 | /* Free dynamically allocated space used by PREG. */ | ||
| 6596 | |||
| 6597 | void | ||
| 6598 | regfree (regex_t *preg) | ||
| 6599 | { | ||
| 6600 | free (preg->buffer); | ||
| 6601 | preg->buffer = NULL; | ||
| 6602 | |||
| 6603 | preg->allocated = 0; | ||
| 6604 | preg->used = 0; | ||
| 6605 | |||
| 6606 | free (preg->fastmap); | ||
| 6607 | preg->fastmap = NULL; | ||
| 6608 | preg->fastmap_accurate = 0; | ||
| 6609 | |||
| 6610 | free (preg->translate); | ||
| 6611 | preg->translate = NULL; | ||
| 6612 | } | ||
| 6613 | WEAK_ALIAS (__regfree, regfree) | ||
| 6614 | |||
| 6615 | #endif /* not emacs */ | ||
diff --git a/src/regex-emacs.h b/src/regex-emacs.h index 9a6214af98c..159c7dcb9b8 100644 --- a/src/regex-emacs.h +++ b/src/regex-emacs.h | |||
| @@ -17,163 +17,24 @@ | |||
| 17 | You should have received a copy of the GNU General Public License | 17 | You should have received a copy of the GNU General Public License |
| 18 | along with this program. If not, see <https://www.gnu.org/licenses/>. */ | 18 | along with this program. If not, see <https://www.gnu.org/licenses/>. */ |
| 19 | 19 | ||
| 20 | #ifndef _REGEX_H | 20 | #ifndef EMACS_REGEX_H |
| 21 | #define _REGEX_H 1 | 21 | #define EMACS_REGEX_H 1 |
| 22 | 22 | ||
| 23 | #if defined emacs && (defined _REGEX_RE_COMP || defined _LIBC) | 23 | #include <stddef.h> |
| 24 | /* We're not defining re_set_syntax and using a different prototype of | 24 | |
| 25 | re_compile_pattern when building Emacs so fail compilation early with | 25 | /* This is the structure we store register match data in. See |
| 26 | a (somewhat helpful) error message when conflict is detected. */ | 26 | regex.texinfo for a full description of what registers match. |
| 27 | # error "_REGEX_RE_COMP nor _LIBC can be defined if emacs is defined." | 27 | Declare this before including lisp.h, since lisp.h (via thread.h) |
| 28 | #endif | 28 | uses struct re_registers. */ |
| 29 | 29 | struct re_registers | |
| 30 | #include <sys/types.h> | 30 | { |
| 31 | 31 | unsigned num_regs; | |
| 32 | /* Allow the use in C++ code. */ | 32 | ptrdiff_t *start; |
| 33 | #ifdef __cplusplus | 33 | ptrdiff_t *end; |
| 34 | extern "C" { | 34 | }; |
| 35 | #endif | 35 | |
| 36 | 36 | #include "lisp.h" | |
| 37 | #if !defined _POSIX_C_SOURCE && !defined _POSIX_SOURCE && defined VMS | 37 | |
| 38 | /* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it | ||
| 39 | should be there. */ | ||
| 40 | # include <stddef.h> | ||
| 41 | #endif | ||
| 42 | |||
| 43 | /* The following bits are used to determine the regexp syntax we | ||
| 44 | recognize. The set/not-set meanings where historically chosen so | ||
| 45 | that Emacs syntax had the value 0. | ||
| 46 | The bits are given in alphabetical order, and | ||
| 47 | the definitions shifted by one from the previous bit; thus, when we | ||
| 48 | add or remove a bit, only one other definition need change. */ | ||
| 49 | typedef unsigned long reg_syntax_t; | ||
| 50 | |||
| 51 | /* If this bit is not set, then \ inside a bracket expression is literal. | ||
| 52 | If set, then such a \ quotes the following character. */ | ||
| 53 | #define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1) | ||
| 54 | |||
| 55 | /* If this bit is not set, then + and ? are operators, and \+ and \? are | ||
| 56 | literals. | ||
| 57 | If set, then \+ and \? are operators and + and ? are literals. */ | ||
| 58 | #define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1) | ||
| 59 | |||
| 60 | /* If this bit is set, then character classes are supported. They are: | ||
| 61 | [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:], | ||
| 62 | [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:]. | ||
| 63 | If not set, then character classes are not supported. */ | ||
| 64 | #define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1) | ||
| 65 | |||
| 66 | /* If this bit is set, then ^ and $ are always anchors (outside bracket | ||
| 67 | expressions, of course). | ||
| 68 | If this bit is not set, then it depends: | ||
| 69 | ^ is an anchor if it is at the beginning of a regular | ||
| 70 | expression or after an open-group or an alternation operator; | ||
| 71 | $ is an anchor if it is at the end of a regular expression, or | ||
| 72 | before a close-group or an alternation operator. | ||
| 73 | |||
| 74 | This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because | ||
| 75 | POSIX draft 11.2 says that * etc. in leading positions is undefined. | ||
| 76 | We already implemented a previous draft which made those constructs | ||
| 77 | invalid, though, so we haven't changed the code back. */ | ||
| 78 | #define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1) | ||
| 79 | |||
| 80 | /* If this bit is set, then special characters are always special | ||
| 81 | regardless of where they are in the pattern. | ||
| 82 | If this bit is not set, then special characters are special only in | ||
| 83 | some contexts; otherwise they are ordinary. Specifically, | ||
| 84 | * + ? and intervals are only special when not after the beginning, | ||
| 85 | open-group, or alternation operator. */ | ||
| 86 | #define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1) | ||
| 87 | |||
| 88 | /* If this bit is set, then *, +, ?, and { cannot be first in an re or | ||
| 89 | immediately after an alternation or begin-group operator. */ | ||
| 90 | #define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1) | ||
| 91 | |||
| 92 | /* If this bit is set, then . matches newline. | ||
| 93 | If not set, then it doesn't. */ | ||
| 94 | #define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1) | ||
| 95 | |||
| 96 | /* If this bit is set, then . doesn't match NUL. | ||
| 97 | If not set, then it does. */ | ||
| 98 | #define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1) | ||
| 99 | |||
| 100 | /* If this bit is set, nonmatching lists [^...] do not match newline. | ||
| 101 | If not set, they do. */ | ||
| 102 | #define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1) | ||
| 103 | |||
| 104 | /* If this bit is set, either \{...\} or {...} defines an | ||
| 105 | interval, depending on RE_NO_BK_BRACES. | ||
| 106 | If not set, \{, \}, {, and } are literals. */ | ||
| 107 | #define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1) | ||
| 108 | |||
| 109 | /* If this bit is set, +, ? and | aren't recognized as operators. | ||
| 110 | If not set, they are. */ | ||
| 111 | #define RE_LIMITED_OPS (RE_INTERVALS << 1) | ||
| 112 | |||
| 113 | /* If this bit is set, newline is an alternation operator. | ||
| 114 | If not set, newline is literal. */ | ||
| 115 | #define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1) | ||
| 116 | |||
| 117 | /* If this bit is set, then `{...}' defines an interval, and \{ and \} | ||
| 118 | are literals. | ||
| 119 | If not set, then `\{...\}' defines an interval. */ | ||
| 120 | #define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1) | ||
| 121 | |||
| 122 | /* If this bit is set, (...) defines a group, and \( and \) are literals. | ||
| 123 | If not set, \(...\) defines a group, and ( and ) are literals. */ | ||
| 124 | #define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1) | ||
| 125 | |||
| 126 | /* If this bit is set, then \<digit> matches <digit>. | ||
| 127 | If not set, then \<digit> is a back-reference. */ | ||
| 128 | #define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1) | ||
| 129 | |||
| 130 | /* If this bit is set, then | is an alternation operator, and \| is literal. | ||
| 131 | If not set, then \| is an alternation operator, and | is literal. */ | ||
| 132 | #define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1) | ||
| 133 | |||
| 134 | /* If this bit is set, then an ending range point collating lower | ||
| 135 | than the starting range point, as in [z-a], is invalid. | ||
| 136 | If not set, then when the ending range point collates lower than the | ||
| 137 | starting range point, the range is ignored. */ | ||
| 138 | #define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1) | ||
| 139 | |||
| 140 | /* If this bit is set, then an unmatched ) is ordinary. | ||
| 141 | If not set, then an unmatched ) is invalid. */ | ||
| 142 | #define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1) | ||
| 143 | |||
| 144 | /* If this bit is set, succeed as soon as we match the whole pattern, | ||
| 145 | without further backtracking. */ | ||
| 146 | #define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1) | ||
| 147 | |||
| 148 | /* If this bit is set, do not process the GNU regex operators. | ||
| 149 | If not set, then the GNU regex operators are recognized. */ | ||
| 150 | #define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1) | ||
| 151 | |||
| 152 | /* If this bit is set, then *?, +? and ?? match non greedily. */ | ||
| 153 | #define RE_FRUGAL (RE_NO_GNU_OPS << 1) | ||
| 154 | |||
| 155 | /* If this bit is set, then (?:...) is treated as a shy group. */ | ||
| 156 | #define RE_SHY_GROUPS (RE_FRUGAL << 1) | ||
| 157 | |||
| 158 | /* If this bit is set, ^ and $ only match at beg/end of buffer. */ | ||
| 159 | #define RE_NO_NEWLINE_ANCHOR (RE_SHY_GROUPS << 1) | ||
| 160 | |||
| 161 | /* If this bit is set, turn on internal regex debugging. | ||
| 162 | If not set, and debugging was on, turn it off. | ||
| 163 | This only works if regex-emacs.c is compiled -DDEBUG. | ||
| 164 | We define this bit always, so that all that's needed to turn on | ||
| 165 | debugging is to recompile regex-emacs.c; the calling code can always have | ||
| 166 | this bit set, and it won't affect anything in the normal case. */ | ||
| 167 | #define RE_DEBUG (RE_NO_NEWLINE_ANCHOR << 1) | ||
| 168 | |||
| 169 | /* This global variable defines the particular regexp syntax to use (for | ||
| 170 | some interfaces). When a regexp is compiled, the syntax used is | ||
| 171 | stored in the pattern buffer, so changing this does not affect | ||
| 172 | already-compiled regexps. */ | ||
| 173 | /* extern reg_syntax_t re_syntax_options; */ | ||
| 174 | |||
| 175 | #ifdef emacs | ||
| 176 | # include "lisp.h" | ||
| 177 | /* In Emacs, this is the string or buffer in which we are matching. | 38 | /* In Emacs, this is the string or buffer in which we are matching. |
| 178 | It is used for looking up syntax properties. | 39 | It is used for looking up syntax properties. |
| 179 | 40 | ||
| @@ -187,187 +48,23 @@ typedef unsigned long reg_syntax_t; | |||
| 187 | and match functions. These functions capture the current value of | 48 | and match functions. These functions capture the current value of |
| 188 | re_match_object into gl_state on entry. | 49 | re_match_object into gl_state on entry. |
| 189 | 50 | ||
| 190 | TODO: once we get rid of the !emacs case in this code, turn into an | 51 | TODO: turn into an actual function parameter. */ |
| 191 | actual function parameter. */ | ||
| 192 | extern Lisp_Object re_match_object; | 52 | extern Lisp_Object re_match_object; |
| 193 | #endif | ||
| 194 | 53 | ||
| 195 | /* Roughly the maximum number of failure points on the stack. */ | 54 | /* Roughly the maximum number of failure points on the stack. */ |
| 196 | extern size_t emacs_re_max_failures; | 55 | extern size_t emacs_re_max_failures; |
| 197 | 56 | ||
| 198 | #ifdef emacs | ||
| 199 | /* Amount of memory that we can safely stack allocate. */ | 57 | /* Amount of memory that we can safely stack allocate. */ |
| 200 | extern ptrdiff_t emacs_re_safe_alloca; | 58 | extern ptrdiff_t emacs_re_safe_alloca; |
| 201 | #endif | ||
| 202 | |||
| 203 | |||
| 204 | /* Define combinations of the above bits for the standard possibilities. | ||
| 205 | (The [[[ comments delimit what gets put into the Texinfo file, so | ||
| 206 | don't delete them!) */ | ||
| 207 | /* [[[begin syntaxes]]] */ | ||
| 208 | #define RE_SYNTAX_EMACS \ | ||
| 209 | (RE_CHAR_CLASSES | RE_INTERVALS | RE_SHY_GROUPS | RE_FRUGAL) | ||
| 210 | |||
| 211 | #define RE_SYNTAX_AWK \ | ||
| 212 | (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \ | ||
| 213 | | RE_NO_BK_PARENS | RE_NO_BK_REFS \ | ||
| 214 | | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \ | ||
| 215 | | RE_DOT_NEWLINE | RE_CONTEXT_INDEP_ANCHORS \ | ||
| 216 | | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS) | ||
| 217 | |||
| 218 | #define RE_SYNTAX_GNU_AWK \ | ||
| 219 | ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG) \ | ||
| 220 | & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS)) | ||
| 221 | |||
| 222 | #define RE_SYNTAX_POSIX_AWK \ | ||
| 223 | (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \ | ||
| 224 | | RE_INTERVALS | RE_NO_GNU_OPS) | ||
| 225 | |||
| 226 | #define RE_SYNTAX_GREP \ | ||
| 227 | (RE_BK_PLUS_QM | RE_CHAR_CLASSES \ | ||
| 228 | | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \ | ||
| 229 | | RE_NEWLINE_ALT) | ||
| 230 | |||
| 231 | #define RE_SYNTAX_EGREP \ | ||
| 232 | (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \ | ||
| 233 | | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \ | ||
| 234 | | RE_NEWLINE_ALT | RE_NO_BK_PARENS \ | ||
| 235 | | RE_NO_BK_VBAR) | ||
| 236 | |||
| 237 | #define RE_SYNTAX_POSIX_EGREP \ | ||
| 238 | (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES) | ||
| 239 | |||
| 240 | /* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */ | ||
| 241 | #define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC | ||
| 242 | |||
| 243 | #define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC | ||
| 244 | |||
| 245 | /* Syntax bits common to both basic and extended POSIX regex syntax. */ | ||
| 246 | #define _RE_SYNTAX_POSIX_COMMON \ | ||
| 247 | (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \ | ||
| 248 | | RE_INTERVALS | RE_NO_EMPTY_RANGES) | ||
| 249 | |||
| 250 | #define RE_SYNTAX_POSIX_BASIC \ | ||
| 251 | (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM) | ||
| 252 | |||
| 253 | /* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes | ||
| 254 | RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this | ||
| 255 | isn't minimal, since other operators, such as \`, aren't disabled. */ | ||
| 256 | #define RE_SYNTAX_POSIX_MINIMAL_BASIC \ | ||
| 257 | (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS) | ||
| 258 | |||
| 259 | #define RE_SYNTAX_POSIX_EXTENDED \ | ||
| 260 | (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ | ||
| 261 | | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \ | ||
| 262 | | RE_NO_BK_PARENS | RE_NO_BK_VBAR \ | ||
| 263 | | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD) | ||
| 264 | |||
| 265 | /* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is | ||
| 266 | removed and RE_NO_BK_REFS is added. */ | ||
| 267 | #define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \ | ||
| 268 | (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ | ||
| 269 | | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \ | ||
| 270 | | RE_NO_BK_PARENS | RE_NO_BK_REFS \ | ||
| 271 | | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD) | ||
| 272 | /* [[[end syntaxes]]] */ | ||
| 273 | 59 | ||
| 274 | /* Maximum number of duplicates an interval can allow. Some systems | ||
| 275 | (erroneously) define this in other header files, but we want our | ||
| 276 | value, so remove any previous define. */ | ||
| 277 | #ifdef RE_DUP_MAX | ||
| 278 | # undef RE_DUP_MAX | ||
| 279 | #endif | ||
| 280 | /* Repeat counts are stored in opcodes as 2 byte integers. This was | ||
| 281 | previously limited to 7fff because the parsing code uses signed | ||
| 282 | ints. But Emacs only runs on 32 bit platforms anyway. */ | ||
| 283 | #define RE_DUP_MAX (0xffff) | ||
| 284 | |||
| 285 | |||
| 286 | /* POSIX `cflags' bits (i.e., information for `regcomp'). */ | ||
| 287 | |||
| 288 | /* If this bit is set, then use extended regular expression syntax. | ||
| 289 | If not set, then use basic regular expression syntax. */ | ||
| 290 | #define REG_EXTENDED 1 | ||
| 291 | |||
| 292 | /* If this bit is set, then ignore case when matching. | ||
| 293 | If not set, then case is significant. */ | ||
| 294 | #define REG_ICASE (REG_EXTENDED << 1) | ||
| 295 | |||
| 296 | /* If this bit is set, then anchors do not match at newline | ||
| 297 | characters in the string. | ||
| 298 | If not set, then anchors do match at newlines. */ | ||
| 299 | #define REG_NEWLINE (REG_ICASE << 1) | ||
| 300 | |||
| 301 | /* If this bit is set, then report only success or fail in regexec. | ||
| 302 | If not set, then returns differ between not matching and errors. */ | ||
| 303 | #define REG_NOSUB (REG_NEWLINE << 1) | ||
| 304 | |||
| 305 | |||
| 306 | /* POSIX `eflags' bits (i.e., information for regexec). */ | ||
| 307 | |||
| 308 | /* If this bit is set, then the beginning-of-line operator doesn't match | ||
| 309 | the beginning of the string (presumably because it's not the | ||
| 310 | beginning of a line). | ||
| 311 | If not set, then the beginning-of-line operator does match the | ||
| 312 | beginning of the string. */ | ||
| 313 | #define REG_NOTBOL 1 | ||
| 314 | |||
| 315 | /* Like REG_NOTBOL, except for the end-of-line. */ | ||
| 316 | #define REG_NOTEOL (1 << 1) | ||
| 317 | |||
| 318 | |||
| 319 | /* If any error codes are removed, changed, or added, update the | ||
| 320 | `re_error_msg' table in regex-emacs.c. */ | ||
| 321 | typedef enum | ||
| 322 | { | ||
| 323 | #ifdef _XOPEN_SOURCE | ||
| 324 | REG_ENOSYS = -1, /* This will never happen for this implementation. */ | ||
| 325 | #endif | ||
| 326 | |||
| 327 | REG_NOERROR = 0, /* Success. */ | ||
| 328 | REG_NOMATCH, /* Didn't find a match (for regexec). */ | ||
| 329 | |||
| 330 | /* POSIX regcomp return error codes. (In the order listed in the | ||
| 331 | standard.) */ | ||
| 332 | REG_BADPAT, /* Invalid pattern. */ | ||
| 333 | REG_ECOLLATE, /* Not implemented. */ | ||
| 334 | REG_ECTYPE, /* Invalid character class name. */ | ||
| 335 | REG_EESCAPE, /* Trailing backslash. */ | ||
| 336 | REG_ESUBREG, /* Invalid back reference. */ | ||
| 337 | REG_EBRACK, /* Unmatched left bracket. */ | ||
| 338 | REG_EPAREN, /* Parenthesis imbalance. */ | ||
| 339 | REG_EBRACE, /* Unmatched \{. */ | ||
| 340 | REG_BADBR, /* Invalid contents of \{\}. */ | ||
| 341 | REG_ERANGE, /* Invalid range end. */ | ||
| 342 | REG_ESPACE, /* Ran out of memory. */ | ||
| 343 | REG_BADRPT, /* No preceding re for repetition op. */ | ||
| 344 | |||
| 345 | /* Error codes we've added. */ | ||
| 346 | REG_EEND, /* Premature end. */ | ||
| 347 | REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */ | ||
| 348 | REG_ERPAREN, /* Unmatched ) or \); not returned from regcomp. */ | ||
| 349 | REG_ERANGEX, /* Range striding over charsets. */ | ||
| 350 | REG_ESIZEBR /* n or m too big in \{n,m\} */ | ||
| 351 | } reg_errcode_t; | ||
| 352 | |||
| 353 | /* Use a type compatible with Emacs. */ | ||
| 354 | #define RE_TRANSLATE_TYPE Lisp_Object | ||
| 355 | #define RE_TRANSLATE(TBL, C) char_table_translate (TBL, C) | ||
| 356 | #define RE_TRANSLATE_P(TBL) (!EQ (TBL, make_number (0))) | ||
| 357 | |||
| 358 | /* This data structure represents a compiled pattern. Before calling | 60 | /* This data structure represents a compiled pattern. Before calling |
| 359 | the pattern compiler, the fields `buffer', `allocated', `fastmap', | 61 | the pattern compiler, the fields `buffer', `allocated', `fastmap', |
| 360 | `translate', and `no_sub' can be set. After the pattern has been | 62 | `translate', and `no_sub' can be set. After the pattern has been |
| 361 | compiled, the `re_nsub' field is available. All other fields are | 63 | compiled, the `re_nsub' field is available. All other fields are |
| 362 | private to the regex routines. */ | 64 | private to the regex routines. */ |
| 363 | 65 | ||
| 364 | #ifndef RE_TRANSLATE_TYPE | ||
| 365 | # define RE_TRANSLATE_TYPE char * | ||
| 366 | #endif | ||
| 367 | |||
| 368 | struct re_pattern_buffer | 66 | struct re_pattern_buffer |
| 369 | { | 67 | { |
| 370 | /* [[[begin pattern_buffer]]] */ | ||
| 371 | /* Space that holds the compiled pattern. It is declared as | 68 | /* Space that holds the compiled pattern. It is declared as |
| 372 | `unsigned char *' because its elements are | 69 | `unsigned char *' because its elements are |
| 373 | sometimes used as array indexes. */ | 70 | sometimes used as array indexes. */ |
| @@ -379,13 +76,9 @@ struct re_pattern_buffer | |||
| 379 | /* Number of bytes actually used in `buffer'. */ | 76 | /* Number of bytes actually used in `buffer'. */ |
| 380 | size_t used; | 77 | size_t used; |
| 381 | 78 | ||
| 382 | #ifdef emacs | ||
| 383 | /* Charset of unibyte characters at compiling time. */ | 79 | /* Charset of unibyte characters at compiling time. */ |
| 384 | int charset_unibyte; | 80 | int charset_unibyte; |
| 385 | #else | 81 | |
| 386 | /* Syntax setting with which the pattern was compiled. */ | ||
| 387 | reg_syntax_t syntax; | ||
| 388 | #endif | ||
| 389 | /* Pointer to a fastmap, if any, otherwise zero. re_search uses | 82 | /* Pointer to a fastmap, if any, otherwise zero. re_search uses |
| 390 | the fastmap, if there is one, to skip over impossible | 83 | the fastmap, if there is one, to skip over impossible |
| 391 | starting points for matches. */ | 84 | starting points for matches. */ |
| @@ -395,7 +88,7 @@ struct re_pattern_buffer | |||
| 395 | comparing them, or zero for no translation. The translation | 88 | comparing them, or zero for no translation. The translation |
| 396 | is applied to a pattern when it is compiled and to a string | 89 | is applied to a pattern when it is compiled and to a string |
| 397 | when it is matched. */ | 90 | when it is matched. */ |
| 398 | RE_TRANSLATE_TYPE translate; | 91 | Lisp_Object translate; |
| 399 | 92 | ||
| 400 | /* Number of subexpressions found by the compiler. */ | 93 | /* Number of subexpressions found by the compiler. */ |
| 401 | size_t re_nsub; | 94 | size_t re_nsub; |
| @@ -410,9 +103,6 @@ struct re_pattern_buffer | |||
| 410 | for `max (RE_NREGS, re_nsub + 1)' groups. | 103 | for `max (RE_NREGS, re_nsub + 1)' groups. |
| 411 | If REGS_REALLOCATE, reallocate space if necessary. | 104 | If REGS_REALLOCATE, reallocate space if necessary. |
| 412 | If REGS_FIXED, use what's there. */ | 105 | If REGS_FIXED, use what's there. */ |
| 413 | #define REGS_UNALLOCATED 0 | ||
| 414 | #define REGS_REALLOCATE 1 | ||
| 415 | #define REGS_FIXED 2 | ||
| 416 | unsigned regs_allocated : 2; | 106 | unsigned regs_allocated : 2; |
| 417 | 107 | ||
| 418 | /* Set to zero when `regex_compile' compiles a pattern; set to one | 108 | /* Set to zero when `regex_compile' compiles a pattern; set to one |
| @@ -434,7 +124,6 @@ struct re_pattern_buffer | |||
| 434 | so the compiled pattern is only valid for the current syntax table. */ | 124 | so the compiled pattern is only valid for the current syntax table. */ |
| 435 | unsigned used_syntax : 1; | 125 | unsigned used_syntax : 1; |
| 436 | 126 | ||
| 437 | #ifdef emacs | ||
| 438 | /* If true, multi-byte form in the regexp pattern should be | 127 | /* If true, multi-byte form in the regexp pattern should be |
| 439 | recognized as a multibyte character. */ | 128 | recognized as a multibyte character. */ |
| 440 | unsigned multibyte : 1; | 129 | unsigned multibyte : 1; |
| @@ -442,72 +131,17 @@ struct re_pattern_buffer | |||
| 442 | /* If true, multi-byte form in the target of match should be | 131 | /* If true, multi-byte form in the target of match should be |
| 443 | recognized as a multibyte character. */ | 132 | recognized as a multibyte character. */ |
| 444 | unsigned target_multibyte : 1; | 133 | unsigned target_multibyte : 1; |
| 445 | #endif | ||
| 446 | |||
| 447 | /* [[[end pattern_buffer]]] */ | ||
| 448 | }; | 134 | }; |
| 449 | |||
| 450 | typedef struct re_pattern_buffer regex_t; | ||
| 451 | |||
| 452 | /* POSIX 1003.1-2008 requires that regoff_t be at least as wide as | ||
| 453 | ptrdiff_t and ssize_t. We don't know of any hosts where ptrdiff_t | ||
| 454 | is wider than ssize_t, so ssize_t is safe. ptrdiff_t is not | ||
| 455 | necessarily visible here, so use ssize_t. */ | ||
| 456 | typedef ssize_t regoff_t; | ||
| 457 | |||
| 458 | |||
| 459 | /* This is the structure we store register match data in. See | ||
| 460 | regex.texinfo for a full description of what registers match. */ | ||
| 461 | struct re_registers | ||
| 462 | { | ||
| 463 | unsigned num_regs; | ||
| 464 | regoff_t *start; | ||
| 465 | regoff_t *end; | ||
| 466 | }; | ||
| 467 | |||
| 468 | |||
| 469 | /* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer, | ||
| 470 | `re_match_2' returns information about at least this many registers | ||
| 471 | the first time a `regs' structure is passed. */ | ||
| 472 | #ifndef RE_NREGS | ||
| 473 | # define RE_NREGS 30 | ||
| 474 | #endif | ||
| 475 | |||
| 476 | |||
| 477 | /* POSIX specification for registers. Aside from the different names than | ||
| 478 | `re_registers', POSIX uses an array of structures, instead of a | ||
| 479 | structure of arrays. */ | ||
| 480 | typedef struct | ||
| 481 | { | ||
| 482 | regoff_t rm_so; /* Byte offset from string's start to substring's start. */ | ||
| 483 | regoff_t rm_eo; /* Byte offset from string's start to substring's end. */ | ||
| 484 | } regmatch_t; | ||
| 485 | 135 | ||
| 486 | /* Declarations for routines. */ | 136 | /* Declarations for routines. */ |
| 487 | 137 | ||
| 488 | #ifndef emacs | ||
| 489 | |||
| 490 | /* Sets the current default syntax to SYNTAX, and return the old syntax. | ||
| 491 | You can also simply assign to the `re_syntax_options' variable. */ | ||
| 492 | extern reg_syntax_t re_set_syntax (reg_syntax_t __syntax); | ||
| 493 | |||
| 494 | #endif | ||
| 495 | |||
| 496 | /* Compile the regular expression PATTERN, with length LENGTH | 138 | /* Compile the regular expression PATTERN, with length LENGTH |
| 497 | and syntax given by the global `re_syntax_options', into the buffer | 139 | and syntax given by the global `re_syntax_options', into the buffer |
| 498 | BUFFER. Return NULL if successful, and an error string if not. */ | 140 | BUFFER. Return NULL if successful, and an error string if not. */ |
| 499 | extern const char *re_compile_pattern (const char *__pattern, size_t __length, | 141 | extern const char *re_compile_pattern (const char *pattern, size_t length, |
| 500 | #ifdef emacs | ||
| 501 | bool posix_backtracking, | 142 | bool posix_backtracking, |
| 502 | const char *whitespace_regexp, | 143 | const char *whitespace_regexp, |
| 503 | #endif | 144 | struct re_pattern_buffer *buffer); |
| 504 | struct re_pattern_buffer *__buffer); | ||
| 505 | |||
| 506 | |||
| 507 | /* Compile a fastmap for the compiled pattern in BUFFER; used to | ||
| 508 | accelerate searches. Return 0 if successful and -2 if there was an | ||
| 509 | internal error. */ | ||
| 510 | extern int re_compile_fastmap (struct re_pattern_buffer *__buffer); | ||
| 511 | 145 | ||
| 512 | 146 | ||
| 513 | /* Search in the string STRING (with length LENGTH) for the pattern | 147 | /* Search in the string STRING (with length LENGTH) for the pattern |
| @@ -515,42 +149,36 @@ extern int re_compile_fastmap (struct re_pattern_buffer *__buffer); | |||
| 515 | characters. Return the starting position of the match, -1 for no | 149 | characters. Return the starting position of the match, -1 for no |
| 516 | match, or -2 for an internal error. Also return register | 150 | match, or -2 for an internal error. Also return register |
| 517 | information in REGS (if REGS and BUFFER->no_sub are nonzero). */ | 151 | information in REGS (if REGS and BUFFER->no_sub are nonzero). */ |
| 518 | extern regoff_t re_search (struct re_pattern_buffer *__buffer, | 152 | extern ptrdiff_t re_search (struct re_pattern_buffer *buffer, |
| 519 | const char *__string, size_t __length, | 153 | const char *string, size_t length, |
| 520 | ssize_t __start, ssize_t __range, | 154 | ptrdiff_t start, ptrdiff_t range, |
| 521 | struct re_registers *__regs); | 155 | struct re_registers *regs); |
| 522 | 156 | ||
| 523 | 157 | ||
| 524 | /* Like `re_search', but search in the concatenation of STRING1 and | 158 | /* Like `re_search', but search in the concatenation of STRING1 and |
| 525 | STRING2. Also, stop searching at index START + STOP. */ | 159 | STRING2. Also, stop searching at index START + STOP. */ |
| 526 | extern regoff_t re_search_2 (struct re_pattern_buffer *__buffer, | 160 | extern ptrdiff_t re_search_2 (struct re_pattern_buffer *buffer, |
| 527 | const char *__string1, size_t __length1, | 161 | const char *string1, size_t length1, |
| 528 | const char *__string2, size_t __length2, | 162 | const char *string2, size_t length2, |
| 529 | ssize_t __start, ssize_t __range, | 163 | ptrdiff_t start, ptrdiff_t range, |
| 530 | struct re_registers *__regs, | 164 | struct re_registers *regs, |
| 531 | ssize_t __stop); | 165 | ptrdiff_t stop); |
| 532 | 166 | ||
| 533 | 167 | ||
| 534 | /* Like `re_search', but return how many characters in STRING the regexp | 168 | /* Like 're_search_2', but return how many characters in STRING the regexp |
| 535 | in BUFFER matched, starting at position START. */ | 169 | in BUFFER matched, starting at position START. */ |
| 536 | extern regoff_t re_match (struct re_pattern_buffer *__buffer, | 170 | extern ptrdiff_t re_match_2 (struct re_pattern_buffer *buffer, |
| 537 | const char *__string, size_t __length, | 171 | const char *string1, size_t length1, |
| 538 | ssize_t __start, struct re_registers *__regs); | 172 | const char *string2, size_t length2, |
| 539 | 173 | ptrdiff_t start, struct re_registers *regs, | |
| 540 | 174 | ptrdiff_t stop); | |
| 541 | /* Relates to `re_match' as `re_search_2' relates to `re_search'. */ | ||
| 542 | extern regoff_t re_match_2 (struct re_pattern_buffer *__buffer, | ||
| 543 | const char *__string1, size_t __length1, | ||
| 544 | const char *__string2, size_t __length2, | ||
| 545 | ssize_t __start, struct re_registers *__regs, | ||
| 546 | ssize_t __stop); | ||
| 547 | 175 | ||
| 548 | 176 | ||
| 549 | /* Set REGS to hold NUM_REGS registers, storing them in STARTS and | 177 | /* Set REGS to hold NUM_REGS registers, storing them in STARTS and |
| 550 | ENDS. Subsequent matches using BUFFER and REGS will use this memory | 178 | ENDS. Subsequent matches using BUFFER and REGS will use this memory |
| 551 | for recording register information. STARTS and ENDS must be | 179 | for recording register information. STARTS and ENDS must be |
| 552 | allocated with malloc, and must each be at least `NUM_REGS * sizeof | 180 | allocated with malloc, and must each be at least `NUM_REGS * sizeof |
| 553 | (regoff_t)' bytes long. | 181 | (ptrdiff_t)' bytes long. |
| 554 | 182 | ||
| 555 | If NUM_REGS == 0, then subsequent matches should allocate their own | 183 | If NUM_REGS == 0, then subsequent matches should allocate their own |
| 556 | register data. | 184 | register data. |
| @@ -558,83 +186,10 @@ extern regoff_t re_match_2 (struct re_pattern_buffer *__buffer, | |||
| 558 | Unless this function is called, the first search or match using | 186 | Unless this function is called, the first search or match using |
| 559 | PATTERN_BUFFER will allocate its own register data, without | 187 | PATTERN_BUFFER will allocate its own register data, without |
| 560 | freeing the old data. */ | 188 | freeing the old data. */ |
| 561 | extern void re_set_registers (struct re_pattern_buffer *__buffer, | 189 | extern void re_set_registers (struct re_pattern_buffer *buffer, |
| 562 | struct re_registers *__regs, | 190 | struct re_registers *regs, |
| 563 | unsigned __num_regs, | 191 | unsigned num_regs, |
| 564 | regoff_t *__starts, regoff_t *__ends); | 192 | ptrdiff_t *starts, ptrdiff_t *ends); |
| 565 | |||
| 566 | #if defined _REGEX_RE_COMP || defined _LIBC | ||
| 567 | # ifndef _CRAY | ||
| 568 | /* 4.2 bsd compatibility. */ | ||
| 569 | extern char *re_comp (const char *); | ||
| 570 | extern int re_exec (const char *); | ||
| 571 | # endif | ||
| 572 | #endif | ||
| 573 | |||
| 574 | /* GCC 2.95 and later have "__restrict"; C99 compilers have | ||
| 575 | "restrict", and "configure" may have defined "restrict". | ||
| 576 | Other compilers use __restrict, __restrict__, and _Restrict, and | ||
| 577 | 'configure' might #define 'restrict' to those words, so pick a | ||
| 578 | different name. */ | ||
| 579 | #ifndef _Restrict_ | ||
| 580 | # if 199901L <= __STDC_VERSION__ | ||
| 581 | # define _Restrict_ restrict | ||
| 582 | # elif 2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__) | ||
| 583 | # define _Restrict_ __restrict | ||
| 584 | # else | ||
| 585 | # define _Restrict_ | ||
| 586 | # endif | ||
| 587 | #endif | ||
| 588 | /* gcc 3.1 and up support the [restrict] syntax. Don't trust | ||
| 589 | sys/cdefs.h's definition of __restrict_arr, though, as it | ||
| 590 | mishandles gcc -ansi -pedantic. */ | ||
| 591 | #ifndef _Restrict_arr_ | ||
| 592 | # if ((199901L <= __STDC_VERSION__ \ | ||
| 593 | || ((3 < __GNUC__ || (3 == __GNUC__ && 1 <= __GNUC_MINOR__)) \ | ||
| 594 | && !defined __STRICT_ANSI__)) \ | ||
| 595 | && !defined __GNUG__) | ||
| 596 | # define _Restrict_arr_ _Restrict_ | ||
| 597 | # else | ||
| 598 | # define _Restrict_arr_ | ||
| 599 | # endif | ||
| 600 | #endif | ||
| 601 | |||
| 602 | /* POSIX compatibility. */ | ||
| 603 | extern reg_errcode_t regcomp (regex_t *_Restrict_ __preg, | ||
| 604 | const char *_Restrict_ __pattern, | ||
| 605 | int __cflags); | ||
| 606 | |||
| 607 | extern reg_errcode_t regexec (const regex_t *_Restrict_ __preg, | ||
| 608 | const char *_Restrict_ __string, size_t __nmatch, | ||
| 609 | regmatch_t __pmatch[_Restrict_arr_], | ||
| 610 | int __eflags); | ||
| 611 | |||
| 612 | extern size_t regerror (int __errcode, const regex_t * __preg, | ||
| 613 | char *__errbuf, size_t __errbuf_size); | ||
| 614 | |||
| 615 | extern void regfree (regex_t *__preg); | ||
| 616 | |||
| 617 | |||
| 618 | #ifdef __cplusplus | ||
| 619 | } | ||
| 620 | #endif /* C++ */ | ||
| 621 | |||
| 622 | /* For platforms which support the ISO C amendment 1 functionality we | ||
| 623 | support user defined character classes. */ | ||
| 624 | #if WIDE_CHAR_SUPPORT | ||
| 625 | /* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */ | ||
| 626 | # include <wchar.h> | ||
| 627 | # include <wctype.h> | ||
| 628 | |||
| 629 | typedef wctype_t re_wctype_t; | ||
| 630 | typedef wchar_t re_wchar_t; | ||
| 631 | # define re_wctype wctype | ||
| 632 | # define re_iswctype iswctype | ||
| 633 | # define re_wctype_to_bit(cc) 0 | ||
| 634 | #else | ||
| 635 | # ifndef emacs | ||
| 636 | # define btowc(c) c | ||
| 637 | # endif | ||
| 638 | 193 | ||
| 639 | /* Character classes. */ | 194 | /* Character classes. */ |
| 640 | typedef enum { RECC_ERROR = 0, | 195 | typedef enum { RECC_ERROR = 0, |
| @@ -648,12 +203,8 @@ typedef enum { RECC_ERROR = 0, | |||
| 648 | RECC_ASCII, RECC_UNIBYTE | 203 | RECC_ASCII, RECC_UNIBYTE |
| 649 | } re_wctype_t; | 204 | } re_wctype_t; |
| 650 | 205 | ||
| 651 | extern char re_iswctype (int ch, re_wctype_t cc); | 206 | extern bool re_iswctype (int ch, re_wctype_t cc); |
| 652 | extern re_wctype_t re_wctype_parse (const unsigned char **strp, unsigned limit); | 207 | extern re_wctype_t re_wctype_parse (const unsigned char **strp, |
| 653 | 208 | unsigned limit); | |
| 654 | typedef int re_wchar_t; | ||
| 655 | |||
| 656 | #endif /* not WIDE_CHAR_SUPPORT */ | ||
| 657 | 209 | ||
| 658 | #endif /* regex-emacs.h */ | 210 | #endif /* regex-emacs.h */ |
| 659 | |||
diff --git a/src/search.c b/src/search.c index d4b03220412..f758bb9304a 100644 --- a/src/search.c +++ b/src/search.c | |||
| @@ -59,8 +59,8 @@ static struct regexp_cache searchbufs[REGEXP_CACHE_SIZE]; | |||
| 59 | static struct regexp_cache *searchbuf_head; | 59 | static struct regexp_cache *searchbuf_head; |
| 60 | 60 | ||
| 61 | 61 | ||
| 62 | /* Every call to re_match, etc., must pass &search_regs as the regs | 62 | /* Every call to re_search, etc., must pass &search_regs as the regs |
| 63 | argument unless you can show it is unnecessary (i.e., if re_match | 63 | argument unless you can show it is unnecessary (i.e., if re_search |
| 64 | is certainly going to be called again before region-around-match | 64 | is certainly going to be called again before region-around-match |
| 65 | can be called). | 65 | can be called). |
| 66 | 66 | ||
| @@ -2189,8 +2189,8 @@ set_search_regs (ptrdiff_t beg_byte, ptrdiff_t nbytes) | |||
| 2189 | the match position. */ | 2189 | the match position. */ |
| 2190 | if (search_regs.num_regs == 0) | 2190 | if (search_regs.num_regs == 0) |
| 2191 | { | 2191 | { |
| 2192 | search_regs.start = xmalloc (2 * sizeof (regoff_t)); | 2192 | search_regs.start = xmalloc (2 * sizeof *search_regs.start); |
| 2193 | search_regs.end = xmalloc (2 * sizeof (regoff_t)); | 2193 | search_regs.end = xmalloc (2 * sizeof *search_regs.end); |
| 2194 | search_regs.num_regs = 2; | 2194 | search_regs.num_regs = 2; |
| 2195 | } | 2195 | } |
| 2196 | 2196 | ||
| @@ -3001,9 +3001,9 @@ If optional arg RESEAT is non-nil, make markers on LIST point nowhere. */) | |||
| 3001 | memory_full (SIZE_MAX); | 3001 | memory_full (SIZE_MAX); |
| 3002 | search_regs.start = | 3002 | search_regs.start = |
| 3003 | xpalloc (search_regs.start, &num_regs, length - num_regs, | 3003 | xpalloc (search_regs.start, &num_regs, length - num_regs, |
| 3004 | min (PTRDIFF_MAX, UINT_MAX), sizeof (regoff_t)); | 3004 | min (PTRDIFF_MAX, UINT_MAX), sizeof *search_regs.start); |
| 3005 | search_regs.end = | 3005 | search_regs.end = |
| 3006 | xrealloc (search_regs.end, num_regs * sizeof (regoff_t)); | 3006 | xrealloc (search_regs.end, num_regs * sizeof *search_regs.end); |
| 3007 | 3007 | ||
| 3008 | for (i = search_regs.num_regs; i < num_regs; i++) | 3008 | for (i = search_regs.num_regs; i < num_regs; i++) |
| 3009 | search_regs.start[i] = -1; | 3009 | search_regs.start[i] = -1; |
| @@ -3058,12 +3058,9 @@ If optional arg RESEAT is non-nil, make markers on LIST point nowhere. */) | |||
| 3058 | XSETFASTINT (marker, 0); | 3058 | XSETFASTINT (marker, 0); |
| 3059 | 3059 | ||
| 3060 | CHECK_NUMBER_COERCE_MARKER (marker); | 3060 | CHECK_NUMBER_COERCE_MARKER (marker); |
| 3061 | if ((XINT (from) < 0 | 3061 | if (PTRDIFF_MIN <= XINT (from) && XINT (from) <= PTRDIFF_MAX |
| 3062 | ? TYPE_MINIMUM (regoff_t) <= XINT (from) | 3062 | && PTRDIFF_MIN <= XINT (marker) |
| 3063 | : XINT (from) <= TYPE_MAXIMUM (regoff_t)) | 3063 | && XINT (marker) <= PTRDIFF_MAX) |
| 3064 | && (XINT (marker) < 0 | ||
| 3065 | ? TYPE_MINIMUM (regoff_t) <= XINT (marker) | ||
| 3066 | : XINT (marker) <= TYPE_MAXIMUM (regoff_t))) | ||
| 3067 | { | 3064 | { |
| 3068 | search_regs.start[i] = XINT (from); | 3065 | search_regs.start[i] = XINT (from); |
| 3069 | search_regs.end[i] = XINT (marker); | 3066 | search_regs.end[i] = XINT (marker); |
diff --git a/src/thread.h b/src/thread.h index e1eb40921b4..8ecb00824df 100644 --- a/src/thread.h +++ b/src/thread.h | |||
| @@ -112,8 +112,8 @@ struct thread_state | |||
| 112 | struct buffer *m_current_buffer; | 112 | struct buffer *m_current_buffer; |
| 113 | #define current_buffer (current_thread->m_current_buffer) | 113 | #define current_buffer (current_thread->m_current_buffer) |
| 114 | 114 | ||
| 115 | /* Every call to re_match, etc., must pass &search_regs as the regs | 115 | /* Every call to re_match_2, etc., must pass &search_regs as the regs |
| 116 | argument unless you can show it is unnecessary (i.e., if re_match | 116 | argument unless you can show it is unnecessary (i.e., if re_match_2 |
| 117 | is certainly going to be called again before region-around-match | 117 | is certainly going to be called again before region-around-match |
| 118 | can be called). | 118 | can be called). |
| 119 | 119 | ||