aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorJim Blandy1993-05-22 07:25:53 +0000
committerJim Blandy1993-05-22 07:25:53 +0000
commite959badab49fc399436f1531bbe8e4b1dea7ca1f (patch)
treea5c75144513e0349218063a6d52be72f5af353ba /src
parent4a6ac398d4dbd01e357da0a8aae8204cb7568444 (diff)
downloademacs-e959badab49fc399436f1531bbe8e4b1dea7ca1f.tar.gz
emacs-e959badab49fc399436f1531bbe8e4b1dea7ca1f.zip
*** empty log message ***
Diffstat (limited to 'src')
-rw-r--r--src/regex.c756
1 files changed, 426 insertions, 330 deletions
diff --git a/src/regex.c b/src/regex.c
index e8b588207da..cbf203b03b1 100644
--- a/src/regex.c
+++ b/src/regex.c
@@ -235,6 +235,8 @@ char *alloca ();
235/* (Re)Allocate N items of type T using malloc, or fail. */ 235/* (Re)Allocate N items of type T using malloc, or fail. */
236#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t))) 236#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
237#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t))) 237#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
238#define RETALLOC_IF(addr, n, t) \
239 if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
238#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t))) 240#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
239 241
240#define BYTEWIDTH 8 /* In bits. */ 242#define BYTEWIDTH 8 /* In bits. */
@@ -871,6 +873,375 @@ static const char *re_error_msg[] =
871 "Unmatched ) or \\)", /* REG_ERPAREN */ 873 "Unmatched ) or \\)", /* REG_ERPAREN */
872 }; 874 };
873 875
876/* Avoiding alloca during matching, to placate r_alloc. */
877
878/* Define MATCH_SHOULD_NOT_ALLOCA if we need to make sure that the
879 searching and matching functions should not call alloca. On some
880 systems, alloca is implemented in terms of malloc, and if we're
881 using the relocating allocator routines, then malloc could cause a
882 relocation, which might (if the strings being searched are in the
883 ralloc heap) shift the data out from underneath the regexp
884 routines. */
885#if defined (REL_ALLOC)
886#if ! defined (HAVE_ALLOCA) || defined (C_ALLOCA)
887#define MATCH_SHOULD_NOT_ALLOCA
888#endif
889#endif
890
891
892/* Failure stack declarations and macros; both re_compile_fastmap and
893 re_match_2 use a failure stack. These have to be macros because of
894 REGEX_ALLOCATE. */
895
896
897/* Number of failure points for which to initially allocate space
898 when matching. If this number is exceeded, we allocate more
899 space, so it is not a hard limit. */
900#ifndef INIT_FAILURE_ALLOC
901#define INIT_FAILURE_ALLOC 5
902#endif
903
904/* Roughly the maximum number of failure points on the stack. Would be
905 exactly that if always used MAX_FAILURE_SPACE each time we failed.
906 This is a variable only so users of regex can assign to it; we never
907 change it ourselves. */
908int re_max_failures = 2000;
909
910typedef const unsigned char *fail_stack_elt_t;
911
912typedef struct
913{
914 fail_stack_elt_t *stack;
915 unsigned size;
916 unsigned avail; /* Offset of next open position. */
917} fail_stack_type;
918
919#define FAIL_STACK_EMPTY() (fail_stack.avail == 0)
920#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
921#define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size)
922#define FAIL_STACK_TOP() (fail_stack.stack[fail_stack.avail])
923
924
925/* Initialize `fail_stack'. Do `return -2' if the alloc fails. */
926
927#ifdef MATCH_SHOULD_NOT_ALLOCA
928#define INIT_FAIL_STACK() \
929 do { \
930 fail_stack.stack = (fail_stack_elt_t *) \
931 REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
932 \
933 if (fail_stack.stack == NULL) \
934 return -2; \
935 \
936 fail_stack.size = INIT_FAILURE_ALLOC; \
937 fail_stack.avail = 0; \
938 } while (0)
939#else
940#define INIT_FAIL_STACK() \
941 do { \
942 fail_stack.avail = 0; \
943 } while (0)
944#endif
945
946
947/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
948
949 Return 1 if succeeds, and 0 if either ran out of memory
950 allocating space for it or it was already too large.
951
952 REGEX_REALLOCATE requires `destination' be declared. */
953
954#define DOUBLE_FAIL_STACK(fail_stack) \
955 ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS \
956 ? 0 \
957 : ((fail_stack).stack = (fail_stack_elt_t *) \
958 REGEX_REALLOCATE ((fail_stack).stack, \
959 (fail_stack).size * sizeof (fail_stack_elt_t), \
960 ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \
961 \
962 (fail_stack).stack == NULL \
963 ? 0 \
964 : ((fail_stack).size <<= 1, \
965 1)))
966
967
968/* Push PATTERN_OP on FAIL_STACK.
969
970 Return 1 if was able to do so and 0 if ran out of memory allocating
971 space to do so. */
972#define PUSH_PATTERN_OP(pattern_op, fail_stack) \
973 ((FAIL_STACK_FULL () \
974 && !DOUBLE_FAIL_STACK (fail_stack)) \
975 ? 0 \
976 : ((fail_stack).stack[(fail_stack).avail++] = pattern_op, \
977 1))
978
979/* This pushes an item onto the failure stack. Must be a four-byte
980 value. Assumes the variable `fail_stack'. Probably should only
981 be called from within `PUSH_FAILURE_POINT'. */
982#define PUSH_FAILURE_ITEM(item) \
983 fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) item
984
985/* The complement operation. Assumes `fail_stack' is nonempty. */
986#define POP_FAILURE_ITEM() fail_stack.stack[--fail_stack.avail]
987
988/* Used to omit pushing failure point id's when we're not debugging. */
989#ifdef DEBUG
990#define DEBUG_PUSH PUSH_FAILURE_ITEM
991#define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_ITEM ()
992#else
993#define DEBUG_PUSH(item)
994#define DEBUG_POP(item_addr)
995#endif
996
997
998/* Push the information about the state we will need
999 if we ever fail back to it.
1000
1001 Requires variables fail_stack, regstart, regend, reg_info, and
1002 num_regs be declared. DOUBLE_FAIL_STACK requires `destination' be
1003 declared.
1004
1005 Does `return FAILURE_CODE' if runs out of memory. */
1006
1007#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \
1008 do { \
1009 char *destination; \
1010 /* Must be int, so when we don't save any registers, the arithmetic \
1011 of 0 + -1 isn't done as unsigned. */ \
1012 int this_reg; \
1013 \
1014 DEBUG_STATEMENT (failure_id++); \
1015 DEBUG_STATEMENT (nfailure_points_pushed++); \
1016 DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
1017 DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\
1018 DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\
1019 \
1020 DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \
1021 DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \
1022 \
1023 /* Ensure we have enough space allocated for what we will push. */ \
1024 while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \
1025 { \
1026 if (!DOUBLE_FAIL_STACK (fail_stack)) \
1027 return failure_code; \
1028 \
1029 DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \
1030 (fail_stack).size); \
1031 DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\
1032 } \
1033 \
1034 /* Push the info, starting with the registers. */ \
1035 DEBUG_PRINT1 ("\n"); \
1036 \
1037 for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
1038 this_reg++) \
1039 { \
1040 DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg); \
1041 DEBUG_STATEMENT (num_regs_pushed++); \
1042 \
1043 DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
1044 PUSH_FAILURE_ITEM (regstart[this_reg]); \
1045 \
1046 DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
1047 PUSH_FAILURE_ITEM (regend[this_reg]); \
1048 \
1049 DEBUG_PRINT2 (" info: 0x%x\n ", reg_info[this_reg]); \
1050 DEBUG_PRINT2 (" match_null=%d", \
1051 REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \
1052 DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \
1053 DEBUG_PRINT2 (" matched_something=%d", \
1054 MATCHED_SOMETHING (reg_info[this_reg])); \
1055 DEBUG_PRINT2 (" ever_matched=%d", \
1056 EVER_MATCHED_SOMETHING (reg_info[this_reg])); \
1057 DEBUG_PRINT1 ("\n"); \
1058 PUSH_FAILURE_ITEM (reg_info[this_reg].word); \
1059 } \
1060 \
1061 DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg);\
1062 PUSH_FAILURE_ITEM (lowest_active_reg); \
1063 \
1064 DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg);\
1065 PUSH_FAILURE_ITEM (highest_active_reg); \
1066 \
1067 DEBUG_PRINT2 (" Pushing pattern 0x%x: ", pattern_place); \
1068 DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \
1069 PUSH_FAILURE_ITEM (pattern_place); \
1070 \
1071 DEBUG_PRINT2 (" Pushing string 0x%x: `", string_place); \
1072 DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \
1073 size2); \
1074 DEBUG_PRINT1 ("'\n"); \
1075 PUSH_FAILURE_ITEM (string_place); \
1076 \
1077 DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \
1078 DEBUG_PUSH (failure_id); \
1079 } while (0)
1080
1081/* This is the number of items that are pushed and popped on the stack
1082 for each register. */
1083#define NUM_REG_ITEMS 3
1084
1085/* Individual items aside from the registers. */
1086#ifdef DEBUG
1087#define NUM_NONREG_ITEMS 5 /* Includes failure point id. */
1088#else
1089#define NUM_NONREG_ITEMS 4
1090#endif
1091
1092/* We push at most this many items on the stack. */
1093#define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
1094
1095/* We actually push this many items. */
1096#define NUM_FAILURE_ITEMS \
1097 ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS \
1098 + NUM_NONREG_ITEMS)
1099
1100/* How many items can still be added to the stack without overflowing it. */
1101#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
1102
1103
1104/* Pops what PUSH_FAIL_STACK pushes.
1105
1106 We restore into the parameters, all of which should be lvalues:
1107 STR -- the saved data position.
1108 PAT -- the saved pattern position.
1109 LOW_REG, HIGH_REG -- the highest and lowest active registers.
1110 REGSTART, REGEND -- arrays of string positions.
1111 REG_INFO -- array of information about each subexpression.
1112
1113 Also assumes the variables `fail_stack' and (if debugging), `bufp',
1114 `pend', `string1', `size1', `string2', and `size2'. */
1115
1116#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
1117{ \
1118 DEBUG_STATEMENT (fail_stack_elt_t failure_id;) \
1119 int this_reg; \
1120 const unsigned char *string_temp; \
1121 \
1122 assert (!FAIL_STACK_EMPTY ()); \
1123 \
1124 /* Remove failure points and point to how many regs pushed. */ \
1125 DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \
1126 DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \
1127 DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \
1128 \
1129 assert (fail_stack.avail >= NUM_NONREG_ITEMS); \
1130 \
1131 DEBUG_POP (&failure_id); \
1132 DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \
1133 \
1134 /* If the saved string location is NULL, it came from an \
1135 on_failure_keep_string_jump opcode, and we want to throw away the \
1136 saved NULL, thus retaining our current position in the string. */ \
1137 string_temp = POP_FAILURE_ITEM (); \
1138 if (string_temp != NULL) \
1139 str = (const char *) string_temp; \
1140 \
1141 DEBUG_PRINT2 (" Popping string 0x%x: `", str); \
1142 DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
1143 DEBUG_PRINT1 ("'\n"); \
1144 \
1145 pat = (unsigned char *) POP_FAILURE_ITEM (); \
1146 DEBUG_PRINT2 (" Popping pattern 0x%x: ", pat); \
1147 DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \
1148 \
1149 /* Restore register info. */ \
1150 high_reg = (unsigned) POP_FAILURE_ITEM (); \
1151 DEBUG_PRINT2 (" Popping high active reg: %d\n", high_reg); \
1152 \
1153 low_reg = (unsigned) POP_FAILURE_ITEM (); \
1154 DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg); \
1155 \
1156 for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \
1157 { \
1158 DEBUG_PRINT2 (" Popping reg: %d\n", this_reg); \
1159 \
1160 reg_info[this_reg].word = POP_FAILURE_ITEM (); \
1161 DEBUG_PRINT2 (" info: 0x%x\n", reg_info[this_reg]); \
1162 \
1163 regend[this_reg] = (const char *) POP_FAILURE_ITEM (); \
1164 DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
1165 \
1166 regstart[this_reg] = (const char *) POP_FAILURE_ITEM (); \
1167 DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
1168 } \
1169 \
1170 DEBUG_STATEMENT (nfailure_points_popped++); \
1171} /* POP_FAILURE_POINT */
1172
1173
1174
1175/* Structure for per-register (a.k.a. per-group) information.
1176 This must not be longer than one word, because we push this value
1177 onto the failure stack. Other register information, such as the
1178 starting and ending positions (which are addresses), and the list of
1179 inner groups (which is a bits list) are maintained in separate
1180 variables.
1181
1182 We are making a (strictly speaking) nonportable assumption here: that
1183 the compiler will pack our bit fields into something that fits into
1184 the type of `word', i.e., is something that fits into one item on the
1185 failure stack. */
1186typedef union
1187{
1188 fail_stack_elt_t word;
1189 struct
1190 {
1191 /* This field is one if this group can match the empty string,
1192 zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */
1193#define MATCH_NULL_UNSET_VALUE 3
1194 unsigned match_null_string_p : 2;
1195 unsigned is_active : 1;
1196 unsigned matched_something : 1;
1197 unsigned ever_matched_something : 1;
1198 } bits;
1199} register_info_type;
1200
1201#define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p)
1202#define IS_ACTIVE(R) ((R).bits.is_active)
1203#define MATCHED_SOMETHING(R) ((R).bits.matched_something)
1204#define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something)
1205
1206
1207/* Call this when have matched a real character; it sets `matched' flags
1208 for the subexpressions which we are currently inside. Also records
1209 that those subexprs have matched. */
1210#define SET_REGS_MATCHED() \
1211 do \
1212 { \
1213 unsigned r; \
1214 for (r = lowest_active_reg; r <= highest_active_reg; r++) \
1215 { \
1216 MATCHED_SOMETHING (reg_info[r]) \
1217 = EVER_MATCHED_SOMETHING (reg_info[r]) \
1218 = 1; \
1219 } \
1220 } \
1221 while (0)
1222
1223
1224/* Registers are set to a sentinel when they haven't yet matched. */
1225#define REG_UNSET_VALUE ((char *) -1)
1226#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
1227
1228
1229
1230/* How do we implement MATCH_SHOULD_NOT_ALLOCA?
1231 We make the fail stack a global thing, and then grow it to
1232 re_max_failures when we compile. */
1233#ifdef MATCH_SHOULD_NOT_ALLOCA
1234static fail_stack_type fail_stack;
1235
1236static const char ** regstart, ** regend;
1237static const char ** old_regstart, ** old_regend;
1238static const char **best_regstart, **best_regend;
1239static register_info_type *reg_info;
1240static const char **reg_dummy;
1241static register_info_type *reg_info_dummy;
1242#endif
1243
1244
874/* Subroutine declarations and macros for regex_compile. */ 1245/* Subroutine declarations and macros for regex_compile. */
875 1246
876static void store_op1 (), store_op2 (); 1247static void store_op1 (), store_op2 ();
@@ -2086,6 +2457,40 @@ regex_compile (pattern, size, syntax, bufp)
2086 } 2457 }
2087#endif /* DEBUG */ 2458#endif /* DEBUG */
2088 2459
2460#ifdef MATCH_SHOULD_NOT_ALLOCA
2461 /* Initialize the failure stack to the largest possible stack. This
2462 isn't necessary unless we're trying to avoid calling alloca in
2463 the search and match routines. */
2464 {
2465 int num_regs = bufp->re_nsub + 1;
2466
2467 /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
2468 is strictly greater than re_max_failures, the largest possible stack
2469 is 2 * re_max_failures failure points. */
2470 fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
2471 if (fail_stack.stack)
2472 fail_stack.stack =
2473 (fail_stack_elt_t *) realloc (fail_stack.stack,
2474 (fail_stack.size
2475 * sizeof (fail_stack_elt_t)));
2476 else
2477 fail_stack.stack =
2478 (fail_stack_elt_t *) malloc (fail_stack.size
2479 * sizeof (fail_stack_elt_t));
2480
2481 /* Initialize some other variables the matcher uses. */
2482 RETALLOC_IF (regstart, num_regs, const char *);
2483 RETALLOC_IF (regend, num_regs, const char *);
2484 RETALLOC_IF (old_regstart, num_regs, const char *);
2485 RETALLOC_IF (old_regend, num_regs, const char *);
2486 RETALLOC_IF (best_regstart, num_regs, const char *);
2487 RETALLOC_IF (best_regend, num_regs, const char *);
2488 RETALLOC_IF (reg_info, num_regs, register_info_type);
2489 RETALLOC_IF (reg_dummy, num_regs, const char *);
2490 RETALLOC_IF (reg_info_dummy, num_regs, register_info_type);
2491 }
2492#endif
2493
2089 return REG_NOERROR; 2494 return REG_NOERROR;
2090} /* regex_compile */ 2495} /* regex_compile */
2091 2496
@@ -2275,280 +2680,6 @@ compile_range (p_ptr, pend, translate, syntax, b)
2275 return REG_NOERROR; 2680 return REG_NOERROR;
2276} 2681}
2277 2682
2278/* Failure stack declarations and macros; both re_compile_fastmap and
2279 re_match_2 use a failure stack. These have to be macros because of
2280 REGEX_ALLOCATE. */
2281
2282
2283/* Number of failure points for which to initially allocate space
2284 when matching. If this number is exceeded, we allocate more
2285 space, so it is not a hard limit. */
2286#ifndef INIT_FAILURE_ALLOC
2287#define INIT_FAILURE_ALLOC 5
2288#endif
2289
2290/* Roughly the maximum number of failure points on the stack. Would be
2291 exactly that if always used MAX_FAILURE_SPACE each time we failed.
2292 This is a variable only so users of regex can assign to it; we never
2293 change it ourselves. */
2294int re_max_failures = 2000;
2295
2296typedef const unsigned char *fail_stack_elt_t;
2297
2298typedef struct
2299{
2300 fail_stack_elt_t *stack;
2301 unsigned size;
2302 unsigned avail; /* Offset of next open position. */
2303} fail_stack_type;
2304
2305#define FAIL_STACK_EMPTY() (fail_stack.avail == 0)
2306#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
2307#define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size)
2308#define FAIL_STACK_TOP() (fail_stack.stack[fail_stack.avail])
2309
2310
2311/* Initialize `fail_stack'. Do `return -2' if the alloc fails. */
2312
2313#define INIT_FAIL_STACK() \
2314 do { \
2315 fail_stack.stack = (fail_stack_elt_t *) \
2316 REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
2317 \
2318 if (fail_stack.stack == NULL) \
2319 return -2; \
2320 \
2321 fail_stack.size = INIT_FAILURE_ALLOC; \
2322 fail_stack.avail = 0; \
2323 } while (0)
2324
2325
2326/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
2327
2328 Return 1 if succeeds, and 0 if either ran out of memory
2329 allocating space for it or it was already too large.
2330
2331 REGEX_REALLOCATE requires `destination' be declared. */
2332
2333#define DOUBLE_FAIL_STACK(fail_stack) \
2334 ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS \
2335 ? 0 \
2336 : ((fail_stack).stack = (fail_stack_elt_t *) \
2337 REGEX_REALLOCATE ((fail_stack).stack, \
2338 (fail_stack).size * sizeof (fail_stack_elt_t), \
2339 ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \
2340 \
2341 (fail_stack).stack == NULL \
2342 ? 0 \
2343 : ((fail_stack).size <<= 1, \
2344 1)))
2345
2346
2347/* Push PATTERN_OP on FAIL_STACK.
2348
2349 Return 1 if was able to do so and 0 if ran out of memory allocating
2350 space to do so. */
2351#define PUSH_PATTERN_OP(pattern_op, fail_stack) \
2352 ((FAIL_STACK_FULL () \
2353 && !DOUBLE_FAIL_STACK (fail_stack)) \
2354 ? 0 \
2355 : ((fail_stack).stack[(fail_stack).avail++] = pattern_op, \
2356 1))
2357
2358/* This pushes an item onto the failure stack. Must be a four-byte
2359 value. Assumes the variable `fail_stack'. Probably should only
2360 be called from within `PUSH_FAILURE_POINT'. */
2361#define PUSH_FAILURE_ITEM(item) \
2362 fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) item
2363
2364/* The complement operation. Assumes `fail_stack' is nonempty. */
2365#define POP_FAILURE_ITEM() fail_stack.stack[--fail_stack.avail]
2366
2367/* Used to omit pushing failure point id's when we're not debugging. */
2368#ifdef DEBUG
2369#define DEBUG_PUSH PUSH_FAILURE_ITEM
2370#define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_ITEM ()
2371#else
2372#define DEBUG_PUSH(item)
2373#define DEBUG_POP(item_addr)
2374#endif
2375
2376
2377/* Push the information about the state we will need
2378 if we ever fail back to it.
2379
2380 Requires variables fail_stack, regstart, regend, reg_info, and
2381 num_regs be declared. DOUBLE_FAIL_STACK requires `destination' be
2382 declared.
2383
2384 Does `return FAILURE_CODE' if runs out of memory. */
2385
2386#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \
2387 do { \
2388 char *destination; \
2389 /* Must be int, so when we don't save any registers, the arithmetic \
2390 of 0 + -1 isn't done as unsigned. */ \
2391 int this_reg; \
2392 \
2393 DEBUG_STATEMENT (failure_id++); \
2394 DEBUG_STATEMENT (nfailure_points_pushed++); \
2395 DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
2396 DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\
2397 DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\
2398 \
2399 DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \
2400 DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \
2401 \
2402 /* Ensure we have enough space allocated for what we will push. */ \
2403 while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \
2404 { \
2405 if (!DOUBLE_FAIL_STACK (fail_stack)) \
2406 return failure_code; \
2407 \
2408 DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \
2409 (fail_stack).size); \
2410 DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\
2411 } \
2412 \
2413 /* Push the info, starting with the registers. */ \
2414 DEBUG_PRINT1 ("\n"); \
2415 \
2416 for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
2417 this_reg++) \
2418 { \
2419 DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg); \
2420 DEBUG_STATEMENT (num_regs_pushed++); \
2421 \
2422 DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
2423 PUSH_FAILURE_ITEM (regstart[this_reg]); \
2424 \
2425 DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
2426 PUSH_FAILURE_ITEM (regend[this_reg]); \
2427 \
2428 DEBUG_PRINT2 (" info: 0x%x\n ", reg_info[this_reg]); \
2429 DEBUG_PRINT2 (" match_null=%d", \
2430 REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \
2431 DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \
2432 DEBUG_PRINT2 (" matched_something=%d", \
2433 MATCHED_SOMETHING (reg_info[this_reg])); \
2434 DEBUG_PRINT2 (" ever_matched=%d", \
2435 EVER_MATCHED_SOMETHING (reg_info[this_reg])); \
2436 DEBUG_PRINT1 ("\n"); \
2437 PUSH_FAILURE_ITEM (reg_info[this_reg].word); \
2438 } \
2439 \
2440 DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg);\
2441 PUSH_FAILURE_ITEM (lowest_active_reg); \
2442 \
2443 DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg);\
2444 PUSH_FAILURE_ITEM (highest_active_reg); \
2445 \
2446 DEBUG_PRINT2 (" Pushing pattern 0x%x: ", pattern_place); \
2447 DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \
2448 PUSH_FAILURE_ITEM (pattern_place); \
2449 \
2450 DEBUG_PRINT2 (" Pushing string 0x%x: `", string_place); \
2451 DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \
2452 size2); \
2453 DEBUG_PRINT1 ("'\n"); \
2454 PUSH_FAILURE_ITEM (string_place); \
2455 \
2456 DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \
2457 DEBUG_PUSH (failure_id); \
2458 } while (0)
2459
2460/* This is the number of items that are pushed and popped on the stack
2461 for each register. */
2462#define NUM_REG_ITEMS 3
2463
2464/* Individual items aside from the registers. */
2465#ifdef DEBUG
2466#define NUM_NONREG_ITEMS 5 /* Includes failure point id. */
2467#else
2468#define NUM_NONREG_ITEMS 4
2469#endif
2470
2471/* We push at most this many items on the stack. */
2472#define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
2473
2474/* We actually push this many items. */
2475#define NUM_FAILURE_ITEMS \
2476 ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS \
2477 + NUM_NONREG_ITEMS)
2478
2479/* How many items can still be added to the stack without overflowing it. */
2480#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
2481
2482
2483/* Pops what PUSH_FAIL_STACK pushes.
2484
2485 We restore into the parameters, all of which should be lvalues:
2486 STR -- the saved data position.
2487 PAT -- the saved pattern position.
2488 LOW_REG, HIGH_REG -- the highest and lowest active registers.
2489 REGSTART, REGEND -- arrays of string positions.
2490 REG_INFO -- array of information about each subexpression.
2491
2492 Also assumes the variables `fail_stack' and (if debugging), `bufp',
2493 `pend', `string1', `size1', `string2', and `size2'. */
2494
2495#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
2496{ \
2497 DEBUG_STATEMENT (fail_stack_elt_t failure_id;) \
2498 int this_reg; \
2499 const unsigned char *string_temp; \
2500 \
2501 assert (!FAIL_STACK_EMPTY ()); \
2502 \
2503 /* Remove failure points and point to how many regs pushed. */ \
2504 DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \
2505 DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \
2506 DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \
2507 \
2508 assert (fail_stack.avail >= NUM_NONREG_ITEMS); \
2509 \
2510 DEBUG_POP (&failure_id); \
2511 DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \
2512 \
2513 /* If the saved string location is NULL, it came from an \
2514 on_failure_keep_string_jump opcode, and we want to throw away the \
2515 saved NULL, thus retaining our current position in the string. */ \
2516 string_temp = POP_FAILURE_ITEM (); \
2517 if (string_temp != NULL) \
2518 str = (const char *) string_temp; \
2519 \
2520 DEBUG_PRINT2 (" Popping string 0x%x: `", str); \
2521 DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
2522 DEBUG_PRINT1 ("'\n"); \
2523 \
2524 pat = (unsigned char *) POP_FAILURE_ITEM (); \
2525 DEBUG_PRINT2 (" Popping pattern 0x%x: ", pat); \
2526 DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \
2527 \
2528 /* Restore register info. */ \
2529 high_reg = (unsigned) POP_FAILURE_ITEM (); \
2530 DEBUG_PRINT2 (" Popping high active reg: %d\n", high_reg); \
2531 \
2532 low_reg = (unsigned) POP_FAILURE_ITEM (); \
2533 DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg); \
2534 \
2535 for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \
2536 { \
2537 DEBUG_PRINT2 (" Popping reg: %d\n", this_reg); \
2538 \
2539 reg_info[this_reg].word = POP_FAILURE_ITEM (); \
2540 DEBUG_PRINT2 (" info: 0x%x\n", reg_info[this_reg]); \
2541 \
2542 regend[this_reg] = (const char *) POP_FAILURE_ITEM (); \
2543 DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
2544 \
2545 regstart[this_reg] = (const char *) POP_FAILURE_ITEM (); \
2546 DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
2547 } \
2548 \
2549 DEBUG_STATEMENT (nfailure_points_popped++); \
2550} /* POP_FAILURE_POINT */
2551
2552/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in 2683/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
2553 BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible 2684 BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible
2554 characters can start a string that matches the pattern. This fastmap 2685 characters can start a string that matches the pattern. This fastmap
@@ -2567,7 +2698,9 @@ re_compile_fastmap (bufp)
2567 struct re_pattern_buffer *bufp; 2698 struct re_pattern_buffer *bufp;
2568{ 2699{
2569 int j, k; 2700 int j, k;
2701#ifndef MATCH_SHOULD_NOT_ALLOCA
2570 fail_stack_type fail_stack; 2702 fail_stack_type fail_stack;
2703#endif
2571#ifndef REGEX_MALLOC 2704#ifndef REGEX_MALLOC
2572 char *destination; 2705 char *destination;
2573#endif 2706#endif
@@ -3030,65 +3163,11 @@ static boolean alt_match_null_string_p (),
3030 common_op_match_null_string_p (), 3163 common_op_match_null_string_p (),
3031 group_match_null_string_p (); 3164 group_match_null_string_p ();
3032 3165
3033/* Structure for per-register (a.k.a. per-group) information.
3034 This must not be longer than one word, because we push this value
3035 onto the failure stack. Other register information, such as the
3036 starting and ending positions (which are addresses), and the list of
3037 inner groups (which is a bits list) are maintained in separate
3038 variables.
3039
3040 We are making a (strictly speaking) nonportable assumption here: that
3041 the compiler will pack our bit fields into something that fits into
3042 the type of `word', i.e., is something that fits into one item on the
3043 failure stack. */
3044typedef union
3045{
3046 fail_stack_elt_t word;
3047 struct
3048 {
3049 /* This field is one if this group can match the empty string,
3050 zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */
3051#define MATCH_NULL_UNSET_VALUE 3
3052 unsigned match_null_string_p : 2;
3053 unsigned is_active : 1;
3054 unsigned matched_something : 1;
3055 unsigned ever_matched_something : 1;
3056 } bits;
3057} register_info_type;
3058
3059#define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p)
3060#define IS_ACTIVE(R) ((R).bits.is_active)
3061#define MATCHED_SOMETHING(R) ((R).bits.matched_something)
3062#define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something)
3063
3064
3065/* Call this when have matched a real character; it sets `matched' flags
3066 for the subexpressions which we are currently inside. Also records
3067 that those subexprs have matched. */
3068#define SET_REGS_MATCHED() \
3069 do \
3070 { \
3071 unsigned r; \
3072 for (r = lowest_active_reg; r <= highest_active_reg; r++) \
3073 { \
3074 MATCHED_SOMETHING (reg_info[r]) \
3075 = EVER_MATCHED_SOMETHING (reg_info[r]) \
3076 = 1; \
3077 } \
3078 } \
3079 while (0)
3080
3081
3082/* This converts PTR, a pointer into one of the search strings `string1' 3166/* This converts PTR, a pointer into one of the search strings `string1'
3083 and `string2' into an offset from the beginning of that string. */ 3167 and `string2' into an offset from the beginning of that string. */
3084#define POINTER_TO_OFFSET(ptr) \ 3168#define POINTER_TO_OFFSET(ptr) \
3085 (FIRST_STRING_P (ptr) ? (ptr) - string1 : (ptr) - string2 + size1) 3169 (FIRST_STRING_P (ptr) ? (ptr) - string1 : (ptr) - string2 + size1)
3086 3170
3087/* Registers are set to a sentinel when they haven't yet matched. */
3088#define REG_UNSET_VALUE ((char *) -1)
3089#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
3090
3091
3092/* Macros for dealing with the split strings in re_match_2. */ 3171/* Macros for dealing with the split strings in re_match_2. */
3093 3172
3094#define MATCHING_IN_FIRST_STRING (dend == end_match_1) 3173#define MATCHING_IN_FIRST_STRING (dend == end_match_1)
@@ -3130,6 +3209,9 @@ typedef union
3130 3209
3131 3210
3132/* Free everything we malloc. */ 3211/* Free everything we malloc. */
3212#ifdef MATCH_SHOULD_NOT_ALLOCA
3213#define FREE_VARIABLES() /* Do nothing! */
3214#else
3133#ifdef REGEX_MALLOC 3215#ifdef REGEX_MALLOC
3134#define FREE_VAR(var) if (var) free (var); var = NULL 3216#define FREE_VAR(var) if (var) free (var); var = NULL
3135#define FREE_VARIABLES() \ 3217#define FREE_VARIABLES() \
@@ -3149,7 +3231,7 @@ typedef union
3149/* Some MIPS systems (at least) want this to free alloca'd storage. */ 3231/* Some MIPS systems (at least) want this to free alloca'd storage. */
3150#define FREE_VARIABLES() alloca (0) 3232#define FREE_VARIABLES() alloca (0)
3151#endif /* not REGEX_MALLOC */ 3233#endif /* not REGEX_MALLOC */
3152 3234#endif /* not MATCH_SHOULD_NOT_ALLOCA */
3153 3235
3154/* These values must meet several constraints. They must not be valid 3236/* These values must meet several constraints. They must not be valid
3155 register values; since we have a limit of 255 registers (because 3237 register values; since we have a limit of 255 registers (because
@@ -3230,7 +3312,9 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
3230 scanning the strings. If the latter is zero, the failure point is 3312 scanning the strings. If the latter is zero, the failure point is
3231 a ``dummy''; if a failure happens and the failure point is a dummy, 3313 a ``dummy''; if a failure happens and the failure point is a dummy,
3232 it gets discarded and the next next one is tried. */ 3314 it gets discarded and the next next one is tried. */
3315#ifndef MATCH_SHOULD_NOT_ALLOCA /* otherwise, this is global. */
3233 fail_stack_type fail_stack; 3316 fail_stack_type fail_stack;
3317#endif
3234#ifdef DEBUG 3318#ifdef DEBUG
3235 static unsigned failure_id = 0; 3319 static unsigned failure_id = 0;
3236 unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0; 3320 unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
@@ -3252,14 +3336,18 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
3252 matching and the regnum-th regend points to right after where we 3336 matching and the regnum-th regend points to right after where we
3253 stopped matching the regnum-th subexpression. (The zeroth register 3337 stopped matching the regnum-th subexpression. (The zeroth register
3254 keeps track of what the whole pattern matches.) */ 3338 keeps track of what the whole pattern matches.) */
3339#ifndef MATCH_SHOULD_NOT_ALLOCA /* otherwise, these are global. */
3255 const char **regstart, **regend; 3340 const char **regstart, **regend;
3341#endif
3256 3342
3257 /* If a group that's operated upon by a repetition operator fails to 3343 /* If a group that's operated upon by a repetition operator fails to
3258 match anything, then the register for its start will need to be 3344 match anything, then the register for its start will need to be
3259 restored because it will have been set to wherever in the string we 3345 restored because it will have been set to wherever in the string we
3260 are when we last see its open-group operator. Similarly for a 3346 are when we last see its open-group operator. Similarly for a
3261 register's end. */ 3347 register's end. */
3348#ifndef MATCH_SHOULD_NOT_ALLOCA /* otherwise, these are global. */
3262 const char **old_regstart, **old_regend; 3349 const char **old_regstart, **old_regend;
3350#endif
3263 3351
3264 /* The is_active field of reg_info helps us keep track of which (possibly 3352 /* The is_active field of reg_info helps us keep track of which (possibly
3265 nested) subexpressions we are currently in. The matched_something 3353 nested) subexpressions we are currently in. The matched_something
@@ -3267,14 +3355,18 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
3267 matched any of the pattern so far this time through the reg_num-th 3355 matched any of the pattern so far this time through the reg_num-th
3268 subexpression. These two fields get reset each time through any 3356 subexpression. These two fields get reset each time through any
3269 loop their register is in. */ 3357 loop their register is in. */
3358#ifndef MATCH_SHOULD_NOT_ALLOCA /* otherwise, this is global. */
3270 register_info_type *reg_info; 3359 register_info_type *reg_info;
3360#endif
3271 3361
3272 /* The following record the register info as found in the above 3362 /* The following record the register info as found in the above
3273 variables when we find a match better than any we've seen before. 3363 variables when we find a match better than any we've seen before.
3274 This happens as we backtrack through the failure points, which in 3364 This happens as we backtrack through the failure points, which in
3275 turn happens only if we have not yet matched the entire string. */ 3365 turn happens only if we have not yet matched the entire string. */
3276 unsigned best_regs_set = false; 3366 unsigned best_regs_set = false;
3367#ifndef MATCH_SHOULD_NOT_ALLOCA /* otherwise, these are global. */
3277 const char **best_regstart, **best_regend; 3368 const char **best_regstart, **best_regend;
3369#endif
3278 3370
3279 /* Logically, this is `best_regend[0]'. But we don't want to have to 3371 /* Logically, this is `best_regend[0]'. But we don't want to have to
3280 allocate space for that if we're not allocating space for anything 3372 allocate space for that if we're not allocating space for anything
@@ -3287,8 +3379,10 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
3287 const char *match_end = NULL; 3379 const char *match_end = NULL;
3288 3380
3289 /* Used when we pop values we don't care about. */ 3381 /* Used when we pop values we don't care about. */
3382#ifndef MATCH_SHOULD_NOT_ALLOCA /* otherwise, these are global. */
3290 const char **reg_dummy; 3383 const char **reg_dummy;
3291 register_info_type *reg_info_dummy; 3384 register_info_type *reg_info_dummy;
3385#endif
3292 3386
3293#ifdef DEBUG 3387#ifdef DEBUG
3294 /* Counts the total number of registers pushed. */ 3388 /* Counts the total number of registers pushed. */
@@ -3299,6 +3393,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
3299 3393
3300 INIT_FAIL_STACK (); 3394 INIT_FAIL_STACK ();
3301 3395
3396#ifndef MATCH_SHOULD_NOT_ALLOCA
3302 /* Do not bother to initialize all the register variables if there are 3397 /* Do not bother to initialize all the register variables if there are
3303 no groups in the pattern, as it takes a fair amount of time. If 3398 no groups in the pattern, as it takes a fair amount of time. If
3304 there are groups, we include space for register 0 (the whole 3399 there are groups, we include space for register 0 (the whole
@@ -3323,7 +3418,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
3323 return -2; 3418 return -2;
3324 } 3419 }
3325 } 3420 }
3326#ifdef REGEX_MALLOC 3421#if defined (REGEX_MALLOC)
3327 else 3422 else
3328 { 3423 {
3329 /* We must initialize all our variables to NULL, so that 3424 /* We must initialize all our variables to NULL, so that
@@ -3333,6 +3428,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
3333 reg_info = reg_info_dummy = (register_info_type *) NULL; 3428 reg_info = reg_info_dummy = (register_info_type *) NULL;
3334 } 3429 }
3335#endif /* REGEX_MALLOC */ 3430#endif /* REGEX_MALLOC */
3431#endif /* MATCH_SHOULD_NOT_ALLOCA */
3336 3432
3337 /* The starting position is bogus. */ 3433 /* The starting position is bogus. */
3338 if (pos < 0 || pos > size1 + size2) 3434 if (pos < 0 || pos > size1 + size2)