aboutsummaryrefslogtreecommitdiffstats
path: root/src/regex.c
diff options
context:
space:
mode:
authorRichard M. Stallman2002-09-05 02:34:37 +0000
committerRichard M. Stallman2002-09-05 02:34:37 +0000
commit77d11aec5142e07dbebbee6c1a08ca3c826e6b8f (patch)
treeb4e68b42eddd4ba018a7d0a5b7303860ff7ff1d8 /src/regex.c
parentdfc538bb10d0f690597cd082b66347bfe9fb9949 (diff)
downloademacs-77d11aec5142e07dbebbee6c1a08ca3c826e6b8f.tar.gz
emacs-77d11aec5142e07dbebbee6c1a08ca3c826e6b8f.zip
(set_image_of_range_1): New function.
(set_image_of_range): Use set_image_of_range_1 for Latin-1. Return a value to indicate running out of memory. (SET_RANGE_TABLE_WORK_AREA): Check value from set_image_of_range. (extend_range_table_work_area): New subroutine. (EXTEND_RANGE_TABLE): Replaces EXTEND_RANGE_TABLE_WORK_AREA. Different calling conventions, and used from set_image_of_range{,_1}. (IMMEDIATE_QUIT_CHECK): Definitions moved.
Diffstat (limited to 'src/regex.c')
-rw-r--r--src/regex.c269
1 files changed, 233 insertions, 36 deletions
diff --git a/src/regex.c b/src/regex.c
index e01259cc85a..3b04fe30d23 100644
--- a/src/regex.c
+++ b/src/regex.c
@@ -1869,7 +1869,17 @@ typedef struct
1869/* The next available element. */ 1869/* The next available element. */
1870#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) 1870#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1871 1871
1872 1872/* Explicit quit checking is only used on NTemacs. */
1873#if defined WINDOWSNT && defined emacs && defined QUIT
1874extern int immediate_quit;
1875# define IMMEDIATE_QUIT_CHECK \
1876 do { \
1877 if (immediate_quit) QUIT; \
1878 } while (0)
1879#else
1880# define IMMEDIATE_QUIT_CHECK ((void)0)
1881#endif
1882
1873/* Structure to manage work area for range table. */ 1883/* Structure to manage work area for range table. */
1874struct range_table_work_area 1884struct range_table_work_area
1875{ 1885{
@@ -1879,21 +1889,19 @@ struct range_table_work_area
1879 int bits; /* flag to record character classes */ 1889 int bits; /* flag to record character classes */
1880}; 1890};
1881 1891
1882/* Make sure that WORK_AREA can hold more N multibyte characters. */ 1892/* Make sure that WORK_AREA can hold more N multibyte characters.
1883#define EXTEND_RANGE_TABLE_WORK_AREA(work_area, n) \ 1893 This is used only in set_image_of_range and set_image_of_range_1.
1884 do { \ 1894 It expects WORK_AREA to be a pointer.
1885 if (((work_area).used + (n)) * sizeof (int) > (work_area).allocated) \ 1895 If it can't get the space, it returns from the surrounding function. */
1886 { \ 1896
1887 (work_area).allocated += 16 * sizeof (int); \ 1897#define EXTEND_RANGE_TABLE(work_area, n) \
1888 if ((work_area).table) \ 1898 do { \
1889 (work_area).table \ 1899 if (((work_area)->used + (n)) * sizeof (int) > (work_area)->allocated) \
1890 = (int *) realloc ((work_area).table, (work_area).allocated); \ 1900 { \
1891 else \ 1901 extend_range_table_work_area (work_area); \
1892 (work_area).table \ 1902 if ((work_area)->table == 0) \
1893 = (int *) malloc ((work_area).allocated); \ 1903 return (REG_ESPACE); \
1894 if ((work_area).table == 0) \ 1904 } \
1895 FREE_STACK_RETURN (REG_ESPACE); \
1896 } \
1897 } while (0) 1905 } while (0)
1898 1906
1899#define SET_RANGE_TABLE_WORK_AREA_BIT(work_area, bit) \ 1907#define SET_RANGE_TABLE_WORK_AREA_BIT(work_area, bit) \
@@ -1911,10 +1919,12 @@ struct range_table_work_area
1911/* Set a range START..END to WORK_AREA. 1919/* Set a range START..END to WORK_AREA.
1912 The range is passed through TRANSLATE, so START and END 1920 The range is passed through TRANSLATE, so START and END
1913 should be untranslated. */ 1921 should be untranslated. */
1914#define SET_RANGE_TABLE_WORK_AREA(work_area, start, end) \ 1922#define SET_RANGE_TABLE_WORK_AREA(work_area, start, end) \
1915 do { \ 1923 do { \
1916 EXTEND_RANGE_TABLE_WORK_AREA ((work_area), 2); \ 1924 int tem; \
1917 set_image_of_range (&work_area, start, end, translate); \ 1925 tem = set_image_of_range (&work_area, start, end, translate); \
1926 if (tem > 0) \
1927 FREE_STACK_RETURN (tem); \
1918 } while (0) 1928 } while (0)
1919 1929
1920/* Free allocated memory for WORK_AREA. */ 1930/* Free allocated memory for WORK_AREA. */
@@ -1928,7 +1938,7 @@ struct range_table_work_area
1928#define RANGE_TABLE_WORK_USED(work_area) ((work_area).used) 1938#define RANGE_TABLE_WORK_USED(work_area) ((work_area).used)
1929#define RANGE_TABLE_WORK_BITS(work_area) ((work_area).bits) 1939#define RANGE_TABLE_WORK_BITS(work_area) ((work_area).bits)
1930#define RANGE_TABLE_WORK_ELT(work_area, i) ((work_area).table[i]) 1940#define RANGE_TABLE_WORK_ELT(work_area, i) ((work_area).table[i])
1931 1941
1932 1942
1933/* Set the bit for character C in a list. */ 1943/* Set the bit for character C in a list. */
1934#define SET_LIST_BIT(c) (b[((c)) / BYTEWIDTH] |= 1 << ((c) % BYTEWIDTH)) 1944#define SET_LIST_BIT(c) (b[((c)) / BYTEWIDTH] |= 1 << ((c) % BYTEWIDTH))
@@ -1958,7 +1968,7 @@ struct range_table_work_area
1958 FREE_STACK_RETURN (REG_BADBR); \ 1968 FREE_STACK_RETURN (REG_BADBR); \
1959 } \ 1969 } \
1960 } while (0) 1970 } while (0)
1961 1971
1962#if WIDE_CHAR_SUPPORT 1972#if WIDE_CHAR_SUPPORT
1963/* The GNU C library provides support for user-defined character classes 1973/* The GNU C library provides support for user-defined character classes
1964 and the functions from ISO C amendement 1. */ 1974 and the functions from ISO C amendement 1. */
@@ -2071,21 +2081,215 @@ re_wctype_to_bit (cc)
2071 } 2081 }
2072} 2082}
2073#endif 2083#endif
2084
2085/* Filling in the work area of a range. */
2074 2086
2087/* Actually extend the space in WORK_AREA. */
2075 2088
2089static void
2090extend_range_table_work_area (work_area)
2091 struct range_table_work_area *work_area;
2092{
2093 work_area->allocated += 16 * sizeof (int);
2094 if (work_area->table)
2095 work_area->table
2096 = (int *) realloc (work_area->table, work_area->allocated);
2097 else
2098 work_area->table
2099 = (int *) malloc (work_area->allocated);
2100}
2101
2102#ifdef emacs
2103
2104/* Carefully find the ranges of codes that are equivalent
2105 under case conversion to the range start..end when passed through
2106 TRANSLATE. Handle the case where non-letters can come in between
2107 two upper-case letters (which happens in Latin-1).
2108 Also handle the case of groups of more than 2 case-equivalent chars.
2109
2110 The basic method is to look at consecutive characters and see
2111 if they can form a run that can be handled as one.
2112
2113 Returns -1 if successful, REG_ESPACE if ran out of space. */
2114
2115static int
2116set_image_of_range_1 (work_area, start, end, translate)
2117 RE_TRANSLATE_TYPE translate;
2118 struct range_table_work_area *work_area;
2119 re_wchar_t start, end;
2120{
2121 /* `one_case' indicates a character, or a run of characters,
2122 each of which is an isolate (no case-equivalents).
2123 This includes all ASCII non-letters.
2124
2125 `two_case' indicates a character, or a run of characters,
2126 each of which has two case-equivalent forms.
2127 This includes all ASCII letters.
2128
2129 `strange' indicates a character that has more than one
2130 case-equivalent. */
2131
2132 enum case_type {one_case, two_case, strange};
2133
2134 /* Describe the run that is in progress,
2135 which the next character can try to extend.
2136 If run_type is strange, that means there really is no run.
2137 If run_type is one_case, then run_start...run_end is the run.
2138 If run_type is two_case, then the run is run_start...run_end,
2139 and the case-equivalents end at run_eqv_end. */
2140
2141 enum case_type run_type = strange;
2142 int run_start, run_end, run_eqv_end;
2143
2144 Lisp_Object eqv_table;
2145
2146 if (!RE_TRANSLATE_P (translate))
2147 {
2148 work_area->table[work_area->used++] = (start);
2149 work_area->table[work_area->used++] = (end);
2150 return;
2151 }
2152
2153 eqv_table = XCHAR_TABLE (translate)->extras[2];
2154
2155 for (; start <= end; start++)
2156 {
2157 enum case_type this_type;
2158 int eqv = RE_TRANSLATE (eqv_table, start);
2159 int minchar, maxchar;
2160
2161 /* Classify this character */
2162 if (eqv == start)
2163 this_type = one_case;
2164 else if (RE_TRANSLATE (eqv_table, eqv) == start)
2165 this_type = two_case;
2166 else
2167 this_type = strange;
2168
2169 if (start < eqv)
2170 minchar = start, maxchar = eqv;
2171 else
2172 minchar = eqv, maxchar = start;
2173
2174 /* Can this character extend the run in progress? */
2175 if (this_type == strange || this_type != run_type
2176 || !(minchar == run_end + 1
2177 && (run_type == two_case
2178 ? maxchar == run_eqv_end + 1 : 1)))
2179 {
2180 /* No, end the run.
2181 Record each of its equivalent ranges. */
2182 if (run_type == one_case)
2183 {
2184 EXTEND_RANGE_TABLE (work_area, 2);
2185 work_area->table[work_area->used++] = run_start;
2186 work_area->table[work_area->used++] = run_end;
2187 }
2188 else if (run_type == two_case)
2189 {
2190 EXTEND_RANGE_TABLE (work_area, 4);
2191 work_area->table[work_area->used++] = run_start;
2192 work_area->table[work_area->used++] = run_end;
2193 work_area->table[work_area->used++]
2194 = RE_TRANSLATE (eqv_table, run_start);
2195 work_area->table[work_area->used++]
2196 = RE_TRANSLATE (eqv_table, run_end);
2197 }
2198 run_type = strange;
2199 }
2200
2201 if (this_type == strange)
2202 {
2203 /* For a strange character, add each of its equivalents, one
2204 by one. Don't start a range. */
2205 do
2206 {
2207 EXTEND_RANGE_TABLE (work_area, 2);
2208 work_area->table[work_area->used++] = eqv;
2209 work_area->table[work_area->used++] = eqv;
2210 eqv = RE_TRANSLATE (eqv_table, eqv);
2211 }
2212 while (eqv != start);
2213 }
2214
2215 /* Add this char to the run, or start a new run. */
2216 else if (run_type == strange)
2217 {
2218 /* Initialize a new range. */
2219 run_type = this_type;
2220 run_start = start;
2221 run_end = start;
2222 run_eqv_end = RE_TRANSLATE (eqv_table, run_end);
2223 }
2224 else
2225 {
2226 /* Extend a running range. */
2227 run_end = minchar;
2228 run_eqv_end = RE_TRANSLATE (eqv_table, run_end);
2229 }
2230 }
2231
2232 /* If a run is still in progress at the end, finish it now
2233 by recording its equivalent ranges. */
2234 if (run_type == one_case)
2235 {
2236 EXTEND_RANGE_TABLE (work_area, 2);
2237 work_area->table[work_area->used++] = run_start;
2238 work_area->table[work_area->used++] = run_end;
2239 }
2240 else if (run_type == two_case)
2241 {
2242 EXTEND_RANGE_TABLE (work_area, 4);
2243 work_area->table[work_area->used++] = run_start;
2244 work_area->table[work_area->used++] = run_end;
2245 work_area->table[work_area->used++]
2246 = RE_TRANSLATE (eqv_table, run_start);
2247 work_area->table[work_area->used++]
2248 = RE_TRANSLATE (eqv_table, run_end);
2249 }
2250
2251 return -1;
2252}
2253
2254#endif /* emacs */
2076 2255
2077/* We need to find the image of the range start..end when passed through 2256/* We need to find the image of the range start..end when passed through
2078 TRANSLATE. This is not necessarily TRANSLATE(start)..TRANSLATE(end) 2257 TRANSLATE. This is not necessarily TRANSLATE(start)..TRANSLATE(end)
2079 and is not even necessarily contiguous. 2258 and is not even necessarily contiguous.
2080 We approximate it with the smallest contiguous range that contains 2259 We approximate it with the smallest contiguous range that contains
2081 all the chars we need. */ 2260 all the chars we need. However, that is not good enough for Latin-1,
2082static void 2261 so we do a better job in that case.
2262
2263 Returns -1 if successful, REG_ESPACE if ran out of space. */
2264
2265static int
2083set_image_of_range (work_area, start, end, translate) 2266set_image_of_range (work_area, start, end, translate)
2084 RE_TRANSLATE_TYPE translate; 2267 RE_TRANSLATE_TYPE translate;
2085 struct range_table_work_area *work_area; 2268 struct range_table_work_area *work_area;
2086 re_wchar_t start, end; 2269 re_wchar_t start, end;
2087{ 2270{
2088 re_wchar_t cmin = TRANSLATE (start), cmax = TRANSLATE (end); 2271 re_wchar_t cmin, cmax;
2272
2273#ifdef emacs
2274 /* For Latin-1 ranges, use set_image_of_range_1
2275 to get proper handling of ranges that include letters and nonletters.
2276 For ASCII, this is not necessary.
2277 For other character sets, we don't bother to get this right. */
2278 if (start < 04400 && end > 0200)
2279 {
2280 int tem;
2281 tem = set_image_of_range_1 (work_area, start, end, translate);
2282 if (tem > 0)
2283 return tem;
2284
2285 start = 04400;
2286 if (end < 04400)
2287 return -1;
2288 }
2289#endif
2290
2291 cmin = TRANSLATE (start), cmax = TRANSLATE (end);
2292
2089 if (RE_TRANSLATE_P (translate)) 2293 if (RE_TRANSLATE_P (translate))
2090 for (; start <= end; start++) 2294 for (; start <= end; start++)
2091 { 2295 {
@@ -2093,20 +2297,13 @@ set_image_of_range (work_area, start, end, translate)
2093 cmin = MIN (cmin, c); 2297 cmin = MIN (cmin, c);
2094 cmax = MAX (cmax, c); 2298 cmax = MAX (cmax, c);
2095 } 2299 }
2300
2301 EXTEND_RANGE_TABLE (work_area, 2);
2096 work_area->table[work_area->used++] = (cmin); 2302 work_area->table[work_area->used++] = (cmin);
2097 work_area->table[work_area->used++] = (cmax); 2303 work_area->table[work_area->used++] = (cmax);
2098}
2099 2304
2100/* Explicit quit checking is only used on NTemacs. */ 2305 return -1;
2101#if defined WINDOWSNT && defined emacs && defined QUIT 2306}
2102extern int immediate_quit;
2103# define IMMEDIATE_QUIT_CHECK \
2104 do { \
2105 if (immediate_quit) QUIT; \
2106 } while (0)
2107#else
2108# define IMMEDIATE_QUIT_CHECK ((void)0)
2109#endif
2110 2307
2111#ifndef MATCH_MAY_ALLOCATE 2308#ifndef MATCH_MAY_ALLOCATE
2112 2309