diff options
| author | Richard M. Stallman | 2002-09-05 02:34:37 +0000 |
|---|---|---|
| committer | Richard M. Stallman | 2002-09-05 02:34:37 +0000 |
| commit | 77d11aec5142e07dbebbee6c1a08ca3c826e6b8f (patch) | |
| tree | b4e68b42eddd4ba018a7d0a5b7303860ff7ff1d8 /src/regex.c | |
| parent | dfc538bb10d0f690597cd082b66347bfe9fb9949 (diff) | |
| download | emacs-77d11aec5142e07dbebbee6c1a08ca3c826e6b8f.tar.gz emacs-77d11aec5142e07dbebbee6c1a08ca3c826e6b8f.zip | |
(set_image_of_range_1): New function.
(set_image_of_range): Use set_image_of_range_1 for Latin-1.
Return a value to indicate running out of memory.
(SET_RANGE_TABLE_WORK_AREA): Check value from set_image_of_range.
(extend_range_table_work_area): New subroutine.
(EXTEND_RANGE_TABLE): Replaces EXTEND_RANGE_TABLE_WORK_AREA.
Different calling conventions, and used from set_image_of_range{,_1}.
(IMMEDIATE_QUIT_CHECK): Definitions moved.
Diffstat (limited to 'src/regex.c')
| -rw-r--r-- | src/regex.c | 269 |
1 files changed, 233 insertions, 36 deletions
diff --git a/src/regex.c b/src/regex.c index e01259cc85a..3b04fe30d23 100644 --- a/src/regex.c +++ b/src/regex.c | |||
| @@ -1869,7 +1869,17 @@ typedef struct | |||
| 1869 | /* The next available element. */ | 1869 | /* The next available element. */ |
| 1870 | #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) | 1870 | #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) |
| 1871 | 1871 | ||
| 1872 | 1872 | /* Explicit quit checking is only used on NTemacs. */ | |
| 1873 | #if defined WINDOWSNT && defined emacs && defined QUIT | ||
| 1874 | extern int immediate_quit; | ||
| 1875 | # define IMMEDIATE_QUIT_CHECK \ | ||
| 1876 | do { \ | ||
| 1877 | if (immediate_quit) QUIT; \ | ||
| 1878 | } while (0) | ||
| 1879 | #else | ||
| 1880 | # define IMMEDIATE_QUIT_CHECK ((void)0) | ||
| 1881 | #endif | ||
| 1882 | |||
| 1873 | /* Structure to manage work area for range table. */ | 1883 | /* Structure to manage work area for range table. */ |
| 1874 | struct range_table_work_area | 1884 | struct range_table_work_area |
| 1875 | { | 1885 | { |
| @@ -1879,21 +1889,19 @@ struct range_table_work_area | |||
| 1879 | int bits; /* flag to record character classes */ | 1889 | int bits; /* flag to record character classes */ |
| 1880 | }; | 1890 | }; |
| 1881 | 1891 | ||
| 1882 | /* Make sure that WORK_AREA can hold more N multibyte characters. */ | 1892 | /* Make sure that WORK_AREA can hold more N multibyte characters. |
| 1883 | #define EXTEND_RANGE_TABLE_WORK_AREA(work_area, n) \ | 1893 | This is used only in set_image_of_range and set_image_of_range_1. |
| 1884 | do { \ | 1894 | It expects WORK_AREA to be a pointer. |
| 1885 | if (((work_area).used + (n)) * sizeof (int) > (work_area).allocated) \ | 1895 | If it can't get the space, it returns from the surrounding function. */ |
| 1886 | { \ | 1896 | |
| 1887 | (work_area).allocated += 16 * sizeof (int); \ | 1897 | #define EXTEND_RANGE_TABLE(work_area, n) \ |
| 1888 | if ((work_area).table) \ | 1898 | do { \ |
| 1889 | (work_area).table \ | 1899 | if (((work_area)->used + (n)) * sizeof (int) > (work_area)->allocated) \ |
| 1890 | = (int *) realloc ((work_area).table, (work_area).allocated); \ | 1900 | { \ |
| 1891 | else \ | 1901 | extend_range_table_work_area (work_area); \ |
| 1892 | (work_area).table \ | 1902 | if ((work_area)->table == 0) \ |
| 1893 | = (int *) malloc ((work_area).allocated); \ | 1903 | return (REG_ESPACE); \ |
| 1894 | if ((work_area).table == 0) \ | 1904 | } \ |
| 1895 | FREE_STACK_RETURN (REG_ESPACE); \ | ||
| 1896 | } \ | ||
| 1897 | } while (0) | 1905 | } while (0) |
| 1898 | 1906 | ||
| 1899 | #define SET_RANGE_TABLE_WORK_AREA_BIT(work_area, bit) \ | 1907 | #define SET_RANGE_TABLE_WORK_AREA_BIT(work_area, bit) \ |
| @@ -1911,10 +1919,12 @@ struct range_table_work_area | |||
| 1911 | /* Set a range START..END to WORK_AREA. | 1919 | /* Set a range START..END to WORK_AREA. |
| 1912 | The range is passed through TRANSLATE, so START and END | 1920 | The range is passed through TRANSLATE, so START and END |
| 1913 | should be untranslated. */ | 1921 | should be untranslated. */ |
| 1914 | #define SET_RANGE_TABLE_WORK_AREA(work_area, start, end) \ | 1922 | #define SET_RANGE_TABLE_WORK_AREA(work_area, start, end) \ |
| 1915 | do { \ | 1923 | do { \ |
| 1916 | EXTEND_RANGE_TABLE_WORK_AREA ((work_area), 2); \ | 1924 | int tem; \ |
| 1917 | set_image_of_range (&work_area, start, end, translate); \ | 1925 | tem = set_image_of_range (&work_area, start, end, translate); \ |
| 1926 | if (tem > 0) \ | ||
| 1927 | FREE_STACK_RETURN (tem); \ | ||
| 1918 | } while (0) | 1928 | } while (0) |
| 1919 | 1929 | ||
| 1920 | /* Free allocated memory for WORK_AREA. */ | 1930 | /* Free allocated memory for WORK_AREA. */ |
| @@ -1928,7 +1938,7 @@ struct range_table_work_area | |||
| 1928 | #define RANGE_TABLE_WORK_USED(work_area) ((work_area).used) | 1938 | #define RANGE_TABLE_WORK_USED(work_area) ((work_area).used) |
| 1929 | #define RANGE_TABLE_WORK_BITS(work_area) ((work_area).bits) | 1939 | #define RANGE_TABLE_WORK_BITS(work_area) ((work_area).bits) |
| 1930 | #define RANGE_TABLE_WORK_ELT(work_area, i) ((work_area).table[i]) | 1940 | #define RANGE_TABLE_WORK_ELT(work_area, i) ((work_area).table[i]) |
| 1931 | 1941 | ||
| 1932 | 1942 | ||
| 1933 | /* Set the bit for character C in a list. */ | 1943 | /* Set the bit for character C in a list. */ |
| 1934 | #define SET_LIST_BIT(c) (b[((c)) / BYTEWIDTH] |= 1 << ((c) % BYTEWIDTH)) | 1944 | #define SET_LIST_BIT(c) (b[((c)) / BYTEWIDTH] |= 1 << ((c) % BYTEWIDTH)) |
| @@ -1958,7 +1968,7 @@ struct range_table_work_area | |||
| 1958 | FREE_STACK_RETURN (REG_BADBR); \ | 1968 | FREE_STACK_RETURN (REG_BADBR); \ |
| 1959 | } \ | 1969 | } \ |
| 1960 | } while (0) | 1970 | } while (0) |
| 1961 | 1971 | ||
| 1962 | #if WIDE_CHAR_SUPPORT | 1972 | #if WIDE_CHAR_SUPPORT |
| 1963 | /* The GNU C library provides support for user-defined character classes | 1973 | /* The GNU C library provides support for user-defined character classes |
| 1964 | and the functions from ISO C amendement 1. */ | 1974 | and the functions from ISO C amendement 1. */ |
| @@ -2071,21 +2081,215 @@ re_wctype_to_bit (cc) | |||
| 2071 | } | 2081 | } |
| 2072 | } | 2082 | } |
| 2073 | #endif | 2083 | #endif |
| 2084 | |||
| 2085 | /* Filling in the work area of a range. */ | ||
| 2074 | 2086 | ||
| 2087 | /* Actually extend the space in WORK_AREA. */ | ||
| 2075 | 2088 | ||
| 2089 | static void | ||
| 2090 | extend_range_table_work_area (work_area) | ||
| 2091 | struct range_table_work_area *work_area; | ||
| 2092 | { | ||
| 2093 | work_area->allocated += 16 * sizeof (int); | ||
| 2094 | if (work_area->table) | ||
| 2095 | work_area->table | ||
| 2096 | = (int *) realloc (work_area->table, work_area->allocated); | ||
| 2097 | else | ||
| 2098 | work_area->table | ||
| 2099 | = (int *) malloc (work_area->allocated); | ||
| 2100 | } | ||
| 2101 | |||
| 2102 | #ifdef emacs | ||
| 2103 | |||
| 2104 | /* Carefully find the ranges of codes that are equivalent | ||
| 2105 | under case conversion to the range start..end when passed through | ||
| 2106 | TRANSLATE. Handle the case where non-letters can come in between | ||
| 2107 | two upper-case letters (which happens in Latin-1). | ||
| 2108 | Also handle the case of groups of more than 2 case-equivalent chars. | ||
| 2109 | |||
| 2110 | The basic method is to look at consecutive characters and see | ||
| 2111 | if they can form a run that can be handled as one. | ||
| 2112 | |||
| 2113 | Returns -1 if successful, REG_ESPACE if ran out of space. */ | ||
| 2114 | |||
| 2115 | static int | ||
| 2116 | set_image_of_range_1 (work_area, start, end, translate) | ||
| 2117 | RE_TRANSLATE_TYPE translate; | ||
| 2118 | struct range_table_work_area *work_area; | ||
| 2119 | re_wchar_t start, end; | ||
| 2120 | { | ||
| 2121 | /* `one_case' indicates a character, or a run of characters, | ||
| 2122 | each of which is an isolate (no case-equivalents). | ||
| 2123 | This includes all ASCII non-letters. | ||
| 2124 | |||
| 2125 | `two_case' indicates a character, or a run of characters, | ||
| 2126 | each of which has two case-equivalent forms. | ||
| 2127 | This includes all ASCII letters. | ||
| 2128 | |||
| 2129 | `strange' indicates a character that has more than one | ||
| 2130 | case-equivalent. */ | ||
| 2131 | |||
| 2132 | enum case_type {one_case, two_case, strange}; | ||
| 2133 | |||
| 2134 | /* Describe the run that is in progress, | ||
| 2135 | which the next character can try to extend. | ||
| 2136 | If run_type is strange, that means there really is no run. | ||
| 2137 | If run_type is one_case, then run_start...run_end is the run. | ||
| 2138 | If run_type is two_case, then the run is run_start...run_end, | ||
| 2139 | and the case-equivalents end at run_eqv_end. */ | ||
| 2140 | |||
| 2141 | enum case_type run_type = strange; | ||
| 2142 | int run_start, run_end, run_eqv_end; | ||
| 2143 | |||
| 2144 | Lisp_Object eqv_table; | ||
| 2145 | |||
| 2146 | if (!RE_TRANSLATE_P (translate)) | ||
| 2147 | { | ||
| 2148 | work_area->table[work_area->used++] = (start); | ||
| 2149 | work_area->table[work_area->used++] = (end); | ||
| 2150 | return; | ||
| 2151 | } | ||
| 2152 | |||
| 2153 | eqv_table = XCHAR_TABLE (translate)->extras[2]; | ||
| 2154 | |||
| 2155 | for (; start <= end; start++) | ||
| 2156 | { | ||
| 2157 | enum case_type this_type; | ||
| 2158 | int eqv = RE_TRANSLATE (eqv_table, start); | ||
| 2159 | int minchar, maxchar; | ||
| 2160 | |||
| 2161 | /* Classify this character */ | ||
| 2162 | if (eqv == start) | ||
| 2163 | this_type = one_case; | ||
| 2164 | else if (RE_TRANSLATE (eqv_table, eqv) == start) | ||
| 2165 | this_type = two_case; | ||
| 2166 | else | ||
| 2167 | this_type = strange; | ||
| 2168 | |||
| 2169 | if (start < eqv) | ||
| 2170 | minchar = start, maxchar = eqv; | ||
| 2171 | else | ||
| 2172 | minchar = eqv, maxchar = start; | ||
| 2173 | |||
| 2174 | /* Can this character extend the run in progress? */ | ||
| 2175 | if (this_type == strange || this_type != run_type | ||
| 2176 | || !(minchar == run_end + 1 | ||
| 2177 | && (run_type == two_case | ||
| 2178 | ? maxchar == run_eqv_end + 1 : 1))) | ||
| 2179 | { | ||
| 2180 | /* No, end the run. | ||
| 2181 | Record each of its equivalent ranges. */ | ||
| 2182 | if (run_type == one_case) | ||
| 2183 | { | ||
| 2184 | EXTEND_RANGE_TABLE (work_area, 2); | ||
| 2185 | work_area->table[work_area->used++] = run_start; | ||
| 2186 | work_area->table[work_area->used++] = run_end; | ||
| 2187 | } | ||
| 2188 | else if (run_type == two_case) | ||
| 2189 | { | ||
| 2190 | EXTEND_RANGE_TABLE (work_area, 4); | ||
| 2191 | work_area->table[work_area->used++] = run_start; | ||
| 2192 | work_area->table[work_area->used++] = run_end; | ||
| 2193 | work_area->table[work_area->used++] | ||
| 2194 | = RE_TRANSLATE (eqv_table, run_start); | ||
| 2195 | work_area->table[work_area->used++] | ||
| 2196 | = RE_TRANSLATE (eqv_table, run_end); | ||
| 2197 | } | ||
| 2198 | run_type = strange; | ||
| 2199 | } | ||
| 2200 | |||
| 2201 | if (this_type == strange) | ||
| 2202 | { | ||
| 2203 | /* For a strange character, add each of its equivalents, one | ||
| 2204 | by one. Don't start a range. */ | ||
| 2205 | do | ||
| 2206 | { | ||
| 2207 | EXTEND_RANGE_TABLE (work_area, 2); | ||
| 2208 | work_area->table[work_area->used++] = eqv; | ||
| 2209 | work_area->table[work_area->used++] = eqv; | ||
| 2210 | eqv = RE_TRANSLATE (eqv_table, eqv); | ||
| 2211 | } | ||
| 2212 | while (eqv != start); | ||
| 2213 | } | ||
| 2214 | |||
| 2215 | /* Add this char to the run, or start a new run. */ | ||
| 2216 | else if (run_type == strange) | ||
| 2217 | { | ||
| 2218 | /* Initialize a new range. */ | ||
| 2219 | run_type = this_type; | ||
| 2220 | run_start = start; | ||
| 2221 | run_end = start; | ||
| 2222 | run_eqv_end = RE_TRANSLATE (eqv_table, run_end); | ||
| 2223 | } | ||
| 2224 | else | ||
| 2225 | { | ||
| 2226 | /* Extend a running range. */ | ||
| 2227 | run_end = minchar; | ||
| 2228 | run_eqv_end = RE_TRANSLATE (eqv_table, run_end); | ||
| 2229 | } | ||
| 2230 | } | ||
| 2231 | |||
| 2232 | /* If a run is still in progress at the end, finish it now | ||
| 2233 | by recording its equivalent ranges. */ | ||
| 2234 | if (run_type == one_case) | ||
| 2235 | { | ||
| 2236 | EXTEND_RANGE_TABLE (work_area, 2); | ||
| 2237 | work_area->table[work_area->used++] = run_start; | ||
| 2238 | work_area->table[work_area->used++] = run_end; | ||
| 2239 | } | ||
| 2240 | else if (run_type == two_case) | ||
| 2241 | { | ||
| 2242 | EXTEND_RANGE_TABLE (work_area, 4); | ||
| 2243 | work_area->table[work_area->used++] = run_start; | ||
| 2244 | work_area->table[work_area->used++] = run_end; | ||
| 2245 | work_area->table[work_area->used++] | ||
| 2246 | = RE_TRANSLATE (eqv_table, run_start); | ||
| 2247 | work_area->table[work_area->used++] | ||
| 2248 | = RE_TRANSLATE (eqv_table, run_end); | ||
| 2249 | } | ||
| 2250 | |||
| 2251 | return -1; | ||
| 2252 | } | ||
| 2253 | |||
| 2254 | #endif /* emacs */ | ||
| 2076 | 2255 | ||
| 2077 | /* We need to find the image of the range start..end when passed through | 2256 | /* We need to find the image of the range start..end when passed through |
| 2078 | TRANSLATE. This is not necessarily TRANSLATE(start)..TRANSLATE(end) | 2257 | TRANSLATE. This is not necessarily TRANSLATE(start)..TRANSLATE(end) |
| 2079 | and is not even necessarily contiguous. | 2258 | and is not even necessarily contiguous. |
| 2080 | We approximate it with the smallest contiguous range that contains | 2259 | We approximate it with the smallest contiguous range that contains |
| 2081 | all the chars we need. */ | 2260 | all the chars we need. However, that is not good enough for Latin-1, |
| 2082 | static void | 2261 | so we do a better job in that case. |
| 2262 | |||
| 2263 | Returns -1 if successful, REG_ESPACE if ran out of space. */ | ||
| 2264 | |||
| 2265 | static int | ||
| 2083 | set_image_of_range (work_area, start, end, translate) | 2266 | set_image_of_range (work_area, start, end, translate) |
| 2084 | RE_TRANSLATE_TYPE translate; | 2267 | RE_TRANSLATE_TYPE translate; |
| 2085 | struct range_table_work_area *work_area; | 2268 | struct range_table_work_area *work_area; |
| 2086 | re_wchar_t start, end; | 2269 | re_wchar_t start, end; |
| 2087 | { | 2270 | { |
| 2088 | re_wchar_t cmin = TRANSLATE (start), cmax = TRANSLATE (end); | 2271 | re_wchar_t cmin, cmax; |
| 2272 | |||
| 2273 | #ifdef emacs | ||
| 2274 | /* For Latin-1 ranges, use set_image_of_range_1 | ||
| 2275 | to get proper handling of ranges that include letters and nonletters. | ||
| 2276 | For ASCII, this is not necessary. | ||
| 2277 | For other character sets, we don't bother to get this right. */ | ||
| 2278 | if (start < 04400 && end > 0200) | ||
| 2279 | { | ||
| 2280 | int tem; | ||
| 2281 | tem = set_image_of_range_1 (work_area, start, end, translate); | ||
| 2282 | if (tem > 0) | ||
| 2283 | return tem; | ||
| 2284 | |||
| 2285 | start = 04400; | ||
| 2286 | if (end < 04400) | ||
| 2287 | return -1; | ||
| 2288 | } | ||
| 2289 | #endif | ||
| 2290 | |||
| 2291 | cmin = TRANSLATE (start), cmax = TRANSLATE (end); | ||
| 2292 | |||
| 2089 | if (RE_TRANSLATE_P (translate)) | 2293 | if (RE_TRANSLATE_P (translate)) |
| 2090 | for (; start <= end; start++) | 2294 | for (; start <= end; start++) |
| 2091 | { | 2295 | { |
| @@ -2093,20 +2297,13 @@ set_image_of_range (work_area, start, end, translate) | |||
| 2093 | cmin = MIN (cmin, c); | 2297 | cmin = MIN (cmin, c); |
| 2094 | cmax = MAX (cmax, c); | 2298 | cmax = MAX (cmax, c); |
| 2095 | } | 2299 | } |
| 2300 | |||
| 2301 | EXTEND_RANGE_TABLE (work_area, 2); | ||
| 2096 | work_area->table[work_area->used++] = (cmin); | 2302 | work_area->table[work_area->used++] = (cmin); |
| 2097 | work_area->table[work_area->used++] = (cmax); | 2303 | work_area->table[work_area->used++] = (cmax); |
| 2098 | } | ||
| 2099 | 2304 | ||
| 2100 | /* Explicit quit checking is only used on NTemacs. */ | 2305 | return -1; |
| 2101 | #if defined WINDOWSNT && defined emacs && defined QUIT | 2306 | } |
| 2102 | extern int immediate_quit; | ||
| 2103 | # define IMMEDIATE_QUIT_CHECK \ | ||
| 2104 | do { \ | ||
| 2105 | if (immediate_quit) QUIT; \ | ||
| 2106 | } while (0) | ||
| 2107 | #else | ||
| 2108 | # define IMMEDIATE_QUIT_CHECK ((void)0) | ||
| 2109 | #endif | ||
| 2110 | 2307 | ||
| 2111 | #ifndef MATCH_MAY_ALLOCATE | 2308 | #ifndef MATCH_MAY_ALLOCATE |
| 2112 | 2309 | ||