about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Kenichi Handa 2002-05-07 04:50:29 +0000
committer Kenichi Handa 2002-05-07 04:50:29 +0000
commit e9ce014c7b75a858f3b8412e997dc91516e42e36 (patch)
tree fe5693254088db0f9357ede8e2ac71dc2f45316b
parent ea99bcc1c79f6ec893bf5b7558541b94cff281fe (diff)
download emacs-e9ce014c7b75a858f3b8412e997dc91516e42e36.tar.gz
emacs-e9ce014c7b75a858f3b8412e997dc91516e42e36.zip
(struct charset_map_entries): New struct.
(load_charset_map): Renamed from parse_charset_map. New args entries and n_entries. Caller changed. (load_charset_map_from_file): Renamed from load_charset_map. Caller changed. New arg control_flag. Call load_charset_map at the tail. (load_charset_map_from_vector): New function. (Fdefine_charset_internal): Setup charset.compact_codes_p. (encode_char): If the charset is compact, change a character index to a code point.
-rw-r--r-- src/charset.c 276
1 files changed, 185 insertions, 91 deletions
diff --git a/src/charset.c b/src/charset.c
index a8b85cb11cd..46f7c717e46 100644
--- a/src/charset.c
+++ b/src/charset.c
@@ -153,12 +153,20 @@ Lisp_Object Vchar_unified_charset_table;
153 153
154 154
155 155
156/* Set to 1 when a charset map is loaded to warn that a buffer text 156/* Set to 1 to warn that a charset map is loaded and thus a buffer
157 and a string data may be relocated. */ 157 text and a string data may be relocated. */
158int charset_map_loaded; 158int charset_map_loaded;
159 159
160/* Parse the mapping vector MAP which has this form: 160struct charset_map_entries
161 [CODE0 CHAR0 CODE1 CHAR1 ... ] 161{
162 struct {
163 unsigned from, to;
164 int c;
165 } entry[0x10000];
166 struct charset_map_entries *next;
167};
168
169/* Load the mapping information for CHARSET from ENTRIES.
162 170
163 If CONTROL_FLAG is 0, setup CHARSET->min_char and CHARSET->max_char. 171 If CONTROL_FLAG is 0, setup CHARSET->min_char and CHARSET->max_char.
164 172
@@ -170,9 +178,10 @@ int charset_map_loaded;
170 setup it too. */ 178 setup it too. */
171 179
172static void 180static void
173parse_charset_map (charset, map, control_flag) 181load_charset_map (charset, entries, n_entries, control_flag)
174 struct charset *charset; 182 struct charset *charset;
175 Lisp_Object map; 183 struct charset_map_entries *entries;
184 int n_entries;
176 int control_flag; 185 int control_flag;
177{ 186{
178 Lisp_Object vec, table; 187 Lisp_Object vec, table;
@@ -180,12 +189,14 @@ parse_charset_map (charset, map, control_flag)
180 unsigned max_code = CHARSET_MAX_CODE (charset); 189 unsigned max_code = CHARSET_MAX_CODE (charset);
181 int ascii_compatible_p = charset->ascii_compatible_p; 190 int ascii_compatible_p = charset->ascii_compatible_p;
182 int min_char, max_char, nonascii_min_char; 191 int min_char, max_char, nonascii_min_char;
183 int size;
184 int i; 192 int i;
185 int first; 193 int first;
186 unsigned char *fast_map = charset->fast_map; 194 unsigned char *fast_map = charset->fast_map;
187 195
188 if (control_flag) 196 if (n_entries <= 0)
197 return;
198
199 if (control_flag > 0)
189 { 200 {
190 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1; 201 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
191 unsigned invalid_code = CHARSET_INVALID_CODE (charset); 202 unsigned invalid_code = CHARSET_INVALID_CODE (charset);
@@ -199,37 +210,53 @@ parse_charset_map (charset, map, control_flag)
199 charset_map_loaded = 1; 210 charset_map_loaded = 1;
200 } 211 }
201 212
202 size = ASIZE (map); 213 min_char = max_char = entries->entry[0].c;
203 nonascii_min_char = MAX_CHAR; 214 nonascii_min_char = MAX_CHAR;
204 CHARSET_COMPACT_CODES_P (charset) = 1; 215 for (i = 0; i < n_entries; i++)
205 for (first = 1, i = 0; i < size; i += 2)
206 { 216 {
207 Lisp_Object val; 217 unsigned from, to;
208 unsigned code;
209 int c, char_index; 218 int c, char_index;
219 int idx = i % 0x10000;
210 220
211 val = AREF (map, i); 221 if (i > 0 && idx == 0)
212 CHECK_NATNUM (val); 222 entries = entries->next;
213 code = XFASTINT (val); 223 from = entries->entry[idx].from;
214 val = AREF (map, i + 1); 224 to = entries->entry[idx].to;
215 CHECK_NATNUM (val); 225 c = entries->entry[idx].c;
216 c = XFASTINT (val);
217 226
218 if (code < min_code || code > max_code)
219 continue;
220 char_index = CODE_POINT_TO_INDEX (charset, code);
221 if (char_index < 0
222 || c > MAX_CHAR)
223 continue;
224
225 if (control_flag < 2) 227 if (control_flag < 2)
226 { 228 {
227 if (first) 229 if (control_flag == 1)
228 { 230 {
229 min_char = max_char = c; 231 unsigned code = from;
230 first = 0; 232 int from_index, to_index;
233
234 from_index = CODE_POINT_TO_INDEX (charset, from);
235 if (from == to)
236 to_index = from_index;
237 else
238 to_index = CODE_POINT_TO_INDEX (charset, to);
239 if (from_index < 0 || to_index < 0)
240 continue;
241 if (CHARSET_COMPACT_CODES_P (charset))
242 while (1)
243 {
244 ASET (vec, from_index, make_number (c));
245 CHAR_TABLE_SET (table, c, make_number (code));
246 if (from_index == to_index)
247 break;
248 from_index++, c++;
249 code = INDEX_TO_CODE_POINT (charset, from_index);
250 }
251 else
252 for (; from_index <= to_index; from_index++, c++)
253 {
254 ASET (vec, from_index, make_number (c));
255 CHAR_TABLE_SET (table, c, make_number (from_index));
256 }
231 } 257 }
232 else if (c > max_char) 258
259 if (c > max_char)
233 max_char = c; 260 max_char = c;
234 else if (c < min_char) 261 else if (c < min_char)
235 min_char = c; 262 min_char = c;
@@ -239,27 +266,12 @@ parse_charset_map (charset, map, control_flag)
239 266
240 CHARSET_FAST_MAP_SET (c, fast_map); 267 CHARSET_FAST_MAP_SET (c, fast_map);
241 } 268 }
242 269 else
243 if (control_flag)
244 { 270 {
245 if (control_flag == 1) 271 for (; from <= to; from++)
246 {
247 if (char_index >= ASIZE (vec))
248 abort ();
249 ASET (vec, char_index, make_number (c));
250 if (code > 0x7FFFFFF)
251 {
252 CHAR_TABLE_SET (table, c,
253 Fcons (make_number (code >> 16),
254 make_number (code & 0xFFFF)));
255 CHARSET_COMPACT_CODES_P (charset) = 0;
256 }
257 else
258 CHAR_TABLE_SET (table, c, make_number (code));
259 }
260 else
261 { 272 {
262 int c1 = DECODE_CHAR (charset, code); 273 int c1 = DECODE_CHAR (charset, from);
274
263 if (c1 >= 0) 275 if (c1 >= 0)
264 { 276 {
265 CHAR_TABLE_SET (table, c, make_number (c1)); 277 CHAR_TABLE_SET (table, c, make_number (c1));
@@ -277,7 +289,7 @@ parse_charset_map (charset, map, control_flag)
277 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p 289 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
278 ? nonascii_min_char : min_char); 290 ? nonascii_min_char : min_char);
279 CHARSET_MAX_CHAR (charset) = max_char; 291 CHARSET_MAX_CHAR (charset) = max_char;
280 if (control_flag) 292 if (control_flag == 1)
281 { 293 {
282 CHARSET_DECODER (charset) = vec; 294 CHARSET_DECODER (charset) = vec;
283 CHARSET_ENCODER (charset) = table; 295 CHARSET_ENCODER (charset) = table;
@@ -325,36 +337,43 @@ read_hex (fp, eof)
325 else 337 else
326 while ((c = getc (fp)) != EOF && isdigit (c)) 338 while ((c = getc (fp)) != EOF && isdigit (c))
327 n = (n * 10) + c - '0'; 339 n = (n * 10) + c - '0';
340 if (c != EOF)
341 ungetc (c, fp);
328 return n; 342 return n;
329} 343}
330 344
331 345
332/* Return a mapping vector for CHARSET loaded from MAPFILE. 346/* Return a mapping vector for CHARSET loaded from MAPFILE.
333 Each line of MAPFILE has this form: 347 Each line of MAPFILE has this form
334 0xAAAA 0xBBBB 348 0xAAAA 0xCCCC
335 where 0xAAAA is a code-point and 0xBBBB is the corresponding 349 where 0xAAAA is a code-point and 0xCCCC is the corresponding
336 character code. 350 character code, or this form
351 0xAAAA-0xBBBB 0xCCCC
352 where 0xAAAA and 0xBBBB are code-points specifying a range, and
353 0xCCCC is the first character code of the range.
354
337 The returned vector has this form: 355 The returned vector has this form:
338 [ CODE1 CHAR1 CODE2 CHAR2 .... ] 356 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
339*/ 357 where CODE1 is a code-point or a cons of code-points specifying a
358 range. */
340 359
341extern void add_to_log P_ ((char *, Lisp_Object, Lisp_Object)); 360extern void add_to_log P_ ((char *, Lisp_Object, Lisp_Object));
342 361
343static Lisp_Object 362static void
344load_charset_map (charset, mapfile) 363load_charset_map_from_file (charset, mapfile, control_flag)
345 struct charset *charset; 364 struct charset *charset;
346 Lisp_Object mapfile; 365 Lisp_Object mapfile;
366 int control_flag;
347{ 367{
368 unsigned min_code = CHARSET_MIN_CODE (charset);
369 unsigned max_code = CHARSET_MAX_CODE (charset);
348 int fd; 370 int fd;
349 FILE *fp; 371 FILE *fp;
350 int num;
351 unsigned *numbers_table[256];
352 int numbers_table_used;
353 unsigned *numbers;
354 int eof; 372 int eof;
355 Lisp_Object suffixes; 373 Lisp_Object suffixes;
356 Lisp_Object vec;
357 int i; 374 int i;
375 struct charset_map_entries *head, *entries;
376 int n_entries;
358 377
359 suffixes = Fcons (build_string (".map"), 378 suffixes = Fcons (build_string (".map"),
360 Fcons (build_string (".TXT"), Qnil)); 379 Fcons (build_string (".TXT"), Qnil));
@@ -365,42 +384,114 @@ load_charset_map (charset, mapfile)
365 || ! (fp = fdopen (fd, "r"))) 384 || ! (fp = fdopen (fd, "r")))
366 { 385 {
367 add_to_log ("Failure in loading charset map: %S", mapfile, Qnil); 386 add_to_log ("Failure in loading charset map: %S", mapfile, Qnil);
368 return Qnil; 387 return;
369 } 388 }
370 389
371 numbers_table_used = 0; 390 head = entries = ((struct charset_map_entries *)
372 num = 0; 391 alloca (sizeof (struct charset_map_entries)));
392 n_entries = 0;
373 eof = 0; 393 eof = 0;
374 while (1) 394 while (1)
375 { 395 {
376 unsigned n = read_hex (fp, &eof); 396 unsigned from, to;
397 int c;
398 int idx;
377 399
400 from = read_hex (fp, &eof);
378 if (eof) 401 if (eof)
379 break; 402 break;
380 if ((num % 0x10000) == 0) 403 if (getc (fp) == '-')
404 to = read_hex (fp, &eof);
405 else
406 to = from;
407 c = (int) read_hex (fp, &eof);
408
409 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
410 continue;
411
412 if (n_entries > 0 && (n_entries % 0x10000) == 0)
381 { 413 {
382 if (numbers_table_used == 256) 414 entries->next = ((struct charset_map_entries *)
383 break; 415 alloca (sizeof (struct charset_map_entries)));
384 numbers = (unsigned *) alloca (sizeof (unsigned) * 0x10000); 416 entries = entries->next;
385 numbers_table[numbers_table_used++] = numbers;
386 } 417 }
387 *numbers++ = n; 418 idx = n_entries % 0x10000;
388 num++; 419 entries->entry[idx].from = from;
420 entries->entry[idx].to = to;
421 entries->entry[idx].c = c;
422 n_entries++;
389 } 423 }
390 fclose (fp); 424 fclose (fp);
391 close (fd); 425 close (fd);
392 426
393 vec = Fmake_vector (make_number (num), Qnil); 427 load_charset_map (charset, head, n_entries, control_flag);
394 for (i = 0; i < num; i++, numbers++) 428}
429
430static void
431load_charset_map_from_vector (charset, vec, control_flag)
432 struct charset *charset;
433 Lisp_Object vec;
434 int control_flag;
435{
436 unsigned min_code = CHARSET_MIN_CODE (charset);
437 unsigned max_code = CHARSET_MAX_CODE (charset);
438 struct charset_map_entries *head, *entries;
439 int n_entries;
440 int len = ASIZE (vec);
441 int i;
442
443 if (len % 2 == 1)
395 { 444 {
396 if ((i % 0x10000) == 0) 445 add_to_log ("Failure in loading charset map: %V", vec, Qnil);
397 numbers = numbers_table[i / 0x10000]; 446 return;
398 ASET (vec, i, make_number (*numbers));
399 } 447 }
400 448
401 charset_map_loaded = 1; 449 head = entries = ((struct charset_map_entries *)
450 alloca (sizeof (struct charset_map_entries)));
451 n_entries = 0;
452 for (i = 0; i < len; i += 2)
453 {
454 Lisp_Object val, val2;
455 unsigned from, to;
456 int c;
457 int idx;
402 458
403 return vec; 459 val = AREF (vec, i);
460 if (CONSP (val))
461 {
462 val2 = XCDR (val);
463 val = XCAR (val);
464 CHECK_NATNUM (val);
465 CHECK_NATNUM (val2);
466 from = XFASTINT (val);
467 to = XFASTINT (val2);
468 }
469 else
470 {
471 CHECK_NATNUM (val);
472 from = to = XFASTINT (val);
473 }
474 val = AREF (vec, i + 1);
475 CHECK_NATNUM (val);
476 c = XFASTINT (val);
477
478 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
479 continue;
480
481 if ((n_entries % 0x10000) == 0)
482 {
483 entries->next = ((struct charset_map_entries *)
484 alloca (sizeof (struct charset_map_entries)));
485 entries = entries->next;
486 }
487 idx = n_entries % 0x10000;
488 entries->entry[idx].from = from;
489 entries->entry[idx].to = to;
490 entries->entry[idx].c = c;
491 n_entries++;
492 }
493
494 load_charset_map (charset, head, n_entries, control_flag);
404} 495}
405 496
406static void 497static void
@@ -413,8 +504,9 @@ load_charset (charset)
413 504
414 map = CHARSET_MAP (charset); 505 map = CHARSET_MAP (charset);
415 if (STRINGP (map)) 506 if (STRINGP (map))
416 map = load_charset_map (charset, map); 507 load_charset_map_from_file (charset, map, 1);
417 parse_charset_map (charset, map, 1); 508 else
509 load_charset_map_from_vector (charset, map, 1);
418 CHARSET_METHOD (charset) = CHARSET_METHOD_MAP; 510 CHARSET_METHOD (charset) = CHARSET_METHOD_MAP;
419 } 511 }
420} 512}
@@ -621,6 +713,8 @@ DEFUN ("define-charset-internal", Fdefine_charset_internal,
621 | (charset.code_space[9] << 16) 713 | (charset.code_space[9] << 16)
622 | (charset.code_space[13] << 24)); 714 | (charset.code_space[13] << 24));
623 715
716 charset.compact_codes_p = charset.max_code < 0x1000000;
717
624 val = args[charset_arg_invalid_code]; 718 val = args[charset_arg_invalid_code];
625 if (NILP (val)) 719 if (NILP (val))
626 { 720 {
@@ -708,9 +802,9 @@ DEFUN ("define-charset-internal", Fdefine_charset_internal,
708 val = args[charset_arg_map]; 802 val = args[charset_arg_map];
709 ASET (attrs, charset_map, val); 803 ASET (attrs, charset_map, val);
710 if (STRINGP (val)) 804 if (STRINGP (val))
711 val = load_charset_map (&charset, val); 805 load_charset_map_from_file (&charset, val, 0);
712 CHECK_VECTOR (val); 806 else
713 parse_charset_map (&charset, val, 0); 807 load_charset_map_from_vector (&charset, val, 0);
714 charset.method = CHARSET_METHOD_MAP_DEFERRED; 808 charset.method = CHARSET_METHOD_MAP_DEFERRED;
715 } 809 }
716 else if (! NILP (args[charset_arg_parents])) 810 else if (! NILP (args[charset_arg_parents]))
@@ -901,8 +995,9 @@ DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 2, 0,
901 if (NILP (unify_map)) 995 if (NILP (unify_map))
902 unify_map = CHARSET_UNIFY_MAP (cs); 996 unify_map = CHARSET_UNIFY_MAP (cs);
903 if (STRINGP (unify_map)) 997 if (STRINGP (unify_map))
904 unify_map = load_charset_map (cs, unify_map); 998 load_charset_map_from_file (cs, unify_map, 2);
905 parse_charset_map (cs, unify_map, 2); 999 else
1000 load_charset_map_from_vector (cs, unify_map, 2);
906 CHARSET_UNIFIED_P (cs) = 1; 1001 CHARSET_UNIFIED_P (cs) = 1;
907 return Qnil; 1002 return Qnil;
908} 1003}
@@ -1277,10 +1372,9 @@ encode_char (charset, c)
1277 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset))) 1372 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
1278 return CHARSET_INVALID_CODE (charset); 1373 return CHARSET_INVALID_CODE (charset);
1279 val = CHAR_TABLE_REF (encoder, c); 1374 val = CHAR_TABLE_REF (encoder, c);
1280 if (CONSP (val)) 1375 code = XINT (val);
1281 code = (XINT (XCAR (val)) << 16) | XINT (XCDR (val)); 1376 if (! CHARSET_COMPACT_CODES_P (charset))
1282 else 1377 code = INDEX_TO_CODE_POINT (charset, code);
1283 code = XINT (val);
1284 } 1378 }
1285 else 1379 else
1286 { 1380 {