aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorKenichi Handa2008-11-27 08:00:16 +0000
committerKenichi Handa2008-11-27 08:00:16 +0000
commitecca2aad92d1b528fffbebba5867183b110a6fa9 (patch)
tree94b74a250554307f1cd86aa617eb6962bc50d688 /src
parent55870ffc30547ff5dacd37db45fed4b38a0dd7e6 (diff)
downloademacs-ecca2aad92d1b528fffbebba5867183b110a6fa9.tar.gz
emacs-ecca2aad92d1b528fffbebba5867183b110a6fa9.zip
(Vchar_unified_charset_table): Delete it.
(inhibit_load_charset_map): New variable. (temp_charset_work): New variable. (SET_TEMP_CHARSET_WORK_ENCODER, GET_TEMP_CHARSET_WORK_ENCODER) (SET_TEMP_CHARSET_WORK_DECODER, GET_TEMP_CHARSET_WORK_DECODER): New macros. (load_charset_map): Meaning of control_flag changed. If inhibit_load_charset_map is nonzero, set up a table in temp_charset_work. (load_charset): New argument control_flag. (map_charset_for_dump): New function. (map_charset_chars): If inhibit_load_charset_map is nonzero, use map_charset_for_dump. (Fdefine_charset_internal): If the charset method is MAP, load mapping tables by calling load_charset. (Funify_charset): Don't load a mapping table but directly set Vchar_unify_table. (maybe_unify_char): New function. (decode_char): Don't handle the deleted method MAP_DEFERRED. Handle the case of inhibit_load_charset_map being nonzero. (encode_char): Don't handle the deleted method MAP_DEFERRED. Handle the case of inhibit_load_charset_map being nonzero. (Fclear_charset_maps): Just free temp_charset_work. (syms_of_charset): Make `inhibit-load-charset-map' a Lisp variable.
Diffstat (limited to 'src')
-rw-r--r--src/charset.c563
1 files changed, 385 insertions, 178 deletions
diff --git a/src/charset.c b/src/charset.c
index 43155d2cc65..3c52d1f333f 100644
--- a/src/charset.c
+++ b/src/charset.c
@@ -120,7 +120,8 @@ int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
120 120
121Lisp_Object Vcharset_map_path; 121Lisp_Object Vcharset_map_path;
122 122
123Lisp_Object Vchar_unified_charset_table; 123/* If nonzero, don't load charset maps. */
124int inhibit_load_charset_map;
124 125
125Lisp_Object Vcurrent_iso639_language; 126Lisp_Object Vcurrent_iso639_language;
126 127
@@ -166,7 +167,63 @@ map_char_table_for_charset P_ ((void (*c_function) (Lisp_Object, Lisp_Object),
166 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \ 167 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
167 << 24)))) 168 << 24))))
168 169
170/* Structure to hold mapping tables for a charset. Used by temacs
171 invoked for dumping. */
169 172
173static struct
174{
175 /* The current charset for which the following tables are set up. */
176 struct charset *current;
177
178 /* 1 iff the following table is used for encoder. */
179 short for_encoder;
180
181 /* When the following table is used for encoding, minimum and
182 maximum characters of the current charset. */
183 int min_char, max_char;
184
185 /* A Unicode character corresponding to the code index 0 (i.e. the
186 minimum code-point) of the current charset, or -1 if the code
187 index 0 is not a Unicode character. This is checked when
188 table.encoder[CHAR] is zero. */
189 int zero_index_char;
190
191 union {
192 /* Table mapping code-indices (not code-points) of the current
193 charset to Unicode characters. If decoder[IDX] is -1, the
194 code-index IDX is not mapped in the current charset. */
195 int decoder[0x10000];
196 /* Table mapping Unicode characters to code-indices of the current
197 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
198 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
199 (0x20000..0x2FFFF). Note that there is no charset map that
200 uses both SMP and SIP. */
201 unsigned short encoder[0x20000];
202 } table;
203} *temp_charset_work;
204
205#define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
206 do { \
207 if ((CODE) == 0) \
208 temp_charset_work->zero_index_char = (C); \
209 else if ((C) < 0x20000) \
210 temp_charset_work->table.encoder[(C)] = (CODE); \
211 else \
212 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
213 } while (0)
214
215#define GET_TEMP_CHARSET_WORK_ENCODER(C) \
216 ((C) == temp_charset_work->zero_index_char ? 0 \
217 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
218 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
219 : temp_charset_work->table.encoder[(C) - 0x10000] \
220 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
221
222#define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
223 (temp_charset_work->table.decoder[(CODE)] = (C))
224
225#define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
226 (temp_charset_work->table.decoder[(CODE)])
170 227
171 228
172/* Set to 1 to warn that a charset map is loaded and thus a buffer 229/* Set to 1 to warn that a charset map is loaded and thus a buffer
@@ -182,16 +239,30 @@ struct charset_map_entries
182 struct charset_map_entries *next; 239 struct charset_map_entries *next;
183}; 240};
184 241
185/* Load the mapping information for CHARSET from ENTRIES. 242/* Load the mapping information of CHARSET from ENTRIES for
243 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
244 encoding (CONTROL_FLAG == 2).
245
246 If CONTROL_FLAG is 0, set up CHARSET->min_char, CHARSET->max_char,
247 and CHARSET->fast_map.
248
249 If CONTROL_FLAG is 1, set up the following tables according to
250 CHARSET->method and inhibit_load_charset_map.
186 251
187 If CONTROL_FLAG is 0, setup CHARSET->min_char and CHARSET->max_char. 252 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
253 ----------------------+--------------------+---------------------------
254 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
255 ----------------------+--------------------+---------------------------
256 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
188 257
189 If CONTROL_FLAG is 1, setup CHARSET->min_char, CHARSET->max_char, 258 If CONTROL_FLAG is 2, setup the following tables.
190 CHARSET->decoder, and CHARSET->encoder.
191 259
192 If CONTROL_FLAG is 2, setup CHARSET->deunifier and 260 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
193 Vchar_unify_table. If Vchar_unified_charset_table is non-nil, 261 ----------------------+--------------------+---------------------------
194 setup it too. */ 262 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
263 ----------------------+--------------------+--------------------------
264 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
265*/
195 266
196static void 267static void
197load_charset_map (charset, entries, n_entries, control_flag) 268load_charset_map (charset, entries, n_entries, control_flag)
@@ -210,16 +281,55 @@ load_charset_map (charset, entries, n_entries, control_flag)
210 if (n_entries <= 0) 281 if (n_entries <= 0)
211 return; 282 return;
212 283
213 if (control_flag > 0) 284 if (control_flag)
214 { 285 {
215 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1; 286 if (! inhibit_load_charset_map)
216 287 {
217 table = Fmake_char_table (Qnil, Qnil); 288 if (control_flag == 1)
218 if (control_flag == 1) 289 {
219 vec = Fmake_vector (make_number (n), make_number (-1)); 290 if (charset->method == CHARSET_METHOD_MAP)
220 else if (! CHAR_TABLE_P (Vchar_unify_table)) 291 {
221 Vchar_unify_table = Fmake_char_table (Qnil, Qnil); 292 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
222 293
294 vec = CHARSET_DECODER (charset)
295 = Fmake_vector (make_number (n), make_number (-1));
296 }
297 else
298 {
299 char_table_set_range (Vchar_unify_table,
300 charset->min_char, charset->max_char,
301 Qnil);
302 }
303 }
304 else
305 {
306 table = Fmake_char_table (Qnil, Qnil);
307 if (charset->method == CHARSET_METHOD_MAP)
308 CHARSET_ENCODER (charset) = table;
309 else
310 CHARSET_DEUNIFIER (charset) = table;
311 }
312 }
313 else
314 {
315 if (! temp_charset_work)
316 temp_charset_work = malloc (sizeof (*temp_charset_work));
317 if (control_flag == 1)
318 {
319 memset (temp_charset_work->table.decoder, -1,
320 sizeof (int) * 0x10000);
321 temp_charset_work->for_encoder = 0;
322 }
323 else
324 {
325 memset (temp_charset_work->table.encoder, 0,
326 sizeof (unsigned short) * 0x20000);
327 temp_charset_work->zero_index_char = -1;
328 }
329 temp_charset_work->current = charset;
330 temp_charset_work->for_encoder = (control_flag == 2);
331 control_flag += 2;
332 }
223 charset_map_loaded = 1; 333 charset_map_loaded = 1;
224 } 334 }
225 335
@@ -251,14 +361,48 @@ load_charset_map (charset, entries, n_entries, control_flag)
251 if (from_index < 0 || to_index < 0) 361 if (from_index < 0 || to_index < 0)
252 continue; 362 continue;
253 363
254 if (control_flag < 2) 364 if (to_c > max_char)
255 { 365 max_char = to_c;
256 int c; 366 else if (from_c < min_char)
367 min_char = from_c;
257 368
258 if (to_c > max_char) 369 if (control_flag == 1)
259 max_char = to_c; 370 {
260 else if (from_c < min_char) 371 if (charset->method == CHARSET_METHOD_MAP)
261 min_char = from_c; 372 for (; from_index <= to_index; from_index++, from_c++)
373 ASET (vec, from_index, make_number (from_c));
374 else
375 for (; from_index <= to_index; from_index++, from_c++)
376 CHAR_TABLE_SET (Vchar_unify_table,
377 CHARSET_CODE_OFFSET (charset) + from_index,
378 make_number (from_c));
379 }
380 else if (control_flag == 2)
381 {
382 if (charset->method == CHARSET_METHOD_MAP
383 && CHARSET_COMPACT_CODES_P (charset))
384 for (; from_index <= to_index; from_index++, from_c++)
385 {
386 unsigned code = INDEX_TO_CODE_POINT (charset, from_index);
387
388 if (NILP (CHAR_TABLE_REF (table, from_c)))
389 CHAR_TABLE_SET (table, from_c, make_number (code));
390 }
391 else
392 for (; from_index <= to_index; from_index++, from_c++)
393 {
394 if (NILP (CHAR_TABLE_REF (table, from_c)))
395 CHAR_TABLE_SET (table, from_c, make_number (from_index));
396 }
397 }
398 else if (control_flag == 3)
399 for (; from_index <= to_index; from_index++, from_c++)
400 SET_TEMP_CHARSET_WORK_DECODER (from_c, from_index);
401 else if (control_flag == 4)
402 for (; from_index <= to_index; from_index++, from_c++)
403 SET_TEMP_CHARSET_WORK_ENCODER (from_c, from_index);
404 else /* control_flag == 0 */
405 {
262 if (ascii_compatible_p) 406 if (ascii_compatible_p)
263 { 407 {
264 if (! ASCII_BYTE_P (from_c)) 408 if (! ASCII_BYTE_P (from_c))
@@ -272,70 +416,22 @@ load_charset_map (charset, entries, n_entries, control_flag)
272 } 416 }
273 } 417 }
274 418
275 for (c = from_c; c <= to_c; c++) 419 for (; from_c <= to_c; from_c++)
276 CHARSET_FAST_MAP_SET (c, fast_map); 420 CHARSET_FAST_MAP_SET (from_c, fast_map);
277
278 if (control_flag == 1)
279 {
280 unsigned code = from;
281
282 if (CHARSET_COMPACT_CODES_P (charset))
283 while (1)
284 {
285 ASET (vec, from_index, make_number (from_c));
286 if (NILP (CHAR_TABLE_REF (table, from_c)))
287 CHAR_TABLE_SET (table, from_c, make_number (code));
288 if (from_index == to_index)
289 break;
290 from_index++, from_c++;
291 code = INDEX_TO_CODE_POINT (charset, from_index);
292 }
293 else
294 for (; from_index <= to_index; from_index++, from_c++)
295 {
296 ASET (vec, from_index, make_number (from_c));
297 if (NILP (CHAR_TABLE_REF (table, from_c)))
298 CHAR_TABLE_SET (table, from_c, make_number (from_index));
299 }
300 }
301 }
302 else
303 {
304 unsigned code = from;
305
306 while (1)
307 {
308 int c1 = DECODE_CHAR (charset, code);
309
310 if (c1 >= 0)
311 {
312 CHAR_TABLE_SET (table, from_c, make_number (c1));
313 CHAR_TABLE_SET (Vchar_unify_table, c1, make_number (from_c));
314 if (CHAR_TABLE_P (Vchar_unified_charset_table))
315 CHAR_TABLE_SET (Vchar_unified_charset_table, c1,
316 CHARSET_NAME (charset));
317 }
318 if (from_index == to_index)
319 break;
320 from_index++, from_c++;
321 code = INDEX_TO_CODE_POINT (charset, from_index);
322 }
323 } 421 }
324 } 422 }
325 423
326 if (control_flag < 2) 424 if (control_flag == 0)
327 { 425 {
328 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p 426 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
329 ? nonascii_min_char : min_char); 427 ? nonascii_min_char : min_char);
330 CHARSET_MAX_CHAR (charset) = max_char; 428 CHARSET_MAX_CHAR (charset) = max_char;
331 if (control_flag == 1)
332 {
333 CHARSET_DECODER (charset) = vec;
334 CHARSET_ENCODER (charset) = table;
335 }
336 } 429 }
337 else 430 else if (control_flag == 4)
338 CHARSET_DEUNIFIER (charset) = table; 431 {
432 temp_charset_work->min_char = min_char;
433 temp_charset_work->max_char = max_char;
434 }
339} 435}
340 436
341 437
@@ -531,21 +627,31 @@ load_charset_map_from_vector (charset, vec, control_flag)
531 load_charset_map (charset, head, n_entries, control_flag); 627 load_charset_map (charset, head, n_entries, control_flag);
532} 628}
533 629
630
631/* Load a mapping table for CHARSET. CONTROL_FLAG tells what kind of
632 map it is (see the comment of load_charset_map for details). */
633
534static void 634static void
535load_charset (charset) 635load_charset (charset, control_flag)
536 struct charset *charset; 636 struct charset *charset;
637 int control_flag;
537{ 638{
538 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED) 639 Lisp_Object map;
539 {
540 Lisp_Object map;
541 640
542 map = CHARSET_MAP (charset); 641 if (inhibit_load_charset_map
543 if (STRINGP (map)) 642 && temp_charset_work
544 load_charset_map_from_file (charset, map, 1); 643 && charset == temp_charset_work->current
545 else 644 && (control_flag == 2 == temp_charset_work->for_encoder))
546 load_charset_map_from_vector (charset, map, 1); 645 return;
547 CHARSET_METHOD (charset) = CHARSET_METHOD_MAP; 646
548 } 647 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
648 map = CHARSET_MAP (charset);
649 else if (CHARSET_UNIFIED_P (charset))
650 map = CHARSET_UNIFY_MAP (charset);
651 if (STRINGP (map))
652 load_charset_map_from_file (charset, map, control_flag);
653 else
654 load_charset_map_from_vector (charset, map, control_flag);
549} 655}
550 656
551 657
@@ -558,6 +664,68 @@ DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
558} 664}
559 665
560 666
667void map_charset_for_dump P_ ((void (*c_function) (Lisp_Object, Lisp_Object),
668 Lisp_Object function, Lisp_Object arg,
669 unsigned from, unsigned to));
670
671void
672map_charset_for_dump (c_function, function, arg, from, to)
673 void (*c_function) (Lisp_Object, Lisp_Object);
674 Lisp_Object function, arg;
675 unsigned from, to;
676{
677 int from_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, from);
678 int to_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, to);
679 Lisp_Object range;
680 int c, stop;
681 struct gcpro gcpro1;
682
683 range = Fcons (Qnil, Qnil);
684 GCPRO1 (range);
685
686 c = temp_charset_work->min_char;
687 stop = (temp_charset_work->max_char < 0x20000
688 ? temp_charset_work->max_char : 0xFFFF);
689
690 while (1)
691 {
692 int index = GET_TEMP_CHARSET_WORK_ENCODER (c);
693
694 if (index >= from_idx && index <= to_idx)
695 {
696 if (NILP (XCAR (range)))
697 XSETCAR (range, make_number (c));
698 }
699 else if (! NILP (XCAR (range)))
700 {
701 XSETCDR (range, make_number (c - 1));
702 if (c_function)
703 (*c_function) (arg, range);
704 else
705 call2 (function, range, arg);
706 XSETCAR (range, Qnil);
707 }
708 if (c == stop)
709 {
710 if (c == temp_charset_work->max_char)
711 {
712 if (! NILP (XCAR (range)))
713 {
714 XSETCDR (range, make_number (c));
715 if (c_function)
716 (*c_function) (arg, range);
717 else
718 call2 (function, range, arg);
719 }
720 break;
721 }
722 c = 0x1FFFF;
723 stop = temp_charset_work->max_char;
724 }
725 c++;
726 }
727}
728
561void 729void
562map_charset_chars (c_function, function, arg, 730map_charset_chars (c_function, function, arg,
563 charset, from, to) 731 charset, from, to)
@@ -569,20 +737,9 @@ map_charset_chars (c_function, function, arg,
569 Lisp_Object range; 737 Lisp_Object range;
570 int partial; 738 int partial;
571 739
572 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED)
573 load_charset (charset);
574
575 partial = (from > CHARSET_MIN_CODE (charset) 740 partial = (from > CHARSET_MIN_CODE (charset)
576 || to < CHARSET_MAX_CODE (charset)); 741 || to < CHARSET_MAX_CODE (charset));
577 742
578 if (CHARSET_UNIFIED_P (charset)
579 && CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
580 {
581 map_char_table_for_charset (c_function, function,
582 CHARSET_DEUNIFIER (charset), arg,
583 partial ? charset : NULL, from, to);
584 }
585
586 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET) 743 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
587 { 744 {
588 int from_idx = CODE_POINT_TO_INDEX (charset, from); 745 int from_idx = CODE_POINT_TO_INDEX (charset, from);
@@ -590,6 +747,18 @@ map_charset_chars (c_function, function, arg,
590 int from_c = from_idx + CHARSET_CODE_OFFSET (charset); 747 int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
591 int to_c = to_idx + CHARSET_CODE_OFFSET (charset); 748 int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
592 749
750 if (CHARSET_UNIFIED_P (charset))
751 {
752 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
753 load_charset (charset, 2);
754 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
755 map_char_table_for_charset (c_function, function,
756 CHARSET_DEUNIFIER (charset), arg,
757 partial ? charset : NULL, from, to);
758 else
759 map_charset_for_dump (c_function, function, arg, from, to);
760 }
761
593 range = Fcons (make_number (from_c), make_number (to_c)); 762 range = Fcons (make_number (from_c), make_number (to_c));
594 if (NILP (function)) 763 if (NILP (function))
595 (*c_function) (arg, range); 764 (*c_function) (arg, range);
@@ -599,10 +768,13 @@ map_charset_chars (c_function, function, arg,
599 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP) 768 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
600 { 769 {
601 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset))) 770 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
602 return; 771 load_charset (charset, 2);
603 map_char_table_for_charset (c_function, function, 772 if (CHAR_TABLE_P (CHARSET_ENCODER (charset)))
604 CHARSET_ENCODER (charset), arg, 773 map_char_table_for_charset (c_function, function,
605 partial ? charset : NULL, from, to); 774 CHARSET_ENCODER (charset), arg,
775 partial ? charset : NULL, from, to);
776 else
777 map_charset_for_dump (c_function, function, arg, from, to);
606 } 778 }
607 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET) 779 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
608 { 780 {
@@ -821,7 +993,7 @@ usage: (define-charset-internal ...) */)
821 charset.max_code = code; 993 charset.max_code = code;
822 } 994 }
823 995
824 charset.compact_codes_p = charset.max_code < 0x1000000; 996 charset.compact_codes_p = charset.max_code < 0x10000;
825 997
826 val = args[charset_arg_invalid_code]; 998 val = args[charset_arg_invalid_code];
827 if (NILP (val)) 999 if (NILP (val))
@@ -910,11 +1082,7 @@ usage: (define-charset-internal ...) */)
910 { 1082 {
911 val = args[charset_arg_map]; 1083 val = args[charset_arg_map];
912 ASET (attrs, charset_map, val); 1084 ASET (attrs, charset_map, val);
913 if (STRINGP (val)) 1085 charset.method = CHARSET_METHOD_MAP;
914 load_charset_map_from_file (&charset, val, 0);
915 else
916 load_charset_map_from_vector (&charset, val, 0);
917 charset.method = CHARSET_METHOD_MAP_DEFERRED;
918 } 1086 }
919 else if (! NILP (args[charset_arg_subset])) 1087 else if (! NILP (args[charset_arg_subset]))
920 { 1088 {
@@ -1030,6 +1198,9 @@ usage: (define-charset-internal ...) */)
1030 charset.id = id; 1198 charset.id = id;
1031 charset_table[id] = charset; 1199 charset_table[id] = charset;
1032 1200
1201 if (charset.method == CHARSET_METHOD_MAP)
1202 load_charset (&charset, 0);
1203
1033 if (charset.iso_final >= 0) 1204 if (charset.iso_final >= 0)
1034 { 1205 {
1035 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96, 1206 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
@@ -1219,8 +1390,6 @@ Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1219 1390
1220 CHECK_CHARSET_GET_ID (charset, id); 1391 CHECK_CHARSET_GET_ID (charset, id);
1221 cs = CHARSET_FROM_ID (id); 1392 cs = CHARSET_FROM_ID (id);
1222 if (CHARSET_METHOD (cs) == CHARSET_METHOD_MAP_DEFERRED)
1223 load_charset (cs);
1224 if (NILP (deunify) 1393 if (NILP (deunify)
1225 ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs)) 1394 ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
1226 : ! CHARSET_UNIFIED_P (cs)) 1395 : ! CHARSET_UNIFIED_P (cs))
@@ -1229,18 +1398,21 @@ Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1229 CHARSET_UNIFIED_P (cs) = 0; 1398 CHARSET_UNIFIED_P (cs) = 0;
1230 if (NILP (deunify)) 1399 if (NILP (deunify))
1231 { 1400 {
1232 if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET) 1401 if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET
1402 || CHARSET_CODE_OFFSET (cs) < 0x110000)
1233 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset))); 1403 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
1234 if (NILP (unify_map)) 1404 if (NILP (unify_map))
1235 unify_map = CHARSET_UNIFY_MAP (cs); 1405 unify_map = CHARSET_UNIFY_MAP (cs);
1236 if (STRINGP (unify_map))
1237 load_charset_map_from_file (cs, unify_map, 2);
1238 else if (VECTORP (unify_map))
1239 load_charset_map_from_vector (cs, unify_map, 2);
1240 else if (NILP (unify_map))
1241 error ("No unify-map for charset");
1242 else 1406 else
1243 error ("Bad unify-map arg"); 1407 {
1408 if (! STRINGP (unify_map) && ! VECTORP (unify_map))
1409 signal_error ("Bad unify-map", unify_map);
1410 CHARSET_UNIFY_MAP (cs) = unify_map;
1411 }
1412 if (NILP (Vchar_unify_table))
1413 Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
1414 char_table_set_range (Vchar_unify_table,
1415 cs->min_char, cs->max_char, charset);
1244 CHARSET_UNIFIED_P (cs) = 1; 1416 CHARSET_UNIFIED_P (cs) = 1;
1245 } 1417 }
1246 else if (CHAR_TABLE_P (Vchar_unify_table)) 1418 else if (CHAR_TABLE_P (Vchar_unify_table))
@@ -1485,6 +1657,41 @@ only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1485 1657
1486 1658
1487 1659
1660/* Return a unified character code for C (>= 0x110000). VAL is a
1661 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1662 charset symbol. */
1663int
1664maybe_unify_char (c, val)
1665 int c;
1666 Lisp_Object val;
1667{
1668 struct charset *charset;
1669
1670 if (INTEGERP (val))
1671 return XINT (val);
1672 if (NILP (val))
1673 return c;
1674
1675 CHECK_CHARSET_GET_CHARSET (val, charset);
1676 load_charset (charset, 1);
1677 if (! inhibit_load_charset_map)
1678 {
1679 val = CHAR_TABLE_REF (Vchar_unify_table, c);
1680 if (! NILP (val))
1681 c = XINT (val);
1682 }
1683 else
1684 {
1685 int code_index = c - CHARSET_CODE_OFFSET (charset);
1686 int unified = GET_TEMP_CHARSET_WORK_DECODER (code_index);
1687
1688 if (unified > 0)
1689 c = unified;
1690 }
1691 return c;
1692}
1693
1694
1488/* Return a character corresponding to the code-point CODE of 1695/* Return a character corresponding to the code-point CODE of
1489 CHARSET. */ 1696 CHARSET. */
1490 1697
@@ -1499,12 +1706,6 @@ decode_char (charset, code)
1499 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset)) 1706 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1500 return -1; 1707 return -1;
1501 1708
1502 if (method == CHARSET_METHOD_MAP_DEFERRED)
1503 {
1504 load_charset (charset);
1505 method = CHARSET_METHOD (charset);
1506 }
1507
1508 if (method == CHARSET_METHOD_SUBSET) 1709 if (method == CHARSET_METHOD_SUBSET)
1509 { 1710 {
1510 Lisp_Object subset_info; 1711 Lisp_Object subset_info;
@@ -1547,21 +1748,24 @@ decode_char (charset, code)
1547 1748
1548 decoder = CHARSET_DECODER (charset); 1749 decoder = CHARSET_DECODER (charset);
1549 if (! VECTORP (decoder)) 1750 if (! VECTORP (decoder))
1550 return -1; 1751 {
1551 c = XINT (AREF (decoder, char_index)); 1752 load_charset (charset, 1);
1753 decoder = CHARSET_DECODER (charset);
1754 }
1755 if (VECTORP (decoder))
1756 c = XINT (AREF (decoder, char_index));
1757 else
1758 c = GET_TEMP_CHARSET_WORK_DECODER (char_index);
1552 } 1759 }
1553 else 1760 else /* method == CHARSET_METHOD_OFFSET */
1554 { 1761 {
1555 c = char_index + CHARSET_CODE_OFFSET (charset); 1762 c = char_index + CHARSET_CODE_OFFSET (charset);
1763 if (CHARSET_UNIFIED_P (charset)
1764 && c > MAX_UNICODE_CHAR)
1765 MAYBE_UNIFY_CHAR (c);
1556 } 1766 }
1557 } 1767 }
1558 1768
1559 if (CHARSET_UNIFIED_P (charset)
1560 && c >= 0)
1561 {
1562 MAYBE_UNIFY_CHAR (c);
1563 }
1564
1565 return c; 1769 return c;
1566} 1770}
1567 1771
@@ -1583,16 +1787,27 @@ encode_char (charset, c)
1583 if (CHARSET_UNIFIED_P (charset)) 1787 if (CHARSET_UNIFIED_P (charset))
1584 { 1788 {
1585 Lisp_Object deunifier, deunified; 1789 Lisp_Object deunifier, deunified;
1790 int code_index = -1;
1586 1791
1587 deunifier = CHARSET_DEUNIFIER (charset); 1792 deunifier = CHARSET_DEUNIFIER (charset);
1588 if (! CHAR_TABLE_P (deunifier)) 1793 if (! CHAR_TABLE_P (deunifier))
1589 { 1794 {
1590 Funify_charset (CHARSET_NAME (charset), Qnil, Qnil); 1795 load_charset (charset, 2);
1591 deunifier = CHARSET_DEUNIFIER (charset); 1796 deunifier = CHARSET_DEUNIFIER (charset);
1592 } 1797 }
1593 deunified = CHAR_TABLE_REF (deunifier, c); 1798 if (CHAR_TABLE_P (deunifier))
1594 if (! NILP (deunified)) 1799 {
1595 c = XINT (deunified); 1800 Lisp_Object deunified = CHAR_TABLE_REF (deunifier, c);
1801
1802 if (INTEGERP (deunified))
1803 code_index = XINT (deunified);
1804 }
1805 else
1806 {
1807 code_index = GET_TEMP_CHARSET_WORK_ENCODER (c);
1808 }
1809 if (code_index >= 0)
1810 c = CHARSET_CODE_OFFSET (charset) + code_index;
1596 } 1811 }
1597 1812
1598 if (method == CHARSET_METHOD_SUBSET) 1813 if (method == CHARSET_METHOD_SUBSET)
@@ -1633,12 +1848,6 @@ encode_char (charset, c)
1633 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset)) 1848 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1634 return CHARSET_INVALID_CODE (charset); 1849 return CHARSET_INVALID_CODE (charset);
1635 1850
1636 if (method == CHARSET_METHOD_MAP_DEFERRED)
1637 {
1638 load_charset (charset);
1639 method = CHARSET_METHOD (charset);
1640 }
1641
1642 if (method == CHARSET_METHOD_MAP) 1851 if (method == CHARSET_METHOD_MAP)
1643 { 1852 {
1644 Lisp_Object encoder; 1853 Lisp_Object encoder;
@@ -1646,18 +1855,27 @@ encode_char (charset, c)
1646 1855
1647 encoder = CHARSET_ENCODER (charset); 1856 encoder = CHARSET_ENCODER (charset);
1648 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset))) 1857 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
1649 return CHARSET_INVALID_CODE (charset); 1858 load_charset (charset);
1650 val = CHAR_TABLE_REF (encoder, c); 1859 if (CHAR_TABLE_P (CHARSET_ENCODER (charset)))
1651 if (NILP (val)) 1860 {
1652 return CHARSET_INVALID_CODE (charset); 1861 val = CHAR_TABLE_REF (encoder, c);
1653 code = XINT (val); 1862 if (NILP (val))
1654 if (! CHARSET_COMPACT_CODES_P (charset)) 1863 return CHARSET_INVALID_CODE (charset);
1655 code = INDEX_TO_CODE_POINT (charset, code); 1864 code = XINT (val);
1865 if (! CHARSET_COMPACT_CODES_P (charset))
1866 code = INDEX_TO_CODE_POINT (charset, code);
1867 }
1868 else
1869 {
1870 code = GET_TEMP_CHARSET_WORK_ENCODER (c);
1871 code = INDEX_TO_CODE_POINT (charset, code);
1872 }
1656 } 1873 }
1657 else /* method == CHARSET_METHOD_OFFSET */ 1874 else /* method == CHARSET_METHOD_OFFSET */
1658 { 1875 {
1659 code = c - CHARSET_CODE_OFFSET (charset); 1876 int code_index = c - CHARSET_CODE_OFFSET (charset);
1660 code = INDEX_TO_CODE_POINT (charset, code); 1877
1878 code = INDEX_TO_CODE_POINT (charset, code_index);
1661 } 1879 }
1662 1880
1663 return code; 1881 return code;
@@ -1932,35 +2150,23 @@ DIMENSION, CHARS, and FINAL-CHAR. */)
1932DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps, 2150DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
1933 0, 0, 0, 2151 0, 0, 0,
1934 doc: /* 2152 doc: /*
1935Clear encoder and decoder of charsets that are loaded from mapfiles. */) 2153Internal use only.
2154Clear temporary charset mapping tables.
2155It should be called only from temacs invoked for dumping. */)
1936 () 2156 ()
1937{ 2157{
1938 int i; 2158 int i;
1939 struct charset *charset; 2159 struct charset *charset;
1940 Lisp_Object attrs; 2160 Lisp_Object attrs;
1941 2161
1942 for (i = 0; i < charset_table_used; i++) 2162 if (temp_charset_work)
1943 { 2163 {
1944 charset = CHARSET_FROM_ID (i); 2164 free (temp_charset_work);
1945 attrs = CHARSET_ATTRIBUTES (charset); 2165 temp_charset_work = NULL;
1946
1947 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
1948 {
1949 CHARSET_ATTR_DECODER (attrs) = Qnil;
1950 CHARSET_ATTR_ENCODER (attrs) = Qnil;
1951 CHARSET_METHOD (charset) = CHARSET_METHOD_MAP_DEFERRED;
1952 }
1953
1954 if (CHARSET_UNIFIED_P (charset))
1955 CHARSET_ATTR_DEUNIFIER (attrs) = Qnil;
1956 } 2166 }
1957 2167
1958 if (CHAR_TABLE_P (Vchar_unified_charset_table)) 2168 if (CHAR_TABLE_P (Vchar_unify_table))
1959 { 2169 Foptimize_char_table (Vchar_unify_table, Qnil);
1960 Foptimize_char_table (Vchar_unified_charset_table, Qnil);
1961 Vchar_unify_table = Vchar_unified_charset_table;
1962 Vchar_unified_charset_table = Qnil;
1963 }
1964 2170
1965 return Qnil; 2171 return Qnil;
1966} 2172}
@@ -2124,9 +2330,6 @@ syms_of_charset ()
2124 xmalloc (sizeof (struct charset) * charset_table_size)); 2330 xmalloc (sizeof (struct charset) * charset_table_size));
2125 charset_table_used = 0; 2331 charset_table_used = 0;
2126 2332
2127 staticpro (&Vchar_unified_charset_table);
2128 Vchar_unified_charset_table = Fmake_char_table (Qnil, make_number (-1));
2129
2130 defsubr (&Scharsetp); 2333 defsubr (&Scharsetp);
2131 defsubr (&Smap_charset_chars); 2334 defsubr (&Smap_charset_chars);
2132 defsubr (&Sdefine_charset_internal); 2335 defsubr (&Sdefine_charset_internal);
@@ -2154,6 +2357,10 @@ syms_of_charset ()
2154 doc: /* *List of directories to search for charset map files. */); 2357 doc: /* *List of directories to search for charset map files. */);
2155 Vcharset_map_path = Qnil; 2358 Vcharset_map_path = Qnil;
2156 2359
2360 DEFVAR_BOOL ("inhibit-load-charset-map", &inhibit_load_charset_map,
2361 doc: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2362 inhibit_load_charset_map = 0;
2363
2157 DEFVAR_LISP ("charset-list", &Vcharset_list, 2364 DEFVAR_LISP ("charset-list", &Vcharset_list,
2158 doc: /* List of all charsets ever defined. */); 2365 doc: /* List of all charsets ever defined. */);
2159 Vcharset_list = Qnil; 2366 Vcharset_list = Qnil;