aboutsummaryrefslogtreecommitdiffstats
path: root/src/chartab.c
diff options
context:
space:
mode:
authorKenichi Handa2011-07-07 07:43:48 +0900
committerKenichi Handa2011-07-07 07:43:48 +0900
commitc805dec0b5fa81b5c9f2b724e2ec12a17d723aca (patch)
treec29a8490c976fdf4dbf64ef1b13a57f7d1110cc1 /src/chartab.c
parent5c62d133468c354b47a1643092add8292e084765 (diff)
downloademacs-c805dec0b5fa81b5c9f2b724e2ec12a17d723aca.tar.gz
emacs-c805dec0b5fa81b5c9f2b724e2ec12a17d723aca.zip
Add C interface for Unicode character property table.
Diffstat (limited to 'src/chartab.c')
-rw-r--r--src/chartab.c579
1 files changed, 507 insertions, 72 deletions
diff --git a/src/chartab.c b/src/chartab.c
index ed5b238646e..4a9a76bdd60 100644
--- a/src/chartab.c
+++ b/src/chartab.c
@@ -53,7 +53,38 @@ static const int chartab_bits[4] =
53#define CHARTAB_IDX(c, depth, min_char) \ 53#define CHARTAB_IDX(c, depth, min_char) \
54 (((c) - (min_char)) >> chartab_bits[(depth)]) 54 (((c) - (min_char)) >> chartab_bits[(depth)])
55 55
56
57/* Preamble for uniprop (Unicode character property) tables. See the
58 comment of "Unicode character property tables". */
59
60/* Purpose of uniprop tables. */
61static Lisp_Object Qchar_code_property_table;
62
63/* Types of decoder and encoder functions for uniprop values. */
64typedef Lisp_Object (*uniprop_decoder_t) (Lisp_Object, Lisp_Object);
65typedef Lisp_Object (*uniprop_encoder_t) (Lisp_Object, Lisp_Object);
66
67static Lisp_Object uniprop_table_uncompress (Lisp_Object, int);
68static uniprop_decoder_t uniprop_get_decoder (Lisp_Object);
69
70/* 1 iff TABLE is a uniprop table. */
71#define UNIPROP_TABLE_P(TABLE) \
72 (EQ (XCHAR_TABLE (TABLE)->purpose, Qchar_code_property_table) \
73 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (TABLE)) == 5)
74
75/* Return a decoder for values in the uniprop table TABLE. */
76#define UNIPROP_GET_DECODER(TABLE) \
77 (UNIPROP_TABLE_P (TABLE) ? uniprop_get_decoder (TABLE) : NULL)
56 78
79/* Nonzero iff OBJ is a string representing uniprop values of 128
80 succeeding characters (the bottom level of a char-table) by a
81 compressed format. We are sure that no property value has a string
82 starting with '\001' nor '\002'. */
83#define UNIPROP_COMPRESSED_FORM_P(OBJ) \
84 (STRINGP (OBJ) && SCHARS (OBJ) > 0 \
85 && ((SREF (OBJ, 0) == 1 || (SREF (OBJ, 0) == 2))))
86
87
57DEFUN ("make-char-table", Fmake_char_table, Smake_char_table, 1, 2, 0, 88DEFUN ("make-char-table", Fmake_char_table, Smake_char_table, 1, 2, 0,
58 doc: /* Return a newly created char-table, with purpose PURPOSE. 89 doc: /* Return a newly created char-table, with purpose PURPOSE.
59Each element is initialized to INIT, which defaults to nil. 90Each element is initialized to INIT, which defaults to nil.
@@ -107,7 +138,7 @@ make_sub_char_table (int depth, int min_char, Lisp_Object defalt)
107static Lisp_Object 138static Lisp_Object
108char_table_ascii (Lisp_Object table) 139char_table_ascii (Lisp_Object table)
109{ 140{
110 Lisp_Object sub; 141 Lisp_Object sub, val;
111 142
112 sub = XCHAR_TABLE (table)->contents[0]; 143 sub = XCHAR_TABLE (table)->contents[0];
113 if (! SUB_CHAR_TABLE_P (sub)) 144 if (! SUB_CHAR_TABLE_P (sub))
@@ -115,7 +146,10 @@ char_table_ascii (Lisp_Object table)
115 sub = XSUB_CHAR_TABLE (sub)->contents[0]; 146 sub = XSUB_CHAR_TABLE (sub)->contents[0];
116 if (! SUB_CHAR_TABLE_P (sub)) 147 if (! SUB_CHAR_TABLE_P (sub))
117 return sub; 148 return sub;
118 return XSUB_CHAR_TABLE (sub)->contents[0]; 149 val = XSUB_CHAR_TABLE (sub)->contents[0];
150 if (UNIPROP_TABLE_P (table) && UNIPROP_COMPRESSED_FORM_P (val))
151 val = uniprop_table_uncompress (sub, 0);
152 return val;
119} 153}
120 154
121static Lisp_Object 155static Lisp_Object
@@ -169,16 +203,19 @@ copy_char_table (Lisp_Object table)
169} 203}
170 204
171static Lisp_Object 205static Lisp_Object
172sub_char_table_ref (Lisp_Object table, int c) 206sub_char_table_ref (Lisp_Object table, int c, int is_uniprop)
173{ 207{
174 struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table); 208 struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table);
175 int depth = XINT (tbl->depth); 209 int depth = XINT (tbl->depth);
176 int min_char = XINT (tbl->min_char); 210 int min_char = XINT (tbl->min_char);
177 Lisp_Object val; 211 Lisp_Object val;
212 int idx = CHARTAB_IDX (c, depth, min_char);
178 213
179 val = tbl->contents[CHARTAB_IDX (c, depth, min_char)]; 214 val = tbl->contents[idx];
215 if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (val))
216 val = uniprop_table_uncompress (table, idx);
180 if (SUB_CHAR_TABLE_P (val)) 217 if (SUB_CHAR_TABLE_P (val))
181 val = sub_char_table_ref (val, c); 218 val = sub_char_table_ref (val, c, is_uniprop);
182 return val; 219 return val;
183} 220}
184 221
@@ -198,7 +235,7 @@ char_table_ref (Lisp_Object table, int c)
198 { 235 {
199 val = tbl->contents[CHARTAB_IDX (c, 0, 0)]; 236 val = tbl->contents[CHARTAB_IDX (c, 0, 0)];
200 if (SUB_CHAR_TABLE_P (val)) 237 if (SUB_CHAR_TABLE_P (val))
201 val = sub_char_table_ref (val, c); 238 val = sub_char_table_ref (val, c, UNIPROP_TABLE_P (table));
202 } 239 }
203 if (NILP (val)) 240 if (NILP (val))
204 { 241 {
@@ -210,7 +247,8 @@ char_table_ref (Lisp_Object table, int c)
210} 247}
211 248
212static Lisp_Object 249static Lisp_Object
213sub_char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to, Lisp_Object defalt) 250sub_char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to,
251 Lisp_Object defalt, int is_uniprop)
214{ 252{
215 struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table); 253 struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table);
216 int depth = XINT (tbl->depth); 254 int depth = XINT (tbl->depth);
@@ -219,8 +257,10 @@ sub_char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to, Lisp
219 Lisp_Object val; 257 Lisp_Object val;
220 258
221 val = tbl->contents[chartab_idx]; 259 val = tbl->contents[chartab_idx];
260 if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (val))
261 val = uniprop_table_uncompress (table, chartab_idx);
222 if (SUB_CHAR_TABLE_P (val)) 262 if (SUB_CHAR_TABLE_P (val))
223 val = sub_char_table_ref_and_range (val, c, from, to, defalt); 263 val = sub_char_table_ref_and_range (val, c, from, to, defalt, is_uniprop);
224 else if (NILP (val)) 264 else if (NILP (val))
225 val = defalt; 265 val = defalt;
226 266
@@ -232,8 +272,11 @@ sub_char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to, Lisp
232 c = min_char + idx * chartab_chars[depth] - 1; 272 c = min_char + idx * chartab_chars[depth] - 1;
233 idx--; 273 idx--;
234 this_val = tbl->contents[idx]; 274 this_val = tbl->contents[idx];
275 if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (this_val))
276 this_val = uniprop_table_uncompress (table, idx);
235 if (SUB_CHAR_TABLE_P (this_val)) 277 if (SUB_CHAR_TABLE_P (this_val))
236 this_val = sub_char_table_ref_and_range (this_val, c, from, to, defalt); 278 this_val = sub_char_table_ref_and_range (this_val, c, from, to, defalt,
279 is_uniprop);
237 else if (NILP (this_val)) 280 else if (NILP (this_val))
238 this_val = defalt; 281 this_val = defalt;
239 282
@@ -251,8 +294,11 @@ sub_char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to, Lisp
251 294
252 chartab_idx++; 295 chartab_idx++;
253 this_val = tbl->contents[chartab_idx]; 296 this_val = tbl->contents[chartab_idx];
297 if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (this_val))
298 this_val = uniprop_table_uncompress (table, chartab_idx);
254 if (SUB_CHAR_TABLE_P (this_val)) 299 if (SUB_CHAR_TABLE_P (this_val))
255 this_val = sub_char_table_ref_and_range (this_val, c, from, to, defalt); 300 this_val = sub_char_table_ref_and_range (this_val, c, from, to, defalt,
301 is_uniprop);
256 else if (NILP (this_val)) 302 else if (NILP (this_val))
257 this_val = defalt; 303 this_val = defalt;
258 if (! EQ (this_val, val)) 304 if (! EQ (this_val, val))
@@ -277,17 +323,20 @@ char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to)
277 struct Lisp_Char_Table *tbl = XCHAR_TABLE (table); 323 struct Lisp_Char_Table *tbl = XCHAR_TABLE (table);
278 int chartab_idx = CHARTAB_IDX (c, 0, 0), idx; 324 int chartab_idx = CHARTAB_IDX (c, 0, 0), idx;
279 Lisp_Object val; 325 Lisp_Object val;
326 int is_uniprop = UNIPROP_TABLE_P (table);
280 327
281 val = tbl->contents[chartab_idx]; 328 val = tbl->contents[chartab_idx];
282 if (*from < 0) 329 if (*from < 0)
283 *from = 0; 330 *from = 0;
284 if (*to < 0) 331 if (*to < 0)
285 *to = MAX_CHAR; 332 *to = MAX_CHAR;
333 if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (val))
334 val = uniprop_table_uncompress (table, chartab_idx);
286 if (SUB_CHAR_TABLE_P (val)) 335 if (SUB_CHAR_TABLE_P (val))
287 val = sub_char_table_ref_and_range (val, c, from, to, tbl->defalt); 336 val = sub_char_table_ref_and_range (val, c, from, to, tbl->defalt,
337 is_uniprop);
288 else if (NILP (val)) 338 else if (NILP (val))
289 val = tbl->defalt; 339 val = tbl->defalt;
290
291 idx = chartab_idx; 340 idx = chartab_idx;
292 while (*from < idx * chartab_chars[0]) 341 while (*from < idx * chartab_chars[0])
293 { 342 {
@@ -296,9 +345,11 @@ char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to)
296 c = idx * chartab_chars[0] - 1; 345 c = idx * chartab_chars[0] - 1;
297 idx--; 346 idx--;
298 this_val = tbl->contents[idx]; 347 this_val = tbl->contents[idx];
348 if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (this_val))
349 this_val = uniprop_table_uncompress (table, idx);
299 if (SUB_CHAR_TABLE_P (this_val)) 350 if (SUB_CHAR_TABLE_P (this_val))
300 this_val = sub_char_table_ref_and_range (this_val, c, from, to, 351 this_val = sub_char_table_ref_and_range (this_val, c, from, to,
301 tbl->defalt); 352 tbl->defalt, is_uniprop);
302 else if (NILP (this_val)) 353 else if (NILP (this_val))
303 this_val = tbl->defalt; 354 this_val = tbl->defalt;
304 355
@@ -315,9 +366,11 @@ char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to)
315 chartab_idx++; 366 chartab_idx++;
316 c = chartab_idx * chartab_chars[0]; 367 c = chartab_idx * chartab_chars[0];
317 this_val = tbl->contents[chartab_idx]; 368 this_val = tbl->contents[chartab_idx];
369 if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (this_val))
370 this_val = uniprop_table_uncompress (table, chartab_idx);
318 if (SUB_CHAR_TABLE_P (this_val)) 371 if (SUB_CHAR_TABLE_P (this_val))
319 this_val = sub_char_table_ref_and_range (this_val, c, from, to, 372 this_val = sub_char_table_ref_and_range (this_val, c, from, to,
320 tbl->defalt); 373 tbl->defalt, is_uniprop);
321 else if (NILP (this_val)) 374 else if (NILP (this_val))
322 this_val = tbl->defalt; 375 this_val = tbl->defalt;
323 if (! EQ (this_val, val)) 376 if (! EQ (this_val, val))
@@ -332,7 +385,7 @@ char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to)
332 385
333 386
334static void 387static void
335sub_char_table_set (Lisp_Object table, int c, Lisp_Object val) 388sub_char_table_set (Lisp_Object table, int c, Lisp_Object val, int is_uniprop)
336{ 389{
337 struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table); 390 struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table);
338 int depth = XINT ((tbl)->depth); 391 int depth = XINT ((tbl)->depth);
@@ -347,11 +400,17 @@ sub_char_table_set (Lisp_Object table, int c, Lisp_Object val)
347 sub = tbl->contents[i]; 400 sub = tbl->contents[i];
348 if (! SUB_CHAR_TABLE_P (sub)) 401 if (! SUB_CHAR_TABLE_P (sub))
349 { 402 {
350 sub = make_sub_char_table (depth + 1, 403 if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (sub))
351 min_char + i * chartab_chars[depth], sub); 404 sub = uniprop_table_uncompress (table, i);
352 tbl->contents[i] = sub; 405 else
406 {
407 sub = make_sub_char_table (depth + 1,
408 min_char + i * chartab_chars[depth],
409 sub);
410 tbl->contents[i] = sub;
411 }
353 } 412 }
354 sub_char_table_set (sub, c, val); 413 sub_char_table_set (sub, c, val, is_uniprop);
355 } 414 }
356} 415}
357 416
@@ -376,7 +435,7 @@ char_table_set (Lisp_Object table, int c, Lisp_Object val)
376 sub = make_sub_char_table (1, i * chartab_chars[0], sub); 435 sub = make_sub_char_table (1, i * chartab_chars[0], sub);
377 tbl->contents[i] = sub; 436 tbl->contents[i] = sub;
378 } 437 }
379 sub_char_table_set (sub, c, val); 438 sub_char_table_set (sub, c, val, UNIPROP_TABLE_P (table));
380 if (ASCII_CHAR_P (c)) 439 if (ASCII_CHAR_P (c))
381 tbl->ascii = char_table_ascii (table); 440 tbl->ascii = char_table_ascii (table);
382 } 441 }
@@ -384,30 +443,40 @@ char_table_set (Lisp_Object table, int c, Lisp_Object val)
384} 443}
385 444
386static void 445static void
387sub_char_table_set_range (Lisp_Object *table, int depth, int min_char, int from, int to, Lisp_Object val) 446sub_char_table_set_range (Lisp_Object table, int from, int to, Lisp_Object val,
447 int is_uniprop)
388{ 448{
389 int max_char = min_char + chartab_chars[depth] - 1; 449 struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table);
390 450 int depth = XINT ((tbl)->depth);
391 if (depth == 3 || (from <= min_char && to >= max_char)) 451 int min_char = XINT ((tbl)->min_char);
392 *table = val; 452 int chars_in_block = chartab_chars[depth];
393 else 453 int i, c, lim = chartab_size[depth];
454
455 if (from < min_char)
456 from = min_char;
457 i = CHARTAB_IDX (from, depth, min_char);
458 c = min_char + chars_in_block * i;
459 for (; i <= lim; i++, c += chars_in_block)
394 { 460 {
395 int i; 461 if (c > to)
396 unsigned j; 462 break;
397 463 if (from <= c && c + chars_in_block - 1 <= to)
398 depth++; 464 tbl->contents[i] = val;
399 if (! SUB_CHAR_TABLE_P (*table)) 465 else
400 *table = make_sub_char_table (depth, min_char, *table); 466 {
401 if (from < min_char) 467 Lisp_Object sub = tbl->contents[i];
402 from = min_char; 468 if (! SUB_CHAR_TABLE_P (sub))
403 if (to > max_char) 469 {
404 to = max_char; 470 if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (sub))
405 i = CHARTAB_IDX (from, depth, min_char); 471 sub = uniprop_table_uncompress (table, i);
406 j = CHARTAB_IDX (to, depth, min_char); 472 else
407 min_char += chartab_chars[depth] * i; 473 {
408 for (j++; i < j; i++, min_char += chartab_chars[depth]) 474 sub = make_sub_char_table (depth + 1, c, sub);
409 sub_char_table_set_range (XSUB_CHAR_TABLE (*table)->contents + i, 475 tbl->contents[i] = sub;
410 depth, min_char, from, to, val); 476 }
477 }
478 sub_char_table_set_range (sub, from, to, val, is_uniprop);
479 }
411 } 480 }
412} 481}
413 482
@@ -417,16 +486,33 @@ char_table_set_range (Lisp_Object table, int from, int to, Lisp_Object val)
417{ 486{
418 struct Lisp_Char_Table *tbl = XCHAR_TABLE (table); 487 struct Lisp_Char_Table *tbl = XCHAR_TABLE (table);
419 Lisp_Object *contents = tbl->contents; 488 Lisp_Object *contents = tbl->contents;
420 int i;
421 489
422 if (from == to) 490 if (from == to)
423 char_table_set (table, from, val); 491 char_table_set (table, from, val);
424 else 492 else
425 { 493 {
426 unsigned lim = to / chartab_chars[0] + 1; 494 int is_uniprop = UNIPROP_TABLE_P (table);
427 for (i = CHARTAB_IDX (from, 0, 0); i < lim; i++) 495 int lim = CHARTAB_IDX (to, 0, 0);
428 sub_char_table_set_range (contents + i, 0, i * chartab_chars[0], 496 int i, c;
429 from, to, val); 497
498 for (i = CHARTAB_IDX (from, 0, 0), c = 0; i <= lim;
499 i++, c += chartab_chars[0])
500 {
501 if (c > to)
502 break;
503 if (from <= c && c + chartab_chars[0] - 1 <= to)
504 tbl->contents[i] = val;
505 else
506 {
507 Lisp_Object sub = tbl->contents[i];
508 if (! SUB_CHAR_TABLE_P (sub))
509 {
510 sub = make_sub_char_table (1, i * chartab_chars[0], sub);
511 tbl->contents[i] = sub;
512 }
513 sub_char_table_set_range (sub, from, to, val, is_uniprop);
514 }
515 }
430 if (ASCII_CHAR_P (from)) 516 if (ASCII_CHAR_P (from))
431 tbl->ascii = char_table_ascii (table); 517 tbl->ascii = char_table_ascii (table);
432 } 518 }
@@ -504,6 +590,8 @@ DEFUN ("set-char-table-extra-slot", Fset_char_table_extra_slot,
504 (Lisp_Object char_table, Lisp_Object n, Lisp_Object value) 590 (Lisp_Object char_table, Lisp_Object n, Lisp_Object value)
505{ 591{
506 CHECK_CHAR_TABLE (char_table); 592 CHECK_CHAR_TABLE (char_table);
593 if (EQ (XCHAR_TABLE (char_table)->purpose, Qchar_code_property_table))
594 error ("Can't change extra-slot of char-code-property-table");
507 CHECK_NUMBER (n); 595 CHECK_NUMBER (n);
508 if (XINT (n) < 0 596 if (XINT (n) < 0
509 || XINT (n) >= CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (char_table))) 597 || XINT (n) >= CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (char_table)))
@@ -532,8 +620,9 @@ a cons of character codes (for characters in the range), or a character code. *
532 620
533 CHECK_CHARACTER_CAR (range); 621 CHECK_CHARACTER_CAR (range);
534 CHECK_CHARACTER_CDR (range); 622 CHECK_CHARACTER_CDR (range);
535 val = char_table_ref_and_range (char_table, XFASTINT (XCAR (range)), 623 from = XFASTINT (XCAR (range));
536 &from, &to); 624 to = XFASTINT (XCDR (range));
625 val = char_table_ref_and_range (char_table, from, &from, &to);
537 /* Not yet implemented. */ 626 /* Not yet implemented. */
538 } 627 }
539 else 628 else
@@ -655,8 +744,7 @@ equivalent and can be merged. It defaults to `equal'. */)
655/* Map C_FUNCTION or FUNCTION over TABLE (top or sub char-table), 744/* Map C_FUNCTION or FUNCTION over TABLE (top or sub char-table),
656 calling it for each character or group of characters that share a 745 calling it for each character or group of characters that share a
657 value. RANGE is a cons (FROM . TO) specifying the range of target 746 value. RANGE is a cons (FROM . TO) specifying the range of target
658 characters, VAL is a value of FROM in TABLE, DEFAULT_VAL is the 747 characters, VAL is a value of FROM in TABLE, TOP is the top
659 default value of the char-table, PARENT is the parent of the
660 char-table. 748 char-table.
661 749
662 ARG is passed to C_FUNCTION when that is called. 750 ARG is passed to C_FUNCTION when that is called.
@@ -669,7 +757,7 @@ equivalent and can be merged. It defaults to `equal'. */)
669static Lisp_Object 757static Lisp_Object
670map_sub_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object), 758map_sub_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object),
671 Lisp_Object function, Lisp_Object table, Lisp_Object arg, Lisp_Object val, 759 Lisp_Object function, Lisp_Object table, Lisp_Object arg, Lisp_Object val,
672 Lisp_Object range, Lisp_Object default_val, Lisp_Object parent) 760 Lisp_Object range, Lisp_Object top)
673{ 761{
674 /* Pointer to the elements of TABLE. */ 762 /* Pointer to the elements of TABLE. */
675 Lisp_Object *contents; 763 Lisp_Object *contents;
@@ -681,6 +769,8 @@ map_sub_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object),
681 int chars_in_block; 769 int chars_in_block;
682 int from = XINT (XCAR (range)), to = XINT (XCDR (range)); 770 int from = XINT (XCAR (range)), to = XINT (XCDR (range));
683 int i, c; 771 int i, c;
772 int is_uniprop = UNIPROP_TABLE_P (top);
773 uniprop_decoder_t decoder = UNIPROP_GET_DECODER (top);
684 774
685 if (SUB_CHAR_TABLE_P (table)) 775 if (SUB_CHAR_TABLE_P (table))
686 { 776 {
@@ -710,28 +800,33 @@ map_sub_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object),
710 for (c = min_char + chars_in_block * i; c <= max_char; 800 for (c = min_char + chars_in_block * i; c <= max_char;
711 i++, c += chars_in_block) 801 i++, c += chars_in_block)
712 { 802 {
713 Lisp_Object this = contents[i]; 803 Lisp_Object this = (SUB_CHAR_TABLE_P (table)
804 ? XSUB_CHAR_TABLE (table)->contents[i]
805 : XCHAR_TABLE (table)->contents[i]);
714 int nextc = c + chars_in_block; 806 int nextc = c + chars_in_block;
715 807
808 if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (this))
809 this = uniprop_table_uncompress (table, i);
716 if (SUB_CHAR_TABLE_P (this)) 810 if (SUB_CHAR_TABLE_P (this))
717 { 811 {
718 if (to >= nextc) 812 if (to >= nextc)
719 XSETCDR (range, make_number (nextc - 1)); 813 XSETCDR (range, make_number (nextc - 1));
720 val = map_sub_char_table (c_function, function, this, arg, 814 val = map_sub_char_table (c_function, function, this, arg,
721 val, range, default_val, parent); 815 val, range, top);
722 } 816 }
723 else 817 else
724 { 818 {
725 if (NILP (this)) 819 if (NILP (this))
726 this = default_val; 820 this = XCHAR_TABLE (top)->defalt;
727 if (!EQ (val, this)) 821 if (!EQ (val, this))
728 { 822 {
729 int different_value = 1; 823 int different_value = 1;
730 824
731 if (NILP (val)) 825 if (NILP (val))
732 { 826 {
733 if (! NILP (parent)) 827 if (! NILP (XCHAR_TABLE (top)->parent))
734 { 828 {
829 Lisp_Object parent = XCHAR_TABLE (top)->parent;
735 Lisp_Object temp = XCHAR_TABLE (parent)->parent; 830 Lisp_Object temp = XCHAR_TABLE (parent)->parent;
736 831
737 /* This is to get a value of FROM in PARENT 832 /* This is to get a value of FROM in PARENT
@@ -742,8 +837,7 @@ map_sub_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object),
742 XSETCDR (range, make_number (c - 1)); 837 XSETCDR (range, make_number (c - 1));
743 val = map_sub_char_table (c_function, function, 838 val = map_sub_char_table (c_function, function,
744 parent, arg, val, range, 839 parent, arg, val, range,
745 XCHAR_TABLE (parent)->defalt, 840 parent);
746 XCHAR_TABLE (parent)->parent);
747 if (EQ (val, this)) 841 if (EQ (val, this))
748 different_value = 0; 842 different_value = 0;
749 } 843 }
@@ -756,14 +850,22 @@ map_sub_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object),
756 if (c_function) 850 if (c_function)
757 (*c_function) (arg, XCAR (range), val); 851 (*c_function) (arg, XCAR (range), val);
758 else 852 else
759 call2 (function, XCAR (range), val); 853 {
854 if (decoder)
855 val = decoder (top, val);
856 call2 (function, XCAR (range), val);
857 }
760 } 858 }
761 else 859 else
762 { 860 {
763 if (c_function) 861 if (c_function)
764 (*c_function) (arg, range, val); 862 (*c_function) (arg, range, val);
765 else 863 else
766 call2 (function, range, val); 864 {
865 if (decoder)
866 val = decoder (top, val);
867 call2 (function, range, val);
868 }
767 } 869 }
768 } 870 }
769 val = this; 871 val = this;
@@ -783,35 +885,39 @@ map_sub_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object),
783 ARG is passed to C_FUNCTION when that is called. */ 885 ARG is passed to C_FUNCTION when that is called. */
784 886
785void 887void
786map_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object), Lisp_Object function, Lisp_Object table, Lisp_Object arg) 888map_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object),
889 Lisp_Object function, Lisp_Object table, Lisp_Object arg)
787{ 890{
788 Lisp_Object range, val; 891 Lisp_Object range, val, parent;
789 struct gcpro gcpro1, gcpro2, gcpro3; 892 struct gcpro gcpro1, gcpro2, gcpro3, gcpro4;
893 uniprop_decoder_t decoder = UNIPROP_GET_DECODER (table);
790 894
791 range = Fcons (make_number (0), make_number (MAX_CHAR)); 895 range = Fcons (make_number (0), make_number (MAX_CHAR));
792 GCPRO3 (table, arg, range); 896 parent = XCHAR_TABLE (table)->parent;
897
898 GCPRO4 (table, arg, range, parent);
793 val = XCHAR_TABLE (table)->ascii; 899 val = XCHAR_TABLE (table)->ascii;
794 if (SUB_CHAR_TABLE_P (val)) 900 if (SUB_CHAR_TABLE_P (val))
795 val = XSUB_CHAR_TABLE (val)->contents[0]; 901 val = XSUB_CHAR_TABLE (val)->contents[0];
796 val = map_sub_char_table (c_function, function, table, arg, val, range, 902 val = map_sub_char_table (c_function, function, table, arg, val, range,
797 XCHAR_TABLE (table)->defalt, 903 table);
798 XCHAR_TABLE (table)->parent); 904
799 /* If VAL is nil and TABLE has a parent, we must consult the parent 905 /* If VAL is nil and TABLE has a parent, we must consult the parent
800 recursively. */ 906 recursively. */
801 while (NILP (val) && ! NILP (XCHAR_TABLE (table)->parent)) 907 while (NILP (val) && ! NILP (XCHAR_TABLE (table)->parent))
802 { 908 {
803 Lisp_Object parent = XCHAR_TABLE (table)->parent; 909 Lisp_Object temp;
804 Lisp_Object temp = XCHAR_TABLE (parent)->parent;
805 int from = XINT (XCAR (range)); 910 int from = XINT (XCAR (range));
806 911
912 parent = XCHAR_TABLE (table)->parent;
913 temp = XCHAR_TABLE (parent)->parent;
807 /* This is to get a value of FROM in PARENT without checking the 914 /* This is to get a value of FROM in PARENT without checking the
808 parent of PARENT. */ 915 parent of PARENT. */
809 XCHAR_TABLE (parent)->parent = Qnil; 916 XCHAR_TABLE (parent)->parent = Qnil;
810 val = CHAR_TABLE_REF (parent, from); 917 val = CHAR_TABLE_REF (parent, from);
811 XCHAR_TABLE (parent)->parent = temp; 918 XCHAR_TABLE (parent)->parent = temp;
812 val = map_sub_char_table (c_function, function, parent, arg, val, range, 919 val = map_sub_char_table (c_function, function, parent, arg, val, range,
813 XCHAR_TABLE (parent)->defalt, 920 parent);
814 XCHAR_TABLE (parent)->parent);
815 table = parent; 921 table = parent;
816 } 922 }
817 923
@@ -822,14 +928,22 @@ map_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object), Lisp
822 if (c_function) 928 if (c_function)
823 (*c_function) (arg, XCAR (range), val); 929 (*c_function) (arg, XCAR (range), val);
824 else 930 else
825 call2 (function, XCAR (range), val); 931 {
932 if (decoder)
933 val = decoder (table, val);
934 call2 (function, XCAR (range), val);
935 }
826 } 936 }
827 else 937 else
828 { 938 {
829 if (c_function) 939 if (c_function)
830 (*c_function) (arg, range, val); 940 (*c_function) (arg, range, val);
831 else 941 else
832 call2 (function, range, val); 942 {
943 if (decoder)
944 val = decoder (table, val);
945 call2 (function, range, val);
946 }
833 } 947 }
834 } 948 }
835 949
@@ -984,9 +1098,315 @@ map_char_table_for_charset (void (*c_function) (Lisp_Object, Lisp_Object),
984} 1098}
985 1099
986 1100
1101/* Unicode character property tables.
1102
1103 This section provides a convenient and efficient way to get a
1104 Unicode character property from C code (from Lisp, you must use
1105 get-char-code-property).
1106
1107 The typical usage is to get a char-table for a specific property at
1108 a proper initialization time as this:
1109
1110 Lisp_Object bidi_class_table = uniprop_table (intern ("bidi-class"));
1111
1112 and get a property value for character CH as this:
1113
1114 Lisp_Object bidi_class = CHAR_TABLE_REF (CH, bidi_class_table);
1115
1116 In this case, what you actually get is an index number to the
1117 vector of property values (symbols nil, L, R, etc).
1118
1119 A table for Unicode character property has these characteristics:
1120
1121 o The purpose is `char-code-property-table', which implies that the
1122 table has 5 extra slots.
1123
1124 o The second extra slot is a Lisp function, an index (integer) to
1125 the array uniprop_decoder[], or nil. If it is a Lisp function, we
1126 can't use such a table from C (at the moment). If it is nil, it
1127 means that we don't have to decode values.
1128
1129 o The third extra slot is a Lisp function, an index (integer) to
1130 the array uniprop_enncoder[], or nil. If it is a Lisp function, we
1131 can't use such a table from C (at the moment). If it is nil, it
1132 means that we don't have to encode values. */
1133
1134
1135/* Uncompress the IDXth element of sub-char-table TABLE. */
1136
1137static Lisp_Object
1138uniprop_table_uncompress (Lisp_Object table, int idx)
1139{
1140 Lisp_Object val = XSUB_CHAR_TABLE (table)->contents[idx];
1141 int min_char = (XINT (XSUB_CHAR_TABLE (table)->min_char)
1142 + chartab_chars[2] * idx);
1143 Lisp_Object sub = make_sub_char_table (3, min_char, Qnil);
1144 struct Lisp_Sub_Char_Table *subtbl = XSUB_CHAR_TABLE (sub);
1145 const unsigned char *p, *pend;
1146 int i;
1147
1148 XSUB_CHAR_TABLE (table)->contents[idx] = sub;
1149 p = SDATA (val), pend = p + SBYTES (val);
1150 if (*p == 1)
1151 {
1152 /* SIMPLE TABLE */
1153 p++;
1154 idx = STRING_CHAR_ADVANCE (p);
1155 while (p < pend && idx < chartab_chars[2])
1156 {
1157 int v = STRING_CHAR_ADVANCE (p);
1158 subtbl->contents[idx++] = v > 0 ? make_number (v) : Qnil;
1159 }
1160 }
1161 else if (*p == 2)
1162 {
1163 /* RUN-LENGTH TABLE */
1164 p++;
1165 for (idx = 0; p < pend; )
1166 {
1167 int v = STRING_CHAR_ADVANCE (p);
1168 int count = 1;
1169 int len;
1170
1171 if (p < pend)
1172 {
1173 count = STRING_CHAR_AND_LENGTH (p, len);
1174 if (count < 128)
1175 count = 1;
1176 else
1177 {
1178 count -= 128;
1179 p += len;
1180 }
1181 }
1182 while (count-- > 0)
1183 subtbl->contents[idx++] = make_number (v);
1184 }
1185 }
1186/* It seems that we don't need this function because C code won't need
1187 to get a property that is compressed in this form. */
1188#if 0
1189 else if (*p == 0)
1190 {
1191 /* WORD-LIST TABLE */
1192 }
1193#endif
1194 return sub;
1195}
1196
1197
1198/* Decode VALUE as an elemnet of char-table TABLE. */
1199
1200static Lisp_Object
1201uniprop_decode_value_run_length (Lisp_Object table, Lisp_Object value)
1202{
1203 if (VECTORP (XCHAR_TABLE (table)->extras[4]))
1204 {
1205 Lisp_Object valvec = XCHAR_TABLE (table)->extras[4];
1206
1207 if (XINT (value) >= 0 && XINT (value) < ASIZE (valvec))
1208 value = AREF (valvec, XINT (value));
1209 }
1210 return value;
1211}
1212
1213static uniprop_decoder_t uniprop_decoder [] =
1214 { uniprop_decode_value_run_length };
1215
1216static int uniprop_decoder_count
1217 = (sizeof uniprop_decoder) / sizeof (uniprop_decoder[0]);
1218
1219
1220/* Return the decoder of char-table TABLE or nil if none. */
1221
1222static uniprop_decoder_t
1223uniprop_get_decoder (Lisp_Object table)
1224{
1225 int i;
1226
1227 if (! INTEGERP (XCHAR_TABLE (table)->extras[1]))
1228 return NULL;
1229 i = XINT (XCHAR_TABLE (table)->extras[1]);
1230 if (i < 0 || i >= uniprop_decoder_count)
1231 return NULL;
1232 return uniprop_decoder[i];
1233}
1234
1235
1236/* Encode VALUE as an element of char-table TABLE which contains
1237 characters as elements. */
1238
1239static Lisp_Object
1240uniprop_encode_value_character (Lisp_Object table, Lisp_Object value)
1241{
1242 if (! NILP (value) && ! CHARACTERP (value))
1243 wrong_type_argument (Qintegerp, value);
1244 return value;
1245}
1246
1247
1248/* Encode VALUE as an element of char-table TABLE which adopts RUN-LENGTH
1249 compression. */
1250
1251static Lisp_Object
1252uniprop_encode_value_run_length (Lisp_Object table, Lisp_Object value)
1253{
1254 Lisp_Object *value_table = XVECTOR (XCHAR_TABLE (table)->extras[4])->contents;
1255 int i, size = ASIZE (XCHAR_TABLE (table)->extras[4]);
1256
1257 for (i = 0; i < size; i++)
1258 if (EQ (value, value_table[i]))
1259 break;
1260 if (i == size)
1261 wrong_type_argument (build_string ("Unicode property value"), value);
1262 return make_number (i);
1263}
1264
1265
1266/* Encode VALUE as an element of char-table TABLE which adopts RUN-LENGTH
1267 compression and contains numbers as elements . */
1268
1269static Lisp_Object
1270uniprop_encode_value_numeric (Lisp_Object table, Lisp_Object value)
1271{
1272 Lisp_Object *value_table = XVECTOR (XCHAR_TABLE (table)->extras[4])->contents;
1273 int i, size = ASIZE (XCHAR_TABLE (table)->extras[4]);
1274
1275 CHECK_NUMBER (value);
1276 for (i = 0; i < size; i++)
1277 if (EQ (value, value_table[i]))
1278 break;
1279 value = make_number (i);
1280 if (i == size)
1281 {
1282 Lisp_Object args[2];
1283
1284 args[0] = XCHAR_TABLE (table)->extras[4];
1285 args[1] = Fmake_vector (make_number (1), value);
1286 XCHAR_TABLE (table)->extras[4] = Fvconcat (2, args);
1287 }
1288 return make_number (i);
1289}
1290
1291static uniprop_encoder_t uniprop_encoder[] =
1292 { uniprop_encode_value_character,
1293 uniprop_encode_value_run_length,
1294 uniprop_encode_value_numeric };
1295
1296static int uniprop_encoder_count
1297 = (sizeof uniprop_encoder) / sizeof (uniprop_encoder[0]);
1298
1299
1300/* Return the encoder of char-table TABLE or nil if none. */
1301
1302static uniprop_decoder_t
1303uniprop_get_encoder (Lisp_Object table)
1304{
1305 int i;
1306
1307 if (! INTEGERP (XCHAR_TABLE (table)->extras[2]))
1308 return NULL;
1309 i = XINT (XCHAR_TABLE (table)->extras[2]);
1310 if (i < 0 || i >= uniprop_encoder_count)
1311 return NULL;
1312 return uniprop_encoder[i];
1313}
1314
1315/* Return a char-table for Unicode character property PROP. This
1316 function may load a Lisp file and thus may cause
1317 garbage-collection. */
1318
1319Lisp_Object
1320uniprop_table (Lisp_Object prop)
1321{
1322 Lisp_Object val, table, result;
1323
1324 val = Fassq (prop, Vchar_code_property_alist);
1325 if (! CONSP (val))
1326 return Qnil;
1327 table = XCDR (val);
1328 if (STRINGP (table))
1329 {
1330 struct gcpro gcpro1;
1331 GCPRO1 (val);
1332 result = Fload (concat2 (build_string ("international/"), table),
1333 Qt, Qt, Qt, Qt);
1334 UNGCPRO;
1335 if (NILP (result))
1336 return Qnil;
1337 table = XCDR (val);
1338 }
1339 if (! CHAR_TABLE_P (table)
1340 || ! UNIPROP_TABLE_P (table))
1341 return Qnil;
1342 val = XCHAR_TABLE (table)->extras[1];
1343 if (INTEGERP (val)
1344 ? (XINT (val) < 0 || XINT (val) >= uniprop_decoder_count)
1345 : ! NILP (val))
1346 return Qnil;
1347 /* Prepare ASCII values in advance for CHAR_TABLE_REF. */
1348 XCHAR_TABLE (table)->ascii = char_table_ascii (table);
1349 return table;
1350}
1351
1352DEFUN ("unicode-property-table-internal", Funicode_property_table_internal,
1353 Sunicode_property_table_internal, 1, 1, 0,
1354 doc: /* Return a char-table for Unicode character property PROP.
1355Use `get-unicode-property-internal' and
1356`put-unicode-property-internal' instead of `aref' and `aset' to get
1357and put an element value. */)
1358 (Lisp_Object prop)
1359{
1360 Lisp_Object table = uniprop_table (prop);
1361
1362 if (CHAR_TABLE_P (table))
1363 return table;
1364 return Fcdr (Fassq (prop, Vchar_code_property_alist));
1365}
1366
1367DEFUN ("get-unicode-property-internal", Fget_unicode_property_internal,
1368 Sget_unicode_property_internal, 2, 2, 0,
1369 doc: /* Return an element of CHAR-TABLE for character CH.
1370CHAR-TABLE must be what returned by `unicode-property-table-internal'. */)
1371 (Lisp_Object char_table, Lisp_Object ch)
1372{
1373 Lisp_Object val;
1374 uniprop_decoder_t decoder;
1375
1376 CHECK_CHAR_TABLE (char_table);
1377 CHECK_CHARACTER (ch);
1378 if (! UNIPROP_TABLE_P (char_table))
1379 error ("Invalid Unicode property table");
1380 val = CHAR_TABLE_REF (char_table, XINT (ch));
1381 decoder = uniprop_get_decoder (char_table);
1382 return (decoder ? decoder (char_table, val) : val);
1383}
1384
1385DEFUN ("put-unicode-property-internal", Fput_unicode_property_internal,
1386 Sput_unicode_property_internal, 3, 3, 0,
1387 doc: /* Set an element of CHAR-TABLE for character CH to VALUE.
1388CHAR-TABLE must be what returned by `unicode-property-table-internal'. */)
1389 (Lisp_Object char_table, Lisp_Object ch, Lisp_Object value)
1390{
1391 uniprop_encoder_t encoder;
1392
1393 CHECK_CHAR_TABLE (char_table);
1394 CHECK_CHARACTER (ch);
1395 if (! UNIPROP_TABLE_P (char_table))
1396 error ("Invalid Unicode property table");
1397 encoder = uniprop_get_encoder (char_table);
1398 if (encoder)
1399 value = encoder (char_table, value);
1400 CHAR_TABLE_SET (char_table, XINT (ch), value);
1401 return Qnil;
1402}
1403
1404
987void 1405void
988syms_of_chartab (void) 1406syms_of_chartab (void)
989{ 1407{
1408 DEFSYM (Qchar_code_property_table, "char-code-property-table");
1409
990 defsubr (&Smake_char_table); 1410 defsubr (&Smake_char_table);
991 defsubr (&Schar_table_parent); 1411 defsubr (&Schar_table_parent);
992 defsubr (&Schar_table_subtype); 1412 defsubr (&Schar_table_subtype);
@@ -998,4 +1418,19 @@ syms_of_chartab (void)
998 defsubr (&Sset_char_table_default); 1418 defsubr (&Sset_char_table_default);
999 defsubr (&Soptimize_char_table); 1419 defsubr (&Soptimize_char_table);
1000 defsubr (&Smap_char_table); 1420 defsubr (&Smap_char_table);
1421 defsubr (&Sunicode_property_table_internal);
1422 defsubr (&Sget_unicode_property_internal);
1423 defsubr (&Sput_unicode_property_internal);
1424
1425 /* Each element has the form (PROP . TABLE).
1426 PROP is a symbol representing a character property.
1427 TABLE is a char-table containing the property value for each character.
1428 TABLE may be a name of file to load to build a char-table.
1429 This variable should be modified only through
1430 `define-char-code-property'. */
1431
1432 DEFVAR_LISP ("char-code-property-alist", Vchar_code_property_alist,
1433 doc: /* Alist of character property name vs char-table containing property values.
1434Internal use only. */);
1435 Vchar_code_property_alist = Qnil;
1001} 1436}