aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKenichi Handa2002-03-01 01:15:38 +0000
committerKenichi Handa2002-03-01 01:15:38 +0000
commit0168c3d80919476f43fcf4f4ae64fc18bf6365a7 (patch)
treec8864abd41e5b1519d0d85541a3b9f47b4aa89ea
parentd2bc5a70e902fc00fb88152ebac7532116c96e29 (diff)
downloademacs-0168c3d80919476f43fcf4f4ae64fc18bf6365a7.tar.gz
emacs-0168c3d80919476f43fcf4f4ae64fc18bf6365a7.zip
New file.
-rw-r--r--src/character.c917
-rw-r--r--src/character.h530
2 files changed, 1447 insertions, 0 deletions
diff --git a/src/character.c b/src/character.c
new file mode 100644
index 00000000000..51527ee7dbb
--- /dev/null
+++ b/src/character.c
@@ -0,0 +1,917 @@
1/* Basic character support.
2 Copyright (C) 1995, 1997, 1998, 2001 Electrotechnical Laboratory, JAPAN.
3 Licensed to the Free Software Foundation.
4 Copyright (C) 2001 Free Software Foundation, Inc.
5 Copyright (C) 2001, 2002
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H13PRO009
8
9This file is part of GNU Emacs.
10
11GNU Emacs is free software; you can redistribute it and/or modify
12it under the terms of the GNU General Public License as published by
13the Free Software Foundation; either version 2, or (at your option)
14any later version.
15
16GNU Emacs is distributed in the hope that it will be useful,
17but WITHOUT ANY WARRANTY; without even the implied warranty of
18MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19GNU General Public License for more details.
20
21You should have received a copy of the GNU General Public License
22along with GNU Emacs; see the file COPYING. If not, write to
23the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24Boston, MA 02111-1307, USA. */
25
26/* At first, see the document in `character.h' to understand the code
27 in this file. */
28
29#ifdef emacs
30#include <config.h>
31#endif
32
33#include <stdio.h>
34
35#ifdef emacs
36
37#include <sys/types.h>
38#include "lisp.h"
39#include "character.h"
40#include "buffer.h"
41#include "charset.h"
42#include "composite.h"
43#include "disptab.h"
44
45#else /* not emacs */
46
47#include "mulelib.h"
48
49#endif /* emacs */
50
51Lisp_Object Qcharacterp;
52
53/* Vector of translation table ever defined.
54 ID of a translation table is used to index this vector. */
55Lisp_Object Vtranslation_table_vector;
56
57/* A char-table for characters which may invoke auto-filling. */
58Lisp_Object Vauto_fill_chars;
59
60Lisp_Object Qauto_fill_chars;
61
62Lisp_Object Vchar_unify_table;
63
64/* A char-table. An element is non-nil iff the corresponding
65 character has a printable glyph. */
66Lisp_Object Vprintable_chars;
67
68/* A char-table. An elemnent is a column-width of the corresponding
69 character. */
70Lisp_Object Vchar_width_table;
71
72/* A char-table. An element is a symbol indicating the direction
73 property of corresponding character. */
74Lisp_Object Vchar_direction_table;
75
76/* Variables used locally in the macro FETCH_MULTIBYTE_CHAR. */
77unsigned char *_fetch_multibyte_char_p;
78int _fetch_multibyte_char_len;
79
80
81
82int
83char_string_with_unification (c, p, advanced)
84 int c;
85 unsigned char *p, **advanced;
86{
87 int bytes;
88
89 MAYBE_UNIFY_CHAR (c);
90
91 if (c <= MAX_3_BYTE_CHAR || c > MAX_5_BYTE_CHAR)
92 {
93 bytes = CHAR_STRING (c, p);
94 }
95 else if (c <= MAX_4_BYTE_CHAR)
96 {
97 p[0] = (0xF0 | (c >> 18));
98 p[1] = (0x80 | ((c >> 12) & 0x3F));
99 p[2] = (0x80 | ((c >> 6) & 0x3F));
100 p[3] = (0x80 | (c & 0x3F));
101 bytes = 4;
102 }
103 else
104 {
105 p[0] = 0xF8;
106 p[1] = (0x80 | ((c >> 18) & 0x0F));
107 p[2] = (0x80 | ((c >> 12) & 0x3F));
108 p[3] = (0x80 | ((c >> 6) & 0x3F));
109 p[4] = (0x80 | (c & 0x3F));
110 bytes = 5;
111 }
112 if (advanced)
113 *advanced = p + bytes;
114 return bytes;
115}
116
117
118int
119string_char_with_unification (p, advanced, len)
120 unsigned char *p, **advanced;
121 int *len;
122{
123 int c, unified;
124 unsigned char *saved_p = p;
125
126 if (*p < 0x80 || ! (*p & 0x20) || ! (*p & 0x10))
127 {
128 c = STRING_CHAR_ADVANCE (p);
129 }
130 else if (! (*p & 0x08))
131 {
132 c = ((((p)[0] & 0xF) << 18)
133 | (((p)[1] & 0x3F) << 12)
134 | (((p)[2] & 0x3F) << 6)
135 | ((p)[3] & 0x3F));
136 p += 4;
137 }
138 else
139 {
140 c = ((((p)[1] & 0x3F) << 18)
141 | (((p)[2] & 0x3F) << 12)
142 | (((p)[3] & 0x3F) << 6)
143 | ((p)[4] & 0x3F));
144 p += 5;
145 }
146
147 MAYBE_UNIFY_CHAR (c);
148
149 if (len)
150 *len = p - saved_p;
151 if (advanced)
152 *advanced = p;
153 return c;
154}
155
156
157/* Translate character C by translation table TABLE. If C is
158 negative, translate a character specified by CHARSET and CODE. If
159 no translation is found in TABLE, return the untranslated
160 character. */
161
162int
163translate_char (table, c)
164 Lisp_Object table;
165 int c;
166{
167 Lisp_Object ch;
168
169 if (! CHAR_TABLE_P (table))
170 return c;
171 ch = CHAR_TABLE_REF (table, c);
172 if (! CHARACTERP (ch))
173 return c;
174 return XINT (ch);
175}
176
177/* Convert the unibyte character C to the corresponding multibyte
178 character based on the current value of charset_primary. If C
179 can't be converted, return C. */
180
181int
182unibyte_char_to_multibyte (c)
183 int c;
184{
185 struct charset *charset = CHARSET_FROM_ID (charset_primary);
186 int c1 = DECODE_CHAR (charset, c);
187
188 return ((c1 >= 0) ? c1 : c);
189}
190
191
192/* Convert the multibyte character C to unibyte 8-bit character based
193 on the current value of charset_primary. If dimension of
194 charset_primary is more than one, return (C & 0xFF).
195
196 The argument REV_TBL is now ignored. It will be removed in the
197 future. */
198
199int
200multibyte_char_to_unibyte (c, rev_tbl)
201 int c;
202 Lisp_Object rev_tbl;
203{
204 struct charset *charset = CHARSET_FROM_ID (charset_primary);
205 unsigned c1 = ENCODE_CHAR (charset, c);
206
207 return ((c1 != CHARSET_INVALID_CODE (charset)) ? c1 : c & 0xFF);
208}
209
210
211DEFUN ("characterp", Fcharacterp, Scharacterp, 1, 2, 0,
212 doc: /* Return non-nil if OBJECT is a character. */)
213 (object, ignore)
214 Lisp_Object object, ignore;
215{
216 return (CHARACTERP (object) ? Qt : Qnil);
217}
218
219DEFUN ("max-char", Fmax_char, Smax_char, 0, 0, 0,
220 doc: /* Return the character of the maximum code. */)
221 ()
222{
223 return make_number (MAX_CHAR);
224}
225
226DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte,
227 Sunibyte_char_to_multibyte, 1, 1, 0,
228 doc: /* Convert the unibyte character CH to multibyte character.
229The multibyte character is a result of decoding CH by
230the current primary charset (value of `charset-primary'). */)
231 (ch)
232 Lisp_Object ch;
233{
234 int c;
235 struct charset *charset;
236
237 CHECK_CHARACTER (ch);
238 c = XFASTINT (ch);
239 if (c >= 0400)
240 error ("Invalid unibyte character: %d", c);
241 charset = CHARSET_FROM_ID (charset_primary);
242 c = DECODE_CHAR (charset, c);
243 if (c < 0)
244 error ("Can't convert to multibyte character: %d", XINT (ch));
245 return make_number (c);
246}
247
248DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte,
249 Smultibyte_char_to_unibyte, 1, 1, 0,
250 doc: /* Convert the multibyte character CH to unibyte character.\n\
251The unibyte character is a result of encoding CH by
252the current primary charset (value of `charset-primary'). */)
253 (ch)
254 Lisp_Object ch;
255{
256 int c;
257 unsigned code;
258 struct charset *charset;
259
260 CHECK_CHARACTER (ch);
261 c = XFASTINT (ch);
262 charset = CHARSET_FROM_ID (charset_primary);
263 code = ENCODE_CHAR (charset, c);
264 if (code < CHARSET_MIN_CODE (charset)
265 || code > CHARSET_MAX_CODE (charset))
266 error ("Can't convert to unibyte character: %d", XINT (ch));
267 return make_number (code);
268}
269
270DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
271 doc: /* Return 1 regardless of the argument CHAR.
272This is now an obsolete function. We keep it just for backward compatibility. */)
273 (ch)
274 Lisp_Object ch;
275{
276 CHECK_CHARACTER (ch);
277 return make_number (1);
278}
279
280DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
281 doc: /* Return width of CHAR when displayed in the current buffer.
282The width is measured by how many columns it occupies on the screen.
283Tab is taken to occupy `tab-width' columns. */)
284 (ch)
285 Lisp_Object ch;
286{
287 Lisp_Object disp;
288 int c, width;
289 struct Lisp_Char_Table *dp = buffer_display_table ();
290
291 CHECK_CHARACTER (ch);
292 c = XINT (ch);
293
294 /* Get the way the display table would display it. */
295 disp = dp ? DISP_CHAR_VECTOR (dp, c) : Qnil;
296
297 if (VECTORP (disp))
298 width = ASIZE (disp);
299 else
300 width = CHAR_WIDTH (c);
301
302 return make_number (width);
303}
304
305/* Return width of string STR of length LEN when displayed in the
306 current buffer. The width is measured by how many columns it
307 occupies on the screen. */
308
309int
310strwidth (str, len)
311 unsigned char *str;
312 int len;
313{
314 return c_string_width (str, len, -1, NULL, NULL);
315}
316
317/* Return width of string STR of length LEN when displayed in the
318 current buffer. The width is measured by how many columns it
319 occupies on the screen. If PRECISION > 0, return the width of
320 longest substring that doesn't exceed PRECISION, and set number of
321 characters and bytes of the substring in *NCHARS and *NBYTES
322 respectively. */
323
324c_string_width (str, len, precision, nchars, nbytes)
325 unsigned char *str;
326 int precision, *nchars, *nbytes;
327{
328 int i = 0, i_byte = 0;
329 int width = 0;
330 struct Lisp_Char_Table *dp = buffer_display_table ();
331
332 while (i_byte < len)
333 {
334 int bytes, thiswidth;
335 Lisp_Object val;
336 int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
337
338 if (dp)
339 {
340 val = DISP_CHAR_VECTOR (dp, c);
341 if (VECTORP (val))
342 thiswidth = XVECTOR (val)->size;
343 else
344 thiswidth = CHAR_WIDTH (c);
345 }
346 else
347 {
348 thiswidth = CHAR_WIDTH (c);
349 }
350
351 if (precision > 0
352 && (width + thiswidth > precision))
353 {
354 *nchars = i;
355 *nbytes = i_byte;
356 return width;
357 }
358 i++;
359 i_byte += bytes;
360 width += thiswidth;
361 }
362
363 if (precision > 0)
364 {
365 *nchars = i;
366 *nbytes = i_byte;
367 }
368
369 return width;
370}
371
372/* Return width of Lisp string STRING when displayed in the current
373 buffer. The width is measured by how many columns it occupies on
374 the screen while paying attention to compositions. If PRECISION >
375 0, return the width of longest substring that doesn't exceed
376 PRECISION, and set number of characters and bytes of the substring
377 in *NCHARS and *NBYTES respectively. */
378
379int
380lisp_string_width (string, precision, nchars, nbytes)
381 Lisp_Object string;
382 int precision, *nchars, *nbytes;
383{
384 int len = XSTRING (string)->size;
385 int len_byte = STRING_BYTES (XSTRING (string));
386 unsigned char *str = XSTRING (string)->data;
387 int i = 0, i_byte = 0;
388 int width = 0;
389 struct Lisp_Char_Table *dp = buffer_display_table ();
390
391 while (i < len)
392 {
393 int chars, bytes, thiswidth;
394 Lisp_Object val;
395 int cmp_id;
396 int ignore, end;
397
398 if (find_composition (i, -1, &ignore, &end, &val, string)
399 && ((cmp_id = get_composition_id (i, i_byte, end - i, val, string))
400 >= 0))
401 {
402 thiswidth = composition_table[cmp_id]->width;
403 chars = end - i;
404 bytes = string_char_to_byte (string, end) - i_byte;
405 }
406 else if (dp)
407 {
408 int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
409
410 chars = 1;
411 val = DISP_CHAR_VECTOR (dp, c);
412 if (VECTORP (val))
413 thiswidth = XVECTOR (val)->size;
414 else
415 thiswidth = CHAR_WIDTH (c);
416 }
417 else
418 {
419 int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
420
421 chars = 1;
422 thiswidth = CHAR_WIDTH (c);
423 }
424
425 if (precision > 0
426 && (width + thiswidth > precision))
427 {
428 *nchars = i;
429 *nbytes = i_byte;
430 return width;
431 }
432 i += chars;
433 i_byte += bytes;
434 width += thiswidth;
435 }
436
437 if (precision > 0)
438 {
439 *nchars = i;
440 *nbytes = i_byte;
441 }
442
443 return width;
444}
445
446DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
447 doc: /* Return width of STRING when displayed in the current buffer.
448Width is measured by how many columns it occupies on the screen.
449When calculating width of a multibyte character in STRING,
450only the base leading-code is considered; the validity of
451the following bytes is not checked. Tabs in STRING are always
452taken to occupy `tab-width' columns. */)
453 (str)
454 Lisp_Object str;
455{
456 Lisp_Object val;
457
458 CHECK_STRING (str);
459 XSETFASTINT (val, lisp_string_width (str, -1, NULL, NULL));
460 return val;
461}
462
463DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
464 doc: /* Return the direction of CHAR.
465The returned value is 0 for left-to-right and 1 for right-to-left. */)
466 (ch)
467 Lisp_Object ch;
468{
469 int c;
470
471 CHECK_CHARACTER (ch);
472 c = XINT (ch);
473 return CHAR_TABLE_REF (Vchar_direction_table, c);
474}
475
476DEFUN ("chars-in-region", Fchars_in_region, Schars_in_region, 2, 2, 0,
477 doc: /* Return number of characters between BEG and END.
478This is now an obsolete function. We keep it just for backward compatibility. */)
479 (beg, end)
480 Lisp_Object beg, end;
481{
482 int from, to;
483
484 CHECK_NUMBER_COERCE_MARKER (beg);
485 CHECK_NUMBER_COERCE_MARKER (end);
486
487 from = min (XFASTINT (beg), XFASTINT (end));
488 to = max (XFASTINT (beg), XFASTINT (end));
489
490 return make_number (to - from);
491}
492
493/* Return the number of characters in the NBYTES bytes at PTR.
494 This works by looking at the contents and checking for multibyte
495 sequences while assuming that there's no invalid sequence.
496 However, if the current buffer has enable-multibyte-characters =
497 nil, we treat each byte as a character. */
498
499int
500chars_in_text (ptr, nbytes)
501 unsigned char *ptr;
502 int nbytes;
503{
504 /* current_buffer is null at early stages of Emacs initialization. */
505 if (current_buffer == 0
506 || NILP (current_buffer->enable_multibyte_characters))
507 return nbytes;
508
509 return multibyte_chars_in_text (ptr, nbytes);
510}
511
512/* Return the number of characters in the NBYTES bytes at PTR.
513 This works by looking at the contents and checking for multibyte
514 sequences while assuming that there's no invalid sequence. It
515 ignores enable-multibyte-characters. */
516
517int
518multibyte_chars_in_text (ptr, nbytes)
519 unsigned char *ptr;
520 int nbytes;
521{
522 unsigned char *endp = ptr + nbytes;
523 int chars = 0;
524
525 while (ptr < endp)
526 {
527 int len = MULTIBYTE_LENGTH (ptr, endp);
528
529 if (len == 0)
530 abort ();
531 ptr += len;
532 chars++;
533 }
534
535 return chars;
536}
537
538/* Parse unibyte text at STR of LEN bytes as a multibyte text, count
539 characters and bytes in it, and store them in *NCHARS and *NBYTES
540 respectively. On counting bytes, pay attention to that 8-bit
541 characters not constructing a valid multibyte sequence are
542 represented by 2-byte in a multibyte text. */
543
544void
545parse_str_as_multibyte (str, len, nchars, nbytes)
546 unsigned char *str;
547 int len, *nchars, *nbytes;
548{
549 unsigned char *endp = str + len;
550 int n, chars = 0, bytes = 0;
551
552 if (len >= MAX_MULTIBYTE_LENGTH)
553 {
554 unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH;
555 while (str < adjusted_endp)
556 {
557 if ((n = MULTIBYTE_LENGTH_NO_CHECK (str)) > 0)
558 str += n, bytes += n;
559 else
560 str++, bytes += 2;
561 chars++;
562 }
563 }
564 while (str < endp)
565 {
566 if ((n = MULTIBYTE_LENGTH (str, endp)) > 0)
567 str += n, bytes += n;
568 else
569 str++, bytes += 2;
570 chars++;
571 }
572
573 *nchars = chars;
574 *nbytes = bytes;
575 return;
576}
577
578/* Arrange unibyte text at STR of NBYTES bytes as a multibyte text.
579 It actually converts only such 8-bit characters that don't contruct
580 a multibyte sequence to multibyte forms of Latin-1 characters. If
581 NCHARS is nonzero, set *NCHARS to the number of characters in the
582 text. It is assured that we can use LEN bytes at STR as a work
583 area and that is enough. Return the number of bytes of the
584 resulting text. */
585
586int
587str_as_multibyte (str, len, nbytes, nchars)
588 unsigned char *str;
589 int len, nbytes, *nchars;
590{
591 unsigned char *p = str, *endp = str + nbytes;
592 unsigned char *to;
593 int chars = 0;
594 int n;
595
596 if (nbytes >= MAX_MULTIBYTE_LENGTH)
597 {
598 unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH;
599 while (p < adjusted_endp
600 && (n = MULTIBYTE_LENGTH_NO_CHECK (p)) > 0)
601 p += n, chars++;
602 }
603 while ((n = MULTIBYTE_LENGTH (p, endp)) > 0)
604 p += n, chars++;
605 if (nchars)
606 *nchars = chars;
607 if (p == endp)
608 return nbytes;
609
610 to = p;
611 nbytes = endp - p;
612 endp = str + len;
613 safe_bcopy ((char *) p, (char *) (endp - nbytes), nbytes);
614 p = endp - nbytes;
615
616 if (nbytes >= MAX_MULTIBYTE_LENGTH)
617 {
618 unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH;
619 while (p < adjusted_endp)
620 {
621 if ((n = MULTIBYTE_LENGTH_NO_CHECK (p)) > 0)
622 {
623 while (n--)
624 *to++ = *p++;
625 }
626 else
627 {
628 int c = *p++;
629 c = BYTE8_TO_CHAR (c);
630 to += CHAR_STRING (c, to);
631 }
632 }
633 chars++;
634 }
635 while (p < endp)
636 {
637 if ((n = MULTIBYTE_LENGTH (p, endp)) > 0)
638 {
639 while (n--)
640 *to++ = *p++;
641 }
642 else
643 {
644 int c = *p++;
645 c = BYTE8_TO_CHAR (c);
646 to += CHAR_STRING (c, to);
647 }
648 chars++;
649 }
650 if (nchars)
651 *nchars = chars;
652 return (to - str);
653}
654
655/* Parse unibyte string at STR of LEN bytes, and return the number of
656 bytes it may ocupy when converted to multibyte string by
657 `str_to_multibyte'. */
658
659int
660parse_str_to_multibyte (str, len)
661 unsigned char *str;
662 int len;
663{
664 unsigned char *endp = str + len;
665 int bytes;
666
667 for (bytes = 0; str < endp; str++)
668 bytes += (*str < 0x80) ? 1 : 2;
669 return bytes;
670}
671
672
673/* Convert unibyte text at STR of NBYTES bytes to a multibyte text
674 that contains the same single-byte characters. It actually
675 converts all 8-bit characters to multibyte forms. It is assured
676 that we can use LEN bytes at STR as a work area and that is
677 enough. */
678
679int
680str_to_multibyte (str, len, bytes)
681 unsigned char *str;
682 int len, bytes;
683{
684 unsigned char *p = str, *endp = str + bytes;
685 unsigned char *to;
686
687 while (p < endp && *p < 0x80) p++;
688 if (p == endp)
689 return bytes;
690 to = p;
691 bytes = endp - p;
692 endp = str + len;
693 safe_bcopy ((char *) p, (char *) (endp - bytes), bytes);
694 p = endp - bytes;
695 while (p < endp)
696 {
697 int c = *p++;
698
699 if (c >= 0x80)
700 c = BYTE8_TO_CHAR (c);
701 to += CHAR_STRING (c, to);
702 }
703 return (to - str);
704}
705
706/* Arrange multibyte text at STR of LEN bytes as a unibyte text. It
707 actually converts characters in the range 0x80..0xFF to
708 unibyte. */
709
710int
711str_as_unibyte (str, bytes)
712 unsigned char *str;
713 int bytes;
714{
715 unsigned char *p = str, *endp = str + bytes;
716 unsigned char *to = str;
717 int c, len;
718
719 while (p < endp)
720 {
721 c = *p;
722 len = BYTES_BY_CHAR_HEAD (c);
723 if (CHAR_BYTE8_HEAD_P (c))
724 break;
725 p += len;
726 }
727 to = p;
728 while (p < endp)
729 {
730 c = *p;
731 len = BYTES_BY_CHAR_HEAD (c);
732 if (CHAR_BYTE8_HEAD_P (c))
733 {
734 c = STRING_CHAR_ADVANCE (p);
735 *to++ = CHAR_TO_BYTE8 (c);
736 }
737 else
738 {
739 while (len--) *to++ = *p++;
740 }
741 }
742 return (to - str);
743}
744
745int
746string_count_byte8 (string)
747 Lisp_Object string;
748{
749 int multibyte = STRING_MULTIBYTE (string);
750 int nchars = XSTRING (string)->size;
751 int nbytes = STRING_BYTES (XSTRING (string));
752 unsigned char *p = XSTRING (string)->data;
753 unsigned char *pend = p + nbytes;
754 int count = 0;
755 int c, len;
756
757 if (multibyte)
758 while (p < pend)
759 {
760 c = *p;
761 len = BYTES_BY_CHAR_HEAD (c);
762
763 if (CHAR_BYTE8_HEAD_P (c))
764 count++;
765 p += len;
766 }
767 else
768 while (p < pend)
769 {
770 if (*p++ >= 0x80)
771 count++;
772 }
773 return count;
774}
775
776
777Lisp_Object
778string_escape_byte8 (string)
779 Lisp_Object string;
780{
781 int nchars = XSTRING (string)->size;
782 int nbytes = STRING_BYTES (XSTRING (string));
783 int multibyte = STRING_MULTIBYTE (string);
784 int byte8_count;
785 unsigned char *src, *src_end, *dst;
786 Lisp_Object val;
787 int c, len;
788
789 if (multibyte && nchars == nbytes)
790 return string;
791
792 byte8_count = string_count_byte8 (string);
793
794 if (byte8_count == 0)
795 return string;
796
797 if (multibyte)
798 /* Convert 2-byte sequence of byte8 chars to 4-byte octal. */
799 val = make_uninit_multibyte_string (nchars + byte8_count * 2,
800 nbytes + byte8_count * 2);
801 else
802 /* Convert 1-byte sequence of byte8 chars to 4-byte octal. */
803 val = make_uninit_string (nbytes + byte8_count * 3);
804
805 src = XSTRING (string)->data;
806 src_end = src + nbytes;
807 dst = XSTRING (val)->data;
808 if (multibyte)
809 while (src < src_end)
810 {
811 c = *src;
812 len = BYTES_BY_CHAR_HEAD (c);
813
814 if (CHAR_BYTE8_HEAD_P (c))
815 {
816 c = STRING_CHAR_ADVANCE (src);
817 c = CHAR_TO_BYTE8 (c);
818 sprintf (dst, "\\%03o", c);
819 dst += 4;
820 }
821 else
822 while (len--) *dst++ = *src++;
823 }
824 else
825 while (src < src_end)
826 {
827 c = *src++;
828 if (c >= 0x80)
829 {
830 sprintf (dst, "\\%03o", c);
831 dst += 4;
832 }
833 else
834 *dst++ = c;
835 }
836 return val;
837}
838
839
840DEFUN ("string", Fstring, Sstring, 1, MANY, 0,
841 doc: /*
842Concatenate all the argument characters and make the result a string. */)
843 (n, args)
844 int n;
845 Lisp_Object *args;
846{
847 int i;
848 unsigned char *buf = (unsigned char *) alloca (MAX_MULTIBYTE_LENGTH * n);
849 unsigned char *p = buf;
850 int c;
851
852 for (i = 0; i < n; i++)
853 {
854 CHECK_CHARACTER (args[i]);
855 c = XINT (args[i]);
856 p += CHAR_STRING (c, p);
857 }
858
859 return make_string_from_bytes ((char *) buf, n, p - buf);
860}
861
862void
863init_character_once ()
864{
865}
866
867#ifdef emacs
868
869void
870syms_of_character ()
871{
872 DEFSYM (Qcharacterp, "characterp");
873 DEFSYM (Qauto_fill_chars, "auto-fill-chars");
874
875 staticpro (&Vchar_unify_table);
876 Vchar_unify_table = Qnil;
877
878 defsubr (&Smax_char);
879 defsubr (&Scharacterp);
880 defsubr (&Sunibyte_char_to_multibyte);
881 defsubr (&Smultibyte_char_to_unibyte);
882 defsubr (&Schar_bytes);
883 defsubr (&Schar_width);
884 defsubr (&Sstring_width);
885 defsubr (&Schar_direction);
886 defsubr (&Schars_in_region);
887 defsubr (&Sstring);
888
889 DEFVAR_LISP ("translation-table-vector", &Vtranslation_table_vector,
890 doc: /*
891Vector of cons cell of a symbol and translation table ever defined.
892An ID of a translation table is an index of this vector. */);
893 Vtranslation_table_vector = Fmake_vector (make_number (16), Qnil);
894
895 DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars,
896 doc: /*
897A char-table for characters which invoke auto-filling.
898Such characters have value t in this table. */);
899 Vauto_fill_chars = Fmake_char_table (Qauto_fill_chars, Qnil);
900 CHAR_TABLE_SET (Vauto_fill_chars, make_number (' '), Qt);
901 CHAR_TABLE_SET (Vauto_fill_chars, make_number ('\n'), Qt);
902
903 DEFVAR_LISP ("char-width-table", &Vchar_width_table,
904 doc: /*
905A char-table for width (columns) of each character. */);
906 Vchar_width_table = Fmake_char_table (Qnil, make_number (1));
907
908 DEFVAR_LISP ("char-direction-table", &Vchar_direction_table,
909 doc: /* A char-table for direction of each character. */);
910 Vchar_direction_table = Fmake_char_table (Qnil, make_number (1));
911
912 DEFVAR_LISP ("printable-chars", &Vprintable_chars,
913 doc: /* A char-table for each printable character. */);
914 Vprintable_chars = Fmake_char_table (Qnil, Qt);
915}
916
917#endif /* emacs */
diff --git a/src/character.h b/src/character.h
new file mode 100644
index 00000000000..ca9d0fdeb19
--- /dev/null
+++ b/src/character.h
@@ -0,0 +1,530 @@
1/* Header for multibyte character handler.
2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
3 Licensed to the Free Software Foundation.
4 Copyright (C) 2001, 2002
5 National Institute of Advanced Industrial Science and Technology (AIST)
6 Registration Number H13PRO009
7
8This file is part of GNU Emacs.
9
10GNU Emacs is free software; you can redistribute it and/or modify
11it under the terms of the GNU General Public License as published by
12the Free Software Foundation; either version 2, or (at your option)
13any later version.
14
15GNU Emacs is distributed in the hope that it will be useful,
16but WITHOUT ANY WARRANTY; without even the implied warranty of
17MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18GNU General Public License for more details.
19
20You should have received a copy of the GNU General Public License
21along with GNU Emacs; see the file COPYING. If not, write to
22the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23Boston, MA 02111-1307, USA. */
24
25#ifndef EMACS_CHARACTER_H
26#define EMACS_CHARACTER_H
27
28/* 0-7F 0xxxxxxx
29 00..7F
30 80-7FF 110xxxxx 10xxxxxx
31 C2..DF 80..BF
32 800-FFFF 1110xxxx 10xxxxxx 10xxxxxx
33 E0..EF 80..BF 80..BF
34 10000-1FFFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
35 F0..F7 80..BF 80..BF 80..BF
36 200000-3FFF7F 11111000 1000xxxx 10xxxxxx 10xxxxxx 10xxxxxx
37 F8 80..8F 80..BF 80..BF 80..BF
38 invalid 11111001
39 F9
40 invalid 1111101x
41 FA..FB
42 invalid 111111xx
43 FC..FE
44
45 raw-8-bit
46 3FFF80-3FFFFF 1100000x 10xxxxxx
47 C0..C1 80..BF
48
49*/
50
51/* This is the maximum character code ((1 << CHARACTERBITS) - 1). */
52#define MAX_CHAR 0x3FFFFF
53
54#define MAX_UNICODE_CHAR 0x10FFFF
55
56#define MAX_1_BYTE_CHAR 0x7F
57#define MAX_2_BYTE_CHAR 0x7FF
58#define MAX_3_BYTE_CHAR 0xFFFF
59#define MAX_4_BYTE_CHAR 0x1FFFFF
60#define MAX_5_BYTE_CHAR 0x3FFF7F
61
62#define BYTE8_TO_CHAR(byte) ((byte) + 0x3FFF00)
63#define CHAR_TO_BYTE8(c) ((c) - 0x3FFF00)
64#define CHAR_BYTE8_P(c) ((c) > MAX_5_BYTE_CHAR)
65#define CHAR_BYTE8_HEAD_P(byte) ((byte) == 0xC0 || (byte) == 0xC1)
66
67/* This is the maximum byte length of multi-byte sequence. */
68#define MAX_MULTIBYTE_LENGTH 5
69
70/* Return a Lisp character whose code is C. */
71#define make_char(c) make_number (c)
72
73/* Nonzero iff C is an ASCII byte. */
74#define ASCII_BYTE_P(c) ((unsigned) (c) < 0x80)
75
76/* Nonzero iff X is a character. */
77#define CHARACTERP(x) (NATNUMP (x) && XFASTINT (x) <= MAX_CHAR)
78
79/* Nozero iff C is valid as a charater code. GENERICP is not used
80 now. It will be removed in the future. */
81#define CHAR_VALID_P(c, genericp) CHARACTERP (c)
82
83/* Check if Lisp object X is a character or not. */
84#define CHECK_CHARACTER(x) \
85 do { \
86 if (! CHARACTERP(x)) x = wrong_type_argument (Qcharacterp, (x)); \
87 } while (0)
88
89/* Nonzero iff C is an ASCII character. */
90#define ASCII_CHAR_P(c) ((unsigned) (c) < 0x80)
91
92/* Nonzero iff C is a character of code less than 0x100. */
93#define SINGLE_BYTE_CHAR_P(c) ((unsigned) (c) < 0x100)
94
95/* Nonzero if character C has a printable glyph. */
96#define CHAR_PRINTABLE_P(c) \
97 (((c) >= 32 && ((c) < 127) \
98 || ! NILP (CHAR_TABLE_REF (Vprintable_chars, (c)))))
99
100/* How many bytes C occupies in a multibyte buffer. */
101#define CHAR_BYTES(c) \
102 ( (c) <= MAX_1_BYTE_CHAR ? 1 \
103 : (c) <= MAX_2_BYTE_CHAR ? 2 \
104 : (c) <= MAX_3_BYTE_CHAR ? 3 \
105 : (c) <= MAX_4_BYTE_CHAR ? 4 \
106 : (c) <= MAX_5_BYTE_CHAR ? 5 \
107 : 2)
108
109/* Store multibyte form of the character C in STR. The caller should
110 allocate at least MAX_MULTIBYTE_LENGTH bytes area at STR in
111 advance. Returns the length of the multibyte form. */
112
113#define CHAR_STRING(c, p) \
114 ((unsigned) (c) <= MAX_1_BYTE_CHAR \
115 ? ((p)[0] = (c), \
116 1) \
117 : (unsigned) (c) <= MAX_2_BYTE_CHAR \
118 ? ((p)[0] = (0xC0 | ((c) >> 6)), \
119 (p)[1] = (0x80 | ((c) & 0x3F)), \
120 2) \
121 : (unsigned) (c) <= MAX_3_BYTE_CHAR \
122 ? ((p)[0] = (0xE0 | ((c) >> 12)), \
123 (p)[1] = (0x80 | (((c) >> 6) & 0x3F)), \
124 (p)[2] = (0x80 | ((c) & 0x3F)), \
125 3) \
126 : (unsigned) (c) <= MAX_5_BYTE_CHAR \
127 ? char_string_with_unification (c, p, NULL) \
128 : ((p)[0] = (0xC0 | (((c) >> 6) & 0x01)), \
129 (p)[1] = (0x80 | ((c) & 0x3F)), \
130 2))
131
132
133/* Like CHAR_STRING, but advance P to the end of the multibyte
134 form. */
135
136#define CHAR_STRING_ADVANCE(c, p) \
137 ((unsigned) (c) <= MAX_1_BYTE_CHAR \
138 ? *(p)++ = (c) \
139 : (unsigned) (c) <= MAX_2_BYTE_CHAR \
140 ? (*(p)++ = (0xC0 | ((c) >> 6)), \
141 *(p)++ = (0x80 | ((c) & 0x3F))) \
142 : (unsigned) (c) <= MAX_3_BYTE_CHAR \
143 ? (*(p)++ = (0xE0 | ((c) >> 12)), \
144 *(p)++ = (0x80 | (((c) >> 6) & 0x3F)), \
145 *(p)++ = (0x80 | ((c) & 0x3F))) \
146 : (unsigned) (c) <= MAX_5_BYTE_CHAR \
147 ? char_string_with_unification (c, p, &p) \
148 : (*(p)++ = (0xC0 | (((c) >> 6) & 0x01)), \
149 *(p)++ = (0x80 | ((c) & 0x3F))))
150
151
152/* Nonzero iff BYTE starts a character in a multibyte form. */
153#define CHAR_HEAD_P(byte) (((byte) & 0xC0) != 0x80)
154
155/* Nonzero iff BYTE starts a non-ASCII character in a multibyte
156 form. */
157#define LEADING_CODE_P(byte) (((byte) & 0xC0) == 0xC0)
158
159/* Just kept for backward compatibility. This macro will be removed
160 in the future. */
161#define BASE_LEADING_CODE_P LEADING_CODE_P
162
163/* How many bytes a character that starts with BYTE occupies in a
164 multibyte form. */
165#define BYTES_BY_CHAR_HEAD(byte) \
166 (!((byte) & 0x80) ? 1 \
167 : !((byte) & 0x20) ? 2 \
168 : !((byte) & 0x10) ? 3 \
169 : !((byte) & 0x08) ? 4 \
170 : 5)
171
172
173/* Return the length of the multi-byte form at string STR of length
174 LEN while assuming that STR points a valid multi-byte form. As
175 this macro isn't necessary anymore, all callers will be changed to
176 use BYTES_BY_CHAR_HEAD directly in the future. */
177
178#define MULTIBYTE_FORM_LENGTH(str, len) \
179 BYTES_BY_CHAR_HEAD (*(str))
180
181/* Parse multibyte string STR of length LENGTH and set BYTES to the
182 byte length of a character at STR while assuming that STR points a
183 valid multibyte form. As this macro isn't necessary anymore, all
184 callers will be changed to use BYTES_BY_CHAR_HEAD directly in the
185 future. */
186
187#define PARSE_MULTIBYTE_SEQ(str, length, bytes) \
188 (bytes) = BYTES_BY_CHAR_HEAD (*(str))
189
190/* The byte length of multibyte form at unibyte string P ending at
191 PEND. If STR doesn't point a valid multibyte form, return 0. */
192
193#define MULTIBYTE_LENGTH(p, pend) \
194 (p >= pend ? 0 \
195 : !((p)[0] & 0x80) ? 1 \
196 : ((p + 1 >= pend) || (((p)[1] & 0xC0) != 0x80)) ? 0 \
197 : ((p)[0] & 0xE0) == 0xC0 ? 2 \
198 : ((p + 2 >= pend) || (((p)[2] & 0xC0) != 0x80)) ? 0 \
199 : ((p)[0] & 0xF0) == 0xE0 ? 3 \
200 : ((p + 3 >= pend) || (((p)[3] & 0xC0) != 0x80)) ? 0 \
201 : ((p)[0] & 0xF8) == 0xF0 ? 4 \
202 : ((p + 4 >= pend) || (((p)[4] & 0xC0) != 0x80)) ? 0 \
203 : (p)[0] == 0xF8 && ((p)[1] & 0xF0) == 0x80 ? 5 \
204 : 0)
205
206
207/* Like MULTIBYTE_LENGTH but don't check the ending address. */
208
209#define MULTIBYTE_LENGTH_NO_CHECK(p) \
210 (!((p)[0] & 0x80) ? 1 \
211 : ((p)[1] & 0xC0) != 0x80 ? 0 \
212 : ((p)[0] & 0xE0) == 0xC0 ? 2 \
213 : ((p)[2] & 0xC0) != 0x80 ? 0 \
214 : ((p)[0] & 0xF0) == 0xE0 ? 3 \
215 : ((p)[3] & 0xC0) != 0x80 ? 0 \
216 : ((p)[0] & 0xF8) == 0xF0 ? 4 \
217 : ((p)[4] & 0xC0) != 0x80 ? 0 \
218 : (p)[0] == 0xF8 && ((p)[1] & 0xF0) == 0x80 ? 5 \
219 : 0)
220
221
222/* Return the character code of character whose multibyte form is at
223 P. The argument LEN is ignored. It will be removed in the
224 future. */
225
226#define STRING_CHAR(p, len) \
227 (!((p)[0] & 0x80) \
228 ? (p)[0] \
229 : ! ((p)[0] & 0x20) \
230 ? (((((p)[0] & 0x1F) << 6) \
231 | ((p)[1] & 0x3F)) \
232 + (((unsigned char) (p)[0]) < 0xC2 ? 0x3FFF80 : 0)) \
233 : ! ((p)[0] & 0x10) \
234 ? ((((p)[0] & 0x0F) << 12) \
235 | (((p)[1] & 0x3F) << 6) \
236 | ((p)[2] & 0x3F)) \
237 : string_char_with_unification (p, NULL, NULL))
238
239
240/* Like STRING_CHAR but set ACTUAL_LEN to the length of multibyte
241 form. The argument LEN is ignored. It will be removed in the
242 future. */
243
244#define STRING_CHAR_AND_LENGTH(p, len, actual_len) \
245 (!((p)[0] & 0x80) \
246 ? ((actual_len) = 1, (p)[0]) \
247 : ! ((p)[0] & 0x20) \
248 ? ((actual_len) = 2, \
249 (((((p)[0] & 0x1F) << 6) \
250 | ((p)[1] & 0x3F)) \
251 + (((unsigned char) (p)[0]) < 0xC2 ? 0x3FFF80 : 0))) \
252 : ! ((p)[0] & 0x10) \
253 ? ((actual_len) = 3, \
254 ((((p)[0] & 0x0F) << 12) \
255 | (((p)[1] & 0x3F) << 6) \
256 | ((p)[2] & 0x3F))) \
257 : string_char_with_unification (p, NULL, &actual_len))
258
259
260/* Like STRING_CHAR but advacen P to the end of multibyte form. */
261
262#define STRING_CHAR_ADVANCE(p) \
263 (!((p)[0] & 0x80) \
264 ? *(p)++ \
265 : ! ((p)[0] & 0x20) \
266 ? ((p) += 2, \
267 ((((p)[-2] & 0x1F) << 6) \
268 | ((p)[-1] & 0x3F) \
269 | (((unsigned char) (p)[-2]) < 0xC2 ? 0x3FFF80 : 0))) \
270 : ! ((p)[0] & 0x10) \
271 ? ((p) += 3, \
272 ((((p)[-3] & 0x0F) << 12) \
273 | (((p)[-2] & 0x3F) << 6) \
274 | ((p)[-1] & 0x3F))) \
275 : string_char_with_unification (p, &p, NULL))
276
277
278/* Fetch the "next" character from Lisp string STRING at byte position
279 BYTEIDX, character position CHARIDX. Store it into OUTPUT.
280
281 All the args must be side-effect-free.
282 BYTEIDX and CHARIDX must be lvalues;
283 we increment them past the character fetched. */
284
285#define FETCH_STRING_CHAR_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX) \
286 if (1) \
287 { \
288 CHARIDX++; \
289 if (STRING_MULTIBYTE (STRING)) \
290 { \
291 unsigned char *ptr = &XSTRING (STRING)->data[BYTEIDX]; \
292 int len; \
293 \
294 OUTPUT = STRING_CHAR_AND_LENGTH (ptr, 0, len); \
295 BYTEIDX += len; \
296 } \
297 else \
298 OUTPUT = XSTRING (STRING)->data[BYTEIDX++]; \
299 } \
300 else
301
302
303/* Like FETCH_STRING_CHAR_ADVANCE but assumes STRING is multibyte. */
304
305#define FETCH_STRING_CHAR_ADVANCE_NO_CHECK(OUTPUT, STRING, CHARIDX, BYTEIDX) \
306 if (1) \
307 { \
308 unsigned char *ptr = &XSTRING (STRING)->data[BYTEIDX]; \
309 int len; \
310 \
311 OUTPUT = STRING_CHAR_AND_LENGTH (ptr, 0, len); \
312 BYTEIDX += len; \
313 CHARIDX++; \
314 } \
315 else
316
317
318/* Like FETCH_STRING_CHAR_ADVANCE but fetch character from the current
319 buffer. */
320
321#define FETCH_CHAR_ADVANCE(OUTPUT, CHARIDX, BYTEIDX) \
322 if (1) \
323 { \
324 CHARIDX++; \
325 if (!NILP (current_buffer->enable_multibyte_characters)) \
326 { \
327 unsigned char *ptr = BYTE_POS_ADDR (BYTEIDX); \
328 int len; \
329 \
330 OUTPUT= STRING_CHAR_AND_LENGTH (ptr, 0, len); \
331 BYTEIDX += len; \
332 } \
333 else \
334 { \
335 OUTPUT = *(BYTE_POS_ADDR (BYTEIDX)); \
336 BYTEIDX++; \
337 } \
338 } \
339 else
340
341
342/* Like FETCH_CHAR_ADVANCE but assumes STRING is multibyte. */
343
344#define FETCH_CHAR_ADVANCE_NO_CHECK(OUTPUT, CHARIDX, BYTEIDX) \
345 if (1) \
346 { \
347 unsigned char *ptr = BYTE_POS_ADDR (BYTEIDX); \
348 int len; \
349 \
350 OUTPUT= STRING_CHAR_AND_LENGTH (ptr, 0, len); \
351 BYTEIDX += len; \
352 CHARIDX++; \
353 } \
354 else
355
356
357/* Increase the buffer byte position POS_BYTE of the current buffer to
358 the next character boundary. No range checking of POS. */
359
360#define INC_POS(pos_byte) \
361 do { \
362 unsigned char *p = BYTE_POS_ADDR (pos_byte); \
363 pos_byte += BYTES_BY_CHAR_HEAD (*p); \
364 } while (0)
365
366
367/* Decrease the buffer byte position POS_BYTE of the current buffer to
368 the previous character boundary. No range checking of POS. */
369
370#define DEC_POS(pos_byte) \
371 do { \
372 unsigned char *p; \
373 \
374 pos_byte--; \
375 if (pos_byte < GPT_BYTE) \
376 p = BEG_ADDR + pos_byte - 1; \
377 else \
378 p = BEG_ADDR + GAP_SIZE + pos_byte - 1; \
379 while (!CHAR_HEAD_P (*p)) \
380 { \
381 p--; \
382 pos_byte--; \
383 } \
384 } while (0)
385
386/* Increment both CHARPOS and BYTEPOS, each in the appropriate way. */
387
388#define INC_BOTH(charpos, bytepos) \
389 do \
390 { \
391 (charpos)++; \
392 if (NILP (current_buffer->enable_multibyte_characters)) \
393 (bytepos)++; \
394 else \
395 INC_POS ((bytepos)); \
396 } \
397 while (0)
398
399
400/* Decrement both CHARPOS and BYTEPOS, each in the appropriate way. */
401
402#define DEC_BOTH(charpos, bytepos) \
403 do \
404 { \
405 (charpos)--; \
406 if (NILP (current_buffer->enable_multibyte_characters)) \
407 (bytepos)--; \
408 else \
409 DEC_POS ((bytepos)); \
410 } \
411 while (0)
412
413
414/* Increase the buffer byte position POS_BYTE of the current buffer to
415 the next character boundary. This macro relies on the fact that
416 *GPT_ADDR and *Z_ADDR are always accessible and the values are
417 '\0'. No range checking of POS_BYTE. */
418
419#define BUF_INC_POS(buf, pos_byte) \
420 do { \
421 unsigned char *p = BUF_BYTE_ADDRESS (buf, pos_byte); \
422 pos_byte += BYTES_BY_CHAR_HEAD (*p); \
423 } while (0)
424
425
426/* Decrease the buffer byte position POS_BYTE of the current buffer to
427 the previous character boundary. No range checking of POS_BYTE. */
428
429#define BUF_DEC_POS(buf, pos_byte) \
430 do { \
431 unsigned char *p; \
432 pos_byte--; \
433 if (pos_byte < BUF_GPT_BYTE (buf)) \
434 p = BUF_BEG_ADDR (buf) + pos_byte - 1; \
435 else \
436 p = BUF_BEG_ADDR (buf) + BUF_GAP_SIZE (buf) + pos_byte - 1; \
437 while (!CHAR_HEAD_P (*p)) \
438 { \
439 p--; \
440 pos_byte--; \
441 } \
442 } while (0)
443
444
445#define MAYBE_UNIFY_CHAR(c) \
446 if (CHAR_TABLE_P (Vchar_unify_table)) \
447 { \
448 Lisp_Object val; \
449 int unified; \
450 \
451 val = CHAR_TABLE_REF (Vchar_unify_table, c); \
452 if (SYMBOLP (val)) \
453 { \
454 Funify_charset (val, Qnil); \
455 val = CHAR_TABLE_REF (Vchar_unify_table, c); \
456 } \
457 if ((unified = XINT (val)) >= 0) \
458 c = unified; \
459 } \
460 else
461
462/* Return the width of ASCII character C. The width is measured by
463 how many columns occupied on the screen when displayed in the
464 current buffer. */
465
466#define ASCII_CHAR_WIDTH(c) \
467 (c < 0x20 \
468 ? (c == '\t' \
469 ? XFASTINT (current_buffer->tab_width) \
470 : (c == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2))) \
471 : (c < 0x7f \
472 ? 1 \
473 : ((NILP (current_buffer->ctl_arrow) ? 4 : 2))))
474
475/* Return the width of character C. The width is measured by how many
476 columns occupied on the screen when displayed in the current
477 buffer. */
478
479#define CHAR_WIDTH(c) \
480 (ASCII_CHAR_P (c) \
481 ? ASCII_CHAR_WIDTH (c) \
482 : XINT (CHAR_TABLE_REF (Vchar_width_table, c)))
483
484extern int char_string_with_unification P_ ((int, unsigned char *,
485 unsigned char **));
486extern int string_char_with_unification P_ ((unsigned char *,
487 unsigned char **, int *));
488
489extern int translate_char P_ ((Lisp_Object, int c));
490extern int char_printable_p P_ ((int c));
491extern void parse_str_as_multibyte P_ ((unsigned char *, int, int *, int *));
492extern int parse_str_to_multibyte P_ ((unsigned char *, int));
493extern int str_as_multibyte P_ ((unsigned char *, int, int, int *));
494extern int str_to_multibyte P_ ((unsigned char *, int, int));
495extern int str_as_unibyte P_ ((unsigned char *, int));
496extern int strwidth P_ ((unsigned char *, int));
497extern int c_string_width P_ ((unsigned char *, int, int, int *, int *));
498extern int lisp_string_width P_ ((Lisp_Object, int, int *, int *));
499
500extern Lisp_Object Vprintable_chars;
501
502extern Lisp_Object Qcharacterp, Qauto_fill_chars;
503extern Lisp_Object Vtranslation_table_vector;
504extern Lisp_Object Vchar_width_table;
505extern Lisp_Object Vchar_direction_table;
506extern Lisp_Object Vchar_unify_table;
507
508/* Return a translation table of id number ID. */
509#define GET_TRANSLATION_TABLE(id) \
510 (XCDR(XVECTOR(Vtranslation_table_vector)->contents[(id)]))
511
512/* A char-table for characters which may invoke auto-filling. */
513extern Lisp_Object Vauto_fill_chars;
514
515/* Copy LEN bytes from FROM to TO. This macro should be used only
516 when a caller knows that LEN is short and the obvious copy loop is
517 faster than calling bcopy which has some overhead. Copying a
518 multibyte sequence of a character is the typical case. */
519
520#define BCOPY_SHORT(from, to, len) \
521 do { \
522 int i = len; \
523 unsigned char *from_p = from, *to_p = to; \
524 while (i--) *to_p++ = *from_p++; \
525 } while (0)
526
527#define DEFSYM(sym, name) \
528 do { (sym) = intern ((name)); staticpro (&(sym)); } while (0)
529
530#endif /* EMACS_CHARACTER_H */