aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorKenichi Handa1999-09-03 01:28:42 +0000
committerKenichi Handa1999-09-03 01:28:42 +0000
commit384107f281bd6a43a4c66e9bbb1826e1bc8cec05 (patch)
tree9d31c1a4424483e5165f51a2f553b402513bc19e /src
parenta260318815a268ae273669e3116274732e1f1945 (diff)
downloademacs-384107f281bd6a43a4c66e9bbb1826e1bc8cec05.tar.gz
emacs-384107f281bd6a43a4c66e9bbb1826e1bc8cec05.zip
(MAKE_NON_ASCII_CHAR): Handle the case that C1 or C2
are negative. (MAKE_CHAR): Don't set MSBs of C1 and C2 to 0. (VALID_MULTIBYTE_CHAR_P): This macro deleted. (PARSE_COMPOSITE_SEQ): New macro. (PARSE_CHARACTER_SEQ): New macro. (PARSE_MULTIBYTE_SEQ): New macro. (CHAR_PRINTABLE_P): New macro. (STRING_CHAR): Adjusted for the change of string_to_non_ascii_char. (STRING_CHAR_AND_LENGTH): Likewise. (STRING_CHAR_AND_CHAR_LENGTH): Define it as STRING_CHAR_AND_LENGTH. (INC_POS): Use the macro PARSE_MULTIBYTE_SEQ. (DEC_POS, BUF_INC_POS, BUF_DEC_POS): Likewise.
Diffstat (limited to 'src')
-rw-r--r--src/charset.h251
1 files changed, 149 insertions, 102 deletions
diff --git a/src/charset.h b/src/charset.h
index 1b2ac59bec5..775b755c372 100644
--- a/src/charset.h
+++ b/src/charset.h
@@ -376,19 +376,19 @@ extern Lisp_Object Vcharset_symbol_table;
376#define CHARSET_SYMBOL(charset) \ 376#define CHARSET_SYMBOL(charset) \
377 XVECTOR (Vcharset_symbol_table)->contents[charset] 377 XVECTOR (Vcharset_symbol_table)->contents[charset]
378 378
379/* 1 if CHARSET is valid, else 0. */ 379/* 1 if CHARSET is in valid value range, else 0. */
380#define CHARSET_VALID_P(charset) \ 380#define CHARSET_VALID_P(charset) \
381 ((charset) == 0 \ 381 ((charset) == 0 \
382 || ((charset) >= 0x80 && (charset) <= MAX_CHARSET_OFFICIAL_DIMENSION2) \ 382 || ((charset) >= 0x80 && (charset) <= MAX_CHARSET_OFFICIAL_DIMENSION2) \
383 || ((charset) >= MIN_CHARSET_PRIVATE_DIMENSION1 && (charset) <= MAX_CHARSET)) 383 || ((charset) >= MIN_CHARSET_PRIVATE_DIMENSION1 && (charset) <= MAX_CHARSET))
384 384
385/* 1 if CHARSET is already defined, else 0. */ 385/* 1 if CHARSET is already defined (and not CHARSET_COMPOSITION), else 0. */
386#define CHARSET_DEFINED_P(charset) \ 386#define CHARSET_DEFINED_P(charset) \
387 (((charset) >= 0) && ((charset) <= MAX_CHARSET) \ 387 (((charset) >= 0) && ((charset) <= MAX_CHARSET) \
388 && !NILP (CHARSET_TABLE_ENTRY (charset))) 388 && !NILP (CHARSET_TABLE_ENTRY (charset)))
389 389
390/* Since the information CHARSET-BYTES and CHARSET-WIDTH of 390/* Since the information CHARSET-BYTES and CHARSET-WIDTH of
391 Vcharset_table can be retrieved only from the first byte of 391 Vcharset_table can be retrieved only the first byte of
392 multi-byte form (an ASCII code or a base leading-code), we provide 392 multi-byte form (an ASCII code or a base leading-code), we provide
393 here tables to be used by macros BYTES_BY_CHAR_HEAD and 393 here tables to be used by macros BYTES_BY_CHAR_HEAD and
394 WIDTH_BY_CHAR_HEAD for faster information retrieval. */ 394 WIDTH_BY_CHAR_HEAD for faster information retrieval. */
@@ -451,12 +451,16 @@ extern int width_by_char_head[256];
451 position-codes are C1 and C2. DIMENSION1 character ignores C2. */ 451 position-codes are C1 and C2. DIMENSION1 character ignores C2. */
452#define MAKE_NON_ASCII_CHAR(charset, c1, c2) \ 452#define MAKE_NON_ASCII_CHAR(charset, c1, c2) \
453 ((charset) == CHARSET_COMPOSITION \ 453 ((charset) == CHARSET_COMPOSITION \
454 ? MAKE_COMPOSITE_CHAR (((c1) << 7) + (c2)) \ 454 ? ((c2) < 0 \
455 ? (((charset) - 0x70) << 7) + (c1) \
456 : MAKE_COMPOSITE_CHAR (((c1) << 7) + (c2))) \
455 : (! CHARSET_DEFINED_P (charset) || CHARSET_DIMENSION (charset) == 1 \ 457 : (! CHARSET_DEFINED_P (charset) || CHARSET_DIMENSION (charset) == 1 \
456 ? (((charset) - 0x70) << 7) | (c1) \ 458 ? (((charset) - 0x70) << 7) | ((c1) <= 0 ? 0 : (c1)) \
457 : ((charset) < MIN_CHARSET_PRIVATE_DIMENSION2 \ 459 : ((charset) < MIN_CHARSET_PRIVATE_DIMENSION2 \
458 ? (((charset) - 0x8F) << 14) | ((c1) << 7) | (c2) \ 460 ? ((((charset) - 0x8F) << 14) \
459 : (((charset) - 0xE0) << 14) | ((c1) << 7) | (c2)))) 461 | ((c1) <= 0 ? 0 : ((c1) << 7)) | ((c2) <= 0 ? 0 : (c2))) \
462 : ((((charset) - 0xE0) << 14) \
463 | ((c1) <= 0 ? 0 : ((c1) << 7)) | ((c2) <= 0 ? 0 : (c2))))))
460 464
461/* Return a composite character of which CMPCHAR-ID is ID. */ 465/* Return a composite character of which CMPCHAR-ID is ID. */
462#define MAKE_COMPOSITE_CHAR(id) (MIN_CHAR_COMPOSITION + (id)) 466#define MAKE_COMPOSITE_CHAR(id) (MIN_CHAR_COMPOSITION + (id))
@@ -466,10 +470,10 @@ extern int width_by_char_head[256];
466 470
467/* Return a character of which charset is CHARSET and position-codes 471/* Return a character of which charset is CHARSET and position-codes
468 are C1 and C2. DIMENSION1 character ignores C2. */ 472 are C1 and C2. DIMENSION1 character ignores C2. */
469#define MAKE_CHAR(charset, c1, c2) \ 473#define MAKE_CHAR(charset, c1, c2) \
470 ((charset) == CHARSET_ASCII \ 474 ((charset) == CHARSET_ASCII \
471 ? (c1) \ 475 ? (c1) \
472 : MAKE_NON_ASCII_CHAR ((charset), (c1) & 0x7F, (c2) & 0x7F)) 476 : MAKE_NON_ASCII_CHAR ((charset), (c1), (c2)))
473 477
474/* If GENERICP is nonzero, return nonzero iff C is a valid normal or 478/* If GENERICP is nonzero, return nonzero iff C is a valid normal or
475 generic character. If GENERICP is zero, return nonzero iff C is a 479 generic character. If GENERICP is zero, return nonzero iff C is a
@@ -484,22 +488,43 @@ extern int width_by_char_head[256];
484 488
485#define DEFAULT_NONASCII_INSERT_OFFSET 0x800 489#define DEFAULT_NONASCII_INSERT_OFFSET 0x800
486 490
487/* Check if the character C is valid as a multibyte character. */ 491/* Parse string STR of length LENGTH (>= 2) and check if a composite
488 492 character is at STR. Actually, the whole multibyte sequence
489#define VALID_MULTIBYTE_CHAR_P(c) \ 493 starting with LEADING_CODE_COMPOSITION is treated as a single
490 ((c) < MIN_CHAR_OFFICIAL_DIMENSION2 \ 494 multibyte character. So, here, we just set BYTES to LENGTH. */
491 ? (!NILP (XCHAR_TABLE (Vcharset_table)->contents[CHAR_FIELD2 (c) \ 495
492 + 0xF0]) \ 496#define PARSE_COMPOSITE_SEQ(str, length, bytes) \
493 && CHAR_FIELD3 (c) >= 32) \ 497 do { \
494 : ((c) < MIN_CHAR_PRIVATE_DIMENSION2 \ 498 (bytes) = (length); \
495 ? (!NILP (XCHAR_TABLE (Vcharset_table)->contents[CHAR_FIELD1 (c) \ 499 } while (0)
496 + 0x10F]) \ 500
497 && CHAR_FIELD2 (c) >= 32 && CHAR_FIELD3 (c) >= 32) \ 501
498 : ((c) < MIN_CHAR_COMPOSITION \ 502/* Parse string STR of length LENGTH (>= 2) and check if a
499 ? (!NILP (XCHAR_TABLE (Vcharset_table)->contents[CHAR_FIELD1 (c) \ 503 non-composite multibyte character is at STR. Set BYTES to the
500 + 0x160]) \ 504 actual sequence length. */
501 && CHAR_FIELD2 (c) >= 32 && CHAR_FIELD3 (c) >= 32) \ 505
502 : (c) < MIN_CHAR_COMPOSITION + n_cmpchars))) 506#define PARSE_CHARACTER_SEQ(str, length, bytes) \
507 do { \
508 (bytes) = BYTES_BY_CHAR_HEAD ((str)[0]); \
509 if ((bytes) > (length)) \
510 (bytes) = (length); \
511 } while (0)
512
513/* Parse string STR of length LENGTH and check if a multibyte
514 character is at STR. If so, set BYTES for that character, else
515 set BYTES to 1. */
516
517#define PARSE_MULTIBYTE_SEQ(str, length, bytes) \
518 do { \
519 int i = 1; \
520 while (i < (length) && ! CHAR_HEAD_P ((str)[i])) i++; \
521 if (i == 1) \
522 (bytes) = 1; \
523 else if ((str)[0] == LEADING_CODE_COMPOSITION) \
524 PARSE_COMPOSITE_SEQ (str, i, bytes); \
525 else \
526 PARSE_CHARACTER_SEQ (str, i, bytes); \
527 } while (0)
503 528
504/* The charset of non-ASCII character C is stored in CHARSET, and the 529/* The charset of non-ASCII character C is stored in CHARSET, and the
505 position-codes of C are stored in C1 and C2. 530 position-codes of C are stored in C1 and C2.
@@ -521,13 +546,20 @@ extern int width_by_char_head[256];
521 546
522/* The charset of character C is stored in CHARSET, and the 547/* The charset of character C is stored in CHARSET, and the
523 position-codes of C are stored in C1 and C2. 548 position-codes of C are stored in C1 and C2.
524 We store -1 in C2 if the character is just 2 bytes. */ 549 We store -1 in C2 if the dimension of the charset is 1. */
525 550
526#define SPLIT_CHAR(c, charset, c1, c2) \ 551#define SPLIT_CHAR(c, charset, c1, c2) \
527 (SINGLE_BYTE_CHAR_P (c) \ 552 (SINGLE_BYTE_CHAR_P (c) \
528 ? charset = CHARSET_ASCII, c1 = (c), c2 = -1 \ 553 ? charset = CHARSET_ASCII, c1 = (c), c2 = -1 \
529 : SPLIT_NON_ASCII_CHAR (c, charset, c1, c2)) 554 : SPLIT_NON_ASCII_CHAR (c, charset, c1, c2))
530 555
556/* Return 1 iff character C has a valid printable glyph. */
557#define CHAR_PRINTABLE_P(c) \
558 (SINGLE_BYTE_CHAR_P (c) \
559 || ((c) >= MIN_CHAR_COMPOSITION \
560 ? (c) < MAX_CHAR \
561 : char_printable_p (c)))
562
531/* The charset of the character at STR is stored in CHARSET, and the 563/* The charset of the character at STR is stored in CHARSET, and the
532 position-codes are stored in C1 and C2. 564 position-codes are stored in C1 and C2.
533 We store -1 in C2 if the character is just 2 bytes. 565 We store -1 in C2 if the character is just 2 bytes.
@@ -580,23 +612,20 @@ extern int iso_charset_table[2][2][128];
580#define STRING_CHAR(str, len) \ 612#define STRING_CHAR(str, len) \
581 (BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) == 1 \ 613 (BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) == 1 \
582 ? (unsigned char) *(str) \ 614 ? (unsigned char) *(str) \
583 : string_to_non_ascii_char (str, len, 0, 0)) 615 : string_to_non_ascii_char (str, len, 0))
584 616
585/* This is like STRING_CHAR but the third arg ACTUAL_LEN is set to 617/* This is like STRING_CHAR but the third arg ACTUAL_LEN is set to the
586 the length of the multi-byte form. Just to know the length, use 618 length of the multi-byte form. Just to know the length, use
587 MULTIBYTE_FORM_LENGTH. */ 619 MULTIBYTE_FORM_LENGTH. */
588 620
589#define STRING_CHAR_AND_LENGTH(str, len, actual_len) \ 621#define STRING_CHAR_AND_LENGTH(str, len, actual_len) \
590 (BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) == 1 \ 622 (BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) == 1 \
591 ? (actual_len = 1), (unsigned char) *(str) \ 623 ? ((actual_len) = 1), (unsigned char) *(str) \
592 : string_to_non_ascii_char (str, len, &actual_len, 0)) 624 : string_to_non_ascii_char (str, len, &(actual_len)))
593 625
594/* This is like STRING_CHAR_AND_LENGTH but the third arg ACTUAL_LEN 626/* This is like STRING_CHAR_AND_LENGTH but the third arg ACTUAL_LEN
595 does not include garbage bytes following the multibyte character. */ 627 does not include garbage bytes following the multibyte character. */
596#define STRING_CHAR_AND_CHAR_LENGTH(str, len, actual_len) \ 628#define STRING_CHAR_AND_CHAR_LENGTH STRING_CHAR_AND_LENGTH
597 (BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) == 1 \
598 ? (actual_len = 1), (unsigned char) *(str) \
599 : string_to_non_ascii_char (str, len, &actual_len, 1))
600 629
601/* Fetch the "next" multibyte character from Lisp string STRING 630/* Fetch the "next" multibyte character from Lisp string STRING
602 at byte position BYTEIDX, character position CHARIDX. 631 at byte position BYTEIDX, character position CHARIDX.
@@ -654,36 +683,45 @@ else
654 683
655#ifdef emacs 684#ifdef emacs
656 685
657/* Increase the buffer point POS of the current buffer to the next 686/* Increase the buffer byte position POS_BYTE of the current buffer to
658 character boundary. This macro relies on the fact that *GPT_ADDR 687 the next character boundary. This macro relies on the fact that
659 and *Z_ADDR are always accessible and the values are '\0'. No 688 *GPT_ADDR and *Z_ADDR are always accessible and the values are
660 range checking of POS. */ 689 '\0'. No range checking of POS_BYTE. */
661#define INC_POS(pos) \ 690#define INC_POS(pos_byte) \
662 do { \ 691 do { \
663 unsigned char *p = BYTE_POS_ADDR (pos); \ 692 unsigned char *p = BYTE_POS_ADDR (pos_byte); \
664 pos++; \ 693 if (BASE_LEADING_CODE_P (*p)) \
665 if (BASE_LEADING_CODE_P (*p++)) \ 694 { \
666 while (!CHAR_HEAD_P (*p)) p++, pos++; \ 695 int len, bytes; \
696 len = Z_BYTE - pos_byte; \
697 PARSE_MULTIBYTE_SEQ (p, len, bytes); \
698 pos_byte += bytes; \
699 } \
700 else \
701 pos_byte++; \
667 } while (0) 702 } while (0)
668 703
669/* Decrease the buffer point POS of the current buffer to the previous 704/* Decrease the buffer byte position POS_BYTE of the current buffer to
670 character boundary. No range checking of POS. */ 705 the previous character boundary. No range checking of POS_BYTE. */
671#define DEC_POS(pos) \ 706#define DEC_POS(pos_byte) \
672 do { \ 707 do { \
673 unsigned char *p, *p_min; \ 708 unsigned char *p, *p_min; \
674 \ 709 \
675 pos--; \ 710 pos_byte--; \
676 if (pos < GPT_BYTE) \ 711 if (pos_byte < GPT_BYTE) \
677 p = BEG_ADDR + pos - 1, p_min = BEG_ADDR; \ 712 p = BEG_ADDR + pos_byte - 1, p_min = BEG_ADDR; \
678 else \ 713 else \
679 p = BEG_ADDR + GAP_SIZE + pos - 1, p_min = GAP_END_ADDR; \ 714 p = BEG_ADDR + GAP_SIZE + pos_byte - 1, p_min = GAP_END_ADDR; \
680 if (p > p_min && !CHAR_HEAD_P (*p)) \ 715 if (p > p_min && !CHAR_HEAD_P (*p)) \
681 { \ 716 { \
682 int pos_saved = pos--; \ 717 unsigned char *pend = p--; \
683 p--; \ 718 int len, bytes; \
684 while (p > p_min && !CHAR_HEAD_P (*p)) p--, pos--; \ 719 while (p > p_min && !CHAR_HEAD_P (*p)) p--; \
685 if (!BASE_LEADING_CODE_P (*p)) pos = pos_saved; \ 720 len = pend + 1 - p; \
686 } \ 721 PARSE_MULTIBYTE_SEQ (p, len, bytes); \
722 if (bytes == len) \
723 pos_byte -= len - 1; \
724 } \
687 } while (0) 725 } while (0)
688 726
689/* Increment both CHARPOS and BYTEPOS, each in the appropriate way. */ 727/* Increment both CHARPOS and BYTEPOS, each in the appropriate way. */
@@ -712,41 +750,50 @@ do \
712 } \ 750 } \
713while (0) 751while (0)
714 752
715/* Increase the buffer point POS of the current buffer to the next 753/* Increase the buffer byte position POS_BYTE of the current buffer to
716 character boundary. This macro relies on the fact that *GPT_ADDR 754 the next character boundary. This macro relies on the fact that
717 and *Z_ADDR are always accessible and the values are '\0'. No 755 *GPT_ADDR and *Z_ADDR are always accessible and the values are
718 range checking of POS. */ 756 '\0'. No range checking of POS_BYTE. */
719#define BUF_INC_POS(buf, pos) \ 757#define BUF_INC_POS(buf, pos_byte) \
720 do { \ 758 do { \
721 unsigned char *p = BUF_BYTE_ADDRESS (buf, pos); \ 759 unsigned char *p = BUF_BYTE_ADDRESS (buf, pos_byte); \
722 pos++; \ 760 if (BASE_LEADING_CODE_P (*p)) \
723 if (BASE_LEADING_CODE_P (*p++)) \
724 while (!CHAR_HEAD_P (*p)) p++, pos++; \
725 } while (0)
726
727/* Decrease the buffer point POS of the current buffer to the previous
728 character boundary. No range checking of POS. */
729#define BUF_DEC_POS(buf, pos) \
730 do { \
731 unsigned char *p, *p_min; \
732 int pos_saved = --pos; \
733 if (pos < BUF_GPT_BYTE (buf)) \
734 { \
735 p = BUF_BEG_ADDR (buf) + pos - 1; \
736 p_min = BUF_BEG_ADDR (buf); \
737 } \
738 else \
739 { \
740 p = BUF_BEG_ADDR (buf) + BUF_GAP_SIZE (buf) + pos - 1; \
741 p_min = BUF_GAP_END_ADDR (buf); \
742 } \
743 if (p > p_min && !CHAR_HEAD_P (*p)) \
744 { \ 761 { \
745 int pos_saved = pos--; \ 762 int len, bytes; \
746 p--; \ 763 len = BUF_Z_BYTE (buf) - pos_byte; \
747 while (p > p_min && !CHAR_HEAD_P (*p)) p--, pos--; \ 764 PARSE_MULTIBYTE_SEQ (p, len, bytes); \
748 if (!BASE_LEADING_CODE_P (*p)) pos = pos_saved; \ 765 pos_byte += bytes; \
749 } \ 766 } \
767 else \
768 pos_byte++; \
769 } while (0)
770
771/* Decrease the buffer byte position POS_BYTE of the current buffer to
772 the previous character boundary. No range checking of POS_BYTE. */
773#define BUF_DEC_POS(buf, pos_byte) \
774 do { \
775 unsigned char *p, *p_min; \
776 pos_byte--; \
777 if (pos_byte < BUF_GPT_BYTE (buf)) \
778 { \
779 p = BUF_BEG_ADDR (buf) + pos_byte - 1; \
780 p_min = BUF_BEG_ADDR (buf); \
781 } \
782 else \
783 { \
784 p = BUF_BEG_ADDR (buf) + BUF_GAP_SIZE (buf) + pos_byte - 1; \
785 p_min = BUF_GAP_END_ADDR (buf); \
786 } \
787 if (p > p_min && !CHAR_HEAD_P (*p)) \
788 { \
789 unsigned char *pend = p--; \
790 int len, bytes; \
791 while (p > p_min && !CHAR_HEAD_P (*p)) p--; \
792 len = pend + 1 - p; \
793 PARSE_MULTIBYTE_SEQ (p, len, bytes); \
794 if (bytes == len) \
795 pos_byte -= len - 1; \
796 } \
750 } while (0) 797 } while (0)
751 798
752#endif /* emacs */ 799#endif /* emacs */
@@ -806,9 +853,9 @@ extern void invalid_character P_ ((int));
806extern int translate_char P_ ((Lisp_Object, int, int, int, int)); 853extern int translate_char P_ ((Lisp_Object, int, int, int, int));
807extern int split_non_ascii_string P_ ((const unsigned char *, int, int *, 854extern int split_non_ascii_string P_ ((const unsigned char *, int, int *,
808 unsigned char *, unsigned char *)); 855 unsigned char *, unsigned char *));
809extern int string_to_non_ascii_char P_ ((const unsigned char *, int, int *, 856extern int string_to_non_ascii_char P_ ((const unsigned char *, int, int *));
810 int));
811extern int non_ascii_char_to_string P_ ((int, unsigned char *, unsigned char **)); 857extern int non_ascii_char_to_string P_ ((int, unsigned char *, unsigned char **));
858extern int char_printable_p P_ ((int c));
812extern int multibyte_form_length P_ ((const unsigned char *, int)); 859extern int multibyte_form_length P_ ((const unsigned char *, int));
813extern int str_cmpchar_id P_ ((const unsigned char *, int)); 860extern int str_cmpchar_id P_ ((const unsigned char *, int));
814extern int get_charset_id P_ ((Lisp_Object)); 861extern int get_charset_id P_ ((Lisp_Object));