diff options
| author | Kenichi Handa | 1999-09-03 01:28:42 +0000 |
|---|---|---|
| committer | Kenichi Handa | 1999-09-03 01:28:42 +0000 |
| commit | cae184f227ba2607c4ea9ff7b23a3d7ebb0779c4 (patch) | |
| tree | 8873fc5800d66af643973b1b23578701ed994de6 | |
| parent | 1be6387dbd866b33f74edd9389861797dad02c92 (diff) | |
| download | emacs-cae184f227ba2607c4ea9ff7b23a3d7ebb0779c4.tar.gz emacs-cae184f227ba2607c4ea9ff7b23a3d7ebb0779c4.zip | |
(count_combining_composition): New function.
(count_combining_before): Adjust the way to check byte-combining
possibility for the new handling of multibyte sequence. Call
count_combining_composition for a composite character.
(count_combining_after): Likewise.
| -rw-r--r-- | src/insdel.c | 128 |
1 files changed, 93 insertions, 35 deletions
diff --git a/src/insdel.c b/src/insdel.c index e73313c3730..738d55909ed 100644 --- a/src/insdel.c +++ b/src/insdel.c | |||
| @@ -879,6 +879,33 @@ insert_1 (string, nbytes, inherit, prepare, before_markers) | |||
| 879 | inherit, prepare, before_markers); | 879 | inherit, prepare, before_markers); |
| 880 | } | 880 | } |
| 881 | 881 | ||
| 882 | /* See if the byte sequence at STR1 of length LEN1 combines with the | ||
| 883 | byte sequence at STR2 of length LEN2 to form a single composite | ||
| 884 | character. If so, return the number of bytes at the start of STR2 | ||
| 885 | which combine in this way. Otherwise, return 0. If STR3 is not | ||
| 886 | NULL, it is a byte sequence of length LEN3 to be appended to STR1 | ||
| 887 | before checking the combining. */ | ||
| 888 | int | ||
| 889 | count_combining_composition (str1, len1, str2, len2, str3, len3) | ||
| 890 | unsigned char *str1, *str2, *str3; | ||
| 891 | int len1, len2, len3; | ||
| 892 | { | ||
| 893 | int len = len1 + len2 + len3; | ||
| 894 | unsigned char *buf = (unsigned char *) alloca (len + 1); | ||
| 895 | int bytes; | ||
| 896 | |||
| 897 | bcopy (str1, buf, len1); | ||
| 898 | if (str3) | ||
| 899 | { | ||
| 900 | bcopy (str3, buf + len1, len3); | ||
| 901 | len1 += len3; | ||
| 902 | } | ||
| 903 | bcopy (str2, buf + len1 , len2); | ||
| 904 | buf[len] = 0; | ||
| 905 | PARSE_MULTIBYTE_SEQ (buf, len, bytes); | ||
| 906 | return (bytes <= len1 ? 0 : bytes - len1); | ||
| 907 | } | ||
| 908 | |||
| 882 | /* See if the bytes before POS/POS_BYTE combine with bytes | 909 | /* See if the bytes before POS/POS_BYTE combine with bytes |
| 883 | at the start of STRING to form a single character. | 910 | at the start of STRING to form a single character. |
| 884 | If so, return the number of bytes at the start of STRING | 911 | If so, return the number of bytes at the start of STRING |
| @@ -890,30 +917,44 @@ count_combining_before (string, length, pos, pos_byte) | |||
| 890 | int length; | 917 | int length; |
| 891 | int pos, pos_byte; | 918 | int pos, pos_byte; |
| 892 | { | 919 | { |
| 893 | int opos = pos, opos_byte = pos_byte; | 920 | int len, combining_bytes; |
| 894 | int c; | 921 | unsigned char *p; |
| 895 | unsigned char *p = string; | ||
| 896 | 922 | ||
| 897 | if (NILP (current_buffer->enable_multibyte_characters)) | 923 | if (NILP (current_buffer->enable_multibyte_characters)) |
| 898 | return 0; | 924 | return 0; |
| 899 | if (length == 0 || CHAR_HEAD_P (*string)) | 925 | |
| 926 | /* First, we can exclude the following cases: | ||
| 927 | (1) STRING[0] can't be a trailing byte of a multibyte sequence. | ||
| 928 | (2) POS is the start of the current buffer. | ||
| 929 | (3) The character before POS is not a multibyte character. */ | ||
| 930 | if (length == 0 || CHAR_HEAD_P (*string)) /* case (1) */ | ||
| 900 | return 0; | 931 | return 0; |
| 901 | if (pos == BEG) | 932 | if (pos_byte == BEG_BYTE) /* case (2) */ |
| 902 | return 0; | 933 | return 0; |
| 903 | c = FETCH_BYTE (pos_byte - 1); | 934 | len = 1; |
| 904 | if (ASCII_BYTE_P (c)) | 935 | p = BYTE_POS_ADDR (pos_byte - 1); |
| 936 | while (! CHAR_HEAD_P (*p)) p--, len++; | ||
| 937 | if (! BASE_LEADING_CODE_P (*p)) /* case (3) */ | ||
| 905 | return 0; | 938 | return 0; |
| 906 | DEC_BOTH (pos, pos_byte); | 939 | |
| 907 | c = FETCH_BYTE (pos_byte); | 940 | /* A sequence of a composite character requires special handling. */ |
| 908 | if (! BASE_LEADING_CODE_P (c)) | 941 | if (*p == LEADING_CODE_COMPOSITION) |
| 942 | return count_combining_composition (p, len, string, length, NULL, 0); | ||
| 943 | |||
| 944 | combining_bytes = BYTES_BY_CHAR_HEAD (*p) - len; | ||
| 945 | if (combining_bytes <= 0) | ||
| 946 | /* The character preceding POS is either complete, leaving no | ||
| 947 | room for combining bytes (combining_bytes == 0), or an | ||
| 948 | independent 8-bit character (combining_bytes < 0). */ | ||
| 909 | return 0; | 949 | return 0; |
| 910 | 950 | ||
| 911 | /* We have a combination situation. | 951 | /* We have a combination situation. Count the bytes at STRING that |
| 912 | Count the bytes at STRING that will combine. */ | 952 | may combine. */ |
| 953 | p = string + 1; | ||
| 913 | while (!CHAR_HEAD_P (*p) && p < string + length) | 954 | while (!CHAR_HEAD_P (*p) && p < string + length) |
| 914 | p++; | 955 | p++; |
| 915 | 956 | ||
| 916 | return p - string; | 957 | return (combining_bytes < p - string ? combining_bytes : p - string); |
| 917 | } | 958 | } |
| 918 | 959 | ||
| 919 | /* See if the bytes after POS/POS_BYTE combine with bytes | 960 | /* See if the bytes after POS/POS_BYTE combine with bytes |
| @@ -929,12 +970,25 @@ count_combining_after (string, length, pos, pos_byte) | |||
| 929 | { | 970 | { |
| 930 | int opos = pos, opos_byte = pos_byte; | 971 | int opos = pos, opos_byte = pos_byte; |
| 931 | int i; | 972 | int i; |
| 932 | int c; | 973 | int c, bytes; |
| 974 | unsigned char *bufp; | ||
| 933 | 975 | ||
| 934 | if (NILP (current_buffer->enable_multibyte_characters)) | 976 | if (NILP (current_buffer->enable_multibyte_characters)) |
| 935 | return 0; | 977 | return 0; |
| 936 | if (length > 0 && ASCII_BYTE_P (string[length - 1])) | 978 | |
| 979 | /* First, we can exclude the following cases: | ||
| 980 | (1) The last byte of STRING is an ASCII byte. | ||
| 981 | (2) POS is the end of the current buffer. | ||
| 982 | (3) The byte at POS can't be a trailing byte of a multibyte | ||
| 983 | character. */ | ||
| 984 | if (length > 0 && ASCII_BYTE_P (string[length - 1])) /* case (1) */ | ||
| 985 | return 0; | ||
| 986 | if (pos_byte == Z_BYTE) /* case (2) */ | ||
| 937 | return 0; | 987 | return 0; |
| 988 | bufp = BYTE_POS_ADDR (pos_byte); | ||
| 989 | if (CHAR_HEAD_P (*bufp)) /* case (3) */ | ||
| 990 | return 0; | ||
| 991 | |||
| 938 | i = length - 1; | 992 | i = length - 1; |
| 939 | while (i >= 0 && ! CHAR_HEAD_P (string[i])) | 993 | while (i >= 0 && ! CHAR_HEAD_P (string[i])) |
| 940 | { | 994 | { |
| @@ -942,33 +996,37 @@ count_combining_after (string, length, pos, pos_byte) | |||
| 942 | } | 996 | } |
| 943 | if (i < 0) | 997 | if (i < 0) |
| 944 | { | 998 | { |
| 945 | /* All characters in `string' are not character head. | 999 | /* All characters in STRING are not character head. We must |
| 946 | We must check also preceding bytes at POS. | 1000 | check also preceding bytes at POS. We are sure that the gap |
| 947 | We are sure that the gap is at POS. */ | 1001 | is at POS. */ |
| 948 | string = BEG_ADDR; | 1002 | unsigned char *p = BEG_ADDR; |
| 949 | i = pos_byte - 2; | 1003 | i = pos_byte - 2; |
| 950 | while (i >= 0 && ! CHAR_HEAD_P (string[i])) | 1004 | while (i >= 0 && ! CHAR_HEAD_P (p[i])) |
| 951 | i--; | 1005 | i--; |
| 952 | if (i < 0 || !BASE_LEADING_CODE_P (string[i])) | 1006 | if (i < 0 || !BASE_LEADING_CODE_P (p[i])) |
| 953 | return 0; | 1007 | return 0; |
| 1008 | /* A sequence of a composite character requires special handling. */ | ||
| 1009 | if (p[i] == LEADING_CODE_COMPOSITION) | ||
| 1010 | return count_combining_composition (p + i, pos_byte - 1 - i, | ||
| 1011 | bufp, Z_BYTE - pos_byte, | ||
| 1012 | string, length); | ||
| 1013 | bytes = BYTES_BY_CHAR_HEAD (p[i]); | ||
| 1014 | return (bytes <= pos_byte - 1 - i + length | ||
| 1015 | ? 0 | ||
| 1016 | : bytes - (pos_byte - 1 - i + length)); | ||
| 954 | } | 1017 | } |
| 955 | else if (!BASE_LEADING_CODE_P (string[i])) | 1018 | if (!BASE_LEADING_CODE_P (string[i])) |
| 956 | return 0; | 1019 | return 0; |
| 1020 | /* A sequence of a composite character requires special handling. */ | ||
| 1021 | if (string[i] == LEADING_CODE_COMPOSITION) | ||
| 1022 | return count_combining_composition (string + i, length - i, | ||
| 1023 | bufp, Z_BYTE - pos_byte, NULL, 0); | ||
| 957 | 1024 | ||
| 958 | if (pos == Z) | 1025 | bytes = BYTES_BY_CHAR_HEAD (string[i]) - (length - i); |
| 959 | return 0; | 1026 | bufp++, pos_byte++; |
| 960 | c = FETCH_BYTE (pos_byte); | 1027 | while (!CHAR_HEAD_P (*bufp)) bufp++, pos_byte++; |
| 961 | if (CHAR_HEAD_P (c)) | ||
| 962 | return 0; | ||
| 963 | while (pos_byte < Z_BYTE) | ||
| 964 | { | ||
| 965 | c = FETCH_BYTE (pos_byte); | ||
| 966 | if (CHAR_HEAD_P (c)) | ||
| 967 | break; | ||
| 968 | pos_byte++; | ||
| 969 | } | ||
| 970 | 1028 | ||
| 971 | return pos_byte - opos_byte; | 1029 | return (bytes <= pos_byte - opos_byte ? bytes : pos_byte - opos_byte); |
| 972 | } | 1030 | } |
| 973 | 1031 | ||
| 974 | /* Adjust the position TARGET/TARGET_BYTE for the combining of NBYTES | 1032 | /* Adjust the position TARGET/TARGET_BYTE for the combining of NBYTES |