(count_combining_composition): New function.

(count_combining_before): Adjust the way to check byte-combining possibility for the new handling of multibyte sequences. Call count_combining_composition for a composite character.

(count_combining_after): Likewise.
author: Kenichi Handa 1999-09-03 01:28:42 +0000
committer: Kenichi Handa 1999-09-03 01:28:42 +0000
commit: cae184f227ba2607c4ea9ff7b23a3d7ebb0779c4 (patch)
tree: 8873fc5800d66af643973b1b23578701ed994de6
parent: 1be6387dbd866b33f74edd9389861797dad02c92 (diff)
download: emacs-cae184f227ba2607c4ea9ff7b23a3d7ebb0779c4.tar.gz
emacs-cae184f227ba2607c4ea9ff7b23a3d7ebb0779c4.zip
1 files changed, 93 insertions, 35 deletions
diff --git a/src/insdel.c b/src/insdel.c
index e73313c3730..738d55909ed 100644
--- a/src/insdel.c
+++ b/src/insdel.c
@@ -879,6 +879,33 @@ insert_1 (string, nbytes, inherit, prepare, before_markers)
                 inherit, prepare, before_markers);
 }
+/* See if the byte sequence at STR1 of length LEN1 combine with the
+   byte sequence at STR2 of length LEN2 to form a single composite
+   character.  If so, return the number of bytes at the start of STR2
+   which combine in this way.  Otherwise, return 0.  If STR3 is not
+   NULL, it is a byte sequence of length LEN3 to be appended to STR1
+   before checking the combining.  */
+int
+count_combining_composition (str1, len1, str2, len2, str3, len3)
+     unsigned char *str1, *str2, *str3;
+     int len1, len2, len3;
+{
+  int len = len1 + len2 + len3;
+  unsigned char *buf = (unsigned char *) alloca (len + 1);
+  int bytes;
+  bcopy (str1, buf, len1);
+  if (str3)
+    {
+      bcopy (str3, buf + len1, len3);
+      len1 += len3;
+    }
+  bcopy (str2, buf + len1 , len2);
+  buf[len] = 0;
+  PARSE_MULTIBYTE_SEQ (buf, len, bytes);
+  return (bytes <= len1 ? 0 : bytes - len1);
+}
 /* See if the bytes before POS/POS_BYTE combine with bytes
   at the start of STRING to form a single character.
   If so, return the number of bytes at the start of STRING
@@ -890,30 +917,44 @@ count_combining_before (string, length, pos, pos_byte)
     int length;
     int pos, pos_byte;
 {
-  int opos = pos, opos_byte = pos_byte;
+  int len, combining_bytes;
-  int c;
+  unsigned char *p;
-  unsigned char *p = string;
  if (NILP (current_buffer->enable_multibyte_characters))
    return 0;
-  if (length == 0 || CHAR_HEAD_P (*string))
+  /* At first, we can exclude the following cases:
+        (1) STRING[0] can't be a following byte of multibyte sequence.
+        (2) POS is the start of the current buffer.
+        (3) A character before POS is not a multibyte character.  */
+  if (length == 0 || CHAR_HEAD_P (*string)) /* case (1) */
    return 0;
-  if (pos == BEG)
+  if (pos_byte == BEG_BYTE)     /* case (2) */
    return 0;
-  c = FETCH_BYTE (pos_byte - 1);
+  len = 1;
-  if (ASCII_BYTE_P (c))
+  p = BYTE_POS_ADDR (pos_byte - 1);
+  while (! CHAR_HEAD_P (*p)) p--, len++;
+  if (! BASE_LEADING_CODE_P (*p)) /* case (3) */
    return 0;
-  DEC_BOTH (pos, pos_byte);
-  c = FETCH_BYTE (pos_byte);
+  /* A sequence of a composite character requires a special handling.  */
-  if (! BASE_LEADING_CODE_P (c))
+  if (*p == LEADING_CODE_COMPOSITION)
+    return count_combining_composition (p, len, string, length, NULL, 0);
+  combining_bytes = BYTES_BY_CHAR_HEAD (*p) - len;
+  if (combining_bytes <= 0)
+    /* The character preceding POS is, complete and no room for
+       combining bytes (combining_bytes == 0), or an independent 8-bit
+       character (combining_bytes < 0).  */
    return 0;
-  /* We have a combination situation.
+  /* We have a combination situation.  Count the bytes at STRING that
-     Count the bytes at STRING that will combine.  */
+     may combine.  */
+  p = string + 1;
  while (!CHAR_HEAD_P (*p) && p < string + length)
    p++;
-  return p - string;
+  return (combining_bytes < p - string ? combining_bytes : p - string);
 }
 /* See if the bytes after POS/POS_BYTE combine with bytes
@@ -929,12 +970,25 @@ count_combining_after (string, length, pos, pos_byte)
 {
  int opos = pos, opos_byte = pos_byte;
  int i;
-  int c;
+  int c, bytes;
+  unsigned char *bufp;
  if (NILP (current_buffer->enable_multibyte_characters))
    return 0;
-  if (length > 0 && ASCII_BYTE_P (string[length - 1]))
+  /* At first, we can exclude the following cases:
+        (1) The last byte of STRING is an ASCII.
+        (2) POS is the last of the current buffer.
+        (3) A character at POS can't be a following byte of multibyte
+            character.  */
+  if (length > 0 && ASCII_BYTE_P (string[length - 1])) /* case (1) */
+    return 0;
+  if (pos_byte == Z_BYTE)       /* case (2) */
    return 0;
+  bufp = BYTE_POS_ADDR (pos_byte);
+  if (CHAR_HEAD_P (*bufp))      /* case (3) */
+    return 0;
  i = length - 1;
  while (i >= 0 && ! CHAR_HEAD_P (string[i]))
    {
@@ -942,33 +996,37 @@ count_combining_after (string, length, pos, pos_byte)
    }
  if (i < 0)
    {
-      /* All characters in `string' are not character head.
+      /* All characters in STRING are not character head.  We must
-         We must check also preceding bytes at POS.
+         check also preceding bytes at POS.  We are sure that the gap
-         We are sure that the gap is at POS.  */
+         is at POS.  */
-      string = BEG_ADDR;
+      unsigned char *p = BEG_ADDR;
      i = pos_byte - 2;
-      while (i >= 0 && ! CHAR_HEAD_P (string[i]))
+      while (i >= 0 && ! CHAR_HEAD_P (p[i]))
        i--;
-      if (i < 0 || !BASE_LEADING_CODE_P (string[i]))
+      if (i < 0 || !BASE_LEADING_CODE_P (p[i]))
        return 0;
+      /* A sequence of a composite character requires a special handling.  */
+      if (p[i] == LEADING_CODE_COMPOSITION)
+        return count_combining_composition (p + i, pos_byte - 1 - i,
+                                            bufp, Z_BYTE - pos_byte,
+                                            string, length);
+      bytes = BYTES_BY_CHAR_HEAD (p[i]);
+      return (bytes <= pos_byte - 1 - i + length
+              ? 0
+              : bytes - (pos_byte - 1 - i + length));
    }
-  else if (!BASE_LEADING_CODE_P (string[i]))
+  if (!BASE_LEADING_CODE_P (string[i]))
    return 0;
+  /* A sequence of a composite character requires a special handling.  */
+  if (string[i] == LEADING_CODE_COMPOSITION)
+    return count_combining_composition (string + i, length - i,
+                                        bufp, Z_BYTE - pos_byte, NULL, 0);
-  if (pos == Z)
+  bytes = BYTES_BY_CHAR_HEAD (string[i]) - (length - i);
-    return 0;
+  bufp++, pos_byte++;
-  c = FETCH_BYTE (pos_byte);
+  while (!CHAR_HEAD_P (*bufp)) bufp++, pos_byte++;
-  if (CHAR_HEAD_P (c))
-    return 0;
-  while (pos_byte < Z_BYTE)
-    {
-      c = FETCH_BYTE (pos_byte);
-      if (CHAR_HEAD_P (c))
-        break;
-      pos_byte++;
-    }
-  return pos_byte - opos_byte;
+  return (bytes <= pos_byte - opos_byte ? bytes : pos_byte - opos_byte);
 }
 /* Adjust the position TARGET/TARGET_BYTE for the combining of NBYTES
author	Kenichi Handa	1999-09-03 01:28:42 +0000
committer	Kenichi Handa	1999-09-03 01:28:42 +0000
commit	cae184f227ba2607c4ea9ff7b23a3d7ebb0779c4 (patch)
tree	8873fc5800d66af643973b1b23578701ed994de6
parent	1be6387dbd866b33f74edd9389861797dad02c92 (diff)
download	emacs-cae184f227ba2607c4ea9ff7b23a3d7ebb0779c4.tar.gz emacs-cae184f227ba2607c4ea9ff7b23a3d7ebb0779c4.zip

diff --git a/src/insdel.c b/src/insdel.c
index e73313c3730..738d55909ed 100644
--- a/src/insdel.c
+++ b/src/insdel.c
@@ -879,6 +879,33 @@ insert_1 (string, nbytes, inherit, prepare, before_markers)
879	inherit, prepare, before_markers);	879	inherit, prepare, before_markers);
880	}	880	}
881		881
		882	/* See if the byte sequence at STR1 of length LEN1 combine with the
		883	byte sequence at STR2 of length LEN2 to form a single composite
		884	character. If so, return the number of bytes at the start of STR2
		885	which combine in this way. Otherwise, return 0. If STR3 is not
		886	NULL, it is a byte sequence of length LEN3 to be appended to STR1
		887	before checking the combining. */
		888	int
		889	count_combining_composition (str1, len1, str2, len2, str3, len3)
		890	unsigned char *str1, *str2, *str3;
		891	int len1, len2, len3;
		892	{
		893	int len = len1 + len2 + len3;
		894	unsigned char *buf = (unsigned char *) alloca (len + 1);
		895	int bytes;
		896
		897	bcopy (str1, buf, len1);
		898	if (str3)
		899	{
		900	bcopy (str3, buf + len1, len3);
		901	len1 += len3;
		902	}
		903	bcopy (str2, buf + len1 , len2);
		904	buf[len] = 0;
		905	PARSE_MULTIBYTE_SEQ (buf, len, bytes);
		906	return (bytes <= len1 ? 0 : bytes - len1);
		907	}
		908
882	/* See if the bytes before POS/POS_BYTE combine with bytes	909	/* See if the bytes before POS/POS_BYTE combine with bytes
883	at the start of STRING to form a single character.	910	at the start of STRING to form a single character.
884	If so, return the number of bytes at the start of STRING	911	If so, return the number of bytes at the start of STRING
@@ -890,30 +917,44 @@ count_combining_before (string, length, pos, pos_byte)
890	int length;	917	int length;
891	int pos, pos_byte;	918	int pos, pos_byte;
892	{	919	{
893	int opos = pos, opos_byte = pos_byte;	920	int len, combining_bytes;
894	int c;	921	unsigned char *p;
895	unsigned char *p = string;
896		922
897	if (NILP (current_buffer->enable_multibyte_characters))	923	if (NILP (current_buffer->enable_multibyte_characters))
898	return 0;	924	return 0;
899	if (length == 0 || CHAR_HEAD_P (*string))	925
		926	/* At first, we can exclude the following cases:
		927	(1) STRING[0] can't be a following byte of multibyte sequence.
		928	(2) POS is the start of the current buffer.
		929	(3) A character before POS is not a multibyte character. */
		930	if (length == 0 || CHAR_HEAD_P (*string)) /* case (1) */
900	return 0;	931	return 0;
901	if (pos == BEG)	932	if (pos_byte == BEG_BYTE) /* case (2) */
902	return 0;	933	return 0;
903	c = FETCH_BYTE (pos_byte - 1);	934	len = 1;
904	if (ASCII_BYTE_P (c))	935	p = BYTE_POS_ADDR (pos_byte - 1);
		936	while (! CHAR_HEAD_P (*p)) p--, len++;
		937	if (! BASE_LEADING_CODE_P (*p)) /* case (3) */
905	return 0;	938	return 0;
906	DEC_BOTH (pos, pos_byte);	939
907	c = FETCH_BYTE (pos_byte);	940	/* A sequence of a composite character requires a special handling. */
908	if (! BASE_LEADING_CODE_P (c))	941	if (*p == LEADING_CODE_COMPOSITION)
		942	return count_combining_composition (p, len, string, length, NULL, 0);
		943
		944	combining_bytes = BYTES_BY_CHAR_HEAD (*p) - len;
		945	if (combining_bytes <= 0)
		946	/* The character preceding POS is, complete and no room for
		947	combining bytes (combining_bytes == 0), or an independent 8-bit
		948	character (combining_bytes < 0). */
909	return 0;	949	return 0;
910		950
911	/* We have a combination situation.	951	/* We have a combination situation. Count the bytes at STRING that
912	Count the bytes at STRING that will combine. */	952	may combine. */
		953	p = string + 1;
913	while (!CHAR_HEAD_P (*p) && p < string + length)	954	while (!CHAR_HEAD_P (*p) && p < string + length)
914	p++;	955	p++;
915		956
916	return p - string;	957	return (combining_bytes < p - string ? combining_bytes : p - string);
917	}	958	}
918		959
919	/* See if the bytes after POS/POS_BYTE combine with bytes	960	/* See if the bytes after POS/POS_BYTE combine with bytes
@@ -929,12 +970,25 @@ count_combining_after (string, length, pos, pos_byte)
929	{	970	{
930	int opos = pos, opos_byte = pos_byte;	971	int opos = pos, opos_byte = pos_byte;
931	int i;	972	int i;
932	int c;	973	int c, bytes;
		974	unsigned char *bufp;
933		975
934	if (NILP (current_buffer->enable_multibyte_characters))	976	if (NILP (current_buffer->enable_multibyte_characters))
935	return 0;	977	return 0;
936	if (length > 0 && ASCII_BYTE_P (string[length - 1]))	978
		979	/* At first, we can exclude the following cases:
		980	(1) The last byte of STRING is an ASCII.
		981	(2) POS is the last of the current buffer.
		982	(3) A character at POS can't be a following byte of multibyte
		983	character. */
		984	if (length > 0 && ASCII_BYTE_P (string[length - 1])) /* case (1) */
		985	return 0;
		986	if (pos_byte == Z_BYTE) /* case (2) */
937	return 0;	987	return 0;
		988	bufp = BYTE_POS_ADDR (pos_byte);
		989	if (CHAR_HEAD_P (*bufp)) /* case (3) */
		990	return 0;
		991
938	i = length - 1;	992	i = length - 1;
939	while (i >= 0 && ! CHAR_HEAD_P (string[i]))	993	while (i >= 0 && ! CHAR_HEAD_P (string[i]))
940	{	994	{
@@ -942,33 +996,37 @@ count_combining_after (string, length, pos, pos_byte)
942	}	996	}
943	if (i < 0)	997	if (i < 0)
944	{	998	{
945	/* All characters in `string' are not character head.	999	/* All characters in STRING are not character head. We must
946	We must check also preceding bytes at POS.	1000	check also preceding bytes at POS. We are sure that the gap
947	We are sure that the gap is at POS. */	1001	is at POS. */
948	string = BEG_ADDR;	1002	unsigned char *p = BEG_ADDR;
949	i = pos_byte - 2;	1003	i = pos_byte - 2;
950	while (i >= 0 && ! CHAR_HEAD_P (string[i]))	1004	while (i >= 0 && ! CHAR_HEAD_P (p[i]))
951	i--;	1005	i--;
952	if (i < 0 || !BASE_LEADING_CODE_P (string[i]))	1006	if (i < 0 || !BASE_LEADING_CODE_P (p[i]))
953	return 0;	1007	return 0;
		1008	/* A sequence of a composite character requires a special handling. */
		1009	if (p[i] == LEADING_CODE_COMPOSITION)
		1010	return count_combining_composition (p + i, pos_byte - 1 - i,
		1011	bufp, Z_BYTE - pos_byte,
		1012	string, length);
		1013	bytes = BYTES_BY_CHAR_HEAD (p[i]);
		1014	return (bytes <= pos_byte - 1 - i + length
		1015	? 0
		1016	: bytes - (pos_byte - 1 - i + length));
954	}	1017	}
955	else if (!BASE_LEADING_CODE_P (string[i]))	1018	if (!BASE_LEADING_CODE_P (string[i]))
956	return 0;	1019	return 0;
		1020	/* A sequence of a composite character requires a special handling. */
		1021	if (string[i] == LEADING_CODE_COMPOSITION)
		1022	return count_combining_composition (string + i, length - i,
		1023	bufp, Z_BYTE - pos_byte, NULL, 0);
957		1024
958	if (pos == Z)	1025	bytes = BYTES_BY_CHAR_HEAD (string[i]) - (length - i);
959	return 0;	1026	bufp++, pos_byte++;
960	c = FETCH_BYTE (pos_byte);	1027	while (!CHAR_HEAD_P (*bufp)) bufp++, pos_byte++;
961	if (CHAR_HEAD_P (c))
962	return 0;
963	while (pos_byte < Z_BYTE)
964	{
965	c = FETCH_BYTE (pos_byte);
966	if (CHAR_HEAD_P (c))
967	break;
968	pos_byte++;
969	}
970		1028
971	return pos_byte - opos_byte;	1029	return (bytes <= pos_byte - opos_byte ? bytes : pos_byte - opos_byte);
972	}	1030	}
973		1031
974	/* Adjust the position TARGET/TARGET_BYTE for the combining of NBYTES	1032	/* Adjust the position TARGET/TARGET_BYTE for the combining of NBYTES