Minor fix to recent coding.c change

* src/coding.c (get_buffer_gap_address): Don’t assume string or buffer length fits in int. Also, improve wording of comments.
author: Paul Eggert 2019-08-04 09:18:46 -0700
committer: Paul Eggert 2019-08-04 09:19:38 -0700
commit: 3c459e3b05e699736b849cb2c4687aef3ce6810b (patch)
tree: 99c05f435066d9c69044a51e7a3027159f3d1cb6 /src/coding.c
parent: 1b20993baaeffa5aa69b282862b5066960604aab (diff)
download: emacs-3c459e3b05e699736b849cb2c4687aef3ce6810b.tar.gz
emacs-3c459e3b05e699736b849cb2c4687aef3ce6810b.zip
1 files changed, 60 insertions, 63 deletions
diff --git a/src/coding.c b/src/coding.c
index ab0e15119f3..877177b1882 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -9520,7 +9520,7 @@ code_convert_string_norecord (Lisp_Object string, Lisp_Object coding_system,
   NBYTES, enlarge the gap in advance.  */
 static unsigned char *
-get_buffer_gap_address (Lisp_Object buffer, int nbytes)
+get_buffer_gap_address (Lisp_Object buffer, ptrdiff_t nbytes)
 {
  struct buffer *buf = XBUFFER (buffer);
@@ -9546,9 +9546,9 @@ get_buffer_gap_address (Lisp_Object buffer, int nbytes)
 static unsigned char *
 get_char_bytes (int c, int *len)
 {
-  /* We uses two chaches considering the situation that
+  /* Use two caches, since encode/decode_string_utf_8 are called
-     encode/decode_string_utf_8 are called repeatedly with the same
+     repeatedly with the same values for HANDLE_8_BIT and
-     values for HANDLE_8_BIT and HANDLE_OVER_UNI arguments.  */
+     HANDLE_OVER_UNI arguments.  */
  static int chars[2];
  static unsigned char bytes[2][6];
  static int nbytes[2];
@@ -9572,55 +9572,51 @@ get_char_bytes (int c, int *len)
 /* Encode STRING by the coding system utf-8-unix.
-   Even if :pre-write-conversion and :encode-translation-table
+   Ignore any :pre-write-conversion and :encode-translation-table
-   properties are put to that coding system, they are ignored.
+   properties of that coding system.
-   It ignores :pre-write-conversion and :encode-translation-table
+   Assume that arguments have values as described below.
-   propeties of that coding system.
+   The validity must be assured by callers.
-   This function assumes that arguments have values as described
-   below.  The validity must be assured by callers.
   STRING is a multibyte string or an ASCII-only unibyte string.
   BUFFER is a unibyte buffer or Qnil.
-   If BUFFER is a unibyte buffer, the encoding result of UTF-8
+   If BUFFER is a unibyte buffer, insert the encoded result
-   sequence is inserted after point of the buffer, and the number of
+   after point of the buffer, and return the number of
-   inserted characters is returned.  Note that a caller should have
+   inserted characters.  The caller should have made BUFFER ready for
-   made BUFFER ready for modifying in advance (e.g. by calling
+   modifying in advance (e.g., by calling invalidate_buffer_caches).
-   invalidate_buffer_caches).
-   If BUFFER is Qnil, a unibyte string is made from the encodnig
+   If BUFFER is Qnil, return a unibyte string from the encoded result.
-   result of UTF-8 sequence, and it is returned.  If NOCOPY and STRING
+   If NOCOPY, and if STRING contains only Unicode characters (i.e.,
-   contains only Unicode characters (i.e. the encoding does not change
+   the encoding does not change the byte sequence), return STRING even
-   the byte sequence), STRING is returned even if it is multibyte.
+   if it is multibyte.
-   HANDLE-8-BIT and HANDE-OVER-UNI specify how to handle a non-Unicode
+   HANDLE-8-BIT and HANDLE-OVER-UNI specify how to handle a non-Unicode
   character.  The former is for an eight-bit character (represented
-   by 2-byte overlong sequence in multibyte STRING).  The latter is
+   by a 2-byte overlong sequence in a multibyte STRING).  The latter is
-   for an over-unicode character (a character whose code is greater
+   for an over-Unicode character (a character whose code is greater
-   than the maximum Unicode character 0x10FFFF, and is represented by
+   than the maximum Unicode character 0x10FFFF, represented by a 4 or
-   4 or 5-byte sequence in multibyte STRING).
+   5-byte sequence in a multibyte STRING).
-   If they are unibyte strings (typically "\357\277\275"; UTF-8
+   If these two arguments are unibyte strings (typically
-   sequence for the Unicode REPLACEMENT CHARACTER #xFFFD), a
+   "\357\277\275", the UTF-8 sequence for the Unicode REPLACEMENT
-   non-Unicode character is encoded into that sequence.
+   CHARACTER #xFFFD), encode a non-Unicode character into that
+   unibyte sequence.
-   If they are characters, a non-Unicode chracters is encoded into the
+   If the two arguments are characters, encode a non-Unicode
-   corresponding UTF-8 sequences.
+   character as if it was the argument.
-   If they are Qignored, a non-Unicode character is skipped on
+   If they are Qignored, skip a non-Unicode character.
-   encoding.
-   If HANDLE-8-BIT is Qt, an eight-bit character is encoded into one
+   If HANDLE-8-BIT is Qt, encode an eight-bit character into one
   byte of the same value.
-   If HANDLE-OVER-UNI is Qt, an over-unicode character is encoded
+   If HANDLE-OVER-UNI is Qt, encode an over-unicode character
   into the same 4 or 5-byte sequence.
-   If they are Qnil, Qnil is returned if STRING has a non-Unicode
+   If the two arguments are Qnil, return Qnil if STRING has a
-   character. */
+   non-Unicode character.  */
 Lisp_Object
 encode_string_utf_8 (Lisp_Object string, Lisp_Object buffer,
@@ -9633,7 +9629,7 @@ encode_string_utf_8 (Lisp_Object string, Lisp_Object buffer,
    return string;
  ptrdiff_t num_8_bit = 0;   /* number of eight-bit chars in STRING */
-  /* The following two vars are counted only if handle_over_uni is not Qt */
+  /* The following two vars are counted only if handle_over_uni is not Qt.  */
  ptrdiff_t num_over_4 = 0; /* number of 4-byte non-Unicode chars in STRING */
  ptrdiff_t num_over_5 = 0; /* number of 5-byte non-Unicode chars in STRING */
  ptrdiff_t outbytes;        /* number of bytes of decoding result. */
@@ -9828,25 +9824,23 @@ encode_string_utf_8 (Lisp_Object string, Lisp_Object buffer,
 /* Decode STRING by the coding system utf-8-unix.
-   Even if :post-read-conversion and :decode-translation-table
+   Ignore any :post-read-conversion and :decode-translation-table
-   properties are put to that coding system, they are ignored.
+   properties of that coding system.
-   This function assumes that arguments have values as described
+   Assume that arguments have values as described below.
-   below.  The validity must be assured by callers.
+   The validity must be assured by callers.
   STRING is a unibyte string or an ASCII-only multibyte string.
   BUFFER is a multibyte buffer or Qnil.
-   If BUFFER is a multibyte buffer, the decoding result of Unicode
+   If BUFFER is a multibyte buffer, insert the decoding result of
-   characters are inserted after point of the buffer, and the number
+   Unicode characters after point of the buffer, and return the number
-   of inserted characters is returned.  Note that a caller should have
+   of inserted characters.  The caller should have made BUFFER ready
-   made BUFFER ready for modifying in advance (e.g. by calling
+   for modifying in advance (e.g., by calling invalidate_buffer_caches).
-   invalidate_buffer_caches).
-   If BUFFER is Qnil, a multibyte string is made from the decoding
+   If BUFFER is Qnil, return a multibyte string from the decoded result.
-   result of Unicode characters, and it is returned.  As a special
+   As a special case, return STRING itself in the following cases:
-   case, STRING itself is returned in the following cases:
   1. STRING contains only ASCII characters.
   2. NOCOPY, and STRING contains only valid UTF-8 sequences.
@@ -9858,24 +9852,26 @@ encode_string_utf_8 (Lisp_Object string, Lisp_Object buffer,
   than #x10FFFF).  Note that this function does not treat an overlong
   UTF-8 sequence as invalid.
-   If they are strings (typically 1-char string of the Unicode
+   If these two arguments are strings (typically a 1-char string of
-   REPLACEMENT CHARACTER #xFFFD), an invalid sequence is decoded into
+   the Unicode REPLACEMENT CHARACTER #xFFFD), decode an invalid byte
-   that string.  They must be multibyte strings if they contain a
+   sequence into that string.  They must be multibyte strings if they
-   non-ASCII character.
+   contain a non-ASCII character.
-   If they are characters, an invalid sequence is decoded into the
+   If the two arguments are characters, decode an invalid byte
-   corresponding multibyte representation of the characters.
+   sequence into the corresponding multibyte representation of the
+   characters.
-   If they are Qignored, an invalid sequence is skipped on decoding.
+   If they are Qignored, skip an invalid byte sequence.
-   If HANDLE-8-BIT is Qt, an 1-byte invalid sequence is deoded into
+   If HANDLE-8-BIT is Qt, decode a 1-byte invalid sequence into
   the corresponding eight-bit character.
-   If HANDLE-OVER-UNI is Qt, a 4 or 5-byte invalid sequence that
+   If HANDLE-OVER-UNI is Qt, decode a 4 or 5-byte invalid sequence
-   follows Emacs' representation for an over-unicode character is
+   that follows Emacs' representation for an over-unicode character
-   decoded into the corresponding character.
+   into the corresponding character.
-   If they are Qnil, Qnil is returned if STRING has an invalid sequence.  */
+   If the two arguments are Qnil, return Qnil if STRING has an invalid
+   sequence.  */
 Lisp_Object
 decode_string_utf_8 (Lisp_Object string, Lisp_Object buffer,
@@ -9883,7 +9879,7 @@ decode_string_utf_8 (Lisp_Object string, Lisp_Object buffer,
                     Lisp_Object handle_over_uni)
 {
  /* This is like BYTES_BY_CHAR_HEAD, but it is assured that C >= 0x80
-     and it returns 0 for invalid sequence.  */
+     and it returns 0 for an invalid sequence.  */
 #define UTF_8_SEQUENCE_LENGTH(c)        \
  ((c) < 0xC2 ? 0                       \
   : (c) < 0xE0 ? 2                     \
@@ -9924,7 +9920,8 @@ decode_string_utf_8 (Lisp_Object string, Lisp_Object buffer,
                  && (len == 3
                      || (UTF_8_EXTRA_OCTET_P (p[3])
                          && len == 4
-                          && string_char (p, NULL, NULL) <= MAX_UNICODE_CHAR)))))
+                          && (string_char (p, NULL, NULL)
+                              <= MAX_UNICODE_CHAR))))))
        {
          p += len;
          continue;
author	Paul Eggert	2019-08-04 09:18:46 -0700
committer	Paul Eggert	2019-08-04 09:19:38 -0700
commit	3c459e3b05e699736b849cb2c4687aef3ce6810b (patch)
tree	99c05f435066d9c69044a51e7a3027159f3d1cb6 /src/coding.c
parent	1b20993baaeffa5aa69b282862b5066960604aab (diff)
download	emacs-3c459e3b05e699736b849cb2c4687aef3ce6810b.tar.gz emacs-3c459e3b05e699736b849cb2c4687aef3ce6810b.zip

diff --git a/src/coding.c b/src/coding.c index ab0e15119f3..877177b1882 100644 --- a/src/coding.c +++ b/src/coding.c
@@ -9520,7 +9520,7 @@ code_convert_string_norecord (Lisp_Object string, Lisp_Object coding_system,
9520	NBYTES, enlarge the gap in advance. */	9520	NBYTES, enlarge the gap in advance. */
9521		9521
9522	static unsigned char *	9522	static unsigned char *
9523	get_buffer_gap_address (Lisp_Object buffer, int nbytes)	9523	get_buffer_gap_address (Lisp_Object buffer, ptrdiff_t nbytes)
9524	{	9524	{
9525	struct buffer *buf = XBUFFER (buffer);	9525	struct buffer *buf = XBUFFER (buffer);
9526		9526
@@ -9546,9 +9546,9 @@ get_buffer_gap_address (Lisp_Object buffer, int nbytes)
9546	static unsigned char *	9546	static unsigned char *
9547	get_char_bytes (int c, int *len)	9547	get_char_bytes (int c, int *len)
9548	{	9548	{
9549	/* We uses two chaches considering the situation that	9549	/* Use two caches, since encode/decode_string_utf_8 are called
9550	encode/decode_string_utf_8 are called repeatedly with the same	9550	repeatedly with the same values for HANDLE_8_BIT and
9551	values for HANDLE_8_BIT and HANDLE_OVER_UNI arguments. */	9551	HANDLE_OVER_UNI arguments. */
9552	static int chars[2];	9552	static int chars[2];
9553	static unsigned char bytes[2][6];	9553	static unsigned char bytes[2][6];
9554	static int nbytes[2];	9554	static int nbytes[2];
@@ -9572,55 +9572,51 @@ get_char_bytes (int c, int *len)
9572		9572
9573	/* Encode STRING by the coding system utf-8-unix.	9573	/* Encode STRING by the coding system utf-8-unix.
9574		9574
9575	Even if :pre-write-conversion and :encode-translation-table	9575	Ignore any :pre-write-conversion and :encode-translation-table
9576	properties are put to that coding system, they are ignored.	9576	properties of that coding system.
9577		9577
9578	It ignores :pre-write-conversion and :encode-translation-table	9578	Assume that arguments have values as described below.
9579	propeties of that coding system.	9579	The validity must be assured by callers.
9580
9581	This function assumes that arguments have values as described
9582	below. The validity must be assured by callers.
9583		9580
9584	STRING is a multibyte string or an ASCII-only unibyte string.	9581	STRING is a multibyte string or an ASCII-only unibyte string.
9585		9582
9586	BUFFER is a unibyte buffer or Qnil.	9583	BUFFER is a unibyte buffer or Qnil.
9587		9584
9588	If BUFFER is a unibyte buffer, the encoding result of UTF-8	9585	If BUFFER is a unibyte buffer, insert the encoded result
9589	sequence is inserted after point of the buffer, and the number of	9586	after point of the buffer, and return the number of
9590	inserted characters is returned. Note that a caller should have	9587	inserted characters. The caller should have made BUFFER ready for
9591	made BUFFER ready for modifying in advance (e.g. by calling	9588	modifying in advance (e.g., by calling invalidate_buffer_caches).
9592	invalidate_buffer_caches).
9593		9589
9594	If BUFFER is Qnil, a unibyte string is made from the encodnig	9590	If BUFFER is Qnil, return a unibyte string from the encoded result.
9595	result of UTF-8 sequence, and it is returned. If NOCOPY and STRING	9591	If NOCOPY, and if STRING contains only Unicode characters (i.e.,
9596	contains only Unicode characters (i.e. the encoding does not change	9592	the encoding does not change the byte sequence), return STRING even
9597	the byte sequence), STRING is returned even if it is multibyte.	9593	if it is multibyte.
9598		9594
9599	HANDLE-8-BIT and HANDE-OVER-UNI specify how to handle a non-Unicode	9595	HANDLE-8-BIT and HANDLE-OVER-UNI specify how to handle a non-Unicode
9600	character. The former is for an eight-bit character (represented	9596	character. The former is for an eight-bit character (represented
9601	by 2-byte overlong sequence in multibyte STRING). The latter is	9597	by a 2-byte overlong sequence in a multibyte STRING). The latter is
9602	for an over-unicode character (a character whose code is greater	9598	for an over-Unicode character (a character whose code is greater
9603	than the maximum Unicode character 0x10FFFF, and is represented by	9599	than the maximum Unicode character 0x10FFFF, represented by a 4 or
9604	4 or 5-byte sequence in multibyte STRING).	9600	5-byte sequence in a multibyte STRING).
9605		9601
9606	If they are unibyte strings (typically "\357\277\275"; UTF-8	9602	If these two arguments are unibyte strings (typically
9607	sequence for the Unicode REPLACEMENT CHARACTER #xFFFD), a	9603	"\357\277\275", the UTF-8 sequence for the Unicode REPLACEMENT
9608	non-Unicode character is encoded into that sequence.	9604	CHARACTER #xFFFD), encode a non-Unicode character into that
		9605	unibyte sequence.
9609		9606
9610	If they are characters, a non-Unicode chracters is encoded into the	9607	If the two arguments are characters, encode a non-Unicode
9611	corresponding UTF-8 sequences.	9608	character as if it was the argument.
9612		9609
9613	If they are Qignored, a non-Unicode character is skipped on	9610	If they are Qignored, skip a non-Unicode character.
9614	encoding.
9615		9611
9616	If HANDLE-8-BIT is Qt, an eight-bit character is encoded into one	9612	If HANDLE-8-BIT is Qt, encode an eight-bit character into one
9617	byte of the same value.	9613	byte of the same value.
9618		9614
9619	If HANDLE-OVER-UNI is Qt, an over-unicode character is encoded	9615	If HANDLE-OVER-UNI is Qt, encode an over-unicode character
9620	into the same 4 or 5-byte sequence.	9616	into the same 4 or 5-byte sequence.
9621		9617
9622	If they are Qnil, Qnil is returned if STRING has a non-Unicode	9618	If the two arguments are Qnil, return Qnil if STRING has a
9623	character. */	9619	non-Unicode character. */
9624		9620
9625	Lisp_Object	9621	Lisp_Object
9626	encode_string_utf_8 (Lisp_Object string, Lisp_Object buffer,	9622	encode_string_utf_8 (Lisp_Object string, Lisp_Object buffer,
@@ -9633,7 +9629,7 @@ encode_string_utf_8 (Lisp_Object string, Lisp_Object buffer,
9633	return string;	9629	return string;
9634		9630
9635	ptrdiff_t num_8_bit = 0; /* number of eight-bit chars in STRING */	9631	ptrdiff_t num_8_bit = 0; /* number of eight-bit chars in STRING */
9636	/* The following two vars are counted only if handle_over_uni is not Qt */	9632	/* The following two vars are counted only if handle_over_uni is not Qt. */
9637	ptrdiff_t num_over_4 = 0; /* number of 4-byte non-Unicode chars in STRING */	9633	ptrdiff_t num_over_4 = 0; /* number of 4-byte non-Unicode chars in STRING */
9638	ptrdiff_t num_over_5 = 0; /* number of 5-byte non-Unicode chars in STRING */	9634	ptrdiff_t num_over_5 = 0; /* number of 5-byte non-Unicode chars in STRING */
9639	ptrdiff_t outbytes; /* number of bytes of decoding result. */	9635	ptrdiff_t outbytes; /* number of bytes of decoding result. */
@@ -9828,25 +9824,23 @@ encode_string_utf_8 (Lisp_Object string, Lisp_Object buffer,
9828		9824
9829	/* Decode STRING by the coding system utf-8-unix.	9825	/* Decode STRING by the coding system utf-8-unix.
9830		9826
9831	Even if :post-read-conversion and :decode-translation-table	9827	Ignore any :post-read-conversion and :decode-translation-table
9832	properties are put to that coding system, they are ignored.	9828	properties of that coding system.
9833		9829
9834	This function assumes that arguments have values as described	9830	Assume that arguments have values as described below.
9835	below. The validity must be assured by callers.	9831	The validity must be assured by callers.
9836		9832
9837	STRING is a unibyte string or an ASCII-only multibyte string.	9833	STRING is a unibyte string or an ASCII-only multibyte string.
9838		9834
9839	BUFFER is a multibyte buffer or Qnil.	9835	BUFFER is a multibyte buffer or Qnil.
9840		9836
9841	If BUFFER is a multibyte buffer, the decoding result of Unicode	9837	If BUFFER is a multibyte buffer, insert the decoding result of
9842	characters are inserted after point of the buffer, and the number	9838	Unicode characters after point of the buffer, and return the number
9843	of inserted characters is returned. Note that a caller should have	9839	of inserted characters. The caller should have made BUFFER ready
9844	made BUFFER ready for modifying in advance (e.g. by calling	9840	for modifying in advance (e.g., by calling invalidate_buffer_caches).
9845	invalidate_buffer_caches).
9846		9841
9847	If BUFFER is Qnil, a multibyte string is made from the decoding	9842	If BUFFER is Qnil, return a multibyte string from the decoded result.
9848	result of Unicode characters, and it is returned. As a special	9843	As a special case, return STRING itself in the following cases:
9849	case, STRING itself is returned in the following cases:
9850	1. STRING contains only ASCII characters.	9844	1. STRING contains only ASCII characters.
9851	2. NOCOPY, and STRING contains only valid UTF-8 sequences.	9845	2. NOCOPY, and STRING contains only valid UTF-8 sequences.
9852		9846
@@ -9858,24 +9852,26 @@ encode_string_utf_8 (Lisp_Object string, Lisp_Object buffer,
9858	than #x10FFFF). Note that this function does not treat an overlong	9852	than #x10FFFF). Note that this function does not treat an overlong
9859	UTF-8 sequence as invalid.	9853	UTF-8 sequence as invalid.
9860		9854
9861	If they are strings (typically 1-char string of the Unicode	9855	If these two arguments are strings (typically a 1-char string of
9862	REPLACEMENT CHARACTER #xFFFD), an invalid sequence is decoded into	9856	the Unicode REPLACEMENT CHARACTER #xFFFD), decode an invalid byte
9863	that string. They must be multibyte strings if they contain a	9857	sequence into that string. They must be multibyte strings if they
9864	non-ASCII character.	9858	contain a non-ASCII character.
9865		9859
9866	If they are characters, an invalid sequence is decoded into the	9860	If the two arguments are characters, decode an invalid byte
9867	corresponding multibyte representation of the characters.	9861	sequence into the corresponding multibyte representation of the
		9862	characters.
9868		9863
9869	If they are Qignored, an invalid sequence is skipped on decoding.	9864	If they are Qignored, skip an invalid byte sequence.
9870		9865
9871	If HANDLE-8-BIT is Qt, an 1-byte invalid sequence is deoded into	9866	If HANDLE-8-BIT is Qt, decode a 1-byte invalid sequence into
9872	the corresponding eight-bit character.	9867	the corresponding eight-bit character.
9873		9868
9874	If HANDLE-OVER-UNI is Qt, a 4 or 5-byte invalid sequence that	9869	If HANDLE-OVER-UNI is Qt, decode a 4 or 5-byte invalid sequence
9875	follows Emacs' representation for an over-unicode character is	9870	that follows Emacs' representation for an over-unicode character
9876	decoded into the corresponding character.	9871	into the corresponding character.
9877		9872
9878	If they are Qnil, Qnil is returned if STRING has an invalid sequence. */	9873	If the two arguments are Qnil, return Qnil if STRING has an invalid
		9874	sequence. */
9879		9875
9880	Lisp_Object	9876	Lisp_Object
9881	decode_string_utf_8 (Lisp_Object string, Lisp_Object buffer,	9877	decode_string_utf_8 (Lisp_Object string, Lisp_Object buffer,
@@ -9883,7 +9879,7 @@ decode_string_utf_8 (Lisp_Object string, Lisp_Object buffer,
9883	Lisp_Object handle_over_uni)	9879	Lisp_Object handle_over_uni)
9884	{	9880	{
9885	/* This is like BYTES_BY_CHAR_HEAD, but it is assured that C >= 0x80	9881	/* This is like BYTES_BY_CHAR_HEAD, but it is assured that C >= 0x80
9886	and it returns 0 for invalid sequence. */	9882	and it returns 0 for an invalid sequence. */
9887	#define UTF_8_SEQUENCE_LENGTH(c) \	9883	#define UTF_8_SEQUENCE_LENGTH(c) \
9888	((c) < 0xC2 ? 0 \	9884	((c) < 0xC2 ? 0 \
9889	: (c) < 0xE0 ? 2 \	9885	: (c) < 0xE0 ? 2 \
@@ -9924,7 +9920,8 @@ decode_string_utf_8 (Lisp_Object string, Lisp_Object buffer,
9924	&& (len == 3	9920	&& (len == 3
9925	\|\| (UTF_8_EXTRA_OCTET_P (p[3])	9921	\|\| (UTF_8_EXTRA_OCTET_P (p[3])
9926	&& len == 4	9922	&& len == 4
9927	&& string_char (p, NULL, NULL) <= MAX_UNICODE_CHAR)))))	9923	&& (string_char (p, NULL, NULL)
		9924	<= MAX_UNICODE_CHAR))))))
9928	{	9925	{
9929	p += len;	9926	p += len;
9930	continue;	9927	continue;