aboutsummaryrefslogtreecommitdiffstats
path: root/src/coding.c
diff options
context:
space:
mode:
authorPaul Eggert2019-08-04 09:18:46 -0700
committerPaul Eggert2019-08-04 09:19:38 -0700
commit3c459e3b05e699736b849cb2c4687aef3ce6810b (patch)
tree99c05f435066d9c69044a51e7a3027159f3d1cb6 /src/coding.c
parent1b20993baaeffa5aa69b282862b5066960604aab (diff)
downloademacs-3c459e3b05e699736b849cb2c4687aef3ce6810b.tar.gz
emacs-3c459e3b05e699736b849cb2c4687aef3ce6810b.zip
Minor fix to recent coding.c change
* src/coding.c (get_buffer_gap_address): Don’t assume string or buffer length fits in int. Also, improve wording of comments.
Diffstat (limited to 'src/coding.c')
-rw-r--r--src/coding.c123
1 files changed, 60 insertions, 63 deletions
diff --git a/src/coding.c b/src/coding.c
index ab0e15119f3..877177b1882 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -9520,7 +9520,7 @@ code_convert_string_norecord (Lisp_Object string, Lisp_Object coding_system,
9520 NBYTES, enlarge the gap in advance. */ 9520 NBYTES, enlarge the gap in advance. */
9521 9521
9522static unsigned char * 9522static unsigned char *
9523get_buffer_gap_address (Lisp_Object buffer, int nbytes) 9523get_buffer_gap_address (Lisp_Object buffer, ptrdiff_t nbytes)
9524{ 9524{
9525 struct buffer *buf = XBUFFER (buffer); 9525 struct buffer *buf = XBUFFER (buffer);
9526 9526
@@ -9546,9 +9546,9 @@ get_buffer_gap_address (Lisp_Object buffer, int nbytes)
9546static unsigned char * 9546static unsigned char *
9547get_char_bytes (int c, int *len) 9547get_char_bytes (int c, int *len)
9548{ 9548{
9549 /* We uses two chaches considering the situation that 9549 /* Use two caches, since encode/decode_string_utf_8 are called
9550 encode/decode_string_utf_8 are called repeatedly with the same 9550 repeatedly with the same values for HANDLE_8_BIT and
9551 values for HANDLE_8_BIT and HANDLE_OVER_UNI arguments. */ 9551 HANDLE_OVER_UNI arguments. */
9552 static int chars[2]; 9552 static int chars[2];
9553 static unsigned char bytes[2][6]; 9553 static unsigned char bytes[2][6];
9554 static int nbytes[2]; 9554 static int nbytes[2];
@@ -9572,55 +9572,51 @@ get_char_bytes (int c, int *len)
9572 9572
9573/* Encode STRING by the coding system utf-8-unix. 9573/* Encode STRING by the coding system utf-8-unix.
9574 9574
9575 Even if :pre-write-conversion and :encode-translation-table 9575 Ignore any :pre-write-conversion and :encode-translation-table
9576 properties are put to that coding system, they are ignored. 9576 properties of that coding system.
9577 9577
9578 It ignores :pre-write-conversion and :encode-translation-table 9578 Assume that arguments have values as described below.
9579 propeties of that coding system. 9579 The validity must be assured by callers.
9580
9581 This function assumes that arguments have values as described
9582 below. The validity must be assured by callers.
9583 9580
9584 STRING is a multibyte string or an ASCII-only unibyte string. 9581 STRING is a multibyte string or an ASCII-only unibyte string.
9585 9582
9586 BUFFER is a unibyte buffer or Qnil. 9583 BUFFER is a unibyte buffer or Qnil.
9587 9584
9588 If BUFFER is a unibyte buffer, the encoding result of UTF-8 9585 If BUFFER is a unibyte buffer, insert the encoded result
9589 sequence is inserted after point of the buffer, and the number of 9586 after point of the buffer, and return the number of
9590 inserted characters is returned. Note that a caller should have 9587 inserted characters. The caller should have made BUFFER ready for
9591 made BUFFER ready for modifying in advance (e.g. by calling 9588 modifying in advance (e.g., by calling invalidate_buffer_caches).
9592 invalidate_buffer_caches).
9593 9589
9594 If BUFFER is Qnil, a unibyte string is made from the encodnig 9590 If BUFFER is Qnil, return a unibyte string from the encoded result.
9595 result of UTF-8 sequence, and it is returned. If NOCOPY and STRING 9591 If NOCOPY, and if STRING contains only Unicode characters (i.e.,
9596 contains only Unicode characters (i.e. the encoding does not change 9592 the encoding does not change the byte sequence), return STRING even
9597 the byte sequence), STRING is returned even if it is multibyte. 9593 if it is multibyte.
9598 9594
9599 HANDLE-8-BIT and HANDE-OVER-UNI specify how to handle a non-Unicode 9595 HANDLE-8-BIT and HANDLE-OVER-UNI specify how to handle a non-Unicode
9600 character. The former is for an eight-bit character (represented 9596 character. The former is for an eight-bit character (represented
9601 by 2-byte overlong sequence in multibyte STRING). The latter is 9597 by a 2-byte overlong sequence in a multibyte STRING). The latter is
9602 for an over-unicode character (a character whose code is greater 9598 for an over-Unicode character (a character whose code is greater
9603 than the maximum Unicode character 0x10FFFF, and is represented by 9599 than the maximum Unicode character 0x10FFFF, represented by a 4 or
9604 4 or 5-byte sequence in multibyte STRING). 9600 5-byte sequence in a multibyte STRING).
9605 9601
9606 If they are unibyte strings (typically "\357\277\275"; UTF-8 9602 If these two arguments are unibyte strings (typically
9607 sequence for the Unicode REPLACEMENT CHARACTER #xFFFD), a 9603 "\357\277\275", the UTF-8 sequence for the Unicode REPLACEMENT
9608 non-Unicode character is encoded into that sequence. 9604 CHARACTER #xFFFD), encode a non-Unicode character into that
9605 unibyte sequence.
9609 9606
9610 If they are characters, a non-Unicode chracters is encoded into the 9607 If the two arguments are characters, encode a non-Unicode
9611 corresponding UTF-8 sequences. 9608 character as if it was the argument.
9612 9609
9613 If they are Qignored, a non-Unicode character is skipped on 9610 If they are Qignored, skip a non-Unicode character.
9614 encoding.
9615 9611
9616 If HANDLE-8-BIT is Qt, an eight-bit character is encoded into one 9612 If HANDLE-8-BIT is Qt, encode an eight-bit character into one
9617 byte of the same value. 9613 byte of the same value.
9618 9614
9619 If HANDLE-OVER-UNI is Qt, an over-unicode character is encoded 9615 If HANDLE-OVER-UNI is Qt, encode an over-unicode character
9620 into the same 4 or 5-byte sequence. 9616 into the same 4 or 5-byte sequence.
9621 9617
9622 If they are Qnil, Qnil is returned if STRING has a non-Unicode 9618 If the two arguments are Qnil, return Qnil if STRING has a
9623 character. */ 9619 non-Unicode character. */
9624 9620
9625Lisp_Object 9621Lisp_Object
9626encode_string_utf_8 (Lisp_Object string, Lisp_Object buffer, 9622encode_string_utf_8 (Lisp_Object string, Lisp_Object buffer,
@@ -9633,7 +9629,7 @@ encode_string_utf_8 (Lisp_Object string, Lisp_Object buffer,
9633 return string; 9629 return string;
9634 9630
9635 ptrdiff_t num_8_bit = 0; /* number of eight-bit chars in STRING */ 9631 ptrdiff_t num_8_bit = 0; /* number of eight-bit chars in STRING */
9636 /* The following two vars are counted only if handle_over_uni is not Qt */ 9632 /* The following two vars are counted only if handle_over_uni is not Qt. */
9637 ptrdiff_t num_over_4 = 0; /* number of 4-byte non-Unicode chars in STRING */ 9633 ptrdiff_t num_over_4 = 0; /* number of 4-byte non-Unicode chars in STRING */
9638 ptrdiff_t num_over_5 = 0; /* number of 5-byte non-Unicode chars in STRING */ 9634 ptrdiff_t num_over_5 = 0; /* number of 5-byte non-Unicode chars in STRING */
9639 ptrdiff_t outbytes; /* number of bytes of decoding result. */ 9635 ptrdiff_t outbytes; /* number of bytes of decoding result. */
@@ -9828,25 +9824,23 @@ encode_string_utf_8 (Lisp_Object string, Lisp_Object buffer,
9828 9824
9829/* Decode STRING by the coding system utf-8-unix. 9825/* Decode STRING by the coding system utf-8-unix.
9830 9826
9831 Even if :post-read-conversion and :decode-translation-table 9827 Ignore any :post-read-conversion and :decode-translation-table
9832 properties are put to that coding system, they are ignored. 9828 properties of that coding system.
9833 9829
9834 This function assumes that arguments have values as described 9830 Assume that arguments have values as described below.
9835 below. The validity must be assured by callers. 9831 The validity must be assured by callers.
9836 9832
9837 STRING is a unibyte string or an ASCII-only multibyte string. 9833 STRING is a unibyte string or an ASCII-only multibyte string.
9838 9834
9839 BUFFER is a multibyte buffer or Qnil. 9835 BUFFER is a multibyte buffer or Qnil.
9840 9836
9841 If BUFFER is a multibyte buffer, the decoding result of Unicode 9837 If BUFFER is a multibyte buffer, insert the decoding result of
9842 characters are inserted after point of the buffer, and the number 9838 Unicode characters after point of the buffer, and return the number
9843 of inserted characters is returned. Note that a caller should have 9839 of inserted characters. The caller should have made BUFFER ready
9844 made BUFFER ready for modifying in advance (e.g. by calling 9840 for modifying in advance (e.g., by calling invalidate_buffer_caches).
9845 invalidate_buffer_caches).
9846 9841
9847 If BUFFER is Qnil, a multibyte string is made from the decoding 9842 If BUFFER is Qnil, return a multibyte string from the decoded result.
9848 result of Unicode characters, and it is returned. As a special 9843 As a special case, return STRING itself in the following cases:
9849 case, STRING itself is returned in the following cases:
9850 1. STRING contains only ASCII characters. 9844 1. STRING contains only ASCII characters.
9851 2. NOCOPY, and STRING contains only valid UTF-8 sequences. 9845 2. NOCOPY, and STRING contains only valid UTF-8 sequences.
9852 9846
@@ -9858,24 +9852,26 @@ encode_string_utf_8 (Lisp_Object string, Lisp_Object buffer,
9858 than #x10FFFF). Note that this function does not treat an overlong 9852 than #x10FFFF). Note that this function does not treat an overlong
9859 UTF-8 sequence as invalid. 9853 UTF-8 sequence as invalid.
9860 9854
9861 If they are strings (typically 1-char string of the Unicode 9855 If these two arguments are strings (typically a 1-char string of
9862 REPLACEMENT CHARACTER #xFFFD), an invalid sequence is decoded into 9856 the Unicode REPLACEMENT CHARACTER #xFFFD), decode an invalid byte
9863 that string. They must be multibyte strings if they contain a 9857 sequence into that string. They must be multibyte strings if they
9864 non-ASCII character. 9858 contain a non-ASCII character.
9865 9859
9866 If they are characters, an invalid sequence is decoded into the 9860 If the two arguments are characters, decode an invalid byte
9867 corresponding multibyte representation of the characters. 9861 sequence into the corresponding multibyte representation of the
9862 characters.
9868 9863
9869 If they are Qignored, an invalid sequence is skipped on decoding. 9864 If they are Qignored, skip an invalid byte sequence.
9870 9865
9871 If HANDLE-8-BIT is Qt, an 1-byte invalid sequence is deoded into 9866 If HANDLE-8-BIT is Qt, decode a 1-byte invalid sequence into
9872 the corresponding eight-bit character. 9867 the corresponding eight-bit character.
9873 9868
9874 If HANDLE-OVER-UNI is Qt, a 4 or 5-byte invalid sequence that 9869 If HANDLE-OVER-UNI is Qt, decode a 4 or 5-byte invalid sequence
9875 follows Emacs' representation for an over-unicode character is 9870 that follows Emacs' representation for an over-unicode character
9876 decoded into the corresponding character. 9871 into the corresponding character.
9877 9872
9878 If they are Qnil, Qnil is returned if STRING has an invalid sequence. */ 9873 If the two arguments are Qnil, return Qnil if STRING has an invalid
9874 sequence. */
9879 9875
9880Lisp_Object 9876Lisp_Object
9881decode_string_utf_8 (Lisp_Object string, Lisp_Object buffer, 9877decode_string_utf_8 (Lisp_Object string, Lisp_Object buffer,
@@ -9883,7 +9879,7 @@ decode_string_utf_8 (Lisp_Object string, Lisp_Object buffer,
9883 Lisp_Object handle_over_uni) 9879 Lisp_Object handle_over_uni)
9884{ 9880{
9885 /* This is like BYTES_BY_CHAR_HEAD, but it is assured that C >= 0x80 9881 /* This is like BYTES_BY_CHAR_HEAD, but it is assured that C >= 0x80
9886 and it returns 0 for invalid sequence. */ 9882 and it returns 0 for an invalid sequence. */
9887#define UTF_8_SEQUENCE_LENGTH(c) \ 9883#define UTF_8_SEQUENCE_LENGTH(c) \
9888 ((c) < 0xC2 ? 0 \ 9884 ((c) < 0xC2 ? 0 \
9889 : (c) < 0xE0 ? 2 \ 9885 : (c) < 0xE0 ? 2 \
@@ -9924,7 +9920,8 @@ decode_string_utf_8 (Lisp_Object string, Lisp_Object buffer,
9924 && (len == 3 9920 && (len == 3
9925 || (UTF_8_EXTRA_OCTET_P (p[3]) 9921 || (UTF_8_EXTRA_OCTET_P (p[3])
9926 && len == 4 9922 && len == 4
9927 && string_char (p, NULL, NULL) <= MAX_UNICODE_CHAR))))) 9923 && (string_char (p, NULL, NULL)
9924 <= MAX_UNICODE_CHAR))))))
9928 { 9925 {
9929 p += len; 9926 p += len;
9930 continue; 9927 continue;