diff options
| author | Paul Eggert | 2019-08-04 09:18:46 -0700 |
|---|---|---|
| committer | Paul Eggert | 2019-08-04 09:19:38 -0700 |
| commit | 3c459e3b05e699736b849cb2c4687aef3ce6810b (patch) | |
| tree | 99c05f435066d9c69044a51e7a3027159f3d1cb6 /src/coding.c | |
| parent | 1b20993baaeffa5aa69b282862b5066960604aab (diff) | |
| download | emacs-3c459e3b05e699736b849cb2c4687aef3ce6810b.tar.gz emacs-3c459e3b05e699736b849cb2c4687aef3ce6810b.zip | |
Minor fix to recent coding.c change
* src/coding.c (get_buffer_gap_address):
Don’t assume string or buffer length fits in int.
Also, improve wording of comments.
Diffstat (limited to 'src/coding.c')
| -rw-r--r-- | src/coding.c | 123 |
1 files changed, 60 insertions, 63 deletions
diff --git a/src/coding.c b/src/coding.c index ab0e15119f3..877177b1882 100644 --- a/src/coding.c +++ b/src/coding.c | |||
| @@ -9520,7 +9520,7 @@ code_convert_string_norecord (Lisp_Object string, Lisp_Object coding_system, | |||
| 9520 | NBYTES, enlarge the gap in advance. */ | 9520 | NBYTES, enlarge the gap in advance. */ |
| 9521 | 9521 | ||
| 9522 | static unsigned char * | 9522 | static unsigned char * |
| 9523 | get_buffer_gap_address (Lisp_Object buffer, int nbytes) | 9523 | get_buffer_gap_address (Lisp_Object buffer, ptrdiff_t nbytes) |
| 9524 | { | 9524 | { |
| 9525 | struct buffer *buf = XBUFFER (buffer); | 9525 | struct buffer *buf = XBUFFER (buffer); |
| 9526 | 9526 | ||
| @@ -9546,9 +9546,9 @@ get_buffer_gap_address (Lisp_Object buffer, int nbytes) | |||
| 9546 | static unsigned char * | 9546 | static unsigned char * |
| 9547 | get_char_bytes (int c, int *len) | 9547 | get_char_bytes (int c, int *len) |
| 9548 | { | 9548 | { |
| 9549 | /* We uses two chaches considering the situation that | 9549 | /* Use two caches, since encode/decode_string_utf_8 are called |
| 9550 | encode/decode_string_utf_8 are called repeatedly with the same | 9550 | repeatedly with the same values for HANDLE_8_BIT and |
| 9551 | values for HANDLE_8_BIT and HANDLE_OVER_UNI arguments. */ | 9551 | HANDLE_OVER_UNI arguments. */ |
| 9552 | static int chars[2]; | 9552 | static int chars[2]; |
| 9553 | static unsigned char bytes[2][6]; | 9553 | static unsigned char bytes[2][6]; |
| 9554 | static int nbytes[2]; | 9554 | static int nbytes[2]; |
| @@ -9572,55 +9572,51 @@ get_char_bytes (int c, int *len) | |||
| 9572 | 9572 | ||
| 9573 | /* Encode STRING by the coding system utf-8-unix. | 9573 | /* Encode STRING by the coding system utf-8-unix. |
| 9574 | 9574 | ||
| 9575 | Even if :pre-write-conversion and :encode-translation-table | 9575 | Ignore any :pre-write-conversion and :encode-translation-table |
| 9576 | properties are put to that coding system, they are ignored. | 9576 | properties of that coding system. |
| 9577 | 9577 | ||
| 9578 | It ignores :pre-write-conversion and :encode-translation-table | 9578 | Assume that arguments have values as described below. |
| 9579 | propeties of that coding system. | 9579 | The validity must be assured by callers. |
| 9580 | |||
| 9581 | This function assumes that arguments have values as described | ||
| 9582 | below. The validity must be assured by callers. | ||
| 9583 | 9580 | ||
| 9584 | STRING is a multibyte string or an ASCII-only unibyte string. | 9581 | STRING is a multibyte string or an ASCII-only unibyte string. |
| 9585 | 9582 | ||
| 9586 | BUFFER is a unibyte buffer or Qnil. | 9583 | BUFFER is a unibyte buffer or Qnil. |
| 9587 | 9584 | ||
| 9588 | If BUFFER is a unibyte buffer, the encoding result of UTF-8 | 9585 | If BUFFER is a unibyte buffer, insert the encoded result |
| 9589 | sequence is inserted after point of the buffer, and the number of | 9586 | after point of the buffer, and return the number of |
| 9590 | inserted characters is returned. Note that a caller should have | 9587 | inserted characters. The caller should have made BUFFER ready for |
| 9591 | made BUFFER ready for modifying in advance (e.g. by calling | 9588 | modifying in advance (e.g., by calling invalidate_buffer_caches). |
| 9592 | invalidate_buffer_caches). | ||
| 9593 | 9589 | ||
| 9594 | If BUFFER is Qnil, a unibyte string is made from the encodnig | 9590 | If BUFFER is Qnil, return a unibyte string from the encoded result. |
| 9595 | result of UTF-8 sequence, and it is returned. If NOCOPY and STRING | 9591 | If NOCOPY, and if STRING contains only Unicode characters (i.e., |
| 9596 | contains only Unicode characters (i.e. the encoding does not change | 9592 | the encoding does not change the byte sequence), return STRING even |
| 9597 | the byte sequence), STRING is returned even if it is multibyte. | 9593 | if it is multibyte. |
| 9598 | 9594 | ||
| 9599 | HANDLE-8-BIT and HANDE-OVER-UNI specify how to handle a non-Unicode | 9595 | HANDLE-8-BIT and HANDLE-OVER-UNI specify how to handle a non-Unicode |
| 9600 | character. The former is for an eight-bit character (represented | 9596 | character. The former is for an eight-bit character (represented |
| 9601 | by 2-byte overlong sequence in multibyte STRING). The latter is | 9597 | by a 2-byte overlong sequence in a multibyte STRING). The latter is |
| 9602 | for an over-unicode character (a character whose code is greater | 9598 | for an over-Unicode character (a character whose code is greater |
| 9603 | than the maximum Unicode character 0x10FFFF, and is represented by | 9599 | than the maximum Unicode character 0x10FFFF, represented by a 4 or |
| 9604 | 4 or 5-byte sequence in multibyte STRING). | 9600 | 5-byte sequence in a multibyte STRING). |
| 9605 | 9601 | ||
| 9606 | If they are unibyte strings (typically "\357\277\275"; UTF-8 | 9602 | If these two arguments are unibyte strings (typically |
| 9607 | sequence for the Unicode REPLACEMENT CHARACTER #xFFFD), a | 9603 | "\357\277\275", the UTF-8 sequence for the Unicode REPLACEMENT |
| 9608 | non-Unicode character is encoded into that sequence. | 9604 | CHARACTER #xFFFD), encode a non-Unicode character into that |
| 9605 | unibyte sequence. | ||
| 9609 | 9606 | ||
| 9610 | If they are characters, a non-Unicode chracters is encoded into the | 9607 | If the two arguments are characters, encode a non-Unicode |
| 9611 | corresponding UTF-8 sequences. | 9608 | character as if it were the argument. |
| 9612 | 9609 | ||
| 9613 | If they are Qignored, a non-Unicode character is skipped on | 9610 | If they are Qignored, skip a non-Unicode character. |
| 9614 | encoding. | ||
| 9615 | 9611 | ||
| 9616 | If HANDLE-8-BIT is Qt, an eight-bit character is encoded into one | 9612 | If HANDLE-8-BIT is Qt, encode an eight-bit character into one |
| 9617 | byte of the same value. | 9613 | byte of the same value. |
| 9618 | 9614 | ||
| 9619 | If HANDLE-OVER-UNI is Qt, an over-unicode character is encoded | 9615 | If HANDLE-OVER-UNI is Qt, encode an over-unicode character |
| 9620 | into the same 4 or 5-byte sequence. | 9616 | into the same 4 or 5-byte sequence. |
| 9621 | 9617 | ||
| 9622 | If they are Qnil, Qnil is returned if STRING has a non-Unicode | 9618 | If the two arguments are Qnil, return Qnil if STRING has a |
| 9623 | character. */ | 9619 | non-Unicode character. */ |
| 9624 | 9620 | ||
| 9625 | Lisp_Object | 9621 | Lisp_Object |
| 9626 | encode_string_utf_8 (Lisp_Object string, Lisp_Object buffer, | 9622 | encode_string_utf_8 (Lisp_Object string, Lisp_Object buffer, |
| @@ -9633,7 +9629,7 @@ encode_string_utf_8 (Lisp_Object string, Lisp_Object buffer, | |||
| 9633 | return string; | 9629 | return string; |
| 9634 | 9630 | ||
| 9635 | ptrdiff_t num_8_bit = 0; /* number of eight-bit chars in STRING */ | 9631 | ptrdiff_t num_8_bit = 0; /* number of eight-bit chars in STRING */ |
| 9636 | /* The following two vars are counted only if handle_over_uni is not Qt */ | 9632 | /* The following two vars are counted only if handle_over_uni is not Qt. */ |
| 9637 | ptrdiff_t num_over_4 = 0; /* number of 4-byte non-Unicode chars in STRING */ | 9633 | ptrdiff_t num_over_4 = 0; /* number of 4-byte non-Unicode chars in STRING */ |
| 9638 | ptrdiff_t num_over_5 = 0; /* number of 5-byte non-Unicode chars in STRING */ | 9634 | ptrdiff_t num_over_5 = 0; /* number of 5-byte non-Unicode chars in STRING */ |
| 9639 | ptrdiff_t outbytes; /* number of bytes of encoding result. */ | 9635 | ptrdiff_t outbytes; /* number of bytes of encoding result. */ |
| @@ -9828,25 +9824,23 @@ encode_string_utf_8 (Lisp_Object string, Lisp_Object buffer, | |||
| 9828 | 9824 | ||
| 9829 | /* Decode STRING by the coding system utf-8-unix. | 9825 | /* Decode STRING by the coding system utf-8-unix. |
| 9830 | 9826 | ||
| 9831 | Even if :post-read-conversion and :decode-translation-table | 9827 | Ignore any :post-read-conversion and :decode-translation-table |
| 9832 | properties are put to that coding system, they are ignored. | 9828 | properties of that coding system. |
| 9833 | 9829 | ||
| 9834 | This function assumes that arguments have values as described | 9830 | Assume that arguments have values as described below. |
| 9835 | below. The validity must be assured by callers. | 9831 | The validity must be assured by callers. |
| 9836 | 9832 | ||
| 9837 | STRING is a unibyte string or an ASCII-only multibyte string. | 9833 | STRING is a unibyte string or an ASCII-only multibyte string. |
| 9838 | 9834 | ||
| 9839 | BUFFER is a multibyte buffer or Qnil. | 9835 | BUFFER is a multibyte buffer or Qnil. |
| 9840 | 9836 | ||
| 9841 | If BUFFER is a multibyte buffer, the decoding result of Unicode | 9837 | If BUFFER is a multibyte buffer, insert the decoding result of |
| 9842 | characters are inserted after point of the buffer, and the number | 9838 | Unicode characters after point of the buffer, and return the number |
| 9843 | of inserted characters is returned. Note that a caller should have | 9839 | of inserted characters. The caller should have made BUFFER ready |
| 9844 | made BUFFER ready for modifying in advance (e.g. by calling | 9840 | for modifying in advance (e.g., by calling invalidate_buffer_caches). |
| 9845 | invalidate_buffer_caches). | ||
| 9846 | 9841 | ||
| 9847 | If BUFFER is Qnil, a multibyte string is made from the decoding | 9842 | If BUFFER is Qnil, return a multibyte string from the decoded result. |
| 9848 | result of Unicode characters, and it is returned. As a special | 9843 | As a special case, return STRING itself in the following cases: |
| 9849 | case, STRING itself is returned in the following cases: | ||
| 9850 | 1. STRING contains only ASCII characters. | 9844 | 1. STRING contains only ASCII characters. |
| 9851 | 2. NOCOPY, and STRING contains only valid UTF-8 sequences. | 9845 | 2. NOCOPY, and STRING contains only valid UTF-8 sequences. |
| 9852 | 9846 | ||
| @@ -9858,24 +9852,26 @@ encode_string_utf_8 (Lisp_Object string, Lisp_Object buffer, | |||
| 9858 | than #x10FFFF). Note that this function does not treat an overlong | 9852 | than #x10FFFF). Note that this function does not treat an overlong |
| 9859 | UTF-8 sequence as invalid. | 9853 | UTF-8 sequence as invalid. |
| 9860 | 9854 | ||
| 9861 | If they are strings (typically 1-char string of the Unicode | 9855 | If these two arguments are strings (typically a 1-char string of |
| 9862 | REPLACEMENT CHARACTER #xFFFD), an invalid sequence is decoded into | 9856 | the Unicode REPLACEMENT CHARACTER #xFFFD), decode an invalid byte |
| 9863 | that string. They must be multibyte strings if they contain a | 9857 | sequence into that string. They must be multibyte strings if they |
| 9864 | non-ASCII character. | 9858 | contain a non-ASCII character. |
| 9865 | 9859 | ||
| 9866 | If they are characters, an invalid sequence is decoded into the | 9860 | If the two arguments are characters, decode an invalid byte |
| 9867 | corresponding multibyte representation of the characters. | 9861 | sequence into the corresponding multibyte representation of the |
| 9862 | characters. | ||
| 9868 | 9863 | ||
| 9869 | If they are Qignored, an invalid sequence is skipped on decoding. | 9864 | If they are Qignored, skip an invalid byte sequence. |
| 9870 | 9865 | ||
| 9871 | If HANDLE-8-BIT is Qt, an 1-byte invalid sequence is deoded into | 9866 | If HANDLE-8-BIT is Qt, decode a 1-byte invalid sequence into |
| 9872 | the corresponding eight-bit character. | 9867 | the corresponding eight-bit character. |
| 9873 | 9868 | ||
| 9874 | If HANDLE-OVER-UNI is Qt, a 4 or 5-byte invalid sequence that | 9869 | If HANDLE-OVER-UNI is Qt, decode a 4 or 5-byte invalid sequence |
| 9875 | follows Emacs' representation for an over-unicode character is | 9870 | that follows Emacs' representation for an over-unicode character |
| 9876 | decoded into the corresponding character. | 9871 | into the corresponding character. |
| 9877 | 9872 | ||
| 9878 | If they are Qnil, Qnil is returned if STRING has an invalid sequence. */ | 9873 | If the two arguments are Qnil, return Qnil if STRING has an invalid |
| 9874 | sequence. */ | ||
| 9879 | 9875 | ||
| 9880 | Lisp_Object | 9876 | Lisp_Object |
| 9881 | decode_string_utf_8 (Lisp_Object string, Lisp_Object buffer, | 9877 | decode_string_utf_8 (Lisp_Object string, Lisp_Object buffer, |
| @@ -9883,7 +9879,7 @@ decode_string_utf_8 (Lisp_Object string, Lisp_Object buffer, | |||
| 9883 | Lisp_Object handle_over_uni) | 9879 | Lisp_Object handle_over_uni) |
| 9884 | { | 9880 | { |
| 9885 | /* This is like BYTES_BY_CHAR_HEAD, but it is assured that C >= 0x80 | 9881 | /* This is like BYTES_BY_CHAR_HEAD, but it is assured that C >= 0x80 |
| 9886 | and it returns 0 for invalid sequence. */ | 9882 | and it returns 0 for an invalid sequence. */ |
| 9887 | #define UTF_8_SEQUENCE_LENGTH(c) \ | 9883 | #define UTF_8_SEQUENCE_LENGTH(c) \ |
| 9888 | ((c) < 0xC2 ? 0 \ | 9884 | ((c) < 0xC2 ? 0 \ |
| 9889 | : (c) < 0xE0 ? 2 \ | 9885 | : (c) < 0xE0 ? 2 \ |
| @@ -9924,7 +9920,8 @@ decode_string_utf_8 (Lisp_Object string, Lisp_Object buffer, | |||
| 9924 | && (len == 3 | 9920 | && (len == 3 |
| 9925 | || (UTF_8_EXTRA_OCTET_P (p[3]) | 9921 | || (UTF_8_EXTRA_OCTET_P (p[3]) |
| 9926 | && len == 4 | 9922 | && len == 4 |
| 9927 | && string_char (p, NULL, NULL) <= MAX_UNICODE_CHAR))))) | 9923 | && (string_char (p, NULL, NULL) |
| 9924 | <= MAX_UNICODE_CHAR)))))) | ||
| 9928 | { | 9925 | { |
| 9929 | p += len; | 9926 | p += len; |
| 9930 | continue; | 9927 | continue; |