diff options
Diffstat (limited to 'src/coding.c')
| -rw-r--r-- | src/coding.c | 104 |
1 files changed, 89 insertions, 15 deletions
diff --git a/src/coding.c b/src/coding.c index c10fb375672..fbe14f1695f 100644 --- a/src/coding.c +++ b/src/coding.c | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | /* Coding system handler (conversion, detection, etc). | 1 | /* Coding system handler (conversion, detection, etc). |
| 2 | Copyright (C) 2001-2013 Free Software Foundation, Inc. | 2 | Copyright (C) 2001-2014 Free Software Foundation, Inc. |
| 3 | Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, | 3 | Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, |
| 4 | 2005, 2006, 2007, 2008, 2009, 2010, 2011 | 4 | 2005, 2006, 2007, 2008, 2009, 2010, 2011 |
| 5 | National Institute of Advanced Industrial Science and Technology (AIST) | 5 | National Institute of Advanced Industrial Science and Technology (AIST) |
| @@ -1202,7 +1202,7 @@ detect_coding_utf_8 (struct coding_system *coding, | |||
| 1202 | bool multibytep = coding->src_multibyte; | 1202 | bool multibytep = coding->src_multibyte; |
| 1203 | ptrdiff_t consumed_chars = 0; | 1203 | ptrdiff_t consumed_chars = 0; |
| 1204 | bool bom_found = 0; | 1204 | bool bom_found = 0; |
| 1205 | int nchars = coding->head_ascii; | 1205 | ptrdiff_t nchars = coding->head_ascii; |
| 1206 | int eol_seen = coding->eol_seen; | 1206 | int eol_seen = coding->eol_seen; |
| 1207 | 1207 | ||
| 1208 | detect_info->checked |= CATEGORY_MASK_UTF_8; | 1208 | detect_info->checked |= CATEGORY_MASK_UTF_8; |
| @@ -1300,6 +1300,7 @@ detect_coding_utf_8 (struct coding_system *coding, | |||
| 1300 | means that we found a valid non-ASCII characters. */ | 1300 | means that we found a valid non-ASCII characters. */ |
| 1301 | detect_info->found |= CATEGORY_MASK_UTF_8_AUTO | CATEGORY_MASK_UTF_8_NOSIG; | 1301 | detect_info->found |= CATEGORY_MASK_UTF_8_AUTO | CATEGORY_MASK_UTF_8_NOSIG; |
| 1302 | } | 1302 | } |
| 1303 | coding->detected_utf8_bytes = src_base - coding->source; | ||
| 1303 | coding->detected_utf8_chars = nchars; | 1304 | coding->detected_utf8_chars = nchars; |
| 1304 | return 1; | 1305 | return 1; |
| 1305 | } | 1306 | } |
| @@ -2013,7 +2014,7 @@ emacs_mule_char (struct coding_system *coding, const unsigned char *src, | |||
| 2013 | int charset_ID; | 2014 | int charset_ID; |
| 2014 | unsigned code; | 2015 | unsigned code; |
| 2015 | int c; | 2016 | int c; |
| 2016 | int consumed_chars = 0; | 2017 | ptrdiff_t consumed_chars = 0; |
| 2017 | bool mseq_found = 0; | 2018 | bool mseq_found = 0; |
| 2018 | 2019 | ||
| 2019 | ONE_MORE_BYTE (c); | 2020 | ONE_MORE_BYTE (c); |
| @@ -3190,7 +3191,7 @@ detect_coding_iso_2022 (struct coding_system *coding, | |||
| 3190 | if (! single_shifting | 3191 | if (! single_shifting |
| 3191 | && ! (rejected & CATEGORY_MASK_ISO_8_2)) | 3192 | && ! (rejected & CATEGORY_MASK_ISO_8_2)) |
| 3192 | { | 3193 | { |
| 3193 | int len = 1; | 3194 | ptrdiff_t len = 1; |
| 3194 | while (src < src_end) | 3195 | while (src < src_end) |
| 3195 | { | 3196 | { |
| 3196 | src_base = src; | 3197 | src_base = src; |
| @@ -4456,7 +4457,7 @@ encode_coding_iso_2022 (struct coding_system *coding) | |||
| 4456 | { | 4457 | { |
| 4457 | /* We have to produce designation sequences if any now. */ | 4458 | /* We have to produce designation sequences if any now. */ |
| 4458 | unsigned char desig_buf[16]; | 4459 | unsigned char desig_buf[16]; |
| 4459 | int nbytes; | 4460 | ptrdiff_t nbytes; |
| 4460 | ptrdiff_t offset; | 4461 | ptrdiff_t offset; |
| 4461 | 4462 | ||
| 4462 | charset_map_loaded = 0; | 4463 | charset_map_loaded = 0; |
| @@ -5199,7 +5200,7 @@ decode_coding_ccl (struct coding_system *coding) | |||
| 5199 | source_charbuf[i++] = *p++; | 5200 | source_charbuf[i++] = *p++; |
| 5200 | 5201 | ||
| 5201 | if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK) | 5202 | if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK) |
| 5202 | ccl->last_block = 1; | 5203 | ccl->last_block = true; |
| 5203 | /* As ccl_driver calls DECODE_CHAR, buffer may be relocated. */ | 5204 | /* As ccl_driver calls DECODE_CHAR, buffer may be relocated. */ |
| 5204 | charset_map_loaded = 0; | 5205 | charset_map_loaded = 0; |
| 5205 | ccl_driver (ccl, source_charbuf, charbuf, i, charbuf_end - charbuf, | 5206 | ccl_driver (ccl, source_charbuf, charbuf, i, charbuf_end - charbuf, |
| @@ -5259,7 +5260,7 @@ encode_coding_ccl (struct coding_system *coding) | |||
| 5259 | CODING_GET_INFO (coding, attrs, charset_list); | 5260 | CODING_GET_INFO (coding, attrs, charset_list); |
| 5260 | if (coding->consumed_char == coding->src_chars | 5261 | if (coding->consumed_char == coding->src_chars |
| 5261 | && coding->mode & CODING_MODE_LAST_BLOCK) | 5262 | && coding->mode & CODING_MODE_LAST_BLOCK) |
| 5262 | ccl->last_block = 1; | 5263 | ccl->last_block = true; |
| 5263 | 5264 | ||
| 5264 | do | 5265 | do |
| 5265 | { | 5266 | { |
| @@ -5761,6 +5762,7 @@ setup_coding_system (Lisp_Object coding_system, struct coding_system *coding) | |||
| 5761 | coding->safe_charsets = SDATA (val); | 5762 | coding->safe_charsets = SDATA (val); |
| 5762 | coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs)); | 5763 | coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs)); |
| 5763 | coding->carryover_bytes = 0; | 5764 | coding->carryover_bytes = 0; |
| 5765 | coding->raw_destination = 0; | ||
| 5764 | 5766 | ||
| 5765 | coding_type = CODING_ATTR_TYPE (attrs); | 5767 | coding_type = CODING_ATTR_TYPE (attrs); |
| 5766 | if (EQ (coding_type, Qundecided)) | 5768 | if (EQ (coding_type, Qundecided)) |
| @@ -6210,7 +6212,7 @@ static Lisp_Object adjust_coding_eol_type (struct coding_system *coding, | |||
| 6210 | EOL_SEEN_LF, EOL_SEEN_CR, and EOL_SEEN_CRLF, but the value is | 6212 | EOL_SEEN_LF, EOL_SEEN_CR, and EOL_SEEN_CRLF, but the value is |
| 6211 | reliable only when all the source bytes are ASCII. */ | 6213 | reliable only when all the source bytes are ASCII. */ |
| 6212 | 6214 | ||
| 6213 | static int | 6215 | static ptrdiff_t |
| 6214 | check_ascii (struct coding_system *coding) | 6216 | check_ascii (struct coding_system *coding) |
| 6215 | { | 6217 | { |
| 6216 | const unsigned char *src, *end; | 6218 | const unsigned char *src, *end; |
| @@ -6282,12 +6284,12 @@ check_ascii (struct coding_system *coding) | |||
| 6282 | the value is reliable only when all the source bytes are valid | 6284 | the value is reliable only when all the source bytes are valid |
| 6283 | UTF-8. */ | 6285 | UTF-8. */ |
| 6284 | 6286 | ||
| 6285 | static int | 6287 | static ptrdiff_t |
| 6286 | check_utf_8 (struct coding_system *coding) | 6288 | check_utf_8 (struct coding_system *coding) |
| 6287 | { | 6289 | { |
| 6288 | const unsigned char *src, *end; | 6290 | const unsigned char *src, *end; |
| 6289 | int eol_seen; | 6291 | int eol_seen; |
| 6290 | int nchars = coding->head_ascii; | 6292 | ptrdiff_t nchars = coding->head_ascii; |
| 6291 | 6293 | ||
| 6292 | if (coding->head_ascii < 0) | 6294 | if (coding->head_ascii < 0) |
| 6293 | check_ascii (coding); | 6295 | check_ascii (coding); |
| @@ -7413,7 +7415,7 @@ decode_coding (struct coding_system *coding) | |||
| 7413 | coding->carryover_bytes = 0; | 7415 | coding->carryover_bytes = 0; |
| 7414 | if (coding->consumed < coding->src_bytes) | 7416 | if (coding->consumed < coding->src_bytes) |
| 7415 | { | 7417 | { |
| 7416 | int nbytes = coding->src_bytes - coding->consumed; | 7418 | ptrdiff_t nbytes = coding->src_bytes - coding->consumed; |
| 7417 | const unsigned char *src; | 7419 | const unsigned char *src; |
| 7418 | 7420 | ||
| 7419 | coding_set_source (coding); | 7421 | coding_set_source (coding); |
| @@ -7889,7 +7891,7 @@ decode_coding_gap (struct coding_system *coding, | |||
| 7889 | coding->dst_multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters)); | 7891 | coding->dst_multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters)); |
| 7890 | 7892 | ||
| 7891 | coding->head_ascii = -1; | 7893 | coding->head_ascii = -1; |
| 7892 | coding->detected_utf8_chars = -1; | 7894 | coding->detected_utf8_bytes = coding->detected_utf8_chars = -1; |
| 7893 | coding->eol_seen = EOL_SEEN_NONE; | 7895 | coding->eol_seen = EOL_SEEN_NONE; |
| 7894 | if (CODING_REQUIRE_DETECTION (coding)) | 7896 | if (CODING_REQUIRE_DETECTION (coding)) |
| 7895 | detect_coding (coding); | 7897 | detect_coding (coding); |
| @@ -7906,7 +7908,8 @@ decode_coding_gap (struct coding_system *coding, | |||
| 7906 | if (chars != bytes) | 7908 | if (chars != bytes) |
| 7907 | { | 7909 | { |
| 7908 | /* There exists a non-ASCII byte. */ | 7910 | /* There exists a non-ASCII byte. */ |
| 7909 | if (EQ (CODING_ATTR_TYPE (attrs), Qutf_8)) | 7911 | if (EQ (CODING_ATTR_TYPE (attrs), Qutf_8) |
| 7912 | && coding->detected_utf8_bytes == coding->src_bytes) | ||
| 7910 | { | 7913 | { |
| 7911 | if (coding->detected_utf8_chars >= 0) | 7914 | if (coding->detected_utf8_chars >= 0) |
| 7912 | chars = coding->detected_utf8_chars; | 7915 | chars = coding->detected_utf8_chars; |
| @@ -8352,6 +8355,11 @@ encode_coding_object (struct coding_system *coding, | |||
| 8352 | { | 8355 | { |
| 8353 | if (BUFFERP (coding->dst_object)) | 8356 | if (BUFFERP (coding->dst_object)) |
| 8354 | coding->dst_object = Fbuffer_string (); | 8357 | coding->dst_object = Fbuffer_string (); |
| 8358 | else if (coding->raw_destination) | ||
| 8359 | /* This is used to avoid creating huge Lisp string. | ||
| 8360 | NOTE: caller who sets `raw_destination' is also | ||
| 8361 | responsible for freeing `destination' buffer. */ | ||
| 8362 | coding->dst_object = Qnil; | ||
| 8355 | else | 8363 | else |
| 8356 | { | 8364 | { |
| 8357 | coding->dst_object | 8365 | coding->dst_object |
| @@ -8435,11 +8443,11 @@ from_unicode (Lisp_Object str) | |||
| 8435 | } | 8443 | } |
| 8436 | 8444 | ||
| 8437 | Lisp_Object | 8445 | Lisp_Object |
| 8438 | from_unicode_buffer (const wchar_t* wstr) | 8446 | from_unicode_buffer (const wchar_t *wstr) |
| 8439 | { | 8447 | { |
| 8440 | return from_unicode ( | 8448 | return from_unicode ( |
| 8441 | make_unibyte_string ( | 8449 | make_unibyte_string ( |
| 8442 | (char*) wstr, | 8450 | (char *) wstr, |
| 8443 | /* we get one of the two final 0 bytes for free. */ | 8451 | /* we get one of the two final 0 bytes for free. */ |
| 8444 | 1 + sizeof (wchar_t) * wcslen (wstr))); | 8452 | 1 + sizeof (wchar_t) * wcslen (wstr))); |
| 8445 | } | 8453 | } |
| @@ -9352,6 +9360,14 @@ code_convert_region (Lisp_Object start, Lisp_Object end, | |||
| 9352 | setup_coding_system (coding_system, &coding); | 9360 | setup_coding_system (coding_system, &coding); |
| 9353 | coding.mode |= CODING_MODE_LAST_BLOCK; | 9361 | coding.mode |= CODING_MODE_LAST_BLOCK; |
| 9354 | 9362 | ||
| 9363 | if (BUFFERP (dst_object) && !EQ (dst_object, src_object)) | ||
| 9364 | { | ||
| 9365 | struct buffer *buf = XBUFFER (dst_object); | ||
| 9366 | ptrdiff_t buf_pt = BUF_PT (buf); | ||
| 9367 | |||
| 9368 | invalidate_buffer_caches (buf, buf_pt, buf_pt); | ||
| 9369 | } | ||
| 9370 | |||
| 9355 | if (encodep) | 9371 | if (encodep) |
| 9356 | encode_coding_object (&coding, src_object, from, from_byte, to, to_byte, | 9372 | encode_coding_object (&coding, src_object, from, from_byte, to, to_byte, |
| 9357 | dst_object); | 9373 | dst_object); |
| @@ -9441,6 +9457,15 @@ code_convert_string (Lisp_Object string, Lisp_Object coding_system, | |||
| 9441 | coding.mode |= CODING_MODE_LAST_BLOCK; | 9457 | coding.mode |= CODING_MODE_LAST_BLOCK; |
| 9442 | chars = SCHARS (string); | 9458 | chars = SCHARS (string); |
| 9443 | bytes = SBYTES (string); | 9459 | bytes = SBYTES (string); |
| 9460 | |||
| 9461 | if (BUFFERP (dst_object)) | ||
| 9462 | { | ||
| 9463 | struct buffer *buf = XBUFFER (dst_object); | ||
| 9464 | ptrdiff_t buf_pt = BUF_PT (buf); | ||
| 9465 | |||
| 9466 | invalidate_buffer_caches (buf, buf_pt, buf_pt); | ||
| 9467 | } | ||
| 9468 | |||
| 9444 | if (encodep) | 9469 | if (encodep) |
| 9445 | encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object); | 9470 | encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object); |
| 9446 | else | 9471 | else |
| @@ -9467,6 +9492,55 @@ code_convert_string_norecord (Lisp_Object string, Lisp_Object coding_system, | |||
| 9467 | return code_convert_string (string, coding_system, Qt, encodep, 0, 1); | 9492 | return code_convert_string (string, coding_system, Qt, encodep, 0, 1); |
| 9468 | } | 9493 | } |
| 9469 | 9494 | ||
| 9495 | /* Encode or decode a file name, to or from a unibyte string suitable | ||
| 9496 | for passing to C library functions. */ | ||
| 9497 | Lisp_Object | ||
| 9498 | decode_file_name (Lisp_Object fname) | ||
| 9499 | { | ||
| 9500 | #ifdef WINDOWSNT | ||
| 9501 | /* The w32 build pretends to use UTF-8 for file-name encoding, and | ||
| 9502 | converts the file names either to UTF-16LE or to the system ANSI | ||
| 9503 | codepage internally, depending on the underlying OS; see w32.c. */ | ||
| 9504 | if (! NILP (Fcoding_system_p (Qutf_8))) | ||
| 9505 | return code_convert_string_norecord (fname, Qutf_8, 0); | ||
| 9506 | return fname; | ||
| 9507 | #else /* !WINDOWSNT */ | ||
| 9508 | if (! NILP (Vfile_name_coding_system)) | ||
| 9509 | return code_convert_string_norecord (fname, Vfile_name_coding_system, 0); | ||
| 9510 | else if (! NILP (Vdefault_file_name_coding_system)) | ||
| 9511 | return code_convert_string_norecord (fname, | ||
| 9512 | Vdefault_file_name_coding_system, 0); | ||
| 9513 | else | ||
| 9514 | return fname; | ||
| 9515 | #endif | ||
| 9516 | } | ||
| 9517 | |||
| 9518 | Lisp_Object | ||
| 9519 | encode_file_name (Lisp_Object fname) | ||
| 9520 | { | ||
| 9521 | /* This is especially important during bootstrap and dumping, when | ||
| 9522 | file-name encoding is not yet known, and therefore any non-ASCII | ||
| 9523 | file names are unibyte strings, and could only be thrashed if we | ||
| 9524 | try to encode them. */ | ||
| 9525 | if (!STRING_MULTIBYTE (fname)) | ||
| 9526 | return fname; | ||
| 9527 | #ifdef WINDOWSNT | ||
| 9528 | /* The w32 build pretends to use UTF-8 for file-name encoding, and | ||
| 9529 | converts the file names either to UTF-16LE or to the system ANSI | ||
| 9530 | codepage internally, depending on the underlying OS; see w32.c. */ | ||
| 9531 | if (! NILP (Fcoding_system_p (Qutf_8))) | ||
| 9532 | return code_convert_string_norecord (fname, Qutf_8, 1); | ||
| 9533 | return fname; | ||
| 9534 | #else /* !WINDOWSNT */ | ||
| 9535 | if (! NILP (Vfile_name_coding_system)) | ||
| 9536 | return code_convert_string_norecord (fname, Vfile_name_coding_system, 1); | ||
| 9537 | else if (! NILP (Vdefault_file_name_coding_system)) | ||
| 9538 | return code_convert_string_norecord (fname, | ||
| 9539 | Vdefault_file_name_coding_system, 1); | ||
| 9540 | else | ||
| 9541 | return fname; | ||
| 9542 | #endif | ||
| 9543 | } | ||
| 9470 | 9544 | ||
| 9471 | DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string, | 9545 | DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string, |
| 9472 | 2, 4, 0, | 9546 | 2, 4, 0, |