about | summary | refs | log | tree | commit | diff | stats
path: root/src/coding.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/coding.c')
-rw-r--r-- src/coding.c 104
1 files changed, 89 insertions, 15 deletions
diff --git a/src/coding.c b/src/coding.c
index c10fb375672..fbe14f1695f 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -1,5 +1,5 @@
1/* Coding system handler (conversion, detection, etc). 1/* Coding system handler (conversion, detection, etc).
2 Copyright (C) 2001-2013 Free Software Foundation, Inc. 2 Copyright (C) 2001-2014 Free Software Foundation, Inc.
3 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 3 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
4 2005, 2006, 2007, 2008, 2009, 2010, 2011 4 2005, 2006, 2007, 2008, 2009, 2010, 2011
5 National Institute of Advanced Industrial Science and Technology (AIST) 5 National Institute of Advanced Industrial Science and Technology (AIST)
@@ -1202,7 +1202,7 @@ detect_coding_utf_8 (struct coding_system *coding,
1202 bool multibytep = coding->src_multibyte; 1202 bool multibytep = coding->src_multibyte;
1203 ptrdiff_t consumed_chars = 0; 1203 ptrdiff_t consumed_chars = 0;
1204 bool bom_found = 0; 1204 bool bom_found = 0;
1205 int nchars = coding->head_ascii; 1205 ptrdiff_t nchars = coding->head_ascii;
1206 int eol_seen = coding->eol_seen; 1206 int eol_seen = coding->eol_seen;
1207 1207
1208 detect_info->checked |= CATEGORY_MASK_UTF_8; 1208 detect_info->checked |= CATEGORY_MASK_UTF_8;
@@ -1300,6 +1300,7 @@ detect_coding_utf_8 (struct coding_system *coding,
1300 means that we found valid non-ASCII characters. */ 1300 means that we found valid non-ASCII characters. */
1301 detect_info->found |= CATEGORY_MASK_UTF_8_AUTO | CATEGORY_MASK_UTF_8_NOSIG; 1301 detect_info->found |= CATEGORY_MASK_UTF_8_AUTO | CATEGORY_MASK_UTF_8_NOSIG;
1302 } 1302 }
1303 coding->detected_utf8_bytes = src_base - coding->source;
1303 coding->detected_utf8_chars = nchars; 1304 coding->detected_utf8_chars = nchars;
1304 return 1; 1305 return 1;
1305} 1306}
@@ -2013,7 +2014,7 @@ emacs_mule_char (struct coding_system *coding, const unsigned char *src,
2013 int charset_ID; 2014 int charset_ID;
2014 unsigned code; 2015 unsigned code;
2015 int c; 2016 int c;
2016 int consumed_chars = 0; 2017 ptrdiff_t consumed_chars = 0;
2017 bool mseq_found = 0; 2018 bool mseq_found = 0;
2018 2019
2019 ONE_MORE_BYTE (c); 2020 ONE_MORE_BYTE (c);
@@ -3190,7 +3191,7 @@ detect_coding_iso_2022 (struct coding_system *coding,
3190 if (! single_shifting 3191 if (! single_shifting
3191 && ! (rejected & CATEGORY_MASK_ISO_8_2)) 3192 && ! (rejected & CATEGORY_MASK_ISO_8_2))
3192 { 3193 {
3193 int len = 1; 3194 ptrdiff_t len = 1;
3194 while (src < src_end) 3195 while (src < src_end)
3195 { 3196 {
3196 src_base = src; 3197 src_base = src;
@@ -4456,7 +4457,7 @@ encode_coding_iso_2022 (struct coding_system *coding)
4456 { 4457 {
4457 /* We have to produce designation sequences if any now. */ 4458 /* We have to produce designation sequences if any now. */
4458 unsigned char desig_buf[16]; 4459 unsigned char desig_buf[16];
4459 int nbytes; 4460 ptrdiff_t nbytes;
4460 ptrdiff_t offset; 4461 ptrdiff_t offset;
4461 4462
4462 charset_map_loaded = 0; 4463 charset_map_loaded = 0;
@@ -5199,7 +5200,7 @@ decode_coding_ccl (struct coding_system *coding)
5199 source_charbuf[i++] = *p++; 5200 source_charbuf[i++] = *p++;
5200 5201
5201 if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK) 5202 if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
5202 ccl->last_block = 1; 5203 ccl->last_block = true;
5203 /* As ccl_driver calls DECODE_CHAR, buffer may be relocated. */ 5204 /* As ccl_driver calls DECODE_CHAR, buffer may be relocated. */
5204 charset_map_loaded = 0; 5205 charset_map_loaded = 0;
5205 ccl_driver (ccl, source_charbuf, charbuf, i, charbuf_end - charbuf, 5206 ccl_driver (ccl, source_charbuf, charbuf, i, charbuf_end - charbuf,
@@ -5259,7 +5260,7 @@ encode_coding_ccl (struct coding_system *coding)
5259 CODING_GET_INFO (coding, attrs, charset_list); 5260 CODING_GET_INFO (coding, attrs, charset_list);
5260 if (coding->consumed_char == coding->src_chars 5261 if (coding->consumed_char == coding->src_chars
5261 && coding->mode & CODING_MODE_LAST_BLOCK) 5262 && coding->mode & CODING_MODE_LAST_BLOCK)
5262 ccl->last_block = 1; 5263 ccl->last_block = true;
5263 5264
5264 do 5265 do
5265 { 5266 {
@@ -5761,6 +5762,7 @@ setup_coding_system (Lisp_Object coding_system, struct coding_system *coding)
5761 coding->safe_charsets = SDATA (val); 5762 coding->safe_charsets = SDATA (val);
5762 coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs)); 5763 coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
5763 coding->carryover_bytes = 0; 5764 coding->carryover_bytes = 0;
5765 coding->raw_destination = 0;
5764 5766
5765 coding_type = CODING_ATTR_TYPE (attrs); 5767 coding_type = CODING_ATTR_TYPE (attrs);
5766 if (EQ (coding_type, Qundecided)) 5768 if (EQ (coding_type, Qundecided))
@@ -6210,7 +6212,7 @@ static Lisp_Object adjust_coding_eol_type (struct coding_system *coding,
6210 EOL_SEEN_LF, EOL_SEEN_CR, and EOL_SEEN_CRLF, but the value is 6212 EOL_SEEN_LF, EOL_SEEN_CR, and EOL_SEEN_CRLF, but the value is
6211 reliable only when all the source bytes are ASCII. */ 6213 reliable only when all the source bytes are ASCII. */
6212 6214
6213static int 6215static ptrdiff_t
6214check_ascii (struct coding_system *coding) 6216check_ascii (struct coding_system *coding)
6215{ 6217{
6216 const unsigned char *src, *end; 6218 const unsigned char *src, *end;
@@ -6282,12 +6284,12 @@ check_ascii (struct coding_system *coding)
6282 the value is reliable only when all the source bytes are valid 6284 the value is reliable only when all the source bytes are valid
6283 UTF-8. */ 6285 UTF-8. */
6284 6286
6285static int 6287static ptrdiff_t
6286check_utf_8 (struct coding_system *coding) 6288check_utf_8 (struct coding_system *coding)
6287{ 6289{
6288 const unsigned char *src, *end; 6290 const unsigned char *src, *end;
6289 int eol_seen; 6291 int eol_seen;
6290 int nchars = coding->head_ascii; 6292 ptrdiff_t nchars = coding->head_ascii;
6291 6293
6292 if (coding->head_ascii < 0) 6294 if (coding->head_ascii < 0)
6293 check_ascii (coding); 6295 check_ascii (coding);
@@ -7413,7 +7415,7 @@ decode_coding (struct coding_system *coding)
7413 coding->carryover_bytes = 0; 7415 coding->carryover_bytes = 0;
7414 if (coding->consumed < coding->src_bytes) 7416 if (coding->consumed < coding->src_bytes)
7415 { 7417 {
7416 int nbytes = coding->src_bytes - coding->consumed; 7418 ptrdiff_t nbytes = coding->src_bytes - coding->consumed;
7417 const unsigned char *src; 7419 const unsigned char *src;
7418 7420
7419 coding_set_source (coding); 7421 coding_set_source (coding);
@@ -7889,7 +7891,7 @@ decode_coding_gap (struct coding_system *coding,
7889 coding->dst_multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters)); 7891 coding->dst_multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
7890 7892
7891 coding->head_ascii = -1; 7893 coding->head_ascii = -1;
7892 coding->detected_utf8_chars = -1; 7894 coding->detected_utf8_bytes = coding->detected_utf8_chars = -1;
7893 coding->eol_seen = EOL_SEEN_NONE; 7895 coding->eol_seen = EOL_SEEN_NONE;
7894 if (CODING_REQUIRE_DETECTION (coding)) 7896 if (CODING_REQUIRE_DETECTION (coding))
7895 detect_coding (coding); 7897 detect_coding (coding);
@@ -7906,7 +7908,8 @@ decode_coding_gap (struct coding_system *coding,
7906 if (chars != bytes) 7908 if (chars != bytes)
7907 { 7909 {
7908 /* There exists a non-ASCII byte. */ 7910 /* There exists a non-ASCII byte. */
7909 if (EQ (CODING_ATTR_TYPE (attrs), Qutf_8)) 7911 if (EQ (CODING_ATTR_TYPE (attrs), Qutf_8)
7912 && coding->detected_utf8_bytes == coding->src_bytes)
7910 { 7913 {
7911 if (coding->detected_utf8_chars >= 0) 7914 if (coding->detected_utf8_chars >= 0)
7912 chars = coding->detected_utf8_chars; 7915 chars = coding->detected_utf8_chars;
@@ -8352,6 +8355,11 @@ encode_coding_object (struct coding_system *coding,
8352 { 8355 {
8353 if (BUFFERP (coding->dst_object)) 8356 if (BUFFERP (coding->dst_object))
8354 coding->dst_object = Fbuffer_string (); 8357 coding->dst_object = Fbuffer_string ();
8358 else if (coding->raw_destination)
8359 /* This is used to avoid creating a huge Lisp string.
8360 NOTE: the caller that sets `raw_destination' is also
8361 responsible for freeing the `destination' buffer. */
8362 coding->dst_object = Qnil;
8355 else 8363 else
8356 { 8364 {
8357 coding->dst_object 8365 coding->dst_object
@@ -8435,11 +8443,11 @@ from_unicode (Lisp_Object str)
8435} 8443}
8436 8444
8437Lisp_Object 8445Lisp_Object
8438from_unicode_buffer (const wchar_t* wstr) 8446from_unicode_buffer (const wchar_t *wstr)
8439{ 8447{
8440 return from_unicode ( 8448 return from_unicode (
8441 make_unibyte_string ( 8449 make_unibyte_string (
8442 (char*) wstr, 8450 (char *) wstr,
8443 /* we get one of the two final 0 bytes for free. */ 8451 /* we get one of the two final 0 bytes for free. */
8444 1 + sizeof (wchar_t) * wcslen (wstr))); 8452 1 + sizeof (wchar_t) * wcslen (wstr)));
8445} 8453}
@@ -9352,6 +9360,14 @@ code_convert_region (Lisp_Object start, Lisp_Object end,
9352 setup_coding_system (coding_system, &coding); 9360 setup_coding_system (coding_system, &coding);
9353 coding.mode |= CODING_MODE_LAST_BLOCK; 9361 coding.mode |= CODING_MODE_LAST_BLOCK;
9354 9362
9363 if (BUFFERP (dst_object) && !EQ (dst_object, src_object))
9364 {
9365 struct buffer *buf = XBUFFER (dst_object);
9366 ptrdiff_t buf_pt = BUF_PT (buf);
9367
9368 invalidate_buffer_caches (buf, buf_pt, buf_pt);
9369 }
9370
9355 if (encodep) 9371 if (encodep)
9356 encode_coding_object (&coding, src_object, from, from_byte, to, to_byte, 9372 encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
9357 dst_object); 9373 dst_object);
@@ -9441,6 +9457,15 @@ code_convert_string (Lisp_Object string, Lisp_Object coding_system,
9441 coding.mode |= CODING_MODE_LAST_BLOCK; 9457 coding.mode |= CODING_MODE_LAST_BLOCK;
9442 chars = SCHARS (string); 9458 chars = SCHARS (string);
9443 bytes = SBYTES (string); 9459 bytes = SBYTES (string);
9460
9461 if (BUFFERP (dst_object))
9462 {
9463 struct buffer *buf = XBUFFER (dst_object);
9464 ptrdiff_t buf_pt = BUF_PT (buf);
9465
9466 invalidate_buffer_caches (buf, buf_pt, buf_pt);
9467 }
9468
9444 if (encodep) 9469 if (encodep)
9445 encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object); 9470 encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
9446 else 9471 else
@@ -9467,6 +9492,55 @@ code_convert_string_norecord (Lisp_Object string, Lisp_Object coding_system,
9467 return code_convert_string (string, coding_system, Qt, encodep, 0, 1); 9492 return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
9468} 9493}
9469 9494
9495/* Encode or decode a file name, to or from a unibyte string suitable
9496 for passing to C library functions. */
9497Lisp_Object
9498decode_file_name (Lisp_Object fname)
9499{
9500#ifdef WINDOWSNT
9501 /* The w32 build pretends to use UTF-8 for file-name encoding, and
9502 converts the file names either to UTF-16LE or to the system ANSI
9503 codepage internally, depending on the underlying OS; see w32.c. */
9504 if (! NILP (Fcoding_system_p (Qutf_8)))
9505 return code_convert_string_norecord (fname, Qutf_8, 0);
9506 return fname;
9507#else /* !WINDOWSNT */
9508 if (! NILP (Vfile_name_coding_system))
9509 return code_convert_string_norecord (fname, Vfile_name_coding_system, 0);
9510 else if (! NILP (Vdefault_file_name_coding_system))
9511 return code_convert_string_norecord (fname,
9512 Vdefault_file_name_coding_system, 0);
9513 else
9514 return fname;
9515#endif
9516}
9517
9518Lisp_Object
9519encode_file_name (Lisp_Object fname)
9520{
9521 /* This is especially important during bootstrap and dumping, when
9522 file-name encoding is not yet known, and therefore any non-ASCII
9523 file names are unibyte strings, and could only be thrashed if we
9524 try to encode them. */
9525 if (!STRING_MULTIBYTE (fname))
9526 return fname;
9527#ifdef WINDOWSNT
9528 /* The w32 build pretends to use UTF-8 for file-name encoding, and
9529 converts the file names either to UTF-16LE or to the system ANSI
9530 codepage internally, depending on the underlying OS; see w32.c. */
9531 if (! NILP (Fcoding_system_p (Qutf_8)))
9532 return code_convert_string_norecord (fname, Qutf_8, 1);
9533 return fname;
9534#else /* !WINDOWSNT */
9535 if (! NILP (Vfile_name_coding_system))
9536 return code_convert_string_norecord (fname, Vfile_name_coding_system, 1);
9537 else if (! NILP (Vdefault_file_name_coding_system))
9538 return code_convert_string_norecord (fname,
9539 Vdefault_file_name_coding_system, 1);
9540 else
9541 return fname;
9542#endif
9543}
9470 9544
9471DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string, 9545DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
9472 2, 4, 0, 9546 2, 4, 0,