From d3411f89d34bd1009cae738f917abf477be09882 Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Mon, 18 Jul 2011 23:07:07 -0700 Subject: Use ptrdiff_t for hash table indexes. * category.c (hash_get_category_set): * ccl.c (ccl_driver): * charset.h (struct charset.hash_index, CHECK_CHARSET_GET_ID): * coding.c (coding_system_charset_list, detect_coding_system): * coding.h (struct coding_system.id): * composite.c (get_composition_id, gstring_lookup_cache): * fns.c (hash_lookup, hash_put, Fgethash, Fputhash): * image.c (xpm_get_color_table_h): * lisp.h (hash_lookup, hash_put): * minibuf.c (Ftest_completion): Use ptrdiff_t for hash table indexes, not int (which is too narrow, on 64-bit hosts) or EMACS_INT (which is too wide, on 32-bit --with-wide-int hosts). --- src/coding.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/coding.c') diff --git a/src/coding.c b/src/coding.c index 65c8a767c2b..73a4bbc5e25 100644 --- a/src/coding.c +++ b/src/coding.c @@ -5838,7 +5838,7 @@ coding_charset_list (struct coding_system *coding) Lisp_Object coding_system_charset_list (Lisp_Object coding_system) { - int id; + ptrdiff_t id; Lisp_Object attrs, charset_list; CHECK_CODING_SYSTEM_GET_ID (coding_system, id); @@ -8076,7 +8076,7 @@ detect_coding_system (const unsigned char *src, Lisp_Object attrs, eol_type; Lisp_Object val = Qnil; struct coding_system coding; - int id; + ptrdiff_t id; struct coding_detection_info detect_info; enum coding_category base_category; int null_byte_found = 0, eight_bit_found = 0; -- cgit v1.2.1 From 5d009b3a6a39627db04094e8164df6bb6231b991 Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Thu, 28 Jul 2011 13:31:29 -0700 Subject: * coding.c: Integer and memory overflow fixes. (produce_chars): Redo buffer-overflow calculations to avoid unnecessary integer overflow. Check for size overflow. (encode_coding_object): Don't update size until xmalloc succeeds. 
--- src/coding.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'src/coding.c') diff --git a/src/coding.c b/src/coding.c index 73a4bbc5e25..5fd59d394d9 100644 --- a/src/coding.c +++ b/src/coding.c @@ -6683,8 +6683,12 @@ produce_chars (struct coding_system *coding, Lisp_Object translation_table, break; } - if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end) + if ((dst_end - dst) / MAX_MULTIBYTE_LENGTH < to_nchars) { + if (((min (PTRDIFF_MAX, SIZE_MAX) - (buf_end - buf)) + / MAX_MULTIBYTE_LENGTH) + < to_nchars) + memory_full (SIZE_MAX); dst = alloc_destination (coding, buf_end - buf + MAX_MULTIBYTE_LENGTH * to_nchars, @@ -7888,11 +7892,10 @@ encode_coding_object (struct coding_system *coding, } else if (EQ (dst_object, Qt)) { + ptrdiff_t dst_bytes = max (1, coding->src_chars); coding->dst_object = Qnil; - coding->dst_bytes = coding->src_chars; - if (coding->dst_bytes == 0) - coding->dst_bytes = 1; - coding->destination = (unsigned char *) xmalloc (coding->dst_bytes); + coding->destination = (unsigned char *) xmalloc (dst_bytes); + coding->dst_bytes = dst_bytes; coding->dst_multibyte = 0; } else -- cgit v1.2.1 From a0241d014e3d762bc7818d6cd8fd7c90bab3c538 Mon Sep 17 00:00:00 2001 From: Kenichi Handa Date: Mon, 7 Nov 2011 10:57:07 +0900 Subject: Set members of the struct coding_system before accessing them (bug#9910,9911,9912). 
--- src/coding.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/coding.c') diff --git a/src/coding.c b/src/coding.c index 5fd59d394d9..79908e9b29b 100644 --- a/src/coding.c +++ b/src/coding.c @@ -1043,7 +1043,7 @@ coding_set_destination (struct coding_system *coding) { if (BUFFERP (coding->dst_object)) { - if (coding->src_pos < 0) + if (BUFFERP (coding->src_object) && coding->src_pos < 0) { coding->destination = BEG_ADDR + coding->dst_pos_byte - BEG_BYTE; coding->dst_bytes = (GAP_END_ADDR -- cgit v1.2.1 From 6d5eb5b0d2e50b0dd153a988cc52492cb77fc333 Mon Sep 17 00:00:00 2001 From: Stefan Monnier Date: Sun, 4 Dec 2011 10:46:07 -0500 Subject: Don't macro-inline non-performance-critical code. * src/eval.c (process_quit_flag): New function. * src/lisp.h (QUIT): Use it. --- src/coding.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/coding.c') diff --git a/src/coding.c b/src/coding.c index 79908e9b29b..f3506da7358 100644 --- a/src/coding.c +++ b/src/coding.c @@ -9208,7 +9208,7 @@ frame's terminal device. */) = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1)); Lisp_Object coding_system = CODING_ID_NAME (terminal_coding->id); - /* For backward compatibility, return nil if it is `undecided'. */ + /* For backward compatibility, return nil if it is `undecided'. */ return (! EQ (coding_system, Qundecided) ? coding_system : Qnil); } -- cgit v1.2.1 From 5eb05ea3f926fcf31163e87d19a94a1674449196 Mon Sep 17 00:00:00 2001 From: Kenichi Handa Date: Mon, 5 Dec 2011 15:39:26 +0900 Subject: Pay attention to the buffer relocation on encoding (Bug#9318). 
--- src/coding.c | 172 +++++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 132 insertions(+), 40 deletions(-) (limited to 'src/coding.c') diff --git a/src/coding.c b/src/coding.c index 79908e9b29b..99a766a915a 100644 --- a/src/coding.c +++ b/src/coding.c @@ -847,16 +847,16 @@ static int encode_coding_ccl (struct coding_system *); static void decode_coding_raw_text (struct coding_system *); static int encode_coding_raw_text (struct coding_system *); -static void coding_set_source (struct coding_system *); -static void coding_set_destination (struct coding_system *); +static EMACS_INT coding_set_source (struct coding_system *); +static EMACS_INT coding_set_destination (struct coding_system *); static void coding_alloc_by_realloc (struct coding_system *, EMACS_INT); static void coding_alloc_by_making_gap (struct coding_system *, EMACS_INT, EMACS_INT); static unsigned char *alloc_destination (struct coding_system *, EMACS_INT, unsigned char *); static void setup_iso_safe_charsets (Lisp_Object); -static unsigned char *encode_designation_at_bol (struct coding_system *, - int *, unsigned char *); +static int encode_designation_at_bol (struct coding_system *, + int *, int *, unsigned char *); static int detect_eol (const unsigned char *, EMACS_INT, enum coding_category); static Lisp_Object adjust_coding_eol_type (struct coding_system *, int); @@ -915,27 +915,68 @@ record_conversion_result (struct coding_system *coding, } } -/* This wrapper macro is used to preserve validity of pointers into - buffer text across calls to decode_char, which could cause - relocation of buffers if it loads a charset map, because loading a - charset map allocates large structures. */ +/* These wrapper macros are used to preserve validity of pointers into + buffer text across calls to decode_char, encode_char, etc, which + could cause relocation of buffers if it loads a charset map, + because loading a charset map allocates large structures. 
*/ + #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \ do { \ + EMACS_INT offset; \ + \ charset_map_loaded = 0; \ c = DECODE_CHAR (charset, code); \ - if (charset_map_loaded) \ + if (charset_map_loaded \ + && (offset = coding_set_source (coding))) \ { \ - const unsigned char *orig = coding->source; \ - EMACS_INT offset; \ - \ - coding_set_source (coding); \ - offset = coding->source - orig; \ src += offset; \ src_base += offset; \ src_end += offset; \ } \ } while (0) +#define CODING_ENCODE_CHAR(coding, dst, dst_end, charset, c, code) \ + do { \ + EMACS_INT offset; \ + \ + charset_map_loaded = 0; \ + code = ENCODE_CHAR (charset, c); \ + if (charset_map_loaded \ + && (offset = coding_set_destination (coding))) \ + { \ + dst += offset; \ + dst_end += offset; \ + } \ + } while (0) + +#define CODING_CHAR_CHARSET(coding, dst, dst_end, c, charset_list, code_return, charset) \ + do { \ + EMACS_INT offset; \ + \ + charset_map_loaded = 0; \ + charset = char_charset (c, charset_list, code_return); \ + if (charset_map_loaded \ + && (offset = coding_set_destination (coding))) \ + { \ + dst += offset; \ + dst_end += offset; \ + } \ + } while (0) + +#define CODING_CHAR_CHARSET_P(coding, dst, dst_end, c, charset, result) \ + do { \ + EMACS_INT offset; \ + \ + charset_map_loaded = 0; \ + result = CHAR_CHARSET_P (c, charset); \ + if (charset_map_loaded \ + && (offset = coding_set_destination (coding))) \ + { \ + dst += offset; \ + dst_end += offset; \ + } \ + } while (0) + /* If there are at least BYTES length of room at dst, allocate memory for coding->destination and update dst and dst_end. We don't have @@ -1015,9 +1056,14 @@ record_conversion_result (struct coding_system *coding, | ((p)[-1] & 0x3F)))) -static void +/* Update coding->source from coding->src_object, and return how many + bytes coding->source was changed. 
*/ + +static EMACS_INT coding_set_source (struct coding_system *coding) { + const unsigned char *orig = coding->source; + if (BUFFERP (coding->src_object)) { struct buffer *buf = XBUFFER (coding->src_object); @@ -1036,11 +1082,18 @@ coding_set_source (struct coding_system *coding) /* Otherwise, the source is C string and is never relocated automatically. Thus we don't have to update anything. */ } + return coding->source - orig; } -static void + +/* Update coding->destination from coding->dst_object, and return how + many bytes coding->destination was changed. */ + +static EMACS_INT coding_set_destination (struct coding_system *coding) { + const unsigned char *orig = coding->destination; + if (BUFFERP (coding->dst_object)) { if (BUFFERP (coding->src_object) && coding->src_pos < 0) @@ -1065,6 +1118,7 @@ coding_set_destination (struct coding_system *coding) /* Otherwise, the destination is C string and is never relocated automatically. Thus we don't have to update anything. */ } + return coding->destination - orig; } @@ -2650,14 +2704,19 @@ encode_coding_emacs_mule (struct coding_system *coding) if (preferred_charset_id >= 0) { + int result; + charset = CHARSET_FROM_ID (preferred_charset_id); - if (CHAR_CHARSET_P (c, charset)) + CODING_CHAR_CHARSET_P (coding, dst, dst_end, c, charset, result); + if (result) code = ENCODE_CHAR (charset, c); else - charset = char_charset (c, charset_list, &code); + CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list, + &code, charset); } else - charset = char_charset (c, charset_list, &code); + CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list, + &code, charset); if (! 
charset) { c = coding->default_char; @@ -2666,7 +2725,8 @@ encode_coding_emacs_mule (struct coding_system *coding) EMIT_ONE_ASCII_BYTE (c); continue; } - charset = char_charset (c, charset_list, &code); + CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list, + &code, charset); } dimension = CHARSET_DIMENSION (charset); emacs_mule_id = CHARSET_EMACS_MULE_ID (charset); @@ -4185,7 +4245,8 @@ decode_coding_iso_2022 (struct coding_system *coding) #define ENCODE_ISO_CHARACTER(charset, c) \ do { \ - int code = ENCODE_CHAR ((charset), (c)); \ + int code; \ + CODING_ENCODE_CHAR (coding, dst, dst_end, (charset), (c), code); \ \ if (CHARSET_DIMENSION (charset) == 1) \ ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code); \ @@ -4283,15 +4344,19 @@ encode_invocation_designation (struct charset *charset, /* Produce designation sequences of charsets in the line started from - SRC to a place pointed by DST, and return updated DST. + CHARBUF to a place pointed by DST, and return the number of + produced bytes. DST should not directly point a buffer text area + which may be relocated by char_charset call. If the current block ends before any end-of-line, we may fail to find all the necessary designations. */ -static unsigned char * -encode_designation_at_bol (struct coding_system *coding, int *charbuf, +static int +encode_designation_at_bol (struct coding_system *coding, + int *charbuf, int *charbuf_end, unsigned char *dst) { + unsigned char *orig; struct charset *charset; /* Table of charsets to be designated to each graphic register. 
*/ int r[4]; @@ -4309,7 +4374,7 @@ encode_designation_at_bol (struct coding_system *coding, int *charbuf, for (reg = 0; reg < 4; reg++) r[reg] = -1; - while (found < 4) + while (charbuf < charbuf_end && found < 4) { int id; @@ -4334,7 +4399,7 @@ encode_designation_at_bol (struct coding_system *coding, int *charbuf, ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding); } - return dst; + return dst - orig; } /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */ @@ -4378,13 +4443,26 @@ encode_coding_iso_2022 (struct coding_system *coding) if (bol_designation) { - unsigned char *dst_prev = dst; - /* We have to produce designation sequences if any now. */ - dst = encode_designation_at_bol (coding, charbuf, dst); - bol_designation = 0; + unsigned char desig_buf[16]; + int nbytes; + EMACS_INT offset; + + charset_map_loaded = 0; + nbytes = encode_designation_at_bol (coding, charbuf, charbuf_end, + desig_buf); + if (charset_map_loaded + && (offset = coding_set_destination (coding))) + { + dst += offset; + dst_end += offset; + } + memcpy (dst, desig_buf, nbytes); + dst += nbytes; /* We are sure that designation sequences are all ASCII bytes. */ - produced_chars += dst - dst_prev; + produced_chars += nbytes; + bol_designation = 0; + ASSURE_DESTINATION (safe_room); } c = *charbuf++; @@ -4455,12 +4533,17 @@ encode_coding_iso_2022 (struct coding_system *coding) if (preferred_charset_id >= 0) { + int result; + charset = CHARSET_FROM_ID (preferred_charset_id); - if (! CHAR_CHARSET_P (c, charset)) - charset = char_charset (c, charset_list, NULL); + CODING_CHAR_CHARSET_P (coding, dst, dst_end, c, charset, result); + if (! 
result) + CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list, + NULL, charset); } else - charset = char_charset (c, charset_list, NULL); + CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list, + NULL, charset); if (!charset) { if (coding->mode & CODING_MODE_SAFE_ENCODING) @@ -4471,7 +4554,8 @@ encode_coding_iso_2022 (struct coding_system *coding) else { c = coding->default_char; - charset = char_charset (c, charset_list, NULL); + CODING_CHAR_CHARSET (coding, dst, dst_end, c, + charset_list, NULL, charset); } } ENCODE_ISO_CHARACTER (charset, c); @@ -4897,7 +4981,9 @@ encode_coding_sjis (struct coding_system *coding) else { unsigned code; - struct charset *charset = char_charset (c, charset_list, &code); + struct charset *charset; + CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list, + &code, charset); if (!charset) { @@ -4909,7 +4995,8 @@ encode_coding_sjis (struct coding_system *coding) else { c = coding->default_char; - charset = char_charset (c, charset_list, &code); + CODING_CHAR_CHARSET (coding, dst, dst_end, c, + charset_list, &code, charset); } } if (code == CHARSET_INVALID_CODE (charset)) @@ -4984,7 +5071,9 @@ encode_coding_big5 (struct coding_system *coding) else { unsigned code; - struct charset *charset = char_charset (c, charset_list, &code); + struct charset *charset; + CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list, + &code, charset); if (! 
charset) { @@ -4996,7 +5085,8 @@ encode_coding_big5 (struct coding_system *coding) else { c = coding->default_char; - charset = char_charset (c, charset_list, &code); + CODING_CHAR_CHARSET (coding, dst, dst_end, c, + charset_list, &code, charset); } } if (code == CHARSET_INVALID_CODE (charset)) @@ -5572,7 +5662,9 @@ encode_coding_charset (struct coding_system *coding) } else { - charset = char_charset (c, charset_list, &code); + CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list, + &code, charset); + if (charset) { if (CHARSET_DIMENSION (charset) == 1) -- cgit v1.2.1 From 75a3b399a3851210b3384a49587477af165f67e3 Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Mon, 5 Dec 2011 01:05:10 -0800 Subject: * coding.c (encode_designation_at_bol): Don't use uninitialized local variable (Bug#9318). --- src/coding.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/coding.c') diff --git a/src/coding.c b/src/coding.c index b1743076630..537f69ebe1f 100644 --- a/src/coding.c +++ b/src/coding.c @@ -4356,7 +4356,7 @@ encode_designation_at_bol (struct coding_system *coding, int *charbuf, int *charbuf_end, unsigned char *dst) { - unsigned char *orig; + unsigned char *orig = dst; struct charset *charset; /* Table of charsets to be designated to each graphic register. */ int r[4]; -- cgit v1.2.1 From 76470ad1a01b2d38137fe5fb9e2a18446e7d4536 Mon Sep 17 00:00:00 2001 From: Kenichi Handa Date: Thu, 8 Dec 2011 14:54:20 +0900 Subject: coding.c (encode_coding_ccl): Check (charbuf < charbuf_end) after the loop to call ccl_driver at least once. 
--- src/coding.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/coding.c') diff --git a/src/coding.c b/src/coding.c index 537f69ebe1f..50268099413 100644 --- a/src/coding.c +++ b/src/coding.c @@ -5244,7 +5244,7 @@ encode_coding_ccl (struct coding_system *coding) && coding->mode & CODING_MODE_LAST_BLOCK) ccl->last_block = 1; - while (charbuf < charbuf_end) + do { ccl_driver (ccl, charbuf, destination_charbuf, charbuf_end - charbuf, 1024, charset_list); @@ -5266,6 +5266,7 @@ encode_coding_ccl (struct coding_system *coding) || ccl->status == CCL_STAT_INVALID_CMD) break; } + while (charbuf < charbuf_end); switch (ccl->status) { -- cgit v1.2.1 From 3633e3aa59f8df4e12f1b509c952e17dbf205819 Mon Sep 17 00:00:00 2001 From: Kenichi Handa Date: Sun, 11 Dec 2011 21:08:51 +0900 Subject: coding.c (Funencodable_char_position): Pay attention to the buffer text relocation (Bug#9389). --- src/coding.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'src/coding.c') diff --git a/src/coding.c b/src/coding.c index 50268099413..e15d725af3a 100644 --- a/src/coding.c +++ b/src/coding.c @@ -8756,6 +8756,7 @@ to the string. */) } positions = Qnil; + charset_map_loaded = 0; while (1) { int c; @@ -8783,6 +8784,16 @@ to the string. */) } from++; + if (charset_map_loaded && NILP (string)) + { + p = CHAR_POS_ADDR (from); + pend = CHAR_POS_ADDR (to); + if (from < GPT && to >= GPT) + stop = GPT_ADDR; + else + stop = pend; + charset_map_loaded = 0; + } } return (NILP (count) ? Fcar (positions) : Fnreverse (positions)); -- cgit v1.2.1 From 0e5317f7e42c1008c40305a94afcdff509a475be Mon Sep 17 00:00:00 2001 From: Kenichi Handa Date: Wed, 28 Dec 2011 16:55:49 +0900 Subject: coding.c: (Fdefine_coding_system_internal): Make an utf-8 base coding-system ASCII compatible only when it does not produce BOM on encoding (Bug#10383). 
--- src/coding.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/coding.c') diff --git a/src/coding.c b/src/coding.c index e15d725af3a..2db5385c932 100644 --- a/src/coding.c +++ b/src/coding.c @@ -9999,8 +9999,6 @@ usage: (define-coding-system-internal ...) */) { Lisp_Object bom; - CODING_ATTR_ASCII_COMPAT (attrs) = Qt; - if (nargs < coding_arg_utf8_max) goto short_args; @@ -10014,6 +10012,8 @@ usage: (define-coding-system-internal ...) */) CHECK_CODING_SYSTEM (val); } ASET (attrs, coding_attr_utf_bom, bom); + if (NILP (bom)) + CODING_ATTR_ASCII_COMPAT (attrs) = Qt; category = (CONSP (bom) ? coding_category_utf_8_auto : NILP (bom) ? coding_category_utf_8_nosig -- cgit v1.2.1 From acaf905b1130aae80fa59d2c861ffd4c8eb75486 Mon Sep 17 00:00:00 2001 From: Glenn Morris Date: Thu, 5 Jan 2012 01:46:05 -0800 Subject: Add 2012 to FSF copyright years for Emacs files --- src/coding.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/coding.c') diff --git a/src/coding.c b/src/coding.c index 2db5385c932..5c3048f95e8 100644 --- a/src/coding.c +++ b/src/coding.c @@ -1,5 +1,5 @@ /* Coding system handler (conversion, detection, etc). - Copyright (C) 2001-2011 Free Software Foundation, Inc. + Copyright (C) 2001-2012 Free Software Foundation, Inc. Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 National Institute of Advanced Industrial Science and Technology (AIST) -- cgit v1.2.1 From 49f70d46ea38ceb7a501594db7f6ea35e19681aa Mon Sep 17 00:00:00 2001 From: Glenn Morris Date: Tue, 10 Jan 2012 23:52:35 -0800 Subject: Add 2012 to FSF copyright years for Emacs files (do not merge to trunk) --- src/coding.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/coding.c') diff --git a/src/coding.c b/src/coding.c index 555e6623383..fbb028f658c 100644 --- a/src/coding.c +++ b/src/coding.c @@ -1,6 +1,6 @@ /* Coding system handler (conversion, detection, etc). 
Copyright (C) 2001, 2002, 2003, 2004, 2005, - 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. + 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 National Institute of Advanced Industrial Science and Technology (AIST) -- cgit v1.2.1 From a32a7dc7214bf5f618d50d0143fe5f8159445d2d Mon Sep 17 00:00:00 2001 From: Kenichi Handa Date: Thu, 19 Jan 2012 22:19:21 +0800 Subject: Pay attention to buffer relocation on encoding (Bug#9318; backport from trunk). Backport of 2011-12-05T06:39:26Z!handa@m17n.org from trunk. --- src/coding.c | 171 +++++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 131 insertions(+), 40 deletions(-) (limited to 'src/coding.c') diff --git a/src/coding.c b/src/coding.c index fbb028f658c..9a2c1f9c3f2 100644 --- a/src/coding.c +++ b/src/coding.c @@ -936,17 +936,16 @@ static int encode_coding_ccl P_ ((struct coding_system *)); static void decode_coding_raw_text P_ ((struct coding_system *)); static int encode_coding_raw_text P_ ((struct coding_system *)); -static void coding_set_source P_ ((struct coding_system *)); -static void coding_set_destination P_ ((struct coding_system *)); +static EMACS_INT coding_set_source P_ ((struct coding_system *)); +static EMACS_INT coding_set_destination P_ ((struct coding_system *)); static void coding_alloc_by_realloc P_ ((struct coding_system *, EMACS_INT)); static void coding_alloc_by_making_gap P_ ((struct coding_system *, EMACS_INT, EMACS_INT)); static unsigned char *alloc_destination P_ ((struct coding_system *, EMACS_INT, unsigned char *)); static void setup_iso_safe_charsets P_ ((Lisp_Object)); -static unsigned char *encode_designation_at_bol P_ ((struct coding_system *, - int *, int *, - unsigned char *)); +static int encode_designation_at_bol P_ ((struct coding_system *, + int *, int *, unsigned char *)); static int detect_eol P_ 
((const unsigned char *, EMACS_INT, enum coding_category)); static Lisp_Object adjust_coding_eol_type P_ ((struct coding_system *, int)); @@ -1005,27 +1004,68 @@ record_conversion_result (struct coding_system *coding, } } -/* This wrapper macro is used to preserve validity of pointers into - buffer text across calls to decode_char, which could cause - relocation of buffers if it loads a charset map, because loading a - charset map allocates large structures. */ +/* These wrapper macros are used to preserve validity of pointers into + buffer text across calls to decode_char, encode_char, etc, which + could cause relocation of buffers if it loads a charset map, + because loading a charset map allocates large structures. */ + #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \ do { \ + EMACS_INT offset; \ + \ charset_map_loaded = 0; \ c = DECODE_CHAR (charset, code); \ - if (charset_map_loaded) \ + if (charset_map_loaded \ + && (offset = coding_set_source (coding))) \ { \ - const unsigned char *orig = coding->source; \ - EMACS_INT offset; \ - \ - coding_set_source (coding); \ - offset = coding->source - orig; \ src += offset; \ src_base += offset; \ src_end += offset; \ } \ } while (0) +#define CODING_ENCODE_CHAR(coding, dst, dst_end, charset, c, code) \ + do { \ + EMACS_INT offset; \ + \ + charset_map_loaded = 0; \ + code = ENCODE_CHAR (charset, c); \ + if (charset_map_loaded \ + && (offset = coding_set_destination (coding))) \ + { \ + dst += offset; \ + dst_end += offset; \ + } \ + } while (0) + +#define CODING_CHAR_CHARSET(coding, dst, dst_end, c, charset_list, code_return, charset) \ + do { \ + EMACS_INT offset; \ + \ + charset_map_loaded = 0; \ + charset = char_charset (c, charset_list, code_return); \ + if (charset_map_loaded \ + && (offset = coding_set_destination (coding))) \ + { \ + dst += offset; \ + dst_end += offset; \ + } \ + } while (0) + +#define CODING_CHAR_CHARSET_P(coding, dst, dst_end, c, charset, result) \ + do { \ + 
EMACS_INT offset; \ + \ + charset_map_loaded = 0; \ + result = CHAR_CHARSET_P (c, charset); \ + if (charset_map_loaded \ + && (offset = coding_set_destination (coding))) \ + { \ + dst += offset; \ + dst_end += offset; \ + } \ + } while (0) + /* If there are at least BYTES length of room at dst, allocate memory for coding->destination and update dst and dst_end. We don't have @@ -1105,10 +1145,15 @@ record_conversion_result (struct coding_system *coding, | ((p)[-1] & 0x3F)))) -static void +/* Update coding->source from coding->src_object, and return how many + bytes coding->source was changed. */ + +static EMACS_INT coding_set_source (coding) struct coding_system *coding; { + const unsigned char *orig = coding->source; + if (BUFFERP (coding->src_object)) { struct buffer *buf = XBUFFER (coding->src_object); @@ -1126,12 +1171,19 @@ coding_set_source (coding) /* Otherwise, the source is C string and is never relocated automatically. Thus we don't have to update anything. */ ; + + return coding->source - orig; } -static void +/* Update coding->destination from coding->dst_object, and return how + many bytes coding->destination was changed. */ + +static EMACS_INT coding_set_destination (coding) struct coding_system *coding; { + const unsigned char *orig = coding->destination; + if (BUFFERP (coding->dst_object)) { if (coding->src_pos < 0) @@ -1155,6 +1207,8 @@ coding_set_destination (coding) /* Otherwise, the destination is C string and is never relocated automatically. Thus we don't have to update anything. 
*/ ; + + return coding->destination - orig; } @@ -2778,14 +2832,19 @@ encode_coding_emacs_mule (coding) if (preferred_charset_id >= 0) { + int result; + charset = CHARSET_FROM_ID (preferred_charset_id); - if (CHAR_CHARSET_P (c, charset)) + CODING_CHAR_CHARSET_P (coding, dst, dst_end, c, charset, result); + if (result) code = ENCODE_CHAR (charset, c); else - charset = char_charset (c, charset_list, &code); + CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list, + &code, charset); } else - charset = char_charset (c, charset_list, &code); + CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list, + &code, charset); if (! charset) { c = coding->default_char; @@ -2794,7 +2853,8 @@ encode_coding_emacs_mule (coding) EMIT_ONE_ASCII_BYTE (c); continue; } - charset = char_charset (c, charset_list, &code); + CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list, + &code, charset); } dimension = CHARSET_DIMENSION (charset); emacs_mule_id = CHARSET_EMACS_MULE_ID (charset); @@ -4317,7 +4377,8 @@ decode_coding_iso_2022 (coding) #define ENCODE_ISO_CHARACTER(charset, c) \ do { \ - int code = ENCODE_CHAR ((charset),(c)); \ + int code; \ + CODING_ENCODE_CHAR (coding, dst, dst_end, (charset), (c), code); \ \ if (CHARSET_DIMENSION (charset) == 1) \ ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code); \ @@ -4441,17 +4502,20 @@ encode_invocation_designation (charset, coding, dst, p_nchars) /* Produce designation sequences of charsets in the line started from - SRC to a place pointed by DST, and return updated DST. + CHARBUF to a place pointed by DST, and return the number of + produced bytes. DST should not directly point a buffer text area + which may be relocated by char_charset call. If the current block ends before any end-of-line, we may fail to find all the necessary designations. 
*/ -static unsigned char * +static int encode_designation_at_bol (coding, charbuf, charbuf_end, dst) struct coding_system *coding; int *charbuf, *charbuf_end; unsigned char *dst; { + unsigned char *orig; struct charset *charset; /* Table of charsets to be designated to each graphic register. */ int r[4]; @@ -4469,7 +4533,7 @@ encode_designation_at_bol (coding, charbuf, charbuf_end, dst) for (reg = 0; reg < 4; reg++) r[reg] = -1; - while (found < 4) + while (charbuf < charbuf_end && found < 4) { int id; @@ -4494,7 +4558,7 @@ encode_designation_at_bol (coding, charbuf, charbuf_end, dst) ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding); } - return dst; + return dst - orig; } /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */ @@ -4539,13 +4603,26 @@ encode_coding_iso_2022 (coding) if (bol_designation) { - unsigned char *dst_prev = dst; - /* We have to produce designation sequences if any now. */ - dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst); - bol_designation = 0; + unsigned char desig_buf[16]; + int nbytes; + EMACS_INT offset; + + charset_map_loaded = 0; + nbytes = encode_designation_at_bol (coding, charbuf, charbuf_end, + desig_buf); + if (charset_map_loaded + && (offset = coding_set_destination (coding))) + { + dst += offset; + dst_end += offset; + } + memcpy (dst, desig_buf, nbytes); + dst += nbytes; /* We are sure that designation sequences are all ASCII bytes. */ - produced_chars += dst - dst_prev; + produced_chars += nbytes; + bol_designation = 0; + ASSURE_DESTINATION (safe_room); } c = *charbuf++; @@ -4616,12 +4693,17 @@ encode_coding_iso_2022 (coding) if (preferred_charset_id >= 0) { + int result; + charset = CHARSET_FROM_ID (preferred_charset_id); - if (! CHAR_CHARSET_P (c, charset)) - charset = char_charset (c, charset_list, NULL); + CODING_CHAR_CHARSET_P (coding, dst, dst_end, c, charset, result); + if (! 
result) + CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list, + NULL, charset); } else - charset = char_charset (c, charset_list, NULL); + CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list, + NULL, charset); if (!charset) { if (coding->mode & CODING_MODE_SAFE_ENCODING) @@ -4632,7 +4714,8 @@ encode_coding_iso_2022 (coding) else { c = coding->default_char; - charset = char_charset (c, charset_list, NULL); + CODING_CHAR_CHARSET (coding, dst, dst_end, c, + charset_list, NULL, charset); } } ENCODE_ISO_CHARACTER (charset, c); @@ -5064,7 +5147,9 @@ encode_coding_sjis (coding) else { unsigned code; - struct charset *charset = char_charset (c, charset_list, &code); + struct charset *charset; + CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list, + &code, charset); if (!charset) { @@ -5076,7 +5161,8 @@ encode_coding_sjis (coding) else { c = coding->default_char; - charset = char_charset (c, charset_list, &code); + CODING_CHAR_CHARSET (coding, dst, dst_end, c, + charset_list, &code, charset); } } if (code == CHARSET_INVALID_CODE (charset)) @@ -5153,7 +5239,9 @@ encode_coding_big5 (coding) else { unsigned code; - struct charset *charset = char_charset (c, charset_list, &code); + struct charset *charset; + CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list, + &code, charset); if (! 
charset) { @@ -5165,7 +5253,8 @@ encode_coding_big5 (coding) else { c = coding->default_char; - charset = char_charset (c, charset_list, &code); + CODING_CHAR_CHARSET (coding, dst, dst_end, c, + charset_list, &code, charset); } } if (code == CHARSET_INVALID_CODE (charset)) @@ -5747,7 +5836,9 @@ encode_coding_charset (coding) } else { - charset = char_charset (c, charset_list, &code); + CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list, + &code, charset); + if (charset) { if (CHARSET_DIMENSION (charset) == 1) -- cgit v1.2.1 From 76774ec8c5284dd327c13e468293a99c04103720 Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Thu, 19 Jan 2012 22:35:41 +0800 Subject: Fix usage of uninitialized local var (backport from trunk) * src/coding.c (encode_designation_at_bol): Don't use uninitialized local variable (Bug#9318). --- src/coding.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/coding.c') diff --git a/src/coding.c b/src/coding.c index 9a2c1f9c3f2..898bfd71f43 100644 --- a/src/coding.c +++ b/src/coding.c @@ -4515,7 +4515,7 @@ encode_designation_at_bol (coding, charbuf, charbuf_end, dst) int *charbuf, *charbuf_end; unsigned char *dst; { - unsigned char *orig; + unsigned char *orig = dst; struct charset *charset; /* Table of charsets to be designated to each graphic register. */ int r[4]; -- cgit v1.2.1 From 5f53d2441abf6eafe8e14f29d73e14afe8bec35f Mon Sep 17 00:00:00 2001 From: HIROSHI OOTA Date: Wed, 25 Jan 2012 13:35:05 +0800 Subject: * src/coding.c (encode_designation_at_bol): Change return value to EMACS_INT. 
--- src/coding.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/coding.c') diff --git a/src/coding.c b/src/coding.c index 898bfd71f43..8e96db1f521 100644 --- a/src/coding.c +++ b/src/coding.c @@ -944,7 +944,7 @@ static void coding_alloc_by_making_gap P_ ((struct coding_system *, static unsigned char *alloc_destination P_ ((struct coding_system *, EMACS_INT, unsigned char *)); static void setup_iso_safe_charsets P_ ((Lisp_Object)); -static int encode_designation_at_bol P_ ((struct coding_system *, +static EMACS_INT encode_designation_at_bol P_ ((struct coding_system *, int *, int *, unsigned char *)); static int detect_eol P_ ((const unsigned char *, EMACS_INT, enum coding_category)); @@ -4509,7 +4509,7 @@ encode_invocation_designation (charset, coding, dst, p_nchars) If the current block ends before any end-of-line, we may fail to find all the necessary designations. */ -static int +static EMACS_INT encode_designation_at_bol (coding, charbuf, charbuf_end, dst) struct coding_system *coding; int *charbuf, *charbuf_end; -- cgit v1.2.1 From 14af5f7fc4d7557ee712d3b6a8b46d9034c2ff39 Mon Sep 17 00:00:00 2001 From: Chong Yidong Date: Wed, 25 Jan 2012 13:55:01 +0800 Subject: Merge from emacs-23 branch --- src/coding.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/coding.c') diff --git a/src/coding.c b/src/coding.c index 5c3048f95e8..15e8572edb3 100644 --- a/src/coding.c +++ b/src/coding.c @@ -855,7 +855,7 @@ static void coding_alloc_by_making_gap (struct coding_system *, static unsigned char *alloc_destination (struct coding_system *, EMACS_INT, unsigned char *); static void setup_iso_safe_charsets (Lisp_Object); -static int encode_designation_at_bol (struct coding_system *, +static EMACS_INT encode_designation_at_bol (struct coding_system *, int *, int *, unsigned char *); static int detect_eol (const unsigned char *, EMACS_INT, enum coding_category); @@ -4351,7 +4351,7 @@ encode_invocation_designation (struct charset 
*charset, If the current block ends before any end-of-line, we may fail to find all the necessary designations. */ -static int +static EMACS_INT encode_designation_at_bol (struct coding_system *coding, int *charbuf, int *charbuf_end, unsigned char *dst) -- cgit v1.2.1