From a2f07cd28277fbcef42541509e3710a9863cac3a Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Wed, 19 Dec 2012 19:52:48 +0400 Subject: * coding.c (Fdetect_coding_region): Do not check start and end with CHECK_NUMBER_COERCE_MARKER since validate_region does that itself. (code_convert_region): Likewise. --- src/coding.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'src/coding.c') diff --git a/src/coding.c b/src/coding.c index 56202e4861d..e42461f52a0 100644 --- a/src/coding.c +++ b/src/coding.c @@ -8426,9 +8426,6 @@ highest priority. */) ptrdiff_t from, to; ptrdiff_t from_byte, to_byte; - CHECK_NUMBER_COERCE_MARKER (start); - CHECK_NUMBER_COERCE_MARKER (end); - validate_region (&start, &end); from = XINT (start), to = XINT (end); from_byte = CHAR_TO_BYTE (from); @@ -8872,8 +8869,6 @@ code_convert_region (Lisp_Object start, Lisp_Object end, ptrdiff_t from, from_byte, to, to_byte; Lisp_Object src_object; - CHECK_NUMBER_COERCE_MARKER (start); - CHECK_NUMBER_COERCE_MARKER (end); if (NILP (coding_system)) coding_system = Qno_conversion; else -- cgit v1.2.1 From 4b298d5a3e0d5fb75f66c48598e80122669cef8b Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Wed, 26 Dec 2012 13:40:45 +0400 Subject: * keyboard.c (record_asynch_buffer_change): Initialize an event only if it's really needed. * frame.h (enum output_method): Remove output_mac member since it's a leftover from the deleted code. * frame.c (Fframep): Adjust user here ... * terminal.c (Fterminal_live_p): ... and here. * coding.c (Qmac): Now here because it's only used to denote end-of-line encoding type. (syms_of_coding): DEFSYM it. * frame.h (Qmac): Remove duplicated declaration. 
--- src/coding.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/coding.c') diff --git a/src/coding.c b/src/coding.c index e42461f52a0..2c1139ec5fc 100644 --- a/src/coding.c +++ b/src/coding.c @@ -301,7 +301,7 @@ Lisp_Object Vcoding_system_hash_table; static Lisp_Object Qcoding_system, Qeol_type; static Lisp_Object Qcoding_aliases; -Lisp_Object Qunix, Qdos; +Lisp_Object Qunix, Qdos, Qmac; Lisp_Object Qbuffer_file_coding_system; static Lisp_Object Qpost_read_conversion, Qpre_write_conversion; static Lisp_Object Qdefault_char; @@ -10303,6 +10303,7 @@ syms_of_coding (void) DEFSYM (Qeol_type, "eol-type"); DEFSYM (Qunix, "unix"); DEFSYM (Qdos, "dos"); + DEFSYM (Qmac, "mac"); DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system"); DEFSYM (Qpost_read_conversion, "post-read-conversion"); -- cgit v1.2.1 From 84cc1ab62539eed7869a88003a017330d79e8cac Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Sun, 30 Dec 2012 13:34:39 -0800 Subject: * coding.c (Qmac): Now static. --- src/coding.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/coding.c') diff --git a/src/coding.c b/src/coding.c index 2c1139ec5fc..044583d26b6 100644 --- a/src/coding.c +++ b/src/coding.c @@ -301,7 +301,8 @@ Lisp_Object Vcoding_system_hash_table; static Lisp_Object Qcoding_system, Qeol_type; static Lisp_Object Qcoding_aliases; -Lisp_Object Qunix, Qdos, Qmac; +Lisp_Object Qunix, Qdos; +static Lisp_Object Qmac; Lisp_Object Qbuffer_file_coding_system; static Lisp_Object Qpost_read_conversion, Qpre_write_conversion; static Lisp_Object Qdefault_char; -- cgit v1.2.1 From ab422c4d6899b1442cb6954c1829c1fb656b006c Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Tue, 1 Jan 2013 09:11:05 +0000 Subject: Update copyright notices for 2013. 
--- src/coding.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/coding.c') diff --git a/src/coding.c b/src/coding.c index 56202e4861d..47d5e138e1c 100644 --- a/src/coding.c +++ b/src/coding.c @@ -1,5 +1,5 @@ /* Coding system handler (conversion, detection, etc). - Copyright (C) 2001-2012 Free Software Foundation, Inc. + Copyright (C) 2001-2013 Free Software Foundation, Inc. Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 National Institute of Advanced Industrial Science and Technology (AIST) -- cgit v1.2.1 From eefd727851555237c7bc205b7ad255c50ba3fff9 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Wed, 9 Jan 2013 17:50:22 +0400 Subject: * lisp.h (make_gap_1): New prototype. * buffer.h (GAP_BYTES_DFL, GAP_BYTES_MIN): New macros for the special gap size values. * editfns.c (Fbuffer_size): Rename from Fbufsize to fit the common naming convention. (syms_of_editfns): Adjust defsubr. Drop commented-out obsolete code. * insdel.c (make_gap_larger): Use GAP_BYTES_DFL. (make_gap_smaller): Use GAP_BYTES_MIN. Adjust comment. (make_gap_1): New function to adjust the gap of any buffer. * coding.c (coding_alloc_by_making_gap): Use it. * buffer.c (compact_buffer): Likewise. Use BUF_Z_BYTE, BUF_GAP_SIZE, GAP_BYTES_DFL and GAP_BYTES_MIN. Adjust comment. 
--- src/coding.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'src/coding.c') diff --git a/src/coding.c b/src/coding.c index 5285a906823..a9bf9032a69 100644 --- a/src/coding.c +++ b/src/coding.c @@ -1049,14 +1049,7 @@ coding_alloc_by_making_gap (struct coding_system *coding, GPT -= gap_head_used, GPT_BYTE -= gap_head_used; } else - { - Lisp_Object this_buffer; - - this_buffer = Fcurrent_buffer (); - set_buffer_internal (XBUFFER (coding->dst_object)); - make_gap (bytes); - set_buffer_internal (XBUFFER (this_buffer)); - } + make_gap_1 (XBUFFER (coding->dst_object), bytes); } -- cgit v1.2.1 From ba14c607ba7fdadb494b57a9788997059ba510bf Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Sun, 20 Jan 2013 23:59:47 +0100 Subject: Fixes: debbugs:13505 * src/coding.c (detect_coding_iso_2022): Move back mis-reordered code at check_extra_latin label. --- src/coding.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) (limited to 'src/coding.c') diff --git a/src/coding.c b/src/coding.c index 47d5e138e1c..20d5d92f046 100644 --- a/src/coding.c +++ b/src/coding.c @@ -3063,20 +3063,7 @@ detect_coding_iso_2022 (struct coding_system *coding, } if (single_shifting) break; - check_extra_latin: - if (! VECTORP (Vlatin_extra_code_table) - || NILP (AREF (Vlatin_extra_code_table, c))) - { - rejected = CATEGORY_MASK_ISO; - break; - } - if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1]) - & CODING_ISO_FLAG_LATIN_EXTRA) - found |= CATEGORY_MASK_ISO_8_1; - else - rejected |= CATEGORY_MASK_ISO_8_1; - rejected |= CATEGORY_MASK_ISO_8_2; - break; + goto check_extra_latin; default: if (c < 0) @@ -3127,6 +3114,20 @@ detect_coding_iso_2022 (struct coding_system *coding, } break; } + check_extra_latin: + if (! 
VECTORP (Vlatin_extra_code_table) + || NILP (AREF (Vlatin_extra_code_table, c))) + { + rejected = CATEGORY_MASK_ISO; + break; + } + if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1]) + & CODING_ISO_FLAG_LATIN_EXTRA) + found |= CATEGORY_MASK_ISO_8_1; + else + rejected |= CATEGORY_MASK_ISO_8_1; + rejected |= CATEGORY_MASK_ISO_8_2; + break; } } detect_info->rejected |= CATEGORY_MASK_ISO; -- cgit v1.2.1 From 9a9d91d9c247adefa7137338d7609d81734f888d Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Thu, 7 Feb 2013 20:09:04 +0400 Subject: * coding.c (Fdefine_coding_system_internal): Use AREF where argument is known to be a vector. * fns.c (Flocale_info): Likewise for ASET. * xselect.c (selection_data_to_lisp_data): Likewise for ASET. * w32fns.c (w32_parse_hot_key): Likewise for ASIZE and AREF. --- src/coding.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/coding.c') diff --git a/src/coding.c b/src/coding.c index 93da9db0d36..c7bfe25e0cc 100644 --- a/src/coding.c +++ b/src/coding.c @@ -9783,7 +9783,7 @@ usage: (define-coding-system-internal ...) */) CHECK_VECTOR (initial); for (i = 0; i < 4; i++) { - val = Faref (initial, make_number (i)); + val = AREF (initial, i); if (! NILP (val)) { struct charset *charset; -- cgit v1.2.1 From 25721f5bb5681c22f666a0b4e61d94687d92a671 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Fri, 8 Feb 2013 09:28:52 +0400 Subject: * lisp.h (make_uninit_vector): New function. 
* alloc.c (Fvector, Fmake_byte_code): * ccl.c (Fregister_ccl_program): * charset.c (Fdefine_charset_internal, define_charset_internal): * coding.c (make_subsidiaries, Fdefine_coding_system_internal): * composite.c (syms_of_composite): * font.c (Fquery_font, Ffont_info, syms_of_font): * fontset.c (FONT_DEF_NEW, Fset_fontset_font): * ftfont.c (ftfont_shape_by_flt): * indent.c (recompute_width_table): * nsselect.m (clean_local_selection_data): * syntax.c (init_syntax_once): * w32uniscribe.c (uniscribe_shape): * window.c (Fcurrent_window_configuration): * xfaces.c (Fx_family_fonts): * xselect.c (selection_data_to_lisp_data): Use it. --- src/coding.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'src/coding.c') diff --git a/src/coding.c b/src/coding.c index c7bfe25e0cc..b881f162ab9 100644 --- a/src/coding.c +++ b/src/coding.c @@ -9483,7 +9483,7 @@ make_subsidiaries (Lisp_Object base) int i; memcpy (buf, SDATA (SYMBOL_NAME (base)), base_name_len); - subsidiaries = Fmake_vector (make_number (3), Qnil); + subsidiaries = make_uninit_vector (3); for (i = 0; i < 3; i++) { strcpy (buf + base_name_len, suffixes[i]); @@ -9988,7 +9988,8 @@ usage: (define-coding-system-internal ...) */) this_name = AREF (eol_type, i); this_aliases = Fcons (this_name, Qnil); this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac); - this_spec = Fmake_vector (make_number (3), attrs); + this_spec = make_uninit_vector (3); + ASET (this_spec, 0, attrs); ASET (this_spec, 1, this_aliases); ASET (this_spec, 2, this_eol_type); Fputhash (this_name, this_spec, Vcoding_system_hash_table); @@ -10001,7 +10002,8 @@ usage: (define-coding-system-internal ...) 
*/) } } - spec_vec = Fmake_vector (make_number (3), attrs); + spec_vec = make_uninit_vector (3); + ASET (spec_vec, 0, attrs); ASET (spec_vec, 1, aliases); ASET (spec_vec, 2, eol_type); -- cgit v1.2.1 From 65e7ca35a69003788134f8c961f561fe6f7a9720 Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Tue, 12 Feb 2013 09:36:54 -0800 Subject: In doc, use standard American English style for e.g., etc., i.e. --- src/coding.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/coding.c') diff --git a/src/coding.c b/src/coding.c index 20d5d92f046..346a8573b70 100644 --- a/src/coding.c +++ b/src/coding.c @@ -10720,7 +10720,7 @@ reading if you suppress escape sequence detection. The other way to read escape sequences in a file without decoding is to explicitly specify some coding system that doesn't use ISO-2022 -escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument]. */); +escape sequence (e.g., `latin-1') on reading by \\[universal-coding-system-argument]. */); inhibit_iso_escape_detection = 0; DEFVAR_BOOL ("inhibit-null-byte-detection", -- cgit v1.2.1 From 1af1a51aada18d88fac7b2ba09231428c6a65d7c Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Wed, 6 Mar 2013 15:26:30 +0400 Subject: Coding system support cleanup and minor refactoring. * coding.h (enum coding_result_code): Remove CODING_RESULT_INCONSISTENT_EOL and CODING_RESULT_INSUFFICIENT_MEM. (toplevel): Remove unused CODING_MODE_INHIBIT_INCONSISTENT_EOL. (CODING_MODE_LAST_BLOCK, CODING_MODE_SELECTIVE_DISPLAY) (CODING_MODE_DIRECTION, CODING_MODE_FIXED_DESTINATION) (CODING_MODE_SAFE_ENCODING): Rearrange bit values. (decode_coding_region, encode_coding_region, decode_coding_string): Remove unused compatibility macros. * coding.c (Qinconsistent_eol, Qinsufficient_memory): Remove. (record_conversion_result): Adjust user. (syms_of_coding): Likewise. (ALLOC_CONVERSION_WORK_AREA): Use SAFE_ALLOCA. (decode_coding, encode_coding): Add USE_SAFE_ALLOCA and SAFE_FREE. 
(decode_coding_object): Simplify since xrealloc never returns NULL. Add eassert. --- src/coding.c | 45 ++++++++++++--------------------------------- 1 file changed, 12 insertions(+), 33 deletions(-) (limited to 'src/coding.c') diff --git a/src/coding.c b/src/coding.c index 868fb7df0ea..32da72ab626 100644 --- a/src/coding.c +++ b/src/coding.c @@ -322,8 +322,7 @@ Lisp_Object Qcall_process, Qcall_process_region; Lisp_Object Qstart_process, Qopen_network_stream; static Lisp_Object Qtarget_idx; -static Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source; -static Lisp_Object Qinterrupted, Qinsufficient_memory; +static Lisp_Object Qinsufficient_source, Qinvalid_source, Qinterrupted; /* If a symbol has this property, evaluate the value to define the symbol as a coding system. */ @@ -820,18 +819,12 @@ record_conversion_result (struct coding_system *coding, case CODING_RESULT_INSUFFICIENT_SRC: Vlast_code_conversion_error = Qinsufficient_source; break; - case CODING_RESULT_INCONSISTENT_EOL: - Vlast_code_conversion_error = Qinconsistent_eol; - break; case CODING_RESULT_INVALID_SRC: Vlast_code_conversion_error = Qinvalid_source; break; case CODING_RESULT_INTERRUPT: Vlast_code_conversion_error = Qinterrupted; break; - case CODING_RESULT_INSUFFICIENT_MEM: - Vlast_code_conversion_error = Qinsufficient_memory; - break; case CODING_RESULT_INSUFFICIENT_DST: /* Don't record this error in Vlast_code_conversion_error because it happens just temporarily and is resolved when the @@ -6884,22 +6877,8 @@ produce_charset (struct coding_system *coding, int *charbuf, ptrdiff_t pos) #define ALLOC_CONVERSION_WORK_AREA(coding) \ do { \ - int size = CHARBUF_SIZE; \ - \ - coding->charbuf = NULL; \ - while (size > 1024) \ - { \ - coding->charbuf = alloca (sizeof (int) * size); \ - if (coding->charbuf) \ - break; \ - size >>= 1; \ - } \ - if (! 
coding->charbuf) \ - { \ - record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \ - return; \ - } \ - coding->charbuf_size = size; \ + coding->charbuf = SAFE_ALLOCA (CHARBUF_SIZE * sizeof (int)); \ + coding->charbuf_size = CHARBUF_SIZE; \ } while (0) @@ -6968,6 +6947,8 @@ decode_coding (struct coding_system *coding) int carryover; int i; + USE_SAFE_ALLOCA; + if (BUFFERP (coding->src_object) && coding->src_pos > 0 && coding->src_pos < GPT @@ -7090,6 +7071,8 @@ decode_coding (struct coding_system *coding) bset_undo_list (current_buffer, undo_list); record_insert (coding->dst_pos, coding->produced_char); } + + SAFE_FREE (); } @@ -7373,6 +7356,8 @@ encode_coding (struct coding_system *coding) int max_lookup; struct ccl_spec cclspec; + USE_SAFE_ALLOCA; + attrs = CODING_ID_ATTRS (coding->id); if (coding->encoder == encode_coding_raw_text) translation_table = Qnil, max_lookup = 0; @@ -7407,6 +7392,8 @@ encode_coding (struct coding_system *coding) if (BUFFERP (coding->dst_object) && coding->produced_char > 0) insert_from_gap (coding->produced_char, coding->produced); + + SAFE_FREE (); } @@ -7695,14 +7682,8 @@ decode_coding_object (struct coding_system *coding, set_buffer_internal (XBUFFER (coding->dst_object)); if (dst_bytes < coding->produced) { + eassert (coding->produced > 0); destination = xrealloc (destination, coding->produced); - if (! 
destination) - { - record_conversion_result (coding, - CODING_RESULT_INSUFFICIENT_MEM); - unbind_to (count, Qnil); - return; - } if (BEGV < GPT && GPT < BEGV + coding->produced_char) move_gap_both (BEGV, BEGV_BYTE); memcpy (destination, BEGV_ADDR, coding->produced); @@ -10408,10 +10389,8 @@ syms_of_coding (void) intern_c_string ("coding-category-undecided")); DEFSYM (Qinsufficient_source, "insufficient-source"); - DEFSYM (Qinconsistent_eol, "inconsistent-eol"); DEFSYM (Qinvalid_source, "invalid-source"); DEFSYM (Qinterrupted, "interrupted"); - DEFSYM (Qinsufficient_memory, "insufficient-memory"); DEFSYM (Qcoding_system_define_form, "coding-system-define-form"); defsubr (&Scoding_system_p); -- cgit v1.2.1 From bc989a58e2412c152c2aef9d35ca103979edebd5 Mon Sep 17 00:00:00 2001 From: Eli Zaretskii Date: Sat, 9 Mar 2013 20:09:33 +0200 Subject: coding.c (to_unicode): Fix a typo in a comment. --- src/coding.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/coding.c') diff --git a/src/coding.c b/src/coding.c index 32da72ab626..78e6cff7078 100644 --- a/src/coding.c +++ b/src/coding.c @@ -7970,7 +7970,7 @@ wchar_t * to_unicode (Lisp_Object str, Lisp_Object *buf) { *buf = code_convert_string_norecord (str, Qutf_16le, 1); - /* We need to make a another copy (in addition to the one made by + /* We need to make another copy (in addition to the one made by code_convert_string_norecord) to ensure that the final string is _doubly_ zero terminated --- that is, that the string is terminated by two zero bytes and one utf-16le null character. -- cgit v1.2.1 From c230dd7d89730f565df77046d0666d2082e386ee Mon Sep 17 00:00:00 2001 From: Kenichi Handa Date: Sun, 10 Mar 2013 23:36:35 +0900 Subject: On file insertion, skip decoding if all bytes are ASCII. 
--- src/coding.c | 43 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 39 insertions(+), 4 deletions(-) (limited to 'src/coding.c') diff --git a/src/coding.c b/src/coding.c index 32da72ab626..f33b5e7c7d5 100644 --- a/src/coding.c +++ b/src/coding.c @@ -6349,7 +6349,12 @@ detect_coding (struct coding_system *coding) coding_systems = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom); detect_info.found = detect_info.rejected = 0; - coding->head_ascii = 0; + for (src = coding->source; src < src_end; src++) + { + if (*src & 0x80) + break; + } + coding->head_ascii = src - coding->source; if (CONSP (coding_systems) && detect_coding_utf_8 (coding, &detect_info)) { @@ -7487,8 +7492,6 @@ decode_coding_gap (struct coding_system *coding, ptrdiff_t count = SPECPDL_INDEX (); Lisp_Object attrs; - code_conversion_save (0, 0); - coding->src_object = Fcurrent_buffer (); coding->src_chars = chars; coding->src_bytes = bytes; @@ -7502,13 +7505,45 @@ decode_coding_gap (struct coding_system *coding, if (CODING_REQUIRE_DETECTION (coding)) detect_coding (coding); + attrs = CODING_ID_ATTRS (coding->id); +#ifndef CODING_DISABLE_ASCII_OPTIMIZATION + if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)) + && NILP (CODING_ATTR_POST_READ (attrs)) + && NILP (get_translation_table (attrs, 0, NULL))) + { + /* We can skip the conversion if all source bytes are ASCII. */ + if (coding->head_ascii < 0) + { + /* We have not yet counted the number of ASCII bytes at the + head of the source. Do it now. */ + const unsigned char *src, *src_end; + + coding_set_source (coding); + src_end = coding->source + coding->src_bytes; + for (src = coding->source; src < src_end; src++) + { + if (*src & 0x80) + break; + } + coding->head_ascii = src - coding->source; + } + if (coding->src_bytes == coding->head_ascii) + { + /* No need of conversion. Use the data in the gap as is. 
*/ + coding->produced_char = chars; + coding->produced = bytes; + adjust_after_replace (PT, PT_BYTE, Qnil, chars, bytes, 1); + return; + } + } +#endif /* not CODING_DISABLE_ASCII_OPTIMIZATION */ + code_conversion_save (0, 0); coding->mode |= CODING_MODE_LAST_BLOCK; current_buffer->text->inhibit_shrinking = 1; decode_coding (coding); current_buffer->text->inhibit_shrinking = 0; - attrs = CODING_ID_ATTRS (coding->id); if (! NILP (CODING_ATTR_POST_READ (attrs))) { ptrdiff_t prev_Z = Z, prev_Z_BYTE = Z_BYTE; -- cgit v1.2.1 From 7d051e215477753b813864caa23c1009c7692bda Mon Sep 17 00:00:00 2001 From: Kenichi Handa Date: Mon, 11 Mar 2013 00:06:04 +0900 Subject: Fix previous change. --- src/coding.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/coding.c') diff --git a/src/coding.c b/src/coding.c index 98af4ddcef7..d6560a92b70 100644 --- a/src/coding.c +++ b/src/coding.c @@ -7509,7 +7509,9 @@ decode_coding_gap (struct coding_system *coding, #ifndef CODING_DISABLE_ASCII_OPTIMIZATION if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)) && NILP (CODING_ATTR_POST_READ (attrs)) - && NILP (get_translation_table (attrs, 0, NULL))) + && NILP (get_translation_table (attrs, 0, NULL)) + && (inhibit_eol_conversion + || EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))) { /* We can skip the conversion if all source bytes are ASCII. */ if (coding->head_ascii < 0) -- cgit v1.2.1 From 819e2da92a18d7af03ccd9cf0a2e5b940eb7b54f Mon Sep 17 00:00:00 2001 From: Daniel Colascione Date: Sun, 10 Mar 2013 14:55:25 -0800 Subject: 2013-03-10 Daniel Colascione * w32term.h (GUISTR, GUI_ENCODE_FILE, GUI_ENCODE_SYSTEM, GUI_FN) (GUI_SDATA, guichar_t): Macros to abstract out differences between NTGUI_UNICODE and !NTGUI_UNICODE builds, some moved out of w32fns.c. * w32term.c (construct_drag_n_drop): Use the above macros to make drag-and-drop work for non-ASCII filenames in cygw32 builds. 
* w32fns.c (x_set_name, x_set_title): Use the above macros to properly display non-ASCII frame titles in cygw32 builds. * w32fns.c (Fw32_shell_execute): Use the above macros to properly call ShellExecute in cygw32 builds. * w32fns.c (Fx_file_dialog): Use the above macros to simplify the common file dialog code. * w32fns.c (Ffile_system_info): Remove from cygw32 builds, which can just use du like other systems. * coding.c (from_unicode_buffer): Declare. * coding.c (from_unicode_buffer): Implement. --- src/coding.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'src/coding.c') diff --git a/src/coding.c b/src/coding.c index d6560a92b70..c18632f301b 100644 --- a/src/coding.c +++ b/src/coding.c @@ -286,6 +286,10 @@ encode_coding_XXX (struct coding_system *coding) #include #include +#ifdef HAVE_WCHAR_H +#include +#endif /* HAVE_WCHAR_H */ + #include "lisp.h" #include "character.h" #include "buffer.h" @@ -8003,6 +8007,16 @@ from_unicode (Lisp_Object str) return code_convert_string_norecord (str, Qutf_16le, 0); } +Lisp_Object +from_unicode_buffer (const wchar_t* wstr) +{ + return from_unicode ( + make_unibyte_string ( + (char*) wstr, + /* we get one of the two final 0 bytes for free. */ + 1 + sizeof (wchar_t) * wcslen (wstr))); +} + wchar_t * to_unicode (Lisp_Object str, Lisp_Object *buf) { -- cgit v1.2.1 From 8a44e6d176989d8eef140314098c76a70248ba61 Mon Sep 17 00:00:00 2001 From: Kenichi Handa Date: Sat, 16 Mar 2013 01:03:54 +0900 Subject: Optimize ASCII file reading with EOL format detection and decoding. 
--- src/coding.c | 197 +++++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 159 insertions(+), 38 deletions(-) (limited to 'src/coding.c') diff --git a/src/coding.c b/src/coding.c index c18632f301b..5047e1149bc 100644 --- a/src/coding.c +++ b/src/coding.c @@ -6071,6 +6071,93 @@ complement_process_encoding_system (Lisp_Object coding_system) #define EOL_SEEN_CR 2 #define EOL_SEEN_CRLF 4 + +static Lisp_Object adjust_coding_eol_type (struct coding_system *coding, int eol_seen); + + +/* Return 1 if all the source bytes are ASCII, and return 0 otherwize. + By side effects, set coding->head_ascii and coding->eol_seen. The + value of coding->eol_seen is "logical or" of EOL_SEEN_LF, + EOL_SEEN_CR, and EOL_SEEN_CRLF, but the value is reliable only when + all the source bytes are ASCII. */ + +static bool +detect_ascii (struct coding_system *coding) +{ + const unsigned char *src, *end; + Lisp_Object eol_type = CODING_ID_EOL_TYPE (coding->id); + int eol_seen; + + eol_seen = (VECTORP (eol_type) ? EOL_SEEN_NONE + : EQ (eol_type, Qunix) ? EOL_SEEN_LF + : EQ (eol_type, Qdos) ? EOL_SEEN_CRLF + : EOL_SEEN_CR); + coding_set_source (coding); + src = coding->source; + end = src + coding->src_bytes; + + if (inhibit_eol_conversion) + { + /* We don't have to check EOL format. */ + while (src < end && !( *src & 0x80)) src++; + eol_seen = EOL_SEEN_LF; + adjust_coding_eol_type (coding, eol_seen); + } + else if (eol_seen != EOL_SEEN_NONE) + { + /* We don't have to check EOL format either. */ + while (src < end && !(*src & 0x80)) src++; + } + else + { + end--; /* We look ahead one byte. */ + while (src < end) + { + int c = *src; + + if (c & 0x80) + break; + src++; + if (c < 0x20) + { + if (c == '\r') + { + if (*src == '\n') + { + eol_seen |= EOL_SEEN_CRLF; + src++; + } + else + eol_seen |= EOL_SEEN_CR; + } + else if (c == '\n') + eol_seen |= EOL_SEEN_LF; + } + } + if (src > end) + /* The last two bytes are CR LF, which means that we have + scanned all bytes. 
*/ + end++; + else if (src == end) + { + end++; + if (! (*src & 0x80)) + { + if (*src == '\r') + eol_seen |= EOL_SEEN_CR; + else if (*src == '\n') + eol_seen |= EOL_SEEN_LF; + src++; + } + } + adjust_coding_eol_type (coding, eol_seen); + } + coding->head_ascii = src - coding->source; + coding->eol_seen = eol_seen; + return (src == end); +} + + /* Detect how end-of-line of a text of length SRC_BYTES pointed by SOURCE is encoded. If CATEGORY is one of coding_category_utf_16_XXXX, assume that CR and LF are encoded by @@ -6215,7 +6302,6 @@ detect_coding (struct coding_system *coding) coding_set_source (coding); src_end = coding->source + coding->src_bytes; - coding->head_ascii = 0; /* If we have not yet decided the text encoding type, detect it now. */ @@ -6225,6 +6311,8 @@ detect_coding (struct coding_system *coding) struct coding_detection_info detect_info; bool null_byte_found = 0, eight_bit_found = 0; + coding->head_ascii = 0; + coding->eol_seen = EOL_SEEN_NONE; detect_info.checked = detect_info.found = detect_info.rejected = 0; for (src = coding->source; src < src_end; src++) { @@ -6263,6 +6351,26 @@ detect_coding (struct coding_system *coding) if (eight_bit_found) break; } + else if (! disable_ascii_optimization + && ! inhibit_eol_conversion) + { + if (c == '\r') + { + if (src < src_end && src[1] == '\n') + { + coding->eol_seen |= EOL_SEEN_CRLF; + src++; + coding->head_ascii++; + } + else + coding->eol_seen |= EOL_SEEN_CR; + } + else if (c == '\n') + { + coding->eol_seen |= EOL_SEEN_LF; + } + } + if (! 
eight_bit_found) coding->head_ascii++; } @@ -6353,19 +6461,20 @@ detect_coding (struct coding_system *coding) coding_systems = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom); detect_info.found = detect_info.rejected = 0; - for (src = coding->source; src < src_end; src++) + if (detect_ascii (coding)) { - if (*src & 0x80) - break; + setup_coding_system (XCDR (coding_systems), coding); } - coding->head_ascii = src - coding->source; - if (CONSP (coding_systems) - && detect_coding_utf_8 (coding, &detect_info)) + else { - if (detect_info.found & CATEGORY_MASK_UTF_8_SIG) - setup_coding_system (XCAR (coding_systems), coding); - else - setup_coding_system (XCDR (coding_systems), coding); + if (CONSP (coding_systems) + && detect_coding_utf_8 (coding, &detect_info)) + { + if (detect_info.found & CATEGORY_MASK_UTF_8_SIG) + setup_coding_system (XCAR (coding_systems), coding); + else + setup_coding_system (XCDR (coding_systems), coding); + } } } else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id))) @@ -6378,6 +6487,7 @@ detect_coding (struct coding_system *coding) = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom); detect_info.found = detect_info.rejected = 0; coding->head_ascii = 0; + coding->eol_seen = EOL_SEEN_NONE; if (CONSP (coding_systems) && detect_coding_utf_16 (coding, &detect_info)) { @@ -6815,7 +6925,7 @@ produce_chars (struct coding_system *coding, Lisp_Object translation_table, produced = dst - (coding->destination + coding->produced); if (BUFFERP (coding->dst_object) && produced_chars > 0) - insert_from_gap (produced_chars, produced); + insert_from_gap (produced_chars, produced, 0); coding->produced += produced; coding->produced_char += produced_chars; return carryover; @@ -7400,7 +7510,7 @@ encode_coding (struct coding_system *coding) } while (coding->consumed_char < coding->src_chars); if (BUFFERP (coding->dst_object) && coding->produced_char > 0) - insert_from_gap (coding->produced_char, coding->produced); + insert_from_gap 
(coding->produced_char, coding->produced, 0); SAFE_FREE (); } @@ -7510,39 +7620,45 @@ decode_coding_gap (struct coding_system *coding, if (CODING_REQUIRE_DETECTION (coding)) detect_coding (coding); attrs = CODING_ID_ATTRS (coding->id); -#ifndef CODING_DISABLE_ASCII_OPTIMIZATION - if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)) - && NILP (CODING_ATTR_POST_READ (attrs)) - && NILP (get_translation_table (attrs, 0, NULL)) - && (inhibit_eol_conversion - || EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))) + if (! disable_ascii_optimization) { - /* We can skip the conversion if all source bytes are ASCII. */ - if (coding->head_ascii < 0) + if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)) + && NILP (CODING_ATTR_POST_READ (attrs)) + && NILP (get_translation_table (attrs, 0, NULL)) + && (coding->head_ascii >= 0 /* We've already called detect_coding */ + ? coding->head_ascii == bytes + : detect_ascii (coding))) { - /* We have not yet counted the number of ASCII bytes at the - head of the source. Do it now. */ - const unsigned char *src, *src_end; + if (coding->eol_seen == EOL_SEEN_CR) + { + unsigned char *src_end = GAP_END_ADDR; + unsigned char *src = src - coding->src_bytes; - coding_set_source (coding); - src_end = coding->source + coding->src_bytes; - for (src = coding->source; src < src_end; src++) + while (src < src_end) + { + if (*src++ == '\r') + src[-1] = '\n'; + } + } + else if (coding->eol_seen == EOL_SEEN_CRLF) { - if (*src & 0x80) - break; + unsigned char *src = GAP_END_ADDR; + unsigned char *src_beg = src - coding->src_bytes; + unsigned char *dst = src; + + while (src_beg < src) + { + *--dst = *--src; + if (*src == '\n') + src--; + } + bytes -= dst - src; } - coding->head_ascii = src - coding->source; - } - if (coding->src_bytes == coding->head_ascii) - { - /* No need of conversion. Use the data in the gap as is. 
*/ - coding->produced_char = chars; - coding->produced = bytes; - adjust_after_replace (PT, PT_BYTE, Qnil, chars, bytes, 1); + coding->produced_char = coding->produced = bytes; + insert_from_gap (bytes, bytes, 1); return; } } -#endif /* not CODING_DISABLE_ASCII_OPTIMIZATION */ code_conversion_save (0, 0); coding->mode |= CODING_MODE_LAST_BLOCK; @@ -10758,6 +10874,11 @@ from GNU Find and GNU Grep. Emacs will then ignore the null bytes and decode text as usual. */); inhibit_null_byte_detection = 0; + DEFVAR_BOOL ("disable-ascii-optimization", disable_ascii_optimization, + doc: /* If non-nil, Emacs does not optimize code decoder for ASCII files. +Internal use only. Removed after the experimental optimizer gets stable. */); + disable_ascii_optimization = 0; + DEFVAR_LISP ("translation-table-for-input", Vtranslation_table_for_input, doc: /* Char table for translating self-inserting characters. This is applied to the result of input methods, not their input. -- cgit v1.2.1 From cded56c19b30e038537398b5213438c339428ed9 Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Fri, 15 Mar 2013 13:03:31 -0700 Subject: * coding.c (decode_coding_gap): Fix typo caught by static checking. --- src/coding.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/coding.c') diff --git a/src/coding.c b/src/coding.c index 5047e1149bc..6cfcec905a1 100644 --- a/src/coding.c +++ b/src/coding.c @@ -7632,7 +7632,7 @@ decode_coding_gap (struct coding_system *coding, if (coding->eol_seen == EOL_SEEN_CR) { unsigned char *src_end = GAP_END_ADDR; - unsigned char *src = src - coding->src_bytes; + unsigned char *src = src_end - coding->src_bytes; while (src < src_end) { -- cgit v1.2.1 From bad98418bf75efc6dd8ac393157413bc6ef769b4 Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Mon, 18 Mar 2013 21:41:53 -0700 Subject: Spelling fixes. 
--- src/coding.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/coding.c') diff --git a/src/coding.c b/src/coding.c index 6cfcec905a1..8a09cd67859 100644 --- a/src/coding.c +++ b/src/coding.c @@ -6075,7 +6075,7 @@ complement_process_encoding_system (Lisp_Object coding_system) static Lisp_Object adjust_coding_eol_type (struct coding_system *coding, int eol_seen); -/* Return 1 if all the source bytes are ASCII, and return 0 otherwize. +/* Return true iff all the source bytes are ASCII. By side effects, set coding->head_ascii and coding->eol_seen. The value of coding->eol_seen is "logical or" of EOL_SEEN_LF, EOL_SEEN_CR, and EOL_SEEN_CRLF, but the value is reliable only when -- cgit v1.2.1 From c0a17406acd4b9db561ba99f8a02bf5461130e82 Mon Sep 17 00:00:00 2001 From: Kenichi Handa Date: Wed, 20 Mar 2013 16:58:20 +0900 Subject: coding.c (syms_of_coding): Initialize disable_ascii_optimization 1. --- src/coding.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/coding.c') diff --git a/src/coding.c b/src/coding.c index 6cfcec905a1..a5b07019388 100644 --- a/src/coding.c +++ b/src/coding.c @@ -10877,7 +10877,7 @@ decode text as usual. */); DEFVAR_BOOL ("disable-ascii-optimization", disable_ascii_optimization, doc: /* If non-nil, Emacs does not optimize code decoder for ASCII files. Internal use only. Removed after the experimental optimizer gets stable. */); - disable_ascii_optimization = 0; + disable_ascii_optimization = 1; DEFVAR_LISP ("translation-table-for-input", Vtranslation_table_for_input, doc: /* Char table for translating self-inserting characters. -- cgit v1.2.1