From bc989a58e2412c152c2aef9d35ca103979edebd5 Mon Sep 17 00:00:00 2001
From: Eli Zaretskii
Date: Sat, 9 Mar 2013 20:09:33 +0200
Subject: coding.c (to_unicode): Fix a typo in a comment.

---
 src/coding.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/coding.c')

diff --git a/src/coding.c b/src/coding.c
index 32da72ab626..78e6cff7078 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -7970,7 +7970,7 @@ wchar_t *
 to_unicode (Lisp_Object str, Lisp_Object *buf)
 {
   *buf = code_convert_string_norecord (str, Qutf_16le, 1);
-  /* We need to make a another copy (in addition to the one made by
+  /* We need to make another copy (in addition to the one made by
      code_convert_string_norecord) to ensure that the final string is
      _doubly_ zero terminated --- that is, that the string is
      terminated by two zero bytes and one utf-16le null character.
-- 
cgit v1.2.1


From c230dd7d89730f565df77046d0666d2082e386ee Mon Sep 17 00:00:00 2001
From: Kenichi Handa
Date: Sun, 10 Mar 2013 23:36:35 +0900
Subject: On file insertion, skip decoding if all bytes are ASCII.

---
 src/coding.c | 43 +++++++++++++++++++++++++++++++++++++++----
 1 file changed, 39 insertions(+), 4 deletions(-)

(limited to 'src/coding.c')

diff --git a/src/coding.c b/src/coding.c
index 32da72ab626..f33b5e7c7d5 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -6349,7 +6349,12 @@ detect_coding (struct coding_system *coding)
       coding_systems
 	= AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
       detect_info.found = detect_info.rejected = 0;
-      coding->head_ascii = 0;
+      for (src = coding->source; src < src_end; src++)
+	{
+	  if (*src & 0x80)
+	    break;
+	}
+      coding->head_ascii = src - coding->source;
       if (CONSP (coding_systems)
 	  && detect_coding_utf_8 (coding, &detect_info))
 	{
@@ -7487,8 +7492,6 @@ decode_coding_gap (struct coding_system *coding,
   ptrdiff_t count = SPECPDL_INDEX ();
   Lisp_Object attrs;
 
-  code_conversion_save (0, 0);
-
   coding->src_object = Fcurrent_buffer ();
   coding->src_chars = chars;
   coding->src_bytes = bytes;
@@ -7502,13 +7505,45 @@ decode_coding_gap (struct coding_system *coding,
 
   if (CODING_REQUIRE_DETECTION (coding))
     detect_coding (coding);
+  attrs = CODING_ID_ATTRS (coding->id);
+#ifndef CODING_DISABLE_ASCII_OPTIMIZATION
+  if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs))
+      && NILP (CODING_ATTR_POST_READ (attrs))
+      && NILP (get_translation_table (attrs, 0, NULL)))
+    {
+      /* We can skip the conversion if all source bytes are ASCII.  */
+      if (coding->head_ascii < 0)
+	{
+	  /* We have not yet counted the number of ASCII bytes at the
+	     head of the source.  Do it now.  */
+	  const unsigned char *src, *src_end;
+
+	  coding_set_source (coding);
+	  src_end = coding->source + coding->src_bytes;
+	  for (src = coding->source; src < src_end; src++)
+	    {
+	      if (*src & 0x80)
+		break;
+	    }
+	  coding->head_ascii = src - coding->source;
+	}
+      if (coding->src_bytes == coding->head_ascii)
+	{
+	  /* No need of conversion.  Use the data in the gap as is.  */
+	  coding->produced_char = chars;
+	  coding->produced = bytes;
+	  adjust_after_replace (PT, PT_BYTE, Qnil, chars, bytes, 1);
+	  return;
+	}
+    }
+#endif	/* not CODING_DISABLE_ASCII_OPTIMIZATION */
+  code_conversion_save (0, 0);
 
   coding->mode |= CODING_MODE_LAST_BLOCK;
   current_buffer->text->inhibit_shrinking = 1;
   decode_coding (coding);
   current_buffer->text->inhibit_shrinking = 0;
 
-  attrs = CODING_ID_ATTRS (coding->id);
   if (! NILP (CODING_ATTR_POST_READ (attrs)))
     {
       ptrdiff_t prev_Z = Z, prev_Z_BYTE = Z_BYTE;
-- 
cgit v1.2.1


From 7d051e215477753b813864caa23c1009c7692bda Mon Sep 17 00:00:00 2001
From: Kenichi Handa
Date: Mon, 11 Mar 2013 00:06:04 +0900
Subject: Fix previous change.

---
 src/coding.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'src/coding.c')

diff --git a/src/coding.c b/src/coding.c
index 98af4ddcef7..d6560a92b70 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -7509,7 +7509,9 @@ decode_coding_gap (struct coding_system *coding,
 #ifndef CODING_DISABLE_ASCII_OPTIMIZATION
   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs))
       && NILP (CODING_ATTR_POST_READ (attrs))
-      && NILP (get_translation_table (attrs, 0, NULL)))
+      && NILP (get_translation_table (attrs, 0, NULL))
+      && (inhibit_eol_conversion
+	  || EQ (CODING_ID_EOL_TYPE (coding->id), Qunix)))
     {
       /* We can skip the conversion if all source bytes are ASCII.  */
       if (coding->head_ascii < 0)
-- 
cgit v1.2.1


From 819e2da92a18d7af03ccd9cf0a2e5b940eb7b54f Mon Sep 17 00:00:00 2001
From: Daniel Colascione
Date: Sun, 10 Mar 2013 14:55:25 -0800
Subject: 2013-03-10  Daniel Colascione  <dancol@dancol.org>

	* w32term.h (GUISTR, GUI_ENCODE_FILE, GUI_ENCODE_SYSTEM, GUI_FN)
	(GUI_SDATA, guichar_t): Macros to abstract out differences between
	NTGUI_UNICODE and !NTGUI_UNICODE builds, some moved out of
	w32fns.c.

	* w32term.c (construct_drag_n_drop): Use the above macros to make
	drag-and-drop work for non-ASCII filenames in cygw32 builds.

	* w32fns.c (x_set_name, x_set_title): Use the above macros to
	properly display non-ASCII frame titles in cygw32 builds.

	* w32fns.c (Fw32_shell_execute): Use the above macros to properly
	call ShellExecute in cygw32 builds.

	* w32fns.c (Fx_file_dialog): Use the above macros to simplify the
	common file dialog code.

	* w32fns.c (Ffile_system_info): Remove from cygw32 builds, which
	can just use du like other systems.

	* coding.h (from_unicode_buffer): Declare.
	* coding.c (from_unicode_buffer): Implement.
---
 src/coding.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'src/coding.c')

diff --git a/src/coding.c b/src/coding.c
index d6560a92b70..c18632f301b 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -286,6 +286,10 @@ encode_coding_XXX (struct coding_system *coding)
 #include <config.h>
 #include <stdio.h>
 
+#ifdef HAVE_WCHAR_H
+#include <wchar.h>
+#endif /* HAVE_WCHAR_H */
+
 #include "lisp.h"
 #include "character.h"
 #include "buffer.h"
@@ -8003,6 +8007,16 @@ from_unicode (Lisp_Object str)
   return code_convert_string_norecord (str, Qutf_16le, 0);
 }
 
+Lisp_Object
+from_unicode_buffer (const wchar_t* wstr)
+{
+    return from_unicode (
+        make_unibyte_string (
+            (char*) wstr,
+            /* Copy one terminating 0 byte; we get the second one for free.  */
+            1 + sizeof (wchar_t) * wcslen (wstr)));
+}
+
 wchar_t *
 to_unicode (Lisp_Object str, Lisp_Object *buf)
 {
-- 
cgit v1.2.1


From 8a44e6d176989d8eef140314098c76a70248ba61 Mon Sep 17 00:00:00 2001
From: Kenichi Handa
Date: Sat, 16 Mar 2013 01:03:54 +0900
Subject: Optimize ASCII file reading with EOL format detection and decoding.

---
 src/coding.c | 197 +++++++++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 159 insertions(+), 38 deletions(-)

(limited to 'src/coding.c')

diff --git a/src/coding.c b/src/coding.c
index c18632f301b..5047e1149bc 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -6071,6 +6071,93 @@ complement_process_encoding_system (Lisp_Object coding_system)
 #define EOL_SEEN_CR	2
 #define EOL_SEEN_CRLF	4
 
+
+static Lisp_Object adjust_coding_eol_type (struct coding_system *coding, int eol_seen);
+
+
+/* Return 1 if all the source bytes are ASCII, and return 0 otherwise.
+   As side effects, set coding->head_ascii (the number of leading
+   ASCII bytes) and coding->eol_seen.  The latter is the bitwise OR of
+   EOL_SEEN_LF, EOL_SEEN_CR, and EOL_SEEN_CRLF, and is reliable only
+   when all the source bytes are ASCII.  */
+
+static bool
+detect_ascii (struct coding_system *coding)
+{
+  const unsigned char *src, *end;
+  Lisp_Object eol_type = CODING_ID_EOL_TYPE (coding->id);
+  int eol_seen;
+
+  eol_seen = (VECTORP (eol_type) ? EOL_SEEN_NONE
+	      : EQ (eol_type, Qunix) ? EOL_SEEN_LF
+	      : EQ (eol_type, Qdos) ? EOL_SEEN_CRLF
+	      : EOL_SEEN_CR);
+  coding_set_source (coding);
+  src = coding->source;
+  end = src + coding->src_bytes;
+
+  if (inhibit_eol_conversion)
+    {
+      /* EOL conversion is inhibited: only look for a non-ASCII byte.  */
+      while (src < end && !( *src & 0x80)) src++;
+      eol_seen = EOL_SEEN_LF;
+      adjust_coding_eol_type (coding, eol_seen);
+    }
+  else if (eol_seen != EOL_SEEN_NONE)
+    {
+      /* EOL type is already a single type, so skip EOL detection too.  */
+      while (src < end && !(*src & 0x80)) src++;
+    }
+  else
+    {
+      end--;			/* The CR check below reads one byte ahead.  */
+      while (src < end)
+	{
+	  int c = *src;
+
+	  if (c & 0x80)
+	    break;
+	  src++;
+	  if (c < 0x20)
+	    {
+	      if (c == '\r')
+		{
+		  if (*src == '\n')
+		    {
+		      eol_seen |= EOL_SEEN_CRLF;
+		      src++;
+		    }
+		  else
+		    eol_seen |= EOL_SEEN_CR;
+		}
+	      else if (c == '\n')
+		eol_seen |= EOL_SEEN_LF;
+	    }
+	}
+      if (src > end)
+	/* A CR LF pair ended at the last byte and moved SRC past the
+	   decremented END, which means that we have scanned all bytes.  */
+	end++;
+      else if (src == end)
+	{
+	  end++;
+	  if (! (*src & 0x80))
+	    {
+	      if (*src == '\r')
+		eol_seen |= EOL_SEEN_CR;
+	      else if (*src  == '\n')
+		eol_seen |= EOL_SEEN_LF;
+	      src++;
+	    }
+	}
+      adjust_coding_eol_type (coding, eol_seen);
+    }
+  coding->head_ascii = src - coding->source;
+  coding->eol_seen = eol_seen;
+  return (src == end);
+}
+
+
 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
    SOURCE is encoded.  If CATEGORY is one of
    coding_category_utf_16_XXXX, assume that CR and LF are encoded by
@@ -6215,7 +6302,6 @@ detect_coding (struct coding_system *coding)
   coding_set_source (coding);
 
   src_end = coding->source + coding->src_bytes;
-  coding->head_ascii = 0;
 
   /* If we have not yet decided the text encoding type, detect it
      now.  */
@@ -6225,6 +6311,8 @@ detect_coding (struct coding_system *coding)
       struct coding_detection_info detect_info;
       bool null_byte_found = 0, eight_bit_found = 0;
 
+      coding->head_ascii = 0;
+      coding->eol_seen = EOL_SEEN_NONE;
       detect_info.checked = detect_info.found = detect_info.rejected = 0;
       for (src = coding->source; src < src_end; src++)
 	{
@@ -6263,6 +6351,26 @@ detect_coding (struct coding_system *coding)
 		  if (eight_bit_found)
 		    break;
 		}
+	      else if (! disable_ascii_optimization
+		       && ! inhibit_eol_conversion)
+		{
+		  if (c == '\r')
+		    {
+		      if (src < src_end && src[1] == '\n')
+			{
+			  coding->eol_seen |= EOL_SEEN_CRLF;
+			  src++;
+			  coding->head_ascii++;
+			}
+		      else
+			coding->eol_seen |= EOL_SEEN_CR;
+		    }
+		  else if (c == '\n')
+		    {
+		      coding->eol_seen |= EOL_SEEN_LF;
+		    }
+		}
+
 	      if (! eight_bit_found)
 		coding->head_ascii++;
 	    }
@@ -6353,19 +6461,20 @@ detect_coding (struct coding_system *coding)
       coding_systems
 	= AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
       detect_info.found = detect_info.rejected = 0;
-      for (src = coding->source; src < src_end; src++)
+      if (detect_ascii (coding))
 	{
-	  if (*src & 0x80)
-	    break;
+	  setup_coding_system (XCDR (coding_systems), coding);
 	}
-      coding->head_ascii = src - coding->source;
-      if (CONSP (coding_systems)
-	  && detect_coding_utf_8 (coding, &detect_info))
+      else
 	{
-	  if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
-	    setup_coding_system (XCAR (coding_systems), coding);
-	  else
-	    setup_coding_system (XCDR (coding_systems), coding);
+	  if (CONSP (coding_systems)
+	      && detect_coding_utf_8 (coding, &detect_info))
+	    {
+	      if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
+		setup_coding_system (XCAR (coding_systems), coding);
+	      else
+		setup_coding_system (XCDR (coding_systems), coding);
+	    }
 	}
     }
   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
@@ -6378,6 +6487,7 @@ detect_coding (struct coding_system *coding)
 	= AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
       detect_info.found = detect_info.rejected = 0;
       coding->head_ascii = 0;
+      coding->eol_seen = EOL_SEEN_NONE;
       if (CONSP (coding_systems)
 	  && detect_coding_utf_16 (coding, &detect_info))
 	{
@@ -6815,7 +6925,7 @@ produce_chars (struct coding_system *coding, Lisp_Object translation_table,
 
   produced = dst - (coding->destination + coding->produced);
   if (BUFFERP (coding->dst_object) && produced_chars > 0)
-    insert_from_gap (produced_chars, produced);
+    insert_from_gap (produced_chars, produced, 0);
   coding->produced += produced;
   coding->produced_char += produced_chars;
   return carryover;
@@ -7400,7 +7510,7 @@ encode_coding (struct coding_system *coding)
   } while (coding->consumed_char < coding->src_chars);
 
   if (BUFFERP (coding->dst_object) && coding->produced_char > 0)
-    insert_from_gap (coding->produced_char, coding->produced);
+    insert_from_gap (coding->produced_char, coding->produced, 0);
 
   SAFE_FREE ();
 }
@@ -7510,39 +7620,45 @@ decode_coding_gap (struct coding_system *coding,
   if (CODING_REQUIRE_DETECTION (coding))
     detect_coding (coding);
   attrs = CODING_ID_ATTRS (coding->id);
-#ifndef CODING_DISABLE_ASCII_OPTIMIZATION
-  if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs))
-      && NILP (CODING_ATTR_POST_READ (attrs))
-      && NILP (get_translation_table (attrs, 0, NULL))
-      && (inhibit_eol_conversion
-	  || EQ (CODING_ID_EOL_TYPE (coding->id), Qunix)))
+  if (! disable_ascii_optimization)
     {
-      /* We can skip the conversion if all source bytes are ASCII.  */
-      if (coding->head_ascii < 0)
+      if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs))
+	  && NILP (CODING_ATTR_POST_READ (attrs))
+	  && NILP (get_translation_table (attrs, 0, NULL))
+	  && (coding->head_ascii >= 0 /* We've already called detect_coding */
+	      ? coding->head_ascii == bytes
+	      : detect_ascii (coding)))
 	{
-	  /* We have not yet counted the number of ASCII bytes at the
-	     head of the source.  Do it now.  */
-	  const unsigned char *src, *src_end;
+	  if (coding->eol_seen == EOL_SEEN_CR)
+	    {
+	      unsigned char *src_end = GAP_END_ADDR;
+	      unsigned char *src = src - coding->src_bytes;
 
-	  coding_set_source (coding);
-	  src_end = coding->source + coding->src_bytes;
-	  for (src = coding->source; src < src_end; src++)
+	      while (src < src_end)
+		{
+		  if (*src++ == '\r')
+		    src[-1] = '\n';
+		}
+	    }
+	  else if (coding->eol_seen == EOL_SEEN_CRLF)
 	    {
-	      if (*src & 0x80)
-		break;
+	      unsigned char *src = GAP_END_ADDR;
+	      unsigned char *src_beg = src - coding->src_bytes;
+	      unsigned char *dst = src;
+
+	      while (src_beg < src)
+		{
+		  *--dst = *--src;
+		  if (*src == '\n')
+		    src--;
+		}
+	      bytes -= dst - src;
 	    }
-	  coding->head_ascii = src - coding->source;
-	}
-      if (coding->src_bytes == coding->head_ascii)
-	{
-	  /* No need of conversion.  Use the data in the gap as is.  */
-	  coding->produced_char = chars;
-	  coding->produced = bytes;
-	  adjust_after_replace (PT, PT_BYTE, Qnil, chars, bytes, 1);
+	  coding->produced_char = coding->produced = bytes;
+	  insert_from_gap (bytes, bytes, 1);
 	  return;
 	}
     }
-#endif	/* not CODING_DISABLE_ASCII_OPTIMIZATION */
   code_conversion_save (0, 0);
 
   coding->mode |= CODING_MODE_LAST_BLOCK;
@@ -10758,6 +10874,11 @@ from GNU Find and GNU Grep.  Emacs will then ignore the null bytes and
 decode text as usual.  */);
   inhibit_null_byte_detection = 0;
 
+  DEFVAR_BOOL ("disable-ascii-optimization", disable_ascii_optimization,
+	       doc: /* If non-nil, Emacs does not optimize code decoder for ASCII files.
+Internal use only.  Removed after the experimental optimizer gets stable. */);
+  disable_ascii_optimization = 0;
+
   DEFVAR_LISP ("translation-table-for-input", Vtranslation_table_for_input,
 	       doc: /* Char table for translating self-inserting characters.
 This is applied to the result of input methods, not their input.
-- 
cgit v1.2.1


From cded56c19b30e038537398b5213438c339428ed9 Mon Sep 17 00:00:00 2001
From: Paul Eggert
Date: Fri, 15 Mar 2013 13:03:31 -0700
Subject: * coding.c (decode_coding_gap): Fix typo caught by static checking.

---
 src/coding.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/coding.c')

diff --git a/src/coding.c b/src/coding.c
index 5047e1149bc..6cfcec905a1 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -7632,7 +7632,7 @@ decode_coding_gap (struct coding_system *coding,
 	  if (coding->eol_seen == EOL_SEEN_CR)
 	    {
 	      unsigned char *src_end = GAP_END_ADDR;
-	      unsigned char *src = src - coding->src_bytes;
+	      unsigned char *src = src_end - coding->src_bytes;
 
 	      while (src < src_end)
 		{
-- 
cgit v1.2.1