1 files changed, 186 insertions, 14 deletions
diff --git a/src/coding.c b/src/coding.c
index 32da72ab626..6cfcec905a1 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -286,6 +286,10 @@ encode_coding_XXX (struct coding_system *coding)
 #include <config.h>
 #include <stdio.h>
+#ifdef HAVE_WCHAR_H
+#include <wchar.h>
+#endif /* HAVE_WCHAR_H */
 #include "lisp.h"
 #include "character.h"
 #include "buffer.h"
@@ -6067,6 +6071,93 @@ complement_process_encoding_system (Lisp_Object coding_system)
 #define EOL_SEEN_CR     2
 #define EOL_SEEN_CRLF   4
+static Lisp_Object adjust_coding_eol_type (struct coding_system *coding, int eol_seen);
+/* Return true if all the source bytes of CODING are ASCII (i.e. no
+   byte has the 0x80 bit set), and return false otherwise.
+   By side effects, set coding->head_ascii and coding->eol_seen.
+   coding->head_ascii becomes the number of source bytes that were
+   scanned before the first non-ASCII byte or the end of the source.
+   The value of coding->eol_seen is "logical or" of EOL_SEEN_LF,
+   EOL_SEEN_CR, and EOL_SEEN_CRLF, but the value is reliable only when
+   all the source bytes are ASCII.
+
+   The initial value of eol_seen is taken from the EOL type of CODING:
+   EOL_SEEN_NONE when that type is a vector (presumably meaning the
+   EOL format is not decided yet -- confirm against
+   CODING_ID_EOL_TYPE), EOL_SEEN_LF for Qunix, EOL_SEEN_CRLF for Qdos,
+   and EOL_SEEN_CR for anything else.
+
+   adjust_coding_eol_type is called when inhibit_eol_conversion is
+   nonzero (with EOL_SEEN_LF) and when the initial eol_seen is
+   EOL_SEEN_NONE (with the formats found by the scan); it is not
+   called when the EOL type was already fixed.  */
+static bool
+detect_ascii (struct coding_system *coding)
+{
+  const unsigned char *src, *end;
+  Lisp_Object eol_type = CODING_ID_EOL_TYPE (coding->id);
+  int eol_seen;
+  eol_seen = (VECTORP (eol_type) ? EOL_SEEN_NONE
+              : EQ (eol_type, Qunix) ? EOL_SEEN_LF
+              : EQ (eol_type, Qdos) ? EOL_SEEN_CRLF
+              : EOL_SEEN_CR);
+  coding_set_source (coding);
+  src = coding->source;
+  end = src + coding->src_bytes;
+  if (inhibit_eol_conversion)
+    {
+      /* We don't have to check EOL format: just skip over the leading
+         run of ASCII bytes, then report EOL_SEEN_LF unconditionally.  */
+      while (src < end && !( *src & 0x80)) src++;
+      eol_seen = EOL_SEEN_LF;
+      adjust_coding_eol_type (coding, eol_seen);
+    }
+  else if (eol_seen != EOL_SEEN_NONE)
+    {
+      /* We don't have to check EOL format either: eol_seen already
+         holds the value derived from the EOL type of CODING, so only
+         the leading run of ASCII bytes is skipped.  */
+      while (src < end && !(*src & 0x80)) src++;
+    }
+  else
+    {
+      end--;                    /* We look ahead one byte: after a CR
+                                   is consumed, *src is read to test
+                                   for LF, so the loop stops one byte
+                                   before the real end.  END is
+                                   restored below only when the scan
+                                   was not cut short by a non-ASCII
+                                   byte.  */
+      while (src < end)
+        {
+          int c = *src;
+          if (c & 0x80)
+            break;
+          src++;
+          if (c < 0x20)
+            {
+              if (c == '\r')
+                {
+                  if (*src == '\n')
+                    {
+                      eol_seen |= EOL_SEEN_CRLF;
+                      src++;
+                    }
+                  else
+                    eol_seen |= EOL_SEEN_CR;
+                }
+              else if (c == '\n')
+                eol_seen |= EOL_SEEN_LF;
+            }
+        }
+      if (src > end)
+        /* The last two bytes are CR LF, which means that we have
+           scanned all bytes.  Restoring END here makes SRC == END, so
+           the function returns true.  (When the loop instead stopped
+           with SRC < END because of a non-ASCII byte, neither branch
+           below runs, END stays decremented, SRC != END, and the
+           function returns false.)  */
+        end++;
+      else if (src == end)
+        {
+          end++;
+          if (! (*src & 0x80))
+            {
+              if (*src == '\r')
+                eol_seen |= EOL_SEEN_CR;
+              else if (*src  == '\n')
+                eol_seen |= EOL_SEEN_LF;
+              src++;
+            }
+        }
+      adjust_coding_eol_type (coding, eol_seen);
+    }
+  coding->head_ascii = src - coding->source;
+  coding->eol_seen = eol_seen;
+  return (src == end);
+}
 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
   SOURCE is encoded.  If CATEGORY is one of
   coding_category_utf_16_XXXX, assume that CR and LF are encoded by
@@ -6211,7 +6302,6 @@ detect_coding (struct coding_system *coding)
  coding_set_source (coding);
  src_end = coding->source + coding->src_bytes;
-  coding->head_ascii = 0;
  /* If we have not yet decided the text encoding type, detect it
     now.  */
@@ -6221,6 +6311,8 @@ detect_coding (struct coding_system *coding)
      struct coding_detection_info detect_info;
      bool null_byte_found = 0, eight_bit_found = 0;
+      coding->head_ascii = 0;
+      coding->eol_seen = EOL_SEEN_NONE;
      detect_info.checked = detect_info.found = detect_info.rejected = 0;
      for (src = coding->source; src < src_end; src++)
        {
@@ -6259,6 +6351,26 @@ detect_coding (struct coding_system *coding)
                  if (eight_bit_found)
                    break;
                }
+              else if (! disable_ascii_optimization
+                       && ! inhibit_eol_conversion)
+                {
+                  if (c == '\r')
+                    {
+                      if (src < src_end && src[1] == '\n')
+                        {
+                          coding->eol_seen |= EOL_SEEN_CRLF;
+                          src++;
+                          coding->head_ascii++;
+                        }
+                      else
+                        coding->eol_seen |= EOL_SEEN_CR;
+                    }
+                  else if (c == '\n')
+                    {
+                      coding->eol_seen |= EOL_SEEN_LF;
+                    }
+                }
              if (! eight_bit_found)
                coding->head_ascii++;
            }
@@ -6349,14 +6461,20 @@ detect_coding (struct coding_system *coding)
      coding_systems
        = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
      detect_info.found = detect_info.rejected = 0;
-      coding->head_ascii = 0;
+      if (detect_ascii (coding))
-      if (CONSP (coding_systems)
-          && detect_coding_utf_8 (coding, &detect_info))
        {
-          if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
+          setup_coding_system (XCDR (coding_systems), coding);
-            setup_coding_system (XCAR (coding_systems), coding);
+        }
-          else
+      else
-            setup_coding_system (XCDR (coding_systems), coding);
+        {
+          if (CONSP (coding_systems)
+              && detect_coding_utf_8 (coding, &detect_info))
+            {
+              if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
+                setup_coding_system (XCAR (coding_systems), coding);
+              else
+                setup_coding_system (XCDR (coding_systems), coding);
+            }
        }
    }
  else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
@@ -6369,6 +6487,7 @@ detect_coding (struct coding_system *coding)
        = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
      detect_info.found = detect_info.rejected = 0;
      coding->head_ascii = 0;
+      coding->eol_seen = EOL_SEEN_NONE;
      if (CONSP (coding_systems)
          && detect_coding_utf_16 (coding, &detect_info))
        {
@@ -6806,7 +6925,7 @@ produce_chars (struct coding_system *coding, Lisp_Object translation_table,
  produced = dst - (coding->destination + coding->produced);
  if (BUFFERP (coding->dst_object) && produced_chars > 0)
-    insert_from_gap (produced_chars, produced);
+    insert_from_gap (produced_chars, produced, 0);
  coding->produced += produced;
  coding->produced_char += produced_chars;
  return carryover;
@@ -7391,7 +7510,7 @@ encode_coding (struct coding_system *coding)
  } while (coding->consumed_char < coding->src_chars);
  if (BUFFERP (coding->dst_object) && coding->produced_char > 0)
-    insert_from_gap (coding->produced_char, coding->produced);
+    insert_from_gap (coding->produced_char, coding->produced, 0);
  SAFE_FREE ();
 }
@@ -7487,8 +7606,6 @@ decode_coding_gap (struct coding_system *coding,
  ptrdiff_t count = SPECPDL_INDEX ();
  Lisp_Object attrs;
-  code_conversion_save (0, 0);
  coding->src_object = Fcurrent_buffer ();
  coding->src_chars = chars;
  coding->src_bytes = bytes;
@@ -7502,13 +7619,53 @@ decode_coding_gap (struct coding_system *coding,
  if (CODING_REQUIRE_DETECTION (coding))
    detect_coding (coding);
+  attrs = CODING_ID_ATTRS (coding->id);
+  if (! disable_ascii_optimization)
+    {
+      if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs))
+          && NILP (CODING_ATTR_POST_READ (attrs))
+          && NILP (get_translation_table (attrs, 0, NULL))
+          && (coding->head_ascii >= 0 /* We've already called detect_coding */
+              ? coding->head_ascii == bytes
+              : detect_ascii (coding)))
+        {
+          if (coding->eol_seen == EOL_SEEN_CR)
+            {
+              unsigned char *src_end = GAP_END_ADDR;
+              unsigned char *src = src_end - coding->src_bytes;
+              while (src < src_end)
+                {
+                  if (*src++ == '\r')
+                    src[-1] = '\n';
+                }
+            }
+          else if (coding->eol_seen == EOL_SEEN_CRLF)
+            {
+              unsigned char *src = GAP_END_ADDR;
+              unsigned char *src_beg = src - coding->src_bytes;
+              unsigned char *dst = src;
+              while (src_beg < src)
+                {
+                  *--dst = *--src;
+                  if (*src == '\n')
+                    src--;
+                }
+              bytes -= dst - src;
+            }
+          coding->produced_char = coding->produced = bytes;
+          insert_from_gap (bytes, bytes, 1);
+          return;
+        }
+    }
+  code_conversion_save (0, 0);
  coding->mode |= CODING_MODE_LAST_BLOCK;
  current_buffer->text->inhibit_shrinking = 1;
  decode_coding (coding);
  current_buffer->text->inhibit_shrinking = 0;
-  attrs = CODING_ID_ATTRS (coding->id);
  if (! NILP (CODING_ATTR_POST_READ (attrs)))
    {
      ptrdiff_t prev_Z = Z, prev_Z_BYTE = Z_BYTE;
@@ -7966,11 +8123,21 @@ from_unicode (Lisp_Object str)
  return code_convert_string_norecord (str, Qutf_16le, 0);
 }
+Lisp_Object
+from_unicode_buffer (const wchar_t* wstr)
+{
+  /* Copy the wide characters of WSTR plus one extra byte.  That extra
+     byte is the first byte of WSTR's own terminator, so it is zero;
+     it is one of the two final 0 bytes and we get it for free.  */
+  size_t nbytes = sizeof (wchar_t) * wcslen (wstr) + 1;
+  Lisp_Object raw = make_unibyte_string ((char*) wstr, nbytes);
+  return from_unicode (raw);
+}
 wchar_t *
 to_unicode (Lisp_Object str, Lisp_Object *buf)
 {
  *buf = code_convert_string_norecord (str, Qutf_16le, 1);
-  /* We need to make a another copy (in addition to the one made by
+  /* We need to make another copy (in addition to the one made by
     code_convert_string_norecord) to ensure that the final string is
     _doubly_ zero terminated --- that is, that the string is
     terminated by two zero bytes and one utf-16le null character.
@@ -10707,6 +10874,11 @@ from GNU Find and GNU Grep.  Emacs will then ignore the null bytes and
 decode text as usual.  */);
  inhibit_null_byte_detection = 0;
+  DEFVAR_BOOL ("disable-ascii-optimization", disable_ascii_optimization,
+               doc: /* If non-nil, Emacs does not optimize code decoder for ASCII files.
+Internal use only.  Removed after the experimental optimizer gets stable. */);
+  disable_ascii_optimization = 0;
  DEFVAR_LISP ("translation-table-for-input", Vtranslation_table_for_input,
               doc: /* Char table for translating self-inserting characters.
 This is applied to the result of input methods, not their input.

diff --git a/src/coding.c b/src/coding.c index 32da72ab626..6cfcec905a1 100644 --- a/src/coding.c +++ b/src/coding.c
@@ -286,6 +286,10 @@ encode_coding_XXX (struct coding_system *coding)
286	#include <config.h>	286	#include <config.h>
287	#include <stdio.h>	287	#include <stdio.h>
288		288
		289	#ifdef HAVE_WCHAR_H
		290	#include <wchar.h>
		291	#endif /* HAVE_WCHAR_H */
		292
289	#include "lisp.h"	293	#include "lisp.h"
290	#include "character.h"	294	#include "character.h"
291	#include "buffer.h"	295	#include "buffer.h"
@@ -6067,6 +6071,93 @@ complement_process_encoding_system (Lisp_Object coding_system)
6067	#define EOL_SEEN_CR 2	6071	#define EOL_SEEN_CR 2
6068	#define EOL_SEEN_CRLF 4	6072	#define EOL_SEEN_CRLF 4
6069		6073
		6074
		6075	static Lisp_Object adjust_coding_eol_type (struct coding_system *coding, int eol_seen);
		6076
		6077
		6078	/* Return 1 if all the source bytes are ASCII, and return 0 otherwise.
		6079	By side effects, set coding->head_ascii and coding->eol_seen. The
		6080	value of coding->eol_seen is "logical or" of EOL_SEEN_LF,
		6081	EOL_SEEN_CR, and EOL_SEEN_CRLF, but the value is reliable only when
		6082	all the source bytes are ASCII. */
		6083
		6084	static bool
		6085	detect_ascii (struct coding_system *coding)
		6086	{
		6087	const unsigned char *src, *end;
		6088	Lisp_Object eol_type = CODING_ID_EOL_TYPE (coding->id);
		6089	int eol_seen;
		6090
		6091	eol_seen = (VECTORP (eol_type) ? EOL_SEEN_NONE
		6092	: EQ (eol_type, Qunix) ? EOL_SEEN_LF
		6093	: EQ (eol_type, Qdos) ? EOL_SEEN_CRLF
		6094	: EOL_SEEN_CR);
		6095	coding_set_source (coding);
		6096	src = coding->source;
		6097	end = src + coding->src_bytes;
		6098
		6099	if (inhibit_eol_conversion)
		6100	{
		6101	/* We don't have to check EOL format. */
		6102	while (src < end && !( *src & 0x80)) src++;
		6103	eol_seen = EOL_SEEN_LF;
		6104	adjust_coding_eol_type (coding, eol_seen);
		6105	}
		6106	else if (eol_seen != EOL_SEEN_NONE)
		6107	{
		6108	/* We don't have to check EOL format either. */
		6109	while (src < end && !(*src & 0x80)) src++;
		6110	}
		6111	else
		6112	{
		6113	end--; /* We look ahead one byte. */
		6114	while (src < end)
		6115	{
		6116	int c = *src;
		6117
		6118	if (c & 0x80)
		6119	break;
		6120	src++;
		6121	if (c < 0x20)
		6122	{
		6123	if (c == '\r')
		6124	{
		6125	if (*src == '\n')
		6126	{
		6127	eol_seen \|= EOL_SEEN_CRLF;
		6128	src++;
		6129	}
		6130	else
		6131	eol_seen \|= EOL_SEEN_CR;
		6132	}
		6133	else if (c == '\n')
		6134	eol_seen \|= EOL_SEEN_LF;
		6135	}
		6136	}
		6137	if (src > end)
		6138	/* The last two bytes are CR LF, which means that we have
		6139	scanned all bytes. */
		6140	end++;
		6141	else if (src == end)
		6142	{
		6143	end++;
		6144	if (! (*src & 0x80))
		6145	{
		6146	if (*src == '\r')
		6147	eol_seen \|= EOL_SEEN_CR;
		6148	else if (*src == '\n')
		6149	eol_seen \|= EOL_SEEN_LF;
		6150	src++;
		6151	}
		6152	}
		6153	adjust_coding_eol_type (coding, eol_seen);
		6154	}
		6155	coding->head_ascii = src - coding->source;
		6156	coding->eol_seen = eol_seen;
		6157	return (src == end);
		6158	}
		6159
		6160
6070	/* Detect how end-of-line of a text of length SRC_BYTES pointed by	6161	/* Detect how end-of-line of a text of length SRC_BYTES pointed by
6071	SOURCE is encoded. If CATEGORY is one of	6162	SOURCE is encoded. If CATEGORY is one of
6072	coding_category_utf_16_XXXX, assume that CR and LF are encoded by	6163	coding_category_utf_16_XXXX, assume that CR and LF are encoded by
@@ -6211,7 +6302,6 @@ detect_coding (struct coding_system *coding)
6211	coding_set_source (coding);	6302	coding_set_source (coding);
6212		6303
6213	src_end = coding->source + coding->src_bytes;	6304	src_end = coding->source + coding->src_bytes;
6214	coding->head_ascii = 0;
6215		6305
6216	/* If we have not yet decided the text encoding type, detect it	6306	/* If we have not yet decided the text encoding type, detect it
6217	now. */	6307	now. */
@@ -6221,6 +6311,8 @@ detect_coding (struct coding_system *coding)
6221	struct coding_detection_info detect_info;	6311	struct coding_detection_info detect_info;
6222	bool null_byte_found = 0, eight_bit_found = 0;	6312	bool null_byte_found = 0, eight_bit_found = 0;
6223		6313
		6314	coding->head_ascii = 0;
		6315	coding->eol_seen = EOL_SEEN_NONE;
6224	detect_info.checked = detect_info.found = detect_info.rejected = 0;	6316	detect_info.checked = detect_info.found = detect_info.rejected = 0;
6225	for (src = coding->source; src < src_end; src++)	6317	for (src = coding->source; src < src_end; src++)
6226	{	6318	{
@@ -6259,6 +6351,26 @@ detect_coding (struct coding_system *coding)
6259	if (eight_bit_found)	6351	if (eight_bit_found)
6260	break;	6352	break;
6261	}	6353	}
		6354	else if (! disable_ascii_optimization
		6355	&& ! inhibit_eol_conversion)
		6356	{
		6357	if (c == '\r')
		6358	{
		6359	if (src < src_end && src[1] == '\n')
		6360	{
		6361	coding->eol_seen \|= EOL_SEEN_CRLF;
		6362	src++;
		6363	coding->head_ascii++;
		6364	}
		6365	else
		6366	coding->eol_seen \|= EOL_SEEN_CR;
		6367	}
		6368	else if (c == '\n')
		6369	{
		6370	coding->eol_seen \|= EOL_SEEN_LF;
		6371	}
		6372	}
		6373
6262	if (! eight_bit_found)	6374	if (! eight_bit_found)
6263	coding->head_ascii++;	6375	coding->head_ascii++;
6264	}	6376	}
@@ -6349,14 +6461,20 @@ detect_coding (struct coding_system *coding)
6349	coding_systems	6461	coding_systems
6350	= AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);	6462	= AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
6351	detect_info.found = detect_info.rejected = 0;	6463	detect_info.found = detect_info.rejected = 0;
6352	coding->head_ascii = 0;	6464	if (detect_ascii (coding))
6353	if (CONSP (coding_systems)
6354	&& detect_coding_utf_8 (coding, &detect_info))
6355	{	6465	{
6356	if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)	6466	setup_coding_system (XCDR (coding_systems), coding);
6357	setup_coding_system (XCAR (coding_systems), coding);	6467	}
6358	else	6468	else
6359	setup_coding_system (XCDR (coding_systems), coding);	6469	{
		6470	if (CONSP (coding_systems)
		6471	&& detect_coding_utf_8 (coding, &detect_info))
		6472	{
		6473	if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
		6474	setup_coding_system (XCAR (coding_systems), coding);
		6475	else
		6476	setup_coding_system (XCDR (coding_systems), coding);
		6477	}
6360	}	6478	}
6361	}	6479	}
6362	else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))	6480	else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
@@ -6369,6 +6487,7 @@ detect_coding (struct coding_system *coding)
6369	= AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);	6487	= AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
6370	detect_info.found = detect_info.rejected = 0;	6488	detect_info.found = detect_info.rejected = 0;
6371	coding->head_ascii = 0;	6489	coding->head_ascii = 0;
		6490	coding->eol_seen = EOL_SEEN_NONE;
6372	if (CONSP (coding_systems)	6491	if (CONSP (coding_systems)
6373	&& detect_coding_utf_16 (coding, &detect_info))	6492	&& detect_coding_utf_16 (coding, &detect_info))
6374	{	6493	{
@@ -6806,7 +6925,7 @@ produce_chars (struct coding_system *coding, Lisp_Object translation_table,
6806		6925
6807	produced = dst - (coding->destination + coding->produced);	6926	produced = dst - (coding->destination + coding->produced);
6808	if (BUFFERP (coding->dst_object) && produced_chars > 0)	6927	if (BUFFERP (coding->dst_object) && produced_chars > 0)
6809	insert_from_gap (produced_chars, produced);	6928	insert_from_gap (produced_chars, produced, 0);
6810	coding->produced += produced;	6929	coding->produced += produced;
6811	coding->produced_char += produced_chars;	6930	coding->produced_char += produced_chars;
6812	return carryover;	6931	return carryover;
@@ -7391,7 +7510,7 @@ encode_coding (struct coding_system *coding)
7391	} while (coding->consumed_char < coding->src_chars);	7510	} while (coding->consumed_char < coding->src_chars);
7392		7511
7393	if (BUFFERP (coding->dst_object) && coding->produced_char > 0)	7512	if (BUFFERP (coding->dst_object) && coding->produced_char > 0)
7394	insert_from_gap (coding->produced_char, coding->produced);	7513	insert_from_gap (coding->produced_char, coding->produced, 0);
7395		7514
7396	SAFE_FREE ();	7515	SAFE_FREE ();
7397	}	7516	}
@@ -7487,8 +7606,6 @@ decode_coding_gap (struct coding_system *coding,
7487	ptrdiff_t count = SPECPDL_INDEX ();	7606	ptrdiff_t count = SPECPDL_INDEX ();
7488	Lisp_Object attrs;	7607	Lisp_Object attrs;
7489		7608
7490	code_conversion_save (0, 0);
7491
7492	coding->src_object = Fcurrent_buffer ();	7609	coding->src_object = Fcurrent_buffer ();
7493	coding->src_chars = chars;	7610	coding->src_chars = chars;
7494	coding->src_bytes = bytes;	7611	coding->src_bytes = bytes;
@@ -7502,13 +7619,53 @@ decode_coding_gap (struct coding_system *coding,
7502		7619
7503	if (CODING_REQUIRE_DETECTION (coding))	7620	if (CODING_REQUIRE_DETECTION (coding))
7504	detect_coding (coding);	7621	detect_coding (coding);
		7622	attrs = CODING_ID_ATTRS (coding->id);
		7623	if (! disable_ascii_optimization)
		7624	{
		7625	if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs))
		7626	&& NILP (CODING_ATTR_POST_READ (attrs))
		7627	&& NILP (get_translation_table (attrs, 0, NULL))
		7628	&& (coding->head_ascii >= 0 /* We've already called detect_coding */
		7629	? coding->head_ascii == bytes
		7630	: detect_ascii (coding)))
		7631	{
		7632	if (coding->eol_seen == EOL_SEEN_CR)
		7633	{
		7634	unsigned char *src_end = GAP_END_ADDR;
		7635	unsigned char *src = src_end - coding->src_bytes;
		7636
		7637	while (src < src_end)
		7638	{
		7639	if (*src++ == '\r')
		7640	src[-1] = '\n';
		7641	}
		7642	}
		7643	else if (coding->eol_seen == EOL_SEEN_CRLF)
		7644	{
		7645	unsigned char *src = GAP_END_ADDR;
		7646	unsigned char *src_beg = src - coding->src_bytes;
		7647	unsigned char *dst = src;
		7648
		7649	while (src_beg < src)
		7650	{
		7651	*--dst = *--src;
		7652	if (*src == '\n')
		7653	src--;
		7654	}
		7655	bytes -= dst - src;
		7656	}
		7657	coding->produced_char = coding->produced = bytes;
		7658	insert_from_gap (bytes, bytes, 1);
		7659	return;
		7660	}
		7661	}
		7662	code_conversion_save (0, 0);
7505		7663
7506	coding->mode \|= CODING_MODE_LAST_BLOCK;	7664	coding->mode \|= CODING_MODE_LAST_BLOCK;
7507	current_buffer->text->inhibit_shrinking = 1;	7665	current_buffer->text->inhibit_shrinking = 1;
7508	decode_coding (coding);	7666	decode_coding (coding);
7509	current_buffer->text->inhibit_shrinking = 0;	7667	current_buffer->text->inhibit_shrinking = 0;
7510		7668
7511	attrs = CODING_ID_ATTRS (coding->id);
7512	if (! NILP (CODING_ATTR_POST_READ (attrs)))	7669	if (! NILP (CODING_ATTR_POST_READ (attrs)))
7513	{	7670	{
7514	ptrdiff_t prev_Z = Z, prev_Z_BYTE = Z_BYTE;	7671	ptrdiff_t prev_Z = Z, prev_Z_BYTE = Z_BYTE;
@@ -7966,11 +8123,21 @@ from_unicode (Lisp_Object str)
7966	return code_convert_string_norecord (str, Qutf_16le, 0);	8123	return code_convert_string_norecord (str, Qutf_16le, 0);
7967	}	8124	}
7968		8125
		8126	Lisp_Object
		8127	from_unicode_buffer (const wchar_t* wstr)
		8128	{
		8129	return from_unicode (
		8130	make_unibyte_string (
		8131	(char*) wstr,
		8132	/* we get one of the two final 0 bytes for free. */
		8133	1 + sizeof (wchar_t) * wcslen (wstr)));
		8134	}
		8135
7969	wchar_t *	8136	wchar_t *
7970	to_unicode (Lisp_Object str, Lisp_Object *buf)	8137	to_unicode (Lisp_Object str, Lisp_Object *buf)
7971	{	8138	{
7972	*buf = code_convert_string_norecord (str, Qutf_16le, 1);	8139	*buf = code_convert_string_norecord (str, Qutf_16le, 1);
7973	/* We need to make a another copy (in addition to the one made by	8140	/* We need to make another copy (in addition to the one made by
7974	code_convert_string_norecord) to ensure that the final string is	8141	code_convert_string_norecord) to ensure that the final string is
7975	_doubly_ zero terminated --- that is, that the string is	8142	_doubly_ zero terminated --- that is, that the string is
7976	terminated by two zero bytes and one utf-16le null character.	8143	terminated by two zero bytes and one utf-16le null character.
@@ -10707,6 +10874,11 @@ from GNU Find and GNU Grep. Emacs will then ignore the null bytes and
10707	decode text as usual. */);	10874	decode text as usual. */);
10708	inhibit_null_byte_detection = 0;	10875	inhibit_null_byte_detection = 0;
10709		10876
		10877	DEFVAR_BOOL ("disable-ascii-optimization", disable_ascii_optimization,
		10878	doc: /* If non-nil, Emacs does not optimize code decoder for ASCII files.
		10879	Internal use only. Removed after the experimental optimizer gets stable. */);
		10880	disable_ascii_optimization = 0;
		10881
10710	DEFVAR_LISP ("translation-table-for-input", Vtranslation_table_for_input,	10882	DEFVAR_LISP ("translation-table-for-input", Vtranslation_table_for_input,
10711	doc: /* Char table for translating self-inserting characters.	10883	doc: /* Char table for translating self-inserting characters.
10712	This is applied to the result of input methods, not their input.	10884	This is applied to the result of input methods, not their input.