aboutsummaryrefslogtreecommitdiffstats
path: root/src/coding.c
diff options
context:
space:
mode:
authorTom Tromey2012-12-17 07:56:22 -0700
committerTom Tromey2012-12-17 07:56:22 -0700
commit3d6eced1ae51ffd0a782130e7c334052277e2724 (patch)
tree5d1d2ad7cd3374f922886c4a72062511a035c168 /src/coding.c
parentbf69f522a9e135f9aa483cedd53e71e915f2bf75 (diff)
parent7c3d167f48d6262ee4e5512aa50a07ee96bc1509 (diff)
downloademacs-3d6eced1ae51ffd0a782130e7c334052277e2724.tar.gz
emacs-3d6eced1ae51ffd0a782130e7c334052277e2724.zip
merge from trunk
Diffstat (limited to 'src/coding.c')
-rw-r--r--src/coding.c159
1 files changed, 87 insertions, 72 deletions
diff --git a/src/coding.c b/src/coding.c
index 02e7b34695e..56202e4861d 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -285,7 +285,6 @@ encode_coding_XXX (struct coding_system *coding)
285 285
286#include <config.h> 286#include <config.h>
287#include <stdio.h> 287#include <stdio.h>
288#include <setjmp.h>
289 288
290#include "lisp.h" 289#include "lisp.h"
291#include "character.h" 290#include "character.h"
@@ -344,6 +343,10 @@ Lisp_Object Qcoding_system_p, Qcoding_system_error;
344Lisp_Object Qemacs_mule, Qraw_text; 343Lisp_Object Qemacs_mule, Qraw_text;
345Lisp_Object Qutf_8_emacs; 344Lisp_Object Qutf_8_emacs;
346 345
346#if defined (WINDOWSNT) || defined (CYGWIN)
347static Lisp_Object Qutf_16le;
348#endif
349
347/* Coding-systems are handed between Emacs Lisp programs and C internal 350/* Coding-systems are handed between Emacs Lisp programs and C internal
348 routines by the following three variables. */ 351 routines by the following three variables. */
349/* Coding system to be used to encode text for terminal display when 352/* Coding system to be used to encode text for terminal display when
@@ -416,7 +419,7 @@ enum iso_code_class_type
416 ISO_shift_out, /* ISO_CODE_SO (0x0E) */ 419 ISO_shift_out, /* ISO_CODE_SO (0x0E) */
417 ISO_shift_in, /* ISO_CODE_SI (0x0F) */ 420 ISO_shift_in, /* ISO_CODE_SI (0x0F) */
418 ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */ 421 ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */
419 ISO_escape, /* ISO_CODE_SO (0x1B) */ 422 ISO_escape, /* ISO_CODE_ESC (0x1B) */
420 ISO_control_1, /* Control codes in the range 423 ISO_control_1, /* Control codes in the range
421 0x80..0x9F, except for the 424 0x80..0x9F, except for the
422 following 3 codes. */ 425 following 3 codes. */
@@ -921,65 +924,18 @@ record_conversion_result (struct coding_system *coding,
921 924
922 925
923/* Store multibyte form of the character C in P, and advance P to the 926/* Store multibyte form of the character C in P, and advance P to the
924 end of the multibyte form. This is like CHAR_STRING_ADVANCE but it 927 end of the multibyte form. This used to be like CHAR_STRING_ADVANCE
925 never calls MAYBE_UNIFY_CHAR. */ 928 without ever calling MAYBE_UNIFY_CHAR, but nowadays we don't call
926 929 MAYBE_UNIFY_CHAR in CHAR_STRING_ADVANCE. */
927#define CHAR_STRING_ADVANCE_NO_UNIFY(c, p) \
928 do { \
929 if ((c) <= MAX_1_BYTE_CHAR) \
930 *(p)++ = (c); \
931 else if ((c) <= MAX_2_BYTE_CHAR) \
932 *(p)++ = (0xC0 | ((c) >> 6)), \
933 *(p)++ = (0x80 | ((c) & 0x3F)); \
934 else if ((c) <= MAX_3_BYTE_CHAR) \
935 *(p)++ = (0xE0 | ((c) >> 12)), \
936 *(p)++ = (0x80 | (((c) >> 6) & 0x3F)), \
937 *(p)++ = (0x80 | ((c) & 0x3F)); \
938 else if ((c) <= MAX_4_BYTE_CHAR) \
939 *(p)++ = (0xF0 | (c >> 18)), \
940 *(p)++ = (0x80 | ((c >> 12) & 0x3F)), \
941 *(p)++ = (0x80 | ((c >> 6) & 0x3F)), \
942 *(p)++ = (0x80 | (c & 0x3F)); \
943 else if ((c) <= MAX_5_BYTE_CHAR) \
944 *(p)++ = 0xF8, \
945 *(p)++ = (0x80 | ((c >> 18) & 0x0F)), \
946 *(p)++ = (0x80 | ((c >> 12) & 0x3F)), \
947 *(p)++ = (0x80 | ((c >> 6) & 0x3F)), \
948 *(p)++ = (0x80 | (c & 0x3F)); \
949 else \
950 (p) += BYTE8_STRING ((c) - 0x3FFF80, p); \
951 } while (0)
952 930
931#define CHAR_STRING_ADVANCE_NO_UNIFY(c, p) CHAR_STRING_ADVANCE(c, p)
953 932
954/* Return the character code of character whose multibyte form is at 933/* Return the character code of character whose multibyte form is at
955 P, and advance P to the end of the multibyte form. This is like 934 P, and advance P to the end of the multibyte form. This used to be
956 STRING_CHAR_ADVANCE, but it never calls MAYBE_UNIFY_CHAR. */ 935 like STRING_CHAR_ADVANCE without ever calling MAYBE_UNIFY_CHAR, but
957 936 nowadays STRING_CHAR_ADVANCE doesn't call MAYBE_UNIFY_CHAR. */
958#define STRING_CHAR_ADVANCE_NO_UNIFY(p) \
959 (!((p)[0] & 0x80) \
960 ? *(p)++ \
961 : ! ((p)[0] & 0x20) \
962 ? ((p) += 2, \
963 ((((p)[-2] & 0x1F) << 6) \
964 | ((p)[-1] & 0x3F) \
965 | ((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0))) \
966 : ! ((p)[0] & 0x10) \
967 ? ((p) += 3, \
968 ((((p)[-3] & 0x0F) << 12) \
969 | (((p)[-2] & 0x3F) << 6) \
970 | ((p)[-1] & 0x3F))) \
971 : ! ((p)[0] & 0x08) \
972 ? ((p) += 4, \
973 ((((p)[-4] & 0xF) << 18) \
974 | (((p)[-3] & 0x3F) << 12) \
975 | (((p)[-2] & 0x3F) << 6) \
976 | ((p)[-1] & 0x3F))) \
977 : ((p) += 5, \
978 ((((p)[-4] & 0x3F) << 18) \
979 | (((p)[-3] & 0x3F) << 12) \
980 | (((p)[-2] & 0x3F) << 6) \
981 | ((p)[-1] & 0x3F))))
982 937
938#define STRING_CHAR_ADVANCE_NO_UNIFY(p) STRING_CHAR_ADVANCE(p)
983 939
984/* Set coding->source from coding->src_object. */ 940/* Set coding->source from coding->src_object. */
985 941
@@ -2051,7 +2007,7 @@ emacs_mule_char (struct coding_system *coding, const unsigned char *src,
2051 break; 2007 break;
2052 2008
2053 default: 2009 default:
2054 abort (); 2010 emacs_abort ();
2055 } 2011 }
2056 CODING_DECODE_CHAR (coding, src, src_base, src_end, 2012 CODING_DECODE_CHAR (coding, src, src_base, src_end,
2057 CHARSET_FROM_ID (charset_ID), code, c); 2013 CHARSET_FROM_ID (charset_ID), code, c);
@@ -2345,7 +2301,7 @@ decode_coding_emacs_mule (struct coding_system *coding)
2345 int i; 2301 int i;
2346 2302
2347 if (charbuf_end - charbuf < cmp_status->length) 2303 if (charbuf_end - charbuf < cmp_status->length)
2348 abort (); 2304 emacs_abort ();
2349 for (i = 0; i < cmp_status->length; i++) 2305 for (i = 0; i < cmp_status->length; i++)
2350 *charbuf++ = cmp_status->carryover[i]; 2306 *charbuf++ = cmp_status->carryover[i];
2351 coding->annotated = 1; 2307 coding->annotated = 1;
@@ -2619,7 +2575,7 @@ encode_coding_emacs_mule (struct coding_system *coding)
2619 preferred_charset_id = -1; 2575 preferred_charset_id = -1;
2620 break; 2576 break;
2621 default: 2577 default:
2622 abort (); 2578 emacs_abort ();
2623 } 2579 }
2624 charbuf += -c - 1; 2580 charbuf += -c - 1;
2625 continue; 2581 continue;
@@ -3482,7 +3438,7 @@ decode_coding_iso_2022 (struct coding_system *coding)
3482 if (cmp_status->state != COMPOSING_NO) 3438 if (cmp_status->state != COMPOSING_NO)
3483 { 3439 {
3484 if (charbuf_end - charbuf < cmp_status->length) 3440 if (charbuf_end - charbuf < cmp_status->length)
3485 abort (); 3441 emacs_abort ();
3486 for (i = 0; i < cmp_status->length; i++) 3442 for (i = 0; i < cmp_status->length; i++)
3487 *charbuf++ = cmp_status->carryover[i]; 3443 *charbuf++ = cmp_status->carryover[i];
3488 coding->annotated = 1; 3444 coding->annotated = 1;
@@ -3864,7 +3820,7 @@ decode_coding_iso_2022 (struct coding_system *coding)
3864 break; 3820 break;
3865 3821
3866 default: 3822 default:
3867 abort (); 3823 emacs_abort ();
3868 } 3824 }
3869 3825
3870 if (cmp_status->state == COMPOSING_NO 3826 if (cmp_status->state == COMPOSING_NO
@@ -4419,7 +4375,7 @@ encode_coding_iso_2022 (struct coding_system *coding)
4419 preferred_charset_id = -1; 4375 preferred_charset_id = -1;
4420 break; 4376 break;
4421 default: 4377 default:
4422 abort (); 4378 emacs_abort ();
4423 } 4379 }
4424 charbuf += -c - 1; 4380 charbuf += -c - 1;
4425 continue; 4381 continue;
@@ -4933,7 +4889,7 @@ encode_coding_sjis (struct coding_system *coding)
4933 } 4889 }
4934 } 4890 }
4935 if (code == CHARSET_INVALID_CODE (charset)) 4891 if (code == CHARSET_INVALID_CODE (charset))
4936 abort (); 4892 emacs_abort ();
4937 if (charset == charset_kanji) 4893 if (charset == charset_kanji)
4938 { 4894 {
4939 int c1, c2; 4895 int c1, c2;
@@ -5023,7 +4979,7 @@ encode_coding_big5 (struct coding_system *coding)
5023 } 4979 }
5024 } 4980 }
5025 if (code == CHARSET_INVALID_CODE (charset)) 4981 if (code == CHARSET_INVALID_CODE (charset))
5026 abort (); 4982 emacs_abort ();
5027 if (charset == charset_big5) 4983 if (charset == charset_big5)
5028 { 4984 {
5029 int c1, c2; 4985 int c1, c2;
@@ -5107,6 +5063,7 @@ decode_coding_ccl (struct coding_system *coding)
5107 while (1) 5063 while (1)
5108 { 5064 {
5109 const unsigned char *p = src; 5065 const unsigned char *p = src;
5066 ptrdiff_t offset;
5110 int i = 0; 5067 int i = 0;
5111 5068
5112 if (multibytep) 5069 if (multibytep)
@@ -5124,8 +5081,17 @@ decode_coding_ccl (struct coding_system *coding)
5124 5081
5125 if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK) 5082 if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
5126 ccl->last_block = 1; 5083 ccl->last_block = 1;
5084 /* As ccl_driver calls DECODE_CHAR, buffer may be relocated. */
5085 charset_map_loaded = 0;
5127 ccl_driver (ccl, source_charbuf, charbuf, i, charbuf_end - charbuf, 5086 ccl_driver (ccl, source_charbuf, charbuf, i, charbuf_end - charbuf,
5128 charset_list); 5087 charset_list);
5088 if (charset_map_loaded
5089 && (offset = coding_change_source (coding)))
5090 {
5091 p += offset;
5092 src += offset;
5093 src_end += offset;
5094 }
5129 charbuf += ccl->produced; 5095 charbuf += ccl->produced;
5130 if (multibytep) 5096 if (multibytep)
5131 src += source_byteidx[ccl->consumed]; 5097 src += source_byteidx[ccl->consumed];
@@ -5178,8 +5144,15 @@ encode_coding_ccl (struct coding_system *coding)
5178 5144
5179 do 5145 do
5180 { 5146 {
5147 ptrdiff_t offset;
5148
5149 /* As ccl_driver calls DECODE_CHAR, buffer may be relocated. */
5150 charset_map_loaded = 0;
5181 ccl_driver (ccl, charbuf, destination_charbuf, 5151 ccl_driver (ccl, charbuf, destination_charbuf,
5182 charbuf_end - charbuf, 1024, charset_list); 5152 charbuf_end - charbuf, 1024, charset_list);
5153 if (charset_map_loaded
5154 && (offset = coding_change_destination (coding)))
5155 dst += offset;
5183 if (multibytep) 5156 if (multibytep)
5184 { 5157 {
5185 ASSURE_DESTINATION (ccl->produced * 2); 5158 ASSURE_DESTINATION (ccl->produced * 2);
@@ -6332,6 +6305,9 @@ detect_coding (struct coding_system *coding)
6332 { 6305 {
6333 category = coding_priorities[i]; 6306 category = coding_priorities[i];
6334 this = coding_categories + category; 6307 this = coding_categories + category;
6308 /* Some of this->detector (e.g. detect_coding_sjis)
6309 require this information. */
6310 coding->id = this->id;
6335 if (this->id < 0) 6311 if (this->id < 0)
6336 { 6312 {
6337 /* No coding system of this category is defined. */ 6313 /* No coding system of this category is defined. */
@@ -6853,7 +6829,7 @@ produce_chars (struct coding_system *coding, Lisp_Object translation_table,
6853 [ -LENGTH ANNOTATION_MASK NCHARS NBYTES METHOD [ COMPONENTS... ] ] 6829 [ -LENGTH ANNOTATION_MASK NCHARS NBYTES METHOD [ COMPONENTS... ] ]
6854 */ 6830 */
6855 6831
6856static inline void 6832static void
6857produce_composition (struct coding_system *coding, int *charbuf, ptrdiff_t pos) 6833produce_composition (struct coding_system *coding, int *charbuf, ptrdiff_t pos)
6858{ 6834{
6859 int len; 6835 int len;
@@ -6897,7 +6873,7 @@ produce_composition (struct coding_system *coding, int *charbuf, ptrdiff_t pos)
6897 [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ] 6873 [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
6898 */ 6874 */
6899 6875
6900static inline void 6876static void
6901produce_charset (struct coding_system *coding, int *charbuf, ptrdiff_t pos) 6877produce_charset (struct coding_system *coding, int *charbuf, ptrdiff_t pos)
6902{ 6878{
6903 ptrdiff_t from = pos - charbuf[2]; 6879 ptrdiff_t from = pos - charbuf[2];
@@ -7132,7 +7108,7 @@ decode_coding (struct coding_system *coding)
7132 position of a composition after POS (if any) or to LIMIT, and 7108 position of a composition after POS (if any) or to LIMIT, and
7133 return BUF. */ 7109 return BUF. */
7134 7110
7135static inline int * 7111static int *
7136handle_composition_annotation (ptrdiff_t pos, ptrdiff_t limit, 7112handle_composition_annotation (ptrdiff_t pos, ptrdiff_t limit,
7137 struct coding_system *coding, int *buf, 7113 struct coding_system *coding, int *buf,
7138 ptrdiff_t *stop) 7114 ptrdiff_t *stop)
@@ -7190,7 +7166,7 @@ handle_composition_annotation (ptrdiff_t pos, ptrdiff_t limit,
7190 *buf++ = XINT (XCAR (components)); 7166 *buf++ = XINT (XCAR (components));
7191 } 7167 }
7192 else 7168 else
7193 abort (); 7169 emacs_abort ();
7194 *head -= len; 7170 *head -= len;
7195 } 7171 }
7196 } 7172 }
@@ -7215,7 +7191,7 @@ handle_composition_annotation (ptrdiff_t pos, ptrdiff_t limit,
7215 If the property value is nil, set *STOP to the position where the 7191 If the property value is nil, set *STOP to the position where the
7216 property value is non-nil (limiting by LIMIT), and return BUF. */ 7192 property value is non-nil (limiting by LIMIT), and return BUF. */
7217 7193
7218static inline int * 7194static int *
7219handle_charset_annotation (ptrdiff_t pos, ptrdiff_t limit, 7195handle_charset_annotation (ptrdiff_t pos, ptrdiff_t limit,
7220 struct coding_system *coding, int *buf, 7196 struct coding_system *coding, int *buf,
7221 ptrdiff_t *stop) 7197 ptrdiff_t *stop)
@@ -7999,6 +7975,40 @@ preferred_coding_system (void)
7999 return CODING_ID_NAME (id); 7975 return CODING_ID_NAME (id);
8000} 7976}
8001 7977
7978#if defined (WINDOWSNT) || defined (CYGWIN)
7979
7980Lisp_Object
7981from_unicode (Lisp_Object str)
7982{
7983 CHECK_STRING (str);
7984 if (!STRING_MULTIBYTE (str) &&
7985 SBYTES (str) & 1)
7986 {
7987 str = Fsubstring (str, make_number (0), make_number (-1));
7988 }
7989
7990 return code_convert_string_norecord (str, Qutf_16le, 0);
7991}
7992
7993wchar_t *
7994to_unicode (Lisp_Object str, Lisp_Object *buf)
7995{
7996 *buf = code_convert_string_norecord (str, Qutf_16le, 1);
7997 /* We need to make another copy (in addition to the one made by
7998 code_convert_string_norecord) to ensure that the final string is
7999 _doubly_ zero terminated --- that is, that the string is
8000 terminated by two zero bytes and one utf-16le null character.
8001 Because strings are already terminated with a single zero byte,
8002 we just add one additional zero. */
8003 str = make_uninit_string (SBYTES (*buf) + 1);
8004 memcpy (SDATA (str), SDATA (*buf), SBYTES (*buf));
8005 SDATA (str) [SBYTES (*buf)] = '\0';
8006 *buf = str;
8007 return WCSDATA (*buf);
8008}
8009
8010#endif /* WINDOWSNT || CYGWIN */
8011
8002 8012
8003#ifdef emacs 8013#ifdef emacs
8004/*** 8. Emacs Lisp library functions ***/ 8014/*** 8. Emacs Lisp library functions ***/
@@ -8460,7 +8470,7 @@ highest priority. */)
8460} 8470}
8461 8471
8462 8472
8463static inline bool 8473static bool
8464char_encodable_p (int c, Lisp_Object attrs) 8474char_encodable_p (int c, Lisp_Object attrs)
8465{ 8475{
8466 Lisp_Object tail; 8476 Lisp_Object tail;
@@ -9428,7 +9438,7 @@ usage: (set-coding-system-priority &rest coding-systems) */)
9428 && changed[coding_priorities[j]]) 9438 && changed[coding_priorities[j]])
9429 j++; 9439 j++;
9430 if (j == coding_category_max) 9440 if (j == coding_category_max)
9431 abort (); 9441 emacs_abort ();
9432 priorities[i] = coding_priorities[j]; 9442 priorities[i] = coding_priorities[j];
9433 } 9443 }
9434 9444
@@ -10312,6 +10322,11 @@ syms_of_coding (void)
10312 DEFSYM (Qutf_8, "utf-8"); 10322 DEFSYM (Qutf_8, "utf-8");
10313 DEFSYM (Qutf_8_emacs, "utf-8-emacs"); 10323 DEFSYM (Qutf_8_emacs, "utf-8-emacs");
10314 10324
10325#if defined (WINDOWSNT) || defined (CYGWIN)
10326 /* No, not utf-16-le: that one has a BOM. */
10327 DEFSYM (Qutf_16le, "utf-16le");
10328#endif
10329
10315 DEFSYM (Qutf_16, "utf-16"); 10330 DEFSYM (Qutf_16, "utf-16");
10316 DEFSYM (Qbig, "big"); 10331 DEFSYM (Qbig, "big");
10317 DEFSYM (Qlittle, "little"); 10332 DEFSYM (Qlittle, "little");