about | summary | refs | log | tree | commit | diff | stats
path: root/src/coding.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/coding.h')
-rw-r--r-- src/coding.h 145
1 files changed, 83 insertions, 62 deletions
diff --git a/src/coding.h b/src/coding.h
index 50435282229..2c01a05d197 100644
--- a/src/coding.h
+++ b/src/coding.h
@@ -1,5 +1,5 @@
1/* Header for coding system handler. 1/* Header for coding system handler.
2 Copyright (C) 2001-2012 Free Software Foundation, Inc. 2 Copyright (C) 2001-2013 Free Software Foundation, Inc.
3 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 3 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
4 2005, 2006, 2007, 2008, 2009, 2010, 2011 4 2005, 2006, 2007, 2008, 2009, 2010, 2011
5 National Institute of Advanced Industrial Science and Technology (AIST) 5 National Institute of Advanced Industrial Science and Technology (AIST)
@@ -76,6 +76,14 @@ enum define_coding_ccl_arg_index
76 coding_arg_ccl_max 76 coding_arg_ccl_max
77 }; 77 };
78 78
79enum define_coding_undecided_arg_index
80 {
81 coding_arg_undecided_inhibit_null_byte_detection = coding_arg_max,
82 coding_arg_undecided_inhibit_iso_escape_detection,
83 coding_arg_undecided_prefer_utf_8,
84 coding_arg_undecided_max
85 };
86
79/* Hash table for all coding systems. Keys are coding system symbols 87/* Hash table for all coding systems. Keys are coding system symbols
80 and values are spec vectors of the corresponding coding system. A 88 and values are spec vectors of the corresponding coding system. A
81 spec vector has the form [ ATTRS ALIASES EOL-TYPE ]. ATTRS is a 89 spec vector has the form [ ATTRS ALIASES EOL-TYPE ]. ATTRS is a
@@ -158,6 +166,10 @@ enum coding_attr_index
158 166
159 coding_attr_emacs_mule_full, 167 coding_attr_emacs_mule_full,
160 168
169 coding_attr_undecided_inhibit_null_byte_detection,
170 coding_attr_undecided_inhibit_iso_escape_detection,
171 coding_attr_undecided_prefer_utf_8,
172
161 coding_attr_last_index 173 coding_attr_last_index
162 }; 174 };
163 175
@@ -177,7 +189,6 @@ enum coding_attr_index
177#define CODING_ATTR_PRE_WRITE(attrs) AREF (attrs, coding_attr_pre_write) 189#define CODING_ATTR_PRE_WRITE(attrs) AREF (attrs, coding_attr_pre_write)
178#define CODING_ATTR_DEFAULT_CHAR(attrs) AREF (attrs, coding_attr_default_char) 190#define CODING_ATTR_DEFAULT_CHAR(attrs) AREF (attrs, coding_attr_default_char)
179#define CODING_ATTR_FOR_UNIBYTE(attrs) AREF (attrs, coding_attr_for_unibyte) 191#define CODING_ATTR_FOR_UNIBYTE(attrs) AREF (attrs, coding_attr_for_unibyte)
180#define CODING_ATTR_FLUSHING(attrs) AREF (attrs, coding_attr_flushing)
181#define CODING_ATTR_PLIST(attrs) AREF (attrs, coding_attr_plist) 192#define CODING_ATTR_PLIST(attrs) AREF (attrs, coding_attr_plist)
182#define CODING_ATTR_CATEGORY(attrs) AREF (attrs, coding_attr_category) 193#define CODING_ATTR_CATEGORY(attrs) AREF (attrs, coding_attr_category)
183#define CODING_ATTR_SAFE_CHARSETS(attrs)AREF (attrs, coding_attr_safe_charsets) 194#define CODING_ATTR_SAFE_CHARSETS(attrs)AREF (attrs, coding_attr_safe_charsets)
@@ -273,37 +284,31 @@ enum coding_result_code
273 CODING_RESULT_SUCCESS, 284 CODING_RESULT_SUCCESS,
274 CODING_RESULT_INSUFFICIENT_SRC, 285 CODING_RESULT_INSUFFICIENT_SRC,
275 CODING_RESULT_INSUFFICIENT_DST, 286 CODING_RESULT_INSUFFICIENT_DST,
276 CODING_RESULT_INCONSISTENT_EOL,
277 CODING_RESULT_INVALID_SRC, 287 CODING_RESULT_INVALID_SRC,
278 CODING_RESULT_INTERRUPT, 288 CODING_RESULT_INTERRUPT
279 CODING_RESULT_INSUFFICIENT_MEM
280 }; 289 };
281 290
282 291
283/* Macros used for the member `mode' of the struct coding_system. */ 292/* Macros used for the member `mode' of the struct coding_system. */
284 293
285/* If set, recover the original CR or LF of the already decoded text
286 when the decoding routine encounters an inconsistent eol format. */
287#define CODING_MODE_INHIBIT_INCONSISTENT_EOL 0x01
288
289/* If set, the decoding/encoding routines treat the current data as 294/* If set, the decoding/encoding routines treat the current data as
290 the last block of the whole text to be converted, and do the 295 the last block of the whole text to be converted, and do the
291 appropriate finishing job. */ 296 appropriate finishing job. */
292#define CODING_MODE_LAST_BLOCK 0x02 297#define CODING_MODE_LAST_BLOCK 0x01
293 298
294/* If set, it means that the current source text is in a buffer which 299/* If set, it means that the current source text is in a buffer which
295 enables selective display. */ 300 enables selective display. */
296#define CODING_MODE_SELECTIVE_DISPLAY 0x04 301#define CODING_MODE_SELECTIVE_DISPLAY 0x02
297 302
298/* This flag is used by the decoding/encoding routines on the fly. If 303/* This flag is used by the decoding/encoding routines on the fly. If
299 set, it means that right-to-left text is being processed. */ 304 set, it means that right-to-left text is being processed. */
300#define CODING_MODE_DIRECTION 0x08 305#define CODING_MODE_DIRECTION 0x04
301 306
302#define CODING_MODE_FIXED_DESTINATION 0x10 307#define CODING_MODE_FIXED_DESTINATION 0x08
303 308
304/* If set, it means that the encoding routines produce some safe 309/* If set, it means that the encoding routines produce some safe
305 ASCII characters (usually '?') for unsupported characters. */ 310 ASCII characters (usually '?') for unsupported characters. */
306#define CODING_MODE_SAFE_ENCODING 0x20 311#define CODING_MODE_SAFE_ENCODING 0x10
307 312
308 /* For handling composition sequence. */ 313 /* For handling composition sequence. */
309#include "composite.h" 314#include "composite.h"
@@ -322,7 +327,7 @@ struct composition_status
322{ 327{
323 enum composition_state state; 328 enum composition_state state;
324 enum composition_method method; 329 enum composition_method method;
325 int old_form; /* 0:pre-21 form, 1:post-21 form */ 330 bool old_form; /* true if pre-21 form */
326 int length; /* number of elements produced in charbuf */ 331 int length; /* number of elements produced in charbuf */
327 int nchars; /* number of characters composed */ 332 int nchars; /* number of characters composed */
328 int ncomps; /* number of composition components */ 333 int ncomps; /* number of composition components */
@@ -351,18 +356,18 @@ struct iso_2022_spec
351 there was an invalid designation previously. */ 356 there was an invalid designation previously. */
352 int current_designation[4]; 357 int current_designation[4];
353 358
354 /* Set to 1 temporarily only when graphic register 2 or 3 is invoked
355 by single-shift while encoding. */
356 int single_shifting;
357
358 /* Set to 1 temporarily only when processing at beginning of line. */
359 int bol;
360
361 /* If positive, we are now scanning CTEXT extended segment. */ 359 /* If positive, we are now scanning CTEXT extended segment. */
362 int ctext_extended_segment_len; 360 int ctext_extended_segment_len;
363 361
364 /* If nonzero, we are now scanning embedded UTF-8 sequence. */ 362 /* True temporarily only when graphic register 2 or 3 is invoked by
365 int embedded_utf_8; 363 single-shift while encoding. */
364 unsigned single_shifting : 1;
365
366 /* True temporarily only when processing at beginning of line. */
367 unsigned bol : 1;
368
369 /* If true, we are now scanning embedded UTF-8 sequence. */
370 unsigned embedded_utf_8 : 1;
366 371
367 /* The current composition. */ 372 /* The current composition. */
368 struct composition_status cmp_status; 373 struct composition_status cmp_status;
@@ -370,11 +375,21 @@ struct iso_2022_spec
370 375
371struct emacs_mule_spec 376struct emacs_mule_spec
372{ 377{
373 int full_support;
374 struct composition_status cmp_status; 378 struct composition_status cmp_status;
375}; 379};
376 380
377struct ccl_spec; 381struct undecided_spec
382{
383 /* Inhibit null byte detection. 1 means always inhibit,
384 -1 means do not inhibit, 0 means rely on user variable. */
385 int inhibit_nbd;
386
387 /* Inhibit ISO escape detection. -1, 0, 1 as above. */
388 int inhibit_ied;
389
390 /* Prefer UTF-8 when the input could be other encodings. */
391 bool prefer_utf_8;
392};
378 393
379enum utf_bom_type 394enum utf_bom_type
380 { 395 {
@@ -433,6 +448,7 @@ struct coding_system
433 struct utf_16_spec utf_16; 448 struct utf_16_spec utf_16;
434 enum utf_bom_type utf_8_bom; 449 enum utf_bom_type utf_8_bom;
435 struct emacs_mule_spec emacs_mule; 450 struct emacs_mule_spec emacs_mule;
451 struct undecided_spec undecided;
436 } spec; 452 } spec;
437 453
438 int max_charset_id; 454 int max_charset_id;
@@ -448,9 +464,15 @@ struct coding_system
448 /* How many heading bytes we can skip for decoding. This is set to 464 /* How many heading bytes we can skip for decoding. This is set to
449 -1 in setup_coding_system, and updated by detect_coding. So, 465 -1 in setup_coding_system, and updated by detect_coding. So,
450 when this is equal to the byte length of the text being 466 when this is equal to the byte length of the text being
451 converted, we can skip the actual conversion process. */ 467 converted, we can skip the actual conversion process except for
468 the eol format. */
452 ptrdiff_t head_ascii; 469 ptrdiff_t head_ascii;
453 470
471 ptrdiff_t detected_utf8_chars;
472
473 /* Used internally in coding.c. See the comment of detect_ascii. */
474 int eol_seen;
475
454 /* The following members are set by encoding/decoding routine. */ 476 /* The following members are set by encoding/decoding routine. */
455 ptrdiff_t produced, produced_char, consumed, consumed_char; 477 ptrdiff_t produced, produced_char, consumed, consumed_char;
456 478
@@ -471,10 +493,6 @@ struct coding_system
471 Lisp_Object dst_object; 493 Lisp_Object dst_object;
472 unsigned char *destination; 494 unsigned char *destination;
473 495
474 /* Set to 1 if the source of conversion is not in the member
475 `charbuf', but at `src_object'. */
476 int chars_at_source;
477
478 /* If an element is non-negative, it is a character code. 496 /* If an element is non-negative, it is a character code.
479 497
480 If it is in the range -128..-1, it is a 8-bit character code 498 If it is in the range -128..-1, it is a 8-bit character code
@@ -490,18 +508,21 @@ struct coding_system
490 int *charbuf; 508 int *charbuf;
491 int charbuf_size, charbuf_used; 509 int charbuf_size, charbuf_used;
492 510
511 /* True if the source of conversion is not in the member
512 `charbuf', but at `src_object'. */
513 unsigned chars_at_source : 1;
514
493 /* Set to 1 if charbuf contains an annotation. */ 515 /* Set to 1 if charbuf contains an annotation. */
494 int annotated; 516 unsigned annotated : 1;
495 517
496 unsigned char carryover[64]; 518 unsigned char carryover[64];
497 int carryover_bytes; 519 int carryover_bytes;
498 520
499 int default_char; 521 int default_char;
500 522
501 int (*detector) (struct coding_system *, 523 bool (*detector) (struct coding_system *, struct coding_detection_info *);
502 struct coding_detection_info *);
503 void (*decoder) (struct coding_system *); 524 void (*decoder) (struct coding_system *);
504 int (*encoder) (struct coding_system *); 525 bool (*encoder) (struct coding_system *);
505}; 526};
506 527
507/* Meanings of bits in the member `common_flags' of the structure 528/* Meanings of bits in the member `common_flags' of the structure
@@ -649,10 +670,8 @@ struct coding_system
649 for file names, if any. */ 670 for file names, if any. */
650#define ENCODE_FILE(name) \ 671#define ENCODE_FILE(name) \
651 (! NILP (Vfile_name_coding_system) \ 672 (! NILP (Vfile_name_coding_system) \
652 && !EQ (Vfile_name_coding_system, make_number (0)) \
653 ? code_convert_string_norecord (name, Vfile_name_coding_system, 1) \ 673 ? code_convert_string_norecord (name, Vfile_name_coding_system, 1) \
654 : (! NILP (Vdefault_file_name_coding_system) \ 674 : (! NILP (Vdefault_file_name_coding_system) \
655 && !EQ (Vdefault_file_name_coding_system, make_number (0)) \
656 ? code_convert_string_norecord (name, Vdefault_file_name_coding_system, 1) \ 675 ? code_convert_string_norecord (name, Vdefault_file_name_coding_system, 1) \
657 : name)) 676 : name))
658 677
@@ -661,10 +680,8 @@ struct coding_system
661 for file names, if any. */ 680 for file names, if any. */
662#define DECODE_FILE(name) \ 681#define DECODE_FILE(name) \
663 (! NILP (Vfile_name_coding_system) \ 682 (! NILP (Vfile_name_coding_system) \
664 && !EQ (Vfile_name_coding_system, make_number (0)) \
665 ? code_convert_string_norecord (name, Vfile_name_coding_system, 0) \ 683 ? code_convert_string_norecord (name, Vfile_name_coding_system, 0) \
666 : (! NILP (Vdefault_file_name_coding_system) \ 684 : (! NILP (Vdefault_file_name_coding_system) \
667 && !EQ (Vdefault_file_name_coding_system, make_number (0)) \
668 ? code_convert_string_norecord (name, Vdefault_file_name_coding_system, 0) \ 685 ? code_convert_string_norecord (name, Vdefault_file_name_coding_system, 0) \
669 : name)) 686 : name))
670 687
@@ -673,7 +690,6 @@ struct coding_system
673 for system functions, if any. */ 690 for system functions, if any. */
674#define ENCODE_SYSTEM(str) \ 691#define ENCODE_SYSTEM(str) \
675 (! NILP (Vlocale_coding_system) \ 692 (! NILP (Vlocale_coding_system) \
676 && !EQ (Vlocale_coding_system, make_number (0)) \
677 ? code_convert_string_norecord (str, Vlocale_coding_system, 1) \ 693 ? code_convert_string_norecord (str, Vlocale_coding_system, 1) \
678 : str) 694 : str)
679 695
@@ -681,7 +697,6 @@ struct coding_system
681 for system functions, if any. */ 697 for system functions, if any. */
682#define DECODE_SYSTEM(str) \ 698#define DECODE_SYSTEM(str) \
683 (! NILP (Vlocale_coding_system) \ 699 (! NILP (Vlocale_coding_system) \
684 && !EQ (Vlocale_coding_system, make_number (0)) \
685 ? code_convert_string_norecord (str, Vlocale_coding_system, 0) \ 700 ? code_convert_string_norecord (str, Vlocale_coding_system, 0) \
686 : str) 701 : str)
687 702
@@ -689,22 +704,20 @@ struct coding_system
689#define ENCODE_UTF_8(str) code_convert_string_norecord (str, Qutf_8, 1) 704#define ENCODE_UTF_8(str) code_convert_string_norecord (str, Qutf_8, 1)
690 705
691/* Extern declarations. */ 706/* Extern declarations. */
692extern Lisp_Object code_conversion_save (int, int); 707extern Lisp_Object code_conversion_save (bool, bool);
693extern int decoding_buffer_size (struct coding_system *, int);
694extern int encoding_buffer_size (struct coding_system *, int);
695extern void setup_coding_system (Lisp_Object, struct coding_system *); 708extern void setup_coding_system (Lisp_Object, struct coding_system *);
696extern Lisp_Object coding_charset_list (struct coding_system *); 709extern Lisp_Object coding_charset_list (struct coding_system *);
697extern Lisp_Object coding_system_charset_list (Lisp_Object); 710extern Lisp_Object coding_system_charset_list (Lisp_Object);
698extern Lisp_Object code_convert_string (Lisp_Object, Lisp_Object, 711extern Lisp_Object code_convert_string (Lisp_Object, Lisp_Object,
699 Lisp_Object, int, int, int); 712 Lisp_Object, bool, bool, bool);
700extern Lisp_Object code_convert_string_norecord (Lisp_Object, Lisp_Object, 713extern Lisp_Object code_convert_string_norecord (Lisp_Object, Lisp_Object,
701 int); 714 bool);
702extern Lisp_Object raw_text_coding_system (Lisp_Object); 715extern Lisp_Object raw_text_coding_system (Lisp_Object);
703extern Lisp_Object coding_inherit_eol_type (Lisp_Object, Lisp_Object); 716extern Lisp_Object coding_inherit_eol_type (Lisp_Object, Lisp_Object);
704extern Lisp_Object complement_process_encoding_system (Lisp_Object); 717extern Lisp_Object complement_process_encoding_system (Lisp_Object);
705 718
706extern int decode_coding_gap (struct coding_system *, 719extern void decode_coding_gap (struct coding_system *,
707 ptrdiff_t, ptrdiff_t); 720 ptrdiff_t, ptrdiff_t);
708extern void decode_coding_object (struct coding_system *, 721extern void decode_coding_object (struct coding_system *,
709 Lisp_Object, ptrdiff_t, ptrdiff_t, 722 Lisp_Object, ptrdiff_t, ptrdiff_t,
710 ptrdiff_t, ptrdiff_t, Lisp_Object); 723 ptrdiff_t, ptrdiff_t, Lisp_Object);
@@ -712,23 +725,32 @@ extern void encode_coding_object (struct coding_system *,
712 Lisp_Object, ptrdiff_t, ptrdiff_t, 725 Lisp_Object, ptrdiff_t, ptrdiff_t,
713 ptrdiff_t, ptrdiff_t, Lisp_Object); 726 ptrdiff_t, ptrdiff_t, Lisp_Object);
714 727
715/* Macros for backward compatibility. */ 728#if defined (WINDOWSNT) || defined (CYGWIN)
729
730/* These functions use Lisp string objects to store the UTF-16LE
731 strings that modern versions of Windows expect. These strings are
732 not particularly useful to Lisp, and all Lisp strings should be
733 native Emacs multibyte. */
716 734
717#define decode_coding_region(coding, from, to) \ 735/* Access the wide-character string stored in a Lisp string object. */
718 decode_coding_object (coding, Fcurrent_buffer (), \ 736#define WCSDATA(x) ((wchar_t *) SDATA (x))
719 from, CHAR_TO_BYTE (from), \
720 to, CHAR_TO_BYTE (to), Fcurrent_buffer ())
721 737
738/* Convert the multi-byte string in STR to UTF-16LE encoded unibyte
739 string, and store it in *BUF. BUF may safely point to STR on entry. */
740extern wchar_t *to_unicode (Lisp_Object str, Lisp_Object *buf);
722 741
723#define encode_coding_region(coding, from, to) \ 742/* Convert STR, a UTF-16LE encoded string embedded in a unibyte string
724 encode_coding_object (coding, Fcurrent_buffer (), \ 743 object, to a multi-byte Emacs string and return it. This function
725 from, CHAR_TO_BYTE (from), \ 744 calls code_convert_string_norecord internally and has all its
726 to, CHAR_TO_BYTE (to), Fcurrent_buffer ()) 745 failure modes. STR itself is not modified. */
746extern Lisp_Object from_unicode (Lisp_Object str);
727 747
748/* Convert WSTR to an Emacs string. */
749extern Lisp_Object from_unicode_buffer (const wchar_t* wstr);
728 750
729#define decode_coding_string(coding, string, nocopy) \ 751#endif /* WINDOWSNT || CYGWIN */
730 decode_coding_object (coding, string, 0, 0, SCHARS (string), \ 752
731 SBYTES (string), Qt) 753/* Macros for backward compatibility. */
732 754
733#define encode_coding_string(coding, string, nocopy) \ 755#define encode_coding_string(coding, string, nocopy) \
734 (STRING_MULTIBYTE(string) ? \ 756 (STRING_MULTIBYTE(string) ? \
@@ -756,7 +778,7 @@ extern Lisp_Object Qcoding_system_p;
756extern Lisp_Object Qraw_text, Qemacs_mule, Qno_conversion, Qundecided; 778extern Lisp_Object Qraw_text, Qemacs_mule, Qno_conversion, Qundecided;
757extern Lisp_Object Qbuffer_file_coding_system; 779extern Lisp_Object Qbuffer_file_coding_system;
758 780
759extern Lisp_Object Qunix, Qdos, Qmac; 781extern Lisp_Object Qunix, Qdos;
760 782
761extern Lisp_Object Qtranslation_table; 783extern Lisp_Object Qtranslation_table;
762extern Lisp_Object Qtranslation_table_id; 784extern Lisp_Object Qtranslation_table_id;
@@ -779,6 +801,5 @@ extern struct coding_system safe_terminal_coding;
779extern Lisp_Object Qcoding_system_error; 801extern Lisp_Object Qcoding_system_error;
780 802
781extern char emacs_mule_bytes[256]; 803extern char emacs_mule_bytes[256];
782extern int emacs_mule_string_char (unsigned char *);
783 804
784#endif /* EMACS_CODING_H */ 805#endif /* EMACS_CODING_H */