about summary refs log tree commit diff stats
path: root/src/coding.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/coding.c')
-rw-r--r-- src/coding.c 102
1 files changed, 71 insertions, 31 deletions
diff --git a/src/coding.c b/src/coding.c
index d6285ed9245..7a3bc40b9c7 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -166,7 +166,7 @@ detect_coding_XXX (struct coding_system *coding,
166 166
167 while (1) 167 while (1)
168 { 168 {
169 /* Get one byte from the source. If the souce is exausted, jump 169 /* Get one byte from the source. If the source is exhausted, jump
170 to no_more_source:. */ 170 to no_more_source:. */
171 ONE_MORE_BYTE (c); 171 ONE_MORE_BYTE (c);
172 172
@@ -180,7 +180,7 @@ detect_coding_XXX (struct coding_system *coding,
180 return 0; 180 return 0;
181 181
182 no_more_source: 182 no_more_source:
183 /* The source exausted successfully. */ 183 /* The source exhausted successfully. */
184 detect_info->found |= found; 184 detect_info->found |= found;
185 return 1; 185 return 1;
186} 186}
@@ -530,7 +530,7 @@ enum iso_code_class_type
530 on output. */ 530 on output. */
531#define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400 531#define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
532 532
533/* If set, do not encode unsafe charactes on output. */ 533/* If set, do not encode unsafe characters on output. */
534#define CODING_ISO_FLAG_SAFE 0x0800 534#define CODING_ISO_FLAG_SAFE 0x0800
535 535
536/* If set, extra latin codes (128..159) are accepted as a valid code 536/* If set, extra latin codes (128..159) are accepted as a valid code
@@ -686,7 +686,7 @@ enum coding_category
686static Lisp_Object Vcoding_category_list; 686static Lisp_Object Vcoding_category_list;
687 687
688/* Table of coding categories (Lisp symbols). This variable is for 688/* Table of coding categories (Lisp symbols). This variable is for
689 internal use oly. */ 689 internal use only. */
690static Lisp_Object Vcoding_category_table; 690static Lisp_Object Vcoding_category_table;
691 691
692/* Table of coding-categories ordered by priority. */ 692/* Table of coding-categories ordered by priority. */
@@ -818,7 +818,7 @@ static struct coding_system coding_categories[coding_category_max];
818 } while (0) 818 } while (0)
819 819
820 820
821/* Like EMIT_ONE_ASCII_BYTE byt store two bytes; C1 and C2. */ 821/* Like EMIT_ONE_ASCII_BYTE but store two bytes; C1 and C2. */
822 822
823#define EMIT_TWO_ASCII_BYTES(c1, c2) \ 823#define EMIT_TWO_ASCII_BYTES(c1, c2) \
824 do { \ 824 do { \
@@ -1227,7 +1227,7 @@ alloc_destination (struct coding_system *coding, EMACS_INT nbytes,
1227 1227
1228 METHOD is one of enum composition_method. 1228 METHOD is one of enum composition_method.
1229 1229
1230 Optionnal COMPOSITION-COMPONENTS are characters and composition 1230 Optional COMPOSITION-COMPONENTS are characters and composition
1231 rules. 1231 rules.
1232 1232
1233 In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID 1233 In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
@@ -1932,7 +1932,7 @@ encode_coding_utf_16 (struct coding_system *coding)
1932 CHARS is 0xA0 plus a number of characters composed by this 1932 CHARS is 0xA0 plus a number of characters composed by this
1933 data, 1933 data,
1934 1934
1935 COMPONENTs are characters of multibye form or composition 1935 COMPONENTs are characters of multibyte form or composition
1936 rules encoded by two-byte of ASCII codes. 1936 rules encoded by two-byte of ASCII codes.
1937 1937
1938 In addition, for backward compatibility, the following formats are 1938 In addition, for backward compatibility, the following formats are
@@ -2428,8 +2428,8 @@ decode_coding_emacs_mule (struct coding_system *coding)
2428 const unsigned char *src_end = coding->source + coding->src_bytes; 2428 const unsigned char *src_end = coding->source + coding->src_bytes;
2429 const unsigned char *src_base; 2429 const unsigned char *src_base;
2430 int *charbuf = coding->charbuf + coding->charbuf_used; 2430 int *charbuf = coding->charbuf + coding->charbuf_used;
2431 /* We may produce two annocations (charset and composition) in one 2431 /* We may produce two annotations (charset and composition) in one
2432 loop and one more charset annocation at the end. */ 2432 loop and one more charset annotation at the end. */
2433 int *charbuf_end 2433 int *charbuf_end
2434 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3); 2434 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3);
2435 int consumed_chars = 0, consumed_chars_base; 2435 int consumed_chars = 0, consumed_chars_base;
@@ -2505,7 +2505,7 @@ decode_coding_emacs_mule (struct coding_system *coding)
2505 /* emacs_mule_char can load a charset map from a file, which 2505 /* emacs_mule_char can load a charset map from a file, which
2506 allocates a large structure and might cause buffer text 2506 allocates a large structure and might cause buffer text
2507 to be relocated as result. Thus, we need to remember the 2507 to be relocated as result. Thus, we need to remember the
2508 original pointer to buffer text, and fixup all related 2508 original pointer to buffer text, and fix up all related
2509 pointers after the call. */ 2509 pointers after the call. */
2510 const unsigned char *orig = coding->source; 2510 const unsigned char *orig = coding->source;
2511 EMACS_INT offset; 2511 EMACS_INT offset;
@@ -2532,7 +2532,7 @@ decode_coding_emacs_mule (struct coding_system *coding)
2532 cmp_status->ncomps -= nchars; 2532 cmp_status->ncomps -= nchars;
2533 } 2533 }
2534 2534
2535 /* Now if C >= 0, we found a normally encoded characer, if C < 2535 /* Now if C >= 0, we found a normally encoded character, if C <
2536 0, we found an old-style composition component character or 2536 0, we found an old-style composition component character or
2537 rule. */ 2537 rule. */
2538 2538
@@ -3043,7 +3043,7 @@ setup_iso_safe_charsets (Lisp_Object attrs)
3043 3043
3044 3044
3045/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". 3045/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3046 Check if a text is encoded in one of ISO-2022 based codig systems. 3046 Check if a text is encoded in one of ISO-2022 based coding systems.
3047 If it is, return 1, else return 0. */ 3047 If it is, return 1, else return 0. */
3048 3048
3049static int 3049static int
@@ -3452,7 +3452,7 @@ finish_composition (int *charbuf, struct composition_status *cmp_status)
3452 return new_chars; 3452 return new_chars;
3453} 3453}
3454 3454
3455/* If characers are under composition, finish the composition. */ 3455/* If characters are under composition, finish the composition. */
3456#define MAYBE_FINISH_COMPOSITION() \ 3456#define MAYBE_FINISH_COMPOSITION() \
3457 do { \ 3457 do { \
3458 if (cmp_status->state != COMPOSING_NO) \ 3458 if (cmp_status->state != COMPOSING_NO) \
@@ -3558,8 +3558,8 @@ decode_coding_iso_2022 (struct coding_system *coding)
3558 const unsigned char *src_end = coding->source + coding->src_bytes; 3558 const unsigned char *src_end = coding->source + coding->src_bytes;
3559 const unsigned char *src_base; 3559 const unsigned char *src_base;
3560 int *charbuf = coding->charbuf + coding->charbuf_used; 3560 int *charbuf = coding->charbuf + coding->charbuf_used;
3561 /* We may produce two annocations (charset and composition) in one 3561 /* We may produce two annotations (charset and composition) in one
3562 loop and one more charset annocation at the end. */ 3562 loop and one more charset annotation at the end. */
3563 int *charbuf_end 3563 int *charbuf_end
3564 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3); 3564 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3);
3565 int consumed_chars = 0, consumed_chars_base; 3565 int consumed_chars = 0, consumed_chars_base;
@@ -3861,7 +3861,7 @@ decode_coding_iso_2022 (struct coding_system *coding)
3861 goto invalid_code; 3861 goto invalid_code;
3862 /* For the moment, nested direction is not supported. 3862 /* For the moment, nested direction is not supported.
3863 So, `coding->mode & CODING_MODE_DIRECTION' zero means 3863 So, `coding->mode & CODING_MODE_DIRECTION' zero means
3864 left-to-right, and nozero means right-to-left. */ 3864 left-to-right, and nonzero means right-to-left. */
3865 ONE_MORE_BYTE (c1); 3865 ONE_MORE_BYTE (c1);
3866 switch (c1) 3866 switch (c1)
3867 { 3867 {
@@ -4766,7 +4766,7 @@ decode_coding_sjis (struct coding_system *coding)
4766 const unsigned char *src_end = coding->source + coding->src_bytes; 4766 const unsigned char *src_end = coding->source + coding->src_bytes;
4767 const unsigned char *src_base; 4767 const unsigned char *src_base;
4768 int *charbuf = coding->charbuf + coding->charbuf_used; 4768 int *charbuf = coding->charbuf + coding->charbuf_used;
4769 /* We may produce one charset annocation in one loop and one more at 4769 /* We may produce one charset annotation in one loop and one more at
4770 the end. */ 4770 the end. */
4771 int *charbuf_end 4771 int *charbuf_end
4772 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2); 4772 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
@@ -4884,7 +4884,7 @@ decode_coding_big5 (struct coding_system *coding)
4884 const unsigned char *src_end = coding->source + coding->src_bytes; 4884 const unsigned char *src_end = coding->source + coding->src_bytes;
4885 const unsigned char *src_base; 4885 const unsigned char *src_base;
4886 int *charbuf = coding->charbuf + coding->charbuf_used; 4886 int *charbuf = coding->charbuf + coding->charbuf_used;
4887 /* We may produce one charset annocation in one loop and one more at 4887 /* We may produce one charset annotation in one loop and one more at
4888 the end. */ 4888 the end. */
4889 int *charbuf_end 4889 int *charbuf_end
4890 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2); 4890 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
@@ -5541,7 +5541,7 @@ decode_coding_charset (struct coding_system *coding)
5541 const unsigned char *src_end = coding->source + coding->src_bytes; 5541 const unsigned char *src_end = coding->source + coding->src_bytes;
5542 const unsigned char *src_base; 5542 const unsigned char *src_base;
5543 int *charbuf = coding->charbuf + coding->charbuf_used; 5543 int *charbuf = coding->charbuf + coding->charbuf_used;
5544 /* We may produce one charset annocation in one loop and one more at 5544 /* We may produce one charset annotation in one loop and one more at
5545 the end. */ 5545 the end. */
5546 int *charbuf_end 5546 int *charbuf_end
5547 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2); 5547 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
@@ -6016,10 +6016,9 @@ raw_text_coding_system (Lisp_Object coding_system)
6016} 6016}
6017 6017
6018 6018
6019/* If CODING_SYSTEM doesn't specify end-of-line format but PARENT 6019/* If CODING_SYSTEM doesn't specify end-of-line format, return one of
6020 does, return one of the subsidiary that has the same eol-spec as 6020 the subsidiary that has the same eol-spec as PARENT (if it is not
6021 PARENT. Otherwise, return CODING_SYSTEM. If PARENT is nil, 6021 nil and specifies end-of-line format) or the system's setting
6022 inherit end-of-line format from the system's setting
6023 (system_eol_type). */ 6022 (system_eol_type). */
6024 6023
6025Lisp_Object 6024Lisp_Object
@@ -6041,6 +6040,8 @@ coding_inherit_eol_type (Lisp_Object coding_system, Lisp_Object parent)
6041 6040
6042 parent_spec = CODING_SYSTEM_SPEC (parent); 6041 parent_spec = CODING_SYSTEM_SPEC (parent);
6043 parent_eol_type = AREF (parent_spec, 2); 6042 parent_eol_type = AREF (parent_spec, 2);
6043 if (VECTORP (parent_eol_type))
6044 parent_eol_type = system_eol_type;
6044 } 6045 }
6045 else 6046 else
6046 parent_eol_type = system_eol_type; 6047 parent_eol_type = system_eol_type;
@@ -6054,6 +6055,45 @@ coding_inherit_eol_type (Lisp_Object coding_system, Lisp_Object parent)
6054 return coding_system; 6055 return coding_system;
6055} 6056}
6056 6057
6058
6059/* Check if text-conversion and eol-conversion of CODING_SYSTEM are
6060 decided for writing to a process. If not, complement them, and
6061 return a new coding system. */
6062
6063Lisp_Object
6064complement_process_encoding_system (Lisp_Object coding_system)
6065{
6066 Lisp_Object coding_base = Qnil, eol_base = Qnil;
6067 Lisp_Object spec, attrs;
6068 int i;
6069
6070 for (i = 0; i < 3; i++)
6071 {
6072 if (i == 1)
6073 coding_system = CDR_SAFE (Vdefault_process_coding_system);
6074 else if (i == 2)
6075 coding_system = preferred_coding_system ();
6076 spec = CODING_SYSTEM_SPEC (coding_system);
6077 if (NILP (spec))
6078 continue;
6079 attrs = AREF (spec, 0);
6080 if (NILP (coding_base) && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
6081 coding_base = CODING_ATTR_BASE_NAME (attrs);
6082 if (NILP (eol_base) && ! VECTORP (AREF (spec, 2)))
6083 eol_base = coding_system;
6084 if (! NILP (coding_base) && ! NILP (eol_base))
6085 break;
6086 }
6087
6088 if (i > 0)
6089 /* The original CODING_SYSTEM didn't specify text-conversion or
6090 eol-conversion. Be sure that we return a fully complemented
6091 coding system. */
6092 coding_system = coding_inherit_eol_type (coding_base, eol_base);
6093 return coding_system;
6094}
6095
6096
6057/* Emacs has a mechanism to automatically detect a coding system if it 6097/* Emacs has a mechanism to automatically detect a coding system if it
6058 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But, 6098 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
6059 it's impossible to distinguish some coding systems accurately 6099 it's impossible to distinguish some coding systems accurately
@@ -6104,14 +6144,14 @@ coding_inherit_eol_type (Lisp_Object coding_system, Lisp_Object parent)
6104 o coding-category-iso-7-else 6144 o coding-category-iso-7-else
6105 6145
6106 The category for a coding system which has the same code range 6146 The category for a coding system which has the same code range
6107 as ISO2022 of 7-bit environemnt but uses locking shift or 6147 as ISO2022 of 7-bit environment but uses locking shift or
6108 single shift functions. Assigned the coding-system (Lisp 6148 single shift functions. Assigned the coding-system (Lisp
6109 symbol) `iso-2022-7bit-lock' by default. 6149 symbol) `iso-2022-7bit-lock' by default.
6110 6150
6111 o coding-category-iso-8-else 6151 o coding-category-iso-8-else
6112 6152
6113 The category for a coding system which has the same code range 6153 The category for a coding system which has the same code range
6114 as ISO2022 of 8-bit environemnt but uses locking shift or 6154 as ISO2022 of 8-bit environment but uses locking shift or
6115 single shift functions. Assigned the coding-system (Lisp 6155 single shift functions. Assigned the coding-system (Lisp
6116 symbol) `iso-2022-8bit-ss2' by default. 6156 symbol) `iso-2022-8bit-ss2' by default.
6117 6157
@@ -7508,7 +7548,7 @@ static Lisp_Object Vcode_conversion_reused_workbuf;
7508static int reused_workbuf_in_use; 7548static int reused_workbuf_in_use;
7509 7549
7510 7550
7511/* Return a working buffer of code convesion. MULTIBYTE specifies the 7551/* Return a working buffer of code conversion. MULTIBYTE specifies the
7512 multibyteness of returning buffer. */ 7552 multibyteness of returning buffer. */
7513 7553
7514static Lisp_Object 7554static Lisp_Object
@@ -8160,7 +8200,7 @@ function `define-coding-system'. */)
8160 8200
8161/* Detect how the bytes at SRC of length SRC_BYTES are encoded. If 8201/* Detect how the bytes at SRC of length SRC_BYTES are encoded. If
8162 HIGHEST is nonzero, return the coding system of the highest 8202 HIGHEST is nonzero, return the coding system of the highest
8163 priority among the detected coding systems. Otherwize return a 8203 priority among the detected coding systems. Otherwise return a
8164 list of detected coding systems sorted by their priorities. If 8204 list of detected coding systems sorted by their priorities. If
8165 MULTIBYTEP is nonzero, it is assumed that the bytes are in correct 8205 MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
8166 multibyte form but contains only ASCII and eight-bit chars. 8206 multibyte form but contains only ASCII and eight-bit chars.
@@ -9262,7 +9302,7 @@ DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_intern
9262 setup_coding_system (Fcheck_coding_system (coding_system), terminal_coding); 9302 setup_coding_system (Fcheck_coding_system (coding_system), terminal_coding);
9263 /* We had better not send unsafe characters to terminal. */ 9303 /* We had better not send unsafe characters to terminal. */
9264 terminal_coding->mode |= CODING_MODE_SAFE_ENCODING; 9304 terminal_coding->mode |= CODING_MODE_SAFE_ENCODING;
9265 /* Characer composition should be disabled. */ 9305 /* Character composition should be disabled. */
9266 terminal_coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK; 9306 terminal_coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
9267 terminal_coding->src_multibyte = 1; 9307 terminal_coding->src_multibyte = 1;
9268 terminal_coding->dst_multibyte = 0; 9308 terminal_coding->dst_multibyte = 0;
@@ -9278,7 +9318,7 @@ DEFUN ("set-safe-terminal-coding-system-internal",
9278 CHECK_SYMBOL (coding_system); 9318 CHECK_SYMBOL (coding_system);
9279 setup_coding_system (Fcheck_coding_system (coding_system), 9319 setup_coding_system (Fcheck_coding_system (coding_system),
9280 &safe_terminal_coding); 9320 &safe_terminal_coding);
9281 /* Characer composition should be disabled. */ 9321 /* Character composition should be disabled. */
9282 safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK; 9322 safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
9283 safe_terminal_coding.src_multibyte = 1; 9323 safe_terminal_coding.src_multibyte = 1;
9284 safe_terminal_coding.dst_multibyte = 0; 9324 safe_terminal_coding.dst_multibyte = 0;
@@ -9312,7 +9352,7 @@ DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_intern
9312 else 9352 else
9313 Fcheck_coding_system (coding_system); 9353 Fcheck_coding_system (coding_system);
9314 setup_coding_system (coding_system, TERMINAL_KEYBOARD_CODING (t)); 9354 setup_coding_system (coding_system, TERMINAL_KEYBOARD_CODING (t));
9315 /* Characer composition should be disabled. */ 9355 /* Character composition should be disabled. */
9316 TERMINAL_KEYBOARD_CODING (t)->common_flags 9356 TERMINAL_KEYBOARD_CODING (t)->common_flags
9317 &= ~CODING_ANNOTATE_COMPOSITION_MASK; 9357 &= ~CODING_ANNOTATE_COMPOSITION_MASK;
9318 return Qnil; 9358 return Qnil;
@@ -9680,7 +9720,7 @@ usage: (define-coding-system-internal ...) */)
9680 9720
9681 If Nth element is a list of charset IDs, N is the first byte 9721 If Nth element is a list of charset IDs, N is the first byte
9682 of one of them. The list is sorted by dimensions of the 9722 of one of them. The list is sorted by dimensions of the
9683 charsets. A charset of smaller dimension comes firtst. */ 9723 charsets. A charset of smaller dimension comes first. */
9684 val = Fmake_vector (make_number (256), Qnil); 9724 val = Fmake_vector (make_number (256), Qnil);
9685 9725
9686 for (tail = charset_list; CONSP (tail); tail = XCDR (tail)) 9726 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))