diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/coding.c | 2583 |
1 files changed, 1220 insertions, 1363 deletions
diff --git a/src/coding.c b/src/coding.c index 92e5af8b390..148b105e3d6 100644 --- a/src/coding.c +++ b/src/coding.c | |||
| @@ -21,6 +21,7 @@ Boston, MA 02111-1307, USA. */ | |||
| 21 | 21 | ||
| 22 | /*** TABLE OF CONTENTS *** | 22 | /*** TABLE OF CONTENTS *** |
| 23 | 23 | ||
| 24 | 0. General comments | ||
| 24 | 1. Preamble | 25 | 1. Preamble |
| 25 | 2. Emacs' internal format (emacs-mule) handlers | 26 | 2. Emacs' internal format (emacs-mule) handlers |
| 26 | 3. ISO2022 handlers | 27 | 3. ISO2022 handlers |
| @@ -33,6 +34,9 @@ Boston, MA 02111-1307, USA. */ | |||
| 33 | 34 | ||
| 34 | */ | 35 | */ |
| 35 | 36 | ||
| 37 | /*** 0. General comments ***/ | ||
| 38 | |||
| 39 | |||
| 36 | /*** GENERAL NOTE on CODING SYSTEM *** | 40 | /*** GENERAL NOTE on CODING SYSTEM *** |
| 37 | 41 | ||
| 38 | Coding system is an encoding mechanism of one or more character | 42 | Coding system is an encoding mechanism of one or more character |
| @@ -120,15 +124,15 @@ detect_coding_emacs_mule (src, src_end) | |||
| 120 | 124 | ||
| 121 | /*** GENERAL NOTES on `decode_coding_XXX ()' functions *** | 125 | /*** GENERAL NOTES on `decode_coding_XXX ()' functions *** |
| 122 | 126 | ||
| 123 | These functions decode SRC_BYTES length text at SOURCE encoded in | 127 | These functions decode SRC_BYTES length of unibyte text at SOURCE |
| 124 | CODING to Emacs' internal format (emacs-mule). The resulting text | 128 | encoded in CODING to Emacs' internal format. The resulting |
| 125 | goes to a place pointed to by DESTINATION, the length of which | 129 | multibyte text goes to a place pointed to by DESTINATION, the length |
| 126 | should not exceed DST_BYTES. These functions set the information of | 130 | of which should not exceed DST_BYTES. |
| 127 | original and decoded texts in the members produced, produced_char, | ||
| 128 | consumed, and consumed_char of the structure *CODING. | ||
| 129 | 131 | ||
| 130 | The return value is an integer (CODING_FINISH_XXX) indicating how | 132 | These functions set the information of original and decoded texts in |
| 131 | the decoding finished. | 133 | the members produced, produced_char, consumed, and consumed_char of |
| 134 | the structure *CODING. They also set the member result to one of | ||
| 135 | CODING_FINISH_XXX indicating how the decoding finished. | ||
| 132 | 136 | ||
| 133 | DST_BYTES zero means that source area and destination area are | 137 | DST_BYTES zero means that source area and destination area are |
| 134 | overlapped, which means that we can produce a decoded text until it | 138 | overlapped, which means that we can produce a decoded text until it |
| @@ -136,6 +140,7 @@ detect_coding_emacs_mule (src, src_end) | |||
| 136 | 140 | ||
| 137 | Below is a template of these functions. */ | 141 | Below is a template of these functions. */ |
| 138 | #if 0 | 142 | #if 0 |
| 143 | static void | ||
| 139 | decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes) | 144 | decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes) |
| 140 | struct coding_system *coding; | 145 | struct coding_system *coding; |
| 141 | unsigned char *source, *destination; | 146 | unsigned char *source, *destination; |
| @@ -148,21 +153,22 @@ decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes) | |||
| 148 | /*** GENERAL NOTES on `encode_coding_XXX ()' functions *** | 153 | /*** GENERAL NOTES on `encode_coding_XXX ()' functions *** |
| 149 | 154 | ||
| 150 | These functions encode SRC_BYTES length text at SOURCE of Emacs' | 155 | These functions encode SRC_BYTES length text at SOURCE of Emacs' |
| 151 | internal format (emacs-mule) to CODING. The resulting text goes to | 156 | internal multibyte format to CODING. The resulting unibyte text |
| 152 | a place pointed to by DESTINATION, the length of which should not | 157 | goes to a place pointed to by DESTINATION, the length of which |
| 153 | exceed DST_BYTES. These functions set the information of | 158 | should not exceed DST_BYTES. |
| 154 | original and encoded texts in the members produced, produced_char, | ||
| 155 | consumed, and consumed_char of the structure *CODING. | ||
| 156 | 159 | ||
| 157 | The return value is an integer (CODING_FINISH_XXX) indicating how | 160 | These functions set the information of original and encoded texts in |
| 158 | the encoding finished. | 161 | the members produced, produced_char, consumed, and consumed_char of |
| 162 | the structure *CODING. They also set the member result to one of | ||
| 163 | CODING_FINISH_XXX indicating how the encoding finished. | ||
| 159 | 164 | ||
| 160 | DST_BYTES zero means that source area and destination area are | 165 | DST_BYTES zero means that source area and destination area are |
| 161 | overlapped, which means that we can produce a decoded text until it | 166 | overlapped, which means that we can produce an encoded text until it |
| 162 | reaches at the head of not-yet-decoded source text. | 167 | reaches at the head of not-yet-encoded source text. |
| 163 | 168 | ||
| 164 | Below is a template of these functions. */ | 169 | Below is a template of these functions. */ |
| 165 | #if 0 | 170 | #if 0 |
| 171 | static void | ||
| 166 | encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes) | 172 | encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes) |
| 167 | struct coding_system *coding; | 173 | struct coding_system *coding; |
| 168 | unsigned char *source, *destination; | 174 | unsigned char *source, *destination; |
| @@ -174,72 +180,131 @@ encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes) | |||
| 174 | 180 | ||
| 175 | /*** COMMONLY USED MACROS ***/ | 181 | /*** COMMONLY USED MACROS ***/ |
| 176 | 182 | ||
| 177 | /* The following three macros ONE_MORE_BYTE, TWO_MORE_BYTES, and | 183 | /* The following two macros ONE_MORE_BYTE and TWO_MORE_BYTES safely |
| 178 | THREE_MORE_BYTES safely get one, two, and three bytes from the | 184 | get one and two bytes from the source text respectively. |
| 179 | source text respectively. If there are not enough bytes in the | 185 | If there are not enough bytes in the source, they jump to |
| 180 | source, they jump to `label_end_of_loop'. The caller should set | 186 | `label_end_of_loop'. The caller should set variables `coding', |
| 181 | variables `src' and `src_end' to appropriate areas in advance. */ | 187 | `src' and `src_end' to appropriate pointer in advance. These |
| 182 | 188 | macros are called from decoding routines `decode_coding_XXX', thus | |
| 183 | #define ONE_MORE_BYTE(c1) \ | 189 | it is assumed that the source text is unibyte. */ |
| 184 | do { \ | ||
| 185 | if (src < src_end) \ | ||
| 186 | c1 = *src++; \ | ||
| 187 | else \ | ||
| 188 | goto label_end_of_loop; \ | ||
| 189 | } while (0) | ||
| 190 | 190 | ||
| 191 | #define TWO_MORE_BYTES(c1, c2) \ | 191 | #define ONE_MORE_BYTE(c1) \ |
| 192 | do { \ | 192 | do { \ |
| 193 | if (src + 1 < src_end) \ | 193 | if (src >= src_end) \ |
| 194 | c1 = *src++, c2 = *src++; \ | 194 | { \ |
| 195 | else \ | 195 | coding->result = CODING_FINISH_INSUFFICIENT_SRC; \ |
| 196 | goto label_end_of_loop; \ | 196 | goto label_end_of_loop; \ |
| 197 | } \ | ||
| 198 | c1 = *src++; \ | ||
| 197 | } while (0) | 199 | } while (0) |
| 198 | 200 | ||
| 199 | #define THREE_MORE_BYTES(c1, c2, c3) \ | 201 | #define TWO_MORE_BYTES(c1, c2) \ |
| 200 | do { \ | 202 | do { \ |
| 201 | if (src + 2 < src_end) \ | 203 | if (src + 1 >= src_end) \ |
| 202 | c1 = *src++, c2 = *src++, c3 = *src++; \ | 204 | { \ |
| 203 | else \ | 205 | coding->result = CODING_FINISH_INSUFFICIENT_SRC; \ |
| 204 | goto label_end_of_loop; \ | 206 | goto label_end_of_loop; \ |
| 207 | } \ | ||
| 208 | c1 = *src++; \ | ||
| 209 | c2 = *src++; \ | ||
| 205 | } while (0) | 210 | } while (0) |
| 206 | 211 | ||
| 207 | /* The following three macros DECODE_CHARACTER_ASCII, | ||
| 208 | DECODE_CHARACTER_DIMENSION1, and DECODE_CHARACTER_DIMENSION2 put | ||
| 209 | the multi-byte form of a character of each class at the place | ||
| 210 | pointed by `dst'. The caller should set the variable `dst' to | ||
| 211 | point to an appropriate area and the variable `coding' to point to | ||
| 212 | the coding-system of the currently decoding text in advance. */ | ||
| 213 | 212 | ||
| 214 | /* Decode one ASCII character C. */ | 213 | /* Set C to the next character at the source text pointed by `src'. |
| 214 | If there are not enough characters in the source, jump to | ||
| 215 | `label_end_of_loop'. The caller should set variables `coding', | ||
| 216 | `src', `src_end', and `translation_table' to appropriate pointers | ||
| 217 | in advance. This macro is used in encoding routines | ||
| 218 | `encode_coding_XXX', thus it assumes that the source text is in | ||
| 219 | multibyte form except for 8-bit characters. 8-bit characters are | ||
| 220 | in multibyte form if coding->src_multibyte is nonzero, else they | ||
| 221 | are represented by a single byte. */ | ||
| 215 | 222 | ||
| 216 | #define DECODE_CHARACTER_ASCII(c) \ | 223 | #define ONE_MORE_CHAR(c) \ |
| 217 | do { \ | 224 | do { \ |
| 218 | *dst++ = (c) & 0x7F; \ | 225 | int len = src_end - src; \ |
| 219 | coding->produced_char++; \ | 226 | int bytes; \ |
| 227 | if (len <= 0) \ | ||
| 228 | { \ | ||
| 229 | coding->result = CODING_FINISH_INSUFFICIENT_SRC; \ | ||
| 230 | goto label_end_of_loop; \ | ||
| 231 | } \ | ||
| 232 | if (coding->src_multibyte \ | ||
| 233 | || UNIBYTE_STR_AS_MULTIBYTE_P (src, len, bytes)) \ | ||
| 234 | c = STRING_CHAR_AND_LENGTH (src, len, bytes); \ | ||
| 235 | else \ | ||
| 236 | c = *src, bytes = 1; \ | ||
| 237 | if (!NILP (translation_table)) \ | ||
| 238 | c = translate_char (translation_table, c, 0, 0, 0); \ | ||
| 239 | src += bytes; \ | ||
| 220 | } while (0) | 240 | } while (0) |
| 221 | 241 | ||
| 222 | /* Decode one DIMENSION1 character whose charset is CHARSET and whose | ||
| 223 | position-code is C. */ | ||
| 224 | 242 | ||
| 225 | #define DECODE_CHARACTER_DIMENSION1(charset, c) \ | 243 | /* Produce a multibyte form of character C to `dst'. Jump to |
| 244 | `label_end_of_loop' if there's not enough space at `dst'. | ||
| 245 | |||
| 246 | If we are now in the middle of composition sequence, the decoded | ||
| 247 | character may be ALTCHAR (for the current composition). In that | ||
| 248 | case, the character goes to coding->cmp_data->data instead of | ||
| 249 | `dst'. | ||
| 250 | |||
| 251 | This macro is used in decoding routines. */ | ||
| 252 | |||
| 253 | #define EMIT_CHAR(c) \ | ||
| 226 | do { \ | 254 | do { \ |
| 227 | unsigned char leading_code = CHARSET_LEADING_CODE_BASE (charset); \ | 255 | if (! COMPOSING_P (coding) \ |
| 256 | || coding->composing == COMPOSITION_RELATIVE \ | ||
| 257 | || coding->composing == COMPOSITION_WITH_RULE) \ | ||
| 258 | { \ | ||
| 259 | int bytes = CHAR_BYTES (c); \ | ||
| 260 | if ((dst + bytes) > (dst_bytes ? dst_end : src)) \ | ||
| 261 | { \ | ||
| 262 | coding->result = CODING_FINISH_INSUFFICIENT_DST; \ | ||
| 263 | goto label_end_of_loop; \ | ||
| 264 | } \ | ||
| 265 | dst += CHAR_STRING (c, dst); \ | ||
| 266 | coding->produced_char++; \ | ||
| 267 | } \ | ||
| 228 | \ | 268 | \ |
| 229 | *dst++ = leading_code; \ | 269 | if (COMPOSING_P (coding) \ |
| 230 | if ((leading_code = CHARSET_LEADING_CODE_EXT (charset)) > 0) \ | 270 | && coding->composing != COMPOSITION_RELATIVE) \ |
| 231 | *dst++ = leading_code; \ | 271 | { \ |
| 232 | *dst++ = (c) | 0x80; \ | 272 | CODING_ADD_COMPOSITION_COMPONENT (coding, c); \ |
| 233 | coding->produced_char++; \ | 273 | coding->composition_rule_follows \ |
| 274 | = coding->composing != COMPOSITION_WITH_ALTCHARS; \ | ||
| 275 | } \ | ||
| 234 | } while (0) | 276 | } while (0) |
| 235 | 277 | ||
| 236 | /* Decode one DIMENSION2 character whose charset is CHARSET and whose | ||
| 237 | position-codes are C1 and C2. */ | ||
| 238 | 278 | ||
| 239 | #define DECODE_CHARACTER_DIMENSION2(charset, c1, c2) \ | 279 | #define EMIT_ONE_BYTE(c) \ |
| 240 | do { \ | 280 | do { \ |
| 241 | DECODE_CHARACTER_DIMENSION1 (charset, c1); \ | 281 | if (dst >= (dst_bytes ? dst_end : src)) \ |
| 242 | *dst++ = (c2) | 0x80; \ | 282 | { \ |
| 283 | coding->result = CODING_FINISH_INSUFFICIENT_DST; \ | ||
| 284 | goto label_end_of_loop; \ | ||
| 285 | } \ | ||
| 286 | *dst++ = c; \ | ||
| 287 | } while (0) | ||
| 288 | |||
| 289 | #define EMIT_TWO_BYTES(c1, c2) \ | ||
| 290 | do { \ | ||
| 291 | if (dst + 2 > (dst_bytes ? dst_end : src)) \ | ||
| 292 | { \ | ||
| 293 | coding->result = CODING_FINISH_INSUFFICIENT_DST; \ | ||
| 294 | goto label_end_of_loop; \ | ||
| 295 | } \ | ||
| 296 | *dst++ = c1, *dst++ = c2; \ | ||
| 297 | } while (0) | ||
| 298 | |||
| 299 | #define EMIT_BYTES(from, to) \ | ||
| 300 | do { \ | ||
| 301 | if (dst + (to - from) > (dst_bytes ? dst_end : src)) \ | ||
| 302 | { \ | ||
| 303 | coding->result = CODING_FINISH_INSUFFICIENT_DST; \ | ||
| 304 | goto label_end_of_loop; \ | ||
| 305 | } \ | ||
| 306 | while (from < to) \ | ||
| 307 | *dst++ = *from++; \ | ||
| 243 | } while (0) | 308 | } while (0) |
| 244 | 309 | ||
| 245 | 310 | ||
| @@ -408,102 +473,150 @@ static int inhibit_pre_post_conversion; | |||
| 408 | 473 | ||
| 409 | /* Emacs' internal format for encoding multiple character sets is a | 474 | /* Emacs' internal format for encoding multiple character sets is a |
| 410 | kind of multi-byte encoding, i.e. characters are encoded by | 475 | kind of multi-byte encoding, i.e. characters are encoded by |
| 411 | variable-length sequences of one-byte codes. ASCII characters | 476 | variable-length sequences of one-byte codes. |
| 412 | and control characters (e.g. `tab', `newline') are represented by | 477 | |
| 413 | one-byte sequences which are their ASCII codes, in the range 0x00 | 478 | ASCII characters and control characters (e.g. `tab', `newline') are |
| 414 | through 0x7F. The other characters are represented by a sequence | 479 | represented by one-byte sequences which are their ASCII codes, in |
| 415 | of `base leading-code', optional `extended leading-code', and one | 480 | the range 0x00 through 0x7F. |
| 416 | or two `position-code's. The length of the sequence is determined | 481 | |
| 417 | by the base leading-code. Leading-code takes the range 0x80 | 482 | 8-bit characters of the range 0x80..0x9F are represented by |
| 418 | through 0x9F, whereas extended leading-code and position-code take | 483 | two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit |
| 419 | the range 0xA0 through 0xFF. See `charset.h' for more details | 484 | code + 0x20). |
| 420 | about leading-code and position-code. | 485 | |
| 486 | 8-bit characters of the range 0xA0..0xFF are represented by | ||
| 487 | one-byte sequences which are their 8-bit code. | ||
| 488 | |||
| 489 | The other characters are represented by a sequence of `base | ||
| 490 | leading-code', optional `extended leading-code', and one or two | ||
| 491 | `position-code's. The length of the sequence is determined by the | ||
| 492 | base leading-code. Leading-code takes the range 0x80 through 0x9F, | ||
| 493 | whereas extended leading-code and position-code take the range 0xA0 | ||
| 494 | through 0xFF. See `charset.h' for more details about leading-code | ||
| 495 | and position-code. | ||
| 421 | 496 | ||
| 422 | --- CODE RANGE of Emacs' internal format --- | 497 | --- CODE RANGE of Emacs' internal format --- |
| 423 | (character set) (range) | 498 | character set range |
| 424 | ASCII 0x00 .. 0x7F | 499 | ------------- ----- |
| 425 | ELSE (1st byte) 0x81 .. 0x9F | 500 | ascii 0x00..0x7F |
| 426 | (rest bytes) 0xA0 .. 0xFF | 501 | eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF |
| 502 | eight-bit-graphic 0xA0..0xFF | ||
| 503 | ELSE 0x81..0x9F + [0xA0..0xFF]+ | ||
| 427 | --------------------------------------------- | 504 | --------------------------------------------- |
| 428 | 505 | ||
| 429 | */ | 506 | */ |
| 430 | 507 | ||
| 431 | enum emacs_code_class_type emacs_code_class[256]; | 508 | enum emacs_code_class_type emacs_code_class[256]; |
| 432 | 509 | ||
| 433 | /* Go to the next statement only if *SRC is accessible and the code is | ||
| 434 | greater than 0xA0. */ | ||
| 435 | #define CHECK_CODE_RANGE_A0_FF \ | ||
| 436 | do { \ | ||
| 437 | if (src >= src_end) \ | ||
| 438 | goto label_end_of_switch; \ | ||
| 439 | else if (*src++ < 0xA0) \ | ||
| 440 | return 0; \ | ||
| 441 | } while (0) | ||
| 442 | |||
| 443 | /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". | 510 | /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". |
| 444 | Check if a text is encoded in Emacs' internal format. If it is, | 511 | Check if a text is encoded in Emacs' internal format. If it is, |
| 445 | return CODING_CATEGORY_MASK_EMACS_MULE, else return 0. */ | 512 | return CODING_CATEGORY_MASK_EMACS_MULE, else return 0. */ |
| 446 | 513 | ||
| 447 | int | 514 | int |
| 448 | detect_coding_emacs_mule (src, src_end) | 515 | detect_coding_emacs_mule (src, src_end) |
| 449 | unsigned char *src, *src_end; | 516 | unsigned char *src, *src_end; |
| 450 | { | 517 | { |
| 451 | unsigned char c; | 518 | unsigned char c; |
| 452 | int composing = 0; | 519 | int composing = 0; |
| 520 | /* Dummy for ONE_MORE_BYTE. */ | ||
| 521 | struct coding_system dummy_coding; | ||
| 522 | struct coding_system *coding = &dummy_coding; | ||
| 453 | 523 | ||
| 454 | while (src < src_end) | 524 | while (1) |
| 455 | { | 525 | { |
| 456 | c = *src++; | 526 | ONE_MORE_BYTE (c); |
| 457 | 527 | ||
| 458 | if (composing) | 528 | if (composing) |
| 459 | { | 529 | { |
| 460 | if (c < 0xA0) | 530 | if (c < 0xA0) |
| 461 | composing = 0; | 531 | composing = 0; |
| 532 | else if (c == 0xA0) | ||
| 533 | { | ||
| 534 | ONE_MORE_BYTE (c); | ||
| 535 | c &= 0x7F; | ||
| 536 | } | ||
| 462 | else | 537 | else |
| 463 | c -= 0x20; | 538 | c -= 0x20; |
| 464 | } | 539 | } |
| 465 | 540 | ||
| 466 | switch (emacs_code_class[c]) | 541 | if (c < 0x20) |
| 467 | { | 542 | { |
| 468 | case EMACS_ascii_code: | ||
| 469 | case EMACS_linefeed_code: | ||
| 470 | break; | ||
| 471 | |||
| 472 | case EMACS_control_code: | ||
| 473 | if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) | 543 | if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) |
| 474 | return 0; | 544 | return 0; |
| 475 | break; | 545 | } |
| 546 | else if (c >= 0x80 && c < 0xA0) | ||
| 547 | { | ||
| 548 | if (c == 0x80) | ||
| 549 | /* Old leading code for a composite character. */ | ||
| 550 | composing = 1; | ||
| 551 | else | ||
| 552 | { | ||
| 553 | unsigned char *src_base = src - 1; | ||
| 554 | int bytes; | ||
| 476 | 555 | ||
| 477 | case EMACS_invalid_code: | 556 | if (!UNIBYTE_STR_AS_MULTIBYTE_P (src_base, src_end - src_base, |
| 478 | return 0; | 557 | bytes)) |
| 558 | return 0; | ||
| 559 | src = src_base + bytes; | ||
| 560 | } | ||
| 561 | } | ||
| 562 | } | ||
| 563 | label_end_of_loop: | ||
| 564 | return CODING_CATEGORY_MASK_EMACS_MULE; | ||
| 565 | } | ||
| 479 | 566 | ||
| 480 | case EMACS_leading_code_4: | ||
| 481 | CHECK_CODE_RANGE_A0_FF; | ||
| 482 | /* fall down to check it two more times ... */ | ||
| 483 | 567 | ||
| 484 | case EMACS_leading_code_3: | 568 | /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */ |
| 485 | CHECK_CODE_RANGE_A0_FF; | ||
| 486 | /* fall down to check it one more time ... */ | ||
| 487 | 569 | ||
| 488 | case EMACS_leading_code_2: | 570 | static void |
| 489 | CHECK_CODE_RANGE_A0_FF; | 571 | decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes) |
| 490 | break; | 572 | struct coding_system *coding; |
| 573 | unsigned char *source, *destination; | ||
| 574 | int src_bytes, dst_bytes; | ||
| 575 | { | ||
| 576 | unsigned char *src = source; | ||
| 577 | unsigned char *src_end = source + src_bytes; | ||
| 578 | unsigned char *dst = destination; | ||
| 579 | unsigned char *dst_end = destination + dst_bytes; | ||
| 580 | /* SRC_BASE remembers the start position in source in each loop. | ||
| 581 | The loop will be exited when there's not enough source code, or | ||
| 582 | when there's not enough destination area to produce a | ||
| 583 | character. */ | ||
| 584 | unsigned char *src_base; | ||
| 491 | 585 | ||
| 492 | case 0x80: /* Old leading code for a composite character. */ | 586 | coding->produced_char = 0; |
| 493 | if (composing) | 587 | while (src < src_end) |
| 494 | CHECK_CODE_RANGE_A0_FF; | 588 | { |
| 495 | else | 589 | unsigned char tmp[MAX_MULTIBYTE_LENGTH], *p; |
| 496 | composing = 1; | 590 | int bytes; |
| 497 | break; | ||
| 498 | 591 | ||
| 499 | default: | 592 | src_base = src; |
| 500 | label_end_of_switch: | 593 | if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)) |
| 594 | { | ||
| 595 | p = src; | ||
| 596 | src += bytes; | ||
| 597 | } | ||
| 598 | else | ||
| 599 | { | ||
| 600 | bytes = CHAR_STRING (*src, tmp); | ||
| 601 | p = tmp; | ||
| 602 | src++; | ||
| 603 | } | ||
| 604 | if (dst + bytes >= (dst_bytes ? dst_end : src)) | ||
| 605 | { | ||
| 606 | coding->result = CODING_FINISH_INSUFFICIENT_DST; | ||
| 501 | break; | 607 | break; |
| 502 | } | 608 | } |
| 609 | while (bytes--) *dst++ = *p++; | ||
| 610 | coding->produced_char++; | ||
| 503 | } | 611 | } |
| 504 | return CODING_CATEGORY_MASK_EMACS_MULE; | 612 | coding->consumed = coding->consumed_char = src_base - source; |
| 613 | coding->produced = dst - destination; | ||
| 505 | } | 614 | } |
| 506 | 615 | ||
| 616 | #define encode_coding_emacs_mule(coding, source, destination, src_bytes, dst_bytes) \ | ||
| 617 | encode_eol (coding, source, destination, src_bytes, dst_bytes) | ||
| 618 | |||
| 619 | |||
| 507 | 620 | ||
| 508 | /*** 3. ISO2022 handlers ***/ | 621 | /*** 3. ISO2022 handlers ***/ |
| 509 | 622 | ||
| @@ -518,7 +631,7 @@ detect_coding_emacs_mule (src, src_end) | |||
| 518 | is encoded using bytes less than 128. This may make the encoded | 631 | is encoded using bytes less than 128. This may make the encoded |
| 519 | text a little bit longer, but the text passes more easily through | 632 | text a little bit longer, but the text passes more easily through |
| 520 | several gateways, some of which strip off MSB (Most Significant Bit). | 633 | several gateways, some of which strip off MSB (Most Significant Bit). |
| 521 | 634 | ||
| 522 | There are two kinds of character sets: control character set and | 635 | There are two kinds of character sets: control character set and |
| 523 | graphic character set. The former contains control characters such | 636 | graphic character set. The former contains control characters such |
| 524 | as `newline' and `escape' to provide control functions (control | 637 | as `newline' and `escape' to provide control functions (control |
| @@ -660,24 +773,24 @@ detect_coding_emacs_mule (src, src_end) | |||
| 660 | o ESC '2' -- start rule-base composition (*) | 773 | o ESC '2' -- start rule-base composition (*) |
| 661 | o ESC '3' -- start relative composition with alternate chars (**) | 774 | o ESC '3' -- start relative composition with alternate chars (**) |
| 662 | o ESC '4' -- start rule-base composition with alternate chars (**) | 775 | o ESC '4' -- start rule-base composition with alternate chars (**) |
| 663 | Since these are not standard escape sequences of any ISO standard, | 776 | Since these are not standard escape sequences of any ISO standard, |
| 664 | the use of them for these meaning is restricted to Emacs only. | 777 | the use of them for these meaning is restricted to Emacs only. |
| 665 | 778 | ||
| 666 | (*) This form is used only in Emacs 20.5 and the older versions, | 779 | (*) This form is used only in Emacs 20.5 and the older versions, |
| 667 | but the newer versions can safely decode it. | 780 | but the newer versions can safely decode it. |
| 668 | (**) This form is used only in Emacs 21.1 and the newer versions, | 781 | (**) This form is used only in Emacs 21.1 and the newer versions, |
| 669 | and the older versions can't decode it. | 782 | and the older versions can't decode it. |
| 670 | 783 | ||
| 671 | Here's a list of example usages of these composition escape | 784 | Here's a list of example usages of these composition escape |
| 672 | sequences (categorized by `enum composition_method'). | 785 | sequences (categorized by `enum composition_method'). |
| 673 | 786 | ||
| 674 | COMPOSITION_RELATIVE: | 787 | COMPOSITION_RELATIVE: |
| 675 | ESC 0 CHAR [ CHAR ] ESC 1 | 788 | ESC 0 CHAR [ CHAR ] ESC 1 |
| 676 | COMPOSITION_WITH_RULE: | 789 | COMPOSITION_WITH_RULE: |
| 677 | ESC 2 CHAR [ RULE CHAR ] ESC 1 | 790 | ESC 2 CHAR [ RULE CHAR ] ESC 1 |
| 678 | COMPOSITION_WITH_ALTCHARS: | 791 | COMPOSITION_WITH_ALTCHARS: |
| 679 | ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 | 792 | ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 |
| 680 | COMPOSITION_WITH_RULE_ALTCHARS: | 793 | COMPOSITION_WITH_RULE_ALTCHARS: |
| 681 | ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */ | 794 | ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */ |
| 682 | 795 | ||
| 683 | enum iso_code_class_type iso_code_class[256]; | 796 | enum iso_code_class_type iso_code_class[256]; |
| @@ -712,24 +825,23 @@ detect_coding_iso2022 (src, src_end) | |||
| 712 | int mask_found = 0; | 825 | int mask_found = 0; |
| 713 | int reg[4], shift_out = 0, single_shifting = 0; | 826 | int reg[4], shift_out = 0, single_shifting = 0; |
| 714 | int c, c1, i, charset; | 827 | int c, c1, i, charset; |
| 828 | /* Dummy for ONE_MORE_BYTE. */ | ||
| 829 | struct coding_system dummy_coding; | ||
| 830 | struct coding_system *coding = &dummy_coding; | ||
| 715 | 831 | ||
| 716 | reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1; | 832 | reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1; |
| 717 | while (mask && src < src_end) | 833 | while (mask && src < src_end) |
| 718 | { | 834 | { |
| 719 | c = *src++; | 835 | ONE_MORE_BYTE (c); |
| 720 | switch (c) | 836 | switch (c) |
| 721 | { | 837 | { |
| 722 | case ISO_CODE_ESC: | 838 | case ISO_CODE_ESC: |
| 723 | single_shifting = 0; | 839 | single_shifting = 0; |
| 724 | if (src >= src_end) | 840 | ONE_MORE_BYTE (c); |
| 725 | break; | ||
| 726 | c = *src++; | ||
| 727 | if (c >= '(' && c <= '/') | 841 | if (c >= '(' && c <= '/') |
| 728 | { | 842 | { |
| 729 | /* Designation sequence for a charset of dimension 1. */ | 843 | /* Designation sequence for a charset of dimension 1. */ |
| 730 | if (src >= src_end) | 844 | ONE_MORE_BYTE (c1); |
| 731 | break; | ||
| 732 | c1 = *src++; | ||
| 733 | if (c1 < ' ' || c1 >= 0x80 | 845 | if (c1 < ' ' || c1 >= 0x80 |
| 734 | || (charset = iso_charset_table[0][c >= ','][c1]) < 0) | 846 | || (charset = iso_charset_table[0][c >= ','][c1]) < 0) |
| 735 | /* Invalid designation sequence. Just ignore. */ | 847 | /* Invalid designation sequence. Just ignore. */ |
| @@ -739,17 +851,13 @@ detect_coding_iso2022 (src, src_end) | |||
| 739 | else if (c == '$') | 851 | else if (c == '$') |
| 740 | { | 852 | { |
| 741 | /* Designation sequence for a charset of dimension 2. */ | 853 | /* Designation sequence for a charset of dimension 2. */ |
| 742 | if (src >= src_end) | 854 | ONE_MORE_BYTE (c); |
| 743 | break; | ||
| 744 | c = *src++; | ||
| 745 | if (c >= '@' && c <= 'B') | 855 | if (c >= '@' && c <= 'B') |
| 746 | /* Designation for JISX0208.1978, GB2312, or JISX0208. */ | 856 | /* Designation for JISX0208.1978, GB2312, or JISX0208. */ |
| 747 | reg[0] = charset = iso_charset_table[1][0][c]; | 857 | reg[0] = charset = iso_charset_table[1][0][c]; |
| 748 | else if (c >= '(' && c <= '/') | 858 | else if (c >= '(' && c <= '/') |
| 749 | { | 859 | { |
| 750 | if (src >= src_end) | 860 | ONE_MORE_BYTE (c1); |
| 751 | break; | ||
| 752 | c1 = *src++; | ||
| 753 | if (c1 < ' ' || c1 >= 0x80 | 861 | if (c1 < ' ' || c1 >= 0x80 |
| 754 | || (charset = iso_charset_table[1][c >= ','][c1]) < 0) | 862 | || (charset = iso_charset_table[1][c >= ','][c1]) < 0) |
| 755 | /* Invalid designation sequence. Just ignore. */ | 863 | /* Invalid designation sequence. Just ignore. */ |
| @@ -888,11 +996,19 @@ detect_coding_iso2022 (src, src_end) | |||
| 888 | 0xA0..0xFF. If the byte length is odd, we exclude | 996 | 0xA0..0xFF. If the byte length is odd, we exclude |
| 889 | CODING_CATEGORY_MASK_ISO_8_2. We can check this only | 997 | CODING_CATEGORY_MASK_ISO_8_2. We can check this only |
| 890 | when we are not single shifting. */ | 998 | when we are not single shifting. */ |
| 891 | if (!single_shifting) | 999 | if (!single_shifting |
| 1000 | && mask & CODING_CATEGORY_MASK_ISO_8_2) | ||
| 892 | { | 1001 | { |
| 893 | while (src < src_end && *src >= 0xA0) | 1002 | int i = 0; |
| 894 | src++; | 1003 | while (src < src_end) |
| 895 | if ((src - src_begin - 1) & 1 && src < src_end) | 1004 | { |
| 1005 | ONE_MORE_BYTE (c); | ||
| 1006 | if (c < 0xA0) | ||
| 1007 | break; | ||
| 1008 | i++; | ||
| 1009 | } | ||
| 1010 | |||
| 1011 | if (i & 1 && src < src_end) | ||
| 896 | mask &= ~CODING_CATEGORY_MASK_ISO_8_2; | 1012 | mask &= ~CODING_CATEGORY_MASK_ISO_8_2; |
| 897 | else | 1013 | else |
| 898 | mask_found |= CODING_CATEGORY_MASK_ISO_8_2; | 1014 | mask_found |= CODING_CATEGORY_MASK_ISO_8_2; |
| @@ -901,61 +1017,19 @@ detect_coding_iso2022 (src, src_end) | |||
| 901 | break; | 1017 | break; |
| 902 | } | 1018 | } |
| 903 | } | 1019 | } |
| 904 | 1020 | label_end_of_loop: | |
| 905 | return (mask & mask_found); | 1021 | return (mask & mask_found); |
| 906 | } | 1022 | } |
| 907 | 1023 | ||
| 908 | /* Decode a character of which charset is CHARSET and the 1st position | 1024 | /* Decode a character of which charset is CHARSET, the 1st position |
| 909 | code is C1. If dimension of CHARSET is 2, the 2nd position code is | 1025 | code is C1, the 2nd position code is C2, and return the decoded |
| 910 | fetched from SRC and set to C2. If CHARSET is negative, it means | 1026 | character code. If the variable `translation_table' is non-nil, |
| 911 | that we are decoding ill formed text, and what we can do is just to | 1027 | return the translated code. */ |
| 912 | read C1 as is. | ||
| 913 | 1028 | ||
| 914 | If we are now in the middle of composition sequence, the decoded | 1029 | #define DECODE_ISO_CHARACTER(charset, c1, c2) \ |
| 915 | character may be ALTCHAR (see the comment above). In that case, | 1030 | (NILP (translation_table) \ |
| 916 | the character goes to coding->cmp_data->data instead of DST. */ | 1031 | ? MAKE_CHAR (charset, c1, c2) \ |
| 917 | 1032 | : translate_char (translation_table, -1, charset, c1, c2)) | |
| 918 | #define DECODE_ISO_CHARACTER(charset, c1) \ | ||
| 919 | do { \ | ||
| 920 | int c_alt = -1, charset_alt = (charset); \ | ||
| 921 | if (charset_alt >= 0) \ | ||
| 922 | { \ | ||
| 923 | if (CHARSET_DIMENSION (charset_alt) == 2) \ | ||
| 924 | { \ | ||
| 925 | ONE_MORE_BYTE (c2); \ | ||
| 926 | if (iso_code_class[(c2) & 0x7F] != ISO_0x20_or_0x7F \ | ||
| 927 | && iso_code_class[(c2) & 0x7F] != ISO_graphic_plane_0) \ | ||
| 928 | { \ | ||
| 929 | src--; \ | ||
| 930 | charset_alt = CHARSET_ASCII; \ | ||
| 931 | } \ | ||
| 932 | } \ | ||
| 933 | if (!NILP (translation_table) \ | ||
| 934 | && ((c_alt = translate_char (translation_table, \ | ||
| 935 | -1, charset_alt, c1, c2)) >= 0)) \ | ||
| 936 | SPLIT_CHAR (c_alt, charset_alt, c1, c2); \ | ||
| 937 | } \ | ||
| 938 | if (! COMPOSING_P (coding) \ | ||
| 939 | || coding->composing == COMPOSITION_RELATIVE \ | ||
| 940 | || coding->composing == COMPOSITION_WITH_RULE) \ | ||
| 941 | { \ | ||
| 942 | if (charset_alt == CHARSET_ASCII || charset_alt < 0) \ | ||
| 943 | DECODE_CHARACTER_ASCII (c1); \ | ||
| 944 | else if (CHARSET_DIMENSION (charset_alt) == 1) \ | ||
| 945 | DECODE_CHARACTER_DIMENSION1 (charset_alt, c1); \ | ||
| 946 | else \ | ||
| 947 | DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2); \ | ||
| 948 | } \ | ||
| 949 | if (COMPOSING_P (coding) \ | ||
| 950 | && coding->composing != COMPOSITION_RELATIVE) \ | ||
| 951 | { \ | ||
| 952 | if (c_alt < 0) \ | ||
| 953 | c_alt = MAKE_CHAR (charset_alt, c1, c2); \ | ||
| 954 | CODING_ADD_COMPOSITION_COMPONENT (coding, c_alt); \ | ||
| 955 | coding->composition_rule_follows \ | ||
| 956 | = coding->composing != COMPOSITION_WITH_ALTCHARS; \ | ||
| 957 | } \ | ||
| 958 | } while (0) | ||
| 959 | 1033 | ||
| 960 | /* Set designation state into CODING. */ | 1034 | /* Set designation state into CODING. */ |
| 961 | #define DECODE_DESIGNATION(reg, dimension, chars, final_char) \ | 1035 | #define DECODE_DESIGNATION(reg, dimension, chars, final_char) \ |
| @@ -1064,8 +1138,8 @@ coding_allocate_composition_data (coding, char_offset) | |||
| 1064 | if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH \ | 1138 | if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH \ |
| 1065 | >= COMPOSITION_DATA_SIZE) \ | 1139 | >= COMPOSITION_DATA_SIZE) \ |
| 1066 | { \ | 1140 | { \ |
| 1067 | result = CODING_FINISH_INSUFFICIENT_CMP; \ | 1141 | coding->result = CODING_FINISH_INSUFFICIENT_CMP; \ |
| 1068 | goto label_end_of_loop_2; \ | 1142 | goto label_end_of_loop; \ |
| 1069 | } \ | 1143 | } \ |
| 1070 | coding->composing = (c1 == '0' ? COMPOSITION_RELATIVE \ | 1144 | coding->composing = (c1 == '0' ? COMPOSITION_RELATIVE \ |
| 1071 | : c1 == '2' ? COMPOSITION_WITH_RULE \ | 1145 | : c1 == '2' ? COMPOSITION_WITH_RULE \ |
| @@ -1122,7 +1196,7 @@ coding_allocate_composition_data (coding, char_offset) | |||
| 1122 | if (nref == 4) nref = 10; \ | 1196 | if (nref == 4) nref = 10; \ |
| 1123 | rule = COMPOSITION_ENCODE_RULE (gref, nref); \ | 1197 | rule = COMPOSITION_ENCODE_RULE (gref, nref); \ |
| 1124 | } \ | 1198 | } \ |
| 1125 | else if (c1 < 93) /* new format (after ver.21 */ \ | 1199 | else if (c1 < 93) /* new format (after ver.21) */ \ |
| 1126 | { \ | 1200 | { \ |
| 1127 | ONE_MORE_BYTE (c2); \ | 1201 | ONE_MORE_BYTE (c2); \ |
| 1128 | rule = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32); \ | 1202 | rule = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32); \ |
| @@ -1134,7 +1208,7 @@ coding_allocate_composition_data (coding, char_offset) | |||
| 1134 | 1208 | ||
| 1135 | /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */ | 1209 | /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */ |
| 1136 | 1210 | ||
| 1137 | int | 1211 | static void |
| 1138 | decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | 1212 | decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) |
| 1139 | struct coding_system *coding; | 1213 | struct coding_system *coding; |
| 1140 | unsigned char *source, *destination; | 1214 | unsigned char *source, *destination; |
| @@ -1144,34 +1218,35 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 1144 | unsigned char *src_end = source + src_bytes; | 1218 | unsigned char *src_end = source + src_bytes; |
| 1145 | unsigned char *dst = destination; | 1219 | unsigned char *dst = destination; |
| 1146 | unsigned char *dst_end = destination + dst_bytes; | 1220 | unsigned char *dst_end = destination + dst_bytes; |
| 1147 | /* Since the maximum bytes produced by each loop is 7, we subtract 6 | ||
| 1148 | from DST_END to assure that overflow checking is necessary only | ||
| 1149 | at the head of loop. */ | ||
| 1150 | unsigned char *adjusted_dst_end = dst_end - 6; | ||
| 1151 | int charset; | ||
| 1152 | /* Charsets invoked to graphic plane 0 and 1 respectively. */ | 1221 | /* Charsets invoked to graphic plane 0 and 1 respectively. */ |
| 1153 | int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0); | 1222 | int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0); |
| 1154 | int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1); | 1223 | int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1); |
| 1155 | Lisp_Object translation_table | 1224 | /* SRC_BASE remembers the start position in source in each loop. |
| 1156 | = coding->translation_table_for_decode; | 1225 | The loop will be exited when there's not enough source code |
| 1157 | int result = CODING_FINISH_NORMAL; | 1226 | (within macro ONE_MORE_BYTE), or when there's not enough |
| 1227 | destination area to produce a character (within macro | ||
| 1228 | EMIT_CHAR). */ | ||
| 1229 | unsigned char *src_base; | ||
| 1230 | int c, charset; | ||
| 1231 | Lisp_Object translation_table; | ||
| 1158 | 1232 | ||
| 1159 | if (!NILP (Venable_character_translation) && NILP (translation_table)) | 1233 | if (NILP (Venable_character_translation)) |
| 1160 | translation_table = Vstandard_translation_table_for_decode; | 1234 | translation_table = Qnil; |
| 1235 | else | ||
| 1236 | { | ||
| 1237 | translation_table = coding->translation_table_for_decode; | ||
| 1238 | if (NILP (translation_table)) | ||
| 1239 | translation_table = Vstandard_translation_table_for_decode; | ||
| 1240 | } | ||
| 1161 | 1241 | ||
| 1162 | coding->produced_char = 0; | 1242 | coding->result = CODING_FINISH_NORMAL; |
| 1163 | coding->fake_multibyte = 0; | 1243 | |
| 1164 | while (src < src_end && (dst_bytes | 1244 | while (1) |
| 1165 | ? (dst < adjusted_dst_end) | ||
| 1166 | : (dst < src - 6))) | ||
| 1167 | { | 1245 | { |
| 1168 | /* SRC_BASE remembers the start position in source in each loop. | 1246 | int c1, c2; |
| 1169 | The loop will be exited when there's not enough source text | 1247 | |
| 1170 | to analyze long escape sequence or 2-byte code (within macros | 1248 | src_base = src; |
| 1171 | ONE_MORE_BYTE or TWO_MORE_BYTES). In that case, SRC is reset | 1249 | ONE_MORE_BYTE (c1); |
| 1172 | to SRC_BASE before exiting. */ | ||
| 1173 | unsigned char *src_base = src; | ||
| 1174 | int c1 = *src++, c2; | ||
| 1175 | 1250 | ||
| 1176 | /* We produce no character or one character. */ | 1251 | /* We produce no character or one character. */ |
| 1177 | switch (iso_code_class [c1]) | 1252 | switch (iso_code_class [c1]) |
| @@ -1180,22 +1255,23 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 1180 | if (COMPOSING_P (coding) && coding->composition_rule_follows) | 1255 | if (COMPOSING_P (coding) && coding->composition_rule_follows) |
| 1181 | { | 1256 | { |
| 1182 | DECODE_COMPOSITION_RULE (c1); | 1257 | DECODE_COMPOSITION_RULE (c1); |
| 1183 | break; | 1258 | continue; |
| 1184 | } | 1259 | } |
| 1185 | if (charset0 < 0 || CHARSET_CHARS (charset0) == 94) | 1260 | if (charset0 < 0 || CHARSET_CHARS (charset0) == 94) |
| 1186 | { | 1261 | { |
| 1187 | /* This is SPACE or DEL. */ | 1262 | /* This is SPACE or DEL. */ |
| 1188 | *dst++ = c1; | 1263 | charset = CHARSET_ASCII; |
| 1189 | coding->produced_char++; | ||
| 1190 | break; | 1264 | break; |
| 1191 | } | 1265 | } |
| 1192 | /* This is a graphic character, we fall down ... */ | 1266 | /* This is a graphic character, we fall down ... */ |
| 1193 | 1267 | ||
| 1194 | case ISO_graphic_plane_0: | 1268 | case ISO_graphic_plane_0: |
| 1195 | if (COMPOSING_P (coding) && coding->composition_rule_follows) | 1269 | if (COMPOSING_P (coding) && coding->composition_rule_follows) |
| 1196 | DECODE_COMPOSITION_RULE (c1); | 1270 | { |
| 1197 | else | 1271 | DECODE_COMPOSITION_RULE (c1); |
| 1198 | DECODE_ISO_CHARACTER (charset0, c1); | 1272 | continue; |
| 1273 | } | ||
| 1274 | charset = charset0; | ||
| 1199 | break; | 1275 | break; |
| 1200 | 1276 | ||
| 1201 | case ISO_0xA0_or_0xFF: | 1277 | case ISO_0xA0_or_0xFF: |
| @@ -1205,12 +1281,12 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 1205 | /* This is a graphic character, we fall down ... */ | 1281 | /* This is a graphic character, we fall down ... */ |
| 1206 | 1282 | ||
| 1207 | case ISO_graphic_plane_1: | 1283 | case ISO_graphic_plane_1: |
| 1208 | if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) | 1284 | if (charset1 < 0) |
| 1209 | goto label_invalid_code; | 1285 | goto label_invalid_code; |
| 1210 | DECODE_ISO_CHARACTER (charset1, c1); | 1286 | charset = charset1; |
| 1211 | break; | 1287 | break; |
| 1212 | 1288 | ||
| 1213 | case ISO_control_code: | 1289 | case ISO_control_0: |
| 1214 | if (COMPOSING_P (coding)) | 1290 | if (COMPOSING_P (coding)) |
| 1215 | DECODE_COMPOSITION_END ('1'); | 1291 | DECODE_COMPOSITION_END ('1'); |
| 1216 | 1292 | ||
| @@ -1221,38 +1297,38 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 1221 | && (coding->eol_type == CODING_EOL_CR | 1297 | && (coding->eol_type == CODING_EOL_CR |
| 1222 | || coding->eol_type == CODING_EOL_CRLF)) | 1298 | || coding->eol_type == CODING_EOL_CRLF)) |
| 1223 | { | 1299 | { |
| 1224 | result = CODING_FINISH_INCONSISTENT_EOL; | 1300 | coding->result = CODING_FINISH_INCONSISTENT_EOL; |
| 1225 | goto label_end_of_loop_2; | 1301 | goto label_end_of_loop; |
| 1226 | } | 1302 | } |
| 1227 | *dst++ = c1; | 1303 | charset = CHARSET_ASCII; |
| 1228 | coding->produced_char++; | ||
| 1229 | break; | 1304 | break; |
| 1230 | 1305 | ||
| 1306 | case ISO_control_1: | ||
| 1307 | if (COMPOSING_P (coding)) | ||
| 1308 | DECODE_COMPOSITION_END ('1'); | ||
| 1309 | goto label_invalid_code; | ||
| 1310 | |||
| 1231 | case ISO_carriage_return: | 1311 | case ISO_carriage_return: |
| 1232 | if (COMPOSING_P (coding)) | 1312 | if (COMPOSING_P (coding)) |
| 1233 | DECODE_COMPOSITION_END ('1'); | 1313 | DECODE_COMPOSITION_END ('1'); |
| 1234 | 1314 | ||
| 1235 | if (coding->eol_type == CODING_EOL_CR) | 1315 | if (coding->eol_type == CODING_EOL_CR) |
| 1236 | *dst++ = '\n'; | 1316 | c1 = '\n'; |
| 1237 | else if (coding->eol_type == CODING_EOL_CRLF) | 1317 | else if (coding->eol_type == CODING_EOL_CRLF) |
| 1238 | { | 1318 | { |
| 1239 | ONE_MORE_BYTE (c1); | 1319 | ONE_MORE_BYTE (c1); |
| 1240 | if (c1 == ISO_CODE_LF) | 1320 | if (c1 != ISO_CODE_LF) |
| 1241 | *dst++ = '\n'; | ||
| 1242 | else | ||
| 1243 | { | 1321 | { |
| 1244 | if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL) | 1322 | if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL) |
| 1245 | { | 1323 | { |
| 1246 | result = CODING_FINISH_INCONSISTENT_EOL; | 1324 | coding->result = CODING_FINISH_INCONSISTENT_EOL; |
| 1247 | goto label_end_of_loop_2; | 1325 | goto label_end_of_loop; |
| 1248 | } | 1326 | } |
| 1249 | src--; | 1327 | src--; |
| 1250 | *dst++ = '\r'; | 1328 | c1 = '\r'; |
| 1251 | } | 1329 | } |
| 1252 | } | 1330 | } |
| 1253 | else | 1331 | charset = CHARSET_ASCII; |
| 1254 | *dst++ = c1; | ||
| 1255 | coding->produced_char++; | ||
| 1256 | break; | 1332 | break; |
| 1257 | 1333 | ||
| 1258 | case ISO_shift_out: | 1334 | case ISO_shift_out: |
| @@ -1261,14 +1337,14 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 1261 | goto label_invalid_code; | 1337 | goto label_invalid_code; |
| 1262 | CODING_SPEC_ISO_INVOCATION (coding, 0) = 1; | 1338 | CODING_SPEC_ISO_INVOCATION (coding, 0) = 1; |
| 1263 | charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0); | 1339 | charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0); |
| 1264 | break; | 1340 | continue; |
| 1265 | 1341 | ||
| 1266 | case ISO_shift_in: | 1342 | case ISO_shift_in: |
| 1267 | if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)) | 1343 | if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)) |
| 1268 | goto label_invalid_code; | 1344 | goto label_invalid_code; |
| 1269 | CODING_SPEC_ISO_INVOCATION (coding, 0) = 0; | 1345 | CODING_SPEC_ISO_INVOCATION (coding, 0) = 0; |
| 1270 | charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0); | 1346 | charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0); |
| 1271 | break; | 1347 | continue; |
| 1272 | 1348 | ||
| 1273 | case ISO_single_shift_2_7: | 1349 | case ISO_single_shift_2_7: |
| 1274 | case ISO_single_shift_2: | 1350 | case ISO_single_shift_2: |
| @@ -1329,7 +1405,10 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 1329 | } | 1405 | } |
| 1330 | else | 1406 | else |
| 1331 | goto label_invalid_code; | 1407 | goto label_invalid_code; |
| 1332 | break; | 1408 | /* We must update these variables now. */ |
| 1409 | charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0); | ||
| 1410 | charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1); | ||
| 1411 | continue; | ||
| 1333 | 1412 | ||
| 1334 | case 'n': /* invocation of locking-shift-2 */ | 1413 | case 'n': /* invocation of locking-shift-2 */ |
| 1335 | if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT) | 1414 | if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT) |
| @@ -1337,7 +1416,7 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 1337 | goto label_invalid_code; | 1416 | goto label_invalid_code; |
| 1338 | CODING_SPEC_ISO_INVOCATION (coding, 0) = 2; | 1417 | CODING_SPEC_ISO_INVOCATION (coding, 0) = 2; |
| 1339 | charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0); | 1418 | charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0); |
| 1340 | break; | 1419 | continue; |
| 1341 | 1420 | ||
| 1342 | case 'o': /* invocation of locking-shift-3 */ | 1421 | case 'o': /* invocation of locking-shift-3 */ |
| 1343 | if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT) | 1422 | if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT) |
| @@ -1345,33 +1424,31 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 1345 | goto label_invalid_code; | 1424 | goto label_invalid_code; |
| 1346 | CODING_SPEC_ISO_INVOCATION (coding, 0) = 3; | 1425 | CODING_SPEC_ISO_INVOCATION (coding, 0) = 3; |
| 1347 | charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0); | 1426 | charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0); |
| 1348 | break; | 1427 | continue; |
| 1349 | 1428 | ||
| 1350 | case 'N': /* invocation of single-shift-2 */ | 1429 | case 'N': /* invocation of single-shift-2 */ |
| 1351 | if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT) | 1430 | if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT) |
| 1352 | || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0) | 1431 | || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0) |
| 1353 | goto label_invalid_code; | 1432 | goto label_invalid_code; |
| 1354 | ONE_MORE_BYTE (c1); | ||
| 1355 | charset = CODING_SPEC_ISO_DESIGNATION (coding, 2); | 1433 | charset = CODING_SPEC_ISO_DESIGNATION (coding, 2); |
| 1356 | DECODE_ISO_CHARACTER (charset, c1); | 1434 | ONE_MORE_BYTE (c1); |
| 1357 | break; | 1435 | break; |
| 1358 | 1436 | ||
| 1359 | case 'O': /* invocation of single-shift-3 */ | 1437 | case 'O': /* invocation of single-shift-3 */ |
| 1360 | if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT) | 1438 | if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT) |
| 1361 | || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0) | 1439 | || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0) |
| 1362 | goto label_invalid_code; | 1440 | goto label_invalid_code; |
| 1363 | ONE_MORE_BYTE (c1); | ||
| 1364 | charset = CODING_SPEC_ISO_DESIGNATION (coding, 3); | 1441 | charset = CODING_SPEC_ISO_DESIGNATION (coding, 3); |
| 1365 | DECODE_ISO_CHARACTER (charset, c1); | 1442 | ONE_MORE_BYTE (c1); |
| 1366 | break; | 1443 | break; |
| 1367 | 1444 | ||
| 1368 | case '0': case '2': case '3': case '4': /* start composition */ | 1445 | case '0': case '2': case '3': case '4': /* start composition */ |
| 1369 | DECODE_COMPOSITION_START (c1); | 1446 | DECODE_COMPOSITION_START (c1); |
| 1370 | break; | 1447 | continue; |
| 1371 | 1448 | ||
| 1372 | case '1': /* end composition */ | 1449 | case '1': /* end composition */ |
| 1373 | DECODE_COMPOSITION_END (c1); | 1450 | DECODE_COMPOSITION_END (c1); |
| 1374 | break; | 1451 | continue; |
| 1375 | 1452 | ||
| 1376 | case '[': /* specification of direction */ | 1453 | case '[': /* specification of direction */ |
| 1377 | if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION) | 1454 | if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION) |
| @@ -1405,7 +1482,7 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 1405 | default: | 1482 | default: |
| 1406 | goto label_invalid_code; | 1483 | goto label_invalid_code; |
| 1407 | } | 1484 | } |
| 1408 | break; | 1485 | continue; |
| 1409 | 1486 | ||
| 1410 | default: | 1487 | default: |
| 1411 | if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION)) | 1488 | if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION)) |
| @@ -1421,57 +1498,44 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 1421 | DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2); | 1498 | DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2); |
| 1422 | } | 1499 | } |
| 1423 | else | 1500 | else |
| 1424 | { | 1501 | goto label_invalid_code; |
| 1425 | goto label_invalid_code; | 1502 | /* We must update these variables now. */ |
| 1426 | } | 1503 | charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0); |
| 1504 | charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1); | ||
| 1505 | continue; | ||
| 1427 | } | 1506 | } |
| 1428 | /* We must update these variables now. */ | 1507 | } |
| 1429 | charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0); | ||
| 1430 | charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1); | ||
| 1431 | break; | ||
| 1432 | 1508 | ||
| 1433 | label_invalid_code: | 1509 | /* Now we know CHARSET and 1st position code C1 of a character. |
| 1434 | if (COMPOSING_P (coding)) | 1510 | Produce a multibyte sequence for that character while getting |
| 1435 | DECODE_COMPOSITION_END ('1'); | 1511 | 2nd position code C2 if necessary. */ |
| 1436 | coding->produced_char += src - src_base; | 1512 | if (CHARSET_DIMENSION (charset) == 2) |
| 1437 | while (src_base < src) | 1513 | { |
| 1438 | *dst++ = (*src_base++) & 0x7F; | 1514 | ONE_MORE_BYTE (c2); |
| 1515 | if (c1 < 0x80 ? c2 < 0x20 || c2 >= 0x80 : c2 < 0xA0) | ||
| 1516 | /* C2 is not in a valid range. */ | ||
| 1517 | goto label_invalid_code; | ||
| 1439 | } | 1518 | } |
| 1519 | c = DECODE_ISO_CHARACTER (charset, c1, c2); | ||
| 1520 | EMIT_CHAR (c); | ||
| 1440 | continue; | 1521 | continue; |
| 1441 | 1522 | ||
| 1442 | label_end_of_loop: | 1523 | label_invalid_code: |
| 1443 | result = CODING_FINISH_INSUFFICIENT_SRC; | 1524 | coding->errors++; |
| 1444 | label_end_of_loop_2: | 1525 | if (COMPOSING_P (coding)) |
| 1526 | DECODE_COMPOSITION_END ('1'); | ||
| 1445 | src = src_base; | 1527 | src = src_base; |
| 1446 | break; | 1528 | c = *src++; |
| 1447 | } | 1529 | EMIT_CHAR (c); |
| 1448 | |||
| 1449 | if (src < src_end) | ||
| 1450 | { | ||
| 1451 | if (result == CODING_FINISH_NORMAL) | ||
| 1452 | result = CODING_FINISH_INSUFFICIENT_DST; | ||
| 1453 | else if (result != CODING_FINISH_INCONSISTENT_EOL | ||
| 1454 | && coding->mode & CODING_MODE_LAST_BLOCK) | ||
| 1455 | { | ||
| 1456 | /* This is the last block of the text to be decoded. We had | ||
| 1457 | better just flush out all remaining codes in the text | ||
| 1458 | although they are not valid characters. */ | ||
| 1459 | if (COMPOSING_P (coding)) | ||
| 1460 | DECODE_COMPOSITION_END ('1'); | ||
| 1461 | src_bytes = src_end - src; | ||
| 1462 | if (dst_bytes && (dst_end - dst < src_end - src)) | ||
| 1463 | src_end = src + (dst_end - dst); | ||
| 1464 | coding->produced_char += src_end - src; | ||
| 1465 | while (src < src_end) | ||
| 1466 | *dst++ = (*src++) & 0x7F; | ||
| 1467 | } | ||
| 1468 | } | 1530 | } |
| 1469 | 1531 | ||
| 1470 | coding->consumed = coding->consumed_char = src - source; | 1532 | label_end_of_loop: |
| 1533 | coding->consumed = coding->consumed_char = src_base - source; | ||
| 1471 | coding->produced = dst - destination; | 1534 | coding->produced = dst - destination; |
| 1472 | return result; | 1535 | return; |
| 1473 | } | 1536 | } |
| 1474 | 1537 | ||
| 1538 | |||
| 1475 | /* ISO2022 encoding stuff. */ | 1539 | /* ISO2022 encoding stuff. */ |
| 1476 | 1540 | ||
| 1477 | /* | 1541 | /* |
| @@ -1494,9 +1558,9 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 1494 | */ | 1558 | */ |
| 1495 | 1559 | ||
| 1496 | /* Produce codes (escape sequence) for designating CHARSET to graphic | 1560 | /* Produce codes (escape sequence) for designating CHARSET to graphic |
| 1497 | register REG. If <final-char> of CHARSET is '@', 'A', or 'B' and | 1561 | register REG at DST, and increment DST. If <final-char> of CHARSET is |
| 1498 | the coding system CODING allows, produce designation sequence of | 1562 | '@', 'A', or 'B' and the coding system CODING allows, produce |
| 1499 | short-form. */ | 1563 | designation sequence of short-form. */ |
| 1500 | 1564 | ||
| 1501 | #define ENCODE_DESIGNATION(charset, reg, coding) \ | 1565 | #define ENCODE_DESIGNATION(charset, reg, coding) \ |
| 1502 | do { \ | 1566 | do { \ |
| @@ -1504,13 +1568,14 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 1504 | char *intermediate_char_94 = "()*+"; \ | 1568 | char *intermediate_char_94 = "()*+"; \ |
| 1505 | char *intermediate_char_96 = ",-./"; \ | 1569 | char *intermediate_char_96 = ",-./"; \ |
| 1506 | int revision = CODING_SPEC_ISO_REVISION_NUMBER(coding, charset); \ | 1570 | int revision = CODING_SPEC_ISO_REVISION_NUMBER(coding, charset); \ |
| 1571 | \ | ||
| 1507 | if (revision < 255) \ | 1572 | if (revision < 255) \ |
| 1508 | { \ | 1573 | { \ |
| 1509 | *dst++ = ISO_CODE_ESC; \ | 1574 | *dst++ = ISO_CODE_ESC; \ |
| 1510 | *dst++ = '&'; \ | 1575 | *dst++ = '&'; \ |
| 1511 | *dst++ = '@' + revision; \ | 1576 | *dst++ = '@' + revision; \ |
| 1512 | } \ | 1577 | } \ |
| 1513 | *dst++ = ISO_CODE_ESC; \ | 1578 | *dst++ = ISO_CODE_ESC; \ |
| 1514 | if (CHARSET_DIMENSION (charset) == 1) \ | 1579 | if (CHARSET_DIMENSION (charset) == 1) \ |
| 1515 | { \ | 1580 | { \ |
| 1516 | if (CHARSET_CHARS (charset) == 94) \ | 1581 | if (CHARSET_CHARS (charset) == 94) \ |
| @@ -1523,15 +1588,15 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 1523 | *dst++ = '$'; \ | 1588 | *dst++ = '$'; \ |
| 1524 | if (CHARSET_CHARS (charset) == 94) \ | 1589 | if (CHARSET_CHARS (charset) == 94) \ |
| 1525 | { \ | 1590 | { \ |
| 1526 | if (! (coding->flags & CODING_FLAG_ISO_SHORT_FORM) \ | 1591 | if (! (coding->flags & CODING_FLAG_ISO_SHORT_FORM) \ |
| 1527 | || reg != 0 \ | 1592 | || reg != 0 \ |
| 1528 | || final_char < '@' || final_char > 'B') \ | 1593 | || final_char < '@' || final_char > 'B') \ |
| 1529 | *dst++ = (unsigned char) (intermediate_char_94[reg]); \ | 1594 | *dst++ = (unsigned char) (intermediate_char_94[reg]); \ |
| 1530 | } \ | 1595 | } \ |
| 1531 | else \ | 1596 | else \ |
| 1532 | *dst++ = (unsigned char) (intermediate_char_96[reg]); \ | 1597 | *dst++ = (unsigned char) (intermediate_char_96[reg]); \ |
| 1533 | } \ | 1598 | } \ |
| 1534 | *dst++ = final_char; \ | 1599 | *dst++ = final_char; \ |
| 1535 | CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset; \ | 1600 | CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset; \ |
| 1536 | } while (0) | 1601 | } while (0) |
| 1537 | 1602 | ||
| @@ -1544,10 +1609,7 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 1544 | if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \ | 1609 | if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \ |
| 1545 | *dst++ = ISO_CODE_ESC, *dst++ = 'N'; \ | 1610 | *dst++ = ISO_CODE_ESC, *dst++ = 'N'; \ |
| 1546 | else \ | 1611 | else \ |
| 1547 | { \ | 1612 | *dst++ = ISO_CODE_SS2; \ |
| 1548 | *dst++ = ISO_CODE_SS2; \ | ||
| 1549 | coding->fake_multibyte = 1; \ | ||
| 1550 | } \ | ||
| 1551 | CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1; \ | 1613 | CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1; \ |
| 1552 | } while (0) | 1614 | } while (0) |
| 1553 | 1615 | ||
| @@ -1556,10 +1618,7 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 1556 | if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \ | 1618 | if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \ |
| 1557 | *dst++ = ISO_CODE_ESC, *dst++ = 'O'; \ | 1619 | *dst++ = ISO_CODE_ESC, *dst++ = 'O'; \ |
| 1558 | else \ | 1620 | else \ |
| 1559 | { \ | 1621 | *dst++ = ISO_CODE_SS3; \ |
| 1560 | *dst++ = ISO_CODE_SS3; \ | ||
| 1561 | coding->fake_multibyte = 1; \ | ||
| 1562 | } \ | ||
| 1563 | CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1; \ | 1622 | CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1; \ |
| 1564 | } while (0) | 1623 | } while (0) |
| 1565 | 1624 | ||
| @@ -1567,15 +1626,15 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 1567 | escape sequence) for ISO2022 locking-shift functions (shift-in, | 1626 | escape sequence) for ISO2022 locking-shift functions (shift-in, |
| 1568 | shift-out, locking-shift-2, and locking-shift-3). */ | 1627 | shift-out, locking-shift-2, and locking-shift-3). */ |
| 1569 | 1628 | ||
| 1570 | #define ENCODE_SHIFT_IN \ | 1629 | #define ENCODE_SHIFT_IN \ |
| 1571 | do { \ | 1630 | do { \ |
| 1572 | *dst++ = ISO_CODE_SI; \ | 1631 | *dst++ = ISO_CODE_SI; \ |
| 1573 | CODING_SPEC_ISO_INVOCATION (coding, 0) = 0; \ | 1632 | CODING_SPEC_ISO_INVOCATION (coding, 0) = 0; \ |
| 1574 | } while (0) | 1633 | } while (0) |
| 1575 | 1634 | ||
| 1576 | #define ENCODE_SHIFT_OUT \ | 1635 | #define ENCODE_SHIFT_OUT \ |
| 1577 | do { \ | 1636 | do { \ |
| 1578 | *dst++ = ISO_CODE_SO; \ | 1637 | *dst++ = ISO_CODE_SO; \ |
| 1579 | CODING_SPEC_ISO_INVOCATION (coding, 0) = 1; \ | 1638 | CODING_SPEC_ISO_INVOCATION (coding, 0) = 1; \ |
| 1580 | } while (0) | 1639 | } while (0) |
| 1581 | 1640 | ||
| @@ -1585,9 +1644,9 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 1585 | CODING_SPEC_ISO_INVOCATION (coding, 0) = 2; \ | 1644 | CODING_SPEC_ISO_INVOCATION (coding, 0) = 2; \ |
| 1586 | } while (0) | 1645 | } while (0) |
| 1587 | 1646 | ||
| 1588 | #define ENCODE_LOCKING_SHIFT_3 \ | 1647 | #define ENCODE_LOCKING_SHIFT_3 \ |
| 1589 | do { \ | 1648 | do { \ |
| 1590 | *dst++ = ISO_CODE_ESC, *dst++ = 'o'; \ | 1649 | *dst++ = ISO_CODE_ESC, *dst++ = 'o'; \ |
| 1591 | CODING_SPEC_ISO_INVOCATION (coding, 0) = 3; \ | 1650 | CODING_SPEC_ISO_INVOCATION (coding, 0) = 3; \ |
| 1592 | } while (0) | 1651 | } while (0) |
| 1593 | 1652 | ||
| @@ -1595,7 +1654,6 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 1595 | CHARSET and whose position-code is C1. Designation and invocation | 1654 | CHARSET and whose position-code is C1. Designation and invocation |
| 1596 | sequences are also produced in advance if necessary. */ | 1655 | sequences are also produced in advance if necessary. */ |
| 1597 | 1656 | ||
| 1598 | |||
| 1599 | #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1) \ | 1657 | #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1) \ |
| 1600 | do { \ | 1658 | do { \ |
| 1601 | if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding)) \ | 1659 | if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding)) \ |
| @@ -1680,50 +1738,31 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 1680 | 1738 | ||
| 1681 | #define ENCODE_ISO_CHARACTER(charset, c1, c2) \ | 1739 | #define ENCODE_ISO_CHARACTER(charset, c1, c2) \ |
| 1682 | do { \ | 1740 | do { \ |
| 1683 | int c_alt, charset_alt; \ | 1741 | int alt_charset = charset; \ |
| 1684 | \ | 1742 | \ |
| 1685 | if (!NILP (translation_table) \ | 1743 | if (CHARSET_DEFINED_P (charset)) \ |
| 1686 | && ((c_alt = translate_char (translation_table, -1, \ | ||
| 1687 | charset, c1, c2)) \ | ||
| 1688 | >= 0)) \ | ||
| 1689 | SPLIT_CHAR (c_alt, charset_alt, c1, c2); \ | ||
| 1690 | else \ | ||
| 1691 | charset_alt = charset; \ | ||
| 1692 | if (CHARSET_DEFINED_P (charset_alt)) \ | ||
| 1693 | { \ | 1744 | { \ |
| 1694 | if (CHARSET_DIMENSION (charset_alt) == 1) \ | 1745 | if (CHARSET_DIMENSION (charset) == 1) \ |
| 1695 | { \ | 1746 | { \ |
| 1696 | if (charset == CHARSET_ASCII \ | 1747 | if (charset == CHARSET_ASCII \ |
| 1697 | && coding->flags & CODING_FLAG_ISO_USE_ROMAN) \ | 1748 | && coding->flags & CODING_FLAG_ISO_USE_ROMAN) \ |
| 1698 | charset_alt = charset_latin_jisx0201; \ | 1749 | alt_charset = charset_latin_jisx0201; \ |
| 1699 | ENCODE_ISO_CHARACTER_DIMENSION1 (charset_alt, c1); \ | 1750 | ENCODE_ISO_CHARACTER_DIMENSION1 (alt_charset, c1); \ |
| 1700 | } \ | 1751 | } \ |
| 1701 | else \ | 1752 | else \ |
| 1702 | { \ | 1753 | { \ |
| 1703 | if (charset == charset_jisx0208 \ | 1754 | if (charset == charset_jisx0208 \ |
| 1704 | && coding->flags & CODING_FLAG_ISO_USE_OLDJIS) \ | 1755 | && coding->flags & CODING_FLAG_ISO_USE_OLDJIS) \ |
| 1705 | charset_alt = charset_jisx0208_1978; \ | 1756 | alt_charset = charset_jisx0208_1978; \ |
| 1706 | ENCODE_ISO_CHARACTER_DIMENSION2 (charset_alt, c1, c2); \ | 1757 | ENCODE_ISO_CHARACTER_DIMENSION2 (alt_charset, c1, c2); \ |
| 1707 | } \ | 1758 | } \ |
| 1708 | } \ | 1759 | } \ |
| 1709 | else \ | 1760 | else \ |
| 1710 | { \ | 1761 | { \ |
| 1711 | if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \ | 1762 | *dst++ = c1; \ |
| 1712 | { \ | 1763 | if (c2 >= 0) \ |
| 1713 | *dst++ = charset & 0x7f; \ | 1764 | *dst++ = c2; \ |
| 1714 | *dst++ = c1 & 0x7f; \ | ||
| 1715 | if (c2) \ | ||
| 1716 | *dst++ = c2 & 0x7f; \ | ||
| 1717 | } \ | ||
| 1718 | else \ | ||
| 1719 | { \ | ||
| 1720 | *dst++ = charset; \ | ||
| 1721 | *dst++ = c1; \ | ||
| 1722 | if (c2) \ | ||
| 1723 | *dst++ = c2; \ | ||
| 1724 | } \ | ||
| 1725 | } \ | 1765 | } \ |
| 1726 | coding->consumed_char++; \ | ||
| 1727 | } while (0) | 1766 | } while (0) |
| 1728 | 1767 | ||
| 1729 | /* Produce designation and invocation codes at a place pointed by DST | 1768 | /* Produce designation and invocation codes at a place pointed by DST |
| @@ -1786,6 +1825,7 @@ encode_invocation_designation (charset, coding, dst) | |||
| 1786 | break; | 1825 | break; |
| 1787 | } | 1826 | } |
| 1788 | } | 1827 | } |
| 1828 | |||
| 1789 | return dst; | 1829 | return dst; |
| 1790 | } | 1830 | } |
| 1791 | 1831 | ||
| @@ -1849,19 +1889,19 @@ encode_invocation_designation (charset, coding, dst) | |||
| 1849 | 1889 | ||
| 1850 | /* The following three macros produce codes for indicating direction | 1890 | /* The following three macros produce codes for indicating direction |
| 1851 | of text. */ | 1891 | of text. */ |
| 1852 | #define ENCODE_CONTROL_SEQUENCE_INTRODUCER \ | 1892 | #define ENCODE_CONTROL_SEQUENCE_INTRODUCER \ |
| 1853 | do { \ | 1893 | do { \ |
| 1854 | if (coding->flags == CODING_FLAG_ISO_SEVEN_BITS) \ | 1894 | if (coding->flags == CODING_FLAG_ISO_SEVEN_BITS) \ |
| 1855 | *dst++ = ISO_CODE_ESC, *dst++ = '['; \ | 1895 | *dst++ = ISO_CODE_ESC, *dst++ = '['; \ |
| 1856 | else \ | 1896 | else \ |
| 1857 | *dst++ = ISO_CODE_CSI; \ | 1897 | *dst++ = ISO_CODE_CSI; \ |
| 1858 | } while (0) | 1898 | } while (0) |
| 1859 | 1899 | ||
| 1860 | #define ENCODE_DIRECTION_R2L \ | 1900 | #define ENCODE_DIRECTION_R2L \ |
| 1861 | ENCODE_CONTROL_SEQUENCE_INTRODUCER, *dst++ = '2', *dst++ = ']' | 1901 | ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst), *dst++ = '2', *dst++ = ']' |
| 1862 | 1902 | ||
| 1863 | #define ENCODE_DIRECTION_L2R \ | 1903 | #define ENCODE_DIRECTION_L2R \ |
| 1864 | ENCODE_CONTROL_SEQUENCE_INTRODUCER, *dst++ = '0', *dst++ = ']' | 1904 | ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst), *dst++ = '0', *dst++ = ']' |
| 1865 | 1905 | ||
| 1866 | /* Produce codes for designation and invocation to reset the graphic | 1906 | /* Produce codes for designation and invocation to reset the graphic |
| 1867 | planes and registers to initial state. */ | 1907 | planes and registers to initial state. */ |
| @@ -1879,64 +1919,54 @@ encode_invocation_designation (charset, coding, dst) | |||
| 1879 | } while (0) | 1919 | } while (0) |
| 1880 | 1920 | ||
| 1881 | /* Produce designation sequences of charsets in the line started from | 1921 | /* Produce designation sequences of charsets in the line started from |
| 1882 | SRC to a place pointed by *DSTP, and update DSTP. | 1922 | SRC to a place pointed by DST, and return updated DST. |
| 1883 | 1923 | ||
| 1884 | If the current block ends before any end-of-line, we may fail to | 1924 | If the current block ends before any end-of-line, we may fail to |
| 1885 | find all the necessary designations. */ | 1925 | find all the necessary designations. */ |
| 1886 | 1926 | ||
| 1887 | void | 1927 | static unsigned char * |
| 1888 | encode_designation_at_bol (coding, table, src, src_end, dstp) | 1928 | encode_designation_at_bol (coding, translation_table, src, src_end, dst) |
| 1889 | struct coding_system *coding; | 1929 | struct coding_system *coding; |
| 1890 | Lisp_Object table; | 1930 | Lisp_Object translation_table; |
| 1891 | unsigned char *src, *src_end, **dstp; | 1931 | unsigned char *src, *src_end, *dst; |
| 1892 | { | 1932 | { |
| 1893 | int charset, c, found = 0, reg; | 1933 | int charset, c, found = 0, reg; |
| 1894 | /* Table of charsets to be designated to each graphic register. */ | 1934 | /* Table of charsets to be designated to each graphic register. */ |
| 1895 | int r[4]; | 1935 | int r[4]; |
| 1896 | unsigned char *dst = *dstp; | ||
| 1897 | 1936 | ||
| 1898 | for (reg = 0; reg < 4; reg++) | 1937 | for (reg = 0; reg < 4; reg++) |
| 1899 | r[reg] = -1; | 1938 | r[reg] = -1; |
| 1900 | 1939 | ||
| 1901 | while (src < src_end && *src != '\n' && found < 4) | 1940 | while (found < 4) |
| 1902 | { | 1941 | { |
| 1903 | int bytes = BYTES_BY_CHAR_HEAD (*src); | 1942 | ONE_MORE_CHAR (c); |
| 1943 | if (c == '\n') | ||
| 1944 | break; | ||
| 1904 | 1945 | ||
| 1905 | if (NILP (table)) | 1946 | charset = CHAR_CHARSET (c); |
| 1906 | charset = CHARSET_AT (src); | ||
| 1907 | else | ||
| 1908 | { | ||
| 1909 | int c_alt; | ||
| 1910 | unsigned char c1, c2; | ||
| 1911 | |||
| 1912 | SPLIT_STRING(src, bytes, charset, c1, c2); | ||
| 1913 | if ((c_alt = translate_char (table, -1, charset, c1, c2)) >= 0) | ||
| 1914 | charset = CHAR_CHARSET (c_alt); | ||
| 1915 | } | ||
| 1916 | |||
| 1917 | reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset); | 1947 | reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset); |
| 1918 | if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && r[reg] < 0) | 1948 | if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && r[reg] < 0) |
| 1919 | { | 1949 | { |
| 1920 | found++; | 1950 | found++; |
| 1921 | r[reg] = charset; | 1951 | r[reg] = charset; |
| 1922 | } | 1952 | } |
| 1923 | |||
| 1924 | src += bytes; | ||
| 1925 | } | 1953 | } |
| 1926 | 1954 | ||
| 1955 | label_end_of_loop: | ||
| 1927 | if (found) | 1956 | if (found) |
| 1928 | { | 1957 | { |
| 1929 | for (reg = 0; reg < 4; reg++) | 1958 | for (reg = 0; reg < 4; reg++) |
| 1930 | if (r[reg] >= 0 | 1959 | if (r[reg] >= 0 |
| 1931 | && CODING_SPEC_ISO_DESIGNATION (coding, reg) != r[reg]) | 1960 | && CODING_SPEC_ISO_DESIGNATION (coding, reg) != r[reg]) |
| 1932 | ENCODE_DESIGNATION (r[reg], reg, coding); | 1961 | ENCODE_DESIGNATION (r[reg], reg, coding); |
| 1933 | *dstp = dst; | ||
| 1934 | } | 1962 | } |
| 1963 | |||
| 1964 | return dst; | ||
| 1935 | } | 1965 | } |
| 1936 | 1966 | ||
| 1937 | /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */ | 1967 | /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */ |
| 1938 | 1968 | ||
| 1939 | int | 1969 | static void |
| 1940 | encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | 1970 | encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) |
| 1941 | struct coding_system *coding; | 1971 | struct coding_system *coding; |
| 1942 | unsigned char *source, *destination; | 1972 | unsigned char *source, *destination; |
| @@ -1946,37 +1976,48 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 1946 | unsigned char *src_end = source + src_bytes; | 1976 | unsigned char *src_end = source + src_bytes; |
| 1947 | unsigned char *dst = destination; | 1977 | unsigned char *dst = destination; |
| 1948 | unsigned char *dst_end = destination + dst_bytes; | 1978 | unsigned char *dst_end = destination + dst_bytes; |
| 1949 | /* Since the maximum bytes produced by each loop is 14, we subtract 13 | 1979 | /* Since the maximum bytes produced by each loop is 20, we subtract 19 |
| 1950 | from DST_END to assure overflow checking is necessary only at the | 1980 | from DST_END to assure overflow checking is necessary only at the |
| 1951 | head of loop. */ | 1981 | head of loop. */ |
| 1952 | unsigned char *adjusted_dst_end = dst_end - 13; | 1982 | unsigned char *adjusted_dst_end = dst_end - 19; |
| 1953 | Lisp_Object translation_table | 1983 | /* SRC_BASE remembers the start position in source in each loop. |
| 1954 | = coding->translation_table_for_encode; | 1984 | The loop will be exited when there's not enough source text to |
| 1955 | int result = CODING_FINISH_NORMAL; | 1985 | analyze multi-byte codes (within macro ONE_MORE_CHAR), or when |
| 1986 | there's not enough destination area to produce encoded codes | ||
| 1987 | (within macro EMIT_BYTES). */ | ||
| 1988 | unsigned char *src_base; | ||
| 1989 | int c; | ||
| 1990 | Lisp_Object translation_table; | ||
| 1956 | 1991 | ||
| 1957 | if (!NILP (Venable_character_translation) && NILP (translation_table)) | 1992 | if (NILP (Venable_character_translation)) |
| 1958 | translation_table = Vstandard_translation_table_for_encode; | 1993 | translation_table = Qnil; |
| 1994 | else | ||
| 1995 | { | ||
| 1996 | translation_table = coding->translation_table_for_encode; | ||
| 1997 | if (NILP (translation_table)) | ||
| 1998 | translation_table = Vstandard_translation_table_for_encode; | ||
| 1999 | } | ||
| 1959 | 2000 | ||
| 1960 | coding->consumed_char = 0; | 2001 | coding->consumed_char = 0; |
| 1961 | coding->fake_multibyte = 0; | 2002 | coding->errors = 0; |
| 1962 | while (src < src_end && (dst_bytes | 2003 | while (1) |
| 1963 | ? (dst < adjusted_dst_end) | ||
| 1964 | : (dst < src - 13))) | ||
| 1965 | { | 2004 | { |
| 1966 | /* SRC_BASE remembers the start position in source in each loop. | 2005 | int charset, c1, c2; |
| 1967 | The loop will be exited when there's not enough source text | 2006 | |
| 1968 | to analyze multi-byte codes (within macros ONE_MORE_BYTE, | 2007 | src_base = src; |
| 1969 | TWO_MORE_BYTES, and THREE_MORE_BYTES). In that case, SRC is | 2008 | |
| 1970 | reset to SRC_BASE before exiting. */ | 2009 | if (dst >= (dst_bytes ? adjusted_dst_end : (src - 19))) |
| 1971 | unsigned char *src_base = src; | 2010 | { |
| 1972 | int charset, c1, c2, c3, c4; | 2011 | coding->result = CODING_FINISH_INSUFFICIENT_DST; |
| 2012 | break; | ||
| 2013 | } | ||
| 1973 | 2014 | ||
| 1974 | if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL | 2015 | if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL |
| 1975 | && CODING_SPEC_ISO_BOL (coding)) | 2016 | && CODING_SPEC_ISO_BOL (coding)) |
| 1976 | { | 2017 | { |
| 1977 | /* We have to produce designation sequences if any now. */ | 2018 | /* We have to produce designation sequences if any now. */ |
| 1978 | encode_designation_at_bol (coding, translation_table, | 2019 | dst = encode_designation_at_bol (coding, translation_table, |
| 1979 | src, src_end, &dst); | 2020 | src, src_end, dst); |
| 1980 | CODING_SPEC_ISO_BOL (coding) = 0; | 2021 | CODING_SPEC_ISO_BOL (coding) = 0; |
| 1981 | } | 2022 | } |
| 1982 | 2023 | ||
| @@ -2017,8 +2058,6 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 2017 | { | 2058 | { |
| 2018 | SPLIT_CHAR (c, charset, c1, c2); | 2059 | SPLIT_CHAR (c, charset, c1, c2); |
| 2019 | ENCODE_ISO_CHARACTER (charset, c1, c2); | 2060 | ENCODE_ISO_CHARACTER (charset, c1, c2); |
| 2020 | /* But, we didn't consume a character in SRC. */ | ||
| 2021 | coding->consumed_char--; | ||
| 2022 | if (coding->composing == COMPOSITION_WITH_RULE_ALTCHARS) | 2061 | if (coding->composing == COMPOSITION_WITH_RULE_ALTCHARS) |
| 2023 | coding->composition_rule_follows = 1; | 2062 | coding->composition_rule_follows = 1; |
| 2024 | } | 2063 | } |
| @@ -2035,129 +2074,66 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 2035 | } | 2074 | } |
| 2036 | } | 2075 | } |
| 2037 | 2076 | ||
| 2038 | c1 = *src++; | 2077 | ONE_MORE_CHAR (c); |
| 2039 | /* Now encode one character. C1 is a control character, an | ||
| 2040 | ASCII character, or a leading-code of multi-byte character. */ | ||
| 2041 | switch (emacs_code_class[c1]) | ||
| 2042 | { | ||
| 2043 | case EMACS_ascii_code: | ||
| 2044 | c2 = 0; | ||
| 2045 | ENCODE_ISO_CHARACTER (CHARSET_ASCII, c1, /* dummy */ c2); | ||
| 2046 | break; | ||
| 2047 | 2078 | ||
| 2048 | case EMACS_control_code: | 2079 | /* Now encode the character C. */ |
| 2049 | if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL) | 2080 | if (c < 0x20 || c == 0x7F) |
| 2050 | ENCODE_RESET_PLANE_AND_REGISTER; | 2081 | { |
| 2051 | *dst++ = c1; | 2082 | if (c == '\r') |
| 2052 | coding->consumed_char++; | ||
| 2053 | break; | ||
| 2054 | |||
| 2055 | case EMACS_carriage_return_code: | ||
| 2056 | if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)) | ||
| 2057 | { | ||
| 2058 | if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL) | ||
| 2059 | ENCODE_RESET_PLANE_AND_REGISTER; | ||
| 2060 | *dst++ = c1; | ||
| 2061 | coding->consumed_char++; | ||
| 2062 | break; | ||
| 2063 | } | ||
| 2064 | /* fall down to treat '\r' as '\n' ... */ | ||
| 2065 | |||
| 2066 | case EMACS_linefeed_code: | ||
| 2067 | if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL) | ||
| 2068 | ENCODE_RESET_PLANE_AND_REGISTER; | ||
| 2069 | if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL) | ||
| 2070 | bcopy (coding->spec.iso2022.initial_designation, | ||
| 2071 | coding->spec.iso2022.current_designation, | ||
| 2072 | sizeof coding->spec.iso2022.initial_designation); | ||
| 2073 | if (coding->eol_type == CODING_EOL_LF | ||
| 2074 | || coding->eol_type == CODING_EOL_UNDECIDED) | ||
| 2075 | *dst++ = ISO_CODE_LF; | ||
| 2076 | else if (coding->eol_type == CODING_EOL_CRLF) | ||
| 2077 | *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF; | ||
| 2078 | else | ||
| 2079 | *dst++ = ISO_CODE_CR; | ||
| 2080 | CODING_SPEC_ISO_BOL (coding) = 1; | ||
| 2081 | coding->consumed_char++; | ||
| 2082 | break; | ||
| 2083 | |||
| 2084 | case EMACS_leading_code_2: | ||
| 2085 | ONE_MORE_BYTE (c2); | ||
| 2086 | c3 = 0; | ||
| 2087 | if (c2 < 0xA0) | ||
| 2088 | { | 2083 | { |
| 2089 | /* invalid sequence */ | 2084 | if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)) |
| 2090 | *dst++ = c1; | 2085 | { |
| 2091 | src--; | 2086 | if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL) |
| 2092 | coding->consumed_char++; | 2087 | ENCODE_RESET_PLANE_AND_REGISTER; |
| 2088 | *dst++ = c; | ||
| 2089 | continue; | ||
| 2090 | } | ||
| 2091 | /* fall down to treat '\r' as '\n' ... */ | ||
| 2092 | c = '\n'; | ||
| 2093 | } | 2093 | } |
| 2094 | else | 2094 | if (c == '\n') |
| 2095 | ENCODE_ISO_CHARACTER (c1, c2, /* dummy */ c3); | ||
| 2096 | break; | ||
| 2097 | |||
| 2098 | case EMACS_leading_code_3: | ||
| 2099 | TWO_MORE_BYTES (c2, c3); | ||
| 2100 | c4 = 0; | ||
| 2101 | if (c2 < 0xA0 || c3 < 0xA0) | ||
| 2102 | { | 2095 | { |
| 2103 | /* invalid sequence */ | 2096 | if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL) |
| 2104 | *dst++ = c1; | 2097 | ENCODE_RESET_PLANE_AND_REGISTER; |
| 2105 | src -= 2; | 2098 | if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL) |
| 2106 | coding->consumed_char++; | 2099 | bcopy (coding->spec.iso2022.initial_designation, |
| 2100 | coding->spec.iso2022.current_designation, | ||
| 2101 | sizeof coding->spec.iso2022.initial_designation); | ||
| 2102 | if (coding->eol_type == CODING_EOL_LF | ||
| 2103 | || coding->eol_type == CODING_EOL_UNDECIDED) | ||
| 2104 | *dst++ = ISO_CODE_LF; | ||
| 2105 | else if (coding->eol_type == CODING_EOL_CRLF) | ||
| 2106 | *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF; | ||
| 2107 | else | ||
| 2108 | *dst++ = ISO_CODE_CR; | ||
| 2109 | CODING_SPEC_ISO_BOL (coding) = 1; | ||
| 2107 | } | 2110 | } |
| 2108 | else if (c1 < LEADING_CODE_PRIVATE_11) | 2111 | else |
| 2109 | ENCODE_ISO_CHARACTER (c1, c2, c3); | ||
| 2110 | else | ||
| 2111 | ENCODE_ISO_CHARACTER (c2, c3, /* dummy */ c4); | ||
| 2112 | break; | ||
| 2113 | |||
| 2114 | case EMACS_leading_code_4: | ||
| 2115 | THREE_MORE_BYTES (c2, c3, c4); | ||
| 2116 | if (c2 < 0xA0 || c3 < 0xA0 || c4 < 0xA0) | ||
| 2117 | { | 2112 | { |
| 2118 | /* invalid sequence */ | 2113 | if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL) |
| 2119 | *dst++ = c1; | 2114 | ENCODE_RESET_PLANE_AND_REGISTER; |
| 2120 | src -= 3; | 2115 | *dst++ = c; |
| 2121 | coding->consumed_char++; | ||
| 2122 | } | 2116 | } |
| 2123 | else | ||
| 2124 | ENCODE_ISO_CHARACTER (c2, c3, c4); | ||
| 2125 | break; | ||
| 2126 | |||
| 2127 | case EMACS_invalid_code: | ||
| 2128 | if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL) | ||
| 2129 | ENCODE_RESET_PLANE_AND_REGISTER; | ||
| 2130 | *dst++ = c1; | ||
| 2131 | coding->consumed_char++; | ||
| 2132 | break; | ||
| 2133 | } | 2117 | } |
| 2134 | continue; | 2118 | else if (ASCII_BYTE_P (c)) |
| 2135 | label_end_of_loop: | 2119 | ENCODE_ISO_CHARACTER (CHARSET_ASCII, c, /* dummy */ c1); |
| 2136 | result = CODING_FINISH_INSUFFICIENT_SRC; | 2120 | else if (SINGLE_BYTE_CHAR_P (c)) |
| 2137 | src = src_base; | ||
| 2138 | break; | ||
| 2139 | } | ||
| 2140 | |||
| 2141 | if (src < src_end && result == CODING_FINISH_NORMAL) | ||
| 2142 | result = CODING_FINISH_INSUFFICIENT_DST; | ||
| 2143 | |||
| 2144 | /* If this is the last block of the text to be encoded, we must | ||
| 2145 | reset graphic planes and registers to the initial state, and | ||
| 2146 | flush out the carryover if any. */ | ||
| 2147 | if (coding->mode & CODING_MODE_LAST_BLOCK) | ||
| 2148 | { | ||
| 2149 | ENCODE_RESET_PLANE_AND_REGISTER; | ||
| 2150 | if (COMPOSING_P (coding)) | ||
| 2151 | *dst++ = ISO_CODE_ESC, *dst++ = '1'; | ||
| 2152 | if (result == CODING_FINISH_INSUFFICIENT_SRC) | ||
| 2153 | { | 2121 | { |
| 2154 | while (src < src_end && dst < dst_end) | 2122 | *dst++ = c; |
| 2155 | *dst++ = *src++; | 2123 | coding->errors++; |
| 2156 | } | 2124 | } |
| 2125 | else | ||
| 2126 | { | ||
| 2127 | SPLIT_CHAR (c, charset, c1, c2); | ||
| 2128 | ENCODE_ISO_CHARACTER (charset, c1, c2); | ||
| 2129 | } | ||
| 2130 | |||
| 2131 | coding->consumed_char++; | ||
| 2157 | } | 2132 | } |
| 2158 | coding->consumed = src - source; | 2133 | |
| 2134 | label_end_of_loop: | ||
| 2135 | coding->consumed = src_base - source; | ||
| 2159 | coding->produced = coding->produced_char = dst - destination; | 2136 | coding->produced = coding->produced_char = dst - destination; |
| 2160 | return result; | ||
| 2161 | } | 2137 | } |
| 2162 | 2138 | ||
| 2163 | 2139 | ||
| @@ -2235,75 +2211,6 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 2235 | b2 += b2 < 0x3F ? 0x40 : 0x62; \ | 2211 | b2 += b2 < 0x3F ? 0x40 : 0x62; \ |
| 2236 | } while (0) | 2212 | } while (0) |
| 2237 | 2213 | ||
| 2238 | #define DECODE_SJIS_BIG5_CHARACTER(charset, c1, c2) \ | ||
| 2239 | do { \ | ||
| 2240 | int c_alt, charset_alt = (charset); \ | ||
| 2241 | if (!NILP (translation_table) \ | ||
| 2242 | && ((c_alt = translate_char (translation_table, \ | ||
| 2243 | -1, (charset), c1, c2)) >= 0)) \ | ||
| 2244 | SPLIT_CHAR (c_alt, charset_alt, c1, c2); \ | ||
| 2245 | if (charset_alt == CHARSET_ASCII || charset_alt < 0) \ | ||
| 2246 | DECODE_CHARACTER_ASCII (c1); \ | ||
| 2247 | else if (CHARSET_DIMENSION (charset_alt) == 1) \ | ||
| 2248 | DECODE_CHARACTER_DIMENSION1 (charset_alt, c1); \ | ||
| 2249 | else \ | ||
| 2250 | DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2); \ | ||
| 2251 | } while (0) | ||
| 2252 | |||
| 2253 | #define ENCODE_SJIS_BIG5_CHARACTER(charset, c1, c2) \ | ||
| 2254 | do { \ | ||
| 2255 | int c_alt, charset_alt; \ | ||
| 2256 | if (!NILP (translation_table) \ | ||
| 2257 | && ((c_alt = translate_char (translation_table, -1, \ | ||
| 2258 | charset, c1, c2)) \ | ||
| 2259 | >= 0)) \ | ||
| 2260 | SPLIT_CHAR (c_alt, charset_alt, c1, c2); \ | ||
| 2261 | else \ | ||
| 2262 | charset_alt = charset; \ | ||
| 2263 | if (charset_alt == charset_ascii) \ | ||
| 2264 | *dst++ = c1; \ | ||
| 2265 | else if (CHARSET_DIMENSION (charset_alt) == 1) \ | ||
| 2266 | { \ | ||
| 2267 | if (sjis_p && charset_alt == charset_katakana_jisx0201) \ | ||
| 2268 | *dst++ = c1; \ | ||
| 2269 | else if (sjis_p && charset_alt == charset_latin_jisx0201) \ | ||
| 2270 | *dst++ = c1 & 0x7F; \ | ||
| 2271 | else \ | ||
| 2272 | { \ | ||
| 2273 | *dst++ = charset_alt, *dst++ = c1; \ | ||
| 2274 | coding->fake_multibyte = 1; \ | ||
| 2275 | } \ | ||
| 2276 | } \ | ||
| 2277 | else \ | ||
| 2278 | { \ | ||
| 2279 | c1 &= 0x7F, c2 &= 0x7F; \ | ||
| 2280 | if (sjis_p && (charset_alt == charset_jisx0208 \ | ||
| 2281 | || charset_alt == charset_jisx0208_1978))\ | ||
| 2282 | { \ | ||
| 2283 | unsigned char s1, s2; \ | ||
| 2284 | \ | ||
| 2285 | ENCODE_SJIS (c1, c2, s1, s2); \ | ||
| 2286 | *dst++ = s1, *dst++ = s2; \ | ||
| 2287 | coding->fake_multibyte = 1; \ | ||
| 2288 | } \ | ||
| 2289 | else if (!sjis_p \ | ||
| 2290 | && (charset_alt == charset_big5_1 \ | ||
| 2291 | || charset_alt == charset_big5_2)) \ | ||
| 2292 | { \ | ||
| 2293 | unsigned char b1, b2; \ | ||
| 2294 | \ | ||
| 2295 | ENCODE_BIG5 (charset_alt, c1, c2, b1, b2); \ | ||
| 2296 | *dst++ = b1, *dst++ = b2; \ | ||
| 2297 | } \ | ||
| 2298 | else \ | ||
| 2299 | { \ | ||
| 2300 | *dst++ = charset_alt, *dst++ = c1, *dst++ = c2; \ | ||
| 2301 | coding->fake_multibyte = 1; \ | ||
| 2302 | } \ | ||
| 2303 | } \ | ||
| 2304 | coding->consumed_char++; \ | ||
| 2305 | } while (0) | ||
| 2306 | |||
| 2307 | /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". | 2214 | /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". |
| 2308 | Check if a text is encoded in SJIS. If it is, return | 2215 | Check if a text is encoded in SJIS. If it is, return |
| 2309 | CODING_CATEGORY_MASK_SJIS, else return 0. */ | 2216 | CODING_CATEGORY_MASK_SJIS, else return 0. */ |
| @@ -2312,17 +2219,22 @@ int | |||
| 2312 | detect_coding_sjis (src, src_end) | 2219 | detect_coding_sjis (src, src_end) |
| 2313 | unsigned char *src, *src_end; | 2220 | unsigned char *src, *src_end; |
| 2314 | { | 2221 | { |
| 2315 | unsigned char c; | 2222 | int c; |
| 2223 | /* Dummy for ONE_MORE_BYTE. */ | ||
| 2224 | struct coding_system dummy_coding; | ||
| 2225 | struct coding_system *coding = &dummy_coding; | ||
| 2316 | 2226 | ||
| 2317 | while (src < src_end) | 2227 | while (1) |
| 2318 | { | 2228 | { |
| 2319 | c = *src++; | 2229 | ONE_MORE_BYTE (c); |
| 2320 | if ((c >= 0x80 && c < 0xA0) || c >= 0xE0) | 2230 | if ((c >= 0x80 && c < 0xA0) || c >= 0xE0) |
| 2321 | { | 2231 | { |
| 2322 | if (src < src_end && *src++ < 0x40) | 2232 | ONE_MORE_BYTE (c); |
| 2233 | if (c < 0x40) | ||
| 2323 | return 0; | 2234 | return 0; |
| 2324 | } | 2235 | } |
| 2325 | } | 2236 | } |
| 2237 | label_end_of_loop: | ||
| 2326 | return CODING_CATEGORY_MASK_SJIS; | 2238 | return CODING_CATEGORY_MASK_SJIS; |
| 2327 | } | 2239 | } |
| 2328 | 2240 | ||
| @@ -2334,20 +2246,22 @@ int | |||
| 2334 | detect_coding_big5 (src, src_end) | 2246 | detect_coding_big5 (src, src_end) |
| 2335 | unsigned char *src, *src_end; | 2247 | unsigned char *src, *src_end; |
| 2336 | { | 2248 | { |
| 2337 | unsigned char c; | 2249 | int c; |
| 2250 | /* Dummy for ONE_MORE_BYTE. */ | ||
| 2251 | struct coding_system dummy_coding; | ||
| 2252 | struct coding_system *coding = &dummy_coding; | ||
| 2338 | 2253 | ||
| 2339 | while (src < src_end) | 2254 | while (1) |
| 2340 | { | 2255 | { |
| 2341 | c = *src++; | 2256 | ONE_MORE_BYTE (c); |
| 2342 | if (c >= 0xA1) | 2257 | if (c >= 0xA1) |
| 2343 | { | 2258 | { |
| 2344 | if (src >= src_end) | 2259 | ONE_MORE_BYTE (c); |
| 2345 | break; | ||
| 2346 | c = *src++; | ||
| 2347 | if (c < 0x40 || (c >= 0x7F && c <= 0xA0)) | 2260 | if (c < 0x40 || (c >= 0x7F && c <= 0xA0)) |
| 2348 | return 0; | 2261 | return 0; |
| 2349 | } | 2262 | } |
| 2350 | } | 2263 | } |
| 2264 | label_end_of_loop: | ||
| 2351 | return CODING_CATEGORY_MASK_BIG5; | 2265 | return CODING_CATEGORY_MASK_BIG5; |
| 2352 | } | 2266 | } |
| 2353 | 2267 | ||
| @@ -2369,10 +2283,13 @@ detect_coding_utf_8 (src, src_end) | |||
| 2369 | { | 2283 | { |
| 2370 | unsigned char c; | 2284 | unsigned char c; |
| 2371 | int seq_maybe_bytes; | 2285 | int seq_maybe_bytes; |
| 2286 | /* Dummy for ONE_MORE_BYTE. */ | ||
| 2287 | struct coding_system dummy_coding; | ||
| 2288 | struct coding_system *coding = &dummy_coding; | ||
| 2372 | 2289 | ||
| 2373 | while (src < src_end) | 2290 | while (1) |
| 2374 | { | 2291 | { |
| 2375 | c = *src++; | 2292 | ONE_MORE_BYTE (c); |
| 2376 | if (UTF_8_1_OCTET_P (c)) | 2293 | if (UTF_8_1_OCTET_P (c)) |
| 2377 | continue; | 2294 | continue; |
| 2378 | else if (UTF_8_2_OCTET_LEADING_P (c)) | 2295 | else if (UTF_8_2_OCTET_LEADING_P (c)) |
| @@ -2390,10 +2307,7 @@ detect_coding_utf_8 (src, src_end) | |||
| 2390 | 2307 | ||
| 2391 | do | 2308 | do |
| 2392 | { | 2309 | { |
| 2393 | if (src >= src_end) | 2310 | ONE_MORE_BYTE (c); |
| 2394 | return CODING_CATEGORY_MASK_UTF_8; | ||
| 2395 | |||
| 2396 | c = *src++; | ||
| 2397 | if (!UTF_8_EXTRA_OCTET_P (c)) | 2311 | if (!UTF_8_EXTRA_OCTET_P (c)) |
| 2398 | return 0; | 2312 | return 0; |
| 2399 | seq_maybe_bytes--; | 2313 | seq_maybe_bytes--; |
| @@ -2401,6 +2315,7 @@ detect_coding_utf_8 (src, src_end) | |||
| 2401 | while (seq_maybe_bytes > 0); | 2315 | while (seq_maybe_bytes > 0); |
| 2402 | } | 2316 | } |
| 2403 | 2317 | ||
| 2318 | label_end_of_loop: | ||
| 2404 | return CODING_CATEGORY_MASK_UTF_8; | 2319 | return CODING_CATEGORY_MASK_UTF_8; |
| 2405 | } | 2320 | } |
| 2406 | 2321 | ||
| @@ -2424,20 +2339,26 @@ int | |||
| 2424 | detect_coding_utf_16 (src, src_end) | 2339 | detect_coding_utf_16 (src, src_end) |
| 2425 | unsigned char *src, *src_end; | 2340 | unsigned char *src, *src_end; |
| 2426 | { | 2341 | { |
| 2427 | if ((src + 1) >= src_end) return 0; | 2342 | unsigned char c1, c2; |
| 2343 | /* Dummy for TWO_MORE_BYTES. */ | ||
| 2344 | struct coding_system dummy_coding; | ||
| 2345 | struct coding_system *coding = &dummy_coding; | ||
| 2428 | 2346 | ||
| 2429 | if ((src[0] == 0xFF) && (src[1] == 0xFE)) | 2347 | TWO_MORE_BYTES (c1, c2); |
| 2348 | |||
| 2349 | if ((c1 == 0xFF) && (c2 == 0xFE)) | ||
| 2430 | return CODING_CATEGORY_MASK_UTF_16_LE; | 2350 | return CODING_CATEGORY_MASK_UTF_16_LE; |
| 2431 | else if ((src[0] == 0xFE) && (src[1] == 0xFF)) | 2351 | else if ((c1 == 0xFE) && (c2 == 0xFF)) |
| 2432 | return CODING_CATEGORY_MASK_UTF_16_BE; | 2352 | return CODING_CATEGORY_MASK_UTF_16_BE; |
| 2433 | 2353 | ||
| 2354 | label_end_of_loop: | ||
| 2434 | return 0; | 2355 | return 0; |
| 2435 | } | 2356 | } |
| 2436 | 2357 | ||
| 2437 | /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". | 2358 | /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". |
| 2438 | If SJIS_P is 1, decode SJIS text, else decode BIG5 text. */ | 2359 | If SJIS_P is 1, decode SJIS text, else decode BIG5 text. */ |
| 2439 | 2360 | ||
| 2440 | int | 2361 | static void |
| 2441 | decode_coding_sjis_big5 (coding, source, destination, | 2362 | decode_coding_sjis_big5 (coding, source, destination, |
| 2442 | src_bytes, dst_bytes, sjis_p) | 2363 | src_bytes, dst_bytes, sjis_p) |
| 2443 | struct coding_system *coding; | 2364 | struct coding_system *coding; |
| @@ -2449,168 +2370,123 @@ decode_coding_sjis_big5 (coding, source, destination, | |||
| 2449 | unsigned char *src_end = source + src_bytes; | 2370 | unsigned char *src_end = source + src_bytes; |
| 2450 | unsigned char *dst = destination; | 2371 | unsigned char *dst = destination; |
| 2451 | unsigned char *dst_end = destination + dst_bytes; | 2372 | unsigned char *dst_end = destination + dst_bytes; |
| 2452 | /* Since the maximum bytes produced by each loop is 4, we subtract 3 | 2373 | /* SRC_BASE remembers the start position in source in each loop. |
| 2453 | from DST_END to assure overflow checking is necessary only at the | 2374 | The loop will be exited when there's not enough source code |
| 2454 | head of loop. */ | 2375 | (within macro ONE_MORE_BYTE), or when there's not enough |
| 2455 | unsigned char *adjusted_dst_end = dst_end - 3; | 2376 | destination area to produce a character (within macro |
| 2456 | Lisp_Object translation_table | 2377 | EMIT_CHAR). */ |
| 2457 | = coding->translation_table_for_decode; | 2378 | unsigned char *src_base; |
| 2458 | int result = CODING_FINISH_NORMAL; | 2379 | Lisp_Object translation_table; |
| 2459 | 2380 | ||
| 2460 | if (!NILP (Venable_character_translation) && NILP (translation_table)) | 2381 | if (NILP (Venable_character_translation)) |
| 2461 | translation_table = Vstandard_translation_table_for_decode; | 2382 | translation_table = Qnil; |
| 2383 | else | ||
| 2384 | { | ||
| 2385 | translation_table = coding->translation_table_for_decode; | ||
| 2386 | if (NILP (translation_table)) | ||
| 2387 | translation_table = Vstandard_translation_table_for_decode; | ||
| 2388 | } | ||
| 2462 | 2389 | ||
| 2463 | coding->produced_char = 0; | 2390 | coding->produced_char = 0; |
| 2464 | coding->fake_multibyte = 0; | 2391 | while (1) |
| 2465 | while (src < src_end && (dst_bytes | ||
| 2466 | ? (dst < adjusted_dst_end) | ||
| 2467 | : (dst < src - 3))) | ||
| 2468 | { | 2392 | { |
| 2469 | /* SRC_BASE remembers the start position in source in each loop. | 2393 | int c, charset, c1, c2; |
| 2470 | The loop will be exited when there's not enough source text | 2394 | |
| 2471 | to analyze two-byte character (within macro ONE_MORE_BYTE). | 2395 | src_base = src; |
| 2472 | In that case, SRC is reset to SRC_BASE before exiting. */ | 2396 | ONE_MORE_BYTE (c1); |
| 2473 | unsigned char *src_base = src; | 2397 | |
| 2474 | unsigned char c1 = *src++, c2, c3, c4; | 2398 | if (c1 < 0x80) |
| 2475 | |||
| 2476 | if (c1 < 0x20) | ||
| 2477 | { | 2399 | { |
| 2478 | if (c1 == '\r') | 2400 | charset = CHARSET_ASCII; |
| 2401 | if (c1 < 0x20) | ||
| 2479 | { | 2402 | { |
| 2480 | if (coding->eol_type == CODING_EOL_CRLF) | 2403 | if (c1 == '\r') |
| 2481 | { | 2404 | { |
| 2482 | ONE_MORE_BYTE (c2); | 2405 | if (coding->eol_type == CODING_EOL_CRLF) |
| 2483 | if (c2 == '\n') | ||
| 2484 | *dst++ = c2; | ||
| 2485 | else if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL) | ||
| 2486 | { | 2406 | { |
| 2487 | result = CODING_FINISH_INCONSISTENT_EOL; | 2407 | ONE_MORE_BYTE (c2); |
| 2488 | goto label_end_of_loop_2; | 2408 | if (c2 == '\n') |
| 2409 | c1 = c2; | ||
| 2410 | else if (coding->mode | ||
| 2411 | & CODING_MODE_INHIBIT_INCONSISTENT_EOL) | ||
| 2412 | { | ||
| 2413 | coding->result = CODING_FINISH_INCONSISTENT_EOL; | ||
| 2414 | goto label_end_of_loop; | ||
| 2415 | } | ||
| 2416 | else | ||
| 2417 | /* To process C2 again, SRC is subtracted by 1. */ | ||
| 2418 | src--; | ||
| 2489 | } | 2419 | } |
| 2490 | else | 2420 | else if (coding->eol_type == CODING_EOL_CR) |
| 2491 | /* To process C2 again, SRC is subtracted by 1. */ | 2421 | c1 = '\n'; |
| 2492 | *dst++ = c1, src--; | 2422 | } |
| 2423 | else if (c1 == '\n' | ||
| 2424 | && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL) | ||
| 2425 | && (coding->eol_type == CODING_EOL_CR | ||
| 2426 | || coding->eol_type == CODING_EOL_CRLF)) | ||
| 2427 | { | ||
| 2428 | coding->result = CODING_FINISH_INCONSISTENT_EOL; | ||
| 2429 | goto label_end_of_loop; | ||
| 2493 | } | 2430 | } |
| 2494 | else if (coding->eol_type == CODING_EOL_CR) | ||
| 2495 | *dst++ = '\n'; | ||
| 2496 | else | ||
| 2497 | *dst++ = c1; | ||
| 2498 | } | ||
| 2499 | else if (c1 == '\n' | ||
| 2500 | && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL) | ||
| 2501 | && (coding->eol_type == CODING_EOL_CR | ||
| 2502 | || coding->eol_type == CODING_EOL_CRLF)) | ||
| 2503 | { | ||
| 2504 | result = CODING_FINISH_INCONSISTENT_EOL; | ||
| 2505 | goto label_end_of_loop_2; | ||
| 2506 | } | 2431 | } |
| 2507 | else | ||
| 2508 | *dst++ = c1; | ||
| 2509 | coding->produced_char++; | ||
| 2510 | } | 2432 | } |
| 2511 | else if (c1 < 0x80) | ||
| 2512 | { | ||
| 2513 | c2 = 0; /* avoid warning */ | ||
| 2514 | DECODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2); | ||
| 2515 | } | ||
| 2516 | else | 2433 | else |
| 2517 | { | 2434 | { |
| 2518 | if (sjis_p) | 2435 | if (sjis_p) |
| 2519 | { | 2436 | { |
| 2520 | if (c1 < 0xA0 || (c1 >= 0xE0 && c1 < 0xF0)) | 2437 | if (c1 >= 0xF0) |
| 2438 | goto label_invalid_code; | ||
| 2439 | if (c1 < 0xA0 || c1 >= 0xE0) | ||
| 2521 | { | 2440 | { |
| 2522 | /* SJIS -> JISX0208 */ | 2441 | /* SJIS -> JISX0208 */ |
| 2523 | ONE_MORE_BYTE (c2); | 2442 | ONE_MORE_BYTE (c2); |
| 2524 | if (c2 >= 0x40 && c2 != 0x7F && c2 <= 0xFC) | 2443 | if (c2 < 0x40 || c2 == 0x7F || c2 > 0xFC) |
| 2525 | { | 2444 | goto label_invalid_code; |
| 2526 | DECODE_SJIS (c1, c2, c3, c4); | 2445 | DECODE_SJIS (c1, c2, c1, c2); |
| 2527 | DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4); | 2446 | charset = charset_jisx0208; |
| 2528 | } | ||
| 2529 | else | ||
| 2530 | goto label_invalid_code_2; | ||
| 2531 | } | ||
| 2532 | else if (c1 < 0xE0) | ||
| 2533 | /* SJIS -> JISX0201-Kana */ | ||
| 2534 | { | ||
| 2535 | c2 = 0; /* avoid warning */ | ||
| 2536 | DECODE_SJIS_BIG5_CHARACTER (charset_katakana_jisx0201, c1, | ||
| 2537 | /* dummy */ c2); | ||
| 2538 | } | 2447 | } |
| 2539 | else | 2448 | else |
| 2540 | goto label_invalid_code_1; | 2449 | /* SJIS -> JISX0201-Kana */ |
| 2450 | charset = charset_katakana_jisx0201; | ||
| 2541 | } | 2451 | } |
| 2542 | else | 2452 | else |
| 2543 | { | 2453 | { |
| 2544 | /* BIG5 -> Big5 */ | 2454 | /* BIG5 -> Big5 */ |
| 2545 | if (c1 >= 0xA1 && c1 <= 0xFE) | 2455 | if (c1 < 0xA1 || c1 > 0xFE) |
| 2546 | { | 2456 | goto label_invalid_code; |
| 2547 | ONE_MORE_BYTE (c2); | 2457 | ONE_MORE_BYTE (c2); |
| 2548 | if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE)) | 2458 | if (c2 < 0x40 || (c2 > 0x7E && c2 < 0xA1) || c2 > 0xFE) |
| 2549 | { | 2459 | goto label_invalid_code; |
| 2550 | int charset; | 2460 | DECODE_BIG5 (c1, c2, charset, c1, c2); |
| 2551 | |||
| 2552 | DECODE_BIG5 (c1, c2, charset, c3, c4); | ||
| 2553 | DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4); | ||
| 2554 | } | ||
| 2555 | else | ||
| 2556 | goto label_invalid_code_2; | ||
| 2557 | } | ||
| 2558 | else | ||
| 2559 | goto label_invalid_code_1; | ||
| 2560 | } | 2461 | } |
| 2561 | } | 2462 | } |
| 2562 | continue; | ||
| 2563 | |||
| 2564 | label_invalid_code_1: | ||
| 2565 | *dst++ = c1; | ||
| 2566 | coding->produced_char++; | ||
| 2567 | coding->fake_multibyte = 1; | ||
| 2568 | continue; | ||
| 2569 | 2463 | ||
| 2570 | label_invalid_code_2: | 2464 | c = DECODE_ISO_CHARACTER (charset, c1, c2); |
| 2571 | *dst++ = c1; *dst++= c2; | 2465 | EMIT_CHAR (c); |
| 2572 | coding->produced_char += 2; | ||
| 2573 | coding->fake_multibyte = 1; | ||
| 2574 | continue; | 2466 | continue; |
| 2575 | 2467 | ||
| 2576 | label_end_of_loop: | 2468 | label_invalid_code: |
| 2577 | result = CODING_FINISH_INSUFFICIENT_SRC; | 2469 | coding->errors++; |
| 2578 | label_end_of_loop_2: | ||
| 2579 | src = src_base; | 2470 | src = src_base; |
| 2580 | break; | 2471 | c = *src++; |
| 2581 | } | 2472 | EMIT_CHAR (c); |
| 2582 | |||
| 2583 | if (src < src_end) | ||
| 2584 | { | ||
| 2585 | if (result == CODING_FINISH_NORMAL) | ||
| 2586 | result = CODING_FINISH_INSUFFICIENT_DST; | ||
| 2587 | else if (result != CODING_FINISH_INCONSISTENT_EOL | ||
| 2588 | && coding->mode & CODING_MODE_LAST_BLOCK) | ||
| 2589 | { | ||
| 2590 | src_bytes = src_end - src; | ||
| 2591 | if (dst_bytes && (dst_end - dst < src_bytes)) | ||
| 2592 | src_bytes = dst_end - dst; | ||
| 2593 | bcopy (dst, src, src_bytes); | ||
| 2594 | src += src_bytes; | ||
| 2595 | dst += src_bytes; | ||
| 2596 | coding->fake_multibyte = 1; | ||
| 2597 | } | ||
| 2598 | } | 2473 | } |
| 2599 | 2474 | ||
| 2600 | coding->consumed = coding->consumed_char = src - source; | 2475 | label_end_of_loop: |
| 2476 | coding->consumed = coding->consumed_char = src_base - source; | ||
| 2601 | coding->produced = dst - destination; | 2477 | coding->produced = dst - destination; |
| 2602 | return result; | 2478 | return; |
| 2603 | } | 2479 | } |
| 2604 | 2480 | ||
| 2605 | /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". | 2481 | /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". |
| 2606 | This function can encode `charset_ascii', `charset_katakana_jisx0201', | 2482 | This function can encode charsets `ascii', `katakana-jisx0201', |
| 2607 | `charset_jisx0208', `charset_big5_1', and `charset_big5-2'. We are | 2483 | `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We |
| 2608 | sure that all these charsets are registered as official charsets | 2484 | are sure that all these charsets are registered as official charsets |
| 2609 | (i.e. do not have extended leading-codes). Characters of other | 2485 | (i.e. do not have extended leading-codes). Characters of other |
| 2610 | charsets are produced without any encoding. If SJIS_P is 1, encode | 2486 | charsets are produced without any encoding. If SJIS_P is 1, encode |
| 2611 | SJIS text, else encode BIG5 text. */ | 2487 | SJIS text, else encode BIG5 text. */ |
| 2612 | 2488 | ||
| 2613 | int | 2489 | static void |
| 2614 | encode_coding_sjis_big5 (coding, source, destination, | 2490 | encode_coding_sjis_big5 (coding, source, destination, |
| 2615 | src_bytes, dst_bytes, sjis_p) | 2491 | src_bytes, dst_bytes, sjis_p) |
| 2616 | struct coding_system *coding; | 2492 | struct coding_system *coding; |
| @@ -2622,95 +2498,91 @@ encode_coding_sjis_big5 (coding, source, destination, | |||
| 2622 | unsigned char *src_end = source + src_bytes; | 2498 | unsigned char *src_end = source + src_bytes; |
| 2623 | unsigned char *dst = destination; | 2499 | unsigned char *dst = destination; |
| 2624 | unsigned char *dst_end = destination + dst_bytes; | 2500 | unsigned char *dst_end = destination + dst_bytes; |
| 2625 | /* Since the maximum bytes produced by each loop is 2, we subtract 1 | 2501 | /* SRC_BASE remembers the start position in source in each loop. |
| 2626 | from DST_END to assure overflow checking is necessary only at the | 2502 | The loop will be exited when there's not enough source text to |
| 2627 | head of loop. */ | 2503 | analyze multi-byte codes (within macro ONE_MORE_CHAR), or when |
| 2628 | unsigned char *adjusted_dst_end = dst_end - 1; | 2504 | there's not enough destination area to produce encoded codes |
| 2629 | Lisp_Object translation_table | 2505 | (within macro EMIT_BYTES). */ |
| 2630 | = coding->translation_table_for_encode; | 2506 | unsigned char *src_base; |
| 2631 | int result = CODING_FINISH_NORMAL; | 2507 | Lisp_Object translation_table; |
| 2632 | |||
| 2633 | if (!NILP (Venable_character_translation) && NILP (translation_table)) | ||
| 2634 | translation_table = Vstandard_translation_table_for_encode; | ||
| 2635 | 2508 | ||
| 2636 | coding->consumed_char = 0; | 2509 | if (NILP (Venable_character_translation)) |
| 2637 | coding->fake_multibyte = 0; | 2510 | translation_table = Qnil; |
| 2638 | while (src < src_end && (dst_bytes | 2511 | else |
| 2639 | ? (dst < adjusted_dst_end) | ||
| 2640 | : (dst < src - 1))) | ||
| 2641 | { | 2512 | { |
| 2642 | /* SRC_BASE remembers the start position in source in each loop. | 2513 | translation_table = coding->translation_table_for_decode; |
| 2643 | The loop will be exited when there's not enough source text | 2514 | if (NILP (translation_table)) |
| 2644 | to analyze multi-byte codes (within macros ONE_MORE_BYTE and | 2515 | translation_table = Vstandard_translation_table_for_decode; |
| 2645 | TWO_MORE_BYTES). In that case, SRC is reset to SRC_BASE | 2516 | } |
| 2646 | before exiting. */ | ||
| 2647 | unsigned char *src_base = src; | ||
| 2648 | unsigned char c1 = *src++, c2, c3, c4; | ||
| 2649 | |||
| 2650 | switch (emacs_code_class[c1]) | ||
| 2651 | { | ||
| 2652 | case EMACS_ascii_code: | ||
| 2653 | ENCODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2); | ||
| 2654 | break; | ||
| 2655 | 2517 | ||
| 2656 | case EMACS_control_code: | 2518 | while (1) |
| 2657 | *dst++ = c1; | 2519 | { |
| 2658 | coding->consumed_char++; | 2520 | int c, charset, c1, c2; |
| 2659 | break; | ||
| 2660 | 2521 | ||
| 2661 | case EMACS_carriage_return_code: | 2522 | src_base = src; |
| 2662 | if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)) | 2523 | ONE_MORE_CHAR (c); |
| 2524 | |||
| 2525 | /* Now encode the character C. */ | ||
| 2526 | if (SINGLE_BYTE_CHAR_P (c)) | ||
| 2527 | { | ||
| 2528 | switch (c) | ||
| 2663 | { | 2529 | { |
| 2664 | *dst++ = c1; | 2530 | case '\r': |
| 2665 | coding->consumed_char++; | 2531 | if (!coding->mode & CODING_MODE_SELECTIVE_DISPLAY) |
| 2666 | break; | 2532 | { |
| 2533 | EMIT_ONE_BYTE (c); | ||
| 2534 | break; | ||
| 2535 | } | ||
| 2536 | c = '\n'; | ||
| 2537 | case '\n': | ||
| 2538 | if (coding->eol_type == CODING_EOL_CRLF) | ||
| 2539 | { | ||
| 2540 | EMIT_TWO_BYTES ('\r', c); | ||
| 2541 | break; | ||
| 2542 | } | ||
| 2543 | else if (coding->eol_type == CODING_EOL_CR) | ||
| 2544 | c = '\r'; | ||
| 2545 | default: | ||
| 2546 | EMIT_ONE_BYTE (c); | ||
| 2547 | } | ||
| 2548 | } | ||
| 2549 | else | ||
| 2550 | { | ||
| 2551 | SPLIT_CHAR (c, charset, c1, c2); | ||
| 2552 | if (sjis_p) | ||
| 2553 | { | ||
| 2554 | if (charset == charset_jisx0208 | ||
| 2555 | || charset == charset_jisx0208_1978) | ||
| 2556 | { | ||
| 2557 | ENCODE_SJIS (c1, c2, c1, c2); | ||
| 2558 | EMIT_TWO_BYTES (c1, c2); | ||
| 2559 | } | ||
| 2560 | else if (charset == charset_latin_jisx0201) | ||
| 2561 | EMIT_ONE_BYTE (c1); | ||
| 2562 | else | ||
| 2563 | /* There's no way other than producing the internal | ||
| 2564 | codes as is. */ | ||
| 2565 | EMIT_BYTES (src_base, src); | ||
| 2667 | } | 2566 | } |
| 2668 | /* fall down to treat '\r' as '\n' ... */ | ||
| 2669 | |||
| 2670 | case EMACS_linefeed_code: | ||
| 2671 | if (coding->eol_type == CODING_EOL_LF | ||
| 2672 | || coding->eol_type == CODING_EOL_UNDECIDED) | ||
| 2673 | *dst++ = '\n'; | ||
| 2674 | else if (coding->eol_type == CODING_EOL_CRLF) | ||
| 2675 | *dst++ = '\r', *dst++ = '\n'; | ||
| 2676 | else | 2567 | else |
| 2677 | *dst++ = '\r'; | 2568 | { |
| 2678 | coding->consumed_char++; | 2569 | if (charset == charset_big5_1 || charset == charset_big5_2) |
| 2679 | break; | 2570 | { |
| 2680 | 2571 | ENCODE_BIG5 (charset, c1, c2, c1, c2); | |
| 2681 | case EMACS_leading_code_2: | 2572 | EMIT_TWO_BYTES (c1, c2); |
| 2682 | ONE_MORE_BYTE (c2); | 2573 | } |
| 2683 | ENCODE_SJIS_BIG5_CHARACTER (c1, c2, /* dummy */ c3); | 2574 | else |
| 2684 | break; | 2575 | /* There's no way other than producing the internal |
| 2685 | 2576 | codes as is. */ | |
| 2686 | case EMACS_leading_code_3: | 2577 | EMIT_BYTES (src_base, src); |
| 2687 | TWO_MORE_BYTES (c2, c3); | 2578 | } |
| 2688 | ENCODE_SJIS_BIG5_CHARACTER (c1, c2, c3); | ||
| 2689 | break; | ||
| 2690 | |||
| 2691 | case EMACS_leading_code_4: | ||
| 2692 | THREE_MORE_BYTES (c2, c3, c4); | ||
| 2693 | ENCODE_SJIS_BIG5_CHARACTER (c2, c3, c4); | ||
| 2694 | break; | ||
| 2695 | |||
| 2696 | default: /* i.e. case EMACS_invalid_code: */ | ||
| 2697 | *dst++ = c1; | ||
| 2698 | coding->consumed_char++; | ||
| 2699 | } | 2579 | } |
| 2700 | continue; | 2580 | coding->consumed_char++; |
| 2701 | |||
| 2702 | label_end_of_loop: | ||
| 2703 | result = CODING_FINISH_INSUFFICIENT_SRC; | ||
| 2704 | src = src_base; | ||
| 2705 | break; | ||
| 2706 | } | 2581 | } |
| 2707 | 2582 | ||
| 2708 | if (result == CODING_FINISH_NORMAL | 2583 | label_end_of_loop: |
| 2709 | && src < src_end) | 2584 | coding->consumed = src_base - source; |
| 2710 | result = CODING_FINISH_INSUFFICIENT_DST; | ||
| 2711 | coding->consumed = src - source; | ||
| 2712 | coding->produced = coding->produced_char = dst - destination; | 2585 | coding->produced = coding->produced_char = dst - destination; |
| 2713 | return result; | ||
| 2714 | } | 2586 | } |
| 2715 | 2587 | ||
| 2716 | 2588 | ||
| @@ -2726,179 +2598,124 @@ detect_coding_ccl (src, src_end) | |||
| 2726 | unsigned char *src, *src_end; | 2598 | unsigned char *src, *src_end; |
| 2727 | { | 2599 | { |
| 2728 | unsigned char *valid; | 2600 | unsigned char *valid; |
| 2601 | int c; | ||
| 2602 | /* Dummy for ONE_MORE_BYTE. */ | ||
| 2603 | struct coding_system dummy_coding; | ||
| 2604 | struct coding_system *coding = &dummy_coding; | ||
| 2729 | 2605 | ||
| 2730 | /* No coding system is assigned to coding-category-ccl. */ | 2606 | /* No coding system is assigned to coding-category-ccl. */ |
| 2731 | if (!coding_system_table[CODING_CATEGORY_IDX_CCL]) | 2607 | if (!coding_system_table[CODING_CATEGORY_IDX_CCL]) |
| 2732 | return 0; | 2608 | return 0; |
| 2733 | 2609 | ||
| 2734 | valid = coding_system_table[CODING_CATEGORY_IDX_CCL]->spec.ccl.valid_codes; | 2610 | valid = coding_system_table[CODING_CATEGORY_IDX_CCL]->spec.ccl.valid_codes; |
| 2735 | while (src < src_end) | 2611 | while (1) |
| 2736 | { | 2612 | { |
| 2737 | if (! valid[*src]) return 0; | 2613 | ONE_MORE_BYTE (c); |
| 2738 | src++; | 2614 | if (! valid[c]) |
| 2615 | return 0; | ||
| 2739 | } | 2616 | } |
| 2617 | label_end_of_loop: | ||
| 2740 | return CODING_CATEGORY_MASK_CCL; | 2618 | return CODING_CATEGORY_MASK_CCL; |
| 2741 | } | 2619 | } |
| 2742 | 2620 | ||
| 2743 | 2621 | ||
| 2744 | /*** 6. End-of-line handlers ***/ | 2622 | /*** 6. End-of-line handlers ***/ |
| 2745 | 2623 | ||
| 2746 | /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". | 2624 | /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */ |
| 2747 | This function is called only when `coding->eol_type' is | ||
| 2748 | CODING_EOL_CRLF or CODING_EOL_CR. */ | ||
| 2749 | 2625 | ||
| 2750 | int | 2626 | static void |
| 2751 | decode_eol (coding, source, destination, src_bytes, dst_bytes) | 2627 | decode_eol (coding, source, destination, src_bytes, dst_bytes) |
| 2752 | struct coding_system *coding; | 2628 | struct coding_system *coding; |
| 2753 | unsigned char *source, *destination; | 2629 | unsigned char *source, *destination; |
| 2754 | int src_bytes, dst_bytes; | 2630 | int src_bytes, dst_bytes; |
| 2755 | { | 2631 | { |
| 2756 | unsigned char *src = source; | 2632 | unsigned char *src = source; |
| 2757 | unsigned char *src_end = source + src_bytes; | ||
| 2758 | unsigned char *dst = destination; | 2633 | unsigned char *dst = destination; |
| 2759 | unsigned char *dst_end = destination + dst_bytes; | 2634 | unsigned char *src_end = src + src_bytes; |
| 2760 | unsigned char c; | 2635 | unsigned char *dst_end = dst + dst_bytes; |
| 2761 | int result = CODING_FINISH_NORMAL; | 2636 | Lisp_Object translation_table; |
| 2762 | 2637 | /* SRC_BASE remembers the start position in source in each loop. | |
| 2763 | coding->fake_multibyte = 0; | 2638 | The loop will be exited when there's not enough source code |
| 2764 | 2639 | (within macro ONE_MORE_BYTE), or when there's not enough | |
| 2765 | if (src_bytes <= 0) | 2640 | destination area to produce a character (within macro |
| 2766 | { | 2641 | EMIT_CHAR). */ |
| 2767 | coding->produced = coding->produced_char = 0; | 2642 | unsigned char *src_base; |
| 2768 | coding->consumed = coding->consumed_char = 0; | 2643 | int c; |
| 2769 | return result; | 2644 | |
| 2770 | } | 2645 | translation_table = Qnil; |
| 2771 | |||
| 2772 | switch (coding->eol_type) | 2646 | switch (coding->eol_type) |
| 2773 | { | 2647 | { |
| 2774 | case CODING_EOL_CRLF: | 2648 | case CODING_EOL_CRLF: |
| 2775 | { | 2649 | while (1) |
| 2776 | /* Since the maximum bytes produced by each loop is 2, we | ||
| 2777 | subtract 1 from DST_END to assure overflow checking is | ||
| 2778 | necessary only at the head of loop. */ | ||
| 2779 | unsigned char *adjusted_dst_end = dst_end - 1; | ||
| 2780 | |||
| 2781 | while (src < src_end && (dst_bytes | ||
| 2782 | ? (dst < adjusted_dst_end) | ||
| 2783 | : (dst < src - 1))) | ||
| 2784 | { | ||
| 2785 | unsigned char *src_base = src; | ||
| 2786 | |||
| 2787 | c = *src++; | ||
| 2788 | if (c == '\r') | ||
| 2789 | { | ||
| 2790 | ONE_MORE_BYTE (c); | ||
| 2791 | if (c == '\n') | ||
| 2792 | *dst++ = c; | ||
| 2793 | else | ||
| 2794 | { | ||
| 2795 | if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL) | ||
| 2796 | { | ||
| 2797 | result = CODING_FINISH_INCONSISTENT_EOL; | ||
| 2798 | goto label_end_of_loop_2; | ||
| 2799 | } | ||
| 2800 | src--; | ||
| 2801 | *dst++ = '\r'; | ||
| 2802 | if (BASE_LEADING_CODE_P (c)) | ||
| 2803 | coding->fake_multibyte = 1; | ||
| 2804 | } | ||
| 2805 | } | ||
| 2806 | else if (c == '\n' | ||
| 2807 | && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)) | ||
| 2808 | { | ||
| 2809 | result = CODING_FINISH_INCONSISTENT_EOL; | ||
| 2810 | goto label_end_of_loop_2; | ||
| 2811 | } | ||
| 2812 | else | ||
| 2813 | { | ||
| 2814 | *dst++ = c; | ||
| 2815 | if (BASE_LEADING_CODE_P (c)) | ||
| 2816 | coding->fake_multibyte = 1; | ||
| 2817 | } | ||
| 2818 | continue; | ||
| 2819 | |||
| 2820 | label_end_of_loop: | ||
| 2821 | result = CODING_FINISH_INSUFFICIENT_SRC; | ||
| 2822 | label_end_of_loop_2: | ||
| 2823 | src = src_base; | ||
| 2824 | break; | ||
| 2825 | } | ||
| 2826 | if (src < src_end) | ||
| 2827 | { | ||
| 2828 | if (result == CODING_FINISH_NORMAL) | ||
| 2829 | result = CODING_FINISH_INSUFFICIENT_DST; | ||
| 2830 | else if (result != CODING_FINISH_INCONSISTENT_EOL | ||
| 2831 | && coding->mode & CODING_MODE_LAST_BLOCK) | ||
| 2832 | { | ||
| 2833 | /* This is the last block of the text to be decoded. | ||
| 2834 | We flush out all remaining codes. */ | ||
| 2835 | src_bytes = src_end - src; | ||
| 2836 | if (dst_bytes && (dst_end - dst < src_bytes)) | ||
| 2837 | src_bytes = dst_end - dst; | ||
| 2838 | bcopy (src, dst, src_bytes); | ||
| 2839 | dst += src_bytes; | ||
| 2840 | src += src_bytes; | ||
| 2841 | } | ||
| 2842 | } | ||
| 2843 | } | ||
| 2844 | break; | ||
| 2845 | |||
| 2846 | case CODING_EOL_CR: | ||
| 2847 | if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL) | ||
| 2848 | { | 2650 | { |
| 2849 | while (src < src_end) | 2651 | src_base = src; |
| 2652 | ONE_MORE_BYTE (c); | ||
| 2653 | if (c == '\r') | ||
| 2850 | { | 2654 | { |
| 2851 | if ((c = *src++) == '\n') | 2655 | ONE_MORE_BYTE (c); |
| 2852 | break; | 2656 | if (c != '\n') |
| 2853 | if (BASE_LEADING_CODE_P (c)) | 2657 | { |
| 2854 | coding->fake_multibyte = 1; | 2658 | if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL) |
| 2659 | { | ||
| 2660 | coding->result = CODING_FINISH_INCONSISTENT_EOL; | ||
| 2661 | goto label_end_of_loop; | ||
| 2662 | } | ||
| 2663 | src--; | ||
| 2664 | c = '\r'; | ||
| 2665 | } | ||
| 2855 | } | 2666 | } |
| 2856 | if (*--src == '\n') | 2667 | else if (c == '\n' |
| 2668 | && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)) | ||
| 2857 | { | 2669 | { |
| 2858 | src_bytes = src - source; | 2670 | coding->result = CODING_FINISH_INCONSISTENT_EOL; |
| 2859 | result = CODING_FINISH_INCONSISTENT_EOL; | 2671 | goto label_end_of_loop; |
| 2860 | } | 2672 | } |
| 2673 | EMIT_CHAR (c); | ||
| 2861 | } | 2674 | } |
| 2862 | if (dst_bytes && src_bytes > dst_bytes) | 2675 | break; |
| 2676 | |||
| 2677 | case CODING_EOL_CR: | ||
| 2678 | while (1) | ||
| 2863 | { | 2679 | { |
| 2864 | result = CODING_FINISH_INSUFFICIENT_DST; | 2680 | src_base = src; |
| 2865 | src_bytes = dst_bytes; | 2681 | ONE_MORE_BYTE (c); |
| 2682 | if (c == '\n') | ||
| 2683 | { | ||
| 2684 | if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL) | ||
| 2685 | { | ||
| 2686 | coding->result = CODING_FINISH_INCONSISTENT_EOL; | ||
| 2687 | goto label_end_of_loop; | ||
| 2688 | } | ||
| 2689 | } | ||
| 2690 | else if (c == '\r') | ||
| 2691 | c = '\n'; | ||
| 2692 | EMIT_CHAR (c); | ||
| 2866 | } | 2693 | } |
| 2867 | if (dst_bytes) | ||
| 2868 | bcopy (source, destination, src_bytes); | ||
| 2869 | else | ||
| 2870 | safe_bcopy (source, destination, src_bytes); | ||
| 2871 | src = source + src_bytes; | ||
| 2872 | while (src_bytes--) if (*dst++ == '\r') dst[-1] = '\n'; | ||
| 2873 | break; | 2694 | break; |
| 2874 | 2695 | ||
| 2875 | default: /* i.e. case: CODING_EOL_LF */ | 2696 | default: /* no need for EOL handling */ |
| 2876 | if (dst_bytes && src_bytes > dst_bytes) | 2697 | while (1) |
| 2877 | { | 2698 | { |
| 2878 | result = CODING_FINISH_INSUFFICIENT_DST; | 2699 | src_base = src; |
| 2879 | src_bytes = dst_bytes; | 2700 | ONE_MORE_BYTE (c); |
| 2701 | EMIT_CHAR (c); | ||
| 2880 | } | 2702 | } |
| 2881 | if (dst_bytes) | ||
| 2882 | bcopy (source, destination, src_bytes); | ||
| 2883 | else | ||
| 2884 | safe_bcopy (source, destination, src_bytes); | ||
| 2885 | src += src_bytes; | ||
| 2886 | dst += src_bytes; | ||
| 2887 | coding->fake_multibyte = 1; | ||
| 2888 | break; | ||
| 2889 | } | 2703 | } |
| 2890 | 2704 | ||
| 2891 | coding->consumed = coding->consumed_char = src - source; | 2705 | label_end_of_loop: |
| 2892 | coding->produced = coding->produced_char = dst - destination; | 2706 | coding->consumed = coding->consumed_char = src_base - source; |
| 2893 | return result; | 2707 | coding->produced = dst - destination; |
| 2708 | return; | ||
| 2894 | } | 2709 | } |
| 2895 | 2710 | ||
| 2896 | /* See "GENERAL NOTES about `encode_coding_XXX ()' functions". Encode | 2711 | /* See "GENERAL NOTES about `encode_coding_XXX ()' functions". Encode |
| 2897 | format of end-of-line according to `coding->eol_type'. If | 2712 | format of end-of-line according to `coding->eol_type'. It also |
| 2898 | `coding->mode & CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code | 2713 | converts multibyte form 8-bit characters to unibyte if |
| 2899 | '\r' in source text also means end-of-line. */ | 2714 | CODING->src_multibyte is nonzero. If `coding->mode & |
| 2715 | CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code '\r' in source text | ||
| 2716 | also means end-of-line. */ | ||
| 2900 | 2717 | ||
| 2901 | int | 2718 | static void |
| 2902 | encode_eol (coding, source, destination, src_bytes, dst_bytes) | 2719 | encode_eol (coding, source, destination, src_bytes, dst_bytes) |
| 2903 | struct coding_system *coding; | 2720 | struct coding_system *coding; |
| 2904 | unsigned char *source, *destination; | 2721 | unsigned char *source, *destination; |
| @@ -2906,78 +2723,76 @@ encode_eol (coding, source, destination, src_bytes, dst_bytes) | |||
| 2906 | { | 2723 | { |
| 2907 | unsigned char *src = source; | 2724 | unsigned char *src = source; |
| 2908 | unsigned char *dst = destination; | 2725 | unsigned char *dst = destination; |
| 2909 | int result = CODING_FINISH_NORMAL; | 2726 | unsigned char *src_end = src + src_bytes; |
| 2910 | 2727 | unsigned char *dst_end = dst + dst_bytes; | |
| 2911 | coding->fake_multibyte = 0; | 2728 | Lisp_Object translation_table; |
| 2729 | /* SRC_BASE remembers the start position in source in each loop. | ||
| 2730 | The loop will be exited when there's not enough source text to | ||
| 2731 | analyze multi-byte codes (within macro ONE_MORE_CHAR), or when | ||
| 2732 | there's not enough destination area to produce encoded codes | ||
| 2733 | (within macro EMIT_BYTES). */ | ||
| 2734 | unsigned char *src_base; | ||
| 2735 | int c; | ||
| 2736 | int selective_display = coding->mode & CODING_MODE_SELECTIVE_DISPLAY; | ||
| 2737 | |||
| 2738 | translation_table = Qnil; | ||
| 2739 | if (coding->src_multibyte | ||
| 2740 | && *(src_end - 1) == LEADING_CODE_8_BIT_CONTROL) | ||
| 2741 | { | ||
| 2742 | src_end--; | ||
| 2743 | src_bytes--; | ||
| 2744 | coding->result = CODING_FINISH_INSUFFICIENT_SRC; | ||
| 2745 | } | ||
| 2912 | 2746 | ||
| 2913 | if (coding->eol_type == CODING_EOL_CRLF) | 2747 | if (coding->eol_type == CODING_EOL_CRLF) |
| 2914 | { | 2748 | { |
| 2915 | unsigned char c; | 2749 | while (src < src_end) |
| 2916 | unsigned char *src_end = source + src_bytes; | ||
| 2917 | unsigned char *dst_end = destination + dst_bytes; | ||
| 2918 | /* Since the maximum bytes produced by each loop is 2, we | ||
| 2919 | subtract 1 from DST_END to assure overflow checking is | ||
| 2920 | necessary only at the head of loop. */ | ||
| 2921 | unsigned char *adjusted_dst_end = dst_end - 1; | ||
| 2922 | |||
| 2923 | while (src < src_end && (dst_bytes | ||
| 2924 | ? (dst < adjusted_dst_end) | ||
| 2925 | : (dst < src - 1))) | ||
| 2926 | { | 2750 | { |
| 2751 | src_base = src; | ||
| 2927 | c = *src++; | 2752 | c = *src++; |
| 2928 | if (c == '\n' | 2753 | if (c >= 0x20) |
| 2929 | || (c == '\r' && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))) | 2754 | EMIT_ONE_BYTE (c); |
| 2930 | *dst++ = '\r', *dst++ = '\n'; | 2755 | else if (c == '\n' || (c == '\r' && selective_display)) |
| 2756 | EMIT_TWO_BYTES ('\r', '\n'); | ||
| 2931 | else | 2757 | else |
| 2932 | { | 2758 | EMIT_ONE_BYTE (c); |
| 2933 | *dst++ = c; | ||
| 2934 | if (BASE_LEADING_CODE_P (c)) | ||
| 2935 | coding->fake_multibyte = 1; | ||
| 2936 | } | ||
| 2937 | } | 2759 | } |
| 2938 | if (src < src_end) | 2760 | label_end_of_loop: |
| 2939 | result = CODING_FINISH_INSUFFICIENT_DST; | ||
| 2940 | } | 2761 | } |
| 2941 | else | 2762 | else |
| 2942 | { | 2763 | { |
| 2943 | unsigned char c; | 2764 | if (src_bytes <= dst_bytes) |
| 2944 | |||
| 2945 | if (dst_bytes && src_bytes > dst_bytes) | ||
| 2946 | { | 2765 | { |
| 2947 | src_bytes = dst_bytes; | 2766 | safe_bcopy (src, dst, src_bytes); |
| 2948 | result = CODING_FINISH_INSUFFICIENT_DST; | 2767 | src_base = src_end; |
| 2768 | dst += src_bytes; | ||
| 2949 | } | 2769 | } |
| 2950 | if (dst_bytes) | ||
| 2951 | bcopy (source, destination, src_bytes); | ||
| 2952 | else | 2770 | else |
| 2953 | safe_bcopy (source, destination, src_bytes); | 2771 | { |
| 2954 | dst_bytes = src_bytes; | 2772 | if (coding->src_multibyte |
| 2773 | && *(src + dst_bytes - 1) == LEADING_CODE_8_BIT_CONTROL) | ||
| 2774 | dst_bytes--; | ||
| 2775 | safe_bcopy (src, dst, dst_bytes); | ||
| 2776 | src_base = src + dst_bytes; | ||
| 2777 | dst = destination + dst_bytes; | ||
| 2778 | coding->result = CODING_FINISH_INSUFFICIENT_DST; | ||
| 2779 | } | ||
| 2955 | if (coding->eol_type == CODING_EOL_CR) | 2780 | if (coding->eol_type == CODING_EOL_CR) |
| 2956 | { | 2781 | { |
| 2957 | while (src_bytes--) | 2782 | for (src = destination; src < dst; src++) |
| 2958 | { | 2783 | if (*src == '\n') *src = '\r'; |
| 2959 | if ((c = *dst++) == '\n') | ||
| 2960 | dst[-1] = '\r'; | ||
| 2961 | else if (BASE_LEADING_CODE_P (c)) | ||
| 2962 | coding->fake_multibyte = 1; | ||
| 2963 | } | ||
| 2964 | } | 2784 | } |
| 2965 | else | 2785 | else if (selective_display) |
| 2966 | { | 2786 | { |
| 2967 | if (coding->mode & CODING_MODE_SELECTIVE_DISPLAY) | 2787 | for (src = destination; src < dst; src++) |
| 2968 | { | 2788 | if (*src == '\r') *src = '\n'; |
| 2969 | while (src_bytes--) | ||
| 2970 | if (*dst++ == '\r') dst[-1] = '\n'; | ||
| 2971 | } | ||
| 2972 | coding->fake_multibyte = 1; | ||
| 2973 | } | 2789 | } |
| 2974 | src = source + dst_bytes; | ||
| 2975 | dst = destination + dst_bytes; | ||
| 2976 | } | 2790 | } |
| 2791 | if (coding->src_multibyte) | ||
| 2792 | dst = destination + str_as_unibyte (destination, dst - destination); | ||
| 2977 | 2793 | ||
| 2978 | coding->consumed = coding->consumed_char = src - source; | 2794 | coding->consumed = src_base - source; |
| 2979 | coding->produced = coding->produced_char = dst - destination; | 2795 | coding->produced = dst - destination; |
| 2980 | return result; | ||
| 2981 | } | 2796 | } |
| 2982 | 2797 | ||
| 2983 | 2798 | ||
| @@ -3786,9 +3601,17 @@ detect_coding (coding, src, src_bytes) | |||
| 3786 | if (VECTORP (tmp)) | 3601 | if (VECTORP (tmp)) |
| 3787 | val = XVECTOR (tmp)->contents[coding->eol_type]; | 3602 | val = XVECTOR (tmp)->contents[coding->eol_type]; |
| 3788 | } | 3603 | } |
| 3789 | setup_coding_system (val, coding); | 3604 | |
| 3790 | /* Set this again because setup_coding_system reset this member. */ | 3605 | /* Setup this new coding system while preserving some slots. */ |
| 3791 | coding->heading_ascii = skip; | 3606 | { |
| 3607 | int src_multibyte = coding->src_multibyte; | ||
| 3608 | int dst_multibyte = coding->dst_multibyte; | ||
| 3609 | |||
| 3610 | setup_coding_system (val, coding); | ||
| 3611 | coding->src_multibyte = src_multibyte; | ||
| 3612 | coding->dst_multibyte = dst_multibyte; | ||
| 3613 | coding->heading_ascii = skip; | ||
| 3614 | } | ||
| 3792 | } | 3615 | } |
| 3793 | 3616 | ||
| 3794 | /* Detect how end-of-line of a text of length SRC_BYTES pointed by | 3617 | /* Detect how end-of-line of a text of length SRC_BYTES pointed by |
| @@ -3969,23 +3792,24 @@ detect_eol (coding, src, src_bytes) | |||
| 3969 | val = Fget (coding->symbol, Qeol_type); | 3792 | val = Fget (coding->symbol, Qeol_type); |
| 3970 | if (VECTORP (val) && XVECTOR (val)->size == 3) | 3793 | if (VECTORP (val) && XVECTOR (val)->size == 3) |
| 3971 | { | 3794 | { |
| 3795 | int src_multibyte = coding->src_multibyte; | ||
| 3796 | int dst_multibyte = coding->dst_multibyte; | ||
| 3797 | |||
| 3972 | setup_coding_system (XVECTOR (val)->contents[eol_type], coding); | 3798 | setup_coding_system (XVECTOR (val)->contents[eol_type], coding); |
| 3799 | coding->src_multibyte = src_multibyte; | ||
| 3800 | coding->dst_multibyte = dst_multibyte; | ||
| 3973 | coding->heading_ascii = skip; | 3801 | coding->heading_ascii = skip; |
| 3974 | } | 3802 | } |
| 3975 | } | 3803 | } |
| 3976 | 3804 | ||
| 3977 | #define CONVERSION_BUFFER_EXTRA_ROOM 256 | 3805 | #define CONVERSION_BUFFER_EXTRA_ROOM 256 |
| 3978 | 3806 | ||
| 3979 | #define DECODING_BUFFER_MAG(coding) \ | 3807 | #define DECODING_BUFFER_MAG(coding) \ |
| 3980 | (coding->type == coding_type_iso2022 \ | 3808 | (coding->type == coding_type_iso2022 \ |
| 3981 | ? 3 \ | 3809 | ? 3 \ |
| 3982 | : ((coding->type == coding_type_sjis || coding->type == coding_type_big5) \ | 3810 | : (coding->type == coding_type_ccl \ |
| 3983 | ? 2 \ | 3811 | ? coding->spec.ccl.decoder.buf_magnification \ |
| 3984 | : (coding->type == coding_type_raw_text \ | 3812 | : 2)) |
| 3985 | ? 1 \ | ||
| 3986 | : (coding->type == coding_type_ccl \ | ||
| 3987 | ? coding->spec.ccl.decoder.buf_magnification \ | ||
| 3988 | : 2)))) | ||
| 3989 | 3813 | ||
| 3990 | /* Return maximum size (bytes) of a buffer enough for decoding | 3814 | /* Return maximum size (bytes) of a buffer enough for decoding |
| 3991 | SRC_BYTES of text encoded in CODING. */ | 3815 | SRC_BYTES of text encoded in CODING. */ |
| @@ -4011,8 +3835,10 @@ encoding_buffer_size (coding, src_bytes) | |||
| 4011 | 3835 | ||
| 4012 | if (coding->type == coding_type_ccl) | 3836 | if (coding->type == coding_type_ccl) |
| 4013 | magnification = coding->spec.ccl.encoder.buf_magnification; | 3837 | magnification = coding->spec.ccl.encoder.buf_magnification; |
| 4014 | else | 3838 | else if (CODING_REQUIRE_ENCODING (coding)) |
| 4015 | magnification = 3; | 3839 | magnification = 3; |
| 3840 | else | ||
| 3841 | magnification = 1; | ||
| 4016 | 3842 | ||
| 4017 | return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM); | 3843 | return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM); |
| 4018 | } | 3844 | } |
| @@ -4060,12 +3886,16 @@ ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep) | |||
| 4060 | 3886 | ||
| 4061 | coding->produced = ccl_driver (ccl, source, destination, | 3887 | coding->produced = ccl_driver (ccl, source, destination, |
| 4062 | src_bytes, dst_bytes, &(coding->consumed)); | 3888 | src_bytes, dst_bytes, &(coding->consumed)); |
| 4063 | coding->produced_char | 3889 | if (encodep) |
| 4064 | = (encodep | 3890 | coding->produced_char = coding->produced; |
| 4065 | ? coding->produced | 3891 | else |
| 4066 | : multibyte_chars_in_text (destination, coding->produced)); | 3892 | { |
| 4067 | coding->consumed_char | 3893 | int bytes |
| 4068 | = multibyte_chars_in_text (source, coding->consumed); | 3894 | = dst_bytes ? dst_bytes : source + coding->consumed - destination; |
| 3895 | coding->produced = str_as_multibyte (destination, bytes, | ||
| 3896 | coding->produced, | ||
| 3897 | &(coding->produced_char)); | ||
| 3898 | } | ||
| 4069 | 3899 | ||
| 4070 | switch (ccl->status) | 3900 | switch (ccl->status) |
| 4071 | { | 3901 | { |
| @@ -4088,17 +3918,9 @@ ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep) | |||
| 4088 | 3918 | ||
| 4089 | /* See "GENERAL NOTES about `decode_coding_XXX ()' functions". Before | 3919 | /* See "GENERAL NOTES about `decode_coding_XXX ()' functions". Before |
| 4090 | decoding, it may detect coding system and format of end-of-line if | 3920 | decoding, it may detect coding system and format of end-of-line if |
| 4091 | those are not yet decided. | 3921 | those are not yet decided. The source should be unibyte; the |
| 4092 | 3922 | result is multibyte if CODING->dst_multibyte is nonzero, else | |
| 4093 | This function does not make full use of DESTINATION buffer. For | 3923 | unibyte. */ |
| 4094 | instance, if coding->type is coding_type_iso2022, it uses only | ||
| 4095 | (DST_BYTES - 7) bytes of DESTINATION buffer. In the case that | ||
| 4096 | DST_BYTES is decided by the function decoding_buffer_size, it | ||
| 4097 | contains extra 256 bytes (defined by CONVERSION_BUFFER_EXTRA_ROOM). | ||
| 4098 | So, this function can decode the full SOURCE. But, in the other | ||
| 4099 | case, if you want to avoid carry over, you must supply at least 7 | ||
| 4100 | bytes more area in DESTINATION buffer than expected maximum bytes | ||
| 4101 | that will be produced by this function. */ | ||
| 4102 | 3924 | ||
| 4103 | int | 3925 | int |
| 4104 | decode_coding (coding, source, destination, src_bytes, dst_bytes) | 3926 | decode_coding (coding, source, destination, src_bytes, dst_bytes) |
| @@ -4106,92 +3928,84 @@ decode_coding (coding, source, destination, src_bytes, dst_bytes) | |||
| 4106 | unsigned char *source, *destination; | 3928 | unsigned char *source, *destination; |
| 4107 | int src_bytes, dst_bytes; | 3929 | int src_bytes, dst_bytes; |
| 4108 | { | 3930 | { |
| 4109 | int result; | ||
| 4110 | |||
| 4111 | if (src_bytes <= 0 | ||
| 4112 | && coding->type != coding_type_ccl | ||
| 4113 | && ! (coding->mode & CODING_MODE_LAST_BLOCK | ||
| 4114 | && CODING_REQUIRE_FLUSHING (coding))) | ||
| 4115 | { | ||
| 4116 | coding->produced = coding->produced_char = 0; | ||
| 4117 | coding->consumed = coding->consumed_char = 0; | ||
| 4118 | coding->fake_multibyte = 0; | ||
| 4119 | return CODING_FINISH_NORMAL; | ||
| 4120 | } | ||
| 4121 | |||
| 4122 | if (coding->type == coding_type_undecided) | 3931 | if (coding->type == coding_type_undecided) |
| 4123 | detect_coding (coding, source, src_bytes); | 3932 | detect_coding (coding, source, src_bytes); |
| 4124 | 3933 | ||
| 4125 | if (coding->eol_type == CODING_EOL_UNDECIDED) | 3934 | if (coding->eol_type == CODING_EOL_UNDECIDED) |
| 4126 | detect_eol (coding, source, src_bytes); | 3935 | detect_eol (coding, source, src_bytes); |
| 4127 | 3936 | ||
| 3937 | coding->produced = coding->produced_char = 0; | ||
| 3938 | coding->consumed = coding->consumed_char = 0; | ||
| 3939 | coding->errors = 0; | ||
| 3940 | coding->result = CODING_FINISH_NORMAL; | ||
| 3941 | |||
| 4128 | switch (coding->type) | 3942 | switch (coding->type) |
| 4129 | { | 3943 | { |
| 4130 | case coding_type_emacs_mule: | ||
| 4131 | case coding_type_undecided: | ||
| 4132 | case coding_type_raw_text: | ||
| 4133 | if (coding->eol_type == CODING_EOL_LF | ||
| 4134 | || coding->eol_type == CODING_EOL_UNDECIDED) | ||
| 4135 | goto label_no_conversion; | ||
| 4136 | result = decode_eol (coding, source, destination, src_bytes, dst_bytes); | ||
| 4137 | break; | ||
| 4138 | |||
| 4139 | case coding_type_sjis: | 3944 | case coding_type_sjis: |
| 4140 | result = decode_coding_sjis_big5 (coding, source, destination, | 3945 | decode_coding_sjis_big5 (coding, source, destination, |
| 4141 | src_bytes, dst_bytes, 1); | 3946 | src_bytes, dst_bytes, 1); |
| 4142 | break; | 3947 | break; |
| 4143 | 3948 | ||
| 4144 | case coding_type_iso2022: | 3949 | case coding_type_iso2022: |
| 4145 | result = decode_coding_iso2022 (coding, source, destination, | 3950 | decode_coding_iso2022 (coding, source, destination, |
| 4146 | src_bytes, dst_bytes); | 3951 | src_bytes, dst_bytes); |
| 4147 | break; | 3952 | break; |
| 4148 | 3953 | ||
| 4149 | case coding_type_big5: | 3954 | case coding_type_big5: |
| 4150 | result = decode_coding_sjis_big5 (coding, source, destination, | 3955 | decode_coding_sjis_big5 (coding, source, destination, |
| 4151 | src_bytes, dst_bytes, 0); | 3956 | src_bytes, dst_bytes, 0); |
| 3957 | break; | ||
| 3958 | |||
| 3959 | case coding_type_emacs_mule: | ||
| 3960 | decode_coding_emacs_mule (coding, source, destination, | ||
| 3961 | src_bytes, dst_bytes); | ||
| 4152 | break; | 3962 | break; |
| 4153 | 3963 | ||
| 4154 | case coding_type_ccl: | 3964 | case coding_type_ccl: |
| 4155 | result = ccl_coding_driver (coding, source, destination, | 3965 | ccl_coding_driver (coding, source, destination, |
| 4156 | src_bytes, dst_bytes, 0); | 3966 | src_bytes, dst_bytes, 0); |
| 4157 | break; | 3967 | break; |
| 4158 | 3968 | ||
| 4159 | default: /* i.e. case coding_type_no_conversion: */ | 3969 | default: |
| 4160 | label_no_conversion: | 3970 | decode_eol (coding, source, destination, src_bytes, dst_bytes); |
| 4161 | if (dst_bytes && src_bytes > dst_bytes) | 3971 | } |
| 4162 | { | 3972 | |
| 4163 | coding->produced = dst_bytes; | 3973 | if (coding->result == CODING_FINISH_INSUFFICIENT_SRC |
| 4164 | result = CODING_FINISH_INSUFFICIENT_DST; | 3974 | && coding->consumed == src_bytes) |
| 4165 | } | 3975 | coding->result = CODING_FINISH_NORMAL; |
| 4166 | else | 3976 | |
| 3977 | if (coding->mode & CODING_MODE_LAST_BLOCK | ||
| 3978 | && coding->result == CODING_FINISH_INSUFFICIENT_SRC) | ||
| 3979 | { | ||
| 3980 | unsigned char *src = source + coding->consumed; | ||
| 3981 | unsigned char *dst = destination + coding->produced; | ||
| 3982 | |||
| 3983 | src_bytes -= coding->consumed; | ||
| 3984 | coding->errors++; | ||
| 3985 | if (COMPOSING_P (coding)) | ||
| 3986 | DECODE_COMPOSITION_END ('1'); | ||
| 3987 | while (src_bytes--) | ||
| 4167 | { | 3988 | { |
| 4168 | coding->produced = src_bytes; | 3989 | int c = *src++; |
| 4169 | result = CODING_FINISH_NORMAL; | 3990 | dst += CHAR_STRING (c, dst); |
| 3991 | coding->produced_char++; | ||
| 4170 | } | 3992 | } |
| 4171 | if (dst_bytes) | 3993 | coding->consumed = coding->consumed_char = src - source; |
| 4172 | bcopy (source, destination, coding->produced); | 3994 | coding->produced = dst - destination; |
| 4173 | else | ||
| 4174 | safe_bcopy (source, destination, coding->produced); | ||
| 4175 | coding->fake_multibyte = 1; | ||
| 4176 | coding->consumed | ||
| 4177 | = coding->consumed_char = coding->produced_char = coding->produced; | ||
| 4178 | break; | ||
| 4179 | } | 3995 | } |
| 4180 | 3996 | ||
| 4181 | return result; | 3997 | if (!coding->dst_multibyte) |
| 4182 | } | 3998 | { |
| 3999 | coding->produced = str_as_unibyte (destination, coding->produced); | ||
| 4000 | coding->produced_char = coding->produced; | ||
| 4001 | } | ||
| 4183 | 4002 | ||
| 4184 | /* See "GENERAL NOTES about `encode_coding_XXX ()' functions". | 4003 | return coding->result; |
| 4004 | } | ||
| 4185 | 4005 | ||
| 4186 | This function does not make full use of DESTINATION buffer. For | 4006 | /* See "GENERAL NOTES about `encode_coding_XXX ()' functions". The |
| 4187 | instance, if coding->type is coding_type_iso2022, it uses only | 4007 | multibyteness of the source is CODING->src_multibyte, the |
| 4188 | (DST_BYTES - 20) bytes of DESTINATION buffer. In the case that | 4008 | multibyteness of the result is always unibyte. */ |
| 4189 | DST_BYTES is decided by the function encoding_buffer_size, it | ||
| 4190 | contains extra 256 bytes (defined by CONVERSION_BUFFER_EXTRA_ROOM). | ||
| 4191 | So, this function can encode the full SOURCE. But, in the other | ||
| 4192 | case, if you want to avoid carry over, you must supply at least 20 | ||
| 4193 | bytes more area in DESTINATION buffer than expected maximum bytes | ||
| 4194 | that will be produced by this function. */ | ||
| 4195 | 4009 | ||
| 4196 | int | 4010 | int |
| 4197 | encode_coding (coding, source, destination, src_bytes, dst_bytes) | 4011 | encode_coding (coding, source, destination, src_bytes, dst_bytes) |
| @@ -4199,85 +4013,78 @@ encode_coding (coding, source, destination, src_bytes, dst_bytes) | |||
| 4199 | unsigned char *source, *destination; | 4013 | unsigned char *source, *destination; |
| 4200 | int src_bytes, dst_bytes; | 4014 | int src_bytes, dst_bytes; |
| 4201 | { | 4015 | { |
| 4202 | int result; | 4016 | coding->produced = coding->produced_char = 0; |
| 4203 | 4017 | coding->consumed = coding->consumed_char = 0; | |
| 4204 | if (src_bytes <= 0 | 4018 | coding->errors = 0; |
| 4205 | && ! (coding->mode & CODING_MODE_LAST_BLOCK | 4019 | coding->result = CODING_FINISH_NORMAL; |
| 4206 | && CODING_REQUIRE_FLUSHING (coding))) | ||
| 4207 | { | ||
| 4208 | coding->produced = coding->produced_char = 0; | ||
| 4209 | coding->consumed = coding->consumed_char = 0; | ||
| 4210 | coding->fake_multibyte = 0; | ||
| 4211 | return CODING_FINISH_NORMAL; | ||
| 4212 | } | ||
| 4213 | 4020 | ||
| 4214 | switch (coding->type) | 4021 | switch (coding->type) |
| 4215 | { | 4022 | { |
| 4216 | case coding_type_emacs_mule: | ||
| 4217 | case coding_type_undecided: | ||
| 4218 | case coding_type_raw_text: | ||
| 4219 | if (coding->eol_type == CODING_EOL_LF | ||
| 4220 | || coding->eol_type == CODING_EOL_UNDECIDED) | ||
| 4221 | goto label_no_conversion; | ||
| 4222 | result = encode_eol (coding, source, destination, src_bytes, dst_bytes); | ||
| 4223 | break; | ||
| 4224 | |||
| 4225 | case coding_type_sjis: | 4023 | case coding_type_sjis: |
| 4226 | result = encode_coding_sjis_big5 (coding, source, destination, | 4024 | encode_coding_sjis_big5 (coding, source, destination, |
| 4227 | src_bytes, dst_bytes, 1); | 4025 | src_bytes, dst_bytes, 1); |
| 4228 | break; | 4026 | break; |
| 4229 | 4027 | ||
| 4230 | case coding_type_iso2022: | 4028 | case coding_type_iso2022: |
| 4231 | result = encode_coding_iso2022 (coding, source, destination, | 4029 | encode_coding_iso2022 (coding, source, destination, |
| 4232 | src_bytes, dst_bytes); | 4030 | src_bytes, dst_bytes); |
| 4233 | break; | 4031 | break; |
| 4234 | 4032 | ||
| 4235 | case coding_type_big5: | 4033 | case coding_type_big5: |
| 4236 | result = encode_coding_sjis_big5 (coding, source, destination, | 4034 | encode_coding_sjis_big5 (coding, source, destination, |
| 4237 | src_bytes, dst_bytes, 0); | 4035 | src_bytes, dst_bytes, 0); |
| 4036 | break; | ||
| 4037 | |||
| 4038 | case coding_type_emacs_mule: | ||
| 4039 | encode_coding_emacs_mule (coding, source, destination, | ||
| 4040 | src_bytes, dst_bytes); | ||
| 4238 | break; | 4041 | break; |
| 4239 | 4042 | ||
| 4240 | case coding_type_ccl: | 4043 | case coding_type_ccl: |
| 4241 | result = ccl_coding_driver (coding, source, destination, | 4044 | ccl_coding_driver (coding, source, destination, |
| 4242 | src_bytes, dst_bytes, 1); | 4045 | src_bytes, dst_bytes, 1); |
| 4243 | break; | 4046 | break; |
| 4244 | 4047 | ||
| 4245 | default: /* i.e. case coding_type_no_conversion: */ | 4048 | default: |
| 4246 | label_no_conversion: | 4049 | encode_eol (coding, source, destination, src_bytes, dst_bytes); |
| 4247 | if (dst_bytes && src_bytes > dst_bytes) | 4050 | } |
| 4248 | { | 4051 | |
| 4249 | coding->produced = dst_bytes; | 4052 | if (coding->result == CODING_FINISH_INSUFFICIENT_SRC |
| 4250 | result = CODING_FINISH_INSUFFICIENT_DST; | 4053 | && coding->consumed == src_bytes) |
| 4251 | } | 4054 | coding->result = CODING_FINISH_NORMAL; |
| 4252 | else | 4055 | |
| 4253 | { | 4056 | if (coding->mode & CODING_MODE_LAST_BLOCK) |
| 4254 | coding->produced = src_bytes; | 4057 | { |
| 4255 | result = CODING_FINISH_NORMAL; | 4058 | unsigned char *src = source + coding->consumed; |
| 4256 | } | 4059 | unsigned char *src_end = src + src_bytes; |
| 4257 | if (dst_bytes) | 4060 | unsigned char *dst = destination + coding->produced; |
| 4258 | bcopy (source, destination, coding->produced); | 4061 | |
| 4259 | else | 4062 | if (coding->type == coding_type_iso2022) |
| 4260 | safe_bcopy (source, destination, coding->produced); | 4063 | ENCODE_RESET_PLANE_AND_REGISTER; |
| 4261 | if (coding->mode & CODING_MODE_SELECTIVE_DISPLAY) | 4064 | if (COMPOSING_P (coding)) |
| 4065 | *dst++ = ISO_CODE_ESC, *dst++ = '1'; | ||
| 4066 | if (coding->consumed < src_bytes) | ||
| 4262 | { | 4067 | { |
| 4263 | unsigned char *p = destination, *pend = p + coding->produced; | 4068 | int len = src_bytes - coding->consumed; |
| 4264 | while (p < pend) | 4069 | |
| 4265 | if (*p++ == '\015') p[-1] = '\n'; | 4070 | BCOPY_SHORT (source + coding->consumed, dst, len); |
| 4071 | if (coding->src_multibyte) | ||
| 4072 | len = str_as_unibyte (dst, len); | ||
| 4073 | dst += len; | ||
| 4074 | coding->consumed = src_bytes; | ||
| 4266 | } | 4075 | } |
| 4267 | coding->fake_multibyte = 1; | 4076 | coding->produced = coding->produced_char = dst - destination; |
| 4268 | coding->consumed | ||
| 4269 | = coding->consumed_char = coding->produced_char = coding->produced; | ||
| 4270 | break; | ||
| 4271 | } | 4077 | } |
| 4272 | 4078 | ||
| 4273 | return result; | 4079 | return coding->result; |
| 4274 | } | 4080 | } |
| 4275 | 4081 | ||
| 4276 | /* Scan text in the region between *BEG and *END (byte positions), | 4082 | /* Scan text in the region between *BEG and *END (byte positions), |
| 4277 | skip characters which we don't have to decode by coding system | 4083 | skip characters which we don't have to decode by coding system |
| 4278 | CODING at the head and tail, then set *BEG and *END to the region | 4084 | CODING at the head and tail, then set *BEG and *END to the region |
| 4279 | of the text we actually have to convert. The caller should move | 4085 | of the text we actually have to convert. The caller should move |
| 4280 | the gap out of the region in advance. | 4086 | the gap out of the region in advance if the region is from a |
| 4087 | buffer. | ||
| 4281 | 4088 | ||
| 4282 | If STR is not NULL, *BEG and *END are indices into STR. */ | 4089 | If STR is not NULL, *BEG and *END are indices into STR. */ |
| 4283 | 4090 | ||
| @@ -4293,12 +4100,16 @@ shrink_decoding_region (beg, end, coding, str) | |||
| 4293 | 4100 | ||
| 4294 | if (coding->type == coding_type_ccl | 4101 | if (coding->type == coding_type_ccl |
| 4295 | || coding->type == coding_type_undecided | 4102 | || coding->type == coding_type_undecided |
| 4296 | || !NILP (coding->post_read_conversion)) | 4103 | || coding->eol_type != CODING_EOL_LF |
| 4104 | || !NILP (coding->post_read_conversion) | ||
| 4105 | || coding->composing != COMPOSITION_DISABLED) | ||
| 4297 | { | 4106 | { |
| 4298 | /* We can't skip any data. */ | 4107 | /* We can't skip any data. */ |
| 4299 | return; | 4108 | return; |
| 4300 | } | 4109 | } |
| 4301 | else if (coding->type == coding_type_no_conversion) | 4110 | if (coding->type == coding_type_no_conversion |
| 4111 | || coding->type == coding_type_raw_text | ||
| 4112 | || coding->type == coding_type_emacs_mule) | ||
| 4302 | { | 4113 | { |
| 4303 | /* We need no conversion, but don't have to skip any data here. | 4114 | /* We need no conversion, but don't have to skip any data here. |
| 4304 | Decoding routine handles them effectively anyway. */ | 4115 | Decoding routine handles them effectively anyway. */ |
| @@ -4320,9 +4131,7 @@ shrink_decoding_region (beg, end, coding, str) | |||
| 4320 | return; | 4131 | return; |
| 4321 | } | 4132 | } |
| 4322 | 4133 | ||
| 4323 | eol_conversion = (coding->eol_type != CODING_EOL_LF); | 4134 | if (coding->heading_ascii >= 0) |
| 4324 | |||
| 4325 | if ((! eol_conversion) && (coding->heading_ascii >= 0)) | ||
| 4326 | /* Detection routine has already found how much we can skip at the | 4135 | /* Detection routine has already found how much we can skip at the |
| 4327 | head. */ | 4136 | head. */ |
| 4328 | *beg += coding->heading_ascii; | 4137 | *beg += coding->heading_ascii; |
| @@ -4340,23 +4149,6 @@ shrink_decoding_region (beg, end, coding, str) | |||
| 4340 | 4149 | ||
| 4341 | switch (coding->type) | 4150 | switch (coding->type) |
| 4342 | { | 4151 | { |
| 4343 | case coding_type_emacs_mule: | ||
| 4344 | case coding_type_raw_text: | ||
| 4345 | if (eol_conversion) | ||
| 4346 | { | ||
| 4347 | if (coding->heading_ascii < 0) | ||
| 4348 | while (begp < endp && *begp != '\r' && *begp < 0x80) begp++; | ||
| 4349 | while (begp < endp && endp[-1] != '\r' && endp[-1] < 0x80) | ||
| 4350 | endp--; | ||
| 4351 | /* Do not consider LF as ascii if preceded by CR, since that | ||
| 4352 | confuses eol decoding. */ | ||
| 4353 | if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n') | ||
| 4354 | endp++; | ||
| 4355 | } | ||
| 4356 | else | ||
| 4357 | begp = endp; | ||
| 4358 | break; | ||
| 4359 | |||
| 4360 | case coding_type_sjis: | 4152 | case coding_type_sjis: |
| 4361 | case coding_type_big5: | 4153 | case coding_type_big5: |
| 4362 | /* We can skip all ASCII characters at the head. */ | 4154 | /* We can skip all ASCII characters at the head. */ |
| @@ -4381,7 +4173,7 @@ shrink_decoding_region (beg, end, coding, str) | |||
| 4381 | endp++; | 4173 | endp++; |
| 4382 | break; | 4174 | break; |
| 4383 | 4175 | ||
| 4384 | default: /* i.e. case coding_type_iso2022: */ | 4176 | case coding_type_iso2022: |
| 4385 | if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII) | 4177 | if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII) |
| 4386 | /* We can't skip any data. */ | 4178 | /* We can't skip any data. */ |
| 4387 | break; | 4179 | break; |
| @@ -4452,6 +4244,10 @@ shrink_decoding_region (beg, end, coding, str) | |||
| 4452 | endp = eight_bit; | 4244 | endp = eight_bit; |
| 4453 | } | 4245 | } |
| 4454 | } | 4246 | } |
| 4247 | break; | ||
| 4248 | |||
| 4249 | default: | ||
| 4250 | abort (); | ||
| 4455 | } | 4251 | } |
| 4456 | *beg += begp - begp_orig; | 4252 | *beg += begp - begp_orig; |
| 4457 | *end += endp - endp_orig; | 4253 | *end += endp - endp_orig; |
| @@ -4470,13 +4266,21 @@ shrink_encoding_region (beg, end, coding, str) | |||
| 4470 | int eol_conversion; | 4266 | int eol_conversion; |
| 4471 | Lisp_Object translation_table; | 4267 | Lisp_Object translation_table; |
| 4472 | 4268 | ||
| 4473 | if (coding->type == coding_type_ccl) | 4269 | if (coding->type == coding_type_ccl |
| 4474 | /* We can't skip any data. */ | 4270 | || coding->eol_type == CODING_EOL_CRLF |
| 4475 | return; | 4271 | || coding->eol_type == CODING_EOL_CR |
| 4476 | else if (coding->type == coding_type_no_conversion) | 4272 | || coding->cmp_data && coding->cmp_data->used > 0) |
| 4273 | { | ||
| 4274 | /* We can't skip any data. */ | ||
| 4275 | return; | ||
| 4276 | } | ||
| 4277 | if (coding->type == coding_type_no_conversion | ||
| 4278 | || coding->type == coding_type_raw_text | ||
| 4279 | || coding->type == coding_type_emacs_mule | ||
| 4280 | || coding->type == coding_type_undecided) | ||
| 4477 | { | 4281 | { |
| 4478 | /* We need no conversion. */ | 4282 | /* We need no conversion, but don't have to skip any data here. |
| 4479 | *beg = *end; | 4283 | Encoding routine handles them effectively anyway. */ |
| 4480 | return; | 4284 | return; |
| 4481 | } | 4285 | } |
| 4482 | 4286 | ||
| @@ -4513,18 +4317,6 @@ shrink_encoding_region (beg, end, coding, str) | |||
| 4513 | the caller is expected to have handled it already. */ | 4317 | the caller is expected to have handled it already. */ |
| 4514 | switch (coding->type) | 4318 | switch (coding->type) |
| 4515 | { | 4319 | { |
| 4516 | case coding_type_undecided: | ||
| 4517 | case coding_type_emacs_mule: | ||
| 4518 | case coding_type_raw_text: | ||
| 4519 | if (eol_conversion) | ||
| 4520 | { | ||
| 4521 | while (begp < endp && *begp != '\n') begp++; | ||
| 4522 | while (begp < endp && endp[-1] != '\n') endp--; | ||
| 4523 | } | ||
| 4524 | else | ||
| 4525 | begp = endp; | ||
| 4526 | break; | ||
| 4527 | |||
| 4528 | case coding_type_iso2022: | 4320 | case coding_type_iso2022: |
| 4529 | if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII) | 4321 | if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII) |
| 4530 | /* We can't skip any data. */ | 4322 | /* We can't skip any data. */ |
| @@ -4543,7 +4335,8 @@ shrink_encoding_region (beg, end, coding, str) | |||
| 4543 | } | 4335 | } |
| 4544 | /* fall through ... */ | 4336 | /* fall through ... */ |
| 4545 | 4337 | ||
| 4546 | default: | 4338 | case coding_type_sjis: |
| 4339 | case coding_type_big5: | ||
| 4547 | /* We can skip all ASCII characters at the head and tail. */ | 4340 | /* We can skip all ASCII characters at the head and tail. */ |
| 4548 | if (eol_conversion) | 4341 | if (eol_conversion) |
| 4549 | while (begp < endp && *begp < 0x80 && *begp != '\n') begp++; | 4342 | while (begp < endp && *begp < 0x80 && *begp != '\n') begp++; |
| @@ -4555,6 +4348,9 @@ shrink_encoding_region (beg, end, coding, str) | |||
| 4555 | else | 4348 | else |
| 4556 | while (begp < endp && *(endp - 1) < 0x80) endp--; | 4349 | while (begp < endp && *(endp - 1) < 0x80) endp--; |
| 4557 | break; | 4350 | break; |
| 4351 | |||
| 4352 | default: | ||
| 4353 | abort (); | ||
| 4558 | } | 4354 | } |
| 4559 | 4355 | ||
| 4560 | *beg += begp - begp_orig; | 4356 | *beg += begp - begp_orig; |
| @@ -4719,7 +4515,10 @@ coding_restore_composition (coding, obj) | |||
| 4719 | 4515 | ||
| 4720 | If REPLACE is nonzero, we do various things as if the original text | 4516 | If REPLACE is nonzero, we do various things as if the original text |
| 4721 | is deleted and a new text is inserted. See the comments in | 4517 | is deleted and a new text is inserted. See the comments in |
| 4722 | replace_range (insdel.c) to know what we are doing. */ | 4518 | replace_range (insdel.c) to know what we are doing. |
| 4519 | |||
| 4520 | If REPLACE is zero, it is assumed that the source text is unibyte. | ||
| 4521 | Otherwise, it is assumed that the source text is multibyte. */ | ||
| 4723 | 4522 | ||
| 4724 | int | 4523 | int |
| 4725 | code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace) | 4524 | code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace) |
| @@ -4730,13 +4529,15 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace) | |||
| 4730 | int require, inserted, inserted_byte; | 4529 | int require, inserted, inserted_byte; |
| 4731 | int head_skip, tail_skip, total_skip = 0; | 4530 | int head_skip, tail_skip, total_skip = 0; |
| 4732 | Lisp_Object saved_coding_symbol; | 4531 | Lisp_Object saved_coding_symbol; |
| 4733 | int multibyte = !NILP (current_buffer->enable_multibyte_characters); | ||
| 4734 | int first = 1; | 4532 | int first = 1; |
| 4735 | int fake_multibyte = 0; | ||
| 4736 | unsigned char *src, *dst; | 4533 | unsigned char *src, *dst; |
| 4737 | Lisp_Object deletion; | 4534 | Lisp_Object deletion; |
| 4738 | int orig_point = PT, orig_len = len; | 4535 | int orig_point = PT, orig_len = len; |
| 4739 | int prev_Z; | 4536 | int prev_Z; |
| 4537 | int multibyte_p = !NILP (current_buffer->enable_multibyte_characters); | ||
| 4538 | |||
| 4539 | coding->src_multibyte = replace && multibyte_p; | ||
| 4540 | coding->dst_multibyte = multibyte_p; | ||
| 4740 | 4541 | ||
| 4741 | deletion = Qnil; | 4542 | deletion = Qnil; |
| 4742 | saved_coding_symbol = Qnil; | 4543 | saved_coding_symbol = Qnil; |
| @@ -4755,10 +4556,7 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace) | |||
| 4755 | if (saved_from != from) | 4556 | if (saved_from != from) |
| 4756 | { | 4557 | { |
| 4757 | to = from + len; | 4558 | to = from + len; |
| 4758 | if (multibyte) | 4559 | from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to); |
| 4759 | from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to); | ||
| 4760 | else | ||
| 4761 | from_byte = from, to_byte = to; | ||
| 4762 | len_byte = to_byte - from_byte; | 4560 | len_byte = to_byte - from_byte; |
| 4763 | } | 4561 | } |
| 4764 | } | 4562 | } |
| @@ -4791,40 +4589,11 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace) | |||
| 4791 | } | 4589 | } |
| 4792 | } | 4590 | } |
| 4793 | 4591 | ||
| 4794 | if (encodep | ||
| 4795 | ? ! CODING_REQUIRE_ENCODING (coding) | ||
| 4796 | : ! CODING_REQUIRE_DECODING (coding)) | ||
| 4797 | { | ||
| 4798 | coding->consumed_char = len; | ||
| 4799 | coding->consumed = len_byte; | ||
| 4800 | coding->produced = len_byte; | ||
| 4801 | if (multibyte | ||
| 4802 | && ! replace | ||
| 4803 | /* See the comment of the member heading_ascii in coding.h. */ | ||
| 4804 | && coding->heading_ascii < len_byte) | ||
| 4805 | { | ||
| 4806 | /* We still may have to combine byte at the head and the | ||
| 4807 | tail of the text in the region. */ | ||
| 4808 | if (from < GPT && GPT < to) | ||
| 4809 | move_gap_both (to, to_byte); | ||
| 4810 | len = multibyte_chars_in_text (BYTE_POS_ADDR (from_byte), len_byte); | ||
| 4811 | adjust_after_insert (from, from_byte, to, to_byte, len); | ||
| 4812 | coding->produced_char = len; | ||
| 4813 | } | ||
| 4814 | else | ||
| 4815 | { | ||
| 4816 | if (!replace) | ||
| 4817 | adjust_after_insert (from, from_byte, to, to_byte, len_byte); | ||
| 4818 | coding->produced_char = len_byte; | ||
| 4819 | } | ||
| 4820 | return 0; | ||
| 4821 | } | ||
| 4822 | |||
| 4823 | /* Now we convert the text. */ | 4592 | /* Now we convert the text. */ |
| 4824 | 4593 | ||
| 4825 | /* For encoding, we must process pre-write-conversion in advance. */ | 4594 | /* For encoding, we must process pre-write-conversion in advance. */ |
| 4826 | if (encodep | 4595 | if (! inhibit_pre_post_conversion |
| 4827 | && ! NILP (coding->pre_write_conversion) | 4596 | && encodep |
| 4828 | && SYMBOLP (coding->pre_write_conversion) | 4597 | && SYMBOLP (coding->pre_write_conversion) |
| 4829 | && ! NILP (Ffboundp (coding->pre_write_conversion))) | 4598 | && ! NILP (Ffboundp (coding->pre_write_conversion))) |
| 4830 | { | 4599 | { |
| @@ -4859,8 +4628,8 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace) | |||
| 4859 | orig_point = from; | 4628 | orig_point = from; |
| 4860 | orig_len = len; | 4629 | orig_len = len; |
| 4861 | to = from + len; | 4630 | to = from + len; |
| 4862 | from_byte = multibyte ? CHAR_TO_BYTE (from) : from_byte; | 4631 | from_byte = CHAR_TO_BYTE (from); |
| 4863 | to_byte = multibyte ? CHAR_TO_BYTE (to) : to; | 4632 | to_byte = CHAR_TO_BYTE (to); |
| 4864 | len_byte = to_byte - from_byte; | 4633 | len_byte = to_byte - from_byte; |
| 4865 | TEMP_SET_PT_BOTH (from, from_byte); | 4634 | TEMP_SET_PT_BOTH (from, from_byte); |
| 4866 | } | 4635 | } |
| @@ -4877,40 +4646,32 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace) | |||
| 4877 | coding_allocate_composition_data (coding, from); | 4646 | coding_allocate_composition_data (coding, from); |
| 4878 | } | 4647 | } |
| 4879 | 4648 | ||
| 4880 | /* For conversion by CCL program and for encoding with composition | 4649 | /* Try to skip the heading and tailing ASCIIs. */ |
| 4881 | handling, we can't skip any character because we may convert or | 4650 | { |
| 4882 | compose even ASCII characters. */ | 4651 | int from_byte_orig = from_byte, to_byte_orig = to_byte; |
| 4883 | if (coding->type != coding_type_ccl | 4652 | |
| 4884 | && (!encodep || coding->cmp_data == NULL)) | 4653 | if (from < GPT && GPT < to) |
| 4885 | { | 4654 | move_gap_both (from, from_byte); |
| 4886 | /* Try to skip the heading and tailing ASCIIs. */ | 4655 | SHRINK_CONVERSION_REGION (&from_byte, &to_byte, coding, NULL, encodep); |
| 4887 | int from_byte_orig = from_byte, to_byte_orig = to_byte; | 4656 | if (from_byte == to_byte |
| 4888 | 4657 | && (encodep || NILP (coding->post_read_conversion)) | |
| 4889 | if (from < GPT && GPT < to) | 4658 | && ! CODING_REQUIRE_FLUSHING (coding)) |
| 4890 | move_gap_both (from, from_byte); | 4659 | { |
| 4891 | SHRINK_CONVERSION_REGION (&from_byte, &to_byte, coding, NULL, encodep); | 4660 | coding->produced = len_byte; |
| 4892 | if (from_byte == to_byte | 4661 | coding->produced_char = len; |
| 4893 | && (encodep || NILP (coding->post_read_conversion)) | 4662 | if (!replace) |
| 4894 | && ! CODING_REQUIRE_FLUSHING (coding)) | 4663 | /* We must record and adjust for this new text now. */ |
| 4895 | { | 4664 | adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len); |
| 4896 | coding->produced = len_byte; | 4665 | return 0; |
| 4897 | coding->produced_char = multibyte ? len : len_byte; | 4666 | } |
| 4898 | if (!replace) | ||
| 4899 | /* We must record and adjust for this new text now. */ | ||
| 4900 | adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len); | ||
| 4901 | return 0; | ||
| 4902 | } | ||
| 4903 | |||
| 4904 | head_skip = from_byte - from_byte_orig; | ||
| 4905 | tail_skip = to_byte_orig - to_byte; | ||
| 4906 | total_skip = head_skip + tail_skip; | ||
| 4907 | from += head_skip; | ||
| 4908 | to -= tail_skip; | ||
| 4909 | len -= total_skip; len_byte -= total_skip; | ||
| 4910 | 4667 | ||
| 4911 | if (coding->cmp_data) | 4668 | head_skip = from_byte - from_byte_orig; |
| 4912 | coding->cmp_data->char_offset = from; | 4669 | tail_skip = to_byte_orig - to_byte; |
| 4913 | } | 4670 | total_skip = head_skip + tail_skip; |
| 4671 | from += head_skip; | ||
| 4672 | to -= tail_skip; | ||
| 4673 | len -= total_skip; len_byte -= total_skip; | ||
| 4674 | } | ||
| 4914 | 4675 | ||
| 4915 | /* The code conversion routine can not preserve text properties for | 4676 | /* The code conversion routine can not preserve text properties for |
| 4916 | now. So, we must remove all text properties in the region. | 4677 | now. So, we must remove all text properties in the region. |
| @@ -4947,14 +4708,27 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace) | |||
| 4947 | if (Z - GPT < END_UNCHANGED) | 4708 | if (Z - GPT < END_UNCHANGED) |
| 4948 | END_UNCHANGED = Z - GPT; | 4709 | END_UNCHANGED = Z - GPT; |
| 4949 | 4710 | ||
| 4711 | if (!encodep && coding->src_multibyte) | ||
| 4712 | { | ||
| 4713 | /* Decoding routines expect that the source text is unibyte. | ||
| 4714 | We must convert 8-bit characters of multibyte form to | ||
| 4715 | unibyte. */ | ||
| 4716 | int len_byte_orig = len_byte; | ||
| 4717 | len_byte = str_as_unibyte (GAP_END_ADDR - len_byte, len_byte); | ||
| 4718 | if (len_byte < len_byte_orig) | ||
| 4719 | safe_bcopy (GAP_END_ADDR - len_byte_orig, GAP_END_ADDR - len_byte, | ||
| 4720 | len_byte); | ||
| 4721 | coding->src_multibyte = 0; | ||
| 4722 | } | ||
| 4723 | |||
| 4950 | for (;;) | 4724 | for (;;) |
| 4951 | { | 4725 | { |
| 4952 | int result; | 4726 | int result; |
| 4953 | 4727 | ||
| 4954 | /* The buffer memory is now: | 4728 | /* The buffer memory is now: |
| 4955 | +--------+converted-text+---------+-------original-text------+---+ | 4729 | +--------+converted-text+---------+-------original-text-------+---+ |
| 4956 | |<-from->|<--inserted-->|---------|<-----------len---------->|---| | 4730 | |<-from->|<--inserted-->|---------|<--------len_byte--------->|---| |
| 4957 | |<------------------- GAP_SIZE -------------------->| */ | 4731 | |<---------------------- GAP ----------------------->| */ |
| 4958 | src = GAP_END_ADDR - len_byte; | 4732 | src = GAP_END_ADDR - len_byte; |
| 4959 | dst = GPT_ADDR + inserted_byte; | 4733 | dst = GPT_ADDR + inserted_byte; |
| 4960 | 4734 | ||
| @@ -4964,15 +4738,10 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace) | |||
| 4964 | result = decode_coding (coding, src, dst, len_byte, 0); | 4738 | result = decode_coding (coding, src, dst, len_byte, 0); |
| 4965 | 4739 | ||
| 4966 | /* The buffer memory is now: | 4740 | /* The buffer memory is now: |
| 4967 | +--------+-------converted-text--------+--+---original-text--+---+ | 4741 | +--------+-------converted-text----+--+------original-text----+---+ |
| 4968 | |<-from->|<--inserted-->|<--produced-->|--|<-(len-consumed)->|---| | 4742 | |<-from->|<-inserted->|<-produced->|--|<-(len_byte-consumed)->|---| |
| 4969 | |<------------------- GAP_SIZE -------------------->| */ | 4743 | |<---------------------- GAP ----------------------->| */ |
| 4970 | 4744 | ||
| 4971 | if (coding->fake_multibyte) | ||
| 4972 | fake_multibyte = 1; | ||
| 4973 | |||
| 4974 | if (!encodep && !multibyte) | ||
| 4975 | coding->produced_char = coding->produced; | ||
| 4976 | inserted += coding->produced_char; | 4745 | inserted += coding->produced_char; |
| 4977 | inserted_byte += coding->produced; | 4746 | inserted_byte += coding->produced; |
| 4978 | len_byte -= coding->consumed; | 4747 | len_byte -= coding->consumed; |
| @@ -5068,13 +4837,11 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace) | |||
| 5068 | inserted_byte += len_byte; | 4837 | inserted_byte += len_byte; |
| 5069 | while (len_byte--) | 4838 | while (len_byte--) |
| 5070 | *dst++ = *src++; | 4839 | *dst++ = *src++; |
| 5071 | fake_multibyte = 1; | ||
| 5072 | break; | 4840 | break; |
| 5073 | } | 4841 | } |
| 5074 | if (result == CODING_FINISH_INTERRUPT) | 4842 | if (result == CODING_FINISH_INTERRUPT) |
| 5075 | { | 4843 | { |
| 5076 | /* The conversion procedure was interrupted by a user. */ | 4844 | /* The conversion procedure was interrupted by a user. */ |
| 5077 | fake_multibyte = 1; | ||
| 5078 | break; | 4845 | break; |
| 5079 | } | 4846 | } |
| 5080 | /* Now RESULT == CODING_FINISH_INSUFFICIENT_DST */ | 4847 | /* Now RESULT == CODING_FINISH_INSUFFICIENT_DST */ |
| @@ -5082,7 +4849,6 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace) | |||
| 5082 | { | 4849 | { |
| 5083 | /* It's quite strange to require more memory without | 4850 | /* It's quite strange to require more memory without |
| 5084 | consuming any bytes. Perhaps CCL program bug. */ | 4851 | consuming any bytes. Perhaps CCL program bug. */ |
| 5085 | fake_multibyte = 1; | ||
| 5086 | break; | 4852 | break; |
| 5087 | } | 4853 | } |
| 5088 | if (first) | 4854 | if (first) |
| @@ -5118,11 +4884,24 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace) | |||
| 5118 | } | 4884 | } |
| 5119 | if (src - dst > 0) *dst = 0; /* Put an anchor. */ | 4885 | if (src - dst > 0) *dst = 0; /* Put an anchor. */ |
| 5120 | 4886 | ||
| 5121 | if (multibyte | 4887 | if (encodep && coding->dst_multibyte) |
| 5122 | && (encodep | 4888 | { |
| 5123 | || fake_multibyte | 4889 | /* The output is unibyte. We must convert 8-bit characters to |
| 5124 | || (to - from) != (to_byte - from_byte))) | 4890 | multibyte form. */ |
| 5125 | inserted = multibyte_chars_in_text (GPT_ADDR, inserted_byte); | 4891 | if (inserted_byte * 2 > GAP_SIZE) |
| 4892 | { | ||
| 4893 | GAP_SIZE -= inserted_byte; | ||
| 4894 | ZV += inserted_byte; Z += inserted_byte; | ||
| 4895 | ZV_BYTE += inserted_byte; Z_BYTE += inserted_byte; | ||
| 4896 | GPT += inserted_byte; GPT_BYTE += inserted_byte; | ||
| 4897 | make_gap (inserted_byte - GAP_SIZE); | ||
| 4898 | GAP_SIZE += inserted_byte; | ||
| 4899 | ZV -= inserted_byte; Z -= inserted_byte; | ||
| 4900 | ZV_BYTE -= inserted_byte; Z_BYTE -= inserted_byte; | ||
| 4901 | GPT -= inserted_byte; GPT_BYTE -= inserted_byte; | ||
| 4902 | } | ||
| 4903 | inserted_byte = str_to_multibyte (GPT_ADDR, GAP_SIZE, inserted_byte); | ||
| 4904 | } | ||
| 5126 | 4905 | ||
| 5127 | /* If we have shrunk the conversion area, adjust it now. */ | 4906 | /* If we have shrunk the conversion area, adjust it now. */ |
| 5128 | if (total_skip > 0) | 4907 | if (total_skip > 0) |
| @@ -5146,7 +4925,8 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace) | |||
| 5146 | coding_restore_composition (coding, Fcurrent_buffer ()); | 4925 | coding_restore_composition (coding, Fcurrent_buffer ()); |
| 5147 | coding_free_composition_data (coding); | 4926 | coding_free_composition_data (coding); |
| 5148 | 4927 | ||
| 5149 | if (! encodep && ! NILP (coding->post_read_conversion)) | 4928 | if (! inhibit_pre_post_conversion |
| 4929 | && ! encodep && ! NILP (coding->post_read_conversion)) | ||
| 5150 | { | 4930 | { |
| 5151 | Lisp_Object val; | 4931 | Lisp_Object val; |
| 5152 | int count = specpdl_ptr - specpdl; | 4932 | int count = specpdl_ptr - specpdl; |
| @@ -5192,52 +4972,58 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace) | |||
| 5192 | } | 4972 | } |
| 5193 | 4973 | ||
| 5194 | Lisp_Object | 4974 | Lisp_Object |
| 5195 | code_convert_string (str, coding, encodep, nocopy) | 4975 | run_pre_post_conversion_on_str (str, coding, encodep) |
| 5196 | Lisp_Object str; | 4976 | Lisp_Object str; |
| 5197 | struct coding_system *coding; | 4977 | struct coding_system *coding; |
| 5198 | int encodep, nocopy; | 4978 | int encodep; |
| 4979 | { | ||
| 4980 | int count = specpdl_ptr - specpdl; | ||
| 4981 | struct gcpro gcpro1; | ||
| 4982 | struct buffer *prev = current_buffer; | ||
| 4983 | int multibyte = STRING_MULTIBYTE (str); | ||
| 4984 | |||
| 4985 | record_unwind_protect (Fset_buffer, Fcurrent_buffer ()); | ||
| 4986 | record_unwind_protect (code_convert_region_unwind, Qnil); | ||
| 4987 | GCPRO1 (str); | ||
| 4988 | temp_output_buffer_setup (" *code-converting-work*"); | ||
| 4989 | set_buffer_internal (XBUFFER (Vstandard_output)); | ||
| 4990 | /* We must insert the contents of STR as is without | ||
| 4991 | unibyte<->multibyte conversion. For that, we adjust the | ||
| 4992 | multibyteness of the working buffer to that of STR. */ | ||
| 4993 | Ferase_buffer (); | ||
| 4994 | current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil; | ||
| 4995 | insert_from_string (str, 0, 0, | ||
| 4996 | XSTRING (str)->size, STRING_BYTES (XSTRING (str)), 0); | ||
| 4997 | UNGCPRO; | ||
| 4998 | inhibit_pre_post_conversion = 1; | ||
| 4999 | if (encodep) | ||
| 5000 | call2 (coding->pre_write_conversion, make_number (BEG), make_number (Z)); | ||
| 5001 | else | ||
| 5002 | call1 (coding->post_read_conversion, make_number (Z - BEG)); | ||
| 5003 | inhibit_pre_post_conversion = 0; | ||
| 5004 | str = make_buffer_string (BEG, Z, 0); | ||
| 5005 | return unbind_to (count, str); | ||
| 5006 | } | ||
| 5007 | |||
| 5008 | Lisp_Object | ||
| 5009 | decode_coding_string (str, coding, nocopy) | ||
| 5010 | Lisp_Object str; | ||
| 5011 | struct coding_system *coding; | ||
| 5012 | int nocopy; | ||
| 5199 | { | 5013 | { |
| 5200 | int len; | 5014 | int len; |
| 5201 | char *buf; | 5015 | char *buf; |
| 5202 | int from = 0, to = XSTRING (str)->size; | 5016 | int from, to, to_byte; |
| 5203 | int to_byte = STRING_BYTES (XSTRING (str)); | ||
| 5204 | struct gcpro gcpro1; | 5017 | struct gcpro gcpro1; |
| 5205 | Lisp_Object saved_coding_symbol; | 5018 | Lisp_Object saved_coding_symbol; |
| 5206 | int result; | 5019 | int result; |
| 5207 | 5020 | ||
| 5208 | saved_coding_symbol = Qnil; | 5021 | from = 0; |
| 5209 | if ((encodep && !NILP (coding->pre_write_conversion) | 5022 | to = XSTRING (str)->size; |
| 5210 | || !encodep && !NILP (coding->post_read_conversion))) | 5023 | to_byte = STRING_BYTES (XSTRING (str)); |
| 5211 | { | ||
| 5212 | /* Since we have to call Lisp functions which assume target text | ||
| 5213 | is in a buffer, after setting a temporary buffer, call | ||
| 5214 | code_convert_region. */ | ||
| 5215 | int count = specpdl_ptr - specpdl; | ||
| 5216 | struct buffer *prev = current_buffer; | ||
| 5217 | int multibyte = STRING_MULTIBYTE (str); | ||
| 5218 | |||
| 5219 | record_unwind_protect (Fset_buffer, Fcurrent_buffer ()); | ||
| 5220 | record_unwind_protect (code_convert_region_unwind, Qnil); | ||
| 5221 | inhibit_pre_post_conversion = 1; | ||
| 5222 | GCPRO1 (str); | ||
| 5223 | temp_output_buffer_setup (" *code-converting-work*"); | ||
| 5224 | set_buffer_internal (XBUFFER (Vstandard_output)); | ||
| 5225 | /* We must insert the contents of STR as is without | ||
| 5226 | unibyte<->multibyte conversion. For that, we adjust the | ||
| 5227 | multibyteness of the working buffer to that of STR. */ | ||
| 5228 | Ferase_buffer (); /* for safety */ | ||
| 5229 | current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil; | ||
| 5230 | insert_from_string (str, 0, 0, to, to_byte, 0); | ||
| 5231 | UNGCPRO; | ||
| 5232 | code_convert_region (BEGV, BEGV_BYTE, ZV, ZV_BYTE, coding, encodep, 1); | ||
| 5233 | /* Make a unibyte string if we are encoding, otherwise make a | ||
| 5234 | multibyte string. */ | ||
| 5235 | Fset_buffer_multibyte (encodep ? Qnil : Qt); | ||
| 5236 | str = make_buffer_string (BEGV, ZV, 0); | ||
| 5237 | return unbind_to (count, str); | ||
| 5238 | } | ||
| 5239 | 5024 | ||
| 5240 | if (! encodep && CODING_REQUIRE_DETECTION (coding)) | 5025 | saved_coding_symbol = Qnil; |
| 5026 | if (CODING_REQUIRE_DETECTION (coding)) | ||
| 5241 | { | 5027 | { |
| 5242 | /* See the comments in code_convert_region. */ | 5028 | /* See the comments in code_convert_region. */ |
| 5243 | if (coding->type == coding_type_undecided) | 5029 | if (coding->type == coding_type_undecided) |
| @@ -5258,41 +5044,39 @@ code_convert_string (str, coding, encodep, nocopy) | |||
| 5258 | } | 5044 | } |
| 5259 | } | 5045 | } |
| 5260 | 5046 | ||
| 5261 | if (encodep | 5047 | if (! CODING_REQUIRE_DECODING (coding)) |
| 5262 | ? ! CODING_REQUIRE_ENCODING (coding) | ||
| 5263 | : ! CODING_REQUIRE_DECODING (coding)) | ||
| 5264 | return (nocopy ? str : Fcopy_sequence (str)); | ||
| 5265 | |||
| 5266 | if (coding->composing != COMPOSITION_DISABLED) | ||
| 5267 | { | 5048 | { |
| 5268 | if (encodep) | 5049 | if (!STRING_MULTIBYTE (str)) |
| 5269 | coding_save_composition (coding, from, to, str); | 5050 | { |
| 5270 | else | 5051 | str = Fstring_as_multibyte (str); |
| 5271 | coding_allocate_composition_data (coding, from); | 5052 | nocopy = 1; |
| 5053 | } | ||
| 5054 | return (nocopy ? str : Fcopy_sequence (str)); | ||
| 5272 | } | 5055 | } |
| 5273 | 5056 | ||
| 5274 | /* For conversion by CCL program and for encoding with composition | 5057 | if (STRING_MULTIBYTE (str)) |
| 5275 | handling, we can't skip any character because we may convert or | ||
| 5276 | compose even ASCII characters. */ | ||
| 5277 | if (coding->type != coding_type_ccl | ||
| 5278 | && (!encodep || coding->cmp_data == NULL)) | ||
| 5279 | { | 5058 | { |
| 5280 | /* Try to skip the heading and tailing ASCIIs. */ | 5059 | /* Decoding routines expect the source text to be unibyte. */ |
| 5281 | int from_orig = from; | 5060 | str = Fstring_as_unibyte (str); |
| 5061 | nocopy = 1; | ||
| 5062 | coding->src_multibyte = 0; | ||
| 5063 | } | ||
| 5064 | coding->dst_multibyte = 1; | ||
| 5282 | 5065 | ||
| 5283 | SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data, | 5066 | if (coding->composing != COMPOSITION_DISABLED) |
| 5284 | encodep); | 5067 | coding_allocate_composition_data (coding, from); |
| 5285 | if (from == to_byte) | ||
| 5286 | return (nocopy ? str : Fcopy_sequence (str)); | ||
| 5287 | 5068 | ||
| 5288 | if (coding->cmp_data) | 5069 | /* Try to skip the heading and tailing ASCIIs. */ |
| 5289 | coding->cmp_data->char_offset = from; | 5070 | { |
| 5290 | } | 5071 | int from_orig = from; |
| 5291 | 5072 | ||
| 5292 | if (encodep) | 5073 | SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data, |
| 5293 | len = encoding_buffer_size (coding, to_byte - from); | 5074 | 0); |
| 5294 | else | 5075 | if (from == to_byte) |
| 5295 | len = decoding_buffer_size (coding, to_byte - from); | 5076 | return (nocopy ? str : Fcopy_sequence (str)); |
| 5077 | } | ||
| 5078 | |||
| 5079 | len = decoding_buffer_size (coding, to_byte - from); | ||
| 5296 | len += from + STRING_BYTES (XSTRING (str)) - to_byte; | 5080 | len += from + STRING_BYTES (XSTRING (str)) - to_byte; |
| 5297 | GCPRO1 (str); | 5081 | GCPRO1 (str); |
| 5298 | buf = get_conversion_buffer (len); | 5082 | buf = get_conversion_buffer (len); |
| @@ -5300,39 +5084,103 @@ code_convert_string (str, coding, encodep, nocopy) | |||
| 5300 | 5084 | ||
| 5301 | if (from > 0) | 5085 | if (from > 0) |
| 5302 | bcopy (XSTRING (str)->data, buf, from); | 5086 | bcopy (XSTRING (str)->data, buf, from); |
| 5303 | result = (encodep | 5087 | result = decode_coding (coding, XSTRING (str)->data + from, |
| 5304 | ? encode_coding (coding, XSTRING (str)->data + from, | 5088 | buf + from, to_byte - from, len); |
| 5305 | buf + from, to_byte - from, len) | 5089 | if (result == CODING_FINISH_INCONSISTENT_EOL) |
| 5306 | : decode_coding (coding, XSTRING (str)->data + from, | ||
| 5307 | buf + from, to_byte - from, len)); | ||
| 5308 | if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL) | ||
| 5309 | { | 5090 | { |
| 5310 | /* We simply try to decode the whole string again but without | 5091 | /* We simply try to decode the whole string again but without |
| 5311 | eol-conversion this time. */ | 5092 | eol-conversion this time. */ |
| 5312 | coding->eol_type = CODING_EOL_LF; | 5093 | coding->eol_type = CODING_EOL_LF; |
| 5313 | coding->symbol = saved_coding_symbol; | 5094 | coding->symbol = saved_coding_symbol; |
| 5314 | coding_free_composition_data (coding); | 5095 | coding_free_composition_data (coding); |
| 5315 | return code_convert_string (str, coding, encodep, nocopy); | 5096 | return decode_coding_string (str, coding, nocopy); |
| 5316 | } | 5097 | } |
| 5317 | 5098 | ||
| 5318 | bcopy (XSTRING (str)->data + to_byte, buf + from + coding->produced, | 5099 | bcopy (XSTRING (str)->data + to_byte, buf + from + coding->produced, |
| 5319 | STRING_BYTES (XSTRING (str)) - to_byte); | 5100 | STRING_BYTES (XSTRING (str)) - to_byte); |
| 5320 | 5101 | ||
| 5321 | len = from + STRING_BYTES (XSTRING (str)) - to_byte; | 5102 | len = from + STRING_BYTES (XSTRING (str)) - to_byte; |
| 5322 | if (encodep) | 5103 | str = make_multibyte_string (buf, len + coding->produced_char, |
| 5323 | str = make_unibyte_string (buf, len + coding->produced); | 5104 | len + coding->produced); |
| 5324 | else | 5105 | |
| 5106 | if (coding->cmp_data && coding->cmp_data->used) | ||
| 5107 | coding_restore_composition (coding, str); | ||
| 5108 | coding_free_composition_data (coding); | ||
| 5109 | |||
| 5110 | if (SYMBOLP (coding->post_read_conversion) | ||
| 5111 | && !NILP (Ffboundp (coding->post_read_conversion))) | ||
| 5112 | str = run_pre_post_conversion_on_str (str, 0); | ||
| 5113 | |||
| 5114 | return str; | ||
| 5115 | } | ||
| 5116 | |||
| 5117 | Lisp_Object | ||
| 5118 | encode_coding_string (str, coding, nocopy) | ||
| 5119 | Lisp_Object str; | ||
| 5120 | struct coding_system *coding; | ||
| 5121 | int nocopy; | ||
| 5122 | { | ||
| 5123 | int len; | ||
| 5124 | char *buf; | ||
| 5125 | int from, to, to_byte; | ||
| 5126 | struct gcpro gcpro1; | ||
| 5127 | Lisp_Object saved_coding_symbol; | ||
| 5128 | int result; | ||
| 5129 | |||
| 5130 | if (SYMBOLP (coding->pre_write_conversion) | ||
| 5131 | && !NILP (Ffboundp (coding->pre_write_conversion))) | ||
| 5132 | str = run_pre_post_conversion_on_str (str, 1); | ||
| 5133 | |||
| 5134 | from = 0; | ||
| 5135 | to = XSTRING (str)->size; | ||
| 5136 | to_byte = STRING_BYTES (XSTRING (str)); | ||
| 5137 | |||
| 5138 | saved_coding_symbol = Qnil; | ||
| 5139 | if (! CODING_REQUIRE_ENCODING (coding)) | ||
| 5325 | { | 5140 | { |
| 5326 | int chars= (coding->fake_multibyte | 5141 | if (STRING_MULTIBYTE (str)) |
| 5327 | ? multibyte_chars_in_text (buf + from, coding->produced) | 5142 | { |
| 5328 | : coding->produced_char); | 5143 | str = Fstring_as_unibyte (str); |
| 5329 | str = make_multibyte_string (buf, len + chars, len + coding->produced); | 5144 | nocopy = 1; |
| 5145 | } | ||
| 5146 | return (nocopy ? str : Fcopy_sequence (str)); | ||
| 5330 | } | 5147 | } |
| 5331 | 5148 | ||
| 5332 | if (!encodep && coding->cmp_data && coding->cmp_data->used) | 5149 | /* Encoding routines determine the multibyteness of the source text |
| 5333 | coding_restore_composition (coding, str); | 5150 | by coding->src_multibyte. */ |
| 5151 | coding->src_multibyte = STRING_MULTIBYTE (str); | ||
| 5152 | coding->dst_multibyte = 0; | ||
| 5153 | |||
| 5154 | if (coding->composing != COMPOSITION_DISABLED) | ||
| 5155 | coding_save_composition (coding, from, to, str); | ||
| 5156 | |||
| 5157 | /* Try to skip the heading and tailing ASCIIs. */ | ||
| 5158 | { | ||
| 5159 | int from_orig = from; | ||
| 5334 | 5160 | ||
| 5161 | SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data, | ||
| 5162 | 1); | ||
| 5163 | if (from == to_byte) | ||
| 5164 | return (nocopy ? str : Fcopy_sequence (str)); | ||
| 5165 | } | ||
| 5166 | |||
| 5167 | len = encoding_buffer_size (coding, to_byte - from); | ||
| 5168 | len += from + STRING_BYTES (XSTRING (str)) - to_byte; | ||
| 5169 | GCPRO1 (str); | ||
| 5170 | buf = get_conversion_buffer (len); | ||
| 5171 | UNGCPRO; | ||
| 5172 | |||
| 5173 | if (from > 0) | ||
| 5174 | bcopy (XSTRING (str)->data, buf, from); | ||
| 5175 | result = encode_coding (coding, XSTRING (str)->data + from, | ||
| 5176 | buf + from, to_byte - from, len); | ||
| 5177 | bcopy (XSTRING (str)->data + to_byte, buf + from + coding->produced, | ||
| 5178 | STRING_BYTES (XSTRING (str)) - to_byte); | ||
| 5179 | |||
| 5180 | len = from + STRING_BYTES (XSTRING (str)) - to_byte; | ||
| 5181 | str = make_unibyte_string (buf, len + coding->produced); | ||
| 5335 | coding_free_composition_data (coding); | 5182 | coding_free_composition_data (coding); |
| 5183 | |||
| 5336 | return str; | 5184 | return str; |
| 5337 | } | 5185 | } |
| 5338 | 5186 | ||
| @@ -5543,6 +5391,8 @@ code_convert_region1 (start, end, coding_system, encodep) | |||
| 5543 | error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data); | 5391 | error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data); |
| 5544 | 5392 | ||
| 5545 | coding.mode |= CODING_MODE_LAST_BLOCK; | 5393 | coding.mode |= CODING_MODE_LAST_BLOCK; |
| 5394 | coding.src_multibyte = coding.dst_multibyte | ||
| 5395 | = !NILP (current_buffer->enable_multibyte_characters); | ||
| 5546 | code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to), | 5396 | code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to), |
| 5547 | &coding, encodep, 1); | 5397 | &coding, encodep, 1); |
| 5548 | Vlast_coding_system_used = coding.symbol; | 5398 | Vlast_coding_system_used = coding.symbol; |
| @@ -5596,7 +5446,9 @@ code_convert_string1 (string, coding_system, nocopy, encodep) | |||
| 5596 | error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data); | 5446 | error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data); |
| 5597 | 5447 | ||
| 5598 | coding.mode |= CODING_MODE_LAST_BLOCK; | 5448 | coding.mode |= CODING_MODE_LAST_BLOCK; |
| 5599 | string = code_convert_string (string, &coding, encodep, !NILP (nocopy)); | 5449 | string = (encodep |
| 5450 | ? encode_coding_string (string, &coding, !NILP (nocopy)) | ||
| 5451 | : decode_coding_string (string, &coding, !NILP (nocopy))); | ||
| 5600 | Vlast_coding_system_used = coding.symbol; | 5452 | Vlast_coding_system_used = coding.symbol; |
| 5601 | 5453 | ||
| 5602 | return string; | 5454 | return string; |
| @@ -5654,7 +5506,9 @@ code_convert_string_norecord (string, coding_system, encodep) | |||
| 5654 | 5506 | ||
| 5655 | coding.composing = COMPOSITION_DISABLED; | 5507 | coding.composing = COMPOSITION_DISABLED; |
| 5656 | coding.mode |= CODING_MODE_LAST_BLOCK; | 5508 | coding.mode |= CODING_MODE_LAST_BLOCK; |
| 5657 | return code_convert_string (string, &coding, encodep, 1); | 5509 | return (encodep |
| 5510 | ? encode_coding_string (string, &coding, 1) | ||
| 5511 | : decode_coding_string (string, &coding, 1)); | ||
| 5658 | } | 5512 | } |
| 5659 | 5513 | ||
| 5660 | DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0, | 5514 | DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0, |
| @@ -5673,8 +5527,7 @@ Return the corresponding character.") | |||
| 5673 | if (s2 < 0x80) | 5527 | if (s2 < 0x80) |
| 5674 | XSETFASTINT (val, s2); | 5528 | XSETFASTINT (val, s2); |
| 5675 | else if (s2 >= 0xA0 || s2 <= 0xDF) | 5529 | else if (s2 >= 0xA0 || s2 <= 0xDF) |
| 5676 | XSETFASTINT (val, | 5530 | XSETFASTINT (val, MAKE_CHAR (charset_katakana_jisx0201, s2, 0)); |
| 5677 | MAKE_NON_ASCII_CHAR (charset_katakana_jisx0201, s2, 0)); | ||
| 5678 | else | 5531 | else |
| 5679 | error ("Invalid Shift JIS code: %x", XFASTINT (code)); | 5532 | error ("Invalid Shift JIS code: %x", XFASTINT (code)); |
| 5680 | } | 5533 | } |
| @@ -5684,7 +5537,7 @@ Return the corresponding character.") | |||
| 5684 | || (s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)) | 5537 | || (s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)) |
| 5685 | error ("Invalid Shift JIS code: %x", XFASTINT (code)); | 5538 | error ("Invalid Shift JIS code: %x", XFASTINT (code)); |
| 5686 | DECODE_SJIS (s1, s2, c1, c2); | 5539 | DECODE_SJIS (s1, s2, c1, c2); |
| 5687 | XSETFASTINT (val, MAKE_NON_ASCII_CHAR (charset_jisx0208, c1, c2)); | 5540 | XSETFASTINT (val, MAKE_CHAR (charset_jisx0208, c1, c2)); |
| 5688 | } | 5541 | } |
| 5689 | return val; | 5542 | return val; |
| 5690 | } | 5543 | } |
| @@ -5744,7 +5597,7 @@ Return the corresponding character.") | |||
| 5744 | || (b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)) | 5597 | || (b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)) |
| 5745 | error ("Invalid BIG5 code: %x", XFASTINT (code)); | 5598 | error ("Invalid BIG5 code: %x", XFASTINT (code)); |
| 5746 | DECODE_BIG5 (b1, b2, charset, c1, c2); | 5599 | DECODE_BIG5 (b1, b2, charset, c1, c2); |
| 5747 | XSETFASTINT (val, MAKE_NON_ASCII_CHAR (charset, c1, c2)); | 5600 | XSETFASTINT (val, MAKE_CHAR (charset, c1, c2)); |
| 5748 | } | 5601 | } |
| 5749 | return val; | 5602 | return val; |
| 5750 | } | 5603 | } |
| @@ -5789,6 +5642,8 @@ DEFUN ("set-terminal-coding-system-internal", | |||
| 5789 | terminal_coding.flags |= CODING_FLAG_ISO_SAFE; | 5642 | terminal_coding.flags |= CODING_FLAG_ISO_SAFE; |
| 5790 | /* Characer composition should be disabled. */ | 5643 | /* Characer composition should be disabled. */ |
| 5791 | terminal_coding.composing = COMPOSITION_DISABLED; | 5644 | terminal_coding.composing = COMPOSITION_DISABLED; |
| 5645 | terminal_coding.src_multibyte = 1; | ||
| 5646 | terminal_coding.dst_multibyte = 0; | ||
| 5792 | return Qnil; | 5647 | return Qnil; |
| 5793 | } | 5648 | } |
| 5794 | 5649 | ||
| @@ -5803,6 +5658,8 @@ DEFUN ("set-safe-terminal-coding-system-internal", | |||
| 5803 | &safe_terminal_coding); | 5658 | &safe_terminal_coding); |
| 5804 | /* Characer composition should be disabled. */ | 5659 | /* Characer composition should be disabled. */ |
| 5805 | safe_terminal_coding.composing = COMPOSITION_DISABLED; | 5660 | safe_terminal_coding.composing = COMPOSITION_DISABLED; |
| 5661 | safe_terminal_coding.src_multibyte = 1; | ||
| 5662 | safe_terminal_coding.dst_multibyte = 0; | ||
| 5806 | return Qnil; | 5663 | return Qnil; |
| 5807 | } | 5664 | } |
| 5808 | 5665 | ||
| @@ -6024,11 +5881,11 @@ init_coding_once () | |||
| 6024 | 5881 | ||
| 6025 | /* ISO2022 specific initialize routine. */ | 5882 | /* ISO2022 specific initialize routine. */ |
| 6026 | for (i = 0; i < 0x20; i++) | 5883 | for (i = 0; i < 0x20; i++) |
| 6027 | iso_code_class[i] = ISO_control_code; | 5884 | iso_code_class[i] = ISO_control_0; |
| 6028 | for (i = 0x21; i < 0x7F; i++) | 5885 | for (i = 0x21; i < 0x7F; i++) |
| 6029 | iso_code_class[i] = ISO_graphic_plane_0; | 5886 | iso_code_class[i] = ISO_graphic_plane_0; |
| 6030 | for (i = 0x80; i < 0xA0; i++) | 5887 | for (i = 0x80; i < 0xA0; i++) |
| 6031 | iso_code_class[i] = ISO_control_code; | 5888 | iso_code_class[i] = ISO_control_1; |
| 6032 | for (i = 0xA1; i < 0xFF; i++) | 5889 | for (i = 0xA1; i < 0xFF; i++) |
| 6033 | iso_code_class[i] = ISO_graphic_plane_1; | 5890 | iso_code_class[i] = ISO_graphic_plane_1; |
| 6034 | iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F; | 5891 | iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F; |