diff options
| author | Kenichi Handa | 1997-02-27 11:10:42 +0000 |
|---|---|---|
| committer | Kenichi Handa | 1997-02-27 11:10:42 +0000 |
| commit | e0e989f659d1b8fffcc9055028a50c8dc0c153fb (patch) | |
| tree | 93207cf7f5697bcc61d89e55818c8289f6c76e74 /src/coding.c | |
| parent | 8ddb35b2ab9e9a0a36879596983882c19fedb110 (diff) | |
| download | emacs-e0e989f659d1b8fffcc9055028a50c8dc0c153fb.tar.gz emacs-e0e989f659d1b8fffcc9055028a50c8dc0c153fb.zip | |
(create_process, Fopen_network_stream): Typo in indexes
of array proc_encode_coding_system fixed.
Remove prefix "coding-system-" from coding system symbol names.
(encode_coding) : Fix typo ("=" -> "==").
(detect_coding_iso2022): Detect coding-category-iso-8-2
more precisely.
(ENCODE_RESET_PLANE_AND_REGISTER): Argument `eol' is
deleted. Don't call ENCODE_DESIGNATION if nothing designated
initially.
(encode_designation_at_bol) New function.
(encode_coding_iso2022): Handle CODING_FLAG_ISO_INIT_AT_BOL and
CODING_FLAG_ISO_DESIGNATE_AT_BOL.
(setup_coding_system): Now, flags of ISO2022 coding
systems contains charsets instead of charset IDs.
(detect_coding_iso2022, decode_coding_iso2022): Make the code
robust against invalid SI and SO.
(Ffind_coding_system, syms_of_coding): Escape newlines in docstring.
(setup_coding_system): Correct setting coding->symbol
and coding->eol_type. The performance improved.
Diffstat (limited to 'src/coding.c')
| -rw-r--r-- | src/coding.c | 421 |
1 files changed, 254 insertions, 167 deletions
diff --git a/src/coding.c b/src/coding.c index 7d93362e0a3..929e7e666bb 100644 --- a/src/coding.c +++ b/src/coding.c | |||
| @@ -581,45 +581,43 @@ int | |||
| 581 | detect_coding_iso2022 (src, src_end) | 581 | detect_coding_iso2022 (src, src_end) |
| 582 | unsigned char *src, *src_end; | 582 | unsigned char *src, *src_end; |
| 583 | { | 583 | { |
| 584 | unsigned char graphic_register[4]; | 584 | unsigned char c, g1 = 0; |
| 585 | unsigned char c, esc_cntl; | ||
| 586 | int mask = (CODING_CATEGORY_MASK_ISO_7 | 585 | int mask = (CODING_CATEGORY_MASK_ISO_7 |
| 587 | | CODING_CATEGORY_MASK_ISO_8_1 | 586 | | CODING_CATEGORY_MASK_ISO_8_1 |
| 588 | | CODING_CATEGORY_MASK_ISO_8_2); | 587 | | CODING_CATEGORY_MASK_ISO_8_2); |
| 589 | /* We may look ahead maximum 3 bytes. */ | 588 | /* We may look ahead at most 4 bytes. */ |
| 590 | unsigned char *adjusted_src_end = src_end - 3; | 589 | unsigned char *adjusted_src_end = src_end - 4; |
| 591 | int i; | 590 | int i; |
| 592 | 591 | ||
| 593 | for (i = 0; i < 4; i++) | 592 | while (src < src_end) |
| 594 | graphic_register[i] = CHARSET_ASCII; | ||
| 595 | |||
| 596 | while (src < adjusted_src_end) | ||
| 597 | { | 593 | { |
| 598 | c = *src++; | 594 | c = *src++; |
| 599 | switch (c) | 595 | switch (c) |
| 600 | { | 596 | { |
| 601 | case ISO_CODE_ESC: | 597 | case ISO_CODE_ESC: |
| 602 | if (src >= adjusted_src_end) | 598 | if (src >= src_end) |
| 603 | break; | 599 | break; |
| 604 | c = *src++; | 600 | c = *src++; |
| 605 | if (c == '$') | 601 | if (src + 2 >= src_end |
| 602 | && ((c >= '(' && c <= '/') | ||
| 603 | || c == '$' && ((*src >= '(' && *src <= '/') | ||
| 604 | || (*src >= '@' && *src <= 'B')))) | ||
| 606 | { | 605 | { |
| 607 | /* Designation of 2-byte character set. */ | 606 | /* Valid designation sequence. */ |
| 608 | if (src >= adjusted_src_end) | 607 | if (c == ')' || (c == '$' && *src == ')')) |
| 609 | break; | 608 | g1 = 1; |
| 610 | c = *src++; | 609 | src++; |
| 610 | break; | ||
| 611 | } | 611 | } |
| 612 | if ((c >= ')' && c <= '+') || (c >= '-' && c <= '/')) | ||
| 613 | /* Designation to graphic register 1, 2, or 3. */ | ||
| 614 | mask &= ~CODING_CATEGORY_MASK_ISO_7; | ||
| 615 | else if (c == 'N' || c == 'O' || c == 'n' || c == 'o') | 612 | else if (c == 'N' || c == 'O' || c == 'n' || c == 'o') |
| 616 | return CODING_CATEGORY_MASK_ISO_ELSE; | 613 | return CODING_CATEGORY_MASK_ISO_ELSE; |
| 617 | break; | 614 | break; |
| 618 | 615 | ||
| 619 | case ISO_CODE_SI: | ||
| 620 | case ISO_CODE_SO: | 616 | case ISO_CODE_SO: |
| 621 | return CODING_CATEGORY_MASK_ISO_ELSE; | 617 | if (g1) |
| 622 | 618 | return CODING_CATEGORY_MASK_ISO_ELSE; | |
| 619 | break; | ||
| 620 | |||
| 623 | case ISO_CODE_CSI: | 621 | case ISO_CODE_CSI: |
| 624 | case ISO_CODE_SS2: | 622 | case ISO_CODE_SS2: |
| 625 | case ISO_CODE_SS3: | 623 | case ISO_CODE_SS3: |
| @@ -636,9 +634,9 @@ detect_coding_iso2022 (src, src_end) | |||
| 636 | int count = 1; | 634 | int count = 1; |
| 637 | 635 | ||
| 638 | mask &= ~CODING_CATEGORY_MASK_ISO_7; | 636 | mask &= ~CODING_CATEGORY_MASK_ISO_7; |
| 639 | while (src < adjusted_src_end && *src >= 0xA0) | 637 | while (src < src_end && *src >= 0xA0) |
| 640 | count++, src++; | 638 | count++, src++; |
| 641 | if (count & 1 && src < adjusted_src_end) | 639 | if (count & 1 && src < src_end) |
| 642 | mask &= ~CODING_CATEGORY_MASK_ISO_8_2; | 640 | mask &= ~CODING_CATEGORY_MASK_ISO_8_2; |
| 643 | } | 641 | } |
| 644 | break; | 642 | break; |
| @@ -794,6 +792,8 @@ decode_coding_iso2022 (coding, source, destination, | |||
| 794 | break; | 792 | break; |
| 795 | 793 | ||
| 796 | case ISO_shift_out: | 794 | case ISO_shift_out: |
| 795 | if (CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0) | ||
| 796 | goto label_invalid_escape_sequence; | ||
| 797 | CODING_SPEC_ISO_INVOCATION (coding, 0) = 1; | 797 | CODING_SPEC_ISO_INVOCATION (coding, 0) = 1; |
| 798 | charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0); | 798 | charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0); |
| 799 | break; | 799 | break; |
| @@ -830,14 +830,10 @@ decode_coding_iso2022 (coding, source, destination, | |||
| 830 | case '&': /* revision of following character set */ | 830 | case '&': /* revision of following character set */ |
| 831 | ONE_MORE_BYTE (c1); | 831 | ONE_MORE_BYTE (c1); |
| 832 | if (!(c1 >= '@' && c1 <= '~')) | 832 | if (!(c1 >= '@' && c1 <= '~')) |
| 833 | { | 833 | goto label_invalid_escape_sequence; |
| 834 | goto label_invalid_escape_sequence; | ||
| 835 | } | ||
| 836 | ONE_MORE_BYTE (c1); | 834 | ONE_MORE_BYTE (c1); |
| 837 | if (c1 != ISO_CODE_ESC) | 835 | if (c1 != ISO_CODE_ESC) |
| 838 | { | 836 | goto label_invalid_escape_sequence; |
| 839 | goto label_invalid_escape_sequence; | ||
| 840 | } | ||
| 841 | ONE_MORE_BYTE (c1); | 837 | ONE_MORE_BYTE (c1); |
| 842 | goto label_escape_sequence; | 838 | goto label_escape_sequence; |
| 843 | 839 | ||
| @@ -859,26 +855,34 @@ decode_coding_iso2022 (coding, source, destination, | |||
| 859 | DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2); | 855 | DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2); |
| 860 | } | 856 | } |
| 861 | else | 857 | else |
| 862 | { | 858 | goto label_invalid_escape_sequence; |
| 863 | goto label_invalid_escape_sequence; | ||
| 864 | } | ||
| 865 | break; | 859 | break; |
| 866 | 860 | ||
| 867 | case 'n': /* invocation of locking-shift-2 */ | 861 | case 'n': /* invocation of locking-shift-2 */ |
| 862 | if (CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0) | ||
| 863 | goto label_invalid_escape_sequence; | ||
| 868 | CODING_SPEC_ISO_INVOCATION (coding, 0) = 2; | 864 | CODING_SPEC_ISO_INVOCATION (coding, 0) = 2; |
| 865 | charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0); | ||
| 869 | break; | 866 | break; |
| 870 | 867 | ||
| 871 | case 'o': /* invocation of locking-shift-3 */ | 868 | case 'o': /* invocation of locking-shift-3 */ |
| 869 | if (CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0) | ||
| 870 | goto label_invalid_escape_sequence; | ||
| 872 | CODING_SPEC_ISO_INVOCATION (coding, 0) = 3; | 871 | CODING_SPEC_ISO_INVOCATION (coding, 0) = 3; |
| 872 | charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0); | ||
| 873 | break; | 873 | break; |
| 874 | 874 | ||
| 875 | case 'N': /* invocation of single-shift-2 */ | 875 | case 'N': /* invocation of single-shift-2 */ |
| 876 | if (CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0) | ||
| 877 | goto label_invalid_escape_sequence; | ||
| 876 | ONE_MORE_BYTE (c1); | 878 | ONE_MORE_BYTE (c1); |
| 877 | charset = CODING_SPEC_ISO_DESIGNATION (coding, 2); | 879 | charset = CODING_SPEC_ISO_DESIGNATION (coding, 2); |
| 878 | DECODE_ISO_CHARACTER (charset, c1); | 880 | DECODE_ISO_CHARACTER (charset, c1); |
| 879 | break; | 881 | break; |
| 880 | 882 | ||
| 881 | case 'O': /* invocation of single-shift-3 */ | 883 | case 'O': /* invocation of single-shift-3 */ |
| 884 | if (CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0) | ||
| 885 | goto label_invalid_escape_sequence; | ||
| 882 | ONE_MORE_BYTE (c1); | 886 | ONE_MORE_BYTE (c1); |
| 883 | charset = CODING_SPEC_ISO_DESIGNATION (coding, 3); | 887 | charset = CODING_SPEC_ISO_DESIGNATION (coding, 3); |
| 884 | DECODE_ISO_CHARACTER (charset, c1); | 888 | DECODE_ISO_CHARACTER (charset, c1); |
| @@ -1246,24 +1250,63 @@ encode_invocation_designation (charset, coding, dst) | |||
| 1246 | 1250 | ||
| 1247 | /* Produce codes for designation and invocation to reset the graphic | 1251 | /* Produce codes for designation and invocation to reset the graphic |
| 1248 | planes and registers to initial state. */ | 1252 | planes and registers to initial state. */ |
| 1249 | #define ENCODE_RESET_PLANE_AND_REGISTER(eol) \ | 1253 | #define ENCODE_RESET_PLANE_AND_REGISTER \ |
| 1250 | do { \ | 1254 | do { \ |
| 1251 | int reg; \ | 1255 | int reg; \ |
| 1252 | if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0) \ | 1256 | if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0) \ |
| 1253 | ENCODE_SHIFT_IN; \ | 1257 | ENCODE_SHIFT_IN; \ |
| 1254 | for (reg = 0; reg < 4; reg++) \ | 1258 | for (reg = 0; reg < 4; reg++) \ |
| 1255 | { \ | 1259 | if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg) >= 0 \ |
| 1256 | if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg) < 0) \ | 1260 | && (CODING_SPEC_ISO_DESIGNATION (coding, reg) \ |
| 1257 | { \ | 1261 | != CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg))) \ |
| 1258 | if (eol) CODING_SPEC_ISO_DESIGNATION (coding, reg) = -1; \ | 1262 | ENCODE_DESIGNATION \ |
| 1259 | } \ | 1263 | (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg), reg, coding); \ |
| 1260 | else if (CODING_SPEC_ISO_DESIGNATION (coding, reg) \ | ||
| 1261 | != CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg)) \ | ||
| 1262 | ENCODE_DESIGNATION \ | ||
| 1263 | (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg), reg, coding); \ | ||
| 1264 | } \ | ||
| 1265 | } while (0) | 1264 | } while (0) |
| 1266 | 1265 | ||
| 1266 | int | ||
| 1267 | encode_designation_at_bol (coding, src, src_end, dstp) | ||
| 1268 | struct coding_system *coding; | ||
| 1269 | unsigned char *src, *src_end, **dstp; | ||
| 1270 | { | ||
| 1271 | int charset, reg, r[4]; | ||
| 1272 | unsigned char *dst = *dstp, c; | ||
| 1273 | for (reg = 0; reg < 4; reg++) r[reg] = -1; | ||
| 1274 | while (src < src_end && (c = *src++) != '\n') | ||
| 1275 | { | ||
| 1276 | switch (emacs_code_class[c]) | ||
| 1277 | { | ||
| 1278 | case EMACS_ascii_code: | ||
| 1279 | charset = CHARSET_ASCII; | ||
| 1280 | break; | ||
| 1281 | case EMACS_leading_code_2: | ||
| 1282 | if (++src >= src_end) continue; | ||
| 1283 | charset = c; | ||
| 1284 | break; | ||
| 1285 | case EMACS_leading_code_3: | ||
| 1286 | if ((src += 2) >= src_end) continue; | ||
| 1287 | charset = (c < LEADING_CODE_PRIVATE_11 ? c : *(src - 2)); | ||
| 1288 | break; | ||
| 1289 | case EMACS_leading_code_4: | ||
| 1290 | if ((src += 3) >= src_end) continue; | ||
| 1291 | charset = *(src - 3); | ||
| 1292 | break; | ||
| 1293 | default: | ||
| 1294 | continue; | ||
| 1295 | } | ||
| 1296 | reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset); | ||
| 1297 | if (r[reg] < 0 | ||
| 1298 | && CODING_SPEC_ISO_DESIGNATION (coding, reg) != charset) | ||
| 1299 | r[reg] = charset; | ||
| 1300 | } | ||
| 1301 | if (c != '\n' && !coding->last_block) | ||
| 1302 | return -1; | ||
| 1303 | for (reg = 0; reg < 4; reg++) | ||
| 1304 | if (r[reg] >= 0) | ||
| 1305 | ENCODE_DESIGNATION (r[reg], reg, coding); | ||
| 1306 | *dstp = dst; | ||
| 1307 | return 0; | ||
| 1308 | } | ||
| 1309 | |||
| 1267 | /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */ | 1310 | /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */ |
| 1268 | 1311 | ||
| 1269 | int | 1312 | int |
| @@ -1278,10 +1321,10 @@ encode_coding_iso2022 (coding, source, destination, | |||
| 1278 | unsigned char *src_end = source + src_bytes; | 1321 | unsigned char *src_end = source + src_bytes; |
| 1279 | unsigned char *dst = destination; | 1322 | unsigned char *dst = destination; |
| 1280 | unsigned char *dst_end = destination + dst_bytes; | 1323 | unsigned char *dst_end = destination + dst_bytes; |
| 1281 | /* Since the maximum bytes produced by each loop is 6, we subtract 5 | 1324 | /* Since the maximum bytes produced by each loop is 20, we subtract 19 |
| 1282 | from DST_END to assure overflow checking is necessary only at the | 1325 | from DST_END to assure overflow checking is necessary only at the |
| 1283 | head of loop. */ | 1326 | head of loop. */ |
| 1284 | unsigned char *adjusted_dst_end = dst_end - 5; | 1327 | unsigned char *adjusted_dst_end = dst_end - 19; |
| 1285 | 1328 | ||
| 1286 | while (src < src_end && dst < adjusted_dst_end) | 1329 | while (src < src_end && dst < adjusted_dst_end) |
| 1287 | { | 1330 | { |
| @@ -1291,9 +1334,22 @@ encode_coding_iso2022 (coding, source, destination, | |||
| 1291 | TWO_MORE_BYTES, and THREE_MORE_BYTES). In that case, SRC is | 1334 | TWO_MORE_BYTES, and THREE_MORE_BYTES). In that case, SRC is |
| 1292 | reset to SRC_BASE before exiting. */ | 1335 | reset to SRC_BASE before exiting. */ |
| 1293 | unsigned char *src_base = src; | 1336 | unsigned char *src_base = src; |
| 1294 | unsigned char c1 = *src++, c2, c3, c4; | 1337 | unsigned char c1, c2, c3, c4; |
| 1295 | int charset; | 1338 | int charset; |
| 1296 | 1339 | ||
| 1340 | if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL | ||
| 1341 | && CODING_SPEC_ISO_BOL (coding)) | ||
| 1342 | { | ||
| 1343 | /* We have to produce destination sequences now. */ | ||
| 1344 | if (encode_designation_at_bol (coding, src, src_end, &dst) < 0) | ||
| 1345 | /* We can't find end of line in the current block. Let's | ||
| 1346 | repeat encoding starting from the current position | ||
| 1347 | pointed by SRC. */ | ||
| 1348 | break; | ||
| 1349 | CODING_SPEC_ISO_BOL (coding) = 0; | ||
| 1350 | } | ||
| 1351 | |||
| 1352 | c1 = *src++; | ||
| 1297 | /* If we are seeing a component of a composite character, we are | 1353 | /* If we are seeing a component of a composite character, we are |
| 1298 | seeing a leading-code specially encoded for composition, or a | 1354 | seeing a leading-code specially encoded for composition, or a |
| 1299 | composition rule if composing with rule. We must set C1 | 1355 | composition rule if composing with rule. We must set C1 |
| @@ -1339,7 +1395,7 @@ encode_coding_iso2022 (coding, source, destination, | |||
| 1339 | 1395 | ||
| 1340 | case EMACS_control_code: | 1396 | case EMACS_control_code: |
| 1341 | if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL) | 1397 | if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL) |
| 1342 | ENCODE_RESET_PLANE_AND_REGISTER (0); | 1398 | ENCODE_RESET_PLANE_AND_REGISTER; |
| 1343 | *dst++ = c1; | 1399 | *dst++ = c1; |
| 1344 | break; | 1400 | break; |
| 1345 | 1401 | ||
| @@ -1347,7 +1403,7 @@ encode_coding_iso2022 (coding, source, destination, | |||
| 1347 | if (!coding->selective) | 1403 | if (!coding->selective) |
| 1348 | { | 1404 | { |
| 1349 | if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL) | 1405 | if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL) |
| 1350 | ENCODE_RESET_PLANE_AND_REGISTER (0); | 1406 | ENCODE_RESET_PLANE_AND_REGISTER; |
| 1351 | *dst++ = c1; | 1407 | *dst++ = c1; |
| 1352 | break; | 1408 | break; |
| 1353 | } | 1409 | } |
| @@ -1355,7 +1411,11 @@ encode_coding_iso2022 (coding, source, destination, | |||
| 1355 | 1411 | ||
| 1356 | case EMACS_linefeed_code: | 1412 | case EMACS_linefeed_code: |
| 1357 | if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL) | 1413 | if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL) |
| 1358 | ENCODE_RESET_PLANE_AND_REGISTER (1); | 1414 | ENCODE_RESET_PLANE_AND_REGISTER; |
| 1415 | if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL) | ||
| 1416 | bcopy (coding->spec.iso2022.initial_designation, | ||
| 1417 | coding->spec.iso2022.current_designation, | ||
| 1418 | sizeof coding->spec.iso2022.initial_designation); | ||
| 1359 | if (coding->eol_type == CODING_EOL_LF | 1419 | if (coding->eol_type == CODING_EOL_LF |
| 1360 | || coding->eol_type == CODING_EOL_AUTOMATIC) | 1420 | || coding->eol_type == CODING_EOL_AUTOMATIC) |
| 1361 | *dst++ = ISO_CODE_LF; | 1421 | *dst++ = ISO_CODE_LF; |
| @@ -1363,6 +1423,7 @@ encode_coding_iso2022 (coding, source, destination, | |||
| 1363 | *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF; | 1423 | *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF; |
| 1364 | else | 1424 | else |
| 1365 | *dst++ = ISO_CODE_CR; | 1425 | *dst++ = ISO_CODE_CR; |
| 1426 | CODING_SPEC_ISO_BOL (coding) = 1; | ||
| 1366 | break; | 1427 | break; |
| 1367 | 1428 | ||
| 1368 | case EMACS_leading_code_2: | 1429 | case EMACS_leading_code_2: |
| @@ -1418,7 +1479,7 @@ encode_coding_iso2022 (coding, source, destination, | |||
| 1418 | the text although they are not valid characters. */ | 1479 | the text although they are not valid characters. */ |
| 1419 | if (coding->last_block) | 1480 | if (coding->last_block) |
| 1420 | { | 1481 | { |
| 1421 | ENCODE_RESET_PLANE_AND_REGISTER (1); | 1482 | ENCODE_RESET_PLANE_AND_REGISTER; |
| 1422 | bcopy(src, dst, src_end - src); | 1483 | bcopy(src, dst, src_end - src); |
| 1423 | dst += (src_end - src); | 1484 | dst += (src_end - src); |
| 1424 | src = src_end; | 1485 | src = src_end; |
| @@ -1985,11 +2046,10 @@ encode_eol (coding, source, destination, src_bytes, dst_bytes, consumed) | |||
| 1985 | return 0. */ | 2046 | return 0. */ |
| 1986 | 2047 | ||
| 1987 | int | 2048 | int |
| 1988 | setup_coding_system (coding_system_symbol, coding) | 2049 | setup_coding_system (coding_system, coding) |
| 1989 | Lisp_Object coding_system_symbol; | 2050 | Lisp_Object coding_system; |
| 1990 | struct coding_system *coding; | 2051 | struct coding_system *coding; |
| 1991 | { | 2052 | { |
| 1992 | Lisp_Object coding_system_vector = Qnil; | ||
| 1993 | Lisp_Object type, eol_type; | 2053 | Lisp_Object type, eol_type; |
| 1994 | 2054 | ||
| 1995 | /* At first, set several fields default values. */ | 2055 | /* At first, set several fields default values. */ |
| @@ -1999,45 +2059,29 @@ setup_coding_system (coding_system_symbol, coding) | |||
| 1999 | coding->composing = 0; | 2059 | coding->composing = 0; |
| 2000 | coding->direction = 0; | 2060 | coding->direction = 0; |
| 2001 | coding->carryover_size = 0; | 2061 | coding->carryover_size = 0; |
| 2002 | coding->symbol = Qnil; | ||
| 2003 | coding->post_read_conversion = coding->pre_write_conversion = Qnil; | 2062 | coding->post_read_conversion = coding->pre_write_conversion = Qnil; |
| 2004 | 2063 | ||
| 2005 | /* Get value of property `coding-system'. If it is a Lisp symbol | 2064 | Vlast_coding_system_used = coding->symbol = coding_system; |
| 2006 | pointing another coding system, fetch its property until we get a | 2065 | eol_type = Qnil; |
| 2007 | vector. */ | 2066 | /* Get value of property `coding-system' until we get a vector. |
| 2008 | while (!NILP (coding_system_symbol)) | 2067 | While doing that, also get values of properties |
| 2068 | `post-read-conversion', `pre-write-conversion', and `eol-type'. */ | ||
| 2069 | while (!NILP (coding_system) && SYMBOLP (coding_system)) | ||
| 2009 | { | 2070 | { |
| 2010 | coding->symbol = coding_system_symbol; | ||
| 2011 | if (NILP (coding->post_read_conversion)) | 2071 | if (NILP (coding->post_read_conversion)) |
| 2012 | coding->post_read_conversion = Fget (coding_system_symbol, | 2072 | coding->post_read_conversion = Fget (coding_system, |
| 2013 | Qpost_read_conversion); | 2073 | Qpost_read_conversion); |
| 2014 | if (NILP (coding->pre_write_conversion)) | 2074 | if (NILP (coding->pre_write_conversion)) |
| 2015 | coding->pre_write_conversion = Fget (coding_system_symbol, | 2075 | coding->pre_write_conversion = Fget (coding_system, |
| 2016 | Qpre_write_conversion); | 2076 | Qpre_write_conversion); |
| 2017 | 2077 | if (NILP (eol_type)) | |
| 2018 | coding_system_vector = Fget (coding_system_symbol, Qcoding_system); | 2078 | eol_type = Fget (coding_system, Qeol_type); |
| 2019 | if (VECTORP (coding_system_vector)) | 2079 | coding_system = Fget (coding_system, Qcoding_system); |
| 2020 | break; | ||
| 2021 | coding_system_symbol = coding_system_vector; | ||
| 2022 | } | 2080 | } |
| 2023 | Vlast_coding_system_used = coding->symbol; | 2081 | if (!VECTORP (coding_system) |
| 2024 | 2082 | || XVECTOR (coding_system)->size != 5) | |
| 2025 | if (!VECTORP (coding_system_vector) | ||
| 2026 | || XVECTOR (coding_system_vector)->size != 5) | ||
| 2027 | goto label_invalid_coding_system; | 2083 | goto label_invalid_coding_system; |
| 2028 | 2084 | ||
| 2029 | /* Get value of property `eol-type' by searching from the root | ||
| 2030 | coding-system. */ | ||
| 2031 | coding_system_symbol = coding->symbol; | ||
| 2032 | eol_type = Qnil; | ||
| 2033 | while (SYMBOLP (coding_system_symbol) && !NILP (coding_system_symbol)) | ||
| 2034 | { | ||
| 2035 | eol_type = Fget (coding_system_symbol, Qeol_type); | ||
| 2036 | if (!NILP (eol_type)) | ||
| 2037 | break; | ||
| 2038 | coding_system_symbol = Fget (coding_system_symbol, Qcoding_system); | ||
| 2039 | } | ||
| 2040 | |||
| 2041 | if (VECTORP (eol_type)) | 2085 | if (VECTORP (eol_type)) |
| 2042 | coding->eol_type = CODING_EOL_AUTOMATIC; | 2086 | coding->eol_type = CODING_EOL_AUTOMATIC; |
| 2043 | else if (XFASTINT (eol_type) == 1) | 2087 | else if (XFASTINT (eol_type) == 1) |
| @@ -2047,7 +2091,7 @@ setup_coding_system (coding_system_symbol, coding) | |||
| 2047 | else | 2091 | else |
| 2048 | coding->eol_type = CODING_EOL_LF; | 2092 | coding->eol_type = CODING_EOL_LF; |
| 2049 | 2093 | ||
| 2050 | type = XVECTOR (coding_system_vector)->contents[0]; | 2094 | type = XVECTOR (coding_system)->contents[0]; |
| 2051 | switch (XFASTINT (type)) | 2095 | switch (XFASTINT (type)) |
| 2052 | { | 2096 | { |
| 2053 | case 0: | 2097 | case 0: |
| @@ -2061,7 +2105,7 @@ setup_coding_system (coding_system_symbol, coding) | |||
| 2061 | case 2: | 2105 | case 2: |
| 2062 | coding->type = coding_type_iso2022; | 2106 | coding->type = coding_type_iso2022; |
| 2063 | { | 2107 | { |
| 2064 | Lisp_Object val = XVECTOR (coding_system_vector)->contents[4]; | 2108 | Lisp_Object val = XVECTOR (coding_system)->contents[4]; |
| 2065 | Lisp_Object *flags; | 2109 | Lisp_Object *flags; |
| 2066 | int i, charset, default_reg_bits = 0; | 2110 | int i, charset, default_reg_bits = 0; |
| 2067 | 2111 | ||
| @@ -2078,7 +2122,9 @@ setup_coding_system (coding_system_symbol, coding) | |||
| 2078 | | (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT) | 2122 | | (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT) |
| 2079 | | (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN) | 2123 | | (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN) |
| 2080 | | (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS) | 2124 | | (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS) |
| 2081 | | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION)); | 2125 | | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION) |
| 2126 | | (NILP (flags[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL) | ||
| 2127 | | (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL)); | ||
| 2082 | 2128 | ||
| 2083 | /* Invoke graphic register 0 to plane 0. */ | 2129 | /* Invoke graphic register 0 to plane 0. */ |
| 2084 | CODING_SPEC_ISO_INVOCATION (coding, 0) = 0; | 2130 | CODING_SPEC_ISO_INVOCATION (coding, 0) = 0; |
| @@ -2087,6 +2133,8 @@ setup_coding_system (coding_system_symbol, coding) | |||
| 2087 | = (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1); | 2133 | = (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1); |
| 2088 | /* Not single shifting at first. */ | 2134 | /* Not single shifting at first. */ |
| 2089 | CODING_SPEC_ISO_SINGLE_SHIFTING(coding) = 0; | 2135 | CODING_SPEC_ISO_SINGLE_SHIFTING(coding) = 0; |
| 2136 | /* Beginning of buffer should also be regarded as bol. */ | ||
| 2137 | CODING_SPEC_ISO_BOL(coding) = 1; | ||
| 2090 | 2138 | ||
| 2091 | /* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations. | 2139 | /* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations. |
| 2092 | FLAGS[REG] can be one of below: | 2140 | FLAGS[REG] can be one of below: |
| @@ -2103,7 +2151,8 @@ setup_coding_system (coding_system_symbol, coding) | |||
| 2103 | for (i = 0; i < 4; i++) | 2151 | for (i = 0; i < 4; i++) |
| 2104 | { | 2152 | { |
| 2105 | if (INTEGERP (flags[i]) | 2153 | if (INTEGERP (flags[i]) |
| 2106 | && (charset = XINT (flags[i]), CHARSET_VALID_P (charset))) | 2154 | && (charset = XINT (flags[i]), CHARSET_VALID_P (charset)) |
| 2155 | || (charset = get_charset_id (flags[i])) >= 0) | ||
| 2107 | { | 2156 | { |
| 2108 | CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset; | 2157 | CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset; |
| 2109 | CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i; | 2158 | CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i; |
| @@ -2119,7 +2168,8 @@ setup_coding_system (coding_system_symbol, coding) | |||
| 2119 | 2168 | ||
| 2120 | if (INTEGERP (XCONS (tail)->car) | 2169 | if (INTEGERP (XCONS (tail)->car) |
| 2121 | && (charset = XINT (XCONS (tail)->car), | 2170 | && (charset = XINT (XCONS (tail)->car), |
| 2122 | CHARSET_VALID_P (charset))) | 2171 | CHARSET_VALID_P (charset)) |
| 2172 | || (charset = get_charset_id (XCONS (tail)->car)) >= 0) | ||
| 2123 | { | 2173 | { |
| 2124 | CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset; | 2174 | CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset; |
| 2125 | CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i; | 2175 | CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i; |
| @@ -2131,7 +2181,8 @@ setup_coding_system (coding_system_symbol, coding) | |||
| 2131 | { | 2181 | { |
| 2132 | if (INTEGERP (XCONS (tail)->car) | 2182 | if (INTEGERP (XCONS (tail)->car) |
| 2133 | && (charset = XINT (XCONS (tail)->car), | 2183 | && (charset = XINT (XCONS (tail)->car), |
| 2134 | CHARSET_VALID_P (charset))) | 2184 | CHARSET_VALID_P (charset)) |
| 2185 | || (charset = get_charset_id (XCONS (tail)->car)) >= 0) | ||
| 2135 | CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) | 2186 | CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) |
| 2136 | = i; | 2187 | = i; |
| 2137 | else if (EQ (XCONS (tail)->car, Qt)) | 2188 | else if (EQ (XCONS (tail)->car, Qt)) |
| @@ -2190,7 +2241,7 @@ setup_coding_system (coding_system_symbol, coding) | |||
| 2190 | case 3: | 2241 | case 3: |
| 2191 | coding->type = coding_type_big5; | 2242 | coding->type = coding_type_big5; |
| 2192 | coding->flags | 2243 | coding->flags |
| 2193 | = (NILP (XVECTOR (coding_system_vector)->contents[4]) | 2244 | = (NILP (XVECTOR (coding_system)->contents[4]) |
| 2194 | ? CODING_FLAG_BIG5_HKU | 2245 | ? CODING_FLAG_BIG5_HKU |
| 2195 | : CODING_FLAG_BIG5_ETEN); | 2246 | : CODING_FLAG_BIG5_ETEN); |
| 2196 | break; | 2247 | break; |
| @@ -2198,7 +2249,7 @@ setup_coding_system (coding_system_symbol, coding) | |||
| 2198 | case 4: | 2249 | case 4: |
| 2199 | coding->type = coding_type_ccl; | 2250 | coding->type = coding_type_ccl; |
| 2200 | { | 2251 | { |
| 2201 | Lisp_Object val = XVECTOR (coding_system_vector)->contents[4]; | 2252 | Lisp_Object val = XVECTOR (coding_system)->contents[4]; |
| 2202 | if (CONSP (val) | 2253 | if (CONSP (val) |
| 2203 | && VECTORP (XCONS (val)->car) | 2254 | && VECTORP (XCONS (val)->car) |
| 2204 | && VECTORP (XCONS (val)->cdr)) | 2255 | && VECTORP (XCONS (val)->cdr)) |
| @@ -2223,6 +2274,8 @@ setup_coding_system (coding_system_symbol, coding) | |||
| 2223 | 2274 | ||
| 2224 | label_invalid_coding_system: | 2275 | label_invalid_coding_system: |
| 2225 | coding->type = coding_type_no_conversion; | 2276 | coding->type = coding_type_no_conversion; |
| 2277 | coding->symbol = coding->pre_write_conversion = coding->post_read_conversion | ||
| 2278 | = Qnil; | ||
| 2226 | return -1; | 2279 | return -1; |
| 2227 | } | 2280 | } |
| 2228 | 2281 | ||
| @@ -2236,52 +2289,52 @@ setup_coding_system (coding_system_symbol, coding) | |||
| 2236 | 2289 | ||
| 2237 | The category for a coding system which has the same code range | 2290 | The category for a coding system which has the same code range |
| 2238 | as Emacs' internal format. Assigned the coding-system (Lisp | 2291 | as Emacs' internal format. Assigned the coding-system (Lisp |
| 2239 | symbol) `coding-system-internal' by default. | 2292 | symbol) `internal' by default. |
| 2240 | 2293 | ||
| 2241 | o coding-category-sjis | 2294 | o coding-category-sjis |
| 2242 | 2295 | ||
| 2243 | The category for a coding system which has the same code range | 2296 | The category for a coding system which has the same code range |
| 2244 | as SJIS. Assigned the coding-system (Lisp | 2297 | as SJIS. Assigned the coding-system (Lisp |
| 2245 | symbol) `coding-system-sjis' by default. | 2298 | symbol) `shift-jis' by default. |
| 2246 | 2299 | ||
| 2247 | o coding-category-iso-7 | 2300 | o coding-category-iso-7 |
| 2248 | 2301 | ||
| 2249 | The category for a coding system which has the same code range | 2302 | The category for a coding system which has the same code range |
| 2250 | as ISO2022 of 7-bit environment. Assigned the coding-system | 2303 | as ISO2022 of 7-bit environment. Assigned the coding-system |
| 2251 | (Lisp symbol) `coding-system-junet' by default. | 2304 | (Lisp symbol) `iso-2022-7' by default. |
| 2252 | 2305 | ||
| 2253 | o coding-category-iso-8-1 | 2306 | o coding-category-iso-8-1 |
| 2254 | 2307 | ||
| 2255 | The category for a coding system which has the same code range | 2308 | The category for a coding system which has the same code range |
| 2256 | as ISO2022 of 8-bit environment and graphic plane 1 used only | 2309 | as ISO2022 of 8-bit environment and graphic plane 1 used only |
| 2257 | for DIMENSION1 charset. Assigned the coding-system (Lisp | 2310 | for DIMENSION1 charset. Assigned the coding-system (Lisp |
| 2258 | symbol) `coding-system-ctext' by default. | 2311 | symbol) `iso-8859-1' by default. |
| 2259 | 2312 | ||
| 2260 | o coding-category-iso-8-2 | 2313 | o coding-category-iso-8-2 |
| 2261 | 2314 | ||
| 2262 | The category for a coding system which has the same code range | 2315 | The category for a coding system which has the same code range |
| 2263 | as ISO2022 of 8-bit environment and graphic plane 1 used only | 2316 | as ISO2022 of 8-bit environment and graphic plane 1 used only |
| 2264 | for DIMENSION2 charset. Assigned the coding-system (Lisp | 2317 | for DIMENSION2 charset. Assigned the coding-system (Lisp |
| 2265 | symbol) `coding-system-euc-japan' by default. | 2318 | symbol) `euc-japan' by default. |
| 2266 | 2319 | ||
| 2267 | o coding-category-iso-else | 2320 | o coding-category-iso-else |
| 2268 | 2321 | ||
| 2269 | The category for a coding system which has the same code range | 2322 | The category for a coding system which has the same code range |
| 2270 | as ISO2022 but not belongs to any of the above three | 2323 | as ISO2022 but not belongs to any of the above three |
| 2271 | categories. Assigned the coding-system (Lisp symbol) | 2324 | categories. Assigned the coding-system (Lisp symbol) |
| 2272 | `coding-system-iso-2022-ss2-7' by default. | 2325 | `iso-2022-ss2-7' by default. |
| 2273 | 2326 | ||
| 2274 | o coding-category-big5 | 2327 | o coding-category-big5 |
| 2275 | 2328 | ||
| 2276 | The category for a coding system which has the same code range | 2329 | The category for a coding system which has the same code range |
| 2277 | as BIG5. Assigned the coding-system (Lisp symbol) | 2330 | as BIG5. Assigned the coding-system (Lisp symbol) |
| 2278 | `coding-system-big5' by default. | 2331 | `cn-big5' by default. |
| 2279 | 2332 | ||
| 2280 | o coding-category-binary | 2333 | o coding-category-binary |
| 2281 | 2334 | ||
| 2282 | The category for a coding system not categorized in any of the | 2335 | The category for a coding system not categorized in any of the |
| 2283 | above. Assigned the coding-system (Lisp symbol) | 2336 | above. Assigned the coding-system (Lisp symbol) |
| 2284 | `coding-system-noconv' by default. | 2337 | `no-conversion' by default. |
| 2285 | 2338 | ||
| 2286 | Each of them is a Lisp symbol and the value is an actual | 2339 | Each of them is a Lisp symbol and the value is an actual |
| 2287 | `coding-system's (this is also a Lisp symbol) assigned by a user. | 2340 | `coding-system's (this is also a Lisp symbol) assigned by a user. |
| @@ -2549,7 +2602,7 @@ encode_coding (coding, source, destination, src_bytes, dst_bytes, consumed) | |||
| 2549 | { | 2602 | { |
| 2550 | unsigned char *p = destination, *pend = destination + produced; | 2603 | unsigned char *p = destination, *pend = destination + produced; |
| 2551 | while (p < pend) | 2604 | while (p < pend) |
| 2552 | if (*p++ = '\015') p[-1] = '\n'; | 2605 | if (*p++ == '\015') p[-1] = '\n'; |
| 2553 | } | 2606 | } |
| 2554 | } | 2607 | } |
| 2555 | *consumed = produced; | 2608 | *consumed = produced; |
| @@ -2687,23 +2740,26 @@ See document of make-coding-system for coding-system object.") | |||
| 2687 | 2740 | ||
| 2688 | DEFUN ("read-non-nil-coding-system", | 2741 | DEFUN ("read-non-nil-coding-system", |
| 2689 | Fread_non_nil_coding_system, Sread_non_nil_coding_system, 1, 1, 0, | 2742 | Fread_non_nil_coding_system, Sread_non_nil_coding_system, 1, 1, 0, |
| 2690 | "Read a coding-system from the minibuffer, prompting with string PROMPT.") | 2743 | "Read a coding system from the minibuffer, prompting with string PROMPT.") |
| 2691 | (prompt) | 2744 | (prompt) |
| 2692 | Lisp_Object prompt; | 2745 | Lisp_Object prompt; |
| 2693 | { | 2746 | { |
| 2694 | return Fintern (Fcompleting_read (prompt, Vobarray, Qcoding_system_vector, | 2747 | Lisp_Object val; |
| 2695 | Qt, Qnil, Qnil), | 2748 | do { |
| 2696 | Qnil); | 2749 | val = Fcompleting_read (prompt, Vobarray, Qcoding_system_vector, |
| 2750 | Qt, Qnil, Qnil); | ||
| 2751 | } while (XSTRING (val)->size == 0); | ||
| 2752 | return (Fintern (val, Qnil)); | ||
| 2697 | } | 2753 | } |
| 2698 | 2754 | ||
| 2699 | DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 1, 0, | 2755 | DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 1, 0, |
| 2700 | "Read a coding-system or nil from the minibuffer, prompting with string PROMPT.") | 2756 | "Read a coding system or nil from the minibuffer, prompting with string PROMPT.") |
| 2701 | (prompt) | 2757 | (prompt) |
| 2702 | Lisp_Object prompt; | 2758 | Lisp_Object prompt; |
| 2703 | { | 2759 | { |
| 2704 | return Fintern (Fcompleting_read (prompt, Vobarray, Qcoding_system_p, | 2760 | Lisp_Object val = Fcompleting_read (prompt, Vobarray, Qcoding_system_p, |
| 2705 | Qt, Qnil, Qnil), | 2761 | Qt, Qnil, Qnil); |
| 2706 | Qnil); | 2762 | return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil)); |
| 2707 | } | 2763 | } |
| 2708 | 2764 | ||
| 2709 | DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system, | 2765 | DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system, |
| @@ -2726,7 +2782,7 @@ DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region, | |||
| 2726 | 2, 2, 0, | 2782 | 2, 2, 0, |
| 2727 | "Detect coding-system of the text in the region between START and END.\n\ | 2783 | "Detect coding-system of the text in the region between START and END.\n\ |
| 2728 | Return a list of possible coding-systems ordered by priority.\n\ | 2784 | Return a list of possible coding-systems ordered by priority.\n\ |
| 2729 | If only ASCII characters are found, it returns `coding-system-automatic'\n\ | 2785 | If only ASCII characters are found, it returns `automatic-conversion'\n\ |
| 2730 | or its subsidiary coding-system according to a detected end-of-line format.") | 2786 | or its subsidiary coding-system according to a detected end-of-line format.") |
| 2731 | (b, e) | 2787 | (b, e) |
| 2732 | Lisp_Object b, e; | 2788 | Lisp_Object b, e; |
| @@ -2744,7 +2800,7 @@ If only ASCII characters are found, it returns `coding-system-automatic'\n\ | |||
| 2744 | 2800 | ||
| 2745 | if (coding_mask == CODING_CATEGORY_MASK_ANY) | 2801 | if (coding_mask == CODING_CATEGORY_MASK_ANY) |
| 2746 | { | 2802 | { |
| 2747 | val = intern ("coding-system-automatic"); | 2803 | val = intern ("automatic-conversion"); |
| 2748 | if (eol_type != CODING_EOL_AUTOMATIC) | 2804 | if (eol_type != CODING_EOL_AUTOMATIC) |
| 2749 | { | 2805 | { |
| 2750 | Lisp_Object val2 = Fget (val, Qeol_type); | 2806 | Lisp_Object val2 = Fget (val, Qeol_type); |
| @@ -2823,9 +2879,24 @@ shrink_conversion_area (begp, endp, coding, encodep) | |||
| 2823 | case coding_type_ccl: | 2879 | case coding_type_ccl: |
| 2824 | /* We can't skip any data. */ | 2880 | /* We can't skip any data. */ |
| 2825 | return; | 2881 | return; |
| 2882 | case coding_type_iso2022: | ||
| 2883 | if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL) | ||
| 2884 | { | ||
| 2885 | unsigned char *bol = beg_addr; | ||
| 2886 | while (beg_addr < end_addr && *beg_addr < 0x80) | ||
| 2887 | { | ||
| 2888 | beg_addr++; | ||
| 2889 | if (*(beg_addr - 1) == '\n') | ||
| 2890 | bol = beg_addr; | ||
| 2891 | } | ||
| 2892 | beg_addr = bol; | ||
| 2893 | goto label_skip_tail; | ||
| 2894 | } | ||
| 2895 | /* fall down ... */ | ||
| 2826 | default: | 2896 | default: |
| 2827 | /* We can skip all ASCII characters at the head and tail. */ | 2897 | /* We can skip all ASCII characters at the head and tail. */ |
| 2828 | while (beg_addr < end_addr && *beg_addr < 0x80) beg_addr++; | 2898 | while (beg_addr < end_addr && *beg_addr < 0x80) beg_addr++; |
| 2899 | label_skip_tail: | ||
| 2829 | while (beg_addr < end_addr && *(end_addr - 1) < 0x80) end_addr--; | 2900 | while (beg_addr < end_addr && *(end_addr - 1) < 0x80) end_addr--; |
| 2830 | break; | 2901 | break; |
| 2831 | } | 2902 | } |
| @@ -2974,8 +3045,8 @@ code_convert_region (b, e, coding, encodep) | |||
| 2974 | } | 3045 | } |
| 2975 | 3046 | ||
| 2976 | Lisp_Object | 3047 | Lisp_Object |
| 2977 | code_convert_string (str, coding, encodep) | 3048 | code_convert_string (str, coding, encodep, nocopy) |
| 2978 | Lisp_Object str; | 3049 | Lisp_Object str, nocopy; |
| 2979 | struct coding_system *coding; | 3050 | struct coding_system *coding; |
| 2980 | int encodep; | 3051 | int encodep; |
| 2981 | { | 3052 | { |
| @@ -3014,7 +3085,7 @@ code_convert_string (str, coding, encodep) | |||
| 3014 | 3085 | ||
| 3015 | if (begp == endp) | 3086 | if (begp == endp) |
| 3016 | /* We need no conversion. */ | 3087 | /* We need no conversion. */ |
| 3017 | return str; | 3088 | return (NILP (nocopy) ? Fcopy_sequence (str) : str); |
| 3018 | 3089 | ||
| 3019 | head_skip = begp - XSTRING (str)->data; | 3090 | head_skip = begp - XSTRING (str)->data; |
| 3020 | tail_skip = XSTRING (str)->size - head_skip - (endp - begp); | 3091 | tail_skip = XSTRING (str)->size - head_skip - (endp - begp); |
| @@ -3044,8 +3115,10 @@ code_convert_string (str, coding, encodep) | |||
| 3044 | } | 3115 | } |
| 3045 | 3116 | ||
| 3046 | DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region, | 3117 | DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region, |
| 3047 | 3, 3, 0, | 3118 | 3, 3, "r\nzCoding system: ", |
| 3048 | "Decode the text between START and END which is encoded in CODING-SYSTEM.\n\ | 3119 | "Decode current region by specified coding system.\n\ |
| 3120 | When called from a program, takes three arguments:\n\ | ||
| 3121 | START, END, and CODING-SYSTEM. START END are buffer positions.\n\ | ||
| 3049 | Return length of decoded text.") | 3122 | Return length of decoded text.") |
| 3050 | (b, e, coding_system) | 3123 | (b, e, coding_system) |
| 3051 | Lisp_Object b, e, coding_system; | 3124 | Lisp_Object b, e, coding_system; |
| @@ -3056,6 +3129,8 @@ Return length of decoded text.") | |||
| 3056 | CHECK_NUMBER_COERCE_MARKER (e, 1); | 3129 | CHECK_NUMBER_COERCE_MARKER (e, 1); |
| 3057 | CHECK_SYMBOL (coding_system, 2); | 3130 | CHECK_SYMBOL (coding_system, 2); |
| 3058 | 3131 | ||
| 3132 | if (NILP (coding_system)) | ||
| 3133 | return make_number (XFASTINT (e) - XFASTINT (b)); | ||
| 3059 | if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0) | 3134 | if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0) |
| 3060 | error ("Invalid coding-system: %s", XSYMBOL (coding_system)->name->data); | 3135 | error ("Invalid coding-system: %s", XSYMBOL (coding_system)->name->data); |
| 3061 | 3136 | ||
| @@ -3063,8 +3138,10 @@ Return length of decoded text.") | |||
| 3063 | } | 3138 | } |
| 3064 | 3139 | ||
| 3065 | DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region, | 3140 | DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region, |
| 3066 | 3, 3, 0, | 3141 | 3, 3, "r\nzCoding system: ", |
| 3067 | "Encode the text between START and END to CODING-SYSTEM.\n\ | 3142 | "Encode current region by specified coding system.\n\ |
| 3143 | When called from a program, takes three arguments:\n\ | ||
| 3144 | START, END, and CODING-SYSTEM. START END are buffer positions.\n\ | ||
| 3068 | Return length of encoded text.") | 3145 | Return length of encoded text.") |
| 3069 | (b, e, coding_system) | 3146 | (b, e, coding_system) |
| 3070 | Lisp_Object b, e, coding_system; | 3147 | Lisp_Object b, e, coding_system; |
| @@ -3075,6 +3152,8 @@ Return length of encoded text.") | |||
| 3075 | CHECK_NUMBER_COERCE_MARKER (e, 1); | 3152 | CHECK_NUMBER_COERCE_MARKER (e, 1); |
| 3076 | CHECK_SYMBOL (coding_system, 2); | 3153 | CHECK_SYMBOL (coding_system, 2); |
| 3077 | 3154 | ||
| 3155 | if (NILP (coding_system)) | ||
| 3156 | return make_number (XFASTINT (e) - XFASTINT (b)); | ||
| 3078 | if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0) | 3157 | if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0) |
| 3079 | error ("Invalid coding-system: %s", XSYMBOL (coding_system)->name->data); | 3158 | error ("Invalid coding-system: %s", XSYMBOL (coding_system)->name->data); |
| 3080 | 3159 | ||
| @@ -3082,41 +3161,49 @@ Return length of encoded text.") | |||
| 3082 | } | 3161 | } |
| 3083 | 3162 | ||
| 3084 | DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string, | 3163 | DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string, |
| 3085 | 2, 2, 0, | 3164 | 2, 3, 0, |
| 3086 | "Decode STRING which is encoded in CODING-SYSTEM, and return the result.") | 3165 | "Decode STRING which is encoded in CODING-SYSTEM, and return the result.\n\ |
| 3087 | (string, coding_system) | 3166 | Optional arg NOCOPY non-nil means return STRING itself if there's no need\n\ |
| 3088 | Lisp_Object string, coding_system; | 3167 | of decoding.") |
| 3168 | (string, coding_system, nocopy) | ||
| 3169 | Lisp_Object string, coding_system, nocopy; | ||
| 3089 | { | 3170 | { |
| 3090 | struct coding_system coding; | 3171 | struct coding_system coding; |
| 3091 | 3172 | ||
| 3092 | CHECK_STRING (string, 0); | 3173 | CHECK_STRING (string, 0); |
| 3093 | CHECK_SYMBOL (coding_system, 1); | 3174 | CHECK_SYMBOL (coding_system, 1); |
| 3094 | 3175 | ||
| 3176 | if (NILP (coding_system)) | ||
| 3177 | return (NILP (nocopy) ? Fcopy_sequence (string) : string); | ||
| 3095 | if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0) | 3178 | if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0) |
| 3096 | error ("Invalid coding-system: %s", XSYMBOL (coding_system)->name->data); | 3179 | error ("Invalid coding-system: %s", XSYMBOL (coding_system)->name->data); |
| 3097 | 3180 | ||
| 3098 | return code_convert_string (string, &coding, 0); | 3181 | return code_convert_string (string, &coding, 0, nocopy); |
| 3099 | } | 3182 | } |
| 3100 | 3183 | ||
| 3101 | DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string, | 3184 | DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string, |
| 3102 | 2, 2, 0, | 3185 | 2, 3, 0, |
| 3103 | "Encode STRING to CODING-SYSTEM, and return the result.") | 3186 | "Encode STRING to CODING-SYSTEM, and return the result.\n\ |
| 3104 | (string, coding_system) | 3187 | Optional arg NOCOPY non-nil means return STRING itself if there's no need\n\ |
| 3105 | Lisp_Object string, coding_system; | 3188 | of encoding.") |
| 3189 | (string, coding_system, nocopy) | ||
| 3190 | Lisp_Object string, coding_system, nocopy; | ||
| 3106 | { | 3191 | { |
| 3107 | struct coding_system coding; | 3192 | struct coding_system coding; |
| 3108 | 3193 | ||
| 3109 | CHECK_STRING (string, 0); | 3194 | CHECK_STRING (string, 0); |
| 3110 | CHECK_SYMBOL (coding_system, 1); | 3195 | CHECK_SYMBOL (coding_system, 1); |
| 3111 | 3196 | ||
| 3197 | if (NILP (coding_system)) | ||
| 3198 | return (NILP (nocopy) ? Fcopy_sequence (string) : string); | ||
| 3112 | if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0) | 3199 | if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0) |
| 3113 | error ("Invalid coding-system: %s", XSYMBOL (coding_system)->name->data); | 3200 | error ("Invalid coding-system: %s", XSYMBOL (coding_system)->name->data); |
| 3114 | 3201 | ||
| 3115 | return code_convert_string (string, &coding, 1); | 3202 | return code_convert_string (string, &coding, 1, nocopy); |
| 3116 | } | 3203 | } |
| 3117 | 3204 | ||
| 3118 | DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0, | 3205 | DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0, |
| 3119 | "Decode a JISX0208 character of SJIS coding-system-sjis.\n\ | 3206 | "Decode a JISX0208 character of shift-jis encoding.\n\ |
| 3120 | CODE is the character code in SJIS.\n\ | 3207 | CODE is the character code in SJIS.\n\ |
| 3121 | Return the corresponding character.") | 3208 | Return the corresponding character.") |
| 3122 | (code) | 3209 | (code) |
| @@ -3255,7 +3342,7 @@ For each OPERATION, TARGET is selected from the arguments as below:\n\ | |||
| 3255 | \n\ | 3342 | \n\ |
| 3256 | The return value is a cons of coding systems for decoding and encoding\n\ | 3343 | The return value is a cons of coding systems for decoding and encoding\n\ |
| 3257 | registered in nested alist `coding-system-alist' (which see) at a slot\n\ | 3344 | registered in nested alist `coding-system-alist' (which see) at a slot\n\ |
| 3258 | corresponding to OPERATION and TARGET. | 3345 | corresponding to OPERATION and TARGET.\n\ |
| 3259 | If a function symbol is at the slot, return a result of the function call.\n\ | 3346 | If a function symbol is at the slot, return a result of the function call.\n\ |
| 3260 | The function is called with one argument, a list of all the arguments.") | 3347 | The function is called with one argument, a list of all the arguments.") |
| 3261 | (nargs, args) | 3348 | (nargs, args) |
| @@ -3346,6 +3433,39 @@ init_coding_once () | |||
| 3346 | iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3; | 3433 | iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3; |
| 3347 | iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer; | 3434 | iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer; |
| 3348 | 3435 | ||
| 3436 | conversion_buffer_size = MINIMUM_CONVERSION_BUFFER_SIZE; | ||
| 3437 | conversion_buffer = (char *) xmalloc (MINIMUM_CONVERSION_BUFFER_SIZE); | ||
| 3438 | |||
| 3439 | setup_coding_system (Qnil, &keyboard_coding); | ||
| 3440 | setup_coding_system (Qnil, &terminal_coding); | ||
| 3441 | } | ||
| 3442 | |||
| 3443 | #ifdef emacs | ||
| 3444 | |||
| 3445 | syms_of_coding () | ||
| 3446 | { | ||
| 3447 | Qtarget_idx = intern ("target-idx"); | ||
| 3448 | staticpro (&Qtarget_idx); | ||
| 3449 | |||
| 3450 | Fput (Qinsert_file_contents, Qtarget_idx, make_number (0)); | ||
| 3451 | Fput (Qwrite_region, Qtarget_idx, make_number (2)); | ||
| 3452 | |||
| 3453 | Qcall_process = intern ("call-process"); | ||
| 3454 | staticpro (&Qcall_process); | ||
| 3455 | Fput (Qcall_process, Qtarget_idx, make_number (0)); | ||
| 3456 | |||
| 3457 | Qcall_process_region = intern ("call-process-region"); | ||
| 3458 | staticpro (&Qcall_process_region); | ||
| 3459 | Fput (Qcall_process_region, Qtarget_idx, make_number (2)); | ||
| 3460 | |||
| 3461 | Qstart_process = intern ("start-process"); | ||
| 3462 | staticpro (&Qstart_process); | ||
| 3463 | Fput (Qstart_process, Qtarget_idx, make_number (2)); | ||
| 3464 | |||
| 3465 | Qopen_network_stream = intern ("open-network-stream"); | ||
| 3466 | staticpro (&Qopen_network_stream); | ||
| 3467 | Fput (Qopen_network_stream, Qtarget_idx, make_number (3)); | ||
| 3468 | |||
| 3349 | Qcoding_system = intern ("coding-system"); | 3469 | Qcoding_system = intern ("coding-system"); |
| 3350 | staticpro (&Qcoding_system); | 3470 | staticpro (&Qcoding_system); |
| 3351 | 3471 | ||
| @@ -3389,39 +3509,6 @@ init_coding_once () | |||
| 3389 | } | 3509 | } |
| 3390 | } | 3510 | } |
| 3391 | 3511 | ||
| 3392 | conversion_buffer_size = MINIMUM_CONVERSION_BUFFER_SIZE; | ||
| 3393 | conversion_buffer = (char *) xmalloc (MINIMUM_CONVERSION_BUFFER_SIZE); | ||
| 3394 | |||
| 3395 | setup_coding_system (Qnil, &keyboard_coding); | ||
| 3396 | setup_coding_system (Qnil, &terminal_coding); | ||
| 3397 | } | ||
| 3398 | |||
| 3399 | #ifdef emacs | ||
| 3400 | |||
| 3401 | syms_of_coding () | ||
| 3402 | { | ||
| 3403 | Qtarget_idx = intern ("target-idx"); | ||
| 3404 | staticpro (&Qtarget_idx); | ||
| 3405 | |||
| 3406 | Fput (Qinsert_file_contents, Qtarget_idx, make_number (0)); | ||
| 3407 | Fput (Qwrite_region, Qtarget_idx, make_number (2)); | ||
| 3408 | |||
| 3409 | Qcall_process = intern ("call-process"); | ||
| 3410 | staticpro (&Qcall_process); | ||
| 3411 | Fput (Qcall_process, Qtarget_idx, make_number (0)); | ||
| 3412 | |||
| 3413 | Qcall_process_region = intern ("call-process-region"); | ||
| 3414 | staticpro (&Qcall_process_region); | ||
| 3415 | Fput (Qcall_process_region, Qtarget_idx, make_number (2)); | ||
| 3416 | |||
| 3417 | Qstart_process = intern ("start-process"); | ||
| 3418 | staticpro (&Qstart_process); | ||
| 3419 | Fput (Qstart_process, Qtarget_idx, make_number (2)); | ||
| 3420 | |||
| 3421 | Qopen_network_stream = intern ("open-network-stream"); | ||
| 3422 | staticpro (&Qopen_network_stream); | ||
| 3423 | Fput (Qopen_network_stream, Qtarget_idx, make_number (3)); | ||
| 3424 | |||
| 3425 | defsubr (&Scoding_system_vector); | 3512 | defsubr (&Scoding_system_vector); |
| 3426 | defsubr (&Scoding_system_p); | 3513 | defsubr (&Scoding_system_p); |
| 3427 | defsubr (&Sread_coding_system); | 3514 | defsubr (&Sread_coding_system); |
| @@ -3472,7 +3559,7 @@ If not, an appropriate element in `coding-system-alist' (which see) is used."); | |||
| 3472 | DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist, | 3559 | DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist, |
| 3473 | "Nested alist to decide a coding system for a specific I/O operation.\n\ | 3560 | "Nested alist to decide a coding system for a specific I/O operation.\n\ |
| 3474 | The format is ((OPERATION . ((REGEXP . CODING-SYSTEMS) ...)) ...).\n\ | 3561 | The format is ((OPERATION . ((REGEXP . CODING-SYSTEMS) ...)) ...).\n\ |
| 3475 | 3562 | \n\ | |
| 3476 | OPERATION is one of the following Emacs I/O primitives:\n\ | 3563 | OPERATION is one of the following Emacs I/O primitives:\n\ |
| 3477 | For file I/O, insert-file-contents and write-region.\n\ | 3564 | For file I/O, insert-file-contents and write-region.\n\ |
| 3478 | For process I/O, call-process, call-process-region, and start-process.\n\ | 3565 | For process I/O, call-process, call-process-region, and start-process.\n\ |