diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/ccl.c | 514 |
1 files changed, 182 insertions, 332 deletions
| @@ -2,6 +2,9 @@ | |||
| 2 | Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN. | 2 | Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN. |
| 3 | Copyright (C) 2001 Free Software Foundation, Inc. | 3 | Copyright (C) 2001 Free Software Foundation, Inc. |
| 4 | Licensed to the Free Software Foundation. | 4 | Licensed to the Free Software Foundation. |
| 5 | Copyright (C) 2001, 2002 | ||
| 6 | National Institute of Advanced Industrial Science and Technology (AIST) | ||
| 7 | Registration Number H13PRO009 | ||
| 5 | 8 | ||
| 6 | This file is part of GNU Emacs. | 9 | This file is part of GNU Emacs. |
| 7 | 10 | ||
| @@ -29,6 +32,7 @@ Boston, MA 02111-1307, USA. */ | |||
| 29 | #ifdef emacs | 32 | #ifdef emacs |
| 30 | 33 | ||
| 31 | #include "lisp.h" | 34 | #include "lisp.h" |
| 35 | #include "character.h" | ||
| 32 | #include "charset.h" | 36 | #include "charset.h" |
| 33 | #include "ccl.h" | 37 | #include "ccl.h" |
| 34 | #include "coding.h" | 38 | #include "coding.h" |
| @@ -39,6 +43,8 @@ Boston, MA 02111-1307, USA. */ | |||
| 39 | 43 | ||
| 40 | #endif /* not emacs */ | 44 | #endif /* not emacs */ |
| 41 | 45 | ||
| 46 | Lisp_Object Qccl, Qcclp; | ||
| 47 | |||
| 42 | /* This contains all code conversion maps available to CCL. */ | 48 | /* This contains all code conversion maps available to CCL. */ |
| 43 | Lisp_Object Vcode_conversion_map_vector; | 49 | Lisp_Object Vcode_conversion_map_vector; |
| 44 | 50 | ||
| @@ -711,56 +717,24 @@ while(0) | |||
| 711 | 717 | ||
| 712 | /* Encode one character CH to multibyte form and write to the current | 718 | /* Encode one character CH to multibyte form and write to the current |
| 713 | output buffer. If CH is less than 256, CH is written as is. */ | 719 | output buffer. If CH is less than 256, CH is written as is. */ |
| 714 | #define CCL_WRITE_CHAR(ch) \ | 720 | #define CCL_WRITE_CHAR(ch) \ |
| 715 | do { \ | 721 | do { \ |
| 716 | int bytes = SINGLE_BYTE_CHAR_P (ch) ? 1: CHAR_BYTES (ch); \ | 722 | if (! dst) \ |
| 717 | if (!dst) \ | 723 | CCL_INVALID_CMD; \ |
| 718 | CCL_INVALID_CMD; \ | 724 | else if (dst < dst_end) \ |
| 719 | else if (dst + bytes + extra_bytes < (dst_bytes ? dst_end : src)) \ | 725 | *dst++ = (ch); \ |
| 720 | { \ | 726 | else \ |
| 721 | if (bytes == 1) \ | 727 | CCL_SUSPEND (CCL_STAT_SUSPEND_BY_DST); \ |
| 722 | { \ | ||
| 723 | *dst++ = (ch); \ | ||
| 724 | if ((ch) >= 0x80 && (ch) < 0xA0) \ | ||
| 725 | /* We may have to convert this eight-bit char to \ | ||
| 726 | multibyte form later. */ \ | ||
| 727 | extra_bytes++; \ | ||
| 728 | } \ | ||
| 729 | else if (CHAR_VALID_P (ch, 0)) \ | ||
| 730 | dst += CHAR_STRING (ch, dst); \ | ||
| 731 | else \ | ||
| 732 | CCL_INVALID_CMD; \ | ||
| 733 | } \ | ||
| 734 | else \ | ||
| 735 | CCL_SUSPEND (CCL_STAT_SUSPEND_BY_DST); \ | ||
| 736 | } while (0) | ||
| 737 | |||
| 738 | /* Encode one character CH to multibyte form and write to the current | ||
| 739 | output buffer. The output bytes always forms a valid multibyte | ||
| 740 | sequence. */ | ||
| 741 | #define CCL_WRITE_MULTIBYTE_CHAR(ch) \ | ||
| 742 | do { \ | ||
| 743 | int bytes = CHAR_BYTES (ch); \ | ||
| 744 | if (!dst) \ | ||
| 745 | CCL_INVALID_CMD; \ | ||
| 746 | else if (dst + bytes + extra_bytes < (dst_bytes ? dst_end : src)) \ | ||
| 747 | { \ | ||
| 748 | if (CHAR_VALID_P ((ch), 0)) \ | ||
| 749 | dst += CHAR_STRING ((ch), dst); \ | ||
| 750 | else \ | ||
| 751 | CCL_INVALID_CMD; \ | ||
| 752 | } \ | ||
| 753 | else \ | ||
| 754 | CCL_SUSPEND (CCL_STAT_SUSPEND_BY_DST); \ | ||
| 755 | } while (0) | 728 | } while (0) |
| 756 | 729 | ||
| 757 | /* Write a string at ccl_prog[IC] of length LEN to the current output | 730 | /* Write a string at ccl_prog[IC] of length LEN to the current output |
| 758 | buffer. */ | 731 | buffer. */ |
| 759 | #define CCL_WRITE_STRING(len) \ | 732 | #define CCL_WRITE_STRING(len) \ |
| 760 | do { \ | 733 | do { \ |
| 734 | int i; \ | ||
| 761 | if (!dst) \ | 735 | if (!dst) \ |
| 762 | CCL_INVALID_CMD; \ | 736 | CCL_INVALID_CMD; \ |
| 763 | else if (dst + len <= (dst_bytes ? dst_end : src)) \ | 737 | else if (dst + len <= dst_end) \ |
| 764 | for (i = 0; i < len; i++) \ | 738 | for (i = 0; i < len; i++) \ |
| 765 | *dst++ = ((XFASTINT (ccl_prog[ic + (i / 3)])) \ | 739 | *dst++ = ((XFASTINT (ccl_prog[ic + (i / 3)])) \ |
| 766 | >> ((2 - (i % 3)) * 8)) & 0xFF; \ | 740 | >> ((2 - (i % 3)) * 8)) & 0xFF; \ |
| @@ -768,78 +742,30 @@ while(0) | |||
| 768 | CCL_SUSPEND (CCL_STAT_SUSPEND_BY_DST); \ | 742 | CCL_SUSPEND (CCL_STAT_SUSPEND_BY_DST); \ |
| 769 | } while (0) | 743 | } while (0) |
| 770 | 744 | ||
| 771 | /* Read one byte from the current input buffer into REGth register. */ | 745 | /* Read one byte from the current input buffer into Rth register. */ |
| 772 | #define CCL_READ_CHAR(REG) \ | 746 | #define CCL_READ_CHAR(r) \ |
| 773 | do { \ | 747 | do { \ |
| 774 | if (!src) \ | 748 | if (! src) \ |
| 775 | CCL_INVALID_CMD; \ | 749 | CCL_INVALID_CMD; \ |
| 776 | else if (src < src_end) \ | 750 | else if (src < src_end) \ |
| 777 | { \ | 751 | r = *src++; \ |
| 778 | REG = *src++; \ | 752 | else if (ccl->last_block) \ |
| 779 | if (REG == '\n' \ | 753 | { \ |
| 780 | && ccl->eol_type != CODING_EOL_LF) \ | 754 | ic = ccl->eof_ic; \ |
| 781 | { \ | 755 | goto ccl_repeat; \ |
| 782 | /* We are encoding. */ \ | 756 | } \ |
| 783 | if (ccl->eol_type == CODING_EOL_CRLF) \ | 757 | else \ |
| 784 | { \ | 758 | CCL_SUSPEND (CCL_STAT_SUSPEND_BY_SRC); \ |
| 785 | if (ccl->cr_consumed) \ | 759 | } while (0) |
| 786 | ccl->cr_consumed = 0; \ | 760 | |
| 787 | else \ | 761 | |
| 788 | { \ | 762 | /* Execute CCL code on characters at SOURCE (length SRC_SIZE). The |
| 789 | ccl->cr_consumed = 1; \ | 763 | resulting text goes to a place pointed by DESTINATION, the length |
| 790 | REG = '\r'; \ | 764 | of which should not exceed DST_SIZE. As a side effect, how many |
| 791 | src--; \ | 765 | characters are consumed and produced are recorded in CCL->consumed |
| 792 | } \ | 766 | and CCL->produced, and the contents of CCL registers are updated. |
| 793 | } \ | 767 | If SOURCE or DESTINATION is NULL, only operations on registers are |
| 794 | else \ | 768 | permitted. */ |
| 795 | REG = '\r'; \ | ||
| 796 | } \ | ||
| 797 | if (REG == LEADING_CODE_8_BIT_CONTROL \ | ||
| 798 | && ccl->multibyte) \ | ||
| 799 | REG = *src++ - 0x20; \ | ||
| 800 | } \ | ||
| 801 | else if (ccl->last_block) \ | ||
| 802 | { \ | ||
| 803 | ic = ccl->eof_ic; \ | ||
| 804 | goto ccl_repeat; \ | ||
| 805 | } \ | ||
| 806 | else \ | ||
| 807 | CCL_SUSPEND (CCL_STAT_SUSPEND_BY_SRC); \ | ||
| 808 | } while (0) | ||
| 809 | |||
| 810 | |||
| 811 | /* Set C to the character code made from CHARSET and CODE. This is | ||
| 812 | like MAKE_CHAR but check the validity of CHARSET and CODE. If they | ||
| 813 | are not valid, set C to (CODE & 0xFF) because that is usually the | ||
| 814 | case that CCL_ReadMultibyteChar2 read an invalid code and it set | ||
| 815 | CODE to that invalid byte. */ | ||
| 816 | |||
| 817 | #define CCL_MAKE_CHAR(charset, code, c) \ | ||
| 818 | do { \ | ||
| 819 | if (charset == CHARSET_ASCII) \ | ||
| 820 | c = code & 0xFF; \ | ||
| 821 | else if (CHARSET_DEFINED_P (charset) \ | ||
| 822 | && (code & 0x7F) >= 32 \ | ||
| 823 | && (code < 256 || ((code >> 7) & 0x7F) >= 32)) \ | ||
| 824 | { \ | ||
| 825 | int c1 = code & 0x7F, c2 = 0; \ | ||
| 826 | \ | ||
| 827 | if (code >= 256) \ | ||
| 828 | c2 = c1, c1 = (code >> 7) & 0x7F; \ | ||
| 829 | c = MAKE_CHAR (charset, c1, c2); \ | ||
| 830 | } \ | ||
| 831 | else \ | ||
| 832 | c = code & 0xFF; \ | ||
| 833 | } while (0) | ||
| 834 | |||
| 835 | |||
| 836 | /* Execute CCL code on SRC_BYTES length text at SOURCE. The resulting | ||
| 837 | text goes to a place pointed by DESTINATION, the length of which | ||
| 838 | should not exceed DST_BYTES. The bytes actually processed is | ||
| 839 | returned as *CONSUMED. The return value is the length of the | ||
| 840 | resulting text. As a side effect, the contents of CCL registers | ||
| 841 | are updated. If SOURCE or DESTINATION is NULL, only operations on | ||
| 842 | registers are permitted. */ | ||
| 843 | 769 | ||
| 844 | #ifdef CCL_DEBUG | 770 | #ifdef CCL_DEBUG |
| 845 | #define CCL_DEBUG_BACKTRACE_LEN 256 | 771 | #define CCL_DEBUG_BACKTRACE_LEN 256 |
| @@ -856,34 +782,29 @@ struct ccl_prog_stack | |||
| 856 | /* For the moment, we only support a stack depth of 256. */ | 782 | /* For the moment, we only support a stack depth of 256. */ |
| 857 | static struct ccl_prog_stack ccl_prog_stack_struct[256]; | 783 | static struct ccl_prog_stack ccl_prog_stack_struct[256]; |
| 858 | 784 | ||
| 859 | int | 785 | void |
| 860 | ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed) | 786 | ccl_driver (ccl, source, destination, src_size, dst_size) |
| 861 | struct ccl_program *ccl; | 787 | struct ccl_program *ccl; |
| 862 | unsigned char *source, *destination; | 788 | int *source, *destination; |
| 863 | int src_bytes, dst_bytes; | 789 | int src_size, dst_size; |
| 864 | int *consumed; | ||
| 865 | { | 790 | { |
| 866 | register int *reg = ccl->reg; | 791 | register int *reg = ccl->reg; |
| 867 | register int ic = ccl->ic; | 792 | register int ic = ccl->ic; |
| 868 | register int code = 0, field1, field2; | 793 | register int code = 0, field1, field2; |
| 869 | register Lisp_Object *ccl_prog = ccl->prog; | 794 | register Lisp_Object *ccl_prog = ccl->prog; |
| 870 | unsigned char *src = source, *src_end = src + src_bytes; | 795 | int *src = source, *src_end = src + src_size; |
| 871 | unsigned char *dst = destination, *dst_end = dst + dst_bytes; | 796 | int *dst = destination, *dst_end = dst + dst_size; |
| 872 | int jump_address; | 797 | int jump_address; |
| 873 | int i = 0, j, op; | 798 | int i = 0, j, op; |
| 874 | int stack_idx = ccl->stack_idx; | 799 | int stack_idx = ccl->stack_idx; |
| 875 | /* Instruction counter of the current CCL code. */ | 800 | /* Instruction counter of the current CCL code. */ |
| 876 | int this_ic = 0; | 801 | int this_ic = 0; |
| 877 | /* CCL_WRITE_CHAR will produce 8-bit code of range 0x80..0x9F. But, | 802 | struct charset *charset; |
| 878 | each of them will be converted to multibyte form of 2-byte | ||
| 879 | sequence. For that conversion, we remember how many more bytes | ||
| 880 | we must keep in DESTINATION in this variable. */ | ||
| 881 | int extra_bytes = 0; | ||
| 882 | 803 | ||
| 883 | if (ic >= ccl->eof_ic) | 804 | if (ic >= ccl->eof_ic) |
| 884 | ic = CCL_HEADER_MAIN; | 805 | ic = CCL_HEADER_MAIN; |
| 885 | 806 | ||
| 886 | if (ccl->buf_magnification == 0) /* We can't produce any bytes. */ | 807 | if (ccl->buf_magnification == 0) /* We can't read/produce any bytes. */ |
| 887 | dst = NULL; | 808 | dst = NULL; |
| 888 | 809 | ||
| 889 | /* Set mapping stack pointer. */ | 810 | /* Set mapping stack pointer. */ |
| @@ -908,8 +829,8 @@ ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed) | |||
| 908 | /* We can't just signal Qquit; instead, break the loop as if | 829 | /* We can't just signal Qquit; instead, break the loop as if |
| 909 | all the data had been processed. Don't reset Vquit_flag; it | 830 | all the data had been processed. Don't reset Vquit_flag; it |
| 910 | must be handled later at a safer place. */ | 831 | must be handled later at a safer place. */ |
| 911 | if (consumed) | 832 | if (src) |
| 912 | src = source + src_bytes; | 833 | src = source + src_size; |
| 913 | ccl->status = CCL_STAT_QUIT; | 834 | ccl->status = CCL_STAT_QUIT; |
| 914 | break; | 835 | break; |
| 915 | } | 836 | } |
| @@ -1224,8 +1145,22 @@ ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed) | |||
| 1224 | case CCL_LE: reg[rrr] = i <= j; break; | 1145 | case CCL_LE: reg[rrr] = i <= j; break; |
| 1225 | case CCL_GE: reg[rrr] = i >= j; break; | 1146 | case CCL_GE: reg[rrr] = i >= j; break; |
| 1226 | case CCL_NE: reg[rrr] = i != j; break; | 1147 | case CCL_NE: reg[rrr] = i != j; break; |
| 1227 | case CCL_DECODE_SJIS: DECODE_SJIS (i, j, reg[rrr], reg[7]); break; | 1148 | case CCL_DECODE_SJIS: |
| 1228 | case CCL_ENCODE_SJIS: ENCODE_SJIS (i, j, reg[rrr], reg[7]); break; | 1149 | { |
| 1150 | i = (i << 8) | j; | ||
| 1151 | SJIS_TO_JIS (i); | ||
| 1152 | reg[rrr] = i >> 8; | ||
| 1153 | reg[7] = i & 0xFF; | ||
| 1154 | break; | ||
| 1155 | } | ||
| 1156 | case CCL_ENCODE_SJIS: | ||
| 1157 | { | ||
| 1158 | i = (i << 8) | j; | ||
| 1159 | JIS_TO_SJIS (i); | ||
| 1160 | reg[rrr] = i >> 8; | ||
| 1161 | reg[7] = i & 0xFF; | ||
| 1162 | break; | ||
| 1163 | } | ||
| 1229 | default: CCL_INVALID_CMD; | 1164 | default: CCL_INVALID_CMD; |
| 1230 | } | 1165 | } |
| 1231 | code &= 0x1F; | 1166 | code &= 0x1F; |
| @@ -1245,165 +1180,38 @@ ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed) | |||
| 1245 | case CCL_ReadMultibyteChar2: | 1180 | case CCL_ReadMultibyteChar2: |
| 1246 | if (!src) | 1181 | if (!src) |
| 1247 | CCL_INVALID_CMD; | 1182 | CCL_INVALID_CMD; |
| 1248 | 1183 | CCL_READ_CHAR (i); | |
| 1249 | if (src >= src_end) | 1184 | charset = CHAR_CHARSET (i); |
| 1250 | { | 1185 | reg[rrr] = CHARSET_ID (charset); |
| 1251 | src++; | 1186 | reg[RRR] = ENCODE_CHAR (charset, i); |
| 1252 | goto ccl_read_multibyte_character_suspend; | ||
| 1253 | } | ||
| 1254 | |||
| 1255 | if (!ccl->multibyte) | ||
| 1256 | { | ||
| 1257 | int bytes; | ||
| 1258 | if (!UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)) | ||
| 1259 | { | ||
| 1260 | reg[RRR] = CHARSET_8_BIT_CONTROL; | ||
| 1261 | reg[rrr] = *src++; | ||
| 1262 | break; | ||
| 1263 | } | ||
| 1264 | } | ||
| 1265 | i = *src++; | ||
| 1266 | if (i == '\n' && ccl->eol_type != CODING_EOL_LF) | ||
| 1267 | { | ||
| 1268 | /* We are encoding. */ | ||
| 1269 | if (ccl->eol_type == CODING_EOL_CRLF) | ||
| 1270 | { | ||
| 1271 | if (ccl->cr_consumed) | ||
| 1272 | ccl->cr_consumed = 0; | ||
| 1273 | else | ||
| 1274 | { | ||
| 1275 | ccl->cr_consumed = 1; | ||
| 1276 | i = '\r'; | ||
| 1277 | src--; | ||
| 1278 | } | ||
| 1279 | } | ||
| 1280 | else | ||
| 1281 | i = '\r'; | ||
| 1282 | reg[rrr] = i; | ||
| 1283 | reg[RRR] = CHARSET_ASCII; | ||
| 1284 | } | ||
| 1285 | else if (i < 0x80) | ||
| 1286 | { | ||
| 1287 | /* ASCII */ | ||
| 1288 | reg[rrr] = i; | ||
| 1289 | reg[RRR] = CHARSET_ASCII; | ||
| 1290 | } | ||
| 1291 | else if (i <= MAX_CHARSET_OFFICIAL_DIMENSION2) | ||
| 1292 | { | ||
| 1293 | int dimension = BYTES_BY_CHAR_HEAD (i) - 1; | ||
| 1294 | |||
| 1295 | if (dimension == 0) | ||
| 1296 | { | ||
| 1297 | /* `i' is a leading code for an undefined charset. */ | ||
| 1298 | reg[RRR] = CHARSET_8_BIT_GRAPHIC; | ||
| 1299 | reg[rrr] = i; | ||
| 1300 | } | ||
| 1301 | else if (src + dimension > src_end) | ||
| 1302 | goto ccl_read_multibyte_character_suspend; | ||
| 1303 | else | ||
| 1304 | { | ||
| 1305 | reg[RRR] = i; | ||
| 1306 | i = (*src++ & 0x7F); | ||
| 1307 | if (dimension == 1) | ||
| 1308 | reg[rrr] = i; | ||
| 1309 | else | ||
| 1310 | reg[rrr] = ((i << 7) | (*src++ & 0x7F)); | ||
| 1311 | } | ||
| 1312 | } | ||
| 1313 | else if ((i == LEADING_CODE_PRIVATE_11) | ||
| 1314 | || (i == LEADING_CODE_PRIVATE_12)) | ||
| 1315 | { | ||
| 1316 | if ((src + 1) >= src_end) | ||
| 1317 | goto ccl_read_multibyte_character_suspend; | ||
| 1318 | reg[RRR] = *src++; | ||
| 1319 | reg[rrr] = (*src++ & 0x7F); | ||
| 1320 | } | ||
| 1321 | else if ((i == LEADING_CODE_PRIVATE_21) | ||
| 1322 | || (i == LEADING_CODE_PRIVATE_22)) | ||
| 1323 | { | ||
| 1324 | if ((src + 2) >= src_end) | ||
| 1325 | goto ccl_read_multibyte_character_suspend; | ||
| 1326 | reg[RRR] = *src++; | ||
| 1327 | i = (*src++ & 0x7F); | ||
| 1328 | reg[rrr] = ((i << 7) | (*src & 0x7F)); | ||
| 1329 | src++; | ||
| 1330 | } | ||
| 1331 | else if (i == LEADING_CODE_8_BIT_CONTROL) | ||
| 1332 | { | ||
| 1333 | if (src >= src_end) | ||
| 1334 | goto ccl_read_multibyte_character_suspend; | ||
| 1335 | reg[RRR] = CHARSET_8_BIT_CONTROL; | ||
| 1336 | reg[rrr] = (*src++ - 0x20); | ||
| 1337 | } | ||
| 1338 | else if (i >= 0xA0) | ||
| 1339 | { | ||
| 1340 | reg[RRR] = CHARSET_8_BIT_GRAPHIC; | ||
| 1341 | reg[rrr] = i; | ||
| 1342 | } | ||
| 1343 | else | ||
| 1344 | { | ||
| 1345 | /* INVALID CODE. Return a single byte character. */ | ||
| 1346 | reg[RRR] = CHARSET_ASCII; | ||
| 1347 | reg[rrr] = i; | ||
| 1348 | } | ||
| 1349 | break; | ||
| 1350 | |||
| 1351 | ccl_read_multibyte_character_suspend: | ||
| 1352 | if (src <= src_end && !ccl->multibyte && ccl->last_block) | ||
| 1353 | { | ||
| 1354 | reg[RRR] = CHARSET_8_BIT_CONTROL; | ||
| 1355 | reg[rrr] = i; | ||
| 1356 | break; | ||
| 1357 | } | ||
| 1358 | src--; | ||
| 1359 | if (ccl->last_block) | ||
| 1360 | { | ||
| 1361 | ic = ccl->eof_ic; | ||
| 1362 | goto ccl_repeat; | ||
| 1363 | } | ||
| 1364 | else | ||
| 1365 | CCL_SUSPEND (CCL_STAT_SUSPEND_BY_SRC); | ||
| 1366 | |||
| 1367 | break; | 1187 | break; |
| 1368 | 1188 | ||
| 1369 | case CCL_WriteMultibyteChar2: | 1189 | case CCL_WriteMultibyteChar2: |
| 1370 | i = reg[RRR]; /* charset */ | 1190 | if (! dst) |
| 1371 | if (i == CHARSET_ASCII | 1191 | CCL_INVALID_CMD; |
| 1372 | || i == CHARSET_8_BIT_CONTROL | 1192 | charset = CHARSET_FROM_ID (reg[RRR]); |
| 1373 | || i == CHARSET_8_BIT_GRAPHIC) | 1193 | i = DECODE_CHAR (charset, reg[rrr]); |
| 1374 | i = reg[rrr] & 0xFF; | 1194 | CCL_WRITE_CHAR (i); |
| 1375 | else if (CHARSET_DIMENSION (i) == 1) | ||
| 1376 | i = ((i - 0x70) << 7) | (reg[rrr] & 0x7F); | ||
| 1377 | else if (i < MIN_CHARSET_PRIVATE_DIMENSION2) | ||
| 1378 | i = ((i - 0x8F) << 14) | reg[rrr]; | ||
| 1379 | else | ||
| 1380 | i = ((i - 0xE0) << 14) | reg[rrr]; | ||
| 1381 | |||
| 1382 | CCL_WRITE_MULTIBYTE_CHAR (i); | ||
| 1383 | |||
| 1384 | break; | 1195 | break; |
| 1385 | 1196 | ||
| 1386 | case CCL_TranslateCharacter: | 1197 | case CCL_TranslateCharacter: |
| 1387 | CCL_MAKE_CHAR (reg[RRR], reg[rrr], i); | 1198 | charset = CHARSET_FROM_ID (reg[RRR]); |
| 1388 | op = translate_char (GET_TRANSLATION_TABLE (reg[Rrr]), | 1199 | i = DECODE_CHAR (charset, reg[rrr]); |
| 1389 | i, -1, 0, 0); | 1200 | op = translate_char (GET_TRANSLATION_TABLE (reg[Rrr]), i); |
| 1390 | SPLIT_CHAR (op, reg[RRR], i, j); | 1201 | charset = CHAR_CHARSET (op); |
| 1391 | if (j != -1) | 1202 | reg[RRR] = CHARSET_ID (charset); |
| 1392 | i = (i << 7) | j; | 1203 | reg[rrr] = ENCODE_CHAR (charset, op); |
| 1393 | |||
| 1394 | reg[rrr] = i; | ||
| 1395 | break; | 1204 | break; |
| 1396 | 1205 | ||
| 1397 | case CCL_TranslateCharacterConstTbl: | 1206 | case CCL_TranslateCharacterConstTbl: |
| 1398 | op = XINT (ccl_prog[ic]); /* table */ | 1207 | op = XINT (ccl_prog[ic]); /* table */ |
| 1399 | ic++; | 1208 | ic++; |
| 1400 | CCL_MAKE_CHAR (reg[RRR], reg[rrr], i); | 1209 | charset = CHARSET_FROM_ID (reg[RRR]); |
| 1401 | op = translate_char (GET_TRANSLATION_TABLE (op), i, -1, 0, 0); | 1210 | i = DECODE_CHAR (charset, reg[rrr]); |
| 1402 | SPLIT_CHAR (op, reg[RRR], i, j); | 1211 | op = translate_char (GET_TRANSLATION_TABLE (op), i); |
| 1403 | if (j != -1) | 1212 | charset = CHAR_CHARSET (op); |
| 1404 | i = (i << 7) | j; | 1213 | reg[RRR] = CHARSET_ID (charset); |
| 1405 | 1214 | reg[rrr] = ENCODE_CHAR (charset, op); | |
| 1406 | reg[rrr] = i; | ||
| 1407 | break; | 1215 | break; |
| 1408 | 1216 | ||
| 1409 | case CCL_IterateMultipleMap: | 1217 | case CCL_IterateMultipleMap: |
| @@ -1821,28 +1629,10 @@ ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed) | |||
| 1821 | } | 1629 | } |
| 1822 | 1630 | ||
| 1823 | msglen = strlen (msg); | 1631 | msglen = strlen (msg); |
| 1824 | if (dst + msglen <= (dst_bytes ? dst_end : src)) | 1632 | if (dst + msglen <= dst_end) |
| 1825 | { | ||
| 1826 | bcopy (msg, dst, msglen); | ||
| 1827 | dst += msglen; | ||
| 1828 | } | ||
| 1829 | |||
| 1830 | if (ccl->status == CCL_STAT_INVALID_CMD) | ||
| 1831 | { | 1633 | { |
| 1832 | #if 0 /* If the remaining bytes contain 0x80..0x9F, copying them | 1634 | for (i = 0; i < msglen; i++) |
| 1833 | results in an invalid multibyte sequence. */ | 1635 | *dst++ = msg[i]; |
| 1834 | |||
| 1835 | /* Copy the remaining source data. */ | ||
| 1836 | int i = src_end - src; | ||
| 1837 | if (dst_bytes && (dst_end - dst) < i) | ||
| 1838 | i = dst_end - dst; | ||
| 1839 | bcopy (src, dst, i); | ||
| 1840 | src += i; | ||
| 1841 | dst += i; | ||
| 1842 | #else | ||
| 1843 | /* Signal that we've consumed everything. */ | ||
| 1844 | src = src_end; | ||
| 1845 | #endif | ||
| 1846 | } | 1636 | } |
| 1847 | } | 1637 | } |
| 1848 | 1638 | ||
| @@ -1850,10 +1640,8 @@ ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed) | |||
| 1850 | ccl->ic = ic; | 1640 | ccl->ic = ic; |
| 1851 | ccl->stack_idx = stack_idx; | 1641 | ccl->stack_idx = stack_idx; |
| 1852 | ccl->prog = ccl_prog; | 1642 | ccl->prog = ccl_prog; |
| 1853 | ccl->eight_bit_control = (extra_bytes > 0); | 1643 | ccl->consumed = src - source; |
| 1854 | if (consumed) | 1644 | ccl->produced = dst - destination; |
| 1855 | *consumed = src - source; | ||
| 1856 | return (dst ? dst - destination : 0); | ||
| 1857 | } | 1645 | } |
| 1858 | 1646 | ||
| 1859 | /* Resolve symbols in the specified CCL code (Lisp vector). This | 1647 | /* Resolve symbols in the specified CCL code (Lisp vector). This |
| @@ -2003,7 +1791,6 @@ setup_ccl_program (ccl, ccl_prog) | |||
| 2003 | ccl->private_state = 0; | 1791 | ccl->private_state = 0; |
| 2004 | ccl->status = 0; | 1792 | ccl->status = 0; |
| 2005 | ccl->stack_idx = 0; | 1793 | ccl->stack_idx = 0; |
| 2006 | ccl->eol_type = CODING_EOL_LF; | ||
| 2007 | ccl->suppress_error = 0; | 1794 | ccl->suppress_error = 0; |
| 2008 | return 0; | 1795 | return 0; |
| 2009 | } | 1796 | } |
| @@ -2066,7 +1853,7 @@ programs. */) | |||
| 2066 | ? XINT (AREF (reg, i)) | 1853 | ? XINT (AREF (reg, i)) |
| 2067 | : 0); | 1854 | : 0); |
| 2068 | 1855 | ||
| 2069 | ccl_driver (&ccl, (unsigned char *)0, (unsigned char *)0, 0, 0, (int *)0); | 1856 | ccl_driver (&ccl, NULL, NULL, 0, 0); |
| 2070 | QUIT; | 1857 | QUIT; |
| 2071 | if (ccl.status != CCL_STAT_SUCCESS) | 1858 | if (ccl.status != CCL_STAT_SUCCESS) |
| 2072 | error ("Error in CCL program at %dth code", ccl.ic); | 1859 | error ("Error in CCL program at %dth code", ccl.ic); |
| @@ -2107,10 +1894,13 @@ See the documentation of `define-ccl-program' for the detail of CCL program. */ | |||
| 2107 | { | 1894 | { |
| 2108 | Lisp_Object val; | 1895 | Lisp_Object val; |
| 2109 | struct ccl_program ccl; | 1896 | struct ccl_program ccl; |
| 2110 | int i, produced; | 1897 | int i; |
| 2111 | int outbufsize; | 1898 | int outbufsize; |
| 2112 | char *outbuf; | 1899 | unsigned char *outbuf, *outp; |
| 2113 | struct gcpro gcpro1, gcpro2; | 1900 | int str_chars, str_bytes; |
| 1901 | #define CCL_EXECUTE_BUF_SIZE 1024 | ||
| 1902 | int source[CCL_EXECUTE_BUF_SIZE], destination[CCL_EXECUTE_BUF_SIZE]; | ||
| 1903 | int consumed_chars, consumed_bytes, produced_chars; | ||
| 2114 | 1904 | ||
| 2115 | if (setup_ccl_program (&ccl, ccl_prog) < 0) | 1905 | if (setup_ccl_program (&ccl, ccl_prog) < 0) |
| 2116 | error ("Invalid CCL program"); | 1906 | error ("Invalid CCL program"); |
| @@ -2119,8 +1909,8 @@ See the documentation of `define-ccl-program' for the detail of CCL program. */ | |||
| 2119 | if (ASIZE (status) != 9) | 1909 | if (ASIZE (status) != 9) |
| 2120 | error ("Length of vector STATUS is not 9"); | 1910 | error ("Length of vector STATUS is not 9"); |
| 2121 | CHECK_STRING (str); | 1911 | CHECK_STRING (str); |
| 2122 | 1912 | str_chars = XSTRING (str)->size; | |
| 2123 | GCPRO2 (status, str); | 1913 | str_bytes = STRING_BYTES (XSTRING (str)); |
| 2124 | 1914 | ||
| 2125 | for (i = 0; i < 8; i++) | 1915 | for (i = 0; i < 8; i++) |
| 2126 | { | 1916 | { |
| @@ -2135,33 +1925,87 @@ See the documentation of `define-ccl-program' for the detail of CCL program. */ | |||
| 2135 | if (ccl.ic < i && i < ccl.size) | 1925 | if (ccl.ic < i && i < ccl.size) |
| 2136 | ccl.ic = i; | 1926 | ccl.ic = i; |
| 2137 | } | 1927 | } |
| 2138 | outbufsize = STRING_BYTES (XSTRING (str)) * ccl.buf_magnification + 256; | ||
| 2139 | outbuf = (char *) xmalloc (outbufsize); | ||
| 2140 | ccl.last_block = NILP (contin); | ||
| 2141 | ccl.multibyte = STRING_MULTIBYTE (str); | ||
| 2142 | produced = ccl_driver (&ccl, XSTRING (str)->data, outbuf, | ||
| 2143 | STRING_BYTES (XSTRING (str)), outbufsize, (int *) 0); | ||
| 2144 | for (i = 0; i < 8; i++) | ||
| 2145 | XSET (AREF (status, i), Lisp_Int, ccl.reg[i]); | ||
| 2146 | XSETINT (AREF (status, 8), ccl.ic); | ||
| 2147 | UNGCPRO; | ||
| 2148 | 1928 | ||
| 2149 | if (NILP (unibyte_p)) | 1929 | outbufsize = (ccl.buf_magnification |
| 1930 | ? str_bytes * ccl.buf_magnification + 256 | ||
| 1931 | : str_bytes + 256); | ||
| 1932 | outp = outbuf = (unsigned char *) xmalloc (outbufsize); | ||
| 1933 | |||
| 1934 | consumed_chars = consumed_bytes = 0; | ||
| 1935 | produced_chars = 0; | ||
| 1936 | while (consumed_bytes < str_bytes) | ||
| 2150 | { | 1937 | { |
| 2151 | int nchars; | 1938 | unsigned char *p = XSTRING (str)->data + consumed_bytes; |
| 1939 | unsigned char *endp = XSTRING (str)->data + str_bytes; | ||
| 1940 | int i = 0; | ||
| 1941 | int *src, src_size; | ||
| 1942 | |||
| 1943 | if (endp - p == str_chars - consumed_chars) | ||
| 1944 | while (i < CCL_EXECUTE_BUF_SIZE && p < endp) | ||
| 1945 | source[i++] = *p++; | ||
| 1946 | else | ||
| 1947 | while (i < CCL_EXECUTE_BUF_SIZE && p < endp) | ||
| 1948 | source[i++] = STRING_CHAR_ADVANCE (p); | ||
| 1949 | consumed_chars += i; | ||
| 1950 | consumed_bytes = p - XSTRING (str)->data; | ||
| 1951 | |||
| 1952 | if (consumed_bytes == str_bytes) | ||
| 1953 | ccl.last_block = NILP (contin); | ||
| 1954 | src = source; | ||
| 1955 | src_size = i; | ||
| 1956 | while (1) | ||
| 1957 | { | ||
| 1958 | ccl_driver (&ccl, src, destination, src_size, CCL_EXECUTE_BUF_SIZE); | ||
| 1959 | if (ccl.status != CCL_STAT_SUSPEND_BY_DST) | ||
| 1960 | break; | ||
| 1961 | produced_chars += ccl.produced; | ||
| 1962 | if (NILP (unibyte_p)) | ||
| 1963 | { | ||
| 1964 | if (outp - outbuf + MAX_MULTIBYTE_LENGTH * ccl.produced | ||
| 1965 | > outbufsize) | ||
| 1966 | { | ||
| 1967 | int offset = outp - outbuf; | ||
| 1968 | outbufsize += MAX_MULTIBYTE_LENGTH * ccl.produced; | ||
| 1969 | outbuf = (unsigned char *) xrealloc (outbuf, outbufsize); | ||
| 1970 | outp = outbuf + offset; | ||
| 1971 | } | ||
| 1972 | for (i = 0; i < ccl.produced; i++) | ||
| 1973 | CHAR_STRING_ADVANCE (destination[i], outp); | ||
| 1974 | } | ||
| 1975 | else | ||
| 1976 | { | ||
| 1977 | if (outp - outbuf + ccl.produced > outbufsize) | ||
| 1978 | { | ||
| 1979 | int offset = outp - outbuf; | ||
| 1980 | outbufsize += ccl.produced; | ||
| 1981 | outbuf = (unsigned char *) xrealloc (outbuf, outbufsize); | ||
| 1982 | outp = outbuf + offset; | ||
| 1983 | } | ||
| 1984 | for (i = 0; i < ccl.produced; i++) | ||
| 1985 | *outp++ = destination[i]; | ||
| 1986 | } | ||
| 1987 | src += ccl.consumed; | ||
| 1988 | src_size -= ccl.consumed; | ||
| 1989 | } | ||
| 1990 | |||
| 1991 | if (ccl.status != CCL_STAT_SUSPEND_BY_SRC) | ||
| 1992 | break; | ||
| 1993 | } | ||
| 2152 | 1994 | ||
| 2153 | produced = str_as_multibyte (outbuf, outbufsize, produced, &nchars); | ||
| 2154 | val = make_multibyte_string (outbuf, nchars, produced); | ||
| 2155 | } | ||
| 2156 | else | ||
| 2157 | val = make_unibyte_string (outbuf, produced); | ||
| 2158 | xfree (outbuf); | ||
| 2159 | QUIT; | ||
| 2160 | if (ccl.status == CCL_STAT_SUSPEND_BY_DST) | ||
| 2161 | error ("Output buffer for the CCL programs overflow"); | ||
| 2162 | if (ccl.status != CCL_STAT_SUCCESS | 1995 | if (ccl.status != CCL_STAT_SUCCESS |
| 2163 | && ccl.status != CCL_STAT_SUSPEND_BY_SRC) | 1996 | && ccl.status != CCL_STAT_SUSPEND_BY_SRC) |
| 2164 | error ("Error in CCL program at %dth code", ccl.ic); | 1997 | error ("Error in CCL program at %dth code", ccl.ic); |
| 1998 | |||
| 1999 | for (i = 0; i < 8; i++) | ||
| 2000 | XSET (XVECTOR (status)->contents[i], Lisp_Int, ccl.reg[i]); | ||
| 2001 | XSETINT (XVECTOR (status)->contents[8], ccl.ic); | ||
| 2002 | |||
| 2003 | if (NILP (unibyte_p)) | ||
| 2004 | val = make_multibyte_string ((char *) outbuf, produced_chars, | ||
| 2005 | outp - outbuf); | ||
| 2006 | else | ||
| 2007 | val = make_unibyte_string ((char *) outbuf, produced_chars); | ||
| 2008 | xfree (outbuf); | ||
| 2165 | 2009 | ||
| 2166 | return val; | 2010 | return val; |
| 2167 | } | 2011 | } |
| @@ -2307,6 +2151,12 @@ syms_of_ccl () | |||
| 2307 | staticpro (&Vccl_program_table); | 2151 | staticpro (&Vccl_program_table); |
| 2308 | Vccl_program_table = Fmake_vector (make_number (32), Qnil); | 2152 | Vccl_program_table = Fmake_vector (make_number (32), Qnil); |
| 2309 | 2153 | ||
| 2154 | Qccl = intern ("ccl"); | ||
| 2155 | staticpro (&Qccl); | ||
| 2156 | |||
| 2157 | Qcclp = intern ("cclp"); | ||
| 2158 | staticpro (&Qcclp); | ||
| 2159 | |||
| 2310 | Qccl_program = intern ("ccl-program"); | 2160 | Qccl_program = intern ("ccl-program"); |
| 2311 | staticpro (&Qccl_program); | 2161 | staticpro (&Qccl_program); |
| 2312 | 2162 | ||