diff options
| author | Kenichi Handa | 2004-06-13 00:21:39 +0000 |
|---|---|---|
| committer | Kenichi Handa | 2004-06-13 00:21:39 +0000 |
| commit | 064cff0b3d73506b9134d3eb4b64281b4a725860 (patch) | |
| tree | d7d11f51efbc6dbf129ffba7f53482d02d68f1b1 | |
| parent | e9b95bbf8a5106747ca93fcb901e46e9c4aac614 (diff) | |
| download | emacs-064cff0b3d73506b9134d3eb4b64281b4a725860.tar.gz emacs-064cff0b3d73506b9134d3eb4b64281b4a725860.zip | |
(ccl-decode-mule-utf-8): Fix previous change.
(ccl-untranslated-to-ucs): Fix typo.
| -rw-r--r-- | lisp/international/utf-8.el | 114 |
1 files changed, 59 insertions, 55 deletions
diff --git a/lisp/international/utf-8.el b/lisp/international/utf-8.el index e324d0c0270..25ec185f545 100644 --- a/lisp/international/utf-8.el +++ b/lisp/international/utf-8.el | |||
| @@ -395,50 +395,37 @@ by the above coding systems, you can customize this option to nil." | |||
| 395 | ;; Thus magnification factor is two. | 395 | ;; Thus magnification factor is two. |
| 396 | ;; | 396 | ;; |
| 397 | `(2 | 397 | `(2 |
| 398 | ((r0 = -1) | 398 | ((r6 = ,(charset-id 'latin-iso8859-1)) |
| 399 | (read r0) | ||
| 399 | (loop | 400 | (loop |
| 400 | (if (r0 < 0) | ||
| 401 | (read r0)) | ||
| 402 | (if (r0 < #x80) | 401 | (if (r0 < #x80) |
| 403 | ;; 1-byte encoding, i.e., ascii | 402 | ;; 1-byte encoding, i.e., ascii |
| 404 | ((write r0) | 403 | (write-read-repeat r0)) |
| 405 | (r0 = -1) | 404 | (if (r0 < #xc2) |
| 406 | (repeat))) | 405 | ;; continuation byte (invalid here) or 1st byte of overlong |
| 407 | (if (r0 < #xc0) ; continuation byte (invalid here) | 406 | ;; 2-byte sequence. |
| 408 | ((call ccl-mule-utf-untrans) | 407 | ((call ccl-mule-utf-untrans) |
| 409 | (r0 = -1) | 408 | (r6 = ,(charset-id 'latin-iso8859-1)) |
| 409 | (read r0) | ||
| 410 | (repeat))) | 410 | (repeat))) |
| 411 | 411 | ||
| 412 | ;; Read the 2nd byte. | 412 | ;; Read the 2nd byte. |
| 413 | (r1 = -1) | ||
| 414 | (read r1) | 413 | (read r1) |
| 415 | (if ((r1 & #b11000000) != #b10000000) ; Invalid 2nd byte | 414 | (if ((r1 & #b11000000) != #b10000000) ; Invalid 2nd byte |
| 416 | ((call ccl-mule-utf-untrans) | 415 | ((call ccl-mule-utf-untrans) |
| 416 | (r6 = ,(charset-id 'latin-iso8859-1)) | ||
| 417 | ;; Handle it in the next loop. | 417 | ;; Handle it in the next loop. |
| 418 | (r0 = r1) | 418 | (r0 = r1) |
| 419 | (repeat))) | 419 | (repeat))) |
| 420 | 420 | ||
| 421 | (if (r0 < #xe0) | 421 | (if (r0 < #xe0) |
| 422 | ;; 2-byte encoding 00000yyyyyxxxxxx = 110yyyyy 10xxxxxx | 422 | ;; 2-byte encoding 00000yyyyyxxxxxx = 110yyyyy 10xxxxxx |
| 423 | ((r2 = ((r0 & #x1F) << 6)) | 423 | ((r1 &= #x3F) |
| 424 | (r2 |= (r1 & #x3F)) | 424 | (r1 |= ((r0 & #x1F) << 6)) |
| 425 | ;; Now r2 holds scalar value | 425 | ;; Now r1 holds scalar value. We don't have to check |
| 426 | 426 | ;; `overlong sequence' because r0 >= 0xC2. | |
| 427 | (if (r2 < 128) ; `overlong sequence' | ||
| 428 | ((call ccl-mule-utf-untrans) | ||
| 429 | (r0 = r1) | ||
| 430 | (call ccl-mule-utf-untrans) | ||
| 431 | (r0 = -1) | ||
| 432 | (repeat))) | ||
| 433 | 427 | ||
| 434 | (r1 = r2) | 428 | (if (r1 >= 256) |
| 435 | (if (r1 < 160) | ||
| 436 | ;; eight-bit-control | ||
| 437 | (r0 = ,(charset-id 'eight-bit-control)) | ||
| 438 | (if (r1 < 256) | ||
| 439 | ;; latin-iso8859-1 | ||
| 440 | ((r0 = ,(charset-id 'latin-iso8859-1)) | ||
| 441 | (r1 -= 128)) | ||
| 442 | ;; mule-unicode-0100-24ff (< 0800) | 429 | ;; mule-unicode-0100-24ff (< 0800) |
| 443 | ((r0 = ,(charset-id 'mule-unicode-0100-24ff)) | 430 | ((r0 = ,(charset-id 'mule-unicode-0100-24ff)) |
| 444 | (r1 -= #x0100) | 431 | (r1 -= #x0100) |
| @@ -446,18 +433,29 @@ by the above coding systems, you can customize this option to nil." | |||
| 446 | (r1 %= 96) | 433 | (r1 %= 96) |
| 447 | (r1 += (r2 + 32)) | 434 | (r1 += (r2 + 32)) |
| 448 | (translate-character | 435 | (translate-character |
| 449 | utf-translation-table-for-decode r0 r1)))) | 436 | utf-translation-table-for-decode r0 r1) |
| 450 | (write-multibyte-character r0 r1) | 437 | (write-multibyte-character r0 r1) |
| 451 | (r0 = -1) | 438 | (read r0) |
| 452 | (repeat))) | 439 | (repeat)) |
| 440 | (if (r1 >= 160) | ||
| 441 | ;; latin-iso8859-1 | ||
| 442 | ((r1 -= 128) | ||
| 443 | (write-multibyte-character r6 r1) | ||
| 444 | (read r0) | ||
| 445 | (repeat)) | ||
| 446 | ;; eight-bit-control | ||
| 447 | ((r0 = ,(charset-id 'eight-bit-control)) | ||
| 448 | (write-multibyte-character r0 r1) | ||
| 449 | (read r0) | ||
| 450 | (repeat)))))) | ||
| 453 | 451 | ||
| 454 | ;; Read the 3rd byte. | 452 | ;; Read the 3rd byte. |
| 455 | (r2 = -1) | ||
| 456 | (read r2) | 453 | (read r2) |
| 457 | (if ((r2 & #b11000000) != #b10000000) ; Invalid 3rd byte | 454 | (if ((r2 & #b11000000) != #b10000000) ; Invalid 3rd byte |
| 458 | ((call ccl-mule-utf-untrans) | 455 | ((call ccl-mule-utf-untrans) |
| 459 | (r0 = r1) | 456 | (r0 = r1) |
| 460 | (call ccl-mule-utf-untrans) | 457 | (call ccl-mule-utf-untrans) |
| 458 | (r6 = ,(charset-id 'latin-iso8859-1)) | ||
| 461 | ;; Handle it in the next loop. | 459 | ;; Handle it in the next loop. |
| 462 | (r0 = r2) | 460 | (r0 = r2) |
| 463 | (repeat))) | 461 | (repeat))) |
| @@ -475,7 +473,8 @@ by the above coding systems, you can customize this option to nil." | |||
| 475 | (call ccl-mule-utf-untrans) | 473 | (call ccl-mule-utf-untrans) |
| 476 | (r0 = r2) | 474 | (r0 = r2) |
| 477 | (call ccl-mule-utf-untrans) | 475 | (call ccl-mule-utf-untrans) |
| 478 | (r0 = -1) | 476 | (r6 = ,(charset-id 'latin-iso8859-1)) |
| 477 | (read r0) | ||
| 479 | (repeat))) | 478 | (repeat))) |
| 480 | 479 | ||
| 481 | (if (r3 < #x2500) | 480 | (if (r3 < #x2500) |
| @@ -488,7 +487,7 @@ by the above coding systems, you can customize this option to nil." | |||
| 488 | (translate-character | 487 | (translate-character |
| 489 | utf-translation-table-for-decode r0 r1) | 488 | utf-translation-table-for-decode r0 r1) |
| 490 | (write-multibyte-character r0 r1) | 489 | (write-multibyte-character r0 r1) |
| 491 | (r0 = -1) | 490 | (read r0) |
| 492 | (repeat))) | 491 | (repeat))) |
| 493 | 492 | ||
| 494 | (if (r3 < #x3400) | 493 | (if (r3 < #x3400) |
| @@ -502,7 +501,7 @@ by the above coding systems, you can customize this option to nil." | |||
| 502 | (r1 = (r7 + 32)) | 501 | (r1 = (r7 + 32)) |
| 503 | (r1 += ((r3 + 32) << 7)))) | 502 | (r1 += ((r3 + 32) << 7)))) |
| 504 | (write-multibyte-character r0 r1) | 503 | (write-multibyte-character r0 r1) |
| 505 | (r0 = -1) | 504 | (read r0) |
| 506 | (repeat))) | 505 | (repeat))) |
| 507 | 506 | ||
| 508 | (if (r3 < #xE000) | 507 | (if (r3 < #xE000) |
| @@ -512,10 +511,13 @@ by the above coding systems, you can customize this option to nil." | |||
| 512 | (lookup-integer utf-subst-table-for-decode r3 r1) | 511 | (lookup-integer utf-subst-table-for-decode r3 r1) |
| 513 | (if r7 | 512 | (if r7 |
| 514 | ;; got a translation | 513 | ;; got a translation |
| 515 | (write-multibyte-character r3 r1) | 514 | ((write-multibyte-character r3 r1) |
| 516 | (call ccl-mule-utf-untrans)) | 515 | (read r0) |
| 517 | (r0 = -1) | 516 | (repeat)) |
| 518 | (repeat))) | 517 | ((call ccl-mule-utf-untrans) |
| 518 | (r6 = ,(charset-id 'latin-iso8859-1)) | ||
| 519 | (read r0) | ||
| 520 | (repeat))))) | ||
| 519 | 521 | ||
| 520 | ;; mule-unicode-e000-ffff | 522 | ;; mule-unicode-e000-ffff |
| 521 | ;; Fixme: fffe and ffff are invalid. | 523 | ;; Fixme: fffe and ffff are invalid. |
| @@ -528,21 +530,23 @@ by the above coding systems, you can customize this option to nil." | |||
| 528 | (r1 = (r7 + 32)) | 530 | (r1 = (r7 + 32)) |
| 529 | (r1 += ((r3 + 32) << 7)))) | 531 | (r1 += ((r3 + 32) << 7)))) |
| 530 | (write-multibyte-character r0 r1) | 532 | (write-multibyte-character r0 r1) |
| 531 | (r0 = -1) | 533 | (read r0) |
| 532 | (repeat))) | 534 | (repeat))) |
| 533 | 535 | ||
| 534 | ;; Read the 4th byte. | 536 | ;; Read the 4th byte. |
| 535 | (r3 = -1) | ||
| 536 | (read r3) | 537 | (read r3) |
| 537 | (if ((r3 & #b11000000) != #b10000000) ; Invalid 4th byte | 538 | (if ((r3 & #b11000000) != #b10000000) ; Invalid 4th byte |
| 538 | ((call ccl-mule-utf-untrans) | 539 | ((call ccl-mule-utf-untrans) |
| 539 | (r0 = r1) | 540 | (r0 = r1) |
| 540 | (call ccl-mule-utf-untrans) | 541 | (call ccl-mule-utf-untrans) |
| 542 | (r0 = r2) | ||
| 543 | (call ccl-mule-utf-untrans) | ||
| 544 | (r6 = ,(charset-id 'latin-iso8859-1)) | ||
| 541 | ;; Handle it in the next loop. | 545 | ;; Handle it in the next loop. |
| 542 | (r0 = r3) | 546 | (r0 = r3) |
| 543 | (repeat))) | 547 | (repeat))) |
| 544 | 548 | ||
| 545 | (if (r3 < #xF8) | 549 | (if (r0 < #xF8) |
| 546 | ;; 4-byte encoding: | 550 | ;; 4-byte encoding: |
| 547 | ;; wwwzzzzzzyyyyyyxxxxxx = 11110www 10zzzzzz 10yyyyyy 10xxxxxx | 551 | ;; wwwzzzzzzyyyyyyxxxxxx = 11110www 10zzzzzz 10yyyyyy 10xxxxxx |
| 548 | ;; keep those bytes as eight-bit-{control|graphic} | 552 | ;; keep those bytes as eight-bit-{control|graphic} |
| @@ -561,21 +565,21 @@ by the above coding systems, you can customize this option to nil." | |||
| 561 | (r0 = r3) | 565 | (r0 = r3) |
| 562 | (call ccl-mule-utf-untrans)) | 566 | (call ccl-mule-utf-untrans)) |
| 563 | ((r0 = r4) | 567 | ((r0 = r4) |
| 564 | (call ccl-mule-utf-untrans))) | 568 | (call ccl-mule-utf-untrans)))) |
| 565 | (r0 = -1) | ||
| 566 | (repeat))) | ||
| 567 | 569 | ||
| 568 | ;; Unsupported sequence. | 570 | ;; Unsupported sequence. |
| 569 | (call ccl-mule-utf-untrans) | 571 | ((call ccl-mule-utf-untrans) |
| 570 | (r0 = r1) | 572 | (r0 = r1) |
| 571 | (call ccl-mule-utf-untrans) | 573 | (call ccl-mule-utf-untrans) |
| 572 | (r0 = r2) | 574 | (r0 = r2) |
| 573 | (call ccl-mule-utf-untrans) | 575 | (call ccl-mule-utf-untrans) |
| 574 | (r0 = r3) | 576 | (r0 = r3) |
| 575 | (call ccl-mule-utf-untrans) | 577 | (call ccl-mule-utf-untrans))) |
| 576 | (r0 = -1) | 578 | (r6 = ,(charset-id 'latin-iso8859-1)) |
| 579 | (read r0) | ||
| 577 | (repeat))) | 580 | (repeat))) |
| 578 | 581 | ||
| 582 | |||
| 579 | ;; At EOF... | 583 | ;; At EOF... |
| 580 | (if (r0 >= 0) | 584 | (if (r0 >= 0) |
| 581 | ;; r0 >= #x80 | 585 | ;; r0 >= #x80 |
| @@ -786,7 +790,7 @@ eight-bit-control and eight-bit-graphic characters.") | |||
| 786 | (if (r0 < #xF0) ; 3-byte encoding, as above | 790 | (if (r0 < #xF0) ; 3-byte encoding, as above |
| 787 | ((r0 = ((r0 & #xF) << 12)) | 791 | ((r0 = ((r0 & #xF) << 12)) |
| 788 | (r0 |= ((r1 & #x3F) << 6)) | 792 | (r0 |= ((r1 & #x3F) << 6)) |
| 789 | (r0 |= (r1 & #x3F)) | 793 | (r0 |= (r2 & #x3F)) |
| 790 | (r1 = 3)) | 794 | (r1 = 3)) |
| 791 | (if (r3 == 0) | 795 | (if (r3 == 0) |
| 792 | (r1 = 0) | 796 | (r1 = 0) |