aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKenichi Handa2004-06-13 00:21:39 +0000
committerKenichi Handa2004-06-13 00:21:39 +0000
commit064cff0b3d73506b9134d3eb4b64281b4a725860 (patch)
treed7d11f51efbc6dbf129ffba7f53482d02d68f1b1
parente9b95bbf8a5106747ca93fcb901e46e9c4aac614 (diff)
downloademacs-064cff0b3d73506b9134d3eb4b64281b4a725860.tar.gz
emacs-064cff0b3d73506b9134d3eb4b64281b4a725860.zip
(ccl-decode-mule-utf-8): Fix previous change.
(ccl-untranslated-to-ucs): Fix typo.
-rw-r--r--lisp/international/utf-8.el114
1 files changed, 59 insertions, 55 deletions
diff --git a/lisp/international/utf-8.el b/lisp/international/utf-8.el
index e324d0c0270..25ec185f545 100644
--- a/lisp/international/utf-8.el
+++ b/lisp/international/utf-8.el
@@ -395,50 +395,37 @@ by the above coding systems, you can customize this option to nil."
395 ;; Thus magnification factor is two. 395 ;; Thus magnification factor is two.
396 ;; 396 ;;
397 `(2 397 `(2
398 ((r0 = -1) 398 ((r6 = ,(charset-id 'latin-iso8859-1))
399 (read r0)
399 (loop 400 (loop
400 (if (r0 < 0)
401 (read r0))
402 (if (r0 < #x80) 401 (if (r0 < #x80)
403 ;; 1-byte encoding, i.e., ascii 402 ;; 1-byte encoding, i.e., ascii
404 ((write r0) 403 (write-read-repeat r0))
405 (r0 = -1) 404 (if (r0 < #xc2)
406 (repeat))) 405 ;; continuation byte (invalid here) or 1st byte of overlong
407 (if (r0 < #xc0) ; continuation byte (invalid here) 406 ;; 2-byte sequence.
408 ((call ccl-mule-utf-untrans) 407 ((call ccl-mule-utf-untrans)
409 (r0 = -1) 408 (r6 = ,(charset-id 'latin-iso8859-1))
409 (read r0)
410 (repeat))) 410 (repeat)))
411 411
412 ;; Read the 2nd byte. 412 ;; Read the 2nd byte.
413 (r1 = -1)
414 (read r1) 413 (read r1)
415 (if ((r1 & #b11000000) != #b10000000) ; Invalid 2nd byte 414 (if ((r1 & #b11000000) != #b10000000) ; Invalid 2nd byte
416 ((call ccl-mule-utf-untrans) 415 ((call ccl-mule-utf-untrans)
416 (r6 = ,(charset-id 'latin-iso8859-1))
417 ;; Handle it in the next loop. 417 ;; Handle it in the next loop.
418 (r0 = r1) 418 (r0 = r1)
419 (repeat))) 419 (repeat)))
420 420
421 (if (r0 < #xe0) 421 (if (r0 < #xe0)
422 ;; 2-byte encoding 00000yyyyyxxxxxx = 110yyyyy 10xxxxxx 422 ;; 2-byte encoding 00000yyyyyxxxxxx = 110yyyyy 10xxxxxx
423 ((r2 = ((r0 & #x1F) << 6)) 423 ((r1 &= #x3F)
424 (r2 |= (r1 & #x3F)) 424 (r1 |= ((r0 & #x1F) << 6))
425 ;; Now r2 holds scalar value 425 ;; Now r1 holds scalar value. We don't have to check
426 426 ;; `overlong sequence' because r0 >= 0xC2.
427 (if (r2 < 128) ; `overlong sequence'
428 ((call ccl-mule-utf-untrans)
429 (r0 = r1)
430 (call ccl-mule-utf-untrans)
431 (r0 = -1)
432 (repeat)))
433 427
434 (r1 = r2) 428 (if (r1 >= 256)
435 (if (r1 < 160)
436 ;; eight-bit-control
437 (r0 = ,(charset-id 'eight-bit-control))
438 (if (r1 < 256)
439 ;; latin-iso8859-1
440 ((r0 = ,(charset-id 'latin-iso8859-1))
441 (r1 -= 128))
442 ;; mule-unicode-0100-24ff (< 0800) 429 ;; mule-unicode-0100-24ff (< 0800)
443 ((r0 = ,(charset-id 'mule-unicode-0100-24ff)) 430 ((r0 = ,(charset-id 'mule-unicode-0100-24ff))
444 (r1 -= #x0100) 431 (r1 -= #x0100)
@@ -446,18 +433,29 @@ by the above coding systems, you can customize this option to nil."
446 (r1 %= 96) 433 (r1 %= 96)
447 (r1 += (r2 + 32)) 434 (r1 += (r2 + 32))
448 (translate-character 435 (translate-character
449 utf-translation-table-for-decode r0 r1)))) 436 utf-translation-table-for-decode r0 r1)
450 (write-multibyte-character r0 r1) 437 (write-multibyte-character r0 r1)
451 (r0 = -1) 438 (read r0)
452 (repeat))) 439 (repeat))
440 (if (r1 >= 160)
441 ;; latin-iso8859-1
442 ((r1 -= 128)
443 (write-multibyte-character r6 r1)
444 (read r0)
445 (repeat))
446 ;; eight-bit-control
447 ((r0 = ,(charset-id 'eight-bit-control))
448 (write-multibyte-character r0 r1)
449 (read r0)
450 (repeat))))))
453 451
454 ;; Read the 3rd byte. 452 ;; Read the 3rd byte.
455 (r2 = -1)
456 (read r2) 453 (read r2)
457 (if ((r2 & #b11000000) != #b10000000) ; Invalid 3rd byte 454 (if ((r2 & #b11000000) != #b10000000) ; Invalid 3rd byte
458 ((call ccl-mule-utf-untrans) 455 ((call ccl-mule-utf-untrans)
459 (r0 = r1) 456 (r0 = r1)
460 (call ccl-mule-utf-untrans) 457 (call ccl-mule-utf-untrans)
458 (r6 = ,(charset-id 'latin-iso8859-1))
461 ;; Handle it in the next loop. 459 ;; Handle it in the next loop.
462 (r0 = r2) 460 (r0 = r2)
463 (repeat))) 461 (repeat)))
@@ -475,7 +473,8 @@ by the above coding systems, you can customize this option to nil."
475 (call ccl-mule-utf-untrans) 473 (call ccl-mule-utf-untrans)
476 (r0 = r2) 474 (r0 = r2)
477 (call ccl-mule-utf-untrans) 475 (call ccl-mule-utf-untrans)
478 (r0 = -1) 476 (r6 = ,(charset-id 'latin-iso8859-1))
477 (read r0)
479 (repeat))) 478 (repeat)))
480 479
481 (if (r3 < #x2500) 480 (if (r3 < #x2500)
@@ -488,7 +487,7 @@ by the above coding systems, you can customize this option to nil."
488 (translate-character 487 (translate-character
489 utf-translation-table-for-decode r0 r1) 488 utf-translation-table-for-decode r0 r1)
490 (write-multibyte-character r0 r1) 489 (write-multibyte-character r0 r1)
491 (r0 = -1) 490 (read r0)
492 (repeat))) 491 (repeat)))
493 492
494 (if (r3 < #x3400) 493 (if (r3 < #x3400)
@@ -502,7 +501,7 @@ by the above coding systems, you can customize this option to nil."
502 (r1 = (r7 + 32)) 501 (r1 = (r7 + 32))
503 (r1 += ((r3 + 32) << 7)))) 502 (r1 += ((r3 + 32) << 7))))
504 (write-multibyte-character r0 r1) 503 (write-multibyte-character r0 r1)
505 (r0 = -1) 504 (read r0)
506 (repeat))) 505 (repeat)))
507 506
508 (if (r3 < #xE000) 507 (if (r3 < #xE000)
@@ -512,10 +511,13 @@ by the above coding systems, you can customize this option to nil."
512 (lookup-integer utf-subst-table-for-decode r3 r1) 511 (lookup-integer utf-subst-table-for-decode r3 r1)
513 (if r7 512 (if r7
514 ;; got a translation 513 ;; got a translation
515 (write-multibyte-character r3 r1) 514 ((write-multibyte-character r3 r1)
516 (call ccl-mule-utf-untrans)) 515 (read r0)
517 (r0 = -1) 516 (repeat))
518 (repeat))) 517 ((call ccl-mule-utf-untrans)
518 (r6 = ,(charset-id 'latin-iso8859-1))
519 (read r0)
520 (repeat)))))
519 521
520 ;; mule-unicode-e000-ffff 522 ;; mule-unicode-e000-ffff
521 ;; Fixme: fffe and ffff are invalid. 523 ;; Fixme: fffe and ffff are invalid.
@@ -528,21 +530,23 @@ by the above coding systems, you can customize this option to nil."
528 (r1 = (r7 + 32)) 530 (r1 = (r7 + 32))
529 (r1 += ((r3 + 32) << 7)))) 531 (r1 += ((r3 + 32) << 7))))
530 (write-multibyte-character r0 r1) 532 (write-multibyte-character r0 r1)
531 (r0 = -1) 533 (read r0)
532 (repeat))) 534 (repeat)))
533 535
534 ;; Read the 4th byte. 536 ;; Read the 4th byte.
535 (r3 = -1)
536 (read r3) 537 (read r3)
537 (if ((r3 & #b11000000) != #b10000000) ; Invalid 4th byte 538 (if ((r3 & #b11000000) != #b10000000) ; Invalid 4th byte
538 ((call ccl-mule-utf-untrans) 539 ((call ccl-mule-utf-untrans)
539 (r0 = r1) 540 (r0 = r1)
540 (call ccl-mule-utf-untrans) 541 (call ccl-mule-utf-untrans)
542 (r0 = r2)
543 (call ccl-mule-utf-untrans)
544 (r6 = ,(charset-id 'latin-iso8859-1))
541 ;; Handle it in the next loop. 545 ;; Handle it in the next loop.
542 (r0 = r3) 546 (r0 = r3)
543 (repeat))) 547 (repeat)))
544 548
545 (if (r3 < #xF8) 549 (if (r0 < #xF8)
546 ;; 4-byte encoding: 550 ;; 4-byte encoding:
547 ;; wwwzzzzzzyyyyyyxxxxxx = 11110www 10zzzzzz 10yyyyyy 10xxxxxx 551 ;; wwwzzzzzzyyyyyyxxxxxx = 11110www 10zzzzzz 10yyyyyy 10xxxxxx
548 ;; keep those bytes as eight-bit-{control|graphic} 552 ;; keep those bytes as eight-bit-{control|graphic}
@@ -561,21 +565,21 @@ by the above coding systems, you can customize this option to nil."
561 (r0 = r3) 565 (r0 = r3)
562 (call ccl-mule-utf-untrans)) 566 (call ccl-mule-utf-untrans))
563 ((r0 = r4) 567 ((r0 = r4)
564 (call ccl-mule-utf-untrans))) 568 (call ccl-mule-utf-untrans))))
565 (r0 = -1)
566 (repeat)))
567 569
568 ;; Unsupported sequence. 570 ;; Unsupported sequence.
569 (call ccl-mule-utf-untrans) 571 ((call ccl-mule-utf-untrans)
570 (r0 = r1) 572 (r0 = r1)
571 (call ccl-mule-utf-untrans) 573 (call ccl-mule-utf-untrans)
572 (r0 = r2) 574 (r0 = r2)
573 (call ccl-mule-utf-untrans) 575 (call ccl-mule-utf-untrans)
574 (r0 = r3) 576 (r0 = r3)
575 (call ccl-mule-utf-untrans) 577 (call ccl-mule-utf-untrans)))
576 (r0 = -1) 578 (r6 = ,(charset-id 'latin-iso8859-1))
579 (read r0)
577 (repeat))) 580 (repeat)))
578 581
582
579 ;; At EOF... 583 ;; At EOF...
580 (if (r0 >= 0) 584 (if (r0 >= 0)
581 ;; r0 >= #x80 585 ;; r0 >= #x80
@@ -786,7 +790,7 @@ eight-bit-control and eight-bit-graphic characters.")
786 (if (r0 < #xF0) ; 3-byte encoding, as above 790 (if (r0 < #xF0) ; 3-byte encoding, as above
787 ((r0 = ((r0 & #xF) << 12)) 791 ((r0 = ((r0 & #xF) << 12))
788 (r0 |= ((r1 & #x3F) << 6)) 792 (r0 |= ((r1 & #x3F) << 6))
789 (r0 |= (r1 & #x3F)) 793 (r0 |= (r2 & #x3F))
790 (r1 = 3)) 794 (r1 = 3))
791 (if (r3 == 0) 795 (if (r3 == 0)
792 (r1 = 0) 796 (r1 = 0)