diff options
| author | Karl Heuer | 1998-01-18 04:37:08 +0000 |
|---|---|---|
| committer | Karl Heuer | 1998-01-18 04:37:08 +0000 |
| commit | 09ab3c3bdbef1f042638f2a8db4d87b1ba5dea1c (patch) | |
| tree | 13525f624d8b6d451865e517e113ecb0277cc04e /src | |
| parent | f6e7abc110d43407fbd1b61790cefa4c381614bb (diff) | |
| download | emacs-09ab3c3bdbef1f042638f2a8db4d87b1ba5dea1c.tar.gz emacs-09ab3c3bdbef1f042638f2a8db4d87b1ba5dea1c.zip | |
(concat): Move the test for all nil in `append'
back to the right place.
Always compute result_len_byte as for a multibyte string. Then,
if the result will be single-byte, don't use that computation.
When converting single-byte to multibyte, don't use copy_text.
Do use copy_text between single-byte string and multibyte string.
(Fstring_make_multibyte, Fstring_make_unibyte): New functions.
(syms_of_fns): defsubr them.
(string_char_to_byte): Count from the beginning or the end,
whichever is closer. Keep a cache of recent values.
(string_byte_to_char): Likewise.
(Fstring_lessp): Compare the strings char by char.
Diffstat (limited to 'src')
| -rw-r--r-- | src/fns.c | 209 |
1 files changed, 180 insertions, 29 deletions
| @@ -40,6 +40,8 @@ Boston, MA 02111-1307, USA. */ | |||
| 40 | #define NULL (void *)0 | 40 | #define NULL (void *)0 |
| 41 | #endif | 41 | #endif |
| 42 | 42 | ||
| 43 | #define DEFAULT_NONASCII_INSERT_OFFSET 0x800 | ||
| 44 | |||
| 43 | /* Nonzero enables use of dialog boxes for questions | 45 | /* Nonzero enables use of dialog boxes for questions |
| 44 | asked by mouse commands. */ | 46 | asked by mouse commands. */ |
| 45 | int use_dialog_box; | 47 | int use_dialog_box; |
| @@ -205,9 +207,8 @@ Symbols are also allowed; their print names are used instead.") | |||
| 205 | (s1, s2) | 207 | (s1, s2) |
| 206 | register Lisp_Object s1, s2; | 208 | register Lisp_Object s1, s2; |
| 207 | { | 209 | { |
| 208 | register int i; | ||
| 209 | register unsigned char *p1, *p2; | ||
| 210 | register int end; | 210 | register int end; |
| 211 | register int i1, i1_byte, i2, i2_byte; | ||
| 211 | 212 | ||
| 212 | if (SYMBOLP (s1)) | 213 | if (SYMBOLP (s1)) |
| 213 | XSETSTRING (s1, XSYMBOL (s1)->name); | 214 | XSETSTRING (s1, XSYMBOL (s1)->name); |
| @@ -216,18 +217,32 @@ Symbols are also allowed; their print names are used instead.") | |||
| 216 | CHECK_STRING (s1, 0); | 217 | CHECK_STRING (s1, 0); |
| 217 | CHECK_STRING (s2, 1); | 218 | CHECK_STRING (s2, 1); |
| 218 | 219 | ||
| 219 | p1 = XSTRING (s1)->data; | 220 | i1 = i1_byte = i2 = i2_byte = 0; |
| 220 | p2 = XSTRING (s2)->data; | 221 | |
| 221 | end = XSTRING (s1)->size_byte; | 222 | end = XSTRING (s1)->size; |
| 222 | if (end > XSTRING (s2)->size_byte) | 223 | if (end > XSTRING (s2)->size) |
| 223 | end = XSTRING (s2)->size_byte; | 224 | end = XSTRING (s2)->size; |
| 224 | 225 | ||
| 225 | for (i = 0; i < end; i++) | 226 | while (i1 < end) |
| 226 | { | 227 | { |
| 227 | if (p1[i] != p2[i]) | 228 | /* When we find a mismatch, we must compare the |
| 228 | return p1[i] < p2[i] ? Qt : Qnil; | 229 | characters, not just the bytes. */ |
| 230 | int c1, c2; | ||
| 231 | |||
| 232 | if (STRING_MULTIBYTE (s1)) | ||
| 233 | FETCH_STRING_CHAR_ADVANCE (c1, s1, i1, i1_byte); | ||
| 234 | else | ||
| 235 | c1 = XSTRING (s1)->data[i1++]; | ||
| 236 | |||
| 237 | if (STRING_MULTIBYTE (s2)) | ||
| 238 | FETCH_STRING_CHAR_ADVANCE (c2, s2, i2, i2_byte); | ||
| 239 | else | ||
| 240 | c2 = XSTRING (s2)->data[i2++]; | ||
| 241 | |||
| 242 | if (c1 != c2) | ||
| 243 | return c1 < c2 ? Qt : Qnil; | ||
| 229 | } | 244 | } |
| 230 | return i < XSTRING (s2)->size_byte ? Qt : Qnil; | 245 | return i1 < XSTRING (s2)->size ? Qt : Qnil; |
| 231 | } | 246 | } |
| 232 | 247 | ||
| 233 | static Lisp_Object concat (); | 248 | static Lisp_Object concat (); |
| @@ -428,8 +443,8 @@ concat (nargs, args, target_type, last_special) | |||
| 428 | len = XFASTINT (Flength (this)); | 443 | len = XFASTINT (Flength (this)); |
| 429 | if (target_type == Lisp_String) | 444 | if (target_type == Lisp_String) |
| 430 | { | 445 | { |
| 431 | /* We must pay attention to a multibyte character which | 446 | /* We must count the number of bytes needed in the string |
| 432 | takes more than one byte in string. */ | 447 | as well as the number of characters. */ |
| 433 | int i; | 448 | int i; |
| 434 | Lisp_Object ch; | 449 | Lisp_Object ch; |
| 435 | int this_len_byte; | 450 | int this_len_byte; |
| @@ -459,17 +474,21 @@ concat (nargs, args, target_type, last_special) | |||
| 459 | else if (STRINGP (this)) | 474 | else if (STRINGP (this)) |
| 460 | { | 475 | { |
| 461 | result_len_byte += XSTRING (this)->size_byte; | 476 | result_len_byte += XSTRING (this)->size_byte; |
| 462 | if (STRING_MULTIBYTE (this)) | 477 | { |
| 463 | some_multibyte = 1; | 478 | some_multibyte = 1; |
| 479 | result_len_byte += XSTRING (this)->size_byte; | ||
| 480 | } | ||
| 481 | else | ||
| 482 | result_len_byte += count_size_as_multibyte (XSTRING (this)->data, | ||
| 483 | XSTRING (this)->size); | ||
| 464 | } | 484 | } |
| 465 | } | 485 | } |
| 466 | 486 | ||
| 467 | result_len += len; | 487 | result_len += len; |
| 468 | } | 488 | } |
| 469 | 489 | ||
| 470 | /* In `append', if all but last arg are nil, return last arg. */ | 490 | if (! some_multibyte) |
| 471 | if (target_type == Lisp_Cons && EQ (val, Qnil)) | 491 | result_len_byte = result_len; |
| 472 | return last_tail; | ||
| 473 | 492 | ||
| 474 | /* Create the output object. */ | 493 | /* Create the output object. */ |
| 475 | if (target_type == Lisp_Cons) | 494 | if (target_type == Lisp_Cons) |
| @@ -479,6 +498,9 @@ concat (nargs, args, target_type, last_special) | |||
| 479 | else | 498 | else |
| 480 | val = make_uninit_multibyte_string (result_len, result_len_byte); | 499 | val = make_uninit_multibyte_string (result_len, result_len_byte); |
| 481 | 500 | ||
| 501 | /* In `append', if all but last arg are nil, return last arg. */ | ||
| 502 | if (target_type == Lisp_Cons && EQ (val, Qnil)) | ||
| 503 | return last_tail; | ||
| 482 | 504 | ||
| 483 | /* Copy the contents of the args into the result. */ | 505 | /* Copy the contents of the args into the result. */ |
| 484 | if (CONSP (val)) | 506 | if (CONSP (val)) |
| @@ -514,6 +536,14 @@ concat (nargs, args, target_type, last_special) | |||
| 514 | toindex_byte += thislen_byte; | 536 | toindex_byte += thislen_byte; |
| 515 | toindex += thisleni; | 537 | toindex += thisleni; |
| 516 | } | 538 | } |
| 539 | /* Copy a single-byte string to a multibyte string. */ | ||
| 540 | else if (STRINGP (this) && STRINGP (val)) | ||
| 541 | { | ||
| 542 | toindex_byte += copy_text (XSTRING (this)->data, | ||
| 543 | XSTRING (val)->data + toindex_byte, | ||
| 544 | XSTRING (this)->size, 0, 1); | ||
| 545 | toindex += thisleni; | ||
| 546 | } | ||
| 517 | else | 547 | else |
| 518 | /* Copy element by element. */ | 548 | /* Copy element by element. */ |
| 519 | while (1) | 549 | while (1) |
| @@ -546,7 +576,11 @@ concat (nargs, args, target_type, last_special) | |||
| 546 | && XINT (elt) < 0400) | 576 | && XINT (elt) < 0400) |
| 547 | { | 577 | { |
| 548 | c = XINT (elt); | 578 | c = XINT (elt); |
| 549 | copy_text (&c, &c, 1, 0, 1); | 579 | if (nonascii_insert_offset > 0) |
| 580 | c += nonascii_insert_offset; | ||
| 581 | else | ||
| 582 | c += DEFAULT_NONASCII_INSERT_OFFSET; | ||
| 583 | |||
| 550 | XSETINT (elt, c); | 584 | XSETINT (elt, c); |
| 551 | } | 585 | } |
| 552 | } | 586 | } |
| @@ -608,6 +642,10 @@ concat (nargs, args, target_type, last_special) | |||
| 608 | return val; | 642 | return val; |
| 609 | } | 643 | } |
| 610 | 644 | ||
| 645 | static Lisp_Object string_char_byte_cache_string; | ||
| 646 | static int string_char_byte_cache_charpos; | ||
| 647 | static int string_char_byte_cache_bytepos; | ||
| 648 | |||
| 611 | /* Return the character index corresponding to CHAR_INDEX in STRING. */ | 649 | /* Return the character index corresponding to CHAR_INDEX in STRING. */ |
| 612 | 650 | ||
| 613 | int | 651 | int |
| @@ -615,20 +653,65 @@ string_char_to_byte (string, char_index) | |||
| 615 | Lisp_Object string; | 653 | Lisp_Object string; |
| 616 | int char_index; | 654 | int char_index; |
| 617 | { | 655 | { |
| 618 | int i = 0, i_byte = 0; | 656 | int i, i_byte; |
| 657 | int best_below, best_below_byte; | ||
| 658 | int best_above, best_above_byte; | ||
| 619 | 659 | ||
| 620 | if (! STRING_MULTIBYTE (string)) | 660 | if (! STRING_MULTIBYTE (string)) |
| 621 | return char_index; | 661 | return char_index; |
| 622 | 662 | ||
| 623 | while (i < char_index) | 663 | best_below = best_below_byte = 0; |
| 664 | best_above = XSTRING (string)->size; | ||
| 665 | best_above_byte = XSTRING (string)->size_byte; | ||
| 666 | |||
| 667 | if (EQ (string, string_char_byte_cache_string)) | ||
| 668 | { | ||
| 669 | if (string_char_byte_cache_charpos < char_index) | ||
| 670 | { | ||
| 671 | best_below = string_char_byte_cache_charpos; | ||
| 672 | best_below_byte = string_char_byte_cache_bytepos; | ||
| 673 | } | ||
| 674 | else | ||
| 675 | { | ||
| 676 | best_above = string_char_byte_cache_charpos; | ||
| 677 | best_above_byte = string_char_byte_cache_bytepos; | ||
| 678 | } | ||
| 679 | } | ||
| 680 | |||
| 681 | if (char_index - best_below < best_above - char_index) | ||
| 682 | { | ||
| 683 | while (best_below < char_index) | ||
| 684 | { | ||
| 685 | int c; | ||
| 686 | FETCH_STRING_CHAR_ADVANCE (c, string, best_below, best_below_byte); | ||
| 687 | } | ||
| 688 | i = best_below; | ||
| 689 | i_byte = best_below_byte; | ||
| 690 | } | ||
| 691 | else | ||
| 624 | { | 692 | { |
| 625 | int c; | 693 | while (best_above > char_index) |
| 626 | FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte); | 694 | { |
| 695 | int best_above_byte_saved = --best_above_byte; | ||
| 696 | |||
| 697 | while (best_above_byte > 0 | ||
| 698 | && !CHAR_HEAD_P (XSTRING (string)->data[best_above_byte])) | ||
| 699 | best_above_byte--; | ||
| 700 | if (XSTRING (string)->data[best_above_byte] < 0x80) | ||
| 701 | best_above_byte = best_above_byte_saved; | ||
| 702 | best_above--; | ||
| 703 | } | ||
| 704 | i = best_above; | ||
| 705 | i_byte = best_above_byte; | ||
| 627 | } | 706 | } |
| 628 | 707 | ||
| 708 | string_char_byte_cache_bytepos = i_byte; | ||
| 709 | string_char_byte_cache_charpos = i; | ||
| 710 | string_char_byte_cache_string = string; | ||
| 711 | |||
| 629 | return i_byte; | 712 | return i_byte; |
| 630 | } | 713 | } |
| 631 | 714 | ||
| 632 | /* Return the character index corresponding to BYTE_INDEX in STRING. */ | 715 | /* Return the character index corresponding to BYTE_INDEX in STRING. */ |
| 633 | 716 | ||
| 634 | int | 717 | int |
| @@ -636,20 +719,65 @@ string_byte_to_char (string, byte_index) | |||
| 636 | Lisp_Object string; | 719 | Lisp_Object string; |
| 637 | int byte_index; | 720 | int byte_index; |
| 638 | { | 721 | { |
| 639 | int i = 0, i_byte = 0; | 722 | int i, i_byte; |
| 723 | int best_below, best_below_byte; | ||
| 724 | int best_above, best_above_byte; | ||
| 640 | 725 | ||
| 641 | if (! STRING_MULTIBYTE (string)) | 726 | if (! STRING_MULTIBYTE (string)) |
| 642 | return byte_index; | 727 | return byte_index; |
| 643 | 728 | ||
| 644 | while (i_byte < byte_index) | 729 | best_below = best_below_byte = 0; |
| 730 | best_above = XSTRING (string)->size; | ||
| 731 | best_above_byte = XSTRING (string)->size_byte; | ||
| 732 | |||
| 733 | if (EQ (string, string_char_byte_cache_string)) | ||
| 734 | { | ||
| 735 | if (string_char_byte_cache_bytepos < byte_index) | ||
| 736 | { | ||
| 737 | best_below = string_char_byte_cache_charpos; | ||
| 738 | best_below_byte = string_char_byte_cache_bytepos; | ||
| 739 | } | ||
| 740 | else | ||
| 741 | { | ||
| 742 | best_above = string_char_byte_cache_charpos; | ||
| 743 | best_above_byte = string_char_byte_cache_bytepos; | ||
| 744 | } | ||
| 745 | } | ||
| 746 | |||
| 747 | if (byte_index - best_below_byte < best_above_byte - byte_index) | ||
| 748 | { | ||
| 749 | while (best_below_byte < byte_index) | ||
| 750 | { | ||
| 751 | int c; | ||
| 752 | FETCH_STRING_CHAR_ADVANCE (c, string, best_below, best_below_byte); | ||
| 753 | } | ||
| 754 | i = best_below; | ||
| 755 | i_byte = best_below_byte; | ||
| 756 | } | ||
| 757 | else | ||
| 645 | { | 758 | { |
| 646 | int c; | 759 | while (best_above_byte > byte_index) |
| 647 | FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte); | 760 | { |
| 761 | int best_above_byte_saved = --best_above_byte; | ||
| 762 | |||
| 763 | while (best_above_byte > 0 | ||
| 764 | && !CHAR_HEAD_P (XSTRING (string)->data[best_above_byte])) | ||
| 765 | best_above_byte--; | ||
| 766 | if (XSTRING (string)->data[best_above_byte] < 0x80) | ||
| 767 | best_above_byte = best_above_byte_saved; | ||
| 768 | best_above--; | ||
| 769 | } | ||
| 770 | i = best_above; | ||
| 771 | i_byte = best_above_byte; | ||
| 648 | } | 772 | } |
| 649 | 773 | ||
| 774 | string_char_byte_cache_bytepos = i_byte; | ||
| 775 | string_char_byte_cache_charpos = i; | ||
| 776 | string_char_byte_cache_string = string; | ||
| 777 | |||
| 650 | return i; | 778 | return i; |
| 651 | } | 779 | } |
| 652 | 780 | ||
| 653 | /* Convert STRING to a multibyte string. | 781 | /* Convert STRING to a multibyte string. |
| 654 | Single-byte characters 0200 through 0377 are converted | 782 | Single-byte characters 0200 through 0377 are converted |
| 655 | by adding nonascii_insert_offset to each. */ | 783 | by adding nonascii_insert_offset to each. */ |
| @@ -691,6 +819,24 @@ string_make_unibyte (string) | |||
| 691 | 819 | ||
| 692 | return make_unibyte_string (buf, XSTRING (string)->size); | 820 | return make_unibyte_string (buf, XSTRING (string)->size); |
| 693 | } | 821 | } |
| 822 | |||
| 823 | DEFUN ("string-make-multibyte", Fstring_make_multibyte, Sstring_make_multibyte, | ||
| 824 | 1, 1, 0, | ||
| 825 | "Return the multibyte equivalent of STRING.") | ||
| 826 | (string) | ||
| 827 | Lisp_Object string; | ||
| 828 | { | ||
| 829 | return string_make_multibyte (string); | ||
| 830 | } | ||
| 831 | |||
| 832 | DEFUN ("string-make-unibyte", Fstring_make_unibyte, Sstring_make_unibyte, | ||
| 833 | 1, 1, 0, | ||
| 834 | "Return the unibyte equivalent of STRING.") | ||
| 835 | (string) | ||
| 836 | Lisp_Object string; | ||
| 837 | { | ||
| 838 | return string_make_unibyte (string); | ||
| 839 | } | ||
| 694 | 840 | ||
| 695 | DEFUN ("copy-alist", Fcopy_alist, Scopy_alist, 1, 1, 0, | 841 | DEFUN ("copy-alist", Fcopy_alist, Scopy_alist, 1, 1, 0, |
| 696 | "Return a copy of ALIST.\n\ | 842 | "Return a copy of ALIST.\n\ |
| @@ -2313,6 +2459,9 @@ syms_of_fns () | |||
| 2313 | Qwidget_type = intern ("widget-type"); | 2459 | Qwidget_type = intern ("widget-type"); |
| 2314 | staticpro (&Qwidget_type); | 2460 | staticpro (&Qwidget_type); |
| 2315 | 2461 | ||
| 2462 | staticpro (&string_char_byte_cache_string); | ||
| 2463 | string_char_byte_cache_string = Qnil; | ||
| 2464 | |||
| 2316 | Fset (Qyes_or_no_p_history, Qnil); | 2465 | Fset (Qyes_or_no_p_history, Qnil); |
| 2317 | 2466 | ||
| 2318 | DEFVAR_LISP ("features", &Vfeatures, | 2467 | DEFVAR_LISP ("features", &Vfeatures, |
| @@ -2336,6 +2485,8 @@ invoked by mouse clicks and mouse menu items."); | |||
| 2336 | defsubr (&Sconcat); | 2485 | defsubr (&Sconcat); |
| 2337 | defsubr (&Svconcat); | 2486 | defsubr (&Svconcat); |
| 2338 | defsubr (&Scopy_sequence); | 2487 | defsubr (&Scopy_sequence); |
| 2488 | defsubr (&Sstring_make_multibyte); | ||
| 2489 | defsubr (&Sstring_make_unibyte); | ||
| 2339 | defsubr (&Scopy_alist); | 2490 | defsubr (&Scopy_alist); |
| 2340 | defsubr (&Ssubstring); | 2491 | defsubr (&Ssubstring); |
| 2341 | defsubr (&Snthcdr); | 2492 | defsubr (&Snthcdr); |