diff options
| author | Jens Schmidt | 2025-12-17 22:20:15 +0100 |
|---|---|---|
| committer | Stefan Monnier | 2025-12-20 13:23:29 -0500 |
| commit | 0aabe62b64b4698340e8414d28b0fa0a3eabbf82 (patch) | |
| tree | b0dcd8b62c796fcc0a08f68b0b17dd2591ae7b3c /src | |
| parent | 875e42d501df262fcd9903528657997d025e5c68 (diff) | |
| download | emacs-0aabe62b64b4698340e8414d28b0fa0a3eabbf82.tar.gz emacs-0aabe62b64b4698340e8414d28b0fa0a3eabbf82.zip | |

Improve handling of non-ASCII characters in 'transpose-regions'

* src/editfns.c (Ftranspose_regions): Separate code related to character
semantics from that related to byte semantics and in that way leverage
optimizations for regions of equal length with respect to both
semantics. Move and update comments dating back to the initial
implementation.
* test/src/editfns-tests.el (editfns-tests--transpose-regions-tests)
(editfns-tests--transpose-regions-markups)
(editfns-tests--transpose-regions): New test and accompanying variables.
Diffstat (limited to 'src')
| -rw-r--r-- | src/editfns.c | 153 |
1 files changed, 91 insertions, 62 deletions
diff --git a/src/editfns.c b/src/editfns.c
index 52a3e4e4266..6ffdd3c3109 100644
--- a/src/editfns.c
+++ b/src/editfns.c | |||
| @@ -4525,7 +4525,8 @@ ring. */) | |||
| 4525 | ptrdiff_t len1_byte, len_mid_byte, len2_byte; | 4525 | ptrdiff_t len1_byte, len_mid_byte, len2_byte; |
| 4526 | unsigned char *start1_addr, *start2_addr, *temp; | 4526 | unsigned char *start1_addr, *start2_addr, *temp; |
| 4527 | 4527 | ||
| 4528 | INTERVAL cur_intv, tmp_interval1, tmp_interval2, tmp_interval3; | 4528 | INTERVAL cur_intv, tmp_interval1, tmp_interval2; |
| 4529 | INTERVAL tmp_interval_mid, tmp_interval3; | ||
| 4529 | Lisp_Object buf; | 4530 | Lisp_Object buf; |
| 4530 | 4531 | ||
| 4531 | XSETBUFFER (buf, current_buffer); | 4532 | XSETBUFFER (buf, current_buffer); |
| @@ -4540,7 +4541,9 @@ ring. */) | |||
| 4540 | end2 = XFIXNAT (endr2); | 4541 | end2 = XFIXNAT (endr2); |
| 4541 | gap = GPT; | 4542 | gap = GPT; |
| 4542 | 4543 | ||
| 4543 | /* Swap the regions if they're reversed. */ | 4544 | /* Swap the regions if they're reversed. We do not swap the |
| 4545 | corresponding Lisp objects as well, since we reference these only | ||
| 4546 | to clear text properties in both regions. */ | ||
| 4544 | if (start2 < end1) | 4547 | if (start2 < end1) |
| 4545 | { | 4548 | { |
| 4546 | register ptrdiff_t glumph = start1; | 4549 | register ptrdiff_t glumph = start1; |
| @@ -4560,28 +4563,6 @@ ring. */) | |||
| 4560 | else if ((start1 == end1 || start2 == end2) && end1 == start2) | 4563 | else if ((start1 == end1 || start2 == end2) && end1 == start2) |
| 4561 | return Qnil; | 4564 | return Qnil; |
| 4562 | 4565 | ||
| 4563 | /* The possibilities are: | ||
| 4564 | 1. Adjacent (contiguous) regions, or separate but equal regions | ||
| 4565 | (no, really equal, in this case!), or | ||
| 4566 | 2. Separate regions of unequal size. | ||
| 4567 | |||
| 4568 | The worst case is usually No. 2. It means that (aside from | ||
| 4569 | potential need for getting the gap out of the way), there also | ||
| 4570 | needs to be a shifting of the text between the two regions. So | ||
| 4571 | if they are spread far apart, we are that much slower... sigh. */ | ||
| 4572 | |||
| 4573 | /* It must be pointed out that the really studly thing to do would | ||
| 4574 | be not to move the gap at all, but to leave it in place and work | ||
| 4575 | around it if necessary. This would be extremely efficient, | ||
| 4576 | especially considering that people are likely to do | ||
| 4577 | transpositions near where they are working interactively, which | ||
| 4578 | is exactly where the gap would be found. However, such code | ||
| 4579 | would be much harder to write and to read. So, if you are | ||
| 4580 | reading this comment and are feeling squirrely, by all means have | ||
| 4581 | a go! I just didn't feel like doing it, so I will simply move | ||
| 4582 | the gap the minimum distance to get it out of the way, and then | ||
| 4583 | deal with an unbroken array. */ | ||
| 4584 | |||
| 4585 | start1_byte = CHAR_TO_BYTE (start1); | 4566 | start1_byte = CHAR_TO_BYTE (start1); |
| 4586 | end2_byte = CHAR_TO_BYTE (end2); | 4567 | end2_byte = CHAR_TO_BYTE (end2); |
| 4587 | 4568 | ||
| @@ -4597,6 +4578,22 @@ ring. */) | |||
| 4597 | /* Run the before-change-functions *before* we move the gap. */ | 4578 | /* Run the before-change-functions *before* we move the gap. */ |
| 4598 | modify_text (start1, end2); | 4579 | modify_text (start1, end2); |
| 4599 | 4580 | ||
| 4581 | /* It must be pointed out that the really studly thing to do would | ||
| 4582 | be not to move the gap at all, but to leave it in place and work | ||
| 4583 | around it if necessary. This would be extremely efficient, | ||
| 4584 | especially considering that people are likely to do | ||
| 4585 | transpositions near where they are working interactively, which | ||
| 4586 | is exactly where the gap would be found. However, such code | ||
| 4587 | would be much harder to write and to read. So, if you are | ||
| 4588 | reading this comment and are feeling squirrely, by all means have | ||
| 4589 | a go! I just didn't feel like doing it, so I will simply move | ||
| 4590 | the gap the minimum distance to get it out of the way, and then | ||
| 4591 | deal with an unbroken array. */ | ||
| 4592 | |||
| 4593 | /* Hmmm... how about checking to see if the gap is large | ||
| 4594 | enough to use as the temporary storage? That would avoid an | ||
| 4595 | allocation... interesting. Later, don't fool with it now. */ | ||
| 4596 | |||
| 4600 | /* Make sure the gap won't interfere, by moving it out of the text | 4597 | /* Make sure the gap won't interfere, by moving it out of the text |
| 4601 | we will operate on. */ | 4598 | we will operate on. */ |
| 4602 | if (start1 < gap && gap < end2) | 4599 | if (start1 < gap && gap < end2) |
| @@ -4637,16 +4634,36 @@ ring. */) | |||
| 4637 | } | 4634 | } |
| 4638 | #endif | 4635 | #endif |
| 4639 | 4636 | ||
| 4640 | /* Hmmm... how about checking to see if the gap is large | 4637 | /* The possibilities are: |
| 4641 | enough to use as the temporary storage? That would avoid an | 4638 | 1. Regions of equal size, possibly even adjacent (contiguous). |
| 4642 | allocation... interesting. Later, don't fool with it now. */ | 4639 | 2. Regions of unequal size. |
| 4640 | |||
| 4641 | In case 1. we can leave the "mid", that is, the region between the | ||
| 4642 | two regions untouched. | ||
| 4643 | |||
| 4644 | The worst case is usually No. 2. It means that (aside from | ||
| 4645 | potential need for getting the gap out of the way), there also | ||
| 4646 | needs to be a shifting of the text between the two regions. So | ||
| 4647 | if they are spread far apart, we are that much slower... sigh. */ | ||
| 4648 | |||
| 4649 | /* As an additional difficulty, we have to carefully consider byte vs. | ||
| 4650 | character semantics: Maintaining undo and text properties needs to | ||
| 4651 | be done in terms of characters, swapping text in memory needs to be | ||
| 4652 | done in terms of bytes. | ||
| 4653 | |||
| 4654 | Handling case 1. mentioned above in a special way is beneficial | ||
| 4655 | both for undo/text properties and for memory swapping, only we have | ||
| 4656 | to consider case 1. for the character-related bits (len1 == len2) | ||
| 4657 | and case 1. for the byte-related bits (len1_byte == len2_byte) | ||
| 4658 | separately. */ | ||
| 4643 | 4659 | ||
| 4644 | tmp_interval1 = copy_intervals (cur_intv, start1, len1); | 4660 | tmp_interval1 = copy_intervals (cur_intv, start1, len1); |
| 4645 | tmp_interval2 = copy_intervals (cur_intv, start2, len2); | 4661 | tmp_interval2 = copy_intervals (cur_intv, start2, len2); |
| 4646 | USE_SAFE_ALLOCA; | 4662 | |
| 4647 | if (len1_byte == len2_byte && len1 == len2) | 4663 | len_mid = start2 - end1; |
| 4648 | /* Regions are same size, though, how nice. */ | 4664 | len_mid_byte = start2_byte - end1_byte; |
| 4649 | /* The char lengths also have to match, for text-properties. */ | 4665 | |
| 4666 | if (len1 == len2) | ||
| 4650 | { | 4667 | { |
| 4651 | if (end1 == start2) /* Merge the two parts into a single one. */ | 4668 | if (end1 == start2) /* Merge the two parts into a single one. */ |
| 4652 | record_change (start1, (end2 - start1)); | 4669 | record_change (start1, (end2 - start1)); |
| @@ -4663,7 +4680,24 @@ ring. */) | |||
| 4663 | tmp_interval3 = validate_interval_range (buf, &startr2, &endr2, 0); | 4680 | tmp_interval3 = validate_interval_range (buf, &startr2, &endr2, 0); |
| 4664 | if (tmp_interval3) | 4681 | if (tmp_interval3) |
| 4665 | set_text_properties_1 (startr2, endr2, Qnil, buf, tmp_interval3); | 4682 | set_text_properties_1 (startr2, endr2, Qnil, buf, tmp_interval3); |
| 4683 | } | ||
| 4684 | else | ||
| 4685 | /* Regions have different length, character-wise. Handle undo and | ||
| 4686 | text properties for both regions as one long piece of text | ||
| 4687 | spanning both regions and the mid. But while doing so, save the | ||
| 4688 | intervals of the mid to later restore them in their new | ||
| 4689 | position. */ | ||
| 4690 | { | ||
| 4691 | record_change (start1, (end2 - start1)); | ||
| 4692 | tmp_interval_mid = copy_intervals (cur_intv, end1, len_mid); | ||
| 4693 | tmp_interval3 = validate_interval_range (buf, &startr1, &endr2, 0); | ||
| 4694 | if (tmp_interval3) | ||
| 4695 | set_text_properties_1 (startr1, endr2, Qnil, buf, tmp_interval3); | ||
| 4696 | } | ||
| 4666 | 4697 | ||
| 4698 | USE_SAFE_ALLOCA; | ||
| 4699 | if (len1_byte == len2_byte) | ||
| 4700 | { | ||
| 4667 | temp = SAFE_ALLOCA (len1_byte); | 4701 | temp = SAFE_ALLOCA (len1_byte); |
| 4668 | start1_addr = BYTE_POS_ADDR (start1_byte); | 4702 | start1_addr = BYTE_POS_ADDR (start1_byte); |
| 4669 | start2_addr = BYTE_POS_ADDR (start2_byte); | 4703 | start2_addr = BYTE_POS_ADDR (start2_byte); |
| @@ -4671,42 +4705,37 @@ ring. */) | |||
| 4671 | memcpy (start1_addr, start2_addr, len2_byte); | 4705 | memcpy (start1_addr, start2_addr, len2_byte); |
| 4672 | memcpy (start2_addr, temp, len1_byte); | 4706 | memcpy (start2_addr, temp, len1_byte); |
| 4673 | } | 4707 | } |
| 4708 | else if (len1_byte < len2_byte) /* Second region larger than first */ | ||
| 4709 | { | ||
| 4710 | /* holds region 2 */ | ||
| 4711 | temp = SAFE_ALLOCA (len2_byte); | ||
| 4712 | start1_addr = BYTE_POS_ADDR (start1_byte); | ||
| 4713 | start2_addr = BYTE_POS_ADDR (start2_byte); | ||
| 4714 | memcpy (temp, start2_addr, len2_byte); | ||
| 4715 | memcpy (start1_addr + len_mid_byte + len2_byte, start1_addr, len1_byte); | ||
| 4716 | memmove (start1_addr + len2_byte, start1_addr + len1_byte, len_mid_byte); | ||
| 4717 | memcpy (start1_addr, temp, len2_byte); | ||
| 4718 | } | ||
| 4674 | else | 4719 | else |
| 4720 | /* Second region smaller than first. */ | ||
| 4721 | { | ||
| 4722 | /* holds region 1 */ | ||
| 4723 | temp = SAFE_ALLOCA (len1_byte); | ||
| 4724 | start1_addr = BYTE_POS_ADDR (start1_byte); | ||
| 4725 | start2_addr = BYTE_POS_ADDR (start2_byte); | ||
| 4726 | memcpy (temp, start1_addr, len1_byte); | ||
| 4727 | memcpy (start1_addr, start2_addr, len2_byte); | ||
| 4728 | memmove (start1_addr + len2_byte, start1_addr + len1_byte, len_mid_byte); | ||
| 4729 | memcpy (start1_addr + len2_byte + len_mid_byte, temp, len1_byte); | ||
| 4730 | } | ||
| 4731 | SAFE_FREE (); | ||
| 4732 | |||
| 4733 | if (len1 != len2) | ||
| 4734 | /* Restore intervals of the mid. */ | ||
| 4675 | { | 4735 | { |
| 4676 | len_mid = start2 - end1; | ||
| 4677 | len_mid_byte = start2_byte - end1_byte; | ||
| 4678 | record_change (start1, (end2 - start1)); | ||
| 4679 | INTERVAL tmp_interval_mid = copy_intervals (cur_intv, end1, len_mid); | ||
| 4680 | tmp_interval3 = validate_interval_range (buf, &startr1, &endr2, 0); | ||
| 4681 | if (tmp_interval3) | ||
| 4682 | set_text_properties_1 (startr1, endr2, Qnil, buf, tmp_interval3); | ||
| 4683 | if (len1_byte < len2_byte) /* Second region larger than first */ | ||
| 4684 | { | ||
| 4685 | /* holds region 2 */ | ||
| 4686 | temp = SAFE_ALLOCA (len2_byte); | ||
| 4687 | start1_addr = BYTE_POS_ADDR (start1_byte); | ||
| 4688 | start2_addr = BYTE_POS_ADDR (start2_byte); | ||
| 4689 | memcpy (temp, start2_addr, len2_byte); | ||
| 4690 | memcpy (start1_addr + len_mid_byte + len2_byte, start1_addr, len1_byte); | ||
| 4691 | memmove (start1_addr + len2_byte, start1_addr + len1_byte, len_mid_byte); | ||
| 4692 | memcpy (start1_addr, temp, len2_byte); | ||
| 4693 | } | ||
| 4694 | else | ||
| 4695 | /* Second region smaller than first. */ | ||
| 4696 | { | ||
| 4697 | /* holds region 1 */ | ||
| 4698 | temp = SAFE_ALLOCA (len1_byte); | ||
| 4699 | start1_addr = BYTE_POS_ADDR (start1_byte); | ||
| 4700 | start2_addr = BYTE_POS_ADDR (start2_byte); | ||
| 4701 | memcpy (temp, start1_addr, len1_byte); | ||
| 4702 | memcpy (start1_addr, start2_addr, len2_byte); | ||
| 4703 | memmove (start1_addr + len2_byte, start1_addr + len1_byte, len_mid_byte); | ||
| 4704 | memcpy (start1_addr + len2_byte + len_mid_byte, temp, len1_byte); | ||
| 4705 | } | ||
| 4706 | graft_intervals_into_buffer (tmp_interval_mid, start1 + len2, | 4736 | graft_intervals_into_buffer (tmp_interval_mid, start1 + len2, |
| 4707 | len_mid, current_buffer, 0); | 4737 | len_mid, current_buffer, 0); |
| 4708 | } | 4738 | } |
| 4709 | SAFE_FREE (); | ||
| 4710 | graft_intervals_into_buffer (tmp_interval1, end2 - len1, | 4739 | graft_intervals_into_buffer (tmp_interval1, end2 - len1, |
| 4711 | len1, current_buffer, 0); | 4740 | len1, current_buffer, 0); |
| 4712 | graft_intervals_into_buffer (tmp_interval2, start1, | 4741 | graft_intervals_into_buffer (tmp_interval2, start1, |