aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author: Jens Schmidt, 2025-12-17 22:20:15 +0100
committer: Stefan Monnier, 2025-12-20 13:23:29 -0500
commit: 0aabe62b64b4698340e8414d28b0fa0a3eabbf82 (patch)
tree: b0dcd8b62c796fcc0a08f68b0b17dd2591ae7b3c /src
parent: 875e42d501df262fcd9903528657997d025e5c68 (diff)
download: emacs-0aabe62b64b4698340e8414d28b0fa0a3eabbf82.tar.gz
          emacs-0aabe62b64b4698340e8414d28b0fa0a3eabbf82.zip
Improve handling of non-ASCII characters in 'transpose-regions'
* src/editfns.c (Ftranspose_regions): Separate code related to character semantics from that related to byte semantics and in that way leverage optimizations for regions of equal length with respect to both semantics. Move and update comments dating back to the initial implementation.
* test/src/editfns-tests.el (editfns-tests--transpose-regions-tests)
(editfns-tests--transpose-regions-markups)
(editfns-tests--transpose-regions): New test and accompanying variables.
Diffstat (limited to 'src')
-rw-r--r-- src/editfns.c 153
1 file changed, 91 insertions, 62 deletions
diff --git a/src/editfns.c b/src/editfns.c
index 52a3e4e4266..6ffdd3c3109 100644
--- a/src/editfns.c
+++ b/src/editfns.c
@@ -4525,7 +4525,8 @@ ring. */)
4525 ptrdiff_t len1_byte, len_mid_byte, len2_byte; 4525 ptrdiff_t len1_byte, len_mid_byte, len2_byte;
4526 unsigned char *start1_addr, *start2_addr, *temp; 4526 unsigned char *start1_addr, *start2_addr, *temp;
4527 4527
4528 INTERVAL cur_intv, tmp_interval1, tmp_interval2, tmp_interval3; 4528 INTERVAL cur_intv, tmp_interval1, tmp_interval2;
4529 INTERVAL tmp_interval_mid, tmp_interval3;
4529 Lisp_Object buf; 4530 Lisp_Object buf;
4530 4531
4531 XSETBUFFER (buf, current_buffer); 4532 XSETBUFFER (buf, current_buffer);
@@ -4540,7 +4541,9 @@ ring. */)
4540 end2 = XFIXNAT (endr2); 4541 end2 = XFIXNAT (endr2);
4541 gap = GPT; 4542 gap = GPT;
4542 4543
4543 /* Swap the regions if they're reversed. */ 4544 /* Swap the regions if they're reversed. We do not swap the
4545 corresponding Lisp objects as well, since we reference these only
4546 to clear text properties in both regions. */
4544 if (start2 < end1) 4547 if (start2 < end1)
4545 { 4548 {
4546 register ptrdiff_t glumph = start1; 4549 register ptrdiff_t glumph = start1;
@@ -4560,28 +4563,6 @@ ring. */)
4560 else if ((start1 == end1 || start2 == end2) && end1 == start2) 4563 else if ((start1 == end1 || start2 == end2) && end1 == start2)
4561 return Qnil; 4564 return Qnil;
4562 4565
4563 /* The possibilities are:
4564 1. Adjacent (contiguous) regions, or separate but equal regions
4565 (no, really equal, in this case!), or
4566 2. Separate regions of unequal size.
4567
4568 The worst case is usually No. 2. It means that (aside from
4569 potential need for getting the gap out of the way), there also
4570 needs to be a shifting of the text between the two regions. So
4571 if they are spread far apart, we are that much slower... sigh. */
4572
4573 /* It must be pointed out that the really studly thing to do would
4574 be not to move the gap at all, but to leave it in place and work
4575 around it if necessary. This would be extremely efficient,
4576 especially considering that people are likely to do
4577 transpositions near where they are working interactively, which
4578 is exactly where the gap would be found. However, such code
4579 would be much harder to write and to read. So, if you are
4580 reading this comment and are feeling squirrely, by all means have
4581 a go! I just didn't feel like doing it, so I will simply move
4582 the gap the minimum distance to get it out of the way, and then
4583 deal with an unbroken array. */
4584
4585 start1_byte = CHAR_TO_BYTE (start1); 4566 start1_byte = CHAR_TO_BYTE (start1);
4586 end2_byte = CHAR_TO_BYTE (end2); 4567 end2_byte = CHAR_TO_BYTE (end2);
4587 4568
@@ -4597,6 +4578,22 @@ ring. */)
4597 /* Run the before-change-functions *before* we move the gap. */ 4578 /* Run the before-change-functions *before* we move the gap. */
4598 modify_text (start1, end2); 4579 modify_text (start1, end2);
4599 4580
4581 /* It must be pointed out that the really studly thing to do would
4582 be not to move the gap at all, but to leave it in place and work
4583 around it if necessary. This would be extremely efficient,
4584 especially considering that people are likely to do
4585 transpositions near where they are working interactively, which
4586 is exactly where the gap would be found. However, such code
4587 would be much harder to write and to read. So, if you are
4588 reading this comment and are feeling squirrely, by all means have
4589 a go! I just didn't feel like doing it, so I will simply move
4590 the gap the minimum distance to get it out of the way, and then
4591 deal with an unbroken array. */
4592
4593 /* Hmmm... how about checking to see if the gap is large
4594 enough to use as the temporary storage? That would avoid an
4595 allocation... interesting. Later, don't fool with it now. */
4596
4600 /* Make sure the gap won't interfere, by moving it out of the text 4597 /* Make sure the gap won't interfere, by moving it out of the text
4601 we will operate on. */ 4598 we will operate on. */
4602 if (start1 < gap && gap < end2) 4599 if (start1 < gap && gap < end2)
@@ -4637,16 +4634,36 @@ ring. */)
4637 } 4634 }
4638#endif 4635#endif
4639 4636
4640 /* Hmmm... how about checking to see if the gap is large 4637 /* The possibilities are:
4641 enough to use as the temporary storage? That would avoid an 4638 1. Regions of equal size, possibly even adjacent (contiguous).
4642 allocation... interesting. Later, don't fool with it now. */ 4639 2. Regions of unequal size.
4640
4641 In case 1. we can leave the "mid", that is, the region between the
4642 two regions untouched.
4643
4644 The worst case is usually No. 2. It means that (aside from
4645 potential need for getting the gap out of the way), there also
4646 needs to be a shifting of the text between the two regions. So
4647 if they are spread far apart, we are that much slower... sigh. */
4648
4649 /* As an additional difficulty, we have to carefully consider byte vs.
4650 character semantics: Maintaining undo and text properties needs to
4651 be done in terms of characters, swapping text in memory needs to be
4652 done in terms of bytes.
4653
4654 Handling case 1. mentioned above in a special way is beneficial
4655 both for undo/text properties and for memory swapping, only we have
4656 to consider case 1. for the character-related bits (len1 == len2)
4657 and case 1. for the byte-related bits (len1_byte == len2_byte)
4658 separately. */
4643 4659
4644 tmp_interval1 = copy_intervals (cur_intv, start1, len1); 4660 tmp_interval1 = copy_intervals (cur_intv, start1, len1);
4645 tmp_interval2 = copy_intervals (cur_intv, start2, len2); 4661 tmp_interval2 = copy_intervals (cur_intv, start2, len2);
4646 USE_SAFE_ALLOCA; 4662
4647 if (len1_byte == len2_byte && len1 == len2) 4663 len_mid = start2 - end1;
4648 /* Regions are same size, though, how nice. */ 4664 len_mid_byte = start2_byte - end1_byte;
4649 /* The char lengths also have to match, for text-properties. */ 4665
4666 if (len1 == len2)
4650 { 4667 {
4651 if (end1 == start2) /* Merge the two parts into a single one. */ 4668 if (end1 == start2) /* Merge the two parts into a single one. */
4652 record_change (start1, (end2 - start1)); 4669 record_change (start1, (end2 - start1));
@@ -4663,7 +4680,24 @@ ring. */)
4663 tmp_interval3 = validate_interval_range (buf, &startr2, &endr2, 0); 4680 tmp_interval3 = validate_interval_range (buf, &startr2, &endr2, 0);
4664 if (tmp_interval3) 4681 if (tmp_interval3)
4665 set_text_properties_1 (startr2, endr2, Qnil, buf, tmp_interval3); 4682 set_text_properties_1 (startr2, endr2, Qnil, buf, tmp_interval3);
4683 }
4684 else
4685 /* Regions have different length, character-wise. Handle undo and
4686 text properties for both regions as one long piece of text
4687 spanning both regions and the mid. But while doing so, save the
4688 intervals of the mid to later restore them in their new
4689 position. */
4690 {
4691 record_change (start1, (end2 - start1));
4692 tmp_interval_mid = copy_intervals (cur_intv, end1, len_mid);
4693 tmp_interval3 = validate_interval_range (buf, &startr1, &endr2, 0);
4694 if (tmp_interval3)
4695 set_text_properties_1 (startr1, endr2, Qnil, buf, tmp_interval3);
4696 }
4666 4697
4698 USE_SAFE_ALLOCA;
4699 if (len1_byte == len2_byte)
4700 {
4667 temp = SAFE_ALLOCA (len1_byte); 4701 temp = SAFE_ALLOCA (len1_byte);
4668 start1_addr = BYTE_POS_ADDR (start1_byte); 4702 start1_addr = BYTE_POS_ADDR (start1_byte);
4669 start2_addr = BYTE_POS_ADDR (start2_byte); 4703 start2_addr = BYTE_POS_ADDR (start2_byte);
@@ -4671,42 +4705,37 @@ ring. */)
4671 memcpy (start1_addr, start2_addr, len2_byte); 4705 memcpy (start1_addr, start2_addr, len2_byte);
4672 memcpy (start2_addr, temp, len1_byte); 4706 memcpy (start2_addr, temp, len1_byte);
4673 } 4707 }
4708 else if (len1_byte < len2_byte) /* Second region larger than first */
4709 {
4710 /* holds region 2 */
4711 temp = SAFE_ALLOCA (len2_byte);
4712 start1_addr = BYTE_POS_ADDR (start1_byte);
4713 start2_addr = BYTE_POS_ADDR (start2_byte);
4714 memcpy (temp, start2_addr, len2_byte);
4715 memcpy (start1_addr + len_mid_byte + len2_byte, start1_addr, len1_byte);
4716 memmove (start1_addr + len2_byte, start1_addr + len1_byte, len_mid_byte);
4717 memcpy (start1_addr, temp, len2_byte);
4718 }
4674 else 4719 else
4720 /* Second region smaller than first. */
4721 {
4722 /* holds region 1 */
4723 temp = SAFE_ALLOCA (len1_byte);
4724 start1_addr = BYTE_POS_ADDR (start1_byte);
4725 start2_addr = BYTE_POS_ADDR (start2_byte);
4726 memcpy (temp, start1_addr, len1_byte);
4727 memcpy (start1_addr, start2_addr, len2_byte);
4728 memmove (start1_addr + len2_byte, start1_addr + len1_byte, len_mid_byte);
4729 memcpy (start1_addr + len2_byte + len_mid_byte, temp, len1_byte);
4730 }
4731 SAFE_FREE ();
4732
4733 if (len1 != len2)
4734 /* Restore intervals of the mid. */
4675 { 4735 {
4676 len_mid = start2 - end1;
4677 len_mid_byte = start2_byte - end1_byte;
4678 record_change (start1, (end2 - start1));
4679 INTERVAL tmp_interval_mid = copy_intervals (cur_intv, end1, len_mid);
4680 tmp_interval3 = validate_interval_range (buf, &startr1, &endr2, 0);
4681 if (tmp_interval3)
4682 set_text_properties_1 (startr1, endr2, Qnil, buf, tmp_interval3);
4683 if (len1_byte < len2_byte) /* Second region larger than first */
4684 {
4685 /* holds region 2 */
4686 temp = SAFE_ALLOCA (len2_byte);
4687 start1_addr = BYTE_POS_ADDR (start1_byte);
4688 start2_addr = BYTE_POS_ADDR (start2_byte);
4689 memcpy (temp, start2_addr, len2_byte);
4690 memcpy (start1_addr + len_mid_byte + len2_byte, start1_addr, len1_byte);
4691 memmove (start1_addr + len2_byte, start1_addr + len1_byte, len_mid_byte);
4692 memcpy (start1_addr, temp, len2_byte);
4693 }
4694 else
4695 /* Second region smaller than first. */
4696 {
4697 /* holds region 1 */
4698 temp = SAFE_ALLOCA (len1_byte);
4699 start1_addr = BYTE_POS_ADDR (start1_byte);
4700 start2_addr = BYTE_POS_ADDR (start2_byte);
4701 memcpy (temp, start1_addr, len1_byte);
4702 memcpy (start1_addr, start2_addr, len2_byte);
4703 memmove (start1_addr + len2_byte, start1_addr + len1_byte, len_mid_byte);
4704 memcpy (start1_addr + len2_byte + len_mid_byte, temp, len1_byte);
4705 }
4706 graft_intervals_into_buffer (tmp_interval_mid, start1 + len2, 4736 graft_intervals_into_buffer (tmp_interval_mid, start1 + len2,
4707 len_mid, current_buffer, 0); 4737 len_mid, current_buffer, 0);
4708 } 4738 }
4709 SAFE_FREE ();
4710 graft_intervals_into_buffer (tmp_interval1, end2 - len1, 4739 graft_intervals_into_buffer (tmp_interval1, end2 - len1,
4711 len1, current_buffer, 0); 4740 len1, current_buffer, 0);
4712 graft_intervals_into_buffer (tmp_interval2, start1, 4741 graft_intervals_into_buffer (tmp_interval2, start1,