diff options
| author | Philipp Stephani | 2017-05-07 21:01:53 +0200 |
|---|---|---|
| committer | Philipp Stephani | 2017-06-17 15:40:58 +0200 |
| commit | d682f0daa3c0bfdd5ee8ce0e9226353d505e85a9 (patch) | |
| tree | ef0a6de2163d2f3c3c9bcb5012875e0018ef496f /src | |
| parent | 46279c1ea117bab75bdeccfd04703033c9e7d26d (diff) | |
| download | emacs-d682f0daa3c0bfdd5ee8ce0e9226353d505e85a9.tar.gz emacs-d682f0daa3c0bfdd5ee8ce0e9226353d505e85a9.zip | |
Add command to replace buffer contents
Add a new command 'replace-buffer-contents' that uses the Myers diff
algorithm to non-destructively replace the accessible portion of the
current buffer. The Myers algorithm is implemented in Gnulib.
* src/editfns.c (Freplace_buffer_contents): New command.
(set_bit, bit_is_set, buffer_chars_equal): New helper functions.
(syms_of_editfns): Define new command.
* test/src/editfns-tests.el (replace-buffer-contents-1)
(replace-buffer-contents-2): New unit tests.
* src/buffer.h (BUF_FETCH_CHAR_AS_MULTIBYTE): New helper macro.
* admin/merge-gnulib (GNULIB_MODULES): Add diffseq.h and minmax.h.
Diffstat (limited to 'src')
| -rw-r--r-- | src/buffer.h | 9 | ||||
| -rw-r--r-- | src/editfns.c | 201 |
2 files changed, 210 insertions, 0 deletions
diff --git a/src/buffer.h b/src/buffer.h index a2bdc4e7294..be270fe4823 100644 --- a/src/buffer.h +++ b/src/buffer.h | |||
| @@ -412,6 +412,15 @@ extern void enlarge_buffer_text (struct buffer *, ptrdiff_t); | |||
| 412 | ? BUF_FETCH_MULTIBYTE_CHAR ((buf), (pos)) \ | 412 | ? BUF_FETCH_MULTIBYTE_CHAR ((buf), (pos)) \ |
| 413 | : BUF_FETCH_BYTE ((buf), (pos))) | 413 | : BUF_FETCH_BYTE ((buf), (pos))) |
| 414 | 414 | ||
| 415 | /* Return character at byte position POS in buffer BUF. If BUF is | ||
| 416 | unibyte and the character is not ASCII, make the returning | ||
| 417 | character multibyte. */ | ||
| 418 | |||
| 419 | #define BUF_FETCH_CHAR_AS_MULTIBYTE(buf, pos) \ | ||
| 420 | (! NILP (BVAR ((buf), enable_multibyte_characters)) \ | ||
| 421 | ? BUF_FETCH_MULTIBYTE_CHAR ((buf), (pos)) \ | ||
| 422 | : UNIBYTE_TO_CHAR (BUF_FETCH_BYTE ((buf), (pos)))) | ||
| 423 | |||
| 415 | /* Return the byte at byte position N in buffer BUF. */ | 424 | /* Return the byte at byte position N in buffer BUF. */ |
| 416 | 425 | ||
| 417 | #define BUF_FETCH_BYTE(buf, n) \ | 426 | #define BUF_FETCH_BYTE(buf, n) \ |
diff --git a/src/editfns.c b/src/editfns.c index 43b17f9f116..76b4aaf81bc 100644 --- a/src/editfns.c +++ b/src/editfns.c | |||
| @@ -3105,6 +3105,206 @@ determines whether case is significant or ignored. */) | |||
| 3105 | /* Same length too => they are equal. */ | 3105 | /* Same length too => they are equal. */ |
| 3106 | return make_number (0); | 3106 | return make_number (0); |
| 3107 | } | 3107 | } |
| 3108 | |||
| 3109 | |||
| 3110 | /* Set up necessary definitions for diffseq.h; see comments in | ||
| 3111 | diffseq.h for explanation. */ | ||
| 3112 | |||
| 3113 | #undef ELEMENT | ||
| 3114 | #undef EQUAL | ||
| 3115 | |||
| 3116 | #define XVECREF_YVECREF_EQUAL(ctx, xoff, yoff) \ | ||
| 3117 | buffer_chars_equal ((ctx), (xoff), (yoff)) | ||
| 3118 | |||
| 3119 | #define OFFSET ptrdiff_t | ||
| 3120 | |||
| 3121 | #define EXTRA_CONTEXT_FIELDS \ | ||
| 3122 | /* Buffers to compare. */ \ | ||
| 3123 | struct buffer *buffer_a; \ | ||
| 3124 | struct buffer *buffer_b; \ | ||
| 3125 | /* Bit vectors recording for each character whether it was deleted | ||
| 3126 | or inserted. */ \ | ||
| 3127 | unsigned char *deletions; \ | ||
| 3128 | unsigned char *insertions; | ||
| 3129 | |||
| 3130 | #define NOTE_DELETE(ctx, xoff) set_bit ((ctx)->deletions, (xoff)) | ||
| 3131 | #define NOTE_INSERT(ctx, yoff) set_bit ((ctx)->insertions, (yoff)) | ||
| 3132 | |||
| 3133 | struct context; | ||
| 3134 | static void set_bit (unsigned char *, OFFSET); | ||
| 3135 | static bool bit_is_set (const unsigned char *, OFFSET); | ||
| 3136 | static bool buffer_chars_equal (struct context *, OFFSET, OFFSET); | ||
| 3137 | |||
| 3138 | #include "minmax.h" | ||
| 3139 | #include "diffseq.h" | ||
| 3140 | |||
| 3141 | DEFUN ("replace-buffer-contents", Freplace_buffer_contents, | ||
| 3142 | Sreplace_buffer_contents, 1, 1, "bSource buffer: ", | ||
| 3143 | doc: /* Replace accessible portion of the current buffer with accessible portion of SOURCE. | ||
| 3144 | As far as possible the replacement is non-destructive, i.e. existing | ||
| 3145 | buffer contents, markers, properties, and overlays in the current | ||
| 3146 | buffer stay intact. */) | ||
| 3147 | (Lisp_Object source) | ||
| 3148 | { | ||
| 3149 | struct buffer *a = current_buffer; | ||
| 3150 | Lisp_Object source_buffer = Fget_buffer (source); | ||
| 3151 | if (NILP (source_buffer)) | ||
| 3152 | nsberror (source); | ||
| 3153 | struct buffer *b = XBUFFER (source_buffer); | ||
| 3154 | if (! BUFFER_LIVE_P (b)) | ||
| 3155 | error ("Selecting deleted buffer"); | ||
| 3156 | if (a == b) | ||
| 3157 | error ("Cannot replace a buffer with itself"); | ||
| 3158 | |||
| 3159 | ptrdiff_t min_a = BEGV; | ||
| 3160 | ptrdiff_t min_b = BUF_BEGV (b); | ||
| 3161 | ptrdiff_t size_a = ZV - min_a; | ||
| 3162 | ptrdiff_t size_b = BUF_ZV (b) - min_b; | ||
| 3163 | eassume (size_a >= 0); | ||
| 3164 | eassume (size_b >= 0); | ||
| 3165 | bool a_empty = size_a == 0; | ||
| 3166 | bool b_empty = size_b == 0; | ||
| 3167 | |||
| 3168 | /* Handle trivial cases where at least one accessible portion is | ||
| 3169 | empty. */ | ||
| 3170 | |||
| 3171 | if (a_empty && b_empty) | ||
| 3172 | return Qnil; | ||
| 3173 | |||
| 3174 | if (a_empty) | ||
| 3175 | return Finsert_buffer_substring (source, Qnil, Qnil); | ||
| 3176 | |||
| 3177 | if (b_empty) | ||
| 3178 | { | ||
| 3179 | del_range_both (BEGV, BEGV_BYTE, ZV, ZV_BYTE, true); | ||
| 3180 | return Qnil; | ||
| 3181 | } | ||
| 3182 | |||
| 3183 | /* FIXME: It is not documented how to initialize the contents of the | ||
| 3184 | context structure. This code cargo-cults from the existing | ||
| 3185 | caller in src/analyze.c of GNU Diffutils, which appears to | ||
| 3186 | work. */ | ||
| 3187 | |||
| 3188 | ptrdiff_t diags = size_a + size_b + 3; | ||
| 3189 | ptrdiff_t *buffer; | ||
| 3190 | USE_SAFE_ALLOCA; | ||
| 3191 | SAFE_NALLOCA (buffer, 2, diags); | ||
| 3192 | /* Micro-optimization: Casting to size_t generates much better | ||
| 3193 | code. */ | ||
| 3194 | ptrdiff_t del_bytes = (size_t) size_a / CHAR_BIT + 1; | ||
| 3195 | ptrdiff_t ins_bytes = (size_t) size_b / CHAR_BIT + 1; | ||
| 3196 | struct context ctx = { | ||
| 3197 | .buffer_a = a, | ||
| 3198 | .buffer_b = b, | ||
| 3199 | .deletions = SAFE_ALLOCA (del_bytes), | ||
| 3200 | .insertions = SAFE_ALLOCA (ins_bytes), | ||
| 3201 | .fdiag = buffer + size_b + 1, | ||
| 3202 | .bdiag = buffer + diags + size_b + 1, | ||
| 3203 | /* FIXME: Find a good number for .too_expensive. */ | ||
| 3204 | .too_expensive = 1000000, | ||
| 3205 | }; | ||
| 3206 | memclear (ctx.deletions, del_bytes); | ||
| 3207 | memclear (ctx.insertions, ins_bytes); | ||
| 3208 | /* compareseq requires indices to be zero-based. We add BEGV back | ||
| 3209 | later. */ | ||
| 3210 | bool early_abort = compareseq (0, size_a, 0, size_b, false, &ctx); | ||
| 3211 | /* Since we didn’t define EARLY_ABORT, we should never abort | ||
| 3212 | early. */ | ||
| 3213 | eassert (! early_abort); | ||
| 3214 | SAFE_FREE (); | ||
| 3215 | |||
| 3216 | Fundo_boundary (); | ||
| 3217 | ptrdiff_t count = SPECPDL_INDEX (); | ||
| 3218 | record_unwind_protect (save_excursion_restore, save_excursion_save ()); | ||
| 3219 | |||
| 3220 | SET_PT_BOTH (BEGV, BEGV_BYTE); | ||
| 3221 | ptrdiff_t i = size_a; | ||
| 3222 | ptrdiff_t j = size_b; | ||
| 3223 | /* Walk backwards through the lists of changes. This was also | ||
| 3224 | cargo-culted from src/analyze.c in GNU Diffutils. Because we | ||
| 3225 | walk backwards, we don’t have to keep the positions in sync. */ | ||
| 3226 | while (i >= 0 || j >= 0) | ||
| 3227 | { | ||
| 3228 | /* Check whether there is a change (insertion or deletion) | ||
| 3229 | before the current position. */ | ||
| 3230 | if ((i > 0 && bit_is_set (ctx.deletions, i - 1)) || | ||
| 3231 | (j > 0 && bit_is_set (ctx.insertions, j - 1))) | ||
| 3232 | { | ||
| 3233 | ptrdiff_t end_a = min_a + i; | ||
| 3234 | ptrdiff_t end_b = min_b + j; | ||
| 3235 | /* Find the beginning of the current change run. */ | ||
| 3236 | while (i > 0 && bit_is_set (ctx.deletions, i - 1)) | ||
| 3237 | --i; | ||
| 3238 | while (j > 0 && bit_is_set (ctx.insertions, j - 1)) | ||
| 3239 | --j; | ||
| 3240 | ptrdiff_t beg_a = min_a + i; | ||
| 3241 | ptrdiff_t beg_b = min_b + j; | ||
| 3242 | eassert (beg_a >= BEGV); | ||
| 3243 | eassert (beg_b >= BUF_BEGV (b)); | ||
| 3244 | eassert (beg_a <= end_a); | ||
| 3245 | eassert (beg_b <= end_b); | ||
| 3246 | eassert (end_a <= ZV); | ||
| 3247 | eassert (end_b <= BUF_ZV (b)); | ||
| 3248 | eassert (beg_a < end_a || beg_b < end_b); | ||
| 3249 | if (beg_a < end_a) | ||
| 3250 | del_range (beg_a, end_a); | ||
| 3251 | if (beg_b < end_b) | ||
| 3252 | { | ||
| 3253 | SET_PT (beg_a); | ||
| 3254 | Finsert_buffer_substring (source, make_natnum (beg_b), | ||
| 3255 | make_natnum (end_b)); | ||
| 3256 | } | ||
| 3257 | } | ||
| 3258 | --i; | ||
| 3259 | --j; | ||
| 3260 | } | ||
| 3261 | |||
| 3262 | return unbind_to (count, Qnil); | ||
| 3263 | } | ||
| 3264 | |||
| 3265 | static void | ||
| 3266 | set_bit (unsigned char *a, ptrdiff_t i) | ||
| 3267 | { | ||
| 3268 | eassert (i >= 0); | ||
| 3269 | /* Micro-optimization: Casting to size_t generates much better | ||
| 3270 | code. */ | ||
| 3271 | size_t j = i; | ||
| 3272 | a[j / CHAR_BIT] |= (1 << (j % CHAR_BIT)); | ||
| 3273 | } | ||
| 3274 | |||
| 3275 | static bool | ||
| 3276 | bit_is_set (const unsigned char *a, ptrdiff_t i) | ||
| 3277 | { | ||
| 3278 | eassert (i >= 0); | ||
| 3279 | /* Micro-optimization: Casting to size_t generates much better | ||
| 3280 | code. */ | ||
| 3281 | size_t j = i; | ||
| 3282 | return a[j / CHAR_BIT] & (1 << (j % CHAR_BIT)); | ||
| 3283 | } | ||
| 3284 | |||
| 3285 | /* Return true if the characters at position POS_A of buffer | ||
| 3286 | CTX->buffer_a and at position POS_B of buffer CTX->buffer_b are | ||
| 3287 | equal. POS_A and POS_B are zero-based. Text properties are | ||
| 3288 | ignored. */ | ||
| 3289 | |||
| 3290 | static bool | ||
| 3291 | buffer_chars_equal (struct context *ctx, | ||
| 3292 | ptrdiff_t pos_a, ptrdiff_t pos_b) | ||
| 3293 | { | ||
| 3294 | eassert (pos_a >= 0); | ||
| 3295 | pos_a += BUF_BEGV (ctx->buffer_a); | ||
| 3296 | eassert (pos_a >= BUF_BEGV (ctx->buffer_a)); | ||
| 3297 | eassert (pos_a < BUF_ZV (ctx->buffer_a)); | ||
| 3298 | |||
| 3299 | eassert (pos_b >= 0); | ||
| 3300 | pos_b += BUF_BEGV (ctx->buffer_b); | ||
| 3301 | eassert (pos_b >= BUF_BEGV (ctx->buffer_b)); | ||
| 3302 | eassert (pos_b < BUF_ZV (ctx->buffer_b)); | ||
| 3303 | |||
| 3304 | return BUF_FETCH_CHAR_AS_MULTIBYTE (ctx->buffer_a, pos_a) | ||
| 3305 | == BUF_FETCH_CHAR_AS_MULTIBYTE (ctx->buffer_b, pos_b); | ||
| 3306 | } | ||
| 3307 | |||
| 3108 | 3308 | ||
| 3109 | static void | 3309 | static void |
| 3110 | subst_char_in_region_unwind (Lisp_Object arg) | 3310 | subst_char_in_region_unwind (Lisp_Object arg) |
| @@ -5315,6 +5515,7 @@ functions if all the text being accessed has this property. */); | |||
| 5315 | 5515 | ||
| 5316 | defsubr (&Sinsert_buffer_substring); | 5516 | defsubr (&Sinsert_buffer_substring); |
| 5317 | defsubr (&Scompare_buffer_substrings); | 5517 | defsubr (&Scompare_buffer_substrings); |
| 5518 | defsubr (&Sreplace_buffer_contents); | ||
| 5318 | defsubr (&Ssubst_char_in_region); | 5519 | defsubr (&Ssubst_char_in_region); |
| 5319 | defsubr (&Stranslate_region_internal); | 5520 | defsubr (&Stranslate_region_internal); |
| 5320 | defsubr (&Sdelete_region); | 5521 | defsubr (&Sdelete_region); |