Diffstat (limited to 'src')
-rw-r--r--  src/region-cache.c  833
-rw-r--r--  src/region-cache.h  111
2 files changed, 944 insertions, 0 deletions
diff --git a/src/region-cache.c b/src/region-cache.c
new file mode 100644
index 00000000000..b852e2ae7a7
--- /dev/null
+++ b/src/region-cache.c
@@ -0,0 +1,833 @@
| 1 | /* Caching facts about regions of the buffer, for optimization. | ||
| 2 | Copyright (C) 1985, 1986, 1987, 1988, 1989, 1993 | ||
| 3 | Free Software Foundation, Inc. | ||
| 4 | |||
| 5 | This file is part of GNU Emacs. | ||
| 6 | |||
| 7 | GNU Emacs is free software; you can redistribute it and/or modify | ||
| 8 | it under the terms of the GNU General Public License as published by | ||
| 9 | the Free Software Foundation; either version 2, or (at your option) | ||
| 10 | any later version. | ||
| 11 | |||
| 12 | GNU Emacs is distributed in the hope that it will be useful, | ||
| 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 15 | GNU General Public License for more details. | ||
| 16 | |||
| 17 | You should have received a copy of the GNU General Public License | ||
| 18 | along with GNU Emacs; see the file COPYING. If not, write to | ||
| 19 | the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ | ||
| 20 | |||
| 21 | |||
| 22 | #include <config.h> | ||
| 23 | #include "lisp.h" | ||
| 24 | #include "buffer.h" | ||
| 25 | #include "region-cache.h" | ||
| 26 | |||
| 27 | #include <stdio.h> | ||
| 28 | |||
| 29 | |||
| 30 | /* Data structures. */ | ||
| 31 | |||
| 32 | /* The region cache. | ||
| 33 | |||
| 34 | We want something that maps character positions in a buffer onto | ||
| 35 | values. The representation should deal well with long runs of | ||
| 36 | characters with the same value. | ||
| 37 | |||
| 38 | The tricky part: the representation should be very cheap to | ||
| 39 | maintain in the presence of many insertions and deletions. If the | ||
| 40 | overhead of maintaining the cache is too high, the speedups it | ||
| 41 | offers will be worthless. | ||
| 42 | |||
| 43 | |||
| 44 | We represent the region cache as a sorted array of struct | ||
| 45 | boundary's, each of which contains a buffer position and a value; | ||
| 46 | the value applies to all the characters after the buffer position, | ||
| 47 | until the position of the next boundary, or the end of the buffer. | ||
| 48 | |||
| 49 | The cache always has a boundary whose position is BUF_BEG, so | ||
| 50 | there's always a value associated with every character in the | ||
| 51 | buffer. Since the cache is sorted, this is always the first | ||
| 52 | element of the cache. | ||
| 53 | |||
| 54 | To facilitate the insertion and deletion of boundaries in the | ||
| 55 | cache, the cache has a gap, just like Emacs's text buffers do. | ||
| 56 | |||
| 57 | To help boundary positions float along with insertions and | ||
| 58 | deletions, all boundary positions before the cache gap are stored | ||
| 59 | relative to BUF_BEG (buf) (thus they're >= 0), and all boundary | ||
| 60 | positions after the gap are stored relative to BUF_Z (buf) (thus | ||
| 61 | they're <= 0). Look at BOUNDARY_POS to see this in action. See | ||
| 62 | revalidate_region_cache to see how this helps. */ | ||
| 63 | |||
| 64 | struct boundary { | ||
| 65 | int pos; | ||
| 66 | int value; | ||
| 67 | }; | ||
| 68 | |||
| 69 | struct region_cache { | ||
| 70 | /* A sorted array of locations where the known-ness of the buffer | ||
| 71 | changes. */ | ||
| 72 | struct boundary *boundaries; | ||
| 73 | |||
| 74 | /* boundaries[gap_start ... gap_start + gap_len - 1] is the gap. */ | ||
| 75 | int gap_start, gap_len; | ||
| 76 | |||
| 77 | /* The number of elements allocated to boundaries, not including the | ||
| 78 | gap. */ | ||
| 79 | int cache_len; | ||
| 80 | |||
| 81 | /* The areas that haven't changed since the last time we cleaned out | ||
| 82 | invalid entries from the cache. These overlap when the buffer is | ||
| 83 | entirely unchanged. */ | ||
| 84 | int beg_unchanged, end_unchanged; | ||
| 85 | |||
| 86 | /* The first and last positions in the buffer. Because boundaries | ||
| 87 | store their positions relative to the start (BEG) and end (Z) of | ||
| 88 | the buffer, knowing these positions allows us to accurately | ||
| 89 | interpret positions without having to pass the buffer structure | ||
| 90 | or its endpoints around all the time. | ||
| 91 | |||
| 92 | Yes, buffer_beg is always 1. It's there for symmetry with | ||
| 93 | buffer_end and the BEG and BUF_BEG macros. */ | ||
| 94 | int buffer_beg, buffer_end; | ||
| 95 | }; | ||
| 96 | |||
| 97 | /* Return the position of boundary i in cache c. */ | ||
| 98 | #define BOUNDARY_POS(c, i) \ | ||
| 99 | ((i) < (c)->gap_start \ | ||
| 100 | ? (c)->buffer_beg + (c)->boundaries[(i)].pos \ | ||
| 101 | : (c)->buffer_end + (c)->boundaries[(c)->gap_len + (i)].pos) | ||
| 102 | |||
| 103 | /* Return the value for text after boundary i in cache c. */ | ||
| 104 | #define BOUNDARY_VALUE(c, i) \ | ||
| 105 | ((i) < (c)->gap_start \ | ||
| 106 | ? (c)->boundaries[(i)].value \ | ||
| 107 | : (c)->boundaries[(c)->gap_len + (i)].value) | ||
| 108 | |||
| 109 | /* Set the value for text after boundary i in cache c to v. */ | ||
| 110 | #define SET_BOUNDARY_VALUE(c, i, v) \ | ||
| 111 | ((i) < (c)->gap_start \ | ||
| 112 | ? ((c)->boundaries[(i)].value = (v))\ | ||
| 113 | : ((c)->boundaries[(c)->gap_len + (i)].value = (v))) | ||
| 114 | |||
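A hand-worked check may make the relative encoding easier to see. The sketch below is editorial, not part of the committed file; it builds a tiny three-boundary cache by hand (the positions and values are invented) and decodes it with BOUNDARY_POS, assuming it were compiled alongside the definitions above.

/* Editorial sketch, not in the original source: decode the
   beg-relative / end-relative encoding for a 100-character buffer,
   so BEG = 1 and Z = 101.  */
static void
example_boundary_encoding ()
{
  /* Three real boundaries and a one-element gap at index 2. */
  struct boundary b[4];
  struct region_cache c;

  c.boundaries = b;
  c.gap_start = 2;              /* entries 0 and 1 are beg-relative */
  c.gap_len = 1;                /* b[2] is the gap                  */
  c.cache_len = 3;              /* three real boundaries            */
  c.buffer_beg = 1;
  c.buffer_end = 101;

  b[0].pos = 0;   b[0].value = 0;   /* absolute position 1 + 0   = 1  */
  b[1].pos = 24;  b[1].value = 1;   /* absolute position 1 + 24  = 25 */
  b[3].pos = -30; b[3].value = 0;   /* absolute position 101 - 30 = 71 */

  if (BOUNDARY_POS (&c, 0) != 1
      || BOUNDARY_POS (&c, 1) != 25
      || BOUNDARY_POS (&c, 2) != 71)
    abort ();

  /* So characters 1..24 carry value 0, 25..70 carry value 1, and
     71..100 carry value 0 again.  Because b[3] is end-relative, an
     insertion or deletion before position 71 changes buffer_end and
     the decoded position moves along with the surrounding text.  */
}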
| 115 | |||
| 116 | /* How many elements to add to the gap when we resize the buffer. */ | ||
| 117 | #define NEW_CACHE_GAP (40) | ||
| 118 | |||
| 119 | /* See invalidate_region_cache; if an invalidation would throw away | ||
| 120 | information about this many characters, call | ||
| 121 | revalidate_region_cache before doing the new invalidation, to | ||
| 122 | preserve that information, instead of throwing it away. */ | ||
| 123 | #define PRESERVE_THRESHOLD (500) | ||
| 124 | |||
| 125 | static void revalidate_region_cache (); | ||
| 126 | |||
| 127 | |||
| 128 | /* Interface: Allocating, initializing, and disposing of region caches. */ | ||
| 129 | |||
| 130 | struct region_cache * | ||
| 131 | new_region_cache () | ||
| 132 | { | ||
| 133 | struct region_cache *c | ||
| 134 | = (struct region_cache *) xmalloc (sizeof (struct region_cache)); | ||
| 135 | |||
| 136 | c->gap_start = 0; | ||
| 137 | c->gap_len = NEW_CACHE_GAP; | ||
| 138 | c->cache_len = 0; | ||
| 139 | c->boundaries = | ||
| 140 | (struct boundary *) xmalloc ((c->gap_len + c->cache_len) | ||
| 141 | * sizeof (*c->boundaries)); | ||
| 142 | |||
| 143 | c->beg_unchanged = 0; | ||
| 144 | c->end_unchanged = 0; | ||
| 145 | c->buffer_beg = 1; | ||
| 146 | c->buffer_end = 1; | ||
| 147 | |||
| 148 | /* Insert the boundary for the buffer start. */ | ||
| 149 | c->cache_len++; | ||
| 150 | c->gap_len--; | ||
| 151 | c->gap_start++; | ||
| 152 | c->boundaries[0].pos = 0; /* from buffer_beg */ | ||
| 153 | c->boundaries[0].value = 0; | ||
| 154 | |||
| 155 | return c; | ||
| 156 | } | ||
| 157 | |||
| 158 | void | ||
| 159 | free_region_cache (c) | ||
| 160 | struct region_cache *c; | ||
| 161 | { | ||
| 162 | xfree (c->boundaries); | ||
| 163 | xfree (c); | ||
| 164 | } | ||
| 165 | |||
| 166 | |||
| 167 | /* Finding positions in the cache. */ | ||
| 168 | |||
| 169 | /* Return the index of the last boundary in cache C at or before POS. | ||
| 170 | In other words, return the boundary that specifies the value for | ||
| 171 | the region POS..(POS + 1). | ||
| 172 | |||
| 173 | This operation should be logarithmic in the number of cache | ||
| 174 | entries. It would be nice if it took advantage of locality of | ||
| 175 | reference, too, by searching entries near the last entry found. */ | ||
| 176 | static int | ||
| 177 | find_cache_boundary (c, pos) | ||
| 178 | struct region_cache *c; | ||
| 179 | int pos; | ||
| 180 | { | ||
| 181 | int low = 0, high = c->cache_len; | ||
| 182 | |||
| 183 | while (low + 1 < high) | ||
| 184 | { | ||
| 185 | /* mid is always a valid index, because low < high and ">> 1" | ||
| 186 | rounds down. */ | ||
| 187 | int mid = (low + high) >> 1; | ||
| 188 | int boundary = BOUNDARY_POS (c, mid); | ||
| 189 | |||
| 190 | if (pos < boundary) | ||
| 191 | high = mid; | ||
| 192 | else | ||
| 193 | low = mid; | ||
| 194 | } | ||
| 195 | |||
| 196 | /* Sanity check: low must be the last boundary at or before pos. */ | ||
| 197 | if (BOUNDARY_POS (c, low) > pos | ||
| 198 | || (low + 1 < c->cache_len | ||
| 199 | && BOUNDARY_POS (c, low + 1) <= pos)) | ||
| 200 | abort (); | ||
| 201 | |||
| 202 | return low; | ||
| 203 | } | ||
| 204 | |||
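The comment before find_cache_boundary wishes for a lookup that exploits locality of reference. One way to get it, shown here purely as an editorial sketch (neither the function nor its HINT argument is part of this commit), is to gallop outward from the index returned by the previous lookup and then bisect only the narrowed range:

/* Editorial sketch, not in the original source: like
   find_cache_boundary, but start from HINT (e.g. the index returned
   by the previous lookup) and gallop outward before bisecting.  */
static int
find_cache_boundary_near (c, pos, hint)
     struct region_cache *c;
     int pos, hint;
{
  int low, high, step = 1;

  if (hint < 0 || hint >= c->cache_len)
    return find_cache_boundary (c, pos);

  if (BOUNDARY_POS (c, hint) <= pos)
    {
      /* Gallop right until we pass POS. */
      low = hint;
      high = hint + 1;
      while (high < c->cache_len && BOUNDARY_POS (c, high) <= pos)
        {
          low = high;
          high += step;
          step <<= 1;
        }
      if (high > c->cache_len)
        high = c->cache_len;
    }
  else
    {
      /* Gallop left until we drop to or below POS. */
      high = hint;
      low = hint - 1;
      while (low > 0 && BOUNDARY_POS (c, low) > pos)
        {
          high = low;
          low -= step;
          step <<= 1;
        }
      if (low < 0)
        low = 0;
    }

  /* Finish with the same bisection find_cache_boundary uses. */
  while (low + 1 < high)
    {
      int mid = (low + high) >> 1;

      if (pos < BOUNDARY_POS (c, mid))
        high = mid;
      else
        low = mid;
    }

  return low;
}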
| 205 | |||
| 206 | |||
| 207 | /* Moving the cache gap around, inserting, and deleting. */ | ||
| 208 | |||
| 209 | |||
| 210 | /* Move the gap of cache C to index POS, and make sure it has space | ||
| 211 | for at least MIN_SIZE boundaries. */ | ||
| 212 | static void | ||
| 213 | move_cache_gap (c, pos, min_size) | ||
| 214 | struct region_cache *c; | ||
| 215 | int pos; | ||
| 216 | int min_size; | ||
| 217 | { | ||
| 218 | /* Copy these out of the cache and into registers. */ | ||
| 219 | int gap_start = c->gap_start; | ||
| 220 | int gap_len = c->gap_len; | ||
| 221 | int buffer_beg = c->buffer_beg; | ||
| 222 | int buffer_end = c->buffer_end; | ||
| 223 | |||
| 224 | if (pos < 0 | ||
| 225 | || pos > c->cache_len) | ||
| 226 | abort (); | ||
| 227 | |||
| 228 | /* We mustn't ever try to put the gap before the dummy start | ||
| 229 | boundary. That must always be start-relative. */ | ||
| 230 | if (pos == 0) | ||
| 231 | abort (); | ||
| 232 | |||
| 233 | /* Need we move the gap right? */ | ||
| 234 | while (gap_start < pos) | ||
| 235 | { | ||
| 236 | /* Copy one boundary from after to before the gap, and | ||
| 237 | convert its position to start-relative. */ | ||
| 238 | c->boundaries[gap_start].pos | ||
| 239 | = (buffer_end | ||
| 240 | + c->boundaries[gap_start + gap_len].pos | ||
| 241 | - buffer_beg); | ||
| 242 | c->boundaries[gap_start].value | ||
| 243 | = c->boundaries[gap_start + gap_len].value; | ||
| 244 | gap_start++; | ||
| 245 | } | ||
| 246 | |||
| 247 | /* To enlarge the gap, we need to re-allocate the boundary array, and | ||
| 248 | then shift the area after the gap to the new end. Since the cost | ||
| 249 | is proportional to the amount of stuff after the gap, we do the | ||
| 250 | enlargement here, after a right shift but before a left shift, | ||
| 251 | when the portion after the gap is smallest. */ | ||
| 252 | if (gap_len < min_size) | ||
| 253 | { | ||
| 254 | int i; | ||
| 255 | |||
| 256 | /* Always make at least NEW_CACHE_GAP elements, as long as we're | ||
| 257 | expanding anyway. */ | ||
| 258 | if (min_size < NEW_CACHE_GAP) | ||
| 259 | min_size = NEW_CACHE_GAP; | ||
| 260 | |||
| 261 | c->boundaries = | ||
| 262 | (struct boundary *) xrealloc (c->boundaries, | ||
| 263 | ((min_size + c->cache_len) | ||
| 264 | * sizeof (*c->boundaries))); | ||
| 265 | |||
| 266 | /* Some systems don't provide a version of the copy routine that | ||
| 267 | can be trusted to shift memory upward into an overlapping | ||
| 268 | region. memmove isn't widely available. */ | ||
| 269 | min_size -= gap_len; | ||
| 270 | for (i = c->cache_len - 1; i >= gap_start; i--) | ||
| 271 | { | ||
| 272 | c->boundaries[i + min_size].pos = c->boundaries[i + gap_len].pos; | ||
| 273 | c->boundaries[i + min_size].value = c->boundaries[i + gap_len].value; | ||
| 274 | } | ||
| 275 | |||
| 276 | gap_len = min_size; | ||
| 277 | } | ||
| 278 | |||
| 279 | /* Need we move the gap left? */ | ||
| 280 | while (pos < gap_start) | ||
| 281 | { | ||
| 282 | gap_start--; | ||
| 283 | |||
| 284 | /* Copy one region from before to after the gap, and | ||
| 285 | convert its position to end-relative. */ | ||
| 286 | c->boundaries[gap_start + gap_len].pos | ||
| 287 | = c->boundaries[gap_start].pos + buffer_beg - buffer_end; | ||
| 288 | c->boundaries[gap_start + gap_len].value | ||
| 289 | = c->boundaries[gap_start].value; | ||
| 290 | } | ||
| 291 | |||
| 292 | /* Assign these back into the cache. */ | ||
| 293 | c->gap_start = gap_start; | ||
| 294 | c->gap_len = gap_len; | ||
| 295 | } | ||
| 296 | |||
| 297 | |||
| 298 | /* Insert a new boundary in cache C; it will have cache index INDEX, | ||
| 299 | and have the specified POS and VALUE. */ | ||
| 300 | static void | ||
| 301 | insert_cache_boundary (c, index, pos, value) | ||
| 302 | struct region_cache *c; | ||
| 303 | int index; | ||
| 304 | int pos, value; | ||
| 305 | { | ||
| 306 | /* index must be a valid cache index. */ | ||
| 307 | if (index < 0 || index > c->cache_len) | ||
| 308 | abort (); | ||
| 309 | |||
| 310 | /* We must never want to insert something before the dummy first | ||
| 311 | boundary. */ | ||
| 312 | if (index == 0) | ||
| 313 | abort (); | ||
| 314 | |||
| 315 | /* We must only be inserting things in order. */ | ||
| 316 | if (! (BOUNDARY_POS (c, index-1) < pos | ||
| 317 | && (index == c->cache_len | ||
| 318 | || pos < BOUNDARY_POS (c, index)))) | ||
| 319 | abort (); | ||
| 320 | |||
| 321 | /* The value must be different from the ones around it. However, we | ||
| 322 | temporarily create boundaries that establish the same value as | ||
| 323 | the subsequent boundary, so we're not going to flag that case. */ | ||
| 324 | if (BOUNDARY_VALUE (c, index-1) == value) | ||
| 325 | abort (); | ||
| 326 | |||
| 327 | move_cache_gap (c, index, 1); | ||
| 328 | |||
| 329 | c->boundaries[index].pos = pos - c->buffer_beg; | ||
| 330 | c->boundaries[index].value = value; | ||
| 331 | c->gap_start++; | ||
| 332 | c->gap_len--; | ||
| 333 | c->cache_len++; | ||
| 334 | } | ||
| 335 | |||
| 336 | |||
| 337 | /* Delete the entries of cache C whose indices i satisfy START <= i < END. */ | ||
| 338 | |||
| 339 | static void | ||
| 340 | delete_cache_boundaries (c, start, end) | ||
| 341 | struct region_cache *c; | ||
| 342 | int start, end; | ||
| 343 | { | ||
| 344 | int len = end - start; | ||
| 345 | |||
| 346 | /* Gotta be in range. */ | ||
| 347 | if (start < 0 | ||
| 348 | || end > c->cache_len) | ||
| 349 | abort (); | ||
| 350 | |||
| 351 | /* Gotta be in order. */ | ||
| 352 | if (start > end) | ||
| 353 | abort (); | ||
| 354 | |||
| 355 | /* Can't delete the dummy entry. */ | ||
| 356 | if (start == 0 | ||
| 357 | && end >= 1) | ||
| 358 | abort (); | ||
| 359 | |||
| 360 | /* Minimize gap motion. If we're deleting nothing, do nothing. */ | ||
| 361 | if (len == 0) | ||
| 362 | ; | ||
| 363 | /* If the gap is before the region to delete, delete from the start | ||
| 364 | forward. */ | ||
| 365 | else if (c->gap_start <= start) | ||
| 366 | { | ||
| 367 | move_cache_gap (c, start, 0); | ||
| 368 | c->gap_len += len; | ||
| 369 | } | ||
| 370 | /* If the gap is after the region to delete, delete from the end | ||
| 371 | backward. */ | ||
| 372 | else if (end <= c->gap_start) | ||
| 373 | { | ||
| 374 | move_cache_gap (c, end, 0); | ||
| 375 | c->gap_start -= len; | ||
| 376 | c->gap_len += len; | ||
| 377 | } | ||
| 378 | /* If the gap is in the region to delete, just expand it. */ | ||
| 379 | else | ||
| 380 | { | ||
| 381 | c->gap_start = start; | ||
| 382 | c->gap_len += len; | ||
| 383 | } | ||
| 384 | |||
| 385 | c->cache_len -= len; | ||
| 386 | } | ||
| 387 | |||
| 388 | |||
| 389 | |||
| 390 | /* Set the value for a region. */ | ||
| 391 | |||
| 392 | /* Set the value in cache C for the region START..END to VALUE. */ | ||
| 393 | static void | ||
| 394 | set_cache_region (c, start, end, value) | ||
| 395 | struct region_cache *c; | ||
| 396 | int start, end; | ||
| 397 | int value; | ||
| 398 | { | ||
| 399 | if (start > end) | ||
| 400 | abort (); | ||
| 401 | if (start < c->buffer_beg | ||
| 402 | || end > c->buffer_end) | ||
| 403 | abort (); | ||
| 404 | |||
| 405 | /* Eliminate this case; then we can assume that start and end-1 are | ||
| 406 | both the locations of real characters in the buffer. */ | ||
| 407 | if (start == end) | ||
| 408 | return; | ||
| 409 | |||
| 410 | { | ||
| 411 | /* We need to make sure that there are no boundaries in the area | ||
| 412 | between start to end; the whole area will have the same value, | ||
| 413 | so those boundaries will not be necessary. | ||
| 414 | |||
| 415 | Let start_ix be the cache index of the boundary governing the | ||
| 416 | first character of start..end, and let end_ix be the cache | ||
| 417 | index of the earliest boundary after the last character in | ||
| 418 | start..end. (This tortured terminology is intended to answer | ||
| 419 | all the "< or <=?" sort of questions.) */ | ||
| 420 | int start_ix = find_cache_boundary (c, start); | ||
| 421 | int end_ix = find_cache_boundary (c, end - 1) + 1; | ||
| 422 | |||
| 423 | /* We must remember the value established by the last boundary | ||
| 424 | before end; if that boundary's domain stretches beyond end, | ||
| 425 | we'll need to create a new boundary at end, and that boundary | ||
| 426 | must have that remembered value. */ | ||
| 427 | int value_at_end = BOUNDARY_VALUE (c, end_ix - 1); | ||
| 428 | |||
| 429 | /* Delete all boundaries strictly within start..end; this means | ||
| 430 | those whose indices are between start_ix (exclusive) and end_ix | ||
| 431 | (exclusive). */ | ||
| 432 | delete_cache_boundaries (c, start_ix + 1, end_ix); | ||
| 433 | |||
| 434 | /* Make sure we have the right value established going in to | ||
| 435 | start..end from the left, and no unnecessary boundaries. */ | ||
| 436 | if (BOUNDARY_POS (c, start_ix) == start) | ||
| 437 | { | ||
| 438 | /* Is this boundary necessary? If no, remove it; if yes, set | ||
| 439 | its value. */ | ||
| 440 | if (start_ix > 0 | ||
| 441 | && BOUNDARY_VALUE (c, start_ix - 1) == value) | ||
| 442 | { | ||
| 443 | delete_cache_boundaries (c, start_ix, start_ix + 1); | ||
| 444 | start_ix--; | ||
| 445 | } | ||
| 446 | else | ||
| 447 | SET_BOUNDARY_VALUE (c, start_ix, value); | ||
| 448 | } | ||
| 449 | else | ||
| 450 | { | ||
| 451 | /* Do we need to add a new boundary here? */ | ||
| 452 | if (BOUNDARY_VALUE (c, start_ix) != value) | ||
| 453 | { | ||
| 454 | insert_cache_boundary (c, start_ix + 1, start, value); | ||
| 455 | start_ix++; | ||
| 456 | } | ||
| 457 | } | ||
| 458 | |||
| 459 | /* This is equivalent to letting end_ix float (like a buffer | ||
| 460 | marker does) with the insertions and deletions we may have | ||
| 461 | done. */ | ||
| 462 | end_ix = start_ix + 1; | ||
| 463 | |||
| 464 | /* Make sure we have the correct value established as we leave | ||
| 465 | start..end to the right. */ | ||
| 466 | if (end == c->buffer_end) | ||
| 467 | /* There is no text after start..end; nothing to do. */ | ||
| 468 | ; | ||
| 469 | else if (end_ix >= c->cache_len | ||
| 470 | || end < BOUNDARY_POS (c, end_ix)) | ||
| 471 | { | ||
| 472 | /* There is no boundary at end, but we may need one. */ | ||
| 473 | if (value_at_end != value) | ||
| 474 | insert_cache_boundary (c, end_ix, end, value_at_end); | ||
| 475 | } | ||
| 476 | else | ||
| 477 | { | ||
| 478 | /* There is a boundary at end; should it be there? */ | ||
| 479 | if (value == BOUNDARY_VALUE (c, end_ix)) | ||
| 480 | delete_cache_boundaries (c, end_ix, end_ix + 1); | ||
| 481 | } | ||
| 482 | } | ||
| 483 | } | ||
| 484 | |||
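To make the boundary bookkeeping of set_cache_region concrete, here is a hand-worked trace (editorial; the positions are invented) for a 100-character buffer whose cache starts out all-unknown, i.e. with only the dummy boundary 1->0:

    set_cache_region (c, 10, 50, 1)    =>  boundaries  1->0, 10->1, 50->0
    set_cache_region (c, 50, 101, 1)   =>  boundaries  1->0, 10->1
    set_cache_region (c, 1, 10, 1)     =>  boundaries  1->1

The second call drops the boundary at 50 because the value to its left is already 1; the third call overwrites the dummy boundary's value and then deletes the now-redundant boundary at 10, leaving the whole buffer marked known.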
| 485 | |||
| 486 | |||
| 487 | /* Interface: Invalidating the cache. Private: Re-validating the cache. */ | ||
| 488 | |||
| 489 | /* Indicate that a section of BUF has changed, to invalidate CACHE. | ||
| 490 | HEAD is the number of chars unchanged at the beginning of the buffer. | ||
| 491 | TAIL is the number of chars unchanged at the end of the buffer. | ||
| 492 | NOTE: this is *not* the same as the ending position of the modified | ||
| 493 | region. | ||
| 494 | (This way of specifying regions makes more sense than absolute | ||
| 495 | buffer positions in the presence of insertions and deletions; the | ||
| 496 | args to pass are the same before and after such an operation.) */ | ||
| 497 | void | ||
| 498 | invalidate_region_cache (buf, c, head, tail) | ||
| 499 | struct buffer *buf; | ||
| 500 | struct region_cache *c; | ||
| 501 | int head, tail; | ||
| 502 | { | ||
| 503 | /* Let chead = c->beg_unchanged, and | ||
| 504 | ctail = c->end_unchanged. | ||
| 505 | If z-tail < beg+chead by a large amount, or | ||
| 506 | z-ctail < beg+head by a large amount, | ||
| 507 | |||
| 508 | then cutting back chead and ctail to head and tail would lose a | ||
| 509 | lot of information that we could preserve by revalidating the | ||
| 510 | cache before processing this invalidation. Losing that | ||
| 511 | information may be more costly than revalidating the cache now. | ||
| 512 | So go ahead and call revalidate_region_cache if it seems that it | ||
| 513 | might be worthwhile. */ | ||
| 514 | if (((BUF_BEG (buf) + c->beg_unchanged) - (BUF_Z (buf) - tail) | ||
| 515 | > PRESERVE_THRESHOLD) | ||
| 516 | || ((BUF_BEG (buf) + head) - (BUF_Z (buf) - c->end_unchanged) | ||
| 517 | > PRESERVE_THRESHOLD)) | ||
| 518 | revalidate_region_cache (buf, c); | ||
| 519 | |||
| 520 | |||
| 521 | if (head < c->beg_unchanged) | ||
| 522 | c->beg_unchanged = head; | ||
| 523 | if (tail < c->end_unchanged) | ||
| 524 | c->end_unchanged = tail; | ||
| 525 | |||
| 526 | /* We now know nothing about the region between the unchanged head | ||
| 527 | and the unchanged tail (call it the "modified region"), not even | ||
| 528 | its length. | ||
| 529 | |||
| 530 | If the modified region has shrunk in size (deletions do this), | ||
| 531 | then the cache may now contain boundaries originally located in | ||
| 532 | text that doesn't exist any more. | ||
| 533 | |||
| 534 | If the modified region has increased in size (insertions do | ||
| 535 | this), then there may now be boundaries in the modified region | ||
| 536 | whose positions are wrong. | ||
| 537 | |||
| 538 | Even calling BOUNDARY_POS on boundaries still in the unchanged | ||
| 539 | head or tail may well give incorrect answers now, since | ||
| 540 | c->buffer_beg and c->buffer_end may well be wrong now. (Well, | ||
| 541 | okay, c->buffer_beg never changes, so boundaries in the unchanged | ||
| 542 | head will still be okay. But it's the principle of the thing.) | ||
| 543 | |||
| 544 | So things are generally a mess. | ||
| 545 | |||
| 546 | But we don't clean up this mess here; that would be expensive, | ||
| 547 | and this function gets called every time any buffer modification | ||
| 548 | occurs. Rather, we can clean up everything in one swell foop, | ||
| 549 | accounting for all the modifications at once, by calling | ||
| 550 | revalidate_region_cache before we try to consult the cache the | ||
| 551 | next time. */ | ||
| 552 | } | ||
| 553 | |||
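As an editorial illustration of the HEAD/TAIL convention (the helper below and its name are hypothetical, not part of this commit), a modification primitive about to change only the text between absolute positions START and END would invalidate the cache like this:

/* Editorial sketch: invalidate C for a change confined to START..END.
   HEAD counts the untouched characters before START and TAIL the
   untouched characters after END; both counts come out the same
   whether they are computed before or after the edit.  */
static void
example_invalidate_for_change (buf, c, start, end)
     struct buffer *buf;
     struct region_cache *c;
     int start, end;
{
  int head = start - BUF_BEG (buf);
  int tail = BUF_Z (buf) - end;

  invalidate_region_cache (buf, c, head, tail);
}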
| 554 | |||
| 555 | /* Clean out any cache entries applying to the modified region, and | ||
| 556 | make the positions of the remaining entries accurate again. | ||
| 557 | |||
| 558 | After calling this function, the mess described in the comment in | ||
| 559 | invalidate_region_cache is cleaned up. | ||
| 560 | |||
| 561 | This function operates by simply throwing away everything it knows | ||
| 562 | about the modified region. It doesn't care exactly which | ||
| 563 | insertions and deletions took place; it just tosses it all. | ||
| 564 | |||
| 565 | For example, if you insert a single character at the beginning of | ||
| 566 | the buffer, and a single character at the end of the buffer, | ||
| 567 | without calling this function in between the two | ||
| 568 | insertions, then the entire cache will be freed of useful | ||
| 569 | information. On the other hand, if you do manage to call this | ||
| 570 | function in between the two insertions, then the modified regions | ||
| 571 | will be small in both cases, no information will be tossed, and the | ||
| 572 | cache will know that it doesn't have knowledge of the first and | ||
| 573 | last characters any more. | ||
| 574 | |||
| 575 | Calling this function may be expensive; it does binary searches in | ||
| 576 | the cache, and causes cache gap motion. */ | ||
| 577 | |||
| 578 | static void | ||
| 579 | revalidate_region_cache (buf, c) | ||
| 580 | struct buffer *buf; | ||
| 581 | struct region_cache *c; | ||
| 582 | { | ||
| 583 | /* The boundaries now in the cache are expressed relative to the | ||
| 584 | buffer_beg and buffer_end values stored in the cache. Now, | ||
| 585 | buffer_beg and buffer_end may not be the same as BUF_BEG (buf) | ||
| 586 | and BUF_Z (buf), so we have two different "bases" to deal with | ||
| 587 | --- the cache's, and the buffer's. */ | ||
| 588 | |||
| 589 | /* If the entire buffer is still valid, don't waste time. Yes, this | ||
| 590 | should be a >, not a >=; think about what beg_unchanged and | ||
| 591 | end_unchanged get set to when the only change has been an | ||
| 592 | insertion. */ | ||
| 593 | if (c->buffer_beg + c->beg_unchanged | ||
| 594 | > c->buffer_end - c->end_unchanged) | ||
| 595 | return; | ||
| 596 | |||
| 597 | /* If all the text we knew about as of the last cache revalidation | ||
| 598 | is still there, then all of the information in the cache is still | ||
| 599 | valid. Because c->buffer_beg and c->buffer_end are out-of-date, | ||
| 600 | the modified region appears from the cache's point of view to be | ||
| 601 | a null region located someplace in the buffer. | ||
| 602 | |||
| 603 | Now, invalidating that empty region will have no actual effect on | ||
| 604 | the cache; instead, we need to update the cache's basis first | ||
| 605 | (which will give the modified region the same size in the cache | ||
| 606 | as it has in the buffer), and then invalidate the modified | ||
| 607 | region. */ | ||
| 608 | if (c->buffer_beg + c->beg_unchanged | ||
| 609 | == c->buffer_end - c->end_unchanged) | ||
| 610 | { | ||
| 611 | /* Move the gap so that all the boundaries in the unchanged head | ||
| 612 | are expressed beg-relative, and all the boundaries in the | ||
| 613 | unchanged tail are expressed end-relative. That done, we can | ||
| 614 | plug in the new buffer beg and end, and all the positions | ||
| 615 | will be accurate. | ||
| 616 | |||
| 617 | The boundary which has jurisdiction over the modified region | ||
| 618 | should be left before the gap. */ | ||
| 619 | move_cache_gap (c, | ||
| 620 | (find_cache_boundary (c, (c->buffer_beg | ||
| 621 | + c->beg_unchanged)) | ||
| 622 | + 1), | ||
| 623 | 0); | ||
| 624 | |||
| 625 | c->buffer_beg = BUF_BEG (buf); | ||
| 626 | c->buffer_end = BUF_Z (buf); | ||
| 627 | |||
| 628 | /* Now that the cache's basis has been changed, the modified | ||
| 629 | region actually takes up some space in the cache, so we can | ||
| 630 | invalidate it. */ | ||
| 631 | set_cache_region (c, | ||
| 632 | c->buffer_beg + c->beg_unchanged, | ||
| 633 | c->buffer_end - c->end_unchanged, | ||
| 634 | 0); | ||
| 635 | } | ||
| 636 | |||
| 637 | /* Otherwise, there is a non-empty region in the cache which | ||
| 638 | corresponds to the modified region of the buffer. */ | ||
| 639 | else | ||
| 640 | { | ||
| 641 | int modified_ix; | ||
| 642 | |||
| 643 | /* These positions are correct, relative to both the cache basis | ||
| 644 | and the buffer basis. */ | ||
| 645 | set_cache_region (c, | ||
| 646 | c->buffer_beg + c->beg_unchanged, | ||
| 647 | c->buffer_end - c->end_unchanged, | ||
| 648 | 0); | ||
| 649 | |||
| 650 | /* Now the cache contains only boundaries that are in the | ||
| 651 | unchanged head and tail; we've disposed of any boundaries | ||
| 652 | whose positions we can't be sure of given the information | ||
| 653 | we've saved. | ||
| 654 | |||
| 655 | If we put the cache gap between the unchanged head and the | ||
| 656 | unchanged tail, we can adjust all the boundary positions at | ||
| 657 | once, simply by setting buffer_beg and buffer_end. | ||
| 658 | |||
| 659 | The boundary which has jurisdiction over the modified region | ||
| 660 | should be left before the gap. */ | ||
| 661 | modified_ix = | ||
| 662 | find_cache_boundary (c, (c->buffer_beg + c->beg_unchanged)) + 1; | ||
| 663 | move_cache_gap (c, modified_ix, 0); | ||
| 664 | |||
| 665 | c->buffer_beg = BUF_BEG (buf); | ||
| 666 | c->buffer_end = BUF_Z (buf); | ||
| 667 | |||
| 668 | /* Now, we may have shrunk the buffer when we changed the basis, | ||
| 669 | and brought the boundaries we created for the start and end | ||
| 670 | of the modified region together, giving them the same | ||
| 671 | position. If that's the case, we should collapse them into | ||
| 672 | one boundary. Or we may even delete them both, if the values | ||
| 673 | before and after them are the same. */ | ||
| 674 | if (modified_ix < c->cache_len | ||
| 675 | && (BOUNDARY_POS (c, modified_ix - 1) | ||
| 676 | == BOUNDARY_POS (c, modified_ix))) | ||
| 677 | { | ||
| 678 | int value_after = BOUNDARY_VALUE (c, modified_ix); | ||
| 679 | |||
| 680 | /* Should we remove both of the boundaries? Yes, if the | ||
| 681 | latter boundary is now establishing the same value that | ||
| 682 | the former boundary's predecessor does. */ | ||
| 683 | if (modified_ix - 1 > 0 | ||
| 684 | && value_after == BOUNDARY_VALUE (c, modified_ix - 2)) | ||
| 685 | delete_cache_boundaries (c, modified_ix - 1, modified_ix + 1); | ||
| 686 | else | ||
| 687 | { | ||
| 688 | /* We do need a boundary here; collapse the two | ||
| 689 | boundaries into one. */ | ||
| 690 | SET_BOUNDARY_VALUE (c, modified_ix - 1, value_after); | ||
| 691 | delete_cache_boundaries (c, modified_ix, modified_ix + 1); | ||
| 692 | } | ||
| 693 | } | ||
| 694 | } | ||
| 695 | |||
| 696 | /* Now the entire cache is valid. */ | ||
| 697 | c->beg_unchanged | ||
| 698 | = c->end_unchanged | ||
| 699 | = c->buffer_end - c->buffer_beg; | ||
| 700 | } | ||
| 701 | |||
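The strictness of the > in the test at the top of revalidate_region_cache is easy to verify with a hand-worked case (editorial; the numbers are invented). Take a fully valid cache over a 10-character buffer, so buffer_beg = 1, buffer_end = 11 and beg_unchanged = end_unchanged = 10, then insert three characters at position 6:

    invalidate_region_cache runs with head = 5 and tail = 5,
    leaving beg_unchanged = end_unchanged = 5;
    buffer_beg + beg_unchanged = 1 + 5  = 6
    buffer_end - end_unchanged = 11 - 5 = 6

The two sides are equal rather than ordered, so the early return is not taken: the function falls into the == case, moves the basis to the new BUF_Z of 14, and invalidates the region 6..9, which is exactly the three inserted characters. With >= it would have returned immediately, and the cache would keep claiming knowledge about text it has never seen.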
| 702 | |||
| 703 | /* Interface: Adding information to the cache. */ | ||
| 704 | |||
| 705 | /* Assert that the region of BUF between START and END (absolute | ||
| 706 | buffer positions) is "known," for the purposes of CACHE (e.g. "has | ||
| 707 | no newlines", in the case of the line cache). */ | ||
| 708 | void | ||
| 709 | know_region_cache (buf, c, start, end) | ||
| 710 | struct buffer *buf; | ||
| 711 | struct region_cache *c; | ||
| 712 | int start, end; | ||
| 713 | { | ||
| 714 | revalidate_region_cache (buf, c); | ||
| 715 | |||
| 716 | set_cache_region (c, start, end, 1); | ||
| 717 | } | ||
| 718 | |||
| 719 | |||
| 720 | /* Interface: using the cache. */ | ||
| 721 | |||
| 722 | /* Return true if the text immediately after POS in BUF is known, for | ||
| 723 | the purposes of CACHE. If NEXT is non-zero, set *NEXT to the nearest | ||
| 724 | position after POS where the knownness changes. */ | ||
| 725 | int | ||
| 726 | region_cache_forward (buf, c, pos, next) | ||
| 727 | struct buffer *buf; | ||
| 728 | struct region_cache *c; | ||
| 729 | int pos; | ||
| 730 | int *next; | ||
| 731 | { | ||
| 732 | revalidate_region_cache (buf, c); | ||
| 733 | |||
| 734 | { | ||
| 735 | int i = find_cache_boundary (c, pos); | ||
| 736 | int i_value = BOUNDARY_VALUE (c, i); | ||
| 737 | int j; | ||
| 738 | |||
| 739 | /* Beyond the end of the buffer is unknown, by definition. */ | ||
| 740 | if (pos >= BUF_Z (buf)) | ||
| 741 | { | ||
| 742 | if (next) *next = BUF_Z (buf); | ||
| 743 | i_value = 0; | ||
| 744 | } | ||
| 745 | else if (next) | ||
| 746 | { | ||
| 747 | /* Scan forward from i to find the next differing position. */ | ||
| 748 | for (j = i + 1; j < c->cache_len; j++) | ||
| 749 | if (BOUNDARY_VALUE (c, j) != i_value) | ||
| 750 | break; | ||
| 751 | |||
| 752 | if (j < c->cache_len) | ||
| 753 | *next = BOUNDARY_POS (c, j); | ||
| 754 | else | ||
| 755 | *next = BUF_Z (buf); | ||
| 756 | } | ||
| 757 | |||
| 758 | return i_value; | ||
| 759 | } | ||
| 760 | } | ||
| 761 | |||
| 762 | /* Return true if the text immediately before POS in BUF is known, for | ||
| 763 | the purposes of CACHE. If NEXT is non-zero, set *NEXT to the nearest | ||
| 764 | position before POS where the knownness changes. */ | ||
| 765 | int region_cache_backward (buf, c, pos, next) | ||
| 766 | struct buffer *buf; | ||
| 767 | struct region_cache *c; | ||
| 768 | int pos; | ||
| 769 | int *next; | ||
| 770 | { | ||
| 771 | revalidate_region_cache (buf, c); | ||
| 772 | |||
| 773 | /* Before the beginning of the buffer is unknown, by | ||
| 774 | definition. */ | ||
| 775 | if (pos <= BUF_BEG (buf)) | ||
| 776 | { | ||
| 777 | if (next) *next = BUF_BEG (buf); | ||
| 778 | return 0; | ||
| 779 | } | ||
| 780 | |||
| 781 | { | ||
| 782 | int i = find_cache_boundary (c, pos - 1); | ||
| 783 | int i_value = BOUNDARY_VALUE (c, i); | ||
| 784 | int j; | ||
| 785 | |||
| 786 | if (next) | ||
| 787 | { | ||
| 788 | /* Scan backward from i to find the next differing position. */ | ||
| 789 | for (j = i - 1; j >= 0; j--) | ||
| 790 | if (BOUNDARY_VALUE (c, j) != i_value) | ||
| 791 | break; | ||
| 792 | |||
| 793 | if (j >= 0) | ||
| 794 | *next = BOUNDARY_POS (c, j + 1); | ||
| 795 | else | ||
| 796 | *next = BUF_BEG (buf); | ||
| 797 | } | ||
| 798 | |||
| 799 | return i_value; | ||
| 800 | } | ||
| 801 | } | ||
| 802 | |||
| 803 | |||
| 804 | /* Debugging: pretty-print a cache to the standard error output. */ | ||
| 805 | |||
| 806 | void | ||
| 807 | pp_cache (c) | ||
| 808 | struct region_cache *c; | ||
| 809 | { | ||
| 810 | int i; | ||
| 811 | int beg_u = c->buffer_beg + c->beg_unchanged; | ||
| 812 | int end_u = c->buffer_end - c->end_unchanged; | ||
| 813 | |||
| 814 | fprintf (stderr, | ||
| 815 | "basis: %d..%d modified: %d..%d\n", | ||
| 816 | c->buffer_beg, c->buffer_end, | ||
| 817 | beg_u, end_u); | ||
| 818 | |||
| 819 | for (i = 0; i < c->cache_len; i++) | ||
| 820 | { | ||
| 821 | int pos = BOUNDARY_POS (c, i); | ||
| 822 | |||
| 823 | putc (((pos < beg_u) ? 'v' | ||
| 824 | : (pos == beg_u) ? '-' | ||
| 825 | : ' '), | ||
| 826 | stderr); | ||
| 827 | putc (((pos > end_u) ? '^' | ||
| 828 | : (pos == end_u) ? '-' | ||
| 829 | : ' '), | ||
| 830 | stderr); | ||
| 831 | fprintf (stderr, "%d : %d\n", pos, BOUNDARY_VALUE (c, i)); | ||
| 832 | } | ||
| 833 | } | ||
diff --git a/src/region-cache.h b/src/region-cache.h
new file mode 100644
index 00000000000..b9f20fc4f58
--- /dev/null
+++ b/src/region-cache.h
@@ -0,0 +1,111 @@
| 1 | /* Header file: Caching facts about regions of the buffer, for optimization. | ||
| 2 | Copyright (C) 1985, 1986, 1993 Free Software Foundation, Inc. | ||
| 3 | |||
| 4 | This file is part of GNU Emacs. | ||
| 5 | |||
| 6 | GNU Emacs is free software; you can redistribute it and/or modify | ||
| 7 | it under the terms of the GNU General Public License as published by | ||
| 8 | the Free Software Foundation; either version 2, or (at your option) | ||
| 9 | any later version. | ||
| 10 | |||
| 11 | GNU Emacs is distributed in the hope that it will be useful, | ||
| 12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 14 | GNU General Public License for more details. | ||
| 15 | |||
| 16 | You should have received a copy of the GNU General Public License | ||
| 17 | along with GNU Emacs; see the file COPYING. If not, write to | ||
| 18 | the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ | ||
| 19 | |||
| 20 | |||
| 21 | /* This code was written by Jim Blandy <jimb@cs.oberlin.edu> to help | ||
| 22 | GNU Emacs better support the gene editor written for the University | ||
| 23 | of Illinois at Urbana-Champaign's Ribosome Database Project (RDP). | ||
| 24 | |||
| 25 | Emacs implements line operations (finding the beginning/end of the | ||
| 26 | line, vertical motion, all the redisplay stuff) by searching for | ||
| 27 | newlines in the buffer. Usually, this is a good design; it's very | ||
| 28 | clean to just represent the buffer as an unstructured string of | ||
| 29 | characters, and the lines in most files are very short (less than | ||
| 30 | eighty characters), meaning that scanning usually costs about the | ||
| 31 | same as the overhead of maintaining some more complicated data | ||
| 32 | structure. | ||
| 33 | |||
| 34 | However, some applications, like gene editing, make use of very | ||
| 35 | long lines --- on the order of tens of kilobytes. In such cases, | ||
| 36 | it may well be worthwhile to try to avoid scanning, because the | ||
| 37 | scans have become two orders of magnitude more expensive. It would | ||
| 38 | be nice if this speedup could preserve the simplicity of the | ||
| 39 | existing data structure, and disturb as little of the existing code | ||
| 40 | as possible. | ||
| 41 | |||
| 42 | So here's the tack. We add some caching to the scan_buffer | ||
| 43 | function, so that when it searches for a newline, it notes that the | ||
| 44 | region between the start and end of the search contained no | ||
| 45 | newlines; then, the next time around, it consults this cache to see | ||
| 46 | if there are regions of text it can skip over completely. The | ||
| 47 | buffer modification primitives invalidate this cache. | ||
| 48 | |||
| 49 | (Note: Since the redisplay code needs similar information on | ||
| 50 | modified regions of the buffer, we can use the code that helps out | ||
| 51 | redisplay as a guide to where we need to add our own code to | ||
| 52 | invalidate our cache. prepare_to_modify_buffer seems to be the | ||
| 53 | central spot.) | ||
| 54 | |||
| 55 | Note that the cache code itself never mentions newlines | ||
| 56 | specifically, so if you wanted to cache other properties of regions | ||
| 57 | of the buffer, you could use this code pretty much unchanged. So | ||
| 58 | this cache really holds "known/unknown" information --- "I know | ||
| 59 | this region has property P" vs. "I don't know if this region has | ||
| 60 | property P or not." */ | ||
| 61 | |||
| 62 | |||
| 63 | /* Allocate, initialize and return a new, empty region cache. */ | ||
| 64 | struct region_cache *new_region_cache ( /* void */ ); | ||
| 65 | |||
| 66 | /* Free a region cache. */ | ||
| 67 | void free_region_cache ( /* struct region_cache * */ ); | ||
| 68 | |||
| 69 | /* Assert that the region of BUF between START and END (absolute | ||
| 70 | buffer positions) is "known," for the purposes of CACHE (e.g. "has | ||
| 71 | no newlines", in the case of the line cache). */ | ||
| 72 | extern void know_region_cache ( /* struct buffer *BUF, | ||
| 73 | struct region_cache *CACHE, | ||
| 74 | int START, END */ ); | ||
| 75 | |||
| 76 | /* Indicate that a section of BUF has changed, to invalidate CACHE. | ||
| 77 | HEAD is the number of chars unchanged at the beginning of the buffer. | ||
| 78 | TAIL is the number of chars unchanged at the end of the buffer. | ||
| 79 | NOTE: this is *not* the same as the ending position of the modified | ||
| 80 | region. | ||
| 81 | (This way of specifying regions makes more sense than absolute | ||
| 82 | buffer positions in the presence of insertions and deletions; the | ||
| 83 | args to pass are the same before and after such an operation.) */ | ||
| 84 | extern void invalidate_region_cache ( /* struct buffer *BUF, | ||
| 85 | struct region_cache *CACHE, | ||
| 86 | int HEAD, TAIL */ ); | ||
| 87 | |||
| 88 | /* The scanning functions. | ||
| 89 | |||
| 90 | Basically, if you're scanning forward/backward from position POS, | ||
| 91 | and region_cache_forward/backward returns true, you can skip all | ||
| 92 | the text between POS and *NEXT. And if the function returns false, | ||
| 93 | you should examine all the text from POS to *NEXT, and call | ||
| 94 | know_region_cache depending on what you find there; this way, you | ||
| 95 | might be able to avoid scanning it again. */ | ||
| 96 | |||
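As an editorial sketch of that calling pattern (not part of this header; search_for_newline and the surrounding names are placeholders for whatever the real caller, scan_buffer, actually does), a forward newline search over a line cache would look roughly like this:

/* Editorial sketch: return the position of the next newline at or
   after START, or -1 if there is none, consulting and feeding the
   line cache as the comment above describes.  */
static int
example_find_newline (buf, newline_cache, start)
     struct buffer *buf;
     struct region_cache *newline_cache;
     int start;
{
  int pos = start;

  while (pos < BUF_Z (buf))
    {
      int next;

      if (region_cache_forward (buf, newline_cache, pos, &next))
        /* Known: no newlines anywhere in pos..next, so skip it all. */
        pos = next;
      else
        {
          /* Unknown: examine pos..next by hand.  search_for_newline
             is a placeholder for the actual character scan.  */
          int nl = search_for_newline (buf, pos, next);

          if (nl >= 0)
            {
              /* The stretch before the newline is now known to be
                 newline-free; remember that for the next scan.  */
              know_region_cache (buf, newline_cache, pos, nl);
              return nl;
            }

          /* No newline in the whole stretch; remember that too.  */
          know_region_cache (buf, newline_cache, pos, next);
          pos = next;
        }
    }

  return -1;
}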
| 97 | /* Return true if the text immediately after POS in BUF is known, for | ||
| 98 | the purposes of CACHE. If NEXT is non-zero, set *NEXT to the nearest | ||
| 99 | position after POS where the knownness changes. */ | ||
| 100 | extern int region_cache_forward ( /* struct buffer *BUF, | ||
| 101 | struct region_cache *CACHE, | ||
| 102 | int POS, | ||
| 103 | int *NEXT */ ); | ||
| 104 | |||
| 105 | /* Return true if the text immediately before POS in BUF is known, for | ||
| 106 | the purposes of CACHE. If NEXT is non-zero, set *NEXT to the nearest | ||
| 107 | position before POS where the knownness changes. */ | ||
| 108 | extern int region_cache_backward ( /* struct buffer *BUF, | ||
| 109 | struct region_cache *CACHE, | ||
| 110 | int POS, | ||
| 111 | int *NEXT */ ); | ||