diff options
Diffstat (limited to 'lib/diffseq.h')
| -rw-r--r-- | lib/diffseq.h | 136 |
1 files changed, 88 insertions, 48 deletions
diff --git a/lib/diffseq.h b/lib/diffseq.h index b6f9f6f9d19..0f76ea1d5ad 100644 --- a/lib/diffseq.h +++ b/lib/diffseq.h | |||
| @@ -1,11 +1,11 @@ | |||
| 1 | /* Analyze differences between two vectors. | 1 | /* Analyze differences between two vectors. |
| 2 | 2 | ||
| 3 | Copyright (C) 1988-1989, 1992-1995, 2001-2004, 2006-2017 Free Software | 3 | Copyright (C) 1988-1989, 1992-1995, 2001-2004, 2006-2022 Free Software |
| 4 | Foundation, Inc. | 4 | Foundation, Inc. |
| 5 | 5 | ||
| 6 | This program is free software: you can redistribute it and/or modify | 6 | This program is free software: you can redistribute it and/or modify |
| 7 | it under the terms of the GNU General Public License as published by | 7 | it under the terms of the GNU General Public License as published by |
| 8 | the Free Software Foundation; either version 3 of the License, or | 8 | the Free Software Foundation, either version 3 of the License, or |
| 9 | (at your option) any later version. | 9 | (at your option) any later version. |
| 10 | 10 | ||
| 11 | This program is distributed in the hope that it will be useful, | 11 | This program is distributed in the hope that it will be useful, |
| @@ -28,13 +28,13 @@ | |||
| 28 | The basic algorithm is described in: | 28 | The basic algorithm is described in: |
| 29 | "An O(ND) Difference Algorithm and its Variations", Eugene W. Myers, | 29 | "An O(ND) Difference Algorithm and its Variations", Eugene W. Myers, |
| 30 | Algorithmica Vol. 1, 1986, pp. 251-266, | 30 | Algorithmica Vol. 1, 1986, pp. 251-266, |
| 31 | <http://dx.doi.org/10.1007/BF01840446>. | 31 | <https://doi.org/10.1007/BF01840446>. |
| 32 | See especially section 4.2, which describes the variation used below. | 32 | See especially section 4.2, which describes the variation used below. |
| 33 | 33 | ||
| 34 | The basic algorithm was independently discovered as described in: | 34 | The basic algorithm was independently discovered as described in: |
| 35 | "Algorithms for Approximate String Matching", Esko Ukkonen, | 35 | "Algorithms for Approximate String Matching", Esko Ukkonen, |
| 36 | Information and Control Vol. 64, 1985, pp. 100-118, | 36 | Information and Control Vol. 64, 1985, pp. 100-118, |
| 37 | <http://dx.doi.org/10.1016/S0019-9958(85)80046-2>. | 37 | <https://doi.org/10.1016/S0019-9958(85)80046-2>. |
| 38 | 38 | ||
| 39 | Unless the 'find_minimal' flag is set, this code uses the TOO_EXPENSIVE | 39 | Unless the 'find_minimal' flag is set, this code uses the TOO_EXPENSIVE |
| 40 | heuristic, by Paul Eggert, to limit the cost to O(N**1.5 log N) | 40 | heuristic, by Paul Eggert, to limit the cost to O(N**1.5 log N) |
| @@ -51,10 +51,14 @@ | |||
| 51 | EXTRA_CONTEXT_FIELDS Declarations of fields for 'struct context'. | 51 | EXTRA_CONTEXT_FIELDS Declarations of fields for 'struct context'. |
| 52 | NOTE_DELETE(ctxt, xoff) Record the removal of the object xvec[xoff]. | 52 | NOTE_DELETE(ctxt, xoff) Record the removal of the object xvec[xoff]. |
| 53 | NOTE_INSERT(ctxt, yoff) Record the insertion of the object yvec[yoff]. | 53 | NOTE_INSERT(ctxt, yoff) Record the insertion of the object yvec[yoff]. |
| 54 | NOTE_ORDERED (Optional) A boolean expression saying that | ||
| 55 | NOTE_DELETE and NOTE_INSERT calls must be | ||
| 56 | issued in offset order. | ||
| 54 | EARLY_ABORT(ctxt) (Optional) A boolean expression that triggers an | 57 | EARLY_ABORT(ctxt) (Optional) A boolean expression that triggers an |
| 55 | early abort of the computation. | 58 | early abort of the computation. |
| 56 | USE_HEURISTIC (Optional) Define if you want to support the | 59 | USE_HEURISTIC (Optional) Define if you want to support the |
| 57 | heuristic for large vectors. | 60 | heuristic for large vectors. |
| 61 | |||
| 58 | It is also possible to use this file with abstract arrays. In this case, | 62 | It is also possible to use this file with abstract arrays. In this case, |
| 59 | xvec and yvec are not represented in memory. They only exist conceptually. | 63 | xvec and yvec are not represented in memory. They only exist conceptually. |
| 60 | In this case, the list of defines above is amended as follows: | 64 | In this case, the list of defines above is amended as follows: |
| @@ -63,6 +67,7 @@ | |||
| 63 | XVECREF_YVECREF_EQUAL(ctxt, xoff, yoff) | 67 | XVECREF_YVECREF_EQUAL(ctxt, xoff, yoff) |
| 64 | A three-argument macro: References xvec[xoff] and | 68 | A three-argument macro: References xvec[xoff] and |
| 65 | yvec[yoff] and tests these elements for equality. | 69 | yvec[yoff] and tests these elements for equality. |
| 70 | |||
| 66 | Before including this file, you also need to include: | 71 | Before including this file, you also need to include: |
| 67 | #include <limits.h> | 72 | #include <limits.h> |
| 68 | #include <stdbool.h> | 73 | #include <stdbool.h> |
| @@ -78,6 +83,10 @@ | |||
| 78 | # define EARLY_ABORT(ctxt) false | 83 | # define EARLY_ABORT(ctxt) false |
| 79 | #endif | 84 | #endif |
| 80 | 85 | ||
| 86 | #ifndef NOTE_ORDERED | ||
| 87 | # define NOTE_ORDERED false | ||
| 88 | #endif | ||
| 89 | |||
| 81 | /* Use this to suppress gcc's "...may be used before initialized" warnings. | 90 | /* Use this to suppress gcc's "...may be used before initialized" warnings. |
| 82 | Beware: The Code argument must not contain commas. */ | 91 | Beware: The Code argument must not contain commas. */ |
| 83 | #ifndef IF_LINT | 92 | #ifndef IF_LINT |
| @@ -88,15 +97,6 @@ | |||
| 88 | # endif | 97 | # endif |
| 89 | #endif | 98 | #endif |
| 90 | 99 | ||
| 91 | /* As above, but when Code must contain one comma. */ | ||
| 92 | #ifndef IF_LINT2 | ||
| 93 | # if defined GCC_LINT || defined lint | ||
| 94 | # define IF_LINT2(Code1, Code2) Code1, Code2 | ||
| 95 | # else | ||
| 96 | # define IF_LINT2(Code1, Code2) /* empty */ | ||
| 97 | # endif | ||
| 98 | #endif | ||
| 99 | |||
| 100 | /* | 100 | /* |
| 101 | * Context of comparison operation. | 101 | * Context of comparison operation. |
| 102 | */ | 102 | */ |
| @@ -468,49 +468,89 @@ compareseq (OFFSET xoff, OFFSET xlim, OFFSET yoff, OFFSET ylim, | |||
| 468 | #define XREF_YREF_EQUAL(x,y) XVECREF_YVECREF_EQUAL (ctxt, x, y) | 468 | #define XREF_YREF_EQUAL(x,y) XVECREF_YVECREF_EQUAL (ctxt, x, y) |
| 469 | #endif | 469 | #endif |
| 470 | 470 | ||
| 471 | /* Slide down the bottom initial diagonal. */ | 471 | while (true) |
| 472 | while (xoff < xlim && yoff < ylim && XREF_YREF_EQUAL (xoff, yoff)) | ||
| 473 | { | 472 | { |
| 474 | xoff++; | 473 | /* Slide down the bottom initial diagonal. */ |
| 475 | yoff++; | 474 | while (xoff < xlim && yoff < ylim && XREF_YREF_EQUAL (xoff, yoff)) |
| 476 | } | 475 | { |
| 476 | xoff++; | ||
| 477 | yoff++; | ||
| 478 | } | ||
| 477 | 479 | ||
| 478 | /* Slide up the top initial diagonal. */ | 480 | /* Slide up the top initial diagonal. */ |
| 479 | while (xoff < xlim && yoff < ylim && XREF_YREF_EQUAL (xlim - 1, ylim - 1)) | 481 | while (xoff < xlim && yoff < ylim && XREF_YREF_EQUAL (xlim - 1, ylim - 1)) |
| 480 | { | 482 | { |
| 481 | xlim--; | 483 | xlim--; |
| 482 | ylim--; | 484 | ylim--; |
| 483 | } | 485 | } |
| 484 | 486 | ||
| 485 | /* Handle simple cases. */ | 487 | /* Handle simple cases. */ |
| 486 | if (xoff == xlim) | 488 | if (xoff == xlim) |
| 487 | while (yoff < ylim) | 489 | { |
| 488 | { | 490 | while (yoff < ylim) |
| 489 | NOTE_INSERT (ctxt, yoff); | 491 | { |
| 490 | if (EARLY_ABORT (ctxt)) | 492 | NOTE_INSERT (ctxt, yoff); |
| 491 | return true; | 493 | if (EARLY_ABORT (ctxt)) |
| 492 | yoff++; | 494 | return true; |
| 493 | } | 495 | yoff++; |
| 494 | else if (yoff == ylim) | 496 | } |
| 495 | while (xoff < xlim) | 497 | break; |
| 496 | { | 498 | } |
| 497 | NOTE_DELETE (ctxt, xoff); | 499 | if (yoff == ylim) |
| 498 | if (EARLY_ABORT (ctxt)) | 500 | { |
| 499 | return true; | 501 | while (xoff < xlim) |
| 500 | xoff++; | 502 | { |
| 501 | } | 503 | NOTE_DELETE (ctxt, xoff); |
| 502 | else | 504 | if (EARLY_ABORT (ctxt)) |
| 503 | { | 505 | return true; |
| 504 | struct partition part IF_LINT2 (= { .xmid = 0, .ymid = 0 }); | 506 | xoff++; |
| 507 | } | ||
| 508 | break; | ||
| 509 | } | ||
| 510 | |||
| 511 | struct partition part; | ||
| 505 | 512 | ||
| 506 | /* Find a point of correspondence in the middle of the vectors. */ | 513 | /* Find a point of correspondence in the middle of the vectors. */ |
| 507 | diag (xoff, xlim, yoff, ylim, find_minimal, &part, ctxt); | 514 | diag (xoff, xlim, yoff, ylim, find_minimal, &part, ctxt); |
| 508 | 515 | ||
| 509 | /* Use the partitions to split this problem into subproblems. */ | 516 | /* Use the partitions to split this problem into subproblems. */ |
| 510 | if (compareseq (xoff, part.xmid, yoff, part.ymid, part.lo_minimal, ctxt)) | 517 | OFFSET xoff1, xlim1, yoff1, ylim1, xoff2, xlim2, yoff2, ylim2; |
| 511 | return true; | 518 | bool find_minimal1, find_minimal2; |
| 512 | if (compareseq (part.xmid, xlim, part.ymid, ylim, part.hi_minimal, ctxt)) | 519 | if (!NOTE_ORDERED |
| 513 | return true; | 520 | && ((xlim + ylim) - (part.xmid + part.ymid) |
| 521 | < (part.xmid + part.ymid) - (xoff + yoff))) | ||
| 522 | { | ||
| 523 | /* The second problem is smaller and the caller doesn't | ||
| 524 | care about order, so do the second problem first to | ||
| 525 | lessen recursion. */ | ||
| 526 | xoff1 = part.xmid; xlim1 = xlim; | ||
| 527 | yoff1 = part.ymid; ylim1 = ylim; | ||
| 528 | find_minimal1 = part.hi_minimal; | ||
| 529 | |||
| 530 | xoff2 = xoff; xlim2 = part.xmid; | ||
| 531 | yoff2 = yoff; ylim2 = part.ymid; | ||
| 532 | find_minimal2 = part.lo_minimal; | ||
| 533 | } | ||
| 534 | else | ||
| 535 | { | ||
| 536 | xoff1 = xoff; xlim1 = part.xmid; | ||
| 537 | yoff1 = yoff; ylim1 = part.ymid; | ||
| 538 | find_minimal1 = part.lo_minimal; | ||
| 539 | |||
| 540 | xoff2 = part.xmid; xlim2 = xlim; | ||
| 541 | yoff2 = part.ymid; ylim2 = ylim; | ||
| 542 | find_minimal2 = part.hi_minimal; | ||
| 543 | } | ||
| 544 | |||
| 545 | /* Recurse to do one subproblem. */ | ||
| 546 | bool early = compareseq (xoff1, xlim1, yoff1, ylim1, find_minimal1, ctxt); | ||
| 547 | if (early) | ||
| 548 | return early; | ||
| 549 | |||
| 550 | /* Iterate to do the other subproblem. */ | ||
| 551 | xoff = xoff2; xlim = xlim2; | ||
| 552 | yoff = yoff2; ylim = ylim2; | ||
| 553 | find_minimal = find_minimal2; | ||
| 514 | } | 554 | } |
| 515 | 555 | ||
| 516 | return false; | 556 | return false; |