diff options
| author | Stefan Monnier | 2017-07-27 00:21:35 -0400 |
|---|---|---|
| committer | Stefan Monnier | 2017-07-27 00:21:35 -0400 |
| commit | 2d1d54d333735c0128fd31edb183a71298ef5cfc (patch) | |
| tree | 66800cf607cd1820bfe92a671e8293a9c8d2f68a | |
| parent | ea875060882a0de5c35574eb815dc45290cd2135 (diff) | |
| download | emacs-2d1d54d333735c0128fd31edb183a71298ef5cfc.tar.gz emacs-2d1d54d333735c0128fd31edb183a71298ef5cfc.zip | |
* lisp/vc/smerge-mode.el: Avoid N² blow up in degenerate cases
(smerge--refine-long-words): New var.
(smerge--refine-chopup-region): Use it.
| -rw-r--r-- | lisp/vc/smerge-mode.el | 66 |
1 files changed, 49 insertions, 17 deletions
diff --git a/lisp/vc/smerge-mode.el b/lisp/vc/smerge-mode.el index 21c39c85ca8..f94f8a6d4d2 100644 --- a/lisp/vc/smerge-mode.el +++ b/lisp/vc/smerge-mode.el | |||
| @@ -938,15 +938,15 @@ It has the following disadvantages: | |||
| 938 | - cannot use `diff -w' because the weighting causes added spaces in a line | 938 | - cannot use `diff -w' because the weighting causes added spaces in a line |
| 939 | to be represented as added copies of some line, so `diff -w' can't do the | 939 | to be represented as added copies of some line, so `diff -w' can't do the |
| 940 | right thing any more. | 940 | right thing any more. |
| 941 | - may in degenerate cases take a 1KB input region and turn it into a 1MB | 941 | - Is a bit more costly (may in degenerate cases use temp files that are 10x |
| 942 | file to pass to diff.") | 942 | larger than the refined regions).") |
| 943 | 943 | ||
| 944 | (defun smerge--refine-forward (n) | 944 | (defun smerge--refine-forward (n) |
| 945 | (let ((case-fold-search nil) | 945 | (let ((case-fold-search nil) |
| 946 | (re "[[:upper:]]?[[:lower:]]+\\|[[:upper:]]+\\|[[:digit:]]+\\|.\\|\n")) | 946 | (re "[[:upper:]]?[[:lower:]]+\\|[[:upper:]]+\\|[[:digit:]]+\\|.\\|\n")) |
| 947 | (when (and smerge-refine-ignore-whitespace | 947 | (when (and smerge-refine-ignore-whitespace |
| 948 | ;; smerge-refine-weight-hack causes additional spaces to | 948 | ;; smerge-refine-weight-hack causes additional spaces to |
| 949 | ;; appear as additional lines as well, so even if diff ignore | 949 | ;; appear as additional lines as well, so even if diff ignores |
| 950 | ;; whitespace changes, it'll report added/removed lines :-( | 950 | ;; whitespace changes, it'll report added/removed lines :-( |
| 951 | (not smerge-refine-weight-hack)) | 951 | (not smerge-refine-weight-hack)) |
| 952 | (setq re (concat "[ \t]*\\(?:" re "\\)"))) | 952 | (setq re (concat "[ \t]*\\(?:" re "\\)"))) |
| @@ -954,6 +954,8 @@ It has the following disadvantages: | |||
| 954 | (unless (looking-at re) (error "Smerge refine internal error")) | 954 | (unless (looking-at re) (error "Smerge refine internal error")) |
| 955 | (goto-char (match-end 0))))) | 955 | (goto-char (match-end 0))))) |
| 956 | 956 | ||
| 957 | (defvar smerge--refine-long-words) | ||
| 958 | |||
| 957 | (defun smerge--refine-chopup-region (beg end file &optional preproc) | 959 | (defun smerge--refine-chopup-region (beg end file &optional preproc) |
| 958 | "Chopup the region into small elements, one per line. | 960 | "Chopup the region into small elements, one per line. |
| 959 | Save the result into FILE. | 961 | Save the result into FILE. |
| @@ -976,18 +978,46 @@ chars to try and eliminate some spurious differences." | |||
| 976 | (subst-char-in-region (point-min) (point-max) ?\n ?\s)) | 978 | (subst-char-in-region (point-min) (point-max) ?\n ?\s)) |
| 977 | (goto-char (point-min)) | 979 | (goto-char (point-min)) |
| 978 | (while (not (eobp)) | 980 | (while (not (eobp)) |
| 979 | (funcall smerge-refine-forward-function 1) | 981 | (cl-assert (bolp)) |
| 980 | (let ((s (if (prog2 (forward-char -1) (bolp) (forward-char 1)) | 982 | (let ((start (point))) |
| 981 | nil | 983 | (funcall smerge-refine-forward-function 1) |
| 982 | (buffer-substring (line-beginning-position) (point))))) | 984 | (let ((len (- (point) start))) |
| 983 | ;; We add \n after each char except after \n, so we get | 985 | (cl-assert (>= len 1)) |
| 984 | ;; one line per text char, where each line contains | 986 | ;; We add \n after each chunk except after \n, so we get |
| 985 | ;; just one char, except for \n chars which are | 987 | ;; one line per text chunk, where each line contains |
| 986 | ;; represented by the empty line. | 988 | ;; just one chunk, except for \n chars which are |
| 987 | (unless (eq (char-before) ?\n) (insert ?\n)) | 989 | ;; represented by the empty line. |
| 988 | ;; HACK ALERT!! | 990 | (unless (bolp) (insert ?\n)) |
| 989 | (if smerge-refine-weight-hack | 991 | (when (and smerge-refine-weight-hack (> len 1)) |
| 990 | (dotimes (_i (1- (length s))) (insert s "\n"))))) | 992 | (let ((s (buffer-substring-no-properties start (point)))) |
| 993 | ;; The weight-hack inserts N copies of words of size N, | ||
| 994 | ;; so it naturally suffers from an O(N²) blow up. | ||
| 995 | ;; To circumvent this, we map each long word | ||
| 996 | ;; to a shorter (but still unique) replacement. | ||
| 997 | ;; Another option would be to change smerge--refine-forward | ||
| 998 | ;; so it chops up long words into smaller ones. | ||
| 999 | (when (> len 8) | ||
| 1000 | (let ((short (gethash s smerge--refine-long-words))) | ||
| 1001 | (unless short | ||
| 1002 | ;; To avoid accidental conflicts with ≤8 words, | ||
| 1003 | ;; we make sure the replacement is >8 chars. Overall, | ||
| 1004 | ;; this should bound the blowup factor to ~10x, | ||
| 1005 | ;; tho if those chars end up encoded as multiple bytes | ||
| 1006 | ;; each, it could probably still reach ~30x in | ||
| 1007 | ;; pathological cases. | ||
| 1008 | (setq short | ||
| 1009 | (concat (substring s 0 7) | ||
| 1010 | " " | ||
| 1011 | (string | ||
| 1012 | (+ ?0 | ||
| 1013 | (hash-table-count | ||
| 1014 | smerge--refine-long-words))) | ||
| 1015 | "\n")) | ||
| 1016 | (puthash s short smerge--refine-long-words)) | ||
| 1017 | (delete-region start (point)) | ||
| 1018 | (insert short) | ||
| 1019 | (setq s short))) | ||
| 1020 | (dotimes (_i (1- len)) (insert s))))))) | ||
| 991 | (unless (bolp) (error "Smerge refine internal error")) | 1021 | (unless (bolp) (error "Smerge refine internal error")) |
| 992 | (let ((coding-system-for-write 'emacs-internal)) | 1022 | (let ((coding-system-for-write 'emacs-internal)) |
| 993 | (write-region (point-min) (point-max) file nil 'nomessage)))) | 1023 | (write-region (point-min) (point-max) file nil 'nomessage)))) |
| @@ -1042,7 +1072,9 @@ used to replace chars to try and eliminate some spurious differences." | |||
| 1042 | (let* ((pos (point)) | 1072 | (let* ((pos (point)) |
| 1043 | deactivate-mark ; The code does not modify any visible buffer. | 1073 | deactivate-mark ; The code does not modify any visible buffer. |
| 1044 | (file1 (make-temp-file "diff1")) | 1074 | (file1 (make-temp-file "diff1")) |
| 1045 | (file2 (make-temp-file "diff2"))) | 1075 | (file2 (make-temp-file "diff2")) |
| 1076 | (smerge--refine-long-words | ||
| 1077 | (if smerge-refine-weight-hack (make-hash-table :test #'equal)))) | ||
| 1046 | (unless (markerp beg1) (setq beg1 (copy-marker beg1))) | 1078 | (unless (markerp beg1) (setq beg1 (copy-marker beg1))) |
| 1047 | (unless (markerp beg2) (setq beg2 (copy-marker beg2))) | 1079 | (unless (markerp beg2) (setq beg2 (copy-marker beg2))) |
| 1048 | ;; Chop up regions into smaller elements and save into files. | 1080 | ;; Chop up regions into smaller elements and save into files. |
| @@ -1062,7 +1094,7 @@ used to replace chars to try and eliminate some spurious differences." | |||
| 1062 | ;; also and more importantly because otherwise it | 1094 | ;; also and more importantly because otherwise it |
| 1063 | ;; may happen that diff doesn't behave like | 1095 | ;; may happen that diff doesn't behave like |
| 1064 | ;; smerge-refine-weight-hack expects it to. | 1096 | ;; smerge-refine-weight-hack expects it to. |
| 1065 | ;; See http://thread.gmane.org/gmane.emacs.devel/82685. | 1097 | ;; See http://thread.gmane.org/gmane.emacs.devel/82685, aka https://lists.gnu.org/archive/html/emacs-devel/2007-11/msg00401.html |
| 1066 | "-awd" "-ad") | 1098 | "-awd" "-ad") |
| 1067 | file1 file2)) | 1099 | file1 file2)) |
| 1068 | ;; Process diff's output. | 1100 | ;; Process diff's output. |