aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorStefan Monnier2017-07-27 00:21:35 -0400
committerStefan Monnier2017-07-27 00:21:35 -0400
commit2d1d54d333735c0128fd31edb183a71298ef5cfc (patch)
tree66800cf607cd1820bfe92a671e8293a9c8d2f68a
parentea875060882a0de5c35574eb815dc45290cd2135 (diff)
downloademacs-2d1d54d333735c0128fd31edb183a71298ef5cfc.tar.gz
emacs-2d1d54d333735c0128fd31edb183a71298ef5cfc.zip
* lisp/vc/smerge-mode.el: Avoid N² blow up in degenerate cases
(smerge--refine-long-words): New var. (smerge--refine-chopup-region): Use it.
-rw-r--r--lisp/vc/smerge-mode.el66
1 files changed, 49 insertions, 17 deletions
diff --git a/lisp/vc/smerge-mode.el b/lisp/vc/smerge-mode.el
index 21c39c85ca8..f94f8a6d4d2 100644
--- a/lisp/vc/smerge-mode.el
+++ b/lisp/vc/smerge-mode.el
@@ -938,15 +938,15 @@ It has the following disadvantages:
938- cannot use `diff -w' because the weighting causes added spaces in a line 938- cannot use `diff -w' because the weighting causes added spaces in a line
939 to be represented as added copies of some line, so `diff -w' can't do the 939 to be represented as added copies of some line, so `diff -w' can't do the
940 right thing any more. 940 right thing any more.
941- may in degenerate cases take a 1KB input region and turn it into a 1MB 941- Is a bit more costly (may in degenerate cases use temp files that are 10x
942 file to pass to diff.") 942 larger than the refined regions).")
943 943
944(defun smerge--refine-forward (n) 944(defun smerge--refine-forward (n)
945 (let ((case-fold-search nil) 945 (let ((case-fold-search nil)
946 (re "[[:upper:]]?[[:lower:]]+\\|[[:upper:]]+\\|[[:digit:]]+\\|.\\|\n")) 946 (re "[[:upper:]]?[[:lower:]]+\\|[[:upper:]]+\\|[[:digit:]]+\\|.\\|\n"))
947 (when (and smerge-refine-ignore-whitespace 947 (when (and smerge-refine-ignore-whitespace
948 ;; smerge-refine-weight-hack causes additional spaces to 948 ;; smerge-refine-weight-hack causes additional spaces to
949 ;; appear as additional lines as well, so even if diff ignore 949 ;; appear as additional lines as well, so even if diff ignores
950 ;; whitespace changes, it'll report added/removed lines :-( 950 ;; whitespace changes, it'll report added/removed lines :-(
951 (not smerge-refine-weight-hack)) 951 (not smerge-refine-weight-hack))
952 (setq re (concat "[ \t]*\\(?:" re "\\)"))) 952 (setq re (concat "[ \t]*\\(?:" re "\\)")))
@@ -954,6 +954,8 @@ It has the following disadvantages:
954 (unless (looking-at re) (error "Smerge refine internal error")) 954 (unless (looking-at re) (error "Smerge refine internal error"))
955 (goto-char (match-end 0))))) 955 (goto-char (match-end 0)))))
956 956
957(defvar smerge--refine-long-words)
958
957(defun smerge--refine-chopup-region (beg end file &optional preproc) 959(defun smerge--refine-chopup-region (beg end file &optional preproc)
958 "Chopup the region into small elements, one per line. 960 "Chopup the region into small elements, one per line.
959Save the result into FILE. 961Save the result into FILE.
@@ -976,18 +978,46 @@ chars to try and eliminate some spurious differences."
976 (subst-char-in-region (point-min) (point-max) ?\n ?\s)) 978 (subst-char-in-region (point-min) (point-max) ?\n ?\s))
977 (goto-char (point-min)) 979 (goto-char (point-min))
978 (while (not (eobp)) 980 (while (not (eobp))
979 (funcall smerge-refine-forward-function 1) 981 (cl-assert (bolp))
980 (let ((s (if (prog2 (forward-char -1) (bolp) (forward-char 1)) 982 (let ((start (point)))
981 nil 983 (funcall smerge-refine-forward-function 1)
982 (buffer-substring (line-beginning-position) (point))))) 984 (let ((len (- (point) start)))
983 ;; We add \n after each char except after \n, so we get 985 (cl-assert (>= len 1))
984 ;; one line per text char, where each line contains 986 ;; We add \n after each chunk except after \n, so we get
985 ;; just one char, except for \n chars which are 987 ;; one line per text chunk, where each line contains
986 ;; represented by the empty line. 988 ;; just one chunk, except for \n chars which are
987 (unless (eq (char-before) ?\n) (insert ?\n)) 989 ;; represented by the empty line.
988 ;; HACK ALERT!! 990 (unless (bolp) (insert ?\n))
989 (if smerge-refine-weight-hack 991 (when (and smerge-refine-weight-hack (> len 1))
990 (dotimes (_i (1- (length s))) (insert s "\n"))))) 992 (let ((s (buffer-substring-no-properties start (point))))
993 ;; The weight-hack inserts N copies of words of size N,
994 ;; so it naturally suffers from an O(N²) blow up.
995 ;; To circumvent this, we map each long word
996 ;; to a shorter (but still unique) replacement.
997 ;; Another option would be to change smerge--refine-forward
998 ;; so it chops up long words into smaller ones.
999 (when (> len 8)
1000 (let ((short (gethash s smerge--refine-long-words)))
1001 (unless short
1002 ;; To avoid accidental conflicts with ≤8 words,
1003 ;; we make sure the replacement is >8 chars. Overall,
1004 ;; this should bound the blowup factor to ~10x,
1005 ;; tho if those chars end up encoded as multiple bytes
1006 ;; each, it could probably still reach ~30x in
1007 ;; pathological cases.
1008 (setq short
1009 (concat (substring s 0 7)
1010 " "
1011 (string
1012 (+ ?0
1013 (hash-table-count
1014 smerge--refine-long-words)))
1015 "\n"))
1016 (puthash s short smerge--refine-long-words))
1017 (delete-region start (point))
1018 (insert short)
1019 (setq s short)))
1020 (dotimes (_i (1- len)) (insert s)))))))
991 (unless (bolp) (error "Smerge refine internal error")) 1021 (unless (bolp) (error "Smerge refine internal error"))
992 (let ((coding-system-for-write 'emacs-internal)) 1022 (let ((coding-system-for-write 'emacs-internal))
993 (write-region (point-min) (point-max) file nil 'nomessage)))) 1023 (write-region (point-min) (point-max) file nil 'nomessage))))
@@ -1042,7 +1072,9 @@ used to replace chars to try and eliminate some spurious differences."
1042 (let* ((pos (point)) 1072 (let* ((pos (point))
1043 deactivate-mark ; The code does not modify any visible buffer. 1073 deactivate-mark ; The code does not modify any visible buffer.
1044 (file1 (make-temp-file "diff1")) 1074 (file1 (make-temp-file "diff1"))
1045 (file2 (make-temp-file "diff2"))) 1075 (file2 (make-temp-file "diff2"))
1076 (smerge--refine-long-words
1077 (if smerge-refine-weight-hack (make-hash-table :test #'equal))))
1046 (unless (markerp beg1) (setq beg1 (copy-marker beg1))) 1078 (unless (markerp beg1) (setq beg1 (copy-marker beg1)))
1047 (unless (markerp beg2) (setq beg2 (copy-marker beg2))) 1079 (unless (markerp beg2) (setq beg2 (copy-marker beg2)))
1048 ;; Chop up regions into smaller elements and save into files. 1080 ;; Chop up regions into smaller elements and save into files.
@@ -1062,7 +1094,7 @@ used to replace chars to try and eliminate some spurious differences."
1062 ;; also and more importantly because otherwise it 1094 ;; also and more importantly because otherwise it
1063 ;; may happen that diff doesn't behave like 1095 ;; may happen that diff doesn't behave like
1064 ;; smerge-refine-weight-hack expects it to. 1096 ;; smerge-refine-weight-hack expects it to.
1065 ;; See http://thread.gmane.org/gmane.emacs.devel/82685. 1097 ;; See http://thread.gmane.org/gmane.emacs.devel/82685, aka https://lists.gnu.org/archive/html/emacs-devel/2007-11/msg00401.html
1066 "-awd" "-ad") 1098 "-awd" "-ad")
1067 file1 file2)) 1099 file1 file2))
1068 ;; Process diff's output. 1100 ;; Process diff's output.