aboutsummaryrefslogtreecommitdiffstats
path: root/lisp
diff options
context:
space:
mode:
authorEli Zaretskii2014-12-04 11:31:33 +0200
committerEli Zaretskii2014-12-04 11:31:33 +0200
commitf1827846d715cfef05afe52ad2a9df2289df6952 (patch)
tree048ee9690b7bcc87353137fe72031fe083908371 /lisp
parent2bef807c322b44cf63db85e4b60001a76cbe3d9f (diff)
downloademacs-f1827846d715cfef05afe52ad2a9df2289df6952.tar.gz
emacs-f1827846d715cfef05afe52ad2a9df2289df6952.zip
Implement copying of a buffer portion while preserving visual order.
See http://lists.gnu.org/archive/html/emacs-devel/2014-11/msg02203.html and http://lists.gnu.org/archive/html/emacs-devel/2014-12/msg00063.html for the rationale. lisp/simple.el (bidi-directional-controls-chars) (bidi-directional-non-controls-chars): New variables. (squeeze-bidi-context-1, squeeze-bidi-context) (line-substring-with-bidi-context) (buffer-substring-with-bidi-context): New functions. doc/lispref/display.texi (Bidirectional Display): Document 'buffer-substring-with-bidi-context'. doc/lispref/text.texi (Buffer Contents): Mention 'buffer-substring-with-bidi-context' with a cross-reference. etc/NEWS: Mention 'buffer-substring-with-bidi-context'.
Diffstat (limited to 'lisp')
-rw-r--r--lisp/ChangeLog7
-rw-r--r--lisp/simple.el138
2 files changed, 145 insertions, 0 deletions
diff --git a/lisp/ChangeLog b/lisp/ChangeLog
index 9c729bc936f..157b2b45f98 100644
--- a/lisp/ChangeLog
+++ b/lisp/ChangeLog
@@ -1,5 +1,12 @@
12014-12-04 Eli Zaretskii <eliz@gnu.org> 12014-12-04 Eli Zaretskii <eliz@gnu.org>
2 2
3 Implement copying of a buffer portion while preserving visual order.
4 * simple.el (bidi-directional-controls-chars)
5 (bidi-directional-non-controls-chars): New variables.
6 (squeeze-bidi-context-1, squeeze-bidi-context)
7 (line-substring-with-bidi-context)
8 (buffer-substring-with-bidi-context): New functions.
9
3 * files.el (file-tree-walk): Doc fix. 10 * files.el (file-tree-walk): Doc fix.
4 11
52014-12-04 Rupert Swarbrick <ruperts@broadcom.com> (tiny change) 122014-12-04 Rupert Swarbrick <ruperts@broadcom.com> (tiny change)
diff --git a/lisp/simple.el b/lisp/simple.el
index 16db05a2158..46b346a8fd2 100644
--- a/lisp/simple.el
+++ b/lisp/simple.el
@@ -4126,6 +4126,144 @@ The argument is used for internal purposes; do not supply one."
4126 (setq this-command 'kill-region) 4126 (setq this-command 'kill-region)
4127 (message "If the next command is a kill, it will append")) 4127 (message "If the next command is a kill, it will append"))
4128 (setq last-command 'kill-region))) 4128 (setq last-command 'kill-region)))
4129
4130(defvar bidi-directional-controls-chars "\x202a-\x202e\x2066-\x2069"
4131 "Character set that matches bidirectional formatting control characters.")
4132
4133(defvar bidi-directional-non-controls-chars "^\x202a-\x202e\x2066-\x2069"
4134 "Character set that matches any character except bidirectional controls.")
4135
4136(defun squeeze-bidi-context-1 (from to category replacement)
4137 "A subroutine of `squeeze-bidi-context'.
4138FROM and TO should be markers, CATEGORY and REPLACEMENT should be strings."
4139 (let ((pt (copy-marker from))
4140 (limit (copy-marker to))
4141 (old-pt 0)
4142 lim1)
4143 (setq lim1 limit)
4144 (goto-char pt)
4145 (while (< pt limit)
4146 (if (> pt old-pt)
4147 (move-marker lim1
4148 (save-excursion
4149 ;; L and R categories include embedding and
4150 ;; override controls, but we don't want to
4151 ;; replace them, because that might change
4152 ;; the visual order. Likewise with PDF and
4153 ;; isolate controls.
4154 (+ pt (skip-chars-forward
4155 bidi-directional-non-controls-chars
4156 limit)))))
4157 ;; Replace any run of non-RTL characters by a single LRM.
4158 (if (null (re-search-forward category lim1 t))
4159 ;; No more characters of CATEGORY, we are done.
4160 (setq pt limit)
4161 (replace-match replacement nil t)
4162 (move-marker pt (point)))
4163 (setq old-pt pt)
4164 ;; Skip directional controls, if any.
4165 (move-marker
4166 pt (+ pt (skip-chars-forward bidi-directional-controls-chars limit))))))
4167
4168(defun squeeze-bidi-context (from to)
4169 "Replace characters between FROM and TO while keeping bidi context.
4170
4171This function replaces the region of text with as few characters
4172as possible, while preserving the effect that region will have on
4173bidirectional display before and after the region."
4174 (let ((start (set-marker (make-marker)
4175 (if (> from 0) from (+ (point-max) from))))
4176 (end (set-marker (make-marker) to))
4177 ;; This is for when they copy text with read-only text
4178 ;; properties.
4179 (inhibit-read-only t))
4180 (if (null (marker-position end))
4181 (setq end (point-max-marker)))
4182 ;; Replace each run of non-RTL characters with a single LRM.
4183 (squeeze-bidi-context-1 start end "\\CR+" "\x200e")
4184 ;; Replace each run of non-LTR characters with a single RLM. Note
4185 ;; that the \cR category includes both the Arabic Letter (AL) and
4186 ;; R characters; here we ignore the distinction between them,
4187 ;; because that distinction only affects Arabic Number (AN)
4188 ;; characters, which are weak and don't affect the reordering.
4189 (squeeze-bidi-context-1 start end "\\CL+" "\x200f")))
4190
4191(defun line-substring-with-bidi-context (start end &optional no-properties)
4192 "Return buffer text between START and END with its bidi context.
4193
4194START and END are assumed to belong to the same physical line
4195of buffer text. This function prepends and appends to the text
4196between START and END bidi control characters that preserve the
4197visual order of that text when it is inserted at some other place."
4198 (if (or (< start (point-min))
4199 (> end (point-max)))
4200 (signal 'args-out-of-range (list (current-buffer) start end)))
4201 (let ((buf (current-buffer))
4202 substr para-dir from to)
4203 (save-excursion
4204 (goto-char start)
4205 (setq para-dir (current-bidi-paragraph-direction))
4206 (setq from (line-beginning-position)
4207 to (line-end-position))
4208 (goto-char from)
4209 ;; If we don't have any mixed directional characters in the
4210 ;; entire line, we can just copy the substring without adding
4211 ;; any context.
4212 (if (or (looking-at-p "\\CR*$")
4213 (looking-at-p "\\CL*$"))
4214 (setq substr (if no-properties
4215 (buffer-substring-no-properties start end)
4216 (buffer-substring start end)))
4217 (setq substr
4218 (with-temp-buffer
4219 (if no-properties
4220 (insert-buffer-substring-no-properties buf from to)
4221 (insert-buffer-substring buf from to))
4222 (squeeze-bidi-context 1 (1+ (- start from)))
4223 (squeeze-bidi-context (- end to) nil)
4224 (buffer-substring 1 (point-max)))))
4225
4226 ;; Wrap the string in LRI/RLI..PDI pair to achieve 2 effects:
4227 ;; (1) force the string to have the same base embedding
4228 ;; direction as the paragraph direction at the source, no matter
4229 ;; what is the paragraph direction at destination; and (2) avoid
4230 ;; affecting the visual order of the surrounding text at
4231 ;; destination if there are characters of different
4232 ;; directionality there.
4233 (concat (if (eq para-dir 'left-to-right) "\x2066" "\x2067")
4234 substr "\x2069"))))
4235
4236(defun buffer-substring-with-bidi-context (start end &optional no-properties)
4237 "Return portion of current buffer between START and END with bidi context.
4238
4239This function works similar to `buffer-substring', but it prepends and
4240appends to the text bidi directional control characters necessary to
4241preserve the visual appearance of the text if it is inserted at another
4242place. This is useful when the buffer substring includes bidirectional
4243text and control characters that cause non-trivial reordering on display.
4244If copied verbatim, such text can have a very different visual appearance,
4245and can also change the visual appearance of the surrounding text at the
4246destination of the copy.
4247
4248Optional argument NO-PROPERTIES, if non-nil, means copy the text without
4249the text properties."
4250 (let (line-end substr)
4251 (if (or (< start (point-min))
4252 (> end (point-max)))
4253 (signal 'args-out-of-range (list (current-buffer) start end)))
4254 (save-excursion
4255 (goto-char start)
4256 (setq line-end (min end (line-end-position)))
4257 (while (< start end)
4258 (setq substr
4259 (concat substr
4260 (if substr "\n" "")
4261 (line-substring-with-bidi-context start line-end
4262 no-properties)))
4263 (forward-line 1)
4264 (setq start (point))
4265 (setq line-end (min end (line-end-position))))
4266 substr)))
4129 4267
4130;; Yanking. 4268;; Yanking.
4131 4269