diff options
| author | Eli Zaretskii | 2014-12-04 11:31:33 +0200 |
|---|---|---|
| committer | Eli Zaretskii | 2014-12-04 11:31:33 +0200 |
| commit | f1827846d715cfef05afe52ad2a9df2289df6952 (patch) | |
| tree | 048ee9690b7bcc87353137fe72031fe083908371 /lisp | |
| parent | 2bef807c322b44cf63db85e4b60001a76cbe3d9f (diff) | |
| download | emacs-f1827846d715cfef05afe52ad2a9df2289df6952.tar.gz emacs-f1827846d715cfef05afe52ad2a9df2289df6952.zip | |
Implement copying of a buffer portion while preserving visual order.
See http://lists.gnu.org/archive/html/emacs-devel/2014-11/msg02203.html
and http://lists.gnu.org/archive/html/emacs-devel/2014-12/msg00063.html
for the rationale.
lisp/simple.el (bidi-directional-controls-chars)
(bidi-directional-non-controls-chars): New variables.
(squeeze-bidi-context-1, squeeze-bidi-context)
(line-substring-with-bidi-context)
(buffer-substring-with-bidi-context): New functions.
doc/lispref/display.texi (Bidirectional Display): Document
'buffer-substring-with-bidi-context'.
doc/lispref/text.texi (Buffer Contents): Mention
'buffer-substring-with-bidi-context' with a cross-reference.
etc/NEWS: Mention 'buffer-substring-with-bidi-context'.
Diffstat (limited to 'lisp')
| -rw-r--r-- | lisp/ChangeLog | 7 | ||||
| -rw-r--r-- | lisp/simple.el | 138 |
2 files changed, 145 insertions, 0 deletions
diff --git a/lisp/ChangeLog b/lisp/ChangeLog index 9c729bc936f..157b2b45f98 100644 --- a/lisp/ChangeLog +++ b/lisp/ChangeLog | |||
| @@ -1,5 +1,12 @@ | |||
| 1 | 2014-12-04 Eli Zaretskii <eliz@gnu.org> | 1 | 2014-12-04 Eli Zaretskii <eliz@gnu.org> |
| 2 | 2 | ||
| 3 | Implement copying of a buffer portion while preserving visual order. | ||
| 4 | * simple.el (bidi-directional-controls-chars) | ||
| 5 | (bidi-directional-non-controls-chars): New variables. | ||
| 6 | (squeeze-bidi-context-1, squeeze-bidi-context) | ||
| 7 | (line-substring-with-bidi-context) | ||
| 8 | (buffer-substring-with-bidi-context): New functions. | ||
| 9 | |||
| 3 | * files.el (file-tree-walk): Doc fix. | 10 | * files.el (file-tree-walk): Doc fix. |
| 4 | 11 | ||
| 5 | 2014-12-04 Rupert Swarbrick <ruperts@broadcom.com> (tiny change) | 12 | 2014-12-04 Rupert Swarbrick <ruperts@broadcom.com> (tiny change) |
diff --git a/lisp/simple.el b/lisp/simple.el index 16db05a2158..46b346a8fd2 100644 --- a/lisp/simple.el +++ b/lisp/simple.el | |||
| @@ -4126,6 +4126,144 @@ The argument is used for internal purposes; do not supply one." | |||
| 4126 | (setq this-command 'kill-region) | 4126 | (setq this-command 'kill-region) |
| 4127 | (message "If the next command is a kill, it will append")) | 4127 | (message "If the next command is a kill, it will append")) |
| 4128 | (setq last-command 'kill-region))) | 4128 | (setq last-command 'kill-region))) |
| 4129 | |||
| 4130 | (defvar bidi-directional-controls-chars "\x202a-\x202e\x2066-\x2069" | ||
| 4131 | "Character set that matches bidirectional formatting control characters.") | ||
| 4132 | |||
| 4133 | (defvar bidi-directional-non-controls-chars "^\x202a-\x202e\x2066-\x2069" | ||
| 4134 | "Character set that matches any character except bidirectional controls.") | ||
| 4135 | |||
| 4136 | (defun squeeze-bidi-context-1 (from to category replacement) | ||
| 4137 | "A subroutine of `squeeze-bidi-context'. | ||
| 4138 | FROM and TO should be markers, CATEGORY and REPLACEMENT should be strings." | ||
| 4139 | (let ((pt (copy-marker from)) | ||
| 4140 | (limit (copy-marker to)) | ||
| 4141 | (old-pt 0) | ||
| 4142 | lim1) | ||
| 4143 | (setq lim1 limit) | ||
| 4144 | (goto-char pt) | ||
| 4145 | (while (< pt limit) | ||
| 4146 | (if (> pt old-pt) | ||
| 4147 | (move-marker lim1 | ||
| 4148 | (save-excursion | ||
| 4149 | ;; L and R categories include embedding and | ||
| 4150 | ;; override controls, but we don't want to | ||
| 4151 | ;; replace them, because that might change | ||
| 4152 | ;; the visual order. Likewise with PDF and | ||
| 4153 | ;; isolate controls. | ||
| 4154 | (+ pt (skip-chars-forward | ||
| 4155 | bidi-directional-non-controls-chars | ||
| 4156 | limit))))) | ||
| 4157 | ;; Replace any run of non-RTL characters by a single LRM. | ||
| 4158 | (if (null (re-search-forward category lim1 t)) | ||
| 4159 | ;; No more characters of CATEGORY, we are done. | ||
| 4160 | (setq pt limit) | ||
| 4161 | (replace-match replacement nil t) | ||
| 4162 | (move-marker pt (point))) | ||
| 4163 | (setq old-pt pt) | ||
| 4164 | ;; Skip directional controls, if any. | ||
| 4165 | (move-marker | ||
| 4166 | pt (+ pt (skip-chars-forward bidi-directional-controls-chars limit)))))) | ||
| 4167 | |||
| 4168 | (defun squeeze-bidi-context (from to) | ||
| 4169 | "Replace characters between FROM and TO while keeping bidi context. | ||
| 4170 | |||
| 4171 | This function replaces the region of text with as few characters | ||
| 4172 | as possible, while preserving the effect that region will have on | ||
| 4173 | bidirectional display before and after the region." | ||
| 4174 | (let ((start (set-marker (make-marker) | ||
| 4175 | (if (> from 0) from (+ (point-max) from)))) | ||
| 4176 | (end (set-marker (make-marker) to)) | ||
| 4177 | ;; This is for when they copy text with read-only text | ||
| 4178 | ;; properties. | ||
| 4179 | (inhibit-read-only t)) | ||
| 4180 | (if (null (marker-position end)) | ||
| 4181 | (setq end (point-max-marker))) | ||
| 4182 | ;; Replace each run of non-RTL characters with a single LRM. | ||
| 4183 | (squeeze-bidi-context-1 start end "\\CR+" "\x200e") | ||
| 4184 | ;; Replace each run of non-LTR characters with a single RLM. Note | ||
| 4185 | ;; that the \cR category includes both the Arabic Letter (AL) and | ||
| 4186 | ;; R characters; here we ignore the distinction between them, | ||
| 4187 | ;; because that distinction only affects Arabic Number (AN) | ||
| 4188 | ;; characters, which are weak and don't affect the reordering. | ||
| 4189 | (squeeze-bidi-context-1 start end "\\CL+" "\x200f"))) | ||
| 4190 | |||
| 4191 | (defun line-substring-with-bidi-context (start end &optional no-properties) | ||
| 4192 | "Return buffer text between START and END with its bidi context. | ||
| 4193 | |||
| 4194 | START and END are assumed to belong to the same physical line | ||
| 4195 | of buffer text. This function prepends and appends to the text | ||
| 4196 | between START and END bidi control characters that preserve the | ||
| 4197 | visual order of that text when it is inserted at some other place." | ||
| 4198 | (if (or (< start (point-min)) | ||
| 4199 | (> end (point-max))) | ||
| 4200 | (signal 'args-out-of-range (list (current-buffer) start end))) | ||
| 4201 | (let ((buf (current-buffer)) | ||
| 4202 | substr para-dir from to) | ||
| 4203 | (save-excursion | ||
| 4204 | (goto-char start) | ||
| 4205 | (setq para-dir (current-bidi-paragraph-direction)) | ||
| 4206 | (setq from (line-beginning-position) | ||
| 4207 | to (line-end-position)) | ||
| 4208 | (goto-char from) | ||
| 4209 | ;; If we don't have any mixed directional characters in the | ||
| 4210 | ;; entire line, we can just copy the substring without adding | ||
| 4211 | ;; any context. | ||
| 4212 | (if (or (looking-at-p "\\CR*$") | ||
| 4213 | (looking-at-p "\\CL*$")) | ||
| 4214 | (setq substr (if no-properties | ||
| 4215 | (buffer-substring-no-properties start end) | ||
| 4216 | (buffer-substring start end))) | ||
| 4217 | (setq substr | ||
| 4218 | (with-temp-buffer | ||
| 4219 | (if no-properties | ||
| 4220 | (insert-buffer-substring-no-properties buf from to) | ||
| 4221 | (insert-buffer-substring buf from to)) | ||
| 4222 | (squeeze-bidi-context 1 (1+ (- start from))) | ||
| 4223 | (squeeze-bidi-context (- end to) nil) | ||
| 4224 | (buffer-substring 1 (point-max))))) | ||
| 4225 | |||
| 4226 | ;; Wrap the string in LRI/RLI..PDI pair to achieve 2 effects: | ||
| 4227 | ;; (1) force the string to have the same base embedding | ||
| 4228 | ;; direction as the paragraph direction at the source, no matter | ||
| 4229 | ;; what is the paragraph direction at destination; and (2) avoid | ||
| 4230 | ;; affecting the visual order of the surrounding text at | ||
| 4231 | ;; destination if there are characters of different | ||
| 4232 | ;; directionality there. | ||
| 4233 | (concat (if (eq para-dir 'left-to-right) "\x2066" "\x2067") | ||
| 4234 | substr "\x2069")))) | ||
| 4235 | |||
| 4236 | (defun buffer-substring-with-bidi-context (start end &optional no-properties) | ||
| 4237 | "Return portion of current buffer between START and END with bidi context. | ||
| 4238 | |||
| 4239 | This function works similar to `buffer-substring', but it prepends and | ||
| 4240 | appends to the text bidi directional control characters necessary to | ||
| 4241 | preserve the visual appearance of the text if it is inserted at another | ||
| 4242 | place. This is useful when the buffer substring includes bidirectional | ||
| 4243 | text and control characters that cause non-trivial reordering on display. | ||
| 4244 | If copied verbatim, such text can have a very different visual appearance, | ||
| 4245 | and can also change the visual appearance of the surrounding text at the | ||
| 4246 | destination of the copy. | ||
| 4247 | |||
| 4248 | Optional argument NO-PROPERTIES, if non-nil, means copy the text without | ||
| 4249 | the text properties." | ||
| 4250 | (let (line-end substr) | ||
| 4251 | (if (or (< start (point-min)) | ||
| 4252 | (> end (point-max))) | ||
| 4253 | (signal 'args-out-of-range (list (current-buffer) start end))) | ||
| 4254 | (save-excursion | ||
| 4255 | (goto-char start) | ||
| 4256 | (setq line-end (min end (line-end-position))) | ||
| 4257 | (while (< start end) | ||
| 4258 | (setq substr | ||
| 4259 | (concat substr | ||
| 4260 | (if substr "\n" "") | ||
| 4261 | (line-substring-with-bidi-context start line-end | ||
| 4262 | no-properties))) | ||
| 4263 | (forward-line 1) | ||
| 4264 | (setq start (point)) | ||
| 4265 | (setq line-end (min end (line-end-position)))) | ||
| 4266 | substr))) | ||
| 4129 | 4267 | ||
| 4130 | ;; Yanking. | 4268 | ;; Yanking. |
| 4131 | 4269 | ||