diff options
| author | Eli Zaretskii | 2014-12-04 11:31:33 +0200 |
|---|---|---|
| committer | Eli Zaretskii | 2014-12-04 11:31:33 +0200 |
| commit | f1827846d715cfef05afe52ad2a9df2289df6952 (patch) | |
| tree | 048ee9690b7bcc87353137fe72031fe083908371 | |
| parent | 2bef807c322b44cf63db85e4b60001a76cbe3d9f (diff) | |
| download | emacs-f1827846d715cfef05afe52ad2a9df2289df6952.tar.gz emacs-f1827846d715cfef05afe52ad2a9df2289df6952.zip | |
Implement copying of a buffer portion while preserving visual order.
See http://lists.gnu.org/archive/html/emacs-devel/2014-11/msg02203.html
and http://lists.gnu.org/archive/html/emacs-devel/2014-12/msg00063.html
for the rationale.
lisp/simple.el (bidi-directional-controls-chars)
(bidi-directional-non-controls-chars): New variables.
(squeeze-bidi-context-1, squeeze-bidi-context)
(line-substring-with-bidi-context)
(buffer-substring-with-bidi-context): New functions.
doc/lispref/display.texi (Bidirectional Display): Document
'buffer-substring-with-bidi-context'.
doc/lispref/text.texi (Buffer Contents): Mention
'buffer-substring-with-bidi-context' with a cross-reference.
etc/NEWS: Mention 'buffer-substring-with-bidi-context'.
| -rw-r--r-- | doc/lispref/ChangeLog | 8 | ||||
| -rw-r--r-- | doc/lispref/display.texi | 24 | ||||
| -rw-r--r-- | doc/lispref/text.texi | 6 | ||||
| -rw-r--r-- | etc/ChangeLog | 4 | ||||
| -rw-r--r-- | etc/NEWS | 7 | ||||
| -rw-r--r-- | lisp/ChangeLog | 7 | ||||
| -rw-r--r-- | lisp/simple.el | 138 |
7 files changed, 194 insertions, 0 deletions
diff --git a/doc/lispref/ChangeLog b/doc/lispref/ChangeLog index f98e457566e..d8215be6b15 100644 --- a/doc/lispref/ChangeLog +++ b/doc/lispref/ChangeLog | |||
| @@ -1,3 +1,11 @@ | |||
| 1 | 2014-12-04 Eli Zaretskii <eliz@gnu.org> | ||
| 2 | |||
| 3 | * display.texi (Bidirectional Display): Document | ||
| 4 | 'buffer-substring-with-bidi-context'. | ||
| 5 | |||
| 6 | * text.texi (Buffer Contents): Mention | ||
| 7 | 'buffer-substring-with-bidi-context' with a cross-reference. | ||
| 8 | |||
| 1 | 2014-12-02 Eli Zaretskii <eliz@gnu.org> | 9 | 2014-12-02 Eli Zaretskii <eliz@gnu.org> |
| 2 | 10 | ||
| 3 | * display.texi (Bidirectional Display): Document | 11 | * display.texi (Bidirectional Display): Document |
diff --git a/doc/lispref/display.texi b/doc/lispref/display.texi index 59f73223a1f..90aa9797a59 100644 --- a/doc/lispref/display.texi +++ b/doc/lispref/display.texi | |||
| @@ -6854,3 +6854,27 @@ allows it to correctly account for window-specific overlays, which | |||
| 6854 | might change the result of the function if some text in the buffer is | 6854 | might change the result of the function if some text in the buffer is |
| 6855 | covered by overlays. | 6855 | covered by overlays. |
| 6856 | @end defun | 6856 | @end defun |
| 6857 | |||
| 6858 | @cindex copying bidirectional text, preserve visual order | ||
| 6859 | @cindex visual order, preserve when copying bidirectional text | ||
| 6860 | When text that includes mixed right-to-left and left-to-right | ||
| 6861 | characters and bidirectional controls is copied into a different | ||
| 6862 | location, it can change its visual appearance, and also can affect the | ||
| 6863 | visual appearance of the surrounding text at destination. This is | ||
| 6864 | because reordering of bidirectional text specified by the | ||
| 6865 | @acronym{UBA} has non-trivial context-dependent effects both on the | ||
| 6866 | copied text and on the text at copy destination that will surround it. | ||
| 6867 | |||
| 6868 | Sometimes, a Lisp program may need to preserve the exact visual | ||
| 6869 | appearance of the copied text at destination, and of the text that | ||
| 6870 | surrounds the copy. Lisp programs can use the following function to | ||
| 6871 | achieve that effect. | ||
| 6872 | |||
| 6873 | @defun buffer-substring-with-bidi-context start end &optional no-properties | ||
| 6874 | This function works similar to @code{buffer-substring} (@pxref{Buffer | ||
| 6875 | Contents}), but it prepends and appends to the copied text bidi | ||
| 6876 | directional control characters necessary to preserve the visual | ||
| 6877 | appearance of the text when it is inserted at another place. Optional | ||
| 6878 | argument @var{no-properties}, if non-@code{nil}, means remove the text | ||
| 6879 | properties from the copy of the text. | ||
| 6880 | @end defun | ||
diff --git a/doc/lispref/text.texi b/doc/lispref/text.texi index 9c878a00c94..720343cb17d 100644 --- a/doc/lispref/text.texi +++ b/doc/lispref/text.texi | |||
| @@ -219,6 +219,12 @@ This function returns the contents of the entire accessible portion of | |||
| 219 | the current buffer, as a string. | 219 | the current buffer, as a string. |
| 220 | @end defun | 220 | @end defun |
| 221 | 221 | ||
| 222 | If you need to make sure the resulting string, when copied to a | ||
| 223 | different location, will not change its visual appearance due to | ||
| 224 | reordering of bidirectional text, use the | ||
| 225 | @code{buffer-substring-with-bidi-context} function | ||
| 226 | (@pxref{Bidirectional Display, buffer-substring-with-bidi-context}). | ||
| 227 | |||
| 222 | @defun filter-buffer-substring start end &optional delete | 228 | @defun filter-buffer-substring start end &optional delete |
| 223 | This function filters the buffer text between @var{start} and @var{end} | 229 | This function filters the buffer text between @var{start} and @var{end} |
| 224 | using a function specified by the variable | 230 | using a function specified by the variable |
diff --git a/etc/ChangeLog b/etc/ChangeLog index 4f672dfce5e..84a1c483b47 100644 --- a/etc/ChangeLog +++ b/etc/ChangeLog | |||
| @@ -1,3 +1,7 @@ | |||
| 1 | 2014-12-04 Eli Zaretskii <eliz@gnu.org> | ||
| 2 | |||
| 3 | * NEWS: Mention 'buffer-substring-with-bidi-context'. | ||
| 4 | |||
| 1 | 2014-12-02 Eli Zaretskii <eliz@gnu.org> | 5 | 2014-12-02 Eli Zaretskii <eliz@gnu.org> |
| 2 | 6 | ||
| 3 | * NEWS: Mention 'bidi-find-overridden-directionality'. | 7 | * NEWS: Mention 'bidi-find-overridden-directionality'. |
| @@ -105,6 +105,13 @@ overridden by directional override control characters. Lisp programs | |||
| 105 | can use this to detect potential phishing of URLs and other links that | 105 | can use this to detect potential phishing of URLs and other links that |
| 106 | exploits bidirectional display reordering. | 106 | exploits bidirectional display reordering. |
| 107 | 107 | ||
| 108 | +++ | ||
| 109 | ** The new function `buffer-substring-with-bidi-context' allows to | ||
| 110 | copy a portion of a buffer into a different location while preserving | ||
| 111 | the visual appearance both of the copied text and the text at | ||
| 112 | destination, even when the copied text includes mixed bidirectional | ||
| 113 | text and directional control characters. | ||
| 114 | |||
| 108 | *** The ls-lisp package uses `string-collate-lessp' to sort file names. | 115 | *** The ls-lisp package uses `string-collate-lessp' to sort file names. |
| 109 | If you want the old, locale-independent sorting, customize the new | 116 | If you want the old, locale-independent sorting, customize the new |
| 110 | option `ls-lisp-use-string-collate' to a nil value. | 117 | option `ls-lisp-use-string-collate' to a nil value. |
diff --git a/lisp/ChangeLog b/lisp/ChangeLog index 9c729bc936f..157b2b45f98 100644 --- a/lisp/ChangeLog +++ b/lisp/ChangeLog | |||
| @@ -1,5 +1,12 @@ | |||
| 1 | 2014-12-04 Eli Zaretskii <eliz@gnu.org> | 1 | 2014-12-04 Eli Zaretskii <eliz@gnu.org> |
| 2 | 2 | ||
| 3 | Implement copying of a buffer portion while preserving visual order. | ||
| 4 | * simple.el (bidi-directional-controls-chars) | ||
| 5 | (bidi-directional-non-controls-chars): New variables. | ||
| 6 | (squeeze-bidi-context-1, squeeze-bidi-context) | ||
| 7 | (line-substring-with-bidi-context) | ||
| 8 | (buffer-substring-with-bidi-context): New functions. | ||
| 9 | |||
| 3 | * files.el (file-tree-walk): Doc fix. | 10 | * files.el (file-tree-walk): Doc fix. |
| 4 | 11 | ||
| 5 | 2014-12-04 Rupert Swarbrick <ruperts@broadcom.com> (tiny change) | 12 | 2014-12-04 Rupert Swarbrick <ruperts@broadcom.com> (tiny change) |
diff --git a/lisp/simple.el b/lisp/simple.el index 16db05a2158..46b346a8fd2 100644 --- a/lisp/simple.el +++ b/lisp/simple.el | |||
| @@ -4126,6 +4126,144 @@ The argument is used for internal purposes; do not supply one." | |||
| 4126 | (setq this-command 'kill-region) | 4126 | (setq this-command 'kill-region) |
| 4127 | (message "If the next command is a kill, it will append")) | 4127 | (message "If the next command is a kill, it will append")) |
| 4128 | (setq last-command 'kill-region))) | 4128 | (setq last-command 'kill-region))) |
| 4129 | |||
| 4130 | (defvar bidi-directional-controls-chars "\x202a-\x202e\x2066-\x2069" | ||
| 4131 | "Character set that matches bidirectional formatting control characters.") | ||
| 4132 | |||
| 4133 | (defvar bidi-directional-non-controls-chars "^\x202a-\x202e\x2066-\x2069" | ||
| 4134 | "Character set that matches any character except bidirectional controls.") | ||
| 4135 | |||
| 4136 | (defun squeeze-bidi-context-1 (from to category replacement) | ||
| 4137 | "A subroutine of `squeeze-bidi-context'. | ||
| 4138 | FROM and TO should be markers, CATEGORY and REPLACEMENT should be strings." | ||
| 4139 | (let ((pt (copy-marker from)) | ||
| 4140 | (limit (copy-marker to)) | ||
| 4141 | (old-pt 0) | ||
| 4142 | lim1) | ||
| 4143 | (setq lim1 limit) | ||
| 4144 | (goto-char pt) | ||
| 4145 | (while (< pt limit) | ||
| 4146 | (if (> pt old-pt) | ||
| 4147 | (move-marker lim1 | ||
| 4148 | (save-excursion | ||
| 4149 | ;; L and R categories include embedding and | ||
| 4150 | ;; override controls, but we don't want to | ||
| 4151 | ;; replace them, because that might change | ||
| 4152 | ;; the visual order. Likewise with PDF and | ||
| 4153 | ;; isolate controls. | ||
| 4154 | (+ pt (skip-chars-forward | ||
| 4155 | bidi-directional-non-controls-chars | ||
| 4156 | limit))))) | ||
| 4157 | ;; Replace any run of non-RTL characters by a single LRM. | ||
| 4158 | (if (null (re-search-forward category lim1 t)) | ||
| 4159 | ;; No more characters of CATEGORY, we are done. | ||
| 4160 | (setq pt limit) | ||
| 4161 | (replace-match replacement nil t) | ||
| 4162 | (move-marker pt (point))) | ||
| 4163 | (setq old-pt pt) | ||
| 4164 | ;; Skip directional controls, if any. | ||
| 4165 | (move-marker | ||
| 4166 | pt (+ pt (skip-chars-forward bidi-directional-controls-chars limit)))))) | ||
| 4167 | |||
| 4168 | (defun squeeze-bidi-context (from to) | ||
| 4169 | "Replace characters between FROM and TO while keeping bidi context. | ||
| 4170 | |||
| 4171 | This function replaces the region of text with as few characters | ||
| 4172 | as possible, while preserving the effect that region will have on | ||
| 4173 | bidirectional display before and after the region." | ||
| 4174 | (let ((start (set-marker (make-marker) | ||
| 4175 | (if (> from 0) from (+ (point-max) from)))) | ||
| 4176 | (end (set-marker (make-marker) to)) | ||
| 4177 | ;; This is for when they copy text with read-only text | ||
| 4178 | ;; properties. | ||
| 4179 | (inhibit-read-only t)) | ||
| 4180 | (if (null (marker-position end)) | ||
| 4181 | (setq end (point-max-marker))) | ||
| 4182 | ;; Replace each run of non-RTL characters with a single LRM. | ||
| 4183 | (squeeze-bidi-context-1 start end "\\CR+" "\x200e") | ||
| 4184 | ;; Replace each run of non-LTR characters with a single RLM. Note | ||
| 4185 | ;; that the \cR category includes both the Arabic Letter (AL) and | ||
| 4186 | ;; R characters; here we ignore the distinction between them, | ||
| 4187 | ;; because that distinction only affects Arabic Number (AN) | ||
| 4188 | ;; characters, which are weak and don't affect the reordering. | ||
| 4189 | (squeeze-bidi-context-1 start end "\\CL+" "\x200f"))) | ||
| 4190 | |||
| 4191 | (defun line-substring-with-bidi-context (start end &optional no-properties) | ||
| 4192 | "Return buffer text between START and END with its bidi context. | ||
| 4193 | |||
| 4194 | START and END are assumed to belong to the same physical line | ||
| 4195 | of buffer text. This function prepends and appends to the text | ||
| 4196 | between START and END bidi control characters that preserve the | ||
| 4197 | visual order of that text when it is inserted at some other place." | ||
| 4198 | (if (or (< start (point-min)) | ||
| 4199 | (> end (point-max))) | ||
| 4200 | (signal 'args-out-of-range (list (current-buffer) start end))) | ||
| 4201 | (let ((buf (current-buffer)) | ||
| 4202 | substr para-dir from to) | ||
| 4203 | (save-excursion | ||
| 4204 | (goto-char start) | ||
| 4205 | (setq para-dir (current-bidi-paragraph-direction)) | ||
| 4206 | (setq from (line-beginning-position) | ||
| 4207 | to (line-end-position)) | ||
| 4208 | (goto-char from) | ||
| 4209 | ;; If we don't have any mixed directional characters in the | ||
| 4210 | ;; entire line, we can just copy the substring without adding | ||
| 4211 | ;; any context. | ||
| 4212 | (if (or (looking-at-p "\\CR*$") | ||
| 4213 | (looking-at-p "\\CL*$")) | ||
| 4214 | (setq substr (if no-properties | ||
| 4215 | (buffer-substring-no-properties start end) | ||
| 4216 | (buffer-substring start end))) | ||
| 4217 | (setq substr | ||
| 4218 | (with-temp-buffer | ||
| 4219 | (if no-properties | ||
| 4220 | (insert-buffer-substring-no-properties buf from to) | ||
| 4221 | (insert-buffer-substring buf from to)) | ||
| 4222 | (squeeze-bidi-context 1 (1+ (- start from))) | ||
| 4223 | (squeeze-bidi-context (- end to) nil) | ||
| 4224 | (buffer-substring 1 (point-max))))) | ||
| 4225 | |||
| 4226 | ;; Wrap the string in LRI/RLI..PDI pair to achieve 2 effects: | ||
| 4227 | ;; (1) force the string to have the same base embedding | ||
| 4228 | ;; direction as the paragraph direction at the source, no matter | ||
| 4229 | ;; what is the paragraph direction at destination; and (2) avoid | ||
| 4230 | ;; affecting the visual order of the surrounding text at | ||
| 4231 | ;; destination if there are characters of different | ||
| 4232 | ;; directionality there. | ||
| 4233 | (concat (if (eq para-dir 'left-to-right) "\x2066" "\x2067") | ||
| 4234 | substr "\x2069")))) | ||
| 4235 | |||
| 4236 | (defun buffer-substring-with-bidi-context (start end &optional no-properties) | ||
| 4237 | "Return portion of current buffer between START and END with bidi context. | ||
| 4238 | |||
| 4239 | This function works similar to `buffer-substring', but it prepends and | ||
| 4240 | appends to the text bidi directional control characters necessary to | ||
| 4241 | preserve the visual appearance of the text if it is inserted at another | ||
| 4242 | place. This is useful when the buffer substring includes bidirectional | ||
| 4243 | text and control characters that cause non-trivial reordering on display. | ||
| 4244 | If copied verbatim, such text can have a very different visual appearance, | ||
| 4245 | and can also change the visual appearance of the surrounding text at the | ||
| 4246 | destination of the copy. | ||
| 4247 | |||
| 4248 | Optional argument NO-PROPERTIES, if non-nil, means copy the text without | ||
| 4249 | the text properties." | ||
| 4250 | (let (line-end substr) | ||
| 4251 | (if (or (< start (point-min)) | ||
| 4252 | (> end (point-max))) | ||
| 4253 | (signal 'args-out-of-range (list (current-buffer) start end))) | ||
| 4254 | (save-excursion | ||
| 4255 | (goto-char start) | ||
| 4256 | (setq line-end (min end (line-end-position))) | ||
| 4257 | (while (< start end) | ||
| 4258 | (setq substr | ||
| 4259 | (concat substr | ||
| 4260 | (if substr "\n" "") | ||
| 4261 | (line-substring-with-bidi-context start line-end | ||
| 4262 | no-properties))) | ||
| 4263 | (forward-line 1) | ||
| 4264 | (setq start (point)) | ||
| 4265 | (setq line-end (min end (line-end-position)))) | ||
| 4266 | substr))) | ||
| 4129 | 4267 | ||
| 4130 | ;; Yanking. | 4268 | ;; Yanking. |
| 4131 | 4269 | ||