diff options
| author | Eli Zaretskii | 2014-12-02 16:13:47 +0200 |
|---|---|---|
| committer | Eli Zaretskii | 2014-12-02 16:13:47 +0200 |
| commit | dd601050e7db69f322eea09d99751d8e6363b153 (patch) | |
| tree | 41457d53d1ea7c37e452c0c346177e6188bea0b2 | |
| parent | a92789b1fccf7ae9a39ec1cc9316a18e01d905bb (diff) | |
| download | emacs-dd601050e7db69f322eea09d99751d8e6363b153.tar.gz emacs-dd601050e7db69f322eea09d99751d8e6363b153.zip | |
Allow to search for characters whose bidi directionality was overridden.
src/bidi.c (bidi_find_first_overridden): New function.
src/xdisp.c (Fbidi_find_overridden_directionality): New function.
(syms_of_xdisp): Defsubr it.
src/dispextern.h (bidi_find_first_overridden): Add prototype.
doc/lispref/display.texi (Bidirectional Display): Document
'bidi-find-overridden-directionality'.
etc/NEWS: Mention 'bidi-find-overridden-directionality'.
| -rw-r--r-- | doc/lispref/ChangeLog | 5 | ||||
| -rw-r--r-- | doc/lispref/display.texi | 54 | ||||
| -rw-r--r-- | etc/ChangeLog | 4 | ||||
| -rw-r--r-- | etc/NEWS | 7 | ||||
| -rw-r--r-- | src/ChangeLog | 9 | ||||
| -rw-r--r-- | src/bidi.c | 27 | ||||
| -rw-r--r-- | src/dispextern.h | 1 | ||||
| -rw-r--r-- | src/xdisp.c | 138 |
8 files changed, 245 insertions, 0 deletions
diff --git a/doc/lispref/ChangeLog b/doc/lispref/ChangeLog index 31a9cbf04e4..f98e457566e 100644 --- a/doc/lispref/ChangeLog +++ b/doc/lispref/ChangeLog | |||
| @@ -1,3 +1,8 @@ | |||
| 1 | 2014-12-02 Eli Zaretskii <eliz@gnu.org> | ||
| 2 | |||
| 3 | * display.texi (Bidirectional Display): Document | ||
| 4 | 'bidi-find-overridden-directionality'. | ||
| 5 | |||
| 1 | 2014-11-29 Paul Eggert <eggert@cs.ucla.edu> | 6 | 2014-11-29 Paul Eggert <eggert@cs.ucla.edu> |
| 2 | 7 | ||
| 3 | Lessen focus on ChangeLog files, as opposed to change log entries. | 8 | Lessen focus on ChangeLog files, as opposed to change log entries. |
diff --git a/doc/lispref/display.texi b/doc/lispref/display.texi index 4cb06dd188f..59f73223a1f 100644 --- a/doc/lispref/display.texi +++ b/doc/lispref/display.texi | |||
| @@ -6800,3 +6800,57 @@ affect all Emacs frames and windows. | |||
| 6800 | appropriate mirrored character in the reordered text. Lisp programs | 6800 | appropriate mirrored character in the reordered text. Lisp programs |
| 6801 | can affect the mirrored display by changing this property. Again, any | 6801 | can affect the mirrored display by changing this property. Again, any |
| 6802 | such changes affect all of Emacs display. | 6802 | such changes affect all of Emacs display. |
| 6803 | |||
| 6804 | @cindex overriding bidirectional properties | ||
| 6805 | @cindex directional overrides | ||
| 6806 | @cindex LRO | ||
| 6807 | @cindex RLO | ||
| 6808 | The bidirectional properties of characters can be overridden by | ||
| 6809 | inserting into the text special directional control characters, | ||
| 6810 | LEFT-TO-RIGHT OVERRIDE (@acronym{LRO}) and RIGHT-TO-LEFT OVERRIDE | ||
| 6811 | (@acronym{RLO}). Any characters between a @acronym{RLO} and the | ||
| 6812 | following newline or POP DIRECTIONAL FORMATTING (@acronym{PDF}) | ||
| 6813 | control character, whichever comes first, will be displayed as if they | ||
| 6814 | were strong right-to-left characters, i.e.@: they will be reversed on | ||
| 6815 | display. Similarly, any characters between @acronym{LRO} and | ||
| 6816 | @acronym{PDF} or newline will display as if they were strong | ||
| 6817 | left-to-right, and will @emph{not} be reversed even if they are strong | ||
| 6818 | right-to-left characters. | ||
| 6819 | |||
| 6820 | @cindex phishing using directional overrides | ||
| 6821 | @cindex malicious use of directional overrides | ||
| 6822 | These overrides are useful when you want to make some text | ||
| 6823 | unaffected by the reordering algorithm, and instead directly control | ||
| 6824 | the display order. But they can also be used for malicious purposes, | ||
| 6825 | known as @dfn{phishing}. Specifically, a URL on a Web page or a link | ||
| 6826 | in an email message can be manipulated to make its visual appearance | ||
| 6827 | unrecognizable, or similar to some popular benign location, while the | ||
| 6828 | real location, interpreted by a browser in the logical order, is very | ||
| 6829 | different. | ||
| 6830 | |||
| 6831 | Emacs provides a primitive that applications can use to detect | ||
| 6832 | instances of text whose bidirectional properties were overridden so as | ||
| 6833 | to make a left-to-right character display as if it were a | ||
| 6834 | right-to-left character, or vice versa. | ||
| 6835 | |||
| 6836 | @defun bidi-find-overridden-directionality from to &optional object | ||
| 6837 | This function looks at the text of the specified @var{object} between | ||
| 6838 | positions @var{from} (inclusive) and @var{to} (exclusive), and returns | ||
| 6839 | the first position where it finds a strong left-to-right character | ||
| 6840 | whose directional properties were forced to display the character as | ||
| 6841 | right-to-left, or a strong right-to-left character that was forced | ||
| 6842 | to display as left-to-right. If it finds no such characters in the | ||
| 6843 | specified region of text, it returns @code{nil}. | ||
| 6844 | |||
| 6845 | The optional argument @var{object} specifies which text to search, and | ||
| 6846 | defaults to the current buffer. If @var{object} is non-@code{nil}, it | ||
| 6847 | can be some other buffer, or it can be a string or a window. If it is | ||
| 6848 | a string, the function searches that string. If it is a window, the | ||
| 6849 | function searches the buffer displayed in that window. If a buffer | ||
| 6850 | whose text you want to examine is displayed in some window, we | ||
| 6851 | recommend specifying it by that window, rather than passing the buffer to | ||
| 6852 | the function. This is because telling the function about the window | ||
| 6853 | allows it to correctly account for window-specific overlays, which | ||
| 6854 | might change the result of the function if some text in the buffer is | ||
| 6855 | covered by overlays. | ||
| 6856 | @end defun | ||
diff --git a/etc/ChangeLog b/etc/ChangeLog index 09dfd7f2e35..4f672dfce5e 100644 --- a/etc/ChangeLog +++ b/etc/ChangeLog | |||
| @@ -1,3 +1,7 @@ | |||
| 1 | 2014-12-02 Eli Zaretskii <eliz@gnu.org> | ||
| 2 | |||
| 3 | * NEWS: Mention 'bidi-find-overridden-directionality'. | ||
| 4 | |||
| 1 | 2014-11-29 Paul Eggert <eggert@cs.ucla.edu> | 5 | 2014-11-29 Paul Eggert <eggert@cs.ucla.edu> |
| 2 | 6 | ||
| 3 | Lessen focus on ChangeLog files, as opposed to change log entries. | 7 | Lessen focus on ChangeLog files, as opposed to change log entries. |
| @@ -98,6 +98,13 @@ environment. For the time being this is implemented for modern POSIX | |||
| 98 | systems and for MS-Windows, for other systems they fall back to their | 98 | systems and for MS-Windows, for other systems they fall back to their |
| 99 | counterparts `string-lessp' and `string-equal'. | 99 | counterparts `string-lessp' and `string-equal'. |
| 100 | 100 | ||
| 101 | +++ | ||
| 102 | ** The new function `bidi-find-overridden-directionality' allows you to | ||
| 103 | find characters whose directionality was, perhaps maliciously, | ||
| 104 | overridden by directional override control characters. Lisp programs | ||
| 105 | can use this to detect potential phishing of URLs and other links that | ||
| 106 | exploits bidirectional display reordering. | ||
| 107 | |||
| 101 | *** The ls-lisp package uses `string-collate-lessp' to sort file names. | 108 | *** The ls-lisp package uses `string-collate-lessp' to sort file names. |
| 102 | If you want the old, locale-independent sorting, customize the new | 109 | If you want the old, locale-independent sorting, customize the new |
| 103 | option `ls-lisp-use-string-collate' to a nil value. | 110 | option `ls-lisp-use-string-collate' to a nil value. |
diff --git a/src/ChangeLog b/src/ChangeLog index 5c3376562d3..7dc2b928f1e 100644 --- a/src/ChangeLog +++ b/src/ChangeLog | |||
| @@ -1,3 +1,12 @@ | |||
| 1 | 2014-12-02 Eli Zaretskii <eliz@gnu.org> | ||
| 2 | |||
| 3 | * bidi.c (bidi_find_first_overridden): New function. | ||
| 4 | |||
| 5 | * xdisp.c (Fbidi_find_overridden_directionality): New function. | ||
| 6 | (syms_of_xdisp): Defsubr it. | ||
| 7 | |||
| 8 | * dispextern.h (bidi_find_first_overridden): Add prototype. | ||
| 9 | |||
| 1 | 2014-12-02 Jan Djärv <jan.h.d@swipnet.se> | 10 | 2014-12-02 Jan Djärv <jan.h.d@swipnet.se> |
| 2 | 11 | ||
| 3 | * nsimage.m (initFromSkipXBM:width:height:flip:length:): Set bmRep | 12 | * nsimage.m (initFromSkipXBM:width:height:flip:length:): Set bmRep |
diff --git a/src/bidi.c b/src/bidi.c index 225acd9d655..a0bcf528e12 100644 --- a/src/bidi.c +++ b/src/bidi.c | |||
| @@ -3376,6 +3376,33 @@ bidi_move_to_visually_next (struct bidi_it *bidi_it) | |||
| 3376 | UNGCPRO; | 3376 | UNGCPRO; |
| 3377 | } | 3377 | } |
| 3378 | 3378 | ||
| 3379 | /* Utility function for looking for strong directional characters | ||
| 3380 | whose bidi type was overridden by a directional override. Repeatedly calls bidi_resolve_weak on BIDI_IT until the resolved type is STRONG_R for a character whose orig_type is STRONG_L, or STRONG_L for a character whose orig_type is STRONG_R or STRONG_AL, or until BIDI_IT->charpos reaches ZV, or BIDI_IT->ch is BIDI_EOB or a newline. Returns BIDI_IT->charpos of the first such character, or ZV if the loop ended without finding one. NOTE(review): ZV is both the scan limit and the `not found' value; confirm that it is the right bound when BIDI_IT iterates over a string rather than a buffer. */ | ||
| 3381 | ptrdiff_t | ||
| 3382 | bidi_find_first_overridden (struct bidi_it *bidi_it) | ||
| 3383 | { | ||
| 3384 | ptrdiff_t found_pos = ZV; | ||
| 3385 | |||
| 3386 | do | ||
| 3387 | { | ||
| 3388 | /* Need to call bidi_resolve_weak, not bidi_resolve_explicit, | ||
| 3389 | because the directional overrides are applied by the | ||
| 3390 | former. The type it returns is compared below against the character's orig_type to detect an override. */ | ||
| 3391 | bidi_type_t type = bidi_resolve_weak (bidi_it); | ||
| 3392 | |||
| 3393 | if ((type == STRONG_R && bidi_it->orig_type == STRONG_L) | ||
| 3394 | || (type == STRONG_L | ||
| 3395 | && (bidi_it->orig_type == STRONG_R | ||
| 3396 | || bidi_it->orig_type == STRONG_AL))) | ||
| 3397 | found_pos = bidi_it->charpos; | ||
| 3398 | } while (found_pos == ZV | ||
| 3399 | && bidi_it->charpos < ZV | ||
| 3400 | && bidi_it->ch != BIDI_EOB | ||
| 3401 | && bidi_it->ch != '\n'); | ||
| 3402 | |||
| 3403 | return found_pos; | ||
| 3404 | } | ||
| 3405 | |||
| 3379 | /* This is meant to be called from within the debugger, whenever you | 3406 | /* This is meant to be called from within the debugger, whenever you |
| 3380 | wish to examine the cache contents. */ | 3407 | wish to examine the cache contents. */ |
| 3381 | void bidi_dump_cached_states (void) EXTERNALLY_VISIBLE; | 3408 | void bidi_dump_cached_states (void) EXTERNALLY_VISIBLE; |
diff --git a/src/dispextern.h b/src/dispextern.h index 0dd0887c7e6..0ee5fd62f7d 100644 --- a/src/dispextern.h +++ b/src/dispextern.h | |||
| @@ -3173,6 +3173,7 @@ extern void bidi_push_it (struct bidi_it *); | |||
| 3173 | extern void bidi_pop_it (struct bidi_it *); | 3173 | extern void bidi_pop_it (struct bidi_it *); |
| 3174 | extern void *bidi_shelve_cache (void); | 3174 | extern void *bidi_shelve_cache (void); |
| 3175 | extern void bidi_unshelve_cache (void *, bool); | 3175 | extern void bidi_unshelve_cache (void *, bool); |
| 3176 | extern ptrdiff_t bidi_find_first_overridden (struct bidi_it *); | ||
| 3176 | 3177 | ||
| 3177 | /* Defined in xdisp.c */ | 3178 | /* Defined in xdisp.c */ |
| 3178 | 3179 | ||
diff --git a/src/xdisp.c b/src/xdisp.c index 989cbd10d81..0d314688c26 100644 --- a/src/xdisp.c +++ b/src/xdisp.c | |||
| @@ -21032,6 +21032,143 @@ See also `bidi-paragraph-direction'. */) | |||
| 21032 | } | 21032 | } |
| 21033 | } | 21033 | } |
| 21034 | 21034 | ||
| 21035 | DEFUN ("bidi-find-overridden-directionality", | ||
| 21036 | Fbidi_find_overridden_directionality, | ||
| 21037 | Sbidi_find_overridden_directionality, 2, 3, 0, | ||
| 21038 | doc: /* Return position between FROM and TO where directionality was overridden. | ||
| 21039 | |||
| 21040 | This function returns the first character position in the specified | ||
| 21041 | region of OBJECT where there is a character whose `bidi-class' property | ||
| 21042 | is `L', but which was forced to display as `R' by a directional | ||
| 21043 | override, and likewise with characters whose `bidi-class' is `R' | ||
| 21044 | or `AL' that were forced to display as `L'. | ||
| 21045 | |||
| 21046 | If no such character is found, the function returns nil. | ||
| 21047 | |||
| 21048 | OBJECT is a Lisp string or buffer to search for overridden | ||
| 21049 | directionality, and defaults to the current buffer if nil or omitted. | ||
| 21050 | OBJECT can also be a window, in which case the function will search | ||
| 21051 | the buffer displayed in that window. Passing the window instead of | ||
| 21052 | a buffer is preferable when the buffer is displayed in some window, | ||
| 21053 | because this function will then be able to correctly account for | ||
| 21054 | window-specific overlays, which can affect the results. | ||
| 21055 | |||
| 21056 | Strong directional characters `L', `R', and `AL' can have their | ||
| 21057 | intrinsic directionality overridden by directional override | ||
| 21058 | control characters RLO \(u+202e) and LRO \(u+202d). See the | ||
| 21059 | function `get-char-code-property' for a way to inquire about | ||
| 21060 | the `bidi-class' property of a character. */) | ||
| 21061 | (Lisp_Object from, Lisp_Object to, Lisp_Object object) | ||
| 21062 | { | ||
| 21063 | struct buffer *buf = current_buffer; | ||
| 21064 | struct buffer *old = buf; | ||
| 21065 | struct window *w = NULL; | ||
| 21066 | bool frame_window_p = FRAME_WINDOW_P (SELECTED_FRAME ()); | ||
| 21067 | struct bidi_it itb; | ||
| 21068 | ptrdiff_t from_pos, to_pos, from_bpos; | ||
| 21069 | void *itb_data; | ||
| 21070 | |||
| 21071 | if (!NILP (object)) | ||
| 21072 | { | ||
| 21073 | if (BUFFERP (object)) | ||
| 21074 | buf = XBUFFER (object); | ||
| 21075 | else if (WINDOWP (object)) | ||
| 21076 | { | ||
| 21077 | w = decode_live_window (object); | ||
| 21078 | buf = XBUFFER (w->contents); | ||
| 21079 | frame_window_p = FRAME_WINDOW_P (XFRAME (w->frame)); | ||
| 21080 | } | ||
| 21081 | else | ||
| 21082 | CHECK_STRING (object); | ||
| 21083 | } | ||
| 21084 | |||
| 21085 | if (STRINGP (object)) | ||
| 21086 | { | ||
| 21087 | /* Characters in unibyte strings are always treated by bidi.c as | ||
| 21088 | strong LTR. */ | ||
| 21089 | if (!STRING_MULTIBYTE (object) | ||
| 21090 | /* When we are loading loadup.el, the character property | ||
| 21091 | tables needed for bidi iteration are not yet | ||
| 21092 | available. */ | ||
| 21093 | || !NILP (Vpurify_flag)) | ||
| 21094 | return Qnil; | ||
| 21095 | |||
| 21096 | validate_subarray (object, from, to, SCHARS (object), &from_pos, &to_pos); | ||
| 21097 | if (from_pos >= SCHARS (object)) | ||
| 21098 | return Qnil; | ||
| 21099 | |||
| 21100 | /* Set up the bidi iterator over the string OBJECT, starting at character and byte index 0. The cache state saved here in ITB_DATA is handed back to bidi_unshelve_cache before returning. */ | ||
| 21101 | itb_data = bidi_shelve_cache (); | ||
| 21102 | itb.paragraph_dir = NEUTRAL_DIR; | ||
| 21103 | itb.string.lstring = object; | ||
| 21104 | itb.string.s = NULL; | ||
| 21105 | itb.string.schars = SCHARS (object); | ||
| 21106 | itb.string.bufpos = 0; | ||
| 21107 | itb.string.from_disp_str = 0; | ||
| 21108 | itb.string.unibyte = 0; | ||
| 21109 | itb.w = w; | ||
| 21110 | bidi_init_it (0, 0, frame_window_p, &itb); | ||
| 21111 | } | ||
| 21112 | else | ||
| 21113 | { | ||
| 21114 | /* Nothing this fancy can happen in unibyte buffers, or in a | ||
| 21115 | buffer that disabled reordering, or if FROM is at EOB. */ | ||
| 21116 | if (NILP (BVAR (buf, bidi_display_reordering)) | ||
| 21117 | || NILP (BVAR (buf, enable_multibyte_characters)) | ||
| 21118 | /* When we are loading loadup.el, the character property | ||
| 21119 | tables needed for bidi iteration are not yet | ||
| 21120 | available. */ | ||
| 21121 | || !NILP (Vpurify_flag)) | ||
| 21122 | return Qnil; | ||
| 21123 | |||
| 21124 | set_buffer_temp (buf); | ||
| 21125 | validate_region (&from, &to); | ||
| 21126 | from_pos = XINT (from); | ||
| 21127 | to_pos = XINT (to); | ||
| 21128 | if (from_pos >= ZV) | ||
| 21129 | return Qnil; | ||
| 21130 | |||
| 21131 | /* Set up the bidi iterator. Iteration starts at BEGV when FROM is BEGV, at FROM itself when the character fetched at FROM's byte position minus one is a newline, and otherwise at the position returned by find_newline_no_quit called with count -1 from FROM (presumably the start of FROM's line -- confirm). NOTE(review): the `return Qnil' just above executes after set_buffer_temp (buf) but without the set_buffer_temp (old) that the normal exit path performs; confirm whether the temporary buffer switch must be undone there. */ | ||
| 21132 | itb_data = bidi_shelve_cache (); | ||
| 21133 | from_bpos = CHAR_TO_BYTE (from_pos); | ||
| 21134 | if (from_pos == BEGV) | ||
| 21135 | { | ||
| 21136 | itb.charpos = BEGV; | ||
| 21137 | itb.bytepos = BEGV_BYTE; | ||
| 21138 | } | ||
| 21139 | else if (FETCH_CHAR (from_bpos - 1) == '\n') | ||
| 21140 | { | ||
| 21141 | itb.charpos = from_pos; | ||
| 21142 | itb.bytepos = from_bpos; | ||
| 21143 | } | ||
| 21144 | else | ||
| 21145 | itb.charpos = find_newline_no_quit (from_pos, CHAR_TO_BYTE (from_pos), | ||
| 21146 | -1, &itb.bytepos); | ||
| 21147 | itb.paragraph_dir = NEUTRAL_DIR; | ||
| 21148 | itb.string.s = NULL; | ||
| 21149 | itb.string.lstring = Qnil; | ||
| 21150 | itb.string.bufpos = 0; | ||
| 21151 | itb.string.from_disp_str = 0; | ||
| 21152 | itb.string.unibyte = 0; | ||
| 21153 | itb.w = w; | ||
| 21154 | bidi_init_it (itb.charpos, itb.bytepos, frame_window_p, &itb); | ||
| 21155 | } | ||
| 21156 | |||
| 21157 | ptrdiff_t found; | ||
| 21158 | do { | ||
| 21159 | /* For the purposes of this function, the actual base direction of | ||
| 21160 | the paragraph doesn't matter, so just set it to L2R. The inner loop below discards results located before FROM, since iteration may have started earlier than FROM. The outer loop re-initializes the paragraph and keeps going while nothing was found (FOUND is ZV), the iterator stopped on a newline, and its position is still before TO. */ | ||
| 21161 | bidi_paragraph_init (L2R, &itb, 0); | ||
| 21162 | while ((found = bidi_find_first_overridden (&itb)) < from_pos) | ||
| 21163 | ; | ||
| 21164 | } while (found == ZV && itb.ch == '\n' && itb.charpos < to_pos); | ||
| 21165 | |||
| 21166 | bidi_unshelve_cache (itb_data, 0); | ||
| 21167 | set_buffer_temp (old); | ||
| 21168 | |||
| 21169 | return (from_pos <= found && found < to_pos) ? make_number (found) : Qnil; | ||
| 21170 | } | ||
| 21171 | |||
| 21035 | DEFUN ("move-point-visually", Fmove_point_visually, | 21172 | DEFUN ("move-point-visually", Fmove_point_visually, |
| 21036 | Smove_point_visually, 1, 1, 0, | 21173 | Smove_point_visually, 1, 1, 0, |
| 21037 | doc: /* Move point in the visual order in the specified DIRECTION. | 21174 | doc: /* Move point in the visual order in the specified DIRECTION. |
| @@ -30461,6 +30598,7 @@ syms_of_xdisp (void) | |||
| 30461 | defsubr (&Scurrent_bidi_paragraph_direction); | 30598 | defsubr (&Scurrent_bidi_paragraph_direction); |
| 30462 | defsubr (&Swindow_text_pixel_size); | 30599 | defsubr (&Swindow_text_pixel_size); |
| 30463 | defsubr (&Smove_point_visually); | 30600 | defsubr (&Smove_point_visually); |
| 30601 | defsubr (&Sbidi_find_overridden_directionality); | ||
| 30464 | 30602 | ||
| 30465 | DEFSYM (Qmenu_bar_update_hook, "menu-bar-update-hook"); | 30603 | DEFSYM (Qmenu_bar_update_hook, "menu-bar-update-hook"); |
| 30466 | DEFSYM (Qoverriding_terminal_local_map, "overriding-terminal-local-map"); | 30604 | DEFSYM (Qoverriding_terminal_local_map, "overriding-terminal-local-map"); |