aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEli Zaretskii2014-12-02 16:13:47 +0200
committerEli Zaretskii2014-12-02 16:13:47 +0200
commitdd601050e7db69f322eea09d99751d8e6363b153 (patch)
tree41457d53d1ea7c37e452c0c346177e6188bea0b2
parenta92789b1fccf7ae9a39ec1cc9316a18e01d905bb (diff)
downloademacs-dd601050e7db69f322eea09d99751d8e6363b153.tar.gz
emacs-dd601050e7db69f322eea09d99751d8e6363b153.zip
Allow to search for characters whose bidi directionality was overridden.
src/bidi.c (bidi_find_first_overridden): New function. src/xdisp.c (Fbidi_find_overridden_directionality): New function. (syms_of_xdisp): Defsubr it. src/dispextern.h (bidi_find_first_overridden): Add prototype. doc/lispref/display.texi (Bidirectional Display): Document 'bidi-find-overridden-directionality'. etc/NEWS: Mention 'bidi-find-overridden-directionality'.
-rw-r--r--doc/lispref/ChangeLog5
-rw-r--r--doc/lispref/display.texi54
-rw-r--r--etc/ChangeLog4
-rw-r--r--etc/NEWS7
-rw-r--r--src/ChangeLog9
-rw-r--r--src/bidi.c27
-rw-r--r--src/dispextern.h1
-rw-r--r--src/xdisp.c138
8 files changed, 245 insertions, 0 deletions
diff --git a/doc/lispref/ChangeLog b/doc/lispref/ChangeLog
index 31a9cbf04e4..f98e457566e 100644
--- a/doc/lispref/ChangeLog
+++ b/doc/lispref/ChangeLog
@@ -1,3 +1,8 @@
12014-12-02 Eli Zaretskii <eliz@gnu.org>
2
3 * display.texi (Bidirectional Display): Document
4 'bidi-find-overridden-directionality'.
5
12014-11-29 Paul Eggert <eggert@cs.ucla.edu> 62014-11-29 Paul Eggert <eggert@cs.ucla.edu>
2 7
3 Lessen focus on ChangeLog files, as opposed to change log entries. 8 Lessen focus on ChangeLog files, as opposed to change log entries.
diff --git a/doc/lispref/display.texi b/doc/lispref/display.texi
index 4cb06dd188f..59f73223a1f 100644
--- a/doc/lispref/display.texi
+++ b/doc/lispref/display.texi
@@ -6800,3 +6800,57 @@ affect all Emacs frames and windows.
6800appropriate mirrored character in the reordered text. Lisp programs 6800appropriate mirrored character in the reordered text. Lisp programs
6801can affect the mirrored display by changing this property. Again, any 6801can affect the mirrored display by changing this property. Again, any
6802such changes affect all of Emacs display. 6802such changes affect all of Emacs display.
6803
6804@cindex overriding bidirectional properties
6805@cindex directional overrides
6806@cindex LRO
6807@cindex RLO
6808 The bidirectional properties of characters can be overridden by
6809inserting into the text special directional control characters,
6810LEFT-TO-RIGHT OVERRIDE (@acronym{LRO}) and RIGHT-TO-LEFT OVERRIDE
6811(@acronym{RLO}). Any characters between a @acronym{RLO} and the
6812following newline or POP DIRECTIONAL FORMATTING (@acronym{PDF})
6813control character, whichever comes first, will be displayed as if they
6814were strong right-to-left characters, i.e.@: they will be reversed on
6815display. Similarly, any characters between @acronym{LRO} and
6816@acronym{PDF} or newline will display as if they were strong
6817left-to-right, and will @emph{not} be reversed even if they are strong
6818right-to-left characters.
6819
6820@cindex phishing using directional overrides
6821@cindex malicious use of directional overrides
6822 These overrides are useful when you want to make some text
6823unaffected by the reordering algorithm, and instead directly control
6824the display order. But they can also be used for malicious purposes,
6825known as @dfn{phishing}. Specifically, a URL on a Web page or a link
6826in an email message can be manipulated to make its visual appearance
6827unrecognizable, or similar to some popular benign location, while the
6828real location, interpreted by a browser in the logical order, is very
6829different.
6830
6831 Emacs provides a primitive that applications can use to detect
6832instances of text whose bidirectional properties were overridden so as
6833to make a left-to-right character display as if it were a
6834right-to-left character, or vice versa.
6835
6836@defun bidi-find-overridden-directionality from to &optional object
6837This function looks at the text of the specified @var{object} between
6838positions @var{from} (inclusive) and @var{to} (exclusive), and returns
6839the first position where it finds a strong left-to-right character
6840whose directional properties were forced to display the character as
6841right-to-left, or a strong right-to-left character that was forced
6842to display as left-to-right. If it finds no such characters in the
6843specified region of text, it returns @code{nil}.
6844
6845The optional argument @var{object} specifies which text to search, and
6846defaults to the current buffer. If @var{object} is non-@code{nil}, it
6847can be some other buffer, or it can be a string or a window. If it is
6848a string, the function searches that string. If it is a window, the
6849function searches the buffer displayed in that window. If a buffer
6850whose text you want to examine is displayed in some window, we
6851recommend specifying it by that window, rather than passing the buffer to
6852the function. This is because telling the function about the window
6853allows it to correctly account for window-specific overlays, which
6854might change the result of the function if some text in the buffer is
6855covered by overlays.
6856@end defun
diff --git a/etc/ChangeLog b/etc/ChangeLog
index 09dfd7f2e35..4f672dfce5e 100644
--- a/etc/ChangeLog
+++ b/etc/ChangeLog
@@ -1,3 +1,7 @@
12014-12-02 Eli Zaretskii <eliz@gnu.org>
2
3 * NEWS: Mention 'bidi-find-overridden-directionality'.
4
12014-11-29 Paul Eggert <eggert@cs.ucla.edu> 52014-11-29 Paul Eggert <eggert@cs.ucla.edu>
2 6
3 Lessen focus on ChangeLog files, as opposed to change log entries. 7 Lessen focus on ChangeLog files, as opposed to change log entries.
diff --git a/etc/NEWS b/etc/NEWS
index 6c636cf3095..bb016ee8944 100644
--- a/etc/NEWS
+++ b/etc/NEWS
@@ -98,6 +98,13 @@ environment. For the time being this is implemented for modern POSIX
98systems and for MS-Windows, for other systems they fall back to their 98systems and for MS-Windows, for other systems they fall back to their
99counterparts `string-lessp' and `string-equal'. 99counterparts `string-lessp' and `string-equal'.
100 100
101+++
102** The new function `bidi-find-overridden-directionality' allows you to
103find characters whose directionality was, perhaps maliciously,
104overridden by directional override control characters. Lisp programs
105can use this to detect potential phishing of URLs and other links that
106exploits bidirectional display reordering.
107
101*** The ls-lisp package uses `string-collate-lessp' to sort file names. 108*** The ls-lisp package uses `string-collate-lessp' to sort file names.
102If you want the old, locale-independent sorting, customize the new 109If you want the old, locale-independent sorting, customize the new
103option `ls-lisp-use-string-collate' to a nil value. 110option `ls-lisp-use-string-collate' to a nil value.
diff --git a/src/ChangeLog b/src/ChangeLog
index 5c3376562d3..7dc2b928f1e 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,12 @@
12014-12-02 Eli Zaretskii <eliz@gnu.org>
2
3 * bidi.c (bidi_find_first_overridden): New function.
4
5 * xdisp.c (Fbidi_find_overridden_directionality): New function.
6 (syms_of_xdisp): Defsubr it.
7
8 * dispextern.h (bidi_find_first_overridden): Add prototype.
9
12014-12-02 Jan Djärv <jan.h.d@swipnet.se> 102014-12-02 Jan Djärv <jan.h.d@swipnet.se>
2 11
3 * nsimage.m (initFromSkipXBM:width:height:flip:length:): Set bmRep 12 * nsimage.m (initFromSkipXBM:width:height:flip:length:): Set bmRep
diff --git a/src/bidi.c b/src/bidi.c
index 225acd9d655..a0bcf528e12 100644
--- a/src/bidi.c
+++ b/src/bidi.c
@@ -3376,6 +3376,33 @@ bidi_move_to_visually_next (struct bidi_it *bidi_it)
3376 UNGCPRO; 3376 UNGCPRO;
3377} 3377}
3378 3378
3379/* Utility function for looking for strong directional characters
3380 whose bidi type was overridden by a directional override. */
3381ptrdiff_t
3382bidi_find_first_overridden (struct bidi_it *bidi_it)
3383{
3384 ptrdiff_t found_pos = ZV;
3385
3386 do
3387 {
3388 /* Need to call bidi_resolve_weak, not bidi_resolve_explicit,
3389 because the directional overrides are applied by the
3390 former. */
3391 bidi_type_t type = bidi_resolve_weak (bidi_it);
3392
3393 if ((type == STRONG_R && bidi_it->orig_type == STRONG_L)
3394 || (type == STRONG_L
3395 && (bidi_it->orig_type == STRONG_R
3396 || bidi_it->orig_type == STRONG_AL)))
3397 found_pos = bidi_it->charpos;
3398 } while (found_pos == ZV
3399 && bidi_it->charpos < ZV
3400 && bidi_it->ch != BIDI_EOB
3401 && bidi_it->ch != '\n');
3402
3403 return found_pos;
3404}
3405
3379/* This is meant to be called from within the debugger, whenever you 3406/* This is meant to be called from within the debugger, whenever you
3380 wish to examine the cache contents. */ 3407 wish to examine the cache contents. */
3381void bidi_dump_cached_states (void) EXTERNALLY_VISIBLE; 3408void bidi_dump_cached_states (void) EXTERNALLY_VISIBLE;
diff --git a/src/dispextern.h b/src/dispextern.h
index 0dd0887c7e6..0ee5fd62f7d 100644
--- a/src/dispextern.h
+++ b/src/dispextern.h
@@ -3173,6 +3173,7 @@ extern void bidi_push_it (struct bidi_it *);
3173extern void bidi_pop_it (struct bidi_it *); 3173extern void bidi_pop_it (struct bidi_it *);
3174extern void *bidi_shelve_cache (void); 3174extern void *bidi_shelve_cache (void);
3175extern void bidi_unshelve_cache (void *, bool); 3175extern void bidi_unshelve_cache (void *, bool);
3176extern ptrdiff_t bidi_find_first_overridden (struct bidi_it *);
3176 3177
3177/* Defined in xdisp.c */ 3178/* Defined in xdisp.c */
3178 3179
diff --git a/src/xdisp.c b/src/xdisp.c
index 989cbd10d81..0d314688c26 100644
--- a/src/xdisp.c
+++ b/src/xdisp.c
@@ -21032,6 +21032,143 @@ See also `bidi-paragraph-direction'. */)
21032 } 21032 }
21033} 21033}
21034 21034
21035DEFUN ("bidi-find-overridden-directionality",
21036 Fbidi_find_overridden_directionality,
21037 Sbidi_find_overridden_directionality, 2, 3, 0,
21038 doc: /* Return position between FROM and TO where directionality was overridden.
21039
21040This function returns the first character position in the specified
21041region of OBJECT where there is a character whose `bidi-class' property
21042is `L', but which was forced to display as `R' by a directional
21043override, and likewise with characters whose `bidi-class' is `R'
21044or `AL' that were forced to display as `L'.
21045
21046If no such character is found, the function returns nil.
21047
21048OBJECT is a Lisp string or buffer to search for overridden
21049directionality, and defaults to the current buffer if nil or omitted.
21050OBJECT can also be a window, in which case the function will search
21051the buffer displayed in that window. Passing the window instead of
21052a buffer is preferable when the buffer is displayed in some window,
21053because this function will then be able to correctly account for
21054window-specific overlays, which can affect the results.
21055
21056Strong directional characters `L', `R', and `AL' can have their
21057intrinsic directionality overridden by directional override
21058control characters RLO \(u+202e) and LRO \(u+202d). See the
21059function `get-char-code-property' for a way to inquire about
21060the `bidi-class' property of a character. */)
21061 (Lisp_Object from, Lisp_Object to, Lisp_Object object)
21062{
21063 struct buffer *buf = current_buffer;
21064 struct buffer *old = buf;
21065 struct window *w = NULL;
21066 bool frame_window_p = FRAME_WINDOW_P (SELECTED_FRAME ());
21067 struct bidi_it itb;
21068 ptrdiff_t from_pos, to_pos, from_bpos;
21069 void *itb_data;
21070
21071 if (!NILP (object))
21072 {
21073 if (BUFFERP (object))
21074 buf = XBUFFER (object);
21075 else if (WINDOWP (object))
21076 {
21077 w = decode_live_window (object);
21078 buf = XBUFFER (w->contents);
21079 frame_window_p = FRAME_WINDOW_P (XFRAME (w->frame));
21080 }
21081 else
21082 CHECK_STRING (object);
21083 }
21084
21085 if (STRINGP (object))
21086 {
21087 /* Characters in unibyte strings are always treated by bidi.c as
21088 strong LTR. */
21089 if (!STRING_MULTIBYTE (object)
21090 /* When we are loading loadup.el, the character property
21091 tables needed for bidi iteration are not yet
21092 available. */
21093 || !NILP (Vpurify_flag))
21094 return Qnil;
21095
21096 validate_subarray (object, from, to, SCHARS (object), &from_pos, &to_pos);
21097 if (from_pos >= SCHARS (object))
21098 return Qnil;
21099
21100 /* Set up the bidi iterator. */
21101 itb_data = bidi_shelve_cache ();
21102 itb.paragraph_dir = NEUTRAL_DIR;
21103 itb.string.lstring = object;
21104 itb.string.s = NULL;
21105 itb.string.schars = SCHARS (object);
21106 itb.string.bufpos = 0;
21107 itb.string.from_disp_str = 0;
21108 itb.string.unibyte = 0;
21109 itb.w = w;
21110 bidi_init_it (0, 0, frame_window_p, &itb);
21111 }
21112 else
21113 {
21114 /* Nothing this fancy can happen in unibyte buffers, or in a
21115 buffer that disabled reordering, or if FROM is at EOB. */
21116 if (NILP (BVAR (buf, bidi_display_reordering))
21117 || NILP (BVAR (buf, enable_multibyte_characters))
21118 /* When we are loading loadup.el, the character property
21119 tables needed for bidi iteration are not yet
21120 available. */
21121 || !NILP (Vpurify_flag))
21122 return Qnil;
21123
21124 set_buffer_temp (buf);
21125 validate_region (&from, &to);
21126 from_pos = XINT (from);
21127 to_pos = XINT (to);
21128 if (from_pos >= ZV)
21129 return Qnil;
21130
21131 /* Set up the bidi iterator. */
21132 itb_data = bidi_shelve_cache ();
21133 from_bpos = CHAR_TO_BYTE (from_pos);
21134 if (from_pos == BEGV)
21135 {
21136 itb.charpos = BEGV;
21137 itb.bytepos = BEGV_BYTE;
21138 }
21139 else if (FETCH_CHAR (from_bpos - 1) == '\n')
21140 {
21141 itb.charpos = from_pos;
21142 itb.bytepos = from_bpos;
21143 }
21144 else
21145 itb.charpos = find_newline_no_quit (from_pos, CHAR_TO_BYTE (from_pos),
21146 -1, &itb.bytepos);
21147 itb.paragraph_dir = NEUTRAL_DIR;
21148 itb.string.s = NULL;
21149 itb.string.lstring = Qnil;
21150 itb.string.bufpos = 0;
21151 itb.string.from_disp_str = 0;
21152 itb.string.unibyte = 0;
21153 itb.w = w;
21154 bidi_init_it (itb.charpos, itb.bytepos, frame_window_p, &itb);
21155 }
21156
21157 ptrdiff_t found;
21158 do {
21159 /* For the purposes of this function, the actual base direction of
21160 the paragraph doesn't matter, so just set it to L2R. */
21161 bidi_paragraph_init (L2R, &itb, 0);
21162 while ((found = bidi_find_first_overridden (&itb)) < from_pos)
21163 ;
21164 } while (found == ZV && itb.ch == '\n' && itb.charpos < to_pos);
21165
21166 bidi_unshelve_cache (itb_data, 0);
21167 set_buffer_temp (old);
21168
21169 return (from_pos <= found && found < to_pos) ? make_number (found) : Qnil;
21170}
21171
21035DEFUN ("move-point-visually", Fmove_point_visually, 21172DEFUN ("move-point-visually", Fmove_point_visually,
21036 Smove_point_visually, 1, 1, 0, 21173 Smove_point_visually, 1, 1, 0,
21037 doc: /* Move point in the visual order in the specified DIRECTION. 21174 doc: /* Move point in the visual order in the specified DIRECTION.
@@ -30461,6 +30598,7 @@ syms_of_xdisp (void)
30461 defsubr (&Scurrent_bidi_paragraph_direction); 30598 defsubr (&Scurrent_bidi_paragraph_direction);
30462 defsubr (&Swindow_text_pixel_size); 30599 defsubr (&Swindow_text_pixel_size);
30463 defsubr (&Smove_point_visually); 30600 defsubr (&Smove_point_visually);
30601 defsubr (&Sbidi_find_overridden_directionality);
30464 30602
30465 DEFSYM (Qmenu_bar_update_hook, "menu-bar-update-hook"); 30603 DEFSYM (Qmenu_bar_update_hook, "menu-bar-update-hook");
30466 DEFSYM (Qoverriding_terminal_local_map, "overriding-terminal-local-map"); 30604 DEFSYM (Qoverriding_terminal_local_map, "overriding-terminal-local-map");