aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author Eli Zaretskii 2014-10-18 14:07:44 +0300
committer Eli Zaretskii 2014-10-18 14:07:44 +0300
commit b5e71861a3b15de7651be4524f38337aa451bfd7 (patch)
tree 5c7c23e50dcaee496d0960ce9a243361a35805a7 /src
parent 1c396384998d74133fe13efda416f7a0afa53405 (diff)
download emacs-b5e71861a3b15de7651be4524f38337aa451bfd7.tar.gz
emacs-b5e71861a3b15de7651be4524f38337aa451bfd7.zip
Add support for canonically equivalent bracket characters.
Diffstat (limited to 'src')
-rw-r--r-- src/bidi.c 59
1 files changed, 48 insertions, 11 deletions
diff --git a/src/bidi.c b/src/bidi.c
index 8f996eb6539..59fade3f785 100644
--- a/src/bidi.c
+++ b/src/bidi.c
@@ -2338,6 +2338,41 @@ typedef struct bpa_stack_entry {
2338 BPA stack, which should be more than enough for actual bidi text. */ 2338 BPA stack, which should be more than enough for actual bidi text. */
2339#define MAX_BPA_STACK (max (MAX_ALLOCA / sizeof (bpa_stack_entry), 1)) 2339#define MAX_BPA_STACK (max (MAX_ALLOCA / sizeof (bpa_stack_entry), 1))
2340 2340
2341/* UAX#9 says to match opening brackets with the matching closing
2342 brackets or their canonical equivalents. As of Unicode 7.0, there
2343 are only 2 bracket characters that have canonical equivalence
2344 decompositions: U+2329 and U+232A. So instead of accessing the
2345 table in uni-decomposition.el, we just handle these 2 characters
2346 with this simple macro. Note that ASCII characters don't have
2347 canonical equivalents by definition. */
2348
2349/* To find all the characters that need to be processed by
2350 CANONICAL_EQU, first find all the characters which have
2351 decompositions in UnicodeData.txt, with this Awk script:
2352
2353 awk -F ";" " {if ($6 != \"\") print $1, $6}" UnicodeData.txt
2354
2355 Then produce a list of all the bracket characters in BidiBrackets.txt:
2356
2357 awk -F "[ ;]" " {if ($1 != \"#\" && $1 != \"\") print $1}" BidiBrackets.txt
2358
2359 And finally, cross-reference these two:
2360
2361 fgrep -w -f brackets.txt decompositions.txt
2362
2363 where "decompositions.txt" was produced by the 1st script, and
2364 "brackets.txt" by the 2nd script. In the output of fgrep, look
2365 only for decompositions that don't begin with some compatibility
2366 formatting tag, such as "<compat>". Only decompositions that
2367 consist solely of character codepoints are relevant to bidi
2368 brackets processing. */
2369
/* Return the character that C should be treated as when matching
   bracket pairs: U+2329 maps to U+3008 and U+232A maps to U+3009;
   every other character, including all ASCII characters, maps to
   itself.  C may be evaluated more than once.  Every use of the
   argument is parenthesized so that any expression, including an
   assignment, can be passed safely.  */
#define CANONICAL_EQU(c)                        \
  ( ASCII_CHAR_P (c) ? (c)                      \
    : (c) == 0x2329 ? 0x3008                    \
    : (c) == 0x232a ? 0x3009                    \
    : (c) )
2375
2341#ifdef ENABLE_CHECKING 2376#ifdef ENABLE_CHECKING
2342# define STORE_BRACKET_CHARPOS \ 2377# define STORE_BRACKET_CHARPOS \
2343 bpa_stack[bpa_sp].open_bracket_pos = bidi_it->charpos 2378 bpa_stack[bpa_sp].open_bracket_pos = bidi_it->charpos
@@ -2347,16 +2382,18 @@ typedef struct bpa_stack_entry {
2347 2382
2348#define PUSH_BPA_STACK \ 2383#define PUSH_BPA_STACK \
2349 do { \ 2384 do { \
2350 bpa_sp++; \ 2385 int ch; \
2351 if (bpa_sp >= MAX_BPA_STACK) \ 2386 bpa_sp++; \
2352 { \ 2387 if (bpa_sp >= MAX_BPA_STACK) \
2353 bpa_sp = MAX_BPA_STACK - 1; \ 2388 { \
2354 goto bpa_give_up; \ 2389 bpa_sp = MAX_BPA_STACK - 1; \
2355 } \ 2390 goto bpa_give_up; \
2356 bpa_stack[bpa_sp].close_bracket_char = bidi_mirror_char (bidi_it->ch); \ 2391 } \
2357 bpa_stack[bpa_sp].open_bracket_idx = bidi_cache_last_idx; \ 2392 ch = CANONICAL_EQU (bidi_it->ch); \
2358 bpa_stack[bpa_sp].flags = 0; \ 2393 bpa_stack[bpa_sp].close_bracket_char = bidi_mirror_char (ch); \
2359 STORE_BRACKET_CHARPOS; \ 2394 bpa_stack[bpa_sp].open_bracket_idx = bidi_cache_last_idx; \
2395 bpa_stack[bpa_sp].flags = 0; \
2396 STORE_BRACKET_CHARPOS; \
2360 } while (0) 2397 } while (0)
2361 2398
2362 2399
@@ -2416,7 +2453,7 @@ bidi_find_bracket_pairs (struct bidi_it *bidi_it)
2416 else if (btype == BIDI_BRACKET_CLOSE) 2453 else if (btype == BIDI_BRACKET_CLOSE)
2417 { 2454 {
2418 int sp = bpa_sp; 2455 int sp = bpa_sp;
2419 int curchar = bidi_it->ch; 2456 int curchar = CANONICAL_EQU (bidi_it->ch);
2420 2457
2421 eassert (sp >= 0); 2458 eassert (sp >= 0);
2422 while (sp >= 0 && bpa_stack[sp].close_bracket_char != curchar) 2459 while (sp >= 0 && bpa_stack[sp].close_bracket_char != curchar)