diff options
| author | Eli Zaretskii | 2014-10-18 14:07:44 +0300 |
|---|---|---|
| committer | Eli Zaretskii | 2014-10-18 14:07:44 +0300 |
| commit | b5e71861a3b15de7651be4524f38337aa451bfd7 (patch) | |
| tree | 5c7c23e50dcaee496d0960ce9a243361a35805a7 /src | |
| parent | 1c396384998d74133fe13efda416f7a0afa53405 (diff) | |
| download | emacs-b5e71861a3b15de7651be4524f38337aa451bfd7.tar.gz emacs-b5e71861a3b15de7651be4524f38337aa451bfd7.zip | |
Add support for canonically equivalent bracket characters.
Diffstat (limited to 'src')
| -rw-r--r-- | src/bidi.c | 59 |
1 file changed, 48 insertions, 11 deletions
diff --git a/src/bidi.c b/src/bidi.c index 8f996eb6539..59fade3f785 100644 --- a/src/bidi.c +++ b/src/bidi.c | |||
| @@ -2338,6 +2338,41 @@ typedef struct bpa_stack_entry { | |||
| 2338 | BPA stack, which should be more than enough for actual bidi text. */ | 2338 | BPA stack, which should be more than enough for actual bidi text. */ |
| 2339 | #define MAX_BPA_STACK (max (MAX_ALLOCA / sizeof (bpa_stack_entry), 1)) | 2339 | #define MAX_BPA_STACK (max (MAX_ALLOCA / sizeof (bpa_stack_entry), 1)) |
| 2340 | 2340 | ||
| 2341 | /* UAX#9 says to match opening brackets with the matching closing | ||
| 2342 | brackets or their canonical equivalents. As of Unicode 7.0, there | ||
| 2343 | are only 2 bracket characters that have canonical equivalence | ||
| 2344 | decompositions: U+2329 and U+232A. So instead of accessing the | ||
| 2345 | table in uni-decomposition.el, we just handle these 2 characters | ||
| 2346 | with this simple macro. Note that ASCII characters don't have | ||
| 2347 | canonical equivalents by definition. */ | ||
| 2348 | |||
| 2349 | /* To find all the characters that need to be processed by | ||
| 2350 | CANONICAL_EQU, first find all the characters which have | ||
| 2351 | decompositions in UnicodeData.txt, with this Awk script: | ||
| 2352 | |||
| 2353 | awk -F ";" " {if ($6 != \"\") print $1, $6}" UnicodeData.txt | ||
| 2354 | |||
| 2355 | Then produce a list of all the bracket characters in BidiBrackets.txt: | ||
| 2356 | |||
| 2357 | awk -F "[ ;]" " {if ($1 != \"#\" && $1 != \"\") print $1}" BidiBrackets.txt | ||
| 2358 | |||
| 2359 | And finally, cross-reference these two: | ||
| 2360 | |||
| 2361 | fgrep -w -f brackets.txt decompositions.txt | ||
| 2362 | |||
| 2363 | where "decompositions.txt" was produced by the 1st script, and | ||
| 2364 | "brackets.txt" by the 2nd script. In the output of fgrep, look | ||
| 2365 | only for decompositions that don't begin with some compatibility | ||
| 2366 | formatting tag, such as "<compat>". Only decompositions that | ||
| 2367 | consist solely of character codepoints are relevant to bidi | ||
| 2368 | brackets processing. */ | ||
| 2369 | |||
| 2370 | #define CANONICAL_EQU(c) \ | ||
| 2371 | ( ASCII_CHAR_P (c) ? c \ | ||
| 2372 | : (c) == 0x2329 ? 0x3008 \ | ||
| 2373 | : (c) == 0x232a ? 0x3009 \ | ||
| 2374 | : c ) | ||
| 2375 | |||
| 2341 | #ifdef ENABLE_CHECKING | 2376 | #ifdef ENABLE_CHECKING |
| 2342 | # define STORE_BRACKET_CHARPOS \ | 2377 | # define STORE_BRACKET_CHARPOS \ |
| 2343 | bpa_stack[bpa_sp].open_bracket_pos = bidi_it->charpos | 2378 | bpa_stack[bpa_sp].open_bracket_pos = bidi_it->charpos |
| @@ -2347,16 +2382,18 @@ typedef struct bpa_stack_entry { | |||
| 2347 | 2382 | ||
| 2348 | #define PUSH_BPA_STACK \ | 2383 | #define PUSH_BPA_STACK \ |
| 2349 | do { \ | 2384 | do { \ |
| 2350 | bpa_sp++; \ | 2385 | int ch; \ |
| 2351 | if (bpa_sp >= MAX_BPA_STACK) \ | 2386 | bpa_sp++; \ |
| 2352 | { \ | 2387 | if (bpa_sp >= MAX_BPA_STACK) \ |
| 2353 | bpa_sp = MAX_BPA_STACK - 1; \ | 2388 | { \ |
| 2354 | goto bpa_give_up; \ | 2389 | bpa_sp = MAX_BPA_STACK - 1; \ |
| 2355 | } \ | 2390 | goto bpa_give_up; \ |
| 2356 | bpa_stack[bpa_sp].close_bracket_char = bidi_mirror_char (bidi_it->ch); \ | 2391 | } \ |
| 2357 | bpa_stack[bpa_sp].open_bracket_idx = bidi_cache_last_idx; \ | 2392 | ch = CANONICAL_EQU (bidi_it->ch); \ |
| 2358 | bpa_stack[bpa_sp].flags = 0; \ | 2393 | bpa_stack[bpa_sp].close_bracket_char = bidi_mirror_char (ch); \ |
| 2359 | STORE_BRACKET_CHARPOS; \ | 2394 | bpa_stack[bpa_sp].open_bracket_idx = bidi_cache_last_idx; \ |
| 2395 | bpa_stack[bpa_sp].flags = 0; \ | ||
| 2396 | STORE_BRACKET_CHARPOS; \ | ||
| 2360 | } while (0) | 2397 | } while (0) |
| 2361 | 2398 | ||
| 2362 | 2399 | ||
| @@ -2416,7 +2453,7 @@ bidi_find_bracket_pairs (struct bidi_it *bidi_it) | |||
| 2416 | else if (btype == BIDI_BRACKET_CLOSE) | 2453 | else if (btype == BIDI_BRACKET_CLOSE) |
| 2417 | { | 2454 | { |
| 2418 | int sp = bpa_sp; | 2455 | int sp = bpa_sp; |
| 2419 | int curchar = bidi_it->ch; | 2456 | int curchar = CANONICAL_EQU (bidi_it->ch); |
| 2420 | 2457 | ||
| 2421 | eassert (sp >= 0); | 2458 | eassert (sp >= 0); |
| 2422 | while (sp >= 0 && bpa_stack[sp].close_bracket_char != curchar) | 2459 | while (sp >= 0 && bpa_stack[sp].close_bracket_char != curchar) |