aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKenichi Handa2004-03-03 23:51:17 +0000
committerKenichi Handa2004-03-03 23:51:17 +0000
commitde27e296e7e0b1bb5ffa30a87e6dfab44bd90d7d (patch)
tree1bdf6f229488055dd0402e9b9bfbd4bbfcea8344
parent4ca81955be9b7fed99f1300f8a96102b3b2b5dc5 (diff)
downloademacs-de27e296e7e0b1bb5ffa30a87e6dfab44bd90d7d.tar.gz
emacs-de27e296e7e0b1bb5ffa30a87e6dfab44bd90d7d.zip
New file.
-rw-r--r--src/bidi.c2102
1 files changed, 2102 insertions, 0 deletions
diff --git a/src/bidi.c b/src/bidi.c
new file mode 100644
index 00000000000..c07312639a7
--- /dev/null
+++ b/src/bidi.c
@@ -0,0 +1,2102 @@
1/* A sequential implementation of the Unicode Bidirectional algorithm,
2 as per UAX#9.
3
4 Unlike the reference and most other implementations, this one is
5 designed to be called once for every character in the buffer.
6
7 The main entry point is bidi_get_next_char_visually. Each time it
8 is called, it finds the next character in the visual order, and
9 returns its information in a special structure. The caller is then
10 expected to process this character for display or any other
11 purposes, and call bidi_get_next_char_visually for the next
12 character.
13
14 A note about references to UAX#9 rules: if the reference says
15 something like "X9/Retaining", it means that you need to refer to
16 rule X9 and to its modifications decribed in the "Implementation
17 Notes" section of UAX#9, under "Retaining Format Codes". */
18
19#ifdef HAVE_CONFIG_H
20#include <config.h>
21#endif
22
23#ifdef HAVE_STRING_H
24#include <string.h>
25#endif
26
27#include "lisp.h"
28#include "buffer.h"
29#include "character.h"
30#include "dispextern.h"
31
32static int bidi_initialized = 0;
33
34static Lisp_Object bidi_type_table;
35
36#define LRM_CHAR 0x200E
37#define RLM_CHAR 0x200F
38#define LRE_CHAR 0x202A
39#define RLE_CHAR 0x202B
40#define PDF_CHAR 0x202C
41#define LRO_CHAR 0x202D
42#define RLO_CHAR 0x202E
43
44#define CHARSET_HEBREW 0x88
45#define CHARSET_ARABIC 0x87
46#define CHARSET_SYRIAC -1 /* these are undefined yet, -1 is invalid */
47#define CHARSET_THAANA -1
48
49/* FIXME: need to define wrappers for FETCH_CHAR etc. that return
50 BIDI_EOB when they hit ZV. */
51#define BIDI_EOB -1
52#define BIDI_BOB -2
53
54#ifdef TEST_STANDALONE
55/* Testing. */
56
57#include <stdio.h>
58
59static unsigned char *input_buf;
60static size_t input_buf_size;
61
62int _fetch_multibyte_char_len, _c_c_;
63
64#undef FETCH_CHAR_ADVANCE
65#define FETCH_CHAR_ADVANCE(ch, cp, bp) \
66 do { \
67 ch = input_buf[cp]; \
68 (cp)++; \
69 (bp)++; \
70 if (ch == '\0') \
71 ch = BIDI_EOB; \
72 } while (0)
73
74#undef FETCH_CHAR
75#define FETCH_CHAR(n) ((_c_c_ = input_buf[n]) ? _c_c_ : BIDI_EOB)
76
77#undef CHAR_CHARSET
78#define CHAR_CHARSET(c) \
79 (((c) >= 128 || ((c) < 8 && (c)) || ((c) >= 'A' && (c) < 'X')) \
80 ? CHARSET_HEBREW \
81 : ((((c) >= 'X' && (c) <= 'Z') || ((c) >= '6' && (c) <= '9')) \
82 ? CHARSET_ARABIC \
83 : CHARSET_ASCII))
84
85#undef CHAR_TO_BYTE
86#define CHAR_TO_BYTE(pos) (pos)
87
88#define char_bytes(ch) 1
89
90#undef LRE_CHAR
91#undef LRO_CHAR
92#undef RLE_CHAR
93#undef RLO_CHAR
94#undef PDF_CHAR
95#undef RLM_CHAR
96#undef LRM_CHAR
97
98#define LRE_CHAR 1
99#define LRO_CHAR 2
100#define RLE_CHAR 3
101#define RLO_CHAR 4
102#define PDF_CHAR 5
103#define RLM_CHAR 6
104#define LRM_CHAR 7
105
106static const char *bidi_name[] =
107 {
108 "[???]", "[LRE]", "[LRO]", "[RLE]", "[RLO]", "[PDF]", "[RLM]", "[LRM]"
109 };
110
111#endif /* TEST_STANDALONE */
112
113/* Data structures. */
114
115/* What we need to know about the current paragraph. */
116struct bidi_paragraph_info {
117 int start_bytepos; /* byte position where it begins */
118 int end_bytepos; /* byte position where it ends */
119 int embedding_level; /* its basic embedding level */
120 bidi_dir_t base_dir; /* its base direction */
121};
122
123/* Data type for describing the bidirectional character categories. */
124typedef enum {
125 UNKNOWN_BC,
126 NEUTRAL,
127 WEAK,
128 STRONG
129} bidi_category_t;
130
131int bidi_ignore_explicit_marks_for_paragraph_level = 1;
132
133bidi_dir_t bidi_overriding_paragraph_direction = NEUTRAL_DIR;
134
135#define ASCII_BIDI_TYPE_SET(STR, TYPE) \
136 do { \
137 unsigned char *p; \
138 for (p = (STR); *p; p++) \
139 CHAR_TABLE_SET (bidi_type_table, *p, (TYPE)); \
140 } while (0)
141
142static void
143bidi_initialize ()
144{
145 struct {
146 int from, to;
147 bidi_type_t type;
148 } bidi_type[] =
149 { { 0x0000, 0x0008, WEAK_BN },
150 { 0x0009, 0x0000, NEUTRAL_S },
151 { 0x000A, 0x0000, NEUTRAL_B },
152 { 0x000B, 0x0000, NEUTRAL_S },
153 { 0x000C, 0x0000, NEUTRAL_WS },
154 { 0x000D, 0x0000, NEUTRAL_B },
155 { 0x000E, 0x001B, WEAK_BN },
156 { 0x001C, 0x001E, NEUTRAL_B },
157 { 0x001F, 0x0000, NEUTRAL_S },
158 { 0x0020, 0x0000, NEUTRAL_WS },
159 { 0x0021, 0x0022, NEUTRAL_ON },
160 { 0x0023, 0x0025, WEAK_ET },
161 { 0x0026, 0x002A, NEUTRAL_ON },
162 { 0x002B, 0x0000, WEAK_ET },
163 { 0x002C, 0x0000, WEAK_CS },
164 { 0x002D, 0x0000, WEAK_ET },
165 { 0x002E, 0x0000, WEAK_CS },
166 { 0x002F, 0x0000, WEAK_ES },
167 { 0x0030, 0x0039, WEAK_EN },
168 { 0x003A, 0x0000, WEAK_CS },
169 { 0x003B, 0x0040, NEUTRAL_ON },
170 { 0x005B, 0x0060, NEUTRAL_ON },
171 { 0x007B, 0x007E, NEUTRAL_ON },
172 { 0x007F, 0x0084, WEAK_BN },
173 { 0x0085, 0x0000, NEUTRAL_B },
174 { 0x0086, 0x009F, WEAK_BN },
175 { 0x00A0, 0x0000, WEAK_CS },
176 { 0x00A1, 0x0000, NEUTRAL_ON },
177 { 0x00A2, 0x00A5, WEAK_ET },
178 { 0x00A6, 0x00A9, NEUTRAL_ON },
179 { 0x00AB, 0x00AF, NEUTRAL_ON },
180 { 0x00B0, 0x00B1, WEAK_ET },
181 { 0x00B2, 0x00B3, WEAK_EN },
182 { 0x00B4, 0x0000, NEUTRAL_ON },
183 { 0x00B6, 0x00B8, NEUTRAL_ON },
184 { 0x00B9, 0x0000, WEAK_EN },
185 { 0x00BB, 0x00BF, NEUTRAL_ON },
186 { 0x00D7, 0x0000, NEUTRAL_ON },
187 { 0x00F7, 0x0000, NEUTRAL_ON },
188 { 0x02B9, 0x02BA, NEUTRAL_ON },
189 { 0x02C2, 0x02CF, NEUTRAL_ON },
190 { 0x02D2, 0x02DF, NEUTRAL_ON },
191 { 0x02E5, 0x02ED, NEUTRAL_ON },
192 { 0x0300, 0x036F, WEAK_NSM },
193 { 0x0374, 0x0375, NEUTRAL_ON },
194 { 0x037E, 0x0385, NEUTRAL_ON },
195 { 0x0387, 0x0000, NEUTRAL_ON },
196 { 0x03F6, 0x0000, NEUTRAL_ON },
197 { 0x0483, 0x0489, WEAK_NSM },
198 { 0x058A, 0x0000, NEUTRAL_ON },
199 { 0x0591, 0x05BD, WEAK_NSM },
200 { 0x05BE, 0x0000, STRONG_R },
201 { 0x05BF, 0x0000, WEAK_NSM },
202 { 0x05C0, 0x0000, STRONG_R },
203 { 0x05C1, 0x05C2, WEAK_NSM },
204 { 0x05C3, 0x0000, STRONG_R },
205 { 0x05C4, 0x0000, WEAK_NSM },
206 { 0x05D0, 0x05F4, STRONG_R },
207 { 0x060C, 0x0000, WEAK_CS },
208 { 0x061B, 0x064A, STRONG_AL },
209 { 0x064B, 0x0655, WEAK_NSM },
210 { 0x0660, 0x0669, WEAK_AN },
211 { 0x066A, 0x0000, WEAK_ET },
212 { 0x066B, 0x066C, WEAK_AN },
213 { 0x066D, 0x066F, STRONG_AL },
214 { 0x0670, 0x0000, WEAK_NSM },
215 { 0x0671, 0x06D5, STRONG_AL },
216 { 0x06D6, 0x06DC, WEAK_NSM },
217 { 0x06DD, 0x0000, STRONG_AL },
218 { 0x06DE, 0x06E4, WEAK_NSM },
219 { 0x06E5, 0x06E6, STRONG_AL },
220 { 0x06E7, 0x06E8, WEAK_NSM },
221 { 0x06E9, 0x0000, NEUTRAL_ON },
222 { 0x06EA, 0x06ED, WEAK_NSM },
223 { 0x06F0, 0x06F9, WEAK_EN },
224 { 0x06FA, 0x070D, STRONG_AL },
225 { 0x070F, 0x0000, WEAK_BN },
226 { 0x0710, 0x0000, STRONG_AL },
227 { 0x0711, 0x0000, WEAK_NSM },
228 { 0x0712, 0x072C, STRONG_AL },
229 { 0x0730, 0x074A, WEAK_NSM },
230 { 0x0780, 0x07A5, STRONG_AL },
231 { 0x07A6, 0x07B0, WEAK_NSM },
232 { 0x07B1, 0x0000, STRONG_AL },
233 { 0x0901, 0x0902, WEAK_NSM },
234 { 0x093C, 0x0000, WEAK_NSM },
235 { 0x0941, 0x0948, WEAK_NSM },
236 { 0x094D, 0x0000, WEAK_NSM },
237 { 0x0951, 0x0954, WEAK_NSM },
238 { 0x0962, 0x0963, WEAK_NSM },
239 { 0x0981, 0x0000, WEAK_NSM },
240 { 0x09BC, 0x0000, WEAK_NSM },
241 { 0x09C1, 0x09C4, WEAK_NSM },
242 { 0x09CD, 0x0000, WEAK_NSM },
243 { 0x09E2, 0x09E3, WEAK_NSM },
244 { 0x09F2, 0x09F3, WEAK_ET },
245 { 0x0A02, 0x0000, WEAK_NSM },
246 { 0x0A3C, 0x0000, WEAK_NSM },
247 { 0x0A41, 0x0A4D, WEAK_NSM },
248 { 0x0A70, 0x0A71, WEAK_NSM },
249 { 0x0A81, 0x0A82, WEAK_NSM },
250 { 0x0ABC, 0x0000, WEAK_NSM },
251 { 0x0AC1, 0x0AC8, WEAK_NSM },
252 { 0x0ACD, 0x0000, WEAK_NSM },
253 { 0x0B01, 0x0000, WEAK_NSM },
254 { 0x0B3C, 0x0000, WEAK_NSM },
255 { 0x0B3F, 0x0000, WEAK_NSM },
256 { 0x0B41, 0x0B43, WEAK_NSM },
257 { 0x0B4D, 0x0B56, WEAK_NSM },
258 { 0x0B82, 0x0000, WEAK_NSM },
259 { 0x0BC0, 0x0000, WEAK_NSM },
260 { 0x0BCD, 0x0000, WEAK_NSM },
261 { 0x0C3E, 0x0C40, WEAK_NSM },
262 { 0x0C46, 0x0C56, WEAK_NSM },
263 { 0x0CBF, 0x0000, WEAK_NSM },
264 { 0x0CC6, 0x0000, WEAK_NSM },
265 { 0x0CCC, 0x0CCD, WEAK_NSM },
266 { 0x0D41, 0x0D43, WEAK_NSM },
267 { 0x0D4D, 0x0000, WEAK_NSM },
268 { 0x0DCA, 0x0000, WEAK_NSM },
269 { 0x0DD2, 0x0DD6, WEAK_NSM },
270 { 0x0E31, 0x0000, WEAK_NSM },
271 { 0x0E34, 0x0E3A, WEAK_NSM },
272 { 0x0E3F, 0x0000, WEAK_ET },
273 { 0x0E47, 0x0E4E, WEAK_NSM },
274 { 0x0EB1, 0x0000, WEAK_NSM },
275 { 0x0EB4, 0x0EBC, WEAK_NSM },
276 { 0x0EC8, 0x0ECD, WEAK_NSM },
277 { 0x0F18, 0x0F19, WEAK_NSM },
278 { 0x0F35, 0x0000, WEAK_NSM },
279 { 0x0F37, 0x0000, WEAK_NSM },
280 { 0x0F39, 0x0000, WEAK_NSM },
281 { 0x0F3A, 0x0F3D, NEUTRAL_ON },
282 { 0x0F71, 0x0F7E, WEAK_NSM },
283 { 0x0F80, 0x0F84, WEAK_NSM },
284 { 0x0F86, 0x0F87, WEAK_NSM },
285 { 0x0F90, 0x0FBC, WEAK_NSM },
286 { 0x0FC6, 0x0000, WEAK_NSM },
287 { 0x102D, 0x1030, WEAK_NSM },
288 { 0x1032, 0x1037, WEAK_NSM },
289 { 0x1039, 0x0000, WEAK_NSM },
290 { 0x1058, 0x1059, WEAK_NSM },
291 { 0x1680, 0x0000, NEUTRAL_WS },
292 { 0x169B, 0x169C, NEUTRAL_ON },
293 { 0x1712, 0x1714, WEAK_NSM },
294 { 0x1732, 0x1734, WEAK_NSM },
295 { 0x1752, 0x1753, WEAK_NSM },
296 { 0x1772, 0x1773, WEAK_NSM },
297 { 0x17B7, 0x17BD, WEAK_NSM },
298 { 0x17C6, 0x0000, WEAK_NSM },
299 { 0x17C9, 0x17D3, WEAK_NSM },
300 { 0x17DB, 0x0000, WEAK_ET },
301 { 0x1800, 0x180A, NEUTRAL_ON },
302 { 0x180B, 0x180D, WEAK_NSM },
303 { 0x180E, 0x0000, WEAK_BN },
304 { 0x18A9, 0x0000, WEAK_NSM },
305 { 0x1FBD, 0x0000, NEUTRAL_ON },
306 { 0x1FBF, 0x1FC1, NEUTRAL_ON },
307 { 0x1FCD, 0x1FCF, NEUTRAL_ON },
308 { 0x1FDD, 0x1FDF, NEUTRAL_ON },
309 { 0x1FED, 0x1FEF, NEUTRAL_ON },
310 { 0x1FFD, 0x1FFE, NEUTRAL_ON },
311 { 0x2000, 0x200A, NEUTRAL_WS },
312 { 0x200B, 0x200D, WEAK_BN },
313 { 0x200F, 0x0000, STRONG_R },
314 { 0x2010, 0x2027, NEUTRAL_ON },
315 { 0x2028, 0x0000, NEUTRAL_WS },
316 { 0x2029, 0x0000, NEUTRAL_B },
317 { 0x202A, 0x0000, LRE },
318 { 0x202B, 0x0000, RLE },
319 { 0x202C, 0x0000, PDF },
320 { 0x202D, 0x0000, LRO },
321 { 0x202E, 0x0000, RLO },
322 { 0x202F, 0x0000, NEUTRAL_WS },
323 { 0x2030, 0x2034, WEAK_ET },
324 { 0x2035, 0x2057, NEUTRAL_ON },
325 { 0x205F, 0x0000, NEUTRAL_WS },
326 { 0x2060, 0x206F, WEAK_BN },
327 { 0x2070, 0x0000, WEAK_EN },
328 { 0x2074, 0x2079, WEAK_EN },
329 { 0x207A, 0x207B, WEAK_ET },
330 { 0x207C, 0x207E, NEUTRAL_ON },
331 { 0x2080, 0x2089, WEAK_EN },
332 { 0x208A, 0x208B, WEAK_ET },
333 { 0x208C, 0x208E, NEUTRAL_ON },
334 { 0x20A0, 0x20B1, WEAK_ET },
335 { 0x20D0, 0x20EA, WEAK_NSM },
336 { 0x2100, 0x2101, NEUTRAL_ON },
337 { 0x2103, 0x2106, NEUTRAL_ON },
338 { 0x2108, 0x2109, NEUTRAL_ON },
339 { 0x2114, 0x0000, NEUTRAL_ON },
340 { 0x2116, 0x2118, NEUTRAL_ON },
341 { 0x211E, 0x2123, NEUTRAL_ON },
342 { 0x2125, 0x0000, NEUTRAL_ON },
343 { 0x2127, 0x0000, NEUTRAL_ON },
344 { 0x2129, 0x0000, NEUTRAL_ON },
345 { 0x212E, 0x0000, WEAK_ET },
346 { 0x2132, 0x0000, NEUTRAL_ON },
347 { 0x213A, 0x0000, NEUTRAL_ON },
348 { 0x2140, 0x2144, NEUTRAL_ON },
349 { 0x214A, 0x215F, NEUTRAL_ON },
350 { 0x2190, 0x2211, NEUTRAL_ON },
351 { 0x2212, 0x2213, WEAK_ET },
352 { 0x2214, 0x2335, NEUTRAL_ON },
353 { 0x237B, 0x2394, NEUTRAL_ON },
354 { 0x2396, 0x244A, NEUTRAL_ON },
355 { 0x2460, 0x249B, WEAK_EN },
356 { 0x24EA, 0x0000, WEAK_EN },
357 { 0x24EB, 0x2FFB, NEUTRAL_ON },
358 { 0x3000, 0x0000, NEUTRAL_WS },
359 { 0x3001, 0x3004, NEUTRAL_ON },
360 { 0x3008, 0x3020, NEUTRAL_ON },
361 { 0x302A, 0x302F, WEAK_NSM },
362 { 0x3030, 0x0000, NEUTRAL_ON },
363 { 0x3036, 0x3037, NEUTRAL_ON },
364 { 0x303D, 0x303F, NEUTRAL_ON },
365 { 0x3099, 0x309A, WEAK_NSM },
366 { 0x309B, 0x309C, NEUTRAL_ON },
367 { 0x30A0, 0x0000, NEUTRAL_ON },
368 { 0x30FB, 0x0000, NEUTRAL_ON },
369 { 0x3251, 0x325F, NEUTRAL_ON },
370 { 0x32B1, 0x32BF, NEUTRAL_ON },
371 { 0xA490, 0xA4C6, NEUTRAL_ON },
372 { 0xFB1D, 0x0000, STRONG_R },
373 { 0xFB1E, 0x0000, WEAK_NSM },
374 { 0xFB1F, 0xFB28, STRONG_R },
375 { 0xFB29, 0x0000, WEAK_ET },
376 { 0xFB2A, 0xFB4F, STRONG_R },
377 { 0xFB50, 0xFD3D, STRONG_AL },
378 { 0xFD3E, 0xFD3F, NEUTRAL_ON },
379 { 0xFD50, 0xFDFC, STRONG_AL },
380 { 0xFE00, 0xFE23, WEAK_NSM },
381 { 0xFE30, 0xFE4F, NEUTRAL_ON },
382 { 0xFE50, 0x0000, WEAK_CS },
383 { 0xFE51, 0x0000, NEUTRAL_ON },
384 { 0xFE52, 0x0000, WEAK_CS },
385 { 0xFE54, 0x0000, NEUTRAL_ON },
386 { 0xFE55, 0x0000, WEAK_CS },
387 { 0xFE56, 0xFE5E, NEUTRAL_ON },
388 { 0xFE5F, 0x0000, WEAK_ET },
389 { 0xFE60, 0xFE61, NEUTRAL_ON },
390 { 0xFE62, 0xFE63, WEAK_ET },
391 { 0xFE64, 0xFE68, NEUTRAL_ON },
392 { 0xFE69, 0xFE6A, WEAK_ET },
393 { 0xFE6B, 0x0000, NEUTRAL_ON },
394 { 0xFE70, 0xFEFC, STRONG_AL },
395 { 0xFEFF, 0x0000, WEAK_BN },
396 { 0xFF01, 0xFF02, NEUTRAL_ON },
397 { 0xFF03, 0xFF05, WEAK_ET },
398 { 0xFF06, 0xFF0A, NEUTRAL_ON },
399 { 0xFF0B, 0x0000, WEAK_ET },
400 { 0xFF0C, 0x0000, WEAK_CS },
401 { 0xFF0D, 0x0000, WEAK_ET },
402 { 0xFF0E, 0x0000, WEAK_CS },
403 { 0xFF0F, 0x0000, WEAK_ES },
404 { 0xFF10, 0xFF19, WEAK_EN },
405 { 0xFF1A, 0x0000, WEAK_CS },
406 { 0xFF1B, 0xFF20, NEUTRAL_ON },
407 { 0xFF3B, 0xFF40, NEUTRAL_ON },
408 { 0xFF5B, 0xFF65, NEUTRAL_ON },
409 { 0xFFE0, 0xFFE1, WEAK_ET },
410 { 0xFFE2, 0xFFE4, NEUTRAL_ON },
411 { 0xFFE5, 0xFFE6, WEAK_ET },
412 { 0xFFE8, 0xFFEE, NEUTRAL_ON },
413 { 0xFFF9, 0xFFFB, WEAK_BN },
414 { 0xFFFC, 0xFFFD, NEUTRAL_ON },
415 { 0x1D167, 0x1D169, WEAK_NSM },
416 { 0x1D173, 0x1D17A, WEAK_BN },
417 { 0x1D17B, 0x1D182, WEAK_NSM },
418 { 0x1D185, 0x1D18B, WEAK_NSM },
419 { 0x1D1AA, 0x1D1AD, WEAK_NSM },
420 { 0x1D7CE, 0x1D7FF, WEAK_EN },
421 { 0xE0001, 0xE007F, WEAK_BN } };
422 int i;
423
424 bidi_type_table = Fmake_char_table (Qnil, make_number (STRONG_L));
425
426 for (i = 0; i < sizeof bidi_type / sizeof bidi_type[0]; i++)
427 char_table_set_range (bidi_type_table, bidi_type[i].from, bidi_type[i].to,
428 make_number (bidi_type[i].type));
429 bidi_initialized = 1;
430}
431
432static int
433bidi_is_arabic_number (int ch)
434{
435#ifdef TEST_STANDALONE
436 return ch >= '6' && ch <= '9';
437#else
438 return 0; /* FIXME! */
439#endif
440}
441
442/* Return the bidi type of a character CH. */
443bidi_type_t
444bidi_get_type (int ch)
445{
446 return (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
447}
448
449/* Given a bidi TYPE of a character, return its category. */
450bidi_category_t
451bidi_get_category (bidi_type_t type)
452{
453 switch (type)
454 {
455 case UNKNOWN_BT:
456 return UNKNOWN_BC;
457 case STRONG_L:
458 case STRONG_R:
459 case STRONG_AL:
460 case LRE:
461 case LRO:
462 case RLE:
463 case RLO:
464 return STRONG;
465 case PDF: /* ??? really?? */
466 case WEAK_EN:
467 case WEAK_ES:
468 case WEAK_ET:
469 case WEAK_AN:
470 case WEAK_CS:
471 case WEAK_NSM:
472 case WEAK_BN:
473 return WEAK;
474 case NEUTRAL_B:
475 case NEUTRAL_S:
476 case NEUTRAL_WS:
477 case NEUTRAL_ON:
478 return NEUTRAL;
479 default:
480 abort ();
481 }
482}
483
484/* FIXME: exceedingly temporary! Should consult some TBD data base of
485 character properties. */
486int
487bidi_mirror_char (int c)
488{
489 static const char mirrored_pairs[] = "()<>[]{}";
490 const char *p = strchr (mirrored_pairs, c);
491
492 if (p)
493 {
494 size_t i = p - mirrored_pairs;
495
496 if ((i & 1) == 0)
497 return mirrored_pairs[i + 1];
498 else
499 return mirrored_pairs[i - 1];
500 }
501 return c;
502}
503
504/* Copy the bidi iterator from FROM to TO. To save cycles, this only
505 copies the part of the level stack that is actually in use. */
506static inline void
507bidi_copy_it (struct bidi_it *to, struct bidi_it *from)
508{
509 int i;
510
511 /* Copy everything except the level stack. */
512 memcpy (to, from, ((int)&((struct bidi_it *)0)->level_stack[0]));
513
514 /* Copy the active part of the level stack. */
515 to->level_stack[0] = from->level_stack[0]; /* level zero is always in use */
516 for (i = 1; i <= from->stack_idx; i++)
517 to->level_stack[i] = from->level_stack[i];
518}
519
520/* Caching the bidi iterator states. */
521
522static struct bidi_it bidi_cache[1000]; /* FIXME: make this dynamically allocated! */
523static int bidi_cache_idx;
524static int bidi_cache_last_idx;
525
526static inline void
527bidi_cache_reset (void)
528{
529 bidi_cache_idx = 0;
530 bidi_cache_last_idx = -1;
531}
532
533static inline void
534bidi_cache_fetch_state (int idx, struct bidi_it *bidi_it)
535{
536 int current_scan_dir = bidi_it->scan_dir;
537
538 if (idx < 0 || idx >= bidi_cache_idx)
539 abort ();
540
541 bidi_copy_it (bidi_it, &bidi_cache[idx]);
542 bidi_it->scan_dir = current_scan_dir;
543 bidi_cache_last_idx = idx;
544}
545
546/* Find a cached state with a given CHARPOS and resolved embedding
547 level less or equal to LEVEL. if LEVEL is -1, disregard the
548 resolved levels in cached states. DIR, if non-zero, means search
549 in that direction from the last cache hit. */
550static inline int
551bidi_cache_search (int charpos, int level, int dir)
552{
553 int i, i_start;
554
555 if (bidi_cache_idx)
556 {
557 if (charpos < bidi_cache[bidi_cache_last_idx].charpos)
558 dir = -1;
559 else if (charpos > bidi_cache[bidi_cache_last_idx].charpos)
560 dir = 1;
561 if (dir)
562 i_start = bidi_cache_last_idx;
563 else
564 {
565 dir = -1;
566 i_start = bidi_cache_idx - 1;
567 }
568
569 if (dir < 0)
570 {
571 /* Linear search for now; FIXME! */
572 for (i = i_start; i >= 0; i--)
573 if (bidi_cache[i].charpos == charpos
574 && (level == -1 || bidi_cache[i].resolved_level <= level))
575 return i;
576 }
577 else
578 {
579 for (i = i_start; i < bidi_cache_idx; i++)
580 if (bidi_cache[i].charpos == charpos
581 && (level == -1 || bidi_cache[i].resolved_level <= level))
582 return i;
583 }
584 }
585
586 return -1;
587}
588
589/* Find a cached state where the resolved level changes to a value
590 that is lower than LEVEL, and return its cache slot index. DIR is
591 the direction to search, starting with the last used cache slot.
592 BEFORE, if non-zero, means return the index of the slot that is
593 ``before'' the level change in the search direction. That is,
594 given the cached levels like this:
595
596 1122333442211
597 AB C
598
599 and assuming we are at the position cached at the slot marked with
600 C, searching backwards (DIR = -1) for LEVEL = 2 will return the
601 index of slot B or A, depending whether BEFORE is, respectively,
602 non-zero or zero. */
603static int
604bidi_cache_find_level_change (int level, int dir, int before)
605{
606 if (bidi_cache_idx)
607 {
608 int i = dir ? bidi_cache_last_idx : bidi_cache_idx - 1;
609 int incr = before ? 1 : 0;
610
611 if (!dir)
612 dir = -1;
613 else if (!incr)
614 i += dir;
615
616 if (dir < 0)
617 {
618 while (i >= incr)
619 {
620 if (bidi_cache[i - incr].resolved_level >= 0
621 && bidi_cache[i - incr].resolved_level < level)
622 return i;
623 i--;
624 }
625 }
626 else
627 {
628 while (i < bidi_cache_idx - incr)
629 {
630 if (bidi_cache[i + incr].resolved_level >= 0
631 && bidi_cache[i + incr].resolved_level < level)
632 return i;
633 i++;
634 }
635 }
636 }
637
638 return -1;
639}
640
641static inline void
642bidi_cache_iterator_state (struct bidi_it *bidi_it, int resolved)
643{
644 int idx;
645
646 /* We should never cache on backward scans. */
647 if (bidi_it->scan_dir == -1)
648 abort ();
649 idx = bidi_cache_search (bidi_it->charpos, -1, 1);
650
651 if (idx < 0)
652 {
653 idx = bidi_cache_idx;
654 if (idx > sizeof (bidi_cache) / sizeof (bidi_cache[0]) - 1)
655 abort ();
656 bidi_copy_it (&bidi_cache[idx], bidi_it);
657 if (!resolved)
658 bidi_cache[idx].resolved_level = -1;
659 }
660 else
661 {
662 /* Copy only the members which could have changed, to avoid
663 costly copying of the entire struct. */
664 bidi_cache[idx].type = bidi_it->type;
665 bidi_cache[idx].orig_type = bidi_it->orig_type;
666 if (resolved)
667 bidi_cache[idx].resolved_level = bidi_it->resolved_level;
668 else
669 bidi_cache[idx].resolved_level = -1;
670 bidi_cache[idx].invalid_levels = bidi_it->invalid_levels;
671 bidi_cache[idx].invalid_rl_levels = bidi_it->invalid_rl_levels;
672 bidi_cache[idx].next_for_neutral = bidi_it->next_for_neutral;
673 bidi_cache[idx].next_for_ws = bidi_it->next_for_ws;
674 bidi_cache[idx].ignore_bn_limit = bidi_it->ignore_bn_limit;
675 }
676
677 bidi_cache_last_idx = idx;
678 if (idx >= bidi_cache_idx)
679 bidi_cache_idx = idx + 1;
680}
681
682static inline bidi_type_t
683bidi_cache_find (int charpos, int level, struct bidi_it *bidi_it)
684{
685 int i = bidi_cache_search (charpos, level, bidi_it->scan_dir);
686
687 if (i >= 0)
688 {
689 bidi_dir_t current_scan_dir = bidi_it->scan_dir;
690
691 *bidi_it = bidi_cache[i];
692 bidi_cache_last_idx = i;
693 /* Don't let scan direction from from the cached state override
694 the current scan direction. */
695 bidi_it->scan_dir = current_scan_dir;
696 return bidi_it->type;
697 }
698
699 return UNKNOWN_BT;
700}
701
702static inline int
703bidi_peek_at_next_level (struct bidi_it *bidi_it)
704{
705 if (bidi_cache_idx == 0 || bidi_cache_last_idx == -1)
706 abort ();
707 return bidi_cache[bidi_cache_last_idx + bidi_it->scan_dir].resolved_level;
708}
709
710/* Return non-zero if buffer's byte position POS is the last character
711 of a paragraph. THIS_CH is the character preceding the one at POS in
712 the buffer. */
713int
714bidi_at_paragraph_end (int this_ch, int pos)
715{
716 int next_ch = FETCH_CHAR (pos);
717
718 return (this_ch == '\n' && next_ch == '\n') || this_ch == BIDI_EOB;
719}
720
721/* Determine the start-of-run (sor) directional type given the two
722 embedding levels on either side of the run boundary. Also, update
723 the saved info about previously seen characters, since that info is
724 generally valid for a single level run. */
725static inline void
726bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after)
727{
728 int higher_level = level_before > level_after ? level_before : level_after;
729
730 /* The prev_was_pdf gork is required for when we have several PDFs
731 in a row. In that case, we want to compute the sor type for the
732 next level run only once: when we see the first PDF. That's
733 because the sor type depends only on the higher of the two levels
734 that we find on the two sides of the level boundary (see UAX#9,
735 clause X10), and so we don't need to know the final embedding
736 level to which we descend after processing all the PDFs. */
737 if (level_before < level_after || !bidi_it->prev_was_pdf)
738 /* FIXME: should the default sor direction be user selectable? */
739 bidi_it->sor = (higher_level & 1) != 0 ? R2L : L2R;
740 if (level_before > level_after)
741 bidi_it->prev_was_pdf = 1;
742
743 bidi_it->prev.type = UNKNOWN_BT;
744 bidi_it->last_strong.type = bidi_it->last_strong.orig_type =
745 bidi_it->last_strong.pristine_type = UNKNOWN_BT;
746 bidi_it->prev_for_neutral.type = bidi_it->sor == R2L ? STRONG_R : STRONG_L;
747 bidi_it->prev_for_neutral.charpos = bidi_it->charpos;
748 bidi_it->prev_for_neutral.bytepos = bidi_it->bytepos;
749 bidi_it->next_for_neutral.type = bidi_it->next_for_neutral.orig_type =
750 bidi_it->next_for_neutral.pristine_type = UNKNOWN_BT;
751 bidi_it->ignore_bn_limit = 0; /* meaning it's unknown */
752}
753
754void
755bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it)
756{
757 bidi_it->level_stack[0].level = 0;
758 if (dir == R2L)
759 bidi_it->level_stack[0].level = 1;
760 else if (dir == NEUTRAL_DIR) /* P2 */
761 {
762 bidi_type_t type;
763 int pos = bidi_it->charpos, bytepos = bidi_it->bytepos;
764 int ch;
765
766 if (pos < 0)
767 pos = bytepos = 0;
768 else if (bidi_it->ch != BIDI_EOB)
769 {
770 pos++;
771 bytepos += bidi_it->ch_len;
772 }
773
774 ch = FETCH_CHAR (bytepos);
775 pos++;
776 bytepos += CHAR_BYTES (ch);
777
778 /* FIXME: should actually go to where the paragraph begins and
779 start the loop below from there, since UAX#9 says to find the
780 first strong directional character in the paragraph. */
781
782 for (type = bidi_get_type (ch);
783 /* NOTE: UAX#9 says to search only for L, AL, or R types of
784 characters, and ignore RLE, RLO, LRE, and LRO. However,
785 I'm not sure it makes sense to omit those 4; should try
786 with and without that to see the effect. */
787 (bidi_get_category (type) != STRONG)
788 || (bidi_ignore_explicit_marks_for_paragraph_level
789 && (type == RLE || type == RLO
790 || type == LRE || type == LRO));
791 type = bidi_get_type (ch))
792 {
793 if (type == NEUTRAL_B || bidi_at_paragraph_end (ch, bytepos))
794 break;
795 FETCH_CHAR_ADVANCE (ch, pos, bytepos);
796 }
797 if (type == STRONG_R || type == STRONG_AL) /* P3 */
798 bidi_it->level_stack[0].level = 1;
799 }
800 bidi_it->scan_dir = 1; /* FIXME: do we need to have control on this? */
801 bidi_it->resolved_level = bidi_it->level_stack[0].level;
802 bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */
803 bidi_it->invalid_levels = 0;
804 bidi_it->invalid_rl_levels = -1;
805 bidi_it->new_paragraph = 0;
806 bidi_it->next_en_pos = -1;
807 bidi_it->next_for_ws.type = UNKNOWN_BT;
808 bidi_set_sor_type (bidi_it, bidi_it->level_stack[0].level, 0); /* X10 */
809
810 bidi_cache_reset ();
811}
812
813/* Do whatever UAX#9 clause X8 says should be done at paragraph's end,
814 and set the new paragraph flag in the iterator. */
815static inline void
816bidi_set_paragraph_end (struct bidi_it *bidi_it)
817{
818 bidi_it->invalid_levels = 0;
819 bidi_it->invalid_rl_levels = -1;
820 bidi_it->stack_idx = 0;
821 bidi_it->resolved_level = bidi_it->level_stack[0].level;
822 bidi_it->new_paragraph = 1;
823}
824
825/* Initialize the bidi iterator from buffer position POS for paragraph
826 direction DIR. Return the embedding level at POS. */
827void
828bidi_init_it (int pos, bidi_dir_t dir, struct bidi_it *bidi_it)
829{
830 if (! bidi_initialized)
831 bidi_initialize ();
832 bidi_set_paragraph_end (bidi_it);
833 bidi_it->charpos = pos;
834 if (pos <= 0)
835 {
836 bidi_it->bytepos = bidi_it->charpos;
837 bidi_it->ch_len = 1; /* so that incrementing bytepos works */
838 }
839 else
840 bidi_it->bytepos = CHAR_TO_BYTE (pos);
841 bidi_it->ch = '\x1d'; /* FIXME: should be U+2029 */
842 bidi_it->type = NEUTRAL_B;
843 bidi_it->orig_type = UNKNOWN_BT;
844 bidi_it->pristine_type = UNKNOWN_BT;
845 bidi_it->prev_was_pdf = 0;
846 bidi_it->prev.type = bidi_it->prev.orig_type = UNKNOWN_BT;
847 bidi_it->last_strong.type = bidi_it->last_strong.orig_type =
848 bidi_it->last_strong.pristine_type = UNKNOWN_BT;
849 bidi_it->next_for_neutral.charpos = -1;
850 bidi_it->next_for_neutral.type =
851 bidi_it->next_for_neutral.orig_type =
852 bidi_it->next_for_neutral.pristine_type = UNKNOWN_BT;
853 bidi_it->prev_for_neutral.charpos = -1;
854 bidi_it->prev_for_neutral.type =
855 bidi_it->prev_for_neutral.orig_type =
856 bidi_it->prev_for_neutral.pristine_type = UNKNOWN_BT;
857 bidi_it->sor = L2R; /* FIXME: should it be user-selectable? */
858 bidi_paragraph_init (dir, bidi_it);
859}
860
861/* Push the current embedding level and override status; reset the
862 current level to LEVEL and the current override status to OVERRIDE. */
863static inline void
864bidi_push_embedding_level (struct bidi_it *bidi_it,
865 int level, bidi_dir_t override)
866{
867 bidi_it->stack_idx++;
868 if (bidi_it->stack_idx >= BIDI_MAXLEVEL)
869 abort ();
870 bidi_it->level_stack[bidi_it->stack_idx].level = level;
871 bidi_it->level_stack[bidi_it->stack_idx].override = override;
872}
873
874/* Pop the embedding level and directional override status from the
875 stack, and return the new level. */
876static inline int
877bidi_pop_embedding_level (struct bidi_it *bidi_it)
878{
879 /* UAX#9 says to ignore invalid PDFs. */
880 if (bidi_it->stack_idx > 0)
881 bidi_it->stack_idx--;
882 return bidi_it->level_stack[bidi_it->stack_idx].level;
883}
884
885/* Record in SAVED_INFO the information about the current character. */
886static inline void
887bidi_remember_char (struct bidi_saved_info *saved_info,
888 struct bidi_it *bidi_it)
889{
890 saved_info->charpos = bidi_it->charpos;
891 saved_info->bytepos = bidi_it->bytepos;
892 saved_info->type = bidi_it->type;
893 saved_info->orig_type = bidi_it->orig_type;
894 saved_info->pristine_type = bidi_it->pristine_type;
895}
896
897/* Resolve the type of a neutral character according to the type of
898 surrounding strong text and the current embedding level. */
899static inline bidi_type_t
900bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev)
901{
902 /* N1: European and Arabic numbers are treated as though they were R. */
903 if (next_type == WEAK_EN || next_type == WEAK_AN)
904 next_type = STRONG_R;
905 if (prev_type == WEAK_EN || prev_type == WEAK_AN)
906 prev_type = STRONG_R;
907
908 if (next_type == prev_type) /* N1 */
909 return next_type;
910 else if ((lev & 1) == 0) /* N2 */
911 return STRONG_L;
912 else
913 return STRONG_R;
914}
915
916static inline int
917bidi_explicit_dir_char (int c)
918{
919 /* FIXME: this should be replaced with a lookup table with suitable
920 bits set, like standard C ctype macros do. */
921 return (c == LRE_CHAR || c == LRO_CHAR
922 || c == RLE_CHAR || c == RLO_CHAR || c == PDF_CHAR);
923}
924
925/* A helper function for bidi_resolve_explicit. It advances to the
926 next character in logical order and determines the new embedding
927 level and directional override, but does not take into account
928 empty embeddings. */
929static int
930bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
931{
932 int curchar;
933 bidi_type_t type;
934 int current_level;
935 int new_level;
936 bidi_dir_t override;
937
938 if (bidi_it->charpos < 0)
939 bidi_it->charpos = bidi_it->bytepos = 0;
940 else
941 {
942 bidi_it->charpos++;
943 bidi_it->bytepos += bidi_it->ch_len;
944 }
945
946 current_level = bidi_it->level_stack[bidi_it->stack_idx].level; /* X1 */
947 override = bidi_it->level_stack[bidi_it->stack_idx].override;
948 new_level = current_level;
949
950 /* in case it is a unibyte character (not yet implemented) */
951 /* _fetch_multibyte_char_len = 1; */
952 curchar = FETCH_CHAR (bidi_it->bytepos);
953 bidi_it->ch = curchar;
954 bidi_it->ch_len = CHAR_BYTES (curchar);
955
956 type = bidi_get_type (curchar);
957 bidi_it->pristine_type = type;
958
959 if (type != PDF)
960 bidi_it->prev_was_pdf = 0;
961
962 bidi_it->orig_type = UNKNOWN_BT;
963
964 switch (type)
965 {
966 case RLE: /* X2 */
967 case RLO: /* X4 */
968 bidi_it->orig_type = type;
969 type = WEAK_BN; /* X9/Retaining */
970 if (bidi_it->ignore_bn_limit <= 0)
971 {
972 if (current_level <= BIDI_MAXLEVEL - 4)
973 {
974 /* Compute the least odd embedding level greater than
975 the current level. */
976 new_level = ((current_level + 1) & ~1) + 1;
977 if (bidi_it->orig_type == RLE)
978 override = NEUTRAL_DIR;
979 else
980 override = R2L;
981 if (current_level == BIDI_MAXLEVEL - 4)
982 bidi_it->invalid_rl_levels = 0;
983 bidi_push_embedding_level (bidi_it, new_level, override);
984 }
985 else
986 {
987 bidi_it->invalid_levels++;
988 /* See the commentary about invalid_rl_levels below. */
989 if (bidi_it->invalid_rl_levels < 0)
990 bidi_it->invalid_rl_levels = 0;
991 bidi_it->invalid_rl_levels++;
992 }
993 }
994 else if (bidi_it->prev.orig_type == WEAK_EN /* W5/Retaining */
995 || bidi_it->next_en_pos > bidi_it->charpos)
996 type = WEAK_EN;
997 break;
998 case LRE: /* X3 */
999 case LRO: /* X5 */
1000 bidi_it->orig_type = type;
1001 type = WEAK_BN; /* X9/Retaining */
1002 if (bidi_it->ignore_bn_limit <= 0)
1003 {
1004 if (current_level <= BIDI_MAXLEVEL - 5)
1005 {
1006 /* Compute the least even embedding level greater than
1007 the current level. */
1008 new_level = ((current_level + 2) & ~1);
1009 if (bidi_it->orig_type == LRE)
1010 override = NEUTRAL_DIR;
1011 else
1012 override = L2R;
1013 bidi_push_embedding_level (bidi_it, new_level, override);
1014 }
1015 else
1016 {
1017 bidi_it->invalid_levels++;
1018 /* invalid_rl_levels counts invalid levels encountered
1019 while the embedding level was already too high for
1020 LRE/LRO, but not for RLE/RLO. That is because
1021 there may be exactly one PDF which we should not
1022 ignore even though invalid_levels is non-zero.
1023 invalid_rl_levels helps to know what PDF is
1024 that. */
1025 if (bidi_it->invalid_rl_levels >= 0)
1026 bidi_it->invalid_rl_levels++;
1027 }
1028 }
1029 else if (bidi_it->prev.orig_type == WEAK_EN /* W5/Retaining */
1030 || bidi_it->next_en_pos > bidi_it->charpos)
1031 type = WEAK_EN;
1032 break;
1033 case PDF: /* X7 */
1034 bidi_it->orig_type = type;
1035 type = WEAK_BN; /* X9/Retaining */
1036 if (bidi_it->ignore_bn_limit <= 0)
1037 {
1038 if (!bidi_it->invalid_rl_levels)
1039 {
1040 new_level = bidi_pop_embedding_level (bidi_it);
1041 bidi_it->invalid_rl_levels = -1;
1042 if (bidi_it->invalid_levels)
1043 bidi_it->invalid_levels--;
1044 /* else nothing: UAX#9 says to ignore invalid PDFs */
1045 }
1046 if (!bidi_it->invalid_levels)
1047 new_level = bidi_pop_embedding_level (bidi_it);
1048 else
1049 {
1050 bidi_it->invalid_levels--;
1051 bidi_it->invalid_rl_levels--;
1052 }
1053 }
1054 else if (bidi_it->prev.orig_type == WEAK_EN /* W5/Retaining */
1055 || bidi_it->next_en_pos > bidi_it->charpos)
1056 type = WEAK_EN;
1057 break;
1058 default:
1059 /* Nothing. */
1060 break;
1061 }
1062
1063 bidi_it->type = type;
1064
1065 return new_level;
1066}
1067
1068/* Given an iterator state in BIDI_IT, advance one character position
1069 in the buffer to the next character (in the logical order), resolve
1070 any explicit embeddings and directional overrides, and return the
1071 embedding level of the character after resolving explicit
1072 directives and ignoring empty embeddings. */
1073static int
1074bidi_resolve_explicit (struct bidi_it *bidi_it)
1075{
1076 int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1077 int new_level = bidi_resolve_explicit_1 (bidi_it);
1078
1079 if (prev_level < new_level
1080 && bidi_it->type == WEAK_BN
1081 && bidi_it->ignore_bn_limit == 0 /* only if not already known */
1082 && bidi_explicit_dir_char (FETCH_CHAR (bidi_it->bytepos
1083 + bidi_it->ch_len)))
1084 {
1085 /* Avoid pushing and popping embedding levels if the level run
1086 is empty, as this breaks level runs where it shouldn't.
1087 UAX#9 removes all the explicit embedding and override codes,
1088 so empty embeddings disappear without a trace. We need to
1089 behave as if we did the same. */
1090 struct bidi_it saved_it;
1091 int level = prev_level;
1092
1093 bidi_copy_it (&saved_it, bidi_it);
1094
1095 while (bidi_explicit_dir_char (FETCH_CHAR (bidi_it->bytepos
1096 + bidi_it->ch_len)))
1097 {
1098 level = bidi_resolve_explicit_1 (bidi_it);
1099 }
1100
1101 if (level == prev_level) /* empty embedding */
1102 saved_it.ignore_bn_limit = bidi_it->charpos + 1;
1103 else /* this embedding is non-empty */
1104 saved_it.ignore_bn_limit = -1;
1105
1106 bidi_copy_it (bidi_it, &saved_it);
1107 if (bidi_it->ignore_bn_limit > 0)
1108 {
1109 /* We pushed a level, but we shouldn't have. Undo that. */
1110 if (!bidi_it->invalid_rl_levels)
1111 {
1112 new_level = bidi_pop_embedding_level (bidi_it);
1113 bidi_it->invalid_rl_levels = -1;
1114 if (bidi_it->invalid_levels)
1115 bidi_it->invalid_levels--;
1116 }
1117 if (!bidi_it->invalid_levels)
1118 new_level = bidi_pop_embedding_level (bidi_it);
1119 else
1120 {
1121 bidi_it->invalid_levels--;
1122 bidi_it->invalid_rl_levels--;
1123 }
1124 }
1125 }
1126
1127 /* For when the paragraph end is defined by anything other than a
1128 special Unicode character (a.k.a. ``higher protocols''). */
1129 if (bidi_it->type != NEUTRAL_B)
1130 if (bidi_at_paragraph_end (bidi_it->ch,
1131 bidi_it->bytepos + bidi_it->ch_len))
1132 bidi_it->type = NEUTRAL_B;
1133
1134 if (bidi_it->type == NEUTRAL_B) /* X8 */
1135 {
1136 bidi_set_paragraph_end (bidi_it);
1137 bidi_it->orig_type = bidi_it->type; /* needed below and in L1 */
1138 }
1139
1140 return new_level;
1141}
1142
1143/* Advance in the buffer, resolve weak types and return the type of
1144 the next character after weak type resolution. */
1145bidi_type_t
1146bidi_resolve_weak (struct bidi_it *bidi_it)
1147{
1148 bidi_type_t type;
1149 bidi_dir_t override;
1150 int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1151 int new_level = bidi_resolve_explicit (bidi_it);
1152 int next_char;
1153 bidi_type_t type_of_next;
1154 struct bidi_it saved_it;
1155
1156 type = bidi_it->type;
1157 override = bidi_it->level_stack[bidi_it->stack_idx].override;
1158
1159 if (type == UNKNOWN_BT
1160 || type == LRE
1161 || type == LRO
1162 || type == RLE
1163 || type == RLO
1164 || type == PDF)
1165 abort ();
1166
1167 if (new_level != prev_level
1168 || bidi_it->type == NEUTRAL_B)
1169 {
1170 /* We've got a new embedding level run, compute the directional
1171 type of sor and initialize per-run variables (UAX#9, clause
1172 X10). */
1173 bidi_set_sor_type (bidi_it, prev_level, new_level);
1174 }
1175 else if (type == NEUTRAL_S || type == NEUTRAL_WS
1176 || type == WEAK_BN || type == STRONG_AL)
1177 bidi_it->orig_type = type; /* needed in L1 */
1178
1179 /* Level and directional override status are already recorded in
1180 bidi_it, and do not need any change; see X6. */
1181 if (override == R2L) /* X6 */
1182 type = STRONG_R;
1183 else if (override == L2R)
1184 type = STRONG_L;
1185 else if (type == STRONG_AL)
1186 type = STRONG_R; /* W3 */
1187 else if (type == WEAK_NSM) /* W1 */
1188 {
1189 /* Note that we don't need to consider the case where the prev
1190 character has its type overridden by an RLO or LRO: such
1191 characters are outside the current level run, and thus not
1192 relevant to this NSM. Thus, NSM gets the pristine_type of
1193 the previous character. */
1194 if (bidi_it->prev.type != UNKNOWN_BT)
1195 type = bidi_it->prev.pristine_type;
1196 else if (bidi_it->sor == R2L)
1197 type = STRONG_R;
1198 else if (bidi_it->sor == L2R)
1199 type = STRONG_L;
1200 else /* shouldn't happen! */
1201 abort ();
1202 if (type == WEAK_EN /* W2 after W1 */
1203 && bidi_it->last_strong.orig_type == STRONG_AL)
1204 type = WEAK_AN;
1205 }
1206 else if (type == WEAK_EN /* W2 */
1207 && bidi_it->last_strong.orig_type == STRONG_AL)
1208 type = WEAK_AN;
1209 else if ((type == WEAK_ES
1210 && (bidi_it->prev.orig_type == WEAK_EN /* W4 */
1211 && (bidi_it->prev.pristine_type == WEAK_EN
1212 || bidi_it->prev.pristine_type == WEAK_NSM))) /* aft W1 */
1213 || (type == WEAK_CS
1214 && ((bidi_it->prev.orig_type == WEAK_EN
1215 && (bidi_it->prev.pristine_type == WEAK_EN /* W4 */
1216 || bidi_it->prev.pristine_type == WEAK_NSM)) /* a/W1 */
1217 || bidi_it->prev.orig_type == WEAK_AN))) /* W4 */
1218 {
1219 next_char = FETCH_CHAR (bidi_it->bytepos + bidi_it->ch_len);
1220 type_of_next = bidi_get_type (next_char);
1221
1222 if (type_of_next == WEAK_BN
1223 || bidi_explicit_dir_char (next_char))
1224 {
1225 bidi_copy_it (&saved_it, bidi_it);
1226 while (bidi_resolve_explicit (bidi_it) == new_level
1227 && bidi_it->type == WEAK_BN)
1228 ;
1229 type_of_next = bidi_it->type;
1230 bidi_copy_it (bidi_it, &saved_it);
1231 }
1232
1233 /* If the next character is EN, but the last strong-type
1234 character is AL, that next EN will be changed to AN when we
1235 process it in W2 above. So in that case, this ES should not
1236 be changed into EN. */
1237 if (type == WEAK_ES
1238 && type_of_next == WEAK_EN
1239 && bidi_it->last_strong.orig_type != STRONG_AL)
1240 type = WEAK_EN;
1241 else if (type == WEAK_CS)
1242 {
1243 if (bidi_it->prev.orig_type == WEAK_AN
1244 && (type_of_next == WEAK_AN
1245 /* If the next character is EN, but the last
1246 strong-type character is AL, EN will be later
1247 changed to AN when we process it in W2 above. So
1248 in that case, this ES should not be changed into
1249 EN. */
1250 || (type_of_next == WEAK_EN
1251 && bidi_it->last_strong.orig_type == STRONG_AL)))
1252 type = WEAK_AN;
1253 else if (bidi_it->prev.orig_type == WEAK_EN
1254 && type_of_next == WEAK_EN
1255 && bidi_it->last_strong.orig_type != STRONG_AL)
1256 type = WEAK_EN;
1257 }
1258 }
1259 else if (type == WEAK_ET /* W5: ET with EN before or after it */
1260 || type == WEAK_BN) /* W5/Retaining */
1261 {
1262 if (bidi_it->prev.orig_type == WEAK_EN /* ET/BN with EN before it */
1263 || bidi_it->next_en_pos > bidi_it->charpos)
1264 type = WEAK_EN;
1265 /* W5: ET with EN after it. */
1266 else
1267 {
1268 int en_pos = bidi_it->charpos + 1;
1269
1270 next_char = FETCH_CHAR (bidi_it->bytepos + bidi_it->ch_len);
1271 type_of_next = bidi_get_type (next_char);
1272
1273 if (type_of_next == WEAK_ET
1274 || type_of_next == WEAK_BN
1275 || bidi_explicit_dir_char (next_char))
1276 {
1277 bidi_copy_it (&saved_it, bidi_it);
1278 while (bidi_resolve_explicit (bidi_it) == new_level
1279 && (bidi_it->type == WEAK_BN || bidi_it->type == WEAK_ET))
1280 ;
1281 type_of_next = bidi_it->type;
1282 en_pos = bidi_it->charpos;
1283 bidi_copy_it (bidi_it, &saved_it);
1284 }
1285 if (type_of_next == WEAK_EN)
1286 {
1287 /* If the last strong character is AL, the EN we've
1288 found will become AN when we get to it (W2). */
1289 if (bidi_it->last_strong.orig_type != STRONG_AL)
1290 {
1291 type = WEAK_EN;
1292 /* Remember this EN position, to speed up processing
1293 of the next ETs. */
1294 bidi_it->next_en_pos = en_pos;
1295 }
1296 else if (type == WEAK_BN)
1297 type = NEUTRAL_ON; /* W6/Retaining */
1298 }
1299 }
1300 }
1301
1302 if (type == WEAK_ES || type == WEAK_ET || type == WEAK_CS /* W6 */
1303 || (type == WEAK_BN && (bidi_it->prev.orig_type == WEAK_CS /* W6/Ret. */
1304 || bidi_it->prev.orig_type == WEAK_ES
1305 || bidi_it->prev.orig_type == WEAK_ET)))
1306 type = NEUTRAL_ON;
1307
1308 /* Store the type we've got so far, before we clobber it with strong
1309 types in W7 and while resolving neutral types. But leave alone
1310 the original types that were recorded above, because we will need
1311 them for the L1 clause. */
1312 if (bidi_it->orig_type == UNKNOWN_BT)
1313 bidi_it->orig_type = type;
1314
1315 if (type == WEAK_EN) /* W7 */
1316 {
1317 if ((bidi_it->last_strong.orig_type == STRONG_L)
1318 || (bidi_it->last_strong.type == UNKNOWN_BT && bidi_it->sor == L2R))
1319 type = STRONG_L;
1320 }
1321
1322 bidi_it->type = type;
1323 return type;
1324}
1325
1326bidi_type_t
1327bidi_resolve_neutral (struct bidi_it *bidi_it)
1328{
1329 int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1330 bidi_type_t type = bidi_resolve_weak (bidi_it);
1331 int current_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1332
1333 if (!(type == STRONG_R
1334 || type == STRONG_L
1335 || type == WEAK_BN
1336 || type == WEAK_EN
1337 || type == WEAK_AN
1338 || type == NEUTRAL_B
1339 || type == NEUTRAL_S
1340 || type == NEUTRAL_WS
1341 || type == NEUTRAL_ON))
1342 abort ();
1343
1344 if (bidi_get_category (type) == NEUTRAL
1345 || (type == WEAK_BN && prev_level == current_level))
1346 {
1347 if (bidi_it->next_for_neutral.type != UNKNOWN_BT)
1348 type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1349 bidi_it->next_for_neutral.type,
1350 current_level);
1351 else
1352 {
1353 /* Arrrgh!! The UAX#9 algorithm is too deeply entrenched in
1354 the assumption of batch-style processing; see clauses W4,
1355 W5, and especially N1, which require to look far forward
1356 (as well as back) in the buffer. May the fleas of a
1357 thousand camels infest the armpits of those who design
1358 supposedly general-purpose algorithms by looking at their
1359 own implementations, and fail to consider other possible
1360 implementations! */
1361 struct bidi_it saved_it;
1362 bidi_type_t next_type;
1363
1364 if (bidi_it->scan_dir == -1)
1365 abort ();
1366
1367 bidi_copy_it (&saved_it, bidi_it);
1368 /* Scan the text forward until we find the first non-neutral
1369 character, and then use that to resolve the neutral we
1370 are dealing with now. We also cache the scanned iterator
1371 states, to salvage some of the effort later. */
1372 bidi_cache_iterator_state (bidi_it, 0);
1373 do {
1374 /* Record the info about the previous character, so that
1375 it will be cached below with this state. */
1376 if (bidi_it->orig_type != WEAK_BN /* W1/Retaining */
1377 && bidi_it->type != WEAK_BN)
1378 bidi_remember_char (&bidi_it->prev, bidi_it);
1379 type = bidi_resolve_weak (bidi_it);
1380 /* Paragraph separators have their levels fully resolved
1381 at this point, so cache them as resolved. */
1382 bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B);
1383 /* FIXME: implement L1 here, by testing for a newline and
1384 resetting the level for any sequence of whitespace
1385 characters adjacent to it. */
1386 } while (!(type == NEUTRAL_B
1387 || (type != WEAK_BN
1388 && bidi_get_category (type) != NEUTRAL)
1389 /* This is all per level run, so stop when we
1390 reach the end of this level run. */
1391 || bidi_it->level_stack[bidi_it->stack_idx].level !=
1392 current_level));
1393
1394 bidi_remember_char (&saved_it.next_for_neutral, bidi_it);
1395
1396 switch (type)
1397 {
1398 case STRONG_L:
1399 case STRONG_R:
1400 case STRONG_AL:
1401 next_type = type;
1402 break;
1403 case WEAK_EN:
1404 case WEAK_AN:
1405 /* N1: ``European and Arabic numbers are treated as
1406 though they were R.'' */
1407 next_type = STRONG_R;
1408 saved_it.next_for_neutral.type = STRONG_R;
1409 break;
1410 case WEAK_BN:
1411 if (!bidi_explicit_dir_char (bidi_it->ch))
1412 abort (); /* can't happen: BNs are skipped */
1413 /* FALLTHROUGH */
1414 case NEUTRAL_B:
1415 /* Marched all the way to the end of this level run.
1416 We need to use the eor type, whose information is
1417 stored by bidi_set_sor_type in the prev_for_neutral
1418 member. */
1419 if (saved_it.type != WEAK_BN
1420 || bidi_get_category (bidi_it->prev.orig_type) == NEUTRAL)
1421 {
1422 next_type = bidi_it->prev_for_neutral.type;
1423 saved_it.next_for_neutral.type = next_type;
1424 }
1425 else
1426 {
1427 /* This is a BN which does not adjoin neutrals.
1428 Leave its type alone. */
1429 bidi_copy_it (bidi_it, &saved_it);
1430 return bidi_it->type;
1431 }
1432 break;
1433 default:
1434 abort ();
1435 }
1436 type = bidi_resolve_neutral_1 (saved_it.prev_for_neutral.type,
1437 next_type, current_level);
1438 saved_it.type = type;
1439 bidi_copy_it (bidi_it, &saved_it);
1440 }
1441 }
1442 return type;
1443}
1444
1445/* Given an iterator state in BIDI_IT, advance one character position
1446 in the buffer to the next character (in the logical order), resolve
1447 the bidi type of that next character, and return that type. */
1448bidi_type_t
1449bidi_type_of_next_char (struct bidi_it *bidi_it)
1450{
1451 bidi_type_t type;
1452
1453 /* This should always be called during a forward scan. */
1454 if (bidi_it->scan_dir != 1)
1455 abort ();
1456
1457 /* Reset the limit until which to ignore BNs if we step out of the
1458 area where we found only empty levels. */
1459 if ((bidi_it->ignore_bn_limit > 0
1460 && bidi_it->ignore_bn_limit <= bidi_it->charpos)
1461 || (bidi_it->ignore_bn_limit == -1
1462 && !bidi_explicit_dir_char (bidi_it->ch)))
1463 bidi_it->ignore_bn_limit = 0;
1464
1465 type = bidi_resolve_neutral (bidi_it);
1466
1467 return type;
1468}
1469
1470/* Given an iterator state BIDI_IT, advance one character position in
1471 the buffer to the next character (in the logical order), resolve
1472 the embedding and implicit levels of that next character, and
1473 return the resulting level. */
1474int
1475bidi_level_of_next_char (struct bidi_it *bidi_it)
1476{
1477 bidi_type_t type;
1478 int level, prev_level = -1;
1479 struct bidi_saved_info next_for_neutral;
1480
1481 if (bidi_it->scan_dir == 1)
1482 {
1483 /* There's no sense in trying to advance if we hit end of text. */
1484 if (bidi_it->ch == BIDI_EOB)
1485 return bidi_it->resolved_level;
1486
1487 /* Record the info about the previous character. */
1488 if (bidi_it->orig_type != WEAK_BN /* W1/Retaining */
1489 && bidi_it->type != WEAK_BN)
1490 bidi_remember_char (&bidi_it->prev, bidi_it);
1491 if (bidi_it->orig_type == STRONG_R || bidi_it->orig_type == STRONG_L
1492 || bidi_it->orig_type == STRONG_AL)
1493 bidi_remember_char (&bidi_it->last_strong, bidi_it);
1494 /* FIXME: it sounds like we don't need both prev and
1495 prev_for_neutral members, but I'm leaving them both for now. */
1496 if (bidi_it->type == STRONG_R || bidi_it->type == STRONG_L
1497 || bidi_it->type == WEAK_EN || bidi_it->type == WEAK_AN)
1498 bidi_remember_char (&bidi_it->prev_for_neutral, bidi_it);
1499
1500 /* If we overstepped the characters used for resolving neutrals
1501 and whitespace, invalidate their info in the iterator. */
1502 if (bidi_it->charpos >= bidi_it->next_for_neutral.charpos)
1503 bidi_it->next_for_neutral.type = UNKNOWN_BT;
1504 if (bidi_it->next_en_pos >= 0
1505 && bidi_it->charpos >= bidi_it->next_en_pos)
1506 bidi_it->next_en_pos = -1;
1507 if (bidi_it->next_for_ws.type != UNKNOWN_BT
1508 && bidi_it->charpos >= bidi_it->next_for_ws.charpos)
1509 bidi_it->next_for_ws.type = UNKNOWN_BT;
1510
1511 /* This must be taken before we fill the iterator with the info
1512 about the next char. If we scan backwards, the iterator
1513 state must be already cached, so there's no need to know the
1514 embedding level of the previous character, since we will be
1515 returning to our caller shortly. */
1516 prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1517 }
1518 next_for_neutral = bidi_it->next_for_neutral;
1519
1520 /* Perhaps it is already cached. */
1521 type = bidi_cache_find (bidi_it->charpos + bidi_it->scan_dir, -1, bidi_it);
1522 if (type != UNKNOWN_BT)
1523 {
1524 /* Don't lose the information for resolving neutrals! The
1525 cached states could have been cached before their
1526 next_for_neutral member was computed. If we are on our way
1527 forward, we can simply take the info from the previous
1528 state. */
1529 if (bidi_it->scan_dir == 1
1530 && bidi_it->next_for_neutral.type == UNKNOWN_BT)
1531 bidi_it->next_for_neutral = next_for_neutral;
1532
1533 /* If resolved_level is -1, it means this state was cached
1534 before it was completely resolved, so we cannot return
1535 it. */
1536 if (bidi_it->resolved_level != -1)
1537 return bidi_it->resolved_level;
1538 }
1539 if (bidi_it->scan_dir == -1)
1540 /* If we are going backwards, the iterator state is already cached
1541 from previous scans, and should be fully resolved. */
1542 abort ();
1543
1544 if (type == UNKNOWN_BT)
1545 type = bidi_type_of_next_char (bidi_it);
1546
1547 if (type == NEUTRAL_B)
1548 return bidi_it->resolved_level;
1549
1550 level = bidi_it->level_stack[bidi_it->stack_idx].level;
1551 if ((bidi_get_category (type) == NEUTRAL /* && type != NEUTRAL_B */)
1552 || (type == WEAK_BN && prev_level == level))
1553 {
1554 if (bidi_it->next_for_neutral.type == UNKNOWN_BT)
1555 abort ();
1556
1557 /* If the cached state shows a neutral character, it was not
1558 resolved by bidi_resolve_neutral, so do it now. */
1559 type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1560 bidi_it->next_for_neutral.type,
1561 level);
1562 }
1563
1564 if (!(type == STRONG_R
1565 || type == STRONG_L
1566 || type == WEAK_BN
1567 || type == WEAK_EN
1568 || type == WEAK_AN))
1569 abort ();
1570 bidi_it->type = type;
1571
1572 /* For L1 below, we need to know, for each WS character, whether
1573 it belongs to a sequence of WS characters preceeding a newline
1574 or a TAB or a paragraph separator. */
1575 if (bidi_it->pristine_type == NEUTRAL_WS
1576 && bidi_it->next_for_ws.type == UNKNOWN_BT)
1577 {
1578 int ch;
1579 int clen = bidi_it->ch_len;
1580 int bpos = bidi_it->bytepos;
1581 int cpos = bidi_it->charpos;
1582 bidi_type_t chtype;
1583
1584 do {
1585 /*_fetch_multibyte_char_len = 1;*/
1586 ch = FETCH_CHAR (bpos + clen);
1587 bpos += clen;
1588 cpos++;
1589 clen = CHAR_BYTES (ch);
1590 if (ch == '\n' /* || ch == LINESEP_CHAR */)
1591 chtype = NEUTRAL_B;
1592 else
1593 chtype = bidi_get_type (ch);
1594 } while (chtype == NEUTRAL_WS || chtype == WEAK_BN
1595 || bidi_explicit_dir_char (ch)); /* L1/Retaining */
1596 bidi_it->next_for_ws.type = chtype;
1597 bidi_it->next_for_ws.charpos = cpos;
1598 bidi_it->next_for_ws.bytepos = bpos;
1599 }
1600
1601 /* Resolve implicit levels, with a twist: PDFs get the embedding
1602 level of the enbedding they terminate. See below for the
1603 reason. */
1604 if (bidi_it->pristine_type == PDF
1605 /* Don't do this if this formatting code didn't change the
1606 embedding level due to invalid or empty embeddings. */
1607 && prev_level != level)
1608 {
1609 /* Don't look in UAX#9 for the reason for this: it's our own
1610 private quirk. The reason is that we want the formatting
1611 codes to be delivered so that they bracket the text of their
1612 embedding. For example, given the text
1613
1614 {RLO}teST{PDF}
1615
1616 we want it to be displayed as
1617
1618 {RLO}STet{PDF}
1619
1620 not as
1621
1622 STet{RLO}{PDF}
1623
1624 which will result because we buump up the embedding level as
1625 soon as we see the RLO and pop it as soon as we see the PDF,
1626 so RLO itself has the same embedding level as "teST", and
1627 thus would be normally delivered last, just before the PDF.
1628 The switch below fiddles with the level of PDF so that this
1629 ugly side effect does not happen.
1630
1631 (This is, of course, only important if the formatting codes
1632 are actually displayed, but Emacs does display them if the
1633 user wants to.) */
1634 level = prev_level;
1635 }
1636 else if (bidi_it->pristine_type == NEUTRAL_B /* L1 */
1637 || bidi_it->pristine_type == NEUTRAL_S
1638 || bidi_it->ch == '\n' /* || bidi_it->ch == LINESEP_CHAR */
1639 || (bidi_it->pristine_type == NEUTRAL_WS
1640 && (bidi_it->next_for_ws.type == NEUTRAL_B
1641 || bidi_it->next_for_ws.type == NEUTRAL_S)))
1642 level = bidi_it->level_stack[0].level;
1643 else if ((level & 1) == 0) /* I1 */
1644 {
1645 if (type == STRONG_R)
1646 level++;
1647 else if (type == WEAK_EN || type == WEAK_AN)
1648 level += 2;
1649 }
1650 else /* I2 */
1651 {
1652 if (type == STRONG_L || type == WEAK_EN || type == WEAK_AN)
1653 level++;
1654 }
1655
1656 bidi_it->resolved_level = level;
1657 return level;
1658}
1659
1660/* Move to the other edge of a level given by LEVEL. If END_FLAG is
1661 non-zero, we are at the end of a level, and we need to prepare to
1662 resume the scan of the lower level.
1663
1664 If this level's other edge is cached, we simply jump to it, filling
1665 the iterator structure with the iterator state on the other edge.
1666 Otherwise, we walk the buffer until we come back to the same level
1667 as LEVEL.
1668
1669 Note: we are not talking here about a ``level run'' in the UAX#9
1670 sense of the term, but rather about a ``level'' which includes
1671 all the levels higher than it. In other words, given the levels
1672 like this:
1673
1674 11111112222222333333334443343222222111111112223322111
1675 A B C
1676
1677 and assuming we are at point A scanning left to right, this
1678 function moves to point C, whereas the UAX#9 ``level 2 run'' ends
1679 at point B. */
1680static void
1681bidi_find_other_level_edge (struct bidi_it *bidi_it, int level, int end_flag)
1682{
1683 int dir = end_flag ? -bidi_it->scan_dir : bidi_it->scan_dir;
1684 int idx;
1685
1686 /* Try the cache first. */
1687 if ((idx = bidi_cache_find_level_change (level, dir, end_flag)) >= 0)
1688 bidi_cache_fetch_state (idx, bidi_it);
1689 else
1690 {
1691 int new_level;
1692
1693 if (end_flag)
1694 abort (); /* if we are at end of level, its edges must be cached */
1695
1696 bidi_cache_iterator_state (bidi_it, 1);
1697 do {
1698 new_level = bidi_level_of_next_char (bidi_it);
1699 bidi_cache_iterator_state (bidi_it, 1);
1700 } while (new_level >= level);
1701 }
1702}
1703
1704void
1705bidi_get_next_char_visually (struct bidi_it *bidi_it)
1706{
1707 int old_level, new_level, next_level;
1708 struct bidi_it prev_bidi_it;
1709
1710 if (bidi_it->scan_dir == 0)
1711 {
1712 bidi_it->scan_dir = 1; /* default to logical order */
1713 }
1714
1715 if (bidi_it->new_paragraph)
1716 bidi_paragraph_init (bidi_overriding_paragraph_direction, bidi_it);
1717 if (bidi_cache_idx == 0)
1718 bidi_copy_it (&prev_bidi_it, bidi_it);
1719
1720 old_level = bidi_it->resolved_level;
1721 new_level = bidi_level_of_next_char (bidi_it);
1722 if (bidi_it->ch == BIDI_EOB)
1723 return;
1724
1725 /* Reordering of resolved levels (clause L2) is implemented by
1726 jumping to the other edge of the level and flipping direction of
1727 scanning the buffer whenever we find a level change. */
1728 if (new_level != old_level)
1729 {
1730 int ascending = new_level > old_level;
1731 int level_to_search = ascending ? old_level + 1 : old_level;
1732 int incr = ascending ? 1 : -1;
1733 int expected_next_level = old_level + incr;
1734
1735 /* If we don't have anything cached yet, we need to cache the
1736 previous character we've seen, since we'll need it to record
1737 where to jump when the last non-base level is exhausted. */
1738 if (bidi_cache_idx == 0)
1739 bidi_cache_iterator_state (&prev_bidi_it, 1);
1740 /* Jump (or walk) to the other edge of this level. */
1741 bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
1742 /* Switch scan direction and peek at the next character in the
1743 new direction. */
1744 bidi_it->scan_dir = -bidi_it->scan_dir;
1745
1746 /* The following loop handles the case where the resolved level
1747 jumps by more than one. This is typical for numbers inside a
1748 run of text with left-to-right embedding direction, but can
1749 also happen in other situations. In those cases the decision
1750 where to continue after a level change, and in what direction,
1751 is tricky. For example, given a text like below:
1752
1753 abcdefgh
1754 11336622
1755
1756 (where the numbers below the text show the resolved levels),
1757 the result of reordering according to UAX#9 should be this:
1758
1759 efdcghba
1760
1761 This is implemented by the loop below which flips direction
1762 and jumps to the other edge of the level each time it finds
1763 the new level not to be the expected one. The expected level
1764 is always one more or one less than the previous one. */
1765 next_level = bidi_peek_at_next_level (bidi_it);
1766 while (next_level != expected_next_level)
1767 {
1768 expected_next_level += incr;
1769 level_to_search += incr;
1770 bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
1771 bidi_it->scan_dir = -bidi_it->scan_dir;
1772 next_level = bidi_peek_at_next_level (bidi_it);
1773 }
1774
1775 /* Finally, deliver the next character in the new direction. */
1776 next_level = bidi_level_of_next_char (bidi_it);
1777 }
1778
1779 if (bidi_it->scan_dir == 1 && bidi_cache_idx)
1780 {
1781 /* If we are at paragraph's base embedding level and beyond the
1782 last cached position, the cache's job is done and we can
1783 discard it. */
1784 if (bidi_it->resolved_level == bidi_it->level_stack[0].level
1785 && bidi_it->charpos > bidi_cache[bidi_cache_idx - 1].charpos)
1786 bidi_cache_reset ();
1787 /* But as long as we are caching during forward scan, we must
1788 cache each state, or else the cache integrity will be
1789 compromised: it assumes cached states correspond to buffer
1790 positions 1:1. */
1791 else
1792 bidi_cache_iterator_state (bidi_it, 1);
1793 }
1794}
1795
1796void
1797bidi_dump_cached_states (void)
1798{
1799 int i;
1800 int ndigits = 1;
1801
1802 if (bidi_cache_idx == 0)
1803 {
1804 fprintf (stderr, "The cache is empty.\n");
1805 return;
1806 }
1807 fprintf (stderr, "Total of %d state%s in cache:\n",
1808 bidi_cache_idx, bidi_cache_idx == 1 ? "" : "s");
1809
1810 for (i = bidi_cache[bidi_cache_idx - 1].charpos; i > 0; i /= 10)
1811 ndigits++;
1812 fputs ("ch ", stderr);
1813 for (i = 0; i < bidi_cache_idx; i++)
1814 fprintf (stderr, "%*c", ndigits, bidi_cache[i].ch);
1815 fputs ("\n", stderr);
1816 fputs ("lvl ", stderr);
1817 for (i = 0; i < bidi_cache_idx; i++)
1818 fprintf (stderr, "%*d", ndigits, bidi_cache[i].resolved_level);
1819 fputs ("\n", stderr);
1820 fputs ("pos ", stderr);
1821 for (i = 0; i < bidi_cache_idx; i++)
1822 fprintf (stderr, "%*d", ndigits, bidi_cache[i].charpos);
1823 fputs ("\n", stderr);
1824}
1825
1826#ifdef TEST_STANDALONE
1827
1828#include <stdio.h>
1829#include <sys/stat.h>
1830#include <signal.h>
1831
1832static char display_line[80];
1833static int simulate_display;
1834static int incr = 1;
1835
1836void
1837signal_catcher (int sig)
1838{
1839 if (simulate_display)
1840 puts (display_line);
1841 else
1842 {
1843 puts ("<");
1844 fflush (stdout);
1845 }
1846 signal (sig, SIG_DFL);
1847 raise (sig);
1848}
1849
1850void
1851put_line (char *p)
1852{
1853 if (simulate_display)
1854 {
1855 if (incr == -1)
1856 {
1857 if (p >= display_line)
1858 memset (display_line, ' ', p - display_line + 1);
1859 }
1860 else
1861 *p = '\0';
1862 fputs (display_line, stdout);
1863 }
1864 fflush (stdout);
1865}
1866
1867char *
1868init_display_direction (bidi_dir_t default_dir, int base_level)
1869{
1870 char *p;
1871
1872 /* To which display margin should we flush the lines? */
1873 switch (default_dir)
1874 {
1875 case NEUTRAL_DIR:
1876 if ((base_level & 1) == 0)
1877 {
1878 p = display_line;
1879 incr = 1;
1880 }
1881 else
1882 {
1883 p = display_line + sizeof (display_line) - 1;
1884 *p-- = '\0';
1885 incr = -1;
1886 }
1887 break;
1888 case L2R:
1889 p = display_line;
1890 incr = 1;
1891 break;
1892 case R2L:
1893 p = display_line + sizeof (display_line) - 1;
1894 *p-- = '\0';
1895 incr = -1;
1896 break;
1897 default:
1898 abort ();
1899 }
1900
1901 return p;
1902}
1903
1904static char *
1905continuation_line (char *p, int need)
1906{
1907 if (incr == -1)
1908 {
1909 if (p < display_line + need)
1910 {
1911 *p-- = '/';
1912 put_line (p);
1913 putc ('\n', stdout);
1914 memset (display_line, '>', sizeof(display_line) - 1);
1915 p = display_line + sizeof (display_line) - 1;
1916 *p-- = '\0';
1917 }
1918 }
1919 else
1920 {
1921 if (p > display_line + sizeof(display_line) - need - 2)
1922 {
1923 *p++ = '\\';
1924 put_line (p);
1925 putc ('\n', stdout);
1926 memset (display_line, '<', sizeof(display_line) - 1);
1927 p = display_line;
1928 }
1929 }
1930
1931 return p;
1932}
1933
1934int main (int argc, char *argv[])
1935{
1936 bidi_dir_t default_dir = NEUTRAL_DIR;
1937 char lots_of_equals[] = "\n===============================================================================\n";
1938
1939
1940 if (argc > 1 && argv[1][0] == '-')
1941 {
1942 switch (argv[1][1])
1943 {
1944 case 'R':
1945 default_dir = R2L;
1946 simulate_display = 1;
1947 ++argv;
1948 break;
1949 case 'L':
1950 default_dir = L2R;
1951 simulate_display = 1;
1952 ++argv;
1953 break;
1954 case 'N':
1955 simulate_display = 1;
1956 ++argv;
1957 break;
1958 default:
1959 break;
1960 }
1961 bidi_overriding_paragraph_direction = default_dir;
1962 }
1963
1964 for (argv++; *argv; argv++)
1965 {
1966 FILE *in = fopen (*argv, "rb");
1967 struct stat stat_buf;
1968 struct bidi_it iterator;
1969 size_t i;
1970 char *p = display_line;
1971 int base_level;
1972 unsigned char *s, *d, *s_end;
1973
1974 if (!in || stat (*argv, &stat_buf))
1975 {
1976 perror (*argv);
1977 continue;
1978 }
1979
1980 if (stat_buf.st_size > input_buf_size)
1981 {
1982 input_buf = realloc (input_buf, stat_buf.st_size + 1);
1983 if (!input_buf)
1984 {
1985 perror ("realloc input buffer");
1986 continue;
1987 }
1988 input_buf_size = stat_buf.st_size;
1989 }
1990 if (fread (input_buf, 1, stat_buf.st_size, in) != stat_buf.st_size)
1991 {
1992 perror ("reading input");
1993 continue;
1994 }
1995 input_buf[stat_buf.st_size] = '\0';
1996 for (d = s = input_buf, s_end = s + stat_buf.st_size - 1; *s; s++)
1997 {
1998 if (*s != '\r' || s >= s_end || s[1] != '\n')
1999 *d++ = *s;
2000 }
2001 stat_buf.st_size = d - input_buf;
2002 input_buf[stat_buf.st_size] = '\0';
2003
2004 /* Done with administrivia, now for some real work... */
2005 signal (SIGABRT, signal_catcher);
2006 signal (SIGINT, signal_catcher);
2007 bidi_init_it (-1, default_dir, &iterator);
2008 if (simulate_display)
2009 {
2010 p = init_display_direction (default_dir,
2011 iterator.level_stack[0].level);
2012 }
2013
2014 memset (display_line, incr == -1 ? '>' : '<', sizeof (display_line) - 1);
2015 display_line[sizeof (display_line) - 1] = '\0';
2016 base_level = iterator.level_stack[0].level;
2017
2018 for (i = 0; i <= stat_buf.st_size; i++)
2019 {
2020 int c;
2021
2022 bidi_get_next_char_visually (&iterator);
2023 c = iterator.ch;
2024
2025 if (c == '\n' || c == BIDI_EOB)
2026 {
2027 if (simulate_display)
2028 {
2029 put_line (p);
2030 /* FIXME: if -R or -L, need to init paragraph here. */
2031 }
2032 if (c == BIDI_EOB)
2033 break;
2034 putc (c, stdout);
2035 }
2036 else if (c >= LRE_CHAR && c <= LRM_CHAR)
2037 {
2038 if (simulate_display)
2039 {
2040 p = continuation_line (p, 5);
2041 if (incr == -1)
2042 {
2043 memcpy (p - 4, bidi_name[c], 5);
2044 p -= 5;
2045 }
2046 else
2047 {
2048 memcpy (p, bidi_name[c], 5);
2049 p += 5;
2050 }
2051 }
2052 else
2053 fputs (bidi_name[c], stdout);
2054 }
2055 else if (c < ' ')
2056 {
2057 if (simulate_display)
2058 {
2059 p = continuation_line (p, 2);
2060 if (incr == -1)
2061 {
2062 *p-- = '@' + c;
2063 *p-- = '^';
2064 }
2065 else
2066 {
2067 *p++ = '^';
2068 *p++ = '@' + c;
2069 }
2070 }
2071 else
2072 printf ("^%c", (c | 0x40));
2073 }
2074 else
2075 {
2076 int c1 = (iterator.type == STRONG_R) ? bidi_mirror_char (c) : c;
2077
2078 if (simulate_display)
2079 {
2080 p = continuation_line (p, 1);
2081 *p = c1;
2082 p += incr;
2083 }
2084 else
2085 putc (c1, stdout);
2086 }
2087
2088 if (iterator.ch == '\n')
2089 {
2090 if (base_level != iterator.level_stack[0].level)
2091 base_level = iterator.level_stack[0].level;
2092 p = init_display_direction (default_dir, base_level);
2093 memset (display_line, incr == -1 ? '>' : '<',
2094 sizeof (display_line) - 1);
2095 }
2096 }
2097 fputs (lots_of_equals, stdout);
2098 fclose (in);
2099 }
2100 return 0;
2101}
2102#endif