about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Kenichi Handa 2010-03-30 21:46:31 +0900
committer Kenichi Handa 2010-03-30 21:46:31 +0900
commit 42763dda64f1bf29678dd66b9aaadffb7014ea38 (patch)
tree d15c91f50a1a8f2798bd0f3640e630dae2bc41b8
parent 9ae6e189241f0b917d9b9c299738c79240151c59 (diff)
download emacs-42763dda64f1bf29678dd66b9aaadffb7014ea38.tar.gz
download emacs-42763dda64f1bf29678dd66b9aaadffb7014ea38.zip
Fix Indic composable patterns for the new Unicode specification.
-rw-r--r-- lisp/ChangeLog 14
-rw-r--r-- lisp/language/indian.el 248
-rw-r--r-- lisp/language/sinhala.el 12
3 files changed, 227 insertions, 47 deletions
diff --git a/lisp/ChangeLog b/lisp/ChangeLog
index bbe764b6b4a..a86e387c372 100644
--- a/lisp/ChangeLog
+++ b/lisp/ChangeLog
@@ -1,3 +1,17 @@
1 2010-03-30 Kenichi Handa <handa@m17n.org>
2
3 * language/sinhala.el (composition-function-table): Fix regexp for
4 the new Unicode specification.
5
6 * language/indian.el (devanagari-composable-pattern)
7 (tamil-composable-pattern, kannada-composable-pattern)
8 (malayalam-composable-pattern): Adjusted for the new Unicode
9 specification.
10 (bengali-composable-pattern, gurmukhi-composable-pattern)
11 (gujarati-composable-pattern, oriya-composable-pattern)
12 (telugu-composable-pattern): New variables to cope with the new
13 Unicode specification. Use them in composition-function-table.
14
1 2010-03-29 Stefan Monnier <monnier@iro.umontreal.ca> 15 2010-03-29 Stefan Monnier <monnier@iro.umontreal.ca>
2 16
3 Make tmm-menubar work for the Buffers menu again. 17 Make tmm-menubar work for the Buffers menu again.
diff --git a/lisp/language/indian.el b/lisp/language/indian.el
index d8d7d1a6269..dd5bf2960b1 100644
--- a/lisp/language/indian.el
+++ b/lisp/language/indian.el
@@ -140,28 +140,25 @@ South Indian language Malayalam is supported in this language environment."))
140 140
141(defconst devanagari-composable-pattern 141(defconst devanagari-composable-pattern
142 (let ((table 142 (let ((table
143 '(("V" . "[\u0904-\u0914\u0960-\u0961\u0972]") ; independent vowel 143 '(("a" . "[\u0900-\u0902]") ; vowel modifier (above)
144 ("C" . "[\u0915-\u0939\u0958-\u095F\u097B-\u097C\u097E-\u097F]") ; consonant 144 ("A" . "\u0903") ; vowel modifier (post)
145 ("R" . "\u0930") ; RA 145 ("V" . "[\u0904-\u0914\u0960-\u0961\u0972]") ; independent vowel
146 ("n" . "\u093C") ; NUKTA 146 ("C" . "[\u0915-\u0939\u0958-\u095F\u0979-\u097F]") ; consonant
147 ("H" . "\u094D") ; HALANT 147 ("R" . "\u0930") ; RA
148 ("m" . "\u093F") ; vowel sign (pre) 148 ("n" . "\u093C") ; NUKTA
149 ("u" . "[\u0945-\u0948\u0955]") ; vowel sign (above) 149 ("v" . "[\u093E-\u094C\u094E\u0955\u0962-\u0963]") ; vowel sign
150 ("b" . "[\u0941-\u0944\u0962-\u0963]") ; vowel sign (below) 150 ("H" . "\u094D") ; HALANT
151 ("p" . "[\u093E\u0940\u0949-\u094C]") ; vowel sign (post) 151 ("s" . "[\u0951-\u0952]") ; stress sign
152 ("A" . "[\u0900-\u0902\u0953-\u0954]") ; vowel modifier (above) 152 ("t" . "[\u0953-\u0954]") ; accent
153 ("a" . "\u0903") ; vowel modifier (post) 153 ("N" . "\u200C") ; ZWNJ
154 ("S" . "\u0951") ; stress sign (above) 154 ("J" . "\u200D") ; ZWJ
155 ("s" . "\u0952") ; stress sign (below) 155 ("X" . "[\u0900-\u097F]")))) ; all coverage
156 ("N" . "\u200C") ; ZWNJ
157 ("J" . "\u200D") ; ZWJ
158 ("X" . "[\u0900-\u097F]")))) ; all coverage
159 (indian-compose-regexp 156 (indian-compose-regexp
160 (concat 157 (concat
161 ;; syllables with an independent vowel, or 158 ;; syllables with an independent vowel, or
162 "\\(?:RH\\)?Vn?m?b?u?p?n?A?s?S?a?\\|" 159 "\\(?:RH\\)?Vn?\\(?:J?HR\\)?v*n?a?s?t?A?\\|"
163 ;; consonant-based syllables, or 160 ;; consonant-based syllables, or
164 "\\(?:Cn?J?HJ?\\)*Cn?\\(?:H[NJ]?\\|m?b?u?p?n?A?s?S?a?\\)\\|" 161 "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?a?s?t?A?\\)\\|"
165 ;; special consonant form, or 162 ;; special consonant form, or
166 "JHR\\|" 163 "JHR\\|"
167 ;; any other singleton characters 164 ;; any other singleton characters
@@ -169,43 +166,202 @@ South Indian language Malayalam is supported in this language environment."))
169 table)) 166 table))
170 "Regexp matching a composable sequence of Devanagari characters.") 167 "Regexp matching a composable sequence of Devanagari characters.")
171 168
169(defconst bengali-composable-pattern
170 (let ((table
171 '(("a" . "\u0981") ; SIGN CANDRABINDU
172 ("A" . "[\u0982-\u0983]") ; SIGN ANUSVARA .. VISARGA
173 ("V" . "[\u0985-\u0994\u09E0-\u09E1]") ; independent vowel
174 ("C" . "[\u0995-\u09B9\u09DC-\u09DF\u09F1]") ; consonant
175 ("B" . "[\u09AC\u09AF-\u09B0\u09F0]") ; BA, YA, RA
176 ("R" . "[\u09B0\u09F0]") ; RA
177 ("n" . "\u09BC") ; NUKTA
178 ("v" . "[\u09BE-\u09CC\u09D7\u09E2-\u09E3]") ; vowel sign
179 ("H" . "\u09CD") ; HALANT
180 ("T" . "\u09CE") ; KHANDA TA
181 ("N" . "\u200C") ; ZWNJ
182 ("J" . "\u200D") ; ZWJ
183 ("X" . "[\u0980-\u09FF]")))) ; all coverage
184 (indian-compose-regexp
185 (concat
186 ;; syllables with an independent vowel, or
187 "\\(?:RH\\)?Vn?\\(?:J?HB\\)?v*n?a?A?\\|"
188 ;; consonant-based syllables, or
189 "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*[NJ]?v?a?A?\\)\\|"
190 ;; another syllables with an independent vowel, or
191 "\\(?:RH\\)?T\\|"
192 ;; special consonant form, or
193 "JHB\\|"
194 ;; any other singleton characters
195 "X")
196 table))
197 "Regexp matching a composable sequence of Bengali characters.")
198
199(defconst gurmukhi-composable-pattern
200 (let ((table
201 '(("a" . "[\u0A01-\u0A02]") ; SIGN ADAK BINDI .. BINDI
202 ("A" . "\u0A03]") ; SIGN VISARGA
203 ("V" . "[\u0A05-\u0A14]") ; independent vowel
204 ("C" . "[\u0A15-\u0A39\u0A59-\u0A5E]") ; consonant
205 ("Y" . "[\u0A2F\u0A30\u0A35\u0A39]") ; YA, RA, VA, HA
206 ("n" . "\u0A3C") ; NUKTA
207 ("v" . "[\u0A3E-\u0A4C]") ; vowel sign
208 ("H" . "\u0A4D") ; VIRAMA
209 ("a" . "\u0A70") ; TIPPI
210 ("N" . "\u200C") ; ZWNJ
211 ("J" . "\u200D") ; ZWJ
212 ("X" . "[\u0A00-\u0A7F]")))) ; all coverage
213 (indian-compose-regexp
214 (concat
215 ;; consonant-based syllables, or
216 "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?a?A?\\)\\|"
217 ;; syllables with an independent vowel, or
218 "Vn?\\(?:J?HY\\)?v*n?a?A?\\|"
219 ;; special consonant form, or
220 "JHY\\|"
221 ;; any other singleton characters
222 "X")
223 table))
224 "Regexp matching a composable sequence of Gurmukhi characters.")
225
226(defconst gujarati-composable-pattern
227 (let ((table
228 '(("a" . "[\u0A81-\u0A82]") ; SIGN CANDRABINDU .. ANUSVARA
229 ("A" . "\u0A83]") ; SIGN VISARGA
230 ("V" . "[\u0A85-\u0A94\u0AE0-\u0AE1]") ; independent vowel
231 ("C" . "[\u0A95-\u0AB9]") ; consonant
232 ("R" . "\u0AB0") ; RA
233 ("n" . "\u0ABC") ; NUKTA
234 ("v" . "[\u0ABE-\u0ACC\u0AE2-\u0AE3]") ; vowel sign
235 ("H" . "\u0ACD") ; VIRAMA
236 ("N" . "\u200C") ; ZWNJ
237 ("J" . "\u200D") ; ZWJ
238 ("X" . "[\u0A80-\u0AFF]")))) ; all coverage
239 (indian-compose-regexp
240 (concat
241 ;; syllables with an independent vowel, or
242 "\\(?:RH\\)?Vn?\\(?:J?HR\\)?v*n?a?A?\\|"
243 ;; consonant-based syllables, or
244 "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?|v*n?a?A?\\)\\|"
245 ;; special consonant form, or
246 "JHR\\|"
247 ;; any other singleton characters
248 "X")
249 table))
250 "Regexp matching a composable sequence of Gujarati characters.")
251
252(defconst oriya-composable-pattern
253 (let ((table
254 '(("a" . "\u0B01") ; SIGN CANDRABINDU
255 ("A" . "[\u0B02-\u0B03]") ; SIGN ANUSVARA .. VISARGA
256 ("V" . "[\u0B05-\u0B14\u0B60-\u0B61]") ; independent vowel
257 ("C" . "[\u0B15-\u0B39\u0B5C-\u0B5D\u0B71]") ; consonant
258 ("B" . "[\u0B15-\u0B17\u0B1B-\u0B1D\u0B1F-\u0B21\u0B23-\u0B24\u0B27-\u0B30\u0B32-\u0B35\u0B38-\u0B39]") ; consonant with below form
259 ("n" . "\u0B3C") ; NUKTA
260 ("v" . "[\u0B3E-\u0B44\u0B56-\u0B57\u0B62-\u0B63]") ; vowel sign
261 ("H" . "\u0B4D") ; VIRAMA
262 ("N" . "\u200C") ; ZWNJ
263 ("J" . "\u200D") ; ZWJ
264 ("X" . "[\u0B00-\u0B7F]")))) ; all coverage
265 (indian-compose-regexp
266 (concat
267 ;; syllables with an independent vowel, or
268 "\\(?:RH\\)?Vn?\\(?:J?HB\\)?v*n?a?A?\\|"
269 ;; consonant-based syllables, or
270 "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?|v*n?a?A?\\)\\|"
271 ;; special consonant form, or
272 "JHB\\|"
273 ;; any other singleton characters
274 "X")
275 table))
276 "Regexp matching a composable sequence of Oriya characters.")
277
172(defconst tamil-composable-pattern 278(defconst tamil-composable-pattern
173 (concat 279 (let ((table
174 "\\([அ-ஔ]\\)\\|" 280 '(("a" . "\u0B82") ; SIGN ANUSVARA
175 "[ஂஃ]\\|" ;; vowel modifier considered independent 281 ("V" . "[\u0B85-\u0B94]") ; independent vowel
176 "\\(\\(?:\\(?:க்ஷ\\)\\|[க-ஹ]\\)[்ா-ௌ]?\\)\\|" 282 ("C" . "[\u0B95-\u0BB9]") ; consonant
177 "\\(ஷ்ரீ\\)") 283 ("v" . "[\u0BBE-\u0BC8\u0BD7]") ; vowel sign
284 ("H" . "\u0BCD") ; VIRAMA
285 ("N" . "\u200C") ; ZWNJ
286 ("J" . "\u200D") ; ZWJ
287 ("X" . "[\u0B80-\u0BFF]")))) ; all coverage
288 (indian-compose-regexp
289 (concat
290 ;; consonant-based syllables, or
291 "C\\(?:J?HJ?C\\)*\\(?:H[NJ]?|v*a?\\)\\|"
292 ;; syllables with an independent vowel, or
293 "Vv*a?\\|"
294 ;; any other singleton characters
295 "X")
296 table))
178 "Regexp matching a composable sequence of Tamil characters.") 297 "Regexp matching a composable sequence of Tamil characters.")
179 298
299(defconst telugu-composable-pattern
300 (let ((table
301 '(("a" . "[\u0C01-\u0C03]") ; SIGN CANDRABINDU .. VISARGA
302 ("V" . "[\u0C05-\u0C14\u0C60-\u0C61]") ; independent vowel
303 ("C" . "[\u0C15-\u0C39\u0C58-\u0C59]") ; consonant
304 ("v" . "[\u0C3E-\u0C4C\u0C55-\u0C56\u0C62-\u0C63]") ; vowel sign
305 ("H" . "\u0BCD") ; VIRAMA
306 ("N" . "\u200C") ; ZWNJ
307 ("J" . "\u200D") ; ZWJ
308 ("X" . "[\u0C00-\u0C7F]")))) ; all coverage
309 (indian-compose-regexp
310 (concat
311 ;; consonant-based syllables, or
312 "C\\(?:J?HJ?C\\)*\\(?:H[NJ]?|v*a?\\)\\|"
313 ;; syllables with an independent vowel, or
314 "V\\(?:J?HC\\)?v*a?\\|"
315 ;; special consonant form, or
316 "JHC\\|"
317 ;; any other singleton characters
318 "X")
319 table))
320 "Regexp matching a composable sequence of Telugu characters.")
321
180(defconst kannada-composable-pattern 322(defconst kannada-composable-pattern
181 (concat 323 (let ((table
182 "\\([ಂ-ಔೠಌ]\\)\\|[ಃ]" 324 '(("A" . "[\u0C82-\u0C83]") ; SIGN ANUSVARA .. VISARGA
183 "\\|\\(" 325 ("V" . "[\u0C85-\u0C94\u0CE0-\u0CE1]") ; independent vowel
184 "\\(?:\\(?:[ಕ-ಹ]್\\)?\\(?:[ಕ-ಹ]್\\)?\\(?:[ಕ-ಹ]್\\)?[ಕ-ಹ]್\\)?" 326 ("C" . "[\u0C95-\u0CB9\u0CDE]") ; consonant
185 "[ಕ-ಹ]\\(?:್\\|[ಾ-್ೕೃ]?\\)?" 327 ("B" . "\u0CB0") ; RA
186 "\\)") 328 ("n" . "\u0CBC") ; NUKTA
329 ("v" . "[\u0CBE-\u0CCC\u0CD5-\u0CD6\u0CE2-\u0CE3]") ; vowel sign
330 ("H" . "\u0CCD") ; VIRAMA
331 ("N" . "\u200C") ; ZWNJ
332 ("J" . "\u200D") ; ZWJ
333 ("X" . "[\u0C80-\u0CFF]")))) ; all coverage
334 (indian-compose-regexp
335 (concat
336 ;; syllables with an independent vowel, or
337 "\\(?:RH\\)?Vn?\\(?:J?HC\\)?v?A?\\|"
338 ;; consonant-based syllables, or
339 "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?|v*n?A?\\)\\|"
340 ;; special consonant form, or
341 "JHB\\|"
342 ;; any other singleton characters
343 "X")
344 table))
187 "Regexp matching a composable sequence of Kannada characters.") 345 "Regexp matching a composable sequence of Kannada characters.")
188 346
189(defconst malayalam-composable-pattern 347(defconst malayalam-composable-pattern
190 (let ((table 348 (let ((table
191 '(("V" . "[\u0D05-\u0D14\u0D60-\u0D61]") ; independent vowel 349 '(("A" . "[\u0D02-\u0D03]") ; SIGN ANUSVARA .. VISARGA
350 ("V" . "[\u0D05-\u0D14\u0D60-\u0D61]") ; independent vowel
192 ("C" . "[\u0D15-\u0D39]") ; consonant 351 ("C" . "[\u0D15-\u0D39]") ; consonant
193 ("m" . "[\u0D46-\u0D48\u0D4A-\u0D4C]") ; prebase matra 352 ("Y" . "[\u0D2F-\u0D30\u0D32\u0D35]") ; YA, RA, LA, VA
194 ("p" . "[\u0D3E-\u0D44\u0D57]") ; postbase matra 353 ("v" . "[\u0D3E-\u0D48\u0D57\u0D62-\u0D63]") ; postbase matra
195 ("b" . "[\u0D62-\u0D63]") ; belowbase matra
196 ("a" . "[\u0D02-\u0D03]") ; abovebase sign
197 ("H" . "\u0D4D") ; virama sign
198 ("N" . "\u200C") ; ZWNJ 354 ("N" . "\u200C") ; ZWNJ
199 ("J" . "\u200D") ; ZWJ 355 ("J" . "\u200D") ; ZWJ
200 ("X" . "[\u0D00-\u0D7F]")))) ; all coverage 356 ("X" . "[\u0D00-\u0D7F]")))) ; all coverage
201 (indian-compose-regexp 357 (indian-compose-regexp
202 (concat 358 (concat
203 ;; syllables with an independent vowel, or
204 "V\\(?:J?HC\\)?m?b?p?a?\\|"
205 ;; consonant-based syllables, or 359 ;; consonant-based syllables, or
206 "\\(?:CJ?HJ?\\)\\{0,4\\}C\\(?:H[NJ]?\\|m?b?p?a?\\)\\|" 360 "\\(?:CJ?HJ?C\\)*\\(?:H[NJ]?\\|v?A?\\)\\|"
361 ;; syllables with an independent vowel, or
362 "V\\(?:J?HY\\)?v*?A?\\|"
207 ;; special consonant form, or 363 ;; special consonant form, or
208 "JHC\\|" 364 "JHY\\|"
209 ;; any other singleton characters 365 ;; any other singleton characters
210 "X") 366 "X")
211 table)) 367 table))
@@ -213,13 +369,13 @@ South Indian language Malayalam is supported in this language environment."))
213 369
214(let ((script-regexp-alist 370(let ((script-regexp-alist
215 `((devanagari . ,devanagari-composable-pattern) 371 `((devanagari . ,devanagari-composable-pattern)
216 (bengali . "[\x980-\x9FF\x200C\x200D]+") 372 (bengali . ,bengali-composable-pattern)
217 (gurmukhi . "[\xA00-\xA7F\x200C\x200D]+") 373 (gurmukhi . ,gurmukhi-composable-pattern)
218 (gujarati . "[\xA80-\xAFF\x200C\x200D]+") 374 (gujarati . ,gujarati-composable-pattern)
219 (oriya . "[\xB00-\xB7F\x200C\x200D]+") 375 (oriya . ,oriya-composable-pattern)
220 (tamil . "[\xB80-\xBFF\x200C\x200D]+") 376 (tamil . ,tamil-composable-pattern)
221 (telugu . "[\xC00-\xC7F\x200C\x200D]+") 377 (telugu . ,telugu-composable-pattern)
222 (kannada . "[\xC80-\xCFF\x200C\x200D]+") 378 (kannada . ,kannada-composable-pattern)
223 (malayalam . ,malayalam-composable-pattern)))) 379 (malayalam . ,malayalam-composable-pattern))))
224 (map-char-table 380 (map-char-table
225 #'(lambda (key val) 381 #'(lambda (key val)
diff --git a/lisp/language/sinhala.el b/lisp/language/sinhala.el
index c726c3fb6bf..37a7bc6b465 100644
--- a/lisp/language/sinhala.el
+++ b/lisp/language/sinhala.el
@@ -33,7 +33,17 @@
33(set-char-table-range 33(set-char-table-range
34 composition-function-table 34 composition-function-table
35 '(#xD80 . #xDFF) 35 '(#xD80 . #xDFF)
36 (list (vector "[\xD80-\xDFF\x200C\x200D]+" 0 'font-shape-gstring))) 36 (list (vector
37 ;; C:consonant, H:HALANT, J:ZWJ, v:vowel sign,
38 ;; V:independent vowel, a:ANUSVARA .. VISARGA
39 (concat
40 ;; C(HJC)*v*H?a?, or
41 "[\u0D9A-\u0DC6]\\(?:\u0DCA\u200D[\u0D9A-\u0DC6]\\)*[\u0DCF-\u0DDF\u0DF2-\u0DF3]*\u0DCA?[\u0D82-\u0D83]?\\|"
42 ;; Va?, or
43 "[\u0D85-\u0D96][\u0D82-\u0D83]?\\|"
44 ;; any other singleton characters
45 "[\u0D80-\u0DFF]")
46 0 'font-shape-gstring)))
37 47
38;; arch-tag: 87b9ad3b-5090-422f-b942-eb85b9d52e7c 48;; arch-tag: 87b9ad3b-5090-422f-b942-eb85b9d52e7c
39;; sinhala.el ends here 49;; sinhala.el ends here