aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKenichi Handa2003-02-11 01:38:31 +0000
committerKenichi Handa2003-02-11 01:38:31 +0000
commit585eb076bbbb1108e1d33e172ba9047688340ac5 (patch)
tree92ba411256a8a96a3f02cd300d2cdb22d0d62622
parent764e500228254e1d188a4bb89178b0787f53cfbe (diff)
downloademacs-585eb076bbbb1108e1d33e172ba9047688340ac5.tar.gz
emacs-585eb076bbbb1108e1d33e172ba9047688340ac5.zip
New file.
-rw-r--r--lisp/language/malayalam.el46
-rw-r--r--lisp/language/mlm-util.el406
-rw-r--r--lisp/language/tamil.el43
-rw-r--r--lisp/language/tml-util.el367
4 files changed, 862 insertions, 0 deletions
diff --git a/lisp/language/malayalam.el b/lisp/language/malayalam.el
new file mode 100644
index 00000000000..3a7c19892c7
--- /dev/null
+++ b/lisp/language/malayalam.el
@@ -0,0 +1,46 @@
1;;; malayalam.el --- Support for Malayalam -*- coding: iso-2022-7bit; no-byte-compile: t -*-
2
3;; Copyright (C) 2003 Free Software Foundation, Inc.
4
5;; Maintainer: KAWABATA, Taichi <kawabata@m17n.org>
6;; Keywords: multilingual, Indian, Malayalam
7
8;; This file is part of GNU Emacs.
9
10;; GNU Emacs is free software; you can redistribute it and/or modify
11;; it under the terms of the GNU General Public License as published by
12;; the Free Software Foundation; either version 2, or (at your option)
13;; any later version.
14
15;; GNU Emacs is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;; GNU General Public License for more details.
19
20;; You should have received a copy of the GNU General Public License
21;; along with GNU Emacs; see the file COPYING. If not, write to the
22;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23;; Boston, MA 02111-1307, USA.
24
25;;; Commentary:
26
27;; This file defines language-info of Malayalam script.
28
29;;; Code:
30
;; Register the "Malayalam" language environment under the "Indian"
;; parent group.  The entry names the charsets, UTF-8 as coding system
;; and coding priority, the malayalam-itrans input method and the
;; `mlm-util' feature.
;; NOTE: indian-2-column was only ever present as a commented-out
;; charset candidate and is therefore not part of the charset list.
(let ((malayalam-info
       (list '(charset mule-unicode-0100-24ff indian-glyph)
             '(coding-system utf-8)
             '(coding-priority utf-8)
             '(input-method . "malayalam-itrans")
             '(features mlm-util)
             (cons 'documentation "\
South Indian language Malayalam is supported in this language environment."))))
  (set-language-info-alist "Malayalam" malayalam-info '("Indian")))

(provide 'malayalam)
45
46;;; malayalam.el ends here
diff --git a/lisp/language/mlm-util.el b/lisp/language/mlm-util.el
new file mode 100644
index 00000000000..86afaca3ee7
--- /dev/null
+++ b/lisp/language/mlm-util.el
@@ -0,0 +1,406 @@
1;;; mlm-util.el --- support for composing malayalam characters -*-coding: iso-2022-7bit;-*-
2
3;; Copyright (C) 2003 Free Software Foundation, Inc.
4
5;; Maintainer: KAWABATA, Taichi <kawabata@m17n.org>
6;; Keywords: multilingual, Malayalam
7
8;; This file is part of GNU Emacs.
9
10;; GNU Emacs is free software; you can redistribute it and/or modify
11;; it under the terms of the GNU General Public License as published by
12;; the Free Software Foundation; either version 2, or (at your option)
13;; any later version.
14
15;; GNU Emacs is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;; GNU General Public License for more details.
19
20;; You should have received a copy of the GNU General Public License
21;; along with GNU Emacs; see the file COPYING. If not, write to the
22;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23;; Boston, MA 02111-1307, USA.
24
25;; Created: Feb. 11. 2003
26
27;;; Commentary:
28
29;; This file provides character(Unicode) to glyph(CDAC) conversion and
30;; composition of Malayalam script characters.
31
32;;; Code:
33
34;; Malayalam Composable Pattern
35;; C .. Consonants
36;; V .. Vowel
37;; H .. Halant
38;; M .. Matra
39;; V .. Vowel
40;; A .. Anuswar
41;; D .. Chandrabindu
42;; (N .. Zerowidth Non Joiner)
43;; (J .. Zerowidth Joiner. )
44;; 1. vowel
45;; V(A|visargam)?
46;; 2. syllable : maximum of 5 consecutive consonants. (e.g. kartsnya)
47;; ((CH)?(CH)?(CH)?CH)?C(H|M?(A|D)?)?
48
49(defconst malayalam-consonant
50 "[$,1@5(B-$,1@Y(B]")
51
52(defconst malayalam-composable-pattern
53 (concat
54 "\\([$,1@%(B-$,1@4(B][$,1@"(B]?\\)\\|$,1@#(B"
55 "\\|\\("
56 "\\(?:\\(?:[$,1@5(B-$,1@Y(B]$,1@m(B\\)?\\(?:[$,1@5(B-$,1@Y(B]$,1@m(B\\)?\\(?:[$,1@5(B-$,1@Y(B]$,1@m(B\\)?[$,1@5(B-$,1@Y(B]$,1@m(B\\)?"
57 "[$,1@5(B-$,1@Y(B]\\(?:$,1@m(B\\|[$,1@^(B-$,1@c@f@g@h@j@j@k@l(B]?[$,1@"@m(B]?\\)?"
58 "\\)")
59 "Regexp matching a composable sequence of Malayalam characters.")
60
61;;;###autoload
(defun malayalam-compose-region (from to)
  "Compose every Malayalam syllable found between FROM and TO.
The text is scanned with `malayalam-composable-pattern' and each match
is handed to `malayalam-compose-syllable-region'.  Point and any
restriction in effect are preserved.  When called interactively,
FROM and TO are the bounds of the region."
  (interactive "r")
  (save-excursion
    (save-restriction
      (narrow-to-region from to)
      (goto-char (point-min))
      (let (syllable-start syllable-end)
        (while (re-search-forward malayalam-composable-pattern nil t)
          (setq syllable-start (match-beginning 0)
                syllable-end (match-end 0))
          (malayalam-compose-syllable-region syllable-start syllable-end))))))
(defun malayalam-compose-string (string)
  "Return a string holding STRING with its Malayalam syllables composed.
STRING is passed through `decompose-string', inserted into a temporary
buffer and composed there with `malayalam-compose-region'; the
contents of that buffer are returned."
  (let ((plain (decompose-string string)))
    (with-temp-buffer
      (insert plain)
      (malayalam-compose-region (point-min) (point-max))
      (buffer-string))))
76
(defun malayalam-post-read-conversion (len)
  "Compose Malayalam text in the LEN characters that follow point.
The buffer's modified flag is put back to the value it had on entry.
Return the size of the processed region after composition."
  (save-excursion
    (save-restriction
      (let ((was-modified (buffer-modified-p))
            (start (point)))
        (narrow-to-region start (+ start len))
        (malayalam-compose-region (point-min) (point-max))
        (set-buffer-modified-p was-modified)
        (- (point-max) (point-min))))))
85
(defun malayalam-range (from to)
  "Make the list of the integers of range FROM to TO.
Both bounds are included and the list is in ascending order.
Return nil when FROM is greater than TO."
  (let ((numbers nil)
        (n to))
    ;; Walk downwards from TO so that consing yields ascending order.
    (while (>= n from)
      (setq numbers (cons n numbers)
            n (1- n)))
    numbers))
90
(defun malayalam-regexp-of-hashtbl-keys (hashtbl)
  "Return a regular expression that matches all keys in hashtable HASHTBL.
The keys are collected, ordered longest first and then handed to
`regexp-opt'.  `max-specpdl-size' is bound to 1000 for the duration
of the call."
  (let ((max-specpdl-size 1000)
        (keys nil))
    (maphash (lambda (key _value) (setq keys (cons key keys))) hashtbl)
    (regexp-opt
     (sort keys (lambda (a b) (> (length a) (length b)))))))
100
101
102;;;###autoload
(defun malayalam-composition-function (from to pattern &optional string)
  "Compose Malayalam characters in REGION, or STRING if specified.
REGION is the text between FROM and TO.  Assume that the REGION or
STRING must fully match the composable PATTERN regexp; PATTERN itself
is not examined here.  Return TO minus FROM."
  ;; NOTE(review): in the STRING case the composed text produced by
  ;; `malayalam-compose-syllable-string' is not used any further.
  (cond (string (malayalam-compose-syllable-string string))
        (t (malayalam-compose-syllable-region from to)))
  (- to from))
110
111;; Register a function to compose Malayalam characters.
;; Install `malayalam-composition-function', keyed by
;; `malayalam-composable-pattern', in `composition-function-table' for
;; code points U+0D02, U+0D03 and U+0D05 through U+0D39.
(mapc
 (lambda (ucs)
   (aset composition-function-table (decode-char 'ucs ucs)
         (list (cons malayalam-composable-pattern
                     'malayalam-composition-function))))
 ;; `append' rather than `nconc': `nconc' would splice the generated
 ;; range into the quoted literal list, destructively modifying a
 ;; constant.
 (append '(#x0d02 #x0d03) (malayalam-range #x0d05 #x0d39)))
118
119;; Notes on conversion steps.
120
121;; 1. chars to glyphs
122;;
123;; Simple replacement of characters to glyphs is done.
124
125;; 2. glyphs reordering.
126;;
127;; Two special reordering rules take place.
128;; a. following "$,46[(B" goes to the front.
129;; b. following "$,46S6S(B", "$,46S(B" or "$,46T(B" goes to the front.
130;; This reordering occurs only to the last cluster of consonants.
131;; Preceding consonants with halant characters are not affected.
132
133;; 3. Composition.
134;;
135;; left modifiers will be attached at the left.
136;; others will be attached right.
137
138(defvar mlm-char-glyph
139 '(;; various signs
140 ("$,1@"(B" . "$,46W(B")
141 ("$,1@#(B" . "$,46X(B")
142 ;; Independent Vowels
143 ("$,1@%(B" . "$,46!(B")
144 ("$,1@&(B" . "$,46"(B")
145 ("$,1@'(B" . "$,46#(B")
146 ("$,1@((B" . "$,46#6U(B")
147 ("$,1@)(B" . "$,46$(B")
148 ("$,1@*(B" . "$,46$6U(B")
149 ("$,1@+(B" . "$,46%(B")
150 ("$,1@,(B" . "nil") ;; not in present use, not supported.
151 ("$,1@.(B" . "$,46&(B")
152 ("$,1@/(B" . "$,46'(B")
153 ("$,1@0(B" . "$,46S6&(B")
154 ("$,1@2(B" . "$,46((B")
155 ("$,1@3(B" . "$,46(6M(B")
156 ("$,1@4(B" . "$,46(6U(B")
157 ;; Consonants
158 ("$,1@5(B" . "$,46)(B")
159 ("$,1@5@m@5(B" . "$,47!(B")
160 ("$,1@5@m@7(B" . "$,47"(B")
161 ("$,1@5@m@W(B" . "$,47#(B")
162 ("$,1@5@m@?(B" . "$,47N(B") ;; ?
163 ("$,1@5@m@D(B" . "$,47`(B")
164 ("$,1@5@m@F(B" . "$,47f(B") ;; ? ;; vowel u?
165 ("$,1@5@m@5@m@F(B" . "$,47g(B") ;; ? ;; vowel u?
166
167 ("$,1@6(B" . "$,46*(B")
168
169 ("$,1@7(B" . "$,46+(B")
170 ("$,1@7@m@7(B" . "$,47$(B")
171 ("$,1@7@m@R(B" . "$,47%(B")
172 ("$,1@7@m@N(B" . "$,47\(B")
173 ("$,1@7@m@H(B" . "$,47a(B")
174
175 ("$,1@8(B" . "$,46,(B")
176
177 ("$,1@9(B" . "$,46-(B")
178 ("$,1@9@m@5(B" . "$,47&(B")
179 ("$,1@9@m@9(B" . "$,47'(B")
180 ("$,1@9@m@5@m@F(B" . "$,47h(B") ;; ? ;; vowel u?
181
182 ("$,1@:(B" . "$,46.(B")
183 ("$,1@:@m@:(B" . "$,47((B") ;; duplicate
184 ("$,1@:@m@;(B" . "$,47Q(B") ;; ?
185
186 ("$,1@;(B" . "$,46/(B")
187
188 ("$,1@<(B" . "$,460(B")
189 ("$,1@<@m@<(B" . "$,47V(B")
190 ("$,1@<@m@>(B" . "$,47Z(B")
191
192 ("$,1@=(B" . "$,461(B")
193
194 ("$,1@>(B" . "$,462(B")
195 ("$,1@>@m@:(B" . "$,47)(B")
196 ("$,1@>@m@>(B" . "$,47*(B")
197
198 ("$,1@?(B" . "$,463(B")
199 ("$,1@?@m@?(B" . "$,47+(B")
200
201 ("$,1@@(B" . "$,464(B")
202 ("$,1@A(B" . "$,465(B")
203 ("$,1@A@m@A(B" . "$,47M(B")
204 ("$,1@B(B" . "$,466(B")
205
206 ("$,1@C(B" . "$,467(B")
207 ("$,1@C@m(B" . "$,47,(B") ;; half consonant
208 ("$,1@C@m@?(B" . "$,47-(B")
209 ("$,1@C@m@C(B" . "$,47.(B")
210 ("$,1@C@m@N(B" . "$,47W(B")
211 ("$,1@C@m@G(B" . "$,47^(B") ;; ?
212 ("$,1@C@m@V(B" . "$,47i(B") ;; ?
213
214 ("$,1@D(B" . "$,468(B")
215 ("$,1@D@m@D(B" . "$,47/(B")
216 ("$,1@D@m@E(B" . "$,470(B")
217 ("$,1@D@m@X(B" . "$,47U(B")
218 ("$,1@D@m@M(B" . "$,47[(B")
219 ("$,1@D@m@N(B" . "$,47_(B")
220 ("$,1@D@m@F(B" . "$,47j(B") ;; ? ;; vowel u ?
221
222 ("$,1@E(B" . "$,469(B")
223
224 ("$,1@F(B" . "$,46:(B")
225 ("$,1@F@m@F(B" . "$,471(B")
226 ("$,1@F@m@G(B" . "$,472(B")
227
228 ("$,1@G(B" . "$,46;(B")
229
230 ("$,1@H(B" . "$,46<(B")
231 ("$,1@H@m(B" . "$,473(B") ;; half consonant
232 ("$,1@H@m@D(B" . "$,474(B")
233 ("$,1@H@m@F(B" . "$,475(B")
234 ("$,1@H@m@H(B" . "$,476(B")
235 ("$,1@H@m@N(B" . "$,477(B")
236 ("$,1@H@m@G(B" . "$,47T(B")
237 ("$,1@H@m@E(B" . "$,47Y(B")
238 ("$,1@H@m@Q(B" . "$,47b(B")
239 ("$,1@H@m@V(B" . "$,47k(B") ;; ?
240 ("$,1@H@m@H@m@V(B" . "$,47l(B") ;; ?
241
242 ("$,1@J(B" . "$,46=(B")
243 ("$,1@J@m@J(B" . "$,478(B") ;; duplicate
244 ("$,1@J@m@R(B" . "$,479(B") ;; lakar
245
246 ("$,1@K(B" . "$,46>(B")
247
248 ("$,1@L(B" . "$,46?(B")
249 ("$,1@L@m@L(B" . "$,47:(B") ;; duplicate
250 ("$,1@L@m@R(B" . "$,47;(B") ;; lakar
251 ("$,1@L@m@G(B" . "$,47O(B") ;; ?
252 ("$,1@L@m@F(B" . "$,47P(B") ;; ?
253
254 ("$,1@M(B" . "$,46@(B")
255
256 ("$,1@N(B" . "$,46A(B")
257 ("$,1@N@m@J(B" . "$,47<(B")
258 ("$,1@N@m@N(B" . "$,47=(B")
259 ("$,1@N@m@R(B" . "$,47>(B") ;; lakar
260
261 ("$,1@O(B" . "$,46B(B")
262 ("$,1@O@m@O(B" . "$,47?(B") ;; duplicate
263 ("$,1@O@m@5@m@5(B" . "$,47m(B") ;; ?
264
265 ("$,1@P(B" . "$,46C(B")
266 ("$,1@P@m(B" . "$,47@(B")
267
268 ("$,1@Q(B" . "$,46D(B")
269 ("$,1@Q@m(B" . "$,47@(B") ;; same glyph as "$,1@P@m(B"
270 ;;("$,1@Q@m@Q(B" . "$,47A(B")
271 ("$,1@Q@m@Q(B" . "$,47d(B")
272
273 ("$,1@R(B" . "$,46E(B")
274 ("$,1@R@m(B" . "$,47B(B")
275 ("$,1@R@m@R(B" . "$,47C(B") ;; lakar
276 ("$,1@R@m@J(B" . "$,47e(B") ;; ?
277
278 ("$,1@S(B" . "$,46F(B")
279 ("$,1@S@m(B" . "$,47D(B")
280 ("$,1@S@m@S(B" . "$,47E(B")
281
282 ("$,1@T(B" . "$,46G(B")
283 ("$,1@T@m(B" . "$,47D(B")
284
285 ("$,1@U(B" . "$,46H(B")
286 ("$,1@U@m@U(B" . "$,47F(B")
287
288 ("$,1@V(B" . "$,46I(B")
289 ("$,1@V@m@R(B" . "$,47G(B")
290 ("$,1@V@m@V(B" . "$,47H(B")
291 ("$,1@V@m@:(B" . "$,47](B")
292
293 ("$,1@W(B" . "$,46J(B")
294 ("$,1@W@m@?(B" . "$,47c(B")
295
296 ("$,1@X(B" . "$,46K(B")
297 ("$,1@X@m@R(B" . "$,47I(B")
298 ("$,1@X@m@X(B" . "$,47J(B")
299 ("$,1@X@m@Q@m@Q(B" . "$,47L(B")
300 ("$,1@X@m@E(B" . "$,47X(B")
301
302 ("$,1@Y(B" . "$,46L(B")
303 ("$,1@Y@m@R(B" . "$,47K(B")
304 ("$,1@Y@m@N(B" . "$,47R(B")
305 ("$,1@Y@m@H(B" . "$,47S(B")
306
307 ;; Dependent vowel signs
308 ("$,1@^(B" . "$,46M(B")
309 ("$,1@_(B" . "$,46N(B")
310 ("$,1@`(B" . "$,46O(B")
311 ("$,1@a(B" . "$,46P(B")
312 ("$,1@b(B" . "$,46Q(B")
313 ("$,1@c(B" . "$,46R(B")
314 ("$,1@f(B" . "$,46S(B")
315 ("$,1@g(B" . "$,46T(B")
316 ("$,1@h(B" . "$,46S6S(B")
317 ("$,1@j(B" . "$,46S6M(B")
318 ("$,1@k(B" . "$,46T6M(B")
319 ("$,1@l(B" . "$,46U(B")
320 ;; Various signs
321 ("$,1@m(B" . "$,46V(B")
322 ("$,1@m@O(B" . "$,46Y(B") ;; yakar
323 ("$,1@m@O@a(B" . "$,46\(B") ;; yakar + u ;; ?
324 ("$,1@m@O@b(B" . "$,46](B") ;; yakar + uu ;; ?
325 ("$,1@m@U(B" . "$,46Z(B") ;; vakar modifier
326 ("$,1@m@P(B" . "$,46[(B") ;; rakar modifier is the same to rra modifier.
327 ("$,1@m@Q(B" . "$,46[(B") ;; rrakar modifier
328 ("$,1@m@m(B" . "$,46V(B") ;; double omission sign to stop forming half consonant.
329 ("$,1@w(B" . "$,46U(B") ;; not in present use, already at 0D4C.
330 ))
331
(defvar mlm-char-glyph-hash
  (let ((table (make-hash-table :test 'equal)))
    ;; Entries are stored in list order, so a later duplicate key
    ;; replaces an earlier one.
    (dolist (entry mlm-char-glyph)
      (puthash (car entry) (cdr entry) table))
    table)
  "Hash table built from the alist `mlm-char-glyph'.
Each key is the car of an entry and each value the matching cdr;
keys are compared with `equal'.")
337
(defvar mlm-char-glyph-regexp
  (malayalam-regexp-of-hashtbl-keys mlm-char-glyph-hash)
  "Regexp matching any key of `mlm-char-glyph-hash'.
Computed with `malayalam-regexp-of-hashtbl-keys'.")
340
341;; Malayalam glyphs need to be reordered in a complex manner.
342
343(defvar mlm-consonants
344 (concat
345 "$,46)6*6+6,6-6.6/606162636465666768696:6;6<6=6>6?6@6A6B6C6D6E6F6G6H6I6J6K6L(B"
346 "$,47!7"7#7$7%7&7'7(7)7*7+7,7-7.7/707172737475767778797:7;7<7=7>7?7@7A7B7C7D7E7F7G7H7I7J7K7L7M7N7O7P7Q7R7S7T7U7V7W7X7Y7Z7[7\7]7^7_7`7a7b7c7d7e(B"
347 ))
348
349(defvar mlm-consonants-regexp
350 (concat "\\($,46[(B?[" mlm-consonants "][$,46Y6Z(B]?\\)"))
351
352(defvar mlm-glyph-reorder-key-glyphs "[$,46[6S6T(B]")
353
354(defvar mlm-glyph-reordering-regexp-list
355 `((,(concat "\\([" mlm-consonants "][$,46Y6Z(B]?\\)$,46[(B") . "$,46[(B\\1")
356 (,(concat mlm-consonants-regexp "$,46S6S(B") . "$,46S6S(B\\1")
357 (,(concat mlm-consonants-regexp "$,46S(B") . "$,46S(B\\1")
358 (,(concat mlm-consonants-regexp "$,46T(B") . "$,46T(B\\1")))
359
(defun malayalam-compose-syllable-string (string)
  "Return a string holding STRING composed as one Malayalam syllable.
STRING is passed through `decompose-string', inserted into a temporary
buffer and composed as a whole with
`malayalam-compose-syllable-region'; the contents of that buffer are
returned."
  (let ((plain (decompose-string string)))
    (with-temp-buffer
      (insert plain)
      (malayalam-compose-syllable-region (point-min) (point-max))
      (buffer-string))))
365
(defun malayalam-compose-syllable-region (from to)
  "Compose malayalam syllable in region FROM to TO.
The work is done in three steps:
 1. Every match of `mlm-char-glyph-regexp' in the region is replaced
    by its glyph string from `mlm-char-glyph-hash', and the glyph
    strings are concatenated.
 2. If the result contains one of `mlm-glyph-reorder-key-glyphs',
    each rule of `mlm-glyph-reordering-regexp-list' is tried in order
    and applied at its first match.
 3. The glyphs are joined with the composition rule (5 . 3) and given
    to `compose-region' as the components for FROM..TO.
If nothing in the region has an entry in the glyph table, the region
is left untouched.  Point and any restriction in effect are
preserved."
  (let (glyph-str)
    (save-excursion
      (save-restriction
        (narrow-to-region from to)
        (goto-char (point-min))
        ;; Step 1: char-glyph conversion.
        (while (re-search-forward mlm-char-glyph-regexp nil t)
          (setq glyph-str
                (concat glyph-str
                        (gethash (match-string 0) mlm-char-glyph-hash))))
        ;; GLYPH-STR is still nil when the loop above never matched.
        ;; `string-match' signals an error on nil, so skip the
        ;; remaining steps in that case.
        (when glyph-str
          ;; Step 2: glyph reordering.
          (when (string-match mlm-glyph-reorder-key-glyphs glyph-str)
            (dolist (rule mlm-glyph-reordering-regexp-list)
              (when (string-match (car rule) glyph-str)
                (setq glyph-str
                      (replace-match (cdr rule) nil nil glyph-str)))))
          ;; Step 3: interleave the glyphs with the default reference
          ;; point rule; the `cdr' drops the rule that would otherwise
          ;; precede the first glyph.
          (compose-region
           from to
           (cdr (apply 'nconc
                       (mapcar (lambda (glyph) (list '(5 . 3) glyph))
                               glyph-str)))))))))
403
404(provide 'mlm-util)
405
406;;; mlm-util.el ends here
diff --git a/lisp/language/tamil.el b/lisp/language/tamil.el
new file mode 100644
index 00000000000..dfa1cdb1d9d
--- /dev/null
+++ b/lisp/language/tamil.el
@@ -0,0 +1,43 @@
1;;; tamil.el --- Support for Tamil -*- coding: iso-2022-7bit; no-byte-compile: t -*-
2
3;; Copyright (C) 2003 Free Software Foundation, Inc.
4
5;; Maintainer: KAWABATA, Taichi <batta@beige.ocn.ne.jp>
6;; Keywords: multilingual, Indian, Tamil
7
8;; This file is part of GNU Emacs.
9
10;; GNU Emacs is free software; you can redistribute it and/or modify
11;; it under the terms of the GNU General Public License as published by
12;; the Free Software Foundation; either version 2, or (at your option)
13;; any later version.
14
15;; GNU Emacs is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;; GNU General Public License for more details.
19
20;; You should have received a copy of the GNU General Public License
21;; along with GNU Emacs; see the file COPYING. If not, write to the
22;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23;; Boston, MA 02111-1307, USA.
24
25;;; Commentary:
26
27;; This file defines language-info of Tamil script.
28
29;;; Code:
30
;; Register the "Tamil" language environment under the "Indian" parent
;; group.  The entry names the charsets, UTF-8 as coding system and
;; coding priority, the tamil-itrans input method and the `tml-util'
;; feature.
(set-language-info-alist
 "Tamil" '((charset mule-unicode-0100-24ff indian-glyph)
           (coding-system utf-8)
           (coding-priority utf-8)
           (input-method . "tamil-itrans")
           (features tml-util)
           ;; User-visible text: the misspelling "Tamikl" and the
           ;; missing verb are corrected.
           (documentation . "\
South Indian Language Tamil is supported in this language environment."))
 '("Indian"))

(provide 'tamil)
42
43;;; tamil.el ends here
diff --git a/lisp/language/tml-util.el b/lisp/language/tml-util.el
new file mode 100644
index 00000000000..bb8c8f19e04
--- /dev/null
+++ b/lisp/language/tml-util.el
@@ -0,0 +1,367 @@
1;;; tml-util.el --- support for composing tamil characters -*-coding: iso-2022-7bit;-*-
2
3;; Copyright (C) 2001 Free Software Foundation, Inc.
4
5;; Maintainer: KAWABATA, Taichi <kawabata@m17n.org>
6;; Keywords: multilingual, Indian, Tamil
7
8;; This file is part of GNU Emacs.
9
10;; GNU Emacs is free software; you can redistribute it and/or modify
11;; it under the terms of the GNU General Public License as published by
12;; the Free Software Foundation; either version 2, or (at your option)
13;; any later version.
14
15;; GNU Emacs is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;; GNU General Public License for more details.
19
20;; You should have received a copy of the GNU General Public License
21;; along with GNU Emacs; see the file COPYING. If not, write to the
22;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23;; Boston, MA 02111-1307, USA.
24
25;; Created: Nov. 08. 2002
26
27;;; Commentary:
28
29;; This file provides character(Unicode) to glyph(CDAC) conversion and
30;; composition of Tamil script characters.
31
32;;; Code:
33
34;; Tamil Composable Pattern
35;; C .. Consonants
36;; V .. Vowel
37;; H .. Pulli
38;; M .. Matra
39;; V .. Vowel
40;; A .. Anuswar
41;; D .. Chandrabindu
42;; 1. vowel
43;; V
44;; 2. syllable : only ligature-formed pattern forms composition.
45;; (CkHCs|C)(H|M)?
46;; 3. sri special
47;; (CsHCrVi)
48
49;; original
50;; ((CH)?(CH)?(CH)?CH)?C(H|M?(A|D)?)?
51
52(defconst tamil-consonant
53 "[$,1<5(B-$,1<Y(B]")
54
55(defconst tamil-composable-pattern
56 (concat
57 "\\([$,1<%(B-$,1<4(B]\\)\\|"
58 "[$,1<"<#(B]\\|" ;; vowel modifier considered independent
59 "\\(\\(?:\\(?:$,1<5<m<W(B\\)\\|[$,1<5(B-$,1<Y(B]\\)[$,1<m<^(B-$,1<l(B]?\\)\\|"
60 "\\($,1<W<m<P<`(B\\)")
61 "Regexp matching a composable sequence of Tamil characters.")
62
63;;;###autoload
(defun tamil-compose-region (from to)
  "Compose every Tamil syllable found between FROM and TO.
The text is scanned with `tamil-composable-pattern' and each match is
handed to `tamil-compose-syllable-region'.  Point and any restriction
in effect are preserved.  When called interactively, FROM and TO are
the bounds of the region."
  (interactive "r")
  (save-excursion
    (save-restriction
      (narrow-to-region from to)
      (goto-char (point-min))
      (let (syllable-start syllable-end)
        (while (re-search-forward tamil-composable-pattern nil t)
          (setq syllable-start (match-beginning 0)
                syllable-end (match-end 0))
          (tamil-compose-syllable-region syllable-start syllable-end))))))
(defun tamil-compose-string (string)
  "Return a string holding STRING with its Tamil syllables composed.
STRING is passed through `decompose-string', inserted into a temporary
buffer and composed there with `tamil-compose-region'; the contents
of that buffer are returned."
  (let ((plain (decompose-string string)))
    (with-temp-buffer
      (insert plain)
      (tamil-compose-region (point-min) (point-max))
      (buffer-string))))
78
(defun tamil-post-read-conversion (len)
  "Compose Tamil text in the LEN characters that follow point.
The buffer's modified flag is put back to the value it had on entry.
Return the size of the processed region after composition."
  (save-excursion
    (save-restriction
      (let ((was-modified (buffer-modified-p))
            (start (point)))
        (narrow-to-region start (+ start len))
        (tamil-compose-region (point-min) (point-max))
        (set-buffer-modified-p was-modified)
        (- (point-max) (point-min))))))
87
(defun tamil-range (from to)
  "Make the list of the integers of range FROM to TO.
Both bounds are included and the list is in ascending order.
Return nil when FROM is greater than TO."
  (let ((numbers nil)
        (n to))
    ;; Walk downwards from TO so that consing yields ascending order.
    (while (>= n from)
      (setq numbers (cons n numbers)
            n (1- n)))
    numbers))
92
(defun tamil-regexp-of-hashtbl-keys (hashtbl)
  "Return a regular expression that matches all keys in hashtable HASHTBL.
The keys are collected, ordered longest first and then handed to
`regexp-opt'.  `max-specpdl-size' is bound to 1000 for the duration
of the call."
  (let ((max-specpdl-size 1000)
        (keys nil))
    (maphash (lambda (key _value) (setq keys (cons key keys))) hashtbl)
    (regexp-opt
     (sort keys (lambda (a b) (> (length a) (length b)))))))
102
103
104;;;###autoload
(defun tamil-composition-function (from to pattern &optional string)
  "Compose Tamil characters in REGION, or STRING if specified.
REGION is the text between FROM and TO.  Assume that the REGION or
STRING must fully match the composable PATTERN regexp; PATTERN itself
is not examined here.  Return TO minus FROM."
  ;; NOTE(review): in the STRING case the composed text produced by
  ;; `tamil-compose-syllable-string' is not used any further.
  (cond (string (tamil-compose-syllable-string string))
        (t (tamil-compose-syllable-region from to)))
  (- to from))
112
113;; Register a function to compose Tamil characters.
;; Install `tamil-composition-function', keyed by
;; `tamil-composable-pattern', in `composition-function-table' for
;; code points U+0B82, U+0B83 and U+0B85 through U+0BB9.
(mapc
 (lambda (ucs)
   (aset composition-function-table (decode-char 'ucs ucs)
         (list (cons tamil-composable-pattern
                     'tamil-composition-function))))
 ;; `append' rather than `nconc': `nconc' would splice the generated
 ;; range into the quoted literal list, destructively modifying a
 ;; constant.
 (append '(#x0b82 #x0b83) (tamil-range #x0b85 #x0bb9)))
120
121;; Notes on conversion steps.
122
123;; 1. chars to glyphs
124;; Simple replacement of characters to glyphs is done.
125
126;; 2. glyphs reordering.
127;; following "$,4)j(B", "$,4)k(B", "$,4)l(B" goes to the front.
128
129;; 3. glyphs to glyphs
130;; reordered vowels are ligatured to consonants.
131
132;; 4. Composition.
133;; left modifiers will be attached at the left.
134;; others will be attached right.
135
136(defvar tml-char-glyph
137 '(;; various signs
138 ;;("$,1<"(B" . "")
139 ("$,1<#(B" . "$,4*G(B")
140 ;; Independent Vowels
141 ("$,1<%(B" . "$,4*<(B")
142 ("$,1<&(B" . "$,4*=(B")
143 ("$,1<'(B" . "$,4*>(B")
144 ("$,1<((B" . "$,4*?(B")
145 ("$,1<)(B" . "$,4*@(B")
146 ("$,1<*(B" . "$,4*A(B")
147 ("$,1<.(B" . "$,4*B(B")
148 ("$,1</(B" . "$,4*C(B")
149 ("$,1<0(B" . "$,4*D(B")
150 ("$,1<2(B" . "$,4*E(B")
151 ("$,1<3(B" . "$,4*F(B")
152 ("$,1<4(B" . "$,4*E*W(B")
153 ;; Consonants
154 ("$,1<5<m<W<m(B" . "$,4):(B") ; ks.
155 ("$,1<5<m<W(B" . "$,4*^(B") ; ks
156 ("$,1<5(B" . "$,4*H(B")
157
158 ("$,1<9(B" . "$,4*I(B")
159 ("$,1<:(B" . "$,4*J(B")
160 ("$,1<<(B" . "$,4*\(B")
161 ("$,1<<<m(B" . "$,4)8(B")
162 ("$,1<>(B" . "$,4*K(B")
163 ("$,1<?(B" . "$,4*L(B")
164 ("$,1<C(B" . "$,4*M(B")
165 ("$,1<D(B" . "$,4*N(B")
166 ("$,1<H(B" . "$,4*O(B")
167 ("$,1<I(B" . "$,4*Y(B")
168 ("$,1<I<m(B" . "$,4)a(B")
169 ("$,1<J(B" . "$,4*P(B")
170 ("$,1<N(B" . "$,4*Q(B")
171 ("$,1<O(B" . "$,4*R(B")
172 ("$,1<P(B" . "$,4*S(B")
173 ("$,1<Q(B" . "$,4*X(B")
174 ("$,1<R(B" . "$,4*T(B")
175 ("$,1<S(B" . "$,4*W(B")
176 ("$,1<T(B" . "$,4*V(B")
177 ("$,1<U(B" . "$,4*U(B")
178 ("$,1<W(B" . "$,4*[(B")
179 ("$,1<W<m(B" . "$,4)7(B")
180 ("$,1<W<m<P<`(B" . "$,4*_(B")
181 ("$,1<X(B" . "$,4*Z(B")
182 ("$,1<X<m(B" . "$,4)6(B")
183 ("$,1<Y(B" . "$,4*](B")
184 ("$,1<Y<m(B" . "$,4)9(B")
185
186 ;; Dependent vowel signs
187 ("$,1<^(B" . "$,4)c(B")
188 ("$,1<_(B" . "$,4)d(B")
189 ("$,1<`(B" . "$,4)f(B")
190 ("$,1<a(B" . "$,4)g(B")
191 ("$,1<b(B" . "$,4)h(B")
192 ("$,1<f(B" . "$,4)j(B")
193 ("$,1<g(B" . "$,4)k(B")
194 ("$,1<h(B" . "$,4)l(B")
195 ("$,1<j(B" . "$,4)j)c(B")
196 ("$,1<k(B" . "$,4)k)c(B")
197 ("$,1<l(B" . "$,4)j*W(B")
198
199 ;; Various signs
200 ("$,1<m(B" . "$,4)b(B")
201 ("$,1<w(B" . "nil") ;; not supported?
202 ))
203
(defvar tml-char-glyph-hash
  (let ((table (make-hash-table :test 'equal)))
    ;; Entries are stored in list order, so a later duplicate key
    ;; replaces an earlier one.
    (dolist (entry tml-char-glyph)
      (puthash (car entry) (cdr entry) table))
    table)
  "Hash table built from the alist `tml-char-glyph'.
Each key is the car of an entry and each value the matching cdr;
keys are compared with `equal'.")
209
(defvar tml-char-glyph-regexp
  (tamil-regexp-of-hashtbl-keys tml-char-glyph-hash)
  "Regexp matching any key of `tml-char-glyph-hash'.
Computed with `tamil-regexp-of-hashtbl-keys'.")
212
213;; Tamil languages needed to be reordered.
214
215(defvar tml-consonants-regexp
216 "[$,4*H*^*I*J*\*K*L*M*N*O*Y*P*Q*R*S*X*T*W*V*U*[*Z*](B]")
217
218(defvar tml-glyph-reorder-key-glyphs "[$,4)j)k)l(B]")
219
220(defvar tml-glyph-reordering-regexp-list
221 (cons
222 (concat "\\(" tml-consonants-regexp "\\)\\([$,4)j)k)l(B]\\)") "\\2\\1"))
223
224;; Tamil vowel modifiers to be ligatured.
225(defvar tml-glyph-glyph
226 '(
227 ("$,4*H)d(B" . "$,4(a(B") ; ki
228 ("$,4*^)d(B" . "$,4(v(B") ; ksi
229 ("$,4*^)f(B" . "$,4)2(B") ; ksi~
230 ("$,4*I)d(B" . "$,4(b(B") ; n^i
231 ("$,4*J)d(B" . "$,4(c(B") ; ci
232 ("$,4*K)d(B" . "$,4(d(B") ; n~i
233 ("$,4*L)d(B" . "$,4)n(B") ; t.i
234 ("$,4*M)d(B" . "$,4(e(B") ; n.i
235 ("$,4*N)d(B" . "$,4(f(B") ; ti
236 ("$,4*O)d(B" . "$,4(g(B") ; ni
237 ("$,4*P)d(B" . "$,4(h(B") ; pi
238 ("$,4*Q)d(B" . "$,4(i(B") ; mi
239 ("$,4*R)d(B" . "$,4(j(B") ; yi
240 ("$,4*S)d(B" . "$,4(k(B") ; ri
241 ("$,4*T)d(B" . "$,4(l(B") ; li
242 ("$,4*U)d(B" . "$,4(m(B") ; vi
243 ("$,4*V)d(B" . "$,4(n(B") ; l_i
244 ("$,4*W)d(B" . "$,4(o(B") ; l.i
245 ("$,4*X)d(B" . "$,4(p(B") ; r_i
246 ("$,4*Y)d(B" . "$,4(q(B") ; n_i
247 ("$,4*Z)d(B" . "$,4(r(B") ; si
248 ("$,4*[)d(B" . "$,4(s(B") ; s'i
249 ("$,4*\)d(B" . "$,4(t(B") ; ji
250 ("$,4*])d(B" . "$,4(u(B") ; hi
251
252 ("$,4*H)f(B" . "$,4(w(B") ; ki~
253 ("$,4*I)f(B" . "$,4(x(B") ; n^i~
254 ("$,4*J)f(B" . "$,4(y(B") ; ci~
255 ("$,4*K)f(B" . "$,4(z(B") ; n~i~
256 ("$,4*L)f(B" . "$,4)o(B") ; t.i~
257 ("$,4*M)f(B" . "$,4)!(B") ; n.i~
258 ("$,4*N)f(B" . "$,4)"(B") ; ti~
259 ("$,4*O)f(B" . "$,4)#(B") ; ni~
260 ("$,4*P)f(B" . "$,4)$(B") ; pi~
261 ("$,4*Q)f(B" . "$,4)%(B") ; mi~
262 ("$,4*R)f(B" . "$,4)&(B") ; yi~
263 ("$,4*S)f(B" . "$,4)'(B") ; ri~
264 ("$,4*T)f(B" . "$,4)((B") ; li~
265 ("$,4*U)f(B" . "$,4))(B") ; vi~
266 ("$,4*V)f(B" . "$,4)*(B") ; l_i~
267 ("$,4*W)f(B" . "$,4)+(B") ; l.i~
268 ("$,4*X)f(B" . "$,4),(B") ; r_i~
269 ("$,4*Y)f(B" . "$,4)-(B") ; n_i~
270 ("$,4*Z)f(B" . "$,4).(B") ; si~
271 ("$,4*[)f(B" . "$,4)/(B") ; s'i~
272 ("$,4*\)f(B" . "$,4)0(B") ; ji~
273 ("$,4*])f(B" . "$,4)1(B") ; hi~
274
275 ("$,4*H)g(B" . "$,4)p(B") ; ku
276 ("$,4*I)g(B" . "$,4)q(B") ; n^u
277 ("$,4*J)g(B" . "$,4)r(B") ; cu
278 ("$,4*K)g(B" . "$,4)s(B") ; n~u
279 ("$,4*L)g(B" . "$,4)t(B") ; t.u
280 ("$,4*M)g(B" . "$,4)u(B") ; n.u
281 ("$,4*N)g(B" . "$,4)v(B") ; tu
282 ("$,4*O)g(B" . "$,4)x(B") ; nu
283 ("$,4*P)g(B" . "$,4)y(B") ; pu
284 ("$,4*Q)g(B" . "$,4)z(B") ; mu
285 ("$,4*R)g(B" . "$,4){(B") ; yu
286 ("$,4*S)g(B" . "$,4)|(B") ; ru
287 ("$,4*T)g(B" . "$,4)}(B") ; lu
288 ("$,4*U)g(B" . "$,4)~(B") ; vu
289 ("$,4*V)g(B" . "$,4)(B") ; l_u
290 ("$,4*W)g(B" . "$,4* (B") ; l.u
291 ("$,4*X)g(B" . "$,4*!(B") ; r_u
292 ("$,4*Y)g(B" . "$,4*"(B") ; n_u
293
294 ("$,4*H)h(B" . "$,4*#(B") ; ku~
295 ("$,4*I)h(B" . "$,4*$(B") ; n^u~
296 ("$,4*J)h(B" . "$,4*%(B") ; cu~
297 ("$,4*K)h(B" . "$,4*&(B") ; n~u~
298 ("$,4*L)h(B" . "$,4*'(B") ; t.u~
299 ("$,4*M)h(B" . "$,4*((B") ; n.u~
300 ("$,4*N)h(B" . "$,4*)(B") ; tu~
301 ("$,4*O)h(B" . "$,4*+(B") ; nu~
302 ("$,4*P)h(B" . "$,4*,(B") ; pu~
303 ("$,4*Q)h(B" . "$,4*-(B") ; mu~
304 ("$,4*R)h(B" . "$,4*.(B") ; yu~
305 ("$,4*S)h(B" . "$,4*/(B") ; ru~
306 ("$,4*T)h(B" . "$,4*6(B") ; lu~
307 ("$,4*U)h(B" . "$,4*7(B") ; vu~
308 ("$,4*V)h(B" . "$,4*8(B") ; l_u~
309 ("$,4*W)h(B" . "$,4*9(B") ; l.u~
310 ("$,4*X)h(B" . "$,4*:(B") ; r_u~
311 ("$,4*Y)h(B" . "$,4*;(B") ; n_u~
312 ))
313
(defvar tml-glyph-glyph-hash
  (let ((table (make-hash-table :test 'equal)))
    ;; Entries are stored in list order, so a later duplicate key
    ;; replaces an earlier one.
    (dolist (entry tml-glyph-glyph)
      (puthash (car entry) (cdr entry) table))
    table)
  "Hash table built from the alist `tml-glyph-glyph'.
Each key is the car of an entry and each value the matching cdr;
keys are compared with `equal'.")
319
(defvar tml-glyph-glyph-regexp
  (tamil-regexp-of-hashtbl-keys tml-glyph-glyph-hash)
  "Regexp matching any key of `tml-glyph-glyph-hash'.
Computed with `tamil-regexp-of-hashtbl-keys'.")
322
(defun tamil-compose-syllable-string (string)
  "Return a string holding STRING composed as one Tamil syllable.
STRING is passed through `decompose-string', inserted into a temporary
buffer and composed as a whole with `tamil-compose-syllable-region';
the contents of that buffer are returned."
  (let ((plain (decompose-string string)))
    (with-temp-buffer
      (insert plain)
      (tamil-compose-syllable-region (point-min) (point-max))
      (buffer-string))))
328
(defun tamil-compose-syllable-region (from to)
  "Compose tamil syllable in region FROM to TO.
The work is done in four steps:
 1. Every match of `tml-char-glyph-regexp' in the region is replaced
    by its glyph string from `tml-char-glyph-hash', and the glyph
    strings are concatenated.
 2. If the result contains one of `tml-glyph-reorder-key-glyphs', the
    single rule in `tml-glyph-reordering-regexp-list' is applied at
    its first match.
 3. The first match of `tml-glyph-glyph-regexp' is replaced by its
    ligature from `tml-glyph-glyph-hash'.
 4. The glyphs are joined with the composition rule (5 . 3) and given
    to `compose-region' as the components for FROM..TO.
If nothing in the region has an entry in the glyph table, the region
is left untouched.  Point and any restriction in effect are
preserved."
  (let (glyph-str)
    (save-excursion
      (save-restriction
        (narrow-to-region from to)
        (goto-char (point-min))
        ;; Step 1: char-glyph conversion.
        (while (re-search-forward tml-char-glyph-regexp nil t)
          (setq glyph-str
                (concat glyph-str
                        (gethash (match-string 0) tml-char-glyph-hash))))
        ;; GLYPH-STR is still nil when the loop above never matched.
        ;; This is reachable: `tamil-composable-pattern' also accepts
        ;; a sign whose `tml-char-glyph' entry is commented out.
        ;; `string-match' signals an error on nil, so skip the
        ;; remaining steps in that case.
        (when glyph-str
          ;; Step 2: glyph reordering.
          (when (and (string-match tml-glyph-reorder-key-glyphs glyph-str)
                     (string-match (car tml-glyph-reordering-regexp-list)
                                   glyph-str))
            (setq glyph-str
                  (replace-match (cdr tml-glyph-reordering-regexp-list)
                                 nil nil glyph-str)))
          ;; Step 3: glyph-glyph conversion (ligatures).
          (when (string-match tml-glyph-glyph-regexp glyph-str)
            (setq glyph-str
                  (replace-match
                   (gethash (match-string 0 glyph-str) tml-glyph-glyph-hash)
                   nil nil glyph-str)))
          ;; Step 4: interleave the glyphs with the default reference
          ;; point rule; the `cdr' drops the rule that would otherwise
          ;; precede the first glyph.
          (compose-region
           from to
           (cdr (apply 'nconc
                       (mapcar (lambda (glyph) (list '(5 . 3) glyph))
                               glyph-str)))))))))
364
365(provide 'tml-util)
366
367;;; tml-util.el ends here