about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Kenichi Handa, 2009-12-25 02:45:47 +0000
committer: Kenichi Handa, 2009-12-25 02:45:47 +0000
commit: f758cd2abb201dacad4a750caba40d6797ee8fb2 (patch)
tree: 5de843e99c4eb2a0f86efece2cc15c6d8abc126c
parent: 7f0b390a50dce5f6deab924f537b6125639073ec (diff)
download: emacs-f758cd2abb201dacad4a750caba40d6797ee8fb2.tar.gz
          emacs-f758cd2abb201dacad4a750caba40d6797ee8fb2.zip
(devanagari-composable-pattern): Fix to handle ZWNJ and ZWJ. Use it in composition-function-table for Devanagari.
(malayalam-composable-pattern): Fix previous change.
-rw-r--r-- lisp/language/indian.el 67
1 file changed, 45 insertions, 22 deletions
diff --git a/lisp/language/indian.el b/lisp/language/indian.el
index 2be54df6591..82f24a0272f 100644
--- a/lisp/language/indian.el
+++ b/lisp/language/indian.el
@@ -139,12 +139,34 @@ South Indian language Malayalam is supported in this language environment."))
139 regexp)) 139 regexp))
140 140
141(defconst devanagari-composable-pattern 141(defconst devanagari-composable-pattern
142 (concat 142 (let ((table
143 "\\([अ-औॠॡ][ँं]?\\)\\|[ः।]" 143 '(("V" . "[\u0904-\u0914\u0960-\u0961\u0972]") ; independent vowel
144 "\\|\\(" 144 ("C" . "[\u0915-\u0939]") ; consonant
145 "\\(?:\\(?:[क-हक़-य़]्\\)?\\(?:[क-हक़-य़]्\\)?\\(?:[क-हक़-य़]्\\)?[क-हक़-य़]्\\)?" 145 ("R" . "\u0930") ; RA
146 "[क-हक़-य़]\\(?:्\\|[ा-्ॢॣ]?[ंँ]?\\)?" 146 ("n" . "\u093C") ; NUKTA
147 "\\)") 147 ("H" . "\u094D") ; HALANT
148 ("m" . "\u093F") ; vowel sign (pre)
149 ("u" . "[\u0945-\u0948\u0955]") ; vowel sign (above)
150 ("b" . "[\u0941-\u0944\u0962-\u0963]") ; vowel sign (below)
151 ("p" . "[\u093E\u0940\u0949-\u094C]") ; vowel sign (post)
152 ("A" . "[\u0900-\u0902\u0953-\u0954]") ; vowel modifier (above)
153 ("a" . "\u0903") ; vowel modifier (post)
154 ("S" . "\u0951") ; stress sign (above)
155 ("s" . "\u0952") ; stress sign (below)
156 ("J" . "\u200D") ; ZWJ
157 ("N" . "\u200C") ; ZWNJ
158 ("X" . "[\u0900-\u097F]")))) ; all coverage
159 (indian-compose-regexp
160 (concat
161 ;; syllables with an independent vowel, or
162 "\\(?:RH\\)?Vn?m?b?u?p?n?A?s?S?a?\\|"
163 ;; consonant-based syllables, or
164 "\\(?:Cn?J?HJ?\\)*Cn?\\(?:H[NJ]?\\|m?b?u?p?n?A?s?S?a?\\)\\|"
165 ;; special consonant form, or
166 "JHR\\|"
167 ;; any other singleton characters
168 "X")
169 table))
148 "Regexp matching a composable sequence of Devanagari characters.") 170 "Regexp matching a composable sequence of Devanagari characters.")
149 171
150(defconst tamil-composable-pattern 172(defconst tamil-composable-pattern
@@ -165,23 +187,24 @@ South Indian language Malayalam is supported in this language environment."))
165 "Regexp matching a composable sequence of Kannada characters.") 187 "Regexp matching a composable sequence of Kannada characters.")
166 188
167(defconst malayalam-composable-pattern 189(defconst malayalam-composable-pattern
168 (let ((table '(("V" . "[\u0D05-\u0D14\u0D60-\u0D61]") ; independent vowel 190 (let ((table
169 ("C" . "[\u0D15-\u0D39]") ; consonant 191 '(("V" . "[\u0D05-\u0D14\u0D60-\u0D61]") ; independent vowel
170 ("m" . "[\u0D46-\u0D48\u0D4A-\u0D4C]") ; prebase matra 192 ("C" . "[\u0D15-\u0D39]") ; consonant
171 ("p" . "[\u0D3E-\u0D44\u0D57]") ; postname matra 193 ("m" . "[\u0D46-\u0D48\u0D4A-\u0D4C]") ; prebase matra
172 ("b" . "[\u0D62-\u0D63]") ; belowbase matra 194 ("p" . "[\u0D3E-\u0D44\u0D57]") ; postbase matra
173 ("a" . "[\u0D02-\u0D03]") ; abovebase sign 195 ("b" . "[\u0D62-\u0D63]") ; belowbase matra
174 ("H" . "്") ; virama sign 196 ("a" . "[\u0D02-\u0D03]") ; abovebase sign
175 ("N" . "\u200D") ; ZWJ 197 ("H" . "\u0D4D") ; virama sign
176 ("J" . "\u200C") ; ZWNJ 198 ("N" . "\u200D") ; ZWJ
177 ("X" . "[\u0D00-\u0D7F]")))) ; all coverage 199 ("J" . "\u200C") ; ZWNJ
200 ("X" . "[\u0D00-\u0D7F]")))) ; all coverage
178 (indian-compose-regexp 201 (indian-compose-regexp
179 (concat 202 (concat
180 ;; consonant-based syllables 203 ;; syllables with an independent vowel, or
181 "\\(CJ?HJ?\\)*C\\(H[NJ]?\\|m?b?p?a?\\)\\|" 204 "V\\(?:J?HC\\)?m?b?p?a?\\|"
182 ;; syllables with an independent vowel 205 ;; consonant-based syllables, or
183 "V\\(J?HC\\)?m?b?p?a?\\|" 206 "\\(?:CJ?HJ?\\)\\{0,4\\}C\\(?:H[NJ]?\\|m?b?p?a?\\)\\|"
184 ;; special consonant form 207 ;; special consonant form, or
185 "JHC\\|" 208 "JHC\\|"
186 ;; any other singleton characters 209 ;; any other singleton characters
187 "X") 210 "X")
@@ -189,7 +212,7 @@ South Indian language Malayalam is supported in this language environment."))
189 "Regexp matching a composable sequence of Malayalam characters.") 212 "Regexp matching a composable sequence of Malayalam characters.")
190 213
191(let ((script-regexp-alist 214(let ((script-regexp-alist
192 `((devanagari . "[\x900-\x97F\x200C\x200D]+") 215 `((devanagari . ,devanagari-composable-pattern)
193 (bengali . "[\x980-\x9FF\x200C\x200D]+") 216 (bengali . "[\x980-\x9FF\x200C\x200D]+")
194 (gurmukhi . "[\xA00-\xA7F\x200C\x200D]+") 217 (gurmukhi . "[\xA00-\xA7F\x200C\x200D]+")
195 (gujarati . "[\xA80-\xAFF\x200C\x200D]+") 218 (gujarati . "[\xA80-\xAFF\x200C\x200D]+")