diff options
| -rw-r--r-- | lisp/language/indian.el | 67 |
1 file changed, 45 insertions, 22 deletions
diff --git a/lisp/language/indian.el b/lisp/language/indian.el
index 2be54df6591..82f24a0272f 100644
--- a/lisp/language/indian.el
+++ b/lisp/language/indian.el
| @@ -139,12 +139,34 @@ South Indian language Malayalam is supported in this language environment.")) | |||
| 139 | regexp)) | 139 | regexp)) |
| 140 | 140 | ||
| 141 | (defconst devanagari-composable-pattern | 141 | (defconst devanagari-composable-pattern |
| 142 | (concat | 142 | (let ((table |
| 143 | "\\([अ-औॠॡ][ँं]?\\)\\|[ः।]" | 143 | '(("V" . "[\u0904-\u0914\u0960-\u0961\u0972]") ; independent vowel |
| 144 | "\\|\\(" | 144 | ("C" . "[\u0915-\u0939]") ; consonant |
| 145 | "\\(?:\\(?:[क-हक़-य़]्\\)?\\(?:[क-हक़-य़]्\\)?\\(?:[क-हक़-य़]्\\)?[क-हक़-य़]्\\)?" | 145 | ("R" . "\u0930") ; RA |
| 146 | "[क-हक़-य़]\\(?:्\\|[ा-्ॢॣ]?[ंँ]?\\)?" | 146 | ("n" . "\u093C") ; NUKTA |
| 147 | "\\)") | 147 | ("H" . "\u094D") ; HALANT |
| 148 | ("m" . "\u093F") ; vowel sign (pre) | ||
| 149 | ("u" . "[\u0945-\u0948\u0955]") ; vowel sign (above) | ||
| 150 | ("b" . "[\u0941-\u0944\u0962-\u0963]") ; vowel sign (below) | ||
| 151 | ("p" . "[\u093E\u0940\u0949-\u094C]") ; vowel sign (post) | ||
| 152 | ("A" . "[\u0900-\u0902\u0953-\u0954]") ; vowel modifier (above) | ||
| 153 | ("a" . "\u0903") ; vowel modifier (post) | ||
| 154 | ("S" . "\u0951") ; stress sign (above) | ||
| 155 | ("s" . "\u0952") ; stress sign (below) | ||
| 156 | ("J" . "\u200D") ; ZWJ | ||
| 157 | ("N" . "\u200C") ; ZWNJ | ||
| 158 | ("X" . "[\u0900-\u097F]")))) ; all coverage | ||
| 159 | (indian-compose-regexp | ||
| 160 | (concat | ||
| 161 | ;; syllables with an independent vowel, or | ||
| 162 | "\\(?:RH\\)?Vn?m?b?u?p?n?A?s?S?a?\\|" | ||
| 163 | ;; consonant-based syllables, or | ||
| 164 | "\\(?:Cn?J?HJ?\\)*Cn?\\(?:H[NJ]?\\|m?b?u?p?n?A?s?S?a?\\)\\|" | ||
| 165 | ;; special consonant form, or | ||
| 166 | "JHR\\|" | ||
| 167 | ;; any other singleton characters | ||
| 168 | "X") | ||
| 169 | table)) | ||
| 148 | "Regexp matching a composable sequence of Devanagari characters.") | 170 | "Regexp matching a composable sequence of Devanagari characters.") |
| 149 | 171 | ||
| 150 | (defconst tamil-composable-pattern | 172 | (defconst tamil-composable-pattern |
| @@ -165,23 +187,24 @@ South Indian language Malayalam is supported in this language environment.")) | |||
| 165 | "Regexp matching a composable sequence of Kannada characters.") | 187 | "Regexp matching a composable sequence of Kannada characters.") |
| 166 | 188 | ||
| 167 | (defconst malayalam-composable-pattern | 189 | (defconst malayalam-composable-pattern |
| 168 | (let ((table '(("V" . "[\u0D05-\u0D14\u0D60-\u0D61]") ; independent vowel | 190 | (let ((table |
| 169 | ("C" . "[\u0D15-\u0D39]") ; consonant | 191 | '(("V" . "[\u0D05-\u0D14\u0D60-\u0D61]") ; independent vowel |
| 170 | ("m" . "[\u0D46-\u0D48\u0D4A-\u0D4C]") ; prebase matra | 192 | ("C" . "[\u0D15-\u0D39]") ; consonant |
| 171 | ("p" . "[\u0D3E-\u0D44\u0D57]") ; postname matra | 193 | ("m" . "[\u0D46-\u0D48\u0D4A-\u0D4C]") ; prebase matra |
| 172 | ("b" . "[\u0D62-\u0D63]") ; belowbase matra | 194 | ("p" . "[\u0D3E-\u0D44\u0D57]") ; postbase matra |
| 173 | ("a" . "[\u0D02-\u0D03]") ; abovebase sign | 195 | ("b" . "[\u0D62-\u0D63]") ; belowbase matra |
| 174 | ("H" . "്") ; virama sign | 196 | ("a" . "[\u0D02-\u0D03]") ; abovebase sign |
| 175 | ("N" . "\u200D") ; ZWJ | 197 | ("H" . "\u0D4D") ; virama sign |
| 176 | ("J" . "\u200C") ; ZWNJ | 198 | ("N" . "\u200D") ; ZWJ |
| 177 | ("X" . "[\u0D00-\u0D7F]")))) ; all coverage | 199 | ("J" . "\u200C") ; ZWNJ |
| 200 | ("X" . "[\u0D00-\u0D7F]")))) ; all coverage | ||
| 178 | (indian-compose-regexp | 201 | (indian-compose-regexp |
| 179 | (concat | 202 | (concat |
| 180 | ;; consonant-based syllables | 203 | ;; syllables with an independent vowel, or |
| 181 | "\\(CJ?HJ?\\)*C\\(H[NJ]?\\|m?b?p?a?\\)\\|" | 204 | "V\\(?:J?HC\\)?m?b?p?a?\\|" |
| 182 | ;; syllables with an independent vowel | 205 | ;; consonant-based syllables, or |
| 183 | "V\\(J?HC\\)?m?b?p?a?\\|" | 206 | "\\(?:CJ?HJ?\\)\\{0,4\\}C\\(?:H[NJ]?\\|m?b?p?a?\\)\\|" |
| 184 | ;; special consonant form | 207 | ;; special consonant form, or |
| 185 | "JHC\\|" | 208 | "JHC\\|" |
| 186 | ;; any other singleton characters | 209 | ;; any other singleton characters |
| 187 | "X") | 210 | "X") |
| @@ -189,7 +212,7 @@ South Indian language Malayalam is supported in this language environment.")) | |||
| 189 | "Regexp matching a composable sequence of Malayalam characters.") | 212 | "Regexp matching a composable sequence of Malayalam characters.") |
| 190 | 213 | ||
| 191 | (let ((script-regexp-alist | 214 | (let ((script-regexp-alist |
| 192 | `((devanagari . "[\x900-\x97F\x200C\x200D]+") | 215 | `((devanagari . ,devanagari-composable-pattern) |
| 193 | (bengali . "[\x980-\x9FF\x200C\x200D]+") | 216 | (bengali . "[\x980-\x9FF\x200C\x200D]+") |
| 194 | (gurmukhi . "[\xA00-\xA7F\x200C\x200D]+") | 217 | (gurmukhi . "[\xA00-\xA7F\x200C\x200D]+") |
| 195 | (gujarati . "[\xA80-\xAFF\x200C\x200D]+") | 218 | (gujarati . "[\xA80-\xAFF\x200C\x200D]+") |