diff options
| author | Kenichi Handa | 1999-12-15 00:50:18 +0000 |
|---|---|---|
| committer | Kenichi Handa | 1999-12-15 00:50:18 +0000 |
| commit | 3bdf8898b00a8b3fc0b3a275856b0ae28d130cc0 (patch) | |
| tree | 864b23d60d9708836c2ac5875599184f3fdec365 /lisp | |
| parent | c27737aa242649f1c95ec8c7020c578b92b817c1 (diff) | |
| download | emacs-3bdf8898b00a8b3fc0b3a275856b0ae28d130cc0.tar.gz emacs-3bdf8898b00a8b3fc0b3a275856b0ae28d130cc0.zip | |
Most functions rewritten.
(tibetan-char-p): Renamed from tibetan-char-examin.
(tibetan-composable-examin) (tibetan-complete-char-examin)
(tibetan-vertical-stacking) (tibetan-composition): Deleted.
(tibetan-add-components): New function.
(tibetan-composition-function): New function.
Diffstat (limited to 'lisp')
| -rw-r--r-- | lisp/language/tibet-util.el | 510 |
1 files changed, 173 insertions, 337 deletions
diff --git a/lisp/language/tibet-util.el b/lisp/language/tibet-util.el index df2a0261212..9fdf134ef7e 100644 --- a/lisp/language/tibet-util.el +++ b/lisp/language/tibet-util.el | |||
| @@ -29,6 +29,7 @@ | |||
| 29 | ;; History: | 29 | ;; History: |
| 30 | ;; 1997.03.13 Modification in treatment of text properties; | 30 | ;; 1997.03.13 Modification in treatment of text properties; |
| 31 | ;; Support for some special signs and punctuations. | 31 | ;; Support for some special signs and punctuations. |
| 32 | ;; 1999.10.25 Modification for a new composition way by K.Handa. | ||
| 32 | 33 | ||
| 33 | ;;; Code: | 34 | ;;; Code: |
| 34 | 35 | ||
| @@ -37,61 +38,79 @@ | |||
| 37 | (interactive) | 38 | (interactive) |
| 38 | (set-language-environment "Tibetan")) | 39 | (set-language-environment "Tibetan")) |
| 39 | 40 | ||
| 40 | ;;; This function makes a transcription string for | 41 | ;;;###autoload |
| 41 | ;;; re-composing a character. | 42 | (defun tibetan-char-p (ch) |
| 43 | "Check if char CH is Tibetan character. | ||
| 44 | Returns non-nil if CH is Tibetan. Otherwise, returns nil." | ||
| 45 | (memq (char-charset ch) '(tibetan tibetan-1-column))) | ||
| 46 | |||
| 47 | ;;; Functions for Tibetan <-> Tibetan-transcription. | ||
| 42 | 48 | ||
| 43 | ;;;###autoload | 49 | ;;;###autoload |
| 44 | (defun tibetan-tibetan-to-transcription (ch) | 50 | (defun tibetan-tibetan-to-transcription (str) |
| 45 | "Return a transcription string of Tibetan character CH" | 51 | "Transcribe Tibetan string STR and return the corresponding Roman string." |
| 46 | (let ((char ch) | 52 | (let (;; Accumulate transcriptions here in reverse order. |
| 47 | (l (append tibetan-consonant-transcription-alist | 53 | (trans nil) |
| 48 | tibetan-vowel-transcription-alist | 54 | (len (length str)) |
| 49 | tibetan-precomposed-transcription-alist | 55 | (i 0) |
| 50 | tibetan-subjoined-transcription-alist)) | 56 | ch this-trans) |
| 51 | decomp-l t-char trans str result) | 57 | (while (< i len) |
| 52 | (if (eq (char-charset char) 'composition) | 58 | (let ((idx (string-match tibetan-precomposition-rule-alist str i))) |
| 53 | (setq decomp-l (decompose-composite-char char 'list nil)) | 59 | (if (eq idx i) |
| 54 | (setq decomp-l (cons char nil))) | 60 | ;; Ith character and the followings matches precomposable |
| 55 | (setq str "") | 61 | ;; Tibetan sequence. |
| 56 | (while decomp-l | 62 | (setq i (match-end 0) |
| 57 | (setq t-char (char-to-string (car decomp-l))) | 63 | this-trans |
| 58 | (setq trans (car (rassoc t-char l))) | 64 | (car (rassoc |
| 59 | (setq str (concat str trans)) | 65 | (cdr (assoc (match-string 0 str) |
| 60 | (setq decomp-l (cdr decomp-l))) | 66 | tibetan-precomposition-rule-alist)) |
| 61 | (setq result str))) | 67 | tibetan-precomposed-transcription-alist))) |
| 62 | 68 | (setq ch (substring str i (1+ i)) | |
| 63 | ;;; This function translates transcription string into a string of | 69 | i (1+ i) |
| 64 | ;;; Tibetan characters. | 70 | this-trans |
| 71 | (car (or (rassoc ch tibetan-consonant-transcription-alist) | ||
| 72 | (rassoc ch tibetan-vowel-transcription-alist) | ||
| 73 | (rassoc ch tibetan-subjoined-transcription-alist))))) | ||
| 74 | (setq trans (cons this-trans trans)))) | ||
| 75 | (apply 'concat (nreverse trans)))) | ||
| 65 | 76 | ||
| 66 | ;;;###autoload | 77 | ;;;###autoload |
| 67 | (defun tibetan-transcription-to-tibetan (transcription) | 78 | (defun tibetan-transcription-to-tibetan (str) |
| 68 | "Translate Roman transcription into a sequence of Tibetan components." | 79 | "Convert Tibetan Roman string STR to Tibetan character string. |
| 69 | (let ((trans transcription) | 80 | The returned string has no composition information." |
| 70 | (lp tibetan-precomposed-transcription-alist) | 81 | (let (;; Case is significant. |
| 71 | (l (append tibetan-consonant-transcription-alist | ||
| 72 | tibetan-vowel-transcription-alist | ||
| 73 | tibetan-subjoined-transcription-alist)) | ||
| 74 | (case-fold-search nil) | 82 | (case-fold-search nil) |
| 75 | substr t-char p-str t-str result) | 83 | (idx 0) |
| 76 | (setq substr "") | 84 | ;; Accumulate Tibetan strings here in reverse order. |
| 77 | (setq p-str "") | 85 | (t-str-list nil) |
| 78 | (setq t-str "") | 86 | i subtrans) |
| 79 | (cond ((string-match tibetan-precomposed-regexp trans) | 87 | (while (setq i (string-match tibetan-regexp str idx)) |
| 80 | (setq substr (substring trans (match-beginning 0) (match-end 0))) | 88 | (if (< idx i) |
| 81 | (setq trans (substring trans (match-end 0))) | 89 | ;; STR contains a pattern that doesn't match Tibetan |
| 82 | (setq t-char (cdr (assoc substr lp))) | 90 | ;; transcription. Include the pattern as is. |
| 83 | (setq p-str t-char))) | 91 | (setq t-str-list (cons (substring str idx i) t-str-list))) |
| 84 | (while (string-match tibetan-regexp trans) | 92 | (setq subtrans (match-string 0 str) |
| 85 | (setq substr (substring trans (match-beginning 0) (match-end 0))) | 93 | idx (match-end 0)) |
| 86 | (setq trans (substring trans 0 (match-beginning 0))) | 94 | (let ((t-char (cdr (assoc subtrans |
| 87 | (setq t-char | 95 | tibetan-precomposed-transcription-alist)))) |
| 88 | (cdr (assoc substr l))) | 96 | (if t-char |
| 89 | (setq t-str (concat t-char t-str))) | 97 | ;; SUBTRANS corresponds to a transcription for |
| 90 | (setq result (concat p-str t-str)))) | 98 | ;; precomposable Tibetan sequence. |
| 91 | 99 | (setq t-char (car (rassoc t-char | |
| 100 | tibetan-precomposition-rule-alist))) | ||
| 101 | (setq t-char | ||
| 102 | (cdr | ||
| 103 | (or (assoc subtrans tibetan-consonant-transcription-alist) | ||
| 104 | (assoc subtrans tibetan-vowel-transcription-alist) | ||
| 105 | (assoc subtrans tibetan-modifier-transcription-alist) | ||
| 106 | (assoc subtrans tibetan-subjoined-transcription-alist))))) | ||
| 107 | (setq t-str-list (cons t-char t-str-list)))) | ||
| 108 | (if (< idx (length str)) | ||
| 109 | (setq t-str-list (cons (substring str idx) t-str-list))) | ||
| 110 | (apply 'concat (nreverse t-str-list)))) | ||
| 92 | 111 | ||
| 93 | ;;; | 112 | ;;; |
| 94 | ;;; Functions for composing Tibetan character. | 113 | ;;; Functions for composing/decomposing Tibetan sequence. |
| 95 | ;;; | 114 | ;;; |
| 96 | ;;; A Tibetan syllable is typically structured as follows: | 115 | ;;; A Tibetan syllable is typically structured as follows: |
| 97 | ;;; | 116 | ;;; |
| @@ -104,7 +123,7 @@ | |||
| 104 | ;;; | 123 | ;;; |
| 105 | ;;; Here are examples of the words "bsgrubs" and "h'uM" | 124 | ;;; Here are examples of the words "bsgrubs" and "h'uM" |
| 106 | ;;; | 125 | ;;; |
| 107 | ;;; $(7"7(B2$(7%q`"U(B1$(7"7"G(B 2$(7"H`#A`"U0"_(B1 | 126 | ;;; $(7"7"G###C"U"7"G(B $(7"H"A"U"_(B |
| 108 | ;;; | 127 | ;;; |
| 109 | ;;; M | 128 | ;;; M |
| 110 | ;;; b s b s h | 129 | ;;; b s b s h |
| @@ -112,305 +131,122 @@ | |||
| 112 | ;;; r u | 131 | ;;; r u |
| 113 | ;;; u | 132 | ;;; u |
| 114 | ;;; | 133 | ;;; |
| 115 | ;;; Consonants ''', 'w', 'y', 'r' take special forms when they are used | 134 | ;;; Consonants `'' ($(7"A(B), `w' ($(7">(B), `y' ($(7"B(B), `r' ($(7"C(B) take special |
| 116 | ;;; as subjoined consonant. Consonant 'r' takes another special form | 135 | ;;; forms when they are used as subjoined consonant. Consonant `r' |
| 117 | ;;; when used as superjoined as in "rka", and so on, while it does not | 136 | ;;; takes another special form when used as superjoined in such a case |
| 118 | ;;; change its form when conjoined with subjoined ''', 'w' or 'y' | 137 | ;;; as "rka", while it does not change its form when conjoined with |
| 119 | ;;; as in "rwa", "rya". | 138 | ;;; subjoined `'', `w' or `y' as in "rwa", "rya". |
| 120 | ;;; | 139 | |
| 121 | ;;; | 140 | ;; Append a proper composition rule and glyph to COMPONENTS to compose |
| 122 | ;;; As a Tibetan input method should avoid using conversion key, | 141 | ;; CHAR with a composition that has COMPONENTS. |
| 123 | ;;; we use a "Tibetan glyph -> transcription -> Tibetan glyph" | 142 | |
| 124 | ;;; translation at each key input. | 143 | (defun tibetan-add-components (components char) |
| 125 | ;;; | 144 | (let ((last (last components)) |
| 126 | ;;; 1st stage - Check the preceding char. | 145 | (stack-upper '(tc . bc)) |
| 127 | ;;; If the preceding char is Tibetan and composable, then | 146 | (stack-under '(bc . tc)) |
| 128 | ;;; | 147 | rule) |
| 129 | ;;; 2nd stage - Translate the preceding char into transcription | 148 | ;; Special treatment for 'a chung. |
| 130 | ;;; | 149 | ;; If 'a follows a consonant, turn it into the subjoined form. |
| 131 | ;;; 3rd stage - Concatenate the transcription of preceding char | 150 | (if (and (= char ?$(7"A(B) |
| 132 | ;;; and the current input key. | 151 | (aref (char-category-set (car last)) ?0)) |
| 133 | ;;; | 152 | (setq char ?$(7#A(B)) |
| 134 | ;;; 4th stage - Re-translate the concatenated transcription into | 153 | |
| 135 | ;;; a sequence of Tibetan letters. | 154 | (cond |
| 136 | ;;; | 155 | ;; Compose upper vowel sign vertically over. |
| 137 | ;;; 5th stage - Convert leading consonants into one single precomposed char | 156 | ((aref (char-category-set char) ?2) |
| 138 | ;;; if possible. | 157 | (setq rule stack-upper)) |
| 139 | ;;; | 158 | |
| 140 | ;;; 6th stage - Compose the consonants into one composite glyph. | 159 | ;; Compose lower vowel sign vertically under. |
| 141 | ;;; | 160 | ((aref (char-category-set char) ?3) |
| 142 | ;;; (If the current input is a vowel sign or a vowel modifier, | 161 | (setq rule stack-under)) |
| 143 | ;;; then it is composed with preceding char without checking | 162 | |
| 144 | ;;; except when the preceding char is a punctuation or a digit.) | 163 | ;; Transform ra-mgo (superscribed r) if followed by a subjoined |
| 145 | ;;; | 164 | ;; consonant other than w, ', y, r. |
| 146 | ;;; | 165 | ((and (= (car last) ?$(7"C(B) |
| 147 | 166 | (not (memq char '(?$(7#>(B ?$(7#A(B ?$(7#B(B ?$(7#C(B)))) | |
| 148 | ;;; This function is used to avoid composition | 167 | (setcar last ?$(7#P(B) |
| 149 | ;;; between Tibetan and non-Tibetan chars. | 168 | (setq rule stack-under)) |
| 150 | 169 | ||
| 151 | ;;;###autoload | 170 | ;; Transform initial base consonant if followed by a subjoined |
| 152 | (defun tibetan-char-examin (ch) | 171 | ;; consonant but 'a. |
| 153 | "Check if char CH is Tibetan character. | 172 | (t |
| 154 | Returns non-nil if CH is Tibetan. Otherwise, returns nil." | 173 | (let ((laststr (char-to-string (car last)))) |
| 155 | (let ((chr ch)) | 174 | (if (and (/= char ?$(7#A(B) |
| 156 | (if (eq (char-charset chr) 'composition) | 175 | (string-match "[$(7"!(B-$(7"="?"@"D(B-$(7"J(B]" laststr)) |
| 157 | (string-match "\\cq+" (decompose-composite-char chr)) | 176 | (setcar last (string-to-char |
| 158 | (string-match "\\cq" (char-to-string chr))))) | 177 | (cdr (assoc (char-to-string (car last)) |
| 159 | 178 | tibetan-base-to-subjoined-alist))))) | |
| 160 | ;;; This is used to avoid composition between digits, signs, punctuations | 179 | (setq rule stack-under)))) |
| 161 | ;;; and word constituents. | 180 | |
| 162 | 181 | (setcdr last (list rule char)))) | |
| 163 | ;;;###autoload | ||
| 164 | (defun tibetan-composable-examin (ch) | ||
| 165 | "Check if Tibetan char CH is composable. | ||
| 166 | Returns t if CH is a composable char \(i.e. neither punctuation nor digit)." | ||
| 167 | (let ((chr ch) | ||
| 168 | chstr) | ||
| 169 | (if (eq (char-charset chr) 'composition) | ||
| 170 | (setq chstr (decompose-composite-char chr)) | ||
| 171 | (setq chstr (char-to-string chr))) | ||
| 172 | (not (string-match "[$(7!1(B-$(7!o"f$(8!;!=!?!@!A!D"`(B]" chstr)))) | ||
| 173 | |||
| 174 | |||
| 175 | ;;; This checks if a character to be composed contains already | ||
| 176 | ;;; one or more vowels / vowel modifiers. If the character contains | ||
| 177 | ;;; them, then no more consonant should be added. | ||
| 178 | |||
| 179 | ;;;###autoload | ||
| 180 | (defun tibetan-complete-char-examin (ch) | ||
| 181 | "Check if composite char CH contains one or more vowel/vowel modifiers. | ||
| 182 | Returns non-nil, if CH contains vowel/vowel modifiers." | ||
| 183 | (let ((chr ch) | ||
| 184 | chstr) | ||
| 185 | (if (eq (char-charset chr) 'composition) | ||
| 186 | (setq chstr (decompose-composite-char chr)) | ||
| 187 | (setq chstr (char-to-string chr))) | ||
| 188 | (string-match "[$(7!g!e"Q(B-$(7"^"_(B-$(7"l(B]" chstr))) | ||
| 189 | |||
| 190 | ;;; This function makes a composite character consisting of two characters | ||
| 191 | ;;; vertically stacked. | ||
| 192 | |||
| 193 | ;;;###autoload | ||
| 194 | (defun tibetan-vertical-stacking (first second upward) | ||
| 195 | "Return a vertically stacked composite char consisting of FIRST and SECOND. | ||
| 196 | If UPWARD is non-nil, then SECOND is put above FIRST." | ||
| 197 | (let (l rule) | ||
| 198 | (if (cmpcharp first) | ||
| 199 | (setq l (decompose-composite-char first 'list t)) | ||
| 200 | (setq l (list first))) | ||
| 201 | (if upward | ||
| 202 | (setq rule (list '(tc . bc))) | ||
| 203 | (setq rule (list '(bc . tc)))) | ||
| 204 | (setq l (append l rule (list second))) | ||
| 205 | (apply 'compose-chars l))) | ||
| 206 | |||
| 207 | ;;; This function makes a composite char from a string. | ||
| 208 | ;;; Note that this function returns a string, not a char. | ||
| 209 | 182 | ||
| 210 | ;;;###autoload | 183 | ;;;###autoload |
| 211 | (defun tibetan-compose-string (str) | 184 | (defun tibetan-compose-string (str) |
| 212 | "Compose a sequence of Tibetan character components into a composite character. | 185 | "Compose Tibetan string STR." |
| 213 | Returns a string containing a composite character." | 186 | (let ((idx 0)) |
| 214 | (let ((t-str str) | 187 | ;; `$(7"A(B' is included in the pattern for subjoined consonants |
| 215 | f-str s-str f-ch s-ch rest composed result) | 188 | ;; because we treat it specially in tibetan-add-components. |
| 216 | ;;Make sure no redundant vowel sign is present. | 189 | (while (setq idx (string-match tibetan-composable-pattern str idx)) |
| 217 | (if (string-match | 190 | (let ((from idx) |
| 218 | "^\\(.+\\)\\($(7"Q(B\\)\\([$(7!I!g!e"Q(B-$(7"^"_(B-$(7"l(B]\\)" t-str) | 191 | (to (match-end 0)) |
| 219 | (setq t-str (concat | 192 | components) |
| 220 | (match-string 1 t-str) | 193 | (if (eq (string-match tibetan-precomposition-rule-regexp str idx) idx) |
| 221 | (match-string 3 t-str)))) | 194 | (setq idx (match-end 0) |
| 222 | (if (string-match | 195 | components |
| 223 | "^\\(.+\\)\\([$(7!I!g!e"Q(B-$(7"^"_(B-$(7"l(B]\\)\\($(7"Q(B\\)" t-str) | 196 | (list (string-to-char |
| 224 | (setq t-str (concat | 197 | (cdr |
| 225 | (match-string 1 t-str) | 198 | (assoc (match-string 0 str) |
| 226 | (match-string 2 t-str)))) | 199 | tibetan-precomposition-rule-alist))))) |
| 227 | ;;Start conversion. | 200 | (setq components (list (aref str idx)) |
| 228 | (setq result "") | 201 | idx (1+ idx))) |
| 229 | ;; Consecutive base/precomposed consonants are reduced to the last one. | 202 | (while (< idx to) |
| 230 | (while (string-match "^\\([$(7"!(B-$(7"J$!(B-$(7%u(B]\\)\\([$(7"!(B-$(7"@"B(B-$(7"J$!(B-$(7%u(B].*\\)" t-str) | 203 | (tibetan-add-components components (aref str idx)) |
| 231 | (setq result (concat result (match-string 1 t-str))) | 204 | (setq idx (1+ idx))) |
| 232 | (setq t-str (match-string 2 t-str))) | 205 | (compose-string str from to components)))) |
| 233 | ;; Vowel/vowel modifier, subjoined consonants are added one by one | 206 | str) |
| 234 | ;; to the preceding element. | ||
| 235 | (while | ||
| 236 | (string-match "^\\(.\\)\\([$(7"A#!(B-$(7#J!I!g!e"Q(B-$(7"^"_(B-$(7"l(B]\\)\\(.*\\)" t-str) | ||
| 237 | (setq f-str (match-string 1 t-str)) | ||
| 238 | (setq f-ch (string-to-char f-str)) | ||
| 239 | (setq s-str (match-string 2 t-str)) | ||
| 240 | ;;Special treatment for 'a chung. | ||
| 241 | ;;If 'a follows a consonant, then turned into its subjoined form. | ||
| 242 | (if (and (string-match "$(7"A(B" s-str) | ||
| 243 | (not (tibetan-complete-char-examin f-ch))) | ||
| 244 | (setq s-str "$(7#A(B")) | ||
| 245 | (setq s-ch (string-to-char s-str)) | ||
| 246 | (setq rest (match-string 3 t-str)) | ||
| 247 | (cond ((string-match "\\c2" s-str);; upper vowel sign | ||
| 248 | (setq composed | ||
| 249 | (tibetan-vertical-stacking f-ch s-ch t))) | ||
| 250 | ((string-match "\\c3" s-str);; lower vowel sign | ||
| 251 | (setq composed | ||
| 252 | (tibetan-vertical-stacking f-ch s-ch nil))) | ||
| 253 | ;;Automatic conversion of ra-mgo (superscribed r). | ||
| 254 | ;;'r' is converted if followed by a subjoined consonant | ||
| 255 | ;;other than w, ', y, r. | ||
| 256 | ((and (string-match "$(7"C(B" f-str) | ||
| 257 | (not (string-match "[$(7#>#A#B#C(B]" s-str))) | ||
| 258 | (setq f-ch ?$(7#P(B) | ||
| 259 | (setq composed | ||
| 260 | (tibetan-vertical-stacking f-ch s-ch nil))) | ||
| 261 | ((not (tibetan-complete-char-examin f-ch)) | ||
| 262 | ;;Initial base consonant is tranformed, if followed by | ||
| 263 | ;;a subjoined consonant, except when it is followed | ||
| 264 | ;;by a subscribed 'a. | ||
| 265 | (if (and (string-match "[$(7"!(B-$(7"="?"@"D(B-$(7"J(B]" f-str) | ||
| 266 | (not (string-match "$(7#A(B" s-str))) | ||
| 267 | (setq f-ch | ||
| 268 | (string-to-char | ||
| 269 | (cdr (assoc f-str tibetan-base-to-subjoined-alist))))) | ||
| 270 | (setq composed | ||
| 271 | (tibetan-vertical-stacking f-ch s-ch nil))) | ||
| 272 | (t | ||
| 273 | (setq composed s-str) | ||
| 274 | (setq result (concat result f-str)))) | ||
| 275 | (setq t-str (concat composed rest))) | ||
| 276 | (setq result (concat result t-str)))) | ||
| 277 | |||
| 278 | ;;; quail <-> conversion interface. | ||
| 279 | |||
| 280 | ;;;###autoload | ||
| 281 | (defun tibetan-composition (pc key) | ||
| 282 | "Interface to quail input method. | ||
| 283 | Takes two arguments: char PC and string KEY, where PC is the preceding | ||
| 284 | character to be composed with current input KEY. | ||
| 285 | Returns a string which is the result of composition." | ||
| 286 | (let (trans cur-ch t-str result) | ||
| 287 | ;; Make a tibetan character corresponding to current input key. | ||
| 288 | (setq cur-ch (tibetan-transcription-to-tibetan key)) | ||
| 289 | ;; Check if the preceding character is Tibetan and composable. | ||
| 290 | (cond ((and (tibetan-char-examin pc) | ||
| 291 | (tibetan-composable-examin pc)) | ||
| 292 | ;;If Tibetan char corresponding to the current input key exists, | ||
| 293 | (cond (cur-ch | ||
| 294 | ;; Then, | ||
| 295 | ;; Convert the preceding character into transcription, | ||
| 296 | ;; and concatenate it with the current input key, | ||
| 297 | (setq trans (tibetan-tibetan-to-transcription pc)) | ||
| 298 | (setq trans (concat trans key)) | ||
| 299 | ;; Concatenated transcription is converted to | ||
| 300 | ;; a sequence of Tibetan characters, | ||
| 301 | (setq t-str (tibetan-transcription-to-tibetan trans)) | ||
| 302 | ;; And it is composed into a composite character. | ||
| 303 | (setq result (tibetan-compose-string t-str))) | ||
| 304 | ;; Else, | ||
| 305 | (t | ||
| 306 | ;; Simply concatenate the preceding character and | ||
| 307 | ;; the current input key. | ||
| 308 | (setq result (char-to-string pc)) | ||
| 309 | (setq result (concat result key))))) | ||
| 310 | ;; If the preceding char is not Tibetan or not composable, | ||
| 311 | (t | ||
| 312 | ;; pc = 0 means the point is at the beginning of buffer. | ||
| 313 | (if (not (eq pc 0)) | ||
| 314 | (setq result (char-to-string pc))) | ||
| 315 | (if cur-ch | ||
| 316 | (setq result (concat result cur-ch)) | ||
| 317 | (setq result (concat result key)))) | ||
| 318 | ))) | ||
| 319 | |||
| 320 | 207 | ||
| 321 | ;;;###autoload | 208 | ;;;###autoload |
| 322 | (defun tibetan-decompose-region (beg end) | 209 | (defun tibetan-compose-region (beg end) |
| 323 | "Decompose Tibetan characters in the region BEG END into their components. | 210 | "Compose Tibetan text the region BEG and END." |
| 324 | Components are: base and subjoined consonants, vowel signs, vowel modifiers. | ||
| 325 | One column punctuations are converted to their 2 column equivalents." | ||
| 326 | (interactive "r") | 211 | (interactive "r") |
| 327 | (let (ch-str ch-beg ch-end) | 212 | (let (str result chars) |
| 328 | (save-excursion | 213 | (save-excursion |
| 329 | (save-restriction | 214 | (save-restriction |
| 330 | (narrow-to-region beg end) | 215 | (narrow-to-region beg end) |
| 331 | (goto-char (point-min)) | 216 | (goto-char (point-min)) |
| 332 | ;; \\cq = Tibetan character | 217 | ;; `$(7"A(B' is included in the pattern for subjoined consonants |
| 333 | (while (re-search-forward "\\cq" nil t) | 218 | ;; because we treat it specially in tibetan-add-components. |
| 334 | (setq ch-str (buffer-substring-no-properties | 219 | (while (re-search-forward tibetan-composable-pattern nil t) |
| 335 | (match-beginning 0) (match-end 0))) | 220 | (let ((from (match-beginning 0)) |
| 336 | ;; Save the points. Maybe, using save-match-data is preferable. | 221 | (to (match-end 0)) |
| 337 | ;; But in order not to lose the trace(because the body is too long), | 222 | components) |
| 338 | ;; we save the points in variables. | 223 | (goto-char from) |
| 339 | (setq ch-beg (match-beginning 0)) | 224 | (if (looking-at tibetan-precomposition-rule-regexp) |
| 340 | (setq ch-end (match-end 0)) | 225 | (progn |
| 341 | ;; Here starts the decomposition. | 226 | (setq components |
| 342 | (cond | 227 | (list (string-to-char |
| 343 | ;; 1 column punctuations -> 2 column equivalent | 228 | (cdr |
| 344 | ((string-match "[$(8!D!;!=!?!@!A"`(B]" ch-str) | 229 | (assoc (match-string 0) |
| 345 | (setq ch-str | 230 | tibetan-precomposition-rule-alist))))) |
| 346 | (car (rassoc ch-str tibetan-precomposition-rule-alist)))) | 231 | (goto-char (match-end 0))) |
| 347 | ;; Decomposition of composite character. | 232 | (setq components (list (char-after from))) |
| 348 | ((eq (char-charset (string-to-char ch-str)) 'composition) | 233 | (forward-char 1)) |
| 349 | ;; Make a string which consists of a sequence of | 234 | (while (< (point) to) |
| 350 | ;; components. | 235 | (tibetan-add-components components (following-char)) |
| 351 | (setq ch-str (decompose-composite-char (string-to-char ch-str))) | 236 | (forward-char 1)) |
| 352 | ;; Converts nyi zla into base elements. | 237 | (compose-region from to components))))))) |
| 353 | (cond ((string= ch-str "$(7#R#S#S#S(B") | ||
| 354 | (setq ch-str "$(7!4!5!5(B")) | ||
| 355 | ((string= ch-str "$(7#R#S#S(B") | ||
| 356 | (setq ch-str "$(7!4!5(B")) | ||
| 357 | ((string= ch-str "$(7#R#S!I(B") | ||
| 358 | (setq ch-str "$(7!6(B")) | ||
| 359 | ((string= ch-str "$(7#R#S(B") | ||
| 360 | (setq ch-str "$(7!4(B"))))) | ||
| 361 | ;; If the sequence of components starts with a subjoined consonants, | ||
| 362 | (if (string-match "^\\([$(7#!(B-$(7#J(B]\\)\\(.*\\)$" ch-str) | ||
| 363 | ;; then the first components is converted to its base form. | ||
| 364 | (setq ch-str | ||
| 365 | (concat (car (rassoc (match-string 1 ch-str) | ||
| 366 | tibetan-base-to-subjoined-alist)) | ||
| 367 | (match-string 2 ch-str)))) | ||
| 368 | ;; If the sequence of components starts with a precomposed character, | ||
| 369 | (if (string-match "^\\([$(7$!(B-$(7%u(B]\\)\\(.*\\)$" ch-str) | ||
| 370 | ;; then it is converted into a sequence of components. | ||
| 371 | (setq ch-str | ||
| 372 | (concat (car (rassoc (match-string 1 ch-str) | ||
| 373 | tibetan-precomposition-rule-alist)) | ||
| 374 | (match-string 2 ch-str)))) | ||
| 375 | ;; Special treatment for superscribed r. | ||
| 376 | (if (string-match "^$(7#P(B\\(.*\\)$" ch-str) | ||
| 377 | (setq ch-str (concat "$(7"C(B" (match-string 1 ch-str)))) | ||
| 378 | ;; Finally, the result of decomposition is inserted, and | ||
| 379 | ;; the composite character is deleted. | ||
| 380 | (insert-and-inherit ch-str) | ||
| 381 | (delete-region ch-beg ch-end)))))) | ||
| 382 | 238 | ||
| 383 | ;;;###autoload | 239 | ;;;###autoload |
| 384 | (defun tibetan-compose-region (beg end) | 240 | (defalias 'tibetan-decompose-region 'decompose-region) |
| 385 | "Make composite chars from Tibetan character components in the region BEG END. | 241 | ;;;###autoload |
| 386 | Two column punctuations are converted to their 1 column equivalents." | 242 | (defalias 'tibetan-decompose-string 'decompose-string) |
| 387 | (interactive "r") | 243 | |
| 388 | (let (str result) | 244 | ;;;###autoload |
| 389 | (save-excursion | 245 | (defun tibetan-composition-function (from to pattern &optional string) |
| 390 | (save-restriction | 246 | (if string |
| 391 | (narrow-to-region beg end) | 247 | (tibetan-compose-string string) |
| 392 | (goto-char (point-min)) | 248 | (tibetan-compose-region from to)) |
| 393 | ;; First, sequence of components which has a precomposed equivalent | 249 | (- to from)) |
| 394 | ;; is converted. | ||
| 395 | (while (re-search-forward | ||
| 396 | tibetan-precomposition-rule-regexp nil t) | ||
| 397 | (setq str (buffer-substring-no-properties | ||
| 398 | (match-beginning 0) (match-end 0))) | ||
| 399 | (save-match-data | ||
| 400 | (insert-and-inherit | ||
| 401 | (cdr (assoc str tibetan-precomposition-rule-alist)))) | ||
| 402 | (delete-region (match-beginning 0) (match-end 0))) | ||
| 403 | (goto-char (point-min)) | ||
| 404 | ;; Then, composable elements are put into a composite character. | ||
| 405 | (while (re-search-forward | ||
| 406 | "[$(7"!(B-$(7"J$!(B-$(7%u(B]+[$(7#!(B-$(7#J!I!g!e"Q(B-$(7"^"_(B-$(7"l(B]+" | ||
| 407 | nil t) | ||
| 408 | (setq str (buffer-substring-no-properties | ||
| 409 | (match-beginning 0) (match-end 0))) | ||
| 410 | (save-match-data | ||
| 411 | (setq result (tibetan-compose-string str)) | ||
| 412 | (insert-and-inherit result)) | ||
| 413 | (delete-region (match-beginning 0) (match-end 0))))))) | ||
| 414 | 250 | ||
| 415 | ;;; | 251 | ;;; |
| 416 | ;;; This variable is used to avoid repeated decomposition. | 252 | ;;; This variable is used to avoid repeated decomposition. |
| @@ -420,7 +256,7 @@ Two column punctuations are converted to their 1 column equivalents." | |||
| 420 | ;;;###autoload | 256 | ;;;###autoload |
| 421 | (defun tibetan-decompose-buffer () | 257 | (defun tibetan-decompose-buffer () |
| 422 | "Decomposes Tibetan characters in the buffer into their components. | 258 | "Decomposes Tibetan characters in the buffer into their components. |
| 423 | See also docstring of the function tibetan-decompose-region." | 259 | See also the documentation of the function `tibetan-decompose-region'." |
| 424 | (interactive) | 260 | (interactive) |
| 425 | (make-local-variable 'tibetan-decomposed) | 261 | (make-local-variable 'tibetan-decomposed) |
| 426 | (cond ((not tibetan-decomposed) | 262 | (cond ((not tibetan-decomposed) |