about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- lisp/language/china-util.el 113
1 files changed, 64 insertions, 49 deletions
diff --git a/lisp/language/china-util.el b/lisp/language/china-util.el
index e5316409326..6505fb8c3d8 100644
--- a/lisp/language/china-util.el
+++ b/lisp/language/china-util.el
@@ -88,43 +88,34 @@ Return the length of resulting text."
88 (let (pos ch) 88 (let (pos ch)
89 (narrow-to-region beg end) 89 (narrow-to-region beg end)
90 90
91 ;; We, at first, convert HZ/ZW to `euc-china', 91 ;; We, at first, convert HZ/ZW to `iso-2022-7bit',
92 ;; then decode it. 92 ;; then decode it.
93 93
94 ;; "~\n" -> "\n", "~~" -> "~" 94 ;; "~\n" -> "", "~~" -> "~"
95 (goto-char (point-min)) 95 (goto-char (point-min))
96 (while (search-forward "~" nil t) 96 (while (search-forward "~" nil t)
97 (setq ch (following-char)) 97 (setq ch (following-char))
98 (if (or (= ch ?\n) (= ch ?~)) (delete-char -1))) 98 (cond ((= ch ?{)
99 (delete-region (1- (point)) (1+ (point)))
100 (setq pos (point))
101 (insert iso2022-gb-designation)
102 (if (looking-at "\\([!-}][!-~]\\)*")
103 (goto-char (match-end 0)))
104 (if (looking-at hz-ascii-designation)
105 (delete-region (match-beginning 0) (match-end 0)))
106 (insert iso2022-ascii-designation)
107 (decode-coding-region pos (point) 'iso-2022-7bit))
108
109 ((= ch ?~)
110 (delete-char 1))
111
112 ((and (= ch ?\n)
113 decode-hz-line-continuation)
114 (delete-region (1- (point)) (1+ (point))))
115
116 (t
117 (forward-char 1)))))
99 118
100 ;; "^zW...\n" -> Chinese GB2312
101 ;; "~{...~}" -> Chinese GB2312
102 (goto-char (point-min))
103 (setq beg nil)
104 (while (re-search-forward hz/zw-start-gb nil t)
105 (setq pos (match-beginning 0)
106 ch (char-after pos))
107 ;; Record the first position to start conversion.
108 (or beg (setq beg pos))
109 (end-of-line)
110 (setq end (point))
111 (if (>= ch 128) ; 8bit GB2312
112 nil
113 (goto-char pos)
114 (delete-char 2)
115 (setq end (- end 2))
116 (if (= ch ?z) ; ZW -> euc-china
117 (progn
118 (translate-region (point) end hz-set-msb-table)
119 (goto-char end))
120 (if (search-forward hz-ascii-designation
121 (if decode-hz-line-continuation nil end)
122 t)
123 (delete-char -2))
124 (setq end (point))
125 (translate-region pos (point) hz-set-msb-table))))
126 (if beg
127 (decode-coding-region beg end 'euc-china)))
128 (- (point-max) (point-min))))) 119 (- (point-max) (point-min)))))
129 120
130;;;###autoload 121;;;###autoload
@@ -133,33 +124,57 @@ Return the length of resulting text."
133 (interactive) 124 (interactive)
134 (decode-hz-region (point-min) (point-max))) 125 (decode-hz-region (point-min) (point-max)))
135 126
127(defvar hz-category-table nil)
128
136;;;###autoload 129;;;###autoload
137(defun encode-hz-region (beg end) 130(defun encode-hz-region (beg end)
138 "Encode the text in the current region to HZ. 131 "Encode the text in the current region to HZ.
139Return the length of resulting text." 132Return the length of resulting text."
140 (interactive "r") 133 (interactive "r")
134 (unless hz-category-table
135 (setq hz-category-table (make-category-table))
136 (with-category-table hz-category-table
137 (define-category ?c "hz encodable")
138 (map-charset-chars #'modify-category-entry 'ascii ?c)
139 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?c)))
141 (save-excursion 140 (save-excursion
142 (save-restriction 141 (save-restriction
143 (narrow-to-region beg end) 142 (narrow-to-region beg end)
143 (with-category-table hz-category-table
144 ;; ~ -> ~~
145 (goto-char (point-min))
146 (while (search-forward "~" nil t) (insert ?~))
147
148 ;; ESC -> ESC ESC
149 (goto-char (point-min))
150 (while (search-forward "\e" nil t) (insert ?\e))
144 151
145 ;; "~" -> "~~" 152 ;; Non-ASCII-GB2312 -> \uXXXX
146 (goto-char (point-min)) 153 (goto-char (point-min))
147 (while (search-forward "~" nil t) (insert ?~)) 154 (while (re-search-forward "\\Cc" nil t)
148 155 (let ((ch (preceding-char)))
149 ;; Chinese GB2312 -> "~{...~}" 156 (delete-char -1)
150 (goto-char (point-min)) 157 (insert (format (if (< ch #x10000) "\\u%04X" "\\U%08X") ch))))
151 (if (re-search-forward "\\cc" nil t) 158
152 (let (pos) 159 ;; Prefer chinese-gb2312 for Chinese characters.
153 (goto-char (setq pos (match-beginning 0))) 160 (put-text-property (point-min) (point-max) 'charset 'chinese-gb2312)
154 (encode-coding-region pos (point-max) 'iso-2022-7bit) 161 (encode-coding-region (point-min) (point-max) 'iso-2022-7bit)
155 (goto-char pos) 162
156 (while (search-forward iso2022-gb-designation nil t) 163 ;; ESC $ A ... ESC ( B -> ~{ ... ~}
157 (delete-char -3) 164 ;; ESC ESC -> ESC
158 (insert hz-gb-designation)) 165 (goto-char (point-min))
159 (goto-char pos) 166 (while (search-forward "\e" nil t)
160 (while (search-forward iso2022-ascii-designation nil t) 167 (if (= (following-char) ?\e)
161 (delete-char -3) 168 ;; ESC ESC -> ESC
162 (insert hz-ascii-designation)))) 169 (delete-char 1)
170 (forward-char -1)
171 (if (looking-at iso2022-gb-designation)
172 (progn
173 (delete-region (match-beginning 0) (match-end 0))
174 (insert hz-gb-designation)
175 (search-forward iso2022-ascii-designation nil 'move)
176 (delete-region (match-beginning 0) (match-end 0))
177 (insert hz-ascii-designation))))))
163 (- (point-max) (point-min))))) 178 (- (point-max) (point-min)))))
164 179
165;;;###autoload 180;;;###autoload