aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author K. Handa 2016-08-17 23:37:17 +0900
committer K. Handa 2016-08-17 23:37:17 +0900
commit 7faabf03d885204295701e3fc5d7e75a71c3ec82 (patch)
tree cf8846bf96d550ec186953e34d2a7da92b2abee3
parent 8d4039bcd69f0134fe3723b2bb3c921952e298c5 (diff)
download emacs-7faabf03d885204295701e3fc5d7e75a71c3ec82.tar.gz
emacs-7faabf03d885204295701e3fc5d7e75a71c3ec82.zip
Fix hz encoding and decoding (bug#23814)
* lisp/language/china-util.el (decode-hz-region): Pay attention to "~~}" sequence at the end of Chinese character range. (hz-category-table): New variable. (encode-hz-region): Convert non-encodable characters to \u... and \U... Preserve ESC on encoding. Put `chinese-gb2312' `charset' text property in advance to force iso-2022-encoding to select chinese-gb2312 designation.
-rw-r--r--lisp/language/china-util.el113
1 files changed, 64 insertions, 49 deletions
diff --git a/lisp/language/china-util.el b/lisp/language/china-util.el
index e5316409326..6505fb8c3d8 100644
--- a/lisp/language/china-util.el
+++ b/lisp/language/china-util.el
@@ -88,43 +88,34 @@ Return the length of resulting text."
88 (let (pos ch) 88 (let (pos ch)
89 (narrow-to-region beg end) 89 (narrow-to-region beg end)
90 90
91 ;; We, at first, convert HZ/ZW to `euc-china', 91 ;; We, at first, convert HZ/ZW to `iso-2022-7bit',
92 ;; then decode it. 92 ;; then decode it.
93 93
94 ;; "~\n" -> "\n", "~~" -> "~" 94 ;; "~\n" -> "", "~~" -> "~"
95 (goto-char (point-min)) 95 (goto-char (point-min))
96 (while (search-forward "~" nil t) 96 (while (search-forward "~" nil t)
97 (setq ch (following-char)) 97 (setq ch (following-char))
98 (if (or (= ch ?\n) (= ch ?~)) (delete-char -1))) 98 (cond ((= ch ?{)
99 (delete-region (1- (point)) (1+ (point)))
100 (setq pos (point))
101 (insert iso2022-gb-designation)
102 (if (looking-at "\\([!-}][!-~]\\)*")
103 (goto-char (match-end 0)))
104 (if (looking-at hz-ascii-designation)
105 (delete-region (match-beginning 0) (match-end 0)))
106 (insert iso2022-ascii-designation)
107 (decode-coding-region pos (point) 'iso-2022-7bit))
108
109 ((= ch ?~)
110 (delete-char 1))
111
112 ((and (= ch ?\n)
113 decode-hz-line-continuation)
114 (delete-region (1- (point)) (1+ (point))))
115
116 (t
117 (forward-char 1)))))
99 118
100 ;; "^zW...\n" -> Chinese GB2312
101 ;; "~{...~}" -> Chinese GB2312
102 (goto-char (point-min))
103 (setq beg nil)
104 (while (re-search-forward hz/zw-start-gb nil t)
105 (setq pos (match-beginning 0)
106 ch (char-after pos))
107 ;; Record the first position to start conversion.
108 (or beg (setq beg pos))
109 (end-of-line)
110 (setq end (point))
111 (if (>= ch 128) ; 8bit GB2312
112 nil
113 (goto-char pos)
114 (delete-char 2)
115 (setq end (- end 2))
116 (if (= ch ?z) ; ZW -> euc-china
117 (progn
118 (translate-region (point) end hz-set-msb-table)
119 (goto-char end))
120 (if (search-forward hz-ascii-designation
121 (if decode-hz-line-continuation nil end)
122 t)
123 (delete-char -2))
124 (setq end (point))
125 (translate-region pos (point) hz-set-msb-table))))
126 (if beg
127 (decode-coding-region beg end 'euc-china)))
128 (- (point-max) (point-min))))) 119 (- (point-max) (point-min)))))
129 120
130;;;###autoload 121;;;###autoload
@@ -133,33 +124,57 @@ Return the length of resulting text."
133 (interactive) 124 (interactive)
134 (decode-hz-region (point-min) (point-max))) 125 (decode-hz-region (point-min) (point-max)))
135 126
127(defvar hz-category-table nil)
128
136;;;###autoload 129;;;###autoload
137(defun encode-hz-region (beg end) 130(defun encode-hz-region (beg end)
138 "Encode the text in the current region to HZ. 131 "Encode the text in the current region to HZ.
139Return the length of resulting text." 132Return the length of resulting text."
140 (interactive "r") 133 (interactive "r")
134 (unless hz-category-table
135 (setq hz-category-table (make-category-table))
136 (with-category-table hz-category-table
137 (define-category ?c "hz encodable")
138 (map-charset-chars #'modify-category-entry 'ascii ?c)
139 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?c)))
141 (save-excursion 140 (save-excursion
142 (save-restriction 141 (save-restriction
143 (narrow-to-region beg end) 142 (narrow-to-region beg end)
143 (with-category-table hz-category-table
144 ;; ~ -> ~~
145 (goto-char (point-min))
146 (while (search-forward "~" nil t) (insert ?~))
147
148 ;; ESC -> ESC ESC
149 (goto-char (point-min))
150 (while (search-forward "\e" nil t) (insert ?\e))
144 151
145 ;; "~" -> "~~" 152 ;; Non-ASCII-GB2312 -> \uXXXX
146 (goto-char (point-min)) 153 (goto-char (point-min))
147 (while (search-forward "~" nil t) (insert ?~)) 154 (while (re-search-forward "\\Cc" nil t)
148 155 (let ((ch (preceding-char)))
149 ;; Chinese GB2312 -> "~{...~}" 156 (delete-char -1)
150 (goto-char (point-min)) 157 (insert (format (if (< ch #x10000) "\\u%04X" "\\U%08X") ch))))
151 (if (re-search-forward "\\cc" nil t) 158
152 (let (pos) 159 ;; Prefer chinese-gb2312 for Chinese characters.
153 (goto-char (setq pos (match-beginning 0))) 160 (put-text-property (point-min) (point-max) 'charset 'chinese-gb2312)
154 (encode-coding-region pos (point-max) 'iso-2022-7bit) 161 (encode-coding-region (point-min) (point-max) 'iso-2022-7bit)
155 (goto-char pos) 162
156 (while (search-forward iso2022-gb-designation nil t) 163 ;; ESC $ B ... ESC ( B -> ~{ ... ~}
157 (delete-char -3) 164 ;; ESC ESC -> ESC
158 (insert hz-gb-designation)) 165 (goto-char (point-min))
159 (goto-char pos) 166 (while (search-forward "\e" nil t)
160 (while (search-forward iso2022-ascii-designation nil t) 167 (if (= (following-char) ?\e)
161 (delete-char -3) 168 ;; ESC ESC -> ESC
162 (insert hz-ascii-designation)))) 169 (delete-char 1)
170 (forward-char -1)
171 (if (looking-at iso2022-gb-designation)
172 (progn
173 (delete-region (match-beginning 0) (match-end 0))
174 (insert hz-gb-designation)
175 (search-forward iso2022-ascii-designation nil 'move)
176 (delete-region (match-beginning 0) (match-end 0))
177 (insert hz-ascii-designation))))))
163 (- (point-max) (point-min))))) 178 (- (point-max) (point-min)))))
164 179
165;;;###autoload 180;;;###autoload