diff options
| author | K. Handa | 2016-08-17 23:37:17 +0900 |
|---|---|---|
| committer | K. Handa | 2016-08-17 23:37:17 +0900 |
| commit | 7faabf03d885204295701e3fc5d7e75a71c3ec82 (patch) | |
| tree | cf8846bf96d550ec186953e34d2a7da92b2abee3 | |
| parent | 8d4039bcd69f0134fe3723b2bb3c921952e298c5 (diff) | |
| download | emacs-7faabf03d885204295701e3fc5d7e75a71c3ec82.tar.gz emacs-7faabf03d885204295701e3fc5d7e75a71c3ec82.zip | |
Fix hz encoding and decoding (bug#23814)
* lisp/language/china-util.el (decode-hz-region): Pay
attention to "~~}" sequence at the end of Chinese character
range.
(hz-category-table): New variable.
(encode-hz-region): Convert non-encodable characters to
\u... and \U... Preserve ESC on encoding. Put
`chinese-gb2312' `charset' text property in advance to force
iso-2022-encoding to select chinese-gb2312 designation.
| -rw-r--r-- | lisp/language/china-util.el | 113 |
1 files changed, 64 insertions, 49 deletions
diff --git a/lisp/language/china-util.el b/lisp/language/china-util.el index e5316409326..6505fb8c3d8 100644 --- a/lisp/language/china-util.el +++ b/lisp/language/china-util.el | |||
| @@ -88,43 +88,34 @@ Return the length of resulting text." | |||
| 88 | (let (pos ch) | 88 | (let (pos ch) |
| 89 | (narrow-to-region beg end) | 89 | (narrow-to-region beg end) |
| 90 | 90 | ||
| 91 | ;; We, at first, convert HZ/ZW to `euc-china', | 91 | ;; We, at first, convert HZ/ZW to `iso-2022-7bit', |
| 92 | ;; then decode it. | 92 | ;; then decode it. |
| 93 | 93 | ||
| 94 | ;; "~\n" -> "\n", "~~" -> "~" | 94 | ;; "~\n" -> "", "~~" -> "~" |
| 95 | (goto-char (point-min)) | 95 | (goto-char (point-min)) |
| 96 | (while (search-forward "~" nil t) | 96 | (while (search-forward "~" nil t) |
| 97 | (setq ch (following-char)) | 97 | (setq ch (following-char)) |
| 98 | (if (or (= ch ?\n) (= ch ?~)) (delete-char -1))) | 98 | (cond ((= ch ?{) |
| 99 | (delete-region (1- (point)) (1+ (point))) | ||
| 100 | (setq pos (point)) | ||
| 101 | (insert iso2022-gb-designation) | ||
| 102 | (if (looking-at "\\([!-}][!-~]\\)*") | ||
| 103 | (goto-char (match-end 0))) | ||
| 104 | (if (looking-at hz-ascii-designation) | ||
| 105 | (delete-region (match-beginning 0) (match-end 0))) | ||
| 106 | (insert iso2022-ascii-designation) | ||
| 107 | (decode-coding-region pos (point) 'iso-2022-7bit)) | ||
| 108 | |||
| 109 | ((= ch ?~) | ||
| 110 | (delete-char 1)) | ||
| 111 | |||
| 112 | ((and (= ch ?\n) | ||
| 113 | decode-hz-line-continuation) | ||
| 114 | (delete-region (1- (point)) (1+ (point)))) | ||
| 115 | |||
| 116 | (t | ||
| 117 | (forward-char 1))))) | ||
| 99 | 118 | ||
| 100 | ;; "^zW...\n" -> Chinese GB2312 | ||
| 101 | ;; "~{...~}" -> Chinese GB2312 | ||
| 102 | (goto-char (point-min)) | ||
| 103 | (setq beg nil) | ||
| 104 | (while (re-search-forward hz/zw-start-gb nil t) | ||
| 105 | (setq pos (match-beginning 0) | ||
| 106 | ch (char-after pos)) | ||
| 107 | ;; Record the first position to start conversion. | ||
| 108 | (or beg (setq beg pos)) | ||
| 109 | (end-of-line) | ||
| 110 | (setq end (point)) | ||
| 111 | (if (>= ch 128) ; 8bit GB2312 | ||
| 112 | nil | ||
| 113 | (goto-char pos) | ||
| 114 | (delete-char 2) | ||
| 115 | (setq end (- end 2)) | ||
| 116 | (if (= ch ?z) ; ZW -> euc-china | ||
| 117 | (progn | ||
| 118 | (translate-region (point) end hz-set-msb-table) | ||
| 119 | (goto-char end)) | ||
| 120 | (if (search-forward hz-ascii-designation | ||
| 121 | (if decode-hz-line-continuation nil end) | ||
| 122 | t) | ||
| 123 | (delete-char -2)) | ||
| 124 | (setq end (point)) | ||
| 125 | (translate-region pos (point) hz-set-msb-table)))) | ||
| 126 | (if beg | ||
| 127 | (decode-coding-region beg end 'euc-china))) | ||
| 128 | (- (point-max) (point-min))))) | 119 | (- (point-max) (point-min))))) |
| 129 | 120 | ||
| 130 | ;;;###autoload | 121 | ;;;###autoload |
| @@ -133,33 +124,57 @@ Return the length of resulting text." | |||
| 133 | (interactive) | 124 | (interactive) |
| 134 | (decode-hz-region (point-min) (point-max))) | 125 | (decode-hz-region (point-min) (point-max))) |
| 135 | 126 | ||
| 127 | (defvar hz-category-table nil) | ||
| 128 | |||
| 136 | ;;;###autoload | 129 | ;;;###autoload |
| 137 | (defun encode-hz-region (beg end) | 130 | (defun encode-hz-region (beg end) |
| 138 | "Encode the text in the current region to HZ. | 131 | "Encode the text in the current region to HZ. |
| 139 | Return the length of resulting text." | 132 | Return the length of resulting text." |
| 140 | (interactive "r") | 133 | (interactive "r") |
| 134 | (unless hz-category-table | ||
| 135 | (setq hz-category-table (make-category-table)) | ||
| 136 | (with-category-table hz-category-table | ||
| 137 | (define-category ?c "hz encodable") | ||
| 138 | (map-charset-chars #'modify-category-entry 'ascii ?c) | ||
| 139 | (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?c))) | ||
| 141 | (save-excursion | 140 | (save-excursion |
| 142 | (save-restriction | 141 | (save-restriction |
| 143 | (narrow-to-region beg end) | 142 | (narrow-to-region beg end) |
| 143 | (with-category-table hz-category-table | ||
| 144 | ;; ~ -> ~~ | ||
| 145 | (goto-char (point-min)) | ||
| 146 | (while (search-forward "~" nil t) (insert ?~)) | ||
| 147 | |||
| 148 | ;; ESC -> ESC ESC | ||
| 149 | (goto-char (point-min)) | ||
| 150 | (while (search-forward "\e" nil t) (insert ?\e)) | ||
| 144 | 151 | ||
| 145 | ;; "~" -> "~~" | 152 | ;; Non-ASCII-GB2312 -> \uXXXX |
| 146 | (goto-char (point-min)) | 153 | (goto-char (point-min)) |
| 147 | (while (search-forward "~" nil t) (insert ?~)) | 154 | (while (re-search-forward "\\Cc" nil t) |
| 148 | 155 | (let ((ch (preceding-char))) | |
| 149 | ;; Chinese GB2312 -> "~{...~}" | 156 | (delete-char -1) |
| 150 | (goto-char (point-min)) | 157 | (insert (format (if (< ch #x10000) "\\u%04X" "\\U%08X") ch)))) |
| 151 | (if (re-search-forward "\\cc" nil t) | 158 | |
| 152 | (let (pos) | 159 | ;; Prefer chinese-gb2312 for Chinese characters. |
| 153 | (goto-char (setq pos (match-beginning 0))) | 160 | (put-text-property (point-min) (point-max) 'charset 'chinese-gb2312) |
| 154 | (encode-coding-region pos (point-max) 'iso-2022-7bit) | 161 | (encode-coding-region (point-min) (point-max) 'iso-2022-7bit) |
| 155 | (goto-char pos) | 162 | |
| 156 | (while (search-forward iso2022-gb-designation nil t) | 163 | ;; ESC $ A ... ESC ( B -> ~{ ... ~} |
| 157 | (delete-char -3) | 164 | ;; ESC ESC -> ESC |
| 158 | (insert hz-gb-designation)) | 165 | (goto-char (point-min)) |
| 159 | (goto-char pos) | 166 | (while (search-forward "\e" nil t) |
| 160 | (while (search-forward iso2022-ascii-designation nil t) | 167 | (if (= (following-char) ?\e) |
| 161 | (delete-char -3) | 168 | ;; ESC ESC -> ESC |
| 162 | (insert hz-ascii-designation)))) | 169 | (delete-char 1) |
| 170 | (forward-char -1) | ||
| 171 | (if (looking-at iso2022-gb-designation) | ||
| 172 | (progn | ||
| 173 | (delete-region (match-beginning 0) (match-end 0)) | ||
| 174 | (insert hz-gb-designation) | ||
| 175 | (search-forward iso2022-ascii-designation nil 'move) | ||
| 176 | (delete-region (match-beginning 0) (match-end 0)) | ||
| 177 | (insert hz-ascii-designation)))))) | ||
| 163 | (- (point-max) (point-min))))) | 178 | (- (point-max) (point-min))))) |
| 164 | 179 | ||
| 165 | ;;;###autoload | 180 | ;;;###autoload |