diff options
| author | Eli Zaretskii | 2015-06-18 15:06:53 +0300 |
|---|---|---|
| committer | Eli Zaretskii | 2015-06-18 15:06:53 +0300 |
| commit | c4782ea5a7c5a63b6e004aefbb3b3898d3846ad5 (patch) | |
| tree | f1651ec11ac7dbcb660883a655fdede2ce2b7235 | |
| parent | 517ab6238a362bd0dda9af14681f5590da1df6de (diff) | |
| download | emacs-c4782ea5a7c5a63b6e004aefbb3b3898d3846ad5.tar.gz emacs-c4782ea5a7c5a63b6e004aefbb3b3898d3846ad5.zip | |
Improve and extend filepos-to-bufferpos
* lisp/international/mule-util.el (filepos-to-bufferpos--dos):
Don't barf if F returns nil for some argument.
(filepos-to-bufferpos): Expand to support UTF-16 and not assume
that every encoding of type 'charset' is single-byte.
| -rw-r--r-- | lisp/international/mule-util.el | 36 |
1 file changed, 34 insertions, 2 deletions
diff --git a/lisp/international/mule-util.el b/lisp/international/mule-util.el index 2a53e40b4fd..bbefdaa7324 100644 --- a/lisp/international/mule-util.el +++ b/lisp/international/mule-util.el | |||
| @@ -320,6 +320,12 @@ per-character basis, this may not be accurate." | |||
| 320 | (while | 320 | (while |
| 321 | (progn | 321 | (progn |
| 322 | (setq pos (funcall f (- byte eol-offset))) | 322 | (setq pos (funcall f (- byte eol-offset))) |
| 323 | ;; Protect against accidental values of BYTE outside of the | ||
| 324 | ;; valid region. | ||
| 325 | (when (null pos) | ||
| 326 | (if (<= byte eol-offset) | ||
| 327 | (setq pos (point-min)) | ||
| 328 | (setq pos (point-max)))) | ||
| 323 | ;; Adjust POS for DOS EOL format. | 329 | ;; Adjust POS for DOS EOL format. |
| 324 | (setq lines (1- (line-number-at-pos pos))) | 330 | (setq lines (1- (line-number-at-pos pos))) |
| 325 | (and (not (= lines eol-offset)) (> omax omin))) | 331 | (and (not (= lines eol-offset)) (> omax omin))) |
| @@ -345,7 +351,25 @@ QUALITY can be: | |||
| 345 | (unless coding-system (setq coding-system buffer-file-coding-system)) | 351 | (unless coding-system (setq coding-system buffer-file-coding-system)) |
| 346 | (let ((eol (coding-system-eol-type coding-system)) | 352 | (let ((eol (coding-system-eol-type coding-system)) |
| 347 | (type (coding-system-type coding-system)) | 353 | (type (coding-system-type coding-system)) |
| 354 | (base (coding-system-base coding-system)) | ||
| 348 | (pm (save-restriction (widen) (point-min)))) | 355 | (pm (save-restriction (widen) (point-min)))) |
| 356 | (and (eq type 'utf-8-emacs) | ||
| 357 | (setq type 'utf-8)) | ||
| 358 | (and (eq type 'utf-8) | ||
| 359 | ;; Any post-read/pre-write conversions mean it's not really UTF-8. | ||
| 360 | (not (null (coding-system-get coding-system :pos-read-conversion))) | ||
| 361 | (setq type 'not-utf-8)) | ||
| 362 | (and (not (eq type 'utf-8)) | ||
| 363 | (eq quality 'exact) | ||
| 364 | (setq type 'use-exact)) | ||
| 365 | (and (memq type '(charset raw-text undecided)) | ||
| 366 | ;; The following are all of type 'charset', but they are | ||
| 367 | ;; actually variable-width encodings. | ||
| 368 | (not (memq base '(chinese-gbk chinese-gb18030 euc-tw euc-jis-2004 | ||
| 369 | korean-iso-8bit chinese-iso-8bit | ||
| 370 | japanese-iso-8bit chinese-big5-hkscs | ||
| 371 | japanese-cp932 korean-cp949))) | ||
| 372 | (setq type 'single-byte)) | ||
| 349 | (pcase type | 373 | (pcase type |
| 350 | (`utf-8 | 374 | (`utf-8 |
| 351 | (when (coding-system-get coding-system :bom) | 375 | (when (coding-system-get coding-system :bom) |
| @@ -353,8 +377,16 @@ QUALITY can be: | |||
| 353 | (if (= eol 1) | 377 | (if (= eol 1) |
| 354 | (filepos-to-bufferpos--dos (+ pm byte) #'byte-to-position) | 378 | (filepos-to-bufferpos--dos (+ pm byte) #'byte-to-position) |
| 355 | (byte-to-position (+ pm byte)))) | 379 | (byte-to-position (+ pm byte)))) |
| 356 | ;; FIXME: What if it's a 2-byte charset? Are there such beasts? | 380 | (`utf-16 |
| 357 | (`charset | 381 | ;; Account for BOM, which is always 2 bytes in UTF-16. |
| 382 | (setq byte (- byte 2)) | ||
| 383 | ;; In approximate mode, assume all characters are within the | ||
| 384 | ;; BMP, i.e. take up 2 bytes. | ||
| 385 | (setq byte (/ byte 2)) | ||
| 386 | (if (= eol 1) | ||
| 387 | (filepos-to-bufferpos--dos (+ pm byte) #'byte-to-position) | ||
| 388 | (byte-to-position (+ pm byte)))) | ||
| 389 | (`single-byte | ||
| 358 | (if (= eol 1) | 390 | (if (= eol 1) |
| 359 | (filepos-to-bufferpos--dos (+ pm byte) #'identity) | 391 | (filepos-to-bufferpos--dos (+ pm byte) #'identity) |
| 360 | (+ pm byte))) | 392 | (+ pm byte))) |