aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Eli Zaretskii 2015-06-18 15:06:53 +0300
committer Eli Zaretskii 2015-06-18 15:06:53 +0300
commit c4782ea5a7c5a63b6e004aefbb3b3898d3846ad5 (patch)
tree f1651ec11ac7dbcb660883a655fdede2ce2b7235
parent 517ab6238a362bd0dda9af14681f5590da1df6de (diff)
download emacs-c4782ea5a7c5a63b6e004aefbb3b3898d3846ad5.tar.gz
emacs-c4782ea5a7c5a63b6e004aefbb3b3898d3846ad5.zip
Improve and extend filepos-to-bufferpos
* lisp/international/mule-util.el (filepos-to-bufferpos--dos): Don't barf if F returns nil for some argument. (filepos-to-bufferpos): Expand to support UTF-16 and not assume that every encoding of type 'charset' is single-byte.
-rw-r--r-- lisp/international/mule-util.el 36
1 files changed, 34 insertions, 2 deletions
diff --git a/lisp/international/mule-util.el b/lisp/international/mule-util.el
index 2a53e40b4fd..bbefdaa7324 100644
--- a/lisp/international/mule-util.el
+++ b/lisp/international/mule-util.el
@@ -320,6 +320,12 @@ per-character basis, this may not be accurate."
320 (while 320 (while
321 (progn 321 (progn
322 (setq pos (funcall f (- byte eol-offset))) 322 (setq pos (funcall f (- byte eol-offset)))
323 ;; Protect against accidental values of BYTE outside of the
324 ;; valid region.
325 (when (null pos)
326 (if (<= byte eol-offset)
327 (setq pos (point-min))
328 (setq pos (point-max))))
323 ;; Adjust POS for DOS EOL format. 329 ;; Adjust POS for DOS EOL format.
324 (setq lines (1- (line-number-at-pos pos))) 330 (setq lines (1- (line-number-at-pos pos)))
325 (and (not (= lines eol-offset)) (> omax omin))) 331 (and (not (= lines eol-offset)) (> omax omin)))
@@ -345,7 +351,25 @@ QUALITY can be:
345 (unless coding-system (setq coding-system buffer-file-coding-system)) 351 (unless coding-system (setq coding-system buffer-file-coding-system))
346 (let ((eol (coding-system-eol-type coding-system)) 352 (let ((eol (coding-system-eol-type coding-system))
347 (type (coding-system-type coding-system)) 353 (type (coding-system-type coding-system))
354 (base (coding-system-base coding-system))
348 (pm (save-restriction (widen) (point-min)))) 355 (pm (save-restriction (widen) (point-min))))
356 (and (eq type 'utf-8-emacs)
357 (setq type 'utf-8))
358 (and (eq type 'utf-8)
359 ;; Any post-read/pre-write conversions mean it's not really UTF-8.
360 (not (null (coding-system-get coding-system :pos-read-conversion)))
361 (setq type 'not-utf-8))
362 (and (not (eq type 'utf-8))
363 (eq quality 'exact)
364 (setq type 'use-exact))
365 (and (memq type '(charset raw-text undecided))
366 ;; The following are all of type 'charset', but they are
367 ;; actually variable-width encodings.
368 (not (memq base '(chinese-gbk chinese-gb18030 euc-tw euc-jis-2004
369 korean-iso-8bit chinese-iso-8bit
370 japanese-iso-8bit chinese-big5-hkscs
371 japanese-cp932 korean-cp949)))
372 (setq type 'single-byte))
349 (pcase type 373 (pcase type
350 (`utf-8 374 (`utf-8
351 (when (coding-system-get coding-system :bom) 375 (when (coding-system-get coding-system :bom)
@@ -353,8 +377,16 @@ QUALITY can be:
353 (if (= eol 1) 377 (if (= eol 1)
354 (filepos-to-bufferpos--dos (+ pm byte) #'byte-to-position) 378 (filepos-to-bufferpos--dos (+ pm byte) #'byte-to-position)
355 (byte-to-position (+ pm byte)))) 379 (byte-to-position (+ pm byte))))
356 ;; FIXME: What if it's a 2-byte charset? Are there such beasts? 380 (`utf-16
357 (`charset 381 ;; Account for BOM, which is always 2 bytes in UTF-16.
382 (setq byte (- byte 2))
383 ;; In approximate mode, assume all characters are within the
384 ;; BMP, i.e. take up 2 bytes.
385 (setq byte (/ byte 2))
386 (if (= eol 1)
387 (filepos-to-bufferpos--dos (+ pm byte) #'byte-to-position)
388 (byte-to-position (+ pm byte))))
389 (`single-byte
358 (if (= eol 1) 390 (if (= eol 1)
359 (filepos-to-bufferpos--dos (+ pm byte) #'identity) 391 (filepos-to-bufferpos--dos (+ pm byte) #'identity)
360 (+ pm byte))) 392 (+ pm byte)))