aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Kenichi Handa 1998-12-15 04:35:38 +0000
committer Kenichi Handa 1998-12-15 04:35:38 +0000
commit 251d4f4bf90c3b4ab234827f4acf4cd147f387be (patch)
tree 85ae0321b71bfebd592eedd45d910cfe32f856ea
parent d00742a1c66ae3a77babb468e28cd1c06c92faec (diff)
download emacs-251d4f4bf90c3b4ab234827f4acf4cd147f387be.tar.gz
emacs-251d4f4bf90c3b4ab234827f4acf4cd147f387be.zip
(find-coding-systems-for-charsets): Handle the case of unknown charset.
(find-multibyte-characters): If invalid multibyte characters are found, return the corresponding strings instead of character codes.
(find-multibyte-characters): Adjusted for the above change.
(select-safe-coding-system): For a unibyte buffer, always return DEFAULT-CODING-SYSTEM.
(get-charset-property): Fix previous change. Make it a function.
(put-charset-property): Make it a function.
-rw-r--r-- lisp/international/mule-cmds.el 42
1 files changed, 28 insertions, 14 deletions
diff --git a/lisp/international/mule-cmds.el b/lisp/international/mule-cmds.el
index aae8cf5b7ea..8fc2a6d1160 100644
--- a/lisp/international/mule-cmds.el
+++ b/lisp/international/mule-cmds.el
@@ -290,19 +290,23 @@ CHARSETS is a list of character sets."
290 (eq 'ascii (car charsets)))) 290 (eq 'ascii (car charsets))))
291 '(undecided) 291 '(undecided)
292 (setq charsets (delq 'composition charsets)) 292 (setq charsets (delq 'composition charsets))
293 (let ((l coding-system-list) 293 (let ((l (coding-system-list 'base-only))
294 (charset-prefered-codings 294 (charset-prefered-codings
295 (mapcar (function 295 (mapcar (function
296 (lambda (x) 296 (lambda (x)
297 (get-charset-property x 'prefered-coding-system))) 297 (if (eq x 'unknown)
298 'raw-text
299 (get-charset-property x 'prefered-coding-system))))
298 charsets)) 300 charsets))
299 (priorities (mapcar (function (lambda (x) (symbol-value x))) 301 (priorities (mapcar (function (lambda (x) (symbol-value x)))
300 coding-category-list)) 302 coding-category-list))
301 codings coding safe) 303 codings coding safe)
304 (if (memq 'unknown charsets)
305 ;; The region contains invalid multibyte characters.
306 (setq l '(raw-text)))
302 (while l 307 (while l
303 (setq coding (car l) l (cdr l)) 308 (setq coding (car l) l (cdr l))
304 (if (and (eq coding (coding-system-base coding)) 309 (if (and (setq safe (coding-system-get coding 'safe-charsets))
305 (setq safe (coding-system-get coding 'safe-charsets))
306 (or (eq safe t) 310 (or (eq safe t)
307 (find-coding-systems-region-subset-p charsets safe))) 311 (find-coding-systems-region-subset-p charsets safe)))
308 ;; We put the higher priority to coding systems included 312 ;; We put the higher priority to coding systems included
@@ -330,7 +334,9 @@ where
330 COUNT is a number of characters, 334 COUNT is a number of characters,
331 CHARs are found characters of the character set. 335 CHARs are found characters of the character set.
332Optional 3rd arg MAXCOUNT limits how many CHARs are put in the above list. 336Optional 3rd arg MAXCOUNT limits how many CHARs are put in the above list.
333Optional 4th arg EXCLUDE is a list of character sets to be ignored." 337Optional 4th arg EXCLUDE is a list of character sets to be ignored.
338
339For invalid characters, CHARs are actually strings."
334 (let ((chars nil) 340 (let ((chars nil)
335 charset char) 341 charset char)
336 (if (stringp from) 342 (if (stringp from)
@@ -338,7 +344,10 @@ Optional 4th arg EXCLUDE is a list of character sets to be ignored."
338 (while (setq idx (string-match "[^\000-\177]" from idx)) 344 (while (setq idx (string-match "[^\000-\177]" from idx))
339 (setq char (aref from idx) 345 (setq char (aref from idx)
340 charset (char-charset char)) 346 charset (char-charset char))
341 (if (not (memq charset excludes)) 347 (if (eq charset 'unknown)
348 (setq char (match-string 0)))
349 (if (or (eq charset 'unknown)
350 (not (or (eq excludes t) (memq charset excludes))))
342 (let ((slot (assq charset chars))) 351 (let ((slot (assq charset chars)))
343 (if slot 352 (if slot
344 (if (not (memq char (nthcdr 2 slot))) 353 (if (not (memq char (nthcdr 2 slot)))
@@ -353,10 +362,13 @@ Optional 4th arg EXCLUDE is a list of character sets to be ignored."
353 (while (re-search-forward "[^\000-\177]" to t) 362 (while (re-search-forward "[^\000-\177]" to t)
354 (setq char (preceding-char) 363 (setq char (preceding-char)
355 charset (char-charset char)) 364 charset (char-charset char))
356 (if (not (memq charset excludes)) 365 (if (eq charset 'unknown)
366 (setq char (match-string 0)))
367 (if (or (eq charset 'unknown)
368 (not (or (eq excludes t) (memq charset excludes))))
357 (let ((slot (assq charset chars))) 369 (let ((slot (assq charset chars)))
358 (if slot 370 (if slot
359 (if (not (memq char (nthcdr 2 slot))) 371 (if (not (member char (nthcdr 2 slot)))
360 (let ((count (nth 1 slot))) 372 (let ((count (nth 1 slot)))
361 (setcar (cdr slot) (1+ count)) 373 (setcar (cdr slot) (1+ count))
362 (if (or (not maxcount) (< count maxcount)) 374 (if (or (not maxcount) (< count maxcount))
@@ -390,7 +402,8 @@ and TO is ignored."
390 (let* ((charsets (if (stringp from) (find-charset-string from) 402 (let* ((charsets (if (stringp from) (find-charset-string from)
391 (find-charset-region from to))) 403 (find-charset-region from to)))
392 (safe-coding-systems (find-coding-systems-for-charsets charsets))) 404 (safe-coding-systems (find-coding-systems-for-charsets charsets)))
393 (if (or (eq (car safe-coding-systems) 'undecided) 405 (if (or (not enable-multibyte-characters)
406 (eq (car safe-coding-systems) 'undecided)
394 (eq default-coding-system 'no-conversion) 407 (eq default-coding-system 'no-conversion)
395 (and default-coding-system 408 (and default-coding-system
396 (memq (coding-system-base default-coding-system) 409 (memq (coding-system-base default-coding-system)
@@ -449,7 +462,8 @@ and TO is ignored."
449 (insert (format "%25s: " (car (car non-safe-chars)))) 462 (insert (format "%25s: " (car (car non-safe-chars))))
450 (let ((l (nthcdr 2 (car non-safe-chars)))) 463 (let ((l (nthcdr 2 (car non-safe-chars))))
451 (while l 464 (while l
452 (insert (car l)) 465 (if (or (stringp (car l)) (char-valid-p (car l)))
466 (insert (car l)))
453 (setq l (cdr l)))) 467 (setq l (cdr l))))
454 (if (> (nth 1 (car non-safe-chars)) 3) 468 (if (> (nth 1 (car non-safe-chars)) 3)
455 (insert "...")) 469 (insert "..."))
@@ -1325,14 +1339,14 @@ specifies the character set for the major languages of Western Europe."
1325 1339
1326;;; Charset property 1340;;; Charset property
1327 1341
1328(defsubst get-charset-property (charset propname) 1342(defun get-charset-property (charset propname)
1329 "Return the value of CHARSET's PROPNAME property. 1343 "Return the value of CHARSET's PROPNAME property.
1330This is the last value stored with 1344This is the last value stored with
1331 (put-charset-property CHARSET PROPNAME VALUE)." 1345 (put-charset-property CHARSET PROPNAME VALUE)."
1332 (or (eq charset 'composition) 1346 (and (not (eq charset 'composition))
1333 (plist-get (charset-plist charset) propname))) 1347 (plist-get (charset-plist charset) propname)))
1334 1348
1335(defsubst put-charset-property (charset propname value) 1349(defun put-charset-property (charset propname value)
1336 "Store CHARSETS's PROPNAME property with value VALUE. 1350 "Store CHARSETS's PROPNAME property with value VALUE.
1337It can be retrieved with `(get-charset-property CHARSET PROPNAME)'." 1351It can be retrieved with `(get-charset-property CHARSET PROPNAME)'."
1338 (or (eq charset 'composition) 1352 (or (eq charset 'composition)