diff options
| author | Kenichi Handa | 1998-12-15 04:35:38 +0000 |
|---|---|---|
| committer | Kenichi Handa | 1998-12-15 04:35:38 +0000 |
| commit | 251d4f4bf90c3b4ab234827f4acf4cd147f387be (patch) | |
| tree | 85ae0321b71bfebd592eedd45d910cfe32f856ea | |
| parent | d00742a1c66ae3a77babb468e28cd1c06c92faec (diff) | |
| download | emacs-251d4f4bf90c3b4ab234827f4acf4cd147f387be.tar.gz emacs-251d4f4bf90c3b4ab234827f4acf4cd147f387be.zip | |
(find-coding-systems-for-charsets):
Handle the case of unknown charset.
(find-multibyte-characters): If invalid multibyte characters are
found, return the corresponding strings instead of character
codes.
(find-multibyte-characters): Adjusted for the above change.
(select-safe-coding-system): For a unibyte buffer, always returns
DEFAULT-CODING-SYSTEM.
(get-charset-property): Fix previous change. Make it a function.
(put-charset-property): Make it a function.
| -rw-r--r-- | lisp/international/mule-cmds.el | 42 |
1 files changed, 28 insertions, 14 deletions
diff --git a/lisp/international/mule-cmds.el b/lisp/international/mule-cmds.el index aae8cf5b7ea..8fc2a6d1160 100644 --- a/lisp/international/mule-cmds.el +++ b/lisp/international/mule-cmds.el | |||
| @@ -290,19 +290,23 @@ CHARSETS is a list of character sets." | |||
| 290 | (eq 'ascii (car charsets)))) | 290 | (eq 'ascii (car charsets)))) |
| 291 | '(undecided) | 291 | '(undecided) |
| 292 | (setq charsets (delq 'composition charsets)) | 292 | (setq charsets (delq 'composition charsets)) |
| 293 | (let ((l coding-system-list) | 293 | (let ((l (coding-system-list 'base-only)) |
| 294 | (charset-prefered-codings | 294 | (charset-prefered-codings |
| 295 | (mapcar (function | 295 | (mapcar (function |
| 296 | (lambda (x) | 296 | (lambda (x) |
| 297 | (get-charset-property x 'prefered-coding-system))) | 297 | (if (eq x 'unknown) |
| 298 | 'raw-text | ||
| 299 | (get-charset-property x 'prefered-coding-system)))) | ||
| 298 | charsets)) | 300 | charsets)) |
| 299 | (priorities (mapcar (function (lambda (x) (symbol-value x))) | 301 | (priorities (mapcar (function (lambda (x) (symbol-value x))) |
| 300 | coding-category-list)) | 302 | coding-category-list)) |
| 301 | codings coding safe) | 303 | codings coding safe) |
| 304 | (if (memq 'unknown charsets) | ||
| 305 | ;; The region contains invalid multibyte characters. | ||
| 306 | (setq l '(raw-text))) | ||
| 302 | (while l | 307 | (while l |
| 303 | (setq coding (car l) l (cdr l)) | 308 | (setq coding (car l) l (cdr l)) |
| 304 | (if (and (eq coding (coding-system-base coding)) | 309 | (if (and (setq safe (coding-system-get coding 'safe-charsets)) |
| 305 | (setq safe (coding-system-get coding 'safe-charsets)) | ||
| 306 | (or (eq safe t) | 310 | (or (eq safe t) |
| 307 | (find-coding-systems-region-subset-p charsets safe))) | 311 | (find-coding-systems-region-subset-p charsets safe))) |
| 308 | ;; We put the higher priority to coding systems included | 312 | ;; We put the higher priority to coding systems included |
| @@ -330,7 +334,9 @@ where | |||
| 330 | COUNT is a number of characters, | 334 | COUNT is a number of characters, |
| 331 | CHARs are found characters of the character set. | 335 | CHARs are found characters of the character set. |
| 332 | Optional 3rd arg MAXCOUNT limits how many CHARs are put in the above list. | 336 | Optional 3rd arg MAXCOUNT limits how many CHARs are put in the above list. |
| 333 | Optional 4th arg EXCLUDE is a list of character sets to be ignored." | 337 | Optional 4th arg EXCLUDE is a list of character sets to be ignored. |
| 338 | |||
| 339 | For invalid characters, CHARs are actually strings." | ||
| 334 | (let ((chars nil) | 340 | (let ((chars nil) |
| 335 | charset char) | 341 | charset char) |
| 336 | (if (stringp from) | 342 | (if (stringp from) |
| @@ -338,7 +344,10 @@ Optional 4th arg EXCLUDE is a list of character sets to be ignored." | |||
| 338 | (while (setq idx (string-match "[^\000-\177]" from idx)) | 344 | (while (setq idx (string-match "[^\000-\177]" from idx)) |
| 339 | (setq char (aref from idx) | 345 | (setq char (aref from idx) |
| 340 | charset (char-charset char)) | 346 | charset (char-charset char)) |
| 341 | (if (not (memq charset excludes)) | 347 | (if (eq charset 'unknown) |
| 348 | (setq char (match-string 0))) | ||
| 349 | (if (or (eq charset 'unknown) | ||
| 350 | (not (or (eq excludes t) (memq charset excludes)))) | ||
| 342 | (let ((slot (assq charset chars))) | 351 | (let ((slot (assq charset chars))) |
| 343 | (if slot | 352 | (if slot |
| 344 | (if (not (memq char (nthcdr 2 slot))) | 353 | (if (not (memq char (nthcdr 2 slot))) |
| @@ -353,10 +362,13 @@ Optional 4th arg EXCLUDE is a list of character sets to be ignored." | |||
| 353 | (while (re-search-forward "[^\000-\177]" to t) | 362 | (while (re-search-forward "[^\000-\177]" to t) |
| 354 | (setq char (preceding-char) | 363 | (setq char (preceding-char) |
| 355 | charset (char-charset char)) | 364 | charset (char-charset char)) |
| 356 | (if (not (memq charset excludes)) | 365 | (if (eq charset 'unknown) |
| 366 | (setq char (match-string 0))) | ||
| 367 | (if (or (eq charset 'unknown) | ||
| 368 | (not (or (eq excludes t) (memq charset excludes)))) | ||
| 357 | (let ((slot (assq charset chars))) | 369 | (let ((slot (assq charset chars))) |
| 358 | (if slot | 370 | (if slot |
| 359 | (if (not (memq char (nthcdr 2 slot))) | 371 | (if (not (member char (nthcdr 2 slot))) |
| 360 | (let ((count (nth 1 slot))) | 372 | (let ((count (nth 1 slot))) |
| 361 | (setcar (cdr slot) (1+ count)) | 373 | (setcar (cdr slot) (1+ count)) |
| 362 | (if (or (not maxcount) (< count maxcount)) | 374 | (if (or (not maxcount) (< count maxcount)) |
| @@ -390,7 +402,8 @@ and TO is ignored." | |||
| 390 | (let* ((charsets (if (stringp from) (find-charset-string from) | 402 | (let* ((charsets (if (stringp from) (find-charset-string from) |
| 391 | (find-charset-region from to))) | 403 | (find-charset-region from to))) |
| 392 | (safe-coding-systems (find-coding-systems-for-charsets charsets))) | 404 | (safe-coding-systems (find-coding-systems-for-charsets charsets))) |
| 393 | (if (or (eq (car safe-coding-systems) 'undecided) | 405 | (if (or (not enable-multibyte-characters) |
| 406 | (eq (car safe-coding-systems) 'undecided) | ||
| 394 | (eq default-coding-system 'no-conversion) | 407 | (eq default-coding-system 'no-conversion) |
| 395 | (and default-coding-system | 408 | (and default-coding-system |
| 396 | (memq (coding-system-base default-coding-system) | 409 | (memq (coding-system-base default-coding-system) |
| @@ -449,7 +462,8 @@ and TO is ignored." | |||
| 449 | (insert (format "%25s: " (car (car non-safe-chars)))) | 462 | (insert (format "%25s: " (car (car non-safe-chars)))) |
| 450 | (let ((l (nthcdr 2 (car non-safe-chars)))) | 463 | (let ((l (nthcdr 2 (car non-safe-chars)))) |
| 451 | (while l | 464 | (while l |
| 452 | (insert (car l)) | 465 | (if (or (stringp (car l)) (char-valid-p (car l))) |
| 466 | (insert (car l))) | ||
| 453 | (setq l (cdr l)))) | 467 | (setq l (cdr l)))) |
| 454 | (if (> (nth 1 (car non-safe-chars)) 3) | 468 | (if (> (nth 1 (car non-safe-chars)) 3) |
| 455 | (insert "...")) | 469 | (insert "...")) |
| @@ -1325,14 +1339,14 @@ specifies the character set for the major languages of Western Europe." | |||
| 1325 | 1339 | ||
| 1326 | ;;; Charset property | 1340 | ;;; Charset property |
| 1327 | 1341 | ||
| 1328 | (defsubst get-charset-property (charset propname) | 1342 | (defun get-charset-property (charset propname) |
| 1329 | "Return the value of CHARSET's PROPNAME property. | 1343 | "Return the value of CHARSET's PROPNAME property. |
| 1330 | This is the last value stored with | 1344 | This is the last value stored with |
| 1331 | (put-charset-property CHARSET PROPNAME VALUE)." | 1345 | (put-charset-property CHARSET PROPNAME VALUE)." |
| 1332 | (or (eq charset 'composition) | 1346 | (and (not (eq charset 'composition)) |
| 1333 | (plist-get (charset-plist charset) propname))) | 1347 | (plist-get (charset-plist charset) propname))) |
| 1334 | 1348 | ||
| 1335 | (defsubst put-charset-property (charset propname value) | 1349 | (defun put-charset-property (charset propname value) |
| 1336 | "Store CHARSETS's PROPNAME property with value VALUE. | 1350 | "Store CHARSETS's PROPNAME property with value VALUE. |
| 1337 | It can be retrieved with `(get-charset-property CHARSET PROPNAME)'." | 1351 | It can be retrieved with `(get-charset-property CHARSET PROPNAME)'." |
| 1338 | (or (eq charset 'composition) | 1352 | (or (eq charset 'composition) |