aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--lisp/international/mule-cmds.el411
1 files changed, 221 insertions, 190 deletions
diff --git a/lisp/international/mule-cmds.el b/lisp/international/mule-cmds.el
index fcf1a762f93..4624fba6fe4 100644
--- a/lisp/international/mule-cmds.el
+++ b/lisp/international/mule-cmds.el
@@ -323,15 +323,57 @@ startup."
323 (setq coding-system base)) 323 (setq coding-system base))
324 (set-default-coding-systems coding-system))) 324 (set-default-coding-systems coding-system)))
325 325
326(defun find-coding-systems-region-subset-p (list1 list2) 326(defvar sort-coding-systems-predicate nil
327 "Return non-nil if all elements in LIST1 are included in LIST2. 327 "If non-nil, a predicate function to sort coding systems.
328Comparison done with EQ." 328
329 (catch 'tag 329It is called with two coding systems, and should return t if the first
330 (while list1 330one is \"less\" than the second.
331 (or (memq (car list1) list2) 331
332 (throw 'tag nil)) 332The function `sort-coding-systems' use it.")
333 (setq list1 (cdr list1))) 333
334 t)) 334(defun sort-coding-systems (codings)
335 "Sort coding system list CODINGS by a priority of each coding system.
336
337If a coding system is most preferred, it has the highest priority.
338Otherwise, a coding system corresponds to some MIME charset has higher
339priorities. Among them, a coding system included in `coding-system'
340key of the current language environment has higher priorities. See
341also the documentation of `language-info-alist'.
342
343If the variable `sort-coding-systems-predicate' (which see) is
344non-nil, it is used to sort CODINGS in the different way than above."
345 (if sort-coding-systems-predicate
346 (sort codings sort-coding-systems-predicate)
347 (let* ((most-preferred (symbol-value (car coding-category-list)))
348 (lang-preferred (get-language-info current-language-environment
349 'coding-system))
350 (func (function
351 (lambda (x)
352 (let ((base (coding-system-base x)))
353 (+ (if (eq base most-preferred) 64 0)
354 (let ((mime (coding-system-get base 'mime-charset)))
355 (if mime
356 (if (string-match "^x-" (symbol-name mime))
357 16 32)
358 0))
359 (if (memq base lang-preferred) 8 0)
360 (if (string-match "-with-esc$" (symbol-name base))
361 0 4)
362 (if (eq (coding-system-type base) 2)
363 ;; For ISO based coding systems, prefer
364 ;; one that doesn't use escape sequences.
365 (let ((flags (coding-system-flags base)))
366 (if (or (consp (aref flags 0))
367 (consp (aref flags 1))
368 (consp (aref flags 2))
369 (consp (aref flags 3)))
370 (if (or (aref flags 8) (aref flags 9))
371 0
372 1)
373 2))
374 1)))))))
375 (sort codings (function (lambda (x y)
376 (> (funcall func x) (funcall func y))))))))
335 377
336(defun find-coding-systems-region (from to) 378(defun find-coding-systems-region (from to)
337 "Return a list of proper coding systems to encode a text between FROM and TO. 379 "Return a list of proper coding systems to encode a text between FROM and TO.
@@ -340,7 +382,13 @@ in the text.
340 382
341If the text contains no multibyte characters, return a list of a single 383If the text contains no multibyte characters, return a list of a single
342element `undecided'." 384element `undecided'."
343 (find-coding-systems-for-charsets (find-charset-region from to))) 385 (let ((codings (find-coding-systems-region-internal from to)))
386 (if (eq codings t)
387 ;; The text contains only ASCII characters. Any coding
388 ;; systems are safe.
389 '(undecided)
390 ;; We need copy-sequence because sorting will alter the argument.
391 (sort-coding-systems (copy-sequence codings)))))
344 392
345(defun find-coding-systems-string (string) 393(defun find-coding-systems-string (string)
346 "Return a list of proper coding systems to encode STRING. 394 "Return a list of proper coding systems to encode STRING.
@@ -349,49 +397,35 @@ in STRING.
349 397
350If STRING contains no multibyte characters, return a list of a single 398If STRING contains no multibyte characters, return a list of a single
351element `undecided'." 399element `undecided'."
352 (find-coding-systems-for-charsets (find-charset-string string))) 400 (find-coding-systems-region string nil))
353 401
354(defun find-coding-systems-for-charsets (charsets) 402(defun find-coding-systems-for-charsets (charsets)
355 "Return a list of proper coding systems to encode characters of CHARSETS. 403 "Return a list of proper coding systems to encode characters of CHARSETS.
356CHARSETS is a list of character sets." 404CHARSETS is a list of character sets."
357 (if (or (null charsets) 405 (cond ((or (null charsets)
358 (and (= (length charsets) 1) 406 (and (= (length charsets) 1)
359 (eq 'ascii (car charsets)))) 407 (eq 'ascii (car charsets))))
360 '(undecided) 408 '(undecided))
361 (setq charsets (delq 'composition charsets)) 409 ((or (memq 'eight-bit-control charsets)
362 (let ((l (coding-system-list 'base-only)) 410 (memq 'eight-bit-graphic charsets))
363 (charset-preferred-codings 411 '(raw-text emacs-mule))
364 (mapcar (function 412 (t
365 (lambda (x) 413 (let ((codings t)
366 (if (eq x 'unknown) 414 charset l ll)
367 'raw-text 415 (while (and codings charsets)
368 (get-charset-property x 'preferred-coding-system)))) 416 (setq charset (car charsets) charsets (cdr charsets))
369 charsets)) 417 (unless (eq charset 'ascii)
370 (priorities (mapcar (function (lambda (x) (symbol-value x))) 418 (setq l (aref char-coding-system-table (make-char charset)))
371 coding-category-list)) 419 (if (eq codings t)
372 codings coding safe) 420 (setq codings l)
373 (if (memq 'unknown charsets) 421 (let ((ll nil))
374 ;; The region contains invalid multibyte characters. 422 (while codings
375 (setq l '(raw-text))) 423 (if (memq (car codings) l)
376 (while l 424 (setq ll (cons (car codings) ll)))
377 (setq coding (car l) l (cdr l)) 425 (setq codings (cdr codings)))
378 (if (and (setq safe (coding-system-get coding 'safe-charsets)) 426 (setq codings ll)))))
379 (or (eq safe t) 427 (append codings
380 (find-coding-systems-region-subset-p charsets safe))) 428 (char-table-extra-slot char-coding-system-table 0))))))
381 ;; We put the higher priority to coding systems included
382 ;; in CHARSET-PREFERRED-CODINGS, and within them, put the
383 ;; higher priority to coding systems which support smaller
384 ;; number of charsets.
385 (let ((priority
386 (+ (if (coding-system-get coding 'mime-charset) 4096 0)
387 (lsh (length (memq coding priorities)) 7)
388 (if (memq coding charset-preferred-codings) 64 0)
389 (if (> (coding-system-type coding) 0) 32 0)
390 (if (consp safe) (- 32 (length safe)) 0))))
391 (setq codings (cons (cons priority coding) codings)))))
392 (mapcar 'cdr
393 (sort codings (function (lambda (x y) (> (car x) (car y))))))
394 )))
395 429
396(defun find-multibyte-characters (from to &optional maxcount excludes) 430(defun find-multibyte-characters (from to &optional maxcount excludes)
397 "Find multibyte characters in the region specified by FROM and TO. 431 "Find multibyte characters in the region specified by FROM and TO.
@@ -453,61 +487,93 @@ to use in order to write a file. If you set it to nil explicitly,
453then call `write-region', then afterward this variable will be non-nil 487then call `write-region', then afterward this variable will be non-nil
454only if the user was explicitly asked and specified a coding system.") 488only if the user was explicitly asked and specified a coding system.")
455 489
456(defun select-safe-coding-system (from to &optional default-coding-system) 490(defvar select-safe-coding-system-accept-default-p nil
491 "If non-nil, a function to control the behaviour of coding system selection.
492The meaning is the same as the argument ACCEPT-DEFAULT-P of the
493function `select-safe-coding-system' (which see). This variable
494overrides that argument.")
495
496(defun select-safe-coding-system (from to &optional default-coding-system
497 accept-default-p)
457 "Ask a user to select a safe coding system from candidates. 498 "Ask a user to select a safe coding system from candidates.
458The candidates of coding systems which can safely encode a text 499The candidates of coding systems which can safely encode a text
459between FROM and TO are shown in a popup window. 500between FROM and TO are shown in a popup window. Among them, the most
501proper one is suggested as the default.
502
503The list of `buffer-file-coding-system' of the current buffer and the
504most preferred coding system (if it corresponds to a MIME charset) is
505treated as the default coding system list. Among them, the first one
506that safely encodes the text is silently selected and returned without
507any user interaction. See also the command `prefer-coding-system'.
508
509Optional 3rd arg DEFAULT-CODING-SYSTEM specifies a coding system or a
510list of coding systems to be prepended to the default coding system
511list.
460 512
461Optional arg DEFAULT-CODING-SYSTEM specifies a coding system to be 513Optional 4th arg ACCEPT-DEFAULT-P, if non-nil, is a function to
462checked at first. If omitted, buffer-file-coding-system of the 514determine the acceptability of the silently selected coding system.
463current buffer is used. 515It is called with that coding system, and should return nil if it
516should not be silently selected and thus user interaction is required.
464 517
465If the text can be encoded safely by DEFAULT-CODING-SYSTEM, it is 518The variable `select-safe-coding-system-accept-default-p', if
466returned without any user interaction. DEFAULT-CODING-SYSTEM may also 519non-nil, overrides ACCEPT-DEFAULT-P.
467be a list, from which the first coding system that can safely encode the
468text is chosen, if any can.
469 520
470Kludgy feature: if FROM is a string, the string is the target text, 521Kludgy feature: if FROM is a string, the string is the target text,
471and TO is ignored." 522and TO is ignored."
472 (or default-coding-system 523 (if (and default-coding-system
473 (setq default-coding-system buffer-file-coding-system)) 524 (not (listp default-coding-system)))
474 (let* ((charsets (if (stringp from) (find-charset-string from) 525 (setq default-coding-system (list default-coding-system)))
475 (find-charset-region from to))) 526
476 (safe-coding-systems (find-coding-systems-for-charsets charsets)) 527 ;; Change elements of the list to (coding . base-coding).
477 (coding-system t) ; t means not yet decided. 528 (setq default-coding-system
478 eol-type) 529 (mapcar (function (lambda (x) (cons x (coding-system-base x))))
479 (if (or (not enable-multibyte-characters) 530 default-coding-system))
480 (eq (car safe-coding-systems) 'undecided)) 531
481 ;; As the text doesn't contain a multibyte character, we can 532 ;; If buffer-file-coding-system is not nil nor undecided, append it
482 ;; use any coding system. 533 ;; to the defaults.
483 (setq coding-system default-coding-system) 534 (if buffer-file-coding-system
484 535 (let ((base (coding-system-base buffer-file-coding-system)))
485 ;; Try the default. If the default is nil or undecided, try the 536 (or (eq base 'undecided)
486 ;; most preferred one or one of its subsidiaries that converts 537 (assq buffer-file-coding-system default-coding-system)
487 ;; EOL as the same way as the default. 538 (rassq base default-coding-system)
488 (if (or (not default-coding-system)
489 (eq (coding-system-base default-coding-system) 'undecided))
490 (progn
491 (setq eol-type
492 (and default-coding-system
493 (coding-system-eol-type default-coding-system)))
494 (setq default-coding-system 539 (setq default-coding-system
495 (symbol-value (car coding-category-list))) 540 (append default-coding-system
496 (or (not eol-type) 541 (list (cons buffer-file-coding-system base)))))))
497 (vectorp eol-type) 542
498 (setq default-coding-system 543 ;; If the most preferred coding system has the property mime-charset,
499 (coding-system-change-eol-conversion 544 ;; append it to the defaults.
500 default-coding-system eol-type))))) 545 (let* ((preferred (symbol-value (car coding-category-list)))
501 (if (or (eq default-coding-system 'no-conversion) 546 (base (coding-system-base preferred)))
502 (and default-coding-system 547 (and (coding-system-get preferred 'mime-charset)
503 (memq (coding-system-base default-coding-system) 548 (not (assq preferred default-coding-system))
504 safe-coding-systems))) 549 (not (rassq base default-coding-system))
505 (setq coding-system default-coding-system))) 550 (setq default-coding-system
506 551 (append default-coding-system (list (cons preferred base))))))
507 (when (eq coding-system t) 552
553 (if select-safe-coding-system-accept-default-p
554 (setq accept-default-p select-safe-coding-system-accept-default-p))
555
556 (let ((codings (find-coding-systems-region from to))
557 (coding-system nil)
558 (l default-coding-system))
559 (if (eq (car codings) 'undecided)
560 ;; Any coding system is ok.
561 (setq coding-system t)
562 ;; Try the defaults.
563 (while (and l (not coding-system))
564 (if (memq (cdr (car l)) codings)
565 (setq coding-system (car (car l)))
566 (setq l (cdr l))))
567 (if (and coding-system accept-default-p)
568 (or (funcall accept-default-p coding-system)
569 (setq coding-system (list coding-system)))))
570
571 ;; If all the defaults failed, ask a user.
572 (when (or (not coding-system) (consp coding-system))
508 ;; At first, change each coding system to the corresponding 573 ;; At first, change each coding system to the corresponding
509 ;; mime-charset name if it is also a coding system. 574 ;; mime-charset name if it is also a coding system. Such a name
510 (let ((l safe-coding-systems) 575 ;; is more friendly to users.
576 (let ((l codings)
511 mime-charset) 577 mime-charset)
512 (while l 578 (while l
513 (setq mime-charset (coding-system-get (car l) 'mime-charset)) 579 (setq mime-charset (coding-system-get (car l) 'mime-charset))
@@ -515,91 +581,56 @@ and TO is ignored."
515 (setcar l mime-charset)) 581 (setcar l mime-charset))
516 (setq l (cdr l)))) 582 (setq l (cdr l))))
517 583
518 (let ((non-safe-chars (find-multibyte-characters 584 ;; Then ask users to select one form CODINGS.
519 from to 3 585 (unwind-protect
520 (and default-coding-system 586 (save-window-excursion
521 (coding-system-get default-coding-system 587 (with-output-to-temp-buffer "*Warning*"
522 'safe-charsets)))) 588 (save-excursion
523 show-position overlays) 589 (set-buffer standard-output)
524 (save-excursion 590 (insert "The following default coding systems were tried,\n"
525 ;; Highlight characters that default-coding-system can't encode. 591 (if (consp coding-system)
526 (when (integerp from) 592 (format "and %s safely encodes the target text:\n"
527 (goto-char from) 593 (car coding-system))
528 (let ((found nil)) 594 "but none of them safely encode the target text:\n"))
529 (while (and (not found) 595 (let ((pos (point))
530 (re-search-forward "[^\000-\177]" to t)) 596 (fill-prefix " "))
531 (setq found (assq (char-charset (preceding-char)) 597 (mapcar (function (lambda (x) (princ " ") (princ (car x))))
532 non-safe-chars)))) 598 default-coding-system)
533 (forward-line -1) 599 (insert "\n")
534 (setq show-position (point)) 600 (fill-region-as-paragraph pos (point)))
535 (save-excursion 601 (insert (if (consp coding-system)
536 (while (and (< (length overlays) 256) 602 "Select it or "
537 (re-search-forward "[^\000-\177]" to t)) 603 "Select ")
538 (let* ((char (preceding-char)) 604 "one from the following safe coding systems:\n")
539 (charset (char-charset char))) 605 (let ((pos (point))
540 (when (assq charset non-safe-chars) 606 (fill-prefix " "))
541 (setq overlays (cons (make-overlay (1- (point)) (point)) 607 (mapcar (function (lambda (x) (princ " ") (princ x)))
542 overlays)) 608 codings)
543 (overlay-put (car overlays) 'face 'highlight)))))) 609 (insert "\n")
544 610 (fill-region-as-paragraph pos (point)))))
545 ;; At last, ask a user to select a proper coding system. 611
546 (unwind-protect 612 ;; Read a coding system.
547 (save-window-excursion 613 (if (consp coding-system)
548 (when show-position 614 (setq codings (cons (car coding-system) codings)))
549 ;; At first, be sure to show the current buffer. 615 (let* ((safe-names (mapcar (lambda (x) (list (symbol-name x)))
550 (set-window-buffer (selected-window) (current-buffer)) 616 codings))
551 (set-window-start (selected-window) show-position)) 617 (name (completing-read
552 ;; Then, show a helpful message. 618 (format "Select coding system (default %s): "
553 (with-output-to-temp-buffer "*Warning*" 619 (car codings))
554 (save-excursion 620 safe-names nil t nil nil
555 (set-buffer standard-output) 621 (car (car safe-names)))))
556 (insert "The target text contains the following non ASCII character(s):\n") 622 (setq last-coding-system-specified (intern name)
557 (let ((len (length non-safe-chars)) 623 coding-system last-coding-system-specified)))
558 (shown 0)) 624 (kill-buffer "*Warning*")))
559 (while (and non-safe-chars (< shown 3)) 625
560 (when (> (length (car non-safe-chars)) 2) 626 (if (vectorp (coding-system-eol-type coding-system))
561 (setq shown (1+ shown)) 627 (let ((eol (coding-system-eol-type buffer-file-coding-system)))
562 (insert (format "%25s: " (car (car non-safe-chars)))) 628 (if (numberp eol)
563 (let ((l (nthcdr 2 (car non-safe-chars)))) 629 (setq coding-system
564 (while l 630 (coding-system-change-eol-conversion coding-system eol)))))
565 (if (or (stringp (car l)) (char-valid-p (car l))) 631
566 (insert (car l))) 632 (if (eq coding-system t)
567 (setq l (cdr l)))) 633 (setq coding-system buffer-file-coding-system))
568 (if (> (nth 1 (car non-safe-chars)) 3)
569 (insert "..."))
570 (insert "\n"))
571 (setq non-safe-chars (cdr non-safe-chars)))
572 (if (< shown len)
573 (insert (format "%27s\n" "..."))))
574 (insert (format
575"These can't be encoded safely by the coding system %s.
576
577Please select one from the following safe coding systems:\n"
578 default-coding-system))
579 (let ((pos (point))
580 (fill-prefix " "))
581 (mapcar (function (lambda (x) (princ " ") (princ x)))
582 safe-coding-systems)
583 (fill-region-as-paragraph pos (point)))))
584
585 ;; Read a coding system.
586 (let* ((safe-names (mapcar (lambda (x) (list (symbol-name x)))
587 safe-coding-systems))
588 (name (completing-read
589 (format "Select coding system (default %s): "
590 (car safe-coding-systems))
591 safe-names nil t nil nil
592 (car (car safe-names)))))
593 (setq last-coding-system-specified (intern name)
594 coding-system last-coding-system-specified)
595 (or (not eol-type)
596 (vectorp eol-type)
597 (setq coding-system (coding-system-change-eol-conversion
598 coding-system eol-type)))))
599 (kill-buffer "*Warning*")
600 (while overlays
601 (delete-overlay (car overlays))
602 (setq overlays (cdr overlays)))))))
603 coding-system)) 634 coding-system))
604 635
605(setq select-safe-coding-system-function 'select-safe-coding-system) 636(setq select-safe-coding-system-function 'select-safe-coding-system)
@@ -610,22 +641,23 @@ It at first tries the first coding system found in these variables
610in this order: 641in this order:
611 (1) local value of `buffer-file-coding-system' 642 (1) local value of `buffer-file-coding-system'
612 (2) value of `sendmail-coding-system' 643 (2) value of `sendmail-coding-system'
613 (3) value of `default-buffer-file-coding-system' 644 (3) value of `default-sendmail-coding-system'
614 (4) value of `default-sendmail-coding-system' 645 (4) value of `default-buffer-file-coding-system'
615If the found coding system can't encode the current buffer, 646If the found coding system can't encode the current buffer,
616or none of them are bound to a coding system, 647or none of them are bound to a coding system,
617it asks the user to select a proper coding system." 648it asks the user to select a proper coding system."
618 (let ((coding (or (and (local-variable-p 'buffer-file-coding-system) 649 (let ((coding (or (and (local-variable-p 'buffer-file-coding-system)
619 buffer-file-coding-system) 650 buffer-file-coding-system)
620 sendmail-coding-system 651 sendmail-coding-system
621 default-buffer-file-coding-system 652 default-sendmail-coding-system
622 default-sendmail-coding-system))) 653 default-buffer-file-coding-system)))
623 (if (eq coding 'no-conversion) 654 (if (eq coding 'no-conversion)
624 ;; We should never use no-conversion for outgoing mails. 655 ;; We should never use no-conversion for outgoing mails.
625 (setq coding nil)) 656 (setq coding nil))
626 (if (fboundp select-safe-coding-system-function) 657 (if (fboundp select-safe-coding-system-function)
627 (funcall select-safe-coding-system-function 658 (funcall select-safe-coding-system-function
628 (point-min) (point-max) coding) 659 (point-min) (point-max) coding
660 (function (lambda (x) (coding-system-get x 'mime-charset))))
629 coding))) 661 coding)))
630 662
631;;; Language support stuff. 663;;; Language support stuff.
@@ -1257,6 +1289,8 @@ The default status is as follows:
1257 (update-coding-systems-internal) 1289 (update-coding-systems-internal)
1258 1290
1259 (set-default-coding-systems nil) 1291 (set-default-coding-systems nil)
1292 (setq default-sendmail-coding-system 'iso-latin-1)
1293
1260 ;; Don't alter the terminal and keyboard coding systems here. 1294 ;; Don't alter the terminal and keyboard coding systems here.
1261 ;; The terminal still supports the same coding system 1295 ;; The terminal still supports the same coding system
1262 ;; that it supported a minute ago. 1296 ;; that it supported a minute ago.
@@ -1324,9 +1358,6 @@ specifies the character set for the major languages of Western Europe."
1324 ((charsetp nonascii) 1358 ((charsetp nonascii)
1325 (setq nonascii-insert-offset (- (make-char nonascii) 128))))) 1359 (setq nonascii-insert-offset (- (make-char nonascii) 128)))))
1326 1360
1327 (setq charset-origin-alist
1328 (get-language-info language-name 'charset-origin-alist))
1329
1330 ;; Unibyte setups if necessary. 1361 ;; Unibyte setups if necessary.
1331 (unless default-enable-multibyte-characters 1362 (unless default-enable-multibyte-characters
1332 ;; Syntax and case table. 1363 ;; Syntax and case table.