diff options
| -rw-r--r-- | lisp/international/mule.el | 187 |
1 files changed, 105 insertions, 82 deletions
diff --git a/lisp/international/mule.el b/lisp/international/mule.el index 140402e0459..00306d7e48d 100644 --- a/lisp/international/mule.el +++ b/lisp/international/mule.el | |||
| @@ -772,16 +772,30 @@ LIST is a list of coding categories ordered by priority." | |||
| 772 | 772 | ||
| 773 | ;;; FILE I/O | 773 | ;;; FILE I/O |
| 774 | 774 | ||
| 775 | (defvar auto-coding-alist | ||
| 776 | '(("\\.\\(arc\\|zip\\|lzh\\|zoo\\|jar\\)\\'" . no-conversion) | ||
| 777 | ("\\.\\(ARC\\|ZIP\\|LZH\\|ZOO\\|JAR\\)\\'" . no-conversion)) | ||
| 778 | "Alist of filename patterns vs corresponding coding systems. | ||
| 779 | Each element looks like (REGEXP . CODING-SYSTEM). | ||
| 780 | A file whose name matches REGEXP is decoded on reading | ||
| 781 | and encoded on writing by CODING-SYSTEM. | ||
| 782 | |||
| 783 | The settings in this variable have higher priority than `coding:' tag | ||
| 784 | in the file contents (see the function `set-auto-coding') | ||
| 785 | and the variable `file-coding-system-alist'.") | ||
| 786 | |||
| 775 | (defvar set-auto-coding-for-load nil | 787 | (defvar set-auto-coding-for-load nil |
| 776 | "Non-nil means look for `load-coding' property instead of `coding'. | 788 | "Non-nil means look for `load-coding' property instead of `coding'. |
| 777 | This is used for loading and byte-compiling Emacs Lisp files.") | 789 | This is used for loading and byte-compiling Emacs Lisp files.") |
| 778 | 790 | ||
| 779 | (defun set-auto-coding (size) | 791 | (defun set-auto-coding (filename size) |
| 780 | "Return coding system for a file of which SIZE bytes follow point. | 792 | "Return coding system for a file FILENAME of which SIZE bytes follow point. |
| 781 | These bytes should include at least the first 1k of the file | 793 | These bytes should include at least the first 1k of the file |
| 782 | and the last 3k of the file, but the middle may be omitted. | 794 | and the last 3k of the file, but the middle may be omitted. |
| 783 | 795 | ||
| 784 | It checks for a `coding:' tag in the first one or two lines following | 796 | It checks FILENAME against the variable `auto-coding-alist'. |
| 797 | If FILENAME doesn't match any entries in the variable, | ||
| 798 | it checks for a `coding:' tag in the first one or two lines following | ||
| 785 | point. If no `coding:' tag is found, it checks for alocal variables | 799 | point. If no `coding:' tag is found, it checks for alocal variables |
| 786 | list in the last 3K bytes out of the SIZE bytes. | 800 | list in the last 3K bytes out of the SIZE bytes. |
| 787 | 801 | ||
| @@ -790,85 +804,94 @@ or nil if nothing specified. | |||
| 790 | 804 | ||
| 791 | The variable `set-auto-coding-function' (which see) is set to this | 805 | The variable `set-auto-coding-function' (which see) is set to this |
| 792 | function by default." | 806 | function by default." |
| 793 | (let* ((case-fold-search t) | 807 | (let ((alist auto-coding-alist) |
| 794 | (head-start (point)) | 808 | (case-fold-search (memq system-type '(vax-vms windows-nt))) |
| 795 | (head-end (+ head-start (min size 1024))) | 809 | coding-system) |
| 796 | (tail-start (+ head-start (max (- size 3072) 0))) | 810 | (while (and alist (not coding-system)) |
| 797 | (tail-end (+ head-start size)) | 811 | (if (string-match (car (car alist)) filename) |
| 798 | coding-system head-found tail-found pos) | 812 | (setq coding-system (cdr (car alist))) |
| 799 | ;; Try a short cut by searching for the string "coding:" | 813 | (setq alist (cdr alist)))) |
| 800 | ;; and for "unibyte:" at th ehead and tail of SIZE bytes. | 814 | |
| 801 | (setq head-found (or (search-forward "coding:" head-end t) | 815 | (or coding-system |
| 802 | (search-forward "unibyte:" head-end t))) | 816 | (let* ((case-fold-search t) |
| 803 | (if (and head-found (> head-found tail-start)) | 817 | (head-start (point)) |
| 804 | ;; Head and tail are overlapped. | 818 | (head-end (+ head-start (min size 1024))) |
| 805 | (setq tail-found head-found) | 819 | (tail-start (+ head-start (max (- size 3072) 0))) |
| 806 | (goto-char tail-start) | 820 | (tail-end (+ head-start size)) |
| 807 | (setq tail-found (or (search-forward "coding:" tail-end t) | 821 | coding-system head-found tail-found pos) |
| 808 | (search-forward "unibyte:" tail-end t)))) | 822 | ;; Try a short cut by searching for the string "coding:" |
| 809 | 823 | ;; and for "unibyte:" at th ehead and tail of SIZE bytes. | |
| 810 | ;; At first check the head. | 824 | (setq head-found (or (search-forward "coding:" head-end t) |
| 811 | (when head-found | 825 | (search-forward "unibyte:" head-end t))) |
| 812 | (goto-char head-start) | 826 | (if (and head-found (> head-found tail-start)) |
| 813 | (setq pos (re-search-forward "[\n\r]" head-end t)) | 827 | ;; Head and tail are overlapped. |
| 814 | (if (and pos | 828 | (setq tail-found head-found) |
| 815 | (= (char-after head-start) ?#) | 829 | (goto-char tail-start) |
| 816 | (= (char-after (1+ head-start)) ?!)) | 830 | (setq tail-found (or (search-forward "coding:" tail-end t) |
| 817 | ;; If the file begins with "#!" (exec interpreter magic), | 831 | (search-forward "unibyte:" tail-end t)))) |
| 818 | ;; look for coding frobs in the first two lines. You cannot | 832 | |
| 819 | ;; necessarily put them in the first line of such a file | 833 | ;; At first check the head. |
| 820 | ;; without screwing up the interpreter invocation. | 834 | (when head-found |
| 821 | (setq pos (search-forward "\n" head-end t))) | 835 | (goto-char head-start) |
| 822 | (if pos (setq head-end pos)) | 836 | (setq pos (re-search-forward "[\n\r]" head-end t)) |
| 823 | (when (< head-found head-end) | 837 | (if (and pos |
| 824 | (goto-char head-start) | 838 | (= (char-after head-start) ?#) |
| 825 | (when (and set-auto-coding-for-load | 839 | (= (char-after (1+ head-start)) ?!)) |
| 826 | (re-search-forward | 840 | ;; If the file begins with "#!" (exec interpreter magic), |
| 827 | "-\\*-\\(.*;\\)?[ \t]*unibyte:[ \t]*\\([^ ;]+\\)" | 841 | ;; look for coding frobs in the first two lines. You cannot |
| 828 | head-end t)) | 842 | ;; necessarily put them in the first line of such a file |
| 829 | (setq coding-system 'raw-text)) | 843 | ;; without screwing up the interpreter invocation. |
| 830 | (when (and (not coding-system) | 844 | (setq pos (search-forward "\n" head-end t))) |
| 831 | (re-search-forward | 845 | (if pos (setq head-end pos)) |
| 832 | "-\\*-\\(.*;\\)?[ \t]*coding:[ \t]*\\([^ ;]+\\)" | 846 | (when (< head-found head-end) |
| 833 | head-end t)) | 847 | (goto-char head-start) |
| 834 | (setq coding-system (intern (match-string 2))) | 848 | (when (and set-auto-coding-for-load |
| 835 | (or (coding-system-p coding-system) | 849 | (re-search-forward |
| 836 | (setq coding-system nil))))) | 850 | "-\\*-\\(.*;\\)?[ \t]*unibyte:[ \t]*\\([^ ;]+\\)" |
| 837 | 851 | head-end t)) | |
| 838 | ;; If no coding: tag in the head, check the tail. | 852 | (setq coding-system 'raw-text)) |
| 839 | (when (and tail-found (not coding-system)) | 853 | (when (and (not coding-system) |
| 840 | (goto-char tail-start) | 854 | (re-search-forward |
| 841 | (search-forward "\n\^L" nil t) | 855 | "-\\*-\\(.*;\\)?[ \t]*coding:[ \t]*\\([^ ;]+\\)" |
| 842 | (if (re-search-forward | 856 | head-end t)) |
| 843 | "^\\(.*\\)[ \t]*Local Variables:[ \t]*\\(.*\\)$" tail-end t) | 857 | (setq coding-system (intern (match-string 2))) |
| 844 | ;; The prefix is what comes before "local variables:" in its | 858 | (or (coding-system-p coding-system) |
| 845 | ;; line. The suffix is what comes after "local variables:" | 859 | (setq coding-system nil))))) |
| 846 | ;; in its line. | 860 | |
| 847 | (let* ((prefix (regexp-quote (match-string 1))) | 861 | ;; If no coding: tag in the head, check the tail. |
| 848 | (suffix (regexp-quote (match-string 2))) | 862 | (when (and tail-found (not coding-system)) |
| 849 | (re-coding (concat | 863 | (goto-char tail-start) |
| 850 | "^" prefix | 864 | (search-forward "\n\^L" nil t) |
| 851 | "coding[ \t]*:[ \t]*\\([^ \t]+\\)[ \t]*" | 865 | (if (re-search-forward |
| 852 | suffix "$")) | 866 | "^\\(.*\\)[ \t]*Local Variables:[ \t]*\\(.*\\)$" tail-end t) |
| 853 | (re-unibyte (concat | 867 | ;; The prefix is what comes before "local variables:" in its |
| 854 | "^" prefix | 868 | ;; line. The suffix is what comes after "local variables:" |
| 855 | "unibyte[ \t]*:[ \t]*\\([^ \t]+\\)[ \t]*" | 869 | ;; in its line. |
| 856 | suffix "$")) | 870 | (let* ((prefix (regexp-quote (match-string 1))) |
| 857 | (re-end (concat | 871 | (suffix (regexp-quote (match-string 2))) |
| 858 | "^" prefix "end *:[ \t]*" suffix "$")) | 872 | (re-coding (concat |
| 859 | (pos (point))) | 873 | "^" prefix |
| 860 | (re-search-forward re-end tail-end 'move) | 874 | "coding[ \t]*:[ \t]*\\([^ \t]+\\)[ \t]*" |
| 861 | (setq tail-end (point)) | 875 | suffix "$")) |
| 862 | (goto-char pos) | 876 | (re-unibyte (concat |
| 863 | (when (and set-auto-coding-for-load | 877 | "^" prefix |
| 864 | (re-search-forward re-unibyte tail-end t)) | 878 | "unibyte[ \t]*:[ \t]*\\([^ \t]+\\)[ \t]*" |
| 865 | (setq coding-system 'raw-text)) | 879 | suffix "$")) |
| 866 | (when (and (not coding-system) | 880 | (re-end (concat |
| 867 | (re-search-forward re-coding tail-end t)) | 881 | "^" prefix "end *:[ \t]*" suffix "$")) |
| 868 | (setq coding-system (intern (match-string 1))) | 882 | (pos (point))) |
| 869 | (or (coding-system-p coding-system) | 883 | (re-search-forward re-end tail-end 'move) |
| 870 | (setq coding-system nil)))))) | 884 | (setq tail-end (point)) |
| 871 | coding-system)) | 885 | (goto-char pos) |
| 886 | (when (and set-auto-coding-for-load | ||
| 887 | (re-search-forward re-unibyte tail-end t)) | ||
| 888 | (setq coding-system 'raw-text)) | ||
| 889 | (when (and (not coding-system) | ||
| 890 | (re-search-forward re-coding tail-end t)) | ||
| 891 | (setq coding-system (intern (match-string 1))) | ||
| 892 | (or (coding-system-p coding-system) | ||
| 893 | (setq coding-system nil)))))) | ||
| 894 | coding-system)))) | ||
| 872 | 895 | ||
| 873 | (setq set-auto-coding-function 'set-auto-coding) | 896 | (setq set-auto-coding-function 'set-auto-coding) |
| 874 | 897 | ||