aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--lisp/international/mule.el187
1 files changed, 105 insertions, 82 deletions
diff --git a/lisp/international/mule.el b/lisp/international/mule.el
index 140402e0459..00306d7e48d 100644
--- a/lisp/international/mule.el
+++ b/lisp/international/mule.el
@@ -772,16 +772,30 @@ LIST is a list of coding categories ordered by priority."
772 772
773;;; FILE I/O 773;;; FILE I/O
774 774
775(defvar auto-coding-alist
776 '(("\\.\\(arc\\|zip\\|lzh\\|zoo\\|jar\\)\\'" . no-conversion)
777 ("\\.\\(ARC\\|ZIP\\|LZH\\|ZOO\\|JAR\\)\\'" . no-conversion))
778 "Alist of filename patterns vs corresponding coding systems.
779Each element looks like (REGEXP . CODING-SYSTEM).
780A file whose name matches REGEXP is decoded on reading
781and encoded on writing by CODING-SYSTEM.
782
783The settings in this variable have higher priority than `coding:' tag
784in the file contents (see the function `set-auto-coding')
785and the variable `file-coding-system-alist'.")
786
775(defvar set-auto-coding-for-load nil 787(defvar set-auto-coding-for-load nil
776 "Non-nil means look for `load-coding' property instead of `coding'. 788 "Non-nil means look for `load-coding' property instead of `coding'.
777This is used for loading and byte-compiling Emacs Lisp files.") 789This is used for loading and byte-compiling Emacs Lisp files.")
778 790
779(defun set-auto-coding (size) 791(defun set-auto-coding (filename size)
780 "Return coding system for a file of which SIZE bytes follow point. 792 "Return coding system for a file FILENAME of which SIZE bytes follow point.
781These bytes should include at least the first 1k of the file 793These bytes should include at least the first 1k of the file
782and the last 3k of the file, but the middle may be omitted. 794and the last 3k of the file, but the middle may be omitted.
783 795
784It checks for a `coding:' tag in the first one or two lines following 796It checks FILENAME against the variable `auto-coding-alist'.
797If FILENAME doesn't match any entries in the variable,
798it checks for a `coding:' tag in the first one or two lines following
785point. If no `coding:' tag is found, it checks for alocal variables 799point. If no `coding:' tag is found, it checks for alocal variables
786list in the last 3K bytes out of the SIZE bytes. 800list in the last 3K bytes out of the SIZE bytes.
787 801
@@ -790,85 +804,94 @@ or nil if nothing specified.
790 804
791The variable `set-auto-coding-function' (which see) is set to this 805The variable `set-auto-coding-function' (which see) is set to this
792function by default." 806function by default."
793 (let* ((case-fold-search t) 807 (let ((alist auto-coding-alist)
794 (head-start (point)) 808 (case-fold-search (memq system-type '(vax-vms windows-nt)))
795 (head-end (+ head-start (min size 1024))) 809 coding-system)
796 (tail-start (+ head-start (max (- size 3072) 0))) 810 (while (and alist (not coding-system))
797 (tail-end (+ head-start size)) 811 (if (string-match (car (car alist)) filename)
798 coding-system head-found tail-found pos) 812 (setq coding-system (cdr (car alist)))
799 ;; Try a short cut by searching for the string "coding:" 813 (setq alist (cdr alist))))
800 ;; and for "unibyte:" at th ehead and tail of SIZE bytes. 814
801 (setq head-found (or (search-forward "coding:" head-end t) 815 (or coding-system
802 (search-forward "unibyte:" head-end t))) 816 (let* ((case-fold-search t)
803 (if (and head-found (> head-found tail-start)) 817 (head-start (point))
804 ;; Head and tail are overlapped. 818 (head-end (+ head-start (min size 1024)))
805 (setq tail-found head-found) 819 (tail-start (+ head-start (max (- size 3072) 0)))
806 (goto-char tail-start) 820 (tail-end (+ head-start size))
807 (setq tail-found (or (search-forward "coding:" tail-end t) 821 coding-system head-found tail-found pos)
808 (search-forward "unibyte:" tail-end t)))) 822 ;; Try a short cut by searching for the string "coding:"
809 823 ;; and for "unibyte:" at th ehead and tail of SIZE bytes.
810 ;; At first check the head. 824 (setq head-found (or (search-forward "coding:" head-end t)
811 (when head-found 825 (search-forward "unibyte:" head-end t)))
812 (goto-char head-start) 826 (if (and head-found (> head-found tail-start))
813 (setq pos (re-search-forward "[\n\r]" head-end t)) 827 ;; Head and tail are overlapped.
814 (if (and pos 828 (setq tail-found head-found)
815 (= (char-after head-start) ?#) 829 (goto-char tail-start)
816 (= (char-after (1+ head-start)) ?!)) 830 (setq tail-found (or (search-forward "coding:" tail-end t)
817 ;; If the file begins with "#!" (exec interpreter magic), 831 (search-forward "unibyte:" tail-end t))))
818 ;; look for coding frobs in the first two lines. You cannot 832
819 ;; necessarily put them in the first line of such a file 833 ;; At first check the head.
820 ;; without screwing up the interpreter invocation. 834 (when head-found
821 (setq pos (search-forward "\n" head-end t))) 835 (goto-char head-start)
822 (if pos (setq head-end pos)) 836 (setq pos (re-search-forward "[\n\r]" head-end t))
823 (when (< head-found head-end) 837 (if (and pos
824 (goto-char head-start) 838 (= (char-after head-start) ?#)
825 (when (and set-auto-coding-for-load 839 (= (char-after (1+ head-start)) ?!))
826 (re-search-forward 840 ;; If the file begins with "#!" (exec interpreter magic),
827 "-\\*-\\(.*;\\)?[ \t]*unibyte:[ \t]*\\([^ ;]+\\)" 841 ;; look for coding frobs in the first two lines. You cannot
828 head-end t)) 842 ;; necessarily put them in the first line of such a file
829 (setq coding-system 'raw-text)) 843 ;; without screwing up the interpreter invocation.
830 (when (and (not coding-system) 844 (setq pos (search-forward "\n" head-end t)))
831 (re-search-forward 845 (if pos (setq head-end pos))
832 "-\\*-\\(.*;\\)?[ \t]*coding:[ \t]*\\([^ ;]+\\)" 846 (when (< head-found head-end)
833 head-end t)) 847 (goto-char head-start)
834 (setq coding-system (intern (match-string 2))) 848 (when (and set-auto-coding-for-load
835 (or (coding-system-p coding-system) 849 (re-search-forward
836 (setq coding-system nil))))) 850 "-\\*-\\(.*;\\)?[ \t]*unibyte:[ \t]*\\([^ ;]+\\)"
837 851 head-end t))
838 ;; If no coding: tag in the head, check the tail. 852 (setq coding-system 'raw-text))
839 (when (and tail-found (not coding-system)) 853 (when (and (not coding-system)
840 (goto-char tail-start) 854 (re-search-forward
841 (search-forward "\n\^L" nil t) 855 "-\\*-\\(.*;\\)?[ \t]*coding:[ \t]*\\([^ ;]+\\)"
842 (if (re-search-forward 856 head-end t))
843 "^\\(.*\\)[ \t]*Local Variables:[ \t]*\\(.*\\)$" tail-end t) 857 (setq coding-system (intern (match-string 2)))
844 ;; The prefix is what comes before "local variables:" in its 858 (or (coding-system-p coding-system)
845 ;; line. The suffix is what comes after "local variables:" 859 (setq coding-system nil)))))
846 ;; in its line. 860
847 (let* ((prefix (regexp-quote (match-string 1))) 861 ;; If no coding: tag in the head, check the tail.
848 (suffix (regexp-quote (match-string 2))) 862 (when (and tail-found (not coding-system))
849 (re-coding (concat 863 (goto-char tail-start)
850 "^" prefix 864 (search-forward "\n\^L" nil t)
851 "coding[ \t]*:[ \t]*\\([^ \t]+\\)[ \t]*" 865 (if (re-search-forward
852 suffix "$")) 866 "^\\(.*\\)[ \t]*Local Variables:[ \t]*\\(.*\\)$" tail-end t)
853 (re-unibyte (concat 867 ;; The prefix is what comes before "local variables:" in its
854 "^" prefix 868 ;; line. The suffix is what comes after "local variables:"
855 "unibyte[ \t]*:[ \t]*\\([^ \t]+\\)[ \t]*" 869 ;; in its line.
856 suffix "$")) 870 (let* ((prefix (regexp-quote (match-string 1)))
857 (re-end (concat 871 (suffix (regexp-quote (match-string 2)))
858 "^" prefix "end *:[ \t]*" suffix "$")) 872 (re-coding (concat
859 (pos (point))) 873 "^" prefix
860 (re-search-forward re-end tail-end 'move) 874 "coding[ \t]*:[ \t]*\\([^ \t]+\\)[ \t]*"
861 (setq tail-end (point)) 875 suffix "$"))
862 (goto-char pos) 876 (re-unibyte (concat
863 (when (and set-auto-coding-for-load 877 "^" prefix
864 (re-search-forward re-unibyte tail-end t)) 878 "unibyte[ \t]*:[ \t]*\\([^ \t]+\\)[ \t]*"
865 (setq coding-system 'raw-text)) 879 suffix "$"))
866 (when (and (not coding-system) 880 (re-end (concat
867 (re-search-forward re-coding tail-end t)) 881 "^" prefix "end *:[ \t]*" suffix "$"))
868 (setq coding-system (intern (match-string 1))) 882 (pos (point)))
869 (or (coding-system-p coding-system) 883 (re-search-forward re-end tail-end 'move)
870 (setq coding-system nil)))))) 884 (setq tail-end (point))
871 coding-system)) 885 (goto-char pos)
886 (when (and set-auto-coding-for-load
887 (re-search-forward re-unibyte tail-end t))
888 (setq coding-system 'raw-text))
889 (when (and (not coding-system)
890 (re-search-forward re-coding tail-end t))
891 (setq coding-system (intern (match-string 1)))
892 (or (coding-system-p coding-system)
893 (setq coding-system nil))))))
894 coding-system))))
872 895
873(setq set-auto-coding-function 'set-auto-coding) 896(setq set-auto-coding-function 'set-auto-coding)
874 897