aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGerd Moellmann2001-05-16 10:36:54 +0000
committerGerd Moellmann2001-05-16 10:36:54 +0000
commit502522b28034c87b1e7fcffd612d418b7df9e450 (patch)
tree8e21d28fc663f0294ce74b35b26ba208eba738dd
parentdbcf3c03cabd113eb15cef810206cf3a6a3d45dd (diff)
downloademacs-502522b28034c87b1e7fcffd612d418b7df9e450.tar.gz
emacs-502522b28034c87b1e7fcffd612d418b7df9e450.zip
(auto-coding-regexp-alist): New user-option.
(auto-coding-from-file-contents): New function. (set-auto-coding): Use it to determine a coding system.
-rw-r--r--lisp/international/mule.el207
1 files changed, 119 insertions, 88 deletions
diff --git a/lisp/international/mule.el b/lisp/international/mule.el
index 0f8d6c3f66f..6edd107d7b5 100644
--- a/lisp/international/mule.el
+++ b/lisp/international/mule.el
@@ -1237,6 +1237,20 @@ and the contents of `file-coding-system-alist'."
1237 :type '(repeat (cons (regexp :tag "File name regexp") 1237 :type '(repeat (cons (regexp :tag "File name regexp")
1238 (symbol :tag "Coding system")))) 1238 (symbol :tag "Coding system"))))
1239 1239
1240(defcustom auto-coding-regexp-alist
1241 '(("^BABYL OPTIONS:[ \t]*-\\*-[ \t]*rmail[ \t]*-\\*-" . no-conversion))
1242 "Alist of patterns vs corresponding coding systems.
1243Each element looks like (REGEXP . CODING-SYSTEM).
1244A file whose first bytes match REGEXP is decoded by CODING-SYSTEM on reading.
1245
1246The settings in this alist take priority over `coding:' tags
1247in the file (see the function `set-auto-coding')
1248and the contents of `file-coding-system-alist'."
1249 :group 'files
1250 :group 'mule
1251 :type '(repeat (cons (regexp :tag "Regexp")
1252 (symbol :tag "Coding system"))))
1253
1240(defvar set-auto-coding-for-load nil 1254(defvar set-auto-coding-for-load nil
1241 "Non-nil means look for `load-coding' property instead of `coding'. 1255 "Non-nil means look for `load-coding' property instead of `coding'.
1242This is used for loading and byte-compiling Emacs Lisp files.") 1256This is used for loading and byte-compiling Emacs Lisp files.")
@@ -1252,106 +1266,123 @@ This is used for loading and byte-compiling Emacs Lisp files.")
1252 (setq alist (cdr alist)))) 1266 (setq alist (cdr alist))))
1253 coding-system)) 1267 coding-system))
1254 1268
1269
1270(defun auto-coding-from-file-contents (size)
1271 "Determine a coding system from the contents of the current buffer.
1272The current buffer contains SIZE bytes starting at point.
1273Value is either a coding system or nil."
1274 (save-excursion
1275 (let ((alist auto-coding-regexp-alist)
1276 coding-system)
1277 (while (and alist (not coding-system))
1278 (let ((regexp (car (car alist))))
1279 (when (re-search-forward regexp (+ (point) size) t)
1280 (setq coding-system (cdr (car alist)))))
1281 (setq alist (cdr alist)))
1282 coding-system)))
1283
1284
1255(defun set-auto-coding (filename size) 1285(defun set-auto-coding (filename size)
1256 "Return coding system for a file FILENAME of which SIZE bytes follow point. 1286 "Return coding system for a file FILENAME of which SIZE bytes follow point.
1257These bytes should include at least the first 1k of the file 1287These bytes should include at least the first 1k of the file
1258and the last 3k of the file, but the middle may be omitted. 1288and the last 3k of the file, but the middle may be omitted.
1259 1289
1260It checks FILENAME against the variable `auto-coding-alist'. 1290It checks FILENAME against the variable `auto-coding-alist'. If
1261If FILENAME doesn't match any entries in the variable, 1291FILENAME doesn't match any entries in the variable, it checks the
1262it checks for a `coding:' tag in the first one or two lines following 1292contents of the current buffer following point against
1263point. If no `coding:' tag is found, it checks for local variables 1293`auto-coding-regexp-alist'. If no match is found, it checks for a
1264list in the last 3K bytes out of the SIZE bytes. 1294`coding:' tag in the first one or two lines following point. If no
1295`coding:' tag is found, it checks for local variables list in the last
12963K bytes out of the SIZE bytes.
1265 1297
1266The return value is the specified coding system, 1298The return value is the specified coding system,
1267or nil if nothing specified. 1299or nil if nothing specified.
1268 1300
1269The variable `set-auto-coding-function' (which see) is set to this 1301The variable `set-auto-coding-function' (which see) is set to this
1270function by default." 1302function by default."
1271 (let ((coding-system (auto-coding-alist-lookup filename))) 1303 (or (auto-coding-alist-lookup filename)
1272 1304 (auto-coding-from-file-contents size)
1273 (or coding-system 1305 (let* ((case-fold-search t)
1274 (let* ((case-fold-search t) 1306 (head-start (point))
1275 (head-start (point)) 1307 (head-end (+ head-start (min size 1024)))
1276 (head-end (+ head-start (min size 1024))) 1308 (tail-start (+ head-start (max (- size 3072) 0)))
1277 (tail-start (+ head-start (max (- size 3072) 0))) 1309 (tail-end (+ head-start size))
1278 (tail-end (+ head-start size)) 1310 coding-system head-found tail-found pos)
1279 coding-system head-found tail-found pos) 1311 ;; Try a short cut by searching for the string "coding:"
1280 ;; Try a short cut by searching for the string "coding:" 1312 ;; and for "unibyte:" at the head and tail of SIZE bytes.
1281 ;; and for "unibyte:" at the head and tail of SIZE bytes. 1313 (setq head-found (or (search-forward "coding:" head-end t)
1282 (setq head-found (or (search-forward "coding:" head-end t) 1314 (search-forward "unibyte:" head-end t)))
1283 (search-forward "unibyte:" head-end t))) 1315 (if (and head-found (> head-found tail-start))
1284 (if (and head-found (> head-found tail-start)) 1316 ;; Head and tail are overlapped.
1285 ;; Head and tail are overlapped. 1317 (setq tail-found head-found)
1286 (setq tail-found head-found) 1318 (goto-char tail-start)
1287 (goto-char tail-start) 1319 (setq tail-found (or (search-forward "coding:" tail-end t)
1288 (setq tail-found (or (search-forward "coding:" tail-end t) 1320 (search-forward "unibyte:" tail-end t))))
1289 (search-forward "unibyte:" tail-end t)))) 1321
1290 1322 ;; At first check the head.
1291 ;; At first check the head. 1323 (when head-found
1292 (when head-found 1324 (goto-char head-start)
1325 (setq pos (re-search-forward "[\n\r]" head-end t))
1326 (if (and pos
1327 (= (char-after head-start) ?#)
1328 (= (char-after (1+ head-start)) ?!))
1329 ;; If the file begins with "#!" (exec interpreter magic),
1330 ;; look for coding frobs in the first two lines. You cannot
1331 ;; necessarily put them in the first line of such a file
1332 ;; without screwing up the interpreter invocation.
1333 (setq pos (search-forward "\n" head-end t)))
1334 (if pos (setq head-end pos))
1335 (when (< head-found head-end)
1293 (goto-char head-start) 1336 (goto-char head-start)
1294 (setq pos (re-search-forward "[\n\r]" head-end t)) 1337 (when (and set-auto-coding-for-load
1295 (if (and pos 1338 (re-search-forward
1296 (= (char-after head-start) ?#) 1339 "-\\*-\\(.*;\\)?[ \t]*unibyte:[ \t]*\\([^ ;]+\\)"
1297 (= (char-after (1+ head-start)) ?!)) 1340 head-end t))
1298 ;; If the file begins with "#!" (exec interpreter magic), 1341 (setq coding-system 'raw-text))
1299 ;; look for coding frobs in the first two lines. You cannot 1342 (when (and (not coding-system)
1300 ;; necessarily put them in the first line of such a file 1343 (re-search-forward
1301 ;; without screwing up the interpreter invocation. 1344 "-\\*-\\(.*;\\)?[ \t]*coding:[ \t]*\\([^ ;]+\\)"
1302 (setq pos (search-forward "\n" head-end t))) 1345 head-end t))
1303 (if pos (setq head-end pos)) 1346 (setq coding-system (intern (match-string 2)))
1304 (when (< head-found head-end) 1347 (or (coding-system-p coding-system)
1305 (goto-char head-start) 1348 (setq coding-system nil)))))
1306 (when (and set-auto-coding-for-load 1349
1307 (re-search-forward 1350 ;; If no coding: tag in the head, check the tail.
1308 "-\\*-\\(.*;\\)?[ \t]*unibyte:[ \t]*\\([^ ;]+\\)" 1351 (when (and tail-found (not coding-system))
1309 head-end t)) 1352 (goto-char tail-start)
1310 (setq coding-system 'raw-text)) 1353 (search-forward "\n\^L" nil t)
1311 (when (and (not coding-system) 1354 (if (re-search-forward
1312 (re-search-forward 1355 "^\\(.*\\)[ \t]*Local Variables:[ \t]*\\(.*\\)$" tail-end t)
1313 "-\\*-\\(.*;\\)?[ \t]*coding:[ \t]*\\([^ ;]+\\)" 1356 ;; The prefix is what comes before "local variables:" in its
1314 head-end t)) 1357 ;; line. The suffix is what comes after "local variables:"
1315 (setq coding-system (intern (match-string 2))) 1358 ;; in its line.
1316 (or (coding-system-p coding-system) 1359 (let* ((prefix (regexp-quote (match-string 1)))
1317 (setq coding-system nil))))) 1360 (suffix (regexp-quote (match-string 2)))
1318 1361 (re-coding
1319 ;; If no coding: tag in the head, check the tail. 1362 (concat
1320 (when (and tail-found (not coding-system)) 1363 "^" prefix
1321 (goto-char tail-start) 1364 "[ \t]*coding[ \t]*:[ \t]*\\([^ \t]+\\)[ \t]*"
1322 (search-forward "\n\^L" nil t) 1365 suffix "$"))
1323 (if (re-search-forward 1366 (re-unibyte
1324 "^\\(.*\\)[ \t]*Local Variables:[ \t]*\\(.*\\)$" tail-end t) 1367 (concat
1325 ;; The prefix is what comes before "local variables:" in its 1368 "^" prefix
1326 ;; line. The suffix is what comes after "local variables:" 1369 "[ \t]*unibyte[ \t]*:[ \t]*\\([^ \t]+\\)[ \t]*"
1327 ;; in its line. 1370 suffix "$"))
1328 (let* ((prefix (regexp-quote (match-string 1))) 1371 (re-end
1329 (suffix (regexp-quote (match-string 2))) 1372 (concat "^" prefix "[ \t]*end *:[ \t]*" suffix "$"))
1330 (re-coding 1373 (pos (point)))
1331 (concat 1374 (re-search-forward re-end tail-end 'move)
1332 "^" prefix 1375 (setq tail-end (point))
1333 "[ \t]*coding[ \t]*:[ \t]*\\([^ \t]+\\)[ \t]*" 1376 (goto-char pos)
1334 suffix "$")) 1377 (when (and set-auto-coding-for-load
1335 (re-unibyte 1378 (re-search-forward re-unibyte tail-end t))
1336 (concat 1379 (setq coding-system 'raw-text))
1337 "^" prefix 1380 (when (and (not coding-system)
1338 "[ \t]*unibyte[ \t]*:[ \t]*\\([^ \t]+\\)[ \t]*" 1381 (re-search-forward re-coding tail-end t))
1339 suffix "$")) 1382 (setq coding-system (intern (match-string 1)))
1340 (re-end 1383 (or (coding-system-p coding-system)
1341 (concat "^" prefix "[ \t]*end *:[ \t]*" suffix "$")) 1384 (setq coding-system nil))))))
1342 (pos (point))) 1385 coding-system)))
1343 (re-search-forward re-end tail-end 'move)
1344 (setq tail-end (point))
1345 (goto-char pos)
1346 (when (and set-auto-coding-for-load
1347 (re-search-forward re-unibyte tail-end t))
1348 (setq coding-system 'raw-text))
1349 (when (and (not coding-system)
1350 (re-search-forward re-coding tail-end t))
1351 (setq coding-system (intern (match-string 1)))
1352 (or (coding-system-p coding-system)
1353 (setq coding-system nil))))))
1354 coding-system))))
1355 1386
1356(setq set-auto-coding-function 'set-auto-coding) 1387(setq set-auto-coding-function 'set-auto-coding)
1357 1388