diff options
| author | Gerd Moellmann | 2001-05-16 10:36:54 +0000 |
|---|---|---|
| committer | Gerd Moellmann | 2001-05-16 10:36:54 +0000 |
| commit | 502522b28034c87b1e7fcffd612d418b7df9e450 (patch) | |
| tree | 8e21d28fc663f0294ce74b35b26ba208eba738dd | |
| parent | dbcf3c03cabd113eb15cef810206cf3a6a3d45dd (diff) | |
| download | emacs-502522b28034c87b1e7fcffd612d418b7df9e450.tar.gz emacs-502522b28034c87b1e7fcffd612d418b7df9e450.zip | |
(auto-coding-regexp-alist): New user-option.
(auto-coding-from-file-contents): New function.
(set-auto-coding): Use it to determine a coding system.
| -rw-r--r-- | lisp/international/mule.el | 207 |
1 files changed, 119 insertions, 88 deletions
diff --git a/lisp/international/mule.el b/lisp/international/mule.el index 0f8d6c3f66f..6edd107d7b5 100644 --- a/lisp/international/mule.el +++ b/lisp/international/mule.el | |||
| @@ -1237,6 +1237,20 @@ and the contents of `file-coding-system-alist'." | |||
| 1237 | :type '(repeat (cons (regexp :tag "File name regexp") | 1237 | :type '(repeat (cons (regexp :tag "File name regexp") |
| 1238 | (symbol :tag "Coding system")))) | 1238 | (symbol :tag "Coding system")))) |
| 1239 | 1239 | ||
| 1240 | (defcustom auto-coding-regexp-alist | ||
| 1241 | '(("^BABYL OPTIONS:[ \t]*-\\*-[ \t]*rmail[ \t]*-\\*-" . no-conversion)) | ||
| 1242 | "Alist of patterns vs corresponding coding systems. | ||
| 1243 | Each element looks like (REGEXP . CODING-SYSTEM). | ||
| 1244 | A file whose first bytes match REGEXP is decoded by CODING-SYSTEM on reading. | ||
| 1245 | |||
| 1246 | The settings in this alist take priority over `coding:' tags | ||
| 1247 | in the file (see the function `set-auto-coding') | ||
| 1248 | and the contents of `file-coding-system-alist'." | ||
| 1249 | :group 'files | ||
| 1250 | :group 'mule | ||
| 1251 | :type '(repeat (cons (regexp :tag "Regexp") | ||
| 1252 | (symbol :tag "Coding system")))) | ||
| 1253 | |||
| 1240 | (defvar set-auto-coding-for-load nil | 1254 | (defvar set-auto-coding-for-load nil |
| 1241 | "Non-nil means look for `load-coding' property instead of `coding'. | 1255 | "Non-nil means look for `load-coding' property instead of `coding'. |
| 1242 | This is used for loading and byte-compiling Emacs Lisp files.") | 1256 | This is used for loading and byte-compiling Emacs Lisp files.") |
| @@ -1252,106 +1266,123 @@ This is used for loading and byte-compiling Emacs Lisp files.") | |||
| 1252 | (setq alist (cdr alist)))) | 1266 | (setq alist (cdr alist)))) |
| 1253 | coding-system)) | 1267 | coding-system)) |
| 1254 | 1268 | ||
| 1269 | |||
| 1270 | (defun auto-coding-from-file-contents (size) | ||
| 1271 | "Determine a coding system from the contents of the current buffer. | ||
| 1272 | The current buffer contains SIZE bytes starting at point. | ||
| 1273 | Value is either a coding system or nil." | ||
| 1274 | (save-excursion | ||
| 1275 | (let ((alist auto-coding-regexp-alist) | ||
| 1276 | coding-system) | ||
| 1277 | (while (and alist (not coding-system)) | ||
| 1278 | (let ((regexp (car (car alist)))) | ||
| 1279 | (when (re-search-forward regexp (+ (point) size) t) | ||
| 1280 | (setq coding-system (cdr (car alist))))) | ||
| 1281 | (setq alist (cdr alist))) | ||
| 1282 | coding-system))) | ||
| 1283 | |||
| 1284 | |||
| 1255 | (defun set-auto-coding (filename size) | 1285 | (defun set-auto-coding (filename size) |
| 1256 | "Return coding system for a file FILENAME of which SIZE bytes follow point. | 1286 | "Return coding system for a file FILENAME of which SIZE bytes follow point. |
| 1257 | These bytes should include at least the first 1k of the file | 1287 | These bytes should include at least the first 1k of the file |
| 1258 | and the last 3k of the file, but the middle may be omitted. | 1288 | and the last 3k of the file, but the middle may be omitted. |
| 1259 | 1289 | ||
| 1260 | It checks FILENAME against the variable `auto-coding-alist'. | 1290 | It checks FILENAME against the variable `auto-coding-alist'. If |
| 1261 | If FILENAME doesn't match any entries in the variable, | 1291 | FILENAME doesn't match any entries in the variable, it checks the |
| 1262 | it checks for a `coding:' tag in the first one or two lines following | 1292 | contents of the current buffer following point against |
| 1263 | point. If no `coding:' tag is found, it checks for local variables | 1293 | `auto-coding-regexp-alist'. If no match is found, it checks for a |
| 1264 | list in the last 3K bytes out of the SIZE bytes. | 1294 | `coding:' tag in the first one or two lines following point. If no |
| 1295 | `coding:' tag is found, it checks for local variables list in the last | ||
| 1296 | 3K bytes out of the SIZE bytes. | ||
| 1265 | 1297 | ||
| 1266 | The return value is the specified coding system, | 1298 | The return value is the specified coding system, |
| 1267 | or nil if nothing specified. | 1299 | or nil if nothing specified. |
| 1268 | 1300 | ||
| 1269 | The variable `set-auto-coding-function' (which see) is set to this | 1301 | The variable `set-auto-coding-function' (which see) is set to this |
| 1270 | function by default." | 1302 | function by default." |
| 1271 | (let ((coding-system (auto-coding-alist-lookup filename))) | 1303 | (or (auto-coding-alist-lookup filename) |
| 1272 | 1304 | (auto-coding-from-file-contents size) | |
| 1273 | (or coding-system | 1305 | (let* ((case-fold-search t) |
| 1274 | (let* ((case-fold-search t) | 1306 | (head-start (point)) |
| 1275 | (head-start (point)) | 1307 | (head-end (+ head-start (min size 1024))) |
| 1276 | (head-end (+ head-start (min size 1024))) | 1308 | (tail-start (+ head-start (max (- size 3072) 0))) |
| 1277 | (tail-start (+ head-start (max (- size 3072) 0))) | 1309 | (tail-end (+ head-start size)) |
| 1278 | (tail-end (+ head-start size)) | 1310 | coding-system head-found tail-found pos) |
| 1279 | coding-system head-found tail-found pos) | 1311 | ;; Try a short cut by searching for the string "coding:" |
| 1280 | ;; Try a short cut by searching for the string "coding:" | 1312 | ;; and for "unibyte:" at the head and tail of SIZE bytes. |
| 1281 | ;; and for "unibyte:" at the head and tail of SIZE bytes. | 1313 | (setq head-found (or (search-forward "coding:" head-end t) |
| 1282 | (setq head-found (or (search-forward "coding:" head-end t) | 1314 | (search-forward "unibyte:" head-end t))) |
| 1283 | (search-forward "unibyte:" head-end t))) | 1315 | (if (and head-found (> head-found tail-start)) |
| 1284 | (if (and head-found (> head-found tail-start)) | 1316 | ;; Head and tail are overlapped. |
| 1285 | ;; Head and tail are overlapped. | 1317 | (setq tail-found head-found) |
| 1286 | (setq tail-found head-found) | 1318 | (goto-char tail-start) |
| 1287 | (goto-char tail-start) | 1319 | (setq tail-found (or (search-forward "coding:" tail-end t) |
| 1288 | (setq tail-found (or (search-forward "coding:" tail-end t) | 1320 | (search-forward "unibyte:" tail-end t)))) |
| 1289 | (search-forward "unibyte:" tail-end t)))) | 1321 | |
| 1290 | 1322 | ;; At first check the head. | |
| 1291 | ;; At first check the head. | 1323 | (when head-found |
| 1292 | (when head-found | 1324 | (goto-char head-start) |
| 1325 | (setq pos (re-search-forward "[\n\r]" head-end t)) | ||
| 1326 | (if (and pos | ||
| 1327 | (= (char-after head-start) ?#) | ||
| 1328 | (= (char-after (1+ head-start)) ?!)) | ||
| 1329 | ;; If the file begins with "#!" (exec interpreter magic), | ||
| 1330 | ;; look for coding frobs in the first two lines. You cannot | ||
| 1331 | ;; necessarily put them in the first line of such a file | ||
| 1332 | ;; without screwing up the interpreter invocation. | ||
| 1333 | (setq pos (search-forward "\n" head-end t))) | ||
| 1334 | (if pos (setq head-end pos)) | ||
| 1335 | (when (< head-found head-end) | ||
| 1293 | (goto-char head-start) | 1336 | (goto-char head-start) |
| 1294 | (setq pos (re-search-forward "[\n\r]" head-end t)) | 1337 | (when (and set-auto-coding-for-load |
| 1295 | (if (and pos | 1338 | (re-search-forward |
| 1296 | (= (char-after head-start) ?#) | 1339 | "-\\*-\\(.*;\\)?[ \t]*unibyte:[ \t]*\\([^ ;]+\\)" |
| 1297 | (= (char-after (1+ head-start)) ?!)) | 1340 | head-end t)) |
| 1298 | ;; If the file begins with "#!" (exec interpreter magic), | 1341 | (setq coding-system 'raw-text)) |
| 1299 | ;; look for coding frobs in the first two lines. You cannot | 1342 | (when (and (not coding-system) |
| 1300 | ;; necessarily put them in the first line of such a file | 1343 | (re-search-forward |
| 1301 | ;; without screwing up the interpreter invocation. | 1344 | "-\\*-\\(.*;\\)?[ \t]*coding:[ \t]*\\([^ ;]+\\)" |
| 1302 | (setq pos (search-forward "\n" head-end t))) | 1345 | head-end t)) |
| 1303 | (if pos (setq head-end pos)) | 1346 | (setq coding-system (intern (match-string 2))) |
| 1304 | (when (< head-found head-end) | 1347 | (or (coding-system-p coding-system) |
| 1305 | (goto-char head-start) | 1348 | (setq coding-system nil))))) |
| 1306 | (when (and set-auto-coding-for-load | 1349 | |
| 1307 | (re-search-forward | 1350 | ;; If no coding: tag in the head, check the tail. |
| 1308 | "-\\*-\\(.*;\\)?[ \t]*unibyte:[ \t]*\\([^ ;]+\\)" | 1351 | (when (and tail-found (not coding-system)) |
| 1309 | head-end t)) | 1352 | (goto-char tail-start) |
| 1310 | (setq coding-system 'raw-text)) | 1353 | (search-forward "\n\^L" nil t) |
| 1311 | (when (and (not coding-system) | 1354 | (if (re-search-forward |
| 1312 | (re-search-forward | 1355 | "^\\(.*\\)[ \t]*Local Variables:[ \t]*\\(.*\\)$" tail-end t) |
| 1313 | "-\\*-\\(.*;\\)?[ \t]*coding:[ \t]*\\([^ ;]+\\)" | 1356 | ;; The prefix is what comes before "local variables:" in its |
| 1314 | head-end t)) | 1357 | ;; line. The suffix is what comes after "local variables:" |
| 1315 | (setq coding-system (intern (match-string 2))) | 1358 | ;; in its line. |
| 1316 | (or (coding-system-p coding-system) | 1359 | (let* ((prefix (regexp-quote (match-string 1))) |
| 1317 | (setq coding-system nil))))) | 1360 | (suffix (regexp-quote (match-string 2))) |
| 1318 | 1361 | (re-coding | |
| 1319 | ;; If no coding: tag in the head, check the tail. | 1362 | (concat |
| 1320 | (when (and tail-found (not coding-system)) | 1363 | "^" prefix |
| 1321 | (goto-char tail-start) | 1364 | "[ \t]*coding[ \t]*:[ \t]*\\([^ \t]+\\)[ \t]*" |
| 1322 | (search-forward "\n\^L" nil t) | 1365 | suffix "$")) |
| 1323 | (if (re-search-forward | 1366 | (re-unibyte |
| 1324 | "^\\(.*\\)[ \t]*Local Variables:[ \t]*\\(.*\\)$" tail-end t) | 1367 | (concat |
| 1325 | ;; The prefix is what comes before "local variables:" in its | 1368 | "^" prefix |
| 1326 | ;; line. The suffix is what comes after "local variables:" | 1369 | "[ \t]*unibyte[ \t]*:[ \t]*\\([^ \t]+\\)[ \t]*" |
| 1327 | ;; in its line. | 1370 | suffix "$")) |
| 1328 | (let* ((prefix (regexp-quote (match-string 1))) | 1371 | (re-end |
| 1329 | (suffix (regexp-quote (match-string 2))) | 1372 | (concat "^" prefix "[ \t]*end *:[ \t]*" suffix "$")) |
| 1330 | (re-coding | 1373 | (pos (point))) |
| 1331 | (concat | 1374 | (re-search-forward re-end tail-end 'move) |
| 1332 | "^" prefix | 1375 | (setq tail-end (point)) |
| 1333 | "[ \t]*coding[ \t]*:[ \t]*\\([^ \t]+\\)[ \t]*" | 1376 | (goto-char pos) |
| 1334 | suffix "$")) | 1377 | (when (and set-auto-coding-for-load |
| 1335 | (re-unibyte | 1378 | (re-search-forward re-unibyte tail-end t)) |
| 1336 | (concat | 1379 | (setq coding-system 'raw-text)) |
| 1337 | "^" prefix | 1380 | (when (and (not coding-system) |
| 1338 | "[ \t]*unibyte[ \t]*:[ \t]*\\([^ \t]+\\)[ \t]*" | 1381 | (re-search-forward re-coding tail-end t)) |
| 1339 | suffix "$")) | 1382 | (setq coding-system (intern (match-string 1))) |
| 1340 | (re-end | 1383 | (or (coding-system-p coding-system) |
| 1341 | (concat "^" prefix "[ \t]*end *:[ \t]*" suffix "$")) | 1384 | (setq coding-system nil)))))) |
| 1342 | (pos (point))) | 1385 | coding-system))) |
| 1343 | (re-search-forward re-end tail-end 'move) | ||
| 1344 | (setq tail-end (point)) | ||
| 1345 | (goto-char pos) | ||
| 1346 | (when (and set-auto-coding-for-load | ||
| 1347 | (re-search-forward re-unibyte tail-end t)) | ||
| 1348 | (setq coding-system 'raw-text)) | ||
| 1349 | (when (and (not coding-system) | ||
| 1350 | (re-search-forward re-coding tail-end t)) | ||
| 1351 | (setq coding-system (intern (match-string 1))) | ||
| 1352 | (or (coding-system-p coding-system) | ||
| 1353 | (setq coding-system nil)))))) | ||
| 1354 | coding-system)))) | ||
| 1355 | 1386 | ||
| 1356 | (setq set-auto-coding-function 'set-auto-coding) | 1387 | (setq set-auto-coding-function 'set-auto-coding) |
| 1357 | 1388 | ||