diff options
| author | Colin Walters | 2002-05-21 21:14:03 +0000 |
|---|---|---|
| committer | Colin Walters | 2002-05-21 21:14:03 +0000 |
| commit | d9f6dfe6e70e5d344e5e786b43010c30a16737a3 (patch) | |
| tree | 89a65662675d058ba20614f88306361952b55b02 | |
| parent | 4531bb653ba0638f31bb04a6a17367a72f072f8a (diff) | |
| download | emacs-d9f6dfe6e70e5d344e5e786b43010c30a16737a3.tar.gz emacs-d9f6dfe6e70e5d344e5e786b43010c30a16737a3.zip | |
(make-coding-system): Doc fixes.
(auto-coding-functions): New variable.
(auto-coding-from-file-contents): Use it.
(set-auto-coding): Update docs.
(sgml-xml-auto-coding-function): New function.
| -rw-r--r-- | lisp/international/mule.el | 55 |
1 files changed, 49 insertions, 6 deletions
diff --git a/lisp/international/mule.el b/lisp/international/mule.el index fea506437a5..b2ec27dc3b0 100644 --- a/lisp/international/mule.el +++ b/lisp/international/mule.el | |||
| @@ -725,9 +725,9 @@ in the following format: | |||
| 725 | 725 | ||
| 726 | TYPE is an integer value indicating the type of the coding system as follows: | 726 | TYPE is an integer value indicating the type of the coding system as follows: |
| 727 | 0: Emacs internal format, | 727 | 0: Emacs internal format, |
| 728 | 1: Shift-JIS (or MS-Kanji) used mainly on Japanese PC, | 728 | 1: Shift-JIS (or MS-Kanji) used mainly on Japanese PCs, |
| 729 | 2: ISO-2022 including many variants, | 729 | 2: ISO-2022 including many variants, |
| 730 | 3: Big5 used mainly on Chinese PC, | 730 | 3: Big5 used mainly on Chinese PCs, |
| 731 | 4: private, CCL programs provide encoding/decoding algorithm, | 731 | 4: private, CCL programs provide encoding/decoding algorithm, |
| 732 | 5: Raw-text, which means that text contains random 8-bit codes. | 732 | 5: Raw-text, which means that text contains random 8-bit codes. |
| 733 | 733 | ||
| @@ -822,7 +822,7 @@ following properties are recognized: | |||
| 822 | 822 | ||
| 823 | o mime-charset | 823 | o mime-charset |
| 824 | 824 | ||
| 825 | The value is a symbol of which name is `MIME-charset' parameter of | 825 | The value is a symbol whose name is the `MIME-charset' parameter of |
| 826 | the coding system. | 826 | the coding system. |
| 827 | 827 | ||
| 828 | o valid-codes (meaningful only for a coding system based on CCL) | 828 | o valid-codes (meaningful only for a coding system based on CCL) |
| @@ -1489,6 +1489,22 @@ and the contents of `file-coding-system-alist'." | |||
| 1489 | :type '(repeat (cons (regexp :tag "Regexp") | 1489 | :type '(repeat (cons (regexp :tag "Regexp") |
| 1490 | (symbol :tag "Coding system")))) | 1490 | (symbol :tag "Coding system")))) |
| 1491 | 1491 | ||
;; The built-in auto coding functions are defined at the bottom of this file.
(defcustom auto-coding-functions '(sgml-xml-auto-coding-function)
  "List of functions that try to determine a coding system.

Every function in the list is called with the current buffer holding
the text to examine, and must leave that buffer unmodified.  It
receives a single argument, SIZE, and should not search beyond it.
A function that manages to determine a coding system should return
it; otherwise it should return nil.

As with `auto-coding-regexp-alist', the functions listed here take
precedence over `coding:' tags in the file."
  :type '(repeat function)
  :group 'files
  :group 'mule)
| 1507 | |||
(defvar set-auto-coding-for-load nil
  "Non-nil means search for a `load-coding' property rather than `coding'.
This applies when loading and byte-compiling Emacs Lisp files.")
| @@ -1504,21 +1520,25 @@ This is used for loading and byte-compiling Emacs Lisp files.") | |||
| 1504 | (setq alist (cdr alist)))) | 1520 | (setq alist (cdr alist)))) |
| 1505 | coding-system)) | 1521 | coding-system)) |
| 1506 | 1522 | ||
| 1507 | |||
(defun auto-coding-from-file-contents (size)
  "Determine a coding system from the contents of the current buffer.
The current buffer contains SIZE bytes starting at point.
Value is either a coding system or nil.

The regexps in `auto-coding-regexp-alist' are tried first, in order;
if none of them yields a coding system, each function in
`auto-coding-functions' is called with SIZE until one returns
non-nil.  A function that signals an error is treated as having
returned nil.  Point is preserved."
  (save-excursion
    (let ((entries auto-coding-regexp-alist)
          (detectors auto-coding-functions)
          found)
      ;; Pass 1: the first matching regexp decides the coding system.
      (while (and entries (null found))
        (let ((entry (car entries)))
          (if (re-search-forward (car entry) (+ (point) size) t)
              (setq found (cdr entry))))
        (setq entries (cdr entries)))
      ;; Pass 2: ask each detector function in turn.  Every call runs
      ;; inside its own `save-excursion' so one detector moving point
      ;; does not affect the next, and errors are swallowed on purpose
      ;; so that a broken detector cannot prevent the file being read.
      (while (and detectors (null found))
        (let ((detector (car detectors)))
          (setq detectors (cdr detectors))
          (setq found (condition-case nil
                          (save-excursion
                            (funcall detector size))
                        (error nil)))))
      found)))
| 1521 | |||
| 1522 | 1542 | ||
| 1523 | (defun set-auto-coding (filename size) | 1543 | (defun set-auto-coding (filename size) |
| 1524 | "Return coding system for a file FILENAME of which SIZE bytes follow point. | 1544 | "Return coding system for a file FILENAME of which SIZE bytes follow point. |
| @@ -1528,7 +1548,8 @@ and the last 3k of the file, but the middle may be omitted. | |||
| 1528 | It checks FILENAME against the variable `auto-coding-alist'. If | 1548 | It checks FILENAME against the variable `auto-coding-alist'. If |
| 1529 | FILENAME doesn't match any entries in the variable, it checks the | 1549 | FILENAME doesn't match any entries in the variable, it checks the |
| 1530 | contents of the current buffer following point against | 1550 | contents of the current buffer following point against |
| 1531 | `auto-coding-regexp-alist'. If no match is found, it checks for a | 1551 | `auto-coding-regexp-alist', and tries calling each function in |
| 1552 | `auto-coding-functions'. If no match is found, it checks for a | ||
| 1532 | `coding:' tag in the first one or two lines following point. If no | 1553 | `coding:' tag in the first one or two lines following point. If no |
| 1533 | `coding:' tag is found, it checks for local variables list in the last | 1554 | `coding:' tag is found, it checks for local variables list in the last |
| 1534 | 3K bytes out of the SIZE bytes. | 1555 | 3K bytes out of the SIZE bytes. |
| @@ -1898,6 +1919,28 @@ the table in `translation-table-vector'." | |||
| 1898 | (setq ignore-relative-composition | 1919 | (setq ignore-relative-composition |
| 1899 | (make-char-table 'ignore-relative-composition)) | 1920 | (make-char-table 'ignore-relative-composition)) |
| 1900 | 1921 | ||
| 1922 | |||
| 1923 | ;;; Built-in auto-coding-functions: | ||
| 1924 | |||
(defun sgml-xml-auto-coding-function (size)
  "Determine whether the buffer is XML, and if so, its encoding.
This function is intended to be added to `auto-coding-functions'.

SIZE is the number of bytes following point that may be examined.
The XML declaration is only recognized at the very beginning of the
accessible portion of the buffer, optionally preceded by whitespace.

Return nil if the buffer does not start with an XML declaration.
If the declaration has an `encoding' pseudo-attribute, return the
coding system it names, or nil if that name is not a known coding
system.  If the declaration has no `encoding' pseudo-attribute,
return `utf-8'.  Point may be moved."
  ;; SIZE is a byte count relative to point, whereas search bounds are
  ;; buffer positions, so convert it once, before point moves.
  (let ((limit (min (point-max) (+ (point) size))))
    ;; NOERROR is passed so that a non-XML buffer yields nil instead of
    ;; signaling `search-failed'.
    (when (re-search-forward "\\`[[:space:]\n]*<\\?xml" limit t)
      (let ((end (save-excursion
                   ;; This is a hack: the declaration is assumed to end
                   ;; at the first quote followed by `?>'.  It has to be
                   ;; a regexp search; `\\?' matches the literal `?'.
                   (re-search-forward "[\"']\\s-*\\?>" limit t))))
        (when end
          ;; XML permits either quote character around the value.
          (if (re-search-forward "encoding=[\"']\\(.+?\\)[\"']" end t)
              ;; `intern-soft' avoids creating new symbols from file
              ;; contents.  The explicit `sym' check is needed because
              ;; `coding-system-p' also returns t for nil.
              (let ((sym (intern-soft (downcase (match-string 1)))))
                (and sym (coding-system-p sym) sym))
            'utf-8))))))
| 1943 | |||
| 1901 | ;;; | 1944 | ;;; |
| 1902 | (provide 'mule) | 1945 | (provide 'mule) |
| 1903 | 1946 | ||