diff options
| author | Dave Love | 2002-06-14 23:23:39 +0000 |
|---|---|---|
| committer | Dave Love | 2002-06-14 23:23:39 +0000 |
| commit | 15f2c48a3cb6bfabb762357189ae4543203391e3 (patch) | |
| tree | 3cb358914d081d39684b447963380f63bb521044 | |
| parent | 057eca0991c04ec70f62d94a2e7335dc1b505c7b (diff) | |
| download | emacs-15f2c48a3cb6bfabb762357189ae4543203391e3.tar.gz emacs-15f2c48a3cb6bfabb762357189ae4543203391e3.zip | |
(find-multibyte-characters): Doc fix.
(find-multibyte-characters): Don't test for charset `unknown'.
(locale-language-names): Change or add: be, bs, cy, mk, ru.koi8,
ru, sr_YU, tg, wa, zh.gbk.
(locale-language-names): Change sp to Cyrillic.
(locale-charset-language-names): Match @euro after utf-8.
| -rw-r--r-- | lisp/ChangeLog | 12 | ||||
| -rw-r--r-- | lisp/international/mule-cmds.el | 45 |
2 files changed, 32 insertions, 25 deletions
diff --git a/lisp/ChangeLog b/lisp/ChangeLog
index e32ba01332e..b14c6197f62 100644
--- a/lisp/ChangeLog
+++ b/lisp/ChangeLog
| @@ -1,3 +1,15 @@ | |||
| 1 | 2002-06-15 Dave Love <fx@gnu.org> | ||
| 2 | |||
| 3 | * language/cyrillic.el ("Belarusian"): Doc fix. | ||
| 4 | (Cp1125, koi8-t): Doc fix. | ||
| 5 | |||
| 6 | * international/mule-cmds.el (find-multibyte-characters): Doc fix. | ||
| 7 | (find-multibyte-characters): Don't test for charset `unknown'. | ||
| 8 | (locale-language-names): Change or add: be, bs, cy, mk, ru.koi8, | ||
| 9 | ru, sr_YU, tg, wa, zh.gbk. | ||
| 10 | (locale-language-names): Change sp to Cyrillic. | ||
| 11 | (locale-charset-language-names): Match @euro after utf-8. | ||
| 12 | |||
| 1 | 2002-06-13 Dave Love <fx@gnu.org> | 13 | 2002-06-13 Dave Love <fx@gnu.org> |
| 2 | 14 | ||
| 3 | * language/chinese.el (chinese-gbk, gbk, cp936, windows-936): New | 15 | * language/chinese.el (chinese-gbk, gbk, cp936, windows-936): New |
diff --git a/lisp/international/mule-cmds.el b/lisp/international/mule-cmds.el
index 5513f626311..9c55deef3a3 100644
--- a/lisp/international/mule-cmds.el
+++ b/lisp/international/mule-cmds.el
| @@ -461,6 +461,7 @@ CHARSETS is a list of character sets." | |||
| 461 | (append codings | 461 | (append codings |
| 462 | (char-table-extra-slot char-coding-system-table 0)))))) | 462 | (char-table-extra-slot char-coding-system-table 0)))))) |
| 463 | 463 | ||
| 464 | ;; Fixme: is this doing the right thing now, at least with eight-bit? | ||
| 464 | (defun find-multibyte-characters (from to &optional maxcount excludes) | 465 | (defun find-multibyte-characters (from to &optional maxcount excludes) |
| 465 | "Find multibyte characters in the region specified by FROM and TO. | 466 | "Find multibyte characters in the region specified by FROM and TO. |
| 466 | If FROM is a string, find multibyte characters in the string. | 467 | If FROM is a string, find multibyte characters in the string. |
| @@ -471,9 +472,7 @@ where | |||
| 471 | COUNT is a number of characters, | 472 | COUNT is a number of characters, |
| 472 | CHARs are found characters of the character set. | 473 | CHARs are found characters of the character set. |
| 473 | Optional 3rd arg MAXCOUNT limits how many CHARs are put in the above list. | 474 | Optional 3rd arg MAXCOUNT limits how many CHARs are put in the above list. |
| 474 | Optional 4th arg EXCLUDE is a list of character sets to be ignored. | 475 | Optional 4th arg EXCLUDES is a list of character sets to be ignored." |
| 475 | |||
| 476 | For invalid characters, CHARs are actually strings." | ||
| 477 | (let ((chars nil) | 476 | (let ((chars nil) |
| 478 | charset char) | 477 | charset char) |
| 479 | (if (stringp from) | 478 | (if (stringp from) |
| @@ -481,10 +480,7 @@ For invalid characters, CHARs are actually strings." | |||
| 481 | (while (setq idx (string-match "[^\000-\177]" from idx)) | 480 | (while (setq idx (string-match "[^\000-\177]" from idx)) |
| 482 | (setq char (aref from idx) | 481 | (setq char (aref from idx) |
| 483 | charset (char-charset char)) | 482 | charset (char-charset char)) |
| 484 | (if (eq charset 'unknown) | 483 | (if (or (memq charset '(eight-bit-control eight-bit-graphic)) |
| 485 | (setq char (match-string 0))) | ||
| 486 | (if (or (memq charset '(unknown | ||
| 487 | eight-bit-control eight-bit-graphic)) | ||
| 488 | (not (or (eq excludes t) (memq charset excludes)))) | 484 | (not (or (eq excludes t) (memq charset excludes)))) |
| 489 | (let ((slot (assq charset chars))) | 485 | (let ((slot (assq charset chars))) |
| 490 | (if slot | 486 | (if slot |
| @@ -500,9 +496,7 @@ For invalid characters, CHARs are actually strings." | |||
| 500 | (while (re-search-forward "[^\000-\177]" to t) | 496 | (while (re-search-forward "[^\000-\177]" to t) |
| 501 | (setq char (preceding-char) | 497 | (setq char (preceding-char) |
| 502 | charset (char-charset char)) | 498 | charset (char-charset char)) |
| 503 | (if (eq charset 'unknown) | 499 | (if (or (memq charset '(eight-bit-control eight-bit-graphic)) |
| 504 | (setq char (match-string 0))) | ||
| 505 | (if (or (memq charset '(unknown eight-bit-control eight-bit-graphic)) | ||
| 506 | (not (or (eq excludes t) (memq charset excludes)))) | 500 | (not (or (eq excludes t) (memq charset excludes)))) |
| 507 | (let ((slot (assq charset chars))) | 501 | (let ((slot (assq charset chars))) |
| 508 | (if slot | 502 | (if slot |
| @@ -1641,17 +1635,18 @@ of buffer-file-coding-system set by this function." | |||
| 1641 | ; ay Aymara | 1635 | ; ay Aymara |
| 1642 | ; az Azerbaijani | 1636 | ; az Azerbaijani |
| 1643 | ; ba Bashkir | 1637 | ; ba Bashkir |
| 1644 | ("be" . "Belarussian") ; Belarussian [Byelorussian] | 1638 | ("be" . "Belarusian") ; Belarusian [Byelorussian until early 1990s] |
| 1645 | ("bg" . "Bulgarian") ; Bulgarian | 1639 | ("bg" . "Bulgarian") ; Bulgarian |
| 1646 | ; bh Bihari | 1640 | ; bh Bihari |
| 1647 | ; bi Bislama | 1641 | ; bi Bislama |
| 1648 | ; bn Bengali, Bangla | 1642 | ; bn Bengali, Bangla |
| 1649 | ("bo" . "Tibetan") | 1643 | ("bo" . "Tibetan") |
| 1650 | ("br" . "Latin-1") ; Breton | 1644 | ("br" . "Latin-1") ; Breton |
| 1645 | ("bs" . "Latin-2") ; Bosnian | ||
| 1651 | ("ca" . "Latin-1") ; Catalan | 1646 | ("ca" . "Latin-1") ; Catalan |
| 1652 | ; co Corsican | 1647 | ; co Corsican |
| 1653 | ("cs" . "Czech") | 1648 | ("cs" . "Czech") |
| 1654 | ("cy" . "Latin-8") ; Welsh | 1649 | ("cy" . "Welsh") ; Welsh |
| 1655 | ("da" . "Latin-1") ; Danish | 1650 | ("da" . "Latin-1") ; Danish |
| 1656 | ("de" . "German") | 1651 | ("de" . "German") |
| 1657 | ; dz Bhutani | 1652 | ; dz Bhutani |
| @@ -1662,7 +1657,7 @@ of buffer-file-coding-system set by this function." | |||
| 1662 | ("es" . "Spanish") | 1657 | ("es" . "Spanish") |
| 1663 | ("et" . "Latin-4") ; Estonian | 1658 | ("et" . "Latin-4") ; Estonian |
| 1664 | ("eu" . "Latin-1") ; Basque | 1659 | ("eu" . "Latin-1") ; Basque |
| 1665 | ; fa Persian | 1660 | ; fa Persian glibc uses utf-8 |
| 1666 | ("fi" . "Latin-1") ; Finnish | 1661 | ("fi" . "Latin-1") ; Finnish |
| 1667 | ; fj Fiji | 1662 | ; fj Fiji |
| 1668 | ("fo" . "Latin-1") ; Faroese | 1663 | ("fo" . "Latin-1") ; Faroese |
| @@ -1673,7 +1668,7 @@ of buffer-file-coding-system set by this function." | |||
| 1673 | ("gl" . "Latin-1") ; Galician | 1668 | ("gl" . "Latin-1") ; Galician |
| 1674 | ; gn Guarani | 1669 | ; gn Guarani |
| 1675 | ; gu Gujarati | 1670 | ; gu Gujarati |
| 1676 | ("gv" . "Latin-8") ; Manx Gaelic | 1671 | ("gv" . "Latin-8") ; Manx Gaelic glibc uses 8859-1 |
| 1677 | ; ha Hausa | 1672 | ; ha Hausa |
| 1678 | ("he" . "Hebrew") | 1673 | ("he" . "Hebrew") |
| 1679 | ("hi" . "Devanagari") ; Hindi glibc uses utf-8 | 1674 | ("hi" . "Devanagari") ; Hindi glibc uses utf-8 |
| @@ -1707,7 +1702,7 @@ of buffer-file-coding-system set by this function." | |||
| 1707 | ("lv" . "Latvian") ; Latvian, Lettish | 1702 | ("lv" . "Latvian") ; Latvian, Lettish |
| 1708 | ; mg Malagasy | 1703 | ; mg Malagasy |
| 1709 | ("mi" . "Latin-7") ; Maori | 1704 | ("mi" . "Latin-7") ; Maori |
| 1710 | ("mk" . "Latin-5") ; Macedonian | 1705 | ("mk" . "Cyrillic-ISO") ; Macedonian |
| 1711 | ; ml Malayalam | 1706 | ; ml Malayalam |
| 1712 | ; mn Mongolian | 1707 | ; mn Mongolian |
| 1713 | ; mo Moldavian | 1708 | ; mo Moldavian |
| @@ -1730,8 +1725,8 @@ of buffer-file-coding-system set by this function." | |||
| 1730 | ("rm" . "Latin-1") ; Rhaeto-Romanic | 1725 | ("rm" . "Latin-1") ; Rhaeto-Romanic |
| 1731 | ; rn Kirundi | 1726 | ; rn Kirundi |
| 1732 | ("ro" . "Romanian") | 1727 | ("ro" . "Romanian") |
| 1733 | ("ru.*[_.]koi8" . "Cyrillic-KOI8") ; Russian | 1728 | ("ru.*[_.]koi8\\(?:-r\\)?\\'" . "Cyrillic-KOI8") ; Russian |
| 1734 | ("ru" . "Latin-5") ; Russian | 1729 | ("ru" . "Cyrillic-ISO") ; Russian |
| 1735 | ; rw Kinyarwanda | 1730 | ; rw Kinyarwanda |
| 1736 | ("sa" . "Devanagari") ; Sanskrit | 1731 | ("sa" . "Devanagari") ; Sanskrit |
| 1737 | ; sd Sindhi | 1732 | ; sd Sindhi |
| @@ -1746,6 +1741,7 @@ of buffer-file-coding-system set by this function." | |||
| 1746 | ; so Somali | 1741 | ; so Somali |
| 1747 | ("sq" . "Latin-1") ; Albanian | 1742 | ("sq" . "Latin-1") ; Albanian |
| 1748 | ("sr" . "Latin-2") ; Serbian (Latin alphabet) | 1743 | ("sr" . "Latin-2") ; Serbian (Latin alphabet) |
| 1744 | ("sr.*@cyrillic" . "Cyrillic-ISO") ; per glibc | ||
| 1749 | ; ss Siswati | 1745 | ; ss Siswati |
| 1750 | ; st Sesotho | 1746 | ; st Sesotho |
| 1751 | ; su Sundanese | 1747 | ; su Sundanese |
| @@ -1753,7 +1749,7 @@ of buffer-file-coding-system set by this function." | |||
| 1753 | ("sw" . "Latin-1") ; Swahili | 1749 | ("sw" . "Latin-1") ; Swahili |
| 1754 | ; ta Tamil glibc uses utf-8 | 1750 | ; ta Tamil glibc uses utf-8 |
| 1755 | ; te Telugu glibc uses utf-8 | 1751 | ; te Telugu glibc uses utf-8 |
| 1756 | ("tg" . "Cyrillic-KOI8-T") ; Tajik | 1752 | ("tg" . "Tajik") |
| 1757 | ("th" . "Thai") | 1753 | ("th" . "Thai") |
| 1758 | ; ti Tigrinya | 1754 | ; ti Tigrinya |
| 1759 | ; tk Turkmen | 1755 | ; tk Turkmen |
| @@ -1770,6 +1766,7 @@ of buffer-file-coding-system set by this function." | |||
| 1770 | ("uz" . "Latin-1") ; Uzbek | 1766 | ("uz" . "Latin-1") ; Uzbek |
| 1771 | ("vi" . "Vietnamese") ; glibc uses utf-8 | 1767 | ("vi" . "Vietnamese") ; glibc uses utf-8 |
| 1772 | ; vo Volapuk | 1768 | ; vo Volapuk |
| 1769 | ("wa" . "Latin-1") ; Walloon | ||
| 1773 | ; wo Wolof | 1770 | ; wo Wolof |
| 1774 | ; xh Xhosa | 1771 | ; xh Xhosa |
| 1775 | ("yi" . "Windows-1255") ; Yiddish | 1772 | ("yi" . "Windows-1255") ; Yiddish |
| @@ -1778,13 +1775,11 @@ of buffer-file-coding-system set by this function." | |||
| 1778 | 1775 | ||
| 1779 | ; glibc: | 1776 | ; glibc: |
| 1780 | ; zh_CN.GB18030/GB18030 \ | 1777 | ; zh_CN.GB18030/GB18030 \ |
| 1781 | ; zh_CN.GBK/GBK \ | ||
| 1782 | ; zh_HK/BIG5-HKSCS \ | 1778 | ; zh_HK/BIG5-HKSCS \ |
| 1783 | ; zh_TW/BIG5 \ | ||
| 1784 | ; zh_TW.EUC-TW/EUC-TW \ | ||
| 1785 | 1779 | ||
| 1786 | ("zh.*[._]big5" . "Chinese-BIG5") | 1780 | ("zh.*[._]big5" . "Chinese-BIG5") |
| 1787 | ("zh.*[._]gbk" . nil) ; Solaris 2.7; has gbk-0 as well as GB 2312.1980-0 | 1781 | ("zh.*[._].gbk" . "Chinese-GBK") |
| 1782 | ;; glibc has zh_TW.EUC-TW, with zh_TW defaulting to Big5 | ||
| 1788 | ("zh_tw" . "Chinese-CNS") | 1783 | ("zh_tw" . "Chinese-CNS") |
| 1789 | ("zh" . "Chinese-GB") | 1784 | ("zh" . "Chinese-GB") |
| 1790 | ; zu Zulu | 1785 | ; zu Zulu |
| @@ -1801,7 +1796,7 @@ of buffer-file-coding-system set by this function." | |||
| 1801 | ("cz" . "Czech") ; e.g. Solaris 2.6 | 1796 | ("cz" . "Czech") ; e.g. Solaris 2.6 |
| 1802 | ("ee" . "Latin-4") ; Estonian, e.g. X11R6.4 | 1797 | ("ee" . "Latin-4") ; Estonian, e.g. X11R6.4 |
| 1803 | ("iw" . "Hebrew") ; e.g. X11R6.4 | 1798 | ("iw" . "Hebrew") ; e.g. X11R6.4 |
| 1804 | ("sp" . "Latin-5") ; Serbian (Cyrillic alphabet), e.g. X11R6.4 | 1799 | ("sp" . "Cyrillic-ISO") ; Serbian (Cyrillic alphabet), e.g. X11R6.4 |
| 1805 | ("su" . "Latin-1") ; Finnish, e.g. Solaris 2.6 | 1800 | ("su" . "Latin-1") ; Finnish, e.g. Solaris 2.6 |
| 1806 | ("jp" . "Japanese") ; e.g. MS Windows | 1801 | ("jp" . "Japanese") ; e.g. MS Windows |
| 1807 | ("chs" . "Chinese-GB") ; MS Windows Chinese Simplified | 1802 | ("chs" . "Chinese-GB") ; MS Windows Chinese Simplified |
| @@ -1821,8 +1816,8 @@ If the language name is nil, there is no corresponding language environment.") | |||
| 1821 | (".*8859[-_]?9\\>" . "Latin-5") | 1816 | (".*8859[-_]?9\\>" . "Latin-5") |
| 1822 | (".*8859[-_]?14\\>" . "Latin-8") | 1817 | (".*8859[-_]?14\\>" . "Latin-8") |
| 1823 | (".*8859[-_]?15\\>" . "Latin-9") | 1818 | (".*8859[-_]?15\\>" . "Latin-9") |
| 1824 | (".*@euro\\>" . "Latin-9") | 1819 | (".*utf\\(-?8\\)\\>" . "UTF-8") |
| 1825 | (".*utf\\(-?8\\)\\>" . "UTF-8"))) | 1820 | (".*@euro\\>" . "Latin-9"))) ; utf-8@euro exists, so put this last |
| 1826 | "List of pairs of locale regexps and charset language names. | 1821 | "List of pairs of locale regexps and charset language names. |
| 1827 | The first element whose locale regexp matches the start of a downcased locale | 1822 | The first element whose locale regexp matches the start of a downcased locale |
| 1828 | specifies the language name whose charsets correspond to that locale. | 1823 | specifies the language name whose charsets correspond to that locale. |