about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Dave Love, 2002-06-14 23:23:39 +0000
committer: Dave Love, 2002-06-14 23:23:39 +0000
commit: 15f2c48a3cb6bfabb762357189ae4543203391e3 (patch)
tree: 3cb358914d081d39684b447963380f63bb521044
parent: 057eca0991c04ec70f62d94a2e7335dc1b505c7b (diff)
download: emacs-15f2c48a3cb6bfabb762357189ae4543203391e3.tar.gz
          emacs-15f2c48a3cb6bfabb762357189ae4543203391e3.zip
(find-multibyte-characters): Doc fix.
(find-multibyte-characters): Don't test for charset `unknown'. (locale-language-names): Change or add: be, bs, cy, mk, ru.koi8, ru, sr_YU, tg, wa, zh.gbk. (locale-language-names): Change sp to Cyrillic. (locale-charset-language-names): Match @euro after utf-8.
-rw-r--r--  lisp/ChangeLog                   12
-rw-r--r--  lisp/international/mule-cmds.el  45
2 files changed, 32 insertions, 25 deletions
diff --git a/lisp/ChangeLog b/lisp/ChangeLog
index e32ba01332e..b14c6197f62 100644
--- a/lisp/ChangeLog
+++ b/lisp/ChangeLog
@@ -1,3 +1,15 @@
1 2002-06-15 Dave Love <fx@gnu.org>
2
3 * language/cyrillic.el ("Belarusian"): Doc fix.
4 (Cp1125, koi8-t): Doc fix.
5
6 * international/mule-cmds.el (find-multibyte-characters): Doc fix.
7 (find-multibyte-characters): Don't test for charset `unknown'.
8 (locale-language-names): Change or add: be, bs, cy, mk, ru.koi8,
9 ru, sr_YU, tg, wa, zh.gbk.
10 (locale-language-names): Change sp to Cyrillic.
11 (locale-charset-language-names): Match @euro after utf-8.
12
1 2002-06-13 Dave Love <fx@gnu.org> 13 2002-06-13 Dave Love <fx@gnu.org>
2 14
3 * language/chinese.el (chinese-gbk, gbk, cp936, windows-936): New 15 * language/chinese.el (chinese-gbk, gbk, cp936, windows-936): New
diff --git a/lisp/international/mule-cmds.el b/lisp/international/mule-cmds.el
index 5513f626311..9c55deef3a3 100644
--- a/lisp/international/mule-cmds.el
+++ b/lisp/international/mule-cmds.el
@@ -461,6 +461,7 @@ CHARSETS is a list of character sets."
461 (append codings 461 (append codings
462 (char-table-extra-slot char-coding-system-table 0)))))) 462 (char-table-extra-slot char-coding-system-table 0))))))
463 463
464;; Fixme: is this doing the right thing now, at least with eight-bit?
464(defun find-multibyte-characters (from to &optional maxcount excludes) 465(defun find-multibyte-characters (from to &optional maxcount excludes)
465 "Find multibyte characters in the region specified by FROM and TO. 466 "Find multibyte characters in the region specified by FROM and TO.
466If FROM is a string, find multibyte characters in the string. 467If FROM is a string, find multibyte characters in the string.
@@ -471,9 +472,7 @@ where
471 COUNT is a number of characters, 472 COUNT is a number of characters,
472 CHARs are found characters of the character set. 473 CHARs are found characters of the character set.
473Optional 3rd arg MAXCOUNT limits how many CHARs are put in the above list. 474Optional 3rd arg MAXCOUNT limits how many CHARs are put in the above list.
474Optional 4th arg EXCLUDE is a list of character sets to be ignored. 475Optional 4th arg EXCLUDE is a list of character sets to be ignored."
475
476For invalid characters, CHARs are actually strings."
477 (let ((chars nil) 476 (let ((chars nil)
478 charset char) 477 charset char)
479 (if (stringp from) 478 (if (stringp from)
@@ -481,10 +480,7 @@ For invalid characters, CHARs are actually strings."
481 (while (setq idx (string-match "[^\000-\177]" from idx)) 480 (while (setq idx (string-match "[^\000-\177]" from idx))
482 (setq char (aref from idx) 481 (setq char (aref from idx)
483 charset (char-charset char)) 482 charset (char-charset char))
484 (if (eq charset 'unknown) 483 (if (or (memq charset '(eight-bit-control eight-bit-graphic))
485 (setq char (match-string 0)))
486 (if (or (memq charset '(unknown
487 eight-bit-control eight-bit-graphic))
488 (not (or (eq excludes t) (memq charset excludes)))) 484 (not (or (eq excludes t) (memq charset excludes))))
489 (let ((slot (assq charset chars))) 485 (let ((slot (assq charset chars)))
490 (if slot 486 (if slot
@@ -500,9 +496,7 @@ For invalid characters, CHARs are actually strings."
500 (while (re-search-forward "[^\000-\177]" to t) 496 (while (re-search-forward "[^\000-\177]" to t)
501 (setq char (preceding-char) 497 (setq char (preceding-char)
502 charset (char-charset char)) 498 charset (char-charset char))
503 (if (eq charset 'unknown) 499 (if (or (memq charset '(eight-bit-control eight-bit-graphic))
504 (setq char (match-string 0)))
505 (if (or (memq charset '(unknown eight-bit-control eight-bit-graphic))
506 (not (or (eq excludes t) (memq charset excludes)))) 500 (not (or (eq excludes t) (memq charset excludes))))
507 (let ((slot (assq charset chars))) 501 (let ((slot (assq charset chars)))
508 (if slot 502 (if slot
@@ -1641,17 +1635,18 @@ of buffer-file-coding-system set by this function."
1641 ; ay Aymara 1635 ; ay Aymara
1642 ; az Azerbaijani 1636 ; az Azerbaijani
1643 ; ba Bashkir 1637 ; ba Bashkir
1644 ("be" . "Belarussian") ; Belarussian [Byelorussian] 1638 ("be" . "Belarusian") ; Belarusian [Byelorussian until early 1990s]
1645 ("bg" . "Bulgarian") ; Bulgarian 1639 ("bg" . "Bulgarian") ; Bulgarian
1646 ; bh Bihari 1640 ; bh Bihari
1647 ; bi Bislama 1641 ; bi Bislama
1648 ; bn Bengali, Bangla 1642 ; bn Bengali, Bangla
1649 ("bo" . "Tibetan") 1643 ("bo" . "Tibetan")
1650 ("br" . "Latin-1") ; Breton 1644 ("br" . "Latin-1") ; Breton
1645 ("bs" . "Latin-2") ; Bosnian
1651 ("ca" . "Latin-1") ; Catalan 1646 ("ca" . "Latin-1") ; Catalan
1652 ; co Corsican 1647 ; co Corsican
1653 ("cs" . "Czech") 1648 ("cs" . "Czech")
1654 ("cy" . "Latin-8") ; Welsh 1649 ("cy" . "Welsh") ; Welsh
1655 ("da" . "Latin-1") ; Danish 1650 ("da" . "Latin-1") ; Danish
1656 ("de" . "German") 1651 ("de" . "German")
1657 ; dz Bhutani 1652 ; dz Bhutani
@@ -1662,7 +1657,7 @@ of buffer-file-coding-system set by this function."
1662 ("es" . "Spanish") 1657 ("es" . "Spanish")
1663 ("et" . "Latin-4") ; Estonian 1658 ("et" . "Latin-4") ; Estonian
1664 ("eu" . "Latin-1") ; Basque 1659 ("eu" . "Latin-1") ; Basque
1665 ; fa Persian 1660 ; fa Persian glibc uses utf-8
1666 ("fi" . "Latin-1") ; Finnish 1661 ("fi" . "Latin-1") ; Finnish
1667 ; fj Fiji 1662 ; fj Fiji
1668 ("fo" . "Latin-1") ; Faroese 1663 ("fo" . "Latin-1") ; Faroese
@@ -1673,7 +1668,7 @@ of buffer-file-coding-system set by this function."
1673 ("gl" . "Latin-1") ; Galician 1668 ("gl" . "Latin-1") ; Galician
1674 ; gn Guarani 1669 ; gn Guarani
1675 ; gu Gujarati 1670 ; gu Gujarati
1676 ("gv" . "Latin-8") ; Manx Gaelic 1671 ("gv" . "Latin-8") ; Manx Gaelic glibc uses 8859-1
1677 ; ha Hausa 1672 ; ha Hausa
1678 ("he" . "Hebrew") 1673 ("he" . "Hebrew")
1679 ("hi" . "Devanagari") ; Hindi glibc uses utf-8 1674 ("hi" . "Devanagari") ; Hindi glibc uses utf-8
@@ -1707,7 +1702,7 @@ of buffer-file-coding-system set by this function."
1707 ("lv" . "Latvian") ; Latvian, Lettish 1702 ("lv" . "Latvian") ; Latvian, Lettish
1708 ; mg Malagasy 1703 ; mg Malagasy
1709 ("mi" . "Latin-7") ; Maori 1704 ("mi" . "Latin-7") ; Maori
1710 ("mk" . "Latin-5") ; Macedonian 1705 ("mk" . "Cyrillic-ISO") ; Macedonian
1711 ; ml Malayalam 1706 ; ml Malayalam
1712 ; mn Mongolian 1707 ; mn Mongolian
1713 ; mo Moldavian 1708 ; mo Moldavian
@@ -1730,8 +1725,8 @@ of buffer-file-coding-system set by this function."
1730 ("rm" . "Latin-1") ; Rhaeto-Romanic 1725 ("rm" . "Latin-1") ; Rhaeto-Romanic
1731 ; rn Kirundi 1726 ; rn Kirundi
1732 ("ro" . "Romanian") 1727 ("ro" . "Romanian")
1733 ("ru.*[_.]koi8" . "Cyrillic-KOI8") ; Russian 1728 ("ru.*[_.]koi8\\(?:-r\\)?\\'" . "Cyrillic-KOI8") ; Russian
1734 ("ru" . "Latin-5") ; Russian 1729 ("ru" . "Cyrillic-ISO") ; Russian
1735 ; rw Kinyarwanda 1730 ; rw Kinyarwanda
1736 ("sa" . "Devanagari") ; Sanskrit 1731 ("sa" . "Devanagari") ; Sanskrit
1737 ; sd Sindhi 1732 ; sd Sindhi
@@ -1746,6 +1741,7 @@ of buffer-file-coding-system set by this function."
1746 ; so Somali 1741 ; so Somali
1747 ("sq" . "Latin-1") ; Albanian 1742 ("sq" . "Latin-1") ; Albanian
1748 ("sr" . "Latin-2") ; Serbian (Latin alphabet) 1743 ("sr" . "Latin-2") ; Serbian (Latin alphabet)
1744 ("sr.*@cyrillic" . "Cyrillic-ISO") ; per glibc
1749 ; ss Siswati 1745 ; ss Siswati
1750 ; st Sesotho 1746 ; st Sesotho
1751 ; su Sundanese 1747 ; su Sundanese
@@ -1753,7 +1749,7 @@ of buffer-file-coding-system set by this function."
1753 ("sw" . "Latin-1") ; Swahili 1749 ("sw" . "Latin-1") ; Swahili
1754 ; ta Tamil glibc uses utf-8 1750 ; ta Tamil glibc uses utf-8
1755 ; te Telugu glibc uses utf-8 1751 ; te Telugu glibc uses utf-8
1756 ("tg" . "Cyrillic-KOI8-T") ; Tajik 1752 ("tg" . "Tajik")
1757 ("th" . "Thai") 1753 ("th" . "Thai")
1758 ; ti Tigrinya 1754 ; ti Tigrinya
1759 ; tk Turkmen 1755 ; tk Turkmen
@@ -1770,6 +1766,7 @@ of buffer-file-coding-system set by this function."
1770 ("uz" . "Latin-1") ; Uzbek 1766 ("uz" . "Latin-1") ; Uzbek
1771 ("vi" . "Vietnamese") ; glibc uses utf-8 1767 ("vi" . "Vietnamese") ; glibc uses utf-8
1772 ; vo Volapuk 1768 ; vo Volapuk
1769 ("wa" . "Latin-1") ; Walloon
1773 ; wo Wolof 1770 ; wo Wolof
1774 ; xh Xhosa 1771 ; xh Xhosa
1775 ("yi" . "Windows-1255") ; Yiddish 1772 ("yi" . "Windows-1255") ; Yiddish
@@ -1778,13 +1775,11 @@ of buffer-file-coding-system set by this function."
1778 1775
1779 ; glibc: 1776 ; glibc:
1780 ; zh_CN.GB18030/GB18030 \ 1777 ; zh_CN.GB18030/GB18030 \
1781 ; zh_CN.GBK/GBK \
1782 ; zh_HK/BIG5-HKSCS \ 1778 ; zh_HK/BIG5-HKSCS \
1783 ; zh_TW/BIG5 \
1784 ; zh_TW.EUC-TW/EUC-TW \
1785 1779
1786 ("zh.*[._]big5" . "Chinese-BIG5") 1780 ("zh.*[._]big5" . "Chinese-BIG5")
1787 ("zh.*[._]gbk" . nil) ; Solaris 2.7; has gbk-0 as well as GB 2312.1980-0 1781 ("zh.*[._].gbk" . "Chinese-GBK")
1782 ;; glibc has zh_TW.EUC-TW, with zh_TW defaulting to Big5
1788 ("zh_tw" . "Chinese-CNS") 1783 ("zh_tw" . "Chinese-CNS")
1789 ("zh" . "Chinese-GB") 1784 ("zh" . "Chinese-GB")
1790 ; zu Zulu 1785 ; zu Zulu
@@ -1801,7 +1796,7 @@ of buffer-file-coding-system set by this function."
1801 ("cz" . "Czech") ; e.g. Solaris 2.6 1796 ("cz" . "Czech") ; e.g. Solaris 2.6
1802 ("ee" . "Latin-4") ; Estonian, e.g. X11R6.4 1797 ("ee" . "Latin-4") ; Estonian, e.g. X11R6.4
1803 ("iw" . "Hebrew") ; e.g. X11R6.4 1798 ("iw" . "Hebrew") ; e.g. X11R6.4
1804 ("sp" . "Latin-5") ; Serbian (Cyrillic alphabet), e.g. X11R6.4 1799 ("sp" . "Cyrillic-ISO") ; Serbian (Cyrillic alphabet), e.g. X11R6.4
1805 ("su" . "Latin-1") ; Finnish, e.g. Solaris 2.6 1800 ("su" . "Latin-1") ; Finnish, e.g. Solaris 2.6
1806 ("jp" . "Japanese") ; e.g. MS Windows 1801 ("jp" . "Japanese") ; e.g. MS Windows
1807 ("chs" . "Chinese-GB") ; MS Windows Chinese Simplified 1802 ("chs" . "Chinese-GB") ; MS Windows Chinese Simplified
@@ -1821,8 +1816,8 @@ If the language name is nil, there is no corresponding language environment.")
1821 (".*8859[-_]?9\\>" . "Latin-5") 1816 (".*8859[-_]?9\\>" . "Latin-5")
1822 (".*8859[-_]?14\\>" . "Latin-8") 1817 (".*8859[-_]?14\\>" . "Latin-8")
1823 (".*8859[-_]?15\\>" . "Latin-9") 1818 (".*8859[-_]?15\\>" . "Latin-9")
1824 (".*@euro\\>" . "Latin-9") 1819 (".*utf\\(-?8\\)\\>" . "UTF-8")
1825 (".*utf\\(-?8\\)\\>" . "UTF-8"))) 1820 (".*@euro\\>" . "Latin-9"))) ; utf-8@euro exists, so put this last
1826 "List of pairs of locale regexps and charset language names. 1821 "List of pairs of locale regexps and charset language names.
1827The first element whose locale regexp matches the start of a downcased locale 1822The first element whose locale regexp matches the start of a downcased locale
1828specifies the language name whose charsets corresponds to that locale. 1823specifies the language name whose charsets corresponds to that locale.