(locale-language-names): Modify the format of elements and add more entries. (locale-preferred-coding-systems): Add more entries. (set-locale-environment): Adjusted for the change of locale-language-names.
author: Kenichi Handa 2005-03-15 02:32:39 +0000
committer: Kenichi Handa 2005-03-15 02:32:39 +0000
commit: 8dedddd58a31ca9468fdbd972de7073f5f88af4f (patch)
tree: 20431384a7aa5b335324083df11744bccc10c3f9
parent: 8a46238114f7eed372f77e20582077d9a0f088c0 (diff)
download: emacs-8dedddd58a31ca9468fdbd972de7073f5f88af4f.tar.gz
emacs-8dedddd58a31ca9468fdbd972de7073f5f88af4f.zip
2 files changed, 137 insertions, 84 deletions
diff --git a/lisp/ChangeLog b/lisp/ChangeLog
index 48a11e45202..c63e730d0a0 100644
--- a/lisp/ChangeLog
+++ b/lisp/ChangeLog
@@ -1,3 +1,11 @@
+2005-03-15  Kenichi Handa  <handa@m17n.org>
+        * international/mule-cmds.el (locale-language-names): Modify the
+        format of elements and add more entries.
+        (locale-preferred-coding-systems): Add more entries.
+        (set-locale-environment): Adjusted for the change of
+        locale-language-names.
 2005-03-14  Stefan Monnier  <monnier@iro.umontreal.ca>
        * pcvs.el (smerge-ediff): Remove bogus autoload.
diff --git a/lisp/international/mule-cmds.el b/lisp/international/mule-cmds.el
index 2106585f8a7..a06c438b421 100644
--- a/lisp/international/mule-cmds.el
+++ b/lisp/international/mule-cmds.el
@@ -2043,55 +2043,60 @@ of `buffer-file-coding-system' set by this function."
     ;; and Chinese are exceptions, which are listed in the
     ;; non-standard section at the bottom of locale-language-names.
-    ; aa Afar
+    ("aa_DJ" . "Latin-1") ; Afar
-    ; ab Abkhazian
+    ("aa" . "UTF-8")
+    ;; ab Abkhazian
    ("af" . "Latin-1") ; Afrikaans
-    ("am" . "Ethiopic") ; Amharic
+    ("am" "Ethiopic" utf-8) ; Amharic
+    ("an" . "Latin-9") ; Aragonese
    ; ar Arabic glibc uses 8859-6
    ; as Assamese
    ; ay Aymara
-    ; az Azerbaijani
+    ("az" . "UTF-8") ; Azerbaijani
    ; ba Bashkir
-    ("be" . "Belarusian") ; Belarusian [Byelorussian until early 1990s]
+    ("be" "Belarusian" cp1251) ; Belarusian [Byelorussian until early 1990s]
-    ("bg" . "Bulgarian") ; Bulgarian
+    ("bg" "Bulgarian" cp1251) ; Bulgarian
    ; bh Bihari
    ; bi Bislama
-    ; bn Bengali, Bangla
+    ("bn" . "UTF-8") ; Bengali, Bangla
    ("bo" . "Tibetan")
    ("br" . "Latin-1") ; Breton
    ("bs" . "Latin-2") ; Bosnian
+    ("byn" . "UTF-8")  ; Bilin; Blin
    ("ca" . "Latin-1") ; Catalan
    ; co Corsican
-    ("cs" . "Czech")
+    ("cs" "Czech" iso-8859-2)
-    ("cy" . "Welsh") ; Welsh [glibc uses Latin-8.  Did this change?]
+    ("cy" "Welsh" iso-8859-14)
    ("da" . "Latin-1") ; Danish
-    ("de" . "German")
+    ("de" "German" iso-8859-1)
    ; dz Bhutani
-    ("el" . "Greek")
+    ("el" "Greek" iso-8859-7)
    ;; Users who specify "en" explicitly typically want Latin-1, not ASCII.
    ;; That's actually what the GNU locales define, modulo things like
    ;; en_IN -- fx.
+    ("en_IN" "English" utf-8) ; glibc uses utf-8 for English in India
    ("en" . "Latin-1") ; English
    ("eo" . "Latin-3") ; Esperanto
-    ("es" . "Spanish")
+    ("es" "Spanish" iso-8859-1)
-    ("et" . "Latin-4") ; Estonian
+    ("et" . "Latin-1") ; Estonian
    ("eu" . "Latin-1") ; Basque
-    ; fa Persian glibc uses utf-8
+    ("fa" . "UTF-8") ; Persian
    ("fi" . "Latin-1") ; Finnish
-    ; fj Fiji
+    ("fj" . "Latin-1") ; Fiji
    ("fo" . "Latin-1") ; Faroese
-    ("fr" . "French") ; French
+    ("fr" "French" iso-8859-1) ; French
    ("fy" . "Latin-1") ; Frisian
    ("ga" . "Latin-1") ; Irish Gaelic (new orthography)
-    ("gd" . "Latin-1") ; Scots Gaelic
+    ("gd" . "Latin-9") ; Scots Gaelic
-    ("gl" . "Latin-1") ; Galician
+    ("gez" "Ethiopic" utf-8) ; Geez
+    ("gl" . "Latin-1") ; Gallegan; Galician
    ; gn Guarani
-    ; gu Gujarati
+    ("gu" . "UTF-8") ; Gujarati
-    ("gv" . "Latin-8") ; Manx Gaelic  glibc uses 8859-1
+    ("gv" . "Latin-1") ; Manx Gaelic
    ; ha Hausa
-    ("he" . "Hebrew")
+    ("he" "Hebrew" iso-8859-8)
-    ("hi" . "Devanagari") ; Hindi  glibc uses utf-8
+    ("hi" "Devanagari" utf-8) ; Hindi
-    ("hr" . "Croatian") ; Croatian
+    ("hr" "Croatian" iso-8859-2) ; Croatian
    ("hu" . "Latin-2") ; Hungarian
    ; hy Armenian
    ; ia Interlingua
@@ -2099,110 +2104,114 @@ of `buffer-file-coding-system' set by this function."
    ; ie Interlingue
    ; ik Inupiak
    ("is" . "Latin-1") ; Icelandic
-    ("it" . "Italian") ; Italian
+    ("it" "Italian" iso-8859-1) ; Italian
    ; iu Inuktitut
-    ("ja" . "Japanese")
+    ("iw" "Hebrew" iso-8859-8)
+    ("ja" "Japanese" euc-jp)
    ; jw Javanese
-    ("ka" . "Georgian") ; Georgian
+    ("ka" "Georgian" georgian-ps) ; Georgian
    ; kk Kazakh
    ("kl" . "Latin-1") ; Greenlandic
    ; km Cambodian
-    ; kn Kannada
+    ("kn" "Kannada" utf-8)
-    ("ko" . "Korean")
+    ("ko" "Korean" euc-kr)
    ; ks Kashmiri
    ; ku Kurdish
    ("kw" . "Latin-1") ; Cornish
    ; ky Kirghiz
    ("la" . "Latin-1") ; Latin
    ("lb" . "Latin-1") ; Luxemburgish
+    ("lg" . "Laint-6") ; Ganda
    ; ln Lingala
-    ("lo" . "Lao") ; Laothian
+    ("lo" "Lao" utf-8) ; Laothian
-    ("lt" . "Lithuanian")
+    ("lt" "Lithuanian" iso-8859-13)
    ("lv" . "Latvian") ; Latvian, Lettish
    ; mg Malagasy
    ("mi" . "Latin-7") ; Maori
-    ("mk" . "Cyrillic-ISO") ; Macedonian
+    ("mk" "Cyrillic-ISO" iso-8859-5) ; Macedonian
-    ; ml Malayalam
+    ("ml" "Malayalam" utf-8)
-    ; mn Mongolian
+    ("mn" . "UTF-8") ; Mongolian
    ; mo Moldavian
-    ("mr" . "Devanagari") ; Marathi  glibc uses utf-8
+    ("mr" "Devanagari" utf-8) ; Marathi
    ("ms" . "Latin-1") ; Malay
    ("mt" . "Latin-3") ; Maltese
    ; my Burmese
    ; na Nauru
-    ("ne" . "Devanagari") ; Nepali
+    ("nb" . "Latin-1") ; Norwegian
-    ("nl" . "Dutch")
+    ("ne" "Devanagari" utf-8) ; Nepali
+    ("nl" "Dutch" iso-8859-1)
    ("no" . "Latin-1") ; Norwegian
    ("oc" . "Latin-1") ; Occitan
-    ; om (Afan) Oromo
+    ("om_ET" . "UTF-8") ; (Afan) Oromo
+    ("om" . "Latin-1") ; (Afan) Oromo
    ; or Oriya
-    ; pa Punjabi
+    ("pa" . "UTF-8") ; Punjabi
    ("pl" . "Latin-2") ; Polish
    ; ps Pashto, Pushto
    ("pt" . "Latin-1") ; Portuguese
    ; qu Quechua
    ("rm" . "Latin-1") ; Rhaeto-Romanic
    ; rn Kirundi
-    ("ro" . "Romanian")
+    ("ro" "Romanian" iso-8859-2)
-    ("ru.*[_.]koi8" . "Russian")
+    ("ru_RU" "Russian" iso-8859-5)
-    ("ru" . "Cyrillic-ISO") ; Russian
+    ("ru_UA" "Russian" koi8-u)
    ; rw Kinyarwanda
    ("sa" . "Devanagari") ; Sanskrit
    ; sd Sindhi
-    ; se   Northern Sami
+    ("se" . "UTF-8") ; Northern Sami
    ; sg Sangho
    ("sh" . "Latin-2") ; Serbo-Croatian
    ; si Sinhalese
-    ("sk" . "Slovak")
+    ("sid" . "UTF-8") ; Sidamo
-    ("sl" . "Slovenian")
+    ("sk" "Slovak" iso-8859-2)
+    ("sl" "Slovenian" iso-8859-2)
    ; sm Samoan
    ; sn Shona
-    ; so Somali
+    ("so_ET" "UTF-8") ; Somali
+    ("so" "Latin-1") ; Somali
    ("sq" . "Latin-1") ; Albanian
+    ("sr_YU@cyrillic" . "Cyrillic-ISO") ; Serbian (Cyrillic alphabet)
    ("sr" . "Latin-2") ; Serbian (Latin alphabet)
-    ("sr_YU@cyrillic" . "Cyrillic-ISO") ; per glibc
    ; ss Siswati
-    ; st Sesotho
+    ("st" . "Latin-1") ;  Sesotho
    ; su Sundanese
-    ("sv" . "Swedish") ; Swedish
+    ("sv" "Swedish" iso-8859-1)         ; Swedish
    ("sw" . "Latin-1") ; Swahili
-    ; ta Tamil  glibc uses utf-8
+    ("ta" "Tamil" utf-8)
-    ; te Telugu  glibc uses utf-8
+    ("te" . "UTF-8") ; Telugu
-    ("tg" . "Tajik")
+    ("tg" "Tajik" koi8-t)
-    ("th" . "Thai")
+    ("th" "Thai" tis-620)
-    ; ti Tigrinya
+    ("ti" "Ethiopic" utf-8) ; Tigrinya
+    ("tig_ER" . "UTF-8") ; Tigre
    ; tk Turkmen
    ("tl" . "Latin-1") ; Tagalog
    ; tn Setswana
    ; to Tonga
-    ("tr" . "Turkish")
+    ("tr" "Turkish" iso-8859-9)
    ; ts Tsonga
-    ; tt Tatar
+    ("tt" . "UTF-8") ; Tatar
    ; tw Twi
    ; ug Uighur
-    ("uk" . "Ukrainian") ; Ukrainian
+    ("uk" "Ukrainian" koi8-u)
-    ; ur Urdu  glibc uses utf-8
+    ("ur" . "UTF-8") ; Urdu
+    ("uz_UZ@cyrillic" . "UTF-8"); Uzbek
    ("uz" . "Latin-1") ; Uzbek
-    ("vi" . "Vietnamese") ;  glibc uses utf-8
+    ("vi" "Vietnamese" utf-8)
    ; vo Volapuk
    ("wa" . "Latin-1") ; Walloon
    ; wo Wolof
-    ; xh Xhosa
+    ("xh" . "Latin-1") ; Xhosa
    ("yi" . "Windows-1255") ; Yiddish
    ; yo Yoruba
    ; za Zhuang
+    ("zh_HK" . "Chinese-Big5")
-    ; glibc:
+    ("zh_TW" . "Chinese-Big5")
+    ("zh_CN" . "Chinese-GB")
+    ("zh" . "Chinese-GB")
    ; zh_CN.GB18030/GB18030 \
    ; zh_CN.GBK/GBK \
    ; zh_HK/BIG5-HKSCS \
+    ("zu" . "Latin-1") ; Zulu
-    ("zh.*[._]big5" . "Chinese-BIG5")
-    ("zh.*[._]gbk" . nil) ; Solaris 2.7; has gbk-0 as well as GB 2312.1980-0
-    ("zh_tw" . "Chinese-CNS") ; glibc uses big5
-    ("zh_tw[._]euc-tw" . "Chinese-EUC-TW")
-    ("zh" . "Chinese-GB")
-    ; zu Zulu
    ;; ISO standard locales
    ("c$" . "ASCII")
@@ -2222,10 +2231,16 @@ of `buffer-file-coding-system' set by this function."
    ("chs" . "Chinese-GB") ; MS Windows Chinese Simplified
    ("cht" . "Chinese-BIG5") ; MS Windows Chinese Traditional
    ))
-  "List of pairs of locale regexps and language names.
+  "Alist of locale regexps vs the corresponding languages and coding systems.
-The first element whose locale regexp matches the start of a downcased locale
+Each element has this form:
-specifies the language name corresponding to that locale.
+  \(LOCALE-REGEXP LANG-ENV CODING-SYSTEM)
-If the language name is nil, there is no corresponding language environment.")
+The first element whose LOCALE-REGEXP matches the start of a
+downcased locale specifies the LANG-ENV \(language environment)
+and CODING-SYSTEM corresponding to that locale.  If there is no
+appropriate language environment, the element may have this form:
+  \(LOCALE-REGEXP . LANG-ENV)
+In this case, LANG-ENV is one of the generic language environments for a
+specific encoding such as \"Latin-1\" and \"UTF-8\".")
 (defconst locale-charset-language-names
  (purecopy
@@ -2243,20 +2258,43 @@ If the language name is nil, there is no corresponding language environment.")
  "List of pairs of locale regexps and charset language names.
 The first element whose locale regexp matches the start of a downcased locale
 specifies the language name whose charset corresponds to that locale.
-This language name is used if its charsets disagree with the charsets of
+This language name is used if the locale is not listed in
-the language name that would otherwise be used for this locale.")
+`locale-language-names'.")
 (defconst locale-preferred-coding-systems
  (purecopy
-   '(("ja.*[._]euc" . japanese-iso-8bit)
+   '((".*8859[-_]?1\\>" . iso-8859-1)
+     (".*8859[-_]?2\\>" . iso-8859-2)
+     (".*8859[-_]?3\\>" . iso-8859-3)
+     (".*8859[-_]?4\\>" . iso-8859-4)
+     (".*8859[-_]?9\\>" . iso-8859-9)
+     (".*8859[-_]?14\\>" . iso-8859-14)
+     (".*8859[-_]?15\\>" . iso-8859-15)
+     (".*utf\\(?:-?8\\)?" . utf-8)
+     ;; utf-8@euro exists, so put this after utf-8.  (@euro really
+     ;; specifies the currency, rather than the charset.)
+     (".*@euro" . iso-8859-15)
+     ("koi8-?r" . koi8-r)
+     ("koi8-?u" . koi8-u)
+     ("tcvn" . tcvn)
+     ("big5" . big5)
+     ("euc-?tw" . euc-tw)
+     ;; We don't support GBK, but as it is upper compatible with
+     ;; GB-2312, we setup the default coding system to gb2312.
+     ("gbk" . gb2312)
+     ;; We don't support BIG5-HKSCS, but as it is upper compatible with
+     ;; BIG5, we setup the default coding system to big5.
+     ("big5hkscs" . big5)
+     ("ja.*[._]euc" . japanese-iso-8bit)
     ("ja.*[._]jis7" . iso-2022-jp)
     ("ja.*[._]pck" . japanese-shift-jis)
     ("ja.*[._]sjis" . japanese-shift-jis)
     ("jpn" . japanese-shift-jis)   ; MS-Windows uses this.
-     (".*[._]utf" . utf-8)))
+     ))
  "List of pairs of locale regexps and preferred coding systems.
 The first element whose locale regexp matches the start of a downcased locale
-specifies the coding system to prefer when using that locale.")
+specifies the coding system to prefer when using that locale.
+This coding system is used if the locale specifies a specific charset.")
 (defun locale-name-match (key alist)
  "Search for KEY in ALIST, which should be a list of regexp-value pairs.
@@ -2386,12 +2424,17 @@ See also `locale-charset-language-names', `locale-language-names',
                       (locale-charset-to-coding-system
                        (match-string 1 locale)))))))
-        ;; Give preference to charset-language-name over language-name.
+        (if (consp language-name)
-        (if (and charset-language-name
+            ;; locale-language-names specify both lang-env and coding.
-                 (not
+            ;; But what is specified in locale-preferred-coding-systems
-                  (equal (get-language-info language-name 'charset)
+            ;; has higher priority.
-                         (get-language-info charset-language-name 'charset))))
+            (setq coding-system (or coding-system
-            (setq language-name charset-language-name))
+                                    (nth 1 language-name))
+                  language-name (car language-name))
+          ;; Otherwise, if locale is not listed in locale-language-names,
+          ;; use what is listed in locale-charset-language-names.
+          (if (not language-name)
+              (setq language-name charset-language-name)))
        (when language-name
@@ -2417,7 +2460,9 @@ See also `locale-charset-language-names', `locale-language-names',
          (setq locale-coding-system
                (car (get-language-info language-name 'coding-priority))))
-        (when coding-system
+        (when (and coding-system
+                   (not (coding-system-equal coding-system
+                                             locale-coding-system)))
          (prefer-coding-system coding-system)
          (setq locale-coding-system coding-system))))
author	Kenichi Handa	2005-03-15 02:32:39 +0000
committer	Kenichi Handa	2005-03-15 02:32:39 +0000
commit	8dedddd58a31ca9468fdbd972de7073f5f88af4f (patch)
tree	20431384a7aa5b335324083df11744bccc10c3f9
parent	8a46238114f7eed372f77e20582077d9a0f088c0 (diff)
download	emacs-8dedddd58a31ca9468fdbd972de7073f5f88af4f.tar.gz emacs-8dedddd58a31ca9468fdbd972de7073f5f88af4f.zip

diff --git a/lisp/ChangeLog b/lisp/ChangeLog index 48a11e45202..c63e730d0a0 100644 --- a/lisp/ChangeLog +++ b/lisp/ChangeLog
@@ -1,3 +1,11 @@
		1	2005-03-15 Kenichi Handa <handa@m17n.org>
		2
		3	* international/mule-cmds.el (locale-language-names): Modify the
		4	format of elements and add more entries.
		5	(locale-preferred-coding-systems): Add more entries.
		6	(set-locale-environment): Adjusted for the change of
		7	locale-language-names.
		8
1	2005-03-14 Stefan Monnier <monnier@iro.umontreal.ca>	9	2005-03-14 Stefan Monnier <monnier@iro.umontreal.ca>
2		10
3	* pcvs.el (smerge-ediff): Remove bogus autoload.	11	* pcvs.el (smerge-ediff): Remove bogus autoload.


diff --git a/lisp/international/mule-cmds.el b/lisp/international/mule-cmds.el index 2106585f8a7..a06c438b421 100644 --- a/lisp/international/mule-cmds.el +++ b/lisp/international/mule-cmds.el
@@ -2043,55 +2043,60 @@ of `buffer-file-coding-system' set by this function."
2043	;; and Chinese are exceptions, which are listed in the	2043	;; and Chinese are exceptions, which are listed in the
2044	;; non-standard section at the bottom of locale-language-names.	2044	;; non-standard section at the bottom of locale-language-names.
2045		2045
2046	; aa Afar	2046	("aa_DJ" . "Latin-1") ; Afar
2047	; ab Abkhazian	2047	("aa" . "UTF-8")
		2048	;; ab Abkhazian
2048	("af" . "Latin-1") ; Afrikaans	2049	("af" . "Latin-1") ; Afrikaans
2049	("am" . "Ethiopic") ; Amharic	2050	("am" "Ethiopic" utf-8) ; Amharic
		2051	("an" . "Latin-9") ; Aragonese
2050	; ar Arabic glibc uses 8859-6	2052	; ar Arabic glibc uses 8859-6
2051	; as Assamese	2053	; as Assamese
2052	; ay Aymara	2054	; ay Aymara
2053	; az Azerbaijani	2055	("az" . "UTF-8") ; Azerbaijani
2054	; ba Bashkir	2056	; ba Bashkir
2055	("be" . "Belarusian") ; Belarusian [Byelorussian until early 1990s]	2057	("be" "Belarusian" cp1251) ; Belarusian [Byelorussian until early 1990s]
2056	("bg" . "Bulgarian") ; Bulgarian	2058	("bg" "Bulgarian" cp1251) ; Bulgarian
2057	; bh Bihari	2059	; bh Bihari
2058	; bi Bislama	2060	; bi Bislama
2059	; bn Bengali, Bangla	2061	("bn" . "UTF-8") ; Bengali, Bangla
2060	("bo" . "Tibetan")	2062	("bo" . "Tibetan")
2061	("br" . "Latin-1") ; Breton	2063	("br" . "Latin-1") ; Breton
2062	("bs" . "Latin-2") ; Bosnian	2064	("bs" . "Latin-2") ; Bosnian
		2065	("byn" . "UTF-8") ; Bilin; Blin
2063	("ca" . "Latin-1") ; Catalan	2066	("ca" . "Latin-1") ; Catalan
2064	; co Corsican	2067	; co Corsican
2065	("cs" . "Czech")	2068	("cs" "Czech" iso-8859-2)
2066	("cy" . "Welsh") ; Welsh [glibc uses Latin-8. Did this change?]	2069	("cy" "Welsh" iso-8859-14)
2067	("da" . "Latin-1") ; Danish	2070	("da" . "Latin-1") ; Danish
2068	("de" . "German")	2071	("de" "German" iso-8859-1)
2069	; dz Bhutani	2072	; dz Bhutani
2070	("el" . "Greek")	2073	("el" "Greek" iso-8859-7)
2071	;; Users who specify "en" explicitly typically want Latin-1, not ASCII.	2074	;; Users who specify "en" explicitly typically want Latin-1, not ASCII.
2072	;; That's actually what the GNU locales define, modulo things like	2075	;; That's actually what the GNU locales define, modulo things like
2073	;; en_IN -- fx.	2076	;; en_IN -- fx.
		2077	("en_IN" "English" utf-8) ; glibc uses utf-8 for English in India
2074	("en" . "Latin-1") ; English	2078	("en" . "Latin-1") ; English
2075	("eo" . "Latin-3") ; Esperanto	2079	("eo" . "Latin-3") ; Esperanto
2076	("es" . "Spanish")	2080	("es" "Spanish" iso-8859-1)
2077	("et" . "Latin-4") ; Estonian	2081	("et" . "Latin-1") ; Estonian
2078	("eu" . "Latin-1") ; Basque	2082	("eu" . "Latin-1") ; Basque
2079	; fa Persian glibc uses utf-8	2083	("fa" . "UTF-8") ; Persian
2080	("fi" . "Latin-1") ; Finnish	2084	("fi" . "Latin-1") ; Finnish
2081	; fj Fiji	2085	("fj" . "Latin-1") ; Fiji
2082	("fo" . "Latin-1") ; Faroese	2086	("fo" . "Latin-1") ; Faroese
2083	("fr" . "French") ; French	2087	("fr" "French" iso-8859-1) ; French
2084	("fy" . "Latin-1") ; Frisian	2088	("fy" . "Latin-1") ; Frisian
2085	("ga" . "Latin-1") ; Irish Gaelic (new orthography)	2089	("ga" . "Latin-1") ; Irish Gaelic (new orthography)
2086	("gd" . "Latin-1") ; Scots Gaelic	2090	("gd" . "Latin-9") ; Scots Gaelic
2087	("gl" . "Latin-1") ; Galician	2091	("gez" "Ethiopic" utf-8) ; Geez
		2092	("gl" . "Latin-1") ; Gallegan; Galician
2088	; gn Guarani	2093	; gn Guarani
2089	; gu Gujarati	2094	("gu" . "UTF-8") ; Gujarati
2090	("gv" . "Latin-8") ; Manx Gaelic glibc uses 8859-1	2095	("gv" . "Latin-1") ; Manx Gaelic
2091	; ha Hausa	2096	; ha Hausa
2092	("he" . "Hebrew")	2097	("he" "Hebrew" iso-8859-8)
2093	("hi" . "Devanagari") ; Hindi glibc uses utf-8	2098	("hi" "Devanagari" utf-8) ; Hindi
2094	("hr" . "Croatian") ; Croatian	2099	("hr" "Croatian" iso-8859-2) ; Croatian
2095	("hu" . "Latin-2") ; Hungarian	2100	("hu" . "Latin-2") ; Hungarian
2096	; hy Armenian	2101	; hy Armenian
2097	; ia Interlingua	2102	; ia Interlingua
@@ -2099,110 +2104,114 @@ of `buffer-file-coding-system' set by this function."
2099	; ie Interlingue	2104	; ie Interlingue
2100	; ik Inupiak	2105	; ik Inupiak
2101	("is" . "Latin-1") ; Icelandic	2106	("is" . "Latin-1") ; Icelandic
2102	("it" . "Italian") ; Italian	2107	("it" "Italian" iso-8859-1) ; Italian
2103	; iu Inuktitut	2108	; iu Inuktitut
2104	("ja" . "Japanese")	2109	("iw" "Hebrew" iso-8859-8)
		2110	("ja" "Japanese" euc-jp)
2105	; jw Javanese	2111	; jw Javanese
2106	("ka" . "Georgian") ; Georgian	2112	("ka" "Georgian" georgian-ps) ; Georgian
2107	; kk Kazakh	2113	; kk Kazakh
2108	("kl" . "Latin-1") ; Greenlandic	2114	("kl" . "Latin-1") ; Greenlandic
2109	; km Cambodian	2115	; km Cambodian
2110	; kn Kannada	2116	("kn" "Kannada" utf-8)
2111	("ko" . "Korean")	2117	("ko" "Korean" euc-kr)
2112	; ks Kashmiri	2118	; ks Kashmiri
2113	; ku Kurdish	2119	; ku Kurdish
2114	("kw" . "Latin-1") ; Cornish	2120	("kw" . "Latin-1") ; Cornish
2115	; ky Kirghiz	2121	; ky Kirghiz
2116	("la" . "Latin-1") ; Latin	2122	("la" . "Latin-1") ; Latin
2117	("lb" . "Latin-1") ; Luxemburgish	2123	("lb" . "Latin-1") ; Luxemburgish
		2124	("lg" . "Laint-6") ; Ganda
2118	; ln Lingala	2125	; ln Lingala
2119	("lo" . "Lao") ; Laothian	2126	("lo" "Lao" utf-8) ; Laothian
2120	("lt" . "Lithuanian")	2127	("lt" "Lithuanian" iso-8859-13)
2121	("lv" . "Latvian") ; Latvian, Lettish	2128	("lv" . "Latvian") ; Latvian, Lettish
2122	; mg Malagasy	2129	; mg Malagasy
2123	("mi" . "Latin-7") ; Maori	2130	("mi" . "Latin-7") ; Maori
2124	("mk" . "Cyrillic-ISO") ; Macedonian	2131	("mk" "Cyrillic-ISO" iso-8859-5) ; Macedonian
2125	; ml Malayalam	2132	("ml" "Malayalam" utf-8)
2126	; mn Mongolian	2133	("mn" . "UTF-8") ; Mongolian
2127	; mo Moldavian	2134	; mo Moldavian
2128	("mr" . "Devanagari") ; Marathi glibc uses utf-8	2135	("mr" "Devanagari" utf-8) ; Marathi
2129	("ms" . "Latin-1") ; Malay	2136	("ms" . "Latin-1") ; Malay
2130	("mt" . "Latin-3") ; Maltese	2137	("mt" . "Latin-3") ; Maltese
2131	; my Burmese	2138	; my Burmese
2132	; na Nauru	2139	; na Nauru
2133	("ne" . "Devanagari") ; Nepali	2140	("nb" . "Latin-1") ; Norwegian
2134	("nl" . "Dutch")	2141	("ne" "Devanagari" utf-8) ; Nepali
		2142	("nl" "Dutch" iso-8859-1)
2135	("no" . "Latin-1") ; Norwegian	2143	("no" . "Latin-1") ; Norwegian
2136	("oc" . "Latin-1") ; Occitan	2144	("oc" . "Latin-1") ; Occitan
2137	; om (Afan) Oromo	2145	("om_ET" . "UTF-8") ; (Afan) Oromo
		2146	("om" . "Latin-1") ; (Afan) Oromo
2138	; or Oriya	2147	; or Oriya
2139	; pa Punjabi	2148	("pa" . "UTF-8") ; Punjabi
2140	("pl" . "Latin-2") ; Polish	2149	("pl" . "Latin-2") ; Polish
2141	; ps Pashto, Pushto	2150	; ps Pashto, Pushto
2142	("pt" . "Latin-1") ; Portuguese	2151	("pt" . "Latin-1") ; Portuguese
2143	; qu Quechua	2152	; qu Quechua
2144	("rm" . "Latin-1") ; Rhaeto-Romanic	2153	("rm" . "Latin-1") ; Rhaeto-Romanic
2145	; rn Kirundi	2154	; rn Kirundi
2146	("ro" . "Romanian")	2155	("ro" "Romanian" iso-8859-2)
2147	("ru.*[_.]koi8" . "Russian")	2156	("ru_RU" "Russian" iso-8859-5)
2148	("ru" . "Cyrillic-ISO") ; Russian	2157	("ru_UA" "Russian" koi8-u)
2149	; rw Kinyarwanda	2158	; rw Kinyarwanda
2150	("sa" . "Devanagari") ; Sanskrit	2159	("sa" . "Devanagari") ; Sanskrit
2151	; sd Sindhi	2160	; sd Sindhi
2152	; se Northern Sami	2161	("se" . "UTF-8") ; Northern Sami
2153	; sg Sangho	2162	; sg Sangho
2154	("sh" . "Latin-2") ; Serbo-Croatian	2163	("sh" . "Latin-2") ; Serbo-Croatian
2155	; si Sinhalese	2164	; si Sinhalese
2156	("sk" . "Slovak")	2165	("sid" . "UTF-8") ; Sidamo
2157	("sl" . "Slovenian")	2166	("sk" "Slovak" iso-8859-2)
		2167	("sl" "Slovenian" iso-8859-2)
2158	; sm Samoan	2168	; sm Samoan
2159	; sn Shona	2169	; sn Shona
2160	; so Somali	2170	("so_ET" "UTF-8") ; Somali
		2171	("so" "Latin-1") ; Somali
2161	("sq" . "Latin-1") ; Albanian	2172	("sq" . "Latin-1") ; Albanian
		2173	("sr_YU@cyrillic" . "Cyrillic-ISO") ; Serbian (Cyrillic alphabet)
2162	("sr" . "Latin-2") ; Serbian (Latin alphabet)	2174	("sr" . "Latin-2") ; Serbian (Latin alphabet)
2163	("sr_YU@cyrillic" . "Cyrillic-ISO") ; per glibc
2164	; ss Siswati	2175	; ss Siswati
2165	; st Sesotho	2176	("st" . "Latin-1") ; Sesotho
2166	; su Sundanese	2177	; su Sundanese
2167	("sv" . "Swedish") ; Swedish	2178	("sv" "Swedish" iso-8859-1) ; Swedish
2168	("sw" . "Latin-1") ; Swahili	2179	("sw" . "Latin-1") ; Swahili
2169	; ta Tamil glibc uses utf-8	2180	("ta" "Tamil" utf-8)
2170	; te Telugu glibc uses utf-8	2181	("te" . "UTF-8") ; Telugu
2171	("tg" . "Tajik")	2182	("tg" "Tajik" koi8-t)
2172	("th" . "Thai")	2183	("th" "Thai" tis-620)
2173	; ti Tigrinya	2184	("ti" "Ethiopic" utf-8) ; Tigrinya
		2185	("tig_ER" . "UTF-8") ; Tigre
2174	; tk Turkmen	2186	; tk Turkmen
2175	("tl" . "Latin-1") ; Tagalog	2187	("tl" . "Latin-1") ; Tagalog
2176	; tn Setswana	2188	; tn Setswana
2177	; to Tonga	2189	; to Tonga
2178	("tr" . "Turkish")	2190	("tr" "Turkish" iso-8859-9)
2179	; ts Tsonga	2191	; ts Tsonga
2180	; tt Tatar	2192	("tt" . "UTF-8") ; Tatar
2181	; tw Twi	2193	; tw Twi
2182	; ug Uighur	2194	; ug Uighur
2183	("uk" . "Ukrainian") ; Ukrainian	2195	("uk" "Ukrainian" koi8-u)
2184	; ur Urdu glibc uses utf-8	2196	("ur" . "UTF-8") ; Urdu
		2197	("uz_UZ@cyrillic" . "UTF-8"); Uzbek
2185	("uz" . "Latin-1") ; Uzbek	2198	("uz" . "Latin-1") ; Uzbek
2186	("vi" . "Vietnamese") ; glibc uses utf-8	2199	("vi" "Vietnamese" utf-8)
2187	; vo Volapuk	2200	; vo Volapuk
2188	("wa" . "Latin-1") ; Walloon	2201	("wa" . "Latin-1") ; Walloon
2189	; wo Wolof	2202	; wo Wolof
2190	; xh Xhosa	2203	("xh" . "Latin-1") ; Xhosa
2191	("yi" . "Windows-1255") ; Yiddish	2204	("yi" . "Windows-1255") ; Yiddish
2192	; yo Yoruba	2205	; yo Yoruba
2193	; za Zhuang	2206	; za Zhuang
2194		2207	("zh_HK" . "Chinese-Big5")
2195	; glibc:	2208	("zh_TW" . "Chinese-Big5")
		2209	("zh_CN" . "Chinese-GB")
		2210	("zh" . "Chinese-GB")
2196	; zh_CN.GB18030/GB18030 \	2211	; zh_CN.GB18030/GB18030 \
2197	; zh_CN.GBK/GBK \	2212	; zh_CN.GBK/GBK \
2198	; zh_HK/BIG5-HKSCS \	2213	; zh_HK/BIG5-HKSCS \
2199		2214	("zu" . "Latin-1") ; Zulu
2200	("zh.*[._]big5" . "Chinese-BIG5")
2201	("zh.*[._]gbk" . nil) ; Solaris 2.7; has gbk-0 as well as GB 2312.1980-0
2202	("zh_tw" . "Chinese-CNS") ; glibc uses big5
2203	("zh_tw[._]euc-tw" . "Chinese-EUC-TW")
2204	("zh" . "Chinese-GB")
2205	; zu Zulu
2206		2215
2207	;; ISO standard locales	2216	;; ISO standard locales
2208	("c$" . "ASCII")	2217	("c$" . "ASCII")
@@ -2222,10 +2231,16 @@ of `buffer-file-coding-system' set by this function."
2222	("chs" . "Chinese-GB") ; MS Windows Chinese Simplified	2231	("chs" . "Chinese-GB") ; MS Windows Chinese Simplified
2223	("cht" . "Chinese-BIG5") ; MS Windows Chinese Traditional	2232	("cht" . "Chinese-BIG5") ; MS Windows Chinese Traditional
2224	))	2233	))
2225	"List of pairs of locale regexps and language names.	2234	"Alist of locale regexps vs the corresponding languages and coding systems.
2226	The first element whose locale regexp matches the start of a downcased locale	2235	Each element has this form:
2227	specifies the language name corresponding to that locale.	2236	\(LOCALE-REGEXP LANG-ENV CODING-SYSTEM)
2228	If the language name is nil, there is no corresponding language environment.")	2237	The first element whose LOCALE-REGEXP matches the start of a
		2238	downcased locale specifies the LANG-ENV \(language environment)
		2239	and CODING-SYSTEM corresponding to that locale. If there is no
		2240	appropriate language environment, the element may have this form:
		2241	\(LOCALE-REGEXP . LANG-ENV)
		2242	In this case, LANG-ENV is one of the generic language environments for a
		2243	specific encoding such as \"Latin-1\" and \"UTF-8\".")
2229		2244
2230	(defconst locale-charset-language-names	2245	(defconst locale-charset-language-names
2231	(purecopy	2246	(purecopy
@@ -2243,20 +2258,43 @@ If the language name is nil, there is no corresponding language environment.")
2243	"List of pairs of locale regexps and charset language names.	2258	"List of pairs of locale regexps and charset language names.
2244	The first element whose locale regexp matches the start of a downcased locale	2259	The first element whose locale regexp matches the start of a downcased locale
2245	specifies the language name whose charset corresponds to that locale.	2260	specifies the language name whose charset corresponds to that locale.
2246	This language name is used if its charsets disagree with the charsets of	2261	This language name is used if the locale is not listed in
2247	the language name that would otherwise be used for this locale.")	2262	`locale-language-names'.")
2248		2263
2249	(defconst locale-preferred-coding-systems	2264	(defconst locale-preferred-coding-systems
2250	(purecopy	2265	(purecopy
2251	'(("ja.*[._]euc" . japanese-iso-8bit)	2266	'((".*8859[-_]?1\\>" . iso-8859-1)
		2267	(".*8859[-_]?2\\>" . iso-8859-2)
		2268	(".*8859[-_]?3\\>" . iso-8859-3)
		2269	(".*8859[-_]?4\\>" . iso-8859-4)
		2270	(".*8859[-_]?9\\>" . iso-8859-9)
		2271	(".*8859[-_]?14\\>" . iso-8859-14)
		2272	(".*8859[-_]?15\\>" . iso-8859-15)
		2273	(".*utf\\(?:-?8\\)?" . utf-8)
		2274	;; utf-8@euro exists, so put this after utf-8. (@euro really
		2275	;; specifies the currency, rather than the charset.)
		2276	(".*@euro" . iso-8859-15)
		2277	("koi8-?r" . koi8-r)
		2278	("koi8-?u" . koi8-u)
		2279	("tcvn" . tcvn)
		2280	("big5" . big5)
		2281	("euc-?tw" . euc-tw)
		2282	;; We don't support GBK, but as it is upper compatible with
		2283	;; GB-2312, we setup the default coding system to gb2312.
		2284	("gbk" . gb2312)
		2285	;; We don't support BIG5-HKSCS, but as it is upper compatible with
		2286	;; BIG5, we setup the default coding system to big5.
		2287	("big5hkscs" . big5)
		2288	("ja.*[._]euc" . japanese-iso-8bit)
2252	("ja.*[._]jis7" . iso-2022-jp)	2289	("ja.*[._]jis7" . iso-2022-jp)
2253	("ja.*[._]pck" . japanese-shift-jis)	2290	("ja.*[._]pck" . japanese-shift-jis)
2254	("ja.*[._]sjis" . japanese-shift-jis)	2291	("ja.*[._]sjis" . japanese-shift-jis)
2255	("jpn" . japanese-shift-jis) ; MS-Windows uses this.	2292	("jpn" . japanese-shift-jis) ; MS-Windows uses this.
2256	(".*[._]utf" . utf-8)))	2293	))
2257	"List of pairs of locale regexps and preferred coding systems.	2294	"List of pairs of locale regexps and preferred coding systems.
2258	The first element whose locale regexp matches the start of a downcased locale	2295	The first element whose locale regexp matches the start of a downcased locale
2259	specifies the coding system to prefer when using that locale.")	2296	specifies the coding system to prefer when using that locale.
		2297	This coding system is used if the locale specifies a specific charset.")
2260		2298
2261	(defun locale-name-match (key alist)	2299	(defun locale-name-match (key alist)
2262	"Search for KEY in ALIST, which should be a list of regexp-value pairs.	2300	"Search for KEY in ALIST, which should be a list of regexp-value pairs.
@@ -2386,12 +2424,17 @@ See also `locale-charset-language-names', `locale-language-names',
2386	(locale-charset-to-coding-system	2424	(locale-charset-to-coding-system
2387	(match-string 1 locale)))))))	2425	(match-string 1 locale)))))))
2388		2426
2389	;; Give preference to charset-language-name over language-name.	2427	(if (consp language-name)
2390	(if (and charset-language-name	2428	;; locale-language-names specify both lang-env and coding.
2391	(not	2429	;; But what is specified in locale-preferred-coding-systems
2392	(equal (get-language-info language-name 'charset)	2430	;; has higher priority.
2393	(get-language-info charset-language-name 'charset))))	2431	(setq coding-system (or coding-system
2394	(setq language-name charset-language-name))	2432	(nth 1 language-name))
		2433	language-name (car language-name))
		2434	;; Otherwise, if locale is not listed in locale-language-names,
		2435	;; use what is listed in locale-charset-language-names.
		2436	(if (not language-name)
		2437	(setq language-name charset-language-name)))
2395		2438
2396	(when language-name	2439	(when language-name
2397		2440
@@ -2417,7 +2460,9 @@ See also `locale-charset-language-names', `locale-language-names',
2417	(setq locale-coding-system	2460	(setq locale-coding-system
2418	(car (get-language-info language-name 'coding-priority))))	2461	(car (get-language-info language-name 'coding-priority))))
2419		2462
2420	(when coding-system	2463	(when (and coding-system
		2464	(not (coding-system-equal coding-system
		2465	locale-coding-system)))
2421	(prefer-coding-system coding-system)	2466	(prefer-coding-system coding-system)
2422	(setq locale-coding-system coding-system))))	2467	(setq locale-coding-system coding-system))))
2423		2468