Improve font search and handling on MS-Windows

* src/w32font.c: Add commentary about font search on MS-Windows. (w32font_coverage_ok, add_font_entity_to_list) (font_supported_scripts): Consider the coverage OK if a font has only the SIP bit set, but also sets relevant codepage bits in the CSB bits. (font_supported_scripts): Fix script for USB bit 99. * src/font.c (font_parse_fcname, font_parse_family_registry) [HAVE_NTGUI]: Don't consider hyphenated suffixes of some Windows fonts as not belonging to the family name. * src/w32uniscribe.c (uniscribe_check_otf_1): Increase tags[] array size, to avoid the E_OUTOFMEMORY error for some fonts. * lisp/international/fontset.el (font-encoding-alist): Add 'unicode-sip'.
author: Eli Zaretskii 2024-08-03 18:11:57 +0300
committer: Eli Zaretskii 2024-08-03 18:11:57 +0300
commit: ff6954b9c833bfeb8032fb772fa08e60e9ec56a8 (patch)
tree: 95c5ac2d3d85e13e34ef5af47f8e7beba1bb1504
parent: ef8276d4247fdf0b1ee19a2c70328710490dd2d6 (diff)
download: emacs-ff6954b9c833bfeb8032fb772fa08e60e9ec56a8.tar.gz
emacs-ff6954b9c833bfeb8032fb772fa08e60e9ec56a8.zip
4 files changed, 160 insertions, 18 deletions
diff --git a/lisp/international/fontset.el b/lisp/international/fontset.el
index 695c313cb26..c9b60418b22 100644
--- a/lisp/international/fontset.el
+++ b/lisp/international/fontset.el
@@ -88,6 +88,7 @@
        ("iso10646-1$" . (unicode-bmp . nil))
        ("iso10646.indian-1" . (unicode-bmp . nil))
        ("unicode-bmp" . (unicode-bmp . nil))
+        ("unicode-sip" . (unicode-sip . nil)) ; used by w32font.c
        ("abobe-symbol" . symbol)
        ("sisheng_cwnn" . chinese-sisheng)
        ("mulearabic-0" . arabic-digit)
diff --git a/src/font.c b/src/font.c
index 246fe1c4426..112618a7307 100644
--- a/src/font.c
+++ b/src/font.c
@@ -1627,15 +1627,30 @@ font_parse_fcname (char *name, ptrdiff_t len, Lisp_Object font)
        {
          bool decimal = 0, size_found = 1;
          for (q = p + 1; *q && *q != ':'; q++)
-            if (! c_isdigit (*q))
+            {
-              {
+#ifdef HAVE_NTGUI
-                if (*q != '.' || decimal)
+              /* MS-Windows has several CJK fonts whose name ends in
-                  {
+                 "-ExtB".  It also has fonts whose names end in "-R" or
-                    size_found = 0;
+                 "-B", and one font whose name ends in "-SB".  */
-                    break;
+              if (q == p + 1 && (strncmp (q, "ExtB", 4) == 0
-                  }
+                                 || strncmp (q, "R", 1) == 0
-                decimal = 1;
+                                 || strncmp (q, "B", 1) == 0
-              }
+                                 || strncmp (q, "SB", 2) == 0))
+                {
+                  size_found = 0;
+                  break;
+                }
+#endif
+              if (! c_isdigit (*q))
+                {
+                  if (*q != '.' || decimal)
+                    {
+                      size_found = 0;
+                      break;
+                    }
+                  decimal = 1;
+                }
+            }
          if (size_found)
            {
              family_end = p;
@@ -2000,6 +2015,15 @@ font_parse_family_registry (Lisp_Object family, Lisp_Object registry, Lisp_Objec
      len = SBYTES (family);
      p0 = SSDATA (family);
      p1 = strchr (p0, '-');
+#ifdef HAVE_NTGUI
+      /* MS-Windows has fonts whose family name ends in "-ExtB" and
+         other suffixes which include a hyphen.  */
+      if (p1 && (strcmp (p1, "-ExtB") == 0
+                 || strcmp (p1, "-R") == 0
+                 || strcmp (p1, "-B") == 0
+                 || strcmp (p1, "-SB") == 0))
+        p1 = NULL;
+#endif
      if (p1)
        {
          if ((*p0 != '*' && p1 - p0 > 0)
diff --git a/src/w32font.c b/src/w32font.c
index ccbd3837afb..efb42d80336 100644
--- a/src/w32font.c
+++ b/src/w32font.c
@@ -809,6 +809,93 @@ w32font_otf_drive (struct font *font, Lisp_Object features,
                   bool alternate_subst);
  */
+/* Notes about the way fonts are found on MS-Windows when we have a
+   character unsupported by the default font.
+   Since we don't use Fontconfig on MS-Windows, we cannot efficiently
+   search for fonts which support certain characters, because Windows
+   doesn't store this information anywhere, and we can only know whether
+   a font supports some character if we actually open the font, which is
+   expensive and slow.  Instead, we rely on font information Windows
+   exposes to the API we use to enumerate available fonts,
+   EnumFontFamiliesEx.  This information includes two bitmapped attributes:
+     USB (which stands for Unicode Subset Bitfields) -- this is an array
+         of 4 32-bit values, 128 bits in total, where each bit
+         corresponds to some block (sometimes several related blocks) of
+         Unicode codepoints which the font claims to support.
+     CSB (which stands for Codepage Bitfields) -- this is an array of 2
+         32-bit values (64 bits), where each bit corresponds to some
+         codepage whose characters the font claims to support.
+   When Emacs needs to find a font for a character, it enumerates the
+   available fonts, filtering the fonts by examining these bitmaps and a
+   few other font attributes.  The script of the character is converted
+   to the corresponding bits in USB, and a font that has any of these
+   bits set is deemed as a candidate; see font_supported_scripts, which
+   is called by font_matches_spec.  The problem with this strategy is
+   twofold:
+    - Some Unicode blocks have no USB bits.  For the scripts
+      corresponding to those blocks we use a small cache of fonts known
      to support those scripts.  This cache is calculated once, and needs
+      not be recalculated as long as no fonts are installed or deleted
+      (it can be saved in your init file and reused for the following
+      sessions).  See the function w32-find-non-USB-fonts.  Note that
+      for that function to work well, 'script-representative-chars'
+      should include the important characters for each script which has
+      no USB bits defined.
+    - Some fonts claim support for a block, but don't support it well.
+      Other fonts support some blocks very well, but don't set the
+      corresponding USB bits for the blocks.  For these we use some
+      heuristics:
      . For a few fonts that claim coverage, but don't provide it, we
+        either recognize them by name and reject their false claims, or
+        let users set face-ignored-fonts to ignore those fonts.
+      . For fonts that support some blocks very well, but don't set
+        their USB bits, we examine the CSB bits instead.  This is
+        particularly important for some CJK fonts with good support in
+        the SIP area: they only set the SIP bit (bit 57) in the USB.  We
+        consider those as candidates for CJK scripts ('han', 'kana',
+        etc.) if the CSB bits are set for the corresponding CJK
+        codepages.
+   Eventually, some characters could still appear as "tofu" (a box with
+   the character's hex codepoint), even though a font might be available
+   on the system which supports the character.  This is because the
+   above strategy, with all its heuristics and tricks, sometimes fails.
+   For example, it could fail if the system has several fonts installed
+   whose coverage of some blocks is incomplete -- Emacs could select
+   such a font based on its USB bits, and realize the font has no glyph
+   for a character only when it's too late.  This happens because when
+   several fonts claim coverage of the same Unicode block, Emacs on
+   Windows has no way of preferring one over the other, if they all
+   support the same values of size, weight, and slant.  So Emacs usually
+   selects the first such candidate, which could lack glyphs for the
+   characters Emacs needs to display.  Since we avoid naming non-free
   Windows fonts in Emacs's sources, this cannot be fixed in the
+   default fontset setup provided by Emacs: we cannot arrange for the
+   "good" fonts to be used in all such cases, because that would mean
   naming those fonts.  The solution for these issues is to customize the
+   default fontset using set-fontset-font, to force Emacs to use a font
+   known to support some characters.
+   One other Windows-specific issue is the fact that some Windows fonts
+   have hyphens in their names.  Emacs generally follows the XLFD
+   specifications, where a hyphen is used as separator between segments
   of a font spec.  There are a few places in the code in font.c where
+   Emacs handles such font names specially, and it currently knows about
+   font names documented for Windows versions up to and including 11.
+   See this page for the latest update:
+     https://learn.microsoft.com/en-us/typography/fonts/windows_11_font_list
+   If more fonts are added to Windows that have hyphens in their names,
+   the code in font.c will need to be updated.  */
 /* Internal implementation of w32font_list.
   Additional parameter opentype_only restricts the returned fonts to
   opentype fonts, which can be used with the Uniscribe backend.  */
@@ -1455,22 +1542,34 @@ static int
 w32font_coverage_ok (FONTSIGNATURE * coverage, BYTE charset)
 {
  DWORD subrange1 = coverage->fsUsb[1];
+  DWORD codepages0 = coverage->fsCsb[0];
 #define SUBRANGE1_HAN_MASK 0x08000000
 #define SUBRANGE1_HANGEUL_MASK 0x01000000
 #define SUBRANGE1_JAPANESE_MASK (0x00060000 | SUBRANGE1_HAN_MASK)
+#define SUBRANGE1_SIP_MASK 0x02000000
+/* We consider the coverage to be OK if either (a) subrange1 has the
+   bits set that correspond to CHARSET, or (b) subrange1 indicates SIP
+   support and codepages0 has one or more bits set corresponding to
+   CHARSET.  */
  if (charset == GB2312_CHARSET || charset == CHINESEBIG5_CHARSET)
    {
-      return (subrange1 & SUBRANGE1_HAN_MASK) == SUBRANGE1_HAN_MASK;
+      return ((subrange1 & SUBRANGE1_HAN_MASK) == SUBRANGE1_HAN_MASK
+              || ((subrange1 & SUBRANGE1_SIP_MASK) != 0
+                  && (codepages0 & CSB_CHINESE) != 0));
    }
  else if (charset == SHIFTJIS_CHARSET)
    {
-      return (subrange1 & SUBRANGE1_JAPANESE_MASK) == SUBRANGE1_JAPANESE_MASK;
+      return ((subrange1 & SUBRANGE1_JAPANESE_MASK) == SUBRANGE1_JAPANESE_MASK
+              || ((subrange1 & SUBRANGE1_SIP_MASK) != 0
+                  && (codepages0 & CSB_JAPANESE) != 0));
    }
  else if (charset == HANGEUL_CHARSET)
    {
-      return (subrange1 & SUBRANGE1_HANGEUL_MASK) == SUBRANGE1_HANGEUL_MASK;
+      return ((subrange1 & SUBRANGE1_HANGEUL_MASK) == SUBRANGE1_HANGEUL_MASK
+              || ((subrange1 & SUBRANGE1_SIP_MASK) != 0
+                  && (codepages0 & CSB_KOREAN) != 0));
    }
  return 1;
@@ -1620,11 +1719,18 @@ add_font_entity_to_list (ENUMLOGFONTEX *logical_font,
        }
      /* unicode-sip fonts must contain characters in Unicode plane 2.
         so look for bit 57 (surrogates) in the Unicode subranges, plus
-         the bits for CJK ranges that include those characters.  */
+         the bits for CJK ranges that include those characters or CJK
+         bits in code-page bit fields.  */
      else if (EQ (spec_charset, Qunicode_sip))
        {
-          if (!(physical_font->ntmFontSig.fsUsb[1] & 0x02000000)
+          if (!((physical_font->ntmFontSig.fsUsb[1] & 0x02000000)
-              || !(physical_font->ntmFontSig.fsUsb[1] & 0x28000000))
+                && ((physical_font->ntmFontSig.fsUsb[1] & 0x28000000)
+                    /* Some CJK fonts with very good coverage of SIP
+                       characters have only the 0x02000000 bit in USB
+                       set, so we allow them if their code-page bits
+                       indicate support for CJK character sets.  */
+                    || (physical_font->ntmFontSig.fsCsb[0]
+                        & (CSB_CHINESE | CSB_JAPANESE | CSB_KOREAN)))))
            return 1;
        }
@@ -2328,7 +2434,18 @@ font_supported_scripts (FONTSIGNATURE * sig)
  SUBRANGE (53, Qphags_pa);
  /* 54: Enclosed CJK letters and months, 55: CJK Compatibility.  */
  SUBRANGE (56, Qhangul);
-  /* 57: Surrogates.  */
+  /* 57: Non-BMP.  Processed specially: Several fonts that support CJK
+     Ideographs Extensions and other extensions, set just this bit and
+     Latin, and nothing else.  */
+  if (subranges[57 / 32] & (1U << (57 % 32)))
+    {
+      if ((sig->fsCsb[0] & CSB_CHINESE))
+        supported = Fcons (Qhan, supported);
+      if ((sig->fsCsb[0] & CSB_JAPANESE))
+        supported = Fcons (Qkana, supported);
+      if ((sig->fsCsb[0] & CSB_KOREAN))
+        supported = Fcons (Qhangul, supported);
+    }
  SUBRANGE (58, Qphoenician);
  SUBRANGE (59, Qhan); /* There are others, but this is the main one.  */
  SUBRANGE (59, Qideographic_description); /* Windows lumps this in.  */
@@ -2385,7 +2502,7 @@ font_supported_scripts (FONTSIGNATURE * sig)
  SUBRANGE (97, Qglagolitic);
  SUBRANGE (98, Qtifinagh);
  /* 99: Yijing Hexagrams.  */
-  SUBRANGE (99, Qhan);
+  SUBRANGE (99, Qcjk_misc);
  SUBRANGE (100, Qsyloti_nagri);
  SUBRANGE (101, Qlinear_b);
  SUBRANGE (101, Qaegean_number);
diff --git a/src/w32uniscribe.c b/src/w32uniscribe.c
index 471bdf544d8..751963705d2 100644
--- a/src/w32uniscribe.c
+++ b/src/w32uniscribe.c
@@ -895,7 +895,7 @@ uniscribe_check_otf_1 (HDC context, Lisp_Object script, Lisp_Object lang,
                       Lisp_Object features[2], int *retval)
 {
  SCRIPT_CACHE cache = NULL;
-  OPENTYPE_TAG tags[32], script_tag, lang_tag;
+  OPENTYPE_TAG tags[128], script_tag, lang_tag;
  int max_tags = ARRAYELTS (tags);
  int ntags, i, ret = 0;
  HRESULT rslt;
author	Eli Zaretskii	2024-08-03 18:11:57 +0300
committer	Eli Zaretskii	2024-08-03 18:11:57 +0300
commit	ff6954b9c833bfeb8032fb772fa08e60e9ec56a8 (patch)
tree	95c5ac2d3d85e13e34ef5af47f8e7beba1bb1504
parent	ef8276d4247fdf0b1ee19a2c70328710490dd2d6 (diff)
download	emacs-ff6954b9c833bfeb8032fb772fa08e60e9ec56a8.tar.gz emacs-ff6954b9c833bfeb8032fb772fa08e60e9ec56a8.zip

diff --git a/lisp/international/fontset.el b/lisp/international/fontset.el index 695c313cb26..c9b60418b22 100644 --- a/lisp/international/fontset.el +++ b/lisp/international/fontset.el
@@ -88,6 +88,7 @@
88	("iso10646-1$" . (unicode-bmp . nil))	88	("iso10646-1$" . (unicode-bmp . nil))
89	("iso10646.indian-1" . (unicode-bmp . nil))	89	("iso10646.indian-1" . (unicode-bmp . nil))
90	("unicode-bmp" . (unicode-bmp . nil))	90	("unicode-bmp" . (unicode-bmp . nil))
		91	("unicode-sip" . (unicode-sip . nil)) ; used by w32font.c
91	("abobe-symbol" . symbol)	92	("abobe-symbol" . symbol)
92	("sisheng_cwnn" . chinese-sisheng)	93	("sisheng_cwnn" . chinese-sisheng)
93	("mulearabic-0" . arabic-digit)	94	("mulearabic-0" . arabic-digit)


diff --git a/src/font.c b/src/font.c index 246fe1c4426..112618a7307 100644 --- a/src/font.c +++ b/src/font.c
@@ -1627,15 +1627,30 @@ font_parse_fcname (char *name, ptrdiff_t len, Lisp_Object font)
1627	{	1627	{
1628	bool decimal = 0, size_found = 1;	1628	bool decimal = 0, size_found = 1;
1629	for (q = p + 1; q && q != ':'; q++)	1629	for (q = p + 1; q && q != ':'; q++)
1630	if (! c_isdigit (*q))	1630	{
1631	{	1631	#ifdef HAVE_NTGUI
1632	if (*q != '.' \|\| decimal)	1632	/* MS-Windows has several CJK fonts whose name ends in
1633	{	1633	"-ExtB". It also has fonts whose names end in "-R" or
1634	size_found = 0;	1634	"-B", and one font whose name ends in "-SB". */
1635	break;	1635	if (q == p + 1 && (strncmp (q, "ExtB", 4) == 0
1636	}	1636	\|\| strncmp (q, "R", 1) == 0
1637	decimal = 1;	1637	\|\| strncmp (q, "B", 1) == 0
1638	}	1638	\|\| strncmp (q, "SB", 2) == 0))
		1639	{
		1640	size_found = 0;
		1641	break;
		1642	}
		1643	#endif
		1644	if (! c_isdigit (*q))
		1645	{
		1646	if (*q != '.' \|\| decimal)
		1647	{
		1648	size_found = 0;
		1649	break;
		1650	}
		1651	decimal = 1;
		1652	}
		1653	}
1639	if (size_found)	1654	if (size_found)
1640	{	1655	{
1641	family_end = p;	1656	family_end = p;
@@ -2000,6 +2015,15 @@ font_parse_family_registry (Lisp_Object family, Lisp_Object registry, Lisp_Objec
2000	len = SBYTES (family);	2015	len = SBYTES (family);
2001	p0 = SSDATA (family);	2016	p0 = SSDATA (family);
2002	p1 = strchr (p0, '-');	2017	p1 = strchr (p0, '-');
		2018	#ifdef HAVE_NTGUI
		2019	/* MS-Windows has fonts whose family name ends in "-ExtB" and
		2020	other suffixes which include a hyphen. */
		2021	if (p1 && (strcmp (p1, "-ExtB") == 0
		2022	\|\| strcmp (p1, "-R") == 0
		2023	\|\| strcmp (p1, "-B") == 0
		2024	\|\| strcmp (p1, "-SB") == 0))
		2025	p1 = NULL;
		2026	#endif
2003	if (p1)	2027	if (p1)
2004	{	2028	{
2005	if ((p0 != '' && p1 - p0 > 0)	2029	if ((p0 != '' && p1 - p0 > 0)


diff --git a/src/w32font.c b/src/w32font.c index ccbd3837afb..efb42d80336 100644 --- a/src/w32font.c +++ b/src/w32font.c
@@ -809,6 +809,93 @@ w32font_otf_drive (struct font *font, Lisp_Object features,
809	bool alternate_subst);	809	bool alternate_subst);
810	*/	810	*/
811		811
		812	/* Notes about the way fonts are found on MS-Windows when we have a
		813	character unsupported by the default font.
		814
		815	Since we don't use Fontconfig on MS-Windows, we cannot efficiently
		816	search for fonts which support certain characters, because Windows
		817	doesn't store this information anywhere, and we can only know whether
		818	a font supports some character if we actually open the font, which is
		819	expensive and slow. Instead, we rely on font information Windows
		820	exposes to the API we use to enumerate available fonts,
		821	EnumFontFamiliesEx. This information includes two bitmapped attributes:
		822
		823	USB (which stands for Unicode Subset Bitfields) -- this is an array
		824	of 4 32-bit values, 128 bits in total, where each bit
		825	corresponds to some block (sometimes several related blocks) of
		826	Unicode codepoints which the font claims to support.
		827	CSB (which stands for Codepage Bitfields) -- this is an array of 2
		828	32-bit values (64 bits), where each bit corresponds to some
		829	codepage whose characters the font claims to support.
		830
		831	When Emacs needs to find a font for a character, it enumerates the
		832	available fonts, filtering the fonts by examining these bitmaps and a
		833	few other font attributes. The script of the character is converted
		834	to the corresponding bits in USB, and a font that has any of these
		835	bits set is deemed as a candidate; see font_supported_scripts, which
		836	is called by font_matches_spec. The problem with this strategy is
		837	twofold:
		838
		839	- Some Unicode blocks have no USB bits. For the scripts
		840	corresponding to those blocks we use a small cache of fonts known
		841	to support those scripts. This cache is calculated once, and needs
		842	not be recalculated as long as no fonts are installed or deleted
		843	(it can be saved in your init file and reused for the following
		844	sessions). See the function w32-find-non-USB-fonts. Note that
		845	for that function to work well, 'script-representative-chars'
		846	should include the important characters for each script which has
		847	no USB bits defined.
		848
		849	- Some fonts claim support for a block, but don't support it well.
		850	Other fonts support some blocks very well, but don't set the
		851	corresponding USB bits for the blocks. For these we use some
		852	heuristics:
		853
		854	. For few fonts that claim coverage, but don't provide it, we
		855	either recognize them by name and reject their false claims, or
		856	let users set face-ignored-fonts to ignore those fonts.
		857
		858	. For fonts that support some blocks very well, but don't set
		859	their USB bits, we examine the CSB bits instead. This is
		860	particularly important for some CJK fonts with good support in
		861	the SIP area: they only set the SIP bit (bit 57) in the USB. We
		862	consider those as candidates for CJK scripts ('han', 'kana',
		863	etc.) if the CSB bits are set for the corresponding CJK
		864	codepages.
		865
		866	Eventually, some characters could still appear as "tofu" (a box with
		867	the character's hex codepoint), even though a font might be available
		868	on the system which supports the character. This is because the
		869	above strategy, with all its heuristics and tricks, sometimes fails.
		870	For example, it could fail if the system has several fonts installed
		871	whose coverage of some blocks is incomplete -- Emacs could select
		872	such a font based on its USB bits, and realize the font has no glyph
		873	for a character only when it's too late. This happens because when
		874	several fonts claim coverage of the same Unicode block, Emacs on
		875	Windows has no way of preferring one over the other, if they all
		876	support the same values of size, weight, and slant. So Emacs usually
		877	selects the first such candidate, which could lack glyphs for the
		878	characters Emacs needs to display. Since we avoid naming non-free
		879	Windows fonts in Emacs's sources, this cannot be fixed in the
		880	default fontset setup provided by Emacs: we cannot arrange for the
		881	"good" fonts to be used in all such cases, because that would mean
		882	naming those fonts. The solution for these issues is to customize the
		883	default fontset using set-fontset-font, to force Emacs to use a font
		884	known to support some characters.
		885
		886	One other Windows-specific issue is the fact that some Windows fonts
		887	have hyphens in their names. Emacs generally follows the XLFD
		888	specifications, where a hyphen is used as separator between segments
		889	of a font spec. There are few places in the code in font.c where
		890	Emacs handles such font names specially, and it currently knows about
		891	font names documented for Windows versions up to and including 11.
		892	See this page for the latest update:
		893
		894	https://learn.microsoft.com/en-us/typography/fonts/windows_11_font_list
		895
		896	If more fonts are added to Windows that have hyphens in their names,
		897	the code in font.c will need to be updated. */
		898
812	/* Internal implementation of w32font_list.	899	/* Internal implementation of w32font_list.
813	Additional parameter opentype_only restricts the returned fonts to	900	Additional parameter opentype_only restricts the returned fonts to
814	opentype fonts, which can be used with the Uniscribe backend. */	901	opentype fonts, which can be used with the Uniscribe backend. */
@@ -1455,22 +1542,34 @@ static int
1455	w32font_coverage_ok (FONTSIGNATURE * coverage, BYTE charset)	1542	w32font_coverage_ok (FONTSIGNATURE * coverage, BYTE charset)
1456	{	1543	{
1457	DWORD subrange1 = coverage->fsUsb[1];	1544	DWORD subrange1 = coverage->fsUsb[1];
		1545	DWORD codepages0 = coverage->fsCsb[0];
1458		1546
1459	#define SUBRANGE1_HAN_MASK 0x08000000	1547	#define SUBRANGE1_HAN_MASK 0x08000000
1460	#define SUBRANGE1_HANGEUL_MASK 0x01000000	1548	#define SUBRANGE1_HANGEUL_MASK 0x01000000
1461	#define SUBRANGE1_JAPANESE_MASK (0x00060000 \| SUBRANGE1_HAN_MASK)	1549	#define SUBRANGE1_JAPANESE_MASK (0x00060000 \| SUBRANGE1_HAN_MASK)
		1550	#define SUBRANGE1_SIP_MASK 0x02000000
1462		1551
		1552	/* We consider the coverage to be OK if either (a) subrange1 has the
		1553	bits set that correspond to CHARSET, or (b) subrange1 indicates SIP
		1554	support and codepages0 has one or more bits set corresponding to
		1555	CHARSET. */
1463	if (charset == GB2312_CHARSET \|\| charset == CHINESEBIG5_CHARSET)	1556	if (charset == GB2312_CHARSET \|\| charset == CHINESEBIG5_CHARSET)
1464	{	1557	{
1465	return (subrange1 & SUBRANGE1_HAN_MASK) == SUBRANGE1_HAN_MASK;	1558	return ((subrange1 & SUBRANGE1_HAN_MASK) == SUBRANGE1_HAN_MASK
		1559	\|\| ((subrange1 & SUBRANGE1_SIP_MASK) != 0
		1560	&& (codepages0 & CSB_CHINESE) != 0));
1466	}	1561	}
1467	else if (charset == SHIFTJIS_CHARSET)	1562	else if (charset == SHIFTJIS_CHARSET)
1468	{	1563	{
1469	return (subrange1 & SUBRANGE1_JAPANESE_MASK) == SUBRANGE1_JAPANESE_MASK;	1564	return ((subrange1 & SUBRANGE1_JAPANESE_MASK) == SUBRANGE1_JAPANESE_MASK
		1565	\|\| ((subrange1 & SUBRANGE1_SIP_MASK) != 0
		1566	&& (codepages0 & CSB_JAPANESE) != 0));
1470	}	1567	}
1471	else if (charset == HANGEUL_CHARSET)	1568	else if (charset == HANGEUL_CHARSET)
1472	{	1569	{
1473	return (subrange1 & SUBRANGE1_HANGEUL_MASK) == SUBRANGE1_HANGEUL_MASK;	1570	return ((subrange1 & SUBRANGE1_HANGEUL_MASK) == SUBRANGE1_HANGEUL_MASK
		1571	\|\| ((subrange1 & SUBRANGE1_SIP_MASK) != 0
		1572	&& (codepages0 & CSB_KOREAN) != 0));
1474	}	1573	}
1475		1574
1476	return 1;	1575	return 1;
@@ -1620,11 +1719,18 @@ add_font_entity_to_list (ENUMLOGFONTEX *logical_font,
1620	}	1719	}
1621	/* unicode-sip fonts must contain characters in Unicode plane 2.	1720	/* unicode-sip fonts must contain characters in Unicode plane 2.
1622	so look for bit 57 (surrogates) in the Unicode subranges, plus	1721	so look for bit 57 (surrogates) in the Unicode subranges, plus
1623	the bits for CJK ranges that include those characters. */	1722	the bits for CJK ranges that include those characters or CJK
		1723	bits in code-page bit fields. */
1624	else if (EQ (spec_charset, Qunicode_sip))	1724	else if (EQ (spec_charset, Qunicode_sip))
1625	{	1725	{
1626	if (!(physical_font->ntmFontSig.fsUsb[1] & 0x02000000)	1726	if (!((physical_font->ntmFontSig.fsUsb[1] & 0x02000000)
1627	\|\| !(physical_font->ntmFontSig.fsUsb[1] & 0x28000000))	1727	&& ((physical_font->ntmFontSig.fsUsb[1] & 0x28000000)
		1728	/* Some CJK fonts with very good coverage of SIP
		1729	characters have only the 0x02000000 bit in USB
		1730	set, so we allow them if their code-page bits
		1731	indicate support for CJK character sets. */
		1732	\|\| (physical_font->ntmFontSig.fsCsb[0]
		1733	& (CSB_CHINESE \| CSB_JAPANESE \| CSB_KOREAN)))))
1628	return 1;	1734	return 1;
1629	}	1735	}
1630		1736
@@ -2328,7 +2434,18 @@ font_supported_scripts (FONTSIGNATURE * sig)
2328	SUBRANGE (53, Qphags_pa);	2434	SUBRANGE (53, Qphags_pa);
2329	/* 54: Enclosed CJK letters and months, 55: CJK Compatibility. */	2435	/* 54: Enclosed CJK letters and months, 55: CJK Compatibility. */
2330	SUBRANGE (56, Qhangul);	2436	SUBRANGE (56, Qhangul);
2331	/* 57: Surrogates. */	2437	/* 57: Non-BMP. Processed specially: Several fonts that support CJK
		2438	Ideographs Extensions and other extensions, set just this bit and
		2439	Latin, and nothing else. */
		2440	if (subranges[57 / 32] & (1U << (57 % 32)))
		2441	{
		2442	if ((sig->fsCsb[0] & CSB_CHINESE))
		2443	supported = Fcons (Qhan, supported);
		2444	if ((sig->fsCsb[0] & CSB_JAPANESE))
		2445	supported = Fcons (Qkana, supported);
		2446	if ((sig->fsCsb[0] & CSB_KOREAN))
		2447	supported = Fcons (Qhangul, supported);
		2448	}
2332	SUBRANGE (58, Qphoenician);	2449	SUBRANGE (58, Qphoenician);
2333	SUBRANGE (59, Qhan); /* There are others, but this is the main one. */	2450	SUBRANGE (59, Qhan); /* There are others, but this is the main one. */
2334	SUBRANGE (59, Qideographic_description); /* Windows lumps this in. */	2451	SUBRANGE (59, Qideographic_description); /* Windows lumps this in. */
@@ -2385,7 +2502,7 @@ font_supported_scripts (FONTSIGNATURE * sig)
2385	SUBRANGE (97, Qglagolitic);	2502	SUBRANGE (97, Qglagolitic);
2386	SUBRANGE (98, Qtifinagh);	2503	SUBRANGE (98, Qtifinagh);
2387	/* 99: Yijing Hexagrams. */	2504	/* 99: Yijing Hexagrams. */
2388	SUBRANGE (99, Qhan);	2505	SUBRANGE (99, Qcjk_misc);
2389	SUBRANGE (100, Qsyloti_nagri);	2506	SUBRANGE (100, Qsyloti_nagri);
2390	SUBRANGE (101, Qlinear_b);	2507	SUBRANGE (101, Qlinear_b);
2391	SUBRANGE (101, Qaegean_number);	2508	SUBRANGE (101, Qaegean_number);


diff --git a/src/w32uniscribe.c b/src/w32uniscribe.c index 471bdf544d8..751963705d2 100644 --- a/src/w32uniscribe.c +++ b/src/w32uniscribe.c
@@ -895,7 +895,7 @@ uniscribe_check_otf_1 (HDC context, Lisp_Object script, Lisp_Object lang,
895	Lisp_Object features[2], int *retval)	895	Lisp_Object features[2], int *retval)
896	{	896	{
897	SCRIPT_CACHE cache = NULL;	897	SCRIPT_CACHE cache = NULL;
898	OPENTYPE_TAG tags[32], script_tag, lang_tag;	898	OPENTYPE_TAG tags[128], script_tag, lang_tag;
899	int max_tags = ARRAYELTS (tags);	899	int max_tags = ARRAYELTS (tags);
900	int ntags, i, ret = 0;	900	int ntags, i, ret = 0;
901	HRESULT rslt;	901	HRESULT rslt;