aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEli Zaretskii2024-08-03 18:11:57 +0300
committerEli Zaretskii2024-08-03 18:11:57 +0300
commitff6954b9c833bfeb8032fb772fa08e60e9ec56a8 (patch)
tree95c5ac2d3d85e13e34ef5af47f8e7beba1bb1504
parentef8276d4247fdf0b1ee19a2c70328710490dd2d6 (diff)
downloademacs-ff6954b9c833bfeb8032fb772fa08e60e9ec56a8.tar.gz
emacs-ff6954b9c833bfeb8032fb772fa08e60e9ec56a8.zip
Improve font search and handling on MS-Windows
* src/w32font.c: Add commentary about font search on MS-Windows. (w32font_coverage_ok, add_font_entity_to_list) (font_supported_scripts): Consider the coverage OK if a font has only the SIP bit set, but also sets relevant codepage bits in the CSB bits. (font_supported_scripts): Fix script for USB bit 99. * src/font.c (font_parse_fcname, font_parse_family_registry) [HAVE_NTGUI]: Don't consider hyphenated suffixes of some Windows fonts as not belonging to the family name. * src/w32uniscribe.c (uniscribe_check_otf_1): Increase tags[] array size, to avoid the E_OUTOFMEMORY error for some fonts. * lisp/international/fontset.el (font-encoding-alist): Add 'unicode-sip'.
-rw-r--r--lisp/international/fontset.el1
-rw-r--r--src/font.c42
-rw-r--r--src/w32font.c133
-rw-r--r--src/w32uniscribe.c2
4 files changed, 160 insertions, 18 deletions
diff --git a/lisp/international/fontset.el b/lisp/international/fontset.el
index 695c313cb26..c9b60418b22 100644
--- a/lisp/international/fontset.el
+++ b/lisp/international/fontset.el
@@ -88,6 +88,7 @@
88 ("iso10646-1$" . (unicode-bmp . nil)) 88 ("iso10646-1$" . (unicode-bmp . nil))
89 ("iso10646.indian-1" . (unicode-bmp . nil)) 89 ("iso10646.indian-1" . (unicode-bmp . nil))
90 ("unicode-bmp" . (unicode-bmp . nil)) 90 ("unicode-bmp" . (unicode-bmp . nil))
91 ("unicode-sip" . (unicode-sip . nil)) ; used by w32font.c
91 ("abobe-symbol" . symbol) 92 ("abobe-symbol" . symbol)
92 ("sisheng_cwnn" . chinese-sisheng) 93 ("sisheng_cwnn" . chinese-sisheng)
93 ("mulearabic-0" . arabic-digit) 94 ("mulearabic-0" . arabic-digit)
diff --git a/src/font.c b/src/font.c
index 246fe1c4426..112618a7307 100644
--- a/src/font.c
+++ b/src/font.c
@@ -1627,15 +1627,30 @@ font_parse_fcname (char *name, ptrdiff_t len, Lisp_Object font)
1627 { 1627 {
1628 bool decimal = 0, size_found = 1; 1628 bool decimal = 0, size_found = 1;
1629 for (q = p + 1; *q && *q != ':'; q++) 1629 for (q = p + 1; *q && *q != ':'; q++)
1630 if (! c_isdigit (*q)) 1630 {
1631 { 1631#ifdef HAVE_NTGUI
1632 if (*q != '.' || decimal) 1632 /* MS-Windows has several CJK fonts whose name ends in
1633 { 1633 "-ExtB". It also has fonts whose names end in "-R" or
1634 size_found = 0; 1634 "-B", and one font whose name ends in "-SB". */
1635 break; 1635 if (q == p + 1 && (strncmp (q, "ExtB", 4) == 0
1636 } 1636 || strncmp (q, "R", 1) == 0
1637 decimal = 1; 1637 || strncmp (q, "B", 1) == 0
1638 } 1638 || strncmp (q, "SB", 2) == 0))
1639 {
1640 size_found = 0;
1641 break;
1642 }
1643#endif
1644 if (! c_isdigit (*q))
1645 {
1646 if (*q != '.' || decimal)
1647 {
1648 size_found = 0;
1649 break;
1650 }
1651 decimal = 1;
1652 }
1653 }
1639 if (size_found) 1654 if (size_found)
1640 { 1655 {
1641 family_end = p; 1656 family_end = p;
@@ -2000,6 +2015,15 @@ font_parse_family_registry (Lisp_Object family, Lisp_Object registry, Lisp_Objec
2000 len = SBYTES (family); 2015 len = SBYTES (family);
2001 p0 = SSDATA (family); 2016 p0 = SSDATA (family);
2002 p1 = strchr (p0, '-'); 2017 p1 = strchr (p0, '-');
2018#ifdef HAVE_NTGUI
2019 /* MS-Windows has fonts whose family name ends in "-ExtB" and
2020 other suffixes which include a hyphen. */
2021 if (p1 && (strcmp (p1, "-ExtB") == 0
2022 || strcmp (p1, "-R") == 0
2023 || strcmp (p1, "-B") == 0
2024 || strcmp (p1, "-SB") == 0))
2025 p1 = NULL;
2026#endif
2003 if (p1) 2027 if (p1)
2004 { 2028 {
2005 if ((*p0 != '*' && p1 - p0 > 0) 2029 if ((*p0 != '*' && p1 - p0 > 0)
diff --git a/src/w32font.c b/src/w32font.c
index ccbd3837afb..efb42d80336 100644
--- a/src/w32font.c
+++ b/src/w32font.c
@@ -809,6 +809,93 @@ w32font_otf_drive (struct font *font, Lisp_Object features,
809 bool alternate_subst); 809 bool alternate_subst);
810 */ 810 */
811 811
812/* Notes about the way fonts are found on MS-Windows when we have a
813 character unsupported by the default font.
814
815 Since we don't use Fontconfig on MS-Windows, we cannot efficiently
816 search for fonts which support certain characters, because Windows
817 doesn't store this information anywhere, and we can only know whether
818 a font supports some character if we actually open the font, which is
819 expensive and slow. Instead, we rely on font information Windows
820 exposes to the API we use to enumerate available fonts,
821 EnumFontFamiliesEx. This information includes two bitmapped attributes:
822
823 USB (which stands for Unicode Subset Bitfields) -- this is an array
824 of 4 32-bit values, 128 bits in total, where each bit
825 corresponds to some block (sometimes several related blocks) of
826 Unicode codepoints which the font claims to support.
827 CSB (which stands for Codepage Bitfields) -- this is an array of 2
828 32-bit values (64 bits), where each bit corresponds to some
829 codepage whose characters the font claims to support.
830
831 When Emacs needs to find a font for a character, it enumerates the
832 available fonts, filtering the fonts by examining these bitmaps and a
833 few other font attributes. The script of the character is converted
834 to the corresponding bits in USB, and a font that has any of these
835 bits set is deemed as a candidate; see font_supported_scripts, which
836 is called by font_matches_spec. The problem with this strategy is
837 twofold:
838
839 - Some Unicode blocks have no USB bits. For the scripts
840 corresponding to those blocks we use a small cache of fonts known
841 to support those scripts. This cache is calculated once, and needs
842 not be recalculated as long as no fonts are installed or deleted
843 (it can be saved in your init file and reused for the following
844 sessions). See the function w32-find-non-USB-fonts. Note that
845 for that function to work well, 'script-representative-chars'
846 should include the important characters for each script which has
847 no USB bits defined.
848
849 - Some fonts claim support for a block, but don't support it well.
850 Other fonts support some blocks very well, but don't set the
851 corresponding USB bits for the blocks. For these we use some
852 heuristics:
853
854 . For the few fonts that claim coverage, but don't provide it, we
855 either recognize them by name and reject their false claims, or
856 let users set face-ignored-fonts to ignore those fonts.
857
858 . For fonts that support some blocks very well, but don't set
859 their USB bits, we examine the CSB bits instead. This is
860 particularly important for some CJK fonts with good support in
861 the SIP area: they only set the SIP bit (bit 57) in the USB. We
862 consider those as candidates for CJK scripts ('han', 'kana',
863 etc.) if the CSB bits are set for the corresponding CJK
864 codepages.
865
866 Eventually, some characters could still appear as "tofu" (a box with
867 the character's hex codepoint), even though a font might be available
868 on the system which supports the character. This is because the
869 above strategy, with all its heuristics and tricks, sometimes fails.
870 For example, it could fail if the system has several fonts installed
871 whose coverage of some blocks is incomplete -- Emacs could select
872 such a font based on its USB bits, and realize the font has no glyph
873 for a character only when it's too late. This happens because when
874 several fonts claim coverage of the same Unicode block, Emacs on
875 Windows has no way of preferring one over the other, if they all
876 support the same values of size, weight, and slant. So Emacs usually
877 selects the first such candidate, which could lack glyphs for the
878 characters Emacs needs to display. Since we avoid naming non-free
879 Windows fonts in Emacs's sources, this cannot be fixed in the
880 default fontset setup provided by Emacs: we cannot arrange for the
881 "good" fonts to be used in all such cases, because that would mean
882 naming those fonts. The solution for these issues is to customize the
883 default fontset using set-fontset-font, to force Emacs to use a font
884 known to support some characters.
885
886 One other Windows-specific issue is the fact that some Windows fonts
887 have hyphens in their names. Emacs generally follows the XLFD
888 specifications, where a hyphen is used as separator between segments
889 of a font spec. There are a few places in the code in font.c where
890 Emacs handles such font names specially, and it currently knows about
891 font names documented for Windows versions up to and including 11.
892 See this page for the latest update:
893
894 https://learn.microsoft.com/en-us/typography/fonts/windows_11_font_list
895
896 If more fonts are added to Windows that have hyphens in their names,
897 the code in font.c will need to be updated. */
898
812/* Internal implementation of w32font_list. 899/* Internal implementation of w32font_list.
813 Additional parameter opentype_only restricts the returned fonts to 900 Additional parameter opentype_only restricts the returned fonts to
814 opentype fonts, which can be used with the Uniscribe backend. */ 901 opentype fonts, which can be used with the Uniscribe backend. */
@@ -1455,22 +1542,34 @@ static int
1455w32font_coverage_ok (FONTSIGNATURE * coverage, BYTE charset) 1542w32font_coverage_ok (FONTSIGNATURE * coverage, BYTE charset)
1456{ 1543{
1457 DWORD subrange1 = coverage->fsUsb[1]; 1544 DWORD subrange1 = coverage->fsUsb[1];
1545 DWORD codepages0 = coverage->fsCsb[0];
1458 1546
1459#define SUBRANGE1_HAN_MASK 0x08000000 1547#define SUBRANGE1_HAN_MASK 0x08000000
1460#define SUBRANGE1_HANGEUL_MASK 0x01000000 1548#define SUBRANGE1_HANGEUL_MASK 0x01000000
1461#define SUBRANGE1_JAPANESE_MASK (0x00060000 | SUBRANGE1_HAN_MASK) 1549#define SUBRANGE1_JAPANESE_MASK (0x00060000 | SUBRANGE1_HAN_MASK)
1550#define SUBRANGE1_SIP_MASK 0x02000000
1462 1551
1552/* We consider the coverage to be OK if either (a) subrange1 has the
1553 bits set that correspond to CHARSET, or (b) subrange1 indicates SIP
1554 support and codepages0 has one or more bits set corresponding to
1555 CHARSET. */
1463 if (charset == GB2312_CHARSET || charset == CHINESEBIG5_CHARSET) 1556 if (charset == GB2312_CHARSET || charset == CHINESEBIG5_CHARSET)
1464 { 1557 {
1465 return (subrange1 & SUBRANGE1_HAN_MASK) == SUBRANGE1_HAN_MASK; 1558 return ((subrange1 & SUBRANGE1_HAN_MASK) == SUBRANGE1_HAN_MASK
1559 || ((subrange1 & SUBRANGE1_SIP_MASK) != 0
1560 && (codepages0 & CSB_CHINESE) != 0));
1466 } 1561 }
1467 else if (charset == SHIFTJIS_CHARSET) 1562 else if (charset == SHIFTJIS_CHARSET)
1468 { 1563 {
1469 return (subrange1 & SUBRANGE1_JAPANESE_MASK) == SUBRANGE1_JAPANESE_MASK; 1564 return ((subrange1 & SUBRANGE1_JAPANESE_MASK) == SUBRANGE1_JAPANESE_MASK
1565 || ((subrange1 & SUBRANGE1_SIP_MASK) != 0
1566 && (codepages0 & CSB_JAPANESE) != 0));
1470 } 1567 }
1471 else if (charset == HANGEUL_CHARSET) 1568 else if (charset == HANGEUL_CHARSET)
1472 { 1569 {
1473 return (subrange1 & SUBRANGE1_HANGEUL_MASK) == SUBRANGE1_HANGEUL_MASK; 1570 return ((subrange1 & SUBRANGE1_HANGEUL_MASK) == SUBRANGE1_HANGEUL_MASK
1571 || ((subrange1 & SUBRANGE1_SIP_MASK) != 0
1572 && (codepages0 & CSB_KOREAN) != 0));
1474 } 1573 }
1475 1574
1476 return 1; 1575 return 1;
@@ -1620,11 +1719,18 @@ add_font_entity_to_list (ENUMLOGFONTEX *logical_font,
1620 } 1719 }
1621 /* unicode-sip fonts must contain characters in Unicode plane 2. 1720 /* unicode-sip fonts must contain characters in Unicode plane 2.
1622 so look for bit 57 (surrogates) in the Unicode subranges, plus 1721 so look for bit 57 (surrogates) in the Unicode subranges, plus
1623 the bits for CJK ranges that include those characters. */ 1722 the bits for CJK ranges that include those characters or CJK
1723 bits in code-page bit fields. */
1624 else if (EQ (spec_charset, Qunicode_sip)) 1724 else if (EQ (spec_charset, Qunicode_sip))
1625 { 1725 {
1626 if (!(physical_font->ntmFontSig.fsUsb[1] & 0x02000000) 1726 if (!((physical_font->ntmFontSig.fsUsb[1] & 0x02000000)
1627 || !(physical_font->ntmFontSig.fsUsb[1] & 0x28000000)) 1727 && ((physical_font->ntmFontSig.fsUsb[1] & 0x28000000)
1728 /* Some CJK fonts with very good coverage of SIP
1729 characters have only the 0x02000000 bit in USB
1730 set, so we allow them if their code-page bits
1731 indicate support for CJK character sets. */
1732 || (physical_font->ntmFontSig.fsCsb[0]
1733 & (CSB_CHINESE | CSB_JAPANESE | CSB_KOREAN)))))
1628 return 1; 1734 return 1;
1629 } 1735 }
1630 1736
@@ -2328,7 +2434,18 @@ font_supported_scripts (FONTSIGNATURE * sig)
2328 SUBRANGE (53, Qphags_pa); 2434 SUBRANGE (53, Qphags_pa);
2329 /* 54: Enclosed CJK letters and months, 55: CJK Compatibility. */ 2435 /* 54: Enclosed CJK letters and months, 55: CJK Compatibility. */
2330 SUBRANGE (56, Qhangul); 2436 SUBRANGE (56, Qhangul);
2331 /* 57: Surrogates. */ 2437 /* 57: Non-BMP. Processed specially: Several fonts that support CJK
2438 Ideographs Extensions and other extensions, set just this bit and
2439 Latin, and nothing else. */
2440 if (subranges[57 / 32] & (1U << (57 % 32)))
2441 {
2442 if ((sig->fsCsb[0] & CSB_CHINESE))
2443 supported = Fcons (Qhan, supported);
2444 if ((sig->fsCsb[0] & CSB_JAPANESE))
2445 supported = Fcons (Qkana, supported);
2446 if ((sig->fsCsb[0] & CSB_KOREAN))
2447 supported = Fcons (Qhangul, supported);
2448 }
2332 SUBRANGE (58, Qphoenician); 2449 SUBRANGE (58, Qphoenician);
2333 SUBRANGE (59, Qhan); /* There are others, but this is the main one. */ 2450 SUBRANGE (59, Qhan); /* There are others, but this is the main one. */
2334 SUBRANGE (59, Qideographic_description); /* Windows lumps this in. */ 2451 SUBRANGE (59, Qideographic_description); /* Windows lumps this in. */
@@ -2385,7 +2502,7 @@ font_supported_scripts (FONTSIGNATURE * sig)
2385 SUBRANGE (97, Qglagolitic); 2502 SUBRANGE (97, Qglagolitic);
2386 SUBRANGE (98, Qtifinagh); 2503 SUBRANGE (98, Qtifinagh);
2387 /* 99: Yijing Hexagrams. */ 2504 /* 99: Yijing Hexagrams. */
2388 SUBRANGE (99, Qhan); 2505 SUBRANGE (99, Qcjk_misc);
2389 SUBRANGE (100, Qsyloti_nagri); 2506 SUBRANGE (100, Qsyloti_nagri);
2390 SUBRANGE (101, Qlinear_b); 2507 SUBRANGE (101, Qlinear_b);
2391 SUBRANGE (101, Qaegean_number); 2508 SUBRANGE (101, Qaegean_number);
diff --git a/src/w32uniscribe.c b/src/w32uniscribe.c
index 471bdf544d8..751963705d2 100644
--- a/src/w32uniscribe.c
+++ b/src/w32uniscribe.c
@@ -895,7 +895,7 @@ uniscribe_check_otf_1 (HDC context, Lisp_Object script, Lisp_Object lang,
895 Lisp_Object features[2], int *retval) 895 Lisp_Object features[2], int *retval)
896{ 896{
897 SCRIPT_CACHE cache = NULL; 897 SCRIPT_CACHE cache = NULL;
898 OPENTYPE_TAG tags[32], script_tag, lang_tag; 898 OPENTYPE_TAG tags[128], script_tag, lang_tag;
899 int max_tags = ARRAYELTS (tags); 899 int max_tags = ARRAYELTS (tags);
900 int ntags, i, ret = 0; 900 int ntags, i, ret = 0;
901 HRESULT rslt; 901 HRESULT rslt;