diff options
| author | Eli Zaretskii | 2024-08-03 18:11:57 +0300 |
|---|---|---|
| committer | Eli Zaretskii | 2024-08-03 18:11:57 +0300 |
| commit | ff6954b9c833bfeb8032fb772fa08e60e9ec56a8 (patch) | |
| tree | 95c5ac2d3d85e13e34ef5af47f8e7beba1bb1504 /src | |
| parent | ef8276d4247fdf0b1ee19a2c70328710490dd2d6 (diff) | |
| download | emacs-ff6954b9c833bfeb8032fb772fa08e60e9ec56a8.tar.gz emacs-ff6954b9c833bfeb8032fb772fa08e60e9ec56a8.zip | |
Improve font search and handling on MS-Windows
* src/w32font.c: Add commentary about font search on MS-Windows.
(w32font_coverage_ok, add_font_entity_to_list)
(font_supported_scripts): Consider the coverage OK if a font has
only the SIP bit set, but also sets relevant codepage bits in the
CSB bits.
(font_supported_scripts): Fix script for USB bit 99.
* src/font.c (font_parse_fcname, font_parse_family_registry)
[HAVE_NTGUI]: Don't consider hyphenated suffixes of some Windows
fonts as not belonging to the family name.
* src/w32uniscribe.c (uniscribe_check_otf_1): Increase tags[]
array size, to avoid the E_OUTOFMEMORY error for some fonts.
* lisp/international/fontset.el (font-encoding-alist): Add
'unicode-sip'.
Diffstat (limited to 'src')
| -rw-r--r-- | src/font.c | 42 | ||||
| -rw-r--r-- | src/w32font.c | 133 | ||||
| -rw-r--r-- | src/w32uniscribe.c | 2 |
3 files changed, 159 insertions, 18 deletions
diff --git a/src/font.c b/src/font.c index 246fe1c4426..112618a7307 100644 --- a/src/font.c +++ b/src/font.c | |||
| @@ -1627,15 +1627,30 @@ font_parse_fcname (char *name, ptrdiff_t len, Lisp_Object font) | |||
| 1627 | { | 1627 | { |
| 1628 | bool decimal = 0, size_found = 1; | 1628 | bool decimal = 0, size_found = 1; |
| 1629 | for (q = p + 1; *q && *q != ':'; q++) | 1629 | for (q = p + 1; *q && *q != ':'; q++) |
| 1630 | if (! c_isdigit (*q)) | 1630 | { |
| 1631 | { | 1631 | #ifdef HAVE_NTGUI |
| 1632 | if (*q != '.' || decimal) | 1632 | /* MS-Windows has several CJK fonts whose name ends in |
| 1633 | { | 1633 | "-ExtB". It also has fonts whose names end in "-R" or |
| 1634 | size_found = 0; | 1634 | "-B", and one font whose name ends in "-SB". */ |
| 1635 | break; | 1635 | if (q == p + 1 && (strncmp (q, "ExtB", 4) == 0 |
| 1636 | } | 1636 | || strncmp (q, "R", 1) == 0 |
| 1637 | decimal = 1; | 1637 | || strncmp (q, "B", 1) == 0 |
| 1638 | } | 1638 | || strncmp (q, "SB", 2) == 0)) |
| 1639 | { | ||
| 1640 | size_found = 0; | ||
| 1641 | break; | ||
| 1642 | } | ||
| 1643 | #endif | ||
| 1644 | if (! c_isdigit (*q)) | ||
| 1645 | { | ||
| 1646 | if (*q != '.' || decimal) | ||
| 1647 | { | ||
| 1648 | size_found = 0; | ||
| 1649 | break; | ||
| 1650 | } | ||
| 1651 | decimal = 1; | ||
| 1652 | } | ||
| 1653 | } | ||
| 1639 | if (size_found) | 1654 | if (size_found) |
| 1640 | { | 1655 | { |
| 1641 | family_end = p; | 1656 | family_end = p; |
| @@ -2000,6 +2015,15 @@ font_parse_family_registry (Lisp_Object family, Lisp_Object registry, Lisp_Objec | |||
| 2000 | len = SBYTES (family); | 2015 | len = SBYTES (family); |
| 2001 | p0 = SSDATA (family); | 2016 | p0 = SSDATA (family); |
| 2002 | p1 = strchr (p0, '-'); | 2017 | p1 = strchr (p0, '-'); |
| 2018 | #ifdef HAVE_NTGUI | ||
| 2019 | /* MS-Windows has fonts whose family name ends in "-ExtB" and | ||
| 2020 | other suffixes which include a hyphen. */ | ||
| 2021 | if (p1 && (strcmp (p1, "-ExtB") == 0 | ||
| 2022 | || strcmp (p1, "-R") == 0 | ||
| 2023 | || strcmp (p1, "-B") == 0 | ||
| 2024 | || strcmp (p1, "-SB") == 0)) | ||
| 2025 | p1 = NULL; | ||
| 2026 | #endif | ||
| 2003 | if (p1) | 2027 | if (p1) |
| 2004 | { | 2028 | { |
| 2005 | if ((*p0 != '*' && p1 - p0 > 0) | 2029 | if ((*p0 != '*' && p1 - p0 > 0) |
diff --git a/src/w32font.c b/src/w32font.c index ccbd3837afb..efb42d80336 100644 --- a/src/w32font.c +++ b/src/w32font.c | |||
| @@ -809,6 +809,93 @@ w32font_otf_drive (struct font *font, Lisp_Object features, | |||
| 809 | bool alternate_subst); | 809 | bool alternate_subst); |
| 810 | */ | 810 | */ |
| 811 | 811 | ||
| 812 | /* Notes about the way fonts are found on MS-Windows when we have a | ||
| 813 | character unsupported by the default font. | ||
| 814 | |||
| 815 | Since we don't use Fontconfig on MS-Windows, we cannot efficiently | ||
| 816 | search for fonts which support certain characters, because Windows | ||
| 817 | doesn't store this information anywhere, and we can only know whether | ||
| 818 | a font supports some character if we actually open the font, which is | ||
| 819 | expensive and slow. Instead, we rely on font information Windows | ||
| 820 | exposes to the API we use to enumerate available fonts, | ||
| 821 | EnumFontFamiliesEx. This information includes two bitmapped attributes: | ||
| 822 | |||
| 823 | USB (which stands for Unicode Subset Bitfields) -- this is an array | ||
| 824 | of 4 32-bit values, 128 bits in total, where each bit | ||
| 825 | corresponds to some block (sometimes several related blocks) of | ||
| 826 | Unicode codepoints which the font claims to support. | ||
| 827 | CSB (which stands for Codepage Bitfields) -- this is an array of 2 | ||
| 828 | 32-bit values (64 bits), where each bit corresponds to some | ||
| 829 | codepage whose characters the font claims to support. | ||
| 830 | |||
| 831 | When Emacs needs to find a font for a character, it enumerates the | ||
| 832 | available fonts, filtering the fonts by examining these bitmaps and a | ||
| 833 | few other font attributes. The script of the character is converted | ||
| 834 | to the corresponding bits in USB, and a font that has any of these | ||
| 835 | bits set is deemed as a candidate; see font_supported_scripts, which | ||
| 836 | is called by font_matches_spec. The problem with this strategy is | ||
| 837 | twofold: | ||
| 838 | |||
| 839 | - Some Unicode blocks have no USB bits. For the scripts | ||
| 840 | corresponding to those blocks we use a small cache of fonts known | ||
| 841 | to support those scripts. This cache is calculated once, and needs | ||
| 842 | not be recalculated as long as no fonts are installed or deleted | ||
| 843 | (it can be saved in your init file and reused for the following | ||
| 844 | sessions). See the function w32-find-non-USB-fonts. Note that | ||
| 845 | for that function to work well, 'script-representative-chars' | ||
| 846 | should include the important characters for each script which has | ||
| 847 | no USB bits defined. | ||
| 848 | |||
| 849 | - Some fonts claim support for a block, but don't support it well. | ||
| 850 | Other fonts support some blocks very well, but don't set the | ||
| 851 | corresponding USB bits for the blocks. For these we use some | ||
| 852 | heuristics: | ||
| 853 | |||
| 854 | . For the few fonts that claim coverage, but don't provide it, we | ||
| 855 | either recognize them by name and reject their false claims, or | ||
| 856 | let users set face-ignored-fonts to ignore those fonts. | ||
| 857 | |||
| 858 | . For fonts that support some blocks very well, but don't set | ||
| 859 | their USB bits, we examine the CSB bits instead. This is | ||
| 860 | particularly important for some CJK fonts with good support in | ||
| 861 | the SIP area: they only set the SIP bit (bit 57) in the USB. We | ||
| 862 | consider those as candidates for CJK scripts ('han', 'kana', | ||
| 863 | etc.) if the CSB bits are set for the corresponding CJK | ||
| 864 | codepages. | ||
| 865 | |||
| 866 | Eventually, some characters could still appear as "tofu" (a box with | ||
| 867 | the character's hex codepoint), even though a font might be available | ||
| 868 | on the system which supports the character. This is because the | ||
| 869 | above strategy, with all its heuristics and tricks, sometimes fails. | ||
| 870 | For example, it could fail if the system has several fonts installed | ||
| 871 | whose coverage of some blocks is incomplete -- Emacs could select | ||
| 872 | such a font based on its USB bits, and realize the font has no glyph | ||
| 873 | for a character only when it's too late. This happens because when | ||
| 874 | several fonts claim coverage of the same Unicode block, Emacs on | ||
| 875 | Windows has no way of preferring one over the other, if they all | ||
| 876 | support the same values of size, weight, and slant. So Emacs usually | ||
| 877 | selects the first such candidate, which could lack glyphs for the | ||
| 878 | characters Emacs needs to display. Since we avoid naming non-free | ||
| 879 | Windows fonts in Emacs's sources, this cannot be fixed in the | ||
| 880 | default fontset setup provided by Emacs: we cannot arrange for the | ||
| 881 | "good" fonts to be used in all such cases, because that would mean | ||
| 882 | naming those fonts. The solution for these issues is to customize the | ||
| 883 | default fontset using set-fontset-font, to force Emacs to use a font | ||
| 884 | known to support some characters. | ||
| 885 | |||
| 886 | One other Windows-specific issue is the fact that some Windows fonts | ||
| 887 | have hyphens in their names. Emacs generally follows the XLFD | ||
| 888 | specifications, where a hyphen is used as separator between segments | ||
| 889 | of a font spec. There are a few places in the code in font.c where | ||
| 890 | Emacs handles such font names specially, and it currently knows about | ||
| 891 | font names documented for Windows versions up to and including 11. | ||
| 892 | See this page for the latest update: | ||
| 893 | |||
| 894 | https://learn.microsoft.com/en-us/typography/fonts/windows_11_font_list | ||
| 895 | |||
| 896 | If more fonts are added to Windows that have hyphens in their names, | ||
| 897 | the code in font.c will need to be updated. */ | ||
| 898 | |||
| 812 | /* Internal implementation of w32font_list. | 899 | /* Internal implementation of w32font_list. |
| 813 | Additional parameter opentype_only restricts the returned fonts to | 900 | Additional parameter opentype_only restricts the returned fonts to |
| 814 | opentype fonts, which can be used with the Uniscribe backend. */ | 901 | opentype fonts, which can be used with the Uniscribe backend. */ |
| @@ -1455,22 +1542,34 @@ static int | |||
| 1455 | w32font_coverage_ok (FONTSIGNATURE * coverage, BYTE charset) | 1542 | w32font_coverage_ok (FONTSIGNATURE * coverage, BYTE charset) |
| 1456 | { | 1543 | { |
| 1457 | DWORD subrange1 = coverage->fsUsb[1]; | 1544 | DWORD subrange1 = coverage->fsUsb[1]; |
| 1545 | DWORD codepages0 = coverage->fsCsb[0]; | ||
| 1458 | 1546 | ||
| 1459 | #define SUBRANGE1_HAN_MASK 0x08000000 | 1547 | #define SUBRANGE1_HAN_MASK 0x08000000 |
| 1460 | #define SUBRANGE1_HANGEUL_MASK 0x01000000 | 1548 | #define SUBRANGE1_HANGEUL_MASK 0x01000000 |
| 1461 | #define SUBRANGE1_JAPANESE_MASK (0x00060000 | SUBRANGE1_HAN_MASK) | 1549 | #define SUBRANGE1_JAPANESE_MASK (0x00060000 | SUBRANGE1_HAN_MASK) |
| 1550 | #define SUBRANGE1_SIP_MASK 0x02000000 | ||
| 1462 | 1551 | ||
| 1552 | /* We consider the coverage to be OK if either (a) subrange1 has the | ||
| 1553 | bits set that correspond to CHARSET, or (b) subrange1 indicates SIP | ||
| 1554 | support and codepages0 has one or more bits set corresponding to | ||
| 1555 | CHARSET. */ | ||
| 1463 | if (charset == GB2312_CHARSET || charset == CHINESEBIG5_CHARSET) | 1556 | if (charset == GB2312_CHARSET || charset == CHINESEBIG5_CHARSET) |
| 1464 | { | 1557 | { |
| 1465 | return (subrange1 & SUBRANGE1_HAN_MASK) == SUBRANGE1_HAN_MASK; | 1558 | return ((subrange1 & SUBRANGE1_HAN_MASK) == SUBRANGE1_HAN_MASK |
| 1559 | || ((subrange1 & SUBRANGE1_SIP_MASK) != 0 | ||
| 1560 | && (codepages0 & CSB_CHINESE) != 0)); | ||
| 1466 | } | 1561 | } |
| 1467 | else if (charset == SHIFTJIS_CHARSET) | 1562 | else if (charset == SHIFTJIS_CHARSET) |
| 1468 | { | 1563 | { |
| 1469 | return (subrange1 & SUBRANGE1_JAPANESE_MASK) == SUBRANGE1_JAPANESE_MASK; | 1564 | return ((subrange1 & SUBRANGE1_JAPANESE_MASK) == SUBRANGE1_JAPANESE_MASK |
| 1565 | || ((subrange1 & SUBRANGE1_SIP_MASK) != 0 | ||
| 1566 | && (codepages0 & CSB_JAPANESE) != 0)); | ||
| 1470 | } | 1567 | } |
| 1471 | else if (charset == HANGEUL_CHARSET) | 1568 | else if (charset == HANGEUL_CHARSET) |
| 1472 | { | 1569 | { |
| 1473 | return (subrange1 & SUBRANGE1_HANGEUL_MASK) == SUBRANGE1_HANGEUL_MASK; | 1570 | return ((subrange1 & SUBRANGE1_HANGEUL_MASK) == SUBRANGE1_HANGEUL_MASK |
| 1571 | || ((subrange1 & SUBRANGE1_SIP_MASK) != 0 | ||
| 1572 | && (codepages0 & CSB_KOREAN) != 0)); | ||
| 1474 | } | 1573 | } |
| 1475 | 1574 | ||
| 1476 | return 1; | 1575 | return 1; |
| @@ -1620,11 +1719,18 @@ add_font_entity_to_list (ENUMLOGFONTEX *logical_font, | |||
| 1620 | } | 1719 | } |
| 1621 | /* unicode-sip fonts must contain characters in Unicode plane 2. | 1720 | /* unicode-sip fonts must contain characters in Unicode plane 2. |
| 1622 | so look for bit 57 (surrogates) in the Unicode subranges, plus | 1721 | so look for bit 57 (surrogates) in the Unicode subranges, plus |
| 1623 | the bits for CJK ranges that include those characters. */ | 1722 | the bits for CJK ranges that include those characters or CJK |
| 1723 | bits in code-page bit fields. */ | ||
| 1624 | else if (EQ (spec_charset, Qunicode_sip)) | 1724 | else if (EQ (spec_charset, Qunicode_sip)) |
| 1625 | { | 1725 | { |
| 1626 | if (!(physical_font->ntmFontSig.fsUsb[1] & 0x02000000) | 1726 | if (!((physical_font->ntmFontSig.fsUsb[1] & 0x02000000) |
| 1627 | || !(physical_font->ntmFontSig.fsUsb[1] & 0x28000000)) | 1727 | && ((physical_font->ntmFontSig.fsUsb[1] & 0x28000000) |
| 1728 | /* Some CJK fonts with very good coverage of SIP | ||
| 1729 | characters have only the 0x02000000 bit in USB | ||
| 1730 | set, so we allow them if their code-page bits | ||
| 1731 | indicate support for CJK character sets. */ | ||
| 1732 | || (physical_font->ntmFontSig.fsCsb[0] | ||
| 1733 | & (CSB_CHINESE | CSB_JAPANESE | CSB_KOREAN))))) | ||
| 1628 | return 1; | 1734 | return 1; |
| 1629 | } | 1735 | } |
| 1630 | 1736 | ||
| @@ -2328,7 +2434,18 @@ font_supported_scripts (FONTSIGNATURE * sig) | |||
| 2328 | SUBRANGE (53, Qphags_pa); | 2434 | SUBRANGE (53, Qphags_pa); |
| 2329 | /* 54: Enclosed CJK letters and months, 55: CJK Compatibility. */ | 2435 | /* 54: Enclosed CJK letters and months, 55: CJK Compatibility. */ |
| 2330 | SUBRANGE (56, Qhangul); | 2436 | SUBRANGE (56, Qhangul); |
| 2331 | /* 57: Surrogates. */ | 2437 | /* 57: Non-BMP. Processed specially: Several fonts that support CJK |
| 2438 | Ideographs Extensions and other extensions, set just this bit and | ||
| 2439 | Latin, and nothing else. */ | ||
| 2440 | if (subranges[57 / 32] & (1U << (57 % 32))) | ||
| 2441 | { | ||
| 2442 | if ((sig->fsCsb[0] & CSB_CHINESE)) | ||
| 2443 | supported = Fcons (Qhan, supported); | ||
| 2444 | if ((sig->fsCsb[0] & CSB_JAPANESE)) | ||
| 2445 | supported = Fcons (Qkana, supported); | ||
| 2446 | if ((sig->fsCsb[0] & CSB_KOREAN)) | ||
| 2447 | supported = Fcons (Qhangul, supported); | ||
| 2448 | } | ||
| 2332 | SUBRANGE (58, Qphoenician); | 2449 | SUBRANGE (58, Qphoenician); |
| 2333 | SUBRANGE (59, Qhan); /* There are others, but this is the main one. */ | 2450 | SUBRANGE (59, Qhan); /* There are others, but this is the main one. */ |
| 2334 | SUBRANGE (59, Qideographic_description); /* Windows lumps this in. */ | 2451 | SUBRANGE (59, Qideographic_description); /* Windows lumps this in. */ |
| @@ -2385,7 +2502,7 @@ font_supported_scripts (FONTSIGNATURE * sig) | |||
| 2385 | SUBRANGE (97, Qglagolitic); | 2502 | SUBRANGE (97, Qglagolitic); |
| 2386 | SUBRANGE (98, Qtifinagh); | 2503 | SUBRANGE (98, Qtifinagh); |
| 2387 | /* 99: Yijing Hexagrams. */ | 2504 | /* 99: Yijing Hexagrams. */ |
| 2388 | SUBRANGE (99, Qhan); | 2505 | SUBRANGE (99, Qcjk_misc); |
| 2389 | SUBRANGE (100, Qsyloti_nagri); | 2506 | SUBRANGE (100, Qsyloti_nagri); |
| 2390 | SUBRANGE (101, Qlinear_b); | 2507 | SUBRANGE (101, Qlinear_b); |
| 2391 | SUBRANGE (101, Qaegean_number); | 2508 | SUBRANGE (101, Qaegean_number); |
diff --git a/src/w32uniscribe.c b/src/w32uniscribe.c index 471bdf544d8..751963705d2 100644 --- a/src/w32uniscribe.c +++ b/src/w32uniscribe.c | |||
| @@ -895,7 +895,7 @@ uniscribe_check_otf_1 (HDC context, Lisp_Object script, Lisp_Object lang, | |||
| 895 | Lisp_Object features[2], int *retval) | 895 | Lisp_Object features[2], int *retval) |
| 896 | { | 896 | { |
| 897 | SCRIPT_CACHE cache = NULL; | 897 | SCRIPT_CACHE cache = NULL; |
| 898 | OPENTYPE_TAG tags[32], script_tag, lang_tag; | 898 | OPENTYPE_TAG tags[128], script_tag, lang_tag; |
| 899 | int max_tags = ARRAYELTS (tags); | 899 | int max_tags = ARRAYELTS (tags); |
| 900 | int ntags, i, ret = 0; | 900 | int ntags, i, ret = 0; |
| 901 | HRESULT rslt; | 901 | HRESULT rslt; |