diff options
| author | Eli Zaretskii | 2023-08-05 17:55:56 +0300 |
|---|---|---|
| committer | Eli Zaretskii | 2023-08-05 17:55:56 +0300 |
| commit | a06a2950e168dddcbf1c3cd14697875d93a4f9ff (patch) | |
| tree | af9e6fc2c54a97ce6996eb731f5abae810e93770 | |
| parent | 60e5f212182ca2f41f89a4315075e38433bc8ac0 (diff) | |
| download | emacs-a06a2950e168dddcbf1c3cd14697875d93a4f9ff.tar.gz emacs-a06a2950e168dddcbf1c3cd14697875d93a4f9ff.zip | |
Allow user control on char-width of "ambiguous" characters
* src/character.c (syms_of_character) <ambiguous-width-chars>: New
char-table.
* lisp/international/characters.el (ambiguous-width-chars): Fill
the table.
(update-cjk-ambiguous-char-widths): New function.
(cjk-ambiguous-chars-are-wide): New defcustom, uses
'update-cjk-ambiguous-char-widths' as its :set function.
(use-cjk-char-width-table): Obey 'cjk-ambiguous-chars-are-wide' by
adding another child char-table for ambiguous-width characters,
where the width is set according to the option.
* lisp/language/chinese.el ("Chinese-GB", "Chinese-BIG5")
("Chinese-CNS", "Chinese-EUC-TW", "Chinese-GBK"):
* lisp/language/japanese.el ("Japanese"):
* lisp/language/korean.el ("Korean"): Add new language-info slot
'cjk-locale-symbol'.
Bug#64420
| -rw-r--r-- | lisp/international/characters.el | 217 | ||||
| -rw-r--r-- | lisp/language/chinese.el | 5 | ||||
| -rw-r--r-- | lisp/language/japanese.el | 1 | ||||
| -rw-r--r-- | lisp/language/korean.el | 1 | ||||
| -rw-r--r-- | src/character.c | 8 |
5 files changed, 231 insertions, 1 deletion
diff --git a/lisp/international/characters.el b/lisp/international/characters.el
index 9aea5e27063..1aa570ca59a 100644
--- a/lisp/international/characters.el
+++ b/lisp/international/characters.el
| @@ -1394,6 +1394,174 @@ with L, LRE, or LRO Unicode bidi character type.") | |||
| 1394 | (dolist (elt l) | 1394 | (dolist (elt l) |
| 1395 | (set-char-table-range char-width-table elt 2))) | 1395 | (set-char-table-range char-width-table elt 2))) |
| 1396 | 1396 | ||
| 1397 | ;; A: East Asian "Ambiguous" characters. | ||
| 1398 | (let ((l '((#x00A1 . #x00A1) | ||
| 1399 | (#x00A4 . #x00A4) | ||
| 1400 | (#x00A7 . #x00A8) | ||
| 1401 | (#x00AA . #x00AA) | ||
| 1402 | (#x00AD . #x00AE) | ||
| 1403 | (#x00B0 . #x00B4) | ||
| 1404 | (#x00B6 . #x00BA) | ||
| 1405 | (#x00BC . #x00BF) | ||
| 1406 | (#x00C6 . #x00C6) | ||
| 1407 | (#x00D0 . #x00D0) | ||
| 1408 | (#x00D7 . #x00D8) | ||
| 1409 | (#x00E0 . #x00E1) | ||
| 1410 | (#x00E6 . #x00E6) | ||
| 1411 | (#x00E8 . #x00EA) | ||
| 1412 | (#x00EC . #x00ED) | ||
| 1413 | (#x00F0 . #x00F0) | ||
| 1414 | (#x00F2 . #x00F3) | ||
| 1415 | (#x00F7 . #x00FA) | ||
| 1416 | (#x00FC . #x00FC) | ||
| 1417 | (#x00FE . #x00FE) | ||
| 1418 | (#x0101 . #x0101) | ||
| 1419 | (#x0111 . #x0111) | ||
| 1420 | (#x0113 . #x0113) | ||
| 1421 | (#x011B . #x011B) | ||
| 1422 | (#x0126 . #x0127) | ||
| 1423 | (#x012B . #x012B) | ||
| 1424 | (#x0131 . #x0133) | ||
| 1425 | (#x0138 . #x0138) | ||
| 1426 | (#x013F . #x0142) | ||
| 1427 | (#x0144 . #x0144) | ||
| 1428 | (#x0148 . #x014B) | ||
| 1429 | (#x014D . #x014D) | ||
| 1430 | (#x0152 . #x0153) | ||
| 1431 | (#x0166 . #x0167) | ||
| 1432 | (#x016B . #x016B) | ||
| 1433 | (#x01CE . #x01CE) | ||
| 1434 | (#x01D0 . #x01D0) | ||
| 1435 | (#x01D2 . #x01D2) | ||
| 1436 | (#x01D4 . #x01D4) | ||
| 1437 | (#x01D6 . #x01D6) | ||
| 1438 | (#x01D8 . #x01D8) | ||
| 1439 | (#x01DA . #x01DA) | ||
| 1440 | (#x01DC . #x01DC) | ||
| 1441 | (#x0251 . #x0251) | ||
| 1442 | (#x0261 . #x0261) | ||
| 1443 | (#x02C4 . #x02C4) | ||
| 1444 | (#x02C7 . #x02C7) | ||
| 1445 | (#x02C9 . #x02CB) | ||
| 1446 | (#x02CD . #x02CD) | ||
| 1447 | (#x02D0 . #x02D0) | ||
| 1448 | (#x02D8 . #x02DB) | ||
| 1449 | (#x02DD . #x02DD) | ||
| 1450 | (#x02DF . #x02DF) | ||
| 1451 | (#x0300 . #x036F) | ||
| 1452 | (#x0391 . #x03A1) | ||
| 1453 | (#x03A3 . #x03A9) | ||
| 1454 | (#x03B1 . #x03C1) | ||
| 1455 | (#x03C3 . #x03C9) | ||
| 1456 | (#x0401 . #x0401) | ||
| 1457 | (#x0410 . #x044F) | ||
| 1458 | (#x0451 . #x0451) | ||
| 1459 | (#x2010 . #x2010) | ||
| 1460 | (#x2013 . #x2016) | ||
| 1461 | (#x2018 . #x2019) | ||
| 1462 | (#x201C . #x201D) | ||
| 1463 | (#x2020 . #x2022) | ||
| 1464 | (#x2024 . #x2027) | ||
| 1465 | (#x2030 . #x2030) | ||
| 1466 | (#x2032 . #x2033) | ||
| 1467 | (#x2035 . #x2035) | ||
| 1468 | (#x203E . #x203E) | ||
| 1469 | (#x2074 . #x2074) | ||
| 1470 | (#x207F . #x207F) | ||
| 1471 | (#x2081 . #x2084) | ||
| 1472 | (#x20AC . #x20AC) | ||
| 1473 | (#x2103 . #x2103) | ||
| 1474 | (#x2105 . #x2105) | ||
| 1475 | (#x2109 . #x2109) | ||
| 1476 | (#x2113 . #x2113) | ||
| 1477 | (#x2116 . #x2116) | ||
| 1478 | (#x2121 . #x2122) | ||
| 1479 | (#x2126 . #x2126) | ||
| 1480 | (#x212B . #x212B) | ||
| 1481 | (#x2153 . #x2154) | ||
| 1482 | (#x215B . #x215E) | ||
| 1483 | (#x2160 . #x216B) | ||
| 1484 | (#x2170 . #x2179) | ||
| 1485 | (#x2189 . #x2189) | ||
| 1486 | (#x2190 . #x2199) | ||
| 1487 | (#x21B8 . #x21B9) | ||
| 1488 | (#x21D2 . #x21D2) | ||
| 1489 | (#x21D4 . #x21D4) | ||
| 1490 | (#x21E7 . #x21E7) | ||
| 1491 | (#x2200 . #x2200) | ||
| 1492 | (#x2202 . #x2203) | ||
| 1493 | (#x2207 . #x2208) | ||
| 1494 | (#x220B . #x220B) | ||
| 1495 | (#x220F . #x220F) | ||
| 1496 | (#x2211 . #x2211) | ||
| 1497 | (#x2215 . #x2215) | ||
| 1498 | (#x221A . #x221A) | ||
| 1499 | (#x221D . #x2220) | ||
| 1500 | (#x2223 . #x2223) | ||
| 1501 | (#x2225 . #x2225) | ||
| 1502 | (#x2227 . #x222C) | ||
| 1503 | (#x222E . #x222E) | ||
| 1504 | (#x2234 . #x2237) | ||
| 1505 | (#x223C . #x223D) | ||
| 1506 | (#x2248 . #x2248) | ||
| 1507 | (#x224C . #x224C) | ||
| 1508 | (#x2252 . #x2252) | ||
| 1509 | (#x2260 . #x2261) | ||
| 1510 | (#x2264 . #x2267) | ||
| 1511 | (#x226A . #x226B) | ||
| 1512 | (#x226E . #x226F) | ||
| 1513 | (#x2282 . #x2283) | ||
| 1514 | (#x2286 . #x2287) | ||
| 1515 | (#x2295 . #x2295) | ||
| 1516 | (#x2299 . #x2299) | ||
| 1517 | (#x22A5 . #x22A5) | ||
| 1518 | (#x22BF . #x22BF) | ||
| 1519 | (#x2312 . #x2312) | ||
| 1520 | (#x2460 . #x24E9) | ||
| 1521 | (#x24EB . #x254B) | ||
| 1522 | (#x2550 . #x2573) | ||
| 1523 | (#x2580 . #x258F) | ||
| 1524 | (#x2592 . #x2595) | ||
| 1525 | (#x25A0 . #x25A1) | ||
| 1526 | (#x25A3 . #x25A9) | ||
| 1527 | (#x25B2 . #x25B3) | ||
| 1528 | (#x25B6 . #x25B7) | ||
| 1529 | (#x25BC . #x25BD) | ||
| 1530 | (#x25C0 . #x25C1) | ||
| 1531 | (#x25C6 . #x25C8) | ||
| 1532 | (#x25CE . #x25D1) | ||
| 1533 | (#x25E2 . #x25E5) | ||
| 1534 | (#x25EF . #x25EF) | ||
| 1535 | (#x2605 . #x2606) | ||
| 1536 | (#x260E . #x260F) | ||
| 1537 | (#x261C . #x261C) | ||
| 1538 | (#x261E . #x261E) | ||
| 1539 | (#x2640 . #x2640) | ||
| 1540 | (#x2642 . #x2642) | ||
| 1541 | (#x2660 . #x2661) | ||
| 1542 | (#x2663 . #x2665) | ||
| 1543 | (#x2667 . #x266A) | ||
| 1544 | (#x266C . #x266D) | ||
| 1545 | (#x266F . #x266F) | ||
| 1546 | (#x269E . #x269F) | ||
| 1547 | (#x26BF . #x26BF) | ||
| 1548 | (#x26C6 . #x26CD) | ||
| 1549 | (#x26CF . #x26D3) | ||
| 1550 | (#x26D5 . #x26E1) | ||
| 1551 | (#x26E3 . #x26E3) | ||
| 1552 | (#x26E8 . #x26E9) | ||
| 1553 | (#x26EB . #x26F1) | ||
| 1554 | (#x26F4 . #x26F4) | ||
| 1555 | (#x26F6 . #x26F9) | ||
| 1556 | (#x26FB . #x26FC) | ||
| 1557 | (#x26FE . #x26FF) | ||
| 1558 | (#x273D . #x273D) | ||
| 1559 | (#x2776 . #x277F) | ||
| 1560 | (#x2B56 . #x2B59) | ||
| 1561 | (#x3248 . #x324F)))) | ||
| 1562 | (dolist (elt l) | ||
| 1563 | (set-char-table-range ambiguous-width-chars elt t))) | ||
| 1564 | |||
| 1397 | ;; Other double width | 1565 | ;; Other double width |
| 1398 | ;;(map-charset-chars | 1566 | ;;(map-charset-chars |
| 1399 | ;; (lambda (range ignore) (set-char-table-range char-width-table range 2)) | 1567 | ;; (lambda (range ignore) (set-char-table-range char-width-table range 2)) |
| @@ -1427,6 +1595,45 @@ with L, LRE, or LRO Unicode bidi character type.") | |||
| 1427 | (chinese-cns11643-1 (#x2121 . #x427E))) | 1595 | (chinese-cns11643-1 (#x2121 . #x427E))) |
| 1428 | (ko_KR nil (korean-ksc5601 (#x2121 . #x2C7E))))) | 1596 | (ko_KR nil (korean-ksc5601 (#x2121 . #x2C7E))))) |
| 1429 | 1597 | ||
| 1598 | (defun update-cjk-ambiguous-char-widths (locale-name) | ||
| 1599 | "Update character widths for LOCALE-NAME using `ambiguous-width-chars'. | ||
| 1600 | LOCALE-NAME is the symbol of a CJK locale, such as \\='zh_CN." | ||
| 1601 | (let ((slot (assq locale-name cjk-char-width-table-list))) | ||
| 1602 | (or slot (error "Unknown locale for CJK language environment: %s" | ||
| 1603 | locale-name)) | ||
| 1604 | ;; Force recomputation of child table in 'use-cjk-char-width-table'. | ||
| 1605 | (setcar (cdr slot) nil) | ||
| 1606 | (use-cjk-char-width-table locale-name))) | ||
| 1607 | |||
| 1608 | |||
| 1609 | (defcustom cjk-ambiguous-chars-are-wide t | ||
| 1610 | "Whether the \"ambiguous-width\" characters take 2 columns on display. | ||
| 1611 | |||
| 1612 | Some of the characters are defined by Unicode as being of \"ambiguous\" | ||
| 1613 | width: the actual width, either 1 column or 2 columns, should be | ||
| 1614 | determined at display time, depending on the language context. | ||
| 1615 | If this variable is non-nil, Emacs will consider these characters as | ||
| 1616 | full-width, i.e. taking 2 columns; otherwise they are narrow characters | ||
| 1617 | taking 1 column on display. Which value is correct depends on the | ||
| 1618 | fonts being used. In some CJK locales the fonts are set so that | ||
| 1619 | these characters are displayed as full-width. This setting is most | ||
| 1620 | important for text-mode frames, because there Emacs cannot access the | ||
| 1621 | metrics of the fonts used by the console or the terminal emulator. | ||
| 1622 | |||
| 1623 | Do not set this directly via `setq'; instead, use `setopt' or the | ||
| 1624 | Customize commands. Alternatively, call `update-cjk-ambiguous-char-widths' | ||
| 1625 | passing it the symbol of the current locale environment, after changing | ||
| 1626 | the value of the variable with `setq'." | ||
| 1627 | :type 'boolean | ||
| 1628 | :set (lambda (symbol value) | ||
| 1629 | (set-default symbol value) | ||
| 1630 | (let ((locsym (get-language-info current-language-environment | ||
| 1631 | 'cjk-locale-symbol))) | ||
| 1632 | (when locsym | ||
| 1633 | (update-cjk-ambiguous-char-widths locsym)))) | ||
| 1634 | :version "30.1" | ||
| 1635 | :group 'display) | ||
| 1636 | |||
| 1430 | ;; Internal use only. | 1637 | ;; Internal use only. |
| 1431 | ;; Setup char-width-table appropriate for a language environment | 1638 | ;; Setup char-width-table appropriate for a language environment |
| 1432 | ;; corresponding to LOCALE-NAME (symbol). | 1639 | ;; corresponding to LOCALE-NAME (symbol). |
| @@ -1448,7 +1655,15 @@ with L, LRE, or LRO Unicode bidi character type.") | |||
| 1448 | (car code-range) (cdr code-range))))) | 1655 | (car code-range) (cdr code-range))))) |
| 1449 | (optimize-char-table table) | 1656 | (optimize-char-table table) |
| 1450 | (set-char-table-parent table char-width-table) | 1657 | (set-char-table-parent table char-width-table) |
| 1451 | (setcar (cdr slot) table))) | 1658 | (let ((tbl (make-char-table nil))) |
| 1659 | (map-char-table | ||
| 1660 | (lambda (range _val) | ||
| 1661 | (set-char-table-range tbl range | ||
| 1662 | (if cjk-ambiguous-chars-are-wide 2 1))) | ||
| 1663 | ambiguous-width-chars) | ||
| 1664 | (optimize-char-table tbl) | ||
| 1665 | (set-char-table-parent tbl table) | ||
| 1666 | (setcar (cdr slot) tbl)))) | ||
| 1452 | (setq char-width-table (nth 1 slot)))) | 1667 | (setq char-width-table (nth 1 slot)))) |
| 1453 | 1668 | ||
| 1454 | (defun use-default-char-width-table () | 1669 | (defun use-default-char-width-table () |
diff --git a/lisp/language/chinese.el b/lisp/language/chinese.el
index 26f1194aa4c..e35f3f179ad 100644
--- a/lisp/language/chinese.el
+++ b/lisp/language/chinese.el
| @@ -111,6 +111,7 @@ | |||
| 111 | (set-language-info-alist | 111 | (set-language-info-alist |
| 112 | "Chinese-GB" '((charset chinese-gb2312 chinese-sisheng) | 112 | "Chinese-GB" '((charset chinese-gb2312 chinese-sisheng) |
| 113 | (iso639-language . zh) | 113 | (iso639-language . zh) |
| 114 | (cjk-locale-symbol . zh_CN) | ||
| 114 | (setup-function . (lambda () | 115 | (setup-function . (lambda () |
| 115 | (use-cjk-char-width-table 'zh_CN))) | 116 | (use-cjk-char-width-table 'zh_CN))) |
| 116 | (exit-function . use-default-char-width-table) | 117 | (exit-function . use-default-char-width-table) |
| @@ -142,6 +143,7 @@ | |||
| 142 | (set-language-info-alist | 143 | (set-language-info-alist |
| 143 | "Chinese-BIG5" '((charset chinese-big5-1 chinese-big5-2) | 144 | "Chinese-BIG5" '((charset chinese-big5-1 chinese-big5-2) |
| 144 | (iso639-language . zh) | 145 | (iso639-language . zh) |
| 146 | (cjk-locale-symbol . zh_HK) | ||
| 145 | (setup-function . (lambda () | 147 | (setup-function . (lambda () |
| 146 | (use-cjk-char-width-table 'zh_HK))) | 148 | (use-cjk-char-width-table 'zh_HK))) |
| 147 | (exit-function . use-default-char-width-table) | 149 | (exit-function . use-default-char-width-table) |
| @@ -198,6 +200,7 @@ | |||
| 198 | chinese-cns11643-5 chinese-cns11643-6 | 200 | chinese-cns11643-5 chinese-cns11643-6 |
| 199 | chinese-cns11643-7) | 201 | chinese-cns11643-7) |
| 200 | (iso639-language . zh) | 202 | (iso639-language . zh) |
| 203 | (cjk-locale-symbol . zh_TW) | ||
| 201 | (setup-function . (lambda () | 204 | (setup-function . (lambda () |
| 202 | (use-cjk-char-width-table 'zh_TW))) | 205 | (use-cjk-char-width-table 'zh_TW))) |
| 203 | (exit-function . use-default-char-width-table) | 206 | (exit-function . use-default-char-width-table) |
| @@ -218,6 +221,7 @@ accepts Big5 for input also (which is then converted to CNS).")) | |||
| 218 | chinese-cns11643-5 chinese-cns11643-6 | 221 | chinese-cns11643-5 chinese-cns11643-6 |
| 219 | chinese-cns11643-7 chinese-big5-1 chinese-big5-2) | 222 | chinese-cns11643-7 chinese-big5-1 chinese-big5-2) |
| 220 | (iso639-language . zh) | 223 | (iso639-language . zh) |
| 224 | (cjk-locale-symbol . zh_TW) | ||
| 221 | (setup-function . (lambda () | 225 | (setup-function . (lambda () |
| 222 | (use-cjk-char-width-table 'zh_TW))) | 226 | (use-cjk-char-width-table 'zh_TW))) |
| 223 | (exit-function . use-default-char-width-table) | 227 | (exit-function . use-default-char-width-table) |
| @@ -248,6 +252,7 @@ converted to CNS).")) | |||
| 248 | (set-language-info-alist | 252 | (set-language-info-alist |
| 249 | "Chinese-GBK" '((charset chinese-gbk) | 253 | "Chinese-GBK" '((charset chinese-gbk) |
| 250 | (iso639-language . zh) | 254 | (iso639-language . zh) |
| 255 | (cjk-locale-symbol . zh_CN) | ||
| 251 | (setup-function . (lambda () | 256 | (setup-function . (lambda () |
| 252 | (use-cjk-char-width-table 'zh_CN))) | 257 | (use-cjk-char-width-table 'zh_CN))) |
| 253 | (exit-function . use-default-char-width-table) | 258 | (exit-function . use-default-char-width-table) |
diff --git a/lisp/language/japanese.el b/lisp/language/japanese.el
index 681dc9d7b92..6042ebf4511 100644
--- a/lisp/language/japanese.el
+++ b/lisp/language/japanese.el
| @@ -208,6 +208,7 @@ eucJP-ms is defined in <http://www.opengroup.or.jp/jvc/cde/appendix.html>." | |||
| 208 | "Japanese" '((setup-function . setup-japanese-environment-internal) | 208 | "Japanese" '((setup-function . setup-japanese-environment-internal) |
| 209 | (exit-function . use-default-char-width-table) | 209 | (exit-function . use-default-char-width-table) |
| 210 | (iso639-language . ja) | 210 | (iso639-language . ja) |
| 211 | (cjk-locale-symbol . ja_JP) | ||
| 211 | (tutorial . "TUTORIAL.ja") | 212 | (tutorial . "TUTORIAL.ja") |
| 212 | (charset japanese-jisx0208 | 213 | (charset japanese-jisx0208 |
| 213 | japanese-jisx0212 latin-jisx0201 katakana-jisx0201 | 214 | japanese-jisx0212 latin-jisx0201 katakana-jisx0201 |
diff --git a/lisp/language/korean.el b/lisp/language/korean.el
index fef5796bc4b..ede37d5d07c 100644
--- a/lisp/language/korean.el
+++ b/lisp/language/korean.el
| @@ -68,6 +68,7 @@ | |||
| 68 | (set-language-info-alist | 68 | (set-language-info-alist |
| 69 | "Korean" '((setup-function . setup-korean-environment-internal) | 69 | "Korean" '((setup-function . setup-korean-environment-internal) |
| 70 | (exit-function . exit-korean-environment) | 70 | (exit-function . exit-korean-environment) |
| 71 | (cjk-locale-symbol . ko_KR) | ||
| 71 | (iso639-language . ko) | 72 | (iso639-language . ko) |
| 72 | (tutorial . "TUTORIAL.ko") | 73 | (tutorial . "TUTORIAL.ko") |
| 73 | (charset korean-ksc5601 cp949) | 74 | (charset korean-ksc5601 cp949) |
diff --git a/src/character.c b/src/character.c
index f4164360f21..2118b20a7c7 100644
--- a/src/character.c
+++ b/src/character.c
| @@ -1117,6 +1117,14 @@ A char-table for width (columns) of each character. */); | |||
| 1117 | char_table_set_range (Vchar_width_table, MAX_5_BYTE_CHAR + 1, MAX_CHAR, | 1117 | char_table_set_range (Vchar_width_table, MAX_5_BYTE_CHAR + 1, MAX_CHAR, |
| 1118 | make_fixnum (4)); | 1118 | make_fixnum (4)); |
| 1119 | 1119 | ||
| 1120 | DEFVAR_LISP ("ambiguous-width-chars", Vambiguous_width_chars, | ||
| 1121 | doc: /* | ||
| 1122 | A char-table for characters whose width (columns) can be 1 or 2. | ||
| 1123 | |||
| 1124 | The actual width depends on the language-environment and on the | ||
| 1125 | value of `cjk-ambiguous-chars-are-wide'. */); | ||
| 1126 | Vambiguous_width_chars = Fmake_char_table (Qnil, Qnil); | ||
| 1127 | |||
| 1120 | DEFVAR_LISP ("printable-chars", Vprintable_chars, | 1128 | DEFVAR_LISP ("printable-chars", Vprintable_chars, |
| 1121 | doc: /* A char-table for each printable character. */); | 1129 | doc: /* A char-table for each printable character. */); |
| 1122 | Vprintable_chars = Fmake_char_table (Qnil, Qnil); | 1130 | Vprintable_chars = Fmake_char_table (Qnil, Qnil); |