aboutsummaryrefslogtreecommitdiffstats
path: root/lisp
diff options
context:
space:
mode:
authorKaroly Lorentey2004-06-14 20:00:54 +0000
committerKaroly Lorentey2004-06-14 20:00:54 +0000
commit987a34896f0f07c76a87314ee444467903632aeb (patch)
tree5e982f524d8a38b1156fd5c7c93517170a4e6764 /lisp
parent71a96040ef5a4a8a308a109c45bbabb20769beac (diff)
parent4060d762b6bd7a2e7e6d0c57154beac00496c9d6 (diff)
downloademacs-987a34896f0f07c76a87314ee444467903632aeb.tar.gz
emacs-987a34896f0f07c76a87314ee444467903632aeb.zip
Merged in changes from CVS trunk.
Patches applied: * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-398 Tweak permissions * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-399 Tweak directory permissions * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-400 Update from CVS * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-401 More build-in-place tweaking of arch tagging * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-402 Update from CVS * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-403 Yet more build-in-place tweaking of arch tagging * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-404 Update from CVS * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-405 Update from CVS * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-406 Update from CVS * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-407 Update from CVS git-archimport-id: lorentey@elte.hu--2004/emacs--multi-tty--0--patch-199
Diffstat (limited to 'lisp')
-rw-r--r--lisp/.arch-inventory3
-rw-r--r--lisp/ChangeLog150
-rw-r--r--lisp/abbrev.el13
-rw-r--r--lisp/bindings.el4
-rw-r--r--lisp/bookmark.el17
-rw-r--r--lisp/cus-face.el8
-rw-r--r--lisp/diff-mode.el9
-rw-r--r--lisp/dired-x.el2
-rw-r--r--lisp/dired.el10
-rw-r--r--lisp/emacs-lisp/bytecomp.el42
-rw-r--r--lisp/emulation/pc-select.el27
-rw-r--r--lisp/eshell/.arch-inventory4
-rw-r--r--lisp/files.el2
-rw-r--r--lisp/hexl.el6
-rw-r--r--lisp/info.el33
-rw-r--r--lisp/international/ccl.el3
-rw-r--r--lisp/international/characters.el9
-rw-r--r--lisp/international/code-pages.el120
-rw-r--r--lisp/international/mule-cmds.el9
-rw-r--r--lisp/international/mule.el6
-rw-r--r--lisp/international/utf-16.el519
-rw-r--r--lisp/international/utf-8.el1091
-rw-r--r--lisp/language/devan-util.el1
-rw-r--r--lisp/progmodes/python.el3
-rw-r--r--lisp/simple.el3
-rw-r--r--lisp/textmodes/paragraphs.el2
-rw-r--r--lisp/time-stamp.el8
27 files changed, 1278 insertions, 826 deletions
diff --git a/lisp/.arch-inventory b/lisp/.arch-inventory
index 5341c2d8fec..9bd88350a95 100644
--- a/lisp/.arch-inventory
+++ b/lisp/.arch-inventory
@@ -1,4 +1,7 @@
1# Auto-generated lisp files, which ignore 1# Auto-generated lisp files, which ignore
2precious ^(loaddefs|finder-inf|cus-load)\.el$ 2precious ^(loaddefs|finder-inf|cus-load)\.el$
3 3
4# Something generated during a windows build?!?
5precious ^(Makefile\.unix)$
6
4# arch-tag: fc62dc9f-3a91-455b-b8e7-d49df66beee0 7# arch-tag: fc62dc9f-3a91-455b-b8e7-d49df66beee0
diff --git a/lisp/ChangeLog b/lisp/ChangeLog
index 6dcc5c8fbbe..055bb70de3b 100644
--- a/lisp/ChangeLog
+++ b/lisp/ChangeLog
@@ -1,3 +1,149 @@
12004-06-14 Kenichi Handa <handa@m17n.org>
2
3 * international/code-pages.el (windows-1256, cp1125): Fix tables
4 for several characters.
5
6 * international/utf-8.el (ccl-encode-mule-utf-8): Fix previous
7 change.
8
92004-06-13 Richard M. Stallman <rms@gnu.org>
10
11 * textmodes/paragraphs.el (sentence-end): Add 0x5397d as close brace.
12
13 * emulation/pc-select.el: Doc fixes: say "PC Selection mode",
14 not "`pc-selection-mode'".
15
16 * emacs-lisp/bytecomp.el: Put `...' around symbols in warning messages.
17
18 * simple.el (previous-matching-history-element): Specify a default.
19
20 * hexl.el (hexl-mode): Catch errors in hexl-goto-address.
21
22 * cus-face.el (custom-declare-face): Simplify code.
23
24 * abbrev.el (abbrev-mode, edit-abbrevs-map): Doc fixes.
25
262004-06-13 Luc Teirlinck <teirllm@auburn.edu>
27
28 * files.el (before-save-hook): Add `time-stamp' to the options.
29
30 * time-stamp.el (time-stamp): Recommend adding it to
31 `before-save-hook', rather than `write-file-functions'
32 Make a similar change in `Commentary' section.
33
342004-06-13 Kai Grossjohann <kai.grossjohann@gmx.net>
35
36 * diff-mode.el (diff-current-defun): If at start of hunk, use
37 position of first change.
38
392004-06-13 Lars Hansen <larsh@math.ku.dk>
40
41 * dired-x.el (dired-mark-omitted): Bind to "*O".
42
432004-06-12 Karl Fogel <kfogel@red-bean.com>
44
45 * bookmark.el (bookmark-bmenu-relocate): New function, as
46 suggested by David J. Biesack <David.Biesack@sas.com>.
47 (bookmark-bmenu-mode-map): Bind `bookmark-bmenu-relocate' to "R".
48 (bookmark-bmenu-mode): Describe binding in doc string.
49 (bookmark-set-filename): Save the bookmark list if it's time.
50
512004-06-13 Kenichi Handa <handa@m17n.org>
52
53 * international/utf-8.el (ccl-decode-mule-utf-8): Fix previous
54 change.
55 (ccl-untranslated-to-ucs): Fix typo.
56
572004-06-12 Karl Chen <quarl@hkn.eecs.berkeley.edu> (tiny change)
58
59 * progmodes/python.el (python-open-block-statement-p): Fix
60 indentation after a block opening that contains a comment.
61
622004-06-12 J,Ai(Br,At(Bme Marant <jerome@marant.org> (tiny change)
63
64 * bindings.el (completion-ignored-extensions): Add file extensions
65 of Python byte-compiled files.
66
672004-06-12 Juri Linkov <juri@jurta.org>
68
69 * info.el (Info-goto-node): Add autoload.
70 (Info-toc): Add substring-no-properties on Info file name.
71 (Info-mode, info, Info-toc, Info-mode-menu): Doc fix.
72 (Info-mode-map): Bind L to Info-history, T to Info-toc.
73
742004-06-12 Kenichi Handa <handa@m17n.org>
75
76 * international/mule-cmds.el (set-language-environment): Load
77 subst tables if necessary.
78
79 * international/mule.el (decode-char): Load subst tables if
80 necessary.
81 (encode-char): Likewise.
82
83 * international/utf-16.el (utf-16-decode-ucs): Handle a surrogate
84 pair correctly. Call ccl-mule-utf-untrans for untranslable chars.
85 (utf-16le-decode-loop): Set r5 to -1 before loop.
86 (utf-16be-decode-loop): Likewise.
87 (ccl-decode-mule-utf-16le): Add EOF processing block.
88 (ccl-decode-mule-utf-16be): Likewise.
89 (ccl-decode-mule-utf-16le-with-signature): Likewise.
90 (ccl-decode-mule-utf-16be-with-signature): Likewise.
91 (ccl-decode-mule-utf-16): Likewise. Set r5 to -1 initially.
92 (ccl-mule-utf-16-encode-untrans): New CCL.
93 (utf-16-decode-to-ucs): Handle pre-read character.
94 (utf-16le-encode-loop): Handle surrogate pair.
95 (utf-16be-encode-loop): Likewise.
96 (ccl-encode-mule-utf-16le-with-signature): Adjusted for the change
97 of utf-16le-encode-loop.
98 (ccl-encode-mule-utf-16be-with-signature): Adjusted for the change
99 of utf-16be-encode-loop.
100 (mule-utf-16-post-read-conversion): Call
101 utf-8-post-read-conversion at first.
102 (mule-utf-16[{le|be}], mule-utf-16{le|be}-with-signature): Include
103 CJK charsets in safe-charsets if utf-translate-cjk-mode is on.
104 Add post-read-conversion and pre-write-conversion.
105
106 * international/utf-8.el (utf-translate-cjk-charsets): New
107 variable.
108 (utf-translate-cjk-unicode-range): New variable.
109 (utf-translate-cjk-load-tables): New function.
110 (utf-lookup-subst-table-for-decode): New function.
111 (utf-lookup-subst-table-for-encode): New function.
112 (utf-translate-cjk-mode): Init-value changed to t. Don't load
113 tables here. Update safe-charsets of utf-* coding systems.
114 (ccl-mule-utf-untrans): New CCL.
115 (ccl-decode-mule-utf-8): Call ccl-mule-utf-untrans. Use `repeat'
116 at end of each branch.
117 (ccl-mule-utf-8-encode-untrans): New CCL.
118 (ccl-encode-mule-utf-8): Call ccl-mule-utf-8-encode-untrans.
119 (ccl-untranslated-to-ucs): Handle 2-byte encoding. Set r1 to the
120 length of encoding. Don't return r0.
121 (utf-8-compose): New arg hash-table. Handle 2-byte encoding.
122 (utf-8-post-read-conversion): Narrow to region properly. If
123 utf-translate-cjk-mode is on, load tables if necessary. Call
124 utf-8-compose with hash-table arg if necessary. Call
125 XXX-compose-region instead of XXX-post-read-convesion.
126 (utf-8-pre-write-conversion): New function.
127 (mule-utf-8): Include CJK charsets in safe-charsets if
128 utf-translate-cjk-mode is on. Add pre-write-conversion.
129
130 * international/characters.el: Temporarily set
131 utf-translate-cjk-mode to nil.
132
133 * language/devan-util.el (devanagari-compose-region): Add
134 autoload cookie.
135
136 * international/ccl.el (ccl-dump-call): Fix printing the
137 subroutine name.
138
1392004-06-11 Luc Teirlinck <teirllm@auburn.edu>
140
141 * dired.el (dired-revert): If buffer is marked unmodified before
142 reverting, keep it marked unmodified.
143 Adapt to new conventions for commenting out code.
144 (dired-make-relative): Adapt to new conventions for commenting out
145 code.
146
12004-06-10 Miles Bader <miles@gnu.ai.mit.edu> 1472004-06-10 Miles Bader <miles@gnu.ai.mit.edu>
2 148
3 * eshell/esh-module.el (eshell-load-defgroups): Bind 149 * eshell/esh-module.el (eshell-load-defgroups): Bind
@@ -2169,17 +2315,21 @@
2169 (desktop-buffer-info-misc-data): Rename to 2315 (desktop-buffer-info-misc-data): Rename to
2170 Info-desktop-buffer-misc-data and move to info.el. 2316 Info-desktop-buffer-misc-data and move to info.el.
2171 (desktop-read): Add message about number of buffers restored/failed. 2317 (desktop-read): Add message about number of buffers restored/failed.
2318
2172 * dired.el (dired-restore-desktop-buffer) Move from desktop.el. 2319 * dired.el (dired-restore-desktop-buffer) Move from desktop.el.
2173 Add parameters. Pause to display error only when 2320 Add parameters. Pause to display error only when
2174 desktop-missing-file-warning is non-nil. 2321 desktop-missing-file-warning is non-nil.
2175 (dired-desktop-buffer-misc-data): Move from desktop.el. Add parameter. 2322 (dired-desktop-buffer-misc-data): Move from desktop.el. Add parameter.
2176 (dired-mode): Bind desktop-buffer-misc-data-function. 2323 (dired-mode): Bind desktop-buffer-misc-data-function.
2324
2177 * info.el (Info-restore-desktop-buffer): Move from desktop.el. 2325 * info.el (Info-restore-desktop-buffer): Move from desktop.el.
2178 Add Parameters. 2326 Add Parameters.
2179 (Info-desktop-buffer-misc-data): Move from desktop.el. Add parameter. 2327 (Info-desktop-buffer-misc-data): Move from desktop.el. Add parameter.
2180 (Info-mode): Bind desktop-buffer-misc-data-function. 2328 (Info-mode): Bind desktop-buffer-misc-data-function.
2329
2181 * mail/rmail.el (rmail-restore-desktop-buffer): Move from desktop.el. 2330 * mail/rmail.el (rmail-restore-desktop-buffer): Move from desktop.el.
2182 Add Parameters. 2331 Add Parameters.
2332
2183 * mh-e/mh-e.el (mh-restore-desktop-buffer): Move from desktop.el. 2333 * mh-e/mh-e.el (mh-restore-desktop-buffer): Move from desktop.el.
2184 Add Parameters. 2334 Add Parameters.
2185 2335
diff --git a/lisp/abbrev.el b/lisp/abbrev.el
index 1e3eea0e359..3be0014fd0e 100644
--- a/lisp/abbrev.el
+++ b/lisp/abbrev.el
@@ -37,9 +37,9 @@ define global abbrevs instead."
37 :group 'convenience) 37 :group 'convenience)
38 38
39(defun abbrev-mode (&optional arg) 39(defun abbrev-mode (&optional arg)
40 "Toggle abbrev mode. 40 "Toggle Abbrev mode in the current buffer.
41With argument ARG, turn abbrev mode on iff ARG is positive. 41With argument ARG, turn abbrev mode on iff ARG is positive.
42In abbrev mode, inserting an abbreviation causes it to expand 42In Abbrev mode, inserting an abbreviation causes it to expand
43and be replaced by its expansion." 43and be replaced by its expansion."
44 (interactive "P") 44 (interactive "P")
45 (setq abbrev-mode 45 (setq abbrev-mode
@@ -48,18 +48,19 @@ and be replaced by its expansion."
48 (force-mode-line-update)) 48 (force-mode-line-update))
49 49
50(defcustom abbrev-mode nil 50(defcustom abbrev-mode nil
51 "Toggle abbrev mode. 51 "Enable or disable Abbrev mode.
52Non-nil means automatically expand abbrevs as they are inserted. 52Non-nil means automatically expand abbrevs as they are inserted.
53 53
54Setting this variable with `setq' changes it for the current buffer.
54Changing it with \\[customize] sets the default value. 55Changing it with \\[customize] sets the default value.
55Use the command `abbrev-mode' to enable or disable Abbrev mode in the current 56Interactively, use the command `abbrev-mode'
56buffer." 57to enable or disable Abbrev mode in the current buffer."
57 :type 'boolean 58 :type 'boolean
58 :group 'abbrev-mode) 59 :group 'abbrev-mode)
59 60
60 61
61(defvar edit-abbrevs-map nil 62(defvar edit-abbrevs-map nil
62 "Keymap used in edit-abbrevs.") 63 "Keymap used in `edit-abbrevs'.")
63(if edit-abbrevs-map 64(if edit-abbrevs-map
64 nil 65 nil
65 (setq edit-abbrevs-map (make-sparse-keymap)) 66 (setq edit-abbrevs-map (make-sparse-keymap))
diff --git a/lisp/bindings.el b/lisp/bindings.el
index 2518c9bae08..68c4ec433f7 100644
--- a/lisp/bindings.el
+++ b/lisp/bindings.el
@@ -524,7 +524,9 @@ is okay. See `mode-line-format'.")
524 ;; files you do want to see, not just TeX stuff. -- fx 524 ;; files you do want to see, not just TeX stuff. -- fx
525 ".toc" ".aux" 525 ".toc" ".aux"
526 ".cp" ".fn" ".ky" ".pg" ".tp" ".vr" 526 ".cp" ".fn" ".ky" ".pg" ".tp" ".vr"
527 ".cps" ".fns" ".kys" ".pgs" ".tps" ".vrs"))) 527 ".cps" ".fns" ".kys" ".pgs" ".tps" ".vrs"
528 ;; Python byte-compiled
529 ".pyc" ".pyo")))
528 530
529;; Suffixes used for executables. 531;; Suffixes used for executables.
530(setq exec-suffixes 532(setq exec-suffixes
diff --git a/lisp/bookmark.el b/lisp/bookmark.el
index 3ed66f229a8..b25c261c1e7 100644
--- a/lisp/bookmark.el
+++ b/lisp/bookmark.el
@@ -376,7 +376,11 @@ That is, all information but the name."
376 (if cell 376 (if cell
377 (setcdr cell filename) 377 (setcdr cell filename)
378 (nconc (bookmark-get-bookmark-record bookmark) 378 (nconc (bookmark-get-bookmark-record bookmark)
379 (list (cons 'filename filename)))))) 379 (list (cons 'filename filename))))
380 (setq bookmark-alist-modification-count
381 (1+ bookmark-alist-modification-count))
382 (if (bookmark-time-to-save-p)
383 (bookmark-save))))
380 384
381 385
382(defun bookmark-get-position (bookmark) 386(defun bookmark-get-position (bookmark)
@@ -1491,6 +1495,7 @@ method buffers use to resolve name collisions."
1491 (define-key bookmark-bmenu-mode-map "m" 'bookmark-bmenu-mark) 1495 (define-key bookmark-bmenu-mode-map "m" 'bookmark-bmenu-mark)
1492 (define-key bookmark-bmenu-mode-map "l" 'bookmark-bmenu-load) 1496 (define-key bookmark-bmenu-mode-map "l" 'bookmark-bmenu-load)
1493 (define-key bookmark-bmenu-mode-map "r" 'bookmark-bmenu-rename) 1497 (define-key bookmark-bmenu-mode-map "r" 'bookmark-bmenu-rename)
1498 (define-key bookmark-bmenu-mode-map "R" 'bookmark-bmenu-relocate)
1494 (define-key bookmark-bmenu-mode-map "t" 'bookmark-bmenu-toggle-filenames) 1499 (define-key bookmark-bmenu-mode-map "t" 'bookmark-bmenu-toggle-filenames)
1495 (define-key bookmark-bmenu-mode-map "a" 'bookmark-bmenu-show-annotation) 1500 (define-key bookmark-bmenu-mode-map "a" 'bookmark-bmenu-show-annotation)
1496 (define-key bookmark-bmenu-mode-map "A" 'bookmark-bmenu-show-all-annotations) 1501 (define-key bookmark-bmenu-mode-map "A" 'bookmark-bmenu-show-all-annotations)
@@ -1589,6 +1594,7 @@ Bookmark names preceded by a \"*\" have annotations.
1589 so the bookmark menu bookmark remains visible in its window. 1594 so the bookmark menu bookmark remains visible in its window.
1590\\[bookmark-bmenu-switch-other-window] -- switch the other window to this bookmark. 1595\\[bookmark-bmenu-switch-other-window] -- switch the other window to this bookmark.
1591\\[bookmark-bmenu-rename] -- rename this bookmark \(prompts for new name\). 1596\\[bookmark-bmenu-rename] -- rename this bookmark \(prompts for new name\).
1597\\[bookmark-bmenu-relocate] -- relocate this bookmark's file \(prompts for new file\).
1592\\[bookmark-bmenu-delete] -- mark this bookmark to be deleted, and move down. 1598\\[bookmark-bmenu-delete] -- mark this bookmark to be deleted, and move down.
1593\\[bookmark-bmenu-delete-backwards] -- mark this bookmark to be deleted, and move up. 1599\\[bookmark-bmenu-delete-backwards] -- mark this bookmark to be deleted, and move up.
1594\\[bookmark-bmenu-execute-deletions] -- delete bookmarks marked with `\\[bookmark-bmenu-delete]'. 1600\\[bookmark-bmenu-execute-deletions] -- delete bookmarks marked with `\\[bookmark-bmenu-delete]'.
@@ -2041,6 +2047,15 @@ To carry out the deletions that you've marked, use \\<bookmark-bmenu-mode-map>\\
2041 (let ((bmrk (bookmark-bmenu-bookmark))) 2047 (let ((bmrk (bookmark-bmenu-bookmark)))
2042 (message (bookmark-location bmrk))))) 2048 (message (bookmark-location bmrk)))))
2043 2049
2050(defun bookmark-bmenu-relocate ()
2051 "Change the file path of the bookmark on the current line,
2052 prompting with completion for the new path."
2053 (interactive)
2054 (if (bookmark-bmenu-check-position)
2055 (let ((bmrk (bookmark-bmenu-bookmark))
2056 (thispoint (point)))
2057 (bookmark-relocate bmrk)
2058 (goto-char thispoint))))
2044 2059
2045 2060
2046;;; Menu bar stuff. Prefix is "bookmark-menu". 2061;;; Menu bar stuff. Prefix is "bookmark-menu".
diff --git a/lisp/cus-face.el b/lisp/cus-face.el
index b5716da161a..0bd3387d3c7 100644
--- a/lisp/cus-face.el
+++ b/lisp/cus-face.el
@@ -40,15 +40,11 @@
40 (unless (facep face) 40 (unless (facep face)
41 ;; If the user has already created the face, respect that. 41 ;; If the user has already created the face, respect that.
42 (let ((value (or (get face 'saved-face) spec)) 42 (let ((value (or (get face 'saved-face) spec))
43 (frames (frame-list)) 43 (have-window-system (memq initial-window-system '(x w32))))
44 (have-window-system (memq initial-window-system '(x w32)))
45 frame)
46 ;; Create global face. 44 ;; Create global face.
47 (make-empty-face face) 45 (make-empty-face face)
48 ;; Create frame-local faces 46 ;; Create frame-local faces
49 (while frames 47 (dolist (frame (frame-list))
50 (setq frame (car frames)
51 frames (cdr frames))
52 (face-spec-set face value frame) 48 (face-spec-set face value frame)
53 (when (memq (window-system frame) '(x w32)) 49 (when (memq (window-system frame) '(x w32))
54 (setq have-window-system t))) 50 (setq have-window-system t)))
diff --git a/lisp/diff-mode.el b/lisp/diff-mode.el
index 9b00eae050d..26ff5441baf 100644
--- a/lisp/diff-mode.el
+++ b/lisp/diff-mode.el
@@ -1248,9 +1248,12 @@ If the prefix arg is bigger than 8 (for example with \\[universal-argument] \\[u
1248(defun diff-current-defun () 1248(defun diff-current-defun ()
1249 "Find the name of function at point. 1249 "Find the name of function at point.
1250For use in `add-log-current-defun-function'." 1250For use in `add-log-current-defun-function'."
1251 (destructuring-bind (buf line-offset pos src dst &optional switched) 1251 (save-excursion
1252 (diff-find-source-location) 1252 (when (looking-at diff-hunk-header-re)
1253 (save-excursion 1253 (forward-line 1)
1254 (while (and (looking-at " ") (not (zerop (forward-line 1))))))
1255 (destructuring-bind (buf line-offset pos src dst &optional switched)
1256 (diff-find-source-location)
1254 (beginning-of-line) 1257 (beginning-of-line)
1255 (or (when (memq (char-after) '(?< ?-)) 1258 (or (when (memq (char-after) '(?< ?-))
1256 ;; Cursor is pointing at removed text. This could be a removed 1259 ;; Cursor is pointing at removed text. This could be a removed
diff --git a/lisp/dired-x.el b/lisp/dired-x.el
index caef06a64fb..6b44b73b170 100644
--- a/lisp/dired-x.el
+++ b/lisp/dired-x.el
@@ -239,7 +239,7 @@ to nil: a pipe using `zcat' or `gunzip -c' will be used."
239;;; KEY BINDINGS. 239;;; KEY BINDINGS.
240 240
241(define-key dired-mode-map "\M-o" 'dired-omit-mode) 241(define-key dired-mode-map "\M-o" 'dired-omit-mode)
242(define-key dired-mode-map "\M-O" 'dired-mark-omitted) 242(define-key dired-mode-map "*O" 'dired-mark-omitted)
243(define-key dired-mode-map "\M-(" 'dired-mark-sexp) 243(define-key dired-mode-map "\M-(" 'dired-mark-sexp)
244(define-key dired-mode-map "*(" 'dired-mark-sexp) 244(define-key dired-mode-map "*(" 'dired-mark-sexp)
245(define-key dired-mode-map "*." 'dired-mark-extension) 245(define-key dired-mode-map "*." 'dired-mark-extension)
diff --git a/lisp/dired.el b/lisp/dired.el
index 3d3fd34b5ac..e5e23dfe2d6 100644
--- a/lisp/dired.el
+++ b/lisp/dired.el
@@ -880,7 +880,8 @@ Must also be called after dired-actual-switches have changed.
880Should not fail even on completely garbaged buffers. 880Should not fail even on completely garbaged buffers.
881Preserves old cursor, marks/flags, hidden-p." 881Preserves old cursor, marks/flags, hidden-p."
882 (widen) ; just in case user narrowed 882 (widen) ; just in case user narrowed
883 (let ((opoint (point)) 883 (let ((modflag (buffer-modified-p))
884 (opoint (point))
884 (ofile (dired-get-filename nil t)) 885 (ofile (dired-get-filename nil t))
885 (mark-alist nil) ; save marked files 886 (mark-alist nil) ; save marked files
886 (hidden-subdirs (dired-remember-hidden)) 887 (hidden-subdirs (dired-remember-hidden))
@@ -907,9 +908,10 @@ Preserves old cursor, marks/flags, hidden-p."
907 (save-excursion ; hide subdirs that were hidden 908 (save-excursion ; hide subdirs that were hidden
908 (dolist (dir hidden-subdirs) 909 (dolist (dir hidden-subdirs)
909 (if (dired-goto-subdir dir) 910 (if (dired-goto-subdir dir)
910 (dired-hide-subdir 1))))) 911 (dired-hide-subdir 1))))
912 (unless modflag (restore-buffer-modified-p nil)))
911 ;; outside of the let scope 913 ;; outside of the let scope
912;;; Might as well not override the user if the user changed this. 914;;; Might as well not override the user if the user changed this.
913;;; (setq buffer-read-only t) 915;;; (setq buffer-read-only t)
914 ) 916 )
915 917
@@ -1707,7 +1709,7 @@ DIR must be a directory name, not a file name."
1707 (setq dir (expand-file-name dir))) 1709 (setq dir (expand-file-name dir)))
1708 (if (string-match (concat "^" (regexp-quote dir)) file) 1710 (if (string-match (concat "^" (regexp-quote dir)) file)
1709 (substring file (match-end 0)) 1711 (substring file (match-end 0))
1710;;; (or no-error 1712;;; (or no-error
1711;;; (error "%s: not in directory tree growing at %s" file dir)) 1713;;; (error "%s: not in directory tree growing at %s" file dir))
1712 file)) 1714 file))
1713 1715
diff --git a/lisp/emacs-lisp/bytecomp.el b/lisp/emacs-lisp/bytecomp.el
index 8e20925c70d..a4ae751cab7 100644
--- a/lisp/emacs-lisp/bytecomp.el
+++ b/lisp/emacs-lisp/bytecomp.el
@@ -1008,11 +1008,11 @@ Each function's symbol gets added to `byte-compile-noruntime-functions'."
1008 (when (nth 2 new))) 1008 (when (nth 2 new)))
1009 (byte-compile-set-symbol-position (car form)) 1009 (byte-compile-set-symbol-position (car form))
1010 (if (memq 'obsolete byte-compile-warnings) 1010 (if (memq 'obsolete byte-compile-warnings)
1011 (byte-compile-warn "%s is an obsolete function%s; %s" (car form) 1011 (byte-compile-warn "`%s' is an obsolete function%s; %s" (car form)
1012 (if when (concat " since " when) "") 1012 (if when (concat " since " when) "")
1013 (if (stringp (car new)) 1013 (if (stringp (car new))
1014 (car new) 1014 (car new)
1015 (format "use %s instead." (car new))))) 1015 (format "use `%s' instead." (car new)))))
1016 (funcall (or handler 'byte-compile-normal-call) form))) 1016 (funcall (or handler 'byte-compile-normal-call) form)))
1017 1017
1018;; Compiler options 1018;; Compiler options
@@ -2076,7 +2076,7 @@ list that represents a doc string reference.
2076(defun byte-compile-file-form-defsubst (form) 2076(defun byte-compile-file-form-defsubst (form)
2077 (when (assq (nth 1 form) byte-compile-unresolved-functions) 2077 (when (assq (nth 1 form) byte-compile-unresolved-functions)
2078 (setq byte-compile-current-form (nth 1 form)) 2078 (setq byte-compile-current-form (nth 1 form))
2079 (byte-compile-warn "defsubst %s was used before it was defined" 2079 (byte-compile-warn "defsubst `%s' was used before it was defined"
2080 (nth 1 form))) 2080 (nth 1 form)))
2081 (byte-compile-file-form 2081 (byte-compile-file-form
2082 (macroexpand form byte-compile-macro-environment)) 2082 (macroexpand form byte-compile-macro-environment))
@@ -2206,7 +2206,7 @@ list that represents a doc string reference.
2206 (not (assq (nth 1 form) 2206 (not (assq (nth 1 form)
2207 byte-compile-initial-macro-environment))) 2207 byte-compile-initial-macro-environment)))
2208 (byte-compile-warn 2208 (byte-compile-warn
2209 "%s defined multiple times, as both function and macro" 2209 "`%s' defined multiple times, as both function and macro"
2210 (nth 1 form))) 2210 (nth 1 form)))
2211 (setcdr that-one nil)) 2211 (setcdr that-one nil))
2212 (this-one 2212 (this-one
@@ -2215,14 +2215,14 @@ list that represents a doc string reference.
2215 ;; byte-compiler macros in byte-run.el... 2215 ;; byte-compiler macros in byte-run.el...
2216 (not (assq (nth 1 form) 2216 (not (assq (nth 1 form)
2217 byte-compile-initial-macro-environment))) 2217 byte-compile-initial-macro-environment)))
2218 (byte-compile-warn "%s %s defined multiple times in this file" 2218 (byte-compile-warn "%s `%s' defined multiple times in this file"
2219 (if macrop "macro" "function") 2219 (if macrop "macro" "function")
2220 (nth 1 form)))) 2220 (nth 1 form))))
2221 ((and (fboundp name) 2221 ((and (fboundp name)
2222 (eq (car-safe (symbol-function name)) 2222 (eq (car-safe (symbol-function name))
2223 (if macrop 'lambda 'macro))) 2223 (if macrop 'lambda 'macro)))
2224 (when (memq 'redefine byte-compile-warnings) 2224 (when (memq 'redefine byte-compile-warnings)
2225 (byte-compile-warn "%s %s being redefined as a %s" 2225 (byte-compile-warn "%s `%s' being redefined as a %s"
2226 (if macrop "function" "macro") 2226 (if macrop "function" "macro")
2227 (nth 1 form) 2227 (nth 1 form)
2228 (if macrop "macro" "function"))) 2228 (if macrop "macro" "function")))
@@ -2695,7 +2695,7 @@ If FORM is a lambda or a macro, byte-compile it as a function."
2695 (handler (get fn 'byte-compile))) 2695 (handler (get fn 'byte-compile)))
2696 (byte-compile-set-symbol-position fn) 2696 (byte-compile-set-symbol-position fn)
2697 (when (byte-compile-const-symbol-p fn) 2697 (when (byte-compile-const-symbol-p fn)
2698 (byte-compile-warn "%s called as a function" fn)) 2698 (byte-compile-warn "`%s' called as a function" fn))
2699 (if (and handler 2699 (if (and handler
2700 (or (not (byte-compile-version-cond 2700 (or (not (byte-compile-version-cond
2701 byte-compile-compatibility)) 2701 byte-compile-compatibility))
@@ -2730,9 +2730,9 @@ If FORM is a lambda or a macro, byte-compile it as a function."
2730 (if (or (not (symbolp var)) 2730 (if (or (not (symbolp var))
2731 (byte-compile-const-symbol-p var (not (eq base-op 'byte-varref)))) 2731 (byte-compile-const-symbol-p var (not (eq base-op 'byte-varref))))
2732 (byte-compile-warn 2732 (byte-compile-warn
2733 (cond ((eq base-op 'byte-varbind) "attempt to let-bind %s %s") 2733 (cond ((eq base-op 'byte-varbind) "attempt to let-bind %s `%s'")
2734 ((eq base-op 'byte-varset) "variable assignment to %s %s") 2734 ((eq base-op 'byte-varset) "variable assignment to %s `%s'")
2735 (t "variable reference to %s %s")) 2735 (t "variable reference to %s `%s'"))
2736 (if (symbolp var) "constant" "nonvariable") 2736 (if (symbolp var) "constant" "nonvariable")
2737 (prin1-to-string var)) 2737 (prin1-to-string var))
2738 (if (and (get var 'byte-obsolete-variable) 2738 (if (and (get var 'byte-obsolete-variable)
@@ -2740,11 +2740,11 @@ If FORM is a lambda or a macro, byte-compile it as a function."
2740 (not (eq var byte-compile-not-obsolete-var))) 2740 (not (eq var byte-compile-not-obsolete-var)))
2741 (let* ((ob (get var 'byte-obsolete-variable)) 2741 (let* ((ob (get var 'byte-obsolete-variable))
2742 (when (cdr ob))) 2742 (when (cdr ob)))
2743 (byte-compile-warn "%s is an obsolete variable%s; %s" var 2743 (byte-compile-warn "`%s' is an obsolete variable%s; %s" var
2744 (if when (concat " since " when) "") 2744 (if when (concat " since " when) "")
2745 (if (stringp (car ob)) 2745 (if (stringp (car ob))
2746 (car ob) 2746 (car ob)
2747 (format "use %s instead." (car ob)))))) 2747 (format "use `%s' instead." (car ob))))))
2748 (if (memq 'free-vars byte-compile-warnings) 2748 (if (memq 'free-vars byte-compile-warnings)
2749 (if (eq base-op 'byte-varbind) 2749 (if (eq base-op 'byte-varbind)
2750 (push var byte-compile-bound-variables) 2750 (push var byte-compile-bound-variables)
@@ -2753,11 +2753,11 @@ If FORM is a lambda or a macro, byte-compile it as a function."
2753 (if (eq base-op 'byte-varset) 2753 (if (eq base-op 'byte-varset)
2754 (or (memq var byte-compile-free-assignments) 2754 (or (memq var byte-compile-free-assignments)
2755 (progn 2755 (progn
2756 (byte-compile-warn "assignment to free variable %s" var) 2756 (byte-compile-warn "assignment to free variable `%s'" var)
2757 (push var byte-compile-free-assignments))) 2757 (push var byte-compile-free-assignments)))
2758 (or (memq var byte-compile-free-references) 2758 (or (memq var byte-compile-free-references)
2759 (progn 2759 (progn
2760 (byte-compile-warn "reference to free variable %s" var) 2760 (byte-compile-warn "reference to free variable `%s'" var)
2761 (push var byte-compile-free-references)))))))) 2761 (push var byte-compile-free-references))))))))
2762 (let ((tmp (assq var byte-compile-variables))) 2762 (let ((tmp (assq var byte-compile-variables)))
2763 (unless tmp 2763 (unless tmp
@@ -2958,7 +2958,7 @@ If FORM is a lambda or a macro, byte-compile it as a function."
2958 2958
2959(defun byte-compile-subr-wrong-args (form n) 2959(defun byte-compile-subr-wrong-args (form n)
2960 (byte-compile-set-symbol-position (car form)) 2960 (byte-compile-set-symbol-position (car form))
2961 (byte-compile-warn "%s called with %d arg%s, but requires %s" 2961 (byte-compile-warn "`%s' called with %d arg%s, but requires %s"
2962 (car form) (length (cdr form)) 2962 (car form) (length (cdr form))
2963 (if (= 1 (length (cdr form))) "" "s") n) 2963 (if (= 1 (length (cdr form))) "" "s") n)
2964 ;; get run-time wrong-number-of-args error. 2964 ;; get run-time wrong-number-of-args error.
@@ -3124,7 +3124,7 @@ If FORM is a lambda or a macro, byte-compile it as a function."
3124 (if (and (consp (car body)) 3124 (if (and (consp (car body))
3125 (not (eq 'byte-code (car (car body))))) 3125 (not (eq 'byte-code (car (car body)))))
3126 (byte-compile-warn 3126 (byte-compile-warn
3127 "A quoted lambda form is the second argument of fset. This is probably 3127 "A quoted lambda form is the second argument of `fset'. This is probably
3128 not what you want, as that lambda cannot be compiled. Consider using 3128 not what you want, as that lambda cannot be compiled. Consider using
3129 the syntax (function (lambda (...) ...)) instead."))))) 3129 the syntax (function (lambda (...) ...)) instead.")))))
3130 (byte-compile-two-args form)) 3130 (byte-compile-two-args form))
@@ -3507,7 +3507,7 @@ being undefined will be suppressed."
3507 (byte-compile-set-symbol-position 'condition-case) 3507 (byte-compile-set-symbol-position 'condition-case)
3508 (unless (symbolp var) 3508 (unless (symbolp var)
3509 (byte-compile-warn 3509 (byte-compile-warn
3510 "%s is not a variable-name or nil (in condition-case)" var)) 3510 "`%s' is not a variable-name or nil (in condition-case)" var))
3511 (byte-compile-push-constant var) 3511 (byte-compile-push-constant var)
3512 (byte-compile-push-constant (byte-compile-top-level 3512 (byte-compile-push-constant (byte-compile-top-level
3513 (nth 2 form) for-effect)) 3513 (nth 2 form) for-effect))
@@ -3525,13 +3525,13 @@ being undefined will be suppressed."
3525 (setq syms (cdr syms))) 3525 (setq syms (cdr syms)))
3526 ok)))) 3526 ok))))
3527 (byte-compile-warn 3527 (byte-compile-warn
3528 "%s is not a condition name or list of such (in condition-case)" 3528 "`%s' is not a condition name or list of such (in condition-case)"
3529 (prin1-to-string condition))) 3529 (prin1-to-string condition)))
3530;; ((not (or (eq condition 't) 3530;; ((not (or (eq condition 't)
3531;; (and (stringp (get condition 'error-message)) 3531;; (and (stringp (get condition 'error-message))
3532;; (consp (get condition 'error-conditions))))) 3532;; (consp (get condition 'error-conditions)))))
3533;; (byte-compile-warn 3533;; (byte-compile-warn
3534;; "%s is not a known condition name (in condition-case)" 3534;; "`%s' is not a known condition name (in condition-case)"
3535;; condition)) 3535;; condition))
3536 ) 3536 )
3537 (setq compiled-clauses 3537 (setq compiled-clauses
@@ -3627,7 +3627,7 @@ being undefined will be suppressed."
3627 (and (eq fun 'defconst) (null (cddr form)))) 3627 (and (eq fun 'defconst) (null (cddr form))))
3628 (let ((ncall (length (cdr form)))) 3628 (let ((ncall (length (cdr form))))
3629 (byte-compile-warn 3629 (byte-compile-warn
3630 "%s called with %d argument%s, but %s %s" 3630 "`%s' called with %d argument%s, but %s %s"
3631 fun ncall 3631 fun ncall
3632 (if (= 1 ncall) "" "s") 3632 (if (= 1 ncall) "" "s")
3633 (if (< ncall 2) "requires" "accepts only") 3633 (if (< ncall 2) "requires" "accepts only")
@@ -3644,7 +3644,7 @@ being undefined will be suppressed."
3644 `(push ',var current-load-list)) 3644 `(push ',var current-load-list))
3645 (when (> (length form) 3) 3645 (when (> (length form) 3)
3646 (when (and string (not (stringp string))) 3646 (when (and string (not (stringp string)))
3647 (byte-compile-warn "third arg to %s %s is not a string: %s" 3647 (byte-compile-warn "third arg to `%s %s' is not a string: %s"
3648 fun var string)) 3648 fun var string))
3649 `(put ',var 'variable-documentation ,string)) 3649 `(put ',var 'variable-documentation ,string))
3650 (if (cddr form) ; `value' provided 3650 (if (cddr form) ; `value' provided
diff --git a/lisp/emulation/pc-select.el b/lisp/emulation/pc-select.el
index c7ea973467f..188e335687c 100644
--- a/lisp/emulation/pc-select.el
+++ b/lisp/emulation/pc-select.el
@@ -61,7 +61,7 @@
61;; Eli Barzilay (eli@cs.bgu.ac.il) suggested the sexps functions and 61;; Eli Barzilay (eli@cs.bgu.ac.il) suggested the sexps functions and
62;; keybindings. 62;; keybindings.
63;; 63;;
64;; Ok, some details about the idea of pc-selection-mode: 64;; Ok, some details about the idea of PC Selection mode:
65;; 65;;
66;; o The standard keys for moving around (right, left, up, down, home, end, 66;; o The standard keys for moving around (right, left, up, down, home, end,
67;; prior, next, called "move-keys" from now on) will always de-activate 67;; prior, next, called "move-keys" from now on) will always de-activate
@@ -114,23 +114,23 @@ This gives mostly Emacs-like behaviour with only the selection keys enabled."
114 :group 'pc-select) 114 :group 'pc-select)
115 115
116(defvar pc-select-saved-settings-alist nil 116(defvar pc-select-saved-settings-alist nil
117 "The values of the variables before `pc-selection-mode' was toggled on. 117 "The values of the variables before PC Selection mode was toggled on.
118When `pc-selection-mode' is toggled on, it sets quite a few variables 118When PC Selection mode is toggled on, it sets quite a few variables
119for its own purposes. This alist holds the original values of the 119for its own purposes. This alist holds the original values of the
120variables `pc-selection-mode' had set, so that these variables can be 120variables PC Selection mode had set, so that these variables can be
121restored to their original values when `pc-selection-mode' is toggled off.") 121restored to their original values when PC Selection mode is toggled off.")
122 122
123(defvar pc-select-map nil 123(defvar pc-select-map nil
124 "The keymap used as the global map when `pc-selection-mode' is on." ) 124 "The keymap used as the global map when PC Selection mode is on." )
125 125
126(defvar pc-select-saved-global-map nil 126(defvar pc-select-saved-global-map nil
127 "The global map that was in effect when `pc-selection-mode' was toggled on.") 127 "The global map that was in effect when PC Selection mode was toggled on.")
128 128
129(defvar pc-select-key-bindings-alist nil 129(defvar pc-select-key-bindings-alist nil
130 "This alist holds all the key bindings `pc-selection-mode' sets.") 130 "This alist holds all the key bindings PC Selection mode sets.")
131 131
132(defvar pc-select-default-key-bindings nil 132(defvar pc-select-default-key-bindings nil
133 "These key bindings always get set by `pc-selection-mode'.") 133 "These key bindings always get set by PC Selection mode.")
134 134
135(unless pc-select-default-key-bindings 135(unless pc-select-default-key-bindings
136 (let ((lst 136 (let ((lst
@@ -250,7 +250,7 @@ These key bindings get installed when running in a tty, but only if
250(defvar pc-select-old-M-delete-binding nil 250(defvar pc-select-old-M-delete-binding nil
251 "Holds the old mapping of [M-delete] in the `function-key-map'. 251 "Holds the old mapping of [M-delete] in the `function-key-map'.
252This variable holds the value associated with [M-delete] in the 252This variable holds the value associated with [M-delete] in the
253`function-key-map' before `pc-selection-mode' had changed that 253`function-key-map' before PC Selection mode had changed that
254association.") 254association.")
255 255
256;;;; 256;;;;
@@ -842,7 +842,7 @@ M-LEFT and M-RIGHT move back or forward one word or sexp, disabling the mark.
842S-M-LEFT and S-M-RIGHT move back or forward one word or sexp, leaving the mark 842S-M-LEFT and S-M-RIGHT move back or forward one word or sexp, leaving the mark
843behind. To control whether these keys move word-wise or sexp-wise set the 843behind. To control whether these keys move word-wise or sexp-wise set the
844variable `pc-select-meta-moves-sexps' after loading pc-select.el but before 844variable `pc-select-meta-moves-sexps' after loading pc-select.el but before
845turning `pc-selection-mode' on. 845turning PC Selection mode on.
846 846
847C-DOWN and C-UP move back or forward a paragraph, disabling the mark. 847C-DOWN and C-UP move back or forward a paragraph, disabling the mark.
848S-C-DOWN and S-C-UP move back or forward a paragraph, leaving the mark behind. 848S-C-DOWN and S-C-UP move back or forward a paragraph, leaving the mark behind.
@@ -864,7 +864,7 @@ C-INSERT copies the region into the kill ring (`copy-region-as-kill').
864 864
865In addition, certain other PC bindings are imitated (to avoid this, set 865In addition, certain other PC bindings are imitated (to avoid this, set
866the variable `pc-select-selection-keys-only' to t after loading pc-select.el 866the variable `pc-select-selection-keys-only' to t after loading pc-select.el
867but before calling `pc-selection-mode'): 867but before calling PC Selection mode):
868 868
869 F6 other-window 869 F6 other-window
870 DELETE delete-char 870 DELETE delete-char
@@ -974,7 +974,8 @@ but before calling `pc-selection-mode'):
974Change mark behaviour to emulate Motif, MAC or MS-Windows cut and paste style, 974Change mark behaviour to emulate Motif, MAC or MS-Windows cut and paste style,
975and cursor movement commands. 975and cursor movement commands.
976This mode enables Delete Selection mode and Transient Mark mode. 976This mode enables Delete Selection mode and Transient Mark mode.
977You must modify via \\[customize] for this variable to have an effect." 977Setting this variable directly does not take effect;
978you must modify it using \\[customize] or \\[pc-selection-mode]."
978 :set (lambda (symbol value) 979 :set (lambda (symbol value)
979 (pc-selection-mode (if value 1 -1))) 980 (pc-selection-mode (if value 1 -1)))
980 :initialize 'custom-initialize-default 981 :initialize 'custom-initialize-default
diff --git a/lisp/eshell/.arch-inventory b/lisp/eshell/.arch-inventory
new file mode 100644
index 00000000000..b5d82cdd6fc
--- /dev/null
+++ b/lisp/eshell/.arch-inventory
@@ -0,0 +1,4 @@
1# Generated files
2precious ^(esh-groups)\.el$
3
4# arch-tag: 8dc7bfaa-6ca6-4be0-915a-1e539c3dabfb
diff --git a/lisp/files.el b/lisp/files.el
index 4ee6da4e544..dc84c79df84 100644
--- a/lisp/files.el
+++ b/lisp/files.el
@@ -3024,7 +3024,7 @@ the last real save, but optional arg FORCE non-nil means delete anyway."
3024 3024
3025(defcustom before-save-hook nil 3025(defcustom before-save-hook nil
3026 "Normal hook that is run before a buffer is saved to its file." 3026 "Normal hook that is run before a buffer is saved to its file."
3027 :options '(copyright-update) 3027 :options '(copyright-update time-stamp)
3028 :type 'hook 3028 :type 'hook
3029 :group 'files) 3029 :group 'files)
3030 3030
diff --git a/lisp/hexl.el b/lisp/hexl.el
index cc36c37602e..883700933a8 100644
--- a/lisp/hexl.el
+++ b/lisp/hexl.el
@@ -217,7 +217,9 @@ You can use \\[hexl-find-file] to visit a file in Hexl mode.
217 (set-buffer-modified-p modified)) 217 (set-buffer-modified-p modified))
218 (make-local-variable 'hexl-max-address) 218 (make-local-variable 'hexl-max-address)
219 (setq hexl-max-address max-address) 219 (setq hexl-max-address max-address)
220 (hexl-goto-address original-point)) 220 (condition-case nil
221 (hexl-goto-address original-point)
222 (error nil)))
221 223
222 ;; We do not turn off the old major mode; instead we just 224 ;; We do not turn off the old major mode; instead we just
223 ;; override most of it. That way, we can restore it perfectly. 225 ;; override most of it. That way, we can restore it perfectly.
@@ -405,7 +407,7 @@ This function is indented to be used as eldoc callback."
405Signal error if ADDRESS out of range." 407Signal error if ADDRESS out of range."
406 (interactive "nAddress: ") 408 (interactive "nAddress: ")
407 (if (or (< address 0) (> address hexl-max-address)) 409 (if (or (< address 0) (> address hexl-max-address))
408 (error "Out of hexl region")) 410 (error "Out of hexl region"))
409 (goto-char (hexl-address-to-marker address))) 411 (goto-char (hexl-address-to-marker address)))
410 412
411(defun hexl-goto-hex-address (hex-address) 413(defun hexl-goto-hex-address (hex-address)
diff --git a/lisp/info.el b/lisp/info.el
index 14183383743..43e1dafcc6f 100644
--- a/lisp/info.el
+++ b/lisp/info.el
@@ -469,7 +469,8 @@ with the top-level Info directory.
469 469
470In interactive use, a non-numeric prefix argument directs 470In interactive use, a non-numeric prefix argument directs
471this command to read a file name from the minibuffer. 471this command to read a file name from the minibuffer.
472A numeric prefix argument appends the number to the buffer name. 472A numeric prefix argument selects an Info buffer with the prefix number
473appended to the Info buffer name.
473 474
474The search path for Info files is in the variable `Info-directory-list'. 475The search path for Info files is in the variable `Info-directory-list'.
475The top-level Info directory is made by combining all the files named `dir' 476The top-level Info directory is made by combining all the files named `dir'
@@ -1315,6 +1316,7 @@ any double quotes or backslashes must be escaped (\\\",\\\\)."
1315;; Go to an info node specified with a filename-and-nodename string 1316;; Go to an info node specified with a filename-and-nodename string
1316;; of the sort that is found in pointers in nodes. 1317;; of the sort that is found in pointers in nodes.
1317 1318
1319;;;###autoload
1318(defun Info-goto-node (nodename &optional fork) 1320(defun Info-goto-node (nodename &optional fork)
1319 "Go to info node named NODENAME. Give just NODENAME or (FILENAME)NODENAME. 1321 "Go to info node named NODENAME. Give just NODENAME or (FILENAME)NODENAME.
1320If NODENAME is of the form (FILENAME)NODENAME, the node is in the Info file 1322If NODENAME is of the form (FILENAME)NODENAME, the node is in the Info file
@@ -1672,7 +1674,8 @@ If SAME-FILE is non-nil, do not move to a different Info file."
1672 (goto-char (or p (point-min))))) 1674 (goto-char (or p (point-min)))))
1673 1675
1674(defun Info-toc () 1676(defun Info-toc ()
1675 "Go to a node with table of contents of the current Info file." 1677 "Go to a node with table of contents of the current Info file.
1678Table of contents is created from the tree structure of menus."
1676 (interactive) 1679 (interactive)
1677 (let ((curr-file Info-current-file) 1680 (let ((curr-file Info-current-file)
1678 (curr-node Info-current-node) 1681 (curr-node Info-current-node)
@@ -1687,7 +1690,7 @@ If SAME-FILE is non-nil, do not move to a different Info file."
1687 (insert "*Note Top::\n") 1690 (insert "*Note Top::\n")
1688 (Info-insert-toc 1691 (Info-insert-toc
1689 (nth 2 (assoc "Top" node-list)) ; get Top nodes 1692 (nth 2 (assoc "Top" node-list)) ; get Top nodes
1690 node-list 0 curr-file)) 1693 node-list 0 (substring-no-properties curr-file)))
1691 (if (not (bobp)) 1694 (if (not (bobp))
1692 (let ((Info-hide-note-references 'hide) 1695 (let ((Info-hide-note-references 'hide)
1693 (Info-fontify-visited-nodes nil)) 1696 (Info-fontify-visited-nodes nil))
@@ -2786,6 +2789,7 @@ if point is in a menu item description, follow that menu item."
2786 (define-key Info-mode-map "h" 'Info-help) 2789 (define-key Info-mode-map "h" 'Info-help)
2787 (define-key Info-mode-map "i" 'Info-index) 2790 (define-key Info-mode-map "i" 'Info-index)
2788 (define-key Info-mode-map "l" 'Info-last) 2791 (define-key Info-mode-map "l" 'Info-last)
2792 (define-key Info-mode-map "L" 'Info-history)
2789 (define-key Info-mode-map "m" 'Info-menu) 2793 (define-key Info-mode-map "m" 'Info-menu)
2790 (define-key Info-mode-map "n" 'Info-next) 2794 (define-key Info-mode-map "n" 'Info-next)
2791 (define-key Info-mode-map "p" 'Info-prev) 2795 (define-key Info-mode-map "p" 'Info-prev)
@@ -2796,6 +2800,7 @@ if point is in a menu item description, follow that menu item."
2796 (define-key Info-mode-map "\M-s" 'Info-search) 2800 (define-key Info-mode-map "\M-s" 'Info-search)
2797 (define-key Info-mode-map "\M-n" 'clone-buffer) 2801 (define-key Info-mode-map "\M-n" 'clone-buffer)
2798 (define-key Info-mode-map "t" 'Info-top-node) 2802 (define-key Info-mode-map "t" 'Info-top-node)
2803 (define-key Info-mode-map "T" 'Info-toc)
2799 (define-key Info-mode-map "u" 'Info-up) 2804 (define-key Info-mode-map "u" 'Info-up)
2800 ;; For consistency with dired-copy-filename-as-kill. 2805 ;; For consistency with dired-copy-filename-as-kill.
2801 (define-key Info-mode-map "w" 'Info-copy-current-node-name) 2806 (define-key Info-mode-map "w" 'Info-copy-current-node-name)
@@ -2843,9 +2848,9 @@ if point is in a menu item description, follow that menu item."
2843 ["Last" Info-last :active Info-history 2848 ["Last" Info-last :active Info-history
2844 :help "Go to the last node you were at"] 2849 :help "Go to the last node you were at"]
2845 ["History" Info-history :active Info-history-list 2850 ["History" Info-history :active Info-history-list
2846 :help "Go to the history buffer"] 2851 :help "Go to menu of visited nodes"]
2847 ["Table of Contents" Info-toc 2852 ["Table of Contents" Info-toc
2848 :help "Go to the buffer with a table of contents"] 2853 :help "Go to table of contents"]
2849 ("Index..." 2854 ("Index..."
2850 ["Lookup a String" Info-index 2855 ["Lookup a String" Info-index
2851 :help "Look for a string in the index items"] 2856 :help "Look for a string in the index items"]
@@ -2990,15 +2995,15 @@ Selecting other nodes:
2990\\[Info-directory] Go to the Info directory node. 2995\\[Info-directory] Go to the Info directory node.
2991\\[Info-follow-reference] Follow a cross reference. Reads name of reference. 2996\\[Info-follow-reference] Follow a cross reference. Reads name of reference.
2992\\[Info-last] Move to the last node you were at. 2997\\[Info-last] Move to the last node you were at.
2993\\[Info-history] Go to the history buffer. 2998\\[Info-history] Go to menu of visited nodes.
2994\\[Info-toc] Go to the buffer with a table of contents. 2999\\[Info-toc] Go to table of contents of the current Info file.
2995\\[Info-index] Look up a topic in this file's Index and move to that node.
2996\\[Info-index-next] (comma) Move to the next match from a previous \\<Info-mode-map>\\[Info-index] command.
2997\\[info-apropos] Look for a string in the indices of all manuals.
2998\\[Info-top-node] Go to the Top node of this file. 3000\\[Info-top-node] Go to the Top node of this file.
2999\\[Info-final-node] Go to the final node in this file. 3001\\[Info-final-node] Go to the final node in this file.
3000\\[Info-backward-node] Go backward one node, considering all nodes as forming one sequence. 3002\\[Info-backward-node] Go backward one node, considering all nodes as forming one sequence.
3001\\[Info-forward-node] Go forward one node, considering all nodes as forming one sequence. 3003\\[Info-forward-node] Go forward one node, considering all nodes as forming one sequence.
3004\\[Info-index] Look up a topic in this file's Index and move to that node.
3005\\[Info-index-next] (comma) Move to the next match from a previous \\<Info-mode-map>\\[Info-index] command.
3006\\[info-apropos] Look for a string in the indices of all manuals.
3002 3007
3003Moving within a node: 3008Moving within a node:
3004\\[Info-scroll-up] Normally, scroll forward a full screen. 3009\\[Info-scroll-up] Normally, scroll forward a full screen.
@@ -3015,15 +3020,15 @@ Advanced commands:
3015\\[Info-copy-current-node-name] Put name of current info node in the kill ring. 3020\\[Info-copy-current-node-name] Put name of current info node in the kill ring.
3016\\[clone-buffer] Select a new cloned Info buffer in another window. 3021\\[clone-buffer] Select a new cloned Info buffer in another window.
3017\\[Info-edit] Edit contents of selected node. 3022\\[Info-edit] Edit contents of selected node.
30181 Pick first item in node's menu. 30231 .. 9 Pick first ... ninth item in node's menu.
30192, 3, 4, 5 Pick second ... fifth item in node's menu. 3024 Every third `*' is highlighted to help pick the right number.
3020\\[Info-goto-node] Move to node specified by name. 3025\\[Info-goto-node] Move to node specified by name.
3021 You may include a filename as well, as (FILENAME)NODENAME. 3026 You may include a filename as well, as (FILENAME)NODENAME.
3022\\[universal-argument] \\[info] Move to new Info file with completion. 3027\\[universal-argument] \\[info] Move to new Info file with completion.
3028\\[universal-argument] N \\[info] Select Info buffer with prefix number in the name *info*<N>.
3023\\[Info-search] Search through this Info file for specified regexp, 3029\\[Info-search] Search through this Info file for specified regexp,
3024 and select the node in which the next occurrence is found. 3030 and select the node in which the next occurrence is found.
3025\\[Info-search-case-sensitively] Search through this Info file 3031\\[Info-search-case-sensitively] Search through this Info file for specified regexp case-sensitively.
3026 for specified regexp case-sensitively.
3027\\[Info-search-next] Search for another occurrence of regexp 3032\\[Info-search-next] Search for another occurrence of regexp
3028 from a previous \\<Info-mode-map>\\[Info-search] command. 3033 from a previous \\<Info-mode-map>\\[Info-search] command.
3029\\[Info-next-reference] Move cursor to next cross-reference or menu item. 3034\\[Info-next-reference] Move cursor to next cross-reference or menu item.
diff --git a/lisp/international/ccl.el b/lisp/international/ccl.el
index e9e46bb0c6c..e55f1fbcf36 100644
--- a/lisp/international/ccl.el
+++ b/lisp/international/ccl.el
@@ -1120,7 +1120,8 @@
1120 (insert (format "write r%d (%d remaining)\n" rrr cc))) 1120 (insert (format "write r%d (%d remaining)\n" rrr cc)))
1121 1121
1122(defun ccl-dump-call (ignore cc) 1122(defun ccl-dump-call (ignore cc)
1123 (insert (format "call subroutine #%d\n" cc))) 1123 (let ((subroutine (car (ccl-get-next-code))))
1124 (insert (format "call subroutine `%s'\n" subroutine))))
1124 1125
1125(defun ccl-dump-write-const-string (rrr cc) 1126(defun ccl-dump-write-const-string (rrr cc)
1126 (if (= rrr 0) 1127 (if (= rrr 0)
diff --git a/lisp/international/characters.el b/lisp/international/characters.el
index 809c457c6e1..c33bd2eb43f 100644
--- a/lisp/international/characters.el
+++ b/lisp/international/characters.el
@@ -33,6 +33,11 @@
33 33
34;;; Code: 34;;; Code:
35 35
36;; We must set utf-translate-cjk-mode to nil while loading this file
37;; to avoid translating CJK characters in decode-char.
38(defvar saved-utf-translate-cjk-mode utf-translate-cjk-mode)
39(setq utf-translate-cjk-mode nil)
40
36;;; Predefined categories. 41;;; Predefined categories.
37 42
38;; For each character set. 43;; For each character set.
@@ -1277,6 +1282,10 @@
1277 (put-charset-property (car l) 'nospace-between-words t) 1282 (put-charset-property (car l) 'nospace-between-words t)
1278 (setq l (cdr l)))) 1283 (setq l (cdr l))))
1279 1284
1285
1286(setq utf-translate-cjk-mode saved-utf-translate-cjk-mode)
1287(makunbound 'saved-utf-translate-cjk-mode)
1288
1280;;; Local Variables: 1289;;; Local Variables:
1281;;; coding: iso-2022-7bit 1290;;; coding: iso-2022-7bit
1282;;; End: 1291;;; End:
diff --git a/lisp/international/code-pages.el b/lisp/international/code-pages.el
index 64c430f9383..0f8cdbf5713 100644
--- a/lisp/international/code-pages.el
+++ b/lisp/international/code-pages.el
@@ -2970,22 +2970,22 @@ Return an updated `non-iso-charset-alist'."
2970(cp-make-coding-system 2970(cp-make-coding-system
2971 windows-1256 2971 windows-1256
2972 [?\€ 2972 [?\€
2973 ?\Ù  2973 ?\Ù¾
2974 ?\‚ 2974 ?\‚
2975 ?\Ù¡ 2975 ?\Æ’
2976 ?\„ 2976 ?\„
2977 ?\… 2977 ?\…
2978 ?\† 2978 ?\†
2979 ?\‡ 2979 ?\‡
2980 ?\٢ 2980 ?\ˆ
2981 ?\٣ 2981 ?\‰
2982 ?\Ù¤ 2982 ?\Ù¹
2983 ?\‹ 2983 ?\‹
2984 ?\Ù¥ 2984 ?\Å’
2985 ?\Ù¦ 2985 ?\Ú†
2986 ?\Ù§ 2986 ?\Ú˜
2987 ?\Ù¨ 2987 ?\Úˆ
2988 ?\Ù© 2988 ?\Ú¯
2989 ?\‘ 2989 ?\‘
2990 ?\’ 2990 ?\’
2991 ?\“ 2991 ?\“
@@ -2993,110 +2993,110 @@ Return an updated `non-iso-charset-alist'."
2993 ?\• 2993 ?\•
2994 ?\– 2994 ?\–
2995 ?\— 2995 ?\—
2996 ?\Ø› 2996 ?\Ú©
2997 ?\â„¢ 2997 ?\â„¢
2998 ?\ØŸ 2998 ?\Ú‘
2999 ?\› 2999 ?\›
3000 ?\Ø¡ 3000 ?\Å“
3001 ?\آ 3001 ?\‌
3002 ?\Ø£ 3002 ?\â€
3003 ?\Ÿ 3003 ?\ں
3004 ?\  3004 ?\ 
3005 ?\ؤ 3005 ?\،
3006 ?\إ 3006 ?\¢
3007 ?\£ 3007 ?\£
3008 ?\¤ 3008 ?\¤
3009 ?\ئ 3009 ?\¥
3010 ?\¦ 3010 ?\¦
3011 ?\§ 3011 ?\§
3012 ?\ا 3012 ?\¨
3013 ?\© 3013 ?\©
3014 ?\ب 3014 ?\ھ
3015 ?\« 3015 ?\«
3016 ?\¬ 3016 ?\¬
3017 ?\­ 3017 ?\­
3018 ?\® 3018 ?\®
3019 ?\پ 3019 ?\¯
3020 ?\° 3020 ?\°
3021 ?\± 3021 ?\±
3022 ?\ة 3022 ?\²
3023 ?\ت 3023 ?\³
3024 ?\ث 3024 ?\´
3025 ?\µ 3025 ?\µ
3026 ?\¶ 3026 ?\¶
3027 ?\· 3027 ?\·
3028 ?\¸
3029 ?\¹
3030 ?\Ø›
3031 ?\»
3032 ?\¼
3033 ?\½
3034 ?\¾
3035 ?\ØŸ
3036 ?\Û
3037 ?\Ø¡
3038 ?\Ø¢
3039 ?\Ø£
3040 ?\ؤ
3041 ?\Ø¥
3042 ?\ئ
3043 ?\ا
3044 ?\ب
3045 ?\Ø©
3046 ?\ت
3047 ?\Ø«
3028 ?\ج 3048 ?\ج
3029 ?\Ú†
3030 ?\Ø­ 3049 ?\Ø­
3031 ?\»
3032 ?\Ø® 3050 ?\Ø®
3033 ?\د 3051 ?\د
3034 ?\ذ 3052 ?\ذ
3035 ?\ر 3053 ?\ر
3036 ?\À
3037 ?\ز 3054 ?\ز
3038 ?\Â
3039 ?\Ú˜
3040 ?\س 3055 ?\س
3041 ?\Ø´ 3056 ?\Ø´
3042 ?\ص 3057 ?\ص
3043 ?\Ç
3044 ?\È
3045 ?\É
3046 ?\Ê
3047 ?\Ë
3048 ?\ض 3058 ?\ض
3059 ?\×
3049 ?\Ø· 3060 ?\Ø·
3050 ?\Î 3061 ?\ظ
3051 ?\Ã
3052 ?\ã„“
3053 ?\ع 3062 ?\ع
3054 ?\غ 3063 ?\غ
3055 ?\Ù€ 3064 ?\Ù€
3056 ?\Ô
3057 ?\Ù 3065 ?\Ù
3058 ?\Ù‚ 3066 ?\Ù‚
3059 ?\×
3060 ?\Ùƒ 3067 ?\Ùƒ
3061 ?\Ù 3068 ?\à
3062 ?\Ú¯
3063 ?\Û
3064 ?\Ü
3065 ?\Ù„ 3069 ?\Ù„
3070 ?\â
3066 ?\Ù… 3071 ?\Ù…
3067 ?\Ù† 3072 ?\Ù†
3068 ?\à
3069 ?\Ù‡ 3073 ?\Ù‡
3070 ?\â
3071 ?\Ú
3072 ?\Ùˆ 3074 ?\Ùˆ
3073 ?\Ù‰
3074 ?\ÙŠ
3075 ?\ç 3075 ?\ç
3076 ?\è 3076 ?\è
3077 ?\é 3077 ?\é
3078 ?\ê 3078 ?\ê
3079 ?\ë 3079 ?\ë
3080 ?\Ù 3080 ?\Ù
3081 ?\ٌ 3081 ?\ي
3082 ?\î 3082 ?\î
3083 ?\ï 3083 ?\ï
3084 ?\Ù‹
3085 ?\ٌ
3084 ?\Ù 3086 ?\Ù
3085 ?\ÙŽ 3087 ?\ÙŽ
3088 ?\ô
3086 ?\Ù 3089 ?\Ù
3087 ?\Ù 3090 ?\Ù
3088 ?\ô
3089 ?\Ù‘
3090 ?\Ù’
3091 ?\÷ 3091 ?\÷
3092 nil 3092 ?\Ù‘
3093 ?\ù 3093 ?\ù
3094 nil 3094 ?\Ù’
3095 ?\û 3095 ?\û
3096 ?\ü 3096 ?\ü
3097 ?\‎ 3097 ?\‎
3098 ?\†3098 ?\â€
3099 ?\ÿ] 3099 ?\ے]
3100 nil ?a) ;; Arabic 3100 nil ?a) ;; Arabic
3101 3101
3102(cp-make-coding-system 3102(cp-make-coding-system
@@ -4430,11 +4430,11 @@ Return an updated `non-iso-charset-alist'."
4430 ?\Ñ– 4430 ?\Ñ–
4431 ?\Ї 4431 ?\Ї
4432 ?\Ñ— 4432 ?\Ñ—
4433 ?\÷ 4433 ?\·
4434 ?\± 4434 ?\√
4435 ?\â„– 4435 ?\â„–
4436 ?\¤ 4436 ?\¤
4437 ?\ï¿­ 4437 ?\â– 
4438 ?\ ]) 4438 ?\ ])
4439(define-coding-system-alias 'ruscii 'cp1125) 4439(define-coding-system-alias 'ruscii 'cp1125)
4440;; Original name for cp1125, says Serhii Hlodin <hlodin@lutsk.bank.gov.ua> 4440;; Original name for cp1125, says Serhii Hlodin <hlodin@lutsk.bank.gov.ua>
diff --git a/lisp/international/mule-cmds.el b/lisp/international/mule-cmds.el
index c9c462f028e..95177fdb954 100644
--- a/lisp/international/mule-cmds.el
+++ b/lisp/international/mule-cmds.el
@@ -1834,6 +1834,15 @@ specifies the character set for the major languages of Western Europe."
1834 (let ((func (get-language-info language-name 'setup-function))) 1834 (let ((func (get-language-info language-name 'setup-function)))
1835 (if (functionp func) 1835 (if (functionp func)
1836 (funcall func))) 1836 (funcall func)))
1837 (if (and utf-translate-cjk-mode
1838 utf-translate-cjk-lang-env
1839 (not (eq utf-translate-cjk-lang-env language-name))
1840 (catch 'tag
1841 (dolist (charset (get-language-info language-name 'charset))
1842 (if (memq charset utf-translate-cjk-charsets)
1843 (throw 'tag t)))
1844 nil))
1845 (utf-translate-cjk-load-tables))
1837 (run-hooks 'set-language-environment-hook) 1846 (run-hooks 'set-language-environment-hook)
1838 (force-mode-line-update t)) 1847 (force-mode-line-update t))
1839 1848
diff --git a/lisp/international/mule.el b/lisp/international/mule.el
index c4c7be3a225..bbe83c2baf7 100644
--- a/lisp/international/mule.el
+++ b/lisp/international/mule.el
@@ -316,8 +316,7 @@ Optional argument RESTRICTION specifies a way to map the pair of CCS
316and CODE-POINT to a character. Currently not supported and just ignored." 316and CODE-POINT to a character. Currently not supported and just ignored."
317 (cond 317 (cond
318 ((eq ccs 'ucs) 318 ((eq ccs 'ucs)
319 (or (gethash code-point 319 (or (utf-lookup-subst-table-for-decode code-point)
320 (get 'utf-subst-table-for-decode 'translation-hash-table))
321 (let ((c (cond 320 (let ((c (cond
322 ((< code-point 160) 321 ((< code-point 160)
323 code-point) 322 code-point)
@@ -361,8 +360,7 @@ code-point in CCS. Currently not supported and just ignored."
361 (charset (car split)) 360 (charset (car split))
362 trans) 361 trans)
363 (cond ((eq ccs 'ucs) 362 (cond ((eq ccs 'ucs)
364 (or (gethash char (get 'utf-subst-table-for-encode 363 (or (utf-lookup-subst-table-for-encode char)
365 'translation-hash-table))
366 (let ((table (get 'utf-translation-table-for-encode 364 (let ((table (get 'utf-translation-table-for-encode
367 'translation-table))) 365 'translation-table)))
368 (setq trans (aref table char)) 366 (setq trans (aref table char))
diff --git a/lisp/international/utf-16.el b/lisp/international/utf-16.el
index d924512b634..477cccc2bf9 100644
--- a/lisp/international/utf-16.el
+++ b/lisp/international/utf-16.el
@@ -48,99 +48,110 @@
48;; things below, sometimes with commonality abstracted into a let 48;; things below, sometimes with commonality abstracted into a let
49;; binding for maintenance convenience. 49;; binding for maintenance convenience.
50 50
51;; We'd need new charsets distinct from ascii and eight-bit-control to
52;; deal with untranslated sequences, since we can't otherwise
53;; distinguish the bytes, as we can with utf-8.
54
55;; ;; Do a multibyte write for bytes in r3 and r4.
56;; ;; Intended for untranslatable utf-16 sequences.
57;; (define-ccl-program ccl-mule-utf-16-untrans
58;; `(0
59;; (if (r3 < 128)
60;; (r0 = ,(charset-id 'ascii))
61;; (if (r3 < 160)
62;; (r0 = ,(charset-id 'eight-bit-control))
63;; (r0 = ,(charset-id 'eight-bit-graphic))))
64;; (if (r4 < 128)
65;; (r0 = ,(charset-id 'ascii))
66;; (if (r4 < 160)
67;; (r0 = ,(charset-id 'eight-bit-control))
68;; (r0 = ,(charset-id 'eight-bit-graphic))))
69;; (r1 = r4)))
70;; "Do a multibyte write for bytes in r3 and r4.
71;; First swap them if we're big endian, indicated by r5==0.
72;; Intended for untranslatable utf-16 sequences.")
73
74;; Needed in macro expansion, so can't be let-bound. Zapped after use. 51;; Needed in macro expansion, so can't be let-bound. Zapped after use.
75(eval-and-compile 52(eval-and-compile
76(defconst utf-16-decode-ucs 53(defconst utf-16-decode-ucs
77 ;; We have the unicode in r1. Output is charset ID in r0, code 54 ;; If r5 is negative, r1 is a Unicode chacter code. Otherise, r5 is
78 ;; point in r1. 55 ;; the first of a surrogate pair and r1 is the second of the pair.
79 `((lookup-integer utf-subst-table-for-decode r1 r3) 56 ;; Output is charset ID in r0, code point in r1. R0 may be set to
80 (if r7 ; got a translation 57 ;; -1 in which case a caller should not write out r1.
81 ((r0 = r1) (r1 = r3)) 58 `((if (r5 >= 0)
82 (if (r1 < 128) 59 ((r0 = (r1 < #xDC00))
83 (r0 = ,(charset-id 'ascii)) 60 (if ((r1 >= #xE000) | r0)
84 (if (r1 < 160) 61 ;; Invalid second code of surrogate pair.
85 (r0 = ,(charset-id 'eight-bit-control)) 62 ((r0 = r5)
86 (if (r1 < 256) 63 (call ccl-mule-utf-untrans))
87 ((r0 = ,(charset-id 'latin-iso8859-1)) 64 ((r1 -= #xDC00)
88 (r1 -= 128)) 65 (r1 += (((r5 - #xD800) << 10) + #x10000))))
89 (if (r1 < #x2500) 66 (r5 = -1)))
90 ((r0 = ,(charset-id 'mule-unicode-0100-24ff)) 67 (if (r1 < 128)
91 (r1 -= #x100) 68 (r0 = ,(charset-id 'ascii))
92 (r2 = (((r1 / 96) + 32) << 7)) 69 ((lookup-integer utf-subst-table-for-decode r1 r3)
93 (r1 %= 96) 70 (if r7 ; got a translation
94 (r1 += (r2 + 32))) 71 ((r0 = r1) (r1 = r3))
95 (if (r1 < #x3400) 72 (if (r1 < 160)
96 ((r0 = ,(charset-id 'mule-unicode-2500-33ff)) 73 (r0 = ,(charset-id 'eight-bit-control))
97 (r1 -= #x2500) 74 (if (r1 < 256)
98 (r2 = (((r1 / 96) + 32) << 7)) 75 ((r0 = ,(charset-id 'latin-iso8859-1))
99 (r1 %= 96) 76 (r1 -= 128))
100 (r1 += (r2 + 32))) 77 (if (r1 < #x2500)
101 (if (r1 < #xd800) ; 2 untranslated bytes 78 ((r0 = ,(charset-id 'mule-unicode-0100-24ff))
102 ;; ;; Assume this is rare, so don't worry about the 79 (r1 -= #x100)
103 ;; ;; overhead of the call. 80 (r2 = (((r1 / 96) + 32) << 7))
104 ;; (call mule-utf-16-untrans) 81 (r1 %= 96)
105 ((r0 = ,(charset-id 'mule-unicode-e000-ffff)) 82 (r1 += (r2 + 32)))
106 (r1 = 15037)) ; U+fffd 83 (if (r1 < #x3400)
107 (if (r1 < #xe000) ; surrogate 84 ((r0 = ,(charset-id 'mule-unicode-2500-33ff))
108 ;; ((call mule-utf-16-untrans) 85 (r1 -= #x2500)
109 ;; (write-multibyte-character r0 r1) 86 (r2 = (((r1 / 96) + 32) << 7))
110 ;; (read r3 r4) 87 (r1 %= 96)
111 ;; (call mule-utf-16-untrans)) 88 (r1 += (r2 + 32)))
112 ((read r3 r4) 89 (if (r1 < #xD800)
113 (r0 = ,(charset-id 'mule-unicode-e000-ffff)) 90 ;; We can't have this character.
114 (r1 = 15037)) 91 ((r0 = r1)
115 ((r0 = ,(charset-id 'mule-unicode-e000-ffff)) 92 (call ccl-mule-utf-untrans)
116 (r1 -= #xe000) 93 (r5 = -1)
117 (r2 = (((r1 / 96) + 32) << 7)) 94 (r0 = -1))
118 (r1 %= 96) 95 (if (r1 < #xDC00)
119 (r1 += (r2 + 32))))))))))))) 96 ;; The first code of a surrogate pair.
97 ((r5 = r1)
98 (r0 = -1))
99 (if (r1 < #xE000)
100 ;; The second code of a surrogate pair, invalid.
101 ((r0 = r1)
102 (call ccl-mule-utf-untrans)
103 (r5 = -1)
104 (r0 = -1))
105 (if (r1 < #x10000)
106 ((r0 = ,(charset-id 'mule-unicode-e000-ffff))
107 (r1 -= #xE000)
108 (r2 = (((r1 / 96) + 32) << 7))
109 (r1 %= 96)
110 (r1 += (r2 + 32)))
111 ;; We can't have this character.
112 ((r0 = r1)
113 (call ccl-mule-utf-untrans)
114 (r5 = -1)
115 (r0 = -1)))))))))))))))
120 116
121(defconst utf-16le-decode-loop 117(defconst utf-16le-decode-loop
122 `(loop 118 `((r5 = -1)
123 (read r3 r4) 119 (loop
124 (r1 = (r4 <8 r3)) 120 (r3 = -1)
125 ,utf-16-decode-ucs 121 (read r3 r4)
126 (translate-character utf-translation-table-for-decode r0 r1) 122 (r1 = (r4 <8 r3))
127 (write-multibyte-character r0 r1) 123 ,@utf-16-decode-ucs
128 (repeat))) 124 (if (r0 >= 0)
125 ((translate-character utf-translation-table-for-decode r0 r1)
126 (write-multibyte-character r0 r1)))
127 (repeat))))
129 128
130(defconst utf-16be-decode-loop 129(defconst utf-16be-decode-loop
131 `(loop 130 `((r5 = -1)
132 (read r3 r4) 131 (loop
133 (r1 = (r3 <8 r4)) 132 (r3 = -1)
134 ,@utf-16-decode-ucs 133 (read r3 r4)
135 (translate-character utf-translation-table-for-decode r0 r1) 134 (r1 = (r3 <8 r4))
136 (write-multibyte-character r0 r1) 135 ,@utf-16-decode-ucs
137 (repeat))) 136 (if (r0 >= 0)
137 ((translate-character utf-translation-table-for-decode r0 r1)
138 (write-multibyte-character r0 r1)))
139 (repeat))))
138 140
139) 141)
140 142
141(define-ccl-program ccl-decode-mule-utf-16le 143(define-ccl-program ccl-decode-mule-utf-16le
142 `(2 ; 2 bytes -> 1 to 4 bytes 144 `(2 ; 2 bytes -> 1 to 4 bytes
143 ,utf-16le-decode-loop) 145 ,utf-16le-decode-loop
146 ((if (r5 >= 0)
147 ((r0 = r5)
148 (call ccl-mule-utf-untrans)))
149 (if (r3 < 0)
150 nil
151 ((if (r3 < #xA0)
152 (r0 = ,(charset-id 'eight-bit-control))
153 (r0 = ,(charset-id 'eight-bit-graphic)))
154 (write-multibyte-character r0 r3)))))
144 "Decode UTF-16LE (little endian without signature bytes). 155 "Decode UTF-16LE (little endian without signature bytes).
145Basic decoding is done into the charsets ascii, latin-iso8859-1 and 156Basic decoding is done into the charsets ascii, latin-iso8859-1 and
146mule-unicode-*. Un-representable Unicode characters are decoded as 157mule-unicode-*. Un-representable Unicode characters are decoded as
@@ -149,7 +160,13 @@ U+fffd. The result is run through the translation-table named
149 160
150(define-ccl-program ccl-decode-mule-utf-16be 161(define-ccl-program ccl-decode-mule-utf-16be
151 `(2 ; 2 bytes -> 1 to 4 bytes 162 `(2 ; 2 bytes -> 1 to 4 bytes
152 ,utf-16be-decode-loop) 163 ,utf-16be-decode-loop
164 ((if (r5 >= 0)
165 ((r0 = r5)
166 (call ccl-mule-utf-untrans)))
167 (if (r3 >= 0)
168 ((r0 = r3)
169 (call ccl-mule-utf-untrans)))))
153 "Decode UTF-16BE (big endian without signature bytes). 170 "Decode UTF-16BE (big endian without signature bytes).
154Basic decoding is done into the charsets ascii, latin-iso8859-1 and 171Basic decoding is done into the charsets ascii, latin-iso8859-1 and
155mule-unicode-*. Un-representable Unicode characters are 172mule-unicode-*. Un-representable Unicode characters are
@@ -158,91 +175,218 @@ name `utf-translation-table-for-decode'.")
158 175
159(define-ccl-program ccl-decode-mule-utf-16le-with-signature 176(define-ccl-program ccl-decode-mule-utf-16le-with-signature
160 `(2 177 `(2
161 ((read r3 r4) 178 ((r3 = -1)
162 ,utf-16le-decode-loop)) 179 (read r3 r4)
180 ,@utf-16le-decode-loop)
181 (if (r3 >= 0)
182 ((r0 = r3)
183 (call ccl-mule-utf-untrans))))
163 "Like ccl-decode-utf-16le but skip the first 2-byte BOM.") 184 "Like ccl-decode-utf-16le but skip the first 2-byte BOM.")
164 185
165(define-ccl-program ccl-decode-mule-utf-16be-with-signature 186(define-ccl-program ccl-decode-mule-utf-16be-with-signature
166 `(2 187 `(2
167 ((read r3 r4) 188 ((r3 = -1)
168 ,utf-16be-decode-loop)) 189 (read r3 r4)
190 ,@utf-16be-decode-loop)
191 (if (r3 >= 0)
192 ((r0 = r3)
193 (call ccl-mule-utf-untrans))))
169 "Like ccl-decode-utf-16be but skip the first 2-byte BOM.") 194 "Like ccl-decode-utf-16be but skip the first 2-byte BOM.")
170 195
171(define-ccl-program ccl-decode-mule-utf-16 196(define-ccl-program ccl-decode-mule-utf-16
172 `(2 197 `(2
173 ((read r3 r4) 198 ((r3 = -1)
199 (read r3 r4)
174 (r1 = (r3 <8 r4)) 200 (r1 = (r3 <8 r4))
201 (r5 = -1)
175 (if (r1 == #xFFFE) 202 (if (r1 == #xFFFE)
176 ;; R1 is a BOM for little endian. We keep this character as 203 ;; R1 is a BOM for little endian. We keep this character as
177 ;; is temporarily. It is removed by post-read-conversion 204 ;; is temporarily. It is removed by post-read-conversion
178 ;; function. 205 ;; function.
179 (,@utf-16-decode-ucs 206 (,@utf-16-decode-ucs
180 (write-multibyte-character r0 r1) 207 (write-multibyte-character r0 r1)
181 ,utf-16le-decode-loop) 208 ,@utf-16le-decode-loop)
182 ((if (r1 == #xFEFF) 209 ((if (r1 == #xFEFF)
183 ;; R1 is a BOM for big endian, but we can't keep that 210 ;; R1 is a BOM for big endian, but we can't keep that
184 ;; character in the output because it can't be 211 ;; character in the output because it can't be
185 ;; distinguished with the normal U+FEFF. So, we keep 212 ;; distinguished with the normal U+FEFF. So, we keep
186 ;; #xFFFF instead. 213 ;; #xFFFF instead.
187 ((r1 = #xFFFF) 214 ((r1 = #xFFFF)
188 ,@utf-16-decode-ucs) 215 ,@utf-16-decode-ucs
189 ;; R1 a normal Unicode character. 216 (write-multibyte-character r0 r1))
217 ;; R1 is a normal Unicode character.
190 (,@utf-16-decode-ucs 218 (,@utf-16-decode-ucs
191 (translate-character utf-translation-table-for-decode r0 r1))) 219 (if (r0 >= 0)
192 (write-multibyte-character r0 r1) 220 ((translate-character utf-translation-table-for-decode r0 r1)
193 ,utf-16be-decode-loop)))) 221 (write-multibyte-character r0 r1)))))
222 ,@utf-16be-decode-loop)))
223 (if (r3 >= 0)
224 ((r0 = r3)
225 (call ccl-mule-utf-untrans))))
194 "Like ccl-decode-utf-16be/le but check the first BOM.") 226 "Like ccl-decode-utf-16be/le but check the first BOM.")
195 227
196(makunbound 'utf-16-decode-ucs) ; done with it 228(makunbound 'utf-16-decode-ucs) ; done with it
197(makunbound 'utf-16le-decode-loop) 229(makunbound 'utf-16le-decode-loop)
198(makunbound 'utf-16be-decode-loop) 230(makunbound 'utf-16be-decode-loop)
199 231
232;; UTF-16 decoder generates an UTF-8 sequence represented by a
233;; sequence eight-bit-control/graphic chars for an invalid byte (the
234;; last byte of an odd length source) and an untranslatable character
235;; (including an invalid surrogate-pair code-point).
236;;
237;; This CCL parses that sequence (the first byte is already in r1),
238;; and if the sequence represents an untranslatable character, it sets
239;; r1 to the original invalid code or untranslated Unicode character
240;; code, sets r2 to -1 (to prevent r2 and r3 are written), set2 r5 to
241;; -1 (to tell the caller that there's no pre-read character).
242;;
243;; If the sequence represents an invalid byte, it sets r1 to -1, r2 to
244;; the byte, sets r3 and r5 to -1.
245;;
246;; Otherwise, don't change r1, set r2 and r3 to already read
247;; eight-bit-control/graphic characters (if any), set r5 and r6 to the
248;; last character that invalidates the UTF-8 form.
249;;
250;; Note: For UTF-8 validation, we only check if a character is
251;; eight-bit-control/graphic or not. It may result in incorrect
252;; handling of random binary data, but such a data can't be encoded by
253;; UTF-16 anyway. At least, UTF-16 decoder doesn't generate such a
254;; sequence even if a source contains invalid byte-sequence.
255
256(define-ccl-program ccl-mule-utf-16-encode-untrans
257 `(0
258 ((r2 = -1)
259 ;; Read the 2nd byte.
260 (read-multibyte-character r5 r6)
261 (r0 = (r5 != ,(charset-id 'eight-bit-control)))
262 (if ((r5 != ,(charset-id 'eight-bit-graphic)) & r0)
263 ((r2 = r1)
264 (r3 = -1)
265 (r1 = -1)
266 (end))) ; invalid UTF-8
267
268 (r3 = -1)
269 (r2 = r6)
270 (if (r1 <= #xE0)
271 ;; 2-byte UTF-8, i.e. originally an invalid byte.
272 ((r2 &= #x3F)
273 (r2 |= ((r1 & #x1F) << 6))
274 (r1 = -1)
275 (r5 = -1)
276 (end)))
277
278 ;; Read the 3rd byte.
279 (read-multibyte-character r5 r6)
280 (r0 = (r5 != ,(charset-id 'eight-bit-control)))
281 (if ((r5 != ,(charset-id 'eight-bit-graphic)) & r0)
282 ((end))) ; invalid UTF-8
283
284 (if (r1 < #xF0) ; valid 3-byte UTF-8
285 ((r1 = ((r1 & #x0F) << 12))
286 (r1 |= ((r2 & #x3F) << 6))
287 (r1 |= (r6 & #x3F))
288 (r2 = -1)
289 (r5 = -1)
290 (end)))
291
292 (r3 = r6)
293 ;; Read the 4th byte.
294 (read-multibyte-character r5 r6)
295 (r0 = (r5 != ,(charset-id 'eight-bit-control)))
296 (if ((r5 != ,(charset-id 'eight-bit-graphic)) & r0)
297 (end)) ; livalid UTF-8
298
299 ;; valid 4-byte UTF-8
300 (r1 = ((r1 & #x07) << 18))
301 (r1 |= ((r2 & #x3F) << 12))
302 (r1 |= ((r3 & #x3F) << 6))
303 (r1 |= (r6 & #x3F))
304 (r2 = -1)
305 (r5 = -1)
306 (end))
307
308 (if (r1 >= 0)
309 ((write r1)
310 (if (r2 >= 0)
311 ((write r2)
312 (if (r3 >= 0)
313 (write r3))))))))
314
200(eval-and-compile 315(eval-and-compile
201(defconst utf-16-decode-to-ucs 316(defconst utf-16-decode-to-ucs
202 ;; CCL which, given the result of a multibyte read in r0 and r1, 317 ;; Read a character and set r1 to the corresponding Unicode code.
203 ;; sets r0 to the character's Unicode if the charset is one of the 318 ;; If r5 is not negative, it means that we have already read a
204 ;; basic utf-8 coding system ones. Otherwise set to U+fffd. 319 ;; character into r5 and r6.
205 `(if (r0 == ,(charset-id 'ascii)) 320 ;; If an invalid eight-bit-control/graphic sequence is found, r2 and
206 (r0 = r1) 321 ;; r3 may contain a byte to written out, r5 and r6 may contain a
207 (if (r0 == ,(charset-id 'latin-iso8859-1)) 322 ;; pre-read character. Usually they are set to -1.
208 (r0 = (r1 + 128)) 323 `((if (r5 < 0)
209 (if (r0 == ,(charset-id 'eight-bit-control)) 324 (read-multibyte-character r0 r1)
210 (r0 = r1) 325 ((r0 = r5)
211 (if (r0 == ,(charset-id 'eight-bit-graphic)) 326 (r1 = r6)
212 (r0 = r1) 327 (r5 = -1)))
213 ((r2 = (r1 & #x7f)) 328 (lookup-character utf-subst-table-for-encode r0 r1)
214 (r1 >>= 7) 329 (r2 = -1)
215 (r3 = ((r1 - 32) * 96)) 330 (if (r7 > 0)
216 (r3 += (r2 - 32)) 331 (r1 = r0)
217 (if (r0 == ,(charset-id 'mule-unicode-0100-24ff)) 332 ((translate-character utf-translation-table-for-encode r0 r1)
218 (r0 = (r3 + #x100)) 333 (if (r0 == ,(charset-id 'ascii))
219 (if (r0 == ,(charset-id 'mule-unicode-2500-33ff)) 334 nil
220 (r0 = (r3 + #x2500)) 335 (if (r0 == ,(charset-id 'latin-iso8859-1))
221 (if (r0 == ,(charset-id 'mule-unicode-e000-ffff)) 336 (r1 += 128)
222 (r0 = (r3 + #xe000)) 337 (if (r0 == ,(charset-id 'eight-bit-control))
223 (r0 = #xfffd)))))))))) 338 nil
339 (if (r0 == ,(charset-id 'eight-bit-graphic))
340 (call ccl-mule-utf-16-encode-untrans)
341 ((r2 = ((r1 & #x7f) - 32))
342 (r3 = ((((r1 >> 7) - 32) * 96) + r2))
343 (r2 = -1)
344 (r5 = -1)
345 (if (r0 == ,(charset-id 'mule-unicode-0100-24ff))
346 (r1 = (r3 + #x100))
347 (if (r0 == ,(charset-id 'mule-unicode-2500-33ff))
348 (r1 = (r3 + #x2500))
349 (if (r0 == ,(charset-id 'mule-unicode-e000-ffff))
350 (r1 = (r3 + #xe000))
351 (r1 = #xfffd)))))))))))))
224 352
225(defconst utf-16le-encode-loop 353(defconst utf-16le-encode-loop
226 `(loop 354 `((r5 = -1)
227 (read-multibyte-character r0 r1) 355 (loop
228 (lookup-character utf-subst-table-for-encode r0 r1) 356 ,@utf-16-decode-to-ucs
229 (if (r7 == 0) 357 (if (r1 >= #x10000)
230 ((translate-character utf-translation-table-for-encode r0 r1) 358 ((r1 -= #x10000)
231 ,utf-16-decode-to-ucs)) 359 (r0 = ((r1 >> 10) + #xD800))
232 (write (r0 & 255)) 360 (write (r0 & 255))
233 (write (r0 >> 8)) 361 (write (r0 >> 8))
234 (repeat))) 362 (r1 = ((r1 & #x3FF) + #xDC00))))
363 (if (r1 >= 0)
364 ((write (r1 & 255))
365 (write (r1 >> 8))))
366 (if (r2 >= 0)
367 ((write r2)
368 (if (r3 >= 0)
369 (write r3))))
370 (repeat))))
235 371
236(defconst utf-16be-encode-loop 372(defconst utf-16be-encode-loop
237 `(loop 373 `((r5 = -1)
238 (read-multibyte-character r0 r1) 374 (loop
239 (lookup-character utf-subst-table-for-encode r0 r1) 375 ,@utf-16-decode-to-ucs
240 (if (r7 == 0) 376 (if (r1 >= #x10000)
241 ((translate-character utf-translation-table-for-encode r0 r1) 377 ((r1 -= #x10000)
242 ,utf-16-decode-to-ucs)) 378 (r0 = ((r1 >> 10) + #xD800))
243 (write (r0 >> 8)) 379 (write (r0 >> 8))
244 (write (r0 & 255)) 380 (write (r0 & 255))
245 (repeat))) 381 (r1 = ((r1 & #x3FF) + #xDC00))))
382 (if (r1 >= 0)
383 ((write (r1 >> 8))
384 (write (r1 & 255))))
385 (if (r2 >= 0)
386 ((write r2)
387 (if (r3 >= 0)
388 (write r3))))
389 (repeat))))
246) 390)
247 391
248 392
@@ -270,7 +414,7 @@ Others are encoded as U+FFFD.")
270 `(1 414 `(1
271 ((write #xFF) 415 ((write #xFF)
272 (write #xFE) 416 (write #xFE)
273 ,utf-16le-encode-loop)) 417 ,@utf-16le-encode-loop))
274 "Encode to UTF-16 (little endian with signature). 418 "Encode to UTF-16 (little endian with signature).
275Characters from the charsets ascii, eight-bit-control, 419Characters from the charsets ascii, eight-bit-control,
276eight-bit-graphic, latin-iso8859-1 and mule-unicode-* are encoded 420eight-bit-graphic, latin-iso8859-1 and mule-unicode-* are encoded
@@ -282,7 +426,7 @@ Others are encoded as U+FFFD.")
282 `(1 426 `(1
283 ((write #xFE) 427 ((write #xFE)
284 (write #xFF) 428 (write #xFF)
285 ,utf-16be-encode-loop)) 429 ,@utf-16be-encode-loop))
286 "Encode to UTF-16 (big endian with signature). 430 "Encode to UTF-16 (big endian with signature).
287Characters from the charsets ascii, eight-bit-control, 431Characters from the charsets ascii, eight-bit-control,
288eight-bit-graphic, latin-iso8859-1 and mule-unicode-* are encoded 432eight-bit-graphic, latin-iso8859-1 and mule-unicode-* are encoded
@@ -296,6 +440,7 @@ Others are encoded as U+FFFD.")
296 440
297(defun mule-utf-16-post-read-conversion (length) 441(defun mule-utf-16-post-read-conversion (length)
298 (when (> length 0) 442 (when (> length 0)
443 (setq length (utf-8-post-read-conversion length))
299 (let ((char (following-char))) 444 (let ((char (following-char)))
300 (cond ((= char (decode-char 'ucs #xFFFE)) 445 (cond ((= char (decode-char 'ucs #xFFFE))
301 (delete-char 1) 446 (delete-char 1)
@@ -329,29 +474,34 @@ if they are re-encoded.
329 474
330On encoding (e.g. writing a file), Emacs characters not belonging to 475On encoding (e.g. writing a file), Emacs characters not belonging to
331any of the character sets listed above are encoded into the byte 476any of the character sets listed above are encoded into the byte
332sequence representing U+FFFD (REPLACEMENT CHARACTER).")) 477sequence representing U+FFFD (REPLACEMENT CHARACTER).")
478 (props `((safe-charsets
479 ascii
480 eight-bit-control
481 eight-bit-graphic
482 latin-iso8859-1
483 mule-unicode-0100-24ff
484 mule-unicode-2500-33ff
485 mule-unicode-e000-ffff
486 ,@(if utf-translate-cjk-mode
487 utf-translate-cjk-charsets))
488 (valid-codes (0 . 255))
489 (mime-text-unsuitable . t)
490 (pre-write-conversion . utf-8-pre-write-conversion)
491 (dependency unify-8859-on-encoding-mode
492 unify-8859-on-decoding-mode
493 utf-fragment-on-decoding
494 utf-translate-cjk-mode))))
333 (make-coding-system 495 (make-coding-system
334 'mule-utf-16le 4 496 'mule-utf-16le 4
335 ?u ; Mule-UCS uses ?U, but code-pages uses that for koi8-u. 497 ?u ; Mule-UCS uses ?U, but code-pages uses that for koi8-u.
336 (concat 498 (concat
337 "UTF-16LE encoding for Emacs-supported Unicode characters." 499 "UTF-16LE encoding for Emacs-supported Unicode characters."
338 doc) 500 doc)
339
340 '(ccl-decode-mule-utf-16le . ccl-encode-mule-utf-16le) 501 '(ccl-decode-mule-utf-16le . ccl-encode-mule-utf-16le)
341 '((safe-charsets 502 `(,@props
342 ascii 503 (post-read-conversion . utf-8-post-read-conversion)
343 eight-bit-control 504 (mime-charset . utf-16le)))
344 latin-iso8859-1
345 mule-unicode-0100-24ff
346 mule-unicode-2500-33ff
347 mule-unicode-e000-ffff)
348 (mime-charset . utf-16le)
349 (mime-text-unsuitable . t)
350 (valid-codes (0 . 255))
351 (dependency unify-8859-on-encoding-mode
352 unify-8859-on-decoding-mode
353 utf-fragment-on-decoding
354 utf-translate-cjk-mode)))
355 505
356 (make-coding-system 506 (make-coding-system
357 'mule-utf-16be 4 ?u 507 'mule-utf-16be 4 ?u
@@ -360,19 +510,9 @@ sequence representing U+FFFD (REPLACEMENT CHARACTER)."))
360 doc) 510 doc)
361 511
362 '(ccl-decode-mule-utf-16be . ccl-encode-mule-utf-16be) 512 '(ccl-decode-mule-utf-16be . ccl-encode-mule-utf-16be)
363 '((safe-charsets 513 `(,@props
364 ascii 514 (post-read-conversion . utf-8-post-read-conversion)
365 eight-bit-control 515 (mime-charset . utf-16be)))
366 latin-iso8859-1
367 mule-unicode-0100-24ff
368 mule-unicode-2500-33ff
369 mule-unicode-e000-ffff)
370 (mime-charset . utf-16be)
371 (valid-codes (0 . 255))
372 (dependency unify-8859-on-encoding-mode
373 unify-8859-on-decoding-mode
374 utf-fragment-on-decoding
375 utf-translate-cjk-mode)))
376 516
377 (make-coding-system 517 (make-coding-system
378 'mule-utf-16le-with-signature 4 ?u 518 'mule-utf-16le-with-signature 4 ?u
@@ -382,21 +522,10 @@ sequence representing U+FFFD (REPLACEMENT CHARACTER)."))
382 522
383 '(ccl-decode-mule-utf-16le-with-signature 523 '(ccl-decode-mule-utf-16le-with-signature
384 . ccl-encode-mule-utf-16le-with-signature) 524 . ccl-encode-mule-utf-16le-with-signature)
385 '((safe-charsets 525 `(,@props
386 ascii 526 (post-read-conversion . utf-8-post-read-conversion)
387 eight-bit-control
388 latin-iso8859-1
389 mule-unicode-0100-24ff
390 mule-unicode-2500-33ff
391 mule-unicode-e000-ffff)
392 (coding-category . coding-category-utf-16-le) 527 (coding-category . coding-category-utf-16-le)
393 (mime-charset . utf-16) 528 (mime-charset . utf-16)))
394 (mime-text-unsuitable . t)
395 (valid-codes (0 . 255))
396 (dependency unify-8859-on-encoding-mode
397 unify-8859-on-decoding-mode
398 utf-fragment-on-decoding
399 utf-translate-cjk-mode)))
400 529
401 (make-coding-system 530 (make-coding-system
402 'mule-utf-16be-with-signature 4 ?u 531 'mule-utf-16be-with-signature 4 ?u
@@ -406,20 +535,10 @@ sequence representing U+FFFD (REPLACEMENT CHARACTER)."))
406 535
407 '(ccl-decode-mule-utf-16be-with-signature 536 '(ccl-decode-mule-utf-16be-with-signature
408 . ccl-encode-mule-utf-16be-with-signature) 537 . ccl-encode-mule-utf-16be-with-signature)
409 '((safe-charsets 538 `(,@props
410 ascii 539 (post-read-conversion . utf-8-post-read-conversion)
411 eight-bit-control
412 latin-iso8859-1
413 mule-unicode-0100-24ff
414 mule-unicode-2500-33ff
415 mule-unicode-e000-ffff)
416 (coding-category . coding-category-utf-16-be) 540 (coding-category . coding-category-utf-16-be)
417 (mime-charset . utf-16) 541 (mime-charset . utf-16)))
418 (valid-codes (0 . 255))
419 (dependency unify-8859-on-encoding-mode
420 unify-8859-on-decoding-mode
421 utf-fragment-on-decoding
422 utf-translate-cjk-mode)))
423 542
424 (make-coding-system 543 (make-coding-system
425 'mule-utf-16 4 ?u 544 'mule-utf-16 4 ?u
@@ -428,22 +547,10 @@ sequence representing U+FFFD (REPLACEMENT CHARACTER)."))
428 doc) 547 doc)
429 548
430 '(ccl-decode-mule-utf-16 . ccl-encode-mule-utf-16be-with-signature) 549 '(ccl-decode-mule-utf-16 . ccl-encode-mule-utf-16be-with-signature)
431 '((safe-charsets 550 `(,@props
432 ascii 551 (post-read-conversion . mule-utf-16-post-read-conversion)
433 eight-bit-control
434 latin-iso8859-1
435 mule-unicode-0100-24ff
436 mule-unicode-2500-33ff
437 mule-unicode-e000-ffff)
438 (coding-category . coding-category-utf-16-be) 552 (coding-category . coding-category-utf-16-be)
439 (mime-charset . utf-16) 553 (mime-charset . utf-16)))
440 (mime-text-unsuitable . t)
441 (valid-codes (0 . 255))
442 (dependency unify-8859-on-encoding-mode
443 unify-8859-on-decoding-mode
444 utf-fragment-on-decoding
445 utf-translate-cjk-mode)
446 (post-read-conversion . mule-utf-16-post-read-conversion)))
447) 554)
448 555
449(define-coding-system-alias 'utf-16le 'mule-utf-16le) 556(define-coding-system-alias 'utf-16le 'mule-utf-16le)
diff --git a/lisp/international/utf-8.el b/lisp/international/utf-8.el
index d4dd7b6c882..77a51abb43f 100644
--- a/lisp/international/utf-8.el
+++ b/lisp/international/utf-8.el
@@ -190,9 +190,102 @@ Setting this variable outside customize has no effect."
190 :type 'boolean 190 :type 'boolean
191 :group 'mule) 191 :group 'mule)
192 192
193
194(defconst utf-translate-cjk-charsets '(chinese-gb2312
195 chinese-big5-1 chinese-big5-2
196 japanese-jisx0208 japanese-jisx0212
197 korean-ksc5601)
198 "List of charsets supported by `utf-translate-cjk-mode'.")
199
200(defconst utf-translate-cjk-unicode-range
201 '((#x2e80 . #xd7a3)
202 (#xff00 . #xffef))
203 "List of Unicode code ranges supported by `utf-translate-cjk-mode'.")
204
205;; Return non-nil if CODE-POINT is in `utf-translate-cjk-unicode-range'.
206(defsubst utf-translate-cjk-substitutable-p (code-point)
207 (let ((tail utf-translate-cjk-unicode-range)
208 elt)
209 (while tail
210 (setq elt (car tail) tail (cdr tail))
211 (if (and (>= code-point (car elt)) (<= code-point (cdr elt)))
212 (setq tail nil)
213 (setq elt nil)))
214 elt))
215
216(defvar utf-translate-cjk-lang-env nil
217 "Language environment in which tables for `utf-translate-cjk-mode' is loaded.
218The value nil means that the tables are not yet loaded.")
219
220(defun utf-translate-cjk-load-tables ()
221 "Load tables for `utf-translate-cjk-mode'."
222 ;; Fixme: Allow the use of the CJK charsets to be
223 ;; customized by reordering and possible omission.
224 (let ((redefined (< (hash-table-size ucs-mule-cjk-to-unicode) 43000)))
225 (if redefined
226 ;; Redefine them with realistic initial sizes and a
227 ;; smallish rehash size to avoid wasting significant
228 ;; space after they're built.
229 (setq ucs-mule-cjk-to-unicode
230 (make-hash-table :test 'eq :size 43000 :rehash-size 1000)
231 ucs-unicode-to-mule-cjk
232 (make-hash-table :test 'eq :size 21500 :rehash-size 1000)))
233
234 ;; Load the files explicitly, to avoid having to keep
235 ;; around the large tables they contain (as well as the
236 ;; ones which get built).
237 (cond ((string= "Korean" current-language-environment)
238 (load "subst-jis")
239 (load "subst-big5")
240 (load "subst-gb2312")
241 (load "subst-ksc"))
242 ((string= "Chinese-BIG5" current-language-environment)
243 (load "subst-jis")
244 (load "subst-ksc")
245 (load "subst-gb2312")
246 (load "subst-big5"))
247 ((string= "Chinese-GB" current-language-environment)
248 (load "subst-jis")
249 (load "subst-ksc")
250 (load "subst-big5")
251 (load "subst-gb2312"))
252 (t
253 (load "subst-ksc")
254 (load "subst-gb2312")
255 (load "subst-big5")
256 (load "subst-jis"))) ; jis covers as much as big5, gb2312
257
258 (when redefined
259 (define-translation-hash-table 'utf-subst-table-for-decode
260 ucs-unicode-to-mule-cjk)
261 (define-translation-hash-table 'utf-subst-table-for-encode
262 ucs-mule-cjk-to-unicode)
263 (set-char-table-extra-slot (get 'utf-translation-table-for-encode
264 'translation-table)
265 1 ucs-mule-cjk-to-unicode))
266
267 (setq utf-translate-cjk-lang-env current-language-environment)))
268
269(defun utf-lookup-subst-table-for-decode (code-point)
270 (if (and utf-translate-cjk-mode
271 (not utf-translate-cjk-lang-env)
272 (utf-translate-cjk-substitutable-p code-point))
273 (utf-translate-cjk-load-tables))
274 (gethash code-point
275 (get 'utf-subst-table-for-decode 'translation-hash-table)))
276
277
278(defun utf-lookup-subst-table-for-encode (char)
279 (if (and utf-translate-cjk-mode
280 (not utf-translate-cjk-lang-env)
281 (memq (char-charset char) utf-translate-cjk-charsets))
282 (utf-translate-cjk-load-tables))
283 (gethash char
284 (get 'utf-subst-table-for-encode 'translation-hash-table)))
285
193(define-minor-mode utf-translate-cjk-mode 286(define-minor-mode utf-translate-cjk-mode
194 "Whether the UTF based coding systems should decode/encode CJK characters. 287 "Whether the UTF based coding systems should decode/encode CJK characters.
195Enabling this loads tables which allow the coding systems mule-utf-8, 288Enabling this allows the coding systems mule-utf-8,
196mule-utf-16le and mule-utf-16be to encode characters in the charsets 289mule-utf-16le and mule-utf-16be to encode characters in the charsets
197`korean-ksc5601', `chinese-gb2312', `chinese-big5-1', 290`korean-ksc5601', `chinese-gb2312', `chinese-big5-1',
198`chinese-big5-2', `japanese-jisx0208' and `japanese-jisx0212', and to 291`chinese-big5-2', `japanese-jisx0208' and `japanese-jisx0212', and to
@@ -203,49 +296,16 @@ according to the language environment in effect when this option is
203turned on: ksc5601 for Korean, gb2312 for Chinese-GB, big5 for 296turned on: ksc5601 for Korean, gb2312 for Chinese-GB, big5 for
204Chinese-Big5 and jisx for other environments. 297Chinese-Big5 and jisx for other environments.
205 298
206The tables are large (over 40000 entries), so this option is not the 299This option is on by default. If you are not interested in CJK
207default. Also, installing them may be rather slow." 300characters and want to avoid some overhead on encoding/decoding
208 :init-value nil 301by the above coding systems, you can customize this option to nil."
302 :init-value t
209 :version "21.4" 303 :version "21.4"
210 :type 'boolean 304 :type 'boolean
211 :set-after '(current-language-environment)
212 :group 'mule 305 :group 'mule
213 :global t 306 :global t
214 (if utf-translate-cjk-mode 307 (if utf-translate-cjk-mode
215 ;; Fixme: Allow the use of the CJK charsets to be
216 ;; customized by reordering and possible omission.
217 (progn 308 (progn
218 ;; Redefine them with realistic initial sizes and a
219 ;; smallish rehash size to avoid wasting significant
220 ;; space after they're built.
221 (setq ucs-mule-cjk-to-unicode
222 (make-hash-table :test 'eq :size 43000 :rehash-size 1000)
223 ucs-unicode-to-mule-cjk
224 (make-hash-table :test 'eq :size 21500 :rehash-size 1000))
225 ;; Load the files explicitly, to avoid having to keep
226 ;; around the large tables they contain (as well as the
227 ;; ones which get built).
228 (cond
229 ((string= "Korean" current-language-environment)
230 (load "subst-jis")
231 (load "subst-big5")
232 (load "subst-gb2312")
233 (load "subst-ksc"))
234 ((string= "Chinese-BIG5" current-language-environment)
235 (load "subst-jis")
236 (load "subst-ksc")
237 (load "subst-gb2312")
238 (load "subst-big5"))
239 ((string= "Chinese-GB" current-language-environment)
240 (load "subst-jis")
241 (load "subst-ksc")
242 (load "subst-big5")
243 (load "subst-gb2312"))
244 (t
245 (load "subst-ksc")
246 (load "subst-gb2312")
247 (load "subst-big5")
248 (load "subst-jis"))) ; jis covers as much as big5, gb2312
249 (define-translation-hash-table 'utf-subst-table-for-decode 309 (define-translation-hash-table 'utf-subst-table-for-decode
250 ucs-unicode-to-mule-cjk) 310 ucs-unicode-to-mule-cjk)
251 (define-translation-hash-table 'utf-subst-table-for-encode 311 (define-translation-hash-table 'utf-subst-table-for-encode
@@ -259,7 +319,58 @@ default. Also, installing them may be rather slow."
259 (make-hash-table :test 'eq)) 319 (make-hash-table :test 'eq))
260 (set-char-table-extra-slot (get 'utf-translation-table-for-encode 320 (set-char-table-extra-slot (get 'utf-translation-table-for-encode
261 'translation-table) 321 'translation-table)
262 1 nil))) 322 1 nil))
323
324 ;; Update safe-chars of mule-utf-* coding systems.
325 (dolist (elt (coding-system-list t))
326 (if (string-match "^mule-utf" (symbol-name elt))
327 (let ((safe-charsets (coding-system-get elt 'safe-charsets))
328 (safe-chars (coding-system-get elt 'safe-chars))
329 (need-update nil))
330 (dolist (charset utf-translate-cjk-charsets)
331 (unless (eq utf-translate-cjk-mode (memq charset safe-charsets))
332 (setq safe-charsets
333 (if utf-translate-cjk-mode
334 (cons charset safe-charsets)
335 (delq charset safe-charsets))
336 need-update t)
337 (aset safe-chars (make-char charset) utf-translate-cjk-mode)))
338 (when need-update
339 (coding-system-put elt 'safe-charsets safe-charsets)
340 (define-coding-system-internal elt))))))
341
342(define-ccl-program ccl-mule-utf-untrans
343 ;; R0 is an untranslatable Unicode code-point (U+3500..U+DFFF or
344 ;; U+10000..U+10FFFF) or an invaid byte (#x00..#xFF). Write
345 ;; eight-bit-control/graphic sequence (2 to 4 chars) representing
346 ;; UTF-8 sequence of r0. Registers r4, r5, r6 are modified.
347 ;;
348 ;; This is a subrountine because we assume that this is called very
349 ;; rarely (so we don't have to worry about the overhead of the
350 ;; call).
351 `(0
352 ((r5 = ,(charset-id 'eight-bit-control))
353 (r6 = ,(charset-id 'eight-bit-graphic))
354 (if (r0 < #x100)
355 ((r4 = ((r0 >> 6) | #xC0))
356 (write-multibyte-character r6 r4))
357 ((if (r0 < #x10000)
358 ((r4 = ((r0 >> 12) | #xE0))
359 (write-multibyte-character r6 r4))
360 ((r4 = ((r0 >> 18) | #xF0))
361 (write-multibyte-character r6 r4)
362 (r4 = (((r0 >> 12) & #x3F) | #x80))
363 (if (r4 < #xA0)
364 (write-multibyte-character r5 r4)
365 (write-multibyte-character r6 r4))))
366 (r4 = (((r0 >> 6) & #x3F) | #x80))
367 (if (r4 < #xA0)
368 (write-multibyte-character r5 r4)
369 (write-multibyte-character r6 r4))))
370 (r4 = ((r0 & #x3F) | #x80))
371 (if (r4 < #xA0)
372 (write-multibyte-character r5 r4)
373 (write-multibyte-character r6 r4)))))
263 374
264(define-ccl-program ccl-decode-mule-utf-8 375(define-ccl-program ccl-decode-mule-utf-8
265 ;; 376 ;;
@@ -278,260 +389,210 @@ default. Also, installing them may be rather slow."
278 ;; (>= 8000) | | 389 ;; (>= 8000) | |
279 ;; mule-unicode-2500-33ff | 3 | 4 390 ;; mule-unicode-2500-33ff | 3 | 4
280 ;; mule-unicode-e000-ffff | 3 | 4 391 ;; mule-unicode-e000-ffff | 3 | 4
392 ;; -----------------------+----------------+---------------
393 ;; invalid byte | 1 | 2
281 ;; 394 ;;
282 ;; Thus magnification factor is two. 395 ;; Thus magnification factor is two.
283 ;; 396 ;;
284 `(2 397 `(2
285 ((r5 = ,(charset-id 'eight-bit-control)) 398 ((r6 = ,(charset-id 'latin-iso8859-1))
286 (r6 = ,(charset-id 'eight-bit-graphic)) 399 (read r0)
287 (loop 400 (loop
288 (r0 = -1)
289 (read r0)
290
291 ;; 1byte encoding, i.e., ascii
292 (if (r0 < #x80) 401 (if (r0 < #x80)
293 ((write r0)) 402 ;; 1-byte encoding, i.e., ascii
294 (if (r0 < #xc0) ; continuation byte (invalid here) 403 (write-read-repeat r0))
295 ((if (r0 < #xa0) 404 (if (r0 < #xc2)
296 (write-multibyte-character r5 r0) 405 ;; continuation byte (invalid here) or 1st byte of overlong
297 (write-multibyte-character r6 r0))) 406 ;; 2-byte sequence.
298 ;; 2 byte encoding 00000yyyyyxxxxxx = 110yyyyy 10xxxxxx 407 ((call ccl-mule-utf-untrans)
299 (if (r0 < #xe0) 408 (r6 = ,(charset-id 'latin-iso8859-1))
300 ((r1 = -1) 409 (read r0)
301 (read r1) 410 (repeat)))
302 411
303 (if ((r1 & #b11000000) != #b10000000) 412 ;; Read the 2nd byte.
304 ;; Invalid 2-byte sequence 413 (read r1)
305 ((if (r0 < #xa0) 414 (if ((r1 & #b11000000) != #b10000000) ; Invalid 2nd byte
306 (write-multibyte-character r5 r0) 415 ((call ccl-mule-utf-untrans)
307 (write-multibyte-character r6 r0)) 416 (r6 = ,(charset-id 'latin-iso8859-1))
308 (if (r1 < #x80) 417 ;; Handle it in the next loop.
309 (write r1) 418 (r0 = r1)
310 (if (r1 < #xa0) 419 (repeat)))
311 (write-multibyte-character r5 r1) 420
312 (write-multibyte-character r6 r1)))) 421 (if (r0 < #xe0)
313 422 ;; 2-byte encoding 00000yyyyyxxxxxx = 110yyyyy 10xxxxxx
314 ((r3 = r0) ; save in case of overlong sequence 423 ((r1 &= #x3F)
315 (r2 = r1) 424 (r1 |= ((r0 & #x1F) << 6))
316 (r0 &= #x1f) 425 ;; Now r2 holds scalar value. We don't have to check
317 (r0 <<= 6) 426 ;; `overlong sequence' because r0 >= 0xC2.
318 (r1 &= #x3f) 427
319 (r1 += r0) 428 (if (r1 >= 256)
320 ;; Now r1 holds scalar value 429 ;; mule-unicode-0100-24ff (< 0800)
321 430 ((r0 = ,(charset-id 'mule-unicode-0100-24ff))
322 (if (r1 < 128) ; `overlong sequence' 431 (r1 -= #x0100)
323 ((if (r3 < #xa0) 432 (r2 = (((r1 / 96) + 32) << 7))
324 (write-multibyte-character r5 r3) 433 (r1 %= 96)
325 (write-multibyte-character r6 r3)) 434 (r1 += (r2 + 32))
326 (if (r2 < #x80) 435 (translate-character
327 (write r2) 436 utf-translation-table-for-decode r0 r1)
328 (if (r2 < #xa0) 437 (write-multibyte-character r0 r1)
329 (write-multibyte-character r5 r2) 438 (read r0)
330 (write-multibyte-character r6 r2)))) 439 (repeat))
331 440 (if (r1 >= 160)
332 ;; eight-bit-control 441 ;; latin-iso8859-1
333 (if (r1 < 160) 442 ((r1 -= 128)
334 ((write-multibyte-character r5 r1)) 443 (write-multibyte-character r6 r1)
335 444 (read r0)
336 ;; latin-iso8859-1 445 (repeat))
337 (if (r1 < 256) 446 ;; eight-bit-control
338 ((r0 = ,(charset-id 'latin-iso8859-1)) 447 ((r0 = ,(charset-id 'eight-bit-control))
339 (r1 -= 128) 448 (write-multibyte-character r0 r1)
340 (write-multibyte-character r0 r1)) 449 (read r0)
341 450 (repeat))))))
342 ;; mule-unicode-0100-24ff (< 0800) 451
343 ((r0 = ,(charset-id 'mule-unicode-0100-24ff)) 452 ;; Read the 3rd bytes.
344 (r1 -= #x0100) 453 (read r2)
345 (r2 = (((r1 / 96) + 32) << 7)) 454 (if ((r2 & #b11000000) != #b10000000) ; Invalid 3rd byte
346 (r1 %= 96) 455 ((call ccl-mule-utf-untrans)
347 (r1 += (r2 + 32)) 456 (r0 = r1)
348 (translate-character 457 (call ccl-mule-utf-untrans)
349 utf-translation-table-for-decode r0 r1) 458 (r6 = ,(charset-id 'latin-iso8859-1))
350 (write-multibyte-character r0 r1)))))))) 459 ;; Handle it in the next loop.
351 460 (r0 = r2)
352 ;; 3byte encoding 461 (repeat)))
353 ;; zzzzyyyyyyxxxxxx = 1110zzzz 10yyyyyy 10xxxxxx 462
354 (if (r0 < #xf0) 463 (if (r0 < #xF0)
355 ((r1 = -1) 464 ;; 3byte encoding
356 (r2 = -1) 465 ;; zzzzyyyyyyxxxxxx = 1110zzzz 10yyyyyy 10xxxxxx
357 (read r1 r2) 466 ((r3 = ((r0 & #xF) << 12))
358 467 (r3 |= ((r1 & #x3F) << 6))
359 ;; This is set to 1 if the encoding is invalid. 468 (r3 |= (r2 & #x3F))
360 (r4 = 0) 469
361 470 (if (r3 < #x800) ; `overlong sequence'
362 (r3 = (r1 & #b11000000)) 471 ((call ccl-mule-utf-untrans)
363 (r3 |= ((r2 >> 2) & #b00110000)) 472 (r0 = r1)
364 (if (r3 != #b10100000) 473 (call ccl-mule-utf-untrans)
365 (r4 = 1) 474 (r0 = r2)
366 ((r3 = ((r0 & #x0f) << 12)) 475 (call ccl-mule-utf-untrans)
367 (r3 += ((r1 & #x3f) << 6)) 476 (r6 = ,(charset-id 'latin-iso8859-1))
368 (r3 += (r2 & #x3f)) 477 (read r0)
369 (if (r3 < #x0800) 478 (repeat)))
370 (r4 = 1)))) 479
371 480 (if (r3 < #x2500)
372 (if (r4 != 0) 481 ;; mule-unicode-0100-24ff (>= 0800)
373 ;; Invalid 3-byte sequence 482 ((r0 = ,(charset-id 'mule-unicode-0100-24ff))
374 ((if (r0 < #xa0) 483 (r3 -= #x0100)
375 (write-multibyte-character r5 r0) 484 (r3 //= 96)
376 (write-multibyte-character r6 r0)) 485 (r1 = (r7 + 32))
377 (if (r1 < #x80) 486 (r1 += ((r3 + 32) << 7))
378 (write r1) 487 (translate-character
379 (if (r1 < #xa0) 488 utf-translation-table-for-decode r0 r1)
380 (write-multibyte-character r5 r1) 489 (write-multibyte-character r0 r1)
381 (write-multibyte-character r6 r1))) 490 (read r0)
382 (if (r2 < #x80) 491 (repeat)))
383 (write r2) 492
384 (if (r2 < #xa0) 493 (if (r3 < #x3400)
385 (write-multibyte-character r5 r2) 494 ;; mule-unicode-2500-33ff
386 (write-multibyte-character r6 r2)))) 495 ((r0 = r3) ; don't zap r3
387 496 (lookup-integer utf-subst-table-for-decode r0 r1)
388 ;; mule-unicode-0100-24ff (>= 0800) 497 (if (r7 == 0)
389 ((if (r3 < #x2500) 498 ((r0 = ,(charset-id 'mule-unicode-2500-33ff))
390 ((r0 = ,(charset-id 'mule-unicode-0100-24ff)) 499 (r3 -= #x2500)
391 (r3 -= #x0100) 500 (r3 //= 96)
392 (r3 //= 96) 501 (r1 = (r7 + 32))
393 (r1 = (r7 + 32)) 502 (r1 += ((r3 + 32) << 7))))
394 (r1 += ((r3 + 32) << 7)) 503 (write-multibyte-character r0 r1)
395 (translate-character 504 (read r0)
396 utf-translation-table-for-decode r0 r1) 505 (repeat)))
397 (write-multibyte-character r0 r1)) 506
398 507 (if (r3 < #xE000)
399 ;; mule-unicode-2500-33ff 508 ;; Try to convert to CJK chars, else
400 (if (r3 < #x3400) 509 ;; keep them as eight-bit-{control|graphic}.
401 ((r4 = r3) ; don't zap r3 510 ((r0 = r3)
402 (lookup-integer utf-subst-table-for-decode r4 r5) 511 (lookup-integer utf-subst-table-for-decode r3 r1)
403 (if r7 512 (if r7
404 ;; got a translation 513 ;; got a translation
405 ((write-multibyte-character r4 r5) 514 ((write-multibyte-character r3 r1)
406 ;; Zapped through register starvation. 515 (read r0)
407 (r5 = ,(charset-id 'eight-bit-control))) 516 (repeat))
408 ((r0 = ,(charset-id 'mule-unicode-2500-33ff)) 517 ((call ccl-mule-utf-untrans)
409 (r3 -= #x2500) 518 (r6 = ,(charset-id 'latin-iso8859-1))
410 (r3 //= 96) 519 (read r0)
411 (r1 = (r7 + 32)) 520 (repeat)))))
412 (r1 += ((r3 + 32) << 7)) 521
413 (write-multibyte-character r0 r1)))) 522 ;; mule-unicode-e000-ffff
414 523 ;; Fixme: fffe and ffff are invalid.
415 ;; U+3400 .. U+D7FF 524 (r0 = r3) ; don't zap r3
416 ;; Try to convert to CJK chars, else keep 525 (lookup-integer utf-subst-table-for-decode r0 r1)
417 ;; them as eight-bit-{control|graphic}. 526 (if (r7 == 0)
418 (if (r3 < #xd800) 527 ((r0 = ,(charset-id 'mule-unicode-e000-ffff))
419 ((r4 = r3) ; don't zap r3 528 (r3 -= #xe000)
420 (lookup-integer utf-subst-table-for-decode r4 r5) 529 (r3 //= 96)
421 (if r7 530 (r1 = (r7 + 32))
422 ;; got a translation 531 (r1 += ((r3 + 32) << 7))))
423 ((write-multibyte-character r4 r5) 532 (write-multibyte-character r0 r1)
424 ;; Zapped through register starvation. 533 (read r0)
425 (r5 = ,(charset-id 'eight-bit-control))) 534 (repeat)))
426 ;; #xe0 <= r0 < #xf0, so r0 is eight-bit-graphic 535
427 ((r3 = r6) 536 ;; Read the 4th bytes.
428 (write-multibyte-character r3 r0) 537 (read r3)
429 (if (r1 < #xa0) 538 (if ((r3 & #b11000000) != #b10000000) ; Invalid 4th byte
430 (r3 = r5)) 539 ((call ccl-mule-utf-untrans)
431 (write-multibyte-character r3 r1) 540 (r0 = r1)
432 (if (r2 < #xa0) 541 (call ccl-mule-utf-untrans)
433 (r3 = r5) 542 (r0 = r2)
434 (r3 = r6)) 543 (call ccl-mule-utf-untrans)
435 (write-multibyte-character r3 r2)))) 544 (r6 = ,(charset-id 'latin-iso8859-1))
436 545 ;; Handle it in the next loop.
437 ;; Surrogates, U+D800 .. U+DFFF 546 (r0 = r3)
438 (if (r3 < #xe000) 547 (repeat)))
439 ((r3 = r6) 548
440 (write-multibyte-character r3 r0) ; eight-bit-graphic 549 (if (r0 < #xF8)
441 (if (r1 < #xa0) 550 ;; 4-byte encoding:
442 (r3 = r5)) 551 ;; wwwzzzzzzyyyyyyxxxxxx = 11110www 10zzzzzz 10yyyyyy 10xxxxxx
443 (write-multibyte-character r3 r1) 552 ;; keep those bytes as eight-bit-{control|graphic}
444 (if (r2 < #xa0) 553 ;; Fixme: allow lookup in utf-subst-table-for-decode.
445 (r3 = r5) 554 ((r4 = ((r0 & #x7) << 18))
446 (r3 = r6)) 555 (r4 |= ((r1 & #x3F) << 12))
447 (write-multibyte-character r3 r2)) 556 (r4 |= ((r2 & #x3F) << 6))
448 557 (r4 |= (r3 & #x3F))
449 ;; mule-unicode-e000-ffff 558
450 ;; Fixme: fffe and ffff are invalid. 559 (if (r4 < #x10000) ; `overlong sequence'
451 ((r4 = r3) ; don't zap r3 560 ((call ccl-mule-utf-untrans)
452 (lookup-integer utf-subst-table-for-decode r4 r5) 561 (r0 = r1)
453 (if r7 562 (call ccl-mule-utf-untrans)
454 ;; got a translation 563 (r0 = r2)
455 ((write-multibyte-character r4 r5) 564 (call ccl-mule-utf-untrans)
456 ;; Zapped through register starvation. 565 (r0 = r3)
457 (r5 = ,(charset-id 'eight-bit-control))) 566 (call ccl-mule-utf-untrans))
458 ((r0 = ,(charset-id 'mule-unicode-e000-ffff)) 567 ((r0 = r4)
459 (r3 -= #xe000) 568 (call ccl-mule-utf-untrans))))
460 (r3 //= 96) 569
461 (r1 = (r7 + 32)) 570 ;; Unsupported sequence.
462 (r1 += ((r3 + 32) << 7)) 571 ((call ccl-mule-utf-untrans)
463 (write-multibyte-character r0 r1))))))))))) 572 (r0 = r1)
464 573 (call ccl-mule-utf-untrans)
465 (if (r0 < #xfe) 574 (r0 = r2)
466 ;; 4byte encoding 575 (call ccl-mule-utf-untrans)
467 ;; keep those bytes as eight-bit-{control|graphic} 576 (r0 = r3)
468 ;; Fixme: allow lookup in utf-subst-table-for-decode. 577 (call ccl-mule-utf-untrans)))
469 ((r1 = -1) 578 (r6 = ,(charset-id 'latin-iso8859-1))
470 (r2 = -1) 579 (read r0)
471 (r3 = -1)
472 (read r1 r2 r3)
473 ;; r0 > #xf0, thus eight-bit-graphic
474 (write-multibyte-character r6 r0)
475 (if (r1 < #xa0)
476 (if (r1 < #x80) ; invalid byte
477 (write r1)
478 (write-multibyte-character r5 r1))
479 (write-multibyte-character r6 r1))
480 (if (r2 < #xa0)
481 (if (r2 < #x80) ; invalid byte
482 (write r2)
483 (write-multibyte-character r5 r2))
484 (write-multibyte-character r6 r2))
485 (if (r3 < #xa0)
486 (if (r3 < #x80) ; invalid byte
487 (write r3)
488 (write-multibyte-character r5 r3))
489 (write-multibyte-character r6 r3))
490 (if (r0 >= #xf8) ; 5- or 6-byte encoding
491 ((r0 = -1)
492 (read r0)
493 (if (r0 < #xa0)
494 (if (r0 < #x80) ; invalid byte
495 (write r0)
496 (write-multibyte-character r5 r0))
497 (write-multibyte-character r6 r0))
498 (if (r0 >= #xfc) ; 6-byte
499 ((r0 = -1)
500 (read r0)
501 (if (r0 < #xa0)
502 (if (r0 < #x80) ; invalid byte
503 (write r0)
504 (write-multibyte-character r5 r0))
505 (write-multibyte-character r6 r0)))))))
506 ;; else invalid byte >= #xfe
507 (write-multibyte-character r6 r0))))))
508 (repeat))) 580 (repeat)))
509 581
582
510 ;; At EOF... 583 ;; At EOF...
511 (if (r0 >= 0) 584 (if (r0 >= 0)
512 ((if (r0 < #x80) 585 ;; r0 >= #x80
513 (write r0) 586 ((call ccl-mule-utf-untrans)
514 (if (r0 < #xa0)
515 (write-multibyte-character r5 r0)
516 ((write-multibyte-character r6 r0))))
517 (if (r1 >= 0) 587 (if (r1 >= 0)
518 ((if (r1 < #x80) 588 ((r0 = r1)
519 (write r1) 589 (call ccl-mule-utf-untrans)
520 (if (r1 < #xa0)
521 (write-multibyte-character r5 r1)
522 ((write-multibyte-character r6 r1))))
523 (if (r2 >= 0) 590 (if (r2 >= 0)
524 ((if (r2 < #x80) 591 ((r0 = r2)
525 (write r2) 592 (call ccl-mule-utf-untrans)
526 (if (r2 < #xa0)
527 (write-multibyte-character r5 r2)
528 ((write-multibyte-character r6 r2))))
529 (if (r3 >= 0) 593 (if (r3 >= 0)
530 (if (r3 < #x80) 594 ((r0 = r3)
531 (write r3) 595 (call ccl-mule-utf-untrans))))))))))
532 (if (r3 < #xa0)
533 (write-multibyte-character r5 r3)
534 ((write-multibyte-character r6 r3))))))))))))
535 596
536 "CCL program to decode UTF-8. 597 "CCL program to decode UTF-8.
537Basic decoding is done into the charsets ascii, latin-iso8859-1 and 598Basic decoding is done into the charsets ascii, latin-iso8859-1 and
@@ -540,164 +601,203 @@ mule-unicode-*, but see also `utf-fragmentation-table' and
540Encodings of un-representable Unicode characters are decoded asis into 601Encodings of un-representable Unicode characters are decoded asis into
541eight-bit-control and eight-bit-graphic characters.") 602eight-bit-control and eight-bit-graphic characters.")
542 603
604(define-ccl-program ccl-mule-utf-8-encode-untrans
605 ;; UTF-8 decoder generates an UTF-8 sequence represented by a
606 ;; sequence eight-bit-control/graphic chars for an untranslatable
607 ;; character and an invalid byte.
608 ;;
609 ;; This CCL parses that sequence (the first byte is already in r1),
610 ;; writes out the original bytes of that sequence, and sets r5 to
611 ;; -1.
612 ;;
613 ;; If the eight-bit-control/graphic sequence is shorter than what r1
614 ;; suggests, it sets r5 and r6 to the last character read that
615 ;; should be handled by the next loop of a caller.
616 ;;
617 ;; Note: For UTF-8 validation, we only check if a character is
618 ;; eight-bit-control/graphic or not. It may result in incorrect
619 ;; handling of random binary data, but such a data can't be encoded
620 ;; by UTF-8 anyway. At least, UTF-8 decoders doesn't generate such
621 ;; a sequence even if a source contains invalid byte-sequence.
622 `(0
623 (;; Read the 2nd byte.
624 (read-multibyte-character r5 r6)
625 (r0 = (r5 != ,(charset-id 'eight-bit-control)))
626 (if ((r5 != ,(charset-id 'eight-bit-graphic)) & r0)
627 ((write r1) ; invalid UTF-8
628 (r1 = -1)
629 (end)))
630
631 (if (r1 <= #xC3)
632 ;; 2-byte sequence for an originally invalid byte.
633 ((r6 &= #x3F)
634 (r6 |= ((r1 & #x1F) << 6))
635 (write r6)
636 (r5 = -1)
637 (end)))
638
639 (write r1 r6)
640 (r2 = r1)
641 (r1 = -1)
642 ;; Read the 3rd byte.
643 (read-multibyte-character r5 r6)
644 (r0 = (r5 != ,(charset-id 'eight-bit-control)))
645 (if ((r5 != ,(charset-id 'eight-bit-graphic)) & r0)
646 (end)) ; invalid UTF-8
647 (write r6)
648 (if (r2 < #xF0)
649 ;; 3-byte sequence for an untranslated character.
650 ((r5 = -1)
651 (end)))
652 ;; Read the 4th byte.
653 (read-multibyte-character r5 r6)
654 (r0 = (r5 != ,(charset-id 'eight-bit-control)))
655 (if ((r5 != ,(charset-id 'eight-bit-graphic)) & r0)
656 (end)) ; invalid UTF-8
657 ;; 4-byte sequence for an untranslated character.
658 (write r6)
659 (r5 = -1)
660 (end))
661
662 ;; At EOF...
663 ((r5 = -1)
664 (if (r1 >= 0)
665 (write r1)))))
666
543(define-ccl-program ccl-encode-mule-utf-8 667(define-ccl-program ccl-encode-mule-utf-8
544 `(1 668 `(1
545 ((r5 = -1) 669 ((r5 = -1)
546 (loop 670 (loop
547 (if (r5 < 0) 671 (if (r5 < 0)
548 ((r1 = -1) 672 (read-multibyte-character r0 r1)
549 (read-multibyte-character r0 r1) 673 ;; Pre-read character is in r5 (charset-ID) and r6 (code-point).
550 (translate-character utf-translation-table-for-encode r0 r1)) 674 ((r0 = r5)
551 (;; We have already done read-multibyte-character.
552 (r0 = r5)
553 (r1 = r6) 675 (r1 = r6)
554 (r5 = -1))) 676 (r5 = -1)))
677 (translate-character utf-translation-table-for-encode r0 r1)
555 678
556 (if (r0 == ,(charset-id 'ascii)) 679 (if (r0 == ,(charset-id 'ascii))
557 (write r1) 680 (write-repeat r1))
558 681
559 (if (r0 == ,(charset-id 'latin-iso8859-1)) 682 (if (r0 == ,(charset-id 'latin-iso8859-1))
560 ;; r1 scalar utf-8 683 ;; r1 scalar utf-8
561 ;; 0000 0yyy yyxx xxxx 110y yyyy 10xx xxxx 684 ;; 0000 0yyy yyxx xxxx 110y yyyy 10xx xxxx
562 ;; 20 0000 0000 1010 0000 1100 0010 1010 0000 685 ;; 20 0000 0000 1010 0000 1100 0010 1010 0000
563 ;; 7f 0000 0000 1111 1111 1100 0011 1011 1111 686 ;; 7f 0000 0000 1111 1111 1100 0011 1011 1111
564 ((r0 = (((r1 & #x40) >> 6) | #xc2)) 687 ((write ((r1 >> 6) | #xc2))
565 (r1 &= #x3f) 688 (r1 &= #x3f)
566 (r1 |= #x80) 689 (r1 |= #x80)
567 (write r0 r1)) 690 (write-repeat r1)))
568 691
569 (if (r0 == ,(charset-id 'mule-unicode-0100-24ff)) 692 (if (r0 == ,(charset-id 'mule-unicode-0100-24ff))
570 ((r0 = ((((r1 & #x3f80) >> 7) - 32) * 96)) 693 ((r0 = ((((r1 & #x3f80) >> 7) - 32) * 96))
571 ;; #x3f80 == (0011 1111 1000 0000)b 694 ;; #x3f80 == (0011 1111 1000 0000)b
572 (r1 &= #x7f) 695 (r1 &= #x7f)
573 (r1 += (r0 + 224)) ; 240 == -32 + #x0100 696 (r1 += (r0 + 224)) ; 240 == -32 + #x0100
574 ;; now r1 holds scalar value 697 ;; now r1 holds scalar value
575 (if (r1 < #x0800) 698 (if (r1 < #x0800)
576 ;; 2byte encoding 699 ;; 2byte encoding
577 ((r0 = (((r1 & #x07c0) >> 6) | #xc0)) 700 ((write ((r1 >> 6) | #xC0))
578 ;; #x07c0 == (0000 0111 1100 0000)b 701 (r1 &= #x3F)
579 (r1 &= #x3f) 702 (r1 |= #x80)
580 (r1 |= #x80) 703 (write-repeat r1))
581 (write r0 r1)) 704 ;; 3byte encoding
582 ;; 3byte encoding 705 ((write ((r1 >> 12) | #xE0))
583 ((r0 = (((r1 & #xf000) >> 12) | #xe0)) 706 (write (((r1 & #x0FC0) >> 6) | #x80))
584 (r2 = ((r1 & #x3f) | #x80)) 707 (r1 &= #x3F)
585 (r1 &= #x0fc0) 708 (r1 |= #x80)
586 (r1 >>= 6) 709 (write-repeat r1)))))
587 (r1 |= #x80) 710
588 (write r0 r1 r2)))) 711 (if (r0 == ,(charset-id 'mule-unicode-2500-33ff))
589 712 ((r0 = ((((r1 & #x3f80) >> 7) - 32) * 96))
590 (if (r0 == ,(charset-id 'mule-unicode-2500-33ff)) 713 (r1 &= #x7f)
591 ((r0 = ((((r1 & #x3f80) >> 7) - 32) * 96)) 714 (r1 += (r0 + 9440)) ; 9440 == -32 + #x2500
592 (r1 &= #x7f) 715 ;; now r1 holds scalar value
593 (r1 += (r0 + 9440)) ; 9440 == -32 + #x2500 716 (write ((r1 >> 12) | #xE0))
594 (r0 = (((r1 & #xf000) >> 12) | #xe0)) 717 (write (((r1 & #x0FC0) >> 6) | #x80))
595 (r2 = ((r1 & #x3f) | #x80)) 718 (r1 &= #x3F)
596 (r1 &= #x0fc0) 719 (r1 |= #x80)
597 (r1 >>= 6) 720 (write-repeat r1)))
598 (r1 |= #x80) 721
599 (write r0 r1 r2)) 722 (if (r0 == ,(charset-id 'mule-unicode-e000-ffff))
600 723 ((r0 = ((((r1 & #x3f80) >> 7) - 32) * 96))
601 (if (r0 == ,(charset-id 'mule-unicode-e000-ffff)) 724 (r1 &= #x7f)
602 ((r0 = ((((r1 & #x3f80) >> 7) - 32) * 96)) 725 (r1 += (r0 + 57312)) ; 57312 == -32 + #xe000
603 (r1 &= #x7f) 726 ;; now r1 holds scalar value
604 (r1 += (r0 + 57312)) ; 57312 == -32 + #xe000 727 (write ((r1 >> 12) | #xE0))
605 (r0 = (((r1 & #xf000) >> 12) | #xe0)) 728 (write (((r1 & #x0FC0) >> 6) | #x80))
606 (r2 = ((r1 & #x3f) | #x80)) 729 (r1 &= #x3F)
607 (r1 &= #x0fc0) 730 (r1 |= #x80)
608 (r1 >>= 6) 731 (write-repeat r1)))
609 (r1 |= #x80) 732
610 (write r0 r1 r2)) 733 (if (r0 == ,(charset-id 'eight-bit-control))
611 734 ;; r1 scalar utf-8
612 (if (r0 == ,(charset-id 'eight-bit-control)) 735 ;; 0000 0yyy yyxx xxxx 110y yyyy 10xx xxxx
613 ;; r1 scalar utf-8 736 ;; 80 0000 0000 1000 0000 1100 0010 1000 0000
614 ;; 0000 0yyy yyxx xxxx 110y yyyy 10xx xxxx 737 ;; 9f 0000 0000 1001 1111 1100 0010 1001 1111
615 ;; 80 0000 0000 1000 0000 1100 0010 1000 0000 738 ((write #xC2)
616 ;; 9f 0000 0000 1001 1111 1100 0010 1001 1111 739 (write-repeat r1)))
617 ((write #xc2) 740
618 (write r1)) 741 (if (r0 == ,(charset-id 'eight-bit-graphic))
619 742 ;; r1 scalar utf-8
620 (if (r0 == ,(charset-id 'eight-bit-graphic)) 743 ;; 0000 0yyy yyxx xxxx 110y yyyy 10xx xxxx
621 ;; r1 scalar utf-8 744 ;; a0 0000 0000 1010 0000 1100 0010 1010 0000
622 ;; 0000 0yyy yyxx xxxx 110y yyyy 10xx xxxx 745 ;; ff 0000 0000 1111 1111 1101 1111 1011 1111
623 ;; a0 0000 0000 1010 0000 1100 0010 1010 0000 746 ((r0 = (r1 >= #xC0))
624 ;; ff 0000 0000 1111 1111 1101 1111 1011 1111 747 (r0 &= (r1 <= #xC3))
625 ((write r1) 748 (r4 = (r1 >= #xE1))
626 (r1 = -1) 749 (r4 &= (r1 <= #xF7))
627 (read-multibyte-character r0 r1) 750 (r0 |= r4)
628 (if (r0 != ,(charset-id 'eight-bit-graphic)) 751 (if r0
629 (if (r0 != ,(charset-id 'eight-bit-control)) 752 ((call ccl-mule-utf-8-encode-untrans)
630 ((r5 = r0) 753 (repeat))
631 (r6 = r1)))) 754 (write-repeat r1))))
632 (if (r5 < 0) 755
633 ((read-multibyte-character r0 r2) 756 (lookup-character utf-subst-table-for-encode r0 r1)
634 (if (r0 != ,(charset-id 'eight-bit-graphic)) 757 (if r7 ; lookup succeeded
635 (if (r0 != ,(charset-id 'eight-bit-control)) 758 (if (r0 < #x800)
636 ((r5 = r0) 759 ;; 2byte encoding
637 (r6 = r2)))) 760 ((write ((r0 >> 6) | #xC0))
638 (if (r5 < 0) 761 (r0 = ((r0 & #x3F) | #x80))
639 (write r1 r2) 762 (write-repeat r0))
640 (if (r1 < #xa0) 763 ;; 3byte encoding
641 (write r1) 764 ((write ((r0 >> 12) | #xE0))
642 ((write #xc2) 765 (write (((r0 & #x0FC0) >> 6) | #x80))
643 (write r1))))))) 766 (r0 = ((r0 & #x3F) | #x80))
644 767 (write-repeat r0))))
645 ((lookup-character utf-subst-table-for-encode r0 r1) 768
646 (if r7 ; lookup succeeded 769 ;; Unsupported character.
647 ((r1 = (((r0 & #xf000) >> 12) | #xe0)) 770 ;; Output U+FFFD, which is `ef bf bd' in UTF-8.
648 (r2 = ((r0 & #x3f) | #x80)) 771 (write #xef)
649 (r0 &= #x0fc0) 772 (write #xbf)
650 (r0 >>= 6) 773 (write-repeat #xbd))))
651 (r0 |= #x80)
652 (write r1 r0 r2))
653 ;; Unsupported character.
654 ;; Output U+FFFD, which is `ef bf bd' in UTF-8.
655 ((write #xef)
656 (write #xbf)
657 (write #xbd)))))))))))
658 (repeat)))
659 (if (r1 >= #xa0)
660 (write r1)
661 (if (r1 >= #x80)
662 ((write #xc2)
663 (write r1)))))
664
665 "CCL program to encode into UTF-8.") 774 "CCL program to encode into UTF-8.")
666 775
667 776
668(define-ccl-program ccl-untranslated-to-ucs 777(define-ccl-program ccl-untranslated-to-ucs
669 `(0 778 `(0
670 (if (r0 < #xf0) ; 3-byte encoding, as above 779 (if (r1 == 0)
671 ((r4 = 0) 780 nil
672 (r3 = (r1 & #b11000000)) 781 (if (r0 <= #xC3) ; 2-byte encoding
673 (r3 |= ((r2 >> 2) & #b00110000)) 782 ((r0 = ((r0 & #x3) << 6))
674 (if (r3 != #b10100000) 783 (r0 |= (r1 & #x3F))
675 (r4 = 1) 784 (r1 = 2))
676 ((r3 = ((r0 & #x0f) << 12)) 785 (if (r2 == 0)
677 (r3 += ((r1 & #x3f) << 6)) 786 (r1 = 0)
678 (r3 += (r2 & #x3f)) 787 (if (r0 < #xF0) ; 3-byte encoding, as above
679 (if (r3 < #x0800) 788 ((r0 = ((r0 & #xF) << 12))
680 (r4 = 1)))) 789 (r0 |= ((r1 & #x3F) << 6))
681 (if (r4 != 0) 790 (r0 |= (r2 & #x3F))
682 (r0 = 0) 791 (r1 = 3))
683 (r0 = r3))) 792 (if (r3 == 0)
684 (if (r0 < #xf8) ; 4-byte (Mule-UCS recipe) 793 (r1 = 0)
685 ((r4 = (r1 >> 6)) 794 ((r0 = ((r0 & #x7) << 18))
686 (if (r4 != #b10) 795 (r0 |= ((r1 & #x3F) << 12))
687 (r0 = 0) 796 (r0 |= ((r2 & #x3F) << 6))
688 ((r4 = (r2 >> 6)) 797 (r0 |= (r3 & #x3F))
689 (if (r4 != #b10) 798 (r1 = 4))))))))
690 (r0 = 0) 799 "Decode 2-, 3-, or 4-byte sequences in r0, r1, r2 [,r3] to unicodes in r0.
691 ((r4 = (r3 >> 6)) 800Set r1 to the byte length. r0 == 0 for invalid sequence.")
692 (if (r4 != #b10)
693 (r0 = 0)
694 ((r1 = ((r1 & #x3F) << 12))
695 (r2 = ((r2 & #x3F) << 6))
696 (r3 &= #x3F)
697 (r0 = (((((r0 & #x07) << 18) | r1) | r2) | r3)))))))))
698 (r0 = 0))))
699 "Decode 3- or 4-byte sequences in r0, r1, r2 [,r3] to unicodes in r0.
700r0 == 0 for invalid sequence.")
701 801
702(defvar utf-8-ccl-regs (make-vector 8 0)) 802(defvar utf-8-ccl-regs (make-vector 8 0))
703 803
@@ -708,33 +808,47 @@ Only for 3- or 4-byte sequences."
708 (aset utf-8-ccl-regs 1 (or (char-after (1+ (point))) 0)) 808 (aset utf-8-ccl-regs 1 (or (char-after (1+ (point))) 0))
709 (aset utf-8-ccl-regs 2 (or (char-after (+ 2 (point))) 0)) 809 (aset utf-8-ccl-regs 2 (or (char-after (+ 2 (point))) 0))
710 (aset utf-8-ccl-regs 3 (or (char-after (+ 3 (point))) 0)) 810 (aset utf-8-ccl-regs 3 (or (char-after (+ 3 (point))) 0))
711 (ccl-execute 'ccl-untranslated-to-ucs utf-8-ccl-regs) 811 (ccl-execute 'ccl-untranslated-to-ucs utf-8-ccl-regs))
712 (aref utf-8-ccl-regs 0))
713 812
714(defun utf-8-help-echo (window object position) 813(defun utf-8-help-echo (window object position)
715 (format "Untranslated Unicode U+%04X" 814 (format "Untranslated Unicode U+%04X"
716 (get-char-property position 'untranslated-utf-8 object))) 815 (get-char-property position 'untranslated-utf-8 object)))
717 816
718;; We compose the untranslatable sequences into a single character. 817;; We compose the untranslatable sequences into a single character,
818;; and move point to the next character.
719;; This is infelicitous for editing, because there's currently no 819;; This is infelicitous for editing, because there's currently no
720;; mechanism for treating compositions as atomic, but is OK for 820;; mechanism for treating compositions as atomic, but is OK for
721;; display. They are composed to U+FFFD with help-echo which 821;; display. They are composed to U+FFFD with help-echo which
722;; indicates the unicodes they represent. This function GCs too much. 822;; indicates the unicodes they represent. This function GCs too much.
723(defsubst utf-8-compose () 823
724 "Put a suitable composition on an untranslatable sequence. 824;; If utf-translate-cjk-mode is non-nil, this function is called with
725Return the sequence's length." 825;; HASH-TABLE which translates CJK characters into some of CJK
726 (let* ((u (utf-8-untranslated-to-ucs)) 826;; charsets.
727 (l (unless (zerop u) 827
728 (if (>= u #x10000) 828(defsubst utf-8-compose (hash-table)
729 4 829 "Put a suitable composition on an untranslatable sequence at point.
730 3)))) 830If HASH-TABLE is non-nil, try to translate CJK characters by it at first.
731 (when l 831Move point to the end of the sequence."
732 (put-text-property (point) (min (point-max) (+ l (point))) 832 (utf-8-untranslated-to-ucs)
733 'untranslated-utf-8 u) 833 (let ((l (aref utf-8-ccl-regs 1))
734 (put-text-property (point) (min (point-max) (+ l (point))) 834 ch)
735 'help-echo 'utf-8-help-echo) 835 (if (> l 0)
736 (compose-region (point) (+ l (point)) ?$,3u=(B) 836 (if (and hash-table
737 l))) 837 (setq ch (gethash (aref utf-8-ccl-regs 0) hash-table)))
838 (progn
839 (insert ch)
840 (delete-region (point) (min (point-max) (+ l (point)))))
841 (setq ch (aref utf-8-ccl-regs 0))
842 (put-text-property (point) (min (point-max) (+ l (point)))
843 'untranslated-utf-8 ch)
844 (put-text-property (point) (min (point-max) (+ l (point)))
845 'help-echo 'utf-8-help-echo)
846 (if (= l 2)
847 (put-text-property (point) (min (point-max) (+ l (point)))
848 'display (format "\\%03o" ch))
849 (compose-region (point) (+ l (point)) ?$,3u=(B))
850 (forward-char l))
851 (forward-char 1))))
738 852
739(defcustom utf-8-compose-scripts nil 853(defcustom utf-8-compose-scripts nil
740 "*Non-nil means compose various scripts on decoding utf-8 text." 854 "*Non-nil means compose various scripts on decoding utf-8 text."
@@ -744,38 +858,63 @@ Return the sequence's length."
744 858
745(defun utf-8-post-read-conversion (length) 859(defun utf-8-post-read-conversion (length)
746 "Compose untranslated utf-8 sequences into single characters. 860 "Compose untranslated utf-8 sequences into single characters.
861If `utf-translate-cjk-mode' is non-nil, tries to translate CJK characters.
747Also compose particular scripts if `utf-8-compose-scripts' is non-nil." 862Also compose particular scripts if `utf-8-compose-scripts' is non-nil."
748 (save-excursion 863 (save-excursion
749 ;; Can't do eval-when-compile to insert a multibyte constant 864 (save-restriction
750 ;; version of the string in the loop, since it's always loaded as 865 (narrow-to-region (point) (+ (point) length))
751 ;; unibyte from a byte-compiled file. 866 ;; Can't do eval-when-compile to insert a multibyte constant
752 (let ((range (string-as-multibyte "^\xe1-\xf7"))) 867 ;; version of the string in the loop, since it's always loaded as
753 (while (and (skip-chars-forward range) 868 ;; unibyte from a byte-compiled file.
754 (not (eobp))) 869 (let ((range (string-as-multibyte "^\xc0-\xc3\xe1-\xf7"))
755 (forward-char (utf-8-compose))))) 870 hash-table ch)
756 ;; Fixme: Takahashi-san implies it may not work this easily. I 871 (when utf-translate-cjk-mode
757 ;; asked why but didn't get a reply. -- fx 872 (if (not utf-translate-cjk-lang-env)
758 (when (and utf-8-compose-scripts (> length 1)) 873 ;; Check these characters:
759 ;; These currently have definitions which cover the relevant 874 ;; "U+2e80-U+33ff", "U+ff00-U+ffef"
760 ;; unicodes. We could avoid loading thai-util &c by checking 875 ;; We may have to translate them to CJK charsets.
761 ;; whether the region contains any characters with the appropriate 876 (let ((range2 "$,29@(B-$,2G$,3r`(B-$,3u/(B"))
762 ;; categories. There aren't yet Unicode-based rules for Tibetan. 877 (skip-chars-forward (concat range range2))
763 (save-excursion (setq length (diacritic-post-read-conversion length))) 878 (unless (eobp)
764 (save-excursion (setq length (thai-post-read-conversion length))) 879 (utf-translate-cjk-load-tables)
765 (save-excursion (setq length (lao-post-read-conversion length))) 880 (setq range (concat range range2)))
766 (save-excursion (setq length (devanagari-post-read-conversion length))) 881 (setq hash-table (get 'utf-subst-table-for-decode
767 (save-excursion (setq length (malayalam-post-read-conversion length))) 882 'translation-hash-table)))))
768 (save-excursion (setq length (tamil-post-read-conversion length)))) 883 (while (and (skip-chars-forward range)
769 length) 884 (not (eobp)))
770 885 (setq ch (following-char))
771;; ucs-tables is preloaded 886 (if (< ch 256)
772;; (defun utf-8-pre-write-conversion (beg end) 887 (utf-8-compose hash-table)
773;; "Semi-dummy pre-write function effectively to autoload ucs-tables." 888 (if (and hash-table
774;; ;; Ensure translation-table is loaded. 889 (setq ch (gethash (encode-char ch 'ucs) hash-table)))
775;; (require 'ucs-tables) 890 (progn
776;; ;; Don't do this again. 891 (insert ch)
777;; (coding-system-put 'mule-utf-8 'pre-write-conversion nil) 892 (delete-char 1))
778;; nil) 893 (forward-char 1)))))
894
895 (when (and utf-8-compose-scripts (> length 1))
896 ;; These currently have definitions which cover the relevant
897 ;; unicodes. We could avoid loading thai-util &c by checking
898 ;; whether the region contains any characters with the appropriate
899 ;; categories. There aren't yet Unicode-based rules for Tibetan.
900 (diacritic-compose-region (point-max) (point-min))
901 (thai-compose-region (point-max) (point-min))
902 (lao-compose-region (point-max) (point-min))
903 (devanagari-compose-region (point-max) (point-min))
904 (malayalam-compose-region (point-max) (point-min))
905 (tamil-compose-region (point-max) (point-min)))
906 (- (point-max) (point-min)))))
907
908(defun utf-8-pre-write-conversion (beg end)
909 "Prepare for `utf-translate-cjk-mode' to encode text between BEG and END.
910This is used as a post-read-conversion of utf-8 coding system."
911 (if (and utf-translate-cjk-mode
912 (not utf-translate-cjk-lang-env)
913 (save-excursion
914 (goto-char beg)
915 (re-search-forward "\\cc\\|\\cj\\|\\ch" end t)))
916 (utf-translate-cjk-load-tables))
917 nil)
779 918
780(make-coding-system 919(make-coding-system
781 'mule-utf-8 4 ?u 920 'mule-utf-8 4 ?u
@@ -797,18 +936,20 @@ any of the character sets listed above are encoded into the UTF-8 byte
797sequence representing U+FFFD (REPLACEMENT CHARACTER)." 936sequence representing U+FFFD (REPLACEMENT CHARACTER)."
798 937
799 '(ccl-decode-mule-utf-8 . ccl-encode-mule-utf-8) 938 '(ccl-decode-mule-utf-8 . ccl-encode-mule-utf-8)
800 '((safe-charsets 939 `((safe-charsets
801 ascii 940 ascii
802 eight-bit-control 941 eight-bit-control
803 eight-bit-graphic 942 eight-bit-graphic
804 latin-iso8859-1 943 latin-iso8859-1
805 mule-unicode-0100-24ff 944 mule-unicode-0100-24ff
806 mule-unicode-2500-33ff 945 mule-unicode-2500-33ff
807 mule-unicode-e000-ffff) 946 mule-unicode-e000-ffff
947 ,@(if utf-translate-cjk-mode
948 utf-translate-cjk-charsets))
808 (mime-charset . utf-8) 949 (mime-charset . utf-8)
809 (coding-category . coding-category-utf-8) 950 (coding-category . coding-category-utf-8)
810 (valid-codes (0 . 255)) 951 (valid-codes (0 . 255))
811;; (pre-write-conversion . utf-8-pre-write-conversion) 952 (pre-write-conversion . utf-8-pre-write-conversion)
812 (post-read-conversion . utf-8-post-read-conversion) 953 (post-read-conversion . utf-8-post-read-conversion)
813 (translation-table-for-encode . utf-translation-table-for-encode) 954 (translation-table-for-encode . utf-translation-table-for-encode)
814 (dependency unify-8859-on-encoding-mode 955 (dependency unify-8859-on-encoding-mode
diff --git a/lisp/language/devan-util.el b/lisp/language/devan-util.el
index 20bcffdad49..24b9d40eec0 100644
--- a/lisp/language/devan-util.el
+++ b/lisp/language/devan-util.el
@@ -60,6 +60,7 @@
60 "\\)") 60 "\\)")
61 "Regexp matching a composable sequence of Devanagari characters.") 61 "Regexp matching a composable sequence of Devanagari characters.")
62 62
63;;;###autoload
63(defun devanagari-compose-region (from to) 64(defun devanagari-compose-region (from to)
64 (interactive "r") 65 (interactive "r")
65 (save-excursion 66 (save-excursion
diff --git a/lisp/progmodes/python.el b/lisp/progmodes/python.el
index cee59a6e3e1..274480a36de 100644
--- a/lisp/progmodes/python.el
+++ b/lisp/progmodes/python.el
@@ -323,7 +323,8 @@ BOS non-nil means point is known to be at beginning of statement."
323 line-end)) 323 line-end))
324 (save-excursion (python-end-of-statement)) 324 (save-excursion (python-end-of-statement))
325 t) 325 t)
326 (not (python-in-string/comment))))) 326 (not (progn (goto-char (match-beginning 0))
327 (python-in-string/comment))))))
327 328
328(defun python-close-block-statement-p (&optional bos) 329(defun python-close-block-statement-p (&optional bos)
329 "Return non-nil if current line is a statement closing a block. 330 "Return non-nil if current line is a statement closing a block.
diff --git a/lisp/simple.el b/lisp/simple.el
index 94557956de0..9cd630b94ec 100644
--- a/lisp/simple.el
+++ b/lisp/simple.el
@@ -953,7 +953,8 @@ See also `minibuffer-history-case-insensitive-variables'."
953 nil 953 nil
954 minibuffer-local-map 954 minibuffer-local-map
955 nil 955 nil
956 'minibuffer-history-search-history))) 956 'minibuffer-history-search-history
957 (car minibuffer-history-search-history))))
957 ;; Use the last regexp specified, by default, if input is empty. 958 ;; Use the last regexp specified, by default, if input is empty.
958 (list (if (string= regexp "") 959 (list (if (string= regexp "")
959 (if minibuffer-history-search-history 960 (if minibuffer-history-search-history
diff --git a/lisp/textmodes/paragraphs.el b/lisp/textmodes/paragraphs.el
index 1b2628760e1..e9cc4f397de 100644
--- a/lisp/textmodes/paragraphs.el
+++ b/lisp/textmodes/paragraphs.el
@@ -171,7 +171,7 @@ followed by two spaces, unless it's inside some sort of quotes or
171parenthesis. See Info node `Sentences'." 171parenthesis. See Info node `Sentences'."
172 (or sentence-end 172 (or sentence-end
173 (concat (if sentence-end-without-period "\\w \\|") 173 (concat (if sentence-end-without-period "\\w \\|")
174 "\\([.?!][]\"'\xd0c9)}]*" 174 "\\([.?!][]\"'\xd0c9\x5397d)}]*"
175 (if sentence-end-double-space 175 (if sentence-end-double-space
176 "\\($\\| $\\|\t\\| \\)" "\\($\\|[\t ]\\)") 176 "\\($\\| $\\|\t\\| \\)" "\\($\\|[\t ]\\)")
177 "\\|[" sentence-end-without-space "]+\\)" 177 "\\|[" sentence-end-without-space "]+\\)"
diff --git a/lisp/time-stamp.el b/lisp/time-stamp.el
index b6e76ee5394..f574144f4b0 100644
--- a/lisp/time-stamp.el
+++ b/lisp/time-stamp.el
@@ -5,7 +5,7 @@
5 5
6;; This file is part of GNU Emacs. 6;; This file is part of GNU Emacs.
7 7
8;; Maintainer's Time-stamp: <2003-02-01 09:26:25 gildea> 8;; Maintainer's Time-stamp: <2004-06-13 19:04:36 teirllm>
9;; Maintainer: Stephen Gildea <gildea@stop.mail-abuse.org> 9;; Maintainer: Stephen Gildea <gildea@stop.mail-abuse.org>
10;; Keywords: tools 10;; Keywords: tools
11 11
@@ -32,7 +32,7 @@
32;; See the top of `time-stamp.el' for another example. 32;; See the top of `time-stamp.el' for another example.
33 33
34;; To use time-stamping, add this line to your .emacs file: 34;; To use time-stamping, add this line to your .emacs file:
35;; (add-hook 'write-file-hooks 'time-stamp) 35;; (add-hook 'before-save-hook 'time-stamp)
36;; Now any time-stamp templates in your files will be updated automatically. 36;; Now any time-stamp templates in your files will be updated automatically.
37 37
38;; See the documentation for the functions `time-stamp' 38;; See the documentation for the functions `time-stamp'
@@ -242,7 +242,8 @@ of the time-stamped file itself.")
242 "Update the time stamp string(s) in the buffer. 242 "Update the time stamp string(s) in the buffer.
243A template in a file can be automatically updated with a new time stamp 243A template in a file can be automatically updated with a new time stamp
244every time you save the file. Add this line to your .emacs file: 244every time you save the file. Add this line to your .emacs file:
245 (add-hook 'write-file-hooks 'time-stamp) 245 (add-hook 'before-save-hook 'time-stamp)
246or customize `before-save-hook' through Custom.
246Normally the template must appear in the first 8 lines of a file and 247Normally the template must appear in the first 8 lines of a file and
247look like one of the following: 248look like one of the following:
248 Time-stamp: <> 249 Time-stamp: <>
@@ -318,7 +319,6 @@ template."
318 (setq start (time-stamp-once start search-limit ts-start ts-end 319 (setq start (time-stamp-once start search-limit ts-start ts-end
319 ts-format format-lines end-lines)) 320 ts-format format-lines end-lines))
320 (setq ts-count (1- ts-count)))) 321 (setq ts-count (1- ts-count))))
321 ;; be sure to return nil so can be used on write-file-hooks
322 nil) 322 nil)
323 323
324(defun time-stamp-once (start search-limit ts-start ts-end 324(defun time-stamp-once (start search-limit ts-start ts-end