diff options
Diffstat (limited to 'src/coding.c')
| -rw-r--r-- | src/coding.c | 132 |
1 files changed, 68 insertions, 64 deletions
diff --git a/src/coding.c b/src/coding.c index cfdaa4dd125..27f11bed6d1 100644 --- a/src/coding.c +++ b/src/coding.c | |||
| @@ -37,18 +37,18 @@ Boston, MA 02111-1307, USA. */ | |||
| 37 | /*** 0. General comments ***/ | 37 | /*** 0. General comments ***/ |
| 38 | 38 | ||
| 39 | 39 | ||
| 40 | /*** GENERAL NOTE on CODING SYSTEM *** | 40 | /*** GENERAL NOTE on CODING SYSTEMS *** |
| 41 | 41 | ||
| 42 | Coding system is an encoding mechanism of one or more character | 42 | A coding system is an encoding mechanism for one or more character |
| 43 | sets. Here's a list of coding systems which Emacs can handle. When | 43 | sets. Here's a list of coding systems which Emacs can handle. When |
| 44 | we say "decode", it means converting some other coding system to | 44 | we say "decode", it means converting some other coding system to |
| 45 | Emacs' internal format (emacs-internal), and when we say "encode", | 45 | Emacs' internal format (emacs-mule), and when we say "encode", |
| 46 | it means converting the coding system emacs-mule to some other | 46 | it means converting the coding system emacs-mule to some other |
| 47 | coding system. | 47 | coding system. |
| 48 | 48 | ||
| 49 | 0. Emacs' internal format (emacs-mule) | 49 | 0. Emacs' internal format (emacs-mule) |
| 50 | 50 | ||
| 51 | Emacs itself holds a multi-lingual character in a buffer and a string | 51 | Emacs itself holds a multi-lingual character in buffers and strings |
| 52 | in a special format. Details are described in section 2. | 52 | in a special format. Details are described in section 2. |
| 53 | 53 | ||
| 54 | 1. ISO2022 | 54 | 1. ISO2022 |
| @@ -66,21 +66,21 @@ Boston, MA 02111-1307, USA. */ | |||
| 66 | 66 | ||
| 67 | 3. BIG5 | 67 | 3. BIG5 |
| 68 | 68 | ||
| 69 | A coding system to encode character sets: ASCII and Big5. Widely | 69 | A coding system to encode the character sets ASCII and Big5. Widely |
| 70 | used by Chinese (mainly in Taiwan and Hong Kong). Details are | 70 | used for Chinese (mainly in Taiwan and Hong Kong). Details are |
| 71 | described in section 4. In this file, when we write "BIG5" | 71 | described in section 4. In this file, when we write "BIG5" |
| 72 | (all uppercase), we mean the coding system, and when we write | 72 | (all uppercase), we mean the coding system, and when we write |
| 73 | "Big5" (capitalized), we mean the character set. | 73 | "Big5" (capitalized), we mean the character set. |
| 74 | 74 | ||
| 75 | 4. Raw text | 75 | 4. Raw text |
| 76 | 76 | ||
| 77 | A coding system for a text containing random 8-bit code. Emacs does | 77 | A coding system for text containing random 8-bit code. Emacs does |
| 78 | no code conversion on such a text except for end-of-line format. | 78 | no code conversion on such text except for end-of-line format. |
| 79 | 79 | ||
| 80 | 5. Other | 80 | 5. Other |
| 81 | 81 | ||
| 82 | If a user wants to read/write a text encoded in a coding system not | 82 | If a user wants to read/write text encoded in a coding system not |
| 83 | listed above, he can supply a decoder and an encoder for it in CCL | 83 | listed above, he can supply a decoder and an encoder for it as CCL |
| 84 | (Code Conversion Language) programs. Emacs executes the CCL program | 84 | (Code Conversion Language) programs. Emacs executes the CCL program |
| 85 | while reading/writing. | 85 | while reading/writing. |
| 86 | 86 | ||
| @@ -93,16 +93,16 @@ Boston, MA 02111-1307, USA. */ | |||
| 93 | 93 | ||
| 94 | /*** GENERAL NOTES on END-OF-LINE FORMAT *** | 94 | /*** GENERAL NOTES on END-OF-LINE FORMAT *** |
| 95 | 95 | ||
| 96 | How end-of-line of a text is encoded depends on a system. For | 96 | How end-of-line of text is encoded depends on the operating system. |
| 97 | instance, Unix's format is just one byte of `line-feed' code, | 97 | For instance, Unix's format is just one byte of `line-feed' code, |
| 98 | whereas DOS's format is a two-byte sequence of `carriage-return' and | 98 | whereas DOS's format is a two-byte sequence of `carriage-return' and |
| 99 | `line-feed' codes. MacOS's format is usually one byte of | 99 | `line-feed' codes. MacOS's format is usually one byte of |
| 100 | `carriage-return'. | 100 | `carriage-return'. |
| 101 | 101 | ||
| 102 | Since text characters encoding and end-of-line encoding are | 102 | Since text character encoding and end-of-line encoding are |
| 103 | independent, any coding system described above can take | 103 | independent, any coding system described above can have any |
| 104 | any format of end-of-line. So, Emacs has information of format of | 104 | end-of-line format. So Emacs has information about end-of-line |
| 105 | end-of-line in each coding-system. See section 6 for more details. | 105 | format in each coding-system. See section 6 for more details. |
| 106 | 106 | ||
| 107 | */ | 107 | */ |
| 108 | 108 | ||
| @@ -110,9 +110,9 @@ Boston, MA 02111-1307, USA. */ | |||
| 110 | 110 | ||
| 111 | These functions check if a text between SRC and SRC_END is encoded | 111 | These functions check if a text between SRC and SRC_END is encoded |
| 112 | in the coding system category XXX. Each returns an integer value in | 112 | in the coding system category XXX. Each returns an integer value in |
| 113 | which appropriate flag bits for the category XXX is set. The flag | 113 | which appropriate flag bits for the category XXX are set. The flag |
| 114 | bits are defined in macros CODING_CATEGORY_MASK_XXX. Below is the | 114 | bits are defined in macros CODING_CATEGORY_MASK_XXX. Below is the |
| 115 | template of these functions. If MULTIBYTEP is nonzero, 8-bit codes | 115 | template for these functions. If MULTIBYTEP is nonzero, 8-bit codes |
| 116 | of the range 0x80..0x9F are in multibyte form. */ | 116 | of the range 0x80..0x9F are in multibyte form. */ |
| 117 | #if 0 | 117 | #if 0 |
| 118 | int | 118 | int |
| @@ -131,16 +131,17 @@ detect_coding_emacs_mule (src, src_end, multibytep) | |||
| 131 | multibyte text goes to a place pointed to by DESTINATION, the length | 131 | multibyte text goes to a place pointed to by DESTINATION, the length |
| 132 | of which should not exceed DST_BYTES. | 132 | of which should not exceed DST_BYTES. |
| 133 | 133 | ||
| 134 | These functions set the information of original and decoded texts in | 134 | These functions set the information about original and decoded texts |
| 135 | the members produced, produced_char, consumed, and consumed_char of | 135 | in the members `produced', `produced_char', `consumed', and |
| 136 | the structure *CODING. They also set the member result to one of | 136 | `consumed_char' of the structure *CODING. They also set the member |
| 137 | CODING_FINISH_XXX indicating how the decoding finished. | 137 | `result' to one of CODING_FINISH_XXX indicating how the decoding |
| 138 | finished. | ||
| 138 | 139 | ||
| 139 | DST_BYTES zero means that source area and destination area are | 140 | DST_BYTES zero means that the source area and destination area are |
| 140 | overlapped, which means that we can produce a decoded text until it | 141 | overlapped, which means that we can produce a decoded text until it |
| 141 | reaches at the head of not-yet-decoded source text. | 142 | reaches the head of the not-yet-decoded source text. |
| 142 | 143 | ||
| 143 | Below is a template of these functions. */ | 144 | Below is a template for these functions. */ |
| 144 | #if 0 | 145 | #if 0 |
| 145 | static void | 146 | static void |
| 146 | decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes) | 147 | decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes) |
| @@ -154,21 +155,22 @@ decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes) | |||
| 154 | 155 | ||
| 155 | /*** GENERAL NOTES on `encode_coding_XXX ()' functions *** | 156 | /*** GENERAL NOTES on `encode_coding_XXX ()' functions *** |
| 156 | 157 | ||
| 157 | These functions encode SRC_BYTES length text at SOURCE of Emacs' | 158 | These functions encode SRC_BYTES length text at SOURCE from Emacs' |
| 158 | internal multibyte format to CODING. The resulting unibyte text | 159 | internal multibyte format to CODING. The resulting unibyte text |
| 159 | goes to a place pointed to by DESTINATION, the length of which | 160 | goes to a place pointed to by DESTINATION, the length of which |
| 160 | should not exceed DST_BYTES. | 161 | should not exceed DST_BYTES. |
| 161 | 162 | ||
| 162 | These functions set the information of original and encoded texts in | 163 | These functions set the information about original and encoded texts |
| 163 | the members produced, produced_char, consumed, and consumed_char of | 164 | in the members `produced', `produced_char', `consumed', and |
| 164 | the structure *CODING. They also set the member result to one of | 165 | `consumed_char' of the structure *CODING. They also set the member |
| 165 | CODING_FINISH_XXX indicating how the encoding finished. | 166 | `result' to one of CODING_FINISH_XXX indicating how the encoding |
| 167 | finished. | ||
| 166 | 168 | ||
| 167 | DST_BYTES zero means that source area and destination area are | 169 | DST_BYTES zero means that the source area and destination area are |
| 168 | overlapped, which means that we can produce a encoded text until it | 170 | overlapped, which means that we can produce encoded text until it |
| 169 | reaches at the head of not-yet-encoded source text. | 171 | reaches the head of the not-yet-encoded source text. |
| 170 | 172 | ||
| 171 | Below is a template of these functions. */ | 173 | Below is a template for these functions. */ |
| 172 | #if 0 | 174 | #if 0 |
| 173 | static void | 175 | static void |
| 174 | encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes) | 176 | encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes) |
| @@ -260,7 +262,7 @@ encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes) | |||
| 260 | /* Produce a multibyte form of character C to `dst'. Jump to | 262 | /* Produce a multibyte form of character C to `dst'. Jump to |
| 261 | `label_end_of_loop' if there's not enough space at `dst'. | 263 | `label_end_of_loop' if there's not enough space at `dst'. |
| 262 | 264 | ||
| 263 | If we are now in the middle of composition sequence, the decoded | 265 | If we are now in the middle of a composition sequence, the decoded |
| 264 | character may be ALTCHAR (for the current composition). In that | 266 | character may be ALTCHAR (for the current composition). In that |
| 265 | case, the character goes to coding->cmp_data->data instead of | 267 | case, the character goes to coding->cmp_data->data instead of |
| 266 | `dst'. | 268 | `dst'. |
| @@ -1125,21 +1127,23 @@ encode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes) | |||
| 1125 | 1127 | ||
| 1126 | /* The following note describes the coding system ISO2022 briefly. | 1128 | /* The following note describes the coding system ISO2022 briefly. |
| 1127 | Since the intention of this note is to help understand the | 1129 | Since the intention of this note is to help understand the |
| 1128 | functions in this file, some parts are NOT ACCURATE or OVERLY | 1130 | functions in this file, some parts are NOT ACCURATE or are OVERLY |
| 1129 | SIMPLIFIED. For thorough understanding, please refer to the | 1131 | SIMPLIFIED. For thorough understanding, please refer to the |
| 1130 | original document of ISO2022. | 1132 | original document of ISO2022. This is equivalent to the standard |
| 1133 | ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*). | ||
| 1131 | 1134 | ||
| 1132 | ISO2022 provides many mechanisms to encode several character sets | 1135 | ISO2022 provides many mechanisms to encode several character sets |
| 1133 | in 7-bit and 8-bit environments. For 7-bite environments, all text | 1136 | in 7-bit and 8-bit environments. For 7-bit environments, all text |
| 1134 | is encoded using bytes less than 128. This may make the encoded | 1137 | is encoded using bytes less than 128. This may make the encoded |
| 1135 | text a little bit longer, but the text passes more easily through | 1138 | text a little bit longer, but the text passes more easily through |
| 1136 | several gateways, some of which strip off MSB (Most Significant Bit). | 1139 | several types of gateway, some of which strip off the MSB (Most |
| 1140 | Significant Bit). | ||
| 1137 | 1141 | ||
| 1138 | There are two kinds of character sets: control character set and | 1142 | There are two kinds of character sets: control character sets and |
| 1139 | graphic character set. The former contains control characters such | 1143 | graphic character sets. The former contain control characters such |
| 1140 | as `newline' and `escape' to provide control functions (control | 1144 | as `newline' and `escape' to provide control functions (control |
| 1141 | functions are also provided by escape sequences). The latter | 1145 | functions are also provided by escape sequences). The latter |
| 1142 | contains graphic characters such as 'A' and '-'. Emacs recognizes | 1146 | contain graphic characters such as 'A' and '-'. Emacs recognizes |
| 1143 | two control character sets and many graphic character sets. | 1147 | two control character sets and many graphic character sets. |
| 1144 | 1148 | ||
| 1145 | Graphic character sets are classified into one of the following | 1149 | Graphic character sets are classified into one of the following |
| @@ -1151,14 +1155,14 @@ encode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes) | |||
| 1151 | - DIMENSION2_CHARS96 | 1155 | - DIMENSION2_CHARS96 |
| 1152 | 1156 | ||
| 1153 | In addition, each character set is assigned an identification tag, | 1157 | In addition, each character set is assigned an identification tag, |
| 1154 | unique for each set, called "final character" (denoted as <F> | 1158 | unique for each set, called the "final character" (denoted as <F> |
| 1155 | hereafter). The <F> of each character set is decided by ECMA(*) | 1159 | hereafter). The <F> of each character set is decided by ECMA(*) |
| 1156 | when it is registered in ISO. The code range of <F> is 0x30..0x7F | 1160 | when it is registered in ISO. The code range of <F> is 0x30..0x7F |
| 1157 | (0x30..0x3F are for private use only). | 1161 | (0x30..0x3F are for private use only). |
| 1158 | 1162 | ||
| 1159 | Note (*): ECMA = European Computer Manufacturers Association | 1163 | Note (*): ECMA = European Computer Manufacturers Association |
| 1160 | 1164 | ||
| 1161 | Here are examples of graphic character set [NAME(<F>)]: | 1165 | Here are examples of graphic character sets [NAME(<F>)]: |
| 1162 | o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ... | 1166 | o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ... |
| 1163 | o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ... | 1167 | o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ... |
| 1164 | o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ... | 1168 | o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ... |
| @@ -1251,7 +1255,7 @@ encode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes) | |||
| 1251 | Note (**): If <F> is '@', 'A', or 'B', the intermediate character | 1255 | Note (**): If <F> is '@', 'A', or 'B', the intermediate character |
| 1252 | '(' can be omitted. We refer to this as "short-form" hereafter. | 1256 | '(' can be omitted. We refer to this as "short-form" hereafter. |
| 1253 | 1257 | ||
| 1254 | Now you may notice that there are a lot of ways for encoding the | 1258 | Now you may notice that there are a lot of ways of encoding the |
| 1255 | same multilingual text in ISO2022. Actually, there exist many | 1259 | same multilingual text in ISO2022. Actually, there exist many |
| 1256 | coding systems such as Compound Text (used in X11's inter client | 1260 | coding systems such as Compound Text (used in X11's inter client |
| 1257 | communication), ISO-2022-JP (used in Japanese internet), ISO-2022-KR | 1261 | communication), ISO-2022-JP (used in Japanese internet), ISO-2022-KR |
| @@ -1277,14 +1281,14 @@ encode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes) | |||
| 1277 | o ESC '3' -- start relative composition with alternate chars (**) | 1281 | o ESC '3' -- start relative composition with alternate chars (**) |
| 1278 | o ESC '4' -- start rule-base composition with alternate chars (**) | 1282 | o ESC '4' -- start rule-base composition with alternate chars (**) |
| 1279 | Since these are not standard escape sequences of any ISO standard, | 1283 | Since these are not standard escape sequences of any ISO standard, |
| 1280 | the use of them for these meaning is restricted to Emacs only. | 1284 | the use of them with these meanings is restricted to Emacs only. |
| 1281 | 1285 | ||
| 1282 | (*) This form is used only in Emacs 20.5 and the older versions, | 1286 | (*) This form is used only in Emacs 20.5 and older versions, |
| 1283 | but the newer versions can safely decode it. | 1287 | but the newer versions can safely decode it. |
| 1284 | (**) This form is used only in Emacs 21.1 and the newer versions, | 1288 | (**) This form is used only in Emacs 21.1 and newer versions, |
| 1285 | and the older versions can't decode it. | 1289 | and the older versions can't decode it. |
| 1286 | 1290 | ||
| 1287 | Here's a list of examples usages of these composition escape | 1291 | Here's a list of example usages of these composition escape |
| 1288 | sequences (categorized by `enum composition_method'). | 1292 | sequences (categorized by `enum composition_method'). |
| 1289 | 1293 | ||
| 1290 | COMPOSITION_RELATIVE: | 1294 | COMPOSITION_RELATIVE: |
| @@ -1311,7 +1315,7 @@ enum iso_code_class_type iso_code_class[256]; | |||
| 1311 | (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0) | 1315 | (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0) |
| 1312 | 1316 | ||
| 1313 | /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". | 1317 | /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". |
| 1314 | Check if a text is encoded in ISO2022. If it is, returns an | 1318 | Check if a text is encoded in ISO2022. If it is, return an |
| 1315 | integer in which appropriate flag bits any of: | 1319 | integer in which appropriate flag bits any of: |
| 1316 | CODING_CATEGORY_MASK_ISO_7 | 1320 | CODING_CATEGORY_MASK_ISO_7 |
| 1317 | CODING_CATEGORY_MASK_ISO_7_TIGHT | 1321 | CODING_CATEGORY_MASK_ISO_7_TIGHT |
| @@ -2040,7 +2044,7 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 2040 | 2044 | ||
| 2041 | /* | 2045 | /* |
| 2042 | It is not enough to say just "ISO2022" on encoding, we have to | 2046 | It is not enough to say just "ISO2022" on encoding, we have to |
| 2043 | specify more details. In Emacs, each coding system of ISO2022 | 2047 | specify more details. In Emacs, each ISO2022 coding system |
| 2044 | variant has the following specifications: | 2048 | variant has the following specifications: |
| 2045 | 1. Initial designation to G0 thru G3. | 2049 | 1. Initial designation to G0 thru G3. |
| 2046 | 2. Allows short-form designation? | 2050 | 2. Allows short-form designation? |
| @@ -2635,7 +2639,7 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 2635 | 2639 | ||
| 2636 | /*** 4. SJIS and BIG5 handlers ***/ | 2640 | /*** 4. SJIS and BIG5 handlers ***/ |
| 2637 | 2641 | ||
| 2638 | /* Although SJIS and BIG5 are not ISO's coding system, they are used | 2642 | /* Although SJIS and BIG5 are not ISO coding systems, they are used |
| 2639 | quite widely. So, for the moment, Emacs supports them in the bare | 2643 | quite widely. So, for the moment, Emacs supports them in the bare |
| 2640 | C code. But, in the future, they may be supported only by CCL. */ | 2644 | C code. But, in the future, they may be supported only by CCL. */ |
| 2641 | 2645 | ||
| @@ -2644,7 +2648,7 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 2644 | as is. A character of charset katakana-jisx0201 is encoded by | 2648 | as is. A character of charset katakana-jisx0201 is encoded by |
| 2645 | "position-code + 0x80". A character of charset japanese-jisx0208 | 2649 | "position-code + 0x80". A character of charset japanese-jisx0208 |
| 2646 | is encoded in 2-byte but two position-codes are divided and shifted | 2650 | is encoded in 2-byte but two position-codes are divided and shifted |
| 2647 | so that it fit in the range below. | 2651 | so that it fits in the range below. |
| 2648 | 2652 | ||
| 2649 | --- CODE RANGE of SJIS --- | 2653 | --- CODE RANGE of SJIS --- |
| 2650 | (character set) (range) | 2654 | (character set) (range) |
| @@ -2658,7 +2662,7 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 2658 | 2662 | ||
| 2659 | /* BIG5 is a coding system encoding two character sets: ASCII and | 2663 | /* BIG5 is a coding system encoding two character sets: ASCII and |
| 2660 | Big5. An ASCII character is encoded as is. Big5 is a two-byte | 2664 | Big5. An ASCII character is encoded as is. Big5 is a two-byte |
| 2661 | character set and is encoded in two-byte. | 2665 | character set and is encoded in two bytes. |
| 2662 | 2666 | ||
| 2663 | --- CODE RANGE of BIG5 --- | 2667 | --- CODE RANGE of BIG5 --- |
| 2664 | (character set) (range) | 2668 | (character set) (range) |
| @@ -3310,15 +3314,15 @@ encode_eol (coding, source, destination, src_bytes, dst_bytes) | |||
| 3310 | 3314 | ||
| 3311 | /*** 7. C library functions ***/ | 3315 | /*** 7. C library functions ***/ |
| 3312 | 3316 | ||
| 3313 | /* In Emacs Lisp, coding system is represented by a Lisp symbol which | 3317 | /* In Emacs Lisp, a coding system is represented by a Lisp symbol which |
| 3314 | has a property `coding-system'. The value of this property is a | 3318 | has a property `coding-system'. The value of this property is a |
| 3315 | vector of length 5 (called as coding-vector). Among elements of | 3319 | vector of length 5 (called the coding-vector). Among elements of |
| 3316 | this vector, the first (element[0]) and the fifth (element[4]) | 3320 | this vector, the first (element[0]) and the fifth (element[4]) |
| 3317 | carry important information for decoding/encoding. Before | 3321 | carry important information for decoding/encoding. Before |
| 3318 | decoding/encoding, this information should be set in fields of a | 3322 | decoding/encoding, this information should be set in fields of a |
| 3319 | structure of type `coding_system'. | 3323 | structure of type `coding_system'. |
| 3320 | 3324 | ||
| 3321 | A value of property `coding-system' can be a symbol of another | 3325 | The value of the property `coding-system' can be a symbol of another |
| 3322 | subsidiary coding-system. In that case, Emacs gets coding-vector | 3326 | subsidiary coding-system. In that case, Emacs gets coding-vector |
| 3323 | from that symbol. | 3327 | from that symbol. |
| 3324 | 3328 | ||
| @@ -3362,12 +3366,12 @@ encode_eol (coding, source, destination, src_bytes, dst_bytes) | |||
| 3362 | 3366 | ||
| 3363 | If `coding->type' takes the other value, element[4] is ignored. | 3367 | If `coding->type' takes the other value, element[4] is ignored. |
| 3364 | 3368 | ||
| 3365 | Emacs Lisp's coding system also carries information about format of | 3369 | Emacs Lisp's coding systems also carry information about format of |
| 3366 | end-of-line in a value of property `eol-type'. If the value is | 3370 | end-of-line in a value of property `eol-type'. If the value is |
| 3367 | integer, 0 means CODING_EOL_LF, 1 means CODING_EOL_CRLF, and 2 | 3371 | integer, 0 means CODING_EOL_LF, 1 means CODING_EOL_CRLF, and 2 |
| 3368 | means CODING_EOL_CR. If it is not integer, it should be a vector | 3372 | means CODING_EOL_CR. If it is not integer, it should be a vector |
| 3369 | of subsidiary coding systems of which property `eol-type' has one | 3373 | of subsidiary coding systems of which property `eol-type' has one |
| 3370 | of above values. | 3374 | of the above values. |
| 3371 | 3375 | ||
| 3372 | */ | 3376 | */ |
| 3373 | 3377 | ||
| @@ -3895,10 +3899,10 @@ setup_raw_text_coding_system (coding) | |||
| 3895 | `no-conversion' by default. | 3899 | `no-conversion' by default. |
| 3896 | 3900 | ||
| 3897 | Each of them is a Lisp symbol and the value is an actual | 3901 | Each of them is a Lisp symbol and the value is an actual |
| 3898 | `coding-system's (this is also a Lisp symbol) assigned by a user. | 3902 | `coding-system' (this is also a Lisp symbol) assigned by a user. |
| 3899 | What Emacs does actually is to detect a category of coding system. | 3903 | What Emacs does actually is to detect a category of coding system. |
| 3900 | Then, it uses a `coding-system' assigned to it. If Emacs can't | 3904 | Then, it uses a `coding-system' assigned to it. If Emacs can't |
| 3901 | decide only one possible category, it selects a category of the | 3905 | decide a single possible category, it selects a category of the |
| 3902 | highest priority. Priorities of categories are also specified by a | 3906 | highest priority. Priorities of categories are also specified by a |
| 3903 | user in a Lisp variable `coding-category-list'. | 3907 | user in a Lisp variable `coding-category-list'. |
| 3904 | 3908 | ||
| @@ -4188,7 +4192,7 @@ detect_eol_type (source, src_bytes, skip) | |||
| 4188 | static int | 4192 | static int |
| 4189 | detect_eol_type_in_2_octet_form (source, src_bytes, skip, big_endian_p) | 4193 | detect_eol_type_in_2_octet_form (source, src_bytes, skip, big_endian_p) |
| 4190 | unsigned char *source; | 4194 | unsigned char *source; |
| 4191 | int src_bytes, *skip; | 4195 | int src_bytes, *skip, big_endian_p; |
| 4192 | { | 4196 | { |
| 4193 | unsigned char *src = source, *src_end = src + src_bytes; | 4197 | unsigned char *src = source, *src_end = src + src_bytes; |
| 4194 | unsigned int c1, c2; | 4198 | unsigned int c1, c2; |
| @@ -6406,7 +6410,7 @@ code_convert_region1 (start, end, coding_system, encodep) | |||
| 6406 | 6410 | ||
| 6407 | DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region, | 6411 | DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region, |
| 6408 | 3, 3, "r\nzCoding system: ", | 6412 | 3, 3, "r\nzCoding system: ", |
| 6409 | "Decode the current region by specified coding system.\n\ | 6413 | "Decode the current region from the specified coding system.\n\ |
| 6410 | When called from a program, takes three arguments:\n\ | 6414 | When called from a program, takes three arguments:\n\ |
| 6411 | START, END, and CODING-SYSTEM. START and END are buffer positions.\n\ | 6415 | START, END, and CODING-SYSTEM. START and END are buffer positions.\n\ |
| 6412 | This function sets `last-coding-system-used' to the precise coding system\n\ | 6416 | This function sets `last-coding-system-used' to the precise coding system\n\ |
| @@ -6421,7 +6425,7 @@ It returns the length of the decoded text.") | |||
| 6421 | 6425 | ||
| 6422 | DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region, | 6426 | DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region, |
| 6423 | 3, 3, "r\nzCoding system: ", | 6427 | 3, 3, "r\nzCoding system: ", |
| 6424 | "Encode the current region by specified coding system.\n\ | 6428 | "Encode the current region into the specified coding system.\n\ |
| 6425 | When called from a program, takes three arguments:\n\ | 6429 | When called from a program, takes three arguments:\n\ |
| 6426 | START, END, and CODING-SYSTEM. START and END are buffer positions.\n\ | 6430 | START, END, and CODING-SYSTEM. START and END are buffer positions.\n\ |
| 6427 | This function sets `last-coding-system-used' to the precise coding system\n\ | 6431 | This function sets `last-coding-system-used' to the precise coding system\n\ |