diff options
| author | Dave Love | 2001-01-22 23:20:06 +0000 |
|---|---|---|
| committer | Dave Love | 2001-01-22 23:20:06 +0000 |
| commit | d8e4f486d992ef1ca02fdbb9840b63f8ddc81697 (patch) | |
| tree | 9ec7ad62d2b88baa8e832e31f9d9e29442470eae /src | |
| parent | c99554b1f61bfe371a61d21f89fada90eaf6963b (diff) | |
| download | emacs-d8e4f486d992ef1ca02fdbb9840b63f8ddc81697.tar.gz emacs-d8e4f486d992ef1ca02fdbb9840b63f8ddc81697.zip | |
comment fixes
Diffstat (limited to 'src')
| -rw-r--r-- | src/charset.c | 10 | ||||
| -rw-r--r-- | src/charset.h | 46 |
2 files changed, 28 insertions, 28 deletions
diff --git a/src/charset.c b/src/charset.c index 54edddd47a6..a7522c4cf4e 100644 --- a/src/charset.c +++ b/src/charset.c | |||
| @@ -1461,10 +1461,10 @@ multibyte_chars_in_text (ptr, nbytes) | |||
| 1461 | return chars; | 1461 | return chars; |
| 1462 | } | 1462 | } |
| 1463 | 1463 | ||
| 1464 | /* Parse unibyte text at STR of LEN bytes as a multibyte text, and | 1464 | /* Parse unibyte text at STR of LEN bytes as multibyte text, and |
| 1465 | count the numbers of characters and bytes in it. On counting | 1465 | count the numbers of characters and bytes in it. On counting |
| 1466 | bytes, pay attention to that 8-bit characters in the range | 1466 | bytes, pay attention to the fact that 8-bit characters in the range |
| 1467 | 0x80..0x9F are represented by 2-byte in a multibyte text. */ | 1467 | 0x80..0x9F are represented by 2 bytes in multibyte text. */ |
| 1468 | void | 1468 | void |
| 1469 | parse_str_as_multibyte (str, len, nchars, nbytes) | 1469 | parse_str_as_multibyte (str, len, nchars, nbytes) |
| 1470 | unsigned char *str; | 1470 | unsigned char *str; |
| @@ -1486,7 +1486,7 @@ parse_str_as_multibyte (str, len, nchars, nbytes) | |||
| 1486 | return; | 1486 | return; |
| 1487 | } | 1487 | } |
| 1488 | 1488 | ||
| 1489 | /* Arrange unibyte text at STR of NBYTES bytes as a multibyte text. | 1489 | /* Arrange unibyte text at STR of NBYTES bytes as multibyte text. |
| 1490 | It actually converts only 8-bit characters in the range 0x80..0x9F | 1490 | It actually converts only 8-bit characters in the range 0x80..0x9F |
| 1491 | that don't construct multibyte characters to multibyte forms. If | 1491 | that don't construct multibyte characters to multibyte forms. If |
| 1492 | NCHARS is nonzero, set *NCHARS to the number of characters in the | 1492 | NCHARS is nonzero, set *NCHARS to the number of characters in the |
| @@ -1535,7 +1535,7 @@ str_as_multibyte (str, len, nbytes, nchars) | |||
| 1535 | return (to - str); | 1535 | return (to - str); |
| 1536 | } | 1536 | } |
| 1537 | 1537 | ||
| 1538 | /* Convert unibyte text at STR of NBYTES bytes to a multibyte text | 1538 | /* Convert unibyte text at STR of NBYTES bytes to multibyte text |
| 1539 | that contains the same single-byte characters. It actually | 1539 | that contains the same single-byte characters. It actually |
| 1540 | converts all 8-bit characters to multibyte forms. It is assured | 1540 | converts all 8-bit characters to multibyte forms. It is assured |
| 1541 | that we can use LEN bytes at STR as a work area and that is | 1541 | that we can use LEN bytes at STR as a work area and that is |
diff --git a/src/charset.h b/src/charset.h index 7cc0931f162..27146bca928 100644 --- a/src/charset.h +++ b/src/charset.h | |||
| @@ -29,19 +29,19 @@ Boston, MA 02111-1307, USA. */ | |||
| 29 | A character set ("charset" hereafter) is a meaningful collection | 29 | A character set ("charset" hereafter) is a meaningful collection |
| 30 | (i.e. language, culture, functionality, etc) of characters. Emacs | 30 | (i.e. language, culture, functionality, etc) of characters. Emacs |
| 31 | handles multiple charsets at once. Each charset corresponds to one | 31 | handles multiple charsets at once. Each charset corresponds to one |
| 32 | of ISO charsets. Emacs identifies a charset by a unique | 32 | of the ISO charsets. Emacs identifies a charset by a unique |
| 33 | identification number, whereas ISO identifies a charset by a triplet | 33 | identification number, whereas ISO identifies a charset by a triplet |
| 34 | of DIMENSION, CHARS and FINAL-CHAR. So, hereafter, just saying | 34 | of DIMENSION, CHARS and FINAL-CHAR. So, hereafter, just saying |
| 35 | "charset" means an identification number (integer value). | 35 | "charset" means an identification number (integer value). |
| 36 | 36 | ||
| 37 | The value range of charset is 0x00, 0x81..0xFE. There are four | 37 | The value range of charsets is 0x00, 0x81..0xFE. There are four |
| 38 | kinds of charset depending on DIMENSION (1 or 2) and CHARS (94 or | 38 | kinds of charset depending on DIMENSION (1 or 2) and CHARS (94 or |
| 39 | 96). For instance, a charset of DIMENSION2_CHARS94 contains 94x94 | 39 | 96). For instance, a charset of DIMENSION2_CHARS94 contains 94x94 |
| 40 | characters. | 40 | characters. |
| 41 | 41 | ||
| 42 | Within Emacs Lisp, a charset is treated as a symbol which has a | 42 | Within Emacs Lisp, a charset is treated as a symbol which has a |
| 43 | property `charset'. The property value is a vector containing | 43 | property `charset'. The property value is a vector containing |
| 44 | various information about the charset. For readability of C codes, | 44 | various information about the charset. For readability of C code, |
| 45 | we use the following convention for C variable names: | 45 | we use the following convention for C variable names: |
| 46 | charset_symbol: Emacs Lisp symbol of a charset | 46 | charset_symbol: Emacs Lisp symbol of a charset |
| 47 | charset_id: Emacs Lisp integer of an identification number of a charset | 47 | charset_id: Emacs Lisp integer of an identification number of a charset |
| @@ -51,14 +51,14 @@ Boston, MA 02111-1307, USA. */ | |||
| 51 | (range 0x80..0x9E). In addition, a charset of greater than 0xA0 | 51 | (range 0x80..0x9E). In addition, a charset of greater than 0xA0 |
| 52 | (whose base leading-code is 0x9A..0x9D) is assigned an extended | 52 | (whose base leading-code is 0x9A..0x9D) is assigned an extended |
| 53 | leading-code (range 0xA0..0xFE). In this case, each base | 53 | leading-code (range 0xA0..0xFE). In this case, each base |
| 54 | leading-code specify the allowable range of extended leading-code as | 54 | leading-code specifies the allowable range of extended leading-code |
| 55 | shown in the table below. A leading-code is used to represent a | 55 | as shown in the table below. A leading-code is used to represent a |
| 56 | character in Emacs' buffer and string. | 56 | character in Emacs' buffer and string. |
| 57 | 57 | ||
| 58 | We call a charset which has extended leading-code as "private | 58 | We call a charset which has extended leading-code a "private |
| 59 | charset" because those are mainly for a charset which is not yet | 59 | charset" because those are mainly for a charset which is not yet |
| 60 | registered by ISO. On the contrary, we call a charset which does | 60 | registered by ISO. On the contrary, we call a charset which does |
| 61 | not have extended leading-code as "official charset". | 61 | not have extended leading-code an "official charset". |
| 62 | 62 | ||
| 63 | --------------------------------------------------------------------------- | 63 | --------------------------------------------------------------------------- |
| 64 | charset dimension base leading-code extended leading-code | 64 | charset dimension base leading-code extended leading-code |
| @@ -136,8 +136,8 @@ extern int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */ | |||
| 136 | 136 | ||
| 137 | /*** GENERAL NOTE on CHARACTER REPRESENTATION *** | 137 | /*** GENERAL NOTE on CHARACTER REPRESENTATION *** |
| 138 | 138 | ||
| 139 | At first, the term "character" or "char" is used for a multilingual | 139 | Firstly, the term "character" or "char" is used for a multilingual |
| 140 | character (of course, including ASCII character), not for a byte in | 140 | character (of course, including ASCII characters), not for a byte in |
| 141 | computer memory. We use the term "code" or "byte" for the latter | 141 | computer memory. We use the term "code" or "byte" for the latter |
| 142 | case. | 142 | case. |
| 143 | 143 | ||
| @@ -149,14 +149,14 @@ extern int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */ | |||
| 149 | POSITION-CODE is 0x20..0x7F. | 149 | POSITION-CODE is 0x20..0x7F. |
| 150 | 150 | ||
| 151 | Emacs has two kinds of representation of a character: multi-byte | 151 | Emacs has two kinds of representation of a character: multi-byte |
| 152 | form (for buffer and string) and single-word form (for character | 152 | form (for buffers and strings) and single-word form (for character |
| 153 | object in Emacs Lisp). The latter is called "character code" here | 153 | objects in Emacs Lisp). The latter is called "character code" |
| 154 | after. Both representations encode the information of charset and | 154 | hereafter. Both representations encode the information of charset |
| 155 | POSITION-CODE but in a different way (for instance, MSB of | 155 | and POSITION-CODE but in a different way (for instance, the MSB of |
| 156 | POSITION-CODE is set in multi-byte form). | 156 | POSITION-CODE is set in multi-byte form). |
| 157 | 157 | ||
| 158 | For details of multi-byte form, see the section "2. Emacs internal | 158 | For details of the multi-byte form, see the section "2. Emacs |
| 159 | format handlers" of `coding.c'. | 159 | internal format handlers" of `coding.c'. |
| 160 | 160 | ||
| 161 | Emacs uses 19 bits for a character code. The bits are divided into | 161 | Emacs uses 19 bits for a character code. The bits are divided into |
| 162 | 3 fields: FIELD1(5bits):FIELD2(7bits):FIELD3(7bits). | 162 | 3 fields: FIELD1(5bits):FIELD2(7bits):FIELD3(7bits). |
| @@ -220,9 +220,9 @@ extern int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */ | |||
| 220 | /* 1 if BYTE is an ASCII character in itself, in multibyte mode. */ | 220 | /* 1 if BYTE is an ASCII character in itself, in multibyte mode. */ |
| 221 | #define ASCII_BYTE_P(byte) ((byte) < 0x80) | 221 | #define ASCII_BYTE_P(byte) ((byte) < 0x80) |
| 222 | 222 | ||
| 223 | /* A char-table containing information of each character set. | 223 | /* A char-table containing information on each character set. |
| 224 | 224 | ||
| 225 | Unlike ordinary char-tables, this doesn't contain any nested table. | 225 | Unlike ordinary char-tables, this doesn't contain any nested tables. |
| 226 | Only the top level elements are used. Each element is a vector of | 226 | Only the top level elements are used. Each element is a vector of |
| 227 | the following information: | 227 | the following information: |
| 228 | CHARSET-ID, BYTES, DIMENSION, CHARS, WIDTH, DIRECTION, | 228 | CHARSET-ID, BYTES, DIMENSION, CHARS, WIDTH, DIRECTION, |
| @@ -233,8 +233,8 @@ extern int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */ | |||
| 233 | 233 | ||
| 234 | CHARSET-ID (integer) is the identification number of the charset. | 234 | CHARSET-ID (integer) is the identification number of the charset. |
| 235 | 235 | ||
| 236 | BYTES (integer) is the length of multi-byte form of a character in | 236 | BYTES (integer) is the length of the multi-byte form of a character |
| 237 | the charset: one of 1, 2, 3, and 4. | 237 | in the charset: one of 1, 2, 3, and 4. |
| 238 | 238 | ||
| 239 | DIMENSION (integer) is the number of bytes to represent a character: 1 or 2. | 239 | DIMENSION (integer) is the number of bytes to represent a character: 1 or 2. |
| 240 | 240 | ||
| @@ -251,7 +251,7 @@ extern int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */ | |||
| 251 | charset. | 251 | charset. |
| 252 | 252 | ||
| 253 | LEADING-CODE-EXT (integer) is the extended leading-code for the | 253 | LEADING-CODE-EXT (integer) is the extended leading-code for the |
| 254 | charset. All charsets of less than 0xA0 has the value 0. | 254 | charset. All charsets of less than 0xA0 have the value 0. |
| 255 | 255 | ||
| 256 | ISO-FINAL-CHAR (character) is the final character of the | 256 | ISO-FINAL-CHAR (character) is the final character of the |
| 257 | corresponding ISO 2022 charset. It is -1 for such a character | 257 | corresponding ISO 2022 charset. It is -1 for such a character |
| @@ -266,7 +266,7 @@ extern int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */ | |||
| 266 | REVERSE-CHARSET (integer) is the charset which differs only in | 266 | REVERSE-CHARSET (integer) is the charset which differs only in |
| 267 | LEFT-TO-RIGHT value from the charset. If there's no such | 267 | LEFT-TO-RIGHT value from the charset. If there's no such |
| 268 | charset, the value is -1. | 268 | charset, the value is -1. |
| 269 | 269 | ||
| 270 | SHORT-NAME (string) is the short name to refer to the charset. | 270 | SHORT-NAME (string) is the short name to refer to the charset. |
| 271 | 271 | ||
| 272 | LONG-NAME (string) is the long name to refer to the charset. | 272 | LONG-NAME (string) is the long name to refer to the charset. |
| @@ -274,7 +274,7 @@ extern int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */ | |||
| 274 | DESCRIPTION (string) is the description string of the charset. | 274 | DESCRIPTION (string) is the description string of the charset. |
| 275 | 275 | ||
| 276 | PLIST (property list) may contain any type of information a user | 276 | PLIST (property list) may contain any type of information a user |
| 277 | want to put and get by functions `put-charset-property' and | 277 | wants to put and get by functions `put-charset-property' and |
| 278 | `get-charset-property' respectively. */ | 278 | `get-charset-property' respectively. */ |
| 279 | extern Lisp_Object Vcharset_table; | 279 | extern Lisp_Object Vcharset_table; |
| 280 | 280 | ||
| @@ -515,7 +515,7 @@ extern int iso_charset_table[2][2][128]; | |||
| 515 | : char_bytes (c)) | 515 | : char_bytes (c)) |
| 516 | 516 | ||
| 517 | /* The following two macros CHAR_STRING and STRING_CHAR are the main | 517 | /* The following two macros CHAR_STRING and STRING_CHAR are the main |
| 518 | entry points to convert between Emacs two types of character | 518 | entry points to convert between Emacs's two types of character |
| 519 | representations: multi-byte form and single-word form (character | 519 | representations: multi-byte form and single-word form (character |
| 520 | code). */ | 520 | code). */ |
| 521 | 521 | ||