diff options
| author | Eli Zaretskii | 2012-04-06 16:10:30 +0300 |
|---|---|---|
| committer | Eli Zaretskii | 2012-04-06 16:10:30 +0300 |
| commit | 2f8e16b2a3c5782a3c8266cc76fbba80d506b93d (patch) | |
| tree | 208db075f932b76c4720ffd4de7d8ef732da8ae8 /src | |
| parent | ea0ff31442804544d4096f1e7eaff9ecb10e479d (diff) | |
| download | emacs-2f8e16b2a3c5782a3c8266cc76fbba80d506b93d.tar.gz emacs-2f8e16b2a3c5782a3c8266cc76fbba80d506b93d.zip | |
Warning comments about subtleties of fetching characters from buffers/strings.
src/buffer.h (FETCH_CHAR, FETCH_MULTIBYTE_CHAR):
src/character.h (STRING_CHAR, STRING_CHAR_AND_LENGTH): Add comments
about subtle differences between FETCH_CHAR* and STRING_CHAR*
macros related to unification of CJK characters. For the details,
see the discussion following the message here:
http://debbugs.gnu.org/cgi/bugreport.cgi?bug=11073#14.
Diffstat (limited to 'src')
| -rw-r--r-- | src/ChangeLog | 9 | ||||
| -rw-r--r-- | src/buffer.h | 15 | ||||
| -rw-r--r-- | src/character.h | 14 |
3 files changed, 34 insertions, 4 deletions
diff --git a/src/ChangeLog b/src/ChangeLog index ea770969818..af65d38a33b 100644 --- a/src/ChangeLog +++ b/src/ChangeLog | |||
| @@ -1,3 +1,12 @@ | |||
| 1 | 2012-04-06 Eli Zaretskii <eliz@gnu.org> | ||
| 2 | |||
| 3 | * buffer.h (FETCH_CHAR, FETCH_MULTIBYTE_CHAR): | ||
| 4 | * character.h (STRING_CHAR, STRING_CHAR_AND_LENGTH): Add comments | ||
| 5 | about subtle differences between FETCH_CHAR* and STRING_CHAR* | ||
| 6 | macros related to unification of CJK characters. For the details, | ||
| 7 | see the discussion following the message here: | ||
| 8 | http://debbugs.gnu.org/cgi/bugreport.cgi?bug=11073#14. | ||
| 9 | |||
| 1 | 2012-04-04 Chong Yidong <cyd@gnu.org> | 10 | 2012-04-04 Chong Yidong <cyd@gnu.org> |
| 2 | 11 | ||
| 3 | * keyboard.c (Vdelayed_warnings_list): Doc fix. | 12 | * keyboard.c (Vdelayed_warnings_list): Doc fix. |
diff --git a/src/buffer.h b/src/buffer.h index 3df4a95cf93..1635a847839 100644 --- a/src/buffer.h +++ b/src/buffer.h | |||
| @@ -343,7 +343,8 @@ while (0) | |||
| 343 | - (ptr - (current_buffer)->text->beg <= GPT_BYTE - BEG_BYTE ? 0 : GAP_SIZE) \ | 343 | - (ptr - (current_buffer)->text->beg <= GPT_BYTE - BEG_BYTE ? 0 : GAP_SIZE) \ |
| 344 | + BEG_BYTE) | 344 | + BEG_BYTE) |
| 345 | 345 | ||
| 346 | /* Return character at byte position POS. */ | 346 | /* Return character at byte position POS. See the caveat WARNING for |
| 347 | FETCH_MULTIBYTE_CHAR below. */ | ||
| 347 | 348 | ||
| 348 | #define FETCH_CHAR(pos) \ | 349 | #define FETCH_CHAR(pos) \ |
| 349 | (!NILP (BVAR (current_buffer, enable_multibyte_characters)) \ | 350 | (!NILP (BVAR (current_buffer, enable_multibyte_characters)) \ |
| @@ -359,7 +360,17 @@ extern unsigned char *_fetch_multibyte_char_p; | |||
| 359 | 360 | ||
| 360 | /* Return character code of multi-byte form at byte position POS. If POS | 361 | /* Return character code of multi-byte form at byte position POS. If POS |
| 361 | doesn't point to the head of a valid multi-byte form, only the byte at | 362 | doesn't point to the head of a valid multi-byte form, only the byte at |
| 362 | POS is returned. No range checking. */ | 363 | POS is returned. No range checking. |
| 364 | |||
| 365 | WARNING: The character returned by this macro could be "unified" | ||
| 366 | inside STRING_CHAR, if the original character in the buffer belongs | ||
| 367 | to one of the Private Use Areas (PUAs) of codepoints that Emacs | ||
| 368 | uses to support non-unified CJK characters. If that happens, | ||
| 369 | CHAR_BYTES will return a value that is different from the length of | ||
| 370 | the original multibyte sequence stored in the buffer. Therefore, | ||
| 371 | do _not_ use FETCH_MULTIBYTE_CHAR if you need to advance through | ||
| 372 | the buffer to the next character after fetching this one. Instead, | ||
| 373 | use either FETCH_CHAR_ADVANCE or STRING_CHAR_AND_LENGTH. */ | ||
| 363 | 374 | ||
| 364 | #define FETCH_MULTIBYTE_CHAR(pos) \ | 375 | #define FETCH_MULTIBYTE_CHAR(pos) \ |
| 365 | (_fetch_multibyte_char_p = (((pos) >= GPT_BYTE ? GAP_SIZE : 0) \ | 376 | (_fetch_multibyte_char_p = (((pos) >= GPT_BYTE ? GAP_SIZE : 0) \ |
diff --git a/src/character.h b/src/character.h index 5ae6cb8c49c..a829def428d 100644 --- a/src/character.h +++ b/src/character.h | |||
| @@ -292,7 +292,9 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */ | |||
| 292 | } while (0) | 292 | } while (0) |
| 293 | 293 | ||
| 294 | /* Return the character code of character whose multibyte form is at | 294 | /* Return the character code of character whose multibyte form is at |
| 295 | P. */ | 295 | P. Note that this macro unifies CJK characters whose codepoints |
| 296 | are in the Private Use Areas (PUAs), so it might return a different | ||
| 297 | codepoint from the one actually stored at P. */ | ||
| 296 | 298 | ||
| 297 | #define STRING_CHAR(p) \ | 299 | #define STRING_CHAR(p) \ |
| 298 | (!((p)[0] & 0x80) \ | 300 | (!((p)[0] & 0x80) \ |
| @@ -309,7 +311,15 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */ | |||
| 309 | 311 | ||
| 310 | 312 | ||
| 311 | /* Like STRING_CHAR, but set ACTUAL_LEN to the length of multibyte | 313 | /* Like STRING_CHAR, but set ACTUAL_LEN to the length of multibyte |
| 312 | form. */ | 314 | form. |
| 315 | |||
| 316 | Note: This macro returns the actual length of the character's | ||
| 317 | multibyte sequence as it is stored in a buffer or string. The | ||
| 318 | character it returns might have a different codepoint that has a | ||
| 319 | different multibyte sequence of a different length, due to possible | ||
| 320 | unification of CJK characters inside string_char. Therefore do NOT | ||
| 321 | assume that the length returned by this macro is identical to the | ||
| 322 | length of the multibyte sequence of the character it returns. */ | ||
| 313 | 323 | ||
| 314 | #define STRING_CHAR_AND_LENGTH(p, actual_len) \ | 324 | #define STRING_CHAR_AND_LENGTH(p, actual_len) \ |
| 315 | (!((p)[0] & 0x80) \ | 325 | (!((p)[0] & 0x80) \ |