diff options
| author | Eli Zaretskii | 2012-04-06 16:10:30 +0300 |
|---|---|---|
| committer | Eli Zaretskii | 2012-04-06 16:10:30 +0300 |
| commit | 2f8e16b2a3c5782a3c8266cc76fbba80d506b93d (patch) | |
| tree | 208db075f932b76c4720ffd4de7d8ef732da8ae8 /src/buffer.h | |
| parent | ea0ff31442804544d4096f1e7eaff9ecb10e479d (diff) | |
| download | emacs-2f8e16b2a3c5782a3c8266cc76fbba80d506b93d.tar.gz emacs-2f8e16b2a3c5782a3c8266cc76fbba80d506b93d.zip | |
Warning comments about subtleties of fetching characters from buffers/strings.
src/buffer.h (FETCH_CHAR, FETCH_MULTIBYTE_CHAR):
src/character.h (STRING_CHAR, STRING_CHAR_AND_LENGTH): Add comments
about subtle differences between FETCH_CHAR* and STRING_CHAR*
macros related to unification of CJK characters. For the details,
see the discussion following the message here:
<http://debbugs.gnu.org/cgi/bugreport.cgi?bug=11073#14>.
Diffstat (limited to 'src/buffer.h')
| -rw-r--r-- | src/buffer.h | 15 |
1 file changed, 13 insertions, 2 deletions
diff --git a/src/buffer.h b/src/buffer.h
index 3df4a95cf93..1635a847839 100644
--- a/src/buffer.h
+++ b/src/buffer.h
| @@ -343,7 +343,8 @@ while (0) | |||
| 343 | - (ptr - (current_buffer)->text->beg <= GPT_BYTE - BEG_BYTE ? 0 : GAP_SIZE) \ | 343 | - (ptr - (current_buffer)->text->beg <= GPT_BYTE - BEG_BYTE ? 0 : GAP_SIZE) \ |
| 344 | + BEG_BYTE) | 344 | + BEG_BYTE) |
| 345 | 345 | ||
| 346 | /* Return character at byte position POS. */ | 346 | /* Return character at byte position POS. See the caveat WARNING for |
| 347 | FETCH_MULTIBYTE_CHAR below. */ | ||
| 347 | 348 | ||
| 348 | #define FETCH_CHAR(pos) \ | 349 | #define FETCH_CHAR(pos) \ |
| 349 | (!NILP (BVAR (current_buffer, enable_multibyte_characters)) \ | 350 | (!NILP (BVAR (current_buffer, enable_multibyte_characters)) \ |
| @@ -359,7 +360,17 @@ extern unsigned char *_fetch_multibyte_char_p; | |||
| 359 | 360 | ||
| 360 | /* Return character code of multi-byte form at byte position POS. If POS | 361 | /* Return character code of multi-byte form at byte position POS. If POS |
| 361 | doesn't point the head of valid multi-byte form, only the byte at | 362 | doesn't point the head of valid multi-byte form, only the byte at |
| 362 | POS is returned. No range checking. */ | 363 | POS is returned. No range checking. |
| 364 | |||
| 365 | WARNING: The character returned by this macro could be "unified" | ||
| 366 | inside STRING_CHAR, if the original character in the buffer belongs | ||
| 367 | to one of the Private Use Areas (PUAs) of codepoints that Emacs | ||
| 368 | uses to support non-unified CJK characters. If that happens, | ||
| 369 | CHAR_BYTES will return a value that is different from the length of | ||
| 370 | the original multibyte sequence stored in the buffer. Therefore, | ||
| 371 | do _not_ use FETCH_MULTIBYTE_CHAR if you need to advance through | ||
| 372 | the buffer to the next character after fetching this one. Instead, | ||
| 373 | use either FETCH_CHAR_ADVANCE or STRING_CHAR_AND_LENGTH. */ | ||
| 363 | 374 | ||
| 364 | #define FETCH_MULTIBYTE_CHAR(pos) \ | 375 | #define FETCH_MULTIBYTE_CHAR(pos) \ |
| 365 | (_fetch_multibyte_char_p = (((pos) >= GPT_BYTE ? GAP_SIZE : 0) \ | 376 | (_fetch_multibyte_char_p = (((pos) >= GPT_BYTE ? GAP_SIZE : 0) \ |