aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author Eli Zaretskii 2012-04-06 16:10:30 +0300
committer Eli Zaretskii 2012-04-06 16:10:30 +0300
commit 2f8e16b2a3c5782a3c8266cc76fbba80d506b93d (patch)
tree 208db075f932b76c4720ffd4de7d8ef732da8ae8 /src
parent ea0ff31442804544d4096f1e7eaff9ecb10e479d (diff)
download emacs-2f8e16b2a3c5782a3c8266cc76fbba80d506b93d.tar.gz
emacs-2f8e16b2a3c5782a3c8266cc76fbba80d506b93d.zip
Warning comments about subtleties of fetching characters from buffers/strings.
src/buffer.h (FETCH_CHAR, FETCH_MULTIBYTE_CHAR): src/character.h (STRING_CHAR, STRING_CHAR_AND_LENGTH): Add comments about subtle differences between FETCH_CHAR* and STRING_CHAR* macros related to unification of CJK characters. For the details, see the discussion following the message here: http://debbugs.gnu.org/cgi/bugreport.cgi?bug=11073#14.
Diffstat (limited to 'src')
-rw-r--r-- src/ChangeLog 9
-rw-r--r-- src/buffer.h 15
-rw-r--r-- src/character.h 14
3 files changed, 34 insertions, 4 deletions
diff --git a/src/ChangeLog b/src/ChangeLog
index ea770969818..af65d38a33b 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,12 @@
1 2012-04-06 Eli Zaretskii <eliz@gnu.org>
2
3 * buffer.h (FETCH_CHAR, FETCH_MULTIBYTE_CHAR):
4 * character.h (STRING_CHAR, STRING_CHAR_AND_LENGTH): Add comments
5 about subtle differences between FETCH_CHAR* and STRING_CHAR*
6 macros related to unification of CJK characters. For the details,
7 see the discussion following the message here:
8 http://debbugs.gnu.org/cgi/bugreport.cgi?bug=11073#14.
9
1 2012-04-04 Chong Yidong <cyd@gnu.org> 10 2012-04-04 Chong Yidong <cyd@gnu.org>
2 11
3 * keyboard.c (Vdelayed_warnings_list): Doc fix. 12 * keyboard.c (Vdelayed_warnings_list): Doc fix.
diff --git a/src/buffer.h b/src/buffer.h
index 3df4a95cf93..1635a847839 100644
--- a/src/buffer.h
+++ b/src/buffer.h
@@ -343,7 +343,8 @@ while (0)
343 - (ptr - (current_buffer)->text->beg <= GPT_BYTE - BEG_BYTE ? 0 : GAP_SIZE) \ 343 - (ptr - (current_buffer)->text->beg <= GPT_BYTE - BEG_BYTE ? 0 : GAP_SIZE) \
344 + BEG_BYTE) 344 + BEG_BYTE)
345 345
346/* Return character at byte position POS. */ 346/* Return character at byte position POS. See the caveat WARNING for
347 FETCH_MULTIBYTE_CHAR below. */
347 348
348#define FETCH_CHAR(pos) \ 349#define FETCH_CHAR(pos) \
349 (!NILP (BVAR (current_buffer, enable_multibyte_characters)) \ 350 (!NILP (BVAR (current_buffer, enable_multibyte_characters)) \
@@ -359,7 +360,17 @@ extern unsigned char *_fetch_multibyte_char_p;
359 360
360/* Return character code of multi-byte form at byte position POS. If POS 361/* Return character code of multi-byte form at byte position POS. If POS
361 doesn't point to the head of a valid multi-byte form, only the byte at 362 doesn't point to the head of a valid multi-byte form, only the byte at
362 POS is returned. No range checking. */ 363 POS is returned. No range checking.
364
365 WARNING: The character returned by this macro could be "unified"
366 inside STRING_CHAR, if the original character in the buffer belongs
367 to one of the Private Use Areas (PUAs) of codepoints that Emacs
368 uses to support non-unified CJK characters. If that happens,
369 CHAR_BYTES will return a value that is different from the length of
370 the original multibyte sequence stored in the buffer. Therefore,
371 do _not_ use FETCH_MULTIBYTE_CHAR if you need to advance through
372 the buffer to the next character after fetching this one. Instead,
373 use either FETCH_CHAR_ADVANCE or STRING_CHAR_AND_LENGTH. */
363 374
364#define FETCH_MULTIBYTE_CHAR(pos) \ 375#define FETCH_MULTIBYTE_CHAR(pos) \
365 (_fetch_multibyte_char_p = (((pos) >= GPT_BYTE ? GAP_SIZE : 0) \ 376 (_fetch_multibyte_char_p = (((pos) >= GPT_BYTE ? GAP_SIZE : 0) \
diff --git a/src/character.h b/src/character.h
index 5ae6cb8c49c..a829def428d 100644
--- a/src/character.h
+++ b/src/character.h
@@ -292,7 +292,9 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
292 } while (0) 292 } while (0)
293 293
294/* Return the character code of character whose multibyte form is at 294/* Return the character code of character whose multibyte form is at
295 P. */ 295 P. Note that this macro unifies CJK characters whose codepoints
296 are in the Private Use Areas (PUAs), so it might return a different
297 codepoint from the one actually stored at P. */
296 298
297#define STRING_CHAR(p) \ 299#define STRING_CHAR(p) \
298 (!((p)[0] & 0x80) \ 300 (!((p)[0] & 0x80) \
@@ -309,7 +311,15 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
309 311
310 312
311/* Like STRING_CHAR, but set ACTUAL_LEN to the length of multibyte 313/* Like STRING_CHAR, but set ACTUAL_LEN to the length of multibyte
312 form. */ 314 form.
315
316 Note: This macro returns the actual length of the character's
317 multibyte sequence as it is stored in a buffer or string. The
318 character it returns might have a different codepoint that has a
319 different multibyte sequence of a different length, due to possible
320 unification of CJK characters inside string_char. Therefore do NOT
321 assume that the length returned by this macro is identical to the
322 length of the multibyte sequence of the character it returns. */
313 323
314#define STRING_CHAR_AND_LENGTH(p, actual_len) \ 324#define STRING_CHAR_AND_LENGTH(p, actual_len) \
315 (!((p)[0] & 0x80) \ 325 (!((p)[0] & 0x80) \