aboutsummaryrefslogtreecommitdiffstats
path: root/src/character.h
diff options
context:
space:
mode:
author Eli Zaretskii 2012-04-06 16:10:30 +0300
committer Eli Zaretskii 2012-04-06 16:10:30 +0300
commit 2f8e16b2a3c5782a3c8266cc76fbba80d506b93d (patch)
tree 208db075f932b76c4720ffd4de7d8ef732da8ae8 /src/character.h
parent ea0ff31442804544d4096f1e7eaff9ecb10e479d (diff)
download emacs-2f8e16b2a3c5782a3c8266cc76fbba80d506b93d.tar.gz
emacs-2f8e16b2a3c5782a3c8266cc76fbba80d506b93d.zip
Warning comments about subtleties of fetching characters from buffers/strings.
src/buffer.h (FETCH_CHAR, FETCH_MULTIBYTE_CHAR): src/character.h (STRING_CHAR, STRING_CHAR_AND_LENGTH): Add comments about subtle differences between FETCH_CHAR* and STRING_CHAR* macros related to unification of CJK characters. For the details, see the discussion following the message here: http://debbugs.gnu.org/cgi/bugreport.cgi?bug=11073#14.
Diffstat (limited to 'src/character.h')
-rw-r--r-- src/character.h 14
1 files changed, 12 insertions, 2 deletions
diff --git a/src/character.h b/src/character.h
index 5ae6cb8c49c..a829def428d 100644
--- a/src/character.h
+++ b/src/character.h
@@ -292,7 +292,9 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
292 } while (0) 292 } while (0)
293 293
294/* Return the character code of character whose multibyte form is at 294/* Return the character code of character whose multibyte form is at
295 P. */ 295 P. Note that this macro unifies CJK characters whose codepoints
296 are in the Private Use Areas (PUAs), so it might return a different
297 codepoint from the one actually stored at P. */
296 298
297#define STRING_CHAR(p) \ 299#define STRING_CHAR(p) \
298 (!((p)[0] & 0x80) \ 300 (!((p)[0] & 0x80) \
@@ -309,7 +311,15 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
309 311
310 312
311/* Like STRING_CHAR, but set ACTUAL_LEN to the length of multibyte 313/* Like STRING_CHAR, but set ACTUAL_LEN to the length of multibyte
312 form. */ 314 form.
315
316 Note: This macro returns the actual length of the character's
317 multibyte sequence as it is stored in a buffer or string. The
318 character it returns might have a different codepoint that has a
319 different multibyte sequence of a different length, due to possible
320 unification of CJK characters inside string_char. Therefore do NOT
321 assume that the length returned by this macro is identical to the
322 length of the multibyte sequence of the character it returns. */
313 323
314#define STRING_CHAR_AND_LENGTH(p, actual_len) \ 324#define STRING_CHAR_AND_LENGTH(p, actual_len) \
315 (!((p)[0] & 0x80) \ 325 (!((p)[0] & 0x80) \