aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorEli Zaretskii2006-06-09 18:22:30 +0000
committerEli Zaretskii2006-06-09 18:22:30 +0000
commit71b169b8c49d4c2f593b7074e8555f6e479b10f3 (patch)
tree351837ea1b19a7f1b4a68cff90c9596cf983da0d /src
parenta9ab79a844b232ce7971c6234c86be3cc634a78e (diff)
downloademacs-71b169b8c49d4c2f593b7074e8555f6e479b10f3.tar.gz
emacs-71b169b8c49d4c2f593b7074e8555f6e479b10f3.zip
(read_escape): Provide a Unicode character escape syntax; \u followed by
exactly four, or \U followed by exactly eight, hex digits in a character or string literal is read as the Unicode character with that code point.
Diffstat (limited to 'src')
-rw-r--r--src/lread.c49
1 files changed, 49 insertions, 0 deletions
diff --git a/src/lread.c b/src/lread.c
index 31f974d9bc0..a0d4ad825dd 100644
--- a/src/lread.c
+++ b/src/lread.c
@@ -1764,6 +1764,9 @@ read_escape (readcharfun, stringp, byterep)
1764 int *byterep; 1764 int *byterep;
1765{ 1765{
1766 register int c = READCHAR; 1766 register int c = READCHAR;
1767 /* \u takes exactly four hex digits, \U exactly eight. Default to the
1768 behaviour for \u, and change this value in the case that \U is seen. */
1769 int unicode_hex_count = 4;
1767 1770
1768 *byterep = 0; 1771 *byterep = 0;
1769 1772
@@ -1928,6 +1931,52 @@ read_escape (readcharfun, stringp, byterep)
1928 return i; 1931 return i;
1929 } 1932 }
1930 1933
1934 case 'U':
1935 /* Post-Unicode-2.0: Exactly eight hex chars. */
1936 unicode_hex_count = 8;
1937 case 'u':
1938
1939 /* A Unicode escape. We only permit them in strings and characters,
1940 not arbitrarily in the source code, as in some other languages. */
1941 {
1942 int i = 0;
1943 int count = 0;
1944 Lisp_Object lisp_char;
1945 struct gcpro gcpro1;
1946
1947 while (++count <= unicode_hex_count)
1948 {
1949 c = READCHAR;
1950 /* isdigit(), isalpha() may be locale-specific, which we don't
1951 want. */
1952 if (c >= '0' && c <= '9') i = (i << 4) + (c - '0');
1953 else if (c >= 'a' && c <= 'f') i = (i << 4) + (c - 'a') + 10;
1954 else if (c >= 'A' && c <= 'F') i = (i << 4) + (c - 'A') + 10;
1955 else
1956 {
1957 error ("Non-hex digit used for Unicode escape");
1958 break;
1959 }
1960 }
1961
1962 GCPRO1 (readcharfun);
1963 lisp_char = call2(intern("decode-char"), intern("ucs"),
1964 make_number(i));
1965 UNGCPRO;
1966
1967 if (EQ(Qnil, lisp_char))
1968 {
1969 /* This is ugly and horrible and trashes the user's data. */
1970 XSETFASTINT (i, MAKE_CHAR (charset_katakana_jisx0201,
1971 34 + 128, 46 + 128));
1972 return i;
1973 }
1974 else
1975 {
1976 return XFASTINT (lisp_char);
1977 }
1978 }
1979
1931 default: 1980 default:
1932 if (BASE_LEADING_CODE_P (c)) 1981 if (BASE_LEADING_CODE_P (c))
1933 c = read_multibyte (c, readcharfun); 1982 c = read_multibyte (c, readcharfun);