diff options
| author | Eli Zaretskii | 2006-06-09 18:22:30 +0000 |
|---|---|---|
| committer | Eli Zaretskii | 2006-06-09 18:22:30 +0000 |
| commit | 71b169b8c49d4c2f593b7074e8555f6e479b10f3 (patch) | |
| tree | 351837ea1b19a7f1b4a68cff90c9596cf983da0d /src | |
| parent | a9ab79a844b232ce7971c6234c86be3cc634a78e (diff) | |
| download | emacs-71b169b8c49d4c2f593b7074e8555f6e479b10f3.tar.gz emacs-71b169b8c49d4c2f593b7074e8555f6e479b10f3.zip | |
(read_escape): Provide a Unicode character escape syntax: \u followed by
exactly four, or \U followed by exactly eight, hex digits in a character or
string literal is read as the Unicode character with that code point.
Diffstat (limited to 'src')
| -rw-r--r-- | src/lread.c | 49 |
1 files changed, 49 insertions, 0 deletions
diff --git a/src/lread.c b/src/lread.c index 31f974d9bc0..a0d4ad825dd 100644 --- a/src/lread.c +++ b/src/lread.c | |||
| @@ -1764,6 +1764,9 @@ read_escape (readcharfun, stringp, byterep) | |||
| 1764 | int *byterep; | 1764 | int *byterep; |
| 1765 | { | 1765 | { |
| 1766 | register int c = READCHAR; | 1766 | register int c = READCHAR; |
| 1767 | /* \u takes exactly four hex digits, \U exactly eight. Default to the | ||
| 1768 | count for \u, and change this value in the case that \U is seen. */ | ||
| 1769 | int unicode_hex_count = 4; | ||
| 1767 | 1770 | ||
| 1768 | *byterep = 0; | 1771 | *byterep = 0; |
| 1769 | 1772 | ||
| @@ -1928,6 +1931,52 @@ read_escape (readcharfun, stringp, byterep) | |||
| 1928 | return i; | 1931 | return i; |
| 1929 | } | 1932 | } |
| 1930 | 1933 | ||
| 1934 | case 'U': | ||
| 1935 | /* Post-Unicode-2.0: exactly eight hex chars. */ | ||
| 1936 | unicode_hex_count = 8; | ||
| 1937 | case 'u': | ||
| 1938 | |||
| 1939 | /* A Unicode escape. We only permit them in strings and characters, | ||
| 1940 | not arbitrarily in the source code, as in some other languages. */ | ||
| 1941 | { | ||
| 1942 | int i = 0; | ||
| 1943 | int count = 0; | ||
| 1944 | Lisp_Object lisp_char; | ||
| 1945 | struct gcpro gcpro1; | ||
| 1946 | |||
| 1947 | while (++count <= unicode_hex_count) | ||
| 1948 | { | ||
| 1949 | c = READCHAR; | ||
| 1950 | /* isdigit(), isalpha() may be locale-specific, which we don't | ||
| 1951 | want. */ | ||
| 1952 | if (c >= '0' && c <= '9') i = (i << 4) + (c - '0'); | ||
| 1953 | else if (c >= 'a' && c <= 'f') i = (i << 4) + (c - 'a') + 10; | ||
| 1954 | else if (c >= 'A' && c <= 'F') i = (i << 4) + (c - 'A') + 10; | ||
| 1955 | else | ||
| 1956 | { | ||
| 1957 | error ("Non-hex digit used for Unicode escape"); | ||
| 1958 | break; | ||
| 1959 | } | ||
| 1960 | } | ||
| 1961 | |||
| 1962 | GCPRO1 (readcharfun); | ||
| 1963 | lisp_char = call2(intern("decode-char"), intern("ucs"), | ||
| 1964 | make_number(i)); | ||
| 1965 | UNGCPRO; | ||
| 1966 | |||
| 1967 | if (EQ(Qnil, lisp_char)) | ||
| 1968 | { | ||
| 1969 | /* This is ugly and horrible and trashes the user's data. */ | ||
| 1970 | XSETFASTINT (i, MAKE_CHAR (charset_katakana_jisx0201, | ||
| 1971 | 34 + 128, 46 + 128)); | ||
| 1972 | return i; | ||
| 1973 | } | ||
| 1974 | else | ||
| 1975 | { | ||
| 1976 | return XFASTINT (lisp_char); | ||
| 1977 | } | ||
| 1978 | } | ||
| 1979 | |||
| 1931 | default: | 1980 | default: |
| 1932 | if (BASE_LEADING_CODE_P (c)) | 1981 | if (BASE_LEADING_CODE_P (c)) |
| 1933 | c = read_multibyte (c, readcharfun); | 1982 | c = read_multibyte (c, readcharfun); |