aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPhilipp Stephani2016-04-21 14:45:22 -0700
committerPaul Eggert2016-04-21 19:29:40 -0700
commitde7d5f36e0f3261a7300fa3a3d87ae3b758b8a73 (patch)
treed929612c6d9ae099782dfe67b2d03041b9395a4f
parent7621a521452d988b27e761c76ad8e667e932192e (diff)
downloademacs-de7d5f36e0f3261a7300fa3a3d87ae3b758b8a73.tar.gz
emacs-de7d5f36e0f3261a7300fa3a3d87ae3b758b8a73.zip
Implement named character escapes, similar to Perl
* lread.c (init_character_names): New function. (read_escape): Read Perl-style named character escape sequences. (syms_of_lread): Initialize new variable 'character_names'. * test/src/lread-tests.el (lread-char-empty-name): Add test file for src/lread.c.
-rw-r--r--src/lread.c96
-rw-r--r--test/src/lread-tests.el54
2 files changed, 150 insertions, 0 deletions
diff --git a/src/lread.c b/src/lread.c
index fedfcb807c8..9fa46a875be 100644
--- a/src/lread.c
+++ b/src/lread.c
@@ -43,6 +43,7 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
43#include "systime.h" 43#include "systime.h"
44#include "termhooks.h" 44#include "termhooks.h"
45#include "blockinput.h" 45#include "blockinput.h"
46#include <c-ctype.h>
46 47
47#ifdef MSDOS 48#ifdef MSDOS
48#include "msdos.h" 49#include "msdos.h"
@@ -2149,6 +2150,36 @@ grow_read_buffer (void)
2149 MAX_MULTIBYTE_LENGTH, -1, 1); 2150 MAX_MULTIBYTE_LENGTH, -1, 1);
2150} 2151}
2151 2152
2153/* Hash table that maps Unicode character names to code points. */
2154static Lisp_Object character_names;
2155
2156/* Length of the longest Unicode character name, in bytes. */
2157static ptrdiff_t max_character_name_length;
2158
2159/* Initializes `character_names' and `max_character_name_length'.
2160 Called by `read_escape'. */
2161void init_character_names ()
2162{
2163 character_names = CALLN (Fmake_hash_table,
2164 QCtest, Qequal,
2165 /* Currently around 100,000 Unicode
2166 characters are defined. */
2167 QCsize, make_natnum (100000));
2168 const Lisp_Object get_property =
2169 Fsymbol_function (intern_c_string ("get-char-code-property"));
2170 ptrdiff_t length = 0;
2171 for (int i = 0; i <= MAX_UNICODE_CHAR; ++i)
2172 {
2173 const Lisp_Object code = make_natnum (i);
2174 const Lisp_Object name = call2 (get_property, code, Qname);
2175 if (NILP (name)) continue;
2176 CHECK_STRING (name);
2177 length = max (length, SBYTES (name));
2178 Fputhash (name, code, character_names);
2179 }
2180 max_character_name_length = length;
2181}
2182
2152/* Read a \-escape sequence, assuming we already read the `\'. 2183/* Read a \-escape sequence, assuming we already read the `\'.
2153 If the escape sequence forces unibyte, return eight-bit char. */ 2184 If the escape sequence forces unibyte, return eight-bit char. */
2154 2185
@@ -2356,6 +2387,68 @@ read_escape (Lisp_Object readcharfun, bool stringp)
2356 return i; 2387 return i;
2357 } 2388 }
2358 2389
2390 case 'N':
2391 /* Named character. */
2392 {
2393 c = READCHAR;
2394 if (c != '{')
2395 invalid_syntax ("Expected opening brace after \\N");
2396 if (NILP (character_names))
2397 init_character_names ();
2398 USE_SAFE_ALLOCA;
2399 char *name = SAFE_ALLOCA (max_character_name_length + 1);
2400 bool whitespace = false;
2401 ptrdiff_t length = 0;
2402 while (true)
2403 {
2404 c = READCHAR;
2405 if (c < 0)
2406 end_of_file_error ();
2407 if (c == '}')
2408 break;
2409 if (! c_isascii (c))
2410 xsignal1 (Qinvalid_read_syntax,
2411 CALLN (Fformat,
2412 build_pure_c_string ("Non-ASCII character U+%04X"
2413 " in character name"),
2414 make_natnum (c)));
2415 /* We treat multiple adjacent whitespace characters as a
2416 single space character. This makes it easier to use
2417 character names in e.g. multi-line strings. */
2418 if (c_isspace (c))
2419 {
2420 if (! whitespace)
2421 {
2422 whitespace = true;
2423 name[length++] = ' ';
2424 }
2425 }
2426 else
2427 {
2428 whitespace = false;
2429 name[length++] = c;
2430 }
2431 if (length >= max_character_name_length)
2432 invalid_syntax ("Character name too long");
2433 }
2434 if (length == 0)
2435 invalid_syntax ("Empty character name");
2436 name[length] = 0;
2437 const Lisp_Object lisp_name = make_unibyte_string (name, length);
2438 const Lisp_Object code =
2439 (length >= 3 && length <= 10 && name[0] == 'U' && name[1] == '+') ?
2440 /* Code point as U+N, where N is between 1 and 8 hexadecimal
2441 digits. */
2442 string_to_number (name + 2, 16, false) :
2443 Fgethash (lisp_name, character_names, Qnil);
2444 SAFE_FREE ();
2445 if (! RANGED_INTEGERP (0, code, MAX_UNICODE_CHAR))
2446 xsignal1 (Qinvalid_read_syntax,
2447 CALLN (Fformat,
2448 build_pure_c_string ("\\N{%s}"), lisp_name));
2449 return XINT (code);
2450 }
2451
2359 default: 2452 default:
2360 return c; 2453 return c;
2361 } 2454 }
@@ -4744,4 +4837,7 @@ that are loaded before your customizations are read! */);
4744 DEFSYM (Qweakness, "weakness"); 4837 DEFSYM (Qweakness, "weakness");
4745 DEFSYM (Qrehash_size, "rehash-size"); 4838 DEFSYM (Qrehash_size, "rehash-size");
4746 DEFSYM (Qrehash_threshold, "rehash-threshold"); 4839 DEFSYM (Qrehash_threshold, "rehash-threshold");
4840
4841 character_names = Qnil;
4842 staticpro (&character_names);
4747} 4843}
diff --git a/test/src/lread-tests.el b/test/src/lread-tests.el
new file mode 100644
index 00000000000..1f873340c56
--- /dev/null
+++ b/test/src/lread-tests.el
@@ -0,0 +1,54 @@
1;;; lread-tests.el --- tests for lread.c -*- lexical-binding: t; -*-
2
3;; Copyright (C) 2016 Google Inc.
4
5;; Author: Philipp Stephani <phst@google.com>
6
7;; This file is part of GNU Emacs.
8
9;; This program is free software; you can redistribute it and/or modify
10;; it under the terms of the GNU General Public License as published by
11;; the Free Software Foundation, either version 3 of the License, or
12;; (at your option) any later version.
13
14;; This program is distributed in the hope that it will be useful,
15;; but WITHOUT ANY WARRANTY; without even the implied warranty of
16;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17;; GNU General Public License for more details.
18
19;; You should have received a copy of the GNU General Public License
20;; along with this program. If not, see <http://www.gnu.org/licenses/>.
21
22;;; Commentary:
23
24;; Unit tests for code in src/lread.c.
25
26;;; Code:
27
28(ert-deftest lread-char-number ()
29 (should (equal ?\N{U+A817} #xA817)))
30
31(ert-deftest lread-char-name ()
32 (should (equal ?\N{SYLOTI NAGRI LETTER
33 DHO}
34 #xA817)))
35
36(ert-deftest lread-char-invalid-number ()
37 (should-error (read "?\\N{U+110000}") :type 'invalid-read-syntax))
38
39(ert-deftest lread-char-invalid-name ()
40 (should-error (read "?\\N{DOES NOT EXIST}")) :type 'invalid-read-syntax)
41
42(ert-deftest lread-char-non-ascii-name ()
43 (should-error (read "?\\N{LATIN CAPITAL LETTER Ø}")) 'invalid-read-syntax)
44
45(ert-deftest lread-char-empty-name ()
46 (should-error (read "?\\N{}")) 'invalid-read-syntax)
47
48(ert-deftest lread-string-char-number ()
49 (should (equal "a\N{U+A817}b" "a\uA817b")))
50
51(ert-deftest lread-string-char-name ()
52 (should (equal "a\N{SYLOTI NAGRI LETTER DHO}b" "a\uA817b")))
53
54;;; lread-tests.el ends here