aboutsummaryrefslogtreecommitdiffstats
path: root/src/charset.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/charset.c')
-rw-r--r-- src/charset.c 103
1 files changed, 67 insertions, 36 deletions
diff --git a/src/charset.c b/src/charset.c
index 55234aa76aa..57e1603fc19 100644
--- a/src/charset.c
+++ b/src/charset.c
@@ -1,5 +1,5 @@
1/* Basic character set support. 1/* Basic character set support.
2 Copyright (C) 2001-2011 Free Software Foundation, Inc. 2 Copyright (C) 2001-2012 Free Software Foundation, Inc.
3 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 3 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
4 2005, 2006, 2007, 2008, 2009, 2010, 2011 4 2005, 2006, 2007, 2008, 2009, 2010, 2011
5 National Institute of Advanced Industrial Science and Technology (AIST) 5 National Institute of Advanced Industrial Science and Technology (AIST)
@@ -61,7 +61,7 @@ Lisp_Object Vcharset_hash_table;
61/* Table of struct charset. */ 61/* Table of struct charset. */
62struct charset *charset_table; 62struct charset *charset_table;
63 63
64static int charset_table_size; 64static ptrdiff_t charset_table_size;
65static int charset_table_used; 65static int charset_table_used;
66 66
67Lisp_Object Qcharsetp; 67Lisp_Object Qcharsetp;
@@ -162,13 +162,13 @@ static struct
162 /* 1 iff the following table is used for encoder. */ 162 /* 1 iff the following table is used for encoder. */
163 short for_encoder; 163 short for_encoder;
164 164
165 /* When the following table is used for encoding, mininum and 165 /* When the following table is used for encoding, minimum and
166 maxinum character of the current charset. */ 166 maximum character of the current charset. */
167 int min_char, max_char; 167 int min_char, max_char;
168 168
169 /* A Unicode character correspoinding to the code indice 0 (i.e. the 169 /* A Unicode character corresponding to the code index 0 (i.e. the
170 minimum code-point) of the current charset, or -1 if the code 170 minimum code-point) of the current charset, or -1 if the code
171 indice 0 is not a Unicode character. This is checked when 171 index 0 is not a Unicode character. This is checked when
172 table.encoder[CHAR] is zero. */ 172 table.encoder[CHAR] is zero. */
173 int zero_index_char; 173 int zero_index_char;
174 174
@@ -419,7 +419,7 @@ load_charset_map (struct charset *charset, struct charset_map_entries *entries,
419 paying attention to comment character '#'. */ 419 paying attention to comment character '#'. */
420 420
421static inline unsigned 421static inline unsigned
422read_hex (FILE *fp, int *eof) 422read_hex (FILE *fp, int *eof, int *overflow)
423{ 423{
424 int c; 424 int c;
425 unsigned n; 425 unsigned n;
@@ -441,15 +441,16 @@ read_hex (FILE *fp, int *eof)
441 *eof = 1; 441 *eof = 1;
442 return 0; 442 return 0;
443 } 443 }
444 *eof = 0;
445 n = 0; 444 n = 0;
446 if (c == 'x') 445 while (isxdigit (c = getc (fp)))
447 while ((c = getc (fp)) != EOF && isxdigit (c)) 446 {
447 if (UINT_MAX >> 4 < n)
448 *overflow = 1;
448 n = ((n << 4) 449 n = ((n << 4)
449 | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10)); 450 | (c - ('0' <= c && c <= '9' ? '0'
450 else 451 : 'A' <= c && c <= 'F' ? 'A' - 10
451 while ((c = getc (fp)) != EOF && isdigit (c)) 452 : 'a' - 10)));
452 n = (n * 10) + c - '0'; 453 }
453 if (c != EOF) 454 if (c != EOF)
454 ungetc (c, fp); 455 ungetc (c, fp);
455 return n; 456 return n;
@@ -479,7 +480,6 @@ load_charset_map_from_file (struct charset *charset, Lisp_Object mapfile, int co
479 unsigned max_code = CHARSET_MAX_CODE (charset); 480 unsigned max_code = CHARSET_MAX_CODE (charset);
480 int fd; 481 int fd;
481 FILE *fp; 482 FILE *fp;
482 int eof;
483 Lisp_Object suffixes; 483 Lisp_Object suffixes;
484 struct charset_map_entries *head, *entries; 484 struct charset_map_entries *head, *entries;
485 int n_entries, count; 485 int n_entries, count;
@@ -504,22 +504,27 @@ load_charset_map_from_file (struct charset *charset, Lisp_Object mapfile, int co
504 memset (entries, 0, sizeof (struct charset_map_entries)); 504 memset (entries, 0, sizeof (struct charset_map_entries));
505 505
506 n_entries = 0; 506 n_entries = 0;
507 eof = 0;
508 while (1) 507 while (1)
509 { 508 {
510 unsigned from, to; 509 unsigned from, to, c;
511 int c;
512 int idx; 510 int idx;
511 int eof = 0, overflow = 0;
513 512
514 from = read_hex (fp, &eof); 513 from = read_hex (fp, &eof, &overflow);
515 if (eof) 514 if (eof)
516 break; 515 break;
517 if (getc (fp) == '-') 516 if (getc (fp) == '-')
518 to = read_hex (fp, &eof); 517 to = read_hex (fp, &eof, &overflow);
519 else 518 else
520 to = from; 519 to = from;
521 c = (int) read_hex (fp, &eof); 520 if (eof)
521 break;
522 c = read_hex (fp, &eof, &overflow);
523 if (eof)
524 break;
522 525
526 if (overflow)
527 continue;
523 if (from < min_code || to > max_code || from > to || c > MAX_CHAR) 528 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
524 continue; 529 continue;
525 530
@@ -1145,13 +1150,25 @@ usage: (define-charset-internal ...) */)
1145 hash_code); 1150 hash_code);
1146 if (charset_table_used == charset_table_size) 1151 if (charset_table_used == charset_table_size)
1147 { 1152 {
1148 struct charset *new_table 1153 /* Ensure that charset IDs fit into 'int' as well as into the
1149 = (struct charset *) xmalloc (sizeof (struct charset) 1154 restriction imposed by fixnums. Although the 'int' restriction
1150 * (charset_table_size + 16)); 1155 could be removed, too much other code would need altering; for
1151 memcpy (new_table, charset_table, 1156 example, the IDs are stuffed into struct
1152 sizeof (struct charset) * charset_table_size); 1157 coding_system.charbuf[i] entries, which are 'int'. */
1153 charset_table_size += 16; 1158 int old_size = charset_table_size;
1159 struct charset *new_table =
1160 xpalloc (0, &charset_table_size, 1,
1161 min (INT_MAX, MOST_POSITIVE_FIXNUM),
1162 sizeof *charset_table);
1163 memcpy (new_table, charset_table, old_size * sizeof *new_table);
1154 charset_table = new_table; 1164 charset_table = new_table;
1165 /* FIXME: This leaks memory, as the old charset_table becomes
1166 unreachable. If the old charset table is charset_table_init
1167 then this leak is intentional; otherwise, it's unclear.
1168 If the latter memory leak is intentional, a
1169 comment should be added to explain this. If not, the old
1170 charset_table should be freed, by passing it as the 1st argument
1171 to xpalloc and removing the memcpy. */
1155 } 1172 }
1156 id = charset_table_used++; 1173 id = charset_table_used++;
1157 new_definition_p = 1; 1174 new_definition_p = 1;
@@ -1643,7 +1660,7 @@ maybe_unify_char (int c, Lisp_Object val)
1643} 1660}
1644 1661
1645 1662
1646/* Return a character correponding to the code-point CODE of 1663/* Return a character corresponding to the code-point CODE of
1647 CHARSET. */ 1664 CHARSET. */
1648 1665
1649int 1666int
@@ -2065,7 +2082,7 @@ that case, find the charset from what supported by that coding system. */)
2065DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0, 2082DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
2066 doc: /* 2083 doc: /*
2067Return charset of a character in the current buffer at position POS. 2084Return charset of a character in the current buffer at position POS.
2068If POS is nil, it defauls to the current point. 2085If POS is nil, it defaults to the current point.
2069If POS is out of range, the value is nil. */) 2086If POS is out of range, the value is nil. */)
2070 (Lisp_Object pos) 2087 (Lisp_Object pos)
2071{ 2088{
@@ -2210,14 +2227,16 @@ struct charset_sort_data
2210{ 2227{
2211 Lisp_Object charset; 2228 Lisp_Object charset;
2212 int id; 2229 int id;
2213 int priority; 2230 ptrdiff_t priority;
2214}; 2231};
2215 2232
2216static int 2233static int
2217charset_compare (const void *d1, const void *d2) 2234charset_compare (const void *d1, const void *d2)
2218{ 2235{
2219 const struct charset_sort_data *data1 = d1, *data2 = d2; 2236 const struct charset_sort_data *data1 = d1, *data2 = d2;
2220 return (data1->priority - data2->priority); 2237 if (data1->priority != data2->priority)
2238 return data1->priority < data2->priority ? -1 : 1;
2239 return 0;
2221} 2240}
2222 2241
2223DEFUN ("sort-charsets", Fsort_charsets, Ssort_charsets, 1, 1, 0, 2242DEFUN ("sort-charsets", Fsort_charsets, Ssort_charsets, 1, 1, 0,
@@ -2227,7 +2246,8 @@ See also `charset-priority-list' and `set-charset-priority'. */)
2227 (Lisp_Object charsets) 2246 (Lisp_Object charsets)
2228{ 2247{
2229 Lisp_Object len = Flength (charsets); 2248 Lisp_Object len = Flength (charsets);
2230 int n = XFASTINT (len), i, j, done; 2249 ptrdiff_t n = XFASTINT (len), i, j;
2250 int done;
2231 Lisp_Object tail, elt, attrs; 2251 Lisp_Object tail, elt, attrs;
2232 struct charset_sort_data *sort_data; 2252 struct charset_sort_data *sort_data;
2233 int id, min_id = INT_MAX, max_id = INT_MIN; 2253 int id, min_id = INT_MAX, max_id = INT_MIN;
@@ -2235,7 +2255,7 @@ See also `charset-priority-list' and `set-charset-priority'. */)
2235 2255
2236 if (n == 0) 2256 if (n == 0)
2237 return Qnil; 2257 return Qnil;
2238 SAFE_ALLOCA (sort_data, struct charset_sort_data *, sizeof (*sort_data) * n); 2258 SAFE_NALLOCA (sort_data, 1, n);
2239 for (tail = charsets, i = 0; CONSP (tail); tail = XCDR (tail), i++) 2259 for (tail = charsets, i = 0; CONSP (tail); tail = XCDR (tail), i++)
2240 { 2260 {
2241 elt = XCAR (tail); 2261 elt = XCAR (tail);
@@ -2307,6 +2327,18 @@ init_charset_once (void)
2307 2327
2308#ifdef emacs 2328#ifdef emacs
2309 2329
2330/* Allocate an initial charset table that is large enough to handle
2331 Emacs while it is bootstrapping. As of September 2011, the size
2332 needs to be at least 166; make it a bit bigger to allow for future
2333 expansion.
2334
2335 Don't make the value so small that the table is reallocated during
2336 bootstrapping, as glibc malloc calls larger than just under 64 KiB
2337 during an initial bootstrap wreak havoc after dumping; see the
2338 M_MMAP_THRESHOLD value in alloc.c, plus there is an extra overhead
2339 internal to glibc malloc and perhaps to Emacs malloc debugging. */
2340static struct charset charset_table_init[180];
2341
2310void 2342void
2311syms_of_charset (void) 2343syms_of_charset (void)
2312{ 2344{
@@ -2342,9 +2374,8 @@ syms_of_charset (void)
2342 Vcharset_hash_table = Fmake_hash_table (2, args); 2374 Vcharset_hash_table = Fmake_hash_table (2, args);
2343 } 2375 }
2344 2376
2345 charset_table_size = 128; 2377 charset_table = charset_table_init;
2346 charset_table = ((struct charset *) 2378 charset_table_size = sizeof charset_table_init / sizeof *charset_table_init;
2347 xmalloc (sizeof (struct charset) * charset_table_size));
2348 charset_table_used = 0; 2379 charset_table_used = 0;
2349 2380
2350 defsubr (&Scharsetp); 2381 defsubr (&Scharsetp);