diff options
Diffstat (limited to 'src/charset.c')
| -rw-r--r-- | src/charset.c | 103 |
1 files changed, 67 insertions, 36 deletions
diff --git a/src/charset.c b/src/charset.c index 55234aa76aa..57e1603fc19 100644 --- a/src/charset.c +++ b/src/charset.c | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | /* Basic character set support. | 1 | /* Basic character set support. |
| 2 | Copyright (C) 2001-2011 Free Software Foundation, Inc. | 2 | Copyright (C) 2001-2012 Free Software Foundation, Inc. |
| 3 | Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, | 3 | Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, |
| 4 | 2005, 2006, 2007, 2008, 2009, 2010, 2011 | 4 | 2005, 2006, 2007, 2008, 2009, 2010, 2011 |
| 5 | National Institute of Advanced Industrial Science and Technology (AIST) | 5 | National Institute of Advanced Industrial Science and Technology (AIST) |
| @@ -61,7 +61,7 @@ Lisp_Object Vcharset_hash_table; | |||
| 61 | /* Table of struct charset. */ | 61 | /* Table of struct charset. */ |
| 62 | struct charset *charset_table; | 62 | struct charset *charset_table; |
| 63 | 63 | ||
| 64 | static int charset_table_size; | 64 | static ptrdiff_t charset_table_size; |
| 65 | static int charset_table_used; | 65 | static int charset_table_used; |
| 66 | 66 | ||
| 67 | Lisp_Object Qcharsetp; | 67 | Lisp_Object Qcharsetp; |
| @@ -162,13 +162,13 @@ static struct | |||
| 162 | /* 1 iff the following table is used for encoder. */ | 162 | /* 1 iff the following table is used for encoder. */ |
| 163 | short for_encoder; | 163 | short for_encoder; |
| 164 | 164 | ||
| 165 | /* When the following table is used for encoding, mininum and | 165 | /* When the following table is used for encoding, minimum and |
| 166 | maxinum character of the current charset. */ | 166 | maximum character of the current charset. */ |
| 167 | int min_char, max_char; | 167 | int min_char, max_char; |
| 168 | 168 | ||
| 169 | /* A Unicode character correspoinding to the code indice 0 (i.e. the | 169 | /* A Unicode character corresponding to the code index 0 (i.e. the |
| 170 | minimum code-point) of the current charset, or -1 if the code | 170 | minimum code-point) of the current charset, or -1 if the code |
| 171 | indice 0 is not a Unicode character. This is checked when | 171 | index 0 is not a Unicode character. This is checked when |
| 172 | table.encoder[CHAR] is zero. */ | 172 | table.encoder[CHAR] is zero. */ |
| 173 | int zero_index_char; | 173 | int zero_index_char; |
| 174 | 174 | ||
| @@ -419,7 +419,7 @@ load_charset_map (struct charset *charset, struct charset_map_entries *entries, | |||
| 419 | paying attention to comment character '#'. */ | 419 | paying attention to comment character '#'. */ |
| 420 | 420 | ||
| 421 | static inline unsigned | 421 | static inline unsigned |
| 422 | read_hex (FILE *fp, int *eof) | 422 | read_hex (FILE *fp, int *eof, int *overflow) |
| 423 | { | 423 | { |
| 424 | int c; | 424 | int c; |
| 425 | unsigned n; | 425 | unsigned n; |
| @@ -441,15 +441,16 @@ read_hex (FILE *fp, int *eof) | |||
| 441 | *eof = 1; | 441 | *eof = 1; |
| 442 | return 0; | 442 | return 0; |
| 443 | } | 443 | } |
| 444 | *eof = 0; | ||
| 445 | n = 0; | 444 | n = 0; |
| 446 | if (c == 'x') | 445 | while (isxdigit (c = getc (fp))) |
| 447 | while ((c = getc (fp)) != EOF && isxdigit (c)) | 446 | { |
| 447 | if (UINT_MAX >> 4 < n) | ||
| 448 | *overflow = 1; | ||
| 448 | n = ((n << 4) | 449 | n = ((n << 4) |
| 449 | | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10)); | 450 | | (c - ('0' <= c && c <= '9' ? '0' |
| 450 | else | 451 | : 'A' <= c && c <= 'F' ? 'A' - 10 |
| 451 | while ((c = getc (fp)) != EOF && isdigit (c)) | 452 | : 'a' - 10))); |
| 452 | n = (n * 10) + c - '0'; | 453 | } |
| 453 | if (c != EOF) | 454 | if (c != EOF) |
| 454 | ungetc (c, fp); | 455 | ungetc (c, fp); |
| 455 | return n; | 456 | return n; |
| @@ -479,7 +480,6 @@ load_charset_map_from_file (struct charset *charset, Lisp_Object mapfile, int co | |||
| 479 | unsigned max_code = CHARSET_MAX_CODE (charset); | 480 | unsigned max_code = CHARSET_MAX_CODE (charset); |
| 480 | int fd; | 481 | int fd; |
| 481 | FILE *fp; | 482 | FILE *fp; |
| 482 | int eof; | ||
| 483 | Lisp_Object suffixes; | 483 | Lisp_Object suffixes; |
| 484 | struct charset_map_entries *head, *entries; | 484 | struct charset_map_entries *head, *entries; |
| 485 | int n_entries, count; | 485 | int n_entries, count; |
| @@ -504,22 +504,27 @@ load_charset_map_from_file (struct charset *charset, Lisp_Object mapfile, int co | |||
| 504 | memset (entries, 0, sizeof (struct charset_map_entries)); | 504 | memset (entries, 0, sizeof (struct charset_map_entries)); |
| 505 | 505 | ||
| 506 | n_entries = 0; | 506 | n_entries = 0; |
| 507 | eof = 0; | ||
| 508 | while (1) | 507 | while (1) |
| 509 | { | 508 | { |
| 510 | unsigned from, to; | 509 | unsigned from, to, c; |
| 511 | int c; | ||
| 512 | int idx; | 510 | int idx; |
| 511 | int eof = 0, overflow = 0; | ||
| 513 | 512 | ||
| 514 | from = read_hex (fp, &eof); | 513 | from = read_hex (fp, &eof, &overflow); |
| 515 | if (eof) | 514 | if (eof) |
| 516 | break; | 515 | break; |
| 517 | if (getc (fp) == '-') | 516 | if (getc (fp) == '-') |
| 518 | to = read_hex (fp, &eof); | 517 | to = read_hex (fp, &eof, &overflow); |
| 519 | else | 518 | else |
| 520 | to = from; | 519 | to = from; |
| 521 | c = (int) read_hex (fp, &eof); | 520 | if (eof) |
| 521 | break; | ||
| 522 | c = read_hex (fp, &eof, &overflow); | ||
| 523 | if (eof) | ||
| 524 | break; | ||
| 522 | 525 | ||
| 526 | if (overflow) | ||
| 527 | continue; | ||
| 523 | if (from < min_code || to > max_code || from > to || c > MAX_CHAR) | 528 | if (from < min_code || to > max_code || from > to || c > MAX_CHAR) |
| 524 | continue; | 529 | continue; |
| 525 | 530 | ||
| @@ -1145,13 +1150,25 @@ usage: (define-charset-internal ...) */) | |||
| 1145 | hash_code); | 1150 | hash_code); |
| 1146 | if (charset_table_used == charset_table_size) | 1151 | if (charset_table_used == charset_table_size) |
| 1147 | { | 1152 | { |
| 1148 | struct charset *new_table | 1153 | /* Ensure that charset IDs fit into 'int' as well as into the |
| 1149 | = (struct charset *) xmalloc (sizeof (struct charset) | 1154 | restriction imposed by fixnums. Although the 'int' restriction |
| 1150 | * (charset_table_size + 16)); | 1155 | could be removed, too much other code would need altering; for |
| 1151 | memcpy (new_table, charset_table, | 1156 | example, the IDs are stuffed into struct |
| 1152 | sizeof (struct charset) * charset_table_size); | 1157 | coding_system.charbuf[i] entries, which are 'int'. */ |
| 1153 | charset_table_size += 16; | 1158 | int old_size = charset_table_size; |
| 1159 | struct charset *new_table = | ||
| 1160 | xpalloc (0, &charset_table_size, 1, | ||
| 1161 | min (INT_MAX, MOST_POSITIVE_FIXNUM), | ||
| 1162 | sizeof *charset_table); | ||
| 1163 | memcpy (new_table, charset_table, old_size * sizeof *new_table); | ||
| 1154 | charset_table = new_table; | 1164 | charset_table = new_table; |
| 1165 | /* FIXME: This leaks memory, as the old charset_table becomes | ||
| 1166 | unreachable. If the old charset table is charset_table_init | ||
| 1167 | then this leak is intentional; otherwise, it's unclear. | ||
| 1168 | If the latter memory leak is intentional, a | ||
| 1169 | comment should be added to explain this. If not, the old | ||
| 1170 | charset_table should be freed, by passing it as the 1st argument | ||
| 1171 | to xpalloc and removing the memcpy. */ | ||
| 1155 | } | 1172 | } |
| 1156 | id = charset_table_used++; | 1173 | id = charset_table_used++; |
| 1157 | new_definition_p = 1; | 1174 | new_definition_p = 1; |
| @@ -1643,7 +1660,7 @@ maybe_unify_char (int c, Lisp_Object val) | |||
| 1643 | } | 1660 | } |
| 1644 | 1661 | ||
| 1645 | 1662 | ||
| 1646 | /* Return a character correponding to the code-point CODE of | 1663 | /* Return a character corresponding to the code-point CODE of |
| 1647 | CHARSET. */ | 1664 | CHARSET. */ |
| 1648 | 1665 | ||
| 1649 | int | 1666 | int |
| @@ -2065,7 +2082,7 @@ that case, find the charset from what supported by that coding system. */) | |||
| 2065 | DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0, | 2082 | DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0, |
| 2066 | doc: /* | 2083 | doc: /* |
| 2067 | Return charset of a character in the current buffer at position POS. | 2084 | Return charset of a character in the current buffer at position POS. |
| 2068 | If POS is nil, it defauls to the current point. | 2085 | If POS is nil, it defaults to the current point. |
| 2069 | If POS is out of range, the value is nil. */) | 2086 | If POS is out of range, the value is nil. */) |
| 2070 | (Lisp_Object pos) | 2087 | (Lisp_Object pos) |
| 2071 | { | 2088 | { |
| @@ -2210,14 +2227,16 @@ struct charset_sort_data | |||
| 2210 | { | 2227 | { |
| 2211 | Lisp_Object charset; | 2228 | Lisp_Object charset; |
| 2212 | int id; | 2229 | int id; |
| 2213 | int priority; | 2230 | ptrdiff_t priority; |
| 2214 | }; | 2231 | }; |
| 2215 | 2232 | ||
| 2216 | static int | 2233 | static int |
| 2217 | charset_compare (const void *d1, const void *d2) | 2234 | charset_compare (const void *d1, const void *d2) |
| 2218 | { | 2235 | { |
| 2219 | const struct charset_sort_data *data1 = d1, *data2 = d2; | 2236 | const struct charset_sort_data *data1 = d1, *data2 = d2; |
| 2220 | return (data1->priority - data2->priority); | 2237 | if (data1->priority != data2->priority) |
| 2238 | return data1->priority < data2->priority ? -1 : 1; | ||
| 2239 | return 0; | ||
| 2221 | } | 2240 | } |
| 2222 | 2241 | ||
| 2223 | DEFUN ("sort-charsets", Fsort_charsets, Ssort_charsets, 1, 1, 0, | 2242 | DEFUN ("sort-charsets", Fsort_charsets, Ssort_charsets, 1, 1, 0, |
| @@ -2227,7 +2246,8 @@ See also `charset-priority-list' and `set-charset-priority'. */) | |||
| 2227 | (Lisp_Object charsets) | 2246 | (Lisp_Object charsets) |
| 2228 | { | 2247 | { |
| 2229 | Lisp_Object len = Flength (charsets); | 2248 | Lisp_Object len = Flength (charsets); |
| 2230 | int n = XFASTINT (len), i, j, done; | 2249 | ptrdiff_t n = XFASTINT (len), i, j; |
| 2250 | int done; | ||
| 2231 | Lisp_Object tail, elt, attrs; | 2251 | Lisp_Object tail, elt, attrs; |
| 2232 | struct charset_sort_data *sort_data; | 2252 | struct charset_sort_data *sort_data; |
| 2233 | int id, min_id = INT_MAX, max_id = INT_MIN; | 2253 | int id, min_id = INT_MAX, max_id = INT_MIN; |
| @@ -2235,7 +2255,7 @@ See also `charset-priority-list' and `set-charset-priority'. */) | |||
| 2235 | 2255 | ||
| 2236 | if (n == 0) | 2256 | if (n == 0) |
| 2237 | return Qnil; | 2257 | return Qnil; |
| 2238 | SAFE_ALLOCA (sort_data, struct charset_sort_data *, sizeof (*sort_data) * n); | 2258 | SAFE_NALLOCA (sort_data, 1, n); |
| 2239 | for (tail = charsets, i = 0; CONSP (tail); tail = XCDR (tail), i++) | 2259 | for (tail = charsets, i = 0; CONSP (tail); tail = XCDR (tail), i++) |
| 2240 | { | 2260 | { |
| 2241 | elt = XCAR (tail); | 2261 | elt = XCAR (tail); |
| @@ -2307,6 +2327,18 @@ init_charset_once (void) | |||
| 2307 | 2327 | ||
| 2308 | #ifdef emacs | 2328 | #ifdef emacs |
| 2309 | 2329 | ||
| 2330 | /* Allocate an initial charset table that is large enough to handle | ||
| 2331 | Emacs while it is bootstrapping. As of September 2011, the size | ||
| 2332 | needs to be at least 166; make it a bit bigger to allow for future | ||
| 2333 | expansion. | ||
| 2334 | |||
| 2335 | Don't make the value so small that the table is reallocated during | ||
| 2336 | bootstrapping, as glibc malloc calls larger than just under 64 KiB | ||
| 2337 | during an initial bootstrap wreak havoc after dumping; see the | ||
| 2338 | M_MMAP_THRESHOLD value in alloc.c, plus there is extra overhead | ||
| 2339 | internal to glibc malloc and perhaps to Emacs malloc debugging. */ | ||
| 2340 | static struct charset charset_table_init[180]; | ||
| 2341 | |||
| 2310 | void | 2342 | void |
| 2311 | syms_of_charset (void) | 2343 | syms_of_charset (void) |
| 2312 | { | 2344 | { |
| @@ -2342,9 +2374,8 @@ syms_of_charset (void) | |||
| 2342 | Vcharset_hash_table = Fmake_hash_table (2, args); | 2374 | Vcharset_hash_table = Fmake_hash_table (2, args); |
| 2343 | } | 2375 | } |
| 2344 | 2376 | ||
| 2345 | charset_table_size = 128; | 2377 | charset_table = charset_table_init; |
| 2346 | charset_table = ((struct charset *) | 2378 | charset_table_size = sizeof charset_table_init / sizeof *charset_table_init; |
| 2347 | xmalloc (sizeof (struct charset) * charset_table_size)); | ||
| 2348 | charset_table_used = 0; | 2379 | charset_table_used = 0; |
| 2349 | 2380 | ||
| 2350 | defsubr (&Scharsetp); | 2381 | defsubr (&Scharsetp); |