diff options
| author | Kenichi Handa | 2002-08-11 01:06:42 +0000 |
|---|---|---|
| committer | Kenichi Handa | 2002-08-11 01:06:42 +0000 |
| commit | 068a9dbdf69bf0efe8ddf92ec1283e7b26894e43 (patch) | |
| tree | 9798d5bbf21e235ca8ca0a939db38415fc4efa6e /src/coding.c | |
| parent | 054e62aeb3c14b1a8324b6d3e911f5cff64a5b09 (diff) | |
| download | emacs-068a9dbdf69bf0efe8ddf92ec1283e7b26894e43.tar.gz emacs-068a9dbdf69bf0efe8ddf92ec1283e7b26894e43.zip | |
(unencodable_char_position): New function.
(Funencodable_char_position): New function.
(syms_of_coding): Defsubr Funencodable_char_position.
Diffstat (limited to 'src/coding.c')
| -rw-r--r-- | src/coding.c | 137 |
1 files changed, 137 insertions, 0 deletions
diff --git a/src/coding.c b/src/coding.c index 57d7e4b21fd..7f9f5171b9e 100644 --- a/src/coding.c +++ b/src/coding.c | |||
| @@ -6498,6 +6498,142 @@ DEFUN ("find-coding-systems-region-internal", | |||
| 6498 | } | 6498 | } |
| 6499 | 6499 | ||
| 6500 | 6500 | ||
| 6501 | /* Scan the multibyte text from P up to PEND for characters that are | ||
| 6502 | unencodable according to SAFE_CHARS, and return a list of their | ||
| 6503 | character positions in ascending order. POS is the position of | ||
| 6504 | the character stored at P. The scan stops at PEND, or as soon as N | ||
| 6505 | unencodable characters have been found. | ||
| 6506 | |||
| 6507 | If SAFE_CHARS is a char table, a character is unencodable when its | ||
| 6508 | element in the table is nil. | ||
| 6509 | |||
| 6510 | If SAFE_CHARS is nil, all non-ASCII characters are unencodable. | ||
| 6511 | Otherwise, SAFE_CHARS is expected to be t, and only eight-bit-control | ||
| 6512 | and eight-bit-graphic characters (codes 128..255) are unencodable. */ | ||
| 6513 | |||
| 6514 | static Lisp_Object | ||
| 6515 | unencodable_char_position (safe_chars, pos, p, pend, n) | ||
| 6516 | Lisp_Object safe_chars; | ||
| 6517 | int pos; | ||
| 6518 | unsigned char *p, *pend; | ||
| 6519 | int n; | ||
| 6520 | { | ||
| 6521 | Lisp_Object found = Qnil; | ||
| 6522 | int nbytes, ch, bad; | ||
| 6523 | |||
| 6524 | /* POS and P advance together, one character per iteration. */ | ||
| 6525 | for (; p < pend; pos++, p += nbytes) | ||
| 6526 | { | ||
| 6527 | ch = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, nbytes); | ||
| 6528 | if (ch < 128) | ||
| 6529 | continue; | ||
| 6530 | if (CHAR_TABLE_P (safe_chars)) | ||
| 6531 | bad = NILP (CHAR_TABLE_REF (safe_chars, ch)); | ||
| 6532 | else | ||
| 6533 | bad = NILP (safe_chars) || ch < 256; | ||
| 6534 | if (! bad) | ||
| 6535 | continue; | ||
| 6536 | /* Collected newest-first; put back in order before returning. */ | ||
| 6537 | found = Fcons (make_number (pos), found); | ||
| 6538 | if (--n <= 0) | ||
| 6539 | break; | ||
| 6540 | } | ||
| 6541 | return Fnreverse (found); | ||
| 6542 | } | ||
| 6543 | |||
| 6544 | |||
| 6545 | DEFUN ("unencodable-char-position", Funencodable_char_position, | ||
| 6546 | Sunencodable_char_position, 3, 5, 0, | ||
| 6547 | doc: /* | ||
| 6548 | Return position of first un-encodable character in a region. | ||
| 6549 | START and END specify the region and CODING-SYSTEM specifies the | ||
| 6550 | encoding to check. Return nil if CODING-SYSTEM can encode the region. | ||
| 6551 | |||
| 6552 | If optional 4th argument COUNT is non-nil, it specifies at most how | ||
| 6553 | many un-encodable characters to search. In this case, the value is a | ||
| 6554 | list of positions. | ||
| 6555 | |||
| 6556 | If optional 5th argument STRING is non-nil, it is a string to search | ||
| 6557 | for un-encodable characters. In that case, START and END are indexes | ||
| 6558 | to the string. */) | ||
| 6559 | (start, end, coding_system, count, string) | ||
| 6560 | Lisp_Object start, end, coding_system, count, string; | ||
| 6561 | { | ||
| 6562 | int n; | ||
| 6563 | Lisp_Object safe_chars; | ||
| 6564 | struct coding_system coding; | ||
| 6565 | Lisp_Object positions; | ||
| 6566 | int from, to; | ||
| 6567 | unsigned char *p, *pend; | ||
| 6568 | |||
| 6569 | if (NILP (string)) | ||
| 6570 | { | ||
| 6571 | validate_region (&start, &end); | ||
| 6572 | from = XINT (start); | ||
| 6573 | to = XINT (end); | ||
| 6574 | if (NILP (current_buffer->enable_multibyte_characters)) | ||
| 6575 | return Qnil; /* Unibyte buffer: nothing is reported. */ | ||
| 6576 | p = CHAR_POS_ADDR (from); | ||
| 6577 | pend = CHAR_POS_ADDR (to); | ||
| 6578 | } | ||
| 6579 | else | ||
| 6580 | { | ||
| 6581 | CHECK_STRING (string); | ||
| 6582 | CHECK_NATNUM (start); | ||
| 6583 | CHECK_NATNUM (end); | ||
| 6584 | from = XINT (start); | ||
| 6585 | to = XINT (end); | ||
| 6586 | if (from > to | ||
| 6587 | || to > SCHARS (string)) | ||
| 6588 | args_out_of_range_3 (string, start, end); | ||
| 6589 | if (! STRING_MULTIBYTE (string)) | ||
| 6590 | return Qnil; /* Unibyte string: nothing is reported. */ | ||
| 6591 | p = SDATA (string) + string_char_to_byte (string, from); | ||
| 6592 | pend = SDATA (string) + string_char_to_byte (string, to); | ||
| 6593 | } | ||
| 6594 | |||
| 6595 | setup_coding_system (Fcheck_coding_system (coding_system), &coding); | ||
| 6596 | /* Without COUNT, only the first unencodable character is wanted. */ | ||
| 6597 | if (NILP (count)) | ||
| 6598 | n = 1; | ||
| 6599 | else | ||
| 6600 | { | ||
| 6601 | CHECK_NATNUM (count); | ||
| 6602 | n = XINT (count); | ||
| 6603 | } | ||
| 6604 | /* For no-conversion and raw-text, nothing is reported as unencodable. */ | ||
| 6605 | if (coding.type == coding_type_no_conversion | ||
| 6606 | || coding.type == coding_type_raw_text) | ||
| 6607 | return Qnil; | ||
| 6608 | /* For undecided, a nil SAFE_CHARS makes every non-ASCII character count. */ | ||
| 6609 | if (coding.type == coding_type_undecided) | ||
| 6610 | safe_chars = Qnil; | ||
| 6611 | else | ||
| 6612 | safe_chars = coding_safe_chars (&coding); | ||
| 6613 | /* A buffer region with FROM < GPT < TO is scanned in two pieces. */ | ||
| 6614 | if (STRINGP (string) | ||
| 6615 | || from >= GPT || to <= GPT) | ||
| 6616 | positions = unencodable_char_position (safe_chars, from, p, pend, n); | ||
| 6617 | else | ||
| 6618 | { | ||
| 6619 | Lisp_Object args[2]; | ||
| 6620 | /* First piece: from P up to GPT_ADDR. */ | ||
| 6621 | args[0] = unencodable_char_position (safe_chars, from, p, GPT_ADDR, n); | ||
| 6622 | n -= XINT (Flength (args[0])); /* Flength yields a Lisp integer. */ | ||
| 6623 | if (n <= 0) | ||
| 6624 | positions = args[0]; | ||
| 6625 | else | ||
| 6626 | { | ||
| 6627 | args[1] = unencodable_char_position (safe_chars, GPT, GAP_END_ADDR, | ||
| 6628 | pend, n); | ||
| 6629 | positions = Fappend (2, args); | ||
| 6630 | } | ||
| 6631 | } | ||
| 6632 | |||
| 6633 | return (NILP (count) ? Fcar (positions) : positions); | ||
| 6634 | } | ||
| 6635 | |||
| 6636 | |||
| 6501 | Lisp_Object | 6637 | Lisp_Object |
| 6502 | code_convert_region1 (start, end, coding_system, encodep) | 6638 | code_convert_region1 (start, end, coding_system, encodep) |
| 6503 | Lisp_Object start, end, coding_system; | 6639 | Lisp_Object start, end, coding_system; |
| @@ -7189,6 +7325,7 @@ syms_of_coding () | |||
| 7189 | defsubr (&Sdetect_coding_region); | 7325 | defsubr (&Sdetect_coding_region); |
| 7190 | defsubr (&Sdetect_coding_string); | 7326 | defsubr (&Sdetect_coding_string); |
| 7191 | defsubr (&Sfind_coding_systems_region_internal); | 7327 | defsubr (&Sfind_coding_systems_region_internal); |
| 7328 | defsubr (&Sunencodable_char_position); | ||
| 7192 | defsubr (&Sdecode_coding_region); | 7329 | defsubr (&Sdecode_coding_region); |
| 7193 | defsubr (&Sencode_coding_region); | 7330 | defsubr (&Sencode_coding_region); |
| 7194 | defsubr (&Sdecode_coding_string); | 7331 | defsubr (&Sdecode_coding_string); |