diff options
| author | Andreas Schwab | 2011-03-15 19:38:57 +0100 |
|---|---|---|
| committer | Andreas Schwab | 2011-03-15 19:38:57 +0100 |
| commit | 0e48bb227a5b9cdabeb845422de33d62ccb1edc5 (patch) | |
| tree | 1555e62e7146144b2c5dc344e55e7e1fb570ec7f /src/coding.c | |
| parent | 0adf561883e07549e657a39d0a4a95cafa4d04fd (diff) | |
| download | emacs-0e48bb227a5b9cdabeb845422de33d62ccb1edc5.tar.gz emacs-0e48bb227a5b9cdabeb845422de33d62ccb1edc5.zip | |
* src/coding.c (detect_coding_iso_2022): Reorganize code to clarify
structure.
Diffstat (limited to 'src/coding.c')
| -rw-r--r-- | src/coding.c | 159 |
1 file changed, 80 insertions, 79 deletions
diff --git a/src/coding.c b/src/coding.c
index 9a6a4484e50..0c2836c19f6 100644
--- a/src/coding.c
+++ b/src/coding.c
| @@ -2954,12 +2954,7 @@ detect_coding_iso_2022 (struct coding_system *coding, | |||
| 2954 | const unsigned char *src_end = coding->source + coding->src_bytes; | 2954 | const unsigned char *src_end = coding->source + coding->src_bytes; |
| 2955 | int multibytep = coding->src_multibyte; | 2955 | int multibytep = coding->src_multibyte; |
| 2956 | int single_shifting = 0; | 2956 | int single_shifting = 0; |
| 2957 | 2957 | int id; | |
| 2958 | /* FIXME: Does ID need to be initialized here? The "End of composition" | ||
| 2959 | code below does not initialize ID even though ID is used | ||
| 2960 | afterwards, and perhaps that is a bug. */ | ||
| 2961 | int id = 0; | ||
| 2962 | |||
| 2963 | int c, c1; | 2958 | int c, c1; |
| 2964 | int consumed_chars = 0; | 2959 | int consumed_chars = 0; |
| 2965 | int i; | 2960 | int i; |
| @@ -2999,40 +2994,11 @@ detect_coding_iso_2022 (struct coding_system *coding, | |||
| 2999 | break; | 2994 | break; |
| 3000 | single_shifting = 0; | 2995 | single_shifting = 0; |
| 3001 | ONE_MORE_BYTE (c); | 2996 | ONE_MORE_BYTE (c); |
| 3002 | if (c >= '(' && c <= '/') | 2997 | if (c == 'N' || c == 'O') |
| 3003 | { | ||
| 3004 | /* Designation sequence for a charset of dimension 1. */ | ||
| 3005 | ONE_MORE_BYTE (c1); | ||
| 3006 | if (c1 < ' ' || c1 >= 0x80 | ||
| 3007 | || (id = iso_charset_table[0][c >= ','][c1]) < 0) | ||
| 3008 | /* Invalid designation sequence. Just ignore. */ | ||
| 3009 | break; | ||
| 3010 | } | ||
| 3011 | else if (c == '$') | ||
| 3012 | { | ||
| 3013 | /* Designation sequence for a charset of dimension 2. */ | ||
| 3014 | ONE_MORE_BYTE (c); | ||
| 3015 | if (c >= '@' && c <= 'B') | ||
| 3016 | /* Designation for JISX0208.1978, GB2312, or JISX0208. */ | ||
| 3017 | id = iso_charset_table[1][0][c]; | ||
| 3018 | else if (c >= '(' && c <= '/') | ||
| 3019 | { | ||
| 3020 | ONE_MORE_BYTE (c1); | ||
| 3021 | if (c1 < ' ' || c1 >= 0x80 | ||
| 3022 | || (id = iso_charset_table[1][c >= ','][c1]) < 0) | ||
| 3023 | /* Invalid designation sequence. Just ignore. */ | ||
| 3024 | break; | ||
| 3025 | } | ||
| 3026 | else | ||
| 3027 | /* Invalid designation sequence. Just ignore it. */ | ||
| 3028 | break; | ||
| 3029 | } | ||
| 3030 | else if (c == 'N' || c == 'O') | ||
| 3031 | { | 2998 | { |
| 3032 | /* ESC <Fe> for SS2 or SS3. */ | 2999 | /* ESC <Fe> for SS2 or SS3. */ |
| 3033 | single_shifting = 1; | 3000 | single_shifting = 1; |
| 3034 | rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT; | 3001 | rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT; |
| 3035 | break; | ||
| 3036 | } | 3002 | } |
| 3037 | else if (c == '1') | 3003 | else if (c == '1') |
| 3038 | { | 3004 | { |
| @@ -3048,36 +3014,66 @@ detect_coding_iso_2022 (struct coding_system *coding, | |||
| 3048 | { | 3014 | { |
| 3049 | /* ESC <Fp> for start/end composition. */ | 3015 | /* ESC <Fp> for start/end composition. */ |
| 3050 | composition_count = 0; | 3016 | composition_count = 0; |
| 3051 | break; | ||
| 3052 | } | 3017 | } |
| 3053 | else | 3018 | else |
| 3054 | { | 3019 | { |
| 3055 | /* Invalid escape sequence. Just ignore it. */ | 3020 | if (c >= '(' && c <= '/') |
| 3056 | break; | 3021 | { |
| 3057 | } | 3022 | /* Designation sequence for a charset of dimension 1. */ |
| 3023 | ONE_MORE_BYTE (c1); | ||
| 3024 | if (c1 < ' ' || c1 >= 0x80 | ||
| 3025 | || (id = iso_charset_table[0][c >= ','][c1]) < 0) | ||
| 3026 | /* Invalid designation sequence. Just ignore. */ | ||
| 3027 | break; | ||
| 3028 | } | ||
| 3029 | else if (c == '$') | ||
| 3030 | { | ||
| 3031 | /* Designation sequence for a charset of dimension 2. */ | ||
| 3032 | ONE_MORE_BYTE (c); | ||
| 3033 | if (c >= '@' && c <= 'B') | ||
| 3034 | /* Designation for JISX0208.1978, GB2312, or JISX0208. */ | ||
| 3035 | id = iso_charset_table[1][0][c]; | ||
| 3036 | else if (c >= '(' && c <= '/') | ||
| 3037 | { | ||
| 3038 | ONE_MORE_BYTE (c1); | ||
| 3039 | if (c1 < ' ' || c1 >= 0x80 | ||
| 3040 | || (id = iso_charset_table[1][c >= ','][c1]) < 0) | ||
| 3041 | /* Invalid designation sequence. Just ignore. */ | ||
| 3042 | break; | ||
| 3043 | } | ||
| 3044 | else | ||
| 3045 | /* Invalid designation sequence. Just ignore it. */ | ||
| 3046 | break; | ||
| 3047 | } | ||
| 3048 | else | ||
| 3049 | { | ||
| 3050 | /* Invalid escape sequence. Just ignore it. */ | ||
| 3051 | break; | ||
| 3052 | } | ||
| 3058 | 3053 | ||
| 3059 | /* We found a valid designation sequence for CHARSET. */ | 3054 | /* We found a valid designation sequence for CHARSET. */ |
| 3060 | rejected |= CATEGORY_MASK_ISO_8BIT; | 3055 | rejected |= CATEGORY_MASK_ISO_8BIT; |
| 3061 | if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7], | 3056 | if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7], |
| 3062 | id)) | 3057 | id)) |
| 3063 | found |= CATEGORY_MASK_ISO_7; | 3058 | found |= CATEGORY_MASK_ISO_7; |
| 3064 | else | 3059 | else |
| 3065 | rejected |= CATEGORY_MASK_ISO_7; | 3060 | rejected |= CATEGORY_MASK_ISO_7; |
| 3066 | if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight], | 3061 | if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight], |
| 3067 | id)) | 3062 | id)) |
| 3068 | found |= CATEGORY_MASK_ISO_7_TIGHT; | 3063 | found |= CATEGORY_MASK_ISO_7_TIGHT; |
| 3069 | else | 3064 | else |
| 3070 | rejected |= CATEGORY_MASK_ISO_7_TIGHT; | 3065 | rejected |= CATEGORY_MASK_ISO_7_TIGHT; |
| 3071 | if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else], | 3066 | if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else], |
| 3072 | id)) | 3067 | id)) |
| 3073 | found |= CATEGORY_MASK_ISO_7_ELSE; | 3068 | found |= CATEGORY_MASK_ISO_7_ELSE; |
| 3074 | else | 3069 | else |
| 3075 | rejected |= CATEGORY_MASK_ISO_7_ELSE; | 3070 | rejected |= CATEGORY_MASK_ISO_7_ELSE; |
| 3076 | if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else], | 3071 | if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else], |
| 3077 | id)) | 3072 | id)) |
| 3078 | found |= CATEGORY_MASK_ISO_8_ELSE; | 3073 | found |= CATEGORY_MASK_ISO_8_ELSE; |
| 3079 | else | 3074 | else |
| 3080 | rejected |= CATEGORY_MASK_ISO_8_ELSE; | 3075 | rejected |= CATEGORY_MASK_ISO_8_ELSE; |
| 3076 | } | ||
| 3081 | break; | 3077 | break; |
| 3082 | 3078 | ||
| 3083 | case ISO_CODE_SO: | 3079 | case ISO_CODE_SO: |
| @@ -3105,13 +3101,32 @@ detect_coding_iso_2022 (struct coding_system *coding, | |||
| 3105 | rejected |= CATEGORY_MASK_ISO_7BIT; | 3101 | rejected |= CATEGORY_MASK_ISO_7BIT; |
| 3106 | if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1]) | 3102 | if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1]) |
| 3107 | & CODING_ISO_FLAG_SINGLE_SHIFT) | 3103 | & CODING_ISO_FLAG_SINGLE_SHIFT) |
| 3108 | found |= CATEGORY_MASK_ISO_8_1, single_shifting = 1; | 3104 | { |
| 3105 | found |= CATEGORY_MASK_ISO_8_1; | ||
| 3106 | single_shifting = 1; | ||
| 3107 | } | ||
| 3109 | if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2]) | 3108 | if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2]) |
| 3110 | & CODING_ISO_FLAG_SINGLE_SHIFT) | 3109 | & CODING_ISO_FLAG_SINGLE_SHIFT) |
| 3111 | found |= CATEGORY_MASK_ISO_8_2, single_shifting = 1; | 3110 | { |
| 3111 | found |= CATEGORY_MASK_ISO_8_2; | ||
| 3112 | single_shifting = 1; | ||
| 3113 | } | ||
| 3112 | if (single_shifting) | 3114 | if (single_shifting) |
| 3113 | break; | 3115 | break; |
| 3114 | goto check_extra_latin; | 3116 | check_extra_latin: |
| 3117 | if (! VECTORP (Vlatin_extra_code_table) | ||
| 3118 | || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c])) | ||
| 3119 | { | ||
| 3120 | rejected = CATEGORY_MASK_ISO; | ||
| 3121 | break; | ||
| 3122 | } | ||
| 3123 | if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1]) | ||
| 3124 | & CODING_ISO_FLAG_LATIN_EXTRA) | ||
| 3125 | found |= CATEGORY_MASK_ISO_8_1; | ||
| 3126 | else | ||
| 3127 | rejected |= CATEGORY_MASK_ISO_8_1; | ||
| 3128 | rejected |= CATEGORY_MASK_ISO_8_2; | ||
| 3129 | break; | ||
| 3115 | 3130 | ||
| 3116 | default: | 3131 | default: |
| 3117 | if (c < 0) | 3132 | if (c < 0) |
| @@ -3162,20 +3177,6 @@ detect_coding_iso_2022 (struct coding_system *coding, | |||
| 3162 | } | 3177 | } |
| 3163 | break; | 3178 | break; |
| 3164 | } | 3179 | } |
| 3165 | check_extra_latin: | ||
| 3166 | single_shifting = 0; | ||
| 3167 | if (! VECTORP (Vlatin_extra_code_table) | ||
| 3168 | || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c])) | ||
| 3169 | { | ||
| 3170 | rejected = CATEGORY_MASK_ISO; | ||
| 3171 | break; | ||
| 3172 | } | ||
| 3173 | if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1]) | ||
| 3174 | & CODING_ISO_FLAG_LATIN_EXTRA) | ||
| 3175 | found |= CATEGORY_MASK_ISO_8_1; | ||
| 3176 | else | ||
| 3177 | rejected |= CATEGORY_MASK_ISO_8_1; | ||
| 3178 | rejected |= CATEGORY_MASK_ISO_8_2; | ||
| 3179 | } | 3180 | } |
| 3180 | } | 3181 | } |
| 3181 | detect_info->rejected |= CATEGORY_MASK_ISO; | 3182 | detect_info->rejected |= CATEGORY_MASK_ISO; |