From 13cda7cefaab6b0eef7b4718629f8ce31cd9eff0 Mon Sep 17 00:00:00 2001 From: hiro Date: Fri, 14 Jan 2005 08:08:32 +0000 Subject: fix code conversions, add conv_iconv_strdup_with_cd(), and use EUC-JP-MS for JIS to UTF-8 conversion if available. git-svn-id: svn://sylpheed.sraoss.jp/sylpheed/trunk@7 ee746299-78ed-0310-b773-934348b2243d --- src/codeconv.c | 86 ++++++++++++++++++++++++++++++++++++++------------------- src/codeconv.h | 5 ++++ src/compose.c | 27 +++++++----------- src/imap.c | 39 ++++++++++++-------------- src/statusbar.c | 24 ++++------------ src/textview.c | 8 +++--- src/unmime.c | 2 +- src/utils.c | 16 +++++------ 8 files changed, 107 insertions(+), 100 deletions(-) (limited to 'src') diff --git a/src/codeconv.c b/src/codeconv.c index 2ebbbee3..1440f80e 100644 --- a/src/codeconv.c +++ b/src/codeconv.c @@ -31,9 +31,7 @@ # include #endif -#if HAVE_ICONV -# include -#endif +#include #include "intl.h" #include "codeconv.h" @@ -375,9 +373,34 @@ void conv_sjistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf) void conv_jistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf) { + static iconv_t cd = (iconv_t)-1; + static gboolean iconv_ok = TRUE; gchar *tmpstr; + gchar *eucstr; + + Xalloca(eucstr, outlen, return); + + conv_jistoeuc(eucstr, outlen, inbuf); + + if (cd == (iconv_t)-1) { + if (!iconv_ok) { + strncpy2(outbuf, inbuf, outlen); + return; + } + cd = iconv_open(CS_UTF_8, CS_EUC_JP_MS); + if (cd == (iconv_t)-1) { + cd = iconv_open(CS_UTF_8, CS_EUC_JP); + if (cd == (iconv_t)-1) { + g_warning("conv_jistoutf8(): %s\n", + g_strerror(errno)); + iconv_ok = FALSE; + strncpy2(outbuf, inbuf, outlen); + return; + } + } + } - tmpstr = conv_iconv_strdup(inbuf, CS_ISO_2022_JP, CS_UTF_8); + tmpstr = conv_iconv_strdup_with_cd(eucstr, cd); if (tmpstr) { strncpy2(outbuf, tmpstr, outlen); g_free(tmpstr); @@ -864,9 +887,9 @@ CodeConvFunc conv_get_code_conv_func(const gchar *src_charset_str, /* auto detection mode */ if (!src_charset_str && !dest_charset_str) { - //if (src_charset == C_EUC_JP || src_charset == C_SHIFT_JIS) - // return conv_anytodisp; - //else + if (src_charset == C_EUC_JP || src_charset == C_SHIFT_JIS) + return conv_anytodisp; + else return conv_noconv; } @@ -934,20 +957,12 @@ gchar *conv_iconv_strdup(const gchar *inbuf, const gchar *src_code, const gchar *dest_code) { iconv_t cd; - const gchar *inbuf_p; gchar *outbuf; - gchar *outbuf_p; - size_t in_size; - size_t in_left; - size_t out_size; - size_t out_left; - size_t n_conv; - size_t len; if (!src_code) src_code = conv_get_outgoing_charset_str(); if (!dest_code) - dest_code = conv_get_locale_charset_str(); + dest_code = conv_get_internal_charset_str(); /* don't convert if current codeset is US-ASCII */ if (!strcasecmp(dest_code, CS_US_ASCII)) @@ -961,6 +976,25 @@ gchar *conv_iconv_strdup(const gchar *inbuf, if (cd == (iconv_t)-1) return NULL; + outbuf = conv_iconv_strdup_with_cd(inbuf, cd); + + iconv_close(cd); + + return outbuf; +} + +gchar *conv_iconv_strdup_with_cd(const gchar *inbuf, iconv_t cd) +{ + const gchar *inbuf_p; + gchar *outbuf; + gchar *outbuf_p; + size_t in_size; + size_t in_left; + size_t out_size; + size_t out_left; + size_t n_conv; + size_t len; + inbuf_p = inbuf; in_size = strlen(inbuf); in_left = in_size; @@ -981,6 +1015,7 @@ gchar *conv_iconv_strdup(const gchar *inbuf, while ((n_conv = iconv(cd, (ICONV_CONST gchar **)&inbuf_p, &in_left, &outbuf_p, &out_left)) == (size_t)-1) { if (EILSEQ == errno) { + g_print("iconv(): at %d: %s\n", in_size - in_left, g_strerror(errno)); inbuf_p++; in_left--; if (out_left == 0) { @@ -1016,8 +1051,6 @@ gchar *conv_iconv_strdup(const gchar *inbuf, outbuf = g_realloc(outbuf, len + 1); outbuf[len] = '\0'; - iconv_close(cd); - return outbuf; } @@ -1070,6 +1103,7 @@ static const struct { {C_ISO_2022_JP_3, CS_ISO_2022_JP_3}, {C_EUC_JP, CS_EUC_JP}, {C_EUC_JP, CS_EUCJP}, + {C_EUC_JP_MS, CS_EUC_JP_MS}, {C_SHIFT_JIS, CS_SHIFT_JIS}, {C_SHIFT_JIS, CS_SHIFT__JIS}, {C_SHIFT_JIS, CS_SJIS}, @@ -1459,6 +1493,7 @@ gboolean conv_is_multibyte_encoding(CharSet encoding) { switch (encoding) { case C_EUC_JP: + case C_EUC_JP_MS: case C_EUC_KR: case C_EUC_TW: case C_EUC_CN: @@ -1574,6 +1609,8 @@ void conv_encode_header(gchar *dest, gint len, const gchar *src, guchar *destp = dest; gboolean use_base64; + g_return_if_fail(g_utf8_validate(src, -1, NULL) == TRUE); + if (MB_CUR_MAX > 1) { use_base64 = TRUE; mimesep_enc = "?B?"; @@ -1582,9 +1619,7 @@ void conv_encode_header(gchar *dest, gint len, const gchar *src, mimesep_enc = "?Q?"; } - cur_encoding = conv_get_locale_charset_str(); - if (!strcmp(cur_encoding, CS_US_ASCII)) - cur_encoding = CS_ISO_8859_1; + cur_encoding = conv_get_internal_charset_str(); out_encoding = conv_get_outgoing_charset_str(); if (!strcmp(out_encoding, CS_US_ASCII)) out_encoding = CS_ISO_8859_1; @@ -1646,14 +1681,7 @@ void conv_encode_header(gchar *dest, gint len, const gchar *src, if (addr_field && (*p == '(' || *p == ')')) break; - if (MB_CUR_MAX > 1) { - mb_len = mblen(p, MB_CUR_MAX); - if (mb_len < 0) { - g_warning("conv_encode_header(): invalid multibyte character encountered\n"); - mb_len = 1; - } - } else - mb_len = 1; + mb_len = g_utf8_skip[*p]; Xstrndup_a(part_str, srcp, cur_len + mb_len, ); out_str = conv_codeset_strdup diff --git a/src/codeconv.h b/src/codeconv.h index 6a498af6..71e0a20b 100644 --- a/src/codeconv.h +++ b/src/codeconv.h @@ -25,6 +25,7 @@ #endif #include +#include typedef struct _CodeConverter CodeConverter; @@ -74,6 +75,7 @@ typedef enum C_ISO_2022_JP_2, C_ISO_2022_JP_3, C_EUC_JP, + C_EUC_JP_MS, C_SHIFT_JIS, C_ISO_2022_KR, C_EUC_KR, @@ -145,6 +147,7 @@ struct _CodeConverter #define CS_ISO_2022_JP_3 "ISO-2022-JP-3" #define CS_EUC_JP "EUC-JP" #define CS_EUCJP "EUCJP" +#define CS_EUC_JP_MS "EUC-JP-MS" #define CS_SHIFT_JIS "Shift_JIS" #define CS_SHIFT__JIS "SHIFT-JIS" #define CS_SJIS "SJIS" @@ -208,6 +211,8 @@ CodeConvFunc conv_get_code_conv_func (const gchar *src_charset_str, gchar *conv_iconv_strdup (const gchar *inbuf, const gchar *src_code, const gchar *dest_code); +gchar *conv_iconv_strdup_with_cd (const gchar *inbuf, + iconv_t cd); const gchar *conv_get_charset_str (CharSet charset); CharSet conv_get_charset_from_str (const gchar *charset); diff --git a/src/compose.c b/src/compose.c index 2cfa22cd..5a8cce4d 100644 --- a/src/compose.c +++ b/src/compose.c @@ -1559,7 +1559,9 @@ static gchar *compose_get_signature_str(Compose *compose) if (sig_str) { utf8_sig_str = conv_codeset_strdup - (sig_str, conv_get_locale_charset_str(), CS_UTF_8); + (sig_str, + conv_get_locale_charset_str(), + conv_get_internal_charset_str()); g_free(sig_str); } @@ -1593,7 +1595,8 @@ static void compose_insert_file(Compose *compose, const gchar *file) while (fgets(buf, sizeof(buf), fp) != NULL) { gchar *str; - str = conv_codeset_strdup(buf, cur_encoding, CS_UTF_8); + str = conv_codeset_strdup + (buf, cur_encoding, conv_get_internal_charset_str()); if (!str) continue; /* strip if DOS/Windows file, @@ -2755,7 +2758,7 @@ static gint compose_write_to_file(Compose *compose, const gchar *file, encoding = ENC_BASE64; #endif - src_codeset = CS_UTF_8; + src_codeset = conv_get_internal_charset_str(); debug_print("src encoding = %s, out encoding = %s, transfer encoding = %s\n", src_codeset, out_codeset, procmime_get_encoding_str(encoding)); @@ -2942,8 +2945,9 @@ static gint compose_write_body_to_file(Compose *compose, const gchar *file) gtk_text_buffer_get_end_iter(buffer, &end); tmp = gtk_text_buffer_get_text(buffer, &start, &end, FALSE); - chars = conv_codeset_strdup - (tmp, CS_UTF_8, conv_get_locale_charset_str()); + chars = conv_codeset_strdup(tmp, + conv_get_internal_charset_str(), + conv_get_locale_charset_str()); g_free(tmp); @@ -3694,24 +3698,13 @@ static gint compose_redirect_write_headers(Compose *compose, FILE *fp) static void compose_convert_header(gchar *dest, gint len, gchar *src, gint header_len, gboolean addr_field) { - gchar *str; - const gchar *cur_encoding; - g_return_if_fail(src != NULL); g_return_if_fail(dest != NULL); if (len < 1) return; g_strchomp(src); - -#warning FIXME_GTK2 redundant code conversion - cur_encoding = conv_get_locale_charset_str(); - if (!strcmp(cur_encoding, CS_US_ASCII)) - cur_encoding = CS_ISO_8859_1; - str = conv_codeset_strdup(src, CS_UTF_8, cur_encoding); - if (str) - conv_encode_header(dest, len, str, header_len, addr_field); - g_free(str); + conv_encode_header(dest, len, src, header_len, addr_field); } static void compose_generate_msgid(Compose *compose, gchar *buf, gint len) diff --git a/src/imap.c b/src/imap.c index c454e855..cbd430ad 100644 --- a/src/imap.c +++ b/src/imap.c @@ -350,8 +350,8 @@ static gchar *search_array_str (GPtrArray *array, static void imap_path_separator_subst (gchar *str, gchar separator); -static gchar *imap_modified_utf7_to_locale (const gchar *mutf7_str); -static gchar *imap_locale_to_modified_utf7 (const gchar *from); +static gchar *imap_modified_utf7_to_utf8 (const gchar *mutf7_str); +static gchar *imap_utf8_to_modified_utf7 (const gchar *from); static GSList *imap_get_seq_set_from_msglist (GSList *msglist); static void imap_seq_set_free (GSList *seq_list); @@ -1765,8 +1765,8 @@ static GSList *imap_parse_list(IMAPSession *session, const gchar *real_path, name = g_basename(buf); if (name[0] == '.') continue; - loc_name = imap_modified_utf7_to_locale(name); - loc_path = imap_modified_utf7_to_locale(buf); + loc_name = imap_modified_utf7_to_utf8(name); + loc_path = imap_modified_utf7_to_utf8(buf); new_item = folder_item_new(loc_name, loc_path); if (strcasestr(flags, "\\Noinferiors") != NULL) new_item->no_sub = TRUE; @@ -1889,7 +1889,7 @@ static FolderItem *imap_create_folder(Folder *folder, FolderItem *parent, /* keep trailing directory separator to create a folder that contains sub folder */ - imap_path = imap_locale_to_modified_utf7(dirpath); + imap_path = imap_utf8_to_modified_utf7(dirpath); strtailchomp(dirpath, '/'); Xstrdup_a(new_name, name, {g_free(dirpath); return NULL;}); strtailchomp(new_name, '/'); @@ -1989,7 +1989,7 @@ static gint imap_rename_folder(Folder *folder, FolderItem *item, } else newpath = g_strdup(name); - real_newpath = imap_locale_to_modified_utf7(newpath); + real_newpath = imap_utf8_to_modified_utf7(newpath); imap_path_separator_subst(real_newpath, separator); ok = imap_cmd_rename(session, real_oldpath, real_newpath); @@ -2432,7 +2432,7 @@ static gchar *imap_get_real_path(IMAPFolder *folder, const gchar *path) g_return_val_if_fail(folder != NULL, NULL); g_return_val_if_fail(path != NULL, NULL); - real_path = imap_locale_to_modified_utf7(path); + real_path = imap_utf8_to_modified_utf7(path); separator = imap_get_path_separator(folder, path); imap_path_separator_subst(real_path, separator); @@ -3645,7 +3645,7 @@ static void imap_path_separator_subst(gchar *str, gchar separator) } } -static gchar *imap_modified_utf7_to_locale(const gchar *mutf7_str) +static gchar *imap_modified_utf7_to_utf8(const gchar *mutf7_str) { static iconv_t cd = (iconv_t)-1; static gboolean iconv_ok = TRUE; @@ -3660,15 +3660,16 @@ static gchar *imap_modified_utf7_to_locale(const gchar *mutf7_str) if (!iconv_ok) return g_strdup(mutf7_str); if (cd == (iconv_t)-1) { - cd = iconv_open(conv_get_locale_charset_str(), "UTF-7"); + cd = iconv_open(conv_get_internal_charset_str(), CS_UTF_7); if (cd == (iconv_t)-1) { g_warning("iconv cannot convert UTF-7 to %s\n", - conv_get_locale_charset_str()); + conv_get_internal_charset_str()); iconv_ok = FALSE; return g_strdup(mutf7_str); } } + /* modified UTF-7 to normal UTF-7 conversion */ norm_utf7 = g_string_new(NULL); for (p = mutf7_str; *p != '\0'; p++) { @@ -3715,7 +3716,7 @@ static gchar *imap_modified_utf7_to_locale(const gchar *mutf7_str) return to_str; } -static gchar *imap_locale_to_modified_utf7(const gchar *from) +static gchar *imap_utf8_to_modified_utf7(const gchar *from) { static iconv_t cd = (iconv_t)-1; static gboolean iconv_ok = TRUE; @@ -3728,15 +3729,16 @@ static gchar *imap_locale_to_modified_utf7(const gchar *from) if (!iconv_ok) return g_strdup(from); if (cd == (iconv_t)-1) { - cd = iconv_open("UTF-7", conv_get_locale_charset_str()); + cd = iconv_open(CS_UTF_7, conv_get_internal_charset_str()); if (cd == (iconv_t)-1) { g_warning(_("iconv cannot convert %s to UTF-7\n"), - conv_get_locale_charset_str()); + conv_get_internal_charset_str()); iconv_ok = FALSE; return g_strdup(from); } } + /* UTF-8 to normal UTF-7 conversion */ Xstrdup_a(from_tmp, from, return g_strdup(from)); from_len = strlen(from); norm_utf7_len = from_len * 5; @@ -3760,18 +3762,13 @@ static gchar *imap_locale_to_modified_utf7(const gchar *from) from_tmp++; from_len--; } else { - size_t mb_len = 0, conv_len = 0; + size_t conv_len = 0; /* unprintable char: convert to UTF-7 */ p = from_tmp; while (!IS_PRINT(*(guchar *)p) && conv_len < from_len) { - mb_len = mblen(p, MB_LEN_MAX); - if (mb_len <= 0) { - g_warning("wrong multibyte sequence\n"); - return g_strdup(from); - } - conv_len += mb_len; - p += mb_len; + conv_len += g_utf8_skip[*(guchar *)p]; + p += g_utf8_skip[*(guchar *)p]; } from_len -= conv_len; diff --git a/src/statusbar.c b/src/statusbar.c index 38d1377f..8ddc9508 100644 --- a/src/statusbar.c +++ b/src/statusbar.c @@ -49,26 +49,12 @@ void statusbar_puts(GtkStatusbar *statusbar, const gchar *str) { gint cid; gchar *buf; + gchar *tmp; - buf = g_strdup(str); - strretchomp(buf); - if (strlen(buf) > 76) { - wchar_t *wbuf; - - wbuf = strdup_mbstowcs(buf); - - if (wcslen(wbuf) > 60) { - gchar *tmp; - - g_free(buf); - wbuf[60] = (wchar_t)0; - tmp = strdup_wcstombs(wbuf); - buf = g_strconcat(tmp, "...", NULL); - g_free(tmp); - } - - g_free(wbuf); - } + tmp = g_strdup(str); + strretchomp(tmp); + buf = trim_string(tmp, 76); + g_free(tmp); cid = gtk_statusbar_get_context_id(statusbar, "Standard Output"); gtk_statusbar_pop(statusbar, cid); diff --git a/src/textview.c b/src/textview.c index 2b240b6b..da860bfe 100644 --- a/src/textview.c +++ b/src/textview.c @@ -1269,14 +1269,14 @@ void textview_scroll_one_line(TextView *textview, gboolean up) if (!up) { upper = vadj->upper - vadj->page_size; if (vadj->value < upper) { - vadj->value += vadj->step_increment * 4; + vadj->value += vadj->step_increment; vadj->value = MIN(vadj->value, upper); g_signal_emit_by_name(G_OBJECT(vadj), "value_changed", 0); } } else { if (vadj->value > 0.0) { - vadj->value -= vadj->step_increment * 4; + vadj->value -= vadj->step_increment; vadj->value = MAX(vadj->value, 0.0); g_signal_emit_by_name(G_OBJECT(vadj), "value_changed", 0); @@ -1366,7 +1366,7 @@ static void textview_smooth_scroll_one_line(TextView *textview, gboolean up) upper = vadj->upper - vadj->page_size; if (vadj->value < upper) { old_value = vadj->value; - last_value = vadj->value + vadj->step_increment * 4; + last_value = vadj->value + vadj->step_increment; last_value = MIN(last_value, upper); textview_smooth_scroll_do(textview, old_value, @@ -1376,7 +1376,7 @@ static void textview_smooth_scroll_one_line(TextView *textview, gboolean up) } else { if (vadj->value > 0.0) { old_value = vadj->value; - last_value = vadj->value - vadj->step_increment * 4; + last_value = vadj->value - vadj->step_increment; last_value = MAX(last_value, 0.0); textview_smooth_scroll_do(textview, old_value, diff --git a/src/unmime.c b/src/unmime.c index e7dce098..bb9e2822 100644 --- a/src/unmime.c +++ b/src/unmime.c @@ -112,7 +112,7 @@ void unmime_header(gchar *out, const gchar *str) continue; } - /* convert to locale encoding */ + /* convert to UTF-8 */ conv_str = conv_codeset_strdup(decoded_text, charset, NULL); if (conv_str) { len = strlen(conv_str); diff --git a/src/utils.c b/src/utils.c index 493c9597..693f86ea 100644 --- a/src/utils.c +++ b/src/utils.c @@ -518,11 +518,9 @@ gint get_mbs_len(const gchar *s) return -1; while (*p != '\0') { - mb_len = mblen(p, MB_LEN_MAX); + mb_len = g_utf8_skip[*(guchar *)p]; if (mb_len == 0) break; - else if (mb_len < 0) - return -1; else len++; @@ -1340,13 +1338,13 @@ gchar *trim_string(const gchar *str, gint len) if (!str) return NULL; if (strlen(str) <= len) return g_strdup(str); + if (g_utf8_validate(str, -1, NULL) == FALSE) + return g_strdup(str); while (*p != '\0') { - mb_len = mblen(p, MB_LEN_MAX); + mb_len = g_utf8_skip[*(guchar *)p]; if (mb_len == 0) break; - else if (mb_len < 0) - return g_strdup(str); else if (new_len + mb_len > len) break; @@ -1367,13 +1365,13 @@ gchar *trim_string_before(const gchar *str, gint len) if (!str) return NULL; if ((new_len = strlen(str)) <= len) return g_strdup(str); + if (g_utf8_validate(str, -1, NULL) == FALSE) + return g_strdup(str); while (*p != '\0') { - mb_len = mblen(p, MB_LEN_MAX); + mb_len = g_utf8_skip[*(guchar *)p]; if (mb_len == 0) break; - else if (mb_len < 0) - return g_strdup(str); new_len -= mb_len; p += mb_len; -- cgit v1.2.3