fix code conversions, add conv_iconv_strdup_with_cd(), and use EUC-JP-MS for JIS to UTF-8 conversion if available.

git-svn-id: svn://sylpheed.sraoss.jp/sylpheed/trunk@7 ee746299-78ed-0310-b773-934348b2243d
author: hiro <hiro@ee746299-78ed-0310-b773-934348b2243d> 2005-01-14 08:08:32 +0000
committer: hiro <hiro@ee746299-78ed-0310-b773-934348b2243d> 2005-01-14 08:08:32 +0000
commit: 13cda7cefaab6b0eef7b4718629f8ce31cd9eff0 (patch)
tree: 26a6f3ec89fcaf68b0420db33cf2f3b350eeefc3 /src/codeconv.c
parent: 01109639c07dc4babec7912a8febfe1f989b1b0d (diff)
1 files changed, 57 insertions, 29 deletions
diff --git a/src/codeconv.c b/src/codeconv.c
index 2ebbbee3..1440f80e 100644
--- a/src/codeconv.c
+++ b/src/codeconv.c
@@ -31,9 +31,7 @@
 #  include <locale.h>
 #endif
 
-#if HAVE_ICONV
-#  include <iconv.h>
-#endif
+#include <iconv.h>
 
 #include "intl.h"
 #include "codeconv.h"
@@ -375,9 +373,34 @@ void conv_sjistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
 
 void conv_jistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
 {
+	static iconv_t cd = (iconv_t)-1;
+	static gboolean iconv_ok = TRUE;
 	gchar *tmpstr;
+	gchar *eucstr;
+
+	Xalloca(eucstr, outlen, return);
+
+	conv_jistoeuc(eucstr, outlen, inbuf);
+
+	if (cd == (iconv_t)-1) {
+		if (!iconv_ok) {
+			strncpy2(outbuf, inbuf, outlen);
+			return;
+		}
+		cd = iconv_open(CS_UTF_8, CS_EUC_JP_MS);
+		if (cd == (iconv_t)-1) {
+			cd = iconv_open(CS_UTF_8, CS_EUC_JP);
+			if (cd == (iconv_t)-1) {
+				g_warning("conv_jistoutf8(): %s\n",
+					  g_strerror(errno));
+				iconv_ok = FALSE;
+				strncpy2(outbuf, inbuf, outlen);
+				return;
+			}
+		}
+	}
 
-	tmpstr = conv_iconv_strdup(inbuf, CS_ISO_2022_JP, CS_UTF_8);
+	tmpstr = conv_iconv_strdup_with_cd(eucstr, cd);
 	if (tmpstr) {
 		strncpy2(outbuf, tmpstr, outlen);
 		g_free(tmpstr);
@@ -864,9 +887,9 @@ CodeConvFunc conv_get_code_conv_func(const gchar *src_charset_str,
 
 	/* auto detection mode */
 	if (!src_charset_str && !dest_charset_str) {
-		//if (src_charset == C_EUC_JP || src_charset == C_SHIFT_JIS)
-		//	return conv_anytodisp;
-		//else
+		if (src_charset == C_EUC_JP || src_charset == C_SHIFT_JIS)
+			return conv_anytodisp;
+		else
 			return conv_noconv;
 	}
 
@@ -934,20 +957,12 @@ gchar *conv_iconv_strdup(const gchar *inbuf,
 			 const gchar *src_code, const gchar *dest_code)
 {
 	iconv_t cd;
-	const gchar *inbuf_p;
 	gchar *outbuf;
-	gchar *outbuf_p;
-	size_t in_size;
-	size_t in_left;
-	size_t out_size;
-	size_t out_left;
-	size_t n_conv;
-	size_t len;
 
 	if (!src_code)
 		src_code = conv_get_outgoing_charset_str();
 	if (!dest_code)
-		dest_code = conv_get_locale_charset_str();
+		dest_code = conv_get_internal_charset_str();
 
 	/* don't convert if current codeset is US-ASCII */
 	if (!strcasecmp(dest_code, CS_US_ASCII))
@@ -961,6 +976,25 @@ gchar *conv_iconv_strdup(const gchar *inbuf,
 	if (cd == (iconv_t)-1)
 		return NULL;
 
+	outbuf = conv_iconv_strdup_with_cd(inbuf, cd);
+
+	iconv_close(cd);
+
+	return outbuf;
+}
+
+gchar *conv_iconv_strdup_with_cd(const gchar *inbuf, iconv_t cd)
+{
+	const gchar *inbuf_p;
+	gchar *outbuf;
+	gchar *outbuf_p;
+	size_t in_size;
+	size_t in_left;
+	size_t out_size;
+	size_t out_left;
+	size_t n_conv;
+	size_t len;
+
 	inbuf_p = inbuf;
 	in_size = strlen(inbuf);
 	in_left = in_size;
@@ -981,6 +1015,7 @@ gchar *conv_iconv_strdup(const gchar *inbuf,
 	while ((n_conv = iconv(cd, (ICONV_CONST gchar **)&inbuf_p, &in_left,
 			       &outbuf_p, &out_left)) == (size_t)-1) {
 		if (EILSEQ == errno) {
+			g_print("iconv(): at %d: %s\n", in_size - in_left, g_strerror(errno));
 			inbuf_p++;
 			in_left--;
 			if (out_left == 0) {
@@ -1016,8 +1051,6 @@ gchar *conv_iconv_strdup(const gchar *inbuf,
 	outbuf = g_realloc(outbuf, len + 1);
 	outbuf[len] = '\0';
 
-	iconv_close(cd);
-
 	return outbuf;
 }
 
@@ -1070,6 +1103,7 @@ static const struct {
 	{C_ISO_2022_JP_3,	CS_ISO_2022_JP_3},
 	{C_EUC_JP,		CS_EUC_JP},
 	{C_EUC_JP,		CS_EUCJP},
+	{C_EUC_JP_MS,		CS_EUC_JP_MS},
 	{C_SHIFT_JIS,		CS_SHIFT_JIS},
 	{C_SHIFT_JIS,		CS_SHIFT__JIS},
 	{C_SHIFT_JIS,		CS_SJIS},
@@ -1459,6 +1493,7 @@ gboolean conv_is_multibyte_encoding(CharSet encoding)
 {
 	switch (encoding) {
 	case C_EUC_JP:
+	case C_EUC_JP_MS:
 	case C_EUC_KR:
 	case C_EUC_TW:
 	case C_EUC_CN:
@@ -1574,6 +1609,8 @@ void conv_encode_header(gchar *dest, gint len, const gchar *src,
 	guchar *destp = dest;
 	gboolean use_base64;
 
+	g_return_if_fail(g_utf8_validate(src, -1, NULL) == TRUE);
+
 	if (MB_CUR_MAX > 1) {
 		use_base64 = TRUE;
 		mimesep_enc = "?B?";
@@ -1582,9 +1619,7 @@ void conv_encode_header(gchar *dest, gint len, const gchar *src,
 		mimesep_enc = "?Q?";
 	}
 
-	cur_encoding = conv_get_locale_charset_str();
-	if (!strcmp(cur_encoding, CS_US_ASCII))
-		cur_encoding = CS_ISO_8859_1;
+	cur_encoding = conv_get_internal_charset_str();
 	out_encoding = conv_get_outgoing_charset_str();
 	if (!strcmp(out_encoding, CS_US_ASCII))
 		out_encoding = CS_ISO_8859_1;
@@ -1646,14 +1681,7 @@ void conv_encode_header(gchar *dest, gint len, const gchar *src,
 				if (addr_field && (*p == '(' || *p == ')'))
 					break;
 
-				if (MB_CUR_MAX > 1) {
-					mb_len = mblen(p, MB_CUR_MAX);
-					if (mb_len < 0) {
-						g_warning("conv_encode_header(): invalid multibyte character encountered\n");
-						mb_len = 1;
-					}
-				} else
-					mb_len = 1;
+				mb_len = g_utf8_skip[*p];
 
 				Xstrndup_a(part_str, srcp, cur_len + mb_len, );
 				out_str = conv_codeset_strdup
author	hiro <hiro@ee746299-78ed-0310-b773-934348b2243d>	2005-01-14 08:08:32 +0000
committer	hiro <hiro@ee746299-78ed-0310-b773-934348b2243d>	2005-01-14 08:08:32 +0000
commit	13cda7cefaab6b0eef7b4718629f8ce31cd9eff0 (patch)
tree	26a6f3ec89fcaf68b0420db33cf2f3b350eeefc3 /src/codeconv.c
parent	01109639c07dc4babec7912a8febfe1f989b1b0d (diff)