From 7fb480447736bd7e7371c53e5def624724028240 Mon Sep 17 00:00:00 2001 From: hiro Date: Thu, 16 Nov 2017 07:52:54 +0000 Subject: supported UTF-16 text attachments on sending. git-svn-id: svn://sylpheed.sraoss.jp/sylpheed/trunk@3573 ee746299-78ed-0310-b773-934348b2243d --- libsylph/codeconv.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++++ libsylph/codeconv.h | 8 +++++- 2 files changed, 80 insertions(+), 1 deletion(-) (limited to 'libsylph') diff --git a/libsylph/codeconv.c b/libsylph/codeconv.c index 8781f446..1ba52ac3 100644 --- a/libsylph/codeconv.c +++ b/libsylph/codeconv.c @@ -1681,6 +1681,9 @@ static const struct { {C_GEORGIAN_PS, CS_GEORGIAN_PS}, {C_TCVN5712_1, CS_TCVN5712_1}, {C_ISO_8859_16, CS_ISO_8859_16}, + {C_UTF_16, CS_UTF_16}, + {C_UTF_16BE, CS_UTF_16BE}, + {C_UTF_16LE, CS_UTF_16LE}, }; static const struct { @@ -2656,6 +2659,7 @@ CharSet conv_check_file_encoding(const gchar *file) CharSet enc; const gchar *enc_str; gboolean is_locale = TRUE, is_utf8 = TRUE; + size_t size; g_return_val_if_fail(file != NULL, C_AUTO); @@ -2669,6 +2673,75 @@ CharSet conv_check_file_encoding(const gchar *file) return C_AUTO; } + /* UTF-16 check */ + if ((size = fread(buf, 2, BUFFSIZE / 2, fp)) > 0) { + CharSet guess_enc = C_AUTO; + + debug_print("conv_check_file_encoding: check first %d bytes of file %s\n", size * 2, file); + + /* BOM check */ + if ((buf[0] & 0xff) == 0xfe && (buf[1] & 0xff) == 0xff) { + debug_print("conv_check_file_encoding: UTF-16 BOM (BE) found\n"); + guess_enc = C_UTF_16; /* UTF-16BE */ + } else if ((buf[0] & 0xff) == 0xff && (buf[1] & 0xff) == 0xfe) { + debug_print("conv_check_file_encoding: UTF-16 BOM (LE) found\n"); + guess_enc = C_UTF_16; /* UTF-16LE */ + } + if (guess_enc != C_AUTO) { + fclose(fp); + return guess_enc; + } + + /* search UTF-16 CR/LF */ + if (memchr(buf, 0x00, size * 2) != NULL) { + gint i; + guchar c1, c2; + + for (i = 0; i < size; i++) { + c1 = buf[i * 2] & 0xff; + c2 = buf[i * 2 + 1] & 0xff; + if (c1 == 0x00 && c2 == 0x0d) { /* UTF-16BE CR */ + i++; + if (i >= size) { + break; + } + c1 = buf[i * 2] & 0xff; + c2 = buf[i * 2 + 1] & 0xff; + if (c1 == 0x00 && c2 == 0x0a) { /* UTF-16BE LF */ + guess_enc = C_UTF_16BE; + break; + } + } else if (c1 == 0x0d && c2 == 0x00) { /* UTF-16LE CR */ + i++; + if (i >= size) { + break; + } + c1 = buf[i * 2] & 0xff; + c2 = buf[i * 2 + 1] & 0xff; + if (c1 == 0x0a && c2 == 0x00) { /* UTF-16LE LF */ + guess_enc = C_UTF_16LE; + break; + } + } else if (c1 == 0x00 && c2 == 0x0a) { /* UTF-16BE LF */ + guess_enc = C_UTF_16BE; + break; + } else if (c1 == 0x0a && c2 == 0x00) { /* UTF-16LE LF */ + guess_enc = C_UTF_16LE; + break; + } + } + + if (guess_enc != C_AUTO) { + debug_print("conv_check_file_encoding: %s detected\n", + conv_get_charset_str(guess_enc)); + fclose(fp); + return guess_enc; + } + } + } + + rewind(fp); + while (fgets(buf, sizeof(buf), fp) != NULL) { gchar *str; gint error = 0; diff --git a/libsylph/codeconv.h b/libsylph/codeconv.h index 121de0da..6f6fc365 100644 --- a/libsylph/codeconv.h +++ b/libsylph/codeconv.h @@ -92,7 +92,10 @@ typedef enum C_WINDOWS_874, C_GEORGIAN_PS, C_TCVN5712_1, - C_ISO_8859_16 + C_ISO_8859_16, + C_UTF_16, + C_UTF_16BE, + C_UTF_16LE } CharSet; typedef enum @@ -180,6 +183,9 @@ struct _CodeConverter #define CS_GEORGIAN_PS "GEORGIAN-PS" #define CS_TCVN5712_1 "TCVN5712-1" #define CS_ISO_8859_16 "ISO-8859-16" +#define CS_UTF_16 "UTF-16" +#define CS_UTF_16BE "UTF-16BE" +#define CS_UTF_16LE "UTF-16LE" #define C_INTERNAL C_UTF_8 #define CS_INTERNAL CS_UTF_8 -- cgit v1.2.3