aboutsummaryrefslogtreecommitdiff
path: root/libsylph
diff options
context:
space:
mode:
author hiro <hiro@ee746299-78ed-0310-b773-934348b2243d> 2005-09-07 07:09:33 +0000
committer hiro <hiro@ee746299-78ed-0310-b773-934348b2243d> 2005-09-07 07:09:33 +0000
commit e8a89709045c2dcb85098ae9ac515cccf6c9bb2d (patch)
tree 83e6d2fbe8fd938fa2e1a33e075f7c8ae14041ff /libsylph
parent f59c735b2a727a1359c485e9568e8c278a05c1ca (diff)
support UTF-8 in guessing Japanese encoding.
git-svn-id: svn://sylpheed.sraoss.jp/sylpheed/trunk@553 ee746299-78ed-0310-b773-934348b2243d
Diffstat (limited to 'libsylph')
-rw-r--r-- libsylph/codeconv.c 37
1 files changed, 33 insertions, 4 deletions
diff --git a/libsylph/codeconv.c b/libsylph/codeconv.c
index c88b588c..b3be5f5c 100644
--- a/libsylph/codeconv.c
+++ b/libsylph/codeconv.c
@@ -61,6 +61,7 @@ typedef enum
(((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
#define iseucaux(c) \
(((c) & 0xff) == 0x8f)
+
#define issjiskanji1(c) \
((((c) & 0xff) >= 0x81 && ((c) & 0xff) <= 0x9f) || \
(((c) & 0xff) >= 0xe0 && ((c) & 0xff) <= 0xfc))
@@ -70,6 +71,17 @@ typedef enum
#define issjishwkana(c) \
(((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
+/* U+0080 - U+07FF */
+#define isutf8_2_1(c) \
+ (((c) & 0xe0) == 0xc0)
+#define isutf8_2_2(c) \
+ (((c) & 0xc0) == 0x80)
+/* U+0800 - U+FFFF */
+#define isutf8_3_1(c) \
+ (((c) & 0xf0) == 0xe0)
+#define isutf8_3_2(c) \
+ (((c) & 0xc0) == 0x80)
+
#define K_IN() \
if (state != JIS_KANJI) { \
*out++ = ESC; \
@@ -799,19 +811,36 @@ CharSet conv_guess_ja_encoding(const gchar *str)
guessed = C_EUC_JP;
p += 2;
} else if (issjiskanji1(*p) && issjiskanji2(*(p + 1))) {
- if (iseuchwkana1(*p) && iseuchwkana2(*(p + 1)))
- guessed = C_SHIFT_JIS;
- else
- return C_SHIFT_JIS;
+ guessed = C_SHIFT_JIS;
p += 2;
} else if (issjishwkana(*p)) {
guessed = C_SHIFT_JIS;
p++;
} else {
+ if (guessed == C_US_ASCII)
+ guessed = C_AUTO;
p++;
}
}
+ if (guessed != C_US_ASCII) {
+ p = (const guchar *)str;
+
+ while (*p != '\0') {
+ if (isascii(*p)) {
+ p++;
+ } else if (isutf8_3_1(*p) &&
+ isutf8_3_2(*(p + 1)) &&
+ isutf8_3_2(*(p + 2))) {
+ p += 3;
+ } else {
+ return guessed;
+ }
+ }
+
+ return C_UTF_8;
+ }
+
return guessed;
}