diff options
author | hiro <hiro@ee746299-78ed-0310-b773-934348b2243d> | 2005-02-25 10:00:41 +0000 |
---|---|---|
committer | hiro <hiro@ee746299-78ed-0310-b773-934348b2243d> | 2005-02-25 10:00:41 +0000 |
commit | 2a47b946a59bdfe4bfefb136fe0f7475add03bb2 (patch) | |
tree | b61928ab31b3c6c6578f13f6d46fe73a9df5e83e /src/html.c | |
parent | 19a51bbe411d98970bcebcec1e559d60b02899b0 (diff) |
output printing text with locale encoding, and removed broken locale-specific HTML entity reference conversion.
git-svn-id: svn://sylpheed.sraoss.jp/sylpheed/trunk@129 ee746299-78ed-0310-b773-934348b2243d
Diffstat (limited to 'src/html.c')
-rw-r--r-- | src/html.c | 244 |
1 files changed, 15 insertions, 229 deletions
@@ -111,201 +111,7 @@ static HTMLSymbol ascii_symbol_list[] = { {"ý", "y'"}, }; -static HTMLSymbol eucjp_symbol_list[] = { - {"¡" , "^!"}, - {"¢" , "\xa1\xf1"}, - {"£" , "\xa1\xf2"}, - {"¥" , "\xa1\xef"}, - {"¦", "|"}, - {"§" , "\xa1\xf8"}, - {"¨" , "\xa1\xaf"}, - {"©" , "(C)"}, - {"«" , "<<"}, - {"®" , "(R)"}, - - {"°" , "\xa1\xeb"}, - {"±", "\xa1\xde"}, - {"²" , "^2"}, - {"³" , "^3"}, - {"´" , "'"}, - {"µ" , "\xa6\xcc"}, - {"¶" , "\xa2\xf9"}, - {"·", "\xa1\xa6"}, - {"¸" , ","}, - {"¹" , "^1"}, - {"»" , ">>"}, - {"¼", "1/4"}, - {"½", "1/2"}, - {"¾", "3/4"}, - {"¿", "^?"}, - - {"À", "A`"}, - {"Á", "A'"}, - {"Â" , "A^"}, - {"Ã", "A~"}, - {"Ä" , "A\xa1\xaf"}, - {"Å" , "A\xa1\xeb"}, - {"Æ" , "AE"}, - {"È", "E`"}, - {"É", "E'"}, - {"Ê" , "E^"}, - {"Ë" , "E\xa1\xaf"}, - {"Ì", "I`"}, - {"Í", "I'"}, - {"Î" , "I^"}, - {"Ï" , "I\xa1\xaf"}, - - {"Ñ", "N~"}, - {"Ò", "O`"}, - {"Ó", "O'"}, - {"Ô" , "O^"}, - {"Õ", "O~"}, - {"Ö" , "O\xa1\xaf"}, - {"×" , "\xa1\xdf"}, - {"Ù", "U`"}, - {"Ú", "U'"}, - {"Û" , "U^"}, - {"Ü" , "U\xa1\xaf"}, - {"Ý", "Y'"}, - - {"à", "a`"}, - {"á", "a'"}, - {"â" , "a^"}, - {"ã", "a~"}, - {"ä" , "a\xa1\xaf"}, - {"å" , "a\xa1\xeb"}, - {"æ" , "ae"}, - {"è", "e`"}, - {"é", "e'"}, - {"ê" , "e^"}, - {"ë" , "e\xa1\xaf"}, - {"ì", "i`"}, - {"í", "i'"}, - {"î" , "i^"}, - {"ï" , "i\xa1\xaf"}, - - {"ð" , "\xa2\xdf"}, - {"ñ", "n~"}, - {"ò", "o`"}, - {"ó", "o'"}, - {"ô" , "o^"}, - {"õ", "o~"}, - {"ö" , "o\xa1\xaf"}, - {"÷", "\xa1\xe0"}, - {"ù", "u`"}, - {"ú", "u'"}, - {"û" , "u^"}, - {"ü" , "u\xa1\xaf"}, - {"ý", "y'"}, - {"ÿ" , "y\xa1\xaf"}, -}; - -static HTMLSymbol latin_symbol_list[] = { - {"¡" , "\xa1"}, - {"¢" , "\xa2"}, - {"£" , "\xa3"}, - {"¤", "\xa4"}, - {"¥" , "\xa5"}, - {"¦", "\xa6"}, - {"§" , "\xa7"}, - {"¨" , "\xa8"}, - {"©" , "\xa9"}, - {"ª" , "\xaa"}, - {"«" , "\xab"}, - {"¬" , "\xac"}, - {"­" , "\xad"}, - {"®" , "\xae"}, - {"¯" , "\xaf"}, - - {"°" , "\xb0"}, - {"±", "\xb1"}, - {"²" , "\xb2"}, - {"³" , "\xb3"}, - {"´" , "\xb4"}, - {"µ" , "\xb5"}, - {"¶" , 
"\xb6"}, - {"·", "\xb7"}, - {"¸" , "\xb8"}, - {"¹" , "\xb9"}, - {"º" , "\xba"}, - {"»" , "\xbb"}, - {"¼", "\xbc"}, - {"½", "\xbd"}, - {"¾", "\xbe"}, - {"¿", "\xbf"}, - - {"À", "\xc0"}, - {"Á", "\xc1"}, - {"Â" , "\xc2"}, - {"Ã", "\xc3"}, - {"Ä" , "\xc4"}, - {"Å" , "\xc5"}, - {"Æ" , "\xc6"}, - {"Ç", "\xc7"}, - {"È", "\xc8"}, - {"É", "\xc9"}, - {"Ê" , "\xca"}, - {"Ë" , "\xcb"}, - {"Ì", "\xcc"}, - {"Í", "\xcd"}, - {"Î" , "\xce"}, - {"Ï" , "\xcf"}, - - {"Ð" , "\xd0"}, - {"Ñ", "\xd1"}, - {"Ò", "\xd2"}, - {"Ó", "\xd3"}, - {"Ô" , "\xd4"}, - {"Õ", "\xd5"}, - {"Ö" , "\xd6"}, - {"×" , "\xd7"}, - {"Ø", "\xd8"}, - {"Ù", "\xd9"}, - {"Ú", "\xda"}, - {"Û" , "\xdb"}, - {"Ü" , "\xdc"}, - {"Ý", "\xdd"}, - {"Þ" , "\xde"}, - {"ß" , "\xdf"}, - - {"à", "\xe0"}, - {"á", "\xe1"}, - {"â" , "\xe2"}, - {"ã", "\xe3"}, - {"ä" , "\xe4"}, - {"å" , "\xe5"}, - {"æ" , "\xe6"}, - {"ç", "\xe7"}, - {"è", "\xe8"}, - {"é", "\xe9"}, - {"ê" , "\xea"}, - {"ë" , "\xeb"}, - {"ì", "\xec"}, - {"í", "\xed"}, - {"î" , "\xee"}, - {"ï" , "\xef"}, - - {"ð" , "\xf0"}, - {"ñ", "\xf1"}, - {"ò", "\xf2"}, - {"ó", "\xf3"}, - {"ô" , "\xf4"}, - {"õ", "\xf5"}, - {"ö" , "\xf6"}, - {"÷", "\xf7"}, - {"ø", "\xf8"}, - {"ù", "\xf9"}, - {"ú", "\xfa"}, - {"û" , "\xfb"}, - {"ü" , "\xfc"}, - {"ý", "\xfd"}, - {"þ" , "\xfe"}, - {"ÿ" , "\xff"}, -}; - static GHashTable *default_symbol_table; -static GHashTable *eucjp_symbol_table; -static GHashTable *latin_symbol_table; static HTMLState html_read_line (HTMLParser *parser); static void html_append_char (HTMLParser *parser, @@ -354,31 +160,10 @@ HTMLParser *html_parser_new(FILE *fp, CodeConverter *conv) SYMBOL_TABLE_ADD(default_symbol_table, symbol_list); SYMBOL_TABLE_ADD(default_symbol_table, ascii_symbol_list); } - if (!eucjp_symbol_table) { - eucjp_symbol_table = - g_hash_table_new(g_str_hash, g_str_equal); - SYMBOL_TABLE_ADD(eucjp_symbol_table, symbol_list); - SYMBOL_TABLE_ADD(eucjp_symbol_table, eucjp_symbol_list); - } - if (!latin_symbol_table) { - latin_symbol_table = - 
g_hash_table_new(g_str_hash, g_str_equal); - SYMBOL_TABLE_ADD(latin_symbol_table, symbol_list); - SYMBOL_TABLE_ADD(latin_symbol_table, latin_symbol_list); - } #undef SYMBOL_TABLE_ADD - if (conv->charset == C_ISO_8859_1) - parser->symbol_table = latin_symbol_table; - else if ((conv->charset == C_ISO_2022_JP || - conv->charset == C_ISO_2022_JP_2 || - conv->charset == C_EUC_JP || - conv->charset == C_SHIFT_JIS) && - conv_get_locale_charset() == C_EUC_JP) - parser->symbol_table = eucjp_symbol_table; - else - parser->symbol_table = default_symbol_table; + parser->symbol_table = default_symbol_table; return parser; } @@ -529,7 +314,7 @@ static HTMLTag *html_get_tag(const gchar *str) tag = g_new0(HTMLTag, 1); - for (tmpp = tmp; *tmpp != '\0' && !isspace(*tmpp); tmpp++) + for (tmpp = tmp; *tmpp != '\0' && !g_ascii_isspace(*tmpp); tmpp++) ; if (*tmpp == '\0') { @@ -549,18 +334,20 @@ static HTMLTag *html_get_tag(const gchar *str) gchar *p; gchar quote; - while (isspace(*tmpp)) tmpp++; + while (g_ascii_isspace(*tmpp)) tmpp++; attr_name = tmpp; - while (*tmpp != '\0' && !isspace(*tmpp) && *tmpp != '=') tmpp++; + while (*tmpp != '\0' && !g_ascii_isspace(*tmpp) && + *tmpp != '=') + tmpp++; if (*tmpp != '\0' && *tmpp != '=') { *tmpp++ = '\0'; - while (isspace(*tmpp)) tmpp++; + while (g_ascii_isspace(*tmpp)) tmpp++; } if (*tmpp == '=') { *tmpp++ = '\0'; - while (isspace(*tmpp)) tmpp++; + while (g_ascii_isspace(*tmpp)) tmpp++; if (*tmpp == '"' || *tmpp == '\'') { /* name="value" */ @@ -573,11 +360,11 @@ static HTMLTag *html_get_tag(const gchar *str) } tmpp = p; *tmpp++ = '\0'; - while (isspace(*tmpp)) tmpp++; + while (g_ascii_isspace(*tmpp)) tmpp++; } else { /* name=value */ attr_value = tmpp; - while (*tmpp != '\0' && !isspace(*tmpp)) tmpp++; + while (*tmpp != '\0' && !g_ascii_isspace(*tmpp)) tmpp++; if (*tmpp != '\0') *tmpp++ = '\0'; } @@ -664,7 +451,7 @@ static HTMLState html_parse_tag(HTMLParser *parser) !strcmp(tag->name, "li") || !strcmp(tag->name, "table") || 
!strcmp(tag->name, "tr") || - (tag->name[0] == 'h' && isdigit((guchar)tag->name[1]))) { + (tag->name[0] == 'h' && g_ascii_isdigit(tag->name[1]))) { if (!parser->newline) { parser->space = FALSE; html_append_char(parser, '\n'); @@ -673,7 +460,7 @@ static HTMLState html_parse_tag(HTMLParser *parser) } else if (!strcmp(tag->name, "/table") || (tag->name[0] == '/' && tag->name[1] == 'h' && - isdigit((guchar)tag->name[1]))) { + g_ascii_isdigit(tag->name[1]))) { if (!parser->empty_line) { parser->space = FALSE; if (!parser->newline) html_append_char(parser, '\n'); @@ -721,13 +508,12 @@ static void html_parse_special(HTMLParser *parser) html_append_str(parser, val, -1); parser->state = HTML_NORMAL; return; - } else if (symbol_name[1] == '#' && isdigit((guchar)symbol_name[2])) { + } else if (symbol_name[1] == '#' && g_ascii_isdigit(symbol_name[2])) { gint ch; + /* TODO: support other entity references */ ch = atoi(symbol_name + 2); - if ((ch > 0 && ch <= 127) || - (ch >= 128 && ch <= 255 && - parser->conv->charset == C_ISO_8859_1)) { + if (g_ascii_isprint(ch)) { html_append_char(parser, ch); parser->state = HTML_NORMAL; return; |