Make UTF-8 the default character set and list it first.

Summary of the hunks below:
  * charset/localenc.c: move the "UTF-8" entry from the end of localencs[]
    to directly after the empty-name entry.
  * unix/uxucs.c: cp_enumerate() now returns the named charsets first and
    "Use font encoding" last; decode_codepage("") returns CS_UTF8 instead
    of CS_NONE.
  * windows/winucs.c: move "UTF-8" to the head of cp_list[]; an empty name
    in decode_codepage() returns CP_UTF8, replacing the GetACP()-based guess.

NOTE(review): leading indentation and blank context lines were
reconstructed; if context does not match the tree, apply while ignoring
whitespace differences.
NOTE(review): both decode_codepage() variants dereference cp_name before
any NULL test (the Windows one checks "cp_name &&" only afterwards);
confirm callers never pass NULL.

diff --git a/charset/localenc.c b/charset/localenc.c
index 5eabeaf1..a24126d3 100644
--- a/charset/localenc.c
+++ b/charset/localenc.c
@@ -21,6 +21,7 @@ static const struct {
     int return_in_enum; /* enumeration misses some charsets */
 } localencs[] = {
     { "", CS_NONE, 0 },
+    { "UTF-8", CS_UTF8, 1 },
     { "ISO-8859-1", CS_ISO8859_1, 1 },
     { "ISO-8859-1 with X11 line drawing", CS_ISO8859_1_X11, 0 },
     { "ISO-8859-2", CS_ISO8859_2, 1 },
@@ -75,7 +76,6 @@ static const struct {
     { "VISCII", CS_VISCII, 1 },
     { "HP ROMAN8", CS_HP_ROMAN8, 1 },
     { "DEC MCS", CS_DEC_MCS, 1 },
-    { "UTF-8", CS_UTF8, 1 },
 };
 
 const char *charset_to_localenc(int charset)
diff --git a/unix/uxucs.c b/unix/uxucs.c
index 9d8242f8..8cd300d3 100644
--- a/unix/uxucs.c
+++ b/unix/uxucs.c
@@ -253,17 +253,19 @@ const char *cp_name(int codepage)
 const char *cp_enumerate(int index)
 {
     int charset;
-    if (index == 0)
-        return "Use font encoding";
-    charset = charset_localenc_nth(index-1);
-    if (charset == CS_NONE)
+    charset = charset_localenc_nth(index);
+    if (charset == CS_NONE) {
+        /* "Use font encoding" comes after all the named charsets */
+        if (charset_localenc_nth(index-1) != CS_NONE)
+            return "Use font encoding";
         return NULL;
+    }
     return charset_to_localenc(charset);
 }
 
 int decode_codepage(char *cp_name)
 {
     if (!*cp_name)
-        return CS_NONE; /* use font encoding */
+        return CS_UTF8;
     return charset_from_localenc(cp_name);
 }
diff --git a/windows/winucs.c b/windows/winucs.c
index 2ce1693b..cd509835 100644
--- a/windows/winucs.c
+++ b/windows/winucs.c
@@ -390,6 +390,8 @@ struct cp_list_item {
 };
 
 static const struct cp_list_item cp_list[] = {
+    {"UTF-8", CP_UTF8},
+
     {"ISO-8859-1:1998 (Latin-1, West Europe)", 0, 96, iso_8859_1},
     {"ISO-8859-2:1999 (Latin-2, East Europe)", 0, 96, iso_8859_2},
     {"ISO-8859-3:1999 (Latin-3, South Europe)", 0, 96, iso_8859_3},
@@ -406,8 +408,6 @@ static const struct cp_list_item cp_list[] = {
     {"ISO-8859-15:1999 (Latin-9, \"euro\")", 0, 96, iso_8859_15},
     {"ISO-8859-16:2001 (Latin-10, Balkan)", 0, 96, iso_8859_16},
 
-    {"UTF-8", CP_UTF8},
-
     {"KOI8-U", 0, 128, koi8_u},
     {"KOI8-R", 20866},
     {"HP-ROMAN8", 0, 96, roman8},
@@ -1016,48 +1016,8 @@ int decode_codepage(char *cp_name)
     int codepage = -1;
     CPINFO cpinfo;
 
-    if (!*cp_name) {
-        /*
-         * Here we select a plausible default code page based on
-         * the locale the user is in. We wish to select an ISO code
-         * page or appropriate local default _rather_ than go with
-         * the Win125* series, because it's more important to have
-         * CSI and friends enabled by default than the ghastly
-         * Windows extra quote characters, and because it's more
-         * likely the user is connecting to a remote server that
-         * does something Unixy or VMSy and hence standards-
-         * compliant than that they're connecting back to a Windows
-         * box using horrible nonstandard charsets.
-         *
-         * Accordingly, Robert de Bath suggests a method for
-         * picking a default character set that runs as follows:
-         * first call GetACP to get the system's ANSI code page
-         * identifier, and translate as follows:
-         *
-         * 1250 -> ISO 8859-2
-         * 1251 -> KOI8-U
-         * 1252 -> ISO 8859-1
-         * 1253 -> ISO 8859-7
-         * 1254 -> ISO 8859-9
-         * 1255 -> ISO 8859-8
-         * 1256 -> ISO 8859-6
-         * 1257 -> ISO 8859-13 (changed from 8859-4 on advice of a Lithuanian)
-         *
-         * and for anything else, choose direct-to-font.
-         */
-        int cp = GetACP();
-        switch (cp) {
-            case 1250: cp_name = "ISO-8859-2"; break;
-            case 1251: cp_name = "KOI8-U"; break;
-            case 1252: cp_name = "ISO-8859-1"; break;
-            case 1253: cp_name = "ISO-8859-7"; break;
-            case 1254: cp_name = "ISO-8859-9"; break;
-            case 1255: cp_name = "ISO-8859-8"; break;
-            case 1256: cp_name = "ISO-8859-6"; break;
-            case 1257: cp_name = "ISO-8859-13"; break;
-            /* default: leave it blank, which will select -1, direct->font */
-        }
-    }
+    if (!*cp_name)
+        return CP_UTF8; /* default */
 
     if (cp_name && *cp_name)
         for (cpi = cp_list; cpi->name; cpi++) {