diff --git a/CMakeLists.txt b/CMakeLists.txt index e0a7f867..314b7f6a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -175,4 +175,13 @@ foreach(subdir ${platform} ${extra_dirs}) add_subdirectory(${subdir}) endforeach() +# Nasty bodge: we'd like to run this command inside unix/CMakeLists, +# adding the 'charset' library to everything that links with utils. +# But that wasn't allowed until cmake 3.13 (see cmake policy CMP0079), +# and we still have a min cmake version less than that. So we do it +# here instead. +if(platform STREQUAL unix) + target_link_libraries(utils charset) +endif() + configure_file(cmake/cmake.h.in ${GENERATED_SOURCES_DIR}/cmake.h) diff --git a/putty.h b/putty.h index 9483be20..d240e132 100644 --- a/putty.h +++ b/putty.h @@ -1797,16 +1797,62 @@ enum config_primary_key { N_CONFIG_OPTIONS }; -/* Types that appear in Conf keys and values. CONF_TYPE_NONE is used - * as the subkey type for options that don't have subkeys, and is also - * available as a placeholder value for other kinds of 'no type found' - * error. */ +/* Types that appear in Conf keys and values. */ enum { + /* + * CONF_TYPE_NONE is included in this enum because sometimes you + * need a placeholder for 'no type found'. (In Rust you'd leave it + * out, and use Option for those situations.) + * + * In particular, it's used as the subkey type for options that + * don't have subkeys. + */ CONF_TYPE_NONE, + + /* Booleans, accessed via conf_get_bool and conf_set_bool */ CONF_TYPE_BOOL, + + /* Integers, accessed via conf_get_int and conf_set_int */ CONF_TYPE_INT, + + /* + * NUL-terminated char strings, accessed via conf_get_str and + * conf_set_str. + * + * Where character encoding is relevant, these are generally + * expected to be in the host system's default character encoding. + * + * (Character encoding might not be relevant at all: for example, + * if the string is going to be used as a shell command on Unix, + * then the exec system call will want a char string anyway.) 
+ */ CONF_TYPE_STR, + + /* NUL-terminated char strings encoded in UTF-8, accessed via + * conf_get_utf8 and conf_set_utf8. */ + CONF_TYPE_UTF8, + + /* + * A type that can be _either_ a char string in system encoding + * (aka CONF_TYPE_STR), _or_ a char string in UTF-8 (aka + * CONF_TYPE_UTF8). You can set it to be one or the other via + * conf_set_str or conf_set_utf8. To read it, you must use + * conf_get_str_ambi(), which returns a char string and a boolean + * telling you whether it's UTF-8. + * + * These can't be used as _keys_ in Conf, only as values. (If you + * used them as keys, you'd have to answer the difficult question + * of whether a UTF-8 and a non-UTF-8 string should be considered + * equal.) + */ + CONF_TYPE_STR_AMBI, + + /* PuTTY's OS-specific 'Filename' data type, accessed via + * conf_get_filename and conf_set_filename */ CONF_TYPE_FILENAME, + + /* PuTTY's GUI-specific 'FontSpec' data type, accessed via + * conf_get_fontspec and conf_set_fontspec */ CONF_TYPE_FONT, }; @@ -1853,6 +1899,9 @@ bool conf_get_bool(Conf *conf, int key); int conf_get_int(Conf *conf, int key); int conf_get_int_int(Conf *conf, int key, int subkey); char *conf_get_str(Conf *conf, int key); /* result still owned by conf */ +char *conf_get_utf8(Conf *conf, int key); /* result still owned by conf */ +char *conf_get_str_ambi( /* result still owned by conf; 'utf8' may be NULL */ + Conf *conf, int key, bool *utf8); char *conf_get_str_str(Conf *conf, int key, const char *subkey); Filename *conf_get_filename(Conf *conf, int key); FontSpec *conf_get_fontspec(Conf *conf, int key); /* still owned by conf */ @@ -1870,6 +1919,9 @@ void conf_set_bool(Conf *conf, int key, bool value); void conf_set_int(Conf *conf, int key, int value); void conf_set_int_int(Conf *conf, int key, int subkey, int value); void conf_set_str(Conf *conf, int key, const char *value); +void conf_set_utf8(Conf *conf, int key, const char *value); +bool conf_try_set_str(Conf *conf, int key, const char *value); +bool 
conf_try_set_utf8(Conf *conf, int key, const char *value); void conf_set_str_str(Conf *conf, int key, const char *subkey, const char *val); void conf_del_str_str(Conf *conf, int key, const char *subkey); diff --git a/settings.c b/settings.c index b81d0593..71b45fd1 100644 --- a/settings.c +++ b/settings.c @@ -546,6 +546,42 @@ void save_open_settings(settings_w *sesskey, Conf *conf) write_setting_s(sesskey, info->save_keyword, conf_get_str(conf, key)); break; + case CONF_TYPE_STR_AMBI: { + bool orig_is_utf8; + const char *orig = conf_get_str_ambi(conf, key, &orig_is_utf8); + + int cp_from, cp_to; + if (orig_is_utf8) { + cp_from = CP_UTF8; + cp_to = DEFAULT_CODEPAGE; + } else { + cp_from = DEFAULT_CODEPAGE; + cp_to = CP_UTF8; + } + + size_t wlen; + wchar_t *wide = dup_mb_to_wc_c( + cp_from, orig, strlen(orig), &wlen); + + size_t clen; + char *converted = dup_wc_to_mb_c( + cp_to, wide, wlen, "", &clen); + + const char *native, *utf8; + if (orig_is_utf8) { + utf8 = orig; + native = converted; + } else { + native = orig; + utf8 = converted; + } + write_setting_s(sesskey, info->save_keyword, native); + (void)utf8; /* FIXME: also save the UTF-8 version */ + + burnwcs(wide); + burnstr(converted); + break; + } case CONF_TYPE_INT: { int ival = conf_get_int(conf, key); if (info->storage_enum) { @@ -678,6 +714,7 @@ void load_open_settings(settings_r *sesskey, Conf *conf) if (info->subkey_type == CONF_TYPE_NONE) { switch (info->value_type) { case CONF_TYPE_STR: + case CONF_TYPE_STR_AMBI: conf_set_str(conf, key, info->default_value.sval); break; case CONF_TYPE_INT: @@ -695,6 +732,7 @@ void load_open_settings(settings_r *sesskey, Conf *conf) assert(info->subkey_type == CONF_TYPE_NONE); switch (info->value_type) { case CONF_TYPE_STR: + case CONF_TYPE_STR_AMBI: gpps(sesskey, info->save_keyword, info->default_value.sval, conf, key); break; diff --git a/test/test_conf.c b/test/test_conf.c index d9f215b3..e305a2be 100644 --- a/test/test_conf.c +++ b/test/test_conf.c @@ -270,6 +270,117 
@@ void test_str_simple(int confid, const char *saveid, const char *defexp) conf_free(conf); } +void test_utf8_simple(int confid, const char *saveid, const char *defexp) +{ + Conf *conf = conf_new(); + + do_defaults(NULL, conf); + const char *defgot = conf_get_utf8(conf, confid); + if (0 != strcmp(defgot, defexp)) { + printf("fail test_utf8_simple(%s): default = '%s', expected '%s'\n", + saveid, defgot, defexp); + nfails++; + } + + for (int i = 0; i < 2; i++) { + settings_w sw = { + .n = 1, + .si[0].key = saveid, + .si[0].type = SAVE_UNSET, + }; + static const char *const teststrings[] = { "foo", "bar" }; + const char *teststring = teststrings[i]; + conf_set_utf8(conf, confid, teststring); + save_open_settings(&sw, conf); + if (sw.si[0].type != SAVE_S) { + printf("fail test_utf8_simple(%s): saved type = %d, expected %d\n", + saveid, sw.si[0].type, SAVE_S); + nfails++; + } else if (0 != strcmp(sw.si[0].sval, teststring)) { + printf("fail test_utf8_simple(%s): " + "saved string = '%s', expected '%s'\n", + saveid, sw.si[0].sval, teststring); + nfails++; + } + + conf_clear(conf); + settings_r sr = { + .n = 1, + .si[0].key = saveid, + .si[0].type = SAVE_S, + }; + snprintf(sr.si[0].sval, sizeof(sr.si[0].sval), "%s", teststring); + load_open_settings(&sr, conf); + const char *loaded = conf_get_utf8(conf, confid); + if (0 != strcmp(loaded, teststring)) { + printf("fail test_utf8_simple(%s): " + "loaded string = '%s', expected '%s'\n", + saveid, loaded, teststring); + nfails++; + } + } + + conf_free(conf); +} + +void test_str_ambi_simple(int confid, const char *saveid, + const char *defexp, bool defutf8) +{ + Conf *conf = conf_new(); + bool utf8; + /* 'utf8' is set to true when conf_get_str_ambi reports a UTF-8 value */ + do_defaults(NULL, conf); + const char *defgot = conf_get_str_ambi(conf, confid, &utf8); + if (0 != strcmp(defgot, defexp) || utf8 != defutf8) { + printf("fail test_str_ambi_simple(%s): " + "default = '%s' (%s), expected '%s' (%s)\n", + saveid, defgot, utf8 ? "UTF-8" : "native", + defexp, defutf8 ? 
"UTF-8" : "native"); + nfails++; + } + + for (int i = 0; i < 2; i++) { + settings_w sw = { + .n = 1, + .si[0].key = saveid, + .si[0].type = SAVE_UNSET, + }; + static const char *const teststrings[] = { "foo", "bar" }; + const char *teststring = teststrings[i]; + conf_set_str(conf, confid, teststring); + save_open_settings(&sw, conf); + if (sw.si[0].type != SAVE_S) { + printf("fail test_str_ambi_simple(%s): " + "saved type = %d, expected %d\n", + saveid, sw.si[0].type, SAVE_S); + nfails++; + } else if (0 != strcmp(sw.si[0].sval, teststring)) { + printf("fail test_str_ambi_simple(%s): " + "saved string = '%s', expected '%s'\n", + saveid, sw.si[0].sval, teststring); + nfails++; + } + + conf_clear(conf); + settings_r sr = { + .n = 1, + .si[0].key = saveid, + .si[0].type = SAVE_S, + }; + snprintf(sr.si[0].sval, sizeof(sr.si[0].sval), "%s", teststring); + load_open_settings(&sr, conf); + const char *loaded = conf_get_str_ambi(conf, confid, &utf8); + if (0 != strcmp(loaded, teststring) || utf8) { + printf("fail test_str_ambi_simple(%s): " + "loaded string = '%s' (%s), expected '%s' (native)\n", + saveid, loaded, utf8 ? 
"UTF-8" : "native", teststring); + nfails++; + } + } + + conf_free(conf); +} + void test_int_simple(int confid, const char *saveid, int defexp) { Conf *conf = conf_new(); @@ -864,7 +975,10 @@ void test_conf_key_info(void) } if ((td->got_default_int && info->value_type != CONF_TYPE_INT) || - (td->got_default_str && info->value_type != CONF_TYPE_STR) || + (td->got_default_str && + (info->value_type != CONF_TYPE_STR && + info->value_type != CONF_TYPE_STR_AMBI && + info->value_type != CONF_TYPE_UTF8)) || (td->got_default_bool && info->value_type != CONF_TYPE_BOOL)) { fprintf(stderr, "%s: default doesn't match type\n", td->name); nfails++; diff --git a/unix/CMakeLists.txt b/unix/CMakeLists.txt index 8bd2d3df..c24e96b5 100644 --- a/unix/CMakeLists.txt +++ b/unix/CMakeLists.txt @@ -64,15 +64,16 @@ target_link_libraries(fuzzterm add_executable(osxlaunch osxlaunch.c) -add_sources_from_current_dir(plink no-gtk.c) -add_sources_from_current_dir(pscp no-gtk.c) -add_sources_from_current_dir(psftp no-gtk.c) +add_sources_from_current_dir(plink unicode.c no-gtk.c) +add_sources_from_current_dir(pscp unicode.c no-gtk.c) +add_sources_from_current_dir(psftp unicode.c no-gtk.c) add_sources_from_current_dir(psocks no-gtk.c) add_executable(psusan psusan.c ${CMAKE_SOURCE_DIR}/stubs/no-gss.c ${CMAKE_SOURCE_DIR}/ssh/scpserver.c + unicode.c no-gtk.c pty.c) be_list(psusan psusan) @@ -120,6 +121,7 @@ add_executable(uppity ${CMAKE_SOURCE_DIR}/ssh/scpserver.c no-gtk.c pty.c + unicode.c ${CMAKE_SOURCE_DIR}/stubs/no-gss.c) be_list(uppity Uppity) target_link_libraries(uppity @@ -148,7 +150,7 @@ if(GTK_FOUND) pty.c) be_list(pterm pterm) target_link_libraries(pterm - guiterminal eventloop settings charset utils ptermxpms + guiterminal eventloop settings utils ptermxpms ${GTK_LIBRARIES} ${X11_LIBRARIES}) installed_program(pterm) @@ -164,7 +166,7 @@ if(GTK_FOUND) pty.c) be_list(ptermapp pterm) target_link_libraries(ptermapp - guiterminal eventloop settings charset utils ptermxpms + guiterminal eventloop 
settings utils ptermxpms ${GTK_LIBRARIES} ${X11_LIBRARIES}) endif() @@ -175,7 +177,7 @@ if(GTK_FOUND) be_list(putty PuTTY SSH SERIAL OTHERBACKENDS) target_link_libraries(putty guiterminal eventloop sshclient otherbackends settings - network crypto charset utils puttyxpms + network crypto utils puttyxpms ${GTK_LIBRARIES} ${X11_LIBRARIES}) set_target_properties(putty PROPERTIES LINK_INTERFACE_MULTIPLICITY 2) @@ -190,7 +192,7 @@ if(GTK_FOUND) be_list(puttyapp PuTTY SSH SERIAL OTHERBACKENDS) target_link_libraries(puttyapp guiterminal eventloop sshclient otherbackends settings - network crypto charset utils puttyxpms + network crypto utils puttyxpms ${GTK_LIBRARIES} ${X11_LIBRARIES}) endif() @@ -205,7 +207,7 @@ if(GTK_FOUND) ${CMAKE_SOURCE_DIR}/proxy/nosshproxy.c) be_list(puttytel PuTTYtel SERIAL OTHERBACKENDS) target_link_libraries(puttytel - guiterminal eventloop otherbackends settings network charset utils + guiterminal eventloop otherbackends settings network utils puttyxpms ${GTK_LIBRARIES} ${X11_LIBRARIES}) @@ -217,7 +219,7 @@ if(GTK_FOUND) ${CMAKE_SOURCE_DIR}/stubs/no-storage.c ${CMAKE_SOURCE_DIR}/stubs/no-timing.c) target_link_libraries(test_lineedit - guiterminal settings eventloop charset utils ${platform_libraries}) + guiterminal settings eventloop utils ${platform_libraries}) add_executable(test_terminal ${CMAKE_SOURCE_DIR}/test/test_terminal.c @@ -225,7 +227,7 @@ if(GTK_FOUND) ${CMAKE_SOURCE_DIR}/stubs/no-storage.c ${CMAKE_SOURCE_DIR}/stubs/no-timing.c) target_link_libraries(test_terminal - guiterminal settings eventloop charset utils ${platform_libraries}) + guiterminal settings eventloop utils ${platform_libraries}) endif() # Pageant is built whether we have GTK or not; in its absence we @@ -251,4 +253,4 @@ target_link_libraries(pageant ${pageant_libs}) installed_program(pageant) -add_sources_from_current_dir(test_conf stubs/no-uxsel.c) +add_sources_from_current_dir(test_conf unicode.c stubs/no-uxsel.c) diff --git a/utils/conf.c b/utils/conf.c index 
7915ddde..074d6e5e 100644 --- a/utils/conf.c +++ b/utils/conf.c @@ -39,7 +39,10 @@ struct value { union { bool boolval; int intval; - char *stringval; + struct { + char *str; + bool utf8; + } stringval; Filename *fileval; FontSpec *fontval; } u; @@ -79,9 +82,12 @@ static int conf_cmp(void *av, void *bv) return +1; return 0; case CONF_TYPE_STR: + case CONF_TYPE_UTF8: return strcmp(a->secondary.s, b->secondary.s); - default: + case CONF_TYPE_NONE: return 0; + default: + unreachable("Unsupported subkey type"); } } @@ -102,9 +108,12 @@ static int conf_cmp_constkey(void *av, void *bv) return +1; return 0; case CONF_TYPE_STR: + case CONF_TYPE_UTF8: return strcmp(a->secondary.s, b->secondary.s); - default: + case CONF_TYPE_NONE: return 0; + default: + unreachable("Unsupported subkey type"); } } @@ -115,7 +124,8 @@ static int conf_cmp_constkey(void *av, void *bv) */ static void free_key(struct key *key) { - if (conf_key_info[key->primary].subkey_type == CONF_TYPE_STR) + if (conf_key_info[key->primary].subkey_type == CONF_TYPE_STR || + conf_key_info[key->primary].subkey_type == CONF_TYPE_UTF8) sfree(key->secondary.s); } @@ -131,6 +141,7 @@ static void copy_key(struct key *to, struct key *from) to->secondary.i = from->secondary.i; break; case CONF_TYPE_STR: + case CONF_TYPE_UTF8: to->secondary.s = dupstr(from->secondary.s); break; } @@ -143,8 +154,9 @@ static void copy_key(struct key *to, struct key *from) */ static void free_value(struct value *val, int type) { - if (type == CONF_TYPE_STR) - sfree(val->u.stringval); + if (type == CONF_TYPE_STR || type == CONF_TYPE_UTF8 || + type == CONF_TYPE_STR_AMBI) + sfree(val->u.stringval.str); else if (type == CONF_TYPE_FILENAME) filename_free(val->u.fileval); else if (type == CONF_TYPE_FONT) @@ -165,7 +177,10 @@ static void copy_value(struct value *to, struct value *from, int type) to->u.intval = from->u.intval; break; case CONF_TYPE_STR: - to->u.stringval = dupstr(from->u.stringval); + case CONF_TYPE_UTF8: + case CONF_TYPE_STR_AMBI: 
+ to->u.stringval.str = dupstr(from->u.stringval.str); + to->u.stringval.utf8 = from->u.stringval.utf8; break; case CONF_TYPE_FILENAME: to->u.fileval = filename_copy(from->u.fileval); @@ -296,7 +311,37 @@ char *conf_get_str(Conf *conf, int primary) key.primary = primary; entry = find234(conf->tree, &key, NULL); assert(entry); - return entry->value.u.stringval; + return entry->value.u.stringval.str; +} + +char *conf_get_utf8(Conf *conf, int primary) +{ + struct key key; + struct conf_entry *entry; + /* Only keys with no subkey and value type CONF_TYPE_UTF8 are valid */ + assert(conf_key_info[primary].subkey_type == CONF_TYPE_NONE); + assert(conf_key_info[primary].value_type == CONF_TYPE_UTF8); + key.primary = primary; + entry = find234(conf->tree, &key, NULL); + assert(entry); + return entry->value.u.stringval.str; +} + +char *conf_get_str_ambi(Conf *conf, int primary, bool *utf8) +{ + struct key key; + struct conf_entry *entry; + /* Valid for keys with no subkey of type STR, UTF8 or STR_AMBI */ + assert(conf_key_info[primary].subkey_type == CONF_TYPE_NONE); + assert(conf_key_info[primary].value_type == CONF_TYPE_STR || + conf_key_info[primary].value_type == CONF_TYPE_UTF8 || + conf_key_info[primary].value_type == CONF_TYPE_STR_AMBI); + key.primary = primary; + entry = find234(conf->tree, &key, NULL); + assert(entry); + if (utf8) + *utf8 = entry->value.u.stringval.utf8; + return entry->value.u.stringval.str; } char *conf_get_str_str_opt(Conf *conf, int primary, const char *secondary) @@ -309,7 +354,7 @@ char *conf_get_str_str_opt(Conf *conf, int primary, const char *secondary) key.primary = primary; key.secondary.s = (char *)secondary; entry = find234(conf->tree, &key, NULL); - return entry ? entry->value.u.stringval : NULL; + return entry ? 
entry->value.u.stringval.str : NULL; } char *conf_get_str_str(Conf *conf, int primary, const char *secondary) @@ -338,7 +383,7 @@ char *conf_get_str_strs(Conf *conf, int primary, if (!entry || entry->key.primary != primary) return NULL; *subkeyout = entry->key.secondary.s; - return entry->value.u.stringval; + return entry->value.u.stringval.str; } char *conf_get_str_nthstrkey(Conf *conf, int primary, int n) @@ -422,15 +467,48 @@ void conf_set_int_int(Conf *conf, int primary, conf_insert(conf, entry); } -void conf_set_str(Conf *conf, int primary, const char *value) +bool conf_try_set_str(Conf *conf, int primary, const char *value) { - struct conf_entry *entry = snew(struct conf_entry); - - assert(conf_key_info[primary].subkey_type == CONF_TYPE_NONE); - assert(conf_key_info[primary].value_type == CONF_TYPE_STR); + assert(conf_key_info[primary].subkey_type == CONF_TYPE_NONE); + if (conf_key_info[primary].value_type == CONF_TYPE_UTF8) + return false; + assert(conf_key_info[primary].value_type == CONF_TYPE_STR || + conf_key_info[primary].value_type == CONF_TYPE_STR_AMBI); + /* Allocate after the type check so the early return cannot leak */ + struct conf_entry *entry = snew(struct conf_entry); entry->key.primary = primary; - entry->value.u.stringval = dupstr(value); + entry->value.u.stringval.str = dupstr(value); + entry->value.u.stringval.utf8 = false; conf_insert(conf, entry); + return true; +} + +void conf_set_str(Conf *conf, int primary, const char *value) +{ + bool success = conf_try_set_str(conf, primary, value); + assert(success && "conf_set_str on CONF_TYPE_UTF8"); +} + +bool conf_try_set_utf8(Conf *conf, int primary, const char *value) +{ + assert(conf_key_info[primary].subkey_type == CONF_TYPE_NONE); + if (conf_key_info[primary].value_type == CONF_TYPE_STR) + return false; + assert(conf_key_info[primary].value_type == CONF_TYPE_UTF8 || + conf_key_info[primary].value_type == CONF_TYPE_STR_AMBI); + /* Allocate after the type check so the early return cannot leak */ + struct conf_entry *entry = snew(struct conf_entry); + entry->key.primary = primary; + entry->value.u.stringval.str = dupstr(value); + entry->value.u.stringval.utf8 = true; + conf_insert(conf, entry); + return true; +} + +void conf_set_utf8(Conf *conf, int 
primary, const char *value) +{ + bool success = conf_try_set_utf8(conf, primary, value); + assert(success && "conf_set_utf8 on CONF_TYPE_STR"); } void conf_set_str_str(Conf *conf, int primary, const char *secondary, @@ -442,7 +520,8 @@ void conf_set_str_str(Conf *conf, int primary, const char *secondary, assert(conf_key_info[primary].value_type == CONF_TYPE_STR); entry->key.primary = primary; entry->key.secondary.s = dupstr(secondary); - entry->value.u.stringval = dupstr(value); + entry->value.u.stringval.str = dupstr(value); + entry->value.u.stringval.utf8 = false; conf_insert(conf, entry); } @@ -508,7 +587,12 @@ void conf_serialise(BinarySink *bs, Conf *conf) put_uint32(bs, entry->value.u.intval); break; case CONF_TYPE_STR: - put_asciz(bs, entry->value.u.stringval); + case CONF_TYPE_UTF8: + put_asciz(bs, entry->value.u.stringval.str); + break; + case CONF_TYPE_STR_AMBI: + put_asciz(bs, entry->value.u.stringval.str); + put_bool(bs, entry->value.u.stringval.utf8); break; case CONF_TYPE_FILENAME: filename_serialise(bs, entry->value.u.fileval); @@ -557,7 +641,16 @@ bool conf_deserialise(Conf *conf, BinarySource *src) entry->value.u.intval = toint(get_uint32(src)); break; case CONF_TYPE_STR: - entry->value.u.stringval = dupstr(get_asciz(src)); + entry->value.u.stringval.str = dupstr(get_asciz(src)); + entry->value.u.stringval.utf8 = false; + break; + case CONF_TYPE_UTF8: + entry->value.u.stringval.str = dupstr(get_asciz(src)); + entry->value.u.stringval.utf8 = true; + break; + case CONF_TYPE_STR_AMBI: + entry->value.u.stringval.str = dupstr(get_asciz(src)); + entry->value.u.stringval.utf8 = get_bool(src); break; case CONF_TYPE_FILENAME: entry->value.u.fileval = filename_deserialise(src);