1
0
mirror of https://git.tartarus.org/simon/putty.git synced 2025-07-01 03:22:48 -05:00

Add two new string types to the Conf system.

This begins the process of making PuTTY more able to handle Unicode
strings as a first-class type in its configuration. One of the new
types, CONF_TYPE_UTF8, looks physically just like CONF_TYPE_STR but
the semantics are that it's definitely encoded in UTF-8, instead of
'shrug, whatever the system locale's encoding is'.

Unfortunately, we can't yet switch over any Conf items to having that
type, because our data representations in saved configuration (both on
Unix and Windows) store char strings in the system encoding. So we'll
have to change that representation at the same time, which risks
breaking backwards compatibility with old PuTTYs reading the same
configuration.

So the other new type, CONF_TYPE_STR_AMBI, is intended as a
transitional form, recording a configuration setting that _might_ be
explicitly UTF-8 or might have the legacy 'shrug, whatever' semantics,
depending on where we got it from.

My general migration plan is that first I _enable_ Unicode support in
a Conf item, by turning it into STR_AMBI; the Unicode version of the
string (if any) is saved in a new location, and a best-effort
local-charset version is saved where it's always been. That way new
PuTTY can read the Unicode version, and old PuTTY reading that
configuration will behave no worse than it would have done already.

It would be nice to think that in the far future we've migrated
everything to STR_AMBI and can move them all to mandatory UTF-8,
obsoleting the old configuration. I think it's more likely we'll never
get there. But at least _new_ Conf items, with no backwards
compatibility requirement in the first place, can be CONF_TYPE_UTF8
where appropriate.

(In conf_get_str_ambi(), I considered making it mandatory via assert()
to pass the 'utf8' output pointer as non-NULL, to defend against lazy
adaptation of existing code by just changing the function call. But in
fact I think there's a legitimate use case for not caring if the
output is UTF-8 or not, because some of the existing SSH code
currently just shoves strings like usernames directly on to the wire
whether they're in the right encoding or not; so if you want to do the
correct UTF-8 thing where possible and preserve legacy behaviour if
not, then treating both classes of string the same _is_ the right
thing to do.)

This also requires linking the Unicode support into many Unix
applications that hadn't previously needed it.
This commit is contained in:
Simon Tatham
2024-09-23 12:00:37 +01:00
parent 4f756d2a4d
commit 75b6e12f84
6 changed files with 340 additions and 32 deletions

View File

@ -270,6 +270,117 @@ void test_str_simple(int confid, const char *saveid, const char *defexp)
conf_free(conf);
}
/*
 * Round-trip check for one Conf setting accessed via conf_get_utf8() /
 * conf_set_utf8().
 *
 *   confid - Conf key under test
 *   saveid - key name used for the item in the settings_w / settings_r
 *            test fixtures
 *   defexp - string expected from conf_get_utf8() after do_defaults()
 *
 * The default value is compared against defexp first. Then, for each of
 * two sample strings: the value is set and save_open_settings() is run
 * against a one-item settings_w (the item must end up typed SAVE_S and
 * hold the sample string); the Conf is cleared; and the same string is
 * fed back through load_open_settings() and re-read. Each mismatch
 * prints a "fail" line and increments nfails.
 */
void test_utf8_simple(int confid, const char *saveid, const char *defexp)
{
    static const char *const samples[] = { "foo", "bar" };
    Conf *conf = conf_new();

    do_defaults(NULL, conf);

    const char *initial = conf_get_utf8(conf, confid);
    if (strcmp(initial, defexp) != 0) {
        printf("fail test_utf8_simple(%s): default = '%s', expected '%s'\n",
               saveid, initial, defexp);
        nfails++;
    }

    for (size_t idx = 0; idx < sizeof(samples) / sizeof(samples[0]); idx++) {
        const char *want = samples[idx];

        /* Save direction: set the value, then inspect what was written. */
        settings_w writer = {
            .n = 1,
            .si[0].key = saveid,
            .si[0].type = SAVE_UNSET,
        };
        conf_set_utf8(conf, confid, want);
        save_open_settings(&writer, conf);

        if (writer.si[0].type != SAVE_S) {
            printf("fail test_utf8_simple(%s): saved type = %d, expected %d\n",
                   saveid, writer.si[0].type, SAVE_S);
            nfails++;
        } else if (strcmp(writer.si[0].sval, want) != 0) {
            printf("fail test_utf8_simple(%s): "
                   "saved string = '%s', expected '%s'\n",
                   saveid, writer.si[0].sval, want);
            nfails++;
        }

        /* Load direction: start from a cleared Conf and read the value back. */
        conf_clear(conf);

        settings_r reader = {
            .n = 1,
            .si[0].key = saveid,
            .si[0].type = SAVE_S,
        };
        snprintf(reader.si[0].sval, sizeof(reader.si[0].sval), "%s", want);
        load_open_settings(&reader, conf);

        const char *got = conf_get_utf8(conf, confid);
        if (strcmp(got, want) != 0) {
            printf("fail test_utf8_simple(%s): "
                   "loaded string = '%s', expected '%s'\n",
                   saveid, got, want);
            nfails++;
        }
    }

    conf_free(conf);
}
/*
 * Round-trip check for one Conf setting read via conf_get_str_ambi(),
 * which returns a string plus a flag saying whether it is UTF-8.
 *
 *   confid  - Conf key under test
 *   saveid  - key name used for the item in the settings_w / settings_r
 *             test fixtures
 *   defexp  - string expected after do_defaults()
 *   defutf8 - value expected in the 'utf8' output flag for the default
 *
 * The default string and flag are compared against defexp / defutf8.
 * Then, for each of two sample strings: the value is stored with
 * conf_set_str() and save_open_settings() is run against a one-item
 * settings_w (the item must end up typed SAVE_S and hold the sample
 * string); the Conf is cleared; and the same string is fed back through
 * load_open_settings() and re-read, where it must come back with the
 * utf8 flag false. Each mismatch prints a "fail" line and increments
 * nfails.
 */
void test_str_ambi_simple(int confid, const char *saveid,
                          const char *defexp, bool defutf8)
{
    Conf *conf = conf_new();
    bool utf8;

    do_defaults(NULL, conf);
    const char *defgot = conf_get_str_ambi(conf, confid, &utf8);
    if (0 != strcmp(defgot, defexp) || utf8 != defutf8) {
        /* A true flag means UTF-8 and a false flag means native, so the
         * labels must be chosen in that order. */
        printf("fail test_str_ambi_simple(%s): "
               "default = '%s' (%s), expected '%s' (%s)\n",
               saveid, defgot, utf8 ? "UTF-8" : "native",
               defexp, defutf8 ? "UTF-8" : "native");
        nfails++;
    }

    for (int i = 0; i < 2; i++) {
        settings_w sw = {
            .n = 1,
            .si[0].key = saveid,
            .si[0].type = SAVE_UNSET,
        };
        static const char *const teststrings[] = { "foo", "bar" };
        const char *teststring = teststrings[i];

        /* Save direction: set the value, then inspect what was written. */
        conf_set_str(conf, confid, teststring);
        save_open_settings(&sw, conf);
        if (sw.si[0].type != SAVE_S) {
            printf("fail test_str_ambi_simple(%s): "
                   "saved type = %d, expected %d\n",
                   saveid, sw.si[0].type, SAVE_S);
            nfails++;
        } else if (0 != strcmp(sw.si[0].sval, teststring)) {
            printf("fail test_str_ambi_simple(%s): "
                   "saved string = '%s', expected '%s'\n",
                   saveid, sw.si[0].sval, teststring);
            nfails++;
        }

        /* Load direction: start from a cleared Conf and read the value back. */
        conf_clear(conf);

        settings_r sr = {
            .n = 1,
            .si[0].key = saveid,
            .si[0].type = SAVE_S,
        };
        snprintf(sr.si[0].sval, sizeof(sr.si[0].sval), "%s", teststring);
        load_open_settings(&sr, conf);

        const char *loaded = conf_get_str_ambi(conf, confid, &utf8);
        /* The test fails here if the flag comes back true, i.e. the
         * loaded string is expected to be native. */
        if (0 != strcmp(loaded, teststring) || utf8) {
            printf("fail test_str_ambi_simple(%s): "
                   "loaded string = '%s' (%s), expected '%s' (native)\n",
                   saveid, loaded, utf8 ? "UTF-8" : "native", teststring);
            nfails++;
        }
    }

    conf_free(conf);
}
void test_int_simple(int confid, const char *saveid, int defexp)
{
Conf *conf = conf_new();
@ -864,7 +975,10 @@ void test_conf_key_info(void)
}
if ((td->got_default_int && info->value_type != CONF_TYPE_INT) ||
(td->got_default_str && info->value_type != CONF_TYPE_STR) ||
(td->got_default_str &&
(info->value_type != CONF_TYPE_STR &&
info->value_type != CONF_TYPE_STR_AMBI &&
info->value_type != CONF_TYPE_UTF8)) ||
(td->got_default_bool && info->value_type != CONF_TYPE_BOOL)) {
fprintf(stderr, "%s: default doesn't match type\n", td->name);
nfails++;