1
0
mirror of https://git.tartarus.org/simon/putty.git synced 2025-07-01 03:22:48 -05:00

Add two new string types to the Conf system.

This begins the process of making PuTTY more able to handle Unicode
strings as a first-class type in its configuration. One of the new
types, CONF_TYPE_UTF8, looks physically just like CONF_TYPE_STR but
the semantics are that it's definitely encoded in UTF-8, instead of
'shrug, whatever the system locale's encoding is'.

Unfortunately, we can't yet switch over any Conf items to having that
type, because our data representations in saved configuration (both on
Unix and Windows) store char strings in the system encoding. So we'll
have to change that representation at the same time, which risks
breaking backwards compatibility with old PuTTYs reading the same
configuration.

So the other new type, CONF_TYPE_STR_AMBI, is intended as a
transitional form, recording a configuration setting that _might_ be
explicitly UTF-8 or might have the legacy 'shrug, whatever' semantics,
depending on where we got it from.

My general migration plan is that first I _enable_ Unicode support in
a Conf item, by turning it into STR_AMBI; the Unicode version of the
string (if any) is saved in a new location, and a best-effort
local-charset version is saved where it's always been. That way new
PuTTY can read the Unicode version, and old PuTTY reading that
configuration will behave no worse than it would have done already.

It would be nice to think that in the far future we've migrated
everything to STR_AMBI and can move them all to mandatory UTF-8,
obsoleting the old configuration. I think it's more likely we'll never
get there. But at least _new_ Conf items, with no backwards
compatibility requirement in the first place, can be CONF_TYPE_UTF8
where appropriate.

(In conf_get_str_ambi(), I considered making it mandatory via assert()
to pass the 'utf8' output pointer as non-NULL, to defend against lazy
adaptation of existing code by just changing the function call. But in
fact I think there's a legitimate use case for not caring if the
output is UTF-8 or not, because some of the existing SSH code
currently just shoves strings like usernames directly on to the wire
whether they're in the right encoding or not; so if you want to do the
correct UTF-8 thing where possible and preserve legacy behaviour if
not, then treating both classes of string the same _is_ the right
thing to do.)

This also requires linking the Unicode support into many Unix
applications that hadn't previously needed it.
This commit is contained in:
Simon Tatham
2024-09-23 12:00:37 +01:00
parent 4f756d2a4d
commit 75b6e12f84
6 changed files with 340 additions and 32 deletions

View File

@ -39,7 +39,10 @@ struct value {
union {
bool boolval;
int intval;
char *stringval;
struct {
char *str;
bool utf8;
} stringval;
Filename *fileval;
FontSpec *fontval;
} u;
@ -79,9 +82,12 @@ static int conf_cmp(void *av, void *bv)
return +1;
return 0;
case CONF_TYPE_STR:
case CONF_TYPE_UTF8:
return strcmp(a->secondary.s, b->secondary.s);
default:
case CONF_TYPE_NONE:
return 0;
default:
unreachable("Unsupported subkey type");
}
}
@ -102,9 +108,12 @@ static int conf_cmp_constkey(void *av, void *bv)
return +1;
return 0;
case CONF_TYPE_STR:
case CONF_TYPE_UTF8:
return strcmp(a->secondary.s, b->secondary.s);
default:
case CONF_TYPE_NONE:
return 0;
default:
unreachable("Unsupported subkey type");
}
}
@ -115,7 +124,8 @@ static int conf_cmp_constkey(void *av, void *bv)
*/
static void free_key(struct key *key)
{
if (conf_key_info[key->primary].subkey_type == CONF_TYPE_STR)
if (conf_key_info[key->primary].subkey_type == CONF_TYPE_STR ||
conf_key_info[key->primary].subkey_type == CONF_TYPE_UTF8)
sfree(key->secondary.s);
}
@ -131,6 +141,7 @@ static void copy_key(struct key *to, struct key *from)
to->secondary.i = from->secondary.i;
break;
case CONF_TYPE_STR:
case CONF_TYPE_UTF8:
to->secondary.s = dupstr(from->secondary.s);
break;
}
@ -143,8 +154,9 @@ static void copy_key(struct key *to, struct key *from)
*/
static void free_value(struct value *val, int type)
{
if (type == CONF_TYPE_STR)
sfree(val->u.stringval);
if (type == CONF_TYPE_STR || type == CONF_TYPE_UTF8 ||
type == CONF_TYPE_STR_AMBI)
sfree(val->u.stringval.str);
else if (type == CONF_TYPE_FILENAME)
filename_free(val->u.fileval);
else if (type == CONF_TYPE_FONT)
@ -165,7 +177,10 @@ static void copy_value(struct value *to, struct value *from, int type)
to->u.intval = from->u.intval;
break;
case CONF_TYPE_STR:
to->u.stringval = dupstr(from->u.stringval);
case CONF_TYPE_UTF8:
case CONF_TYPE_STR_AMBI:
to->u.stringval.str = dupstr(from->u.stringval.str);
to->u.stringval.utf8 = from->u.stringval.utf8;
break;
case CONF_TYPE_FILENAME:
to->u.fileval = filename_copy(from->u.fileval);
@ -296,7 +311,37 @@ char *conf_get_str(Conf *conf, int primary)
key.primary = primary;
entry = find234(conf->tree, &key, NULL);
assert(entry);
return entry->value.u.stringval;
return entry->value.u.stringval.str;
}
/*
 * Look up a configuration item that has no subkey and whose value type
 * is CONF_TYPE_UTF8, and return its string.
 *
 * The key's type and the presence of the entry are enforced by
 * assertion. The pointer returned is the one held inside the Conf
 * entry, not a copy.
 */
char *conf_get_utf8(Conf *conf, int primary)
{
    /* Only the primary id matters for a subkey-less lookup. */
    struct key key = { .primary = primary };
    struct conf_entry *entry;

    assert(conf_key_info[primary].subkey_type == CONF_TYPE_NONE);
    assert(conf_key_info[primary].value_type == CONF_TYPE_UTF8);

    entry = find234(conf->tree, &key, NULL);
    assert(entry);

    return entry->value.u.stringval.str;
}
/*
 * Look up a subkey-less string configuration item of any of the three
 * string value types (CONF_TYPE_STR, CONF_TYPE_UTF8 or
 * CONF_TYPE_STR_AMBI) and return its string.
 *
 * If 'utf8' is non-NULL, it receives the UTF-8 flag stored alongside
 * the string in the entry. Passing NULL is permitted for callers that
 * do not care about the encoding.
 *
 * The key's type and the presence of the entry are enforced by
 * assertion. The pointer returned is the one held inside the Conf
 * entry, not a copy.
 */
char *conf_get_str_ambi(Conf *conf, int primary, bool *utf8)
{
    /* Only the primary id matters for a subkey-less lookup. */
    struct key key = { .primary = primary };
    struct conf_entry *entry;

    assert(conf_key_info[primary].subkey_type == CONF_TYPE_NONE);
    assert(conf_key_info[primary].value_type == CONF_TYPE_STR ||
           conf_key_info[primary].value_type == CONF_TYPE_UTF8 ||
           conf_key_info[primary].value_type == CONF_TYPE_STR_AMBI);

    entry = find234(conf->tree, &key, NULL);
    assert(entry);

    /* Report the encoding flag only to callers that asked for it. */
    if (utf8 != NULL)
        *utf8 = entry->value.u.stringval.utf8;

    return entry->value.u.stringval.str;
}
char *conf_get_str_str_opt(Conf *conf, int primary, const char *secondary)
@ -309,7 +354,7 @@ char *conf_get_str_str_opt(Conf *conf, int primary, const char *secondary)
key.primary = primary;
key.secondary.s = (char *)secondary;
entry = find234(conf->tree, &key, NULL);
return entry ? entry->value.u.stringval : NULL;
return entry ? entry->value.u.stringval.str : NULL;
}
char *conf_get_str_str(Conf *conf, int primary, const char *secondary)
@ -338,7 +383,7 @@ char *conf_get_str_strs(Conf *conf, int primary,
if (!entry || entry->key.primary != primary)
return NULL;
*subkeyout = entry->key.secondary.s;
return entry->value.u.stringval;
return entry->value.u.stringval.str;
}
char *conf_get_str_nthstrkey(Conf *conf, int primary, int n)
@ -422,15 +467,48 @@ void conf_set_int_int(Conf *conf, int primary,
conf_insert(conf, entry);
}
void conf_set_str(Conf *conf, int primary, const char *value)
bool conf_try_set_str(Conf *conf, int primary, const char *value)
{
struct conf_entry *entry = snew(struct conf_entry);
assert(conf_key_info[primary].subkey_type == CONF_TYPE_NONE);
assert(conf_key_info[primary].value_type == CONF_TYPE_STR);
if (conf_key_info[primary].value_type == CONF_TYPE_UTF8)
return false;
assert(conf_key_info[primary].value_type == CONF_TYPE_STR ||
conf_key_info[primary].value_type == CONF_TYPE_STR_AMBI);
entry->key.primary = primary;
entry->value.u.stringval = dupstr(value);
entry->value.u.stringval.str = dupstr(value);
entry->value.u.stringval.utf8 = false;
conf_insert(conf, entry);
return true;
}
/*
 * Set a subkey-less string configuration item, treating failure as a
 * programming error.
 *
 * Thin wrapper around conf_try_set_str(): it forwards the arguments
 * unchanged and asserts that the call reported success. As the
 * assertion message indicates, the expected failure is calling this on
 * a key whose value type is CONF_TYPE_UTF8.
 */
void conf_set_str(Conf *conf, int primary, const char *value)
{
bool success = conf_try_set_str(conf, primary, value);
/* The string literal is always true; it only labels the failure. */
assert(success && "conf_set_str on CONF_TYPE_UTF8");
}
/*
 * Try to store 'value' as the UTF-8 string of the subkey-less
 * configuration item 'primary'.
 *
 * Returns false, without modifying 'conf' or allocating anything, if
 * the item's value type is CONF_TYPE_STR. Otherwise the item must be
 * CONF_TYPE_UTF8 or CONF_TYPE_STR_AMBI (enforced by assertion); a
 * dupstr() copy of 'value' is stored with its utf8 flag set to true,
 * the new entry is passed to conf_insert(), and true is returned.
 */
bool conf_try_set_utf8(Conf *conf, int primary, const char *value)
{
    struct conf_entry *entry;

    assert(conf_key_info[primary].subkey_type == CONF_TYPE_NONE);
    if (conf_key_info[primary].value_type == CONF_TYPE_STR)
        return false;
    assert(conf_key_info[primary].value_type == CONF_TYPE_UTF8 ||
           conf_key_info[primary].value_type == CONF_TYPE_STR_AMBI);

    /*
     * Allocate the entry only once we know we will insert it, so the
     * early 'return false' above cannot leak it.
     */
    entry = snew(struct conf_entry);
    entry->key.primary = primary;
    entry->value.u.stringval.str = dupstr(value);
    entry->value.u.stringval.utf8 = true;
    conf_insert(conf, entry);
    return true;
}
/*
 * Set a subkey-less UTF-8 string configuration item, treating failure
 * as a programming error.
 *
 * Thin wrapper around conf_try_set_utf8(): it forwards the arguments
 * unchanged and asserts that the call reported success. As the
 * assertion message indicates, the expected failure is calling this on
 * a key whose value type is CONF_TYPE_STR.
 */
void conf_set_utf8(Conf *conf, int primary, const char *value)
{
bool success = conf_try_set_utf8(conf, primary, value);
/* The string literal is always true; it only labels the failure. */
assert(success && "conf_set_utf8 on CONF_TYPE_STR");
}
void conf_set_str_str(Conf *conf, int primary, const char *secondary,
@ -442,7 +520,8 @@ void conf_set_str_str(Conf *conf, int primary, const char *secondary,
assert(conf_key_info[primary].value_type == CONF_TYPE_STR);
entry->key.primary = primary;
entry->key.secondary.s = dupstr(secondary);
entry->value.u.stringval = dupstr(value);
entry->value.u.stringval.str = dupstr(value);
entry->value.u.stringval.utf8 = false;
conf_insert(conf, entry);
}
@ -508,7 +587,12 @@ void conf_serialise(BinarySink *bs, Conf *conf)
put_uint32(bs, entry->value.u.intval);
break;
case CONF_TYPE_STR:
put_asciz(bs, entry->value.u.stringval);
case CONF_TYPE_UTF8:
put_asciz(bs, entry->value.u.stringval.str);
break;
case CONF_TYPE_STR_AMBI:
put_asciz(bs, entry->value.u.stringval.str);
put_bool(bs, entry->value.u.stringval.utf8);
break;
case CONF_TYPE_FILENAME:
filename_serialise(bs, entry->value.u.fileval);
@ -557,7 +641,16 @@ bool conf_deserialise(Conf *conf, BinarySource *src)
entry->value.u.intval = toint(get_uint32(src));
break;
case CONF_TYPE_STR:
entry->value.u.stringval = dupstr(get_asciz(src));
entry->value.u.stringval.str = dupstr(get_asciz(src));
entry->value.u.stringval.utf8 = false;
break;
case CONF_TYPE_UTF8:
entry->value.u.stringval.str = dupstr(get_asciz(src));
entry->value.u.stringval.utf8 = true;
break;
case CONF_TYPE_STR_AMBI:
entry->value.u.stringval.str = dupstr(get_asciz(src));
entry->value.u.stringval.utf8 = get_bool(src);
break;
case CONF_TYPE_FILENAME:
entry->value.u.fileval = filename_deserialise(src);