1
0
mirror of https://git.tartarus.org/simon/putty.git synced 2025-01-25 01:02:24 +00:00

Add UTF-8 support to the new Windows ConsoleIO system.

This allows you to set a flag in conio_setup() which causes the
returned ConsoleIO object to interpret all its output as UTF-8, by
translating it to UTF-16 and using WriteConsoleW to write it in
Unicode. Similarly, input is read using ReadConsoleW and decoded from
UTF-16 to UTF-8.

This flag is set to false in most places, to avoid making sudden
breaking changes. But when we're about to present a prompts_t to the
user, it's set from the new 'utf8' flag in that prompt, which in turn
is set by the userauth layer in any case where the prompts are going
to the server.

The idea is that this should be the start of a fix for the long-
standing character-set handling bug that strings transmitted during
SSH userauth (usernames, passwords, keyboard-interactive prompts and responses) are all
supposed to be in UTF-8, but we've always encoded them in whatever our
input system happens to be using, and not done any tidying up on them.
We get occasional complaints about this from users whose passwords
contain characters that are encoded differently between UTF-8 and
their local encoding, but I've never got round to fixing it because
it's a large piece of engineering.

Indeed, this isn't nearly the end of it. The next step is to add UTF-8
support to all the _other_ ways of presenting a prompts_t, as best we
can.

Like the previous change to console handling, it seems very likely
that this will break someone's workflow. So there's a fallback
command-line option '-legacy-charset-handling' to revert to PuTTY's
previous behaviour.
This commit is contained in:
Simon Tatham 2022-11-25 12:57:43 +00:00
parent 80aed96286
commit f4519b6533
10 changed files with 184 additions and 37 deletions

View File

@ -930,6 +930,16 @@ int cmdline_process_param(const char *p, char *value,
}
}
if (!strcmp(p, "-legacy-charset-handling") ||
!strcmp(p, "-legacy_charset_handling")) {
RETURN(1);
SAVEABLE(0);
if (!console_set_legacy_charset_handling(true)) {
cmdline_report_unavailable(p);
return ret;
}
}
#ifdef _WINDOWS
/*
* Cross-tool options only available on Windows.

7
misc.h
View File

@ -70,10 +70,11 @@ void strbuf_finalise_agent_query(strbuf *buf);
/* String-to-Unicode converters that auto-allocate the destination and
 * work around the rather deficient interface of mb_to_wc.
 *
 * The _c variants take an explicit input length and, if outlen_p is
 * non-NULL, report the length of the converted string through it. The
 * plain variants measure a NUL-terminated input themselves and pass
 * NULL for outlen_p. */
wchar_t *dup_mb_to_wc_c(int codepage, int flags, const char *string,
                        size_t len, size_t *outlen_p);
wchar_t *dup_mb_to_wc(int codepage, int flags, const char *string);
char *dup_wc_to_mb_c(int codepage, int flags, const wchar_t *string,
                     size_t len, const char *defchr, size_t *outlen_p);
char *dup_wc_to_mb(int codepage, int flags, const wchar_t *string,
                   const char *defchr);

12
putty.h
View File

@ -996,6 +996,17 @@ struct prompts_t {
* seat_get_userpass_input(); initially NULL */
SeatPromptResult spr; /* some implementations need to cache one of these */
/*
* Set this flag to indicate that the caller has encoded the
* prompts in UTF-8, and expects the responses to be UTF-8 too.
*
* Ideally this flag would be unnecessary because it would always
* be true, but for legacy reasons, we have to switch over a bit
* at a time from the old behaviour, and may never manage to get
* rid of it completely.
*/
bool utf8;
/*
* Callback you can fill in to be notified when all the prompts'
* responses are available. After you receive this notification, a
@ -2564,6 +2575,7 @@ bool have_ssh_host_key(const char *host, int port, const char *keytype);
extern bool console_batch_mode, console_antispoof_prompt;
extern bool console_set_batch_mode(bool);
extern bool console_set_stdio_prompts(bool);
extern bool console_set_legacy_charset_handling(bool);
SeatPromptResult console_get_userpass_input(prompts_t *p);
bool is_interactive(void);
void console_print_error_msg(const char *prefix, const char *msg);

View File

@ -752,6 +752,7 @@ static void ssh2_userauth_process_queue(PacketProtocolLayer *ppl)
*/
} else if ((s->username = s->default_username) == NULL) {
s->cur_prompt = ssh_ppl_new_prompts(&s->ppl);
s->cur_prompt->utf8 = true;
s->cur_prompt->to_server = true;
s->cur_prompt->from_server = false;
s->cur_prompt->name = dupstr("SSH login name");
@ -1816,6 +1817,7 @@ static void ssh2_userauth_process_queue(PacketProtocolLayer *ppl)
s->ppl.bpp->pls->actx = SSH2_PKTCTX_PASSWORD;
s->cur_prompt = ssh_ppl_new_prompts(&s->ppl);
s->cur_prompt->utf8 = true;
s->cur_prompt->to_server = true;
s->cur_prompt->from_server = false;
s->cur_prompt->name = dupstr("SSH password");
@ -1904,6 +1906,7 @@ static void ssh2_userauth_process_queue(PacketProtocolLayer *ppl)
prompt = get_string(pktin);
s->cur_prompt = ssh_ppl_new_prompts(&s->ppl);
s->cur_prompt->utf8 = true;
s->cur_prompt->to_server = true;
s->cur_prompt->from_server = false;
s->cur_prompt->name = dupstr("New SSH password");
@ -2095,6 +2098,7 @@ static bool ssh2_userauth_ki_setup_prompts(
inst = get_string(src);
get_string(src); /* skip language tag */
s->cur_prompt = ssh_ppl_new_prompts(&s->ppl);
s->cur_prompt->utf8 = true;
s->cur_prompt->to_server = true;
s->cur_prompt->from_server = true;

View File

@ -13,3 +13,8 @@ bool console_set_stdio_prompts(bool newvalue)
{
return false;
}
/*
 * Stub implementation: the requested setting is ignored and the call
 * always reports failure. The command-line parser treats a false
 * return as "option unavailable".
 */
bool console_set_legacy_charset_handling(bool newvalue)
{
    (void)newvalue;                    /* nothing to configure here */
    return false;
}

View File

@ -578,6 +578,12 @@ bool console_set_stdio_prompts(bool newvalue)
return false;
}
/*
 * Request a switch to (or away from) legacy charset handling for
 * console prompts. Not implemented in this file: the argument is
 * ignored and false is returned for every input.
 */
bool console_set_legacy_charset_handling(bool newvalue)
{
    /* This probably _will_ need to be supported, but isn't yet. */
    (void)newvalue;
    return false;
}
/*
* X11-forwarding-related things suitable for console.
*/

View File

@ -2,20 +2,25 @@
* dup_mb_to_wc: memory-allocating wrapper on mb_to_wc.
*
* Also dup_mb_to_wc_c: same but you already know the length of the
* string.
* string, and you get told the length of the returned wide string.
* (But it's still NUL-terminated, for convenience.)
*/
#include "putty.h"
#include "misc.h"
wchar_t *dup_mb_to_wc_c(int codepage, int flags, const char *string, int len)
wchar_t *dup_mb_to_wc_c(int codepage, int flags, const char *string,
size_t inlen, size_t *outlen_p)
{
int mult;
assert(inlen <= INT_MAX);
size_t mult;
for (mult = 1 ;; mult++) {
wchar_t *ret = snewn(mult*len + 2, wchar_t);
int outlen;
outlen = mb_to_wc(codepage, flags, string, len, ret, mult*len + 1);
if (outlen < mult*len+1) {
wchar_t *ret = snewn(mult*inlen + 2, wchar_t);
size_t outlen = mb_to_wc(codepage, flags, string, inlen, ret,
mult*inlen + 1);
if (outlen < mult*inlen+1) {
if (outlen_p)
*outlen_p = outlen;
ret[outlen] = L'\0';
return ret;
}
@ -25,5 +30,5 @@ wchar_t *dup_mb_to_wc_c(int codepage, int flags, const char *string, int len)
/*
 * Convert a NUL-terminated multibyte string to a freshly allocated,
 * NUL-terminated wide string.
 *
 * Thin wrapper on dup_mb_to_wc_c: measures the input with strlen and
 * passes NULL for outlen_p, because callers of this form do not need
 * the output length reported separately.
 */
wchar_t *dup_mb_to_wc(int codepage, int flags, const char *string)
{
    return dup_mb_to_wc_c(codepage, flags, string, strlen(string), NULL);
}

View File

@ -2,7 +2,8 @@
* dup_wc_to_mb: memory-allocating wrapper on wc_to_mb.
*
* Also dup_wc_to_mb_c: same but you already know the length of the
* string.
* wide string, and you get told the length of the returned string.
* (But it's still NUL-terminated, for convenience.)
*/
#include <wchar.h>
@ -10,19 +11,23 @@
#include "putty.h"
#include "misc.h"
char *dup_wc_to_mb_c(int codepage, int flags, const wchar_t *string, int len,
const char *defchr)
char *dup_wc_to_mb_c(int codepage, int flags, const wchar_t *string,
size_t inlen, const char *defchr, size_t *outlen_p)
{
size_t outsize = len+1;
assert(inlen <= INT_MAX);
size_t outsize = inlen+1;
char *out = snewn(outsize, char);
while (true) {
size_t outlen = wc_to_mb(codepage, flags, string, len, out, outsize,
size_t outlen = wc_to_mb(codepage, flags, string, inlen, out, outsize,
defchr);
/* We can only be sure we've consumed the whole input if the
* output is not within a multibyte-character-length of the
* end of the buffer! */
if (outlen < outsize && outsize - outlen > MB_LEN_MAX) {
if (outlen_p)
*outlen_p = outlen;
out[outlen] = '\0';
return out;
}
@ -34,5 +39,6 @@ char *dup_wc_to_mb_c(int codepage, int flags, const wchar_t *string, int len,
/*
 * Convert a NUL-terminated wide string to a freshly allocated,
 * NUL-terminated multibyte string in the given code page.
 *
 * Thin wrapper on dup_wc_to_mb_c: measures the input with wcslen,
 * forwards defchr unchanged, and passes NULL for outlen_p because the
 * output length is not wanted.
 */
char *dup_wc_to_mb(int codepage, int flags, const wchar_t *string,
                   const char *defchr)
{
    return dup_wc_to_mb_c(codepage, flags, string, wcslen(string),
                          defchr, NULL);
}

View File

@ -18,6 +18,7 @@ prompts_t *new_prompts(void)
p->callback = NULL;
p->callback_ctx = NULL;
p->ldisc_ptr_to_us = NULL;
p->utf8 = false;
return p;
}

View File

@ -39,7 +39,9 @@ void console_print_error_msg(const char *prefix, const char *msg)
* In PuTTY 0.78 and before, these prompts used the standard I/O
* handles. But this means you can't redirect Plink's actual stdin
* from a sensible data channel without the responses to login prompts
* unwantedly being read from it too.
* unwantedly being read from it too. Also, if you have a real
* console handle then you can read from it in Unicode mode, which is
* an option not available for any old file handle.
*
* However, many versions of PuTTY have worked the old way, so we need
* a method of falling back to it for the sake of whoever's workflow
@ -53,21 +55,30 @@ bool console_set_stdio_prompts(bool newvalue)
return true;
}
/*
 * Global switch for UTF-8 console handling. It defaults to on, and
 * conio_setup() ANDs it with the per-call request, so clearing it
 * disables UTF-8 for every ConsoleIO created afterwards.
 */
static bool conio_use_utf8 = true;

/*
 * Enable (true) or disable (false) legacy charset handling. Legacy
 * mode is the inverse of the UTF-8 switch above. Always returns true,
 * meaning the setting is supported here.
 */
bool console_set_legacy_charset_handling(bool newvalue)
{
    conio_use_utf8 = newvalue ? false : true;
    return true;
}
/*
 * State for one round of console I/O, created by conio_setup().
 * Output goes through the BinarySink interface (console_write);
 * input is read from hin.
 */
typedef struct ConsoleIO {
/* Input and output handles; conio_setup() starts both off as
 * INVALID_HANDLE_VALUE before choosing real ones. */
HANDLE hin, hout;
/* Initialised to false by conio_setup(). Presumably records whether
 * each handle was opened by us and must be closed on free -- confirm
 * against conio_free(). */
bool need_close_hin, need_close_hout;
/* Whether each handle is a real console: selects ReadConsoleW /
 * WriteConsoleW rather than ReadFile / WriteFile in UTF-8 mode, and
 * hin_is_console is what is_interactive() reports. */
bool hin_is_console, hout_is_console;
/* True if data passing through this object is UTF-8. conio_setup()
 * sets it to its 'utf8' argument ANDed with the global
 * conio_use_utf8 switch. */
bool utf8;
/* Lets a ConsoleIO be used as a BinarySink; console_write() recovers
 * the ConsoleIO with BinarySink_DOWNCAST. */
BinarySink_IMPLEMENTATION;
} ConsoleIO;
static void console_write(BinarySink *bs, const void *data, size_t len);
static ConsoleIO *conio_setup(void)
static ConsoleIO *conio_setup(bool utf8)
{
ConsoleIO *conio = snew(ConsoleIO);
conio->hin = conio->hout = INVALID_HANDLE_VALUE;
conio->need_close_hin = conio->need_close_hout = false;
conio->utf8 = utf8 && conio_use_utf8;
/*
* First try opening the console itself, so that prompts will go
@ -132,13 +143,56 @@ static void console_write(BinarySink *bs, const void *data, size_t len)
{
    /*
     * BinarySink write method for a ConsoleIO: send 'len' bytes at
     * 'data' to conio->hout.
     *
     * The decision to treat the data as UTF-8 is taken from the
     * per-object conio->utf8 flag (which conio_setup() already ANDs
     * with the global conio_use_utf8), NOT from the global alone.
     * Otherwise callers that asked for conio_setup(false) would have
     * their non-UTF-8 output run through the UTF-8 decoder anyway.
     *
     * Write errors are not reported: each loop simply stops at the
     * first failed write.
     */
    ConsoleIO *conio = BinarySink_DOWNCAST(bs, ConsoleIO);

    if (conio->utf8) {
        /*
         * Convert the UTF-8 input into a wide string.
         */
        size_t wlen;
        wchar_t *wide = dup_mb_to_wc_c(CP_UTF8, 0, data, len, &wlen);

        if (conio->hout_is_console) {
            /*
             * To write UTF-8 to a console, use WriteConsoleW on the
             * wide string we've just made.
             */
            size_t pos = 0;
            DWORD nwritten;

            while (pos < wlen && WriteConsoleW(conio->hout, wide+pos, wlen-pos,
                                               &nwritten, NULL))
                pos += nwritten;
        } else {
            /*
             * To write a string encoded in UTF-8 to any other file
             * handle, the best we can do is to convert it into the
             * system code page. This will lose some characters, but
             * what else can you do?
             */
            size_t clen;
            char *sys_cp = dup_wc_to_mb_c(CP_ACP, 0, wide, wlen, "?", &clen);
            size_t pos = 0;
            DWORD nwritten;

            while (pos < clen && WriteFile(conio->hout, sys_cp+pos, clen-pos,
                                           &nwritten, NULL))
                pos += nwritten;

            /* Scrub the temporary copy before freeing it. */
            burnstr(sys_cp);
        }

        /* Likewise scrub the wide-character copy. */
        burnwcs(wide);
    } else {
        /*
         * If we're in legacy non-UTF-8 mode, just send the bytes
         * we're given to the file handle without trying to be clever.
         */
        const char *cdata = (const char *)data;
        size_t pos = 0;
        DWORD nwritten;

        while (pos < len && WriteFile(conio->hout, cdata+pos, len-pos,
                                      &nwritten, NULL))
            pos += nwritten;
    }
}
static bool console_read_line_to_strbuf(ConsoleIO *conio, bool echo,
@ -166,13 +220,56 @@ static bool console_read_line_to_strbuf(ConsoleIO *conio, bool echo,
goto out;
}
/*
 * Read one chunk of input from conio->hin and append it to sb.
 *
 * The UTF-8 decision comes from the per-object conio->utf8 flag, so
 * that a ConsoleIO set up with utf8 == false stays in legacy byte
 * mode even while the global UTF-8 switch is on. Exactly one read is
 * performed per pass, in whichever branch applies.
 */
if (conio->utf8) {
    wchar_t wbuf[4096];
    size_t wlen;

    if (conio->hin_is_console) {
        /*
         * To read UTF-8 from a console, read wide character data
         * via ReadConsoleW, and convert it to UTF-8.
         */
        DWORD nread;
        if (!ReadConsoleW(conio->hin, wbuf, lenof(wbuf), &nread, NULL))
            goto out;
        wlen = nread;
    } else {
        /*
         * To read UTF-8 from an ordinary file handle, read it
         * as normal bytes and then convert from CP_ACP to
         * UTF-8, in the reverse of what we did above for
         * output.
         */
        char buf[4096];
        DWORD nread;
        if (!ReadFile(conio->hin, buf, lenof(buf), &nread, NULL))
            goto out;
        wlen = mb_to_wc(CP_ACP, 0, buf, nread, wbuf, lenof(wbuf));
        /* Scrub the raw bytes; they may be part of a password. */
        smemclr(buf, sizeof(buf));
    }

    /* Allocate the maximum space in the strbuf that might be
     * needed for this data */
    size_t oldlen = sb->len, maxout = wlen * 4;
    void *outptr = strbuf_append(sb, maxout);
    size_t newlen = oldlen + wc_to_mb(CP_UTF8, 0, wbuf, wlen,
                                      outptr, maxout, NULL);
    /* Trim the strbuf back to what the conversion actually used. */
    strbuf_shrink_to(sb, newlen);
    smemclr(wbuf, sizeof(wbuf));
} else {
    /*
     * If we're in legacy non-UTF-8 mode, just read bytes
     * directly from the file handle into the output strbuf.
     */
    char buf[4096];
    DWORD nread;
    if (!ReadFile(conio->hin, buf, lenof(buf), &nread, NULL))
        goto out;
    put_data(sb, buf, nread);
    smemclr(buf, sizeof(buf));
}
}
out:
@ -245,7 +342,7 @@ SeatPromptResult console_confirm_ssh_host_key(
char *keystr, SeatDialogText *text, HelpCtx helpctx,
void (*callback)(void *ctx, SeatPromptResult result), void *ctx)
{
ConsoleIO *conio = conio_setup();
ConsoleIO *conio = conio_setup(false);
const char *prompt = NULL;
SeatPromptResult result;
@ -328,7 +425,7 @@ SeatPromptResult console_confirm_weak_crypto_primitive(
Seat *seat, const char *algtype, const char *algname,
void (*callback)(void *ctx, SeatPromptResult result), void *ctx)
{
ConsoleIO *conio = conio_setup();
ConsoleIO *conio = conio_setup(false);
SeatPromptResult result;
put_fmt(conio, weakcrypto_msg_common_fmt, algtype, algname);
@ -360,7 +457,7 @@ SeatPromptResult console_confirm_weak_cached_hostkey(
Seat *seat, const char *algname, const char *betteralgs,
void (*callback)(void *ctx, SeatPromptResult result), void *ctx)
{
ConsoleIO *conio = conio_setup();
ConsoleIO *conio = conio_setup(false);
SeatPromptResult result;
put_fmt(conio, weakhk_msg_common_fmt, algname, betteralgs);
@ -390,7 +487,7 @@ SeatPromptResult console_confirm_weak_cached_hostkey(
/*
 * Report whether the input handle that conio_setup() selects is a
 * real console.
 *
 * The ConsoleIO is created only to inspect hin_is_console and is
 * freed again straight away. Nothing is read or written through it,
 * so UTF-8 mode is not requested.
 */
bool is_interactive(void)
{
    ConsoleIO *conio = conio_setup(false);
    bool toret = conio->hin_is_console;
    conio_free(conio);
    return toret;
}
"The session log file \"%.*s\" already exists.\n"
"Logging will not be enabled.\n";
ConsoleIO *conio = conio_setup();
ConsoleIO *conio = conio_setup(false);
int result;
if (console_batch_mode) {
@ -549,7 +646,7 @@ StripCtrlChars *console_stripctrl_new(
SeatPromptResult console_get_userpass_input(prompts_t *p)
{
ConsoleIO *conio = conio_setup();
ConsoleIO *conio = conio_setup(p->utf8);
SeatPromptResult result;
size_t curr_prompt;