mirror of
https://git.tartarus.org/simon/putty.git
synced 2025-01-09 01:18:00 +00:00
Add UTF-8 support to the new Windows ConsoleIO system.
This allows you to set a flag in conio_setup() which causes the returned ConsoleIO object to interpret all its output as UTF-8, by translating it to UTF-16 and using WriteConsoleW to write it in Unicode. Similarly, input is read using ReadConsoleW and decoded from UTF-16 to UTF-8. This flag is set to false in most places, to avoid making sudden breaking changes. But when we're about to present a prompts_t to the user, it's set from the new 'utf8' flag in that prompt, which in turn is set by the userauth layer in any case where the prompts are going to the server. The idea is that this should be the start of a fix for the long-standing character-set handling bug that strings transmitted during SSH userauth (usernames, passwords, k-i prompts and responses) are all supposed to be in UTF-8, but we've always encoded them in whatever our input system happens to be using, and not done any tidying up on them. We get occasional complaints about this from users whose passwords contain characters that are encoded differently between UTF-8 and their local encoding, but I've never got round to fixing it because it's a large piece of engineering. Indeed, this isn't nearly the end of it. The next step is to add UTF-8 support to all the _other_ ways of presenting a prompts_t, as best we can. Like the previous change to console handling, it seems very likely that this will break someone's workflow. So there's a fallback command-line option '-legacy-charset-handling' to revert to PuTTY's previous behaviour.
This commit is contained in:
parent
80aed96286
commit
f4519b6533
10
cmdline.c
10
cmdline.c
@ -930,6 +930,16 @@ int cmdline_process_param(const char *p, char *value,
|
||||
}
|
||||
}
|
||||
|
||||
if (!strcmp(p, "-legacy-charset-handling") ||
|
||||
!strcmp(p, "-legacy_charset_handling")) {
|
||||
RETURN(1);
|
||||
SAVEABLE(0);
|
||||
if (!console_set_legacy_charset_handling(true)) {
|
||||
cmdline_report_unavailable(p);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef _WINDOWS
|
||||
/*
|
||||
* Cross-tool options only available on Windows.
|
||||
|
7
misc.h
7
misc.h
@ -70,10 +70,11 @@ void strbuf_finalise_agent_query(strbuf *buf);
|
||||
|
||||
/* String-to-Unicode converters that auto-allocate the destination and
|
||||
* work around the rather deficient interface of mb_to_wc. */
|
||||
wchar_t *dup_mb_to_wc_c(int codepage, int flags, const char *string, int len);
|
||||
wchar_t *dup_mb_to_wc_c(int codepage, int flags, const char *string,
|
||||
size_t len, size_t *outlen_p);
|
||||
wchar_t *dup_mb_to_wc(int codepage, int flags, const char *string);
|
||||
char *dup_wc_to_mb_c(int codepage, int flags, const wchar_t *string, int len,
|
||||
const char *defchr);
|
||||
char *dup_wc_to_mb_c(int codepage, int flags, const wchar_t *string,
|
||||
size_t len, const char *defchr, size_t *outlen_p);
|
||||
char *dup_wc_to_mb(int codepage, int flags, const wchar_t *string,
|
||||
const char *defchr);
|
||||
|
||||
|
12
putty.h
12
putty.h
@ -996,6 +996,17 @@ struct prompts_t {
|
||||
* seat_get_userpass_input(); initially NULL */
|
||||
SeatPromptResult spr; /* some implementations need to cache one of these */
|
||||
|
||||
/*
|
||||
* Set this flag to indicate that the caller has encoded the
|
||||
* prompts in UTF-8, and expects the responses to be UTF-8 too.
|
||||
*
|
||||
* Ideally this flag would be unnecessary because it would always
|
||||
* be true, but for legacy reasons, we have to switch over a bit
|
||||
* at a time from the old behaviour, and may never manage to get
|
||||
* rid of it completely.
|
||||
*/
|
||||
bool utf8;
|
||||
|
||||
/*
|
||||
* Callback you can fill in to be notified when all the prompts'
|
||||
* responses are available. After you receive this notification, a
|
||||
@ -2564,6 +2575,7 @@ bool have_ssh_host_key(const char *host, int port, const char *keytype);
|
||||
extern bool console_batch_mode, console_antispoof_prompt;
|
||||
extern bool console_set_batch_mode(bool);
|
||||
extern bool console_set_stdio_prompts(bool);
|
||||
extern bool console_set_legacy_charset_handling(bool);
|
||||
SeatPromptResult console_get_userpass_input(prompts_t *p);
|
||||
bool is_interactive(void);
|
||||
void console_print_error_msg(const char *prefix, const char *msg);
|
||||
|
@ -752,6 +752,7 @@ static void ssh2_userauth_process_queue(PacketProtocolLayer *ppl)
|
||||
*/
|
||||
} else if ((s->username = s->default_username) == NULL) {
|
||||
s->cur_prompt = ssh_ppl_new_prompts(&s->ppl);
|
||||
s->cur_prompt->utf8 = true;
|
||||
s->cur_prompt->to_server = true;
|
||||
s->cur_prompt->from_server = false;
|
||||
s->cur_prompt->name = dupstr("SSH login name");
|
||||
@ -1816,6 +1817,7 @@ static void ssh2_userauth_process_queue(PacketProtocolLayer *ppl)
|
||||
s->ppl.bpp->pls->actx = SSH2_PKTCTX_PASSWORD;
|
||||
|
||||
s->cur_prompt = ssh_ppl_new_prompts(&s->ppl);
|
||||
s->cur_prompt->utf8 = true;
|
||||
s->cur_prompt->to_server = true;
|
||||
s->cur_prompt->from_server = false;
|
||||
s->cur_prompt->name = dupstr("SSH password");
|
||||
@ -1904,6 +1906,7 @@ static void ssh2_userauth_process_queue(PacketProtocolLayer *ppl)
|
||||
prompt = get_string(pktin);
|
||||
|
||||
s->cur_prompt = ssh_ppl_new_prompts(&s->ppl);
|
||||
s->cur_prompt->utf8 = true;
|
||||
s->cur_prompt->to_server = true;
|
||||
s->cur_prompt->from_server = false;
|
||||
s->cur_prompt->name = dupstr("New SSH password");
|
||||
@ -2095,6 +2098,7 @@ static bool ssh2_userauth_ki_setup_prompts(
|
||||
inst = get_string(src);
|
||||
get_string(src); /* skip language tag */
|
||||
s->cur_prompt = ssh_ppl_new_prompts(&s->ppl);
|
||||
s->cur_prompt->utf8 = true;
|
||||
s->cur_prompt->to_server = true;
|
||||
s->cur_prompt->from_server = true;
|
||||
|
||||
|
@ -13,3 +13,8 @@ bool console_set_stdio_prompts(bool newvalue)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
 * Stub implementation: ignores newvalue and always returns false, i.e.
 * the setting is not available here. The command-line parser treats a
 * false return as "option unavailable" and reports it to the user.
 */
bool console_set_legacy_charset_handling(bool newvalue)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
@ -578,6 +578,12 @@ bool console_set_stdio_prompts(bool newvalue)
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
 * Placeholder: legacy charset handling cannot be selected in this
 * implementation yet, so newvalue is ignored and false ("not
 * supported") is always returned.
 */
bool console_set_legacy_charset_handling(bool newvalue)
|
||||
{
|
||||
/* This probably _will_ need to be supported, but isn't yet. */
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* X11-forwarding-related things suitable for console.
|
||||
*/
|
||||
|
@ -2,20 +2,25 @@
|
||||
* dup_mb_to_wc: memory-allocating wrapper on mb_to_wc.
|
||||
*
|
||||
* Also dup_mb_to_wc_c: same but you already know the length of the
|
||||
* string.
|
||||
* string, and you get told the length of the returned wide string.
|
||||
* (But it's still NUL-terminated, for convenience.)
|
||||
*/
|
||||
|
||||
#include "putty.h"
|
||||
#include "misc.h"
|
||||
|
||||
wchar_t *dup_mb_to_wc_c(int codepage, int flags, const char *string, int len)
|
||||
wchar_t *dup_mb_to_wc_c(int codepage, int flags, const char *string,
|
||||
size_t inlen, size_t *outlen_p)
|
||||
{
|
||||
int mult;
|
||||
assert(inlen <= INT_MAX);
|
||||
size_t mult;
|
||||
for (mult = 1 ;; mult++) {
|
||||
wchar_t *ret = snewn(mult*len + 2, wchar_t);
|
||||
int outlen;
|
||||
outlen = mb_to_wc(codepage, flags, string, len, ret, mult*len + 1);
|
||||
if (outlen < mult*len+1) {
|
||||
wchar_t *ret = snewn(mult*inlen + 2, wchar_t);
|
||||
size_t outlen = mb_to_wc(codepage, flags, string, inlen, ret,
|
||||
mult*inlen + 1);
|
||||
if (outlen < mult*inlen+1) {
|
||||
if (outlen_p)
|
||||
*outlen_p = outlen;
|
||||
ret[outlen] = L'\0';
|
||||
return ret;
|
||||
}
|
||||
@ -25,5 +30,5 @@ wchar_t *dup_mb_to_wc_c(int codepage, int flags, const char *string, int len)
|
||||
|
||||
wchar_t *dup_mb_to_wc(int codepage, int flags, const char *string)
|
||||
{
|
||||
return dup_mb_to_wc_c(codepage, flags, string, strlen(string));
|
||||
return dup_mb_to_wc_c(codepage, flags, string, strlen(string), NULL);
|
||||
}
|
||||
|
@ -2,7 +2,8 @@
|
||||
* dup_wc_to_mb: memory-allocating wrapper on wc_to_mb.
|
||||
*
|
||||
* Also dup_wc_to_mb_c: same but you already know the length of the
|
||||
* string.
|
||||
* wide string, and you get told the length of the returned string.
|
||||
* (But it's still NUL-terminated, for convenience.)
|
||||
*/
|
||||
|
||||
#include <wchar.h>
|
||||
@ -10,19 +11,23 @@
|
||||
#include "putty.h"
|
||||
#include "misc.h"
|
||||
|
||||
char *dup_wc_to_mb_c(int codepage, int flags, const wchar_t *string, int len,
|
||||
const char *defchr)
|
||||
char *dup_wc_to_mb_c(int codepage, int flags, const wchar_t *string,
|
||||
size_t inlen, const char *defchr, size_t *outlen_p)
|
||||
{
|
||||
size_t outsize = len+1;
|
||||
assert(inlen <= INT_MAX);
|
||||
|
||||
size_t outsize = inlen+1;
|
||||
char *out = snewn(outsize, char);
|
||||
|
||||
while (true) {
|
||||
size_t outlen = wc_to_mb(codepage, flags, string, len, out, outsize,
|
||||
size_t outlen = wc_to_mb(codepage, flags, string, inlen, out, outsize,
|
||||
defchr);
|
||||
/* We can only be sure we've consumed the whole input if the
|
||||
* output is not within a multibyte-character-length of the
|
||||
* end of the buffer! */
|
||||
if (outlen < outsize && outsize - outlen > MB_LEN_MAX) {
|
||||
if (outlen_p)
|
||||
*outlen_p = outlen;
|
||||
out[outlen] = '\0';
|
||||
return out;
|
||||
}
|
||||
@ -34,5 +39,6 @@ char *dup_wc_to_mb_c(int codepage, int flags, const wchar_t *string, int len,
|
||||
char *dup_wc_to_mb(int codepage, int flags, const wchar_t *string,
|
||||
const char *defchr)
|
||||
{
|
||||
return dup_wc_to_mb_c(codepage, flags, string, wcslen(string), defchr);
|
||||
return dup_wc_to_mb_c(codepage, flags, string, wcslen(string),
|
||||
defchr, NULL);
|
||||
}
|
||||
|
@ -18,6 +18,7 @@ prompts_t *new_prompts(void)
|
||||
p->callback = NULL;
|
||||
p->callback_ctx = NULL;
|
||||
p->ldisc_ptr_to_us = NULL;
|
||||
p->utf8 = false;
|
||||
return p;
|
||||
}
|
||||
|
||||
|
@ -39,7 +39,9 @@ void console_print_error_msg(const char *prefix, const char *msg)
|
||||
* In PuTTY 0.78 and before, these prompts used the standard I/O
|
||||
* handles. But this means you can't redirect Plink's actual stdin
|
||||
* from a sensible data channel without the responses to login prompts
|
||||
* unwantedly being read from it too.
|
||||
* unwantedly being read from it too. Also, if you have a real
|
||||
* console handle then you can read from it in Unicode mode, which is
|
||||
* an option not available for any old file handle.
|
||||
*
|
||||
* However, many versions of PuTTY have worked the old way, so we need
|
||||
* a method of falling back to it for the sake of whoever's workflow
|
||||
@ -53,21 +55,30 @@ bool console_set_stdio_prompts(bool newvalue)
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
 * Global switch for UTF-8 console handling. Defaults to true;
 * conio_setup() ANDs it with the caller's request to produce each
 * ConsoleIO's own utf8 flag, so clearing it disables UTF-8 everywhere.
 */
static bool conio_use_utf8 = true;
|
||||
/*
 * Enable (newvalue == true) or disable legacy, non-UTF-8 charset
 * handling by setting the global switch to the inverse of newvalue.
 * Always returns true, meaning the option is supported here.
 */
bool console_set_legacy_charset_handling(bool newvalue)
|
||||
{
|
||||
conio_use_utf8 = !newvalue;
|
||||
return true;
|
||||
}
|
||||
|
||||
typedef struct ConsoleIO {
|
||||
HANDLE hin, hout;
|
||||
bool need_close_hin, need_close_hout;
|
||||
bool hin_is_console, hout_is_console;
|
||||
bool utf8;
|
||||
BinarySink_IMPLEMENTATION;
|
||||
} ConsoleIO;
|
||||
|
||||
static void console_write(BinarySink *bs, const void *data, size_t len);
|
||||
|
||||
static ConsoleIO *conio_setup(void)
|
||||
static ConsoleIO *conio_setup(bool utf8)
|
||||
{
|
||||
ConsoleIO *conio = snew(ConsoleIO);
|
||||
|
||||
conio->hin = conio->hout = INVALID_HANDLE_VALUE;
|
||||
conio->need_close_hin = conio->need_close_hout = false;
|
||||
conio->utf8 = utf8 && conio_use_utf8;
|
||||
|
||||
/*
|
||||
* First try opening the console itself, so that prompts will go
|
||||
@ -132,13 +143,56 @@ static void console_write(BinarySink *bs, const void *data, size_t len)
|
||||
{
|
||||
ConsoleIO *conio = BinarySink_DOWNCAST(bs, ConsoleIO);
|
||||
|
||||
const char *cdata = (const char *)data;
|
||||
size_t pos = 0;
|
||||
DWORD nwritten;
|
||||
/* Test this ConsoleIO's own flag, not the global default: conio_setup()
 * stores (caller's utf8 request && conio_use_utf8) in conio->utf8, so
 * objects created with conio_setup(false) must keep writing raw bytes. */
if (conio->utf8) {
|
||||
/*
|
||||
* Convert the UTF-8 input into a wide string.
|
||||
*/
|
||||
size_t wlen;
|
||||
wchar_t *wide = dup_mb_to_wc_c(CP_UTF8, 0, data, len, &wlen);
|
||||
if (conio->hout_is_console) {
|
||||
/*
|
||||
* To write UTF-8 to a console, use WriteConsoleW on the
|
||||
* wide string we've just made.
|
||||
*/
|
||||
size_t pos = 0;
|
||||
DWORD nwritten;
|
||||
|
||||
while (pos < len && WriteFile(conio->hout, cdata+pos, len-pos,
|
||||
&nwritten, NULL))
|
||||
pos += nwritten;
|
||||
while (pos < wlen && WriteConsoleW(conio->hout, wide+pos, wlen-pos,
|
||||
&nwritten, NULL))
|
||||
pos += nwritten;
|
||||
} else {
|
||||
/*
|
||||
* To write a string encoded in UTF-8 to any other file
|
||||
* handle, the best we can do is to convert it into the
|
||||
* system code page. This will lose some characters, but
|
||||
* what else can you do?
|
||||
*/
|
||||
size_t clen;
|
||||
char *sys_cp = dup_wc_to_mb_c(CP_ACP, 0, wide, wlen, "?", &clen);
|
||||
size_t pos = 0;
|
||||
DWORD nwritten;
|
||||
|
||||
while (pos < clen && WriteFile(conio->hout, sys_cp+pos, clen-pos,
|
||||
&nwritten, NULL))
|
||||
pos += nwritten;
|
||||
|
||||
burnstr(sys_cp);
|
||||
}
|
||||
|
||||
burnwcs(wide);
|
||||
} else {
|
||||
/*
|
||||
* If we're in legacy non-UTF-8 mode, just send the bytes
|
||||
* we're given to the file handle without trying to be clever.
|
||||
*/
|
||||
const char *cdata = (const char *)data;
|
||||
size_t pos = 0;
|
||||
DWORD nwritten;
|
||||
|
||||
while (pos < len && WriteFile(conio->hout, cdata+pos, len-pos,
|
||||
&nwritten, NULL))
|
||||
pos += nwritten;
|
||||
}
|
||||
}
|
||||
|
||||
static bool console_read_line_to_strbuf(ConsoleIO *conio, bool echo,
|
||||
@ -166,13 +220,56 @@ static bool console_read_line_to_strbuf(ConsoleIO *conio, bool echo,
|
||||
goto out;
|
||||
}
|
||||
|
||||
char buf[4096];
|
||||
DWORD nread;
|
||||
if (!ReadFile(conio->hin, buf, lenof(buf), &nread, NULL))
|
||||
goto out;
|
||||
/* Test this ConsoleIO's own flag, not the global default: conio_setup()
 * stores (caller's utf8 request && conio_use_utf8) in conio->utf8, so
 * only prompts that asked for UTF-8 get their input transcoded. */
if (conio->utf8) {
|
||||
wchar_t wbuf[4096];
|
||||
size_t wlen;
|
||||
|
||||
put_data(sb, buf, nread);
|
||||
smemclr(buf, sizeof(buf));
|
||||
if (conio->hin_is_console) {
|
||||
/*
|
||||
* To read UTF-8 from a console, read wide character data
|
||||
* via ReadConsoleW, and convert it to UTF-8.
|
||||
*/
|
||||
DWORD nread;
|
||||
if (!ReadConsoleW(conio->hin, wbuf, lenof(wbuf), &nread, NULL))
|
||||
goto out;
|
||||
wlen = nread;
|
||||
} else {
|
||||
/*
|
||||
* To read UTF-8 from an ordinary file handle, read it
|
||||
* as normal bytes and then convert from CP_ACP to
|
||||
* UTF-8, in the reverse of what we did above for
|
||||
* output.
|
||||
*/
|
||||
char buf[4096];
|
||||
DWORD nread;
|
||||
if (!ReadFile(conio->hin, buf, lenof(buf), &nread, NULL))
|
||||
goto out;
|
||||
|
||||
wlen = mb_to_wc(CP_ACP, 0, buf, nread, wbuf, lenof(wbuf));
|
||||
smemclr(buf, sizeof(buf));
|
||||
}
|
||||
|
||||
/* Allocate the maximum space in the strbuf that might be
|
||||
* needed for this data */
|
||||
size_t oldlen = sb->len, maxout = wlen * 4;
|
||||
void *outptr = strbuf_append(sb, maxout);
|
||||
size_t newlen = oldlen + wc_to_mb(CP_UTF8, 0, wbuf, wlen,
|
||||
outptr, maxout, NULL);
|
||||
strbuf_shrink_to(sb, newlen);
|
||||
smemclr(wbuf, sizeof(wbuf));
|
||||
} else {
|
||||
/*
|
||||
* If we're in legacy non-UTF-8 mode, just read bytes
|
||||
* directly from the file handle into the output strbuf.
|
||||
*/
|
||||
char buf[4096];
|
||||
DWORD nread;
|
||||
if (!ReadFile(conio->hin, buf, lenof(buf), &nread, NULL))
|
||||
goto out;
|
||||
|
||||
put_data(sb, buf, nread);
|
||||
smemclr(buf, sizeof(buf));
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
@ -245,7 +342,7 @@ SeatPromptResult console_confirm_ssh_host_key(
|
||||
char *keystr, SeatDialogText *text, HelpCtx helpctx,
|
||||
void (*callback)(void *ctx, SeatPromptResult result), void *ctx)
|
||||
{
|
||||
ConsoleIO *conio = conio_setup();
|
||||
ConsoleIO *conio = conio_setup(false);
|
||||
const char *prompt = NULL;
|
||||
SeatPromptResult result;
|
||||
|
||||
@ -328,7 +425,7 @@ SeatPromptResult console_confirm_weak_crypto_primitive(
|
||||
Seat *seat, const char *algtype, const char *algname,
|
||||
void (*callback)(void *ctx, SeatPromptResult result), void *ctx)
|
||||
{
|
||||
ConsoleIO *conio = conio_setup();
|
||||
ConsoleIO *conio = conio_setup(false);
|
||||
SeatPromptResult result;
|
||||
|
||||
put_fmt(conio, weakcrypto_msg_common_fmt, algtype, algname);
|
||||
@ -360,7 +457,7 @@ SeatPromptResult console_confirm_weak_cached_hostkey(
|
||||
Seat *seat, const char *algname, const char *betteralgs,
|
||||
void (*callback)(void *ctx, SeatPromptResult result), void *ctx)
|
||||
{
|
||||
ConsoleIO *conio = conio_setup();
|
||||
ConsoleIO *conio = conio_setup(false);
|
||||
SeatPromptResult result;
|
||||
|
||||
put_fmt(conio, weakhk_msg_common_fmt, algname, betteralgs);
|
||||
@ -390,7 +487,7 @@ SeatPromptResult console_confirm_weak_cached_hostkey(
|
||||
|
||||
bool is_interactive(void)
|
||||
{
|
||||
ConsoleIO *conio = conio_setup();
|
||||
ConsoleIO *conio = conio_setup(false);
|
||||
bool toret = conio->hin_is_console;
|
||||
conio_free(conio);
|
||||
return toret;
|
||||
@ -457,7 +554,7 @@ int console_askappend(LogPolicy *lp, Filename *filename,
|
||||
"The session log file \"%.*s\" already exists.\n"
|
||||
"Logging will not be enabled.\n";
|
||||
|
||||
ConsoleIO *conio = conio_setup();
|
||||
ConsoleIO *conio = conio_setup(false);
|
||||
int result;
|
||||
|
||||
if (console_batch_mode) {
|
||||
@ -549,7 +646,7 @@ StripCtrlChars *console_stripctrl_new(
|
||||
|
||||
SeatPromptResult console_get_userpass_input(prompts_t *p)
|
||||
{
|
||||
ConsoleIO *conio = conio_setup();
|
||||
ConsoleIO *conio = conio_setup(p->utf8);
|
||||
SeatPromptResult result;
|
||||
size_t curr_prompt;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user