Windows console prompts: optional Unicode I/O, plus -legacy-charset-handling.

Adds a 'utf8' flag to prompts_t (set by the SSH-2 userauth prompts), lets
dup_mb_to_wc_c / dup_wc_to_mb_c report the converted length, and makes the
Windows console prompt code use WriteConsoleW / ReadConsoleW (or convert
via CP_ACP for non-console handles) when a ConsoleIO is in UTF-8 mode.
The new -legacy-charset-handling option switches that off; the Unix and
no-console stubs return false, which cmdline.c reports as unavailable.

console_write and console_read_line_to_strbuf test the per-ConsoleIO flag
(conio->utf8), not the global, so callers that pass conio_setup(false)
keep the plain byte-oriented path.

NOTE(review): indentation and blank context lines were reconstructed from a
whitespace-collapsed copy of this patch; check them against the tree before
applying.

diff --git a/cmdline.c b/cmdline.c
index 645b6054..716a41b3 100644
--- a/cmdline.c
+++ b/cmdline.c
@@ -930,6 +930,16 @@ int cmdline_process_param(const char *p, char *value,
         }
     }
 
+    if (!strcmp(p, "-legacy-charset-handling") ||
+        !strcmp(p, "-legacy_charset_handling")) {
+        RETURN(1);
+        SAVEABLE(0);
+        if (!console_set_legacy_charset_handling(true)) {
+            cmdline_report_unavailable(p);
+            return ret;
+        }
+    }
+
 #ifdef _WINDOWS
     /*
      * Cross-tool options only available on Windows.
diff --git a/misc.h b/misc.h
index 46dded1b..e47f19f3 100644
--- a/misc.h
+++ b/misc.h
@@ -70,10 +70,11 @@ void strbuf_finalise_agent_query(strbuf *buf);
 
 /* String-to-Unicode converters that auto-allocate the destination and
  * work around the rather deficient interface of mb_to_wc. */
-wchar_t *dup_mb_to_wc_c(int codepage, int flags, const char *string, int len);
+wchar_t *dup_mb_to_wc_c(int codepage, int flags, const char *string,
+                        size_t len, size_t *outlen_p);
 wchar_t *dup_mb_to_wc(int codepage, int flags, const char *string);
-char *dup_wc_to_mb_c(int codepage, int flags, const wchar_t *string, int len,
-                     const char *defchr);
+char *dup_wc_to_mb_c(int codepage, int flags, const wchar_t *string,
+                     size_t len, const char *defchr, size_t *outlen_p);
 char *dup_wc_to_mb(int codepage, int flags, const wchar_t *string,
                    const char *defchr);
 
diff --git a/putty.h b/putty.h
index 101a8a29..1477a71d 100644
--- a/putty.h
+++ b/putty.h
@@ -996,6 +996,17 @@ struct prompts_t {
                          * seat_get_userpass_input(); initially NULL */
     SeatPromptResult spr; /* some implementations need to cache one of these */
 
+    /*
+     * Set this flag to indicate that the caller has encoded the
+     * prompts in UTF-8, and expects the responses to be UTF-8 too.
+     *
+     * Ideally this flag would be unnecessary because it would always
+     * be true, but for legacy reasons, we have to switch over a bit
+     * at a time from the old behaviour, and may never manage to get
+     * rid of it completely.
+     */
+    bool utf8;
+
     /*
      * Callback you can fill in to be notified when all the prompts'
      * responses are available. After you receive this notification, a
@@ -2564,6 +2575,7 @@ bool have_ssh_host_key(const char *host, int port, const char *keytype);
 extern bool console_batch_mode, console_antispoof_prompt;
 extern bool console_set_batch_mode(bool);
 extern bool console_set_stdio_prompts(bool);
+extern bool console_set_legacy_charset_handling(bool);
 SeatPromptResult console_get_userpass_input(prompts_t *p);
 bool is_interactive(void);
 void console_print_error_msg(const char *prefix, const char *msg);
diff --git a/ssh/userauth2-client.c b/ssh/userauth2-client.c
index b7e36371..d86c0043 100644
--- a/ssh/userauth2-client.c
+++ b/ssh/userauth2-client.c
@@ -752,6 +752,7 @@ static void ssh2_userauth_process_queue(PacketProtocolLayer *ppl)
              */
         } else if ((s->username = s->default_username) == NULL) {
             s->cur_prompt = ssh_ppl_new_prompts(&s->ppl);
+            s->cur_prompt->utf8 = true;
             s->cur_prompt->to_server = true;
             s->cur_prompt->from_server = false;
             s->cur_prompt->name = dupstr("SSH login name");
@@ -1816,6 +1817,7 @@ static void ssh2_userauth_process_queue(PacketProtocolLayer *ppl)
                 s->ppl.bpp->pls->actx = SSH2_PKTCTX_PASSWORD;
 
                 s->cur_prompt = ssh_ppl_new_prompts(&s->ppl);
+                s->cur_prompt->utf8 = true;
                 s->cur_prompt->to_server = true;
                 s->cur_prompt->from_server = false;
                 s->cur_prompt->name = dupstr("SSH password");
@@ -1904,6 +1906,7 @@ static void ssh2_userauth_process_queue(PacketProtocolLayer *ppl)
                     prompt = get_string(pktin);
 
                     s->cur_prompt = ssh_ppl_new_prompts(&s->ppl);
+                    s->cur_prompt->utf8 = true;
                     s->cur_prompt->to_server = true;
                     s->cur_prompt->from_server = false;
                     s->cur_prompt->name = dupstr("New SSH password");
@@ -2095,6 +2098,7 @@ static bool ssh2_userauth_ki_setup_prompts(
     inst = get_string(src);
     get_string(src); /* skip language tag */
     s->cur_prompt = ssh_ppl_new_prompts(&s->ppl);
+    s->cur_prompt->utf8 = true;
     s->cur_prompt->to_server = true;
     s->cur_prompt->from_server = true;
 
diff --git a/stubs/no-console.c b/stubs/no-console.c
index 580cfd70..5dde9c89 100644
--- a/stubs/no-console.c
+++ b/stubs/no-console.c
@@ -13,3 +13,8 @@ bool console_set_stdio_prompts(bool newvalue)
 {
     return false;
 }
+
+bool console_set_legacy_charset_handling(bool newvalue)
+{
+    return false;
+}
diff --git a/unix/console.c b/unix/console.c
index 3024fafa..00d3f6ac 100644
--- a/unix/console.c
+++ b/unix/console.c
@@ -578,6 +578,12 @@ bool console_set_stdio_prompts(bool newvalue)
     return false;
 }
 
+bool console_set_legacy_charset_handling(bool newvalue)
+{
+    /* This probably _will_ need to be supported, but isn't yet. */
+    return false;
+}
+
 /*
  * X11-forwarding-related things suitable for console.
  */
diff --git a/utils/dup_mb_to_wc.c b/utils/dup_mb_to_wc.c
index c3f17aba..f6c48975 100644
--- a/utils/dup_mb_to_wc.c
+++ b/utils/dup_mb_to_wc.c
@@ -2,20 +2,25 @@
  * dup_mb_to_wc: memory-allocating wrapper on mb_to_wc.
  *
  * Also dup_mb_to_wc_c: same but you already know the length of the
- * string.
+ * string, and you get told the length of the returned wide string.
+ * (But it's still NUL-terminated, for convenience.)
  */
 
 #include "putty.h"
 #include "misc.h"
 
-wchar_t *dup_mb_to_wc_c(int codepage, int flags, const char *string, int len)
+wchar_t *dup_mb_to_wc_c(int codepage, int flags, const char *string,
+                        size_t inlen, size_t *outlen_p)
 {
-    int mult;
+    assert(inlen <= INT_MAX);
+    size_t mult;
     for (mult = 1 ;; mult++) {
-        wchar_t *ret = snewn(mult*len + 2, wchar_t);
-        int outlen;
-        outlen = mb_to_wc(codepage, flags, string, len, ret, mult*len + 1);
-        if (outlen < mult*len+1) {
+        wchar_t *ret = snewn(mult*inlen + 2, wchar_t);
+        size_t outlen = mb_to_wc(codepage, flags, string, inlen, ret,
+                                 mult*inlen + 1);
+        if (outlen < mult*inlen+1) {
+            if (outlen_p)
+                *outlen_p = outlen;
             ret[outlen] = L'\0';
             return ret;
         }
@@ -25,5 +30,5 @@ wchar_t *dup_mb_to_wc_c(int codepage, int flags, const char *string, int len)
 
 wchar_t *dup_mb_to_wc(int codepage, int flags, const char *string)
 {
-    return dup_mb_to_wc_c(codepage, flags, string, strlen(string));
+    return dup_mb_to_wc_c(codepage, flags, string, strlen(string), NULL);
 }
diff --git a/utils/dup_wc_to_mb.c b/utils/dup_wc_to_mb.c
index 36088196..4a55803c 100644
--- a/utils/dup_wc_to_mb.c
+++ b/utils/dup_wc_to_mb.c
@@ -2,7 +2,8 @@
  * dup_wc_to_mb: memory-allocating wrapper on wc_to_mb.
  *
  * Also dup_wc_to_mb_c: same but you already know the length of the
- * string.
+ * wide string, and you get told the length of the returned string.
+ * (But it's still NUL-terminated, for convenience.)
  */
 
 #include <limits.h>
@@ -10,19 +11,23 @@
 #include "putty.h"
 #include "misc.h"
 
-char *dup_wc_to_mb_c(int codepage, int flags, const wchar_t *string, int len,
-                     const char *defchr)
+char *dup_wc_to_mb_c(int codepage, int flags, const wchar_t *string,
+                     size_t inlen, const char *defchr, size_t *outlen_p)
 {
-    size_t outsize = len+1;
+    assert(inlen <= INT_MAX);
+
+    size_t outsize = inlen+1;
     char *out = snewn(outsize, char);
 
     while (true) {
-        size_t outlen = wc_to_mb(codepage, flags, string, len, out, outsize,
+        size_t outlen = wc_to_mb(codepage, flags, string, inlen, out, outsize,
                                  defchr);
         /* We can only be sure we've consumed the whole input if the
          * output is not within a multibyte-character-length of the
          * end of the buffer! */
         if (outlen < outsize && outsize - outlen > MB_LEN_MAX) {
+            if (outlen_p)
+                *outlen_p = outlen;
             out[outlen] = '\0';
             return out;
         }
@@ -34,5 +39,6 @@ char *dup_wc_to_mb_c(int codepage, int flags, const wchar_t *string, int len,
 char *dup_wc_to_mb(int codepage, int flags, const wchar_t *string,
                    const char *defchr)
 {
-    return dup_wc_to_mb_c(codepage, flags, string, wcslen(string), defchr);
+    return dup_wc_to_mb_c(codepage, flags, string, wcslen(string),
+                          defchr, NULL);
 }
diff --git a/utils/prompts.c b/utils/prompts.c
index d0823334..2b70133a 100644
--- a/utils/prompts.c
+++ b/utils/prompts.c
@@ -18,6 +18,7 @@ prompts_t *new_prompts(void)
     p->callback = NULL;
     p->callback_ctx = NULL;
     p->ldisc_ptr_to_us = NULL;
+    p->utf8 = false;
     return p;
 }
 
diff --git a/windows/console.c b/windows/console.c
index 8355ebfa..886d1a69 100644
--- a/windows/console.c
+++ b/windows/console.c
@@ -39,7 +39,9 @@ void console_print_error_msg(const char *prefix, const char *msg)
  * In PuTTY 0.78 and before, these prompts used the standard I/O
  * handles. But this means you can't redirect Plink's actual stdin
  * from a sensible data channel without the responses to login prompts
- * unwantedly being read from it too.
+ * unwantedly being read from it too. Also, if you have a real
+ * console handle then you can read from it in Unicode mode, which is
+ * an option not available for any old file handle.
  *
  * However, many versions of PuTTY have worked the old way, so we need
  * a method of falling back to it for the sake of whoever's workflow
@@ -53,21 +55,30 @@ bool console_set_stdio_prompts(bool newvalue)
     return true;
 }
 
+static bool conio_use_utf8 = true; /* false after -legacy-charset-handling */
+bool console_set_legacy_charset_handling(bool newvalue)
+{
+    conio_use_utf8 = !newvalue;
+    return true;
+}
+
 typedef struct ConsoleIO {
     HANDLE hin, hout;
     bool need_close_hin, need_close_hout;
     bool hin_is_console, hout_is_console;
+    bool utf8; /* caller's text is UTF-8, and legacy mode is off */
     BinarySink_IMPLEMENTATION;
 } ConsoleIO;
 
 static void console_write(BinarySink *bs, const void *data, size_t len);
 
-static ConsoleIO *conio_setup(void)
+static ConsoleIO *conio_setup(bool utf8)
 {
     ConsoleIO *conio = snew(ConsoleIO);
 
     conio->hin = conio->hout = INVALID_HANDLE_VALUE;
     conio->need_close_hin = conio->need_close_hout = false;
+    conio->utf8 = utf8 && conio_use_utf8;
 
     /*
      * First try opening the console itself, so that prompts will go
@@ -132,13 +143,56 @@ static void console_write(BinarySink *bs, const void *data, size_t len)
 {
     ConsoleIO *conio = BinarySink_DOWNCAST(bs, ConsoleIO);
 
-    const char *cdata = (const char *)data;
-    size_t pos = 0;
-    DWORD nwritten;
+    if (conio->utf8) {
+        /*
+         * Convert the UTF-8 input into a wide string.
+         */
+        size_t wlen;
+        wchar_t *wide = dup_mb_to_wc_c(CP_UTF8, 0, data, len, &wlen);
+        if (conio->hout_is_console) {
+            /*
+             * To write UTF-8 to a console, use WriteConsoleW on the
+             * wide string we've just made.
+             */
+            size_t pos = 0;
+            DWORD nwritten;
 
-    while (pos < len && WriteFile(conio->hout, cdata+pos, len-pos,
-                                  &nwritten, NULL))
-        pos += nwritten;
+            while (pos < wlen && WriteConsoleW(conio->hout, wide+pos, wlen-pos,
+                                               &nwritten, NULL))
+                pos += nwritten;
+        } else {
+            /*
+             * To write a string encoded in UTF-8 to any other file
+             * handle, the best we can do is to convert it into the
+             * system code page. This will lose some characters, but
+             * what else can you do?
+             */
+            size_t clen;
+            char *sys_cp = dup_wc_to_mb_c(CP_ACP, 0, wide, wlen, "?", &clen);
+            size_t pos = 0;
+            DWORD nwritten;
+
+            while (pos < clen && WriteFile(conio->hout, sys_cp+pos, clen-pos,
+                                           &nwritten, NULL))
+                pos += nwritten;
+
+            burnstr(sys_cp);
+        }
+
+        burnwcs(wide);
+    } else {
+        /*
+         * If this ConsoleIO isn't in UTF-8 mode, just send the bytes
+         * we're given to the file handle without trying to be clever.
+         */
+        const char *cdata = (const char *)data;
+        size_t pos = 0;
+        DWORD nwritten;
+
+        while (pos < len && WriteFile(conio->hout, cdata+pos, len-pos,
+                                      &nwritten, NULL))
+            pos += nwritten;
+    }
 }
 
 static bool console_read_line_to_strbuf(ConsoleIO *conio, bool echo,
@@ -166,13 +220,56 @@ static bool console_read_line_to_strbuf(ConsoleIO *conio, bool echo,
             goto out;
         }
 
-        char buf[4096];
-        DWORD nread;
-        if (!ReadFile(conio->hin, buf, lenof(buf), &nread, NULL))
-            goto out;
+        if (conio->utf8) {
+            wchar_t wbuf[4096];
+            size_t wlen;
 
-        put_data(sb, buf, nread);
-        smemclr(buf, sizeof(buf));
+            if (conio->hin_is_console) {
+                /*
+                 * To read UTF-8 from a console, read wide character data
+                 * via ReadConsoleW, and convert it to UTF-8.
+                 */
+                DWORD nread;
+                if (!ReadConsoleW(conio->hin, wbuf, lenof(wbuf), &nread, NULL))
+                    goto out;
+                wlen = nread;
+            } else {
+                /*
+                 * To read UTF-8 from an ordinary file handle, read it
+                 * as normal bytes and then convert from CP_ACP to
+                 * UTF-8, in the reverse of what we did above for
+                 * output.
+                 */
+                char buf[4096];
+                DWORD nread;
+                if (!ReadFile(conio->hin, buf, lenof(buf), &nread, NULL))
+                    goto out;
+
+                wlen = mb_to_wc(CP_ACP, 0, buf, nread, wbuf, lenof(wbuf));
+                smemclr(buf, sizeof(buf));
+            }
+
+            /* Allocate the maximum space in the strbuf that might be
+             * needed for this data */
+            size_t oldlen = sb->len, maxout = wlen * 4;
+            void *outptr = strbuf_append(sb, maxout);
+            size_t newlen = oldlen + wc_to_mb(CP_UTF8, 0, wbuf, wlen,
+                                              outptr, maxout, NULL);
+            strbuf_shrink_to(sb, newlen);
+            smemclr(wbuf, sizeof(wbuf));
+        } else {
+            /*
+             * If this ConsoleIO isn't in UTF-8 mode, just read bytes
+             * directly from the file handle into the output strbuf.
+             */
+            char buf[4096];
+            DWORD nread;
+            if (!ReadFile(conio->hin, buf, lenof(buf), &nread, NULL))
+                goto out;
+
+            put_data(sb, buf, nread);
+            smemclr(buf, sizeof(buf));
+        }
     }
 
   out:
@@ -245,7 +342,7 @@ SeatPromptResult console_confirm_ssh_host_key(
     char *keystr, SeatDialogText *text, HelpCtx helpctx,
     void (*callback)(void *ctx, SeatPromptResult result), void *ctx)
 {
-    ConsoleIO *conio = conio_setup();
+    ConsoleIO *conio = conio_setup(false);
     const char *prompt = NULL;
     SeatPromptResult result;
 
@@ -328,7 +425,7 @@ SeatPromptResult console_confirm_weak_crypto_primitive(
     Seat *seat, const char *algtype, const char *algname,
     void (*callback)(void *ctx, SeatPromptResult result), void *ctx)
 {
-    ConsoleIO *conio = conio_setup();
+    ConsoleIO *conio = conio_setup(false);
     SeatPromptResult result;
 
     put_fmt(conio, weakcrypto_msg_common_fmt, algtype, algname);
@@ -360,7 +457,7 @@ SeatPromptResult console_confirm_weak_cached_hostkey(
     Seat *seat, const char *algname, const char *betteralgs,
     void (*callback)(void *ctx, SeatPromptResult result), void *ctx)
 {
-    ConsoleIO *conio = conio_setup();
+    ConsoleIO *conio = conio_setup(false);
     SeatPromptResult result;
 
     put_fmt(conio, weakhk_msg_common_fmt, algname, betteralgs);
@@ -390,7 +487,7 @@ SeatPromptResult console_confirm_weak_cached_hostkey(
 
 bool is_interactive(void)
 {
-    ConsoleIO *conio = conio_setup();
+    ConsoleIO *conio = conio_setup(false);
     bool toret = conio->hin_is_console;
     conio_free(conio);
     return toret;
@@ -457,7 +554,7 @@ int console_askappend(LogPolicy *lp, Filename *filename,
         "The session log file \"%.*s\" already exists.\n"
         "Logging will not be enabled.\n";
 
-    ConsoleIO *conio = conio_setup();
+    ConsoleIO *conio = conio_setup(false);
     int result;
 
     if (console_batch_mode) {
@@ -549,7 +646,7 @@ StripCtrlChars *console_stripctrl_new(
 
 SeatPromptResult console_get_userpass_input(prompts_t *p)
 {
-    ConsoleIO *conio = conio_setup();
+    ConsoleIO *conio = conio_setup(p->utf8);
     SeatPromptResult result;
     size_t curr_prompt;
 