From e74790003c48e44cd62bd67997144832ceb2c52e Mon Sep 17 00:00:00 2001
From: Simon Tatham
Date: Mon, 4 Mar 2019 20:58:44 +0000
Subject: [PATCH] StripCtrlChars: option to provide a target Terminal.

If you use the new stripctrl_new_term() to construct a StripCtrlChars
instead of the existing stripctrl_new(), then the resulting object
will align itself with the character-set configuration of the Terminal
object you point it at. (In fact, it'll reuse the same actual
translation code, courtesy of the last few refactoring commits.) So it
will interpret things as control characters precisely if that Terminal
would also have done so.

The previous locale-based sanitisation is appropriate if you're
sending the sanitised output to an OS terminal device managed outside
this process - the LC_CTYPE setting has the best chance of knowing how
that terminal device will interpret a byte stream. But I want to start
using the same sanitisation system for data intended for PuTTY's own
internal terminal emulator, in which case there's no reason why
LC_CTYPE should be expected to match that terminal's configuration,
and no reason to need it to either since we can check the internal
terminal configuration directly.

One small bodge: stripctrl_new_term() is actually a macro, which
passes in the function pointer term_translate() to the underlying real
constructor. That's just so that console-only tools can link in
stripctrl.c without acquiring a dependency on terminal.c (similarly to
how we pass random_read in to the mp_random functions).
---
 Recipe      |  10 ++--
 misc.h      |   6 +++
 stripctrl.c | 135 +++++++++++++++++++++++++++++++++++++++++++++++-----
 3 files changed, 134 insertions(+), 17 deletions(-)

diff --git a/Recipe b/Recipe
index 90f27c59..12c7acd3 100644
--- a/Recipe
+++ b/Recipe
@@ -241,7 +241,7 @@ CFLAGS += -DWINVER=0x0500 -D_WIN32_WINDOWS=0x0410 -D_WIN32_WINNT=0x0500
 CONF = conf marshal
 
 # Terminal emulator and its (platform-independent) dependencies.
-TERMINAL = terminal wcwidth ldiscucs logging tree234 minibidi +TERMINAL = terminal stripctrl wcwidth ldiscucs logging tree234 minibidi + config dialog CONF # GUI front end and terminal emulator (putty, puttytel). @@ -281,11 +281,11 @@ WINSSH = SSH winnoise wincapi winpgntc wingss winshare winnps winnpc UXSSH = SSH uxnoise uxagentc uxgss uxshare # SFTP implementation (pscp, psftp). -SFTP = sftp sftpcommon logging cmdline stripctrl +SFTP = sftp sftpcommon logging cmdline # Miscellaneous objects appearing in all the utilities, or all the # network ones, or the Unix or Windows subsets of those in turn. -MISC = misc utils marshal memory +MISC = misc utils marshal memory stripctrl MISCNETCOMMON = timing callback MISC version tree234 CONF MISCNET = MISCNETCOMMON be_misc settings proxy WINMISC = MISCNET winstore winnet winhandl cmdline windefs winmisc winproxy @@ -333,7 +333,7 @@ U_BE_NOSSH = be_nos_s uxser nocproxy putty : [G] GUITERM NONSSH WINSSH W_BE_ALL WINMISC winx11 putty.res LIBS puttytel : [G] GUITERM NONSSH W_BE_NOSSH WINMISC puttytel.res nogss LIBS plink : [C] winplink wincons NONSSH WINSSH W_BE_ALL logging WINMISC - + winx11 plink.res winnojmp sessprep stripctrl noterm LIBS + + winx11 plink.res winnojmp sessprep noterm LIBS pscp : [C] pscp winsftp wincons WINSSH BE_SSH SFTP wildcard WINMISC + pscp.res winnojmp LIBS psftp : [C] psftp winsftp wincons WINSSH BE_SSH SFTP wildcard WINMISC @@ -361,7 +361,7 @@ puttytel : [X] GTKTERM uxmisc misc ldisc settings uxsel U_BE_NOSSH + nogss utils memory GTKMAIN plink : [U] uxplink uxcons NONSSH UXSSH U_BE_ALL logging UXMISC uxsignal - + ux_x11 noterm uxnogtk sessprep cmdline stripctrl + + ux_x11 noterm uxnogtk sessprep cmdline PUTTYGEN_UNIX = sshrsag sshdssg sshprime sshdes ARITH sshmd5 version sshprng + sshrand uxnoise sshsha MISC sshrsa sshdss uxcons uxstore uxmisc diff --git a/misc.h b/misc.h index d301e2ca..cf6a2b09 100644 --- a/misc.h +++ b/misc.h @@ -379,6 +379,12 @@ struct StripCtrlChars { }; StripCtrlChars 
*stripctrl_new( BinarySink *bs_out, bool permit_cr, wchar_t substitution); +StripCtrlChars *stripctrl_new_term_fn( + BinarySink *bs_out, bool permit_cr, wchar_t substitution, + Terminal *term, unsigned long (*translate)( + Terminal *, term_utf8_decode *, unsigned char)); +#define stripctrl_new_term(bs, cr, sub, term) \ + stripctrl_new_term_fn(bs, cr, sub, term, term_translate) void stripctrl_free(StripCtrlChars *sanpub); char *stripctrl_string_ptrlen(ptrlen str); static inline char *stripctrl_string(const char *str) diff --git a/stripctrl.c b/stripctrl.c index 47cfedef..d9bed001 100644 --- a/stripctrl.c +++ b/stripctrl.c @@ -11,7 +11,8 @@ #include #include -#include "defs.h" +#include "putty.h" +#include "terminal.h" #include "misc.h" #include "marshal.h" @@ -27,15 +28,22 @@ struct StripCtrlCharsImpl { char buf[SCC_BUFSIZE]; size_t buflen; + Terminal *term; + bool last_term_utf; + struct term_utf8_decode utf8; + unsigned long (*translate)(Terminal *, term_utf8_decode *, unsigned char); + BinarySink *bs_out; StripCtrlChars public; }; -static void stripctrl_BinarySink_write( +static void stripctrl_locale_BinarySink_write( + BinarySink *bs, const void *vp, size_t len); +static void stripctrl_term_BinarySink_write( BinarySink *bs, const void *vp, size_t len); -StripCtrlChars *stripctrl_new( +static StripCtrlCharsImpl *stripctrl_new_common( BinarySink *bs_out, bool permit_cr, wchar_t substitution) { StripCtrlCharsImpl *scc = snew(StripCtrlCharsImpl); @@ -43,7 +51,28 @@ StripCtrlChars *stripctrl_new( scc->bs_out = bs_out; scc->permit_cr = permit_cr; scc->substitution = substitution; - BinarySink_INIT(&scc->public, stripctrl_BinarySink_write); + return scc; +} + +StripCtrlChars *stripctrl_new( + BinarySink *bs_out, bool permit_cr, wchar_t substitution) +{ + StripCtrlCharsImpl *scc = stripctrl_new_common( + bs_out, permit_cr, substitution); + BinarySink_INIT(&scc->public, stripctrl_locale_BinarySink_write); + return &scc->public; +} + +StripCtrlChars 
*stripctrl_new_term_fn( + BinarySink *bs_out, bool permit_cr, wchar_t substitution, + Terminal *term, unsigned long (*translate)( + Terminal *, term_utf8_decode *, unsigned char)) +{ + StripCtrlCharsImpl *scc = stripctrl_new_common( + bs_out, permit_cr, substitution); + scc->term = term; + scc->translate = translate; + BinarySink_INIT(&scc->public, stripctrl_term_BinarySink_write); return &scc->public; } @@ -55,9 +84,14 @@ void stripctrl_free(StripCtrlChars *sccpub) sfree(scc); } -static inline void stripctrl_put_wc(StripCtrlCharsImpl *scc, wchar_t wc) +static inline bool stripctrl_ctrlchar_ok(StripCtrlCharsImpl *scc, wchar_t wc) { - if (wc == L'\n' || (wc == L'\r' && scc->permit_cr) || iswprint(wc)) { + return wc == L'\n' || (wc == L'\r' && scc->permit_cr); +} + +static inline void stripctrl_locale_put_wc(StripCtrlCharsImpl *scc, wchar_t wc) +{ + if (iswprint(wc) || stripctrl_ctrlchar_ok(scc, wc)) { /* Printable character, or one we're going to let through anyway. */ } else if (scc->substitution) { wc = scc->substitution; @@ -72,7 +106,54 @@ static inline void stripctrl_put_wc(StripCtrlCharsImpl *scc, wchar_t wc) put_data(scc->bs_out, outbuf, produced); } -static inline size_t stripctrl_try_consume( +static inline void stripctrl_term_put_wc( + StripCtrlCharsImpl *scc, unsigned long wc) +{ + if (!(wc & ~0x9F)) { + /* This is something the terminal interprets as a control + * character. */ + if (!stripctrl_ctrlchar_ok(scc, wc)) { + if (!scc->substitution) + return; + else + wc = scc->substitution; + } + + if (wc == '\012') { + /* Precede \n with \r, because our terminal will not + * generally be in the ONLCR mode where it assumes that + * internally, and any \r on input has been stripped + * out. */ + put_datapl(scc->bs_out, PTRLEN_LITERAL("\r")); + } + } + + char outbuf[6]; + size_t produced; + + /* + * The Terminal implementation encodes 7-bit ASCII characters in + * UTF-8 mode, and all printing characters in non-UTF-8 (i.e. 
+ * single-byte character set) mode, as values in the surrogate + * range (a conveniently unused piece of space in this context) + * whose low byte is the original 1-byte representation of the + * character. + */ + if ((wc - 0xD800) < (0xE000 - 0xD800)) + wc &= 0xFF; + + if (in_utf(scc->term)) { + produced = encode_utf8(outbuf, wc); + } else { + outbuf[0] = wc; + produced = 1; + } + + if (produced > 0) + put_data(scc->bs_out, outbuf, produced); +} + +static inline size_t stripctrl_locale_try_consume( StripCtrlCharsImpl *scc, const char *p, size_t len) { wchar_t wc; @@ -115,7 +196,7 @@ static inline size_t stripctrl_try_consume( * some way other than a single zero byte - then probably lots * of other things will have gone wrong before we get here!) */ - stripctrl_put_wc(scc, L'\0'); + stripctrl_locale_put_wc(scc, L'\0'); return 1; } @@ -123,11 +204,11 @@ static inline size_t stripctrl_try_consume( * Otherwise, this is the easy case: consumed > 0, and we've eaten * a valid multibyte character. */ - stripctrl_put_wc(scc, wc); + stripctrl_locale_put_wc(scc, wc); return consumed; } -static void stripctrl_BinarySink_write( +static void stripctrl_locale_BinarySink_write( BinarySink *bs, const void *vp, size_t len) { StripCtrlChars *sccpub = BinarySink_DOWNCAST(bs, StripCtrlChars); @@ -148,7 +229,7 @@ static void stripctrl_BinarySink_write( to_copy = len; memcpy(scc->buf + scc->buflen, p, to_copy); - size_t consumed = stripctrl_try_consume( + size_t consumed = stripctrl_locale_try_consume( scc, scc->buf, scc->buflen + to_copy); if (consumed >= scc->buflen) { @@ -203,7 +284,7 @@ static void stripctrl_BinarySink_write( * Now charge along the main string. 
*/ while (len > 0) { - size_t consumed = stripctrl_try_consume(scc, p, len); + size_t consumed = stripctrl_locale_try_consume(scc, p, len); if (consumed == 0) break; assert(consumed <= len); @@ -223,6 +304,36 @@ static void stripctrl_BinarySink_write( setlocale(LC_CTYPE, previous_locale); } +static void stripctrl_term_BinarySink_write( + BinarySink *bs, const void *vp, size_t len) +{ + StripCtrlChars *sccpub = BinarySink_DOWNCAST(bs, StripCtrlChars); + StripCtrlCharsImpl *scc = + container_of(sccpub, StripCtrlCharsImpl, public); + + bool utf = in_utf(scc->term); + if (utf != scc->last_term_utf) { + scc->last_term_utf = utf; + scc->utf8.state = 0; + } + + for (const unsigned char *p = (const unsigned char *)vp; + len > 0; len--, p++) { + unsigned long t = scc->translate(scc->term, &scc->utf8, *p); + if (t == UCSTRUNCATED) { + stripctrl_term_put_wc(scc, 0xFFFD); + /* go round again */ + t = scc->translate(scc->term, &scc->utf8, *p); + } + if (t == UCSINCOMPLETE) + continue; + if (t == UCSINVALID) + t = 0xFFFD; + + stripctrl_term_put_wc(scc, t); + } +} + char *stripctrl_string_ptrlen(ptrlen str) { strbuf *out = strbuf_new();