1
0
mirror of https://git.tartarus.org/simon/putty.git synced 2025-01-09 09:27:59 +00:00

StripCtrlChars: option to provide a target Terminal.

If you use the new stripctrl_new_term() to construct a StripCtrlChars
instead of the existing stripctrl_new(), then the resulting object
will align itself with the character-set configuration of the Terminal
object you point it at. (In fact, it'll reuse the same actual
translation code, courtesy of the last few refactoring commits.) So it
will interpret things as control characters precisely if that Terminal
would also have done so.

The previous locale-based sanitisation is appropriate if you're
sending the sanitised output to an OS terminal device managed outside
this process - the LC_CTYPE setting has the best chance of knowing how
that terminal device will interpret a byte stream. But I want to start
using the same sanitisation system for data intended for PuTTY's own
internal terminal emulator, in which case there's no reason why
LC_CTYPE should be expected to match that terminal's configuration,
and no reason to need it to either since we can check the internal
terminal configuration directly.

One small bodge: stripctrl_new_term() is actually a macro, which
passes in the function pointer term_translate() to the underlying real
constructor. That's just so that console-only tools can link in
stripctrl.c without acquiring a dependency on terminal.c (similarly to
how we pass random_read in to the mp_random functions).
This commit is contained in:
Simon Tatham 2019-03-04 20:58:44 +00:00
parent 511eea450a
commit e74790003c
3 changed files with 134 additions and 17 deletions

10
Recipe
View File

@ -241,7 +241,7 @@ CFLAGS += -DWINVER=0x0500 -D_WIN32_WINDOWS=0x0410 -D_WIN32_WINNT=0x0500
CONF = conf marshal
# Terminal emulator and its (platform-independent) dependencies.
TERMINAL = terminal wcwidth ldiscucs logging tree234 minibidi
TERMINAL = terminal stripctrl wcwidth ldiscucs logging tree234 minibidi
+ config dialog CONF
# GUI front end and terminal emulator (putty, puttytel).
@ -281,11 +281,11 @@ WINSSH = SSH winnoise wincapi winpgntc wingss winshare winnps winnpc
UXSSH = SSH uxnoise uxagentc uxgss uxshare
# SFTP implementation (pscp, psftp).
SFTP = sftp sftpcommon logging cmdline stripctrl
SFTP = sftp sftpcommon logging cmdline
# Miscellaneous objects appearing in all the utilities, or all the
# network ones, or the Unix or Windows subsets of those in turn.
MISC = misc utils marshal memory
MISC = misc utils marshal memory stripctrl
MISCNETCOMMON = timing callback MISC version tree234 CONF
MISCNET = MISCNETCOMMON be_misc settings proxy
WINMISC = MISCNET winstore winnet winhandl cmdline windefs winmisc winproxy
@ -333,7 +333,7 @@ U_BE_NOSSH = be_nos_s uxser nocproxy
putty : [G] GUITERM NONSSH WINSSH W_BE_ALL WINMISC winx11 putty.res LIBS
puttytel : [G] GUITERM NONSSH W_BE_NOSSH WINMISC puttytel.res nogss LIBS
plink : [C] winplink wincons NONSSH WINSSH W_BE_ALL logging WINMISC
+ winx11 plink.res winnojmp sessprep stripctrl noterm LIBS
+ winx11 plink.res winnojmp sessprep noterm LIBS
pscp : [C] pscp winsftp wincons WINSSH BE_SSH SFTP wildcard WINMISC
+ pscp.res winnojmp LIBS
psftp : [C] psftp winsftp wincons WINSSH BE_SSH SFTP wildcard WINMISC
@ -361,7 +361,7 @@ puttytel : [X] GTKTERM uxmisc misc ldisc settings uxsel U_BE_NOSSH
+ nogss utils memory GTKMAIN
plink : [U] uxplink uxcons NONSSH UXSSH U_BE_ALL logging UXMISC uxsignal
+ ux_x11 noterm uxnogtk sessprep cmdline stripctrl
+ ux_x11 noterm uxnogtk sessprep cmdline
PUTTYGEN_UNIX = sshrsag sshdssg sshprime sshdes ARITH sshmd5 version sshprng
+ sshrand uxnoise sshsha MISC sshrsa sshdss uxcons uxstore uxmisc

6
misc.h
View File

@ -379,6 +379,12 @@ struct StripCtrlChars {
};
StripCtrlChars *stripctrl_new(
BinarySink *bs_out, bool permit_cr, wchar_t substitution);
StripCtrlChars *stripctrl_new_term_fn(
BinarySink *bs_out, bool permit_cr, wchar_t substitution,
Terminal *term, unsigned long (*translate)(
Terminal *, term_utf8_decode *, unsigned char));
#define stripctrl_new_term(bs, cr, sub, term) \
stripctrl_new_term_fn(bs, cr, sub, term, term_translate)
void stripctrl_free(StripCtrlChars *sanpub);
char *stripctrl_string_ptrlen(ptrlen str);
static inline char *stripctrl_string(const char *str)

View File

@ -11,7 +11,8 @@
#include <wchar.h>
#include <wctype.h>
#include "defs.h"
#include "putty.h"
#include "terminal.h"
#include "misc.h"
#include "marshal.h"
@ -27,15 +28,22 @@ struct StripCtrlCharsImpl {
char buf[SCC_BUFSIZE];
size_t buflen;
Terminal *term;
bool last_term_utf;
struct term_utf8_decode utf8;
unsigned long (*translate)(Terminal *, term_utf8_decode *, unsigned char);
BinarySink *bs_out;
StripCtrlChars public;
};
static void stripctrl_BinarySink_write(
static void stripctrl_locale_BinarySink_write(
BinarySink *bs, const void *vp, size_t len);
static void stripctrl_term_BinarySink_write(
BinarySink *bs, const void *vp, size_t len);
StripCtrlChars *stripctrl_new(
static StripCtrlCharsImpl *stripctrl_new_common(
BinarySink *bs_out, bool permit_cr, wchar_t substitution)
{
StripCtrlCharsImpl *scc = snew(StripCtrlCharsImpl);
@ -43,7 +51,28 @@ StripCtrlChars *stripctrl_new(
scc->bs_out = bs_out;
scc->permit_cr = permit_cr;
scc->substitution = substitution;
BinarySink_INIT(&scc->public, stripctrl_BinarySink_write);
return scc;
}
StripCtrlChars *stripctrl_new(
BinarySink *bs_out, bool permit_cr, wchar_t substitution)
{
StripCtrlCharsImpl *scc = stripctrl_new_common(
bs_out, permit_cr, substitution);
BinarySink_INIT(&scc->public, stripctrl_locale_BinarySink_write);
return &scc->public;
}
StripCtrlChars *stripctrl_new_term_fn(
BinarySink *bs_out, bool permit_cr, wchar_t substitution,
Terminal *term, unsigned long (*translate)(
Terminal *, term_utf8_decode *, unsigned char))
{
StripCtrlCharsImpl *scc = stripctrl_new_common(
bs_out, permit_cr, substitution);
scc->term = term;
scc->translate = translate;
BinarySink_INIT(&scc->public, stripctrl_term_BinarySink_write);
return &scc->public;
}
@ -55,9 +84,14 @@ void stripctrl_free(StripCtrlChars *sccpub)
sfree(scc);
}
static inline void stripctrl_put_wc(StripCtrlCharsImpl *scc, wchar_t wc)
static inline bool stripctrl_ctrlchar_ok(StripCtrlCharsImpl *scc, wchar_t wc)
{
if (wc == L'\n' || (wc == L'\r' && scc->permit_cr) || iswprint(wc)) {
return wc == L'\n' || (wc == L'\r' && scc->permit_cr);
}
static inline void stripctrl_locale_put_wc(StripCtrlCharsImpl *scc, wchar_t wc)
{
if (iswprint(wc) || stripctrl_ctrlchar_ok(scc, wc)) {
/* Printable character, or one we're going to let through anyway. */
} else if (scc->substitution) {
wc = scc->substitution;
@ -72,7 +106,54 @@ static inline void stripctrl_put_wc(StripCtrlCharsImpl *scc, wchar_t wc)
put_data(scc->bs_out, outbuf, produced);
}
static inline size_t stripctrl_try_consume(
static inline void stripctrl_term_put_wc(
StripCtrlCharsImpl *scc, unsigned long wc)
{
if (!(wc & ~0x9F)) {
/* This is something the terminal interprets as a control
* character. */
if (!stripctrl_ctrlchar_ok(scc, wc)) {
if (!scc->substitution)
return;
else
wc = scc->substitution;
}
if (wc == '\012') {
/* Precede \n with \r, because our terminal will not
* generally be in the ONLCR mode where it assumes that
* internally, and any \r on input has been stripped
* out. */
put_datapl(scc->bs_out, PTRLEN_LITERAL("\r"));
}
}
char outbuf[6];
size_t produced;
/*
* The Terminal implementation encodes 7-bit ASCII characters in
* UTF-8 mode, and all printing characters in non-UTF-8 (i.e.
* single-byte character set) mode, as values in the surrogate
* range (a conveniently unused piece of space in this context)
* whose low byte is the original 1-byte representation of the
* character.
*/
if ((wc - 0xD800) < (0xE000 - 0xD800))
wc &= 0xFF;
if (in_utf(scc->term)) {
produced = encode_utf8(outbuf, wc);
} else {
outbuf[0] = wc;
produced = 1;
}
if (produced > 0)
put_data(scc->bs_out, outbuf, produced);
}
static inline size_t stripctrl_locale_try_consume(
StripCtrlCharsImpl *scc, const char *p, size_t len)
{
wchar_t wc;
@ -115,7 +196,7 @@ static inline size_t stripctrl_try_consume(
* some way other than a single zero byte - then probably lots
* of other things will have gone wrong before we get here!)
*/
stripctrl_put_wc(scc, L'\0');
stripctrl_locale_put_wc(scc, L'\0');
return 1;
}
@ -123,11 +204,11 @@ static inline size_t stripctrl_try_consume(
* Otherwise, this is the easy case: consumed > 0, and we've eaten
* a valid multibyte character.
*/
stripctrl_put_wc(scc, wc);
stripctrl_locale_put_wc(scc, wc);
return consumed;
}
static void stripctrl_BinarySink_write(
static void stripctrl_locale_BinarySink_write(
BinarySink *bs, const void *vp, size_t len)
{
StripCtrlChars *sccpub = BinarySink_DOWNCAST(bs, StripCtrlChars);
@ -148,7 +229,7 @@ static void stripctrl_BinarySink_write(
to_copy = len;
memcpy(scc->buf + scc->buflen, p, to_copy);
size_t consumed = stripctrl_try_consume(
size_t consumed = stripctrl_locale_try_consume(
scc, scc->buf, scc->buflen + to_copy);
if (consumed >= scc->buflen) {
@ -203,7 +284,7 @@ static void stripctrl_BinarySink_write(
* Now charge along the main string.
*/
while (len > 0) {
size_t consumed = stripctrl_try_consume(scc, p, len);
size_t consumed = stripctrl_locale_try_consume(scc, p, len);
if (consumed == 0)
break;
assert(consumed <= len);
@ -223,6 +304,36 @@ static void stripctrl_BinarySink_write(
setlocale(LC_CTYPE, previous_locale);
}
static void stripctrl_term_BinarySink_write(
BinarySink *bs, const void *vp, size_t len)
{
StripCtrlChars *sccpub = BinarySink_DOWNCAST(bs, StripCtrlChars);
StripCtrlCharsImpl *scc =
container_of(sccpub, StripCtrlCharsImpl, public);
bool utf = in_utf(scc->term);
if (utf != scc->last_term_utf) {
scc->last_term_utf = utf;
scc->utf8.state = 0;
}
for (const unsigned char *p = (const unsigned char *)vp;
len > 0; len--, p++) {
unsigned long t = scc->translate(scc->term, &scc->utf8, *p);
if (t == UCSTRUNCATED) {
stripctrl_term_put_wc(scc, 0xFFFD);
/* go round again */
t = scc->translate(scc->term, &scc->utf8, *p);
}
if (t == UCSINCOMPLETE)
continue;
if (t == UCSINVALID)
t = 0xFFFD;
stripctrl_term_put_wc(scc, t);
}
}
char *stripctrl_string_ptrlen(ptrlen str)
{
strbuf *out = strbuf_new();