1
0
mirror of https://git.tartarus.org/simon/putty.git synced 2025-01-25 01:02:24 +00:00
putty-source/windows/handle-socket.c

360 lines
10 KiB
C
Raw Normal View History

/*
* General mechanism for wrapping up reading/writing of Windows
* HANDLEs into a PuTTY Socket abstraction.
*/
#include <stdio.h>
#include <assert.h>
#include <limits.h>
#include "tree234.h"
#include "putty.h"
#include "network.h"
typedef struct HandleSocket {
HANDLE send_H, recv_H, stderr_H;
struct handle *send_h, *recv_h, *stderr_h;
/*
* Freezing one of these sockets is a slightly fiddly business,
* because the reads from the handle are happening in a separate
* thread as blocking system calls and so once one is in progress
* it can't sensibly be interrupted. Hence, after the user tries
* to freeze one of these sockets, it's unavoidable that we may
* receive one more load of data before we manage to get
* winhandl.c to stop reading.
*/
enum {
UNFROZEN, /* reading as normal */
FREEZING, /* have been set to frozen but winhandl is still reading */
FROZEN, /* really frozen - winhandl has been throttled */
THAWING /* we're gradually releasing our remaining data */
} frozen;
/* We buffer data here if we receive it from winhandl while frozen. */
bufchain inputdata;
/* Handle logging proxy error messages from stderr_H, if we have one. */
ProxyStderrBuf psb;
Convert a lot of 'int' variables to 'bool'. My normal habit these days, in new code, is to treat int and bool as _almost_ completely separate types. I'm still willing to use C's implicit test for zero on an integer (e.g. 'if (!blob.len)' is fine, no need to spell it out as blob.len != 0), but generally, if a variable is going to be conceptually a boolean, I like to declare it bool and assign to it using 'true' or 'false' rather than 0 or 1. PuTTY is an exception, because it predates the C99 bool, and I've stuck to its existing coding style even when adding new code to it. But it's been annoying me more and more, so now that I've decided C99 bool is an acceptable thing to require from our toolchain in the first place, here's a quite thorough trawl through the source doing 'boolification'. Many variables and function parameters are now typed as bool rather than int; many assignments of 0 or 1 to those variables are now spelled 'true' or 'false'. I managed this thorough conversion with the help of a custom clang plugin that I wrote to trawl the AST and apply heuristics to point out where things might want changing. So I've even managed to do a decent job on parts of the code I haven't looked at in years! To make the plugin's work easier, I pushed platform front ends generally in the direction of using standard 'bool' in preference to platform-specific boolean types like Windows BOOL or GTK's gboolean; I've left the platform booleans in places they _have_ to be for the platform APIs to work right, but variables only used by my own code have been converted wherever I found them. In a few places there are int values that look very like booleans in _most_ of the places they're used, but have a rarely-used third value, or a distinction between different nonzero values that most users don't care about. 
In these cases, I've _removed_ uses of 'true' and 'false' for the return values, to emphasise that there's something more subtle going on than a simple boolean answer: - the 'multisel' field in dialog.h's list box structure, for which the GTK front end in particular recognises a difference between 1 and 2 but nearly everything else treats as boolean - the 'urgent' parameter to plug_receive, where 1 vs 2 tells you something about the specific location of the urgent pointer, but most clients only care about 0 vs 'something nonzero' - the return value of wc_match, where -1 indicates a syntax error in the wildcard. - the return values from SSH-1 RSA-key loading functions, which use -1 for 'wrong passphrase' and 0 for all other failures (so any caller which already knows it's not loading an _encrypted private_ key can treat them as boolean) - term->esc_query, and the 'query' parameter in toggle_mode in terminal.c, which _usually_ hold 0 for ESC[123h or 1 for ESC[?123h, but can also hold -1 for some other intervening character that we don't support. In a few places there's an integer that I haven't turned into a bool even though it really _can_ only take values 0 or 1 (and, as above, tried to make the call sites consistent in not calling those values true and false), on the grounds that I thought it would make it more confusing to imply that the 0 value was in some sense 'negative' or bad and the 1 positive or good: - the return value of plug_accepting uses the POSIXish convention of 0=success and nonzero=error; I think if I made it bool then I'd also want to reverse its sense, and that's a job for a separate piece of work. - the 'screen' parameter to lineptr() in terminal.c, where 0 and 1 represent the default and alternate screens. There's no obvious reason why one of those should be considered 'true' or 'positive' or 'success' - they're just indices - so I've left it as int. 
ssh_scp_recv had particularly confusing semantics for its previous int return value: its call sites used '<= 0' to check for error, but it never actually returned a negative number, just 0 or 1. Now the function and its call sites agree that it's a bool. In a couple of places I've renamed variables called 'ret', because I don't like that name any more - it's unclear whether it means the return value (in preparation) for the _containing_ function or the return value received from a subroutine call, and occasionally I've accidentally used the same variable for both and introduced a bug. So where one of those got in my way, I've renamed it to 'toret' or 'retd' (the latter short for 'returned') in line with my usual modern practice, but I haven't done a thorough job of finding all of them. Finally, one amusing side effect of doing this is that I've had to separate quite a few chained assignments. It used to be perfectly fine to write 'a = b = c = TRUE' when a,b,c were int and TRUE was just a the 'true' defined by stdbool.h, that idiom provokes a warning from gcc: 'suggest parentheses around assignment used as truth value'!
2018-11-02 19:23:19 +00:00
bool defer_close, deferred_close; /* in case of re-entrance */
char *error;
SockAddr *addr;
int port;
Plug *plug;
Socket sock;
} HandleSocket;
static size_t handle_gotdata(
struct handle *h, const void *data, size_t len, int err)
{
HandleSocket *hs = (HandleSocket *)handle_get_privdata(h);
if (err) {
plug_closing(hs->plug, "Read error from handle", 0, 0);
return 0;
} else if (len == 0) {
plug_closing(hs->plug, NULL, 0, 0);
return 0;
} else {
assert(hs->frozen != FROZEN && hs->frozen != THAWING);
if (hs->frozen == FREEZING) {
/*
* If we've received data while this socket is supposed to
* be frozen (because the read winhandl.c started before
* sk_set_frozen was called has now returned) then buffer
* the data for when we unfreeze.
*/
bufchain_add(&hs->inputdata, data, len);
hs->frozen = FROZEN;
/*
* And return a very large backlog, to prevent further
* data arriving from winhandl until we unfreeze.
*/
return INT_MAX;
} else {
plug_receive(hs->plug, 0, data, len);
return 0;
}
}
}
/*
 * Callback invoked with data read from stderr_H. Successful,
 * non-empty reads are passed to log_proxy_stderr; errors and empty
 * reads are ignored. Always returns 0.
 */
static size_t handle_stderr(
    struct handle *h, const void *data, size_t len, int err)
{
    HandleSocket *hsock = (HandleSocket *)handle_get_privdata(h);

    if (err || len == 0)
        return 0;

    log_proxy_stderr(hsock->plug, &hsock->psb, data, len);
    return 0;
}
/*
 * Callback invoked when the state of the outgoing handle changes.
 * On success the new backlog figure is forwarded to the plug via
 * plug_sent; on error the plug is told the connection is closing,
 * with the text from win_strerror and the error code itself.
 */
static void handle_sentdata(struct handle *h, size_t new_backlog, int err)
{
    HandleSocket *hsock = (HandleSocket *)handle_get_privdata(h);

    if (!err) {
        plug_sent(hsock->plug, new_backlog);
    } else {
        plug_closing(hsock->plug, win_strerror(err), err, 0);
    }
}
/*
 * Socket vtable 'plug' method: return the plug currently attached to
 * this socket and, if 'p' is non-null, install 'p' in its place.
 */
static Plug *sk_handle_plug(Socket *s, Plug *p)
{
    HandleSocket *hsock = container_of(s, HandleSocket, sock);
    Plug *previous = hsock->plug;

    if (p != NULL)
        hsock->plug = p;

    return previous;
}
/*
 * Socket vtable 'close' method: release everything owned by the
 * socket and free it.
 *
 * If we are currently inside a call out to the plug (defer_close is
 * set), the close is only recorded in deferred_close; the code that
 * set defer_close performs the real close afterwards.
 */
static void sk_handle_close(Socket *s)
{
    HandleSocket *hs = container_of(s, HandleSocket, sock);

    if (hs->defer_close) {
        hs->deferred_close = true;
        return;
    }

    handle_free(hs->send_h);
    handle_free(hs->recv_h);
    /*
     * The stderr wrapper holds a pointer back to hs as its private
     * data, so it must be released too before hs is freed. stderr_h
     * is only meaningful when stderr_H is non-null, so test the
     * latter.
     */
    if (hs->stderr_H)
        handle_free(hs->stderr_h);

    CloseHandle(hs->send_H);
    if (hs->recv_H != hs->send_H)
        CloseHandle(hs->recv_H);
    /* Close the stderr handle as well, unless it aliases one of the
     * handles we have just closed. */
    if (hs->stderr_H &&
        hs->stderr_H != hs->send_H && hs->stderr_H != hs->recv_H)
        CloseHandle(hs->stderr_H);

    bufchain_clear(&hs->inputdata);

    if (hs->addr)
        sk_addr_free(hs->addr);

    /* Cancel any queued top-level callbacks that still refer to hs. */
    delete_callbacks_for_context(hs);

    sfree(hs);
}
/*
 * Socket vtable 'write' method: pass the data to handle_write on the
 * outgoing handle and return whatever that call returns.
 */
static size_t sk_handle_write(Socket *s, const void *data, size_t len)
{
    HandleSocket *hsock = container_of(s, HandleSocket, sock);
    size_t result = handle_write(hsock->send_h, data, len);

    return result;
}
/*
 * Socket vtable 'write_oob' method. This socket type has no separate
 * out-of-band channel, so the data is simply sent in-band: nasty, but
 * there is nothing really better we can do.
 */
static size_t sk_handle_write_oob(Socket *s, const void *data, size_t len)
{
    size_t result = sk_handle_write(s, data, len);

    return result;
}
/*
 * Socket vtable 'write_eof' method: forward the end-of-file request
 * to the outgoing handle via handle_write_eof.
 */
static void sk_handle_write_eof(Socket *s)
{
    struct handle *out = container_of(s, HandleSocket, sock)->send_h;

    handle_write_eof(out);
}
/*
 * Top-level callback that releases data buffered while the socket
 * was frozen. Each invocation hands one chunk of the buffer to the
 * plug, then either reschedules itself (more data remains) or
 * completes the thaw by returning to UNFROZEN and unthrottling the
 * input handle.
 *
 * 'hsv' is the HandleSocket, passed as the callback context.
 */
static void handle_socket_unfreeze(void *hsv)
{
    HandleSocket *hs = (HandleSocket *)hsv;

    /*
     * If we've been put into a state other than THAWING since the
     * last callback, then we're done.
     */
    if (hs->frozen != THAWING)
        return;

    /*
     * The buffer can legitimately be empty here: if the plug
     * re-froze us while it was receiving the final buffered chunk,
     * we stayed frozen with nothing left to release. In that case
     * skip straight to finishing the thaw below.
     */
    if (bufchain_size(&hs->inputdata) > 0) {
        /*
         * Get some of the data we've buffered.
         */
        ptrlen data = bufchain_prefix(&hs->inputdata);
        assert(data.len > 0);

        /*
         * Hand it off to the plug. Be careful of re-entrance - that
         * might have the effect of trying to close this socket.
         */
        hs->defer_close = true;
        plug_receive(hs->plug, 0, data.ptr, data.len);
        bufchain_consume(&hs->inputdata, data.len);
        hs->defer_close = false;
        if (hs->deferred_close) {
            sk_handle_close(&hs->sock);
            return;
        }

        /*
         * Re-entrance might also have frozen us again. If so, honour
         * that: leave the state alone and keep the input handle
         * throttled. The next unfreeze request will schedule this
         * callback afresh.
         */
        if (hs->frozen != THAWING)
            return;
    }

    if (bufchain_size(&hs->inputdata) > 0) {
        /*
         * If there's still data in our buffer, stay in THAWING state,
         * and reschedule ourself.
         */
        queue_toplevel_callback(handle_socket_unfreeze, hs);
    } else {
        /*
         * Otherwise, we've successfully thawed!
         */
        hs->frozen = UNFROZEN;
        handle_unthrottle(hs->recv_h, 0);
    }
}
Convert a lot of 'int' variables to 'bool'. My normal habit these days, in new code, is to treat int and bool as _almost_ completely separate types. I'm still willing to use C's implicit test for zero on an integer (e.g. 'if (!blob.len)' is fine, no need to spell it out as blob.len != 0), but generally, if a variable is going to be conceptually a boolean, I like to declare it bool and assign to it using 'true' or 'false' rather than 0 or 1. PuTTY is an exception, because it predates the C99 bool, and I've stuck to its existing coding style even when adding new code to it. But it's been annoying me more and more, so now that I've decided C99 bool is an acceptable thing to require from our toolchain in the first place, here's a quite thorough trawl through the source doing 'boolification'. Many variables and function parameters are now typed as bool rather than int; many assignments of 0 or 1 to those variables are now spelled 'true' or 'false'. I managed this thorough conversion with the help of a custom clang plugin that I wrote to trawl the AST and apply heuristics to point out where things might want changing. So I've even managed to do a decent job on parts of the code I haven't looked at in years! To make the plugin's work easier, I pushed platform front ends generally in the direction of using standard 'bool' in preference to platform-specific boolean types like Windows BOOL or GTK's gboolean; I've left the platform booleans in places they _have_ to be for the platform APIs to work right, but variables only used by my own code have been converted wherever I found them. In a few places there are int values that look very like booleans in _most_ of the places they're used, but have a rarely-used third value, or a distinction between different nonzero values that most users don't care about. 
In these cases, I've _removed_ uses of 'true' and 'false' for the return values, to emphasise that there's something more subtle going on than a simple boolean answer: - the 'multisel' field in dialog.h's list box structure, for which the GTK front end in particular recognises a difference between 1 and 2 but nearly everything else treats as boolean - the 'urgent' parameter to plug_receive, where 1 vs 2 tells you something about the specific location of the urgent pointer, but most clients only care about 0 vs 'something nonzero' - the return value of wc_match, where -1 indicates a syntax error in the wildcard. - the return values from SSH-1 RSA-key loading functions, which use -1 for 'wrong passphrase' and 0 for all other failures (so any caller which already knows it's not loading an _encrypted private_ key can treat them as boolean) - term->esc_query, and the 'query' parameter in toggle_mode in terminal.c, which _usually_ hold 0 for ESC[123h or 1 for ESC[?123h, but can also hold -1 for some other intervening character that we don't support. In a few places there's an integer that I haven't turned into a bool even though it really _can_ only take values 0 or 1 (and, as above, tried to make the call sites consistent in not calling those values true and false), on the grounds that I thought it would make it more confusing to imply that the 0 value was in some sense 'negative' or bad and the 1 positive or good: - the return value of plug_accepting uses the POSIXish convention of 0=success and nonzero=error; I think if I made it bool then I'd also want to reverse its sense, and that's a job for a separate piece of work. - the 'screen' parameter to lineptr() in terminal.c, where 0 and 1 represent the default and alternate screens. There's no obvious reason why one of those should be considered 'true' or 'positive' or 'success' - they're just indices - so I've left it as int. 
ssh_scp_recv had particularly confusing semantics for its previous int return value: its call sites used '<= 0' to check for error, but it never actually returned a negative number, just 0 or 1. Now the function and its call sites agree that it's a bool. In a couple of places I've renamed variables called 'ret', because I don't like that name any more - it's unclear whether it means the return value (in preparation) for the _containing_ function or the return value received from a subroutine call, and occasionally I've accidentally used the same variable for both and introduced a bug. So where one of those got in my way, I've renamed it to 'toret' or 'retd' (the latter short for 'returned') in line with my usual modern practice, but I haven't done a thorough job of finding all of them. Finally, one amusing side effect of doing this is that I've had to separate quite a few chained assignments. It used to be perfectly fine to write 'a = b = c = TRUE' when a,b,c were int and TRUE was just a #define for 1; but now that they are bool and the value is the 'true' defined by stdbool.h, that idiom provokes a warning from gcc: 'suggest parentheses around assignment used as truth value'!
2018-11-02 19:23:19 +00:00
/*
 * Socket vtable 'set_frozen' method: request that delivery of
 * incoming data to the plug be suspended (is_frozen true) or resumed
 * (is_frozen false), by moving the socket through the states
 * UNFROZEN / FREEZING / FROZEN / THAWING.
 */
static void sk_handle_set_frozen(Socket *s, bool is_frozen)
{
    HandleSocket *hsock = container_of(s, HandleSocket, sock);

    if (is_frozen) {
        if (hsock->frozen == UNFROZEN) {
            /*
             * The normal case. Go to FREEZING, and expect one more
             * load of data from winhandl if we're unlucky.
             */
            hsock->frozen = FREEZING;
        } else if (hsock->frozen == THAWING) {
            /*
             * We were in the middle of emptying our bufchain, and got
             * frozen again. In that case, winhandl.c is already
             * throttled, so just return to FROZEN state. The toplevel
             * callback will notice and disable itself.
             */
            hsock->frozen = FROZEN;
        }
        /* FREEZING or FROZEN: a freeze is already in effect. */
        return;
    }

    if (hsock->frozen == FREEZING) {
        /*
         * If winhandl didn't send us any data throughout the time
         * we were frozen, then we'll still be in this state and
         * can just unfreeze in the trivial way.
         */
        assert(bufchain_size(&hsock->inputdata) == 0);
        hsock->frozen = UNFROZEN;
    } else if (hsock->frozen == FROZEN) {
        /*
         * If we have buffered data, go to THAWING and start
         * releasing it in top-level callbacks.
         */
        hsock->frozen = THAWING;
        queue_toplevel_callback(handle_socket_unfreeze, hsock);
    }
    /* UNFROZEN or THAWING: nothing to do. */
}
/*
 * Socket vtable 'socket_error' method: return the socket's stored
 * error string, which is NULL when no error has been recorded.
 */
static const char *sk_handle_socket_error(Socket *s)
{
    const char *message = container_of(s, HandleSocket, sock)->error;

    return message;
}
/*
 * Socket vtable 'peer_info' method.
 *
 * If GetNamedPipeClientProcessId is available and succeeds on the
 * socket's outgoing handle, returns a newly allocated SocketPeerInfo
 * whose log_text names the client's process id. Otherwise returns
 * NULL.
 */
static SocketPeerInfo *sk_handle_peer_info(Socket *s)
{
    HandleSocket *hs = container_of(s, HandleSocket, sock);
    ULONG pid;
    static HMODULE kernel32_module;
    DECL_WINDOWS_FUNCTION(static, BOOL, GetNamedPipeClientProcessId,
                          (HANDLE, PULONG));

    /* Look the function up dynamically, the first time we're called. */
    if (!kernel32_module) {
        kernel32_module = load_system32_dll("kernel32.dll");
#if !HAVE_GETNAMEDPIPECLIENTPROCESSID
        /* For older Visual Studio, and MinGW too (at least as of
         * Ubuntu 16.04), this function isn't available in the header
         * files to type-check. Ditto the toolchain I use for
         * Coveritying the Windows code. */
        GET_WINDOWS_FUNCTION_NO_TYPECHECK(
            kernel32_module, GetNamedPipeClientProcessId);
#else
        GET_WINDOWS_FUNCTION(
            kernel32_module, GetNamedPipeClientProcessId);
#endif
    }

    /*
     * Of course, not all handles managed by this module will be
     * server ends of named pipes, but if they are, then it's useful
     * to log what we can find out about the client end.
     */
    if (p_GetNamedPipeClientProcessId &&
        p_GetNamedPipeClientProcessId(hs->send_H, &pid)) {
        SocketPeerInfo *pi = snew(SocketPeerInfo);
        pi->addressfamily = ADDRTYPE_LOCAL;
        pi->addr_text = NULL;
        pi->port = -1;
        pi->log_text = dupprintf("process id %lu", (unsigned long)pid);
        return pi;
    }

    return NULL;
}
/*
 * Method table connecting the generic Socket interface to the
 * HANDLE-based implementations in this file.
 */
static const SocketVtable HandleSocket_sockvt = {
    /* Plug management and teardown */
    .plug = sk_handle_plug,
    .close = sk_handle_close,

    /* Outgoing data */
    .write = sk_handle_write,
    .write_oob = sk_handle_write_oob,
    .write_eof = sk_handle_write_eof,

    /* Incoming flow control */
    .set_frozen = sk_handle_set_frozen,

    /* Status queries */
    .socket_error = sk_handle_socket_error,
    .peer_info = sk_handle_peer_info,
};
/*
 * Top-level callback queued by make_handle_socket: logs a
 * PLUGLOG_CONNECT_SUCCESS event to the plug, using the socket's
 * stored address and port. 'ctx' is the HandleSocket.
 */
static void sk_handle_connect_success_callback(void *ctx)
{
    HandleSocket *hsock = ctx;

    plug_log(hsock->plug, PLUGLOG_CONNECT_SUCCESS,
             hsock->addr, hsock->port, NULL, 0);
}
/*
 * Create a Socket that sends on send_H and receives on recv_H.
 *
 * send_H, recv_H: handles to write to and read from; they may be the
 *     same handle. Both are closed when the socket is closed.
 * stderr_H: optional handle (may be NULL) whose output is logged via
 *     the plug as proxy stderr data.
 * addr, port: reported to the plug in the connection-success log
 *     event. addr, if non-null, is freed when the socket is closed.
 * plug: receives the socket's events.
 * overlapped: if true, the handle wrappers are created with
 *     HANDLE_FLAG_OVERLAPPED.
 *
 * Returns the new Socket. The connection-success log event is
 * delivered later from a top-level callback, not during this call.
 */
Socket *make_handle_socket(HANDLE send_H, HANDLE recv_H, HANDLE stderr_H,
                           SockAddr *addr, int port, Plug *plug,
                           bool overlapped)
{
    HandleSocket *hs;
    int flags = (overlapped ? HANDLE_FLAG_OVERLAPPED : 0);

    hs = snew(HandleSocket);
    hs->sock.vt = &HandleSocket_sockvt;
    hs->addr = addr;
    hs->port = port;
    hs->plug = plug;
    hs->error = NULL;
    hs->defer_close = false;
    hs->deferred_close = false;

    hs->frozen = UNFROZEN;
    bufchain_init(&hs->inputdata);
    psb_init(&hs->psb);

    hs->recv_H = recv_H;
    hs->recv_h = handle_input_new(hs->recv_H, handle_gotdata, hs, flags);
    hs->send_H = send_H;
    hs->send_h = handle_output_new(hs->send_H, handle_sentdata, hs, flags);
    hs->stderr_H = stderr_H;
    if (hs->stderr_H) {
        hs->stderr_h = handle_input_new(hs->stderr_H, handle_stderr,
                                        hs, flags);
    } else {
        /* Never leave the field holding an indeterminate pointer. */
        hs->stderr_h = NULL;
    }

    queue_toplevel_callback(sk_handle_connect_success_callback, hs);

    return &hs->sock;
}