1
0
mirror of https://git.tartarus.org/simon/putty.git synced 2025-01-09 17:38:00 +00:00
putty-source/misc.h

553 lines
20 KiB
C
Raw Normal View History

/*
* Header for miscellaneous helper functions, mostly defined in the
* utils subdirectory.
*/
#ifndef PUTTY_MISC_H
#define PUTTY_MISC_H
#include "defs.h"
#include "puttymem.h"
New centralised binary-data marshalling system. I've finally got tired of all the code throughout PuTTY that repeats the same logic about how to format the SSH binary primitives like uint32, string, mpint. We've got reasonably organised code in ssh.c that appends things like that to 'struct Packet'; something similar in sftp.c which repeats a lot of the work; utility functions in various places to format an mpint to feed to one or another hash function; and no end of totally ad-hoc stuff in functions like public key blob formatters which actually have to _count up_ the size of data painstakingly, then malloc exactly that much and mess about with PUT_32BIT. It's time to bring all of that into one place, and stop repeating myself in error-prone ways everywhere. The new marshal.h defines a system in which I centralise all the actual marshalling functions, and then layer a touch of C macro trickery on top to allow me to (look as if I) pass a wide range of different types to those functions, as long as the target type has been set up in the right way to have a write() function. This commit adds the new header and source file, and sets up some general centralised types (strbuf and the various hash-function contexts like SHA_State), but doesn't use the new calls for anything yet. (I've also renamed some internal functions in import.c which were using the same names that I've just defined macros over. That won't last long - those functions are going to go away soon, so the changed names are strictly temporary.)
2018-05-24 08:17:13 +00:00
#include "marshal.h"
#include <stdio.h> /* for FILE * */
#include <stdarg.h> /* for va_list */
#include <stdlib.h> /* for abort */
#include <time.h> /* for struct tm */
#include <limits.h> /* for INT_MAX/MIN */
#include <assert.h> /* for assert (obviously) */
/* Parsing helpers; their implementations are not visible in this header. */
unsigned long parse_blocksize(const char *bs);
char ctrlparse(char *s, char **next);
/* NOTE(review): host_strcspn/host_strchr/host_strrchr have the same
 * signatures as strcspn/strchr/strrchr; presumably they apply
 * host-name-specific parsing rules -- confirm against the
 * implementations. */
size_t host_strcspn(const char *s, const char *set);
char *host_strchr(const char *s, int c);
char *host_strrchr(const char *s, int c);
char *host_strduptrim(const char *s);
/* NOTE(review): the dup* functions presumably return dynamically
 * allocated results that the caller must free -- confirm. */
char *dupstr(const char *s);
wchar_t *dupwcs(const wchar_t *s);
char *dupcat_fn(const char *s1, ...);
/* dupcat() forwards its arguments to dupcat_fn() and appends a
 * (const char *)NULL after them, so callers of the macro do not write
 * the terminating NULL themselves. */
#define dupcat(...) dupcat_fn(__VA_ARGS__, (const char *)NULL)
/* PRINTF_LIKE is defined elsewhere; presumably it marks fmt (argument
 * 1) as a printf format string checked against the variadic arguments
 * starting at argument 2 -- confirm. */
char *dupprintf(const char *fmt, ...) PRINTF_LIKE(1, 2);
char *dupvprintf(const char *fmt, va_list ap);
/* NOTE(review): presumably these wipe the string's contents before
 * freeing it -- confirm. */
void burnstr(char *string);
void burnwcs(wchar_t *string);
/*
* The visible part of a strbuf structure. There's a surrounding
* implementation struct in strbuf.c, which isn't exposed to client
* code.
*/
struct strbuf {
char *s;
unsigned char *u;
size_t len;
New centralised binary-data marshalling system. I've finally got tired of all the code throughout PuTTY that repeats the same logic about how to format the SSH binary primitives like uint32, string, mpint. We've got reasonably organised code in ssh.c that appends things like that to 'struct Packet'; something similar in sftp.c which repeats a lot of the work; utility functions in various places to format an mpint to feed to one or another hash function; and no end of totally ad-hoc stuff in functions like public key blob formatters which actually have to _count up_ the size of data painstakingly, then malloc exactly that much and mess about with PUT_32BIT. It's time to bring all of that into one place, and stop repeating myself in error-prone ways everywhere. The new marshal.h defines a system in which I centralise all the actual marshalling functions, and then layer a touch of C macro trickery on top to allow me to (look as if I) pass a wide range of different types to those functions, as long as the target type has been set up in the right way to have a write() function. This commit adds the new header and source file, and sets up some general centralised types (strbuf and the various hash-function contexts like SHA_State), but doesn't use the new calls for anything yet. (I've also renamed some internal functions in import.c which were using the same names that I've just defined macros over. That won't last long - those functions are going to go away soon, so the changed names are strictly temporary.)
2018-05-24 08:17:13 +00:00
BinarySink_IMPLEMENTATION;
};
/* strbuf constructors: strbuf_new_nm and strbuf_new differ in that a
* strbuf constructed using the _nm version will resize itself by
* alloc/copy/smemclr/free instead of realloc. Use that version for
* data sensitive enough that it's worth costing performance to
* avoid copies of it lingering in process memory. */
strbuf *strbuf_new(void);
strbuf *strbuf_new_nm(void);
/* Helpers to allocate a strbuf containing an existing string.
 * NOTE(review): the _nm suffix presumably selects the same
 * alloc/copy/smemclr/free resizing policy as strbuf_new_nm --
 * confirm. */
strbuf *strbuf_dup(ptrlen string);
strbuf *strbuf_dup_nm(ptrlen string);
void strbuf_free(strbuf *buf);
/* NOTE(review): presumably extends buf by len bytes and returns a
 * pointer to the newly added space for the caller to fill in --
 * confirm. */
void *strbuf_append(strbuf *buf, size_t len);
/* Two ways to shorten a strbuf: give the new length outright, or give
 * the amount to remove. */
void strbuf_shrink_to(strbuf *buf, size_t new_len);
void strbuf_shrink_by(strbuf *buf, size_t amount_to_remove);
char *strbuf_to_str(strbuf *buf); /* does free buf, but you must free result */
/* Empty a strbuf by shrinking it to a length of zero. */
static inline void strbuf_clear(strbuf *buf)
{
    strbuf_shrink_to(buf, 0);
}
/* NOTE(review): presumably removes a trailing char_to_remove from buf
 * if one is present and returns whether it did so -- confirm. */
bool strbuf_chomp(strbuf *buf, char char_to_remove);
/* Constructor/finaliser pair for strbufs used to build agent queries.
 * NOTE(review): presumably the constructor reserves space for a length
 * prefix which the finaliser fills in -- confirm against the
 * implementation. */
strbuf *strbuf_new_for_agent_query(void);
void strbuf_finalise_agent_query(strbuf *buf);
/* String-to-Unicode converters that auto-allocate the destination and
* work around the rather deficient interface of mb_to_wc. */
Add UTF-8 support to the new Windows ConsoleIO system. This allows you to set a flag in conio_setup() which causes the returned ConsoleIO object to interpret all its output as UTF-8, by translating it to UTF-16 and using WriteConsoleW to write it in Unicode. Similarly, input is read using ReadConsoleW and decoded from UTF-16 to UTF-8. This flag is set to false in most places, to avoid making sudden breaking changes. But when we're about to present a prompts_t to the user, it's set from the new 'utf8' flag in that prompt, which in turn is set by the userauth layer in any case where the prompts are going to the server. The idea is that this should be the start of a fix for the long- standing character-set handling bug that strings transmitted during SSH userauth (usernames, passwords, k-i prompts and responses) are all supposed to be in UTF-8, but we've always encoded them in whatever our input system happens to be using, and not done any tidying up on them. We get occasional complaints about this from users whose passwords contain characters that are encoded differently between UTF-8 and their local encoding, but I've never got round to fixing it because it's a large piece of engineering. Indeed, this isn't nearly the end of it. The next step is to add UTF-8 support to all the _other_ ways of presenting a prompts_t, as best we can. Like the previous change to console handling, it seems very likely that this will break someone's workflow. So there's a fallback command-line option '-legacy-charset-handling' to revert to PuTTY's previous behaviour.
2022-11-25 12:57:43 +00:00
/* Multibyte-to-wide conversion returning a wchar_t * result. The _c
 * form takes an explicit input length and has an outlen_p output
 * parameter; the plain form takes only the string (presumably
 * NUL-terminated -- confirm). */
wchar_t *dup_mb_to_wc_c(int codepage, int flags, const char *string,
size_t len, size_t *outlen_p);
wchar_t *dup_mb_to_wc(int codepage, int flags, const char *string);
Add UTF-8 support to the new Windows ConsoleIO system. This allows you to set a flag in conio_setup() which causes the returned ConsoleIO object to interpret all its output as UTF-8, by translating it to UTF-16 and using WriteConsoleW to write it in Unicode. Similarly, input is read using ReadConsoleW and decoded from UTF-16 to UTF-8. This flag is set to false in most places, to avoid making sudden breaking changes. But when we're about to present a prompts_t to the user, it's set from the new 'utf8' flag in that prompt, which in turn is set by the userauth layer in any case where the prompts are going to the server. The idea is that this should be the start of a fix for the long- standing character-set handling bug that strings transmitted during SSH userauth (usernames, passwords, k-i prompts and responses) are all supposed to be in UTF-8, but we've always encoded them in whatever our input system happens to be using, and not done any tidying up on them. We get occasional complaints about this from users whose passwords contain characters that are encoded differently between UTF-8 and their local encoding, but I've never got round to fixing it because it's a large piece of engineering. Indeed, this isn't nearly the end of it. The next step is to add UTF-8 support to all the _other_ ways of presenting a prompts_t, as best we can. Like the previous change to console handling, it seems very likely that this will break someone's workflow. So there's a fallback command-line option '-legacy-charset-handling' to revert to PuTTY's previous behaviour.
2022-11-25 12:57:43 +00:00
/* Wide-to-multibyte conversion returning a char * result. As with the
 * mb_to_wc pair, the _c form takes an explicit input length and has an
 * outlen_p output parameter.
 * NOTE(review): defchr is presumably the substitute used for characters
 * that cannot be represented in the target codepage -- confirm. */
char *dup_wc_to_mb_c(int codepage, int flags, const wchar_t *string,
size_t len, const char *defchr, size_t *outlen_p);
char *dup_wc_to_mb(int codepage, int flags, const wchar_t *string,
const char *defchr);
static inline int toint(unsigned u)
{
    /*
     * Map an unsigned value onto the int with the same two's-complement
     * bit pattern, without ever performing an out-of-range conversion
     * to a signed type (which strict C does not pin down, and which an
     * aggressive optimiser might exploit to discard a later
     * 'if (i < 0)' check).
     *
     * Values up to INT_MAX convert directly. Larger values are handled
     * by measuring their distance above (unsigned)INT_MIN and adding
     * that distance to INT_MIN, so every intermediate result stays in
     * range. Converting INT_MIN _to_ unsigned is well defined.
     *
     * A reasonable compiler should reduce all of this to 'return the
     * input unchanged' and inline it away.
     */
    const unsigned int_max_as_unsigned = (unsigned)INT_MAX;
    const unsigned int_min_as_unsigned = (unsigned)INT_MIN;

    if (u > int_max_as_unsigned) {
        if (u < int_min_as_unsigned) {
            /* Gap between INT_MAX and (unsigned)INT_MIN: should never
             * occur on binary machines. */
            return INT_MIN;
        }
        return INT_MIN + (int)(u - int_min_as_unsigned);
    }

    return (int)u;
}
/* File and line helpers.
 * NOTE(review): fgetline presumably returns a dynamically allocated
 * line read from fp (and NULL when nothing more can be read);
 * read_file_into presumably copies the contents of fp into the
 * BinarySink and reports success; chomp presumably strips trailing
 * line-ending characters in place and returns str -- confirm all three
 * against the implementations. */
char *fgetline(FILE *fp);
bool read_file_into(BinarySink *bs, FILE *fp);
char *chomp(char *str);
Convert a lot of 'int' variables to 'bool'. My normal habit these days, in new code, is to treat int and bool as _almost_ completely separate types. I'm still willing to use C's implicit test for zero on an integer (e.g. 'if (!blob.len)' is fine, no need to spell it out as blob.len != 0), but generally, if a variable is going to be conceptually a boolean, I like to declare it bool and assign to it using 'true' or 'false' rather than 0 or 1. PuTTY is an exception, because it predates the C99 bool, and I've stuck to its existing coding style even when adding new code to it. But it's been annoying me more and more, so now that I've decided C99 bool is an acceptable thing to require from our toolchain in the first place, here's a quite thorough trawl through the source doing 'boolification'. Many variables and function parameters are now typed as bool rather than int; many assignments of 0 or 1 to those variables are now spelled 'true' or 'false'. I managed this thorough conversion with the help of a custom clang plugin that I wrote to trawl the AST and apply heuristics to point out where things might want changing. So I've even managed to do a decent job on parts of the code I haven't looked at in years! To make the plugin's work easier, I pushed platform front ends generally in the direction of using standard 'bool' in preference to platform-specific boolean types like Windows BOOL or GTK's gboolean; I've left the platform booleans in places they _have_ to be for the platform APIs to work right, but variables only used by my own code have been converted wherever I found them. In a few places there are int values that look very like booleans in _most_ of the places they're used, but have a rarely-used third value, or a distinction between different nonzero values that most users don't care about. 
In these cases, I've _removed_ uses of 'true' and 'false' for the return values, to emphasise that there's something more subtle going on than a simple boolean answer: - the 'multisel' field in dialog.h's list box structure, for which the GTK front end in particular recognises a difference between 1 and 2 but nearly everything else treats as boolean - the 'urgent' parameter to plug_receive, where 1 vs 2 tells you something about the specific location of the urgent pointer, but most clients only care about 0 vs 'something nonzero' - the return value of wc_match, where -1 indicates a syntax error in the wildcard. - the return values from SSH-1 RSA-key loading functions, which use -1 for 'wrong passphrase' and 0 for all other failures (so any caller which already knows it's not loading an _encrypted private_ key can treat them as boolean) - term->esc_query, and the 'query' parameter in toggle_mode in terminal.c, which _usually_ hold 0 for ESC[123h or 1 for ESC[?123h, but can also hold -1 for some other intervening character that we don't support. In a few places there's an integer that I haven't turned into a bool even though it really _can_ only take values 0 or 1 (and, as above, tried to make the call sites consistent in not calling those values true and false), on the grounds that I thought it would make it more confusing to imply that the 0 value was in some sense 'negative' or bad and the 1 positive or good: - the return value of plug_accepting uses the POSIXish convention of 0=success and nonzero=error; I think if I made it bool then I'd also want to reverse its sense, and that's a job for a separate piece of work. - the 'screen' parameter to lineptr() in terminal.c, where 0 and 1 represent the default and alternate screens. There's no obvious reason why one of those should be considered 'true' or 'positive' or 'success' - they're just indices - so I've left it as int. 
ssh_scp_recv had particularly confusing semantics for its previous int return value: its call sites used '<= 0' to check for error, but it never actually returned a negative number, just 0 or 1. Now the function and its call sites agree that it's a bool. In a couple of places I've renamed variables called 'ret', because I don't like that name any more - it's unclear whether it means the return value (in preparation) for the _containing_ function or the return value received from a subroutine call, and occasionally I've accidentally used the same variable for both and introduced a bug. So where one of those got in my way, I've renamed it to 'toret' or 'retd' (the latter short for 'returned') in line with my usual modern practice, but I haven't done a thorough job of finding all of them. Finally, one amusing side effect of doing this is that I've had to separate quite a few chained assignments. It used to be perfectly fine to write 'a = b = c = TRUE' when a,b,c were int and TRUE was just a the 'true' defined by stdbool.h, that idiom provokes a warning from gcc: 'suggest parentheses around assignment used as truth value'!
2018-11-02 19:23:19 +00:00
/* NOTE(review): presumably test whether s begins / ends with t --
 * confirm the argument order against the implementations. */
bool strstartswith(const char *s, const char *t);
bool strendswith(const char *s, const char *t);
/* Base64 primitives operating on a single atom.
 * NOTE(review): presumably an atom is up to 3 raw bytes / 4 encoded
 * characters, n is the number of input bytes, and the decoder returns
 * the number of bytes written -- confirm. */
void base64_encode_atom(const unsigned char *data, int n, char *out);
int base64_decode_atom(const char *atom, unsigned char *out);
/* Whole-buffer base64 and percent-encoding functions. Each operation is
 * declared in three forms that differ only in signature: _bs takes a
 * BinarySink *, _fp takes a FILE *, and _sb returns a strbuf *
 * (presumably the destination of the output in each case).
 * NOTE(review): 'cpl' is presumably the number of characters per output
 * line, and 'badchars' presumably lists extra characters to escape --
 * confirm. */
void base64_decode_bs(BinarySink *bs, ptrlen data);
void base64_decode_fp(FILE *fp, ptrlen data);
strbuf *base64_decode_sb(ptrlen data);
void base64_encode_bs(BinarySink *bs, ptrlen data, int cpl);
void base64_encode_fp(FILE *fp, ptrlen data, int cpl);
strbuf *base64_encode_sb(ptrlen data, int cpl);
bool base64_valid(ptrlen data);
void percent_encode_bs(BinarySink *bs, ptrlen data, const char *badchars);
void percent_encode_fp(FILE *fp, ptrlen data, const char *badchars);
strbuf *percent_encode_sb(ptrlen data, const char *badchars);
void percent_decode_bs(BinarySink *bs, ptrlen data);
void percent_decode_fp(FILE *fp, ptrlen data);
strbuf *percent_decode_sb(ptrlen data);
struct bufchain_granule;
struct bufchain_tag {
struct bufchain_granule *head, *tail;
size_t buffersize; /* current amount of buffered data */
Move standalone parts of misc.c into utils.c. misc.c has always contained a combination of things that are tied tightly into the PuTTY code base (e.g. they use the conf system, or work with our sockets abstraction) and things that are pure standalone utility functions like nullstrcmp() which could quite happily be dropped into any C program without causing a link failure. Now the latter kind of standalone utility code lives in the new source file utils.c, whose only external dependency is on memory.c (for snew, sfree etc), which in turn requires the user to provide an out_of_memory() function. So it should now be much easier to link test programs that use PuTTY's low-level functions without also pulling in half its bulky infrastructure. In the process, I came across a memory allocation logging system enabled by -DMALLOC_LOG that looks long since bit-rotted; in any case we have much more advanced tools for that kind of thing these days, like valgrind and Leak Sanitiser, so I've just removed it rather than trying to transplant it somewhere sensible. (We can always pull it back out of the version control history if really necessary, but I haven't used it in at least a decade.) The other slightly silly thing I did was to give bufchain a function pointer field that points to queue_idempotent_callback(), and disallow direct setting of the 'ic' field in favour of calling bufchain_set_callback which will fill that pointer in too. That allows the bufchain system to live in utils.c rather than misc.c, so that programs can use it without also having to link in the callback system or provide an annoying stub of that function. In fact that's just allowed me to remove stubs of that kind from PuTTYgen and Pageant!
2019-01-03 08:44:11 +00:00
void (*queue_idempotent_callback)(IdempotentCallback *ic);
IdempotentCallback *ic;
};
/* bufchain API.
 * The bufchain_try_* functions return bool where their plain
 * counterparts (consume, fetch, fetch_consume) return void.
 * NOTE(review): presumably the plain forms require at least len bytes
 * to be buffered, while the try_ forms report failure instead -- confirm
 * against the implementation. */
void bufchain_init(bufchain *ch);
void bufchain_clear(bufchain *ch);
size_t bufchain_size(bufchain *ch);
void bufchain_add(bufchain *ch, const void *data, size_t len);
ptrlen bufchain_prefix(bufchain *ch);
void bufchain_consume(bufchain *ch, size_t len);
void bufchain_fetch(bufchain *ch, void *data, size_t len);
void bufchain_fetch_consume(bufchain *ch, void *data, size_t len);
bool bufchain_try_consume(bufchain *ch, size_t len);
bool bufchain_try_fetch(bufchain *ch, void *data, size_t len);
bool bufchain_try_fetch_consume(bufchain *ch, void *data, size_t len);
/* NOTE(review): presumably fetches and consumes at most len bytes and
 * returns how many were actually transferred -- confirm. */
size_t bufchain_fetch_consume_up_to(bufchain *ch, void *data, size_t len);
Move standalone parts of misc.c into utils.c. misc.c has always contained a combination of things that are tied tightly into the PuTTY code base (e.g. they use the conf system, or work with our sockets abstraction) and things that are pure standalone utility functions like nullstrcmp() which could quite happily be dropped into any C program without causing a link failure. Now the latter kind of standalone utility code lives in the new source file utils.c, whose only external dependency is on memory.c (for snew, sfree etc), which in turn requires the user to provide an out_of_memory() function. So it should now be much easier to link test programs that use PuTTY's low-level functions without also pulling in half its bulky infrastructure. In the process, I came across a memory allocation logging system enabled by -DMALLOC_LOG that looks long since bit-rotted; in any case we have much more advanced tools for that kind of thing these days, like valgrind and Leak Sanitiser, so I've just removed it rather than trying to transplant it somewhere sensible. (We can always pull it back out of the version control history if really necessary, but I haven't used it in at least a decade.) The other slightly silly thing I did was to give bufchain a function pointer field that points to queue_idempotent_callback(), and disallow direct setting of the 'ic' field in favour of calling bufchain_set_callback which will fill that pointer in too. That allows the bufchain system to live in utils.c rather than misc.c, so that programs can use it without also having to link in the callback system or provide an annoying stub of that function. In fact that's just allowed me to remove stubs of that kind from PuTTYgen and Pageant!
2019-01-03 08:44:11 +00:00
/* Form of bufchain_set_callback in which the caller passes the queueing
 * function explicitly as the third argument. */
void bufchain_set_callback_inner(
bufchain *ch, IdempotentCallback *ic,
void (*queue_idempotent_callback)(IdempotentCallback *ic));
/*
 * Wrapper around bufchain_set_callback_inner that puts in the standard
 * queue_idempotent_callback function. It is defined inline in this
 * header, rather than in bufchain.c, so that standalone programs can
 * use the bufchain facility without this optional callback feature and
 * not need to provide a stub of queue_idempotent_callback.
 */
static inline void bufchain_set_callback(bufchain *ch, IdempotentCallback *ic)
{
    extern void queue_idempotent_callback(struct IdempotentCallback *ic);

    bufchain_set_callback_inner(
        ch, ic, queue_idempotent_callback);
}
Convert a lot of 'int' variables to 'bool'. My normal habit these days, in new code, is to treat int and bool as _almost_ completely separate types. I'm still willing to use C's implicit test for zero on an integer (e.g. 'if (!blob.len)' is fine, no need to spell it out as blob.len != 0), but generally, if a variable is going to be conceptually a boolean, I like to declare it bool and assign to it using 'true' or 'false' rather than 0 or 1. PuTTY is an exception, because it predates the C99 bool, and I've stuck to its existing coding style even when adding new code to it. But it's been annoying me more and more, so now that I've decided C99 bool is an acceptable thing to require from our toolchain in the first place, here's a quite thorough trawl through the source doing 'boolification'. Many variables and function parameters are now typed as bool rather than int; many assignments of 0 or 1 to those variables are now spelled 'true' or 'false'. I managed this thorough conversion with the help of a custom clang plugin that I wrote to trawl the AST and apply heuristics to point out where things might want changing. So I've even managed to do a decent job on parts of the code I haven't looked at in years! To make the plugin's work easier, I pushed platform front ends generally in the direction of using standard 'bool' in preference to platform-specific boolean types like Windows BOOL or GTK's gboolean; I've left the platform booleans in places they _have_ to be for the platform APIs to work right, but variables only used by my own code have been converted wherever I found them. In a few places there are int values that look very like booleans in _most_ of the places they're used, but have a rarely-used third value, or a distinction between different nonzero values that most users don't care about. 
In these cases, I've _removed_ uses of 'true' and 'false' for the return values, to emphasise that there's something more subtle going on than a simple boolean answer: - the 'multisel' field in dialog.h's list box structure, for which the GTK front end in particular recognises a difference between 1 and 2 but nearly everything else treats as boolean - the 'urgent' parameter to plug_receive, where 1 vs 2 tells you something about the specific location of the urgent pointer, but most clients only care about 0 vs 'something nonzero' - the return value of wc_match, where -1 indicates a syntax error in the wildcard. - the return values from SSH-1 RSA-key loading functions, which use -1 for 'wrong passphrase' and 0 for all other failures (so any caller which already knows it's not loading an _encrypted private_ key can treat them as boolean) - term->esc_query, and the 'query' parameter in toggle_mode in terminal.c, which _usually_ hold 0 for ESC[123h or 1 for ESC[?123h, but can also hold -1 for some other intervening character that we don't support. In a few places there's an integer that I haven't turned into a bool even though it really _can_ only take values 0 or 1 (and, as above, tried to make the call sites consistent in not calling those values true and false), on the grounds that I thought it would make it more confusing to imply that the 0 value was in some sense 'negative' or bad and the 1 positive or good: - the return value of plug_accepting uses the POSIXish convention of 0=success and nonzero=error; I think if I made it bool then I'd also want to reverse its sense, and that's a job for a separate piece of work. - the 'screen' parameter to lineptr() in terminal.c, where 0 and 1 represent the default and alternate screens. There's no obvious reason why one of those should be considered 'true' or 'positive' or 'success' - they're just indices - so I've left it as int. 
ssh_scp_recv had particularly confusing semantics for its previous int return value: its call sites used '<= 0' to check for error, but it never actually returned a negative number, just 0 or 1. Now the function and its call sites agree that it's a bool. In a couple of places I've renamed variables called 'ret', because I don't like that name any more - it's unclear whether it means the return value (in preparation) for the _containing_ function or the return value received from a subroutine call, and occasionally I've accidentally used the same variable for both and introduced a bug. So where one of those got in my way, I've renamed it to 'toret' or 'retd' (the latter short for 'returned') in line with my usual modern practice, but I haven't done a thorough job of finding all of them. Finally, one amusing side effect of doing this is that I've had to separate quite a few chained assignments. It used to be perfectly fine to write 'a = b = c = TRUE' when a,b,c were int and TRUE was just a the 'true' defined by stdbool.h, that idiom provokes a warning from gcc: 'suggest parentheses around assignment used as truth value'!
2018-11-02 19:23:19 +00:00
/* NOTE(review): presumably checks a manually specified host key string;
 * key is non-const, so it may be modified in place -- confirm. */
bool validate_manual_hostkey(char *key);
/* NOTE(review): presumably returns the current local time as a
 * struct tm by value -- confirm. */
struct tm ltime(void);
/*
* Special form of strcmp which can cope with NULL inputs. NULL is
* defined to sort before even the empty string.
*
* NOTE(review): the int result presumably follows the strcmp convention
* (negative, zero or positive) -- confirm.
*/
int nullstrcmp(const char *a, const char *b);
static inline ptrlen make_ptrlen(const void *ptr, size_t len)
{
ptrlen pl;
pl.ptr = ptr;
pl.len = len;
return pl;
}
/* Return the address pl.len bytes beyond pl.ptr, i.e. just past the
 * end of the region described by pl. */
static inline const void *ptrlen_end(ptrlen pl)
{
    const char *base = (const char *)pl.ptr;
    return base + pl.len;
}
static inline ptrlen make_ptrlen_startend(const void *startv, const void *endv)
{
const char *start = (const char *)startv, *end = (const char *)endv;
assert(end >= start);
ptrlen pl;
pl.ptr = start;
pl.len = end - start;
return pl;
}
/* Build a ptrlen describing a NUL-terminated string; the length is
 * strlen(str), so the terminator itself is not included. */
static inline ptrlen ptrlen_from_asciz(const char *str)
{
    size_t nchars = strlen(str);
    return make_ptrlen(str, nchars);
}
/* Build a ptrlen from a strbuf's 'u' pointer and its 'len' field. */
static inline ptrlen ptrlen_from_strbuf(strbuf *sb)
{
    const unsigned char *data = sb->u;
    size_t nbytes = sb->len;
    return make_ptrlen(data, nbytes);
}
Convert a lot of 'int' variables to 'bool'. My normal habit these days, in new code, is to treat int and bool as _almost_ completely separate types. I'm still willing to use C's implicit test for zero on an integer (e.g. 'if (!blob.len)' is fine, no need to spell it out as blob.len != 0), but generally, if a variable is going to be conceptually a boolean, I like to declare it bool and assign to it using 'true' or 'false' rather than 0 or 1. PuTTY is an exception, because it predates the C99 bool, and I've stuck to its existing coding style even when adding new code to it. But it's been annoying me more and more, so now that I've decided C99 bool is an acceptable thing to require from our toolchain in the first place, here's a quite thorough trawl through the source doing 'boolification'. Many variables and function parameters are now typed as bool rather than int; many assignments of 0 or 1 to those variables are now spelled 'true' or 'false'. I managed this thorough conversion with the help of a custom clang plugin that I wrote to trawl the AST and apply heuristics to point out where things might want changing. So I've even managed to do a decent job on parts of the code I haven't looked at in years! To make the plugin's work easier, I pushed platform front ends generally in the direction of using standard 'bool' in preference to platform-specific boolean types like Windows BOOL or GTK's gboolean; I've left the platform booleans in places they _have_ to be for the platform APIs to work right, but variables only used by my own code have been converted wherever I found them. In a few places there are int values that look very like booleans in _most_ of the places they're used, but have a rarely-used third value, or a distinction between different nonzero values that most users don't care about. 
In these cases, I've _removed_ uses of 'true' and 'false' for the return values, to emphasise that there's something more subtle going on than a simple boolean answer: - the 'multisel' field in dialog.h's list box structure, for which the GTK front end in particular recognises a difference between 1 and 2 but nearly everything else treats as boolean - the 'urgent' parameter to plug_receive, where 1 vs 2 tells you something about the specific location of the urgent pointer, but most clients only care about 0 vs 'something nonzero' - the return value of wc_match, where -1 indicates a syntax error in the wildcard. - the return values from SSH-1 RSA-key loading functions, which use -1 for 'wrong passphrase' and 0 for all other failures (so any caller which already knows it's not loading an _encrypted private_ key can treat them as boolean) - term->esc_query, and the 'query' parameter in toggle_mode in terminal.c, which _usually_ hold 0 for ESC[123h or 1 for ESC[?123h, but can also hold -1 for some other intervening character that we don't support. In a few places there's an integer that I haven't turned into a bool even though it really _can_ only take values 0 or 1 (and, as above, tried to make the call sites consistent in not calling those values true and false), on the grounds that I thought it would make it more confusing to imply that the 0 value was in some sense 'negative' or bad and the 1 positive or good: - the return value of plug_accepting uses the POSIXish convention of 0=success and nonzero=error; I think if I made it bool then I'd also want to reverse its sense, and that's a job for a separate piece of work. - the 'screen' parameter to lineptr() in terminal.c, where 0 and 1 represent the default and alternate screens. There's no obvious reason why one of those should be considered 'true' or 'positive' or 'success' - they're just indices - so I've left it as int. 
ssh_scp_recv had particularly confusing semantics for its previous int return value: its call sites used '<= 0' to check for error, but it never actually returned a negative number, just 0 or 1. Now the function and its call sites agree that it's a bool. In a couple of places I've renamed variables called 'ret', because I don't like that name any more - it's unclear whether it means the return value (in preparation) for the _containing_ function or the return value received from a subroutine call, and occasionally I've accidentally used the same variable for both and introduced a bug. So where one of those got in my way, I've renamed it to 'toret' or 'retd' (the latter short for 'returned') in line with my usual modern practice, but I haven't done a thorough job of finding all of them. Finally, one amusing side effect of doing this is that I've had to separate quite a few chained assignments. It used to be perfectly fine to write 'a = b = c = TRUE' when a,b,c were int and TRUE was just a the 'true' defined by stdbool.h, that idiom provokes a warning from gcc: 'suggest parentheses around assignment used as truth value'!
2018-11-02 19:23:19 +00:00
/* Comparison helpers for ptrlen values. Only the prototypes are
 * visible in this header, so the notes below are inferred from the
 * names and signatures and should be confirmed against the
 * definitions:
 *
 *  - ptrlen_eq_string: presumably true iff the bytes of 'pl' equal the
 *    NUL-terminated string 'str'.
 *  - ptrlen_eq_ptrlen: presumably true iff 'pl1' and 'pl2' have the
 *    same length and contents.
 *  - ptrlen_strcmp: returns int rather than bool, so presumably a
 *    strcmp-style three-way ordering result - TODO confirm. */
bool ptrlen_eq_string(ptrlen pl, const char *str);
bool ptrlen_eq_ptrlen(ptrlen pl1, ptrlen pl2);
int ptrlen_strcmp(ptrlen pl1, ptrlen pl2);
/* ptrlen_startswith and ptrlen_endswith write through their 'tail'
* argument if and only if it is non-NULL and they return true. Hence
* you can write ptrlen_startswith(thing, prefix, &thing), writing
* back to the same ptrlen it read from, to remove a prefix if present
* and say whether it did so. */
Convert a lot of 'int' variables to 'bool'. My normal habit these days, in new code, is to treat int and bool as _almost_ completely separate types. I'm still willing to use C's implicit test for zero on an integer (e.g. 'if (!blob.len)' is fine, no need to spell it out as blob.len != 0), but generally, if a variable is going to be conceptually a boolean, I like to declare it bool and assign to it using 'true' or 'false' rather than 0 or 1. PuTTY is an exception, because it predates the C99 bool, and I've stuck to its existing coding style even when adding new code to it. But it's been annoying me more and more, so now that I've decided C99 bool is an acceptable thing to require from our toolchain in the first place, here's a quite thorough trawl through the source doing 'boolification'. Many variables and function parameters are now typed as bool rather than int; many assignments of 0 or 1 to those variables are now spelled 'true' or 'false'. I managed this thorough conversion with the help of a custom clang plugin that I wrote to trawl the AST and apply heuristics to point out where things might want changing. So I've even managed to do a decent job on parts of the code I haven't looked at in years! To make the plugin's work easier, I pushed platform front ends generally in the direction of using standard 'bool' in preference to platform-specific boolean types like Windows BOOL or GTK's gboolean; I've left the platform booleans in places they _have_ to be for the platform APIs to work right, but variables only used by my own code have been converted wherever I found them. In a few places there are int values that look very like booleans in _most_ of the places they're used, but have a rarely-used third value, or a distinction between different nonzero values that most users don't care about. 
In these cases, I've _removed_ uses of 'true' and 'false' for the return values, to emphasise that there's something more subtle going on than a simple boolean answer: - the 'multisel' field in dialog.h's list box structure, for which the GTK front end in particular recognises a difference between 1 and 2 but nearly everything else treats as boolean - the 'urgent' parameter to plug_receive, where 1 vs 2 tells you something about the specific location of the urgent pointer, but most clients only care about 0 vs 'something nonzero' - the return value of wc_match, where -1 indicates a syntax error in the wildcard. - the return values from SSH-1 RSA-key loading functions, which use -1 for 'wrong passphrase' and 0 for all other failures (so any caller which already knows it's not loading an _encrypted private_ key can treat them as boolean) - term->esc_query, and the 'query' parameter in toggle_mode in terminal.c, which _usually_ hold 0 for ESC[123h or 1 for ESC[?123h, but can also hold -1 for some other intervening character that we don't support. In a few places there's an integer that I haven't turned into a bool even though it really _can_ only take values 0 or 1 (and, as above, tried to make the call sites consistent in not calling those values true and false), on the grounds that I thought it would make it more confusing to imply that the 0 value was in some sense 'negative' or bad and the 1 positive or good: - the return value of plug_accepting uses the POSIXish convention of 0=success and nonzero=error; I think if I made it bool then I'd also want to reverse its sense, and that's a job for a separate piece of work. - the 'screen' parameter to lineptr() in terminal.c, where 0 and 1 represent the default and alternate screens. There's no obvious reason why one of those should be considered 'true' or 'positive' or 'success' - they're just indices - so I've left it as int. 
ssh_scp_recv had particularly confusing semantics for its previous int return value: its call sites used '<= 0' to check for error, but it never actually returned a negative number, just 0 or 1. Now the function and its call sites agree that it's a bool. In a couple of places I've renamed variables called 'ret', because I don't like that name any more - it's unclear whether it means the return value (in preparation) for the _containing_ function or the return value received from a subroutine call, and occasionally I've accidentally used the same variable for both and introduced a bug. So where one of those got in my way, I've renamed it to 'toret' or 'retd' (the latter short for 'returned') in line with my usual modern practice, but I haven't done a thorough job of finding all of them. Finally, one amusing side effect of doing this is that I've had to separate quite a few chained assignments. It used to be perfectly fine to write 'a = b = c = TRUE' when a,b,c were int and TRUE was just a macro for 1; but now that they're bool and the value being assigned is the 'true' defined by stdbool.h, that idiom provokes a warning from gcc: 'suggest parentheses around assignment used as truth value'!
2018-11-02 19:23:19 +00:00
bool ptrlen_startswith(ptrlen whole, ptrlen prefix, ptrlen *tail);
bool ptrlen_endswith(ptrlen whole, ptrlen suffix, ptrlen *tail);
/* NOTE(review): only prototypes are visible here; the descriptions of
 * the first four functions are inferred from their names and
 * signatures and need confirming against the definitions.
 *
 *  - ptrlen_get_word takes 'input' by pointer, so it presumably
 *    consumes a word from the front of *input (delimited by any
 *    character in 'separators') and returns that word.
 *  - ptrlen_contains / ptrlen_contains_only presumably test whether
 *    'input' holds any / only bytes drawn from 'characters'.
 *  - mkstr returns a char * built from a ptrlen; presumably a freshly
 *    allocated NUL-terminated copy owned by the caller - TODO confirm. */
ptrlen ptrlen_get_word(ptrlen *input, const char *separators);
bool ptrlen_contains(ptrlen input, const char *characters);
bool ptrlen_contains_only(ptrlen input, const char *characters);
char *mkstr(ptrlen pl);
/* Converts a size_t length into an int. The PTRLEN_PRINTF macro uses
 * the result as the '*' precision argument of a "%.*s" conversion. */
int string_length_for_printf(size_t);
/* Derive two printf arguments from a ptrlen, suitable for "%.*s" */
#define PTRLEN_PRINTF(pl) \
string_length_for_printf((pl).len), (const char *)(pl).ptr
/* Make a ptrlen out of a compile-time string literal. We try to
 * enforce that it _is_ a string literal by writing "" on either side
 * of it, relying on adjacent-string-literal concatenation (not the
 * ## token-pasting operator); that should provoke a compile error if
 * it's any other kind of string. */
#define PTRLEN_LITERAL(stringlit) \
TYPECHECK("" stringlit "", make_ptrlen(stringlit, sizeof(stringlit)-1))
/* Make a ptrlen out of a compile-time string literal in a way that
* allows you to declare the ptrlen itself as a compile-time initialiser. */
#define PTRLEN_DECL_LITERAL(stringlit) \
{ TYPECHECK("" stringlit "", stringlit), sizeof(stringlit)-1 }
/* Make a ptrlen out of a constant byte array. */
#define PTRLEN_FROM_CONST_BYTES(a) make_ptrlen(a, sizeof(a))
void wordwrap(BinarySink *bs, ptrlen input, size_t maxwid);
/* Wipe sensitive data out of memory that's about to be freed. Simpler
* than memset because we don't need the fill char parameter; also
* attempts (by fiddly use of volatile) to inhibit the compiler from
* over-cleverly trying to optimise the memset away because it knows
* the variable is going out of scope. */
void smemclr(void *b, size_t len);
/* Compare two fixed-length chunks of memory for equality, without
* data-dependent control flow (so an attacker with a very accurate
* stopwatch can't try to guess where the first mismatching byte was).
* Returns 0 for mismatch or 1 for equality (unlike memcmp), hinted at
* by the 'eq' in the name. */
unsigned smemeq(const void *av, const void *bv, size_t len);
/* Encode a wide-character string into UTF-8. Tolerates surrogates if
* sizeof(wchar_t) == 2, assuming that in that case the wide string is
* encoded in UTF-16. */
char *encode_wide_string_as_utf8(const wchar_t *wstr);
/* Decode a single UTF-8 character. Returns U+FFFD for any of the
* illegal cases. If the source is empty, returns L'\0' (and sets the
* error indicator on the source, of course). */
/* X-macro list of UTF-8 decoding outcomes. Each entry pairs an enum
 * symbol with a human-readable message; a caller supplies X(sym, string)
 * to expand the list into whatever form it needs. Note that the final
 * entry ends in a line continuation, so the trailing 'end of list'
 * comment is part of the macro definition. */
#define DECODE_UTF8_FAILURE_LIST(X) \
X(DUTF8_SUCCESS, "success") \
X(DUTF8_SPURIOUS_CONTINUATION, "spurious continuation byte") \
X(DUTF8_ILLEGAL_BYTE, "illegal UTF-8 byte value") \
X(DUTF8_E_OUT_OF_DATA, "unfinished multibyte encoding at end of string") \
X(DUTF8_TRUNCATED_SEQUENCE, "multibyte encoding interrupted by " \
"non-continuation byte") \
X(DUTF8_OVERLONG_ENCODING, "overlong encoding") \
X(DUTF8_ENCODED_SURROGATE, "Unicode surrogate character encoded in " \
"UTF-8") \
X(DUTF8_CODE_POINT_TOO_BIG, "code point outside the Unicode range") \
/* end of list */
/* Enumeration generated from the list above: ENUM_DECL keeps only the
 * symbol of each entry, so the enumerators appear in list order with
 * DUTF8_SUCCESS first (value 0). */
typedef enum DecodeUTF8Failure {
#define ENUM_DECL(sym, string) sym,
DECODE_UTF8_FAILURE_LIST(ENUM_DECL)
#undef ENUM_DECL
/* Not a real outcome: one past the last list entry, i.e. the number
 * of entries. Used as the dimension of decode_utf8_error_strings. */
DUTF8_N_FAILURE_CODES
} DecodeUTF8Failure;
unsigned decode_utf8(BinarySource *src, DecodeUTF8Failure *err);
extern const char *const decode_utf8_error_strings[DUTF8_N_FAILURE_CODES];
/* Decode a single UTF-8 character to an output buffer of the
* platform's wchar_t. May write a pair of surrogates if
* sizeof(wchar_t) == 2, assuming that in that case the wide string is
* encoded in UTF-16. Otherwise, writes one character. Returns the
* number written. */
size_t decode_utf8_to_wchar(BinarySource *src, wchar_t *out,
DecodeUTF8Failure *err);
/* Normalise a UTF-8 string into Normalisation Form C. */
strbuf *utf8_to_nfc(ptrlen input);
/* Determine if a UTF-8 string contains any characters unknown to our
* supported version of Unicode. */
char *utf8_unknown_char(ptrlen input);
/* Write a string out in C string-literal format. */
void write_c_string_literal(FILE *fp, ptrlen str);
char *buildinfo(const char *newline);
/*
 * A function you can put at points in the code where execution should
 * never reach in the first place. Better than assert(false), or even
 * assert(false && "some explanatory message"), because some compilers
 * don't interpret assert(false) as a declaration of unreachability,
 * so they may still warn about pointless things like some variable
 * not being initialised on the unreachable code path.
 *
 * I follow the assertion with a call to abort() just in case someone
 * compiles with -DNDEBUG, and I wrap that abort inside my own
 * function labelled NORETURN just in case some unusual kind of system
 * header wasn't foresighted enough to label abort() itself that way.
 *
 * 'msg' is parenthesised in the expansion so that a message written
 * as a lower-precedence expression (for example a ?: choice between
 * two strings) cannot regroup with 'false &&' and thereby let the
 * assertion pass.
 */
static inline NORETURN void unreachable_internal(void) { abort(); }
#define unreachable(msg) (assert(false && (msg)), unreachable_internal())
/*
* Debugging functions.
*
* Output goes to debug.log
*
* debug() is like printf().
*
* dmemdump() and dmemdumpl() both do memory dumps. The difference
* is that dmemdumpl() is more suited for when the memory address is
* important (say because you'll be recording pointer values later
* on). dmemdump() is more concise.
*/
#ifdef DEBUG
void debug_printf(const char *fmt, ...) PRINTF_LIKE(1, 2);
Convert a lot of 'int' variables to 'bool'. My normal habit these days, in new code, is to treat int and bool as _almost_ completely separate types. I'm still willing to use C's implicit test for zero on an integer (e.g. 'if (!blob.len)' is fine, no need to spell it out as blob.len != 0), but generally, if a variable is going to be conceptually a boolean, I like to declare it bool and assign to it using 'true' or 'false' rather than 0 or 1. PuTTY is an exception, because it predates the C99 bool, and I've stuck to its existing coding style even when adding new code to it. But it's been annoying me more and more, so now that I've decided C99 bool is an acceptable thing to require from our toolchain in the first place, here's a quite thorough trawl through the source doing 'boolification'. Many variables and function parameters are now typed as bool rather than int; many assignments of 0 or 1 to those variables are now spelled 'true' or 'false'. I managed this thorough conversion with the help of a custom clang plugin that I wrote to trawl the AST and apply heuristics to point out where things might want changing. So I've even managed to do a decent job on parts of the code I haven't looked at in years! To make the plugin's work easier, I pushed platform front ends generally in the direction of using standard 'bool' in preference to platform-specific boolean types like Windows BOOL or GTK's gboolean; I've left the platform booleans in places they _have_ to be for the platform APIs to work right, but variables only used by my own code have been converted wherever I found them. In a few places there are int values that look very like booleans in _most_ of the places they're used, but have a rarely-used third value, or a distinction between different nonzero values that most users don't care about. 
In these cases, I've _removed_ uses of 'true' and 'false' for the return values, to emphasise that there's something more subtle going on than a simple boolean answer: - the 'multisel' field in dialog.h's list box structure, for which the GTK front end in particular recognises a difference between 1 and 2 but nearly everything else treats as boolean - the 'urgent' parameter to plug_receive, where 1 vs 2 tells you something about the specific location of the urgent pointer, but most clients only care about 0 vs 'something nonzero' - the return value of wc_match, where -1 indicates a syntax error in the wildcard. - the return values from SSH-1 RSA-key loading functions, which use -1 for 'wrong passphrase' and 0 for all other failures (so any caller which already knows it's not loading an _encrypted private_ key can treat them as boolean) - term->esc_query, and the 'query' parameter in toggle_mode in terminal.c, which _usually_ hold 0 for ESC[123h or 1 for ESC[?123h, but can also hold -1 for some other intervening character that we don't support. In a few places there's an integer that I haven't turned into a bool even though it really _can_ only take values 0 or 1 (and, as above, tried to make the call sites consistent in not calling those values true and false), on the grounds that I thought it would make it more confusing to imply that the 0 value was in some sense 'negative' or bad and the 1 positive or good: - the return value of plug_accepting uses the POSIXish convention of 0=success and nonzero=error; I think if I made it bool then I'd also want to reverse its sense, and that's a job for a separate piece of work. - the 'screen' parameter to lineptr() in terminal.c, where 0 and 1 represent the default and alternate screens. There's no obvious reason why one of those should be considered 'true' or 'positive' or 'success' - they're just indices - so I've left it as int. 
ssh_scp_recv had particularly confusing semantics for its previous int return value: its call sites used '<= 0' to check for error, but it never actually returned a negative number, just 0 or 1. Now the function and its call sites agree that it's a bool. In a couple of places I've renamed variables called 'ret', because I don't like that name any more - it's unclear whether it means the return value (in preparation) for the _containing_ function or the return value received from a subroutine call, and occasionally I've accidentally used the same variable for both and introduced a bug. So where one of those got in my way, I've renamed it to 'toret' or 'retd' (the latter short for 'returned') in line with my usual modern practice, but I haven't done a thorough job of finding all of them. Finally, one amusing side effect of doing this is that I've had to separate quite a few chained assignments. It used to be perfectly fine to write 'a = b = c = TRUE' when a,b,c were int and TRUE was just a macro for 1; but now that they're bool and the value being assigned is the 'true' defined by stdbool.h, that idiom provokes a warning from gcc: 'suggest parentheses around assignment used as truth value'!
2018-11-02 19:23:19 +00:00
/* Memory-dump primitive behind dmemdump() and dmemdumpl(): dumps 'len'
 * bytes starting at 'buf'. The flag 'L' selects which of the two dump
 * styles is produced: dmemdump() passes false and dmemdumpl() passes
 * true. */
void debug_memdump(const void *buf, int len, bool L);
/* DEBUG builds: debug() forwards its whole argument list to
 * debug_printf(), and the two dump macros forward to debug_memdump()
 * with the style flag fixed. */
#define debug(...) (debug_printf(__VA_ARGS__))
#define dmemdump(buf,len) (debug_memdump(buf, len, false))
#define dmemdumpl(buf,len) (debug_memdump(buf, len, true))
#else
/* Non-DEBUG builds: all three debugging macros expand to a no-op
 * expression. Their arguments do not appear in the expansion, so they
 * are never evaluated - don't rely on side effects in them. */
#define debug(...) ((void)0)
#define dmemdump(buf,len) ((void)0)
#define dmemdumpl(buf,len) ((void)0)
#endif
/* Element count of a genuine array object (size of the whole array
 * divided by the size of its first element). Each macro is only
 * defined if nothing else has already provided one of that name. */
#ifndef lenof
#define lenof(x) (sizeof(x) / sizeof((x)[0]))
#endif
/* Smaller / larger of two values. Function-like macros: whichever
 * argument is selected gets evaluated a second time. */
#ifndef min
#define min(x,y) ((y) > (x) ? (x) : (y))
#endif
#ifndef max
#define max(x,y) ((y) < (x) ? (x) : (y))
#endif
/* Read 8 bytes at 'vp' as a little-endian (least significant byte
 * first) unsigned 64-bit integer. Works byte by byte, so 'vp' needs
 * no particular alignment. */
static inline uint64_t GET_64BIT_LSB_FIRST(const void *vp)
{
    const uint8_t *bytes = (const uint8_t *)vp;
    uint64_t result = 0;
    /* Fold in from the most significant byte (index 7) downwards. */
    for (int i = 7; i >= 0; i--)
        result = (result << 8) | bytes[i];
    return result;
}
/* Store 'value' into the 8 bytes at 'vp' in little-endian order
 * (least significant byte at the lowest address). */
static inline void PUT_64BIT_LSB_FIRST(void *vp, uint64_t value)
{
    uint8_t *bytes = (uint8_t *)vp;
    for (unsigned i = 0; i < 8; i++) {
        bytes[i] = (uint8_t)value;   /* low 8 bits of what remains */
        value >>= 8;
    }
}
/* Read 4 bytes at 'vp' as a little-endian unsigned 32-bit integer. */
static inline uint32_t GET_32BIT_LSB_FIRST(const void *vp)
{
    const uint8_t *bytes = (const uint8_t *)vp;
    uint32_t result = 0;
    /* Most significant byte lives at index 3; fold downwards. */
    for (int i = 3; i >= 0; i--)
        result = (result << 8) | bytes[i];
    return result;
}
/* Store 'value' into the 4 bytes at 'vp' in little-endian order. */
static inline void PUT_32BIT_LSB_FIRST(void *vp, uint32_t value)
{
    uint8_t *bytes = (uint8_t *)vp;
    for (unsigned i = 0; i < 4; i++)
        bytes[i] = (uint8_t)(value >> (8 * i));
}
/* Read 2 bytes at 'vp' as a little-endian unsigned 16-bit integer. */
static inline uint16_t GET_16BIT_LSB_FIRST(const void *vp)
{
    const uint8_t *bytes = (const uint8_t *)vp;
    uint16_t low = bytes[0];
    uint16_t high = bytes[1];
    return (uint16_t)(low | (high << 8));
}
/* Store 'value' into the 2 bytes at 'vp' in little-endian order. */
static inline void PUT_16BIT_LSB_FIRST(void *vp, uint16_t value)
{
    uint8_t *bytes = (uint8_t *)vp;
    uint8_t low = (uint8_t)value;
    uint8_t high = (uint8_t)(value >> 8);
    bytes[0] = low;
    bytes[1] = high;
}
/* Read 8 bytes at 'vp' as a big-endian (most significant byte first)
 * unsigned 64-bit integer. */
static inline uint64_t GET_64BIT_MSB_FIRST(const void *vp)
{
    const uint8_t *bytes = (const uint8_t *)vp;
    uint64_t result = 0;
    /* Byte 0 is the most significant, so fold in ascending order. */
    for (unsigned i = 0; i < 8; i++)
        result = (result << 8) | bytes[i];
    return result;
}
/* Store 'value' into the 8 bytes at 'vp' in big-endian order (most
 * significant byte at the lowest address). */
static inline void PUT_64BIT_MSB_FIRST(void *vp, uint64_t value)
{
    uint8_t *bytes = (uint8_t *)vp;
    /* Fill from the last byte backwards, peeling off the low 8 bits. */
    for (unsigned i = 8; i-- > 0; ) {
        bytes[i] = (uint8_t)value;
        value >>= 8;
    }
}
/* Read 4 bytes at 'vp' as a big-endian unsigned 32-bit integer. */
static inline uint32_t GET_32BIT_MSB_FIRST(const void *vp)
{
    const uint8_t *bytes = (const uint8_t *)vp;
    uint32_t result = 0;
    for (unsigned i = 0; i < 4; i++)
        result = (result << 8) | bytes[i];
    return result;
}
/* Store 'value' into the 4 bytes at 'vp' in big-endian order. */
static inline void PUT_32BIT_MSB_FIRST(void *vp, uint32_t value)
{
    uint8_t *bytes = (uint8_t *)vp;
    for (unsigned i = 4; i-- > 0; ) {
        bytes[i] = (uint8_t)value;
        value >>= 8;
    }
}
/* Read 2 bytes at 'vp' as a big-endian unsigned 16-bit integer. */
static inline uint16_t GET_16BIT_MSB_FIRST(const void *vp)
{
    const uint8_t *bytes = (const uint8_t *)vp;
    uint16_t high = bytes[0];
    uint16_t low = bytes[1];
    return (uint16_t)((high << 8) | low);
}
/* Store 'value' into the 2 bytes at 'vp' in big-endian order. */
static inline void PUT_16BIT_MSB_FIRST(void *vp, uint16_t value)
{
    uint8_t *bytes = (uint8_t *)vp;
    uint8_t low = (uint8_t)value;
    uint8_t high = (uint8_t)(value >> 8);
    bytes[1] = low;
    bytes[0] = high;
}
Break up x11fwd.c. This is a module that I'd noticed in the past was too monolithic. There's a big pile of stub functions in uxpgnt.c that only have to be there because the implementation of true X11 _forwarding_ (i.e. actually managing a channel within an SSH connection), which Pageant doesn't need, was in the same module as more general X11-related utility functions which Pageant does need. So I've broken up this awkward monolith. Now x11fwd.c contains only the code that really does all go together for dealing with SSH X forwarding: the management of an X forwarding channel (including the vtables to make it behave as Channel at the SSH end and a Plug at the end that connects to the local X server), and the management of authorisation for those channels, including maintaining a tree234 of possible auth values and verifying the one we received. Most of the functions removed from this file have moved into the utils subdir, and also into the utils library (i.e. further down the link order), because they were basically just string and data processing. One exception is x11_setup_display, which parses a display string and returns a struct telling you everything about how to connect to it. That talks to the networking code (it does name lookups and makes a SockAddr), so it has to live in the network library rather than utils, and therefore it's not in the utils subdirectory either. The other exception is x11_get_screen_number, which it turned out nothing called at all! Apparently the job it used to do is now done as part of x11_setup_display. So I've just removed it completely.
2021-04-17 16:01:08 +00:00
/* For use in X11-related applications, an endianness-variable form of
* {GET,PUT}_16BIT which expects 'endian' to be either 'B' or 'l' */
static inline uint16_t GET_16BIT_X11(char endian, const void *p)
{
    /* 'B' selects most-significant-byte-first; any other value is
     * read least-significant-byte-first. */
    if (endian == 'B')
        return GET_16BIT_MSB_FIRST(p);
    return GET_16BIT_LSB_FIRST(p);
}
static inline void PUT_16BIT_X11(char endian, void *p, uint16_t value)
{
    /* Anything other than 'B' is written least-significant-byte-first. */
    if (endian != 'B') {
        PUT_16BIT_LSB_FIRST(p, value);
        return;
    }
    PUT_16BIT_MSB_FIRST(p, value);
}
/* Replace NULL with the empty string, permitting an idiom in which we
* get a string (pointer,length) pair that might be NULL,0 and can
* then safely say things like printf("%.*s", length, NULLTOEMPTY(ptr)) */
static inline const char *NULLTOEMPTY(const char *s)
{
    /* A null pointer maps to a (static) empty string literal; any
     * other pointer is passed through untouched. */
    if (!s)
        return "";
    return s;
}
New utility object, StripCtrlChars. This is for sanitising output that's going to be sent to a terminal, if you don't want it to be able to send arbitrary escape sequences and thereby (for example) move the cursor back up to existing text on the screen and overprint it confusingly. It works using the standard C library: we convert to a wide-character string and back, and then use wctype.h to spot control characters in the intermediate form. This means its idea of the conversion character set is locale-based rather than any of our own charset library's fixed settings - which is what you want if the aim is to protect your local terminal (which we assume the system locale represents accurately). This also means that the sanitiser strips things that will _act_ as control characters when sent to the local terminal, whether or not they were intended as control characters by a server that might have had a different character set in mind. Since the main aim is to protect the local terminal rather than to faithfully replicate the server's intention, I think that's the right criterion. It only strips control characters at the charset-independent layer, like backspace, carriage return and the escape character: wctype.h classifies those as control characters, but classifies as printing all of the more Unicode-specific controls like bidirectional overrides. But that's enough to prevent cursor repositioning, for example. stripctrl.c comes with a test main() of its own, which I wasn't able to fold into testcrypt and put in the test suite because of its dependence on the system locale - it wouldn't be guaranteed to work the same way on different test systems anyway. 
A knock-on build tweak: because you can feed data into this sanitiser in chunks of arbitrary size, including partial multibyte chars, I had to use mbrtowc() for the decoding, and that means that in the 'old' Win32 builds I have to link against the Visual Studio C++ library as well as the C library, because for some reason that's where mbrtowc lived in VS2003.
2019-02-20 06:56:40 +00:00
/* StripCtrlChars, defined in stripctrl.c: an adapter you can put on
* the front of one BinarySink and which functions as one in turn.
* Interprets its input as a stream of multibyte characters in the
* system locale, and removes any that are not either printable
* characters or newlines. */
/* Public part of a StripCtrlChars object. The only member visible to
 * callers is whatever BinarySink_IMPLEMENTATION expands to (a macro
 * defined outside this chunk; presumably it supplies the state needed
 * to treat this object as a BinarySink - confirm in marshal.h). */
struct StripCtrlChars {
BinarySink_IMPLEMENTATION;
/* and this is contained in a larger structure */
};
/* Constructors and basic control for StripCtrlChars. Only prototypes
 * are visible here, so parameter meanings are inferred from their
 * names and need confirming against stripctrl.c:
 *  - 'bs_out' is presumably the BinarySink that sanitised output is
 *    forwarded to;
 *  - 'permit_cr' presumably lets carriage returns through;
 *  - 'substitution' is presumably a character written in place of
 *    each removed one - TODO confirm. */
StripCtrlChars *stripctrl_new(
BinarySink *bs_out, bool permit_cr, wchar_t substitution);
/* Variant that additionally takes a Terminal and a 'translate'
 * callback of type
 * unsigned long (*)(Terminal *, term_utf8_decode *, unsigned char). */
StripCtrlChars *stripctrl_new_term_fn(
BinarySink *bs_out, bool permit_cr, wchar_t substitution,
Terminal *term, unsigned long (*translate)(
Terminal *, term_utf8_decode *, unsigned char));
/* Convenience form of stripctrl_new_term_fn() that always passes
 * term_translate as the 'translate' callback. */
#define stripctrl_new_term(bs, cr, sub, term) \
stripctrl_new_term_fn(bs, cr, sub, term, term_translate)
/* NOTE(review): presumably redirects output to 'new_bs_out' and
 * resets internal decoding state respectively - confirm. */
void stripctrl_retarget(StripCtrlChars *sccpub, BinarySink *new_bs_out);
void stripctrl_reset(StripCtrlChars *sccpub);
New utility object, StripCtrlChars. This is for sanitising output that's going to be sent to a terminal, if you don't want it to be able to send arbitrary escape sequences and thereby (for example) move the cursor back up to existing text on the screen and overprint it confusingly. It works using the standard C library: we convert to a wide-character string and back, and then use wctype.h to spot control characters in the intermediate form. This means its idea of the conversion character set is locale-based rather than any of our own charset library's fixed settings - which is what you want if the aim is to protect your local terminal (which we assume the system locale represents accurately). This also means that the sanitiser strips things that will _act_ as control characters when sent to the local terminal, whether or not they were intended as control characters by a server that might have had a different character set in mind. Since the main aim is to protect the local terminal rather than to faithfully replicate the server's intention, I think that's the right criterion. It only strips control characters at the charset-independent layer, like backspace, carriage return and the escape character: wctype.h classifies those as control characters, but classifies as printing all of the more Unicode-specific controls like bidirectional overrides. But that's enough to prevent cursor repositioning, for example. stripctrl.c comes with a test main() of its own, which I wasn't able to fold into testcrypt and put in the test suite because of its dependence on the system locale - it wouldn't be guaranteed to work the same way on different test systems anyway. 
A knock-on build tweak: because you can feed data into this sanitiser in chunks of arbitrary size, including partial multibyte chars, I had to use mbrtowc() for the decoding, and that means that in the 'old' Win32 builds I have to link against the Visual Studio C++ library as well as the C library, because for some reason that's where mbrtowc lived in VS2003.
2019-02-20 06:56:40 +00:00
/* NOTE(review): prototypes only; behaviour below is inferred from the
 * names and should be checked against stripctrl.c.
 *  - stripctrl_free presumably destroys the object.
 *  - stripctrl_enable_line_limiting presumably switches on some limit
 *    on output line length.
 *  - stripctrl_string_ptrlen takes the text as a ptrlen and returns a
 *    char *; presumably a newly allocated sanitised copy that the
 *    caller must free - TODO confirm ownership. */
void stripctrl_free(StripCtrlChars *sanpub);
void stripctrl_enable_line_limiting(StripCtrlChars *sccpub);
char *stripctrl_string_ptrlen(StripCtrlChars *sccpub, ptrlen str);
/* Convenience wrapper around stripctrl_string_ptrlen() for a
 * NUL-terminated C string: 'str' is converted to a ptrlen with
 * ptrlen_from_asciz() and the result of stripctrl_string_ptrlen() is
 * returned unchanged. */
static inline char *stripctrl_string(StripCtrlChars *sccpub, const char *str)
{
    return stripctrl_string_ptrlen(sccpub, ptrlen_from_asciz(str));
}
/*
* A mechanism for loading a file from disk into a memory buffer where
* it can be picked apart as a BinarySource.
*/
/* In-memory image of a loaded file. 'data' and 'len' are the pair that
 * ptrlen_from_lf() hands back as a ptrlen. */
struct LoadedFile {
/* Start of the file contents. */
char *data;
/* 'len': number of bytes exposed via ptrlen_from_lf().
 * 'max_size': presumably the buffer capacity given to lf_new() -
 * TODO confirm. */
size_t len, max_size;
/* Macro defined outside this chunk; presumably provides the state
 * that lets the contents be read as a BinarySource - confirm. */
BinarySource_IMPLEMENTATION;
};
/* Result code returned by lf_load_fp() and lf_load(). */
typedef enum {
LF_OK, /* file loaded successfully */
LF_TOO_BIG, /* file didn't fit in buffer */
LF_ERROR, /* error from stdio layer */
} LoadFileStatus;
/* LoadedFile lifecycle and loading. Only prototypes are visible here;
 * presumably lf_new() creates an object able to hold up to 'max_size'
 * bytes and lf_free() destroys it - confirm against the definitions.
 * The two loaders fill 'lf' from an already-open stdio stream or from
 * a Filename, and report the outcome as a LoadFileStatus. */
LoadedFile *lf_new(size_t max_size);
void lf_free(LoadedFile *lf);
LoadFileStatus lf_load_fp(LoadedFile *lf, FILE *fp);
LoadFileStatus lf_load(LoadedFile *lf, const Filename *filename);
/* View the bytes held by a LoadedFile as a ptrlen covering
 * lf->data[0 .. lf->len). */
static inline ptrlen ptrlen_from_lf(LoadedFile *lf)
{
    ptrlen contents = make_ptrlen(lf->data, lf->len);
    return contents;
}
2021-02-20 16:47:52 +00:00
/* Set the memory block of 'size' bytes at 'out' to the bitwise XOR of
* the two blocks of the same size at 'in1' and 'in2'.
*
* 'out' may point to exactly the same address as one of the inputs,
* but if the input and output blocks overlap in any other way, the
* result of this function is not guaranteed. No memmove-style effort
* is made to handle difficult overlap cases. */
void memxor(uint8_t *out, const uint8_t *in1, const uint8_t *in2, size_t size);
/* Boolean expressions used in OpenSSH certificate configuration */
/* NOTE(review): prototypes only; the following is inferred from the
 * signatures and should be confirmed against the definitions.
 *  - cert_expr_valid presumably reports whether 'expression' parses,
 *    using 'error_msg' and 'error_loc' as output parameters describing
 *    a failure (ownership of *error_msg not visible here).
 *  - cert_expr_match_str presumably evaluates 'expression' against
 *    the given hostname and port. */
bool cert_expr_valid(const char *expression,
char **error_msg, ptrlen *error_loc);
bool cert_expr_match_str(const char *expression,
const char *hostname, unsigned port);
/* Build a certificate expression out of hostname wildcards. Required
* to handle legacy configuration from early in development, when
* multiple wildcards were stored separately in config, implicitly
* ORed together. */
CertExprBuilder *cert_expr_builder_new(void);
void cert_expr_builder_free(CertExprBuilder *eb);
void cert_expr_builder_add(CertExprBuilder *eb, const char *wildcard);
char *cert_expr_expression(CertExprBuilder *eb);
#endif