1
0
mirror of https://git.tartarus.org/simon/putty.git synced 2025-01-09 17:38:00 +00:00
putty-source/windows/winutils.c

634 lines
23 KiB
C
Raw Normal View History

/*
* winutils.c: miscellaneous Windows utilities for GUI apps
*/
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include "putty.h"
#include "misc.h"
#ifdef TESTMODE
/* Definitions to allow this module to be compiled standalone for testing
* split_into_argv(). */
#define smalloc malloc
#define srealloc realloc
#define sfree free
#endif
/*
* GetOpenFileName/GetSaveFileName tend to muck around with the process'
* working directory on at least some versions of Windows.
* Here's a wrapper that gives more control over this, and hides a little
* bit of other grottiness.
*/
/*
 * Per-requester state: the directory a file requester was last left
 * in, so that the next requester using the same state can start there.
 */
struct filereq_tag {
TCHAR cwd[MAX_PATH]; /* empty string (cwd[0] == '\0') means nothing remembered */
};
/*
* `of' is expected to be initialised with most interesting fields, but
* this function does some administrivia. (assume `of' was memset to 0)
* save==1 -> GetSaveFileName; save==0 -> GetOpenFileName
* `state' is optional.
*/
Convert a lot of 'int' variables to 'bool'. My normal habit these days, in new code, is to treat int and bool as _almost_ completely separate types. I'm still willing to use C's implicit test for zero on an integer (e.g. 'if (!blob.len)' is fine, no need to spell it out as blob.len != 0), but generally, if a variable is going to be conceptually a boolean, I like to declare it bool and assign to it using 'true' or 'false' rather than 0 or 1. PuTTY is an exception, because it predates the C99 bool, and I've stuck to its existing coding style even when adding new code to it. But it's been annoying me more and more, so now that I've decided C99 bool is an acceptable thing to require from our toolchain in the first place, here's a quite thorough trawl through the source doing 'boolification'. Many variables and function parameters are now typed as bool rather than int; many assignments of 0 or 1 to those variables are now spelled 'true' or 'false'. I managed this thorough conversion with the help of a custom clang plugin that I wrote to trawl the AST and apply heuristics to point out where things might want changing. So I've even managed to do a decent job on parts of the code I haven't looked at in years! To make the plugin's work easier, I pushed platform front ends generally in the direction of using standard 'bool' in preference to platform-specific boolean types like Windows BOOL or GTK's gboolean; I've left the platform booleans in places they _have_ to be for the platform APIs to work right, but variables only used by my own code have been converted wherever I found them. In a few places there are int values that look very like booleans in _most_ of the places they're used, but have a rarely-used third value, or a distinction between different nonzero values that most users don't care about. 
In these cases, I've _removed_ uses of 'true' and 'false' for the return values, to emphasise that there's something more subtle going on than a simple boolean answer: - the 'multisel' field in dialog.h's list box structure, for which the GTK front end in particular recognises a difference between 1 and 2 but nearly everything else treats as boolean - the 'urgent' parameter to plug_receive, where 1 vs 2 tells you something about the specific location of the urgent pointer, but most clients only care about 0 vs 'something nonzero' - the return value of wc_match, where -1 indicates a syntax error in the wildcard. - the return values from SSH-1 RSA-key loading functions, which use -1 for 'wrong passphrase' and 0 for all other failures (so any caller which already knows it's not loading an _encrypted private_ key can treat them as boolean) - term->esc_query, and the 'query' parameter in toggle_mode in terminal.c, which _usually_ hold 0 for ESC[123h or 1 for ESC[?123h, but can also hold -1 for some other intervening character that we don't support. In a few places there's an integer that I haven't turned into a bool even though it really _can_ only take values 0 or 1 (and, as above, tried to make the call sites consistent in not calling those values true and false), on the grounds that I thought it would make it more confusing to imply that the 0 value was in some sense 'negative' or bad and the 1 positive or good: - the return value of plug_accepting uses the POSIXish convention of 0=success and nonzero=error; I think if I made it bool then I'd also want to reverse its sense, and that's a job for a separate piece of work. - the 'screen' parameter to lineptr() in terminal.c, where 0 and 1 represent the default and alternate screens. There's no obvious reason why one of those should be considered 'true' or 'positive' or 'success' - they're just indices - so I've left it as int. 
ssh_scp_recv had particularly confusing semantics for its previous int return value: its call sites used '<= 0' to check for error, but it never actually returned a negative number, just 0 or 1. Now the function and its call sites agree that it's a bool. In a couple of places I've renamed variables called 'ret', because I don't like that name any more - it's unclear whether it means the return value (in preparation) for the _containing_ function or the return value received from a subroutine call, and occasionally I've accidentally used the same variable for both and introduced a bug. So where one of those got in my way, I've renamed it to 'toret' or 'retd' (the latter short for 'returned') in line with my usual modern practice, but I haven't done a thorough job of finding all of them. Finally, one amusing side effect of doing this is that I've had to separate quite a few chained assignments. It used to be perfectly fine to write 'a = b = c = TRUE' when a,b,c were int and TRUE was just an integer constant; but now that it is the 'true' defined by stdbool.h, that idiom provokes a warning from gcc: 'suggest parentheses around assignment used as truth value'!
2018-11-02 19:23:19 +00:00
/*
 * Put up a GetOpenFileName / GetSaveFileName requester.
 *
 *  - `state' may be NULL. If it is non-NULL and holds a remembered
 *    directory, the requester starts there; afterwards it is updated
 *    with whatever directory the requester left the process in.
 *  - `of' is the caller-initialised OPENFILENAME; lStructSize and
 *    lpstrInitialDir are filled in here.
 *  - `preserve': if true, the process's working directory is saved
 *    beforehand and restored afterwards (best effort).
 *  - `save': true selects GetSaveFileName, false GetOpenFileName.
 *
 * Returns the result of the requester call, converted to bool.
 */
bool request_file(filereq *state, OPENFILENAME *of, bool preserve, bool save)
{
    TCHAR cwd[MAX_PATH];               /* process CWD */
    bool ret;

    /* Get process CWD */
    if (preserve) {
        DWORD r = GetCurrentDirectory(lenof(cwd), cwd);
        if (r == 0 || r >= lenof(cwd)) {
            /* Didn't work, oh well. Stop trying to be clever. */
            preserve = false;
        }
    }

    /* Open the file requester, maybe setting lpstrInitialDir */
    {
#ifdef OPENFILENAME_SIZE_VERSION_400
        of->lStructSize = OPENFILENAME_SIZE_VERSION_400;
#else
        of->lStructSize = sizeof(*of);
#endif
        of->lpstrInitialDir = (state && state->cwd[0]) ? state->cwd : NULL;
        /* Actually put up the requester. */
        ret = save ? GetSaveFileName(of) : GetOpenFileName(of);
    }

    /* Get CWD left by requester */
    if (state) {
        DWORD r = GetCurrentDirectory(lenof(state->cwd), state->cwd);
        if (r == 0 || r >= lenof(state->cwd)) {
            /* Didn't work, oh well. Forget any remembered directory. */
            state->cwd[0] = '\0';
        }
    }

    /* Restore process CWD */
    if (preserve) {
        /* If it fails, there's not much we can do. */
        (void) SetCurrentDirectory(cwd);
    }

    return ret;
}
/*
 * Allocate a new file-requester state with no remembered directory.
 * Dispose of it with filereq_free().
 */
filereq *filereq_new(void)
{
    filereq *state = snew(filereq);

    /* An empty string marks "no directory remembered yet". */
    *state->cwd = '\0';

    return state;
}
/*
 * Release a file-requester state by handing it to sfree().
 */
void filereq_free(filereq *state)
{
sfree(state);
}
/*
* Message box with optional context help.
*/
/* Owner window of the most recent message_box() call, for the help callback. */
static HWND message_box_owner;

/*
 * Help-button callback for MessageBoxIndirect(): translate the numeric
 * context id carried in lpHelpInfo into the matching help topic and,
 * if we recognise it, launch help on behalf of the owner window.
 */
static VOID CALLBACK message_box_help_callback(LPHELPINFO lpHelpInfo)
{
    const char *help_topic = NULL;

#define MATCH_HELP_CTX(name)                                        \
    do {                                                            \
        if (WINHELP_CTXID_ ## name == lpHelpInfo->dwContextId)      \
            help_topic = WINHELP_CTX_ ## name;                      \
    } while (0)

    MATCH_HELP_CTX(errors_hostkey_absent);
    MATCH_HELP_CTX(errors_hostkey_changed);
    MATCH_HELP_CTX(errors_cantloadkey);
    MATCH_HELP_CTX(option_cleanup);
    MATCH_HELP_CTX(pgp_fingerprints);

#undef MATCH_HELP_CTX

    /* Unknown context id: nothing to show. */
    if (!help_topic)
        return;

    launch_help(message_box_owner, help_topic);
}
/*
 * Display a message box, adding a Help button when a help context id
 * is supplied and help is available.
 *
 *  - `owner' is the owning window; it is also recorded so that the
 *    help callback can launch help against it.
 *  - `text' and `caption' are the body and title of the box.
 *  - `style' is the usual MB_* style word.
 *  - `helpctxid' is the help context id, or 0 for no Help button.
 *
 * Returns whatever MessageBoxIndirect() returns.
 */
int message_box(HWND owner, LPCTSTR text, LPCTSTR caption,
                DWORD style, DWORD helpctxid)
{
    /*
     * Zero the whole structure first, so that any field we don't set
     * explicitly below (notably lpszIcon, which is consulted if the
     * caller passes MB_USERICON) has a defined value rather than
     * whatever happened to be on the stack.
     */
    MSGBOXPARAMS mbox = {0};

    /*
     * We use MessageBoxIndirect() because it allows us to specify a
     * callback function for the Help button.
     */
    mbox.cbSize = sizeof(mbox);
    /* Assumes the global `hinst' has a sensible value. */
    mbox.hInstance = hinst;
    mbox.hwndOwner = message_box_owner = owner;
    mbox.lpfnMsgBoxCallback = &message_box_help_callback;
    mbox.dwLanguageId = LANG_NEUTRAL;
    mbox.lpszText = text;
    mbox.lpszCaption = caption;
    mbox.dwContextHelpId = helpctxid;
    mbox.dwStyle = style;
    /* Only offer a Help button if it would actually do something. */
    if (helpctxid != 0 && has_help())
        mbox.dwStyle |= MB_HELP;

    return MessageBoxIndirect(&mbox);
}
/*
* Display the fingerprints of the PGP Master Keys to the user.
*/
/*
 * Pop up an informational box listing the current and previous PuTTY
 * PGP Master Key fingerprints, with context help attached.
 */
void pgp_fingerprints_msgbox(HWND owner)
{
    /* Assembled at compile time from the PGP_* string macros. */
    static const char fingerprint_text[] =
        "These are the fingerprints of the PuTTY PGP Master Keys. They can\n"
        "be used to establish a trust path from this executable to another\n"
        "one. See the manual for more information.\n"
        "(Note: these fingerprints have nothing to do with SSH!)\n"
        "\n"
        "PuTTY Master Key as of " PGP_MASTER_KEY_YEAR
        " (" PGP_MASTER_KEY_DETAILS "):\n"
        " " PGP_MASTER_KEY_FP "\n\n"
        "Previous Master Key (" PGP_PREV_MASTER_KEY_YEAR
        ", " PGP_PREV_MASTER_KEY_DETAILS "):\n"
        " " PGP_PREV_MASTER_KEY_FP;

    message_box(owner, fingerprint_text, "PGP fingerprints",
                MB_ICONINFORMATION | MB_OK,
                HELPCTXID(pgp_fingerprints));
}
/*
* Handy wrapper around GetDlgItemText which doesn't make you invent
* an arbitrary length limit on the output string. Returned string is
* dynamically allocated; caller must free.
*/
/*
 * Fetch the text of dialog item `id' in `hwnd' into a freshly
 * allocated buffer, retrying with a bigger buffer until it fits.
 * The caller owns (and must free) the returned string.
 */
char *GetDlgItemText_alloc(HWND hwnd, int id)
{
    char *text = NULL;
    size_t capacity = 0;

    for (;;) {
        /* Presumably enlarges `text' and updates `capacity' to match. */
        sgrowarray_nm(text, capacity, capacity);
        GetDlgItemText(hwnd, id, text, capacity);

        /*
         * A NUL somewhere before the final byte means the text was
         * not cut short by the buffer size, so we're done.
         */
        if (memchr(text, '\0', capacity - 1))
            break;
    }

    return text;
}
/*
* Split a complete command line into argc/argv, attempting to do it
* exactly the same way the Visual Studio C library would do it (so
* that our console utilities, which receive argc and argv already
* broken apart by the C library, will have their command lines
* processed in the same way as the GUI utilities which get a whole
* command line and must call this function).
*
* Does not modify the input command line.
*
* The final parameter (argstart) is used to return a second array
* of char * pointers, the same length as argv, each one pointing
* at the start of the corresponding element of argv in the
* original command line. So if you get half way through processing
* your command line in argc/argv form and then decide you want to
* treat the rest as a raw string, you can. If you don't want to,
* `argstart' can be safely left NULL.
*/
/*
 * Memory ownership: when `argv' is non-NULL, all the argument strings
 * live in one allocation whose start is (*argv)[0], and *argv itself
 * is a second allocation. When `argstart' is non-NULL, *argstart is a
 * separate allocation whose elements point into `cmdline'. Any of
 * argc, argv and argstart may be NULL; results that are not wanted
 * are freed here.
 */
void split_into_argv(char *cmdline, int *argc, char ***argv,
                     char ***argstart)
{
    char *p;
    char *outputline, *q;
    char **outputargv, **outputargstart;
    int outputargc;
    size_t cmdlen, maxargs;

    /*
     * These argument-breaking rules apply to Visual Studio 7, which
     * is currently the compiler expected to be used for PuTTY. Visual
     * Studio 10 has different rules, lacking the curious mod 3
     * behaviour of consecutive quotes described below; I presume they
     * fixed a bug. As and when we migrate to a newer compiler, we'll
     * have to adjust this to match; however, for the moment we
     * faithfully imitate in our GUI utilities what our CLI utilities
     * can't be prevented from doing.
     *
     * When I investigated this, at first glance the rules appeared to
     * be:
     *
     *  - Single quotes are not special characters.
     *
     *  - Double quotes are removed, but within them spaces cease
     *    to be special.
     *
     *  - Backslashes are _only_ special when a sequence of them
     *    appear just before a double quote. In this situation,
     *    they are treated like C backslashes: so \" just gives a
     *    literal quote, \\" gives a literal backslash and then
     *    opens or closes a double-quoted segment, \\\" gives a
     *    literal backslash and then a literal quote, \\\\" gives
     *    two literal backslashes and then opens/closes a
     *    double-quoted segment, and so forth. Note that this
     *    behaviour is identical inside and outside double quotes.
     *
     *  - Two successive double quotes become one literal double
     *    quote, but only _inside_ a double-quoted segment.
     *    Outside, they just form an empty double-quoted segment
     *    (which may cause an empty argument word).
     *
     *  - That only leaves the interesting question of what happens
     *    when one or more backslashes precedes two or more double
     *    quotes, starting inside a double-quoted string. And the
     *    answer to that appears somewhat bizarre. Here I tabulate
     *    number of backslashes (across the top) against number of
     *    quotes (down the left), and indicate how many backslashes
     *    are output, how many quotes are output, and whether a
     *    quoted segment is open at the end of the sequence:
     *
     *                      backslashes
     *
     *               0         1      2      3      4
     *
     *         0   0,0,y  |  1,0,y  2,0,y  3,0,y  4,0,y
     *            --------+-----------------------------
     *         1   0,0,n  |  0,1,y  1,0,n  1,1,y  2,0,n
     *    q    2   0,1,n  |  0,1,n  1,1,n  1,1,n  2,1,n
     *    u    3   0,1,y  |  0,2,n  1,1,y  1,2,n  2,1,y
     *    o    4   0,1,n  |  0,2,y  1,1,n  1,2,y  2,1,n
     *    t    5   0,2,n  |  0,2,n  1,2,n  1,2,n  2,2,n
     *    e    6   0,2,y  |  0,3,n  1,2,y  1,3,n  2,2,y
     *    s    7   0,2,n  |  0,3,y  1,2,n  1,3,y  2,2,n
     *         8   0,3,n  |  0,3,n  1,3,n  1,3,n  2,3,n
     *         9   0,3,y  |  0,4,n  1,3,y  1,4,n  2,3,y
     *        10   0,3,n  |  0,4,y  1,3,n  1,4,y  2,3,n
     *        11   0,4,n  |  0,4,n  1,4,n  1,4,n  2,4,n
     *
     *
     *      [Test fragment was of the form "a\\\"""b c" d.]
     *
     * There is very weird mod-3 behaviour going on here in the
     * number of quotes, and it even applies when there aren't any
     * backslashes! How ghastly.
     *
     * With a bit of thought, this extremely odd diagram suddenly
     * coalesced itself into a coherent, if still ghastly, model of
     * how things work:
     *
     *  - As before, backslashes are only special when one or more
     *    of them appear contiguously before at least one double
     *    quote. In this situation the backslashes do exactly what
     *    you'd expect: each one quotes the next thing in front of
     *    it, so you end up with n/2 literal backslashes (if n is
     *    even) or (n-1)/2 literal backslashes and a literal quote
     *    (if n is odd). In the latter case the double quote
     *    character right after the backslashes is used up.
     *
     *  - After that, any remaining double quotes are processed. A
     *    string of contiguous unescaped double quotes has a mod-3
     *    behaviour:
     *
     *     * inside a quoted segment, a quote ends the segment.
     *     * _immediately_ after ending a quoted segment, a quote
     *       simply produces a literal quote.
     *     * otherwise, outside a quoted segment, a quote begins a
     *       quoted segment.
     *
     *    So, for example, if we started inside a quoted segment
     *    then two contiguous quotes would close the segment and
     *    produce a literal quote; three would close the segment,
     *    produce a literal quote, and open a new segment. If we
     *    started outside a quoted segment, then two contiguous
     *    quotes would open and then close a segment, producing no
     *    output (but potentially creating a zero-length argument);
     *    but three quotes would open and close a segment and then
     *    produce a literal quote.
     */

    /*
     * First deal with the simplest of all special cases: if there
     * aren't any arguments, return 0,NULL,NULL.
     *
     * (The <ctype.h> functions require an argument representable as
     * unsigned char, so cast before calling isspace; a plain char
     * may be negative for non-ASCII bytes.)
     */
    while (*cmdline && isspace((unsigned char)*cmdline)) cmdline++;
    if (!*cmdline) {
        if (argc) *argc = 0;
        if (argv) *argv = NULL;
        if (argstart) *argstart = NULL;
        return;
    }

    /*
     * This will guaranteeably be big enough; we can realloc it
     * down later.
     *
     * The output text is never longer than the input plus one NUL.
     * Every argument consumes at least one non-space character and
     * successive arguments are separated by at least one space, so
     * there can be at most (len+1)/2 of them.
     */
    cmdlen = strlen(cmdline);
    maxargs = (cmdlen + 1) / 2;
    outputline = snewn(1+cmdlen, char);
    outputargv = snewn(maxargs, char *);
    outputargstart = snewn(maxargs, char *);

    p = cmdline; q = outputline; outputargc = 0;

    while (*p) {
        bool quote;

        /* Skip whitespace searching for start of argument. */
        while (*p && isspace((unsigned char)*p)) p++;
        if (!*p) break;

        /* We have an argument; start it. */
        outputargv[outputargc] = q;
        outputargstart[outputargc] = p;
        outputargc++;
        quote = false;

        /* Copy data into the argument until it's finished. */
        while (*p) {
            if (!quote && isspace((unsigned char)*p))
                break;                 /* argument is finished */

            if (*p == '"' || *p == '\\') {
                /*
                 * We have a sequence of zero or more backslashes
                 * followed by a sequence of zero or more quotes.
                 * Count up how many of each, and then deal with
                 * them as appropriate.
                 */
                int i, slashes = 0, quotes = 0;
                while (*p == '\\') slashes++, p++;
                while (*p == '"') quotes++, p++;

                if (!quotes) {
                    /*
                     * Special case: if there are no quotes,
                     * slashes are not special at all, so just copy
                     * n slashes to the output string.
                     */
                    while (slashes--) *q++ = '\\';
                } else {
                    /* Slashes annihilate in pairs. */
                    while (slashes >= 2) slashes -= 2, *q++ = '\\';

                    /* One remaining slash takes out the first quote. */
                    if (slashes) quotes--, *q++ = '"';

                    if (quotes > 0) {
                        /* Outside a quote segment, a quote starts one. */
                        if (!quote) quotes--;

                        /* Now we produce (n+1)/3 literal quotes... */
                        for (i = 3; i <= quotes+1; i += 3) *q++ = '"';

                        /* ... and end in a quote segment iff 3 divides n. */
                        quote = (quotes % 3 == 0);
                    }
                }
            } else {
                *q++ = *p++;
            }
        }

        /* At the end of an argument, just append a trailing NUL. */
        *q++ = '\0';
    }

    /* Trim the pointer arrays down to the number of arguments found. */
    outputargv = sresize(outputargv, outputargc, char *);
    outputargstart = sresize(outputargstart, outputargc, char *);

    if (argc) *argc = outputargc;
    if (argv) {
        *argv = outputargv;
    } else {
        /*
         * Nobody wants the split-up strings, so release the text
         * buffer as well as the array of pointers into it.
         */
        sfree(outputargv);
        sfree(outputline);
    }
    if (argstart) *argstart = outputargstart; else sfree(outputargstart);
}
#ifdef TESTMODE
const struct argv_test {
const char *cmdline;
const char *argv[10];
} argv_tests[] = {
/*
* We generate this set of tests by invoking ourself with
* `-generate'.
*/
{"ab c\" d", {"ab", "c d", NULL}},
{"a\"b c\" d", {"ab c", "d", NULL}},
{"a\"\"b c\" d", {"ab", "c d", NULL}},
{"a\"\"\"b c\" d", {"a\"b", "c d", NULL}},
{"a\"\"\"\"b c\" d", {"a\"b c", "d", NULL}},
{"a\"\"\"\"\"b c\" d", {"a\"b", "c d", NULL}},
{"a\"\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
{"a\"\"\"\"\"\"\"b c\" d", {"a\"\"b c", "d", NULL}},
{"a\"\"\"\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
{"a\\b c\" d", {"a\\b", "c d", NULL}},
{"a\\\"b c\" d", {"a\"b", "c d", NULL}},
{"a\\\"\"b c\" d", {"a\"b c", "d", NULL}},
{"a\\\"\"\"b c\" d", {"a\"b", "c d", NULL}},
{"a\\\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
{"a\\\"\"\"\"\"b c\" d", {"a\"\"b c", "d", NULL}},
{"a\\\"\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
{"a\\\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b", "c d", NULL}},
{"a\\\"\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b c", "d", NULL}},
{"a\\\\b c\" d", {"a\\\\b", "c d", NULL}},
{"a\\\\\"b c\" d", {"a\\b c", "d", NULL}},
{"a\\\\\"\"b c\" d", {"a\\b", "c d", NULL}},
{"a\\\\\"\"\"b c\" d", {"a\\\"b", "c d", NULL}},
{"a\\\\\"\"\"\"b c\" d", {"a\\\"b c", "d", NULL}},
{"a\\\\\"\"\"\"\"b c\" d", {"a\\\"b", "c d", NULL}},
{"a\\\\\"\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
{"a\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\"\"b c", "d", NULL}},
{"a\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
{"a\\\\\\b c\" d", {"a\\\\\\b", "c d", NULL}},
{"a\\\\\\\"b c\" d", {"a\\\"b", "c d", NULL}},
{"a\\\\\\\"\"b c\" d", {"a\\\"b c", "d", NULL}},
{"a\\\\\\\"\"\"b c\" d", {"a\\\"b", "c d", NULL}},
{"a\\\\\\\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
{"a\\\\\\\"\"\"\"\"b c\" d", {"a\\\"\"b c", "d", NULL}},
{"a\\\\\\\"\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
{"a\\\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b", "c d", NULL}},
{"a\\\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b c", "d", NULL}},
{"a\\\\\\\\b c\" d", {"a\\\\\\\\b", "c d", NULL}},
{"a\\\\\\\\\"b c\" d", {"a\\\\b c", "d", NULL}},
{"a\\\\\\\\\"\"b c\" d", {"a\\\\b", "c d", NULL}},
{"a\\\\\\\\\"\"\"b c\" d", {"a\\\\\"b", "c d", NULL}},
{"a\\\\\\\\\"\"\"\"b c\" d", {"a\\\\\"b c", "d", NULL}},
{"a\\\\\\\\\"\"\"\"\"b c\" d", {"a\\\\\"b", "c d", NULL}},
{"a\\\\\\\\\"\"\"\"\"\"b c\" d", {"a\\\\\"\"b", "c d", NULL}},
{"a\\\\\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\\\"\"b c", "d", NULL}},
{"a\\\\\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\\\"\"b", "c d", NULL}},
{"\"ab c\" d", {"ab c", "d", NULL}},
{"\"a\"b c\" d", {"ab", "c d", NULL}},
{"\"a\"\"b c\" d", {"a\"b", "c d", NULL}},
{"\"a\"\"\"b c\" d", {"a\"b c", "d", NULL}},
{"\"a\"\"\"\"b c\" d", {"a\"b", "c d", NULL}},
{"\"a\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
{"\"a\"\"\"\"\"\"b c\" d", {"a\"\"b c", "d", NULL}},
{"\"a\"\"\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
{"\"a\"\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b", "c d", NULL}},
{"\"a\\b c\" d", {"a\\b c", "d", NULL}},
{"\"a\\\"b c\" d", {"a\"b c", "d", NULL}},
{"\"a\\\"\"b c\" d", {"a\"b", "c d", NULL}},
{"\"a\\\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
{"\"a\\\"\"\"\"b c\" d", {"a\"\"b c", "d", NULL}},
{"\"a\\\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
{"\"a\\\"\"\"\"\"\"b c\" d", {"a\"\"\"b", "c d", NULL}},
{"\"a\\\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b c", "d", NULL}},
{"\"a\\\"\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b", "c d", NULL}},
{"\"a\\\\b c\" d", {"a\\\\b c", "d", NULL}},
{"\"a\\\\\"b c\" d", {"a\\b", "c d", NULL}},
{"\"a\\\\\"\"b c\" d", {"a\\\"b", "c d", NULL}},
{"\"a\\\\\"\"\"b c\" d", {"a\\\"b c", "d", NULL}},
{"\"a\\\\\"\"\"\"b c\" d", {"a\\\"b", "c d", NULL}},
{"\"a\\\\\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
{"\"a\\\\\"\"\"\"\"\"b c\" d", {"a\\\"\"b c", "d", NULL}},
{"\"a\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
{"\"a\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b", "c d", NULL}},
{"\"a\\\\\\b c\" d", {"a\\\\\\b c", "d", NULL}},
{"\"a\\\\\\\"b c\" d", {"a\\\"b c", "d", NULL}},
{"\"a\\\\\\\"\"b c\" d", {"a\\\"b", "c d", NULL}},
{"\"a\\\\\\\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
{"\"a\\\\\\\"\"\"\"b c\" d", {"a\\\"\"b c", "d", NULL}},
{"\"a\\\\\\\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
{"\"a\\\\\\\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b", "c d", NULL}},
{"\"a\\\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b c", "d", NULL}},
{"\"a\\\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b", "c d", NULL}},
{"\"a\\\\\\\\b c\" d", {"a\\\\\\\\b c", "d", NULL}},
{"\"a\\\\\\\\\"b c\" d", {"a\\\\b", "c d", NULL}},
{"\"a\\\\\\\\\"\"b c\" d", {"a\\\\\"b", "c d", NULL}},
{"\"a\\\\\\\\\"\"\"b c\" d", {"a\\\\\"b c", "d", NULL}},
{"\"a\\\\\\\\\"\"\"\"b c\" d", {"a\\\\\"b", "c d", NULL}},
{"\"a\\\\\\\\\"\"\"\"\"b c\" d", {"a\\\\\"\"b", "c d", NULL}},
{"\"a\\\\\\\\\"\"\"\"\"\"b c\" d", {"a\\\\\"\"b c", "d", NULL}},
{"\"a\\\\\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\\\"\"b", "c d", NULL}},
{"\"a\\\\\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\\\"\"\"b", "c d", NULL}},
};
/*
 * Standalone test driver for split_into_argv() (TESTMODE builds only).
 *
 * With no arguments, runs every entry of argv_tests and prints a line
 * for each mismatch. With `-splat', `-split' or `-generate', helps
 * regenerate that table (see the comment in the body). Returns 1 for
 * an unrecognised option or an allocation failure, and 0 otherwise.
 */
int main(int argc, char **argv)
{
    int i, j;

    if (argc > 1) {
        /*
         * Generation of tests.
         *
         * Given `-splat <args>', we print out a C-style
         * representation of each argument (in the form "a", "b",
         * NULL), backslash-escaping each backslash and double
         * quote.
         *
         * Given `-split <string>', we first doctor `string' by
         * turning forward slashes into backslashes, single quotes
         * into double quotes and underscores into spaces; and then
         * we feed the resulting string to ourself with `-splat'.
         *
         * Given `-generate', we concoct a variety of fun test
         * cases, encode them in quote-safe form (mapping \, " and
         * space to /, ' and _ respectively) and feed each one to
         * `-split'.
         */
        if (!strcmp(argv[1], "-splat")) {
            int argi;
            char *p;

            for (argi = 2; argi < argc; argi++) {
                putchar('"');
                for (p = argv[argi]; *p; p++) {
                    if (*p == '\\' || *p == '"')
                        putchar('\\');
                    putchar(*p);
                }
                printf("\", ");
            }
            printf("NULL");
            return 0;
        }

        if (!strcmp(argv[1], "-split") && argc > 2) {
            char *str = malloc(20 + strlen(argv[0]) + strlen(argv[2]));
            char *p, *q;

            if (!str) {
                fprintf(stderr, "out of memory\n");
                return 1;
            }

            q = str + sprintf(str, "%s -splat ", argv[0]);
            printf(" {\"");
            for (p = argv[2]; *p; p++, q++) {
                switch (*p) {
                  case '/': printf("\\\\"); *q = '\\'; break;
                  case '\'': printf("\\\""); *q = '"'; break;
                  case '_': printf(" "); *q = ' '; break;
                  default: putchar(*p); *q = *p; break;
                }
            }
            /*
             * Terminate the command we have been building (q), not
             * the input string (p), which is already terminated.
             */
            *q = '\0';
            printf("\", {");
            fflush(stdout);

            system(str);
            free(str);

            printf("}},\n");
            return 0;
        }

        if (!strcmp(argv[1], "-generate")) {
            char *teststr, *p;
            int k, initialquote, backslashes, quotes;

            teststr = malloc(200 + strlen(argv[0]));
            if (!teststr) {
                fprintf(stderr, "out of memory\n");
                return 1;
            }

            for (initialquote = 0; initialquote <= 1; initialquote++) {
                for (backslashes = 0; backslashes < 5; backslashes++) {
                    for (quotes = 0; quotes < 9; quotes++) {
                        p = teststr + sprintf(teststr, "%s -split ", argv[0]);
                        if (initialquote) *p++ = '\'';
                        *p++ = 'a';
                        for (k = 0; k < backslashes; k++) *p++ = '/';
                        for (k = 0; k < quotes; k++) *p++ = '\'';
                        *p++ = 'b';
                        *p++ = '_';
                        *p++ = 'c';
                        *p++ = '\'';
                        *p++ = '_';
                        *p++ = 'd';
                        *p = '\0';

                        system(teststr);
                    }
                }
            }
            free(teststr);
            return 0;
        }

        fprintf(stderr, "unrecognised option: \"%s\"\n", argv[1]);
        return 1;
    }

    /*
     * If we get here, we were invoked with no arguments, so just
     * run the tests.
     */
    for (i = 0; i < (int)lenof(argv_tests); i++) {
        int ac;
        char **av;

        /*
         * split_into_argv() takes a non-const pointer but does not
         * modify its input, so the cast is safe. We don't need the
         * argstart array here.
         */
        split_into_argv((char *)argv_tests[i].cmdline, &ac, &av, NULL);

        for (j = 0; j < ac && argv_tests[i].argv[j]; j++) {
            if (strcmp(av[j], argv_tests[i].argv[j])) {
                printf("failed test %d (|%s|) arg %d: |%s| should be |%s|\n",
                       i, argv_tests[i].cmdline,
                       j, av[j], argv_tests[i].argv[j]);
            }
#ifdef VERBOSE
            else {
                printf("test %d (|%s|) arg %d: |%s| == |%s|\n",
                       i, argv_tests[i].cmdline,
                       j, av[j], argv_tests[i].argv[j]);
            }
#endif
        }
        if (j < ac)
            printf("failed test %d (|%s|): %d args returned, should be %d\n",
                   i, argv_tests[i].cmdline, ac, j);
        if (argv_tests[i].argv[j])
            printf("failed test %d (|%s|): %d args returned, should be more\n",
                   i, argv_tests[i].cmdline, ac);

        /*
         * The argument strings share one buffer beginning at av[0];
         * release that and the pointer array itself.
         */
        if (ac > 0) {
            sfree(av[0]);
            sfree(av);
        }
    }

    return 0;
}
#endif