mirror of
https://git.tartarus.org/simon/putty.git
synced 2025-01-10 18:07:59 +00:00
18cac59b43
I just happened to notice that just below my huge comment explaining the two command-line splitting policies, there's a smaller one that refers to it as '(see large comment below)'. It's not below - it's above! That was because the older parts of that comment had previously been inside split_into_argv(), until I moved the explanation further up the file to the top level. Another consequence of that was that the older section of the comment was wrapped to a strangely narrow line width, because it had previously been indented further right. Folded the two comments together, and rewrapped the narrow paragraphs.
708 lines
28 KiB
C
708 lines
28 KiB
C
/*
|
|
* Split a complete command line into argc/argv, attempting to do it
|
|
* exactly the same way the Visual Studio C library would do it (so
|
|
* that our console utilities, which receive argc and argv already
|
|
* broken apart by the C library, will have their command lines
|
|
* processed in the same way as the GUI utilities which get a whole
|
|
* command line and must call this function).
|
|
*
|
|
* Does not modify the input command line.
|
|
*
|
|
* The final parameter (argstart) is used to return a second array
|
|
* of char * pointers, the same length as argv, each one pointing
|
|
* at the start of the corresponding element of argv in the
|
|
* original command line. So if you get half way through processing
|
|
* your command line in argc/argv form and then decide you want to
|
|
* treat the rest as a raw string, you can. If you don't want to,
|
|
* `argstart' can be safely left NULL.
|
|
*/
|
|
|
|
#include "putty.h"
|
|
|
|
/*
|
|
* The precise argument-breaking rules vary with compiler version, or
|
|
* rather, with the crt0-type startup code that comes with each
|
|
* compiler's C library. We do our best to match the compiler version,
|
|
* so that we faithfully imitate in our GUI utilities what the
|
|
* corresponding set of CLI utilities can't be prevented from doing.
|
|
*
|
|
* The basic rules are:
|
|
*
|
|
* - Single quotes are not special characters.
|
|
*
|
|
* - Double quotes are removed, but within them spaces cease to be
|
|
* special.
|
|
*
|
|
* - Backslashes are _only_ special when a sequence of them appear
|
|
* just before a double quote. In this situation, they are treated
|
|
* like C backslashes: so \" just gives a literal quote, \\" gives
|
|
* a literal backslash and then opens or closes a double-quoted
|
|
* segment, \\\" gives a literal backslash and then a literal
|
|
* quote, \\\\" gives two literal backslashes and then opens/closes
|
|
* a double-quoted segment, and so forth. Note that this behaviour
|
|
* is identical inside and outside double quotes.
|
|
*
|
|
* - Two successive double quotes become one literal double quote,
|
|
* but only _inside_ a double-quoted segment. Outside, they just
|
|
* form an empty double-quoted segment (which may cause an empty
|
|
* argument word).
|
|
*
|
|
* That only leaves the interesting question of what happens when one
|
|
* or more backslashes precedes two or more double quotes, starting
|
|
* inside a double-quoted string.
|
|
*
|
|
* Modern Visual Studio (as of 2021)
|
|
* ---------------------------------
|
|
*
|
|
* I investigated this in an ordinary CLI program, using the
|
|
* toolchain's crt0 to split a command line of the form
|
|
*
|
|
* "a\\\"""b c" d
|
|
*
|
|
* Here I tabulate number of backslashes (across the top) against
|
|
* number of quotes (down the left), and indicate how many backslashes
|
|
* are output, how many quotes are output, and whether a quoted
|
|
* segment is open at the end of the sequence:
|
|
*
|
|
* backslashes
|
|
*
|
|
* 0 1 2 3 4
|
|
*
|
|
* 0 0,0,y | 1,0,y 2,0,y 3,0,y 4,0,y
|
|
* --------+-----------------------------
|
|
* 1 0,0,n | 0,1,y 1,0,n 1,1,y 2,0,n
|
|
* q 2 0,1,y | 0,1,n 1,1,y 1,1,n 2,1,y
|
|
* u 3 0,1,n | 0,2,y 1,1,n 1,2,y 2,1,n
|
|
* o 4 0,2,y | 0,2,n 1,2,y 1,2,n 2,2,y
|
|
* t 5 0,2,n | 0,3,y 1,2,n 1,3,y 2,2,n
|
|
* e 6 0,3,y | 0,3,n 1,3,y 1,3,n 2,3,y
|
|
* s 7 0,3,n | 0,4,y 1,3,n 1,4,y 2,3,n
|
|
* 8 0,4,y | 0,4,n 1,4,y 1,4,n 2,4,y
|
|
*
|
|
* The row at the top of this table, with quotes=0, demonstrates what
|
|
* I claimed above, that when a sequence of backslashes are not
|
|
* followed by a double quote, they don't act specially at all. The
|
|
* rest of the table shows that the backslashes escape each other in
|
|
* pairs (so that with 2n or 2n+1 input backslashes you get n output
|
|
* ones); if there's an odd number of input backslashes then the last
|
|
* one escapes the first double quote (so you get a literal quote and
|
|
* enter a quoted string); thereafter, each input quote character
|
|
* either opens or closes a quoted string, and if it closes one, it
|
|
* generates a literal " as a side effect.
|
|
*
|
|
* Older Visual Studio
|
|
* -------------------
|
|
*
|
|
* But here's the corresponding table from the older Visual Studio 7:
|
|
*
|
|
* backslashes
|
|
*
|
|
* 0 1 2 3 4
|
|
*
|
|
* 0 0,0,y | 1,0,y 2,0,y 3,0,y 4,0,y
|
|
* --------+-----------------------------
|
|
* 1 0,0,n | 0,1,y 1,0,n 1,1,y 2,0,n
|
|
* q 2 0,1,n | 0,1,n 1,1,n 1,1,n 2,1,n
|
|
* u 3 0,1,y | 0,2,n 1,1,y 1,2,n 2,1,y
|
|
* o 4 0,1,n | 0,2,y 1,1,n 1,2,y 2,1,n
|
|
* t 5 0,2,n | 0,2,n 1,2,n 1,2,n 2,2,n
|
|
* e 6 0,2,y | 0,3,n 1,2,y 1,3,n 2,2,y
|
|
* s 7 0,2,n | 0,3,y 1,2,n 1,3,y 2,2,n
|
|
* 8 0,3,n | 0,3,n 1,3,n 1,3,n 2,3,n
|
|
* 9 0,3,y | 0,4,n 1,3,y 1,4,n 2,3,y
|
|
* 10 0,3,n | 0,4,y 1,3,n 1,4,y 2,3,n
|
|
* 11 0,4,n | 0,4,n 1,4,n 1,4,n 2,4,n
|
|
*
|
|
* There is very weird mod-3 behaviour going on here in the number of
|
|
* quotes, and it even applies when there aren't any backslashes! How
|
|
* ghastly.
|
|
*
|
|
* With a bit of thought, this extremely odd diagram suddenly
|
|
* coalesced itself into a coherent, if still ghastly, model of how
|
|
* things work:
|
|
*
|
|
* - As before, backslashes are only special when one or more of them
|
|
* appear contiguously before at least one double quote. In this
|
|
* situation the backslashes do exactly what you'd expect: each one
|
|
* quotes the next thing in front of it, so you end up with n/2
|
|
* literal backslashes (if n is even) or (n-1)/2 literal
|
|
* backslashes and a literal quote (if n is odd). In the latter
|
|
* case the double quote character right after the backslashes is
|
|
* used up.
|
|
*
|
|
* - After that, any remaining double quotes are processed. A string
|
|
* of contiguous unescaped double quotes has a mod-3 behaviour:
|
|
*
|
|
* * inside a quoted segment, a quote ends the segment.
|
|
* * _immediately_ after ending a quoted segment, a quote simply
|
|
* produces a literal quote.
|
|
* * otherwise, outside a quoted segment, a quote begins a quoted
|
|
* segment.
|
|
*
|
|
* So, for example, if we started inside a quoted segment then two
|
|
* contiguous quotes would close the segment and produce a literal
|
|
* quote; three would close the segment, produce a literal quote,
|
|
* and open a new segment. If we started outside a quoted segment,
|
|
* then two contiguous quotes would open and then close a segment,
|
|
* producing no output (but potentially creating a zero-length
|
|
* argument); but three quotes would open and close a segment and
|
|
* then produce a literal quote.
|
|
*
|
|
* I don't know exactly when the bug fix happened, but I know that VS7
|
|
* had the odd mod-3 behaviour. So the #if below will ensure that
|
|
* modern (2015 onwards) versions of VS use the new more sensible
|
|
* behaviour, and VS7 uses the old one. Things in between may be
|
|
* wrong; if anyone cares, patches to change the cutoff version in
|
|
* this #if are welcome.
|
|
*/
|
|
#if _MSC_VER < 1400
|
|
#define MOD3 1
|
|
#else
|
|
#define MOD3 0
|
|
#endif
|
|
|
|
void split_into_argv(char *cmdline, int *argc, char ***argv,
|
|
char ***argstart)
|
|
{
|
|
char *p;
|
|
char *outputline, *q;
|
|
char **outputargv, **outputargstart;
|
|
int outputargc;
|
|
|
|
/*
|
|
* First deal with the simplest of all special cases: if there
|
|
* aren't any arguments, return 0,NULL,NULL.
|
|
*/
|
|
while (*cmdline && isspace(*cmdline)) cmdline++;
|
|
if (!*cmdline) {
|
|
if (argc) *argc = 0;
|
|
if (argv) *argv = NULL;
|
|
if (argstart) *argstart = NULL;
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* This will guaranteeably be big enough; we can realloc it
|
|
* down later.
|
|
*/
|
|
outputline = snewn(1+strlen(cmdline), char);
|
|
outputargv = snewn(strlen(cmdline)+1 / 2, char *);
|
|
outputargstart = snewn(strlen(cmdline)+1 / 2, char *);
|
|
|
|
p = cmdline; q = outputline; outputargc = 0;
|
|
|
|
while (*p) {
|
|
bool quote;
|
|
|
|
/* Skip whitespace searching for start of argument. */
|
|
while (*p && isspace(*p)) p++;
|
|
if (!*p) break;
|
|
|
|
/* We have an argument; start it. */
|
|
outputargv[outputargc] = q;
|
|
outputargstart[outputargc] = p;
|
|
outputargc++;
|
|
quote = false;
|
|
|
|
/* Copy data into the argument until it's finished. */
|
|
while (*p) {
|
|
if (!quote && isspace(*p))
|
|
break; /* argument is finished */
|
|
|
|
if (*p == '"' || *p == '\\') {
|
|
/*
|
|
* We have a sequence of zero or more backslashes
|
|
* followed by a sequence of zero or more quotes.
|
|
* Count up how many of each, and then deal with
|
|
* them as appropriate.
|
|
*/
|
|
int i, slashes = 0, quotes = 0;
|
|
while (*p == '\\') slashes++, p++;
|
|
while (*p == '"') quotes++, p++;
|
|
|
|
if (!quotes) {
|
|
/*
|
|
* Special case: if there are no quotes,
|
|
* slashes are not special at all, so just copy
|
|
* n slashes to the output string.
|
|
*/
|
|
while (slashes--) *q++ = '\\';
|
|
} else {
|
|
/* Slashes annihilate in pairs. */
|
|
while (slashes >= 2) slashes -= 2, *q++ = '\\';
|
|
|
|
/* One remaining slash takes out the first quote. */
|
|
if (slashes) quotes--, *q++ = '"';
|
|
|
|
if (quotes > 0) {
|
|
/* Outside a quote segment, a quote starts one. */
|
|
if (!quote) quotes--;
|
|
|
|
#if !MOD3
|
|
/* New behaviour: produce n/2 literal quotes... */
|
|
for (i = 2; i <= quotes; i += 2) *q++ = '"';
|
|
/* ... and end in a quote segment iff 2 divides n. */
|
|
quote = (quotes % 2 == 0);
|
|
#else
|
|
/* Old behaviour: produce (n+1)/3 literal quotes... */
|
|
for (i = 3; i <= quotes+1; i += 3) *q++ = '"';
|
|
/* ... and end in a quote segment iff 3 divides n. */
|
|
quote = (quotes % 3 == 0);
|
|
#endif
|
|
}
|
|
}
|
|
} else {
|
|
*q++ = *p++;
|
|
}
|
|
}
|
|
|
|
/* At the end of an argument, just append a trailing NUL. */
|
|
*q++ = '\0';
|
|
}
|
|
|
|
outputargv = sresize(outputargv, outputargc, char *);
|
|
outputargstart = sresize(outputargstart, outputargc, char *);
|
|
|
|
if (argc) *argc = outputargc;
|
|
if (argv) *argv = outputargv; else sfree(outputargv);
|
|
if (argstart) *argstart = outputargstart; else sfree(outputargstart);
|
|
}
|
|
|
|
#ifdef TEST
|
|
|
|
const struct argv_test {
|
|
const char *cmdline;
|
|
const char *argv[10];
|
|
} argv_tests[] = {
|
|
/*
|
|
* We generate this set of tests by invoking ourself with
|
|
* `-generate'.
|
|
*/
|
|
#if !MOD3
|
|
/* Newer behaviour, with no weird mod-3 glitch. */
|
|
{"ab c\" d", {"ab", "c d", NULL}},
|
|
{"a\"b c\" d", {"ab c", "d", NULL}},
|
|
{"a\"\"b c\" d", {"ab", "c d", NULL}},
|
|
{"a\"\"\"b c\" d", {"a\"b c", "d", NULL}},
|
|
{"a\"\"\"\"b c\" d", {"a\"b", "c d", NULL}},
|
|
{"a\"\"\"\"\"b c\" d", {"a\"\"b c", "d", NULL}},
|
|
{"a\"\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
|
|
{"a\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b c", "d", NULL}},
|
|
{"a\"\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b", "c d", NULL}},
|
|
{"a\\b c\" d", {"a\\b", "c d", NULL}},
|
|
{"a\\\"b c\" d", {"a\"b", "c d", NULL}},
|
|
{"a\\\"\"b c\" d", {"a\"b c", "d", NULL}},
|
|
{"a\\\"\"\"b c\" d", {"a\"b", "c d", NULL}},
|
|
{"a\\\"\"\"\"b c\" d", {"a\"\"b c", "d", NULL}},
|
|
{"a\\\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
|
|
{"a\\\"\"\"\"\"\"b c\" d", {"a\"\"\"b c", "d", NULL}},
|
|
{"a\\\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b", "c d", NULL}},
|
|
{"a\\\"\"\"\"\"\"\"\"b c\" d", {"a\"\"\"\"b c", "d", NULL}},
|
|
{"a\\\\b c\" d", {"a\\\\b", "c d", NULL}},
|
|
{"a\\\\\"b c\" d", {"a\\b c", "d", NULL}},
|
|
{"a\\\\\"\"b c\" d", {"a\\b", "c d", NULL}},
|
|
{"a\\\\\"\"\"b c\" d", {"a\\\"b c", "d", NULL}},
|
|
{"a\\\\\"\"\"\"b c\" d", {"a\\\"b", "c d", NULL}},
|
|
{"a\\\\\"\"\"\"\"b c\" d", {"a\\\"\"b c", "d", NULL}},
|
|
{"a\\\\\"\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
|
|
{"a\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b c", "d", NULL}},
|
|
{"a\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b", "c d", NULL}},
|
|
{"a\\\\\\b c\" d", {"a\\\\\\b", "c d", NULL}},
|
|
{"a\\\\\\\"b c\" d", {"a\\\"b", "c d", NULL}},
|
|
{"a\\\\\\\"\"b c\" d", {"a\\\"b c", "d", NULL}},
|
|
{"a\\\\\\\"\"\"b c\" d", {"a\\\"b", "c d", NULL}},
|
|
{"a\\\\\\\"\"\"\"b c\" d", {"a\\\"\"b c", "d", NULL}},
|
|
{"a\\\\\\\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
|
|
{"a\\\\\\\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b c", "d", NULL}},
|
|
{"a\\\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b", "c d", NULL}},
|
|
{"a\\\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"\"b c", "d", NULL}},
|
|
{"a\\\\\\\\b c\" d", {"a\\\\\\\\b", "c d", NULL}},
|
|
{"a\\\\\\\\\"b c\" d", {"a\\\\b c", "d", NULL}},
|
|
{"a\\\\\\\\\"\"b c\" d", {"a\\\\b", "c d", NULL}},
|
|
{"a\\\\\\\\\"\"\"b c\" d", {"a\\\\\"b c", "d", NULL}},
|
|
{"a\\\\\\\\\"\"\"\"b c\" d", {"a\\\\\"b", "c d", NULL}},
|
|
{"a\\\\\\\\\"\"\"\"\"b c\" d", {"a\\\\\"\"b c", "d", NULL}},
|
|
{"a\\\\\\\\\"\"\"\"\"\"b c\" d", {"a\\\\\"\"b", "c d", NULL}},
|
|
{"a\\\\\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\\\"\"\"b c", "d", NULL}},
|
|
{"a\\\\\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\\\"\"\"b", "c d", NULL}},
|
|
{"\"ab c\" d", {"ab c", "d", NULL}},
|
|
{"\"a\"b c\" d", {"ab", "c d", NULL}},
|
|
{"\"a\"\"b c\" d", {"a\"b c", "d", NULL}},
|
|
{"\"a\"\"\"b c\" d", {"a\"b", "c d", NULL}},
|
|
{"\"a\"\"\"\"b c\" d", {"a\"\"b c", "d", NULL}},
|
|
{"\"a\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
|
|
{"\"a\"\"\"\"\"\"b c\" d", {"a\"\"\"b c", "d", NULL}},
|
|
{"\"a\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b", "c d", NULL}},
|
|
{"\"a\"\"\"\"\"\"\"\"b c\" d", {"a\"\"\"\"b c", "d", NULL}},
|
|
{"\"a\\b c\" d", {"a\\b c", "d", NULL}},
|
|
{"\"a\\\"b c\" d", {"a\"b c", "d", NULL}},
|
|
{"\"a\\\"\"b c\" d", {"a\"b", "c d", NULL}},
|
|
{"\"a\\\"\"\"b c\" d", {"a\"\"b c", "d", NULL}},
|
|
{"\"a\\\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
|
|
{"\"a\\\"\"\"\"\"b c\" d", {"a\"\"\"b c", "d", NULL}},
|
|
{"\"a\\\"\"\"\"\"\"b c\" d", {"a\"\"\"b", "c d", NULL}},
|
|
{"\"a\\\"\"\"\"\"\"\"b c\" d", {"a\"\"\"\"b c", "d", NULL}},
|
|
{"\"a\\\"\"\"\"\"\"\"\"b c\" d", {"a\"\"\"\"b", "c d", NULL}},
|
|
{"\"a\\\\b c\" d", {"a\\\\b c", "d", NULL}},
|
|
{"\"a\\\\\"b c\" d", {"a\\b", "c d", NULL}},
|
|
{"\"a\\\\\"\"b c\" d", {"a\\\"b c", "d", NULL}},
|
|
{"\"a\\\\\"\"\"b c\" d", {"a\\\"b", "c d", NULL}},
|
|
{"\"a\\\\\"\"\"\"b c\" d", {"a\\\"\"b c", "d", NULL}},
|
|
{"\"a\\\\\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
|
|
{"\"a\\\\\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b c", "d", NULL}},
|
|
{"\"a\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b", "c d", NULL}},
|
|
{"\"a\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"\"b c", "d", NULL}},
|
|
{"\"a\\\\\\b c\" d", {"a\\\\\\b c", "d", NULL}},
|
|
{"\"a\\\\\\\"b c\" d", {"a\\\"b c", "d", NULL}},
|
|
{"\"a\\\\\\\"\"b c\" d", {"a\\\"b", "c d", NULL}},
|
|
{"\"a\\\\\\\"\"\"b c\" d", {"a\\\"\"b c", "d", NULL}},
|
|
{"\"a\\\\\\\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
|
|
{"\"a\\\\\\\"\"\"\"\"b c\" d", {"a\\\"\"\"b c", "d", NULL}},
|
|
{"\"a\\\\\\\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b", "c d", NULL}},
|
|
{"\"a\\\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"\"b c", "d", NULL}},
|
|
{"\"a\\\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"\"b", "c d", NULL}},
|
|
{"\"a\\\\\\\\b c\" d", {"a\\\\\\\\b c", "d", NULL}},
|
|
{"\"a\\\\\\\\\"b c\" d", {"a\\\\b", "c d", NULL}},
|
|
{"\"a\\\\\\\\\"\"b c\" d", {"a\\\\\"b c", "d", NULL}},
|
|
{"\"a\\\\\\\\\"\"\"b c\" d", {"a\\\\\"b", "c d", NULL}},
|
|
{"\"a\\\\\\\\\"\"\"\"b c\" d", {"a\\\\\"\"b c", "d", NULL}},
|
|
{"\"a\\\\\\\\\"\"\"\"\"b c\" d", {"a\\\\\"\"b", "c d", NULL}},
|
|
{"\"a\\\\\\\\\"\"\"\"\"\"b c\" d", {"a\\\\\"\"\"b c", "d", NULL}},
|
|
{"\"a\\\\\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\\\"\"\"b", "c d", NULL}},
|
|
{"\"a\\\\\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\\\"\"\"\"b c", "d", NULL}},
|
|
#else /* MOD3 */
|
|
/* VS7 mod-3 behaviour. */
|
|
{"ab c\" d", {"ab", "c d", NULL}},
|
|
{"a\"b c\" d", {"ab c", "d", NULL}},
|
|
{"a\"\"b c\" d", {"ab", "c d", NULL}},
|
|
{"a\"\"\"b c\" d", {"a\"b", "c d", NULL}},
|
|
{"a\"\"\"\"b c\" d", {"a\"b c", "d", NULL}},
|
|
{"a\"\"\"\"\"b c\" d", {"a\"b", "c d", NULL}},
|
|
{"a\"\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
|
|
{"a\"\"\"\"\"\"\"b c\" d", {"a\"\"b c", "d", NULL}},
|
|
{"a\"\"\"\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
|
|
{"a\\b c\" d", {"a\\b", "c d", NULL}},
|
|
{"a\\\"b c\" d", {"a\"b", "c d", NULL}},
|
|
{"a\\\"\"b c\" d", {"a\"b c", "d", NULL}},
|
|
{"a\\\"\"\"b c\" d", {"a\"b", "c d", NULL}},
|
|
{"a\\\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
|
|
{"a\\\"\"\"\"\"b c\" d", {"a\"\"b c", "d", NULL}},
|
|
{"a\\\"\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
|
|
{"a\\\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b", "c d", NULL}},
|
|
{"a\\\"\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b c", "d", NULL}},
|
|
{"a\\\\b c\" d", {"a\\\\b", "c d", NULL}},
|
|
{"a\\\\\"b c\" d", {"a\\b c", "d", NULL}},
|
|
{"a\\\\\"\"b c\" d", {"a\\b", "c d", NULL}},
|
|
{"a\\\\\"\"\"b c\" d", {"a\\\"b", "c d", NULL}},
|
|
{"a\\\\\"\"\"\"b c\" d", {"a\\\"b c", "d", NULL}},
|
|
{"a\\\\\"\"\"\"\"b c\" d", {"a\\\"b", "c d", NULL}},
|
|
{"a\\\\\"\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
|
|
{"a\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\"\"b c", "d", NULL}},
|
|
{"a\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
|
|
{"a\\\\\\b c\" d", {"a\\\\\\b", "c d", NULL}},
|
|
{"a\\\\\\\"b c\" d", {"a\\\"b", "c d", NULL}},
|
|
{"a\\\\\\\"\"b c\" d", {"a\\\"b c", "d", NULL}},
|
|
{"a\\\\\\\"\"\"b c\" d", {"a\\\"b", "c d", NULL}},
|
|
{"a\\\\\\\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
|
|
{"a\\\\\\\"\"\"\"\"b c\" d", {"a\\\"\"b c", "d", NULL}},
|
|
{"a\\\\\\\"\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
|
|
{"a\\\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b", "c d", NULL}},
|
|
{"a\\\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b c", "d", NULL}},
|
|
{"a\\\\\\\\b c\" d", {"a\\\\\\\\b", "c d", NULL}},
|
|
{"a\\\\\\\\\"b c\" d", {"a\\\\b c", "d", NULL}},
|
|
{"a\\\\\\\\\"\"b c\" d", {"a\\\\b", "c d", NULL}},
|
|
{"a\\\\\\\\\"\"\"b c\" d", {"a\\\\\"b", "c d", NULL}},
|
|
{"a\\\\\\\\\"\"\"\"b c\" d", {"a\\\\\"b c", "d", NULL}},
|
|
{"a\\\\\\\\\"\"\"\"\"b c\" d", {"a\\\\\"b", "c d", NULL}},
|
|
{"a\\\\\\\\\"\"\"\"\"\"b c\" d", {"a\\\\\"\"b", "c d", NULL}},
|
|
{"a\\\\\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\\\"\"b c", "d", NULL}},
|
|
{"a\\\\\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\\\"\"b", "c d", NULL}},
|
|
{"\"ab c\" d", {"ab c", "d", NULL}},
|
|
{"\"a\"b c\" d", {"ab", "c d", NULL}},
|
|
{"\"a\"\"b c\" d", {"a\"b", "c d", NULL}},
|
|
{"\"a\"\"\"b c\" d", {"a\"b c", "d", NULL}},
|
|
{"\"a\"\"\"\"b c\" d", {"a\"b", "c d", NULL}},
|
|
{"\"a\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
|
|
{"\"a\"\"\"\"\"\"b c\" d", {"a\"\"b c", "d", NULL}},
|
|
{"\"a\"\"\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
|
|
{"\"a\"\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b", "c d", NULL}},
|
|
{"\"a\\b c\" d", {"a\\b c", "d", NULL}},
|
|
{"\"a\\\"b c\" d", {"a\"b c", "d", NULL}},
|
|
{"\"a\\\"\"b c\" d", {"a\"b", "c d", NULL}},
|
|
{"\"a\\\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
|
|
{"\"a\\\"\"\"\"b c\" d", {"a\"\"b c", "d", NULL}},
|
|
{"\"a\\\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
|
|
{"\"a\\\"\"\"\"\"\"b c\" d", {"a\"\"\"b", "c d", NULL}},
|
|
{"\"a\\\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b c", "d", NULL}},
|
|
{"\"a\\\"\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b", "c d", NULL}},
|
|
{"\"a\\\\b c\" d", {"a\\\\b c", "d", NULL}},
|
|
{"\"a\\\\\"b c\" d", {"a\\b", "c d", NULL}},
|
|
{"\"a\\\\\"\"b c\" d", {"a\\\"b", "c d", NULL}},
|
|
{"\"a\\\\\"\"\"b c\" d", {"a\\\"b c", "d", NULL}},
|
|
{"\"a\\\\\"\"\"\"b c\" d", {"a\\\"b", "c d", NULL}},
|
|
{"\"a\\\\\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
|
|
{"\"a\\\\\"\"\"\"\"\"b c\" d", {"a\\\"\"b c", "d", NULL}},
|
|
{"\"a\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
|
|
{"\"a\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b", "c d", NULL}},
|
|
{"\"a\\\\\\b c\" d", {"a\\\\\\b c", "d", NULL}},
|
|
{"\"a\\\\\\\"b c\" d", {"a\\\"b c", "d", NULL}},
|
|
{"\"a\\\\\\\"\"b c\" d", {"a\\\"b", "c d", NULL}},
|
|
{"\"a\\\\\\\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
|
|
{"\"a\\\\\\\"\"\"\"b c\" d", {"a\\\"\"b c", "d", NULL}},
|
|
{"\"a\\\\\\\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
|
|
{"\"a\\\\\\\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b", "c d", NULL}},
|
|
{"\"a\\\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b c", "d", NULL}},
|
|
{"\"a\\\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b", "c d", NULL}},
|
|
{"\"a\\\\\\\\b c\" d", {"a\\\\\\\\b c", "d", NULL}},
|
|
{"\"a\\\\\\\\\"b c\" d", {"a\\\\b", "c d", NULL}},
|
|
{"\"a\\\\\\\\\"\"b c\" d", {"a\\\\\"b", "c d", NULL}},
|
|
{"\"a\\\\\\\\\"\"\"b c\" d", {"a\\\\\"b c", "d", NULL}},
|
|
{"\"a\\\\\\\\\"\"\"\"b c\" d", {"a\\\\\"b", "c d", NULL}},
|
|
{"\"a\\\\\\\\\"\"\"\"\"b c\" d", {"a\\\\\"\"b", "c d", NULL}},
|
|
{"\"a\\\\\\\\\"\"\"\"\"\"b c\" d", {"a\\\\\"\"b c", "d", NULL}},
|
|
{"\"a\\\\\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\\\"\"b", "c d", NULL}},
|
|
{"\"a\\\\\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\\\"\"\"b", "c d", NULL}},
|
|
#endif /* MOD3 */
|
|
};
|
|
|
|
void out_of_memory(void)
|
|
{
|
|
fprintf(stderr, "out of memory!\n");
|
|
exit(2);
|
|
}
|
|
|
|
int main(int argc, char **argv)
|
|
{
|
|
int i, j;
|
|
|
|
if (argc > 1) {
|
|
/*
|
|
* Generation of tests.
|
|
*
|
|
* Given `-splat <args>', we print out a C-style
|
|
* representation of each argument (in the form "a", "b",
|
|
* NULL), backslash-escaping each backslash and double
|
|
* quote.
|
|
*
|
|
* Given `-split <string>', we first doctor `string' by
|
|
* turning forward slashes into backslashes, single quotes
|
|
* into double quotes and underscores into spaces; and then
|
|
* we feed the resulting string to ourself with `-splat'.
|
|
*
|
|
* Given `-generate', we concoct a variety of fun test
|
|
* cases, encode them in quote-safe form (mapping \, " and
|
|
* space to /, ' and _ respectively) and feed each one to
|
|
* `-split'.
|
|
*/
|
|
if (!strcmp(argv[1], "-splat")) {
|
|
int i;
|
|
char *p;
|
|
for (i = 2; i < argc; i++) {
|
|
putchar('"');
|
|
for (p = argv[i]; *p; p++) {
|
|
if (*p == '\\' || *p == '"')
|
|
putchar('\\');
|
|
putchar(*p);
|
|
}
|
|
printf("\", ");
|
|
}
|
|
printf("NULL");
|
|
return 0;
|
|
}
|
|
|
|
if (!strcmp(argv[1], "-split") && argc > 2) {
|
|
strbuf *cmdline = strbuf_new();
|
|
char *p;
|
|
|
|
strbuf_catf(cmdline, "%s -splat ", argv[0]);
|
|
printf(" {\"");
|
|
size_t args_start = cmdline->len;
|
|
for (p = argv[2]; *p; p++) {
|
|
char c = (*p == '/' ? '\\' :
|
|
*p == '\'' ? '"' :
|
|
*p == '_' ? ' ' :
|
|
*p);
|
|
put_byte(cmdline, c);
|
|
}
|
|
write_c_string_literal(stdout, ptrlen_from_asciz(
|
|
cmdline->s + args_start));
|
|
printf("\", {");
|
|
fflush(stdout);
|
|
|
|
system(cmdline->s);
|
|
|
|
printf("}},\n");
|
|
|
|
strbuf_free(cmdline);
|
|
return 0;
|
|
}
|
|
|
|
if (!strcmp(argv[1], "-generate")) {
|
|
char *teststr, *p;
|
|
int i, initialquote, backslashes, quotes;
|
|
|
|
teststr = malloc(200 + strlen(argv[0]));
|
|
|
|
for (initialquote = 0; initialquote <= 1; initialquote++) {
|
|
for (backslashes = 0; backslashes < 5; backslashes++) {
|
|
for (quotes = 0; quotes < 9; quotes++) {
|
|
p = teststr + sprintf(teststr, "%s -split ", argv[0]);
|
|
if (initialquote) *p++ = '\'';
|
|
*p++ = 'a';
|
|
for (i = 0; i < backslashes; i++) *p++ = '/';
|
|
for (i = 0; i < quotes; i++) *p++ = '\'';
|
|
*p++ = 'b';
|
|
*p++ = '_';
|
|
*p++ = 'c';
|
|
*p++ = '\'';
|
|
*p++ = '_';
|
|
*p++ = 'd';
|
|
*p = '\0';
|
|
|
|
system(teststr);
|
|
}
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
if (!strcmp(argv[1], "-tabulate")) {
|
|
char table[] = "\
|
|
* backslashes \n\
|
|
* \n\
|
|
* 0 1 2 3 4 \n\
|
|
* \n\
|
|
* 0 | \n\
|
|
* --------+----------------------------- \n\
|
|
* 1 | \n\
|
|
* q 2 | \n\
|
|
* u 3 | \n\
|
|
* o 4 | \n\
|
|
* t 5 | \n\
|
|
* e 6 | \n\
|
|
* s 7 | \n\
|
|
* 8 | \n\
|
|
";
|
|
char *linestarts[14];
|
|
char *p = table;
|
|
for (i = 0; i < lenof(linestarts); i++) {
|
|
linestarts[i] = p;
|
|
p += strcspn(p, "\n");
|
|
if (*p) p++;
|
|
}
|
|
|
|
for (i = 0; i < lenof(argv_tests); i++) {
|
|
const struct argv_test *test = &argv_tests[i];
|
|
const char *q = test->cmdline;
|
|
|
|
/* Skip tests that aren't telling us something about
|
|
* the behaviour _inside_ a quoted string */
|
|
if (*q != '"')
|
|
continue;
|
|
|
|
q++;
|
|
|
|
assert(*q == 'a');
|
|
q++;
|
|
int backslashes_in = 0, quotes_in = 0;
|
|
while (*q == '\\') {
|
|
q++;
|
|
backslashes_in++;
|
|
}
|
|
while (*q == '"') {
|
|
q++;
|
|
quotes_in++;
|
|
}
|
|
|
|
q = test->argv[0];
|
|
assert(*q == 'a');
|
|
q++;
|
|
int backslashes_out = 0, quotes_out = 0;
|
|
while (*q == '\\') {
|
|
q++;
|
|
backslashes_out++;
|
|
}
|
|
while (*q == '"') {
|
|
q++;
|
|
quotes_out++;
|
|
}
|
|
assert(*q == 'b');
|
|
q++;
|
|
bool in_quoted_string = (*q == ' ');
|
|
|
|
int x = (backslashes_in == 0 ? 15 : 18 + 7 * backslashes_in);
|
|
int y = (quotes_in == 0 ? 4 : 5 + quotes_in);
|
|
char *buf = dupprintf("%d,%d,%c",
|
|
backslashes_out, quotes_out,
|
|
in_quoted_string ? 'y' : 'n');
|
|
memcpy(linestarts[y] + x, buf, strlen(buf));
|
|
sfree(buf);
|
|
}
|
|
|
|
fputs(table, stdout);
|
|
return 0;
|
|
}
|
|
|
|
fprintf(stderr, "unrecognised option: \"%s\"\n", argv[1]);
|
|
return 1;
|
|
}
|
|
|
|
/*
|
|
* If we get here, we were invoked with no arguments, so just
|
|
* run the tests.
|
|
*/
|
|
int passes = 0, fails = 0;
|
|
|
|
for (i = 0; i < lenof(argv_tests); i++) {
|
|
int ac;
|
|
char **av;
|
|
bool failed = false;
|
|
|
|
split_into_argv((char *)argv_tests[i].cmdline, &ac, &av, NULL);
|
|
|
|
for (j = 0; j < ac && argv_tests[i].argv[j]; j++) {
|
|
if (strcmp(av[j], argv_tests[i].argv[j])) {
|
|
printf("failed test %d (|%s|) arg %d: |%s| should be |%s|\n",
|
|
i, argv_tests[i].cmdline,
|
|
j, av[j], argv_tests[i].argv[j]);
|
|
failed = true;
|
|
}
|
|
#ifdef VERBOSE
|
|
else {
|
|
printf("test %d (|%s|) arg %d: |%s| == |%s|\n",
|
|
i, argv_tests[i].cmdline,
|
|
j, av[j], argv_tests[i].argv[j]);
|
|
}
|
|
#endif
|
|
}
|
|
if (j < ac) {
|
|
printf("failed test %d (|%s|): %d args returned, should be %d\n",
|
|
i, argv_tests[i].cmdline, ac, j);
|
|
failed = true;
|
|
}
|
|
if (argv_tests[i].argv[j]) {
|
|
printf("failed test %d (|%s|): %d args returned, should be more\n",
|
|
i, argv_tests[i].cmdline, ac);
|
|
failed = true;
|
|
}
|
|
|
|
if (failed)
|
|
fails++;
|
|
else
|
|
passes++;
|
|
}
|
|
|
|
printf("passed %d failed %d (%s mode)\n",
|
|
passes, fails,
|
|
#if MOD3
|
|
"mod 3"
|
|
#else
|
|
"mod 2"
|
|
#endif
|
|
);
|
|
|
|
return fails != 0;
|
|
}
|
|
|
|
#endif /* TEST */
|