/* * Split a complete command line into argc/argv, attempting to do it * exactly the same way the Visual Studio C library would do it (so * that our console utilities, which receive argc and argv already * broken apart by the C library, will have their command lines * processed in the same way as the GUI utilities which get a whole * command line and must call this function). * * Does not modify the input command line. * * The final parameter (argstart) is used to return a second array * of char * pointers, the same length as argv, each one pointing * at the start of the corresponding element of argv in the * original command line. So if you get half way through processing * your command line in argc/argv form and then decide you want to * treat the rest as a raw string, you can. If you don't want to, * `argstart' can be safely left NULL. */ #include "putty.h" /* * The precise argument-breaking rules vary with compiler version, or * rather, with the crt0-type startup code that comes with each * compiler's C library. We do our best to match the compiler version, * so that we faithfully imitate in our GUI utilities what the * corresponding set of CLI utilities can't be prevented from doing. * * The basic rules are: * * - Single quotes are not special characters. * * - Double quotes are removed, but within them spaces cease to be * special. * * - Backslashes are _only_ special when a sequence of them appear * just before a double quote. In this situation, they are treated * like C backslashes: so \" just gives a literal quote, \\" gives * a literal backslash and then opens or closes a double-quoted * segment, \\\" gives a literal backslash and then a literal * quote, \\\\" gives two literal backslashes and then opens/closes * a double-quoted segment, and so forth. Note that this behaviour * is identical inside and outside double quotes. * * - Two successive double quotes become one literal double quote, * but only _inside_ a double-quoted segment. Outside, they just * form an empty double-quoted segment (which may cause an empty * argument word). * * That only leaves the interesting question of what happens when one * or more backslashes precedes two or more double quotes, starting * inside a double-quoted string. * * Modern Visual Studio (as of 2021) * --------------------------------- * * I investigated this in an ordinary CLI program, using the * toolchain's crt0 to split a command line of the form * * "a\\\"""b c" d * * Here I tabulate number of backslashes (across the top) against * number of quotes (down the left), and indicate how many backslashes * are output, how many quotes are output, and whether a quoted * segment is open at the end of the sequence: * * backslashes * * 0 1 2 3 4 * * 0 0,0,y | 1,0,y 2,0,y 3,0,y 4,0,y * --------+----------------------------- * 1 0,0,n | 0,1,y 1,0,n 1,1,y 2,0,n * q 2 0,1,y | 0,1,n 1,1,y 1,1,n 2,1,y * u 3 0,1,n | 0,2,y 1,1,n 1,2,y 2,1,n * o 4 0,2,y | 0,2,n 1,2,y 1,2,n 2,2,y * t 5 0,2,n | 0,3,y 1,2,n 1,3,y 2,2,n * e 6 0,3,y | 0,3,n 1,3,y 1,3,n 2,3,y * s 7 0,3,n | 0,4,y 1,3,n 1,4,y 2,3,n * 8 0,4,y | 0,4,n 1,4,y 1,4,n 2,4,y * * The row at the top of this table, with quotes=0, demonstrates what * I claimed above, that when a sequence of backslashes are not * followed by a double quote, they don't act specially at all. The * rest of the table shows that the backslashes escape each other in * pairs (so that with 2n or 2n+1 input backslashes you get n output * ones); if there's an odd number of input backslashes then the last * one escapes the first double quote (so you get a literal quote and * enter a quoted string); thereafter, each input quote character * either opens or closes a quoted string, and if it closes one, it * generates a literal " as a side effect. * * Older Visual Studio * ------------------- * * But here's the corresponding table from the older Visual Studio 7: * * backslashes * * 0 1 2 3 4 * * 0 0,0,y | 1,0,y 2,0,y 3,0,y 4,0,y * --------+----------------------------- * 1 0,0,n | 0,1,y 1,0,n 1,1,y 2,0,n * q 2 0,1,n | 0,1,n 1,1,n 1,1,n 2,1,n * u 3 0,1,y | 0,2,n 1,1,y 1,2,n 2,1,y * o 4 0,1,n | 0,2,y 1,1,n 1,2,y 2,1,n * t 5 0,2,n | 0,2,n 1,2,n 1,2,n 2,2,n * e 6 0,2,y | 0,3,n 1,2,y 1,3,n 2,2,y * s 7 0,2,n | 0,3,y 1,2,n 1,3,y 2,2,n * 8 0,3,n | 0,3,n 1,3,n 1,3,n 2,3,n * 9 0,3,y | 0,4,n 1,3,y 1,4,n 2,3,y * 10 0,3,n | 0,4,y 1,3,n 1,4,y 2,3,n * 11 0,4,n | 0,4,n 1,4,n 1,4,n 2,4,n * * There is very weird mod-3 behaviour going on here in the number of * quotes, and it even applies when there aren't any backslashes! How * ghastly. * * With a bit of thought, this extremely odd diagram suddenly * coalesced itself into a coherent, if still ghastly, model of how * things work: * * - As before, backslashes are only special when one or more of them * appear contiguously before at least one double quote. In this * situation the backslashes do exactly what you'd expect: each one * quotes the next thing in front of it, so you end up with n/2 * literal backslashes (if n is even) or (n-1)/2 literal * backslashes and a literal quote (if n is odd). In the latter * case the double quote character right after the backslashes is * used up. * * - After that, any remaining double quotes are processed. A string * of contiguous unescaped double quotes has a mod-3 behaviour: * * * inside a quoted segment, a quote ends the segment. * * _immediately_ after ending a quoted segment, a quote simply * produces a literal quote. * * otherwise, outside a quoted segment, a quote begins a quoted * segment. * * So, for example, if we started inside a quoted segment then two * contiguous quotes would close the segment and produce a literal * quote; three would close the segment, produce a literal quote, * and open a new segment. If we started outside a quoted segment, * then two contiguous quotes would open and then close a segment, * producing no output (but potentially creating a zero-length * argument); but three quotes would open and close a segment and * then produce a literal quote. * * I don't know exactly when the bug fix happened, but I know that VS7 * had the odd mod-3 behaviour. So the #if below will ensure that * modern (2015 onwards) versions of VS use the new more sensible * behaviour, and VS7 uses the old one. Things in between may be * wrong; if anyone cares, patches to change the cutoff version in * this #if are welcome. */ #if _MSC_VER < 1400 #define MOD3 1 #else #define MOD3 0 #endif static inline bool is_word_sep(char c) { return c == ' ' || c == '\t'; } void split_into_argv(char *cmdline, bool includes_program_name, int *argc, char ***argv, char ***argstart) { char *p; char *outputline, *q; char **outputargv, **outputargstart; int outputargc; /* * First deal with the simplest of all special cases: if there * aren't any arguments, return 0,NULL,NULL. */ while (*cmdline && is_word_sep(*cmdline)) cmdline++; if (!*cmdline) { if (argc) *argc = 0; if (argv) *argv = NULL; if (argstart) *argstart = NULL; return; } /* * This will guaranteeably be big enough; we can realloc it * down later. */ outputline = snewn(1+strlen(cmdline), char); outputargv = snewn(strlen(cmdline)+1 / 2, char *); outputargstart = snewn(strlen(cmdline)+1 / 2, char *); p = cmdline; q = outputline; outputargc = 0; while (*p) { bool quote; /* Skip whitespace searching for start of argument. */ while (*p && is_word_sep(*p)) p++; if (!*p) break; /* * Check if this argument is the program name. If so, * different rules apply. * * In most arguments, the special characters are the double * quote and the backslash. An exception is the program name * at the start of the command line, in which backslashes are * _not_ special - if one appears before a quote, it does not * make the quote literal. * * The C library must implement this special rule, and we must * follow suit here, in order to match the way CreateProcess * scans the command line to determine the program name. It * will consider that all these commands refer to the same * file equally validly: * * "C:\Program Files\Foo"\bar.exe * "C:\Program Files\Foo\"bar.exe * "C:\Program Files\Foo\bar.exe" * * Each one contains a quoted section that protects the space * in "Program Files", and the closing quote takes effect the * same in all cases - even though, in the middle case, it's * immediately preceded by one of the path separators in the * name. For CreateProcess, backslashes aren't special. * * So, if our caller told us that the input command line * includes the program name (which it does if it came from * GetCommandLine, but not if it was passed in to WinMain), * then we must treat the 0th output argument specially, by * not considering backslashes to affect the quoting. */ bool backslash_special = !(outputargc == 0 && includes_program_name); /* We have an argument; start it. */ outputargv[outputargc] = q; outputargstart[outputargc] = p; outputargc++; quote = false; /* Copy data into the argument until it's finished. */ while (*p) { if (!quote && is_word_sep(*p)) break; /* argument is finished */ if (*p == '"' || (*p == '\\' && backslash_special)) { /* * We have a sequence of zero or more backslashes * followed by a sequence of zero or more quotes. * Count up how many of each, and then deal with * them as appropriate. */ int i, slashes = 0, quotes = 0; while (*p == '\\') slashes++, p++; while (*p == '"') quotes++, p++; if (!quotes) { /* * Special case: if there are no quotes, * slashes are not special at all, so just copy * n slashes to the output string. */ while (slashes--) *q++ = '\\'; } else { /* Slashes annihilate in pairs. */ while (slashes >= 2) slashes -= 2, *q++ = '\\'; /* One remaining slash takes out the first quote. */ if (slashes) quotes--, *q++ = '"'; if (quotes > 0) { /* Outside a quote segment, a quote starts one. */ if (!quote) quotes--; #if !MOD3 /* New behaviour: produce n/2 literal quotes... */ for (i = 2; i <= quotes; i += 2) *q++ = '"'; /* ... and end in a quote segment iff 2 divides n. */ quote = (quotes % 2 == 0); #else /* Old behaviour: produce (n+1)/3 literal quotes... */ for (i = 3; i <= quotes+1; i += 3) *q++ = '"'; /* ... and end in a quote segment iff 3 divides n. */ quote = (quotes % 3 == 0); #endif } } } else { *q++ = *p++; } } /* At the end of an argument, just append a trailing NUL. */ *q++ = '\0'; } outputargv = sresize(outputargv, outputargc, char *); outputargstart = sresize(outputargstart, outputargc, char *); if (argc) *argc = outputargc; if (argv) *argv = outputargv; else sfree(outputargv); if (argstart) *argstart = outputargstart; else sfree(outputargstart); } #ifdef TEST const struct argv_test { const char *cmdline; const char *argv[10]; bool include_program_name; } argv_tests[] = { /* * We generate this set of tests by invoking ourself with * `-generate'. */ #if !MOD3 /* Newer behaviour, with no weird mod-3 glitch. */ {"ab c\" d", {"ab", "c d", NULL}}, {"a\"b c\" d", {"ab c", "d", NULL}}, {"a\"\"b c\" d", {"ab", "c d", NULL}}, {"a\"\"\"b c\" d", {"a\"b c", "d", NULL}}, {"a\"\"\"\"b c\" d", {"a\"b", "c d", NULL}}, {"a\"\"\"\"\"b c\" d", {"a\"\"b c", "d", NULL}}, {"a\"\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}}, {"a\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b c", "d", NULL}}, {"a\"\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b", "c d", NULL}}, {"a\\b c\" d", {"a\\b", "c d", NULL}}, {"a\\\"b c\" d", {"a\"b", "c d", NULL}}, {"a\\\"\"b c\" d", {"a\"b c", "d", NULL}}, {"a\\\"\"\"b c\" d", {"a\"b", "c d", NULL}}, {"a\\\"\"\"\"b c\" d", {"a\"\"b c", "d", NULL}}, {"a\\\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}}, {"a\\\"\"\"\"\"\"b c\" d", {"a\"\"\"b c", "d", NULL}}, {"a\\\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b", "c d", NULL}}, {"a\\\"\"\"\"\"\"\"\"b c\" d", {"a\"\"\"\"b c", "d", NULL}}, {"a\\\\b c\" d", {"a\\\\b", "c d", NULL}}, {"a\\\\\"b c\" d", {"a\\b c", "d", NULL}}, {"a\\\\\"\"b c\" d", {"a\\b", "c d", NULL}}, {"a\\\\\"\"\"b c\" d", {"a\\\"b c", "d", NULL}}, {"a\\\\\"\"\"\"b c\" d", {"a\\\"b", "c d", NULL}}, {"a\\\\\"\"\"\"\"b c\" d", {"a\\\"\"b c", "d", NULL}}, {"a\\\\\"\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}}, {"a\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b c", "d", NULL}}, {"a\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b", "c d", NULL}}, {"a\\\\\\b c\" d", {"a\\\\\\b", "c d", NULL}}, {"a\\\\\\\"b c\" d", {"a\\\"b", "c d", NULL}}, {"a\\\\\\\"\"b c\" d", {"a\\\"b c", "d", NULL}}, {"a\\\\\\\"\"\"b c\" d", {"a\\\"b", "c d", NULL}}, {"a\\\\\\\"\"\"\"b c\" d", {"a\\\"\"b c", "d", NULL}}, {"a\\\\\\\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}}, {"a\\\\\\\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b c", "d", NULL}}, {"a\\\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b", "c d", NULL}}, {"a\\\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"\"b c", "d", NULL}}, {"a\\\\\\\\b c\" d", {"a\\\\\\\\b", "c d", NULL}}, {"a\\\\\\\\\"b c\" d", {"a\\\\b c", "d", NULL}}, {"a\\\\\\\\\"\"b c\" d", {"a\\\\b", "c d", NULL}}, {"a\\\\\\\\\"\"\"b c\" d", {"a\\\\\"b c", "d", NULL}}, {"a\\\\\\\\\"\"\"\"b c\" d", {"a\\\\\"b", "c d", NULL}}, {"a\\\\\\\\\"\"\"\"\"b c\" d", {"a\\\\\"\"b c", "d", NULL}}, {"a\\\\\\\\\"\"\"\"\"\"b c\" d", {"a\\\\\"\"b", "c d", NULL}}, {"a\\\\\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\\\"\"\"b c", "d", NULL}}, {"a\\\\\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\\\"\"\"b", "c d", NULL}}, {"\"ab c\" d", {"ab c", "d", NULL}}, {"\"a\"b c\" d", {"ab", "c d", NULL}}, {"\"a\"\"b c\" d", {"a\"b c", "d", NULL}}, {"\"a\"\"\"b c\" d", {"a\"b", "c d", NULL}}, {"\"a\"\"\"\"b c\" d", {"a\"\"b c", "d", NULL}}, {"\"a\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}}, {"\"a\"\"\"\"\"\"b c\" d", {"a\"\"\"b c", "d", NULL}}, {"\"a\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b", "c d", NULL}}, {"\"a\"\"\"\"\"\"\"\"b c\" d", {"a\"\"\"\"b c", "d", NULL}}, {"\"a\\b c\" d", {"a\\b c", "d", NULL}}, {"\"a\\\"b c\" d", {"a\"b c", "d", NULL}}, {"\"a\\\"\"b c\" d", {"a\"b", "c d", NULL}}, {"\"a\\\"\"\"b c\" d", {"a\"\"b c", "d", NULL}}, {"\"a\\\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}}, {"\"a\\\"\"\"\"\"b c\" d", {"a\"\"\"b c", "d", NULL}}, {"\"a\\\"\"\"\"\"\"b c\" d", {"a\"\"\"b", "c d", NULL}}, {"\"a\\\"\"\"\"\"\"\"b c\" d", {"a\"\"\"\"b c", "d", NULL}}, {"\"a\\\"\"\"\"\"\"\"\"b c\" d", {"a\"\"\"\"b", "c d", NULL}}, {"\"a\\\\b c\" d", {"a\\\\b c", "d", NULL}}, {"\"a\\\\\"b c\" d", {"a\\b", "c d", NULL}}, {"\"a\\\\\"\"b c\" d", {"a\\\"b c", "d", NULL}}, {"\"a\\\\\"\"\"b c\" d", {"a\\\"b", "c d", NULL}}, {"\"a\\\\\"\"\"\"b c\" d", {"a\\\"\"b c", "d", NULL}}, {"\"a\\\\\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}}, {"\"a\\\\\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b c", "d", NULL}}, {"\"a\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b", "c d", NULL}}, {"\"a\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"\"b c", "d", NULL}}, {"\"a\\\\\\b c\" d", {"a\\\\\\b c", "d", NULL}}, {"\"a\\\\\\\"b c\" d", {"a\\\"b c", "d", NULL}}, {"\"a\\\\\\\"\"b c\" d", {"a\\\"b", "c d", NULL}}, {"\"a\\\\\\\"\"\"b c\" d", {"a\\\"\"b c", "d", NULL}}, {"\"a\\\\\\\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}}, {"\"a\\\\\\\"\"\"\"\"b c\" d", {"a\\\"\"\"b c", "d", NULL}}, {"\"a\\\\\\\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b", "c d", NULL}}, {"\"a\\\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"\"b c", "d", NULL}}, {"\"a\\\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"\"b", "c d", NULL}}, {"\"a\\\\\\\\b c\" d", {"a\\\\\\\\b c", "d", NULL}}, {"\"a\\\\\\\\\"b c\" d", {"a\\\\b", "c d", NULL}}, {"\"a\\\\\\\\\"\"b c\" d", {"a\\\\\"b c", "d", NULL}}, {"\"a\\\\\\\\\"\"\"b c\" d", {"a\\\\\"b", "c d", NULL}}, {"\"a\\\\\\\\\"\"\"\"b c\" d", {"a\\\\\"\"b c", "d", NULL}}, {"\"a\\\\\\\\\"\"\"\"\"b c\" d", {"a\\\\\"\"b", "c d", NULL}}, {"\"a\\\\\\\\\"\"\"\"\"\"b c\" d", {"a\\\\\"\"\"b c", "d", NULL}}, {"\"a\\\\\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\\\"\"\"b", "c d", NULL}}, {"\"a\\\\\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\\\"\"\"\"b c", "d", NULL}}, #else /* MOD3 */ /* VS7 mod-3 behaviour. */ {"ab c\" d", {"ab", "c d", NULL}}, {"a\"b c\" d", {"ab c", "d", NULL}}, {"a\"\"b c\" d", {"ab", "c d", NULL}}, {"a\"\"\"b c\" d", {"a\"b", "c d", NULL}}, {"a\"\"\"\"b c\" d", {"a\"b c", "d", NULL}}, {"a\"\"\"\"\"b c\" d", {"a\"b", "c d", NULL}}, {"a\"\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}}, {"a\"\"\"\"\"\"\"b c\" d", {"a\"\"b c", "d", NULL}}, {"a\"\"\"\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}}, {"a\\b c\" d", {"a\\b", "c d", NULL}}, {"a\\\"b c\" d", {"a\"b", "c d", NULL}}, {"a\\\"\"b c\" d", {"a\"b c", "d", NULL}}, {"a\\\"\"\"b c\" d", {"a\"b", "c d", NULL}}, {"a\\\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}}, {"a\\\"\"\"\"\"b c\" d", {"a\"\"b c", "d", NULL}}, {"a\\\"\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}}, {"a\\\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b", "c d", NULL}}, {"a\\\"\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b c", "d", NULL}}, {"a\\\\b c\" d", {"a\\\\b", "c d", NULL}}, {"a\\\\\"b c\" d", {"a\\b c", "d", NULL}}, {"a\\\\\"\"b c\" d", {"a\\b", "c d", NULL}}, {"a\\\\\"\"\"b c\" d", {"a\\\"b", "c d", NULL}}, {"a\\\\\"\"\"\"b c\" d", {"a\\\"b c", "d", NULL}}, {"a\\\\\"\"\"\"\"b c\" d", {"a\\\"b", "c d", NULL}}, {"a\\\\\"\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}}, {"a\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\"\"b c", "d", NULL}}, {"a\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}}, {"a\\\\\\b c\" d", {"a\\\\\\b", "c d", NULL}}, {"a\\\\\\\"b c\" d", {"a\\\"b", "c d", NULL}}, {"a\\\\\\\"\"b c\" d", {"a\\\"b c", "d", NULL}}, {"a\\\\\\\"\"\"b c\" d", {"a\\\"b", "c d", NULL}}, {"a\\\\\\\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}}, {"a\\\\\\\"\"\"\"\"b c\" d", {"a\\\"\"b c", "d", NULL}}, {"a\\\\\\\"\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}}, {"a\\\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b", "c d", NULL}}, {"a\\\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b c", "d", NULL}}, {"a\\\\\\\\b c\" d", {"a\\\\\\\\b", "c d", NULL}}, {"a\\\\\\\\\"b c\" d", {"a\\\\b c", "d", NULL}}, {"a\\\\\\\\\"\"b c\" d", {"a\\\\b", "c d", NULL}}, {"a\\\\\\\\\"\"\"b c\" d", {"a\\\\\"b", "c d", NULL}}, {"a\\\\\\\\\"\"\"\"b c\" d", {"a\\\\\"b c", "d", NULL}}, {"a\\\\\\\\\"\"\"\"\"b c\" d", {"a\\\\\"b", "c d", NULL}}, {"a\\\\\\\\\"\"\"\"\"\"b c\" d", {"a\\\\\"\"b", "c d", NULL}}, {"a\\\\\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\\\"\"b c", "d", NULL}}, {"a\\\\\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\\\"\"b", "c d", NULL}}, {"\"ab c\" d", {"ab c", "d", NULL}}, {"\"a\"b c\" d", {"ab", "c d", NULL}}, {"\"a\"\"b c\" d", {"a\"b", "c d", NULL}}, {"\"a\"\"\"b c\" d", {"a\"b c", "d", NULL}}, {"\"a\"\"\"\"b c\" d", {"a\"b", "c d", NULL}}, {"\"a\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}}, {"\"a\"\"\"\"\"\"b c\" d", {"a\"\"b c", "d", NULL}}, {"\"a\"\"\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}}, {"\"a\"\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b", "c d", NULL}}, {"\"a\\b c\" d", {"a\\b c", "d", NULL}}, {"\"a\\\"b c\" d", {"a\"b c", "d", NULL}}, {"\"a\\\"\"b c\" d", {"a\"b", "c d", NULL}}, {"\"a\\\"\"\"b c\" d", {"a\"\"b", "c d", NULL}}, {"\"a\\\"\"\"\"b c\" d", {"a\"\"b c", "d", NULL}}, {"\"a\\\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}}, {"\"a\\\"\"\"\"\"\"b c\" d", {"a\"\"\"b", "c d", NULL}}, {"\"a\\\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b c", "d", NULL}}, {"\"a\\\"\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b", "c d", NULL}}, {"\"a\\\\b c\" d", {"a\\\\b c", "d", NULL}}, {"\"a\\\\\"b c\" d", {"a\\b", "c d", NULL}}, {"\"a\\\\\"\"b c\" d", {"a\\\"b", "c d", NULL}}, {"\"a\\\\\"\"\"b c\" d", {"a\\\"b c", "d", NULL}}, {"\"a\\\\\"\"\"\"b c\" d", {"a\\\"b", "c d", NULL}}, {"\"a\\\\\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}}, {"\"a\\\\\"\"\"\"\"\"b c\" d", {"a\\\"\"b c", "d", NULL}}, {"\"a\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}}, {"\"a\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b", "c d", NULL}}, {"\"a\\\\\\b c\" d", {"a\\\\\\b c", "d", NULL}}, {"\"a\\\\\\\"b c\" d", {"a\\\"b c", "d", NULL}}, {"\"a\\\\\\\"\"b c\" d", {"a\\\"b", "c d", NULL}}, {"\"a\\\\\\\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}}, {"\"a\\\\\\\"\"\"\"b c\" d", {"a\\\"\"b c", "d", NULL}}, {"\"a\\\\\\\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}}, {"\"a\\\\\\\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b", "c d", NULL}}, {"\"a\\\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b c", "d", NULL}}, {"\"a\\\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b", "c d", NULL}}, {"\"a\\\\\\\\b c\" d", {"a\\\\\\\\b c", "d", NULL}}, {"\"a\\\\\\\\\"b c\" d", {"a\\\\b", "c d", NULL}}, {"\"a\\\\\\\\\"\"b c\" d", {"a\\\\\"b", "c d", NULL}}, {"\"a\\\\\\\\\"\"\"b c\" d", {"a\\\\\"b c", "d", NULL}}, {"\"a\\\\\\\\\"\"\"\"b c\" d", {"a\\\\\"b", "c d", NULL}}, {"\"a\\\\\\\\\"\"\"\"\"b c\" d", {"a\\\\\"\"b", "c d", NULL}}, {"\"a\\\\\\\\\"\"\"\"\"\"b c\" d", {"a\\\\\"\"b c", "d", NULL}}, {"\"a\\\\\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\\\"\"b", "c d", NULL}}, {"\"a\\\\\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\\\"\"\"b", "c d", NULL}}, #endif /* MOD3 */ /* Common tests that check the special program-name rule. */ {"\"a b\\\"c \"d e\" \"f g\"", {"a b\\c", "d e", "f g", NULL}, true}, {"\"a b\\\"c \"d e\" \"f g\"", {"a b\"c d", "e f", "g", NULL}, false}, }; void out_of_memory(void) { fprintf(stderr, "out of memory!\n"); exit(2); } int main(int argc, char **argv) { int i, j; if (argc > 1) { /* * Generation of tests. * * Given `-splat ', we print out a C-style * representation of each argument (in the form "a", "b", * NULL), backslash-escaping each backslash and double * quote. * * Given `-split ', we first doctor `string' by * turning forward slashes into backslashes, single quotes * into double quotes and underscores into spaces; and then * we feed the resulting string to ourself with `-splat'. * * Given `-generate', we concoct a variety of fun test * cases, encode them in quote-safe form (mapping \, " and * space to /, ' and _ respectively) and feed each one to * `-split'. */ if (!strcmp(argv[1], "-splat")) { int i; char *p; for (i = 2; i < argc; i++) { putchar('"'); for (p = argv[i]; *p; p++) { if (*p == '\\' || *p == '"') putchar('\\'); putchar(*p); } printf("\", "); } printf("NULL"); return 0; } if (!strcmp(argv[1], "-split") && argc > 2) { strbuf *cmdline = strbuf_new(); char *p; put_fmt(cmdline, "%s -splat ", argv[0]); printf(" {\""); size_t args_start = cmdline->len; for (p = argv[2]; *p; p++) { char c = (*p == '/' ? '\\' : *p == '\'' ? '"' : *p == '_' ? ' ' : *p); put_byte(cmdline, c); } write_c_string_literal(stdout, ptrlen_from_asciz( cmdline->s + args_start)); printf("\", {"); fflush(stdout); system(cmdline->s); printf("}},\n"); strbuf_free(cmdline); return 0; } if (!strcmp(argv[1], "-generate")) { char *teststr, *p; int i, initialquote, backslashes, quotes; teststr = malloc(200 + strlen(argv[0])); for (initialquote = 0; initialquote <= 1; initialquote++) { for (backslashes = 0; backslashes < 5; backslashes++) { for (quotes = 0; quotes < 9; quotes++) { p = teststr + sprintf(teststr, "%s -split ", argv[0]); if (initialquote) *p++ = '\''; *p++ = 'a'; for (i = 0; i < backslashes; i++) *p++ = '/'; for (i = 0; i < quotes; i++) *p++ = '\''; *p++ = 'b'; *p++ = '_'; *p++ = 'c'; *p++ = '\''; *p++ = '_'; *p++ = 'd'; *p = '\0'; system(teststr); } } } return 0; } if (!strcmp(argv[1], "-tabulate")) { char table[] = "\ * backslashes \n\ * \n\ * 0 1 2 3 4 \n\ * \n\ * 0 | \n\ * --------+----------------------------- \n\ * 1 | \n\ * q 2 | \n\ * u 3 | \n\ * o 4 | \n\ * t 5 | \n\ * e 6 | \n\ * s 7 | \n\ * 8 | \n\ "; char *linestarts[14]; char *p = table; for (i = 0; i < lenof(linestarts); i++) { linestarts[i] = p; p += strcspn(p, "\n"); if (*p) p++; } for (i = 0; i < lenof(argv_tests); i++) { const struct argv_test *test = &argv_tests[i]; const char *q = test->cmdline; /* Skip tests that aren't telling us something about * the behaviour _inside_ a quoted string */ if (*q != '"') continue; q++; assert(*q == 'a'); q++; int backslashes_in = 0, quotes_in = 0; while (*q == '\\') { q++; backslashes_in++; } while (*q == '"') { q++; quotes_in++; } q = test->argv[0]; assert(*q == 'a'); q++; int backslashes_out = 0, quotes_out = 0; while (*q == '\\') { q++; backslashes_out++; } while (*q == '"') { q++; quotes_out++; } assert(*q == 'b'); q++; bool in_quoted_string = (*q == ' '); int x = (backslashes_in == 0 ? 15 : 18 + 7 * backslashes_in); int y = (quotes_in == 0 ? 4 : 5 + quotes_in); char *buf = dupprintf("%d,%d,%c", backslashes_out, quotes_out, in_quoted_string ? 'y' : 'n'); memcpy(linestarts[y] + x, buf, strlen(buf)); sfree(buf); } fputs(table, stdout); return 0; } fprintf(stderr, "unrecognised option: \"%s\"\n", argv[1]); return 1; } /* * If we get here, we were invoked with no arguments, so just * run the tests. */ int passes = 0, fails = 0; for (i = 0; i < lenof(argv_tests); i++) { int ac; char **av; bool failed = false; split_into_argv((char *)argv_tests[i].cmdline, argv_tests[i].include_program_name, &ac, &av, NULL); for (j = 0; j < ac && argv_tests[i].argv[j]; j++) { if (strcmp(av[j], argv_tests[i].argv[j])) { printf("failed test %d (|%s|) arg %d: |%s| should be |%s|\n", i, argv_tests[i].cmdline, j, av[j], argv_tests[i].argv[j]); failed = true; } #ifdef VERBOSE else { printf("test %d (|%s|) arg %d: |%s| == |%s|\n", i, argv_tests[i].cmdline, j, av[j], argv_tests[i].argv[j]); } #endif } if (j < ac) { printf("failed test %d (|%s|): %d args returned, should be %d\n", i, argv_tests[i].cmdline, ac, j); failed = true; } if (argv_tests[i].argv[j]) { printf("failed test %d (|%s|): %d args returned, should be more\n", i, argv_tests[i].cmdline, ac); failed = true; } if (failed) fails++; else passes++; } printf("passed %d failed %d (%s mode)\n", passes, fails, #if MOD3 "mod 3" #else "mod 2" #endif ); return fails != 0; } #endif /* TEST */