1
0
mirror of https://git.tartarus.org/simon/putty.git synced 2025-01-25 01:02:24 +00:00
putty-source/windows/utils/cmdline_arg.c

219 lines
5.9 KiB
C
Raw Permalink Normal View History

New abstraction for command-line arguments. This begins the process of enabling our Windows applications to handle Unicode characters on their command lines which don't fit in the system code page. Instead of passing plain strings to cmdline_process_param, we now pass a partially opaque and platform-specific thing called a CmdlineArg. This has a method that extracts the argument word as a default-encoded string, and another one that tries to extract it as UTF-8 (though it may fail if the UTF-8 isn't available). On Windows, the command line is now constructed by calling split_into_argv_w on the Unicode command line returned by GetCommandLineW(), and the UTF-8 method returns text converted directly from that wide-character form, not going via the system code page. So it _can_ include UTF-8 characters that wouldn't have round-tripped via CP_ACP. This commit introduces the abstraction and switches over the cross-platform and Windows argv-handling code to use it, with minimal functional change. Nothing yet tries to call cmdline_arg_get_utf8(). I say 'cross-platform and Windows' because on the Unix side there's still a lot of use of plain old argv which I haven't converted. That would be a much larger project, and isn't currently needed: the _current_ aim of this abstraction is to get the right things to happen relating to Unicode on Windows, so for code that doesn't run on Windows anyway, it's not adding value. (Also there's a tension with GTK, which wants to talk to standard argv and extract arguments _it_ knows about, so at the very least we'd have to let it munge argv before importing it into this new system.)
2024-09-25 09:18:38 +00:00
/*
* Implementation of the CmdlineArg abstraction for Windows
*/
#include <wchar.h>
#include "putty.h"
/*
 * Windows-private representation of one command-line argument. Code
 * outside this file only ever holds a pointer to the embedded 'argp'
 * member; the functions here recover the enclosing structure with
 * container_of().
 */
typedef struct CmdlineArgWin {
    /*
     * The argument text as wide characters.
     */
    wchar_t *wide;

    /*
     * The same text converted to UTF-8, which is the most faithful
     * narrow rendering of the wide string.
     */
    char *utf8;

    /*
     * The same text converted to CP_ACP, the normal system code page.
     */
    char *acp;

    /*
     * Position of this argument in the owning CmdlineArgList, or
     * (size_t)-1 for an argument that was invented later and so has
     * no position of its own.
     */
    size_t index;

    /*
     * Public part of the structure.
     */
    CmdlineArg argp;
} CmdlineArgWin;
/*
 * Windows-private representation of a whole argument list. Code
 * outside this file only ever holds a pointer to the embedded 'listp'
 * member; the functions here recover the enclosing structure with
 * container_of().
 */
typedef struct CmdlineArgListWin {
    /*
     * The wide string pointer handed back by GetCommandLineW. This is
     * the 'official' copy of the command line: overwriting it changes
     * what Task Manager displays as the process's command line. That
     * makes it the text cmdline_arg_wipe has to overwrite
     * deliberately.
     */
    wchar_t *cmdline;

    /*
     * Results of split_into_argv_w: the number of words, ...
     */
    size_t argc;

    /*
     * ... the words themselves, ...
     */
    wchar_t **argv;

    /*
     * ... and, for each word, a pointer to where it starts.
     */
    wchar_t **argstart;

    /*
     * Public part of the structure.
     */
    CmdlineArgList listp;
} CmdlineArgListWin;
/*
 * Allocate a blank CmdlineArgWin and append its public part to the
 * end of listp->args.
 *
 * The new argument starts with all three string forms set to NULL and
 * with index set to (size_t)-1, the marker for 'not one of the
 * original command-line words'. Callers fill in whatever they have.
 *
 * Returns the private structure, not the public CmdlineArg.
 */
static CmdlineArgWin *cmdline_arg_new_in_list(CmdlineArgList *listp)
{
    CmdlineArgWin *fresh = snew(CmdlineArgWin);

    fresh->argp.list = listp;
    fresh->index = (size_t)-1;
    fresh->wide = NULL;
    fresh->utf8 = NULL;
    fresh->acp = NULL;

    /* Make sure there is room for one more pointer, then store it. */
    sgrowarray(listp->args, listp->argssize, listp->nargs);
    listp->args[listp->nargs] = &fresh->argp;
    listp->nargs++;

    return fresh;
}
/*
 * Append a new argument to 'list', built from the wide-character
 * string 'word'.
 *
 * The argument keeps its own duplicate of 'word', plus conversions of
 * it into UTF-8 and into CP_ACP. Its index is left at the default set
 * by cmdline_arg_new_in_list; the caller assigns a real one if needed.
 */
static CmdlineArg *cmdline_arg_from_wide_argv_word(
    CmdlineArgList *list, wchar_t *word)
{
    CmdlineArgWin *made = cmdline_arg_new_in_list(list);

    made->acp = dup_wc_to_mb(CP_ACP, word, "");
    made->utf8 = dup_wc_to_mb(CP_UTF8, word, "");
    made->wide = dupwcs(word);

    return &made->argp;
}
CmdlineArgList *cmdline_arg_list_from_GetCommandLineW(void)
{
CmdlineArgListWin *list = snew(CmdlineArgListWin);
CmdlineArgList *listp = &list->listp;
list->cmdline = GetCommandLineW();
int argc;
split_into_argv_w(list->cmdline, true,
&argc, &list->argv, &list->argstart);
list->argc = (size_t)argc;
listp->args = NULL;
listp->nargs = listp->argssize = 0;
for (size_t i = 1; i < list->argc; i++) {
CmdlineArg *argp = cmdline_arg_from_wide_argv_word(
listp, list->argv[i]);
CmdlineArgWin *arg = container_of(argp, CmdlineArgWin, argp);
arg->index = i - 1; /* index in list->args[], not in argv[] */
}
sgrowarray(listp->args, listp->argssize, listp->nargs);
listp->args[listp->nargs++] = NULL;
return listp;
}
/*
 * Dispose of a single argument. Passing NULL is allowed and does
 * nothing.
 *
 * The three string forms are released via burnwcs/burnstr rather than
 * plain sfree (presumably so that their contents are wiped as well as
 * freed; see those helpers' definitions). This does not remove the
 * pointer from the owning list's args[] array.
 */
void cmdline_arg_free(CmdlineArg *argp)
{
    if (argp) {
        CmdlineArgWin *victim = container_of(argp, CmdlineArgWin, argp);

        burnstr(victim->acp);
        burnstr(victim->utf8);
        burnwcs(victim->wide);
        sfree(victim);
    }
}
/*
 * Dispose of an argument list created by
 * cmdline_arg_list_from_GetCommandLineW, together with every argument
 * still stored in it. Passing NULL is allowed and does nothing, to
 * match cmdline_arg_free.
 */
void cmdline_arg_list_free(CmdlineArgList *listp)
{
    if (!listp)
        return;

    CmdlineArgListWin *list = container_of(listp, CmdlineArgListWin, listp);

    /* nargs includes the NULL terminator appended when the list was
     * built; cmdline_arg_free ignores NULL, so that entry is harmless. */
    for (size_t i = 0; i < listp->nargs; i++)
        cmdline_arg_free(listp->args[i]);

    /* The pointer array itself was allocated by sgrowarray and must be
     * released too, or it leaks. This has to happen before 'list' is
     * freed, because listp lives inside it. */
    sfree(listp->args);

    /* list->argv[0] points at the start of the string allocated by
     * split_into_argv_w */
    sfree(list->argv[0]);
    sfree(list->argv);
    sfree(list->argstart);
    sfree(list);
}
/*
 * Append a new argument to 'listp', invented from a narrow string
 * that is treated as being in CP_ACP, the system code page.
 *
 * The argument keeps a duplicate of 'string' as its CP_ACP form. The
 * wide form is converted from that, and the UTF-8 form is converted
 * from the wide form. Its index stays at (size_t)-1, because it does
 * not correspond to any word of the original command line.
 */
CmdlineArg *cmdline_arg_from_str(CmdlineArgList *listp, const char *string)
{
    CmdlineArgWin *made = cmdline_arg_new_in_list(listp);
    wchar_t *widened = dup_mb_to_wc(CP_ACP, string);

    made->wide = widened;
    made->utf8 = dup_wc_to_mb(CP_UTF8, widened, "");
    made->acp = dupstr(string);

    return &made->argp;
}
/*
 * Append a new argument to 'listp', invented from a narrow string
 * that is in UTF-8.
 *
 * This is the UTF-8 counterpart of cmdline_arg_from_str. The input is
 * duplicated verbatim as the argument's UTF-8 form. The wide form is
 * decoded from it as CP_UTF8, and the CP_ACP form is then converted
 * from the wide form, so that cmdline_arg_to_str returns text in the
 * system code page and cmdline_arg_to_utf8 returns the original
 * bytes.
 *
 * The argument's index stays at (size_t)-1, because it does not
 * correspond to any word of the original command line.
 */
CmdlineArg *cmdline_arg_from_utf8(CmdlineArgList *listp, const char *string)
{
    CmdlineArgWin *arg = cmdline_arg_new_in_list(listp);
    /* The input is already UTF-8: keep it as the UTF-8 form, not the
     * CP_ACP one. */
    arg->utf8 = dupstr(string);
    arg->wide = dup_mb_to_wc(CP_UTF8, string);
    /* Only the code-page form needs converting, via the wide string. */
    arg->acp = dup_wc_to_mb(CP_ACP, arg->wide, "");
    return &arg->argp;
}
/*
 * Return the argument's text in CP_ACP, or NULL if 'argp' is NULL.
 *
 * The returned pointer is the argument's own stored string, not a
 * copy.
 */
const char *cmdline_arg_to_str(CmdlineArg *argp)
{
    const char *text = NULL;

    if (argp) {
        CmdlineArgWin *win = container_of(argp, CmdlineArgWin, argp);
        text = win->acp;
    }
    return text;
}
/*
 * Return the argument's text in UTF-8, or NULL if 'argp' is NULL.
 *
 * The returned pointer is the argument's own stored string, not a
 * copy.
 */
const char *cmdline_arg_to_utf8(CmdlineArg *argp)
{
    const char *text = NULL;

    if (argp) {
        CmdlineArgWin *win = container_of(argp, CmdlineArgWin, argp);
        text = win->utf8;
    }
    return text;
}
/*
 * Turn the argument into a Filename, or return NULL if 'argp' is
 * NULL.
 *
 * The Filename is made from the wide-character form of the argument,
 * by way of filename_from_wstr.
 */
Filename *cmdline_arg_to_filename(CmdlineArg *argp)
{
    Filename *result = NULL;

    if (argp) {
        CmdlineArgWin *win = container_of(argp, CmdlineArgWin, argp);
        result = filename_from_wstr(win->wide);
    }
    return result;
}
New abstraction for command-line arguments. This begins the process of enabling our Windows applications to handle Unicode characters on their command lines which don't fit in the system code page. Instead of passing plain strings to cmdline_process_param, we now pass a partially opaque and platform-specific thing called a CmdlineArg. This has a method that extracts the argument word as a default-encoded string, and another one that tries to extract it as UTF-8 (though it may fail if the UTF-8 isn't available). On Windows, the command line is now constructed by calling split_into_argv_w on the Unicode command line returned by GetCommandLineW(), and the UTF-8 method returns text converted directly from that wide-character form, not going via the system code page. So it _can_ include UTF-8 characters that wouldn't have round-tripped via CP_ACP. This commit introduces the abstraction and switches over the cross-platform and Windows argv-handling code to use it, with minimal functional change. Nothing yet tries to call cmdline_arg_get_utf8(). I say 'cross-platform and Windows' because on the Unix side there's still a lot of use of plain old argv which I haven't converted. That would be a much larger project, and isn't currently needed: the _current_ aim of this abstraction is to get the right things to happen relating to Unicode on Windows, so for code that doesn't run on Windows anyway, it's not adding value. (Also there's a tension with GTK, which wants to talk to standard argv and extract arguments _it_ knows about, so at the very least we'd have to let it munge argv before importing it into this new system.)
2024-09-25 09:18:38 +00:00
/*
 * Overwrite an argument's text with spaces, in the buffer that the
 * owning list's argstart[] pointers refer to.
 *
 * Nothing happens if 'argp' is NULL, or if the argument has no index
 * because it was invented later and so has no text to overwrite.
 *
 * The region wiped runs from the start of this word up to the start
 * of the next word, or to the end of the string if this is the last
 * word. The argument's own wide/utf8/acp copies are not touched.
 */
void cmdline_arg_wipe(CmdlineArg *argp)
{
    if (!argp)
        return;

    CmdlineArgWin *arg = container_of(argp, CmdlineArgWin, argp);
    if (arg->index == (size_t)-1)
        return;

    CmdlineArgList *listp = argp->list;
    CmdlineArgListWin *list = container_of(listp, CmdlineArgListWin, listp);

    /* arg->index counts from the first argument _after_ the program
     * name, but argstart[] counts from argv[0], hence the +1. */
    size_t pos = arg->index + 1;

    wchar_t *cursor = list->argstart[pos];
    wchar_t *stop;
    if (pos + 1 < list->argc)
        stop = list->argstart[pos + 1];
    else
        stop = cursor + wcslen(cursor);

    for (; cursor < stop; cursor++)
        *cursor = L' ';
}
/*
 * Return a pointer to where this argument starts, taken from the
 * owning list's argstart[] array. Going by the function's name, the
 * intent is that the string from there onwards is 'the rest of the
 * command line'.
 *
 * Unlike the accessors above, this does not accept NULL. It also
 * asserts that the argument has a real index, i.e. that it was not
 * invented later.
 */
const wchar_t *cmdline_arg_remainder_wide(CmdlineArg *argp)
{
    CmdlineArgWin *arg = container_of(argp, CmdlineArgWin, argp);
    assert(arg->index != (size_t)-1);

    CmdlineArgList *listp = argp->list;
    CmdlineArgListWin *list = container_of(listp, CmdlineArgListWin, listp);

    /* argstart[] is indexed from argv[0], whereas arg->index counts
     * from the first argument _after_ the program name. */
    size_t pos = arg->index + 1;
    return list->argstart[pos];
}
/*
 * Like cmdline_arg_remainder_wide, but the result is converted to
 * CP_ACP by dup_wc_to_mb and returned as a 'char *'. Judging by the
 * helper's name, that string is newly allocated.
 */
char *cmdline_arg_remainder_acp(CmdlineArg *argp)
{
    const wchar_t *rest = cmdline_arg_remainder_wide(argp);
    return dup_wc_to_mb(CP_ACP, rest, "");
}
/*
 * Like cmdline_arg_remainder_wide, but the result is converted to
 * UTF-8 by dup_wc_to_mb and returned as a 'char *'. Judging by the
 * helper's name, that string is newly allocated.
 */
char *cmdline_arg_remainder_utf8(CmdlineArg *argp)
{
    const wchar_t *rest = cmdline_arg_remainder_wide(argp);
    return dup_wc_to_mb(CP_UTF8, rest, "");
}