From 8bd75b85eda6e4d89a3773e003591f1602577c82 Mon Sep 17 00:00:00 2001 From: Simon Tatham Date: Sun, 28 May 2023 11:30:59 +0100 Subject: [PATCH] Some support for wide-character filenames in Windows. The Windows version of the Filename structure now contains three versions of the pathname, in UTF-16, UTF-8 and the system code page. Callers can use whichever is most convenient. All uses of filenames for actually opening files now use the UTF-16 version, which means they can tolerate 'exotic' filenames, by which I mean those including Unicode characters outside the host system's CP_ACP default code page. Other uses of Filename structures inside the 'windows' subdirectory do something appropriate, e.g. when printing a filename inside a message box or a console message, we use the UTF-8 version of the filename with the UTF-8 version of the appropriate API. There are three remaining pieces to full Unicode filename support: One is that the cross-platform code has many calls to filename_to_str(), embodying the assumption that a file name can be reliably converted into the unspecified current character set; those will all need changing in some way. Another is that write_setting_filename(), in windows/storage.c, still saves filenames to the Registry as an ordinary REG_SZ in the system code page. So even if an exotic filename were stored in a Conf, that Conf couldn't round-trip via the Registry and back without corrupting that filename by coercing it back to a string that fits in CP_ACP and therefore doesn't represent the same file. This can't be fixed without a compatibility break in the storage format, and I don't want to make a minimal change in that area: if we're going to break compatibility, then we should break it good and hard (the Nanny Ogg principle), and devise a completely fresh storage representation that fixes as many other legacy problems as possible at the same time. So that's my plan, not yet started. 
The final point, much more obviously, is that we're still short of methods to _construct_ any Filename structures using a Unicode input string! It should now work to enter one in the GUI configurer (either by manual text input or via the file selector), but it won't round-trip through a save and load (as discussed above), and there's still no way to specify one on the command line (the groundwork is laid by commit 10e1ac7752de928 but not yet linked up). But this is a start. --- windows/console.c | 6 +-- windows/controls.c | 34 ++++++------ windows/dialog.c | 7 +-- windows/gss.c | 33 ++++++------ windows/pageant.c | 2 +- windows/platform.h | 44 ++++++++++----- windows/storage.c | 18 ++++++- windows/utils/filename.c | 54 ++++++++++++++++--- .../utils/open_for_write_would_lose_data.c | 14 ++--- windows/window.c | 20 ++++--- 10 files changed, 159 insertions(+), 73 deletions(-) diff --git a/windows/console.c b/windows/console.c index 865c5cf3..ad86eb5c 100644 --- a/windows/console.c +++ b/windows/console.c @@ -554,15 +554,15 @@ int console_askappend(LogPolicy *lp, Filename *filename, "The session log file \"%.*s\" already exists.\n" "Logging will not be enabled.\n"; - ConsoleIO *conio = conio_setup(false); + ConsoleIO *conio = conio_setup(true); int result; if (console_batch_mode) { - put_fmt(conio, msgtemplate_batch, FILENAME_MAX, filename->path); + put_fmt(conio, msgtemplate_batch, FILENAME_MAX, filename->utf8path); result = 0; goto out; } - put_fmt(conio, msgtemplate, FILENAME_MAX, filename->path); + put_fmt(conio, msgtemplate, FILENAME_MAX, filename->utf8path); ResponseType response = parse_and_free_response( console_read_line(conio, true)); diff --git a/windows/controls.c b/windows/controls.c index 1d1c74c0..84708168 100644 --- a/windows/controls.c +++ b/windows/controls.c @@ -1988,32 +1988,36 @@ bool winctrl_handle_command(struct dlgparam *dp, UINT msg, (msg == WM_COMMAND && (HIWORD(wParam) == BN_CLICKED || HIWORD(wParam) == BN_DOUBLECLICKED))) { - OPENFILENAME 
of; - char filename[FILENAME_MAX]; + OPENFILENAMEW of; + wchar_t filename[FILENAME_MAX]; + + wchar_t *title_to_free = NULL; memset(&of, 0, sizeof(of)); of.hwndOwner = dp->hwnd; if (ctrl->fileselect.filter) of.lpstrFilter = ctrl->fileselect.filter; else - of.lpstrFilter = "All Files (*.*)\0*\0\0\0"; + of.lpstrFilter = L"All Files (*.*)\0*\0\0\0"; of.lpstrCustomFilter = NULL; of.nFilterIndex = 1; of.lpstrFile = filename; if (!ctrl->fileselect.just_button) { - GetDlgItemText(dp->hwnd, c->base_id+1, - filename, lenof(filename)); - filename[lenof(filename)-1] = '\0'; + GetDlgItemTextW(dp->hwnd, c->base_id+1, + filename, lenof(filename)); + filename[lenof(filename)-1] = L'\0'; } else { - *filename = '\0'; + *filename = L'\0'; } of.nMaxFile = lenof(filename); of.lpstrFileTitle = NULL; - of.lpstrTitle = ctrl->fileselect.title; + of.lpstrTitle = title_to_free = dup_mb_to_wc( + DEFAULT_CODEPAGE, 0, ctrl->fileselect.title); of.Flags = 0; - if (request_file(NULL, &of, false, ctrl->fileselect.for_writing)) { + if (request_file_w(NULL, &of, false, + ctrl->fileselect.for_writing)) { if (!ctrl->fileselect.just_button) { - SetDlgItemText(dp->hwnd, c->base_id + 1, filename); + SetDlgItemTextW(dp->hwnd, c->base_id + 1, filename); ctrl->handler(ctrl, dp, dp->data, EVENT_VALCHANGE); } else { assert(!c->data); @@ -2022,6 +2026,8 @@ bool winctrl_handle_command(struct dlgparam *dp, UINT msg, c->data = NULL; } } + + sfree(title_to_free); } break; case CTRL_FONTSELECT: @@ -2394,19 +2400,17 @@ void dlg_filesel_set(dlgcontrol *ctrl, dlgparam *dp, Filename *fn) assert(c); assert(c->ctrl->type == CTRL_FILESELECT); assert(!c->ctrl->fileselect.just_button); - SetDlgItemText(dp->hwnd, c->base_id+1, fn->path); + SetDlgItemTextW(dp->hwnd, c->base_id+1, fn->wpath); } Filename *dlg_filesel_get(dlgcontrol *ctrl, dlgparam *dp) { struct winctrl *c = dlg_findbyctrl(dp, ctrl); - char *tmp; - Filename *ret; assert(c); assert(c->ctrl->type == CTRL_FILESELECT); if (!c->ctrl->fileselect.just_button) { - tmp = 
GetDlgItemText_alloc(dp->hwnd, c->base_id+1); - ret = filename_from_str(tmp); + wchar_t *tmp = GetDlgItemTextW_alloc(dp->hwnd, c->base_id+1); + Filename *ret = filename_from_wstr(tmp); sfree(tmp); return ret; } else { diff --git a/windows/dialog.c b/windows/dialog.c index b97c4031..61bcf7ba 100644 --- a/windows/dialog.c +++ b/windows/dialog.c @@ -1227,11 +1227,12 @@ static int win_gui_askappend(LogPolicy *lp, Filename *filename, char *mbtitle; int mbret; - message = dupprintf(msgtemplate, FILENAME_MAX, filename->path); + message = dupprintf(msgtemplate, FILENAME_MAX, filename->utf8path); mbtitle = dupprintf("%s Log to File", appname); - mbret = MessageBox(NULL, message, mbtitle, - MB_ICONQUESTION | MB_YESNOCANCEL | MB_DEFBUTTON3); + mbret = message_box(NULL, message, mbtitle, + MB_ICONQUESTION | MB_YESNOCANCEL | MB_DEFBUTTON3, + true, 0); socket_reselect_all(); diff --git a/windows/gss.c b/windows/gss.c index b7bca7d1..bb1e914a 100644 --- a/windows/gss.c +++ b/windows/gss.c @@ -119,7 +119,6 @@ struct ssh_gss_liblist *ssh_gss_setup(Conf *conf) { HMODULE module; struct ssh_gss_liblist *list = snew(struct ssh_gss_liblist); - char *path; static HMODULE kernel32_module; if (!kernel32_module) { kernel32_module = load_system32_dll("kernel32.dll"); @@ -233,34 +232,36 @@ struct ssh_gss_liblist *ssh_gss_setup(Conf *conf) * Custom GSSAPI DLL. 
*/ module = NULL; - path = conf_get_filename(conf, CONF_ssh_gss_custom)->path; - if (*path) { + Filename *customlib = conf_get_filename(conf, CONF_ssh_gss_custom); + if (!filename_is_null(customlib)) { + const wchar_t *path = customlib->wpath; if (p_AddDllDirectory) { + /* Add the custom directory as well in case it chainloads * some other DLLs (e.g a non-installed MIT Kerberos * instance) */ - int pathlen = strlen(path); + int pathlen = wcslen(path); - while (pathlen > 0 && path[pathlen-1] != ':' && - path[pathlen-1] != '\\') + while (pathlen > 0 && path[pathlen-1] != L':' && + path[pathlen-1] != L'\\') pathlen--; - if (pathlen > 0 && path[pathlen-1] != '\\') + if (pathlen > 0 && path[pathlen-1] != L'\\') pathlen--; if (pathlen > 0) { - char *dirpath = dupprintf("%.*s", pathlen, path); - wchar_t *dllPath = dup_mb_to_wc(DEFAULT_CODEPAGE, 0, dirpath); - p_AddDllDirectory(dllPath); - sfree(dllPath); + wchar_t *dirpath = snewn(pathlen + 1, wchar_t); + memcpy(dirpath, path, pathlen * sizeof(wchar_t)); + dirpath[pathlen] = L'\0'; + p_AddDllDirectory(dirpath); sfree(dirpath); } } - module = LoadLibraryEx(path, NULL, - LOAD_LIBRARY_SEARCH_SYSTEM32 | - LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR | - LOAD_LIBRARY_SEARCH_USER_DIRS); + module = LoadLibraryExW(path, NULL, + LOAD_LIBRARY_SEARCH_SYSTEM32 | + LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR | + LOAD_LIBRARY_SEARCH_USER_DIRS); } if (module) { struct ssh_gss_library *lib = @@ -268,7 +269,7 @@ struct ssh_gss_liblist *ssh_gss_setup(Conf *conf) lib->id = 2; lib->gsslogmsg = dupprintf("Using GSSAPI from user-specified" - " library '%s'", path); + " library '%s'", customlib->cpath); lib->handle = (void *)module; #define BIND_GSS_FN(name) \ diff --git a/windows/pageant.c b/windows/pageant.c index 221154e2..d66eb0bc 100644 --- a/windows/pageant.c +++ b/windows/pageant.c @@ -549,7 +549,7 @@ static void prompt_add_keyfile(bool encrypted) if (!keypath) keypath = filereq_new(); memset(&of, 0, sizeof(of)); of.hwndOwner = traywindow; - of.lpstrFilter = 
FILTER_KEY_FILES; + of.lpstrFilter = FILTER_KEY_FILES_C; of.lpstrCustomFilter = NULL; of.nFilterIndex = 1; of.lpstrFile = filelist; diff --git a/windows/platform.h b/windows/platform.h index 409aa824..8555bc76 100644 --- a/windows/platform.h +++ b/windows/platform.h @@ -48,13 +48,25 @@ #define AGENT_COPYDATA_ID 0x804e50ba struct Filename { - char *path; + /* + * A Windows Filename stores a path in three formats: + * + * - wchar_t (in Windows UTF-16 encoding). The best format to use + * for actual file API functions, because all legal Windows + * file names are representable. + * + * - char, in the system default codepage. A fallback to use if + * necessary, e.g. in diagnostics written to somewhere that is + * unavoidably encoded _in_ the system codepage. + * + * - char, in UTF-8. An equally general representation to wpath, + * but suitable for keeping in char-typed strings. + */ + wchar_t *wpath; + char *cpath, *utf8path; }; -static inline FILE *f_open(const Filename *filename, const char *mode, - bool isprivate) -{ - return fopen(filename->path, mode); -} +Filename *filename_from_wstr(const wchar_t *str); +FILE *f_open(const Filename *filename, const char *mode, bool isprivate); struct FontSpec { char *name; @@ -286,13 +298,21 @@ void write_aclip(HWND hwnd, int clipboard, char *, int); * these strings are of exactly the type needed to go in * `lpstrFilter' in an OPENFILENAME structure. 
*/ -typedef const char *FILESELECT_FILTER_TYPE; -#define FILTER_KEY_FILES ("PuTTY Private Key Files (*.ppk)\0*.ppk\0" \ - "All Files (*.*)\0*\0\0\0") -#define FILTER_WAVE_FILES ("Wave Files (*.wav)\0*.WAV\0" \ +typedef const wchar_t *FILESELECT_FILTER_TYPE; +#define FILTER_KEY_FILES (L"PuTTY Private Key Files (*.ppk)\0*.ppk\0" \ + L"All Files (*.*)\0*\0\0\0") +#define FILTER_WAVE_FILES (L"Wave Files (*.wav)\0*.WAV\0" \ + L"All Files (*.*)\0*\0\0\0") +#define FILTER_DYNLIB_FILES (L"Dynamic Library Files (*.dll)\0*.dll\0" \ + L"All Files (*.*)\0*\0\0\0") + +/* char-based versions of the above, for outlying uses of file selectors. */ +#define FILTER_KEY_FILES_C ("PuTTY Private Key Files (*.ppk)\0*.ppk\0" \ + "All Files (*.*)\0*\0\0\0") +#define FILTER_WAVE_FILES_C ("Wave Files (*.wav)\0*.WAV\0" \ + "All Files (*.*)\0*\0\0\0") +#define FILTER_DYNLIB_FILES_C ("Dynamic Library Files (*.dll)\0*.dll\0" \ "All Files (*.*)\0*\0\0\0") -#define FILTER_DYNLIB_FILES ("Dynamic Library Files (*.dll)\0*.dll\0" \ - "All Files (*.*)\0*\0\0\0") /* * Exports from network.c. diff --git a/windows/storage.c b/windows/storage.c index b871ad5b..a1cab238 100644 --- a/windows/storage.c +++ b/windows/storage.c @@ -183,7 +183,23 @@ Filename *read_setting_filename(settings_r *handle, const char *name) void write_setting_filename(settings_w *handle, const char *name, Filename *result) { - write_setting_s(handle, name, result->path); + /* + * When saving a session involving a Filename, we use the 'cpath' + * member of the Filename structure, because otherwise we break + * backwards compatibility with existing saved sessions. + * + * This means that 'exotic' filenames - those including Unicode + * characters outside the host system's CP_ACP default code page - + * cannot be represented faithfully, and saving and reloading a + * Conf including one will break it. 
+ * + * This can't be fixed without breaking backwards compatibility, + * and if we're going to break compatibility then we should break + * it good and hard (the Nanny Ogg principle), and devise a + * completely fresh storage representation that fixes as many + * other legacy problems as possible at the same time. + */ + write_setting_s(handle, name, result->cpath); /* FIXME */ } void close_settings_r(settings_r *handle) diff --git a/windows/utils/filename.c b/windows/utils/filename.c index a91732c8..eb358b1f 100644 --- a/windows/utils/filename.c +++ b/windows/utils/filename.c @@ -2,48 +2,79 @@ * Implementation of Filename for Windows. */ +#include <wchar.h> + #include "putty.h" Filename *filename_from_str(const char *str) { Filename *fn = snew(Filename); - fn->path = dupstr(str); + fn->cpath = dupstr(str); + fn->wpath = dup_mb_to_wc(DEFAULT_CODEPAGE, 0, fn->cpath); + fn->utf8path = encode_wide_string_as_utf8(fn->wpath); + return fn; +} + +Filename *filename_from_wstr(const wchar_t *str) +{ + Filename *fn = snew(Filename); + fn->wpath = dupwcs(str); + fn->cpath = dup_wc_to_mb(DEFAULT_CODEPAGE, 0, fn->wpath, "?"); + fn->utf8path = encode_wide_string_as_utf8(fn->wpath); + return fn; +} + +Filename *filename_from_utf8(const char *ustr) +{ + Filename *fn = snew(Filename); + fn->utf8path = dupstr(ustr); + fn->wpath = decode_utf8_to_wide_string(fn->utf8path); + fn->cpath = dup_wc_to_mb(DEFAULT_CODEPAGE, 0, fn->wpath, "?"); return fn; } Filename *filename_copy(const Filename *fn) { - return filename_from_str(fn->path); + Filename *newfn = snew(Filename); + newfn->cpath = dupstr(fn->cpath); + newfn->wpath = dupwcs(fn->wpath); + newfn->utf8path = dupstr(fn->utf8path); + return newfn; } const char *filename_to_str(const Filename *fn) { - return fn->path; + return fn->cpath; /* FIXME */ } bool filename_equal(const Filename *f1, const Filename *f2) { - return !strcmp(f1->path, f2->path); + /* wpath is primary: two filenames refer to the same file if they + * have the same wpath */ + 
return !wcscmp(f1->wpath, f2->wpath); } bool filename_is_null(const Filename *fn) { - return !*fn->path; + return !*fn->wpath; } void filename_free(Filename *fn) { - sfree(fn->path); + sfree(fn->wpath); + sfree(fn->cpath); + sfree(fn->utf8path); sfree(fn); } void filename_serialise(BinarySink *bs, const Filename *f) { - put_asciz(bs, f->path); + put_asciz(bs, f->utf8path); } Filename *filename_deserialise(BinarySource *src) { - return filename_from_str(get_asciz(src)); + const char *utf8 = get_asciz(src); + return filename_from_utf8(utf8); } char filename_char_sanitise(char c) @@ -52,3 +83,12 @@ char filename_char_sanitise(char c) return '.'; return c; } + +FILE *f_open(const Filename *fn, const char *mode, bool isprivate) +{ + wchar_t *wmode = dup_mb_to_wc(DEFAULT_CODEPAGE, 0, mode); + FILE *fp = _wfopen(fn->wpath, wmode); + /* Free the converted mode string only once _wfopen is done with it. */ + sfree(wmode); + return fp; +} diff --git a/windows/utils/open_for_write_would_lose_data.c b/windows/utils/open_for_write_would_lose_data.c index 2aef5c5a..0645d7ac 100644 --- a/windows/utils/open_for_write_would_lose_data.c +++ b/windows/utils/open_for_write_would_lose_data.c @@ -39,17 +39,17 @@ static inline bool open_for_write_would_lose_data_impl( bool open_for_write_would_lose_data(const Filename *fn) { static HMODULE kernel32_module; - DECL_WINDOWS_FUNCTION(static, BOOL, GetFileAttributesExA, - (LPCSTR, GET_FILEEX_INFO_LEVELS, LPVOID)); + DECL_WINDOWS_FUNCTION(static, BOOL, GetFileAttributesExW, + (LPCWSTR, GET_FILEEX_INFO_LEVELS, LPVOID)); if (!kernel32_module) { kernel32_module = load_system32_dll("kernel32.dll"); - GET_WINDOWS_FUNCTION(kernel32_module, GetFileAttributesExA); + GET_WINDOWS_FUNCTION(kernel32_module, GetFileAttributesExW); } - if (p_GetFileAttributesExA) { + if (p_GetFileAttributesExW) { WIN32_FILE_ATTRIBUTE_DATA attrs; - if (!p_GetFileAttributesExA(fn->path, GetFileExInfoStandard, &attrs)) { + if (!p_GetFileAttributesExW(fn->wpath, GetFileExInfoStandard, &attrs)) { /* * Generally, if we don't identify a specific reason why we * should 
return true from this function, we return false, and @@ -61,8 +61,8 @@ bool open_for_write_would_lose_data(const Filename *fn) return open_for_write_would_lose_data_impl( attrs.dwFileAttributes, attrs.nFileSizeHigh, attrs.nFileSizeLow); } else { - WIN32_FIND_DATA fd; - HANDLE h = FindFirstFile(fn->path, &fd); + WIN32_FIND_DATAW fd; + HANDLE h = FindFirstFileW(fn->wpath, &fd); if (h == INVALID_HANDLE_VALUE) { /* * As above, if we can't find the file at all, return false. diff --git a/windows/window.c b/windows/window.c index e339c6af..fb1ec9fa 100644 --- a/windows/window.c +++ b/windows/window.c @@ -4120,7 +4120,8 @@ static int wintw_char_width(TermWin *tw, int uc) DECL_WINDOWS_FUNCTION(static, BOOL, FlashWindowEx, (PFLASHWINFO)); DECL_WINDOWS_FUNCTION(static, BOOL, ToUnicodeEx, (UINT, UINT, const BYTE *, LPWSTR, int, UINT, HKL)); -DECL_WINDOWS_FUNCTION(static, BOOL, PlaySound, (LPCTSTR, HMODULE, DWORD)); +DECL_WINDOWS_FUNCTION(static, BOOL, PlaySoundW, (LPCWSTR, HMODULE, DWORD)); +DECL_WINDOWS_FUNCTION(static, BOOL, PlaySoundA, (LPCSTR, HMODULE, DWORD)); static void init_winfuncs(void) { @@ -4129,7 +4130,8 @@ static void init_winfuncs(void) HMODULE shcore_module = load_system32_dll("shcore.dll"); GET_WINDOWS_FUNCTION(user32_module, FlashWindowEx); GET_WINDOWS_FUNCTION(user32_module, ToUnicodeEx); - GET_WINDOWS_FUNCTION_PP(winmm_module, PlaySound); + GET_WINDOWS_FUNCTION(winmm_module, PlaySoundW); + GET_WINDOWS_FUNCTION(winmm_module, PlaySoundA); GET_WINDOWS_FUNCTION_NO_TYPECHECK(user32_module, GetMonitorInfoA); GET_WINDOWS_FUNCTION_NO_TYPECHECK(user32_module, MonitorFromPoint); GET_WINDOWS_FUNCTION_NO_TYPECHECK(user32_module, MonitorFromWindow); @@ -5610,16 +5612,20 @@ static void wintw_bell(TermWin *tw, int mode) } else if (mode == BELL_WAVEFILE) { Filename *bell_wavefile = conf_get_filename( wgs->conf, CONF_bell_wavefile); - if (!p_PlaySound || !p_PlaySound(bell_wavefile->path, NULL, - SND_ASYNC | SND_FILENAME)) { + bool success = ( + p_PlaySoundW ? 
p_PlaySoundW(bell_wavefile->wpath, NULL, + SND_ASYNC | SND_FILENAME) : + p_PlaySoundA ? p_PlaySoundA(bell_wavefile->cpath, NULL, + SND_ASYNC | SND_FILENAME) : false); + if (!success) { char *buf, *otherbuf; show_mouseptr(wgs, true); buf = dupprintf( "Unable to play sound file\n%s\nUsing default sound instead", - bell_wavefile->path); + bell_wavefile->utf8path); otherbuf = dupprintf("%s Sound Error", appname); - MessageBox(wgs->term_hwnd, buf, otherbuf, - MB_OK | MB_ICONEXCLAMATION); + message_box(wgs->term_hwnd, buf, otherbuf, + MB_OK | MB_ICONEXCLAMATION, true, 0); sfree(buf); sfree(otherbuf); conf_set_int(wgs->conf, CONF_beep, BELL_DEFAULT);