1
0
mirror of https://git.tartarus.org/simon/putty.git synced 2025-01-10 01:48:00 +00:00

New utility function: decode_utf8_to_wide_string.

We already had encode_wide_string_as_utf8, which treats the wide
string as UTF-16 or UTF-32 as appropriate to the size of wchar_t. I'm
about to need the inverse function, and was surprised that it didn't
already exist (even though enough component parts did to make it easy).
This commit is contained in:
Simon Tatham 2023-05-29 13:27:16 +01:00
parent 36db93748e
commit 392be3e494
3 changed files with 40 additions and 0 deletions

4
misc.h
View File

@ -256,6 +256,10 @@ unsigned smemeq(const void *av, const void *bv, size_t len);
* encoded in UTF-16. */ * encoded in UTF-16. */
char *encode_wide_string_as_utf8(const wchar_t *wstr); char *encode_wide_string_as_utf8(const wchar_t *wstr);
/* Decode UTF-8 to a wide-character string, emitting UTF-16 surrogates
 * if sizeof(wchar_t) == 2. This is the inverse of
 * encode_wide_string_as_utf8.
 *
 * 'ustr' is a NUL-terminated UTF-8 string. The return value is a
 * dynamically allocated wide string with a trailing L'\0'.
 * NOTE(review): presumably the caller owns the result and releases it
 * with sfree() - confirm against the allocator's conventions. */
wchar_t *decode_utf8_to_wide_string(const char *ustr);
/* Decode a single UTF-8 character. Returns U+FFFD for any of the /* Decode a single UTF-8 character. Returns U+FFFD for any of the
* illegal cases. If the source is empty, returns L'\0' (and sets the * illegal cases. If the source is empty, returns L'\0' (and sets the
* error indicator on the source, of course). */ * error indicator on the source, of course). */

View File

@ -21,6 +21,7 @@ add_sources_from_current_dir(utils
debug.c debug.c
decode_utf8.c decode_utf8.c
decode_utf8_to_wchar.c decode_utf8_to_wchar.c
decode_utf8_to_wide_string.c
default_description.c default_description.c
dupcat.c dupcat.c
dupprintf.c dupprintf.c

View File

@ -0,0 +1,35 @@
/*
* Decode a string of UTF-8 to a wchar_t string.
*/
#include "misc.h"
/*
 * Convert the NUL-terminated UTF-8 string 's' into a freshly allocated
 * wide-character string, terminated by L'\0'.
 *
 * Characters are decoded one at a time by decode_utf8_to_wchar, and
 * the output buffer is grown ahead of each call so that the decoder
 * always has room for its worst-case output.
 *
 * NOTE(review): the result comes from the sgrowarrayn/sresize
 * allocator family; presumably the caller releases it with sfree() -
 * confirm.
 */
wchar_t *decode_utf8_to_wide_string(const char *s)
{
    /*
     * Worst-case number of wchar_t written per decoded character: two
     * when wchar_t is narrower than 32 bits (a UTF-16 surrogate
     * pair), otherwise just one.
     */
    const size_t max_units_per_char = (sizeof(wchar_t) < 4) ? 2 : 1;

    wchar_t *out = NULL;
    size_t out_len = 0, out_cap = 0;

    /*
     * The decoder's failure report is deliberately never inspected:
     * when it is set, the decoder has already emitted U+FFFD
     * REPLACEMENT CHARACTER, which is the response we would have
     * chosen anyway.
     */
    DecodeUTF8Failure ignored_err;

    BinarySource input[1];
    BinarySource_BARE_INIT_PL(input, ptrlen_from_asciz(s));

    while (get_avail(input) > 0) {
        /* Make sure the next character's output is guaranteed to fit */
        sgrowarrayn(out, out_cap, out_len, max_units_per_char);

        wchar_t *dest = out + out_len;
        size_t emitted = decode_utf8_to_wchar(input, dest, &ignored_err);
        out_len += emitted;
    }

    /* Trim the buffer to exactly the decoded length plus a terminator */
    out = sresize(out, out_len + 1, wchar_t);
    out[out_len] = L'\0';

    return out;
}