mirror of
https://git.tartarus.org/simon/putty.git
synced 2025-01-09 01:18:00 +00:00
New utility function: decode_utf8_to_wide_string.
We already had encode_wide_string_as_utf8, which treats the wide string as UTF-16 or UTF-32 as appropriate to the size of wchar_t. I'm about to need the inverse function, and was surprised that it didn't already exist (even though enough component parts did to make it easy).
This commit is contained in:
parent
36db93748e
commit
392be3e494
4
misc.h
4
misc.h
@ -256,6 +256,10 @@ unsigned smemeq(const void *av, const void *bv, size_t len);
|
||||
* encoded in UTF-16. */
|
||||
char *encode_wide_string_as_utf8(const wchar_t *wstr);
|
||||
|
||||
/* Decode UTF-8 to a wide-character string, emitting UTF-16 surrogates
|
||||
* if sizeof(wchar_t) == 2. */
|
||||
wchar_t *decode_utf8_to_wide_string(const char *ustr);
|
||||
|
||||
/* Decode a single UTF-8 character. Returns U+FFFD for any of the
|
||||
* illegal cases. If the source is empty, returns L'\0' (and sets the
|
||||
* error indicator on the source, of course). */
|
||||
|
@ -21,6 +21,7 @@ add_sources_from_current_dir(utils
|
||||
debug.c
|
||||
decode_utf8.c
|
||||
decode_utf8_to_wchar.c
|
||||
decode_utf8_to_wide_string.c
|
||||
default_description.c
|
||||
dupcat.c
|
||||
dupprintf.c
|
||||
|
35
utils/decode_utf8_to_wide_string.c
Normal file
35
utils/decode_utf8_to_wide_string.c
Normal file
@ -0,0 +1,35 @@
|
||||
/*
|
||||
* Decode a string of UTF-8 to a wchar_t string.
|
||||
*/
|
||||
|
||||
#include "misc.h"
|
||||
|
||||
wchar_t *decode_utf8_to_wide_string(const char *s)
|
||||
{
|
||||
wchar_t *ws = NULL;
|
||||
size_t wlen = 0, wsize = 0;
|
||||
|
||||
BinarySource src[1];
|
||||
BinarySource_BARE_INIT_PL(src, ptrlen_from_asciz(s));
|
||||
|
||||
while (get_avail(src) > 0) {
|
||||
/*
|
||||
* decode_utf8_to_wchar might emit up to 2 wchar_t if wchar_t
|
||||
* is 16 bits (because of UTF-16 surrogates), but will emit at
|
||||
* most one if wchar_t is 32-bit
|
||||
*/
|
||||
sgrowarrayn(ws, wsize, wlen, 1 + (sizeof(wchar_t) < 4));
|
||||
|
||||
/* We ignore 'err': if it is set, then the character decode
|
||||
* function will have emitted U+FFFD REPLACEMENT CHARACTER,
|
||||
* which is what we'd have done in response anyway. */
|
||||
DecodeUTF8Failure err;
|
||||
wlen += decode_utf8_to_wchar(src, ws + wlen, &err);
|
||||
}
|
||||
|
||||
/* Reallocate to the final size and append the trailing NUL */
|
||||
ws = sresize(ws, wlen + 1, wchar_t);
|
||||
ws[wlen] = L'\0';
|
||||
|
||||
return ws;
|
||||
}
|
Loading…
Reference in New Issue
Block a user