mirror of
https://git.tartarus.org/simon/putty.git
synced 2025-01-25 01:02:24 +00:00
New utility function: decode_utf8_to_wide_string.
We already had encode_wide_string_as_utf8, which treats the wide string as UTF-16 or UTF-32 as appropriate to the size of wchar_t. I'm about to need the inverse function, and was surprised that it didn't already exist (even though enough component parts did to make it easy).
This commit is contained in:
parent
36db93748e
commit
392be3e494
4
misc.h
4
misc.h
@ -256,6 +256,10 @@ unsigned smemeq(const void *av, const void *bv, size_t len);
|
|||||||
* encoded in UTF-16. */
|
* encoded in UTF-16. */
|
||||||
char *encode_wide_string_as_utf8(const wchar_t *wstr);
|
char *encode_wide_string_as_utf8(const wchar_t *wstr);
|
||||||
|
|
||||||
|
/* Decode UTF-8 to a wide-character string, emitting UTF-16 surrogates
|
||||||
|
* if sizeof(wchar_t) == 2. */
|
||||||
|
wchar_t *decode_utf8_to_wide_string(const char *ustr);
|
||||||
|
|
||||||
/* Decode a single UTF-8 character. Returns U+FFFD for any of the
|
/* Decode a single UTF-8 character. Returns U+FFFD for any of the
|
||||||
* illegal cases. If the source is empty, returns L'\0' (and sets the
|
* illegal cases. If the source is empty, returns L'\0' (and sets the
|
||||||
* error indicator on the source, of course). */
|
* error indicator on the source, of course). */
|
||||||
|
@ -21,6 +21,7 @@ add_sources_from_current_dir(utils
|
|||||||
debug.c
|
debug.c
|
||||||
decode_utf8.c
|
decode_utf8.c
|
||||||
decode_utf8_to_wchar.c
|
decode_utf8_to_wchar.c
|
||||||
|
decode_utf8_to_wide_string.c
|
||||||
default_description.c
|
default_description.c
|
||||||
dupcat.c
|
dupcat.c
|
||||||
dupprintf.c
|
dupprintf.c
|
||||||
|
35
utils/decode_utf8_to_wide_string.c
Normal file
35
utils/decode_utf8_to_wide_string.c
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
/*
|
||||||
|
* Decode a string of UTF-8 to a wchar_t string.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "misc.h"
|
||||||
|
|
||||||
|
wchar_t *decode_utf8_to_wide_string(const char *s)
|
||||||
|
{
|
||||||
|
wchar_t *ws = NULL;
|
||||||
|
size_t wlen = 0, wsize = 0;
|
||||||
|
|
||||||
|
BinarySource src[1];
|
||||||
|
BinarySource_BARE_INIT_PL(src, ptrlen_from_asciz(s));
|
||||||
|
|
||||||
|
while (get_avail(src) > 0) {
|
||||||
|
/*
|
||||||
|
* decode_utf8_to_wchar might emit up to 2 wchar_t if wchar_t
|
||||||
|
* is 16 bits (because of UTF-16 surrogates), but will emit at
|
||||||
|
* most one if wchar_t is 32-bit
|
||||||
|
*/
|
||||||
|
sgrowarrayn(ws, wsize, wlen, 1 + (sizeof(wchar_t) < 4));
|
||||||
|
|
||||||
|
/* We ignore 'err': if it is set, then the character decode
|
||||||
|
* function will have emitted U+FFFD REPLACEMENT CHARACTER,
|
||||||
|
* which is what we'd have done in response anyway. */
|
||||||
|
DecodeUTF8Failure err;
|
||||||
|
wlen += decode_utf8_to_wchar(src, ws + wlen, &err);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Reallocate to the final size and append the trailing NUL */
|
||||||
|
ws = sresize(ws, wlen + 1, wchar_t);
|
||||||
|
ws[wlen] = L'\0';
|
||||||
|
|
||||||
|
return ws;
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user