mirror of
https://git.tartarus.org/simon/putty.git
synced 2025-07-01 19:42:48 -05:00
Rework Unicode conversion APIs to use a BinarySink.
The previous mb_to_wc and wc_to_mb had horrible and also buggy APIs. This commit introduces a fresh pair of functions to replace them, which generate output by writing to a BinarySink. So it's now up to the caller to decide whether it wants the output written to a fixed-size buffer with overflow checking (via buffer_sink), or dynamically allocated, or even written directly to some other output channel.

Nothing uses the new functions yet. I plan to migrate things over in upcoming commits.

What was wrong with the old APIs: they had that awkward undocumented Windows-specific 'flags' parameter that I described in the previous commit and took out of the dup_X_to_Y wrappers. But much worse, the semantics for buffer overflow were not just undocumented but actually inconsistent. dup_wc_to_mb() in utils assumed that the underlying wc_to_mb would fill the buffer nearly full and return the size of data it wrote. In fact, this was untrue in the case where wc_to_mb called WideCharToMultiByte: that returns straight-up failure, setting the Windows error code to ERROR_INSUFFICIENT_BUFFER. It _does_ partially fill the output buffer, but doesn't tell you how much it wrote!

What's wrong with the new API: it's a bit awkward to write a sequence of wchar_t in native byte order to a byte-oriented BinarySink, so people using put_mb_to_wc directly have to do some annoying pointer casting. But I think that's less horrible than the previous APIs.

Another change: in the new API for wc_to_mb, defchr can be "", but not NULL.
This commit is contained in:
@@ -12,20 +12,16 @@
|
||||
wchar_t *dup_mb_to_wc_c(int codepage, const char *string,
|
||||
size_t inlen, size_t *outlen_p)
|
||||
{
|
||||
assert(inlen <= INT_MAX);
|
||||
size_t mult;
|
||||
for (mult = 1 ;; mult++) {
|
||||
wchar_t *ret = snewn(mult*inlen + 2, wchar_t);
|
||||
size_t outlen = mb_to_wc(codepage, 0, string, inlen, ret,
|
||||
mult*inlen + 1);
|
||||
if (outlen < mult*inlen+1) {
|
||||
if (outlen_p)
|
||||
*outlen_p = outlen;
|
||||
ret[outlen] = L'\0';
|
||||
return ret;
|
||||
}
|
||||
sfree(ret);
|
||||
}
|
||||
strbuf *sb = strbuf_new();
|
||||
put_mb_to_wc(sb, codepage, string, inlen);
|
||||
if (outlen_p)
|
||||
*outlen_p = sb->len / sizeof(wchar_t);
|
||||
|
||||
/* Append a trailing L'\0'. For this we only need to write one
|
||||
* byte _fewer_ than sizeof(wchar_t), because strbuf will append a
|
||||
* byte '\0' for us. */
|
||||
put_padding(sb, sizeof(wchar_t) - 1, 0);
|
||||
return (wchar_t *)strbuf_to_str(sb);
|
||||
}
|
||||
|
||||
wchar_t *dup_mb_to_wc(int codepage, const char *string)
|
||||
|
@@ -14,26 +14,11 @@
|
||||
char *dup_wc_to_mb_c(int codepage, const wchar_t *string,
|
||||
size_t inlen, const char *defchr, size_t *outlen_p)
|
||||
{
|
||||
assert(inlen <= INT_MAX);
|
||||
|
||||
size_t outsize = inlen+1;
|
||||
char *out = snewn(outsize, char);
|
||||
|
||||
while (true) {
|
||||
size_t outlen = wc_to_mb(codepage, 0, string, inlen, out, outsize,
|
||||
defchr);
|
||||
/* We can only be sure we've consumed the whole input if the
|
||||
* output is not within a multibyte-character-length of the
|
||||
* end of the buffer! */
|
||||
if (outlen < outsize && outsize - outlen > MB_LEN_MAX) {
|
||||
if (outlen_p)
|
||||
*outlen_p = outlen;
|
||||
out[outlen] = '\0';
|
||||
return out;
|
||||
}
|
||||
|
||||
sgrowarray(out, outsize, outsize);
|
||||
}
|
||||
strbuf *sb = strbuf_new();
|
||||
put_wc_to_mb(sb, codepage, string, inlen, defchr);
|
||||
if (outlen_p)
|
||||
*outlen_p = sb->len;
|
||||
return strbuf_to_str(sb);
|
||||
}
|
||||
|
||||
char *dup_wc_to_mb(int codepage, const wchar_t *string,
|
||||
|
Reference in New Issue
Block a user