1
0
mirror of https://git.tartarus.org/simon/putty.git synced 2025-07-01 19:42:48 -05:00

Rework Unicode conversion APIs to use a BinarySink.

The previous mb_to_wc and wc_to_mb had horrible and also buggy APIs.
This commit introduces a fresh pair of functions to replace them,
which generate output by writing to a BinarySink. So it's now up to
the caller to decide whether it wants the output written to a
fixed-size buffer with overflow checking (via buffer_sink), or
dynamically allocated, or even written directly to some other output
channel.

Nothing uses the new functions yet. I plan to migrate things over in
upcoming commits.

What was wrong with the old APIs: they had that awkward undocumented
Windows-specific 'flags' parameter that I described in the previous
commit and took out of the dup_X_to_Y wrappers. But much worse, the
semantics for buffer overflow were not just undocumented but actually
inconsistent. dup_wc_to_mb() in utils assumed that the underlying
wc_to_mb would fill the buffer nearly full and return the size of data
it wrote. In fact, this was untrue in the case where wc_to_mb called
WideCharToMultiByte: that returns straight-up failure, setting the
Windows error code to ERROR_INSUFFICIENT_BUFFER. It _does_ partially
fill the output buffer, but doesn't tell you how much it wrote!

What's wrong with the new API: it's a bit awkward to write a sequence
of wchar_t in native byte order to a byte-oriented BinarySink, so
people using put_mb_to_wc directly have to do some annoying pointer
casting. But I think that's less horrible than the previous APIs.

Another change: in the new API for wc_to_mb, defchr can be "", but not
NULL.
This commit is contained in:
Simon Tatham
2024-09-24 08:18:48 +01:00
parent 32b8da1177
commit 4f756d2a4d
10 changed files with 267 additions and 239 deletions

View File

@@ -3497,11 +3497,7 @@ static strbuf *term_input_data_from_unicode(
* (But also we must allow space for the trailing NUL that
* wc_to_mb will write.)
*/
char *bufptr = strbuf_append(buf, len + 1);
int rv;
rv = wc_to_mb(term->ucsdata->line_codepage, 0, widebuf, len,
bufptr, len + 1, NULL);
strbuf_shrink_to(buf, rv < 0 ? 0 : rv);
put_wc_to_mb(buf, term->ucsdata->line_codepage, widebuf, len, "");
}
return buf;
@@ -3510,18 +3506,12 @@ static strbuf *term_input_data_from_unicode(
static strbuf *term_input_data_from_charset(
Terminal *term, int codepage, const char *str, size_t len)
{
strbuf *buf;
strbuf *buf = strbuf_new();
if (codepage < 0) {
buf = strbuf_new();
if (codepage < 0)
put_data(buf, str, len);
} else {
size_t widesize = len * 2; /* allow for UTF-16 surrogates */
wchar_t *widebuf = snewn(widesize, wchar_t);
int widelen = mb_to_wc(codepage, 0, str, len, widebuf, widesize);
buf = term_input_data_from_unicode(term, widebuf, widelen);
sfree(widebuf);
}
else
put_mb_to_wc(buf, codepage, str, len);
return buf;
}
@@ -6734,23 +6724,24 @@ static void clipme(Terminal *term, pos top, pos bottom, bool rect, bool desel,
if (DIRECT_FONT(uc)) {
if (c >= ' ' && c != 0x7F) {
char buf[4];
WCHAR wbuf[4];
int rv;
char buf[2];
buffer_sink bs[1];
buffer_sink_init(bs, cbuf,
sizeof(cbuf) - sizeof(wchar_t));
if (is_dbcs_leadbyte(term->ucsdata->font_codepage, (BYTE) c)) {
buf[0] = c;
buf[1] = (char) (0xFF & ldata->chars[top.x + 1].chr);
rv = mb_to_wc(term->ucsdata->font_codepage, 0, buf, 2, wbuf, 4);
put_mb_to_wc(bs, term->ucsdata->font_codepage,
buf, 2);
top.x++;
} else {
buf[0] = c;
rv = mb_to_wc(term->ucsdata->font_codepage, 0, buf, 1, wbuf, 4);
put_mb_to_wc(bs, term->ucsdata->font_codepage,
buf, 1);
}
if (rv > 0) {
memcpy(cbuf, wbuf, rv * sizeof(wchar_t));
cbuf[rv] = 0;
}
assert(!bs->overflowed);
*(wchar_t *)bs->out = L'\0';
}
}