mirror of
https://git.tartarus.org/simon/putty.git
synced 2025-06-30 19:12:48 -05:00
Rework Unicode conversion APIs to use a BinarySink.
The previous mb_to_wc and wc_to_mb had horrible and also buggy APIs. This commit introduces a fresh pair of functions to replace them, which generate output by writing to a BinarySink. So it's now up to the caller to decide whether it wants the output written to a fixed-size buffer with overflow checking (via buffer_sink), or dynamically allocated, or even written directly to some other output channel. The old functions are removed in this commit, and their existing call sites are converted to use the new functions, either directly or via the dup_X_to_Y wrappers. What was wrong with the old APIs: they had that awkward undocumented Windows-specific 'flags' parameter that I described in the previous commit and took out of the dup_X_to_Y wrappers. But much worse, the semantics for buffer overflow were not just undocumented but actually inconsistent. dup_wc_to_mb() in utils assumed that the underlying wc_to_mb would fill the buffer nearly full and return the size of data it wrote. In fact, this was untrue in the case where wc_to_mb called WideCharToMultiByte: that returns straight-up failure, setting the Windows error code to ERROR_INSUFFICIENT_BUFFER. It _does_ partially fill the output buffer, but doesn't tell you how much it wrote! What's wrong with the new API: it's a bit awkward to write a sequence of wchar_t in native byte order to a byte-oriented BinarySink, so people using put_mb_to_wc directly have to do some annoying pointer casting. But I think that's less horrible than the previous APIs. Another change: in the new API for wc_to_mb, defchr can be "", but not NULL.
This commit is contained in:
@ -21,81 +21,99 @@ bool is_dbcs_leadbyte(int codepage, char byte)
|
||||
return false; /* we don't do DBCS */
|
||||
}
|
||||
|
||||
int mb_to_wc(int codepage, int flags, const char *mbstr, int mblen,
|
||||
wchar_t *wcstr, int wclen)
|
||||
bool BinarySink_put_mb_to_wc(
|
||||
BinarySink *bs, int codepage, const char *mbstr, int mblen)
|
||||
{
|
||||
if (codepage == DEFAULT_CODEPAGE) {
|
||||
int n = 0;
|
||||
mbstate_t state;
|
||||
|
||||
memset(&state, 0, sizeof state);
|
||||
|
||||
while (mblen > 0) {
|
||||
if (n >= wclen)
|
||||
return n;
|
||||
size_t i = mbrtowc(wcstr+n, mbstr, (size_t)mblen, &state);
|
||||
wchar_t wc;
|
||||
size_t i = mbrtowc(&wc, mbstr, (size_t)mblen, &state);
|
||||
if (i == (size_t)-1 || i == (size_t)-2)
|
||||
break;
|
||||
n++;
|
||||
put_data(bs, &wc, sizeof(wc));
|
||||
mbstr += i;
|
||||
mblen -= i;
|
||||
}
|
||||
|
||||
return n;
|
||||
} else if (codepage == CS_NONE) {
|
||||
int n = 0;
|
||||
|
||||
while (mblen > 0) {
|
||||
if (n >= wclen)
|
||||
return n;
|
||||
wcstr[n] = 0xD800 | (mbstr[0] & 0xFF);
|
||||
n++;
|
||||
wchar_t wc = 0xD800 | (mbstr[0] & 0xFF);
|
||||
put_data(bs, &wc, sizeof(wc));
|
||||
mbstr++;
|
||||
mblen--;
|
||||
}
|
||||
} else {
|
||||
wchar_t wbuf[1024];
|
||||
while (mblen > 0) {
|
||||
int wlen = charset_to_unicode(&mbstr, &mblen, wbuf, lenof(wbuf),
|
||||
codepage, NULL, NULL, 0);
|
||||
put_data(bs, wbuf, wlen * sizeof(wchar_t));
|
||||
}
|
||||
}
|
||||
|
||||
return n;
|
||||
} else
|
||||
return charset_to_unicode(&mbstr, &mblen, wcstr, wclen, codepage,
|
||||
NULL, NULL, 0);
|
||||
/* We never expect to receive invalid charset values on Unix,
|
||||
* because we're not dependent on an externally defined space of
|
||||
* OS-provided code pages */
|
||||
return true;
|
||||
}
|
||||
|
||||
int wc_to_mb(int codepage, int flags, const wchar_t *wcstr, int wclen,
|
||||
char *mbstr, int mblen, const char *defchr)
|
||||
bool BinarySink_put_wc_to_mb(
|
||||
BinarySink *bs, int codepage, const wchar_t *wcstr, int wclen,
|
||||
const char *defchr)
|
||||
{
|
||||
size_t defchr_len = 0;
|
||||
bool defchr_len_known = false;
|
||||
|
||||
if (codepage == DEFAULT_CODEPAGE) {
|
||||
char output[MB_LEN_MAX];
|
||||
mbstate_t state;
|
||||
int n = 0;
|
||||
|
||||
memset(&state, 0, sizeof state);
|
||||
|
||||
while (wclen > 0) {
|
||||
size_t i = wcrtomb(output, wcstr[0], &state);
|
||||
if (i == (size_t)-1 || i > n - mblen)
|
||||
break;
|
||||
memcpy(mbstr+n, output, i);
|
||||
n += i;
|
||||
if (i == (size_t)-1) {
|
||||
if (!defchr_len_known) {
|
||||
defchr_len = strlen(defchr);
|
||||
defchr_len_known = true;
|
||||
}
|
||||
put_data(bs, defchr, defchr_len);
|
||||
} else {
|
||||
put_data(bs, output, i);
|
||||
}
|
||||
wcstr++;
|
||||
wclen--;
|
||||
}
|
||||
|
||||
return n;
|
||||
} else if (codepage == CS_NONE) {
|
||||
int n = 0;
|
||||
while (wclen > 0 && n < mblen) {
|
||||
if (*wcstr >= 0xD800 && *wcstr < 0xD900)
|
||||
mbstr[n++] = (*wcstr & 0xFF);
|
||||
else if (defchr)
|
||||
mbstr[n++] = *defchr;
|
||||
while (wclen > 0) {
|
||||
if (*wcstr >= 0xD800 && *wcstr < 0xD900) {
|
||||
put_byte(bs, *wcstr & 0xFF);
|
||||
} else {
|
||||
if (!defchr_len_known) {
|
||||
defchr_len = strlen(defchr);
|
||||
defchr_len_known = true;
|
||||
}
|
||||
put_data(bs, defchr, defchr_len);
|
||||
}
|
||||
wcstr++;
|
||||
wclen--;
|
||||
}
|
||||
return n;
|
||||
} else {
|
||||
return charset_from_unicode(&wcstr, &wclen, mbstr, mblen, codepage,
|
||||
NULL, defchr?defchr:NULL, defchr?1:0);
|
||||
char buf[2048];
|
||||
defchr_len = strlen(defchr);
|
||||
|
||||
while (wclen > 0) {
|
||||
int len = charset_from_unicode(
|
||||
&wcstr, &wclen, buf, lenof(buf), codepage,
|
||||
NULL, defchr, defchr_len);
|
||||
put_data(bs, buf, len);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -598,14 +598,14 @@ static bool x11font_has_glyph(unifont *font, wchar_t glyph)
|
||||
* This X font has 8-bit indices, so we must convert to the
|
||||
* appropriate character set.
|
||||
*/
|
||||
char sbstring[2];
|
||||
int sblen = wc_to_mb(xfont->real_charset, 0, &glyph, 1,
|
||||
sbstring, 2, "");
|
||||
if (sblen == 0 || !sbstring[0])
|
||||
char c = '\0';
|
||||
buffer_sink bs[1];
|
||||
buffer_sink_init(bs, &c, 1);
|
||||
put_wc_to_mb(bs, xfont->real_charset, &glyph, 1, "");
|
||||
if (!c)
|
||||
return false; /* not even in the charset */
|
||||
|
||||
return x11_font_has_glyph(xfont->fonts[0].xfs, 0,
|
||||
(unsigned char)sbstring[0]);
|
||||
return x11_font_has_glyph(xfont->fonts[0].xfs, 0, (unsigned char)c);
|
||||
}
|
||||
}
|
||||
|
||||
@ -953,14 +953,13 @@ static void x11font_draw_text(unifont_drawctx *ctx, unifont *font,
|
||||
* This X font has 8-bit indices, so we must convert to the
|
||||
* appropriate character set.
|
||||
*/
|
||||
char *sbstring = snewn(len+1, char);
|
||||
int sblen = wc_to_mb(xfont->real_charset, 0, string, len,
|
||||
sbstring, len+1, ".");
|
||||
strbuf *sb = strbuf_new();
|
||||
put_wc_to_mb(sb, xfont->real_charset, string, len, ".");
|
||||
x11font_really_draw_text(x11font_drawfuncs + index + 0, ctx,
|
||||
&xfont->fonts[sfid], xfont->disp, x, y,
|
||||
sbstring, sblen, shadowoffset,
|
||||
sb->s, sb->len, shadowoffset,
|
||||
xfont->variable, cellwidth * mult);
|
||||
sfree(sbstring);
|
||||
strbuf_free(sb);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1603,7 +1602,7 @@ static void pangofont_draw_internal(unifont_drawctx *ctx, unifont *font,
|
||||
PangoLayout *layout;
|
||||
PangoRectangle rect;
|
||||
char *utfstring, *utfptr;
|
||||
int utflen;
|
||||
size_t utflen;
|
||||
bool shadowbold = false;
|
||||
void (*draw_layout)(unifont_drawctx *ctx,
|
||||
gint x, gint y, PangoLayout *layout) = NULL;
|
||||
@ -1642,12 +1641,11 @@ static void pangofont_draw_internal(unifont_drawctx *ctx, unifont *font,
|
||||
* Pango always expects UTF-8, so convert the input wide character
|
||||
* string to UTF-8.
|
||||
*/
|
||||
utfstring = snewn(len*6+1, char); /* UTF-8 has max 6 bytes/char */
|
||||
utflen = wc_to_mb(CS_UTF8, 0, string, len, utfstring, len*6+1, ".");
|
||||
utfstring = dup_wc_to_mb_c(CS_UTF8, string, len, "", &utflen);
|
||||
|
||||
utfptr = utfstring;
|
||||
while (utflen > 0) {
|
||||
int clen, n;
|
||||
size_t clen, n;
|
||||
int desired = cellwidth * PANGO_SCALE;
|
||||
|
||||
/*
|
||||
|
@ -1603,10 +1603,12 @@ gint key_event(GtkWidget *widget, GdkEventKey *event, gpointer data)
|
||||
const wchar_t *wp;
|
||||
int wlen;
|
||||
int ulen;
|
||||
buffer_sink bs[1];
|
||||
|
||||
wlen = mb_to_wc(DEFAULT_CODEPAGE, 0,
|
||||
event_string, strlen(event_string),
|
||||
widedata, lenof(widedata)-1);
|
||||
buffer_sink_init(bs, widedata, sizeof(widedata) - sizeof(wchar_t));
|
||||
put_mb_to_wc(bs, DEFAULT_CODEPAGE,
|
||||
event_string, strlen(event_string));
|
||||
wlen = (wchar_t *)bs->out - widedata;
|
||||
|
||||
#ifdef KEY_EVENT_DIAGNOSTICS
|
||||
{
|
||||
@ -2954,16 +2956,12 @@ static void clipboard_text_received(GtkClipboard *clipboard,
|
||||
{
|
||||
GtkFrontend *inst = (GtkFrontend *)data;
|
||||
wchar_t *paste;
|
||||
int paste_len;
|
||||
int length;
|
||||
size_t paste_len;
|
||||
|
||||
if (!text)
|
||||
return;
|
||||
|
||||
length = strlen(text);
|
||||
|
||||
paste = snewn(length, wchar_t);
|
||||
paste_len = mb_to_wc(CS_UTF8, 0, text, length, paste, length);
|
||||
paste = dup_mb_to_wc(CS_UTF8, text, length, &paste_len);
|
||||
|
||||
term_do_paste(inst->term, paste, paste_len);
|
||||
|
||||
@ -3102,17 +3100,15 @@ static void gtkwin_clip_write(
|
||||
state->pasteout_data_ctext_len = 0;
|
||||
}
|
||||
|
||||
state->pasteout_data = snewn(len*6, char);
|
||||
state->pasteout_data_len = len*6;
|
||||
state->pasteout_data_len = wc_to_mb(inst->ucsdata.line_codepage, 0,
|
||||
data, len, state->pasteout_data,
|
||||
state->pasteout_data_len, NULL);
|
||||
if (state->pasteout_data_len == 0) {
|
||||
sfree(state->pasteout_data);
|
||||
state->pasteout_data = NULL;
|
||||
} else {
|
||||
state->pasteout_data =
|
||||
sresize(state->pasteout_data, state->pasteout_data_len, char);
|
||||
{
|
||||
size_t outlen;
|
||||
state->pasteout_data = dup_wc_to_mb_c(
|
||||
inst->ucsdata.line_codepage, data, len, "", &outlen);
|
||||
/* We can't handle pastes larger than INT_MAX, because
|
||||
* gtk_selection_data_set_text's length parameter is a gint */
|
||||
if (outlen > INT_MAX)
|
||||
outlen = INT_MAX;
|
||||
state->pasteout_data_len = outlen;
|
||||
}
|
||||
|
||||
#ifndef NOT_X_WINDOWS
|
||||
@ -3240,7 +3236,7 @@ static void selection_received(GtkWidget *widget, GtkSelectionData *seldata,
|
||||
const guchar *seldata_data = gtk_selection_data_get_data(seldata);
|
||||
gint seldata_length = gtk_selection_data_get_length(seldata);
|
||||
wchar_t *paste;
|
||||
int paste_len;
|
||||
size_t paste_len;
|
||||
struct clipboard_state *state = clipboard_from_atom(
|
||||
inst, gtk_selection_data_get_selection(seldata));
|
||||
|
||||
@ -3333,11 +3329,8 @@ static void selection_received(GtkWidget *widget, GtkSelectionData *seldata,
|
||||
}
|
||||
}
|
||||
|
||||
paste = snewn(length, wchar_t);
|
||||
paste_len = mb_to_wc(charset, 0, text, length, paste, length);
|
||||
|
||||
paste = dup_mb_to_wc_c(charset, text, length, &paste_len);
|
||||
term_do_paste(inst->term, paste, paste_len);
|
||||
|
||||
sfree(paste);
|
||||
|
||||
#ifndef NOT_X_WINDOWS
|
||||
|
Reference in New Issue
Block a user