mirror of
https://git.tartarus.org/simon/putty.git
synced 2025-01-09 17:38:00 +00:00
Rework Unicode conversion APIs to use a BinarySink.
The previous mb_to_wc and wc_to_mb had horrible and also buggy APIs. This commit introduces a fresh pair of functions to replace them, which generate output by writing to a BinarySink. So it's now up to the caller to decide whether it wants the output written to a fixed-size buffer with overflow checking (via buffer_sink), or dynamically allocated, or even written directly to some other output channel. Every existing caller is migrated in this commit (to the new functions, to the dup_X_to_Y wrappers, or to a direct Win32 call), and the old mb_to_wc and wc_to_mb are removed. What was wrong with the old APIs: they had that awkward undocumented Windows-specific 'flags' parameter that I described in the previous commit and took out of the dup_X_to_Y wrappers. But much worse, the semantics for buffer overflow were not just undocumented but actually inconsistent. dup_wc_to_mb() in utils assumed that the underlying wc_to_mb would fill the buffer nearly full and return the size of data it wrote. In fact, this was untrue in the case where wc_to_mb called WideCharToMultiByte: that returns straight-up failure, setting the Windows error code to ERROR_INSUFFICIENT_BUFFER. It _does_ partially fill the output buffer, but doesn't tell you how much it wrote! What's wrong with the new API: it's a bit awkward to write a sequence of wchar_t in native byte order to a byte-oriented BinarySink, so people using put_mb_to_wc directly have to do some annoying pointer casting. But I think that's less horrible than the previous APIs. Another change: in the new API for wc_to_mb, defchr can be "", but not NULL.
This commit is contained in:
parent
32b8da1177
commit
4f756d2a4d
12
marshal.h
12
marshal.h
@ -160,6 +160,12 @@ struct BinarySink {
|
|||||||
#define put_utf8_char(bs, c) \
|
#define put_utf8_char(bs, c) \
|
||||||
BinarySink_put_utf8_char(BinarySink_UPCAST(bs), c)
|
BinarySink_put_utf8_char(BinarySink_UPCAST(bs), c)
|
||||||
|
|
||||||
|
/* More complicated functions still implemented in <platform>/unicode.c */
|
||||||
|
#define put_mb_to_wc(bs, codepage, mbstr, mblen) \
|
||||||
|
BinarySink_put_mb_to_wc(BinarySink_UPCAST(bs), codepage, mbstr, mblen)
|
||||||
|
#define put_wc_to_mb(bs, codepage, wcstr, wclen, def) \
|
||||||
|
BinarySink_put_wc_to_mb(BinarySink_UPCAST(bs), codepage, wcstr, wclen, def)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The underlying real C functions that implement most of those
|
* The underlying real C functions that implement most of those
|
||||||
* macros. Generally you won't want to call these directly, because
|
* macros. Generally you won't want to call these directly, because
|
||||||
@ -190,6 +196,12 @@ void BinarySink_put_fmt(BinarySink *, const char *fmt, ...) PRINTF_LIKE(2, 3);
|
|||||||
void BinarySink_put_fmtv(BinarySink *, const char *fmt, va_list ap);
|
void BinarySink_put_fmtv(BinarySink *, const char *fmt, va_list ap);
|
||||||
void BinarySink_put_c_string_literal(BinarySink *, ptrlen);
|
void BinarySink_put_c_string_literal(BinarySink *, ptrlen);
|
||||||
void BinarySink_put_utf8_char(BinarySink *, unsigned);
|
void BinarySink_put_utf8_char(BinarySink *, unsigned);
|
||||||
|
/* put_mb_to_wc / put_wc_to_mb return false if the codepage is invalid */
|
||||||
|
bool BinarySink_put_mb_to_wc(
|
||||||
|
BinarySink *bs, int codepage, const char *mbstr, int mblen);
|
||||||
|
bool BinarySink_put_wc_to_mb(
|
||||||
|
BinarySink *bs, int codepage, const wchar_t *wcstr, int wclen,
|
||||||
|
const char *defchr);
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
5
putty.h
5
putty.h
@ -2246,10 +2246,7 @@ extern const char commitid[];
|
|||||||
*/
|
*/
|
||||||
/* void init_ucs(void); -- this is now in platform-specific headers */
|
/* void init_ucs(void); -- this is now in platform-specific headers */
|
||||||
bool is_dbcs_leadbyte(int codepage, char byte);
|
bool is_dbcs_leadbyte(int codepage, char byte);
|
||||||
int mb_to_wc(int codepage, int flags, const char *mbstr, int mblen,
|
/* For put_mb_to_wc / put_wc_to_mb, see marshal.h */
|
||||||
wchar_t *wcstr, int wclen);
|
|
||||||
int wc_to_mb(int codepage, int flags, const wchar_t *wcstr, int wclen,
|
|
||||||
char *mbstr, int mblen, const char *defchr);
|
|
||||||
wchar_t xlat_uskbd2cyrllic(int ch);
|
wchar_t xlat_uskbd2cyrllic(int ch);
|
||||||
int check_compose(int first, int second);
|
int check_compose(int first, int second);
|
||||||
int decode_codepage(const char *cp_name);
|
int decode_codepage(const char *cp_name);
|
||||||
|
@ -3497,11 +3497,7 @@ static strbuf *term_input_data_from_unicode(
|
|||||||
* (But also we must allow space for the trailing NUL that
|
* (But also we must allow space for the trailing NUL that
|
||||||
* wc_to_mb will write.)
|
* wc_to_mb will write.)
|
||||||
*/
|
*/
|
||||||
char *bufptr = strbuf_append(buf, len + 1);
|
put_wc_to_mb(buf, term->ucsdata->line_codepage, widebuf, len, "");
|
||||||
int rv;
|
|
||||||
rv = wc_to_mb(term->ucsdata->line_codepage, 0, widebuf, len,
|
|
||||||
bufptr, len + 1, NULL);
|
|
||||||
strbuf_shrink_to(buf, rv < 0 ? 0 : rv);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return buf;
|
return buf;
|
||||||
@ -3510,18 +3506,12 @@ static strbuf *term_input_data_from_unicode(
|
|||||||
static strbuf *term_input_data_from_charset(
|
static strbuf *term_input_data_from_charset(
|
||||||
Terminal *term, int codepage, const char *str, size_t len)
|
Terminal *term, int codepage, const char *str, size_t len)
|
||||||
{
|
{
|
||||||
strbuf *buf;
|
strbuf *buf = strbuf_new();
|
||||||
|
|
||||||
if (codepage < 0) {
|
if (codepage < 0)
|
||||||
buf = strbuf_new();
|
|
||||||
put_data(buf, str, len);
|
put_data(buf, str, len);
|
||||||
} else {
|
else
|
||||||
size_t widesize = len * 2; /* allow for UTF-16 surrogates */
|
put_mb_to_wc(buf, codepage, str, len);
|
||||||
wchar_t *widebuf = snewn(widesize, wchar_t);
|
|
||||||
int widelen = mb_to_wc(codepage, 0, str, len, widebuf, widesize);
|
|
||||||
buf = term_input_data_from_unicode(term, widebuf, widelen);
|
|
||||||
sfree(widebuf);
|
|
||||||
}
|
|
||||||
|
|
||||||
return buf;
|
return buf;
|
||||||
}
|
}
|
||||||
@ -6734,23 +6724,24 @@ static void clipme(Terminal *term, pos top, pos bottom, bool rect, bool desel,
|
|||||||
|
|
||||||
if (DIRECT_FONT(uc)) {
|
if (DIRECT_FONT(uc)) {
|
||||||
if (c >= ' ' && c != 0x7F) {
|
if (c >= ' ' && c != 0x7F) {
|
||||||
char buf[4];
|
char buf[2];
|
||||||
WCHAR wbuf[4];
|
buffer_sink bs[1];
|
||||||
int rv;
|
buffer_sink_init(bs, cbuf,
|
||||||
|
sizeof(cbuf) - sizeof(wchar_t));
|
||||||
if (is_dbcs_leadbyte(term->ucsdata->font_codepage, (BYTE) c)) {
|
if (is_dbcs_leadbyte(term->ucsdata->font_codepage, (BYTE) c)) {
|
||||||
buf[0] = c;
|
buf[0] = c;
|
||||||
buf[1] = (char) (0xFF & ldata->chars[top.x + 1].chr);
|
buf[1] = (char) (0xFF & ldata->chars[top.x + 1].chr);
|
||||||
rv = mb_to_wc(term->ucsdata->font_codepage, 0, buf, 2, wbuf, 4);
|
put_mb_to_wc(bs, term->ucsdata->font_codepage,
|
||||||
|
buf, 2);
|
||||||
top.x++;
|
top.x++;
|
||||||
} else {
|
} else {
|
||||||
buf[0] = c;
|
buf[0] = c;
|
||||||
rv = mb_to_wc(term->ucsdata->font_codepage, 0, buf, 1, wbuf, 4);
|
put_mb_to_wc(bs, term->ucsdata->font_codepage,
|
||||||
|
buf, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (rv > 0) {
|
assert(!bs->overflowed);
|
||||||
memcpy(cbuf, wbuf, rv * sizeof(wchar_t));
|
*(wchar_t *)bs->out = L'\0';
|
||||||
cbuf[rv] = 0;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -21,81 +21,99 @@ bool is_dbcs_leadbyte(int codepage, char byte)
|
|||||||
return false; /* we don't do DBCS */
|
return false; /* we don't do DBCS */
|
||||||
}
|
}
|
||||||
|
|
||||||
int mb_to_wc(int codepage, int flags, const char *mbstr, int mblen,
|
bool BinarySink_put_mb_to_wc(
|
||||||
wchar_t *wcstr, int wclen)
|
BinarySink *bs, int codepage, const char *mbstr, int mblen)
|
||||||
{
|
{
|
||||||
if (codepage == DEFAULT_CODEPAGE) {
|
if (codepage == DEFAULT_CODEPAGE) {
|
||||||
int n = 0;
|
|
||||||
mbstate_t state;
|
mbstate_t state;
|
||||||
|
|
||||||
memset(&state, 0, sizeof state);
|
memset(&state, 0, sizeof state);
|
||||||
|
|
||||||
while (mblen > 0) {
|
while (mblen > 0) {
|
||||||
if (n >= wclen)
|
wchar_t wc;
|
||||||
return n;
|
size_t i = mbrtowc(&wc, mbstr, (size_t)mblen, &state);
|
||||||
size_t i = mbrtowc(wcstr+n, mbstr, (size_t)mblen, &state);
|
|
||||||
if (i == (size_t)-1 || i == (size_t)-2)
|
if (i == (size_t)-1 || i == (size_t)-2)
|
||||||
break;
|
break;
|
||||||
n++;
|
put_data(bs, &wc, sizeof(wc));
|
||||||
mbstr += i;
|
mbstr += i;
|
||||||
mblen -= i;
|
mblen -= i;
|
||||||
}
|
}
|
||||||
|
|
||||||
return n;
|
|
||||||
} else if (codepage == CS_NONE) {
|
} else if (codepage == CS_NONE) {
|
||||||
int n = 0;
|
|
||||||
|
|
||||||
while (mblen > 0) {
|
while (mblen > 0) {
|
||||||
if (n >= wclen)
|
wchar_t wc = 0xD800 | (mbstr[0] & 0xFF);
|
||||||
return n;
|
put_data(bs, &wc, sizeof(wc));
|
||||||
wcstr[n] = 0xD800 | (mbstr[0] & 0xFF);
|
|
||||||
n++;
|
|
||||||
mbstr++;
|
mbstr++;
|
||||||
mblen--;
|
mblen--;
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
wchar_t wbuf[1024];
|
||||||
|
while (mblen > 0) {
|
||||||
|
int wlen = charset_to_unicode(&mbstr, &mblen, wbuf, lenof(wbuf),
|
||||||
|
codepage, NULL, NULL, 0);
|
||||||
|
put_data(bs, wbuf, wlen * sizeof(wchar_t));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return n;
|
/* We never expect to receive invalid charset values on Unix,
|
||||||
} else
|
* because we're not dependent on an externally defined space of
|
||||||
return charset_to_unicode(&mbstr, &mblen, wcstr, wclen, codepage,
|
* OS-provided code pages */
|
||||||
NULL, NULL, 0);
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
int wc_to_mb(int codepage, int flags, const wchar_t *wcstr, int wclen,
|
bool BinarySink_put_wc_to_mb(
|
||||||
char *mbstr, int mblen, const char *defchr)
|
BinarySink *bs, int codepage, const wchar_t *wcstr, int wclen,
|
||||||
|
const char *defchr)
|
||||||
{
|
{
|
||||||
|
size_t defchr_len = 0;
|
||||||
|
bool defchr_len_known = false;
|
||||||
|
|
||||||
if (codepage == DEFAULT_CODEPAGE) {
|
if (codepage == DEFAULT_CODEPAGE) {
|
||||||
char output[MB_LEN_MAX];
|
char output[MB_LEN_MAX];
|
||||||
mbstate_t state;
|
mbstate_t state;
|
||||||
int n = 0;
|
|
||||||
|
|
||||||
memset(&state, 0, sizeof state);
|
memset(&state, 0, sizeof state);
|
||||||
|
|
||||||
while (wclen > 0) {
|
while (wclen > 0) {
|
||||||
size_t i = wcrtomb(output, wcstr[0], &state);
|
size_t i = wcrtomb(output, wcstr[0], &state);
|
||||||
if (i == (size_t)-1 || i > n - mblen)
|
if (i == (size_t)-1) {
|
||||||
break;
|
if (!defchr_len_known) {
|
||||||
memcpy(mbstr+n, output, i);
|
defchr_len = strlen(defchr);
|
||||||
n += i;
|
defchr_len_known = true;
|
||||||
|
}
|
||||||
|
put_data(bs, defchr, defchr_len);
|
||||||
|
} else {
|
||||||
|
put_data(bs, output, i);
|
||||||
|
}
|
||||||
wcstr++;
|
wcstr++;
|
||||||
wclen--;
|
wclen--;
|
||||||
}
|
}
|
||||||
|
|
||||||
return n;
|
|
||||||
} else if (codepage == CS_NONE) {
|
} else if (codepage == CS_NONE) {
|
||||||
int n = 0;
|
while (wclen > 0) {
|
||||||
while (wclen > 0 && n < mblen) {
|
if (*wcstr >= 0xD800 && *wcstr < 0xD900) {
|
||||||
if (*wcstr >= 0xD800 && *wcstr < 0xD900)
|
put_byte(bs, *wcstr & 0xFF);
|
||||||
mbstr[n++] = (*wcstr & 0xFF);
|
} else {
|
||||||
else if (defchr)
|
if (!defchr_len_known) {
|
||||||
mbstr[n++] = *defchr;
|
defchr_len = strlen(defchr);
|
||||||
|
defchr_len_known = true;
|
||||||
|
}
|
||||||
|
put_data(bs, defchr, defchr_len);
|
||||||
|
}
|
||||||
wcstr++;
|
wcstr++;
|
||||||
wclen--;
|
wclen--;
|
||||||
}
|
}
|
||||||
return n;
|
|
||||||
} else {
|
} else {
|
||||||
return charset_from_unicode(&wcstr, &wclen, mbstr, mblen, codepage,
|
char buf[2048];
|
||||||
NULL, defchr?defchr:NULL, defchr?1:0);
|
defchr_len = strlen(defchr);
|
||||||
|
|
||||||
|
while (wclen > 0) {
|
||||||
|
int len = charset_from_unicode(
|
||||||
|
&wcstr, &wclen, buf, lenof(buf), codepage,
|
||||||
|
NULL, defchr, defchr_len);
|
||||||
|
put_data(bs, buf, len);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -598,14 +598,14 @@ static bool x11font_has_glyph(unifont *font, wchar_t glyph)
|
|||||||
* This X font has 8-bit indices, so we must convert to the
|
* This X font has 8-bit indices, so we must convert to the
|
||||||
* appropriate character set.
|
* appropriate character set.
|
||||||
*/
|
*/
|
||||||
char sbstring[2];
|
char c = '\0';
|
||||||
int sblen = wc_to_mb(xfont->real_charset, 0, &glyph, 1,
|
buffer_sink bs[1];
|
||||||
sbstring, 2, "");
|
buffer_sink_init(bs, &c, 1);
|
||||||
if (sblen == 0 || !sbstring[0])
|
put_wc_to_mb(bs, xfont->real_charset, &glyph, 1, "");
|
||||||
|
if (!c)
|
||||||
return false; /* not even in the charset */
|
return false; /* not even in the charset */
|
||||||
|
|
||||||
return x11_font_has_glyph(xfont->fonts[0].xfs, 0,
|
return x11_font_has_glyph(xfont->fonts[0].xfs, 0, (unsigned char)c);
|
||||||
(unsigned char)sbstring[0]);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -953,14 +953,13 @@ static void x11font_draw_text(unifont_drawctx *ctx, unifont *font,
|
|||||||
* This X font has 8-bit indices, so we must convert to the
|
* This X font has 8-bit indices, so we must convert to the
|
||||||
* appropriate character set.
|
* appropriate character set.
|
||||||
*/
|
*/
|
||||||
char *sbstring = snewn(len+1, char);
|
strbuf *sb = strbuf_new();
|
||||||
int sblen = wc_to_mb(xfont->real_charset, 0, string, len,
|
put_wc_to_mb(sb, xfont->real_charset, string, len, ".");
|
||||||
sbstring, len+1, ".");
|
|
||||||
x11font_really_draw_text(x11font_drawfuncs + index + 0, ctx,
|
x11font_really_draw_text(x11font_drawfuncs + index + 0, ctx,
|
||||||
&xfont->fonts[sfid], xfont->disp, x, y,
|
&xfont->fonts[sfid], xfont->disp, x, y,
|
||||||
sbstring, sblen, shadowoffset,
|
sb->s, sb->len, shadowoffset,
|
||||||
xfont->variable, cellwidth * mult);
|
xfont->variable, cellwidth * mult);
|
||||||
sfree(sbstring);
|
strbuf_free(sb);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1603,7 +1602,7 @@ static void pangofont_draw_internal(unifont_drawctx *ctx, unifont *font,
|
|||||||
PangoLayout *layout;
|
PangoLayout *layout;
|
||||||
PangoRectangle rect;
|
PangoRectangle rect;
|
||||||
char *utfstring, *utfptr;
|
char *utfstring, *utfptr;
|
||||||
int utflen;
|
size_t utflen;
|
||||||
bool shadowbold = false;
|
bool shadowbold = false;
|
||||||
void (*draw_layout)(unifont_drawctx *ctx,
|
void (*draw_layout)(unifont_drawctx *ctx,
|
||||||
gint x, gint y, PangoLayout *layout) = NULL;
|
gint x, gint y, PangoLayout *layout) = NULL;
|
||||||
@ -1642,12 +1641,11 @@ static void pangofont_draw_internal(unifont_drawctx *ctx, unifont *font,
|
|||||||
* Pango always expects UTF-8, so convert the input wide character
|
* Pango always expects UTF-8, so convert the input wide character
|
||||||
* string to UTF-8.
|
* string to UTF-8.
|
||||||
*/
|
*/
|
||||||
utfstring = snewn(len*6+1, char); /* UTF-8 has max 6 bytes/char */
|
utfstring = dup_wc_to_mb_c(CS_UTF8, string, len, "", &utflen);
|
||||||
utflen = wc_to_mb(CS_UTF8, 0, string, len, utfstring, len*6+1, ".");
|
|
||||||
|
|
||||||
utfptr = utfstring;
|
utfptr = utfstring;
|
||||||
while (utflen > 0) {
|
while (utflen > 0) {
|
||||||
int clen, n;
|
size_t clen, n;
|
||||||
int desired = cellwidth * PANGO_SCALE;
|
int desired = cellwidth * PANGO_SCALE;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -1603,10 +1603,12 @@ gint key_event(GtkWidget *widget, GdkEventKey *event, gpointer data)
|
|||||||
const wchar_t *wp;
|
const wchar_t *wp;
|
||||||
int wlen;
|
int wlen;
|
||||||
int ulen;
|
int ulen;
|
||||||
|
buffer_sink bs[1];
|
||||||
|
|
||||||
wlen = mb_to_wc(DEFAULT_CODEPAGE, 0,
|
buffer_sink_init(bs, widedata, sizeof(widedata) - sizeof(wchar_t));
|
||||||
event_string, strlen(event_string),
|
put_mb_to_wc(bs, DEFAULT_CODEPAGE,
|
||||||
widedata, lenof(widedata)-1);
|
event_string, strlen(event_string));
|
||||||
|
wlen = (wchar_t *)bs->out - widedata;
|
||||||
|
|
||||||
#ifdef KEY_EVENT_DIAGNOSTICS
|
#ifdef KEY_EVENT_DIAGNOSTICS
|
||||||
{
|
{
|
||||||
@ -2954,16 +2956,12 @@ static void clipboard_text_received(GtkClipboard *clipboard,
|
|||||||
{
|
{
|
||||||
GtkFrontend *inst = (GtkFrontend *)data;
|
GtkFrontend *inst = (GtkFrontend *)data;
|
||||||
wchar_t *paste;
|
wchar_t *paste;
|
||||||
int paste_len;
|
size_t paste_len;
|
||||||
int length;
|
|
||||||
|
|
||||||
if (!text)
|
if (!text)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
length = strlen(text);
|
paste = dup_mb_to_wc(CS_UTF8, text, length, &paste_len);
|
||||||
|
|
||||||
paste = snewn(length, wchar_t);
|
|
||||||
paste_len = mb_to_wc(CS_UTF8, 0, text, length, paste, length);
|
|
||||||
|
|
||||||
term_do_paste(inst->term, paste, paste_len);
|
term_do_paste(inst->term, paste, paste_len);
|
||||||
|
|
||||||
@ -3102,17 +3100,15 @@ static void gtkwin_clip_write(
|
|||||||
state->pasteout_data_ctext_len = 0;
|
state->pasteout_data_ctext_len = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
state->pasteout_data = snewn(len*6, char);
|
{
|
||||||
state->pasteout_data_len = len*6;
|
size_t outlen;
|
||||||
state->pasteout_data_len = wc_to_mb(inst->ucsdata.line_codepage, 0,
|
state->pasteout_data = dup_wc_to_mb_c(
|
||||||
data, len, state->pasteout_data,
|
inst->ucsdata.line_codepage, data, len, "", &outlen);
|
||||||
state->pasteout_data_len, NULL);
|
/* We can't handle pastes larger than INT_MAX, because
|
||||||
if (state->pasteout_data_len == 0) {
|
* gtk_selection_data_set_text's length parameter is a gint */
|
||||||
sfree(state->pasteout_data);
|
if (outlen > INT_MAX)
|
||||||
state->pasteout_data = NULL;
|
outlen = INT_MAX;
|
||||||
} else {
|
state->pasteout_data_len = outlen;
|
||||||
state->pasteout_data =
|
|
||||||
sresize(state->pasteout_data, state->pasteout_data_len, char);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef NOT_X_WINDOWS
|
#ifndef NOT_X_WINDOWS
|
||||||
@ -3240,7 +3236,7 @@ static void selection_received(GtkWidget *widget, GtkSelectionData *seldata,
|
|||||||
const guchar *seldata_data = gtk_selection_data_get_data(seldata);
|
const guchar *seldata_data = gtk_selection_data_get_data(seldata);
|
||||||
gint seldata_length = gtk_selection_data_get_length(seldata);
|
gint seldata_length = gtk_selection_data_get_length(seldata);
|
||||||
wchar_t *paste;
|
wchar_t *paste;
|
||||||
int paste_len;
|
size_t paste_len;
|
||||||
struct clipboard_state *state = clipboard_from_atom(
|
struct clipboard_state *state = clipboard_from_atom(
|
||||||
inst, gtk_selection_data_get_selection(seldata));
|
inst, gtk_selection_data_get_selection(seldata));
|
||||||
|
|
||||||
@ -3333,11 +3329,8 @@ static void selection_received(GtkWidget *widget, GtkSelectionData *seldata,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
paste = snewn(length, wchar_t);
|
paste = dup_mb_to_wc_c(charset, text, length, &paste_len);
|
||||||
paste_len = mb_to_wc(charset, 0, text, length, paste, length);
|
|
||||||
|
|
||||||
term_do_paste(inst->term, paste, paste_len);
|
term_do_paste(inst->term, paste, paste_len);
|
||||||
|
|
||||||
sfree(paste);
|
sfree(paste);
|
||||||
|
|
||||||
#ifndef NOT_X_WINDOWS
|
#ifndef NOT_X_WINDOWS
|
||||||
|
@ -12,20 +12,16 @@
|
|||||||
wchar_t *dup_mb_to_wc_c(int codepage, const char *string,
|
wchar_t *dup_mb_to_wc_c(int codepage, const char *string,
|
||||||
size_t inlen, size_t *outlen_p)
|
size_t inlen, size_t *outlen_p)
|
||||||
{
|
{
|
||||||
assert(inlen <= INT_MAX);
|
strbuf *sb = strbuf_new();
|
||||||
size_t mult;
|
put_mb_to_wc(sb, codepage, string, inlen);
|
||||||
for (mult = 1 ;; mult++) {
|
if (outlen_p)
|
||||||
wchar_t *ret = snewn(mult*inlen + 2, wchar_t);
|
*outlen_p = sb->len / sizeof(wchar_t);
|
||||||
size_t outlen = mb_to_wc(codepage, 0, string, inlen, ret,
|
|
||||||
mult*inlen + 1);
|
/* Append a trailing L'\0'. For this we only need to write one
|
||||||
if (outlen < mult*inlen+1) {
|
* byte _fewer_ than sizeof(wchar_t), because strbuf will append a
|
||||||
if (outlen_p)
|
* byte '\0' for us. */
|
||||||
*outlen_p = outlen;
|
put_padding(sb, sizeof(wchar_t) - 1, 0);
|
||||||
ret[outlen] = L'\0';
|
return (wchar_t *)strbuf_to_str(sb);
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
sfree(ret);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
wchar_t *dup_mb_to_wc(int codepage, const char *string)
|
wchar_t *dup_mb_to_wc(int codepage, const char *string)
|
||||||
|
@ -14,26 +14,11 @@
|
|||||||
char *dup_wc_to_mb_c(int codepage, const wchar_t *string,
|
char *dup_wc_to_mb_c(int codepage, const wchar_t *string,
|
||||||
size_t inlen, const char *defchr, size_t *outlen_p)
|
size_t inlen, const char *defchr, size_t *outlen_p)
|
||||||
{
|
{
|
||||||
assert(inlen <= INT_MAX);
|
strbuf *sb = strbuf_new();
|
||||||
|
put_wc_to_mb(sb, codepage, string, inlen, defchr);
|
||||||
size_t outsize = inlen+1;
|
if (outlen_p)
|
||||||
char *out = snewn(outsize, char);
|
*outlen_p = sb->len;
|
||||||
|
return strbuf_to_str(sb);
|
||||||
while (true) {
|
|
||||||
size_t outlen = wc_to_mb(codepage, 0, string, inlen, out, outsize,
|
|
||||||
defchr);
|
|
||||||
/* We can only be sure we've consumed the whole input if the
|
|
||||||
* output is not within a multibyte-character-length of the
|
|
||||||
* end of the buffer! */
|
|
||||||
if (outlen < outsize && outsize - outlen > MB_LEN_MAX) {
|
|
||||||
if (outlen_p)
|
|
||||||
*outlen_p = outlen;
|
|
||||||
out[outlen] = '\0';
|
|
||||||
return out;
|
|
||||||
}
|
|
||||||
|
|
||||||
sgrowarray(out, outsize, outsize);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
char *dup_wc_to_mb(int codepage, const wchar_t *string,
|
char *dup_wc_to_mb(int codepage, const wchar_t *string,
|
||||||
|
@ -221,7 +221,7 @@ static bool console_read_line_to_strbuf(ConsoleIO *conio, bool echo,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (conio->utf8) {
|
if (conio->utf8) {
|
||||||
wchar_t wbuf[4096];
|
wchar_t wbuf[4097];
|
||||||
size_t wlen;
|
size_t wlen;
|
||||||
|
|
||||||
if (conio->hin_is_console) {
|
if (conio->hin_is_console) {
|
||||||
@ -245,17 +245,15 @@ static bool console_read_line_to_strbuf(ConsoleIO *conio, bool echo,
|
|||||||
if (!ReadFile(conio->hin, buf, lenof(buf), &nread, NULL))
|
if (!ReadFile(conio->hin, buf, lenof(buf), &nread, NULL))
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
wlen = mb_to_wc(CP_ACP, 0, buf, nread, wbuf, lenof(wbuf));
|
buffer_sink bs[1];
|
||||||
|
buffer_sink_init(bs, wbuf, sizeof(wbuf) - sizeof(wchar_t));
|
||||||
|
put_mb_to_wc(bs, CP_ACP, buf, nread);
|
||||||
|
assert(!bs->overflowed);
|
||||||
|
wlen = (wchar_t *)bs->out - wbuf;
|
||||||
smemclr(buf, sizeof(buf));
|
smemclr(buf, sizeof(buf));
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Allocate the maximum space in the strbuf that might be
|
put_wc_to_mb(sb, CP_UTF8, wbuf, wlen, "");
|
||||||
* needed for this data */
|
|
||||||
size_t oldlen = sb->len, maxout = wlen * 4;
|
|
||||||
void *outptr = strbuf_append(sb, maxout);
|
|
||||||
size_t newlen = oldlen + wc_to_mb(CP_UTF8, 0, wbuf, wlen,
|
|
||||||
outptr, maxout, NULL);
|
|
||||||
strbuf_shrink_to(sb, newlen);
|
|
||||||
smemclr(wbuf, sizeof(wbuf));
|
smemclr(wbuf, sizeof(wbuf));
|
||||||
} else {
|
} else {
|
||||||
/*
|
/*
|
||||||
|
@ -1232,8 +1232,7 @@ void get_unitab(int codepage, wchar_t *unitab, int ftype)
|
|||||||
for (i = 0; i < max; i++) {
|
for (i = 0; i < max; i++) {
|
||||||
tbuf[0] = i;
|
tbuf[0] = i;
|
||||||
|
|
||||||
if (mb_to_wc(codepage, flg, tbuf, 1, unitab + i, 1)
|
if (MultiByteToWideChar(codepage, flg, tbuf, 1, unitab+i, 1) != 1)
|
||||||
!= 1)
|
|
||||||
unitab[i] = 0xFFFD;
|
unitab[i] = 0xFFFD;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@ -1245,151 +1244,192 @@ void get_unitab(int codepage, wchar_t *unitab, int ftype)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int wc_to_mb(int codepage, int flags, const wchar_t *wcstr, int wclen,
|
bool BinarySink_put_wc_to_mb(
|
||||||
char *mbstr, int mblen, const char *defchr)
|
BinarySink *bs, int codepage, const wchar_t *wcstr, int wclen,
|
||||||
|
const char *defchr)
|
||||||
{
|
{
|
||||||
|
if (!wclen)
|
||||||
|
return true;
|
||||||
|
|
||||||
reverse_mapping *rmap = get_reverse_mapping(codepage);
|
reverse_mapping *rmap = get_reverse_mapping(codepage);
|
||||||
|
|
||||||
if (rmap) {
|
if (rmap) {
|
||||||
|
size_t defchr_len = 0;
|
||||||
|
bool defchr_len_known = false;
|
||||||
|
|
||||||
/* Do this by array lookup if we can. */
|
/* Do this by array lookup if we can. */
|
||||||
if (wclen < 0) {
|
for (size_t i = 0; i < wclen; i++) {
|
||||||
for (wclen = 0; wcstr[wclen++] ;); /* will include the NUL */
|
|
||||||
}
|
|
||||||
char *p;
|
|
||||||
int i;
|
|
||||||
for (p = mbstr, i = 0; i < wclen; i++) {
|
|
||||||
wchar_t ch = wcstr[i];
|
wchar_t ch = wcstr[i];
|
||||||
int by;
|
int by;
|
||||||
const char *p1;
|
const char *blk;
|
||||||
|
|
||||||
#define WRITECH(chr) do \
|
if ((blk = rmap->blocks[(ch >> 8) & 0xFF]) != NULL &&
|
||||||
{ \
|
(by = blk[ch & 0xFF]) != '\0')
|
||||||
assert(p - mbstr < mblen); \
|
put_byte(bs, by);
|
||||||
*p++ = (char)(chr); \
|
|
||||||
} while (0)
|
|
||||||
|
|
||||||
if ((p1 = rmap->blocks[(ch >> 8) & 0xFF]) != NULL &&
|
|
||||||
(by = p1[ch & 0xFF]) != '\0')
|
|
||||||
WRITECH(by);
|
|
||||||
else if (ch < 0x80)
|
else if (ch < 0x80)
|
||||||
WRITECH(ch);
|
put_byte(bs, ch);
|
||||||
else if (defchr)
|
else if (defchr) {
|
||||||
for (const char *q = defchr; *q; q++)
|
if (!defchr_len_known) {
|
||||||
WRITECH(*q);
|
defchr_len = strlen(defchr);
|
||||||
#if 1
|
defchr_len_known = true;
|
||||||
else
|
}
|
||||||
WRITECH('.');
|
put_data(bs, defchr, defchr_len);
|
||||||
#endif
|
}
|
||||||
|
|
||||||
#undef WRITECH
|
|
||||||
}
|
}
|
||||||
return p - mbstr;
|
return true;
|
||||||
} else {
|
}
|
||||||
int defused, ret;
|
|
||||||
ret = WideCharToMultiByte(codepage, flags, wcstr, wclen,
|
{
|
||||||
mbstr, mblen, defchr, &defused);
|
char internalbuf[2048];
|
||||||
if (ret)
|
char *allocbuf = NULL;
|
||||||
return ret;
|
size_t allocsize = 0;
|
||||||
|
char *currbuf = internalbuf;
|
||||||
|
size_t currsize = lenof(internalbuf);
|
||||||
|
bool success;
|
||||||
|
|
||||||
|
BOOL defused = false;
|
||||||
|
BOOL *defusedp = &defused;
|
||||||
|
|
||||||
|
if (codepage == CP_UTF8 || !defchr[0]) {
|
||||||
|
/*
|
||||||
|
* The Win32 API spec says that defchr and defused must be
|
||||||
|
* NULL when doing a UTF-8 conversion, on pain of
|
||||||
|
* ERROR_INVALID_PARAMETER.
|
||||||
|
*
|
||||||
|
* Also, translate defchr="" on input to NULL in the Win32
|
||||||
|
* API.
|
||||||
|
*/
|
||||||
|
defchr = NULL;
|
||||||
|
defusedp = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
int ret = WideCharToMultiByte(
|
||||||
|
codepage, 0, wcstr, wclen, currbuf, currsize,
|
||||||
|
defchr, defusedp);
|
||||||
|
|
||||||
|
if (ret) {
|
||||||
|
put_data(bs, currbuf, ret);
|
||||||
|
success = true;
|
||||||
|
break;
|
||||||
|
} else if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
|
||||||
|
success = false;
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
sgrowarray_nm(allocbuf, allocsize, currsize);
|
||||||
|
currbuf = allocbuf;
|
||||||
|
currsize = allocsize;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
smemclr(allocbuf, allocsize);
|
||||||
|
if (success)
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef LEGACY_WINDOWS
|
#ifdef LEGACY_WINDOWS
|
||||||
/*
|
/*
|
||||||
* Fallback for legacy platforms too old to support UTF-8: if
|
* Fallback for legacy platforms too old to support UTF-8: if
|
||||||
* the codepage is UTF-8, we can do the translation ourselves.
|
* the codepage is UTF-8, we can do the translation ourselves.
|
||||||
*/
|
*/
|
||||||
if (codepage == CP_UTF8 && mblen > 0 && wclen > 0) {
|
if (codepage == CP_UTF8 && wclen > 0) {
|
||||||
buffer_sink bs[1];
|
while (wclen > 0) {
|
||||||
buffer_sink_init(bs, mbstr, mblen);
|
unsigned long wc = (wclen--, *wcstr++);
|
||||||
|
if (wclen > 0 && IS_SURROGATE_PAIR(wc, *wcstr)) {
|
||||||
while (wclen > 0) {
|
wc = FROM_SURROGATES(wc, *wcstr);
|
||||||
unsigned long wc = (wclen--, *wcstr++);
|
wclen--, wcstr++;
|
||||||
if (wclen > 0 && IS_SURROGATE_PAIR(wc, *wcstr)) {
|
|
||||||
wc = FROM_SURROGATES(wc, *wcstr);
|
|
||||||
wclen--, wcstr++;
|
|
||||||
}
|
|
||||||
|
|
||||||
const char *prev_ptr = bs->out;
|
|
||||||
put_utf8_char(bs, wc);
|
|
||||||
if (bs->overflowed)
|
|
||||||
return prev_ptr - mbstr;
|
|
||||||
}
|
}
|
||||||
|
put_utf8_char(bs, wc);
|
||||||
return bs->out - mbstr;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* No other fallbacks are available */
|
/* No other fallbacks are available */
|
||||||
return 0;
|
return false;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int mb_to_wc(int codepage, int flags, const char *mbstr, int mblen,
|
bool BinarySink_put_mb_to_wc(
|
||||||
wchar_t *wcstr, int wclen)
|
BinarySink *bs, int codepage, const char *mbstr, int mblen)
|
||||||
{
|
{
|
||||||
|
if (!mblen)
|
||||||
|
return true;
|
||||||
|
|
||||||
if (codepage >= 65536) {
|
if (codepage >= 65536) {
|
||||||
/* Character set not known to Windows, so we'll have to
|
/* Character set not known to Windows, so we'll have to
|
||||||
* translate it ourself */
|
* translate it ourself */
|
||||||
size_t index = codepage - 65536;
|
size_t index = codepage - 65536;
|
||||||
if (index >= lenof(cp_list))
|
if (index >= lenof(cp_list))
|
||||||
return 0;
|
return false;
|
||||||
const struct cp_list_item *cp = &cp_list[index];
|
const struct cp_list_item *cp = &cp_list[index];
|
||||||
if (!cp->cp_table)
|
if (!cp->cp_table)
|
||||||
return 0;
|
return false;
|
||||||
|
|
||||||
size_t remaining = wclen;
|
|
||||||
wchar_t *p = wcstr;
|
|
||||||
unsigned tablebase = 256 - cp->cp_size;
|
unsigned tablebase = 256 - cp->cp_size;
|
||||||
|
|
||||||
while (mblen > 0) {
|
while (mblen > 0) {
|
||||||
mblen--;
|
mblen--;
|
||||||
unsigned c = 0xFF & *mbstr++;
|
unsigned c = 0xFF & *mbstr++;
|
||||||
wchar_t wc = (c < tablebase ? c : cp->cp_table[c - tablebase]);
|
wchar_t wc = (c < tablebase ? c : cp->cp_table[c - tablebase]);
|
||||||
if (remaining > 0) {
|
put_data(bs, &wc, sizeof(wc));
|
||||||
remaining--;
|
}
|
||||||
*p++ = wc;
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
wchar_t internalbuf[1024];
|
||||||
|
wchar_t *allocbuf = NULL;
|
||||||
|
size_t allocsize = 0;
|
||||||
|
wchar_t *currbuf = internalbuf;
|
||||||
|
size_t currsize = lenof(internalbuf);
|
||||||
|
bool success;
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
int ret = MultiByteToWideChar(
|
||||||
|
codepage, 0, mbstr, mblen, currbuf, currsize);
|
||||||
|
|
||||||
|
if (ret > 0) {
|
||||||
|
put_data(bs, currbuf, ret * sizeof(wchar_t));
|
||||||
|
success = true;
|
||||||
|
break;
|
||||||
|
} else if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
|
||||||
|
success = false;
|
||||||
|
break;
|
||||||
} else {
|
} else {
|
||||||
return p - wcstr;
|
sgrowarray_nm(allocbuf, allocsize, currsize);
|
||||||
|
currbuf = allocbuf;
|
||||||
|
currsize = allocsize;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return p - wcstr;
|
smemclr(allocbuf, allocsize * sizeof(wchar_t));
|
||||||
|
if (success)
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
int ret = MultiByteToWideChar(codepage, flags, mbstr, mblen, wcstr, wclen);
|
|
||||||
if (ret)
|
|
||||||
return ret;
|
|
||||||
|
|
||||||
#ifdef LEGACY_WINDOWS
|
#ifdef LEGACY_WINDOWS
|
||||||
/*
|
/*
|
||||||
* Fallback for legacy platforms too old to support UTF-8: if the
|
* Fallback for legacy platforms too old to support UTF-8: if the
|
||||||
* codepage is UTF-8, we can do the translation ourselves.
|
* codepage is UTF-8, we can do the translation ourselves.
|
||||||
*/
|
*/
|
||||||
if (codepage == CP_UTF8 && mblen > 0 && wclen > 0) {
|
if (codepage == CP_UTF8 && mblen > 0) {
|
||||||
BinarySource src[1];
|
BinarySource src[1];
|
||||||
BinarySource_BARE_INIT(src, mbstr, mblen);
|
BinarySource_BARE_INIT(src, mbstr, mblen);
|
||||||
|
|
||||||
size_t remaining = wclen;
|
|
||||||
wchar_t *p = wcstr;
|
|
||||||
|
|
||||||
while (get_avail(src)) {
|
while (get_avail(src)) {
|
||||||
wchar_t wcbuf[2];
|
wchar_t wcbuf[2];
|
||||||
size_t nwc = decode_utf8_to_wchar(src, wcbuf, NULL);
|
size_t nwc = decode_utf8_to_wchar(src, wcbuf, NULL);
|
||||||
|
put_data(bs, wcbuf, nwc * sizeof(wchar_t));
|
||||||
for (size_t i = 0; i < nwc; i++) {
|
|
||||||
if (remaining > 0) {
|
|
||||||
remaining--;
|
|
||||||
*p++ = wcbuf[i];
|
|
||||||
} else {
|
|
||||||
return p - wcstr;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return p - wcstr;
|
return true;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* No other fallbacks are available */
|
/* No other fallbacks are available */
|
||||||
return 0;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool is_dbcs_leadbyte(int codepage, char byte)
|
bool is_dbcs_leadbyte(int codepage, char byte)
|
||||||
|
Loading…
Reference in New Issue
Block a user