From 87aafaa89a5c39bfe121a5bedfc84887bdeb9f20 Mon Sep 17 00:00:00 2001
From: Simon Tatham
Date: Tue, 24 Mar 2009 22:24:31 +0000
Subject: [PATCH] Support in the cross-platform code for translating to and
 from UTF-16 when exchanging wchar_t strings with the front end. Enabled by a
 #define in the platform's header file (one should not promiscuously
 translate UTF-16 surrogate pairs on 32-bit wchar_t platforms since that
 could give rise to redundant encoding attacks), which is present on Windows.

[originally from svn r8495]
---
 ldiscucs.c         | 28 ++++++++++++++++++++++++----
 terminal.c         | 13 +++++++++++--
 windows/winstuff.h |  3 +++
 3 files changed, 38 insertions(+), 6 deletions(-)

diff --git a/ldiscucs.c b/ldiscucs.c
index 4ac28d7e..a9e1e133 100644
--- a/ldiscucs.c
+++ b/ldiscucs.c
@@ -49,19 +49,39 @@ void luni_send(void *handle, wchar_t * widebuf, int len, int interactive)
     if (in_utf(ldisc->term)) {
         /* UTF is a simple algorithm */
         for (p = linebuffer, i = 0; i < len; i++) {
-            wchar_t ch = widebuf[i];
-            /* We only deal with 16-bit wide chars */
-            if ((ch&0xF800) == 0xD800) ch = '.';
+            unsigned long ch = widebuf[i];
+
+            if ((ch & 0xF800) == 0xD800) {
+#ifdef PLATFORM_IS_UTF16
+                if (i+1 < len) {
+                    unsigned long ch2 = widebuf[i+1];
+                    if ((ch & 0xFC00) == 0xD800 &&
+                        (ch2 & 0xFC00) == 0xDC00) {
+                        ch = 0x10000 + ((ch & 0x3FF) << 10) + (ch2 & 0x3FF);
+                        i++;
+                    }
+                } else
+#endif
+                {
+                    /* Unrecognised UTF-16 sequence */
+                    ch = '.';
+                }
+            }
 
             if (ch < 0x80) {
                 *p++ = (char) (ch);
             } else if (ch < 0x800) {
                 *p++ = (0xC0 | (ch >> 6));
                 *p++ = (0x80 | (ch & 0x3F));
-            } else {
+            } else if (ch < 0x10000) {
                 *p++ = (0xE0 | (ch >> 12));
                 *p++ = (0x80 | ((ch >> 6) & 0x3F));
                 *p++ = (0x80 | (ch & 0x3F));
+            } else {
+                *p++ = (0xF0 | (ch >> 18));
+                *p++ = (0x80 | ((ch >> 12) & 0x3F));
+                *p++ = (0x80 | ((ch >> 6) & 0x3F));
+                *p++ = (0x80 | (ch & 0x3F));
             }
         }
     } else {
diff --git a/terminal.c b/terminal.c
index 4f0d8225..996a01fb 100644
--- a/terminal.c
+++ b/terminal.c
@@ -5262,8 +5262,17 @@ static void clipme(Terminal *term, pos top, pos bottom, int rect, int desel)
                 set = (uc & CSET_MASK);
                 c = (uc & ~CSET_MASK);
 
-                cbuf[0] = uc;
-                cbuf[1] = 0;
+#ifdef PLATFORM_IS_UTF16
+                if (uc > 0x10000 && uc < 0x110000) {
+                    cbuf[0] = 0xD800 | ((uc - 0x10000) >> 10);
+                    cbuf[1] = 0xDC00 | ((uc - 0x10000) & 0x3FF);
+                    cbuf[2] = 0;
+                } else
+#endif
+                {
+                    cbuf[0] = uc;
+                    cbuf[1] = 0;
+                }
 
                 if (DIRECT_FONT(uc)) {
                     if (c >= ' ' && c != 0x7F) {
diff --git a/windows/winstuff.h b/windows/winstuff.h
index 6308ea68..64f3232d 100644
--- a/windows/winstuff.h
+++ b/windows/winstuff.h
@@ -36,6 +36,9 @@ struct FontSpec {
     (fq) == FQ_NONANTIALIASED ? NONANTIALIASED_QUALITY : \
     CLEARTYPE_QUALITY)
 
+#define PLATFORM_IS_UTF16 /* enable UTF-16 processing when exchanging
+                           * wchar_t strings with environment */
+
 /*
  * Where we can, we use GetWindowLongPtr and friends because they're
  * more useful on 64-bit platforms, but they're a relatively recent