1
0
mirror of https://git.tartarus.org/simon/putty.git synced 2025-01-10 01:48:00 +00:00

Patch from Yoshida Masato to fill in the missing pieces of Windows
UTF-16 support. High Unicode characters in the terminal are now
converted back into surrogates during copy and draw operations, and
the Windows drawing code takes account of that when splitting up the
UTF-16 string for display. Meanwhile, accidental uses of wchar_t have
been replaced with 32-bit integers in parts of the cross-platform code
which were expecting not to have to deal with UTF-16.

[originally from svn r9409]
This commit is contained in:
Simon Tatham 2012-02-17 19:28:55 +00:00
parent e350ca2b4e
commit 053d2ba6d1
6 changed files with 155 additions and 27 deletions

View File

@ -51,13 +51,12 @@ void luni_send(void *handle, wchar_t * widebuf, int len, int interactive)
for (p = linebuffer, i = 0; i < len; i++) {
unsigned long ch = widebuf[i];
if ((ch & 0xF800) == 0xD800) {
if (IS_SURROGATE(ch)) {
#ifdef PLATFORM_IS_UTF16
if (i+1 < len) {
unsigned long ch2 = widebuf[i+1];
if ((ch & 0xFC00) == 0xD800 &&
(ch2 & 0xFC00) == 0xDC00) {
ch = 0x10000 + ((ch & 0x3FF) << 10) + (ch2 & 0x3FF);
if (IS_SURROGATE_PAIR(ch, ch2)) {
ch = FROM_SURROGATES(ch, ch2);
i++;
}
} else

View File

@ -58,7 +58,7 @@ shapetypes[(xh)-SHAPE_FIRST].type : SU) /*))*/
#define leastGreaterEven(x) ( ((x)+2) &~ 1 )
typedef struct bidi_char {
wchar_t origwc, wc;
unsigned int origwc, wc;
unsigned short index;
} bidi_char;

35
putty.h
View File

@ -1126,10 +1126,10 @@ void get_unitab(int codepage, wchar_t * unitab, int ftype);
/*
* Exports from wcwidth.c
*/
int mk_wcwidth(wchar_t ucs);
int mk_wcswidth(const wchar_t *pwcs, size_t n);
int mk_wcwidth_cjk(wchar_t ucs);
int mk_wcswidth_cjk(const wchar_t *pwcs, size_t n);
int mk_wcwidth(unsigned int ucs);
int mk_wcswidth(const unsigned int *pwcs, size_t n);
int mk_wcwidth_cjk(unsigned int ucs);
int mk_wcswidth_cjk(const unsigned int *pwcs, size_t n);
/*
* Exports from mscrypto.c
@ -1257,7 +1257,7 @@ void setup_config_box(struct controlbox *b, int midsession,
* Exports from minibidi.c.
*/
typedef struct bidi_char {
wchar_t origwc, wc;
unsigned int origwc, wc;
unsigned short index;
} bidi_char;
int do_bidi(bidi_char *line, int count);
@ -1399,4 +1399,29 @@ void timer_change_notify(long next);
#define remove_session_from_jumplist(x) ((void)0)
#endif
/*
 * SURROGATE PAIR
 *
 * Helpers for classifying UTF-16 surrogate code units and converting
 * between a surrogate pair and the code point it encodes.
 *
 * The first group is skipped when IS_HIGH_SURROGATE is already defined
 * at this point (presumably by platform headers such as the Windows
 * SDK - confirm), in which case the *_START / *_END constants are
 * expected to come from the same place.
 *
 * The range tests are written as mask comparisons so that the argument
 * is expanded exactly once; an argument with side effects (e.g. *p++)
 * is therefore safe. The masks are deliberately plain signed int
 * constants: ~0x3FF is negative, so it sign-extends when combined with
 * a wider operand and all the high bits stay in the comparison. Values
 * above 0xFFFF are thus never mistaken for surrogates.
 */
#ifndef IS_HIGH_SURROGATE
#define HIGH_SURROGATE_START 0xd800
#define HIGH_SURROGATE_END   0xdbff
#define LOW_SURROGATE_START  0xdc00
#define LOW_SURROGATE_END    0xdfff
/* True iff wch is in 0xD800..0xDBFF. */
#define IS_HIGH_SURROGATE(wch) \
    (((wch) & ~0x3FF) == HIGH_SURROGATE_START)
/* True iff wch is in 0xDC00..0xDFFF. */
#define IS_LOW_SURROGATE(wch) \
    (((wch) & ~0x3FF) == LOW_SURROGATE_START)
/* True iff hs is a high surrogate and ls is a low surrogate. */
#define IS_SURROGATE_PAIR(hs, ls) \
    (IS_HIGH_SURROGATE(hs) && IS_LOW_SURROGATE(ls))
#endif

/* True iff wch is any surrogate code unit, i.e. in 0xD800..0xDFFF. */
#define IS_SURROGATE(wch) \
    (((wch) & ~0x7FF) == HIGH_SURROGATE_START)

/*
 * Split a code point into its two UTF-16 code units. No range check is
 * performed: the result is only meaningful for 0x10000..0x10FFFF.
 */
#define HIGH_SURROGATE_OF(codept) \
    (HIGH_SURROGATE_START + (((codept) - 0x10000) >> 10))
#define LOW_SURROGATE_OF(codept) \
    (LOW_SURROGATE_START + (((codept) - 0x10000) & 0x3FF))

/*
 * Combine a high surrogate (wch1) and a low surrogate (wch2) into the
 * code point they encode. The arguments are not validated; only their
 * low ten bits are used.
 */
#define FROM_SURROGATES(wch1, wch2) \
    (0x10000 + (((wch1) & 0x3FF) << 10) + ((wch2) & 0x3FF))
#endif

View File

@ -3016,8 +3016,8 @@ static void term_out(Terminal *term)
width = 1;
if (!width)
width = (term->cjk_ambig_wide ?
mk_wcwidth_cjk((wchar_t) c) :
mk_wcwidth((wchar_t) c));
mk_wcwidth_cjk((unsigned int) c) :
mk_wcwidth((unsigned int) c));
if (term->wrapnext && term->wrap && width > 0) {
cline->lattr |= LATTR_WRAPPED;
@ -4692,7 +4692,7 @@ static termchar *term_bidi_line(Terminal *term, struct termline *ldata,
}
term->wcFrom[it].origwc = term->wcFrom[it].wc =
(wchar_t)uc;
(unsigned int)uc;
term->wcFrom[it].index = it;
}
@ -5067,10 +5067,17 @@ static void do_paint(Terminal *term, Context ctx, int may_optimise)
dirty_run = TRUE;
}
if (ccount >= chlen) {
if (ccount+2 > chlen) {
chlen = ccount + 256;
ch = sresize(ch, chlen, wchar_t);
}
#ifdef PLATFORM_IS_UTF16
if (tchar > 0x10000 && tchar < 0x110000) {
ch[ccount++] = (wchar_t) HIGH_SURROGATE_OF(tchar);
ch[ccount++] = (wchar_t) LOW_SURROGATE_OF(tchar);
} else
#endif /* PLATFORM_IS_UTF16 */
ch[ccount++] = (wchar_t) tchar;
if (d->cc_next) {
@ -5094,10 +5101,17 @@ static void do_paint(Terminal *term, Context ctx, int may_optimise)
break;
}
if (ccount >= chlen) {
if (ccount+2 > chlen) {
chlen = ccount + 256;
ch = sresize(ch, chlen, wchar_t);
}
#ifdef PLATFORM_IS_UTF16
if (schar > 0x10000 && schar < 0x110000) {
ch[ccount++] = (wchar_t) HIGH_SURROGATE_OF(schar);
ch[ccount++] = (wchar_t) LOW_SURROGATE_OF(schar);
} else
#endif /* PLATFORM_IS_UTF16 */
ch[ccount++] = (wchar_t) schar;
}

View File

@ -69,7 +69,7 @@ struct interval {
};
/* auxiliary function for binary search in interval table */
static int bisearch(wchar_t ucs, const struct interval *table, int max) {
static int bisearch(unsigned int ucs, const struct interval *table, int max) {
int min = 0;
int mid;
@ -121,7 +121,7 @@ static int bisearch(wchar_t ucs, const struct interval *table, int max) {
* in ISO 10646.
*/
int mk_wcwidth(wchar_t ucs)
int mk_wcwidth(unsigned int ucs)
{
/* sorted list of non-overlapping intervals of non-spacing characters */
/* generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" */
@ -198,7 +198,7 @@ int mk_wcwidth(wchar_t ucs)
}
int mk_wcswidth(const wchar_t *pwcs, size_t n)
int mk_wcswidth(const unsigned int *pwcs, size_t n)
{
int w, width = 0;
@ -221,7 +221,7 @@ int mk_wcswidth(const wchar_t *pwcs, size_t n)
* the traditional terminal character-width behaviour. It is not
* otherwise recommended for general use.
*/
int mk_wcwidth_cjk(wchar_t ucs)
int mk_wcwidth_cjk(unsigned int ucs)
{
/* sorted list of non-overlapping intervals of East Asian Ambiguous
* characters, generated by "uniset +WIDTH-A -cat=Me -cat=Mn -cat=Cf c" */
@ -289,7 +289,7 @@ int mk_wcwidth_cjk(wchar_t ucs)
}
int mk_wcswidth_cjk(const wchar_t *pwcs, size_t n)
int mk_wcswidth_cjk(const unsigned int *pwcs, size_t n)
{
int w, width = 0;

View File

@ -206,6 +206,12 @@ static int compose_state = 0;
static UINT wm_mousewheel = WM_MOUSEWHEEL;
/*
 * Variation selector tests on UTF-16 code units.
 *
 * IS_HIGH_VARSEL(hi, lo): true when hi is 0xDB40 and lo lies in
 * 0xDD00..0xDDEF, i.e. the surrogate pair encodes U+E0100..U+E01EF.
 * IS_LOW_VARSEL(u): true when the single unit u is a variation
 * selector inside the BMP.
 *
 * Both macros may expand an argument more than once, so do not pass
 * expressions with side effects.
 */
#define IS_HIGH_VARSEL(hi, lo) \
    (0xDB40 == (hi) && (0xDD00 <= (lo) && 0xDDEF >= (lo)))
#define IS_LOW_VARSEL(u) \
    ((0x180B <= (u) && 0x180D >= (u)) || /* MONGOLIAN FREE VARIATION SELECTOR */ \
     (0xFE00 <= (u) && 0xFE0F >= (u)))   /* VARIATION SELECTOR 1-16 */
/* Dummy routine, only required in plink. */
void ldisc_update(void *frontend, int echo, int edit)
{
@ -3105,9 +3111,20 @@ static LRESULT CALLBACK WndProc(HWND hwnd, UINT message,
* instead we luni_send the characters one by one.
*/
term_seen_key_event(term);
for (i = 0; i < n; i += 2) {
if (ldisc)
/* don't divide SURROGATE PAIR */
if (ldisc) {
for (i = 0; i < n; i += 2) {
WCHAR hs = *(unsigned short *)(buff+i);
if (IS_HIGH_SURROGATE(hs) && i+2 < n) {
WCHAR ls = *(unsigned short *)(buff+i+2);
if (IS_LOW_SURROGATE(ls)) {
luni_send(ldisc, (unsigned short *)(buff+i), 2, 1);
i += 2;
continue;
}
}
luni_send(ldisc, (unsigned short *)(buff+i), 1, 1);
}
}
free(buff);
}
@ -3309,6 +3326,7 @@ void do_text_internal(Context ctx, int x, int y, wchar_t *text, int len,
static int *lpDx = NULL;
static int lpDx_len = 0;
int *lpDx_maybe;
int len2; /* for SURROGATE PAIR */
lattr &= LATTR_MODE;
@ -3379,7 +3397,8 @@ void do_text_internal(Context ctx, int x, int y, wchar_t *text, int len,
}
/* Anything left as an original character set is unprintable. */
if (DIRECT_CHAR(text[0])) {
if (DIRECT_CHAR(text[0]) &&
(len < 2 || !IS_SURROGATE_PAIR(text[0], text[1]))) {
int i;
for (i = 0; i < len; i++)
text[i] = 0xFFFD;
@ -3432,6 +3451,24 @@ void do_text_internal(Context ctx, int x, int y, wchar_t *text, int len,
line_box.top = y;
line_box.right = x + char_width * len;
line_box.bottom = y + font_height;
/* adjust line_box.right for SURROGATE PAIR & VARIATION SELECTOR */
{
int i;
int rc_width = 0;
for (i = 0; i < len ; i++) {
if (i+1 < len && IS_HIGH_VARSEL(text[i], text[i+1])) {
i++;
} else if (i+1 < len && IS_SURROGATE_PAIR(text[i], text[i+1])) {
rc_width += char_width;
i++;
} else if (IS_LOW_VARSEL(text[i])) {
/* do nothing */
} else {
rc_width += char_width;
}
}
line_box.right = line_box.left + rc_width;
}
/* Only want the left half of double width lines */
if (line_box.right > font_width*term->cols+offset_width)
@ -3462,8 +3499,19 @@ void do_text_internal(Context ctx, int x, int y, wchar_t *text, int len,
opaque = TRUE; /* start by erasing the rectangle */
for (remaining = len; remaining > 0;
text += len, remaining -= len, x += char_width * len) {
text += len, remaining -= len, x += char_width * len2) {
len = (maxlen < remaining ? maxlen : remaining);
/* don't divide SURROGATE PAIR and VARIATION SELECTOR */
len2 = len;
if (maxlen == 1) {
if (remaining >= 1 && IS_SURROGATE_PAIR(text[0], text[1]))
len++;
if (remaining-len >= 1 && IS_LOW_VARSEL(text[len]))
len++;
else if (remaining-len >= 2 &&
IS_HIGH_VARSEL(text[len], text[len+1]))
len += 2;
}
if (len > lpDx_len) {
if (len > lpDx_len) {
@ -3473,8 +3521,24 @@ void do_text_internal(Context ctx, int x, int y, wchar_t *text, int len,
}
{
int i;
for (i = 0; i < len; i++)
/* only last char has dx width in SURROGATE PAIR and
* VARIATION sequence */
for (i = 0; i < len; i++) {
lpDx[i] = char_width;
if (i+1 < len && IS_HIGH_VARSEL(text[i], text[i+1])) {
if (i > 0) lpDx[i-1] = 0;
lpDx[i] = 0;
i++;
lpDx[i] = char_width;
} else if (i+1 < len && IS_SURROGATE_PAIR(text[i],text[i+1])) {
lpDx[i] = 0;
i++;
lpDx[i] = char_width;
} else if (IS_LOW_VARSEL(text[i])) {
if (i > 0) lpDx[i-1] = 0;
lpDx[i] = char_width;
}
}
}
/* We're using a private area for direct to font. (512 chars.) */
@ -3623,9 +3687,35 @@ void do_text(Context ctx, int x, int y, wchar_t *text, int len,
{
if (attr & TATTR_COMBINING) {
unsigned long a = 0;
attr &= ~TATTR_COMBINING;
int len0 = 1;
/* don't divide SURROGATE PAIR and VARIATION SELECTOR */
if (len >= 2 && IS_SURROGATE_PAIR(text[0], text[1]))
len0 = 2;
if (len-len0 >= 1 && IS_LOW_VARSEL(text[len0])) {
attr &= ~TATTR_COMBINING;
do_text_internal(ctx, x, y, text, len0+1, attr, lattr);
text += len0+1;
len -= len0+1;
a = TATTR_COMBINING;
} else if (len-len0 >= 2 && IS_HIGH_VARSEL(text[len0], text[len0+1])) {
attr &= ~TATTR_COMBINING;
do_text_internal(ctx, x, y, text, len0+2, attr, lattr);
text += len0+2;
len -= len0+2;
a = TATTR_COMBINING;
} else {
attr &= ~TATTR_COMBINING;
}
while (len--) {
do_text_internal(ctx, x, y, text, 1, attr | a, lattr);
if (len >= 1 && IS_SURROGATE_PAIR(text[0], text[1])) {
do_text_internal(ctx, x, y, text, 2, attr | a, lattr);
len--;
text++;
} else {
do_text_internal(ctx, x, y, text, 1, attr | a, lattr);
}
text++;
a = TATTR_COMBINING;
}