diff --git a/ldiscucs.c b/ldiscucs.c index c4fc726d..fe0da8a6 100644 --- a/ldiscucs.c +++ b/ldiscucs.c @@ -51,13 +51,12 @@ void luni_send(void *handle, wchar_t * widebuf, int len, int interactive) for (p = linebuffer, i = 0; i < len; i++) { unsigned long ch = widebuf[i]; - if ((ch & 0xF800) == 0xD800) { + if (IS_SURROGATE(ch)) { #ifdef PLATFORM_IS_UTF16 if (i+1 < len) { unsigned long ch2 = widebuf[i+1]; - if ((ch & 0xFC00) == 0xD800 && - (ch2 & 0xFC00) == 0xDC00) { - ch = 0x10000 + ((ch & 0x3FF) << 10) + (ch2 & 0x3FF); + if (IS_SURROGATE_PAIR(ch, ch2)) { + ch = FROM_SURROGATES(ch, ch2); i++; } } else diff --git a/minibidi.c b/minibidi.c index c0197a5e..f2c68c4f 100644 --- a/minibidi.c +++ b/minibidi.c @@ -58,7 +58,7 @@ shapetypes[(xh)-SHAPE_FIRST].type : SU) /*))*/ #define leastGreaterEven(x) ( ((x)+2) &~ 1 ) typedef struct bidi_char { - wchar_t origwc, wc; + unsigned int origwc, wc; unsigned short index; } bidi_char; diff --git a/putty.h b/putty.h index 4c338b4e..93b683e1 100644 --- a/putty.h +++ b/putty.h @@ -1126,10 +1126,10 @@ void get_unitab(int codepage, wchar_t * unitab, int ftype); /* * Exports from wcwidth.c */ -int mk_wcwidth(wchar_t ucs); -int mk_wcswidth(const wchar_t *pwcs, size_t n); -int mk_wcwidth_cjk(wchar_t ucs); -int mk_wcswidth_cjk(const wchar_t *pwcs, size_t n); +int mk_wcwidth(unsigned int ucs); +int mk_wcswidth(const unsigned int *pwcs, size_t n); +int mk_wcwidth_cjk(unsigned int ucs); +int mk_wcswidth_cjk(const unsigned int *pwcs, size_t n); /* * Exports from mscrypto.c @@ -1257,7 +1257,7 @@ void setup_config_box(struct controlbox *b, int midsession, * Exports from minibidi.c. */ typedef struct bidi_char { - wchar_t origwc, wc; + unsigned int origwc, wc; unsigned short index; } bidi_char; int do_bidi(bidi_char *line, int count); @@ -1399,4 +1399,29 @@ void timer_change_notify(long next); #define remove_session_from_jumplist(x) ((void)0) #endif +/* SURROGATE PAIR */ +#ifndef IS_HIGH_SURROGATE +#define HIGH_SURROGATE_START 0xd800 +#define HIGH_SURROGATE_END 0xdbff +#define LOW_SURROGATE_START 0xdc00 +#define LOW_SURROGATE_END 0xdfff + +#define IS_HIGH_SURROGATE(wch) (((wch) >= HIGH_SURROGATE_START) && \ + ((wch) <= HIGH_SURROGATE_END)) +#define IS_LOW_SURROGATE(wch) (((wch) >= LOW_SURROGATE_START) && \ + ((wch) <= LOW_SURROGATE_END)) +#define IS_SURROGATE_PAIR(hs, ls) (IS_HIGH_SURROGATE(hs) && \ + IS_LOW_SURROGATE(ls)) +#endif + + +#define IS_SURROGATE(wch) (((wch) >= HIGH_SURROGATE_START) && \ + ((wch) <= LOW_SURROGATE_END)) +#define HIGH_SURROGATE_OF(codept) \ + (HIGH_SURROGATE_START + (((codept) - 0x10000) >> 10)) +#define LOW_SURROGATE_OF(codept) \ + (LOW_SURROGATE_START + (((codept) - 0x10000) & 0x3FF)) +#define FROM_SURROGATES(wch1, wch2) \ + (0x10000 + (((wch1) & 0x3FF) << 10) + ((wch2) & 0x3FF)) + #endif diff --git a/terminal.c b/terminal.c index abb06e2a..d23cbe91 100644 --- a/terminal.c +++ b/terminal.c @@ -3016,8 +3016,8 @@ static void term_out(Terminal *term) width = 1; if (!width) width = (term->cjk_ambig_wide ? - mk_wcwidth_cjk((wchar_t) c) : - mk_wcwidth((wchar_t) c)); + mk_wcwidth_cjk((unsigned int) c) : + mk_wcwidth((unsigned int) c)); if (term->wrapnext && term->wrap && width > 0) { cline->lattr |= LATTR_WRAPPED; @@ -4692,7 +4692,7 @@ static termchar *term_bidi_line(Terminal *term, struct termline *ldata, } term->wcFrom[it].origwc = term->wcFrom[it].wc = - (wchar_t)uc; + (unsigned int)uc; term->wcFrom[it].index = it; } @@ -5067,10 +5067,17 @@ static void do_paint(Terminal *term, Context ctx, int may_optimise) dirty_run = TRUE; } - if (ccount >= chlen) { + if (ccount+2 > chlen) { chlen = ccount + 256; ch = sresize(ch, chlen, wchar_t); } + +#ifdef PLATFORM_IS_UTF16 + if (tchar > 0x10000 && tchar < 0x110000) { + ch[ccount++] = (wchar_t) HIGH_SURROGATE_OF(tchar); + ch[ccount++] = (wchar_t) LOW_SURROGATE_OF(tchar); + } else +#endif /* PLATFORM_IS_UTF16 */ ch[ccount++] = (wchar_t) tchar; if (d->cc_next) { @@ -5094,10 +5101,17 @@ static void do_paint(Terminal *term, Context ctx, int may_optimise) break; } - if (ccount >= chlen) { + if (ccount+2 > chlen) { chlen = ccount + 256; ch = sresize(ch, chlen, wchar_t); } + +#ifdef PLATFORM_IS_UTF16 + if (schar > 0x10000 && schar < 0x110000) { + ch[ccount++] = (wchar_t) HIGH_SURROGATE_OF(schar); + ch[ccount++] = (wchar_t) LOW_SURROGATE_OF(schar); + } else +#endif /* PLATFORM_IS_UTF16 */ ch[ccount++] = (wchar_t) schar; } diff --git a/wcwidth.c b/wcwidth.c index 94ecb540..46ac1e35 100644 --- a/wcwidth.c +++ b/wcwidth.c @@ -69,7 +69,7 @@ struct interval { }; /* auxiliary function for binary search in interval table */ -static int bisearch(wchar_t ucs, const struct interval *table, int max) { +static int bisearch(unsigned int ucs, const struct interval *table, int max) { int min = 0; int mid; @@ -121,7 +121,7 @@ static int bisearch(wchar_t ucs, const struct interval *table, int max) { * in ISO 10646. */ -int mk_wcwidth(wchar_t ucs) +int mk_wcwidth(unsigned int ucs) { /* sorted list of non-overlapping intervals of non-spacing characters */ /* generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" */ @@ -198,7 +198,7 @@ int mk_wcwidth(wchar_t ucs) } -int mk_wcswidth(const wchar_t *pwcs, size_t n) +int mk_wcswidth(const unsigned int *pwcs, size_t n) { int w, width = 0; @@ -221,7 +221,7 @@ int mk_wcswidth(const wchar_t *pwcs, size_t n) * the traditional terminal character-width behaviour. It is not * otherwise recommended for general use. */ -int mk_wcwidth_cjk(wchar_t ucs) +int mk_wcwidth_cjk(unsigned int ucs) { /* sorted list of non-overlapping intervals of East Asian Ambiguous * characters, generated by "uniset +WIDTH-A -cat=Me -cat=Mn -cat=Cf c" */ @@ -289,7 +289,7 @@ int mk_wcwidth_cjk(wchar_t ucs) } -int mk_wcswidth_cjk(const wchar_t *pwcs, size_t n) +int mk_wcswidth_cjk(const unsigned int *pwcs, size_t n) { int w, width = 0; diff --git a/windows/window.c b/windows/window.c index a1d99bae..80ca7955 100644 --- a/windows/window.c +++ b/windows/window.c @@ -206,6 +206,12 @@ static int compose_state = 0; static UINT wm_mousewheel = WM_MOUSEWHEEL; +#define IS_HIGH_VARSEL(wch1, wch2) \ + ((wch1) == 0xDB40 && ((wch2) >= 0xDD00 && (wch2) <= 0xDDEF)) +#define IS_LOW_VARSEL(wch) \ + (((wch) >= 0x180B && (wch) <= 0x180D) || /* MONGOLIAN FREE VARIATION SELECTOR */ \ + ((wch) >= 0xFE00 && (wch) <= 0xFE0F)) /* VARIATION SELECTOR 1-16 */ + /* Dummy routine, only required in plink. */ void ldisc_update(void *frontend, int echo, int edit) { @@ -3105,9 +3111,20 @@ static LRESULT CALLBACK WndProc(HWND hwnd, UINT message, * instead we luni_send the characters one by one. */ term_seen_key_event(term); - for (i = 0; i < n; i += 2) { - if (ldisc) + /* don't divide SURROGATE PAIR */ + if (ldisc) { + for (i = 0; i < n; i += 2) { + WCHAR hs = *(unsigned short *)(buff+i); + if (IS_HIGH_SURROGATE(hs) && i+2 < n) { + WCHAR ls = *(unsigned short *)(buff+i+2); + if (IS_LOW_SURROGATE(ls)) { + luni_send(ldisc, (unsigned short *)(buff+i), 2, 1); + i += 2; + continue; + } + } luni_send(ldisc, (unsigned short *)(buff+i), 1, 1); + } } free(buff); } @@ -3309,6 +3326,7 @@ void do_text_internal(Context ctx, int x, int y, wchar_t *text, int len, static int *lpDx = NULL; static int lpDx_len = 0; int *lpDx_maybe; + int len2; /* for SURROGATE PAIR */ lattr &= LATTR_MODE; @@ -3379,7 +3397,8 @@ void do_text_internal(Context ctx, int x, int y, wchar_t *text, int len, } /* Anything left as an original character set is unprintable. */ - if (DIRECT_CHAR(text[0])) { + if (DIRECT_CHAR(text[0]) && + (len < 2 || !IS_SURROGATE_PAIR(text[0], text[1]))) { int i; for (i = 0; i < len; i++) text[i] = 0xFFFD; @@ -3432,6 +3451,24 @@ void do_text_internal(Context ctx, int x, int y, wchar_t *text, int len, line_box.top = y; line_box.right = x + char_width * len; line_box.bottom = y + font_height; + /* adjust line_box.right for SURROGATE PAIR & VARIATION SELECTOR */ + { + int i; + int rc_width = 0; + for (i = 0; i < len ; i++) { + if (i+1 < len && IS_HIGH_VARSEL(text[i], text[i+1])) { + i++; + } else if (i+1 < len && IS_SURROGATE_PAIR(text[i], text[i+1])) { + rc_width += char_width; + i++; + } else if (IS_LOW_VARSEL(text[i])) { + /* do nothing */ + } else { + rc_width += char_width; + } + } + line_box.right = line_box.left + rc_width; + } /* Only want the left half of double width lines */ if (line_box.right > font_width*term->cols+offset_width) @@ -3462,8 +3499,19 @@ void do_text_internal(Context ctx, int x, int y, wchar_t *text, int len, opaque = TRUE; /* start by erasing the rectangle */ for (remaining = len; remaining > 0; - text += len, remaining -= len, x += char_width * len) { + text += len, remaining -= len, x += char_width * len2) { len = (maxlen < remaining ? maxlen : remaining); + /* don't divide SURROGATE PAIR and VARIATION SELECTOR */ + len2 = len; + if (maxlen == 1) { + if (remaining >= 1 && IS_SURROGATE_PAIR(text[0], text[1])) + len++; + if (remaining-len >= 1 && IS_LOW_VARSEL(text[len])) + len++; + else if (remaining-len >= 2 && + IS_HIGH_VARSEL(text[len], text[len+1])) + len += 2; + } if (len > lpDx_len) { if (len > lpDx_len) { @@ -3473,8 +3521,24 @@ void do_text_internal(Context ctx, int x, int y, wchar_t *text, int len, } { int i; - for (i = 0; i < len; i++) + /* only last char has dx width in SURROGATE PAIR and + * VARIATION sequence */ + for (i = 0; i < len; i++) { lpDx[i] = char_width; + if (i+1 < len && IS_HIGH_VARSEL(text[i], text[i+1])) { + if (i > 0) lpDx[i-1] = 0; + lpDx[i] = 0; + i++; + lpDx[i] = char_width; + } else if (i+1 < len && IS_SURROGATE_PAIR(text[i],text[i+1])) { + lpDx[i] = 0; + i++; + lpDx[i] = char_width; + } else if (IS_LOW_VARSEL(text[i])) { + if (i > 0) lpDx[i-1] = 0; + lpDx[i] = char_width; + } + } } /* We're using a private area for direct to font. (512 chars.) */ @@ -3623,9 +3687,35 @@ void do_text(Context ctx, int x, int y, wchar_t *text, int len, { if (attr & TATTR_COMBINING) { unsigned long a = 0; - attr &= ~TATTR_COMBINING; + int len0 = 1; + /* don't divide SURROGATE PAIR and VARIATION SELECTOR */ + if (len >= 2 && IS_SURROGATE_PAIR(text[0], text[1])) + len0 = 2; + if (len-len0 >= 1 && IS_LOW_VARSEL(text[len0])) { + attr &= ~TATTR_COMBINING; + do_text_internal(ctx, x, y, text, len0+1, attr, lattr); + text += len0+1; + len -= len0+1; + a = TATTR_COMBINING; + } else if (len-len0 >= 2 && IS_HIGH_VARSEL(text[len0], text[len0+1])) { + attr &= ~TATTR_COMBINING; + do_text_internal(ctx, x, y, text, len0+2, attr, lattr); + text += len0+2; + len -= len0+2; + a = TATTR_COMBINING; + } else { + attr &= ~TATTR_COMBINING; + } + while (len--) { - do_text_internal(ctx, x, y, text, 1, attr | a, lattr); + if (len >= 1 && IS_SURROGATE_PAIR(text[0], text[1])) { + do_text_internal(ctx, x, y, text, 2, attr | a, lattr); + len--; + text++; + } else { + do_text_internal(ctx, x, y, text, 1, attr | a, lattr); + } + text++; a = TATTR_COMBINING; }