diff --git a/misc.h b/misc.h
index fe8eb06a..b4b9bd57 100644
--- a/misc.h
+++ b/misc.h
@@ -72,9 +72,9 @@ void strbuf_finalise_agent_query(strbuf *buf);
 wchar_t *dup_mb_to_wc_c(int codepage, int flags, const char *string, int len);
 wchar_t *dup_mb_to_wc(int codepage, int flags, const char *string);
 char *dup_wc_to_mb_c(int codepage, int flags, const wchar_t *string, int len,
-                     const char *defchr, struct unicode_data *ucsdata);
+                     const char *defchr);
 char *dup_wc_to_mb(int codepage, int flags, const wchar_t *string,
-                   const char *defchr, struct unicode_data *ucsdata);
+                   const char *defchr);
 
 static inline int toint(unsigned u)
 {
diff --git a/putty.h b/putty.h
index eddc7b56..d7f33369 100644
--- a/putty.h
+++ b/putty.h
@@ -266,7 +266,6 @@ struct sesslist {
 };
 
 struct unicode_data {
-    char **uni_tbl;
     bool dbcs_screenfont;
     int font_codepage;
     int line_codepage;
@@ -2436,8 +2435,7 @@ bool is_dbcs_leadbyte(int codepage, char byte);
 int mb_to_wc(int codepage, int flags, const char *mbstr, int mblen,
              wchar_t *wcstr, int wclen);
 int wc_to_mb(int codepage, int flags, const wchar_t *wcstr, int wclen,
-             char *mbstr, int mblen, const char *defchr,
-             struct unicode_data *ucsdata);
+             char *mbstr, int mblen, const char *defchr);
 wchar_t xlat_uskbd2cyrllic(int ch);
 int check_compose(int first, int second);
 int decode_codepage(const char *cp_name);
diff --git a/terminal/terminal.c b/terminal/terminal.c
index fbaa619b..96bc98f8 100644
--- a/terminal/terminal.c
+++ b/terminal/terminal.c
@@ -3416,7 +3416,7 @@ static strbuf *term_input_data_from_unicode(
         char *bufptr = strbuf_append(buf, len + 1);
         int rv;
         rv = wc_to_mb(term->ucsdata->line_codepage, 0, widebuf, len,
-                      bufptr, len + 1, NULL, term->ucsdata);
+                      bufptr, len + 1, NULL);
         strbuf_shrink_to(buf, rv < 0 ? 0 : rv);
     }
 
diff --git a/unix/unicode.c b/unix/unicode.c
index 1db17ef4..a98c8d3b 100644
--- a/unix/unicode.c
+++ b/unix/unicode.c
@@ -61,8 +61,7 @@ int mb_to_wc(int codepage, int flags, const char *mbstr, int mblen,
 }
 
 int wc_to_mb(int codepage, int flags, const wchar_t *wcstr, int wclen,
-             char *mbstr, int mblen, const char *defchr,
-             struct unicode_data *ucsdata)
+             char *mbstr, int mblen, const char *defchr)
 {
     if (codepage == DEFAULT_CODEPAGE) {
         char output[MB_LEN_MAX];
diff --git a/unix/unifont.c b/unix/unifont.c
index 62445db2..5eaded54 100644
--- a/unix/unifont.c
+++ b/unix/unifont.c
@@ -600,7 +600,7 @@ static bool x11font_has_glyph(unifont *font, wchar_t glyph)
          */
         char sbstring[2];
         int sblen = wc_to_mb(xfont->real_charset, 0, &glyph, 1,
-                             sbstring, 2, "", NULL);
+                             sbstring, 2, "");
         if (sblen == 0 || !sbstring[0])
             return false;              /* not even in the charset */
 
@@ -956,7 +956,7 @@ static void x11font_draw_text(unifont_drawctx *ctx, unifont *font,
          */
         char *sbstring = snewn(len+1, char);
         int sblen = wc_to_mb(xfont->real_charset, 0, string, len,
-                             sbstring, len+1, ".", NULL);
+                             sbstring, len+1, ".");
         x11font_really_draw_text(x11font_drawfuncs + index + 0, ctx,
                                  &xfont->fonts[sfid], xfont->disp, x, y,
                                  sbstring, sblen, shadowoffset,
@@ -1644,8 +1644,7 @@ static void pangofont_draw_internal(unifont_drawctx *ctx, unifont *font,
      * string to UTF-8.
      */
     utfstring = snewn(len*6+1, char); /* UTF-8 has max 6 bytes/char */
-    utflen = wc_to_mb(CS_UTF8, 0, string, len,
-                      utfstring, len*6+1, ".", NULL);
+    utflen = wc_to_mb(CS_UTF8, 0, string, len, utfstring, len*6+1, ".");
 
     utfptr = utfstring;
     while (utflen > 0) {
diff --git a/unix/window.c b/unix/window.c
index 811ffc17..9d33904a 100644
--- a/unix/window.c
+++ b/unix/window.c
@@ -3056,8 +3056,7 @@ static void gtkwin_clip_write(
         state->pasteout_data_len = len*6;
         state->pasteout_data_len = wc_to_mb(inst->ucsdata.line_codepage, 0,
                                             data, len, state->pasteout_data,
-                                            state->pasteout_data_len,
-                                            NULL, NULL);
+                                            state->pasteout_data_len, NULL);
         if (state->pasteout_data_len == 0) {
             sfree(state->pasteout_data);
             state->pasteout_data = NULL;
diff --git a/utils/dup_wc_to_mb.c b/utils/dup_wc_to_mb.c
index e91a8916..36088196 100644
--- a/utils/dup_wc_to_mb.c
+++ b/utils/dup_wc_to_mb.c
@@ -11,14 +11,14 @@
 #include "misc.h"
 
 char *dup_wc_to_mb_c(int codepage, int flags, const wchar_t *string, int len,
-                     const char *defchr, struct unicode_data *ucsdata)
+                     const char *defchr)
 {
     size_t outsize = len+1;
     char *out = snewn(outsize, char);
 
     while (true) {
         size_t outlen = wc_to_mb(codepage, flags, string, len, out, outsize,
-                                 defchr, ucsdata);
+                                 defchr);
         /* We can only be sure we've consumed the whole input if the
          * output is not within a multibyte-character-length of the
          * end of the buffer! */
@@ -32,8 +32,7 @@ char *dup_wc_to_mb_c(int codepage, int flags, const wchar_t *string, int len,
 }
 
 char *dup_wc_to_mb(int codepage, int flags, const wchar_t *string,
-                   const char *defchr, struct unicode_data *ucsdata)
+                   const char *defchr)
 {
-    return dup_wc_to_mb_c(codepage, flags, string, wcslen(string),
-                          defchr, ucsdata);
+    return dup_wc_to_mb_c(codepage, flags, string, wcslen(string), defchr);
 }
diff --git a/windows/unicode.c b/windows/unicode.c
index ed9a3050..09bce095 100644
--- a/windows/unicode.c
+++ b/windows/unicode.c
@@ -437,9 +437,124 @@ static const struct cp_list_item cp_list[] = {
 
 static void link_font(WCHAR * line_tbl, WCHAR * font_tbl, WCHAR attr);
 
+/*
+ * We keep a collection of reverse mappings from Unicode back to code pages,
+ * in the form of array[256] of array[256] of char. These live forever in a
+ * local tree234, and we just make a new one whenever we find a need.
+ */
+typedef struct reverse_mapping {
+    int codepage;
+    char **blocks;
+} reverse_mapping;
+static tree234 *reverse_mappings = NULL;
+
+/* Comparison function given to newtree234: orders entries by codepage. */
+static int reverse_mapping_cmp(void *av, void *bv)
+{
+    const reverse_mapping *a = (const reverse_mapping *)av;
+    const reverse_mapping *b = (const reverse_mapping *)bv;
+    if (a->codepage < b->codepage)
+        return -1;
+    if (a->codepage > b->codepage)
+        return +1;
+    return 0;
+}
+
+/* find234 search function: compare an entry with a plain int codepage. */
+static int reverse_mapping_find(void *av, void *bv)
+{
+    const reverse_mapping *a = (const reverse_mapping *)av;
+    int b_codepage = *(const int *)bv;
+    if (a->codepage < b_codepage)
+        return -1;
+    if (a->codepage > b_codepage)
+        return +1;
+    return 0;
+}
+
+/* Return the mapping already stored for 'codepage', if there is one. */
+static reverse_mapping *get_existing_reverse_mapping(int codepage)
+{
+    if (!reverse_mappings)
+        return NULL;
+    return find234(reverse_mappings, &codepage, reverse_mapping_find);
+}
+
+/*
+ * Build the reverse mapping for 'codepage' from its 256-entry forward table
+ * and add it to the tree. The caller must have checked that no mapping for
+ * this codepage is already present.
+ */
+static reverse_mapping *make_reverse_mapping_inner(
+    int codepage, const wchar_t *mapping)
+{
+    if (!reverse_mappings)
+        reverse_mappings = newtree234(reverse_mapping_cmp);
+
+    reverse_mapping *rmap = snew(reverse_mapping);
+    rmap->blocks = snewn(256, char *);
+    memset(rmap->blocks, 0, 256 * sizeof(char *));
+
+    for (size_t i = 0; i < 256; i++) {
+        /* These special kinds of value correspond to no Unicode character */
+        if (DIRECT_CHAR(mapping[i]))
+            continue;
+        if (DIRECT_FONT(mapping[i]))
+            continue;
+
+        size_t chr = mapping[i];
+        size_t block = chr >> 8, index = chr & 0xFF;
+
+        if (!rmap->blocks[block]) {
+            rmap->blocks[block] = snewn(256, char);
+            memset(rmap->blocks[block], 0, 256);
+        }
+        rmap->blocks[block][index] = i;
+    }
+
+    rmap->codepage = codepage;
+    reverse_mapping *added = add234(reverse_mappings, rmap);
+    assert(added == rmap); /* we already checked it wasn't already in there */
+    return added;
+}
+
+/* Make a reverse mapping for 'codepage' unless one already exists. */
+static void make_reverse_mapping(int codepage, const wchar_t *mapping)
+{
+    if (get_existing_reverse_mapping(codepage))
+        return;                        /* we've already got this one */
+    make_reverse_mapping_inner(codepage, mapping);
+}
+
+static reverse_mapping *get_reverse_mapping(int codepage)
+{
+    /*
+     * Try harder to get a reverse mapping for a codepage we implement
+     * internally via a translation table, by hastily making it if it doesn't
+     * already exist.
+     */
+
+    reverse_mapping *rmap = get_existing_reverse_mapping(codepage);
+    if (rmap)
+        return rmap;
+
+    if (codepage < 65536)
+        return NULL;
+    /* Valid cp_list indices are 0..lenof(cp_list)-1, hence >= and not > */
+    if (codepage >= 65536 + lenof(cp_list))
+        return NULL;
+    const struct cp_list_item *cp = &cp_list[codepage - 65536];
+    if (!cp->cp_table)
+        return NULL;
+
+    wchar_t mapping[256];
+    get_unitab(codepage, mapping, 0);
+    return make_reverse_mapping_inner(codepage, mapping);
+}
+
 void init_ucs(Conf *conf, struct unicode_data *ucsdata)
 {
-    int i, j;
+    int i;
     bool used_dtf = false;
     int vtmode;
 
@@ -522,31 +637,9 @@ void init_ucs(Conf *conf, struct unicode_data *ucsdata)
                sizeof(unitab_xterm_std));
     ucsdata->unitab_xterm['_'] = ' ';
 
-    /* Generate UCS ->line page table. */
-    if (ucsdata->uni_tbl) {
-        for (i = 0; i < 256; i++)
-            if (ucsdata->uni_tbl[i])
-                sfree(ucsdata->uni_tbl[i]);
-        sfree(ucsdata->uni_tbl);
-        ucsdata->uni_tbl = 0;
-    }
     if (!used_dtf) {
-        for (i = 0; i < 256; i++) {
-            if (DIRECT_CHAR(ucsdata->unitab_line[i]))
-                continue;
-            if (DIRECT_FONT(ucsdata->unitab_line[i]))
-                continue;
-            if (!ucsdata->uni_tbl) {
-                ucsdata->uni_tbl = snewn(256, char *);
-                memset(ucsdata->uni_tbl, 0, 256 * sizeof(char *));
-            }
-            j = ((ucsdata->unitab_line[i] >> 8) & 0xFF);
-            if (!ucsdata->uni_tbl[j]) {
-                ucsdata->uni_tbl[j] = snewn(256, char);
-                memset(ucsdata->uni_tbl[j], 0, 256 * sizeof(char));
-            }
-            ucsdata->uni_tbl[j][ucsdata->unitab_line[i] & 0xFF] = i;
-        }
+        /* Make sure a reverse mapping exists for this code page. */
+        make_reverse_mapping(ucsdata->line_codepage, ucsdata->unitab_line);
     }
 
     /* Find the line control characters. */
@@ -1156,20 +1249,21 @@ void get_unitab(int codepage, wchar_t * unitab, int ftype)
 }
 
 int wc_to_mb(int codepage, int flags, const wchar_t *wcstr, int wclen,
-             char *mbstr, int mblen, const char *defchr,
-             struct unicode_data *ucsdata)
+             char *mbstr, int mblen, const char *defchr)
 {
-    char *p;
-    int i;
-    if (ucsdata && codepage == ucsdata->line_codepage && ucsdata->uni_tbl) {
+    reverse_mapping *rmap = get_reverse_mapping(codepage);
+
+    if (rmap) {
         /* Do this by array lookup if we can. */
         if (wclen < 0) {
             for (wclen = 0; wcstr[wclen++] ;);   /* will include the NUL */
         }
+        char *p;
+        int i;
         for (p = mbstr, i = 0; i < wclen; i++) {
             wchar_t ch = wcstr[i];
             int by;
-            char *p1;
+            const char *p1;
 
 #define WRITECH(chr) do             \
 {                                   \
@@ -1177,8 +1271,7 @@ int wc_to_mb(int codepage, int flags, const wchar_t *wcstr, int wclen,
     *p++ = (char)(chr);             \
 } while (0)
 
-            if (ucsdata->uni_tbl &&
-                (p1 = ucsdata->uni_tbl[(ch >> 8) & 0xFF]) != NULL &&
+            if ((p1 = rmap->blocks[(ch >> 8) & 0xFF]) != NULL &&
                 (by = p1[ch & 0xFF]) != '\0')
                 WRITECH(by);
             else if (ch < 0x80)
diff --git a/windows/window.c b/windows/window.c
index 164e3a90..40eb5c14 100644
--- a/windows/window.c
+++ b/windows/window.c
@@ -473,7 +473,7 @@ static void sw_SetWindowText(HWND hwnd, wchar_t *text)
     if (unicode_window) {
         SetWindowTextW(hwnd, text);
     } else {
-        char *mb = dup_wc_to_mb(DEFAULT_CODEPAGE, 0, text, "?", &ucsdata);
+        char *mb = dup_wc_to_mb(DEFAULT_CODEPAGE, 0, text, "?");
         SetWindowTextA(hwnd, mb);
         sfree(mb);
     }