diff --git a/putty.h b/putty.h index 113d355e..c956d508 100644 --- a/putty.h +++ b/putty.h @@ -589,7 +589,7 @@ extern char ver[]; #ifndef CP_UTF8 #define CP_UTF8 65001 #endif -void init_ucs(void); +/* void init_ucs(void); -- this is now in platform-specific headers */ int is_dbcs_leadbyte(int codepage, char byte); int mb_to_wc(int codepage, int flags, char *mbstr, int mblen, wchar_t *wcstr, int wclen); diff --git a/unix/pterm.1 b/unix/pterm.1 index e08d033a..bbdb081a 100644 --- a/unix/pterm.1 +++ b/unix/pterm.1 @@ -110,6 +110,10 @@ supported by \fIpterm\fP) should be valid here (examples are encoding which is valid in an X logical font description should be valid ("ibm-cp437", for example). +\fIpterm\fP's default behaviour is to use the same character +encoding as its primary font. If you supply a Unicode (iso10646-1) +font, it will default to the UTF-8 character set. + Character set names are case-insensitive. .IP "\fB\-nethack\fP" Tells \fIpterm\fP to enable NetHack keypad mode, in which the diff --git a/unix/pterm.c b/unix/pterm.c index fa9fe1df..b7f93f9c 100644 --- a/unix/pterm.c +++ b/unix/pterm.c @@ -836,18 +836,27 @@ gint key_event(GtkWidget *widget, GdkEventKey *event, gpointer data) printf("\n"); #endif - /* - * The stuff we've just generated is assumed to be - * ISO-8859-1! This sounds insane, but `man XLookupString' - * agrees: strings of this type returned from the X server - * are hardcoded to 8859-1. Strictly speaking we should be - * doing this using some sort of GtkIMContext, which (if - * we're lucky) would give us our data directly in Unicode; - * but that's not supported in GTK 1.2 as far as I can - * tell, and it's poorly documented even in 2.0, so it'll - * have to wait. - */ - lpage_send(inst->ldisc, CS_ISO8859_1, output+start, end-start, 1); + if (inst->fontinfo[0].charset != CS_NONE) { + /* + * The stuff we've just generated is assumed to be + * ISO-8859-1! This sounds insane, but `man + * XLookupString' agrees: strings of this type returned + * from the X server are hardcoded to 8859-1. Strictly + * speaking we should be doing this using some sort of + * GtkIMContext, which (if we're lucky) would give us + * our data directly in Unicode; but that's not + * supported in GTK 1.2 as far as I can tell, and it's + * poorly documented even in 2.0, so it'll have to + * wait. + */ + lpage_send(inst->ldisc, CS_ISO8859_1, output+start, end-start, 1); + } else { + /* + * In direct-to-font mode, we just send the string + * exactly as we received it. + */ + ldisc_send(inst->ldisc, output+start, end-start, 1); + } show_mouseptr(inst, 0); term_seen_key_event(inst->term); @@ -1218,17 +1227,25 @@ void write_clip(void *frontend, wchar_t * data, int len, int must_deselect) if (inst->pasteout_data_utf8) sfree(inst->pasteout_data_utf8); - inst->pasteout_data_utf8 = smalloc(len*6); - inst->pasteout_data_utf8_len = len*6; - { + /* + * Set up UTF-8 paste data. This only happens if we aren't in + * direct-to-font mode using the D800 hack. + */ + if (inst->fontinfo[0].charset != CS_NONE) { wchar_t *tmp = data; int tmplen = len; + + inst->pasteout_data_utf8 = smalloc(len*6); + inst->pasteout_data_utf8_len = len*6; inst->pasteout_data_utf8_len = charset_from_unicode(&tmp, &tmplen, inst->pasteout_data_utf8, inst->pasteout_data_utf8_len, CS_UTF8, NULL, NULL, 0); inst->pasteout_data_utf8 = srealloc(inst->pasteout_data_utf8, inst->pasteout_data_utf8_len); + } else { + inst->pasteout_data_utf8 = NULL; + inst->pasteout_data_utf8_len = 0; } inst->pasteout_data = smalloc(len); @@ -1243,8 +1260,9 @@ void write_clip(void *frontend, wchar_t * data, int len, int must_deselect) GDK_SELECTION_TYPE_STRING, 1); gtk_selection_add_target(inst->area, GDK_SELECTION_PRIMARY, inst->compound_text_atom, 1); - gtk_selection_add_target(inst->area, GDK_SELECTION_PRIMARY, - inst->utf8_string_atom, 1); + if (inst->pasteout_data_utf8) + gtk_selection_add_target(inst->area, GDK_SELECTION_PRIMARY, + inst->utf8_string_atom, 1); } } @@ -1286,15 +1304,24 @@ void request_paste(void *frontend) * comes back _then_ we can call term_do_paste(). */ - /* - * First we attempt to retrieve the selection as a UTF-8 string - * (which we will convert to the correct code page before - * sending to the session, of course). If that fails, - * selection_received() will be informed and will fall back to - * an ordinary string. - */ - gtk_selection_convert(inst->area, GDK_SELECTION_PRIMARY, - inst->utf8_string_atom, GDK_CURRENT_TIME); + if (inst->fontinfo[0].charset != CS_NONE) { + /* + * First we attempt to retrieve the selection as a UTF-8 + * string (which we will convert to the correct code page + * before sending to the session, of course). If that + * fails, selection_received() will be informed and will + * fall back to an ordinary string. + */ + gtk_selection_convert(inst->area, GDK_SELECTION_PRIMARY, + inst->utf8_string_atom, GDK_CURRENT_TIME); + } else { + /* + * If we're in direct-to-font mode, we disable UTF-8 + * pasting, and go straight to ordinary string data. + */ + gtk_selection_convert(inst->area, GDK_SELECTION_PRIMARY, + GDK_SELECTION_TYPE_STRING, GDK_CURRENT_TIME); + } } gint idle_paste_func(gpointer data); /* forward ref */ @@ -1562,12 +1589,9 @@ void do_text_internal(Context ctx, int x, int y, char *text, int len, gwcs, len*2); sfree(gwcs); } else { - wchar_t *wcstmp = wcs; - int lentmp = len; gcs = smalloc(sizeof(GdkWChar) * (len+1)); - charset_from_unicode(&wcstmp, &lentmp, gcs, len, - inst->fontinfo[fontid].charset, - NULL, ".", 1); + wc_to_mb(inst->fontinfo[fontid].charset, 0, + wcs, len, gcs, len, ".", NULL); gdk_draw_text(inst->pixmap, inst->fonts[fontid], gc, x*inst->font_width+cfg.window_border, y*inst->font_height+cfg.window_border+inst->fonts[0]->ascent, @@ -2101,13 +2125,20 @@ static void block_signal(int sig, int block_it) { } } -static void set_font_info(struct gui_data *inst, int fontid) +/* + * This function retrieves the character set encoding of a font. It + * returns the character set without the X11 hack (in case the user + * asks to use the font's own encoding). + */ +static int set_font_info(struct gui_data *inst, int fontid) { GdkFont *font = inst->fonts[fontid]; XFontStruct *xfs = GDK_FONT_XFONT(font); Display *disp = GDK_FONT_XDISPLAY(font); Atom charset_registry, charset_encoding; unsigned long registry_ret, encoding_ret; + int retval = CS_NONE; + charset_registry = XInternAtom(disp, "CHARSET_REGISTRY", False); charset_encoding = XInternAtom(disp, "CHARSET_ENCODING", False); inst->fontinfo[fontid].charset = CS_NONE; @@ -2119,10 +2150,13 @@ static void set_font_info(struct gui_data *inst, int fontid) enc = XGetAtomName(disp, (Atom)encoding_ret); if (reg && enc) { char *encoding = dupcat(reg, "-", enc, NULL); - inst->fontinfo[fontid].charset = charset_from_xenc(encoding); + retval = inst->fontinfo[fontid].charset = + charset_from_xenc(encoding); /* FIXME: when libcharset supports wide encodings fix this. */ - if (!strcasecmp(encoding, "iso10646-1")) + if (!strcasecmp(encoding, "iso10646-1")) { inst->fontinfo[fontid].is_wide = 1; + retval = CS_UTF8; + } /* * Hack for X line-drawing characters: if the primary @@ -2148,19 +2182,11 @@ static void set_font_info(struct gui_data *inst, int fontid) inst->fontinfo[fontid].charset = CS_ISO8859_1_X11; } - /* - * FIXME: this is a hack. Currently fonts with - * incomprehensible encodings are dealt with by - * pretending they're 8859-1. It's ugly, but it's good - * enough to stop things crashing. Should do something - * better here. - */ - if (inst->fontinfo[fontid].charset == CS_NONE) - inst->fontinfo[fontid].charset = CS_ISO8859_1; - sfree(encoding); } } + + return retval; } int main(int argc, char **argv) @@ -2168,6 +2194,7 @@ int main(int argc, char **argv) extern int pty_master_fd; /* declared in pty.c */ extern void pty_pre_init(void); /* declared in pty.c */ struct gui_data *inst; + int font_charset; /* defer any child exit handling until we're ready to deal with * it */ @@ -2195,7 +2222,7 @@ int main(int argc, char **argv) fprintf(stderr, "pterm: unable to load font \"%s\"\n", cfg.font); exit(1); } - set_font_info(inst, 0); + font_charset = set_font_info(inst, 0); if (cfg.boldfont[0]) { inst->fonts[1] = gdk_font_load(cfg.boldfont); if (!inst->fonts[1]) { @@ -2233,7 +2260,7 @@ int main(int argc, char **argv) inst->compound_text_atom = gdk_atom_intern("COMPOUND_TEXT", FALSE); inst->utf8_string_atom = gdk_atom_intern("UTF8_STRING", FALSE); - init_ucs(); + init_ucs(font_charset); inst->window = gtk_window_new(GTK_WINDOW_TOPLEVEL); diff --git a/unix/unix.h b/unix/unix.h index 9aa044d1..ab5dc88c 100644 --- a/unix/unix.h +++ b/unix/unix.h @@ -66,4 +66,9 @@ int next_socket(int *state, int *rwx); /* BSD-semantics version of signal() */ void (*putty_signal(int sig, void (*func)(int)))(int); +/* + * Exports from unicode.c. + */ +void init_ucs(int font_charset); + #endif diff --git a/unix/uxucs.c b/unix/uxucs.c index 2bf65a8a..928acae9 100644 --- a/unix/uxucs.c +++ b/unix/uxucs.c @@ -40,6 +40,17 @@ int mb_to_wc(int codepage, int flags, char *mbstr, int mblen, setlocale(LC_CTYPE, "C"); + return n; + } else if (codepage == CS_NONE) { + int n = 0; + + while (mblen > 0) { + wcstr[n] = 0xD800 | (mbstr[0] & 0xFF); + n++; + mbstr++; + mblen--; + } + return n; } else return charset_to_unicode(&mbstr, &mblen, wcstr, wclen, codepage, @@ -73,12 +84,24 @@ int wc_to_mb(int codepage, int flags, wchar_t *wcstr, int wclen, setlocale(LC_CTYPE, "C"); return n; - } else + } else if (codepage == CS_NONE) { + int n = 0; + while (wclen > 0 && n < mblen) { + if (*wcstr >= 0xD800 && *wcstr < 0xD900) + mbstr[n++] = (*wcstr & 0xFF); + else if (defchr) + mbstr[n++] = *defchr; + wcstr++; + wclen--; + } + return n; + } else { return charset_from_unicode(&wcstr, &wclen, mbstr, mblen, codepage, NULL, NULL, 0); + } } -void init_ucs(void) +void init_ucs(int font_charset) { int i; @@ -97,14 +120,16 @@ void init_ucs(void) line_codepage = charset_from_mimeenc(cfg.line_codepage); if (line_codepage == CS_NONE) line_codepage = charset_from_xenc(cfg.line_codepage); - /* If it's still CS_NONE, we should assume direct-to-font. */ - /* FIXME: this is a hack. Currently fonts with incomprehensible - * encodings are dealt with by pretending they're 8859-1. It's - * ugly, but it's good enough to stop things crashing. Should do - * something better here. */ + /* + * If line_codepage is _still_ CS_NONE, we assume we're using + * the font's own encoding. This has been passed in to us, so + * we use that. If it's still CS_NONE after _that_ - i.e. the + * font we were given had an incomprehensible charset - then we + * fall back to using the D800 page. + */ if (line_codepage == CS_NONE) - line_codepage = CS_ISO8859_1; + line_codepage = font_charset; /* * Set up unitab_line, by translating each individual character @@ -117,7 +142,10 @@ void init_ucs(void) c[0] = i; p = c; len = 1; - if (1 == charset_to_unicode(&p,&len,wc,1,line_codepage,NULL,L"",0)) + if (line_codepage == CS_NONE) + unitab_line[i] = 0xD800 | i; + else if (1 == charset_to_unicode(&p, &len, wc, 1, line_codepage, + NULL, L"", 0)) unitab_line[i] = wc[0]; else unitab_line[i] = 0xFFFD; @@ -157,17 +185,25 @@ void init_ucs(void) c[0] = i; p = c; len = 1; - if (1 == charset_to_unicode(&p,&len,wc,1,CS_CP437,NULL,L"",0)) + if (1 == charset_to_unicode(&p, &len, wc, 1, CS_CP437, NULL, L"", 0)) unitab_scoacs[i] = wc[0]; else unitab_scoacs[i] = 0xFFFD; } - /* Find the line control characters. */ - for (i = 0; i < 256; i++) - if (unitab_line[i] < ' ' - || (unitab_line[i] >= 0x7F && unitab_line[i] < 0xA0)) + /* + * Find the control characters in the line codepage. For + * direct-to-font mode using the D800 hack, we assume 00-1F and + * 7F are controls, but allow 80-9F through. (It's as good a + * guess as anything; and my bet is that half the weird fonts + * used in this way will be IBM or MS code pages anyway.) + */ + for (i = 0; i < 256; i++) { + int lineval = unitab_line[i]; + if (lineval < ' ' || (lineval >= 0x7F && lineval < 0xA0) || + (lineval >= 0xD800 && lineval < 0xD820) || (lineval == 0xD87F)) unitab_ctrl[i] = i; else unitab_ctrl[i] = 0xFF; + } } diff --git a/winstuff.h b/winstuff.h index 06fb9440..e2d700ee 100644 --- a/winstuff.h +++ b/winstuff.h @@ -189,4 +189,9 @@ void force_normal(HWND hwnd); void UpdateSizeTip(HWND src, int cx, int cy); void EnableSizeTip(int bEnable); +/* + * Exports from unicode.c. + */ +void init_ucs(void); + #endif