First draft of Unicode support in pterm.

It's pretty complete: it does UTF-8 copy and paste (falling back to normal strings if necessary), it understands X font encodings and translates things accordingly so that if you have a Unicode font you can ask for virtually any single-byte encoding and get it (Mac-Roman pterm, anyone?), and so on. There's work left to be done (wide fonts for CJK spring to mind), but I reckon this is a pretty good start. [originally from svn r2395]
2025-06-30 11:02:48 -05:00 · 2002-12-31 12:20:34 +00:00
parent 241570c04f
commit ad2bbc52a4
20 changed files with 2836 additions and 50 deletions
--- a/unix/pterm.1
+++ b/unix/pterm.1
@ -90,6 +90,20 @@ to specify it explicitly if you have changed the default using the
 .IP "\fB\-log\fP \fIfilename\fP"
 This option makes \fIpterm\fP log all the terminal output to a file
 as well as displaying it in the terminal.
+.IP "\fB\-cs\fP \fIcharset\fP"
+This option specifies the character set in which \fIpterm\fP should
+assume the session is operating. This character set will be used to
+interpret all the data received from the session, and all input you
+type or paste into \fIpterm\fP will be converted into this character
+set before being sent to the session.
+
+Any character set name which is valid in a MIME header (and
+supported by \fIpterm\fP) should be valid here (examples are
+"ISO-8859-1", "windows-1252" or "UTF-8"). Also, any character
+encoding which is valid in an X logical font description should be
+valid ("ibm-cp437", for example).
+
+Character set names are case-insensitive.
 .IP "\fB\-nethack\fP"
 Tells \fIpterm\fP to enable NetHack keypad mode, in which the
 numeric keypad generates the NetHack "hjklyubn" direction keys. This
@ -385,6 +399,14 @@ reset to the very bottom.
 This option should be set to either 0 or 1; the default is 1. When
 set to 1, any activity in the display causes the position of the
 scrollback to be reset to the very bottom.
+.IP "\fBpterm.LineCodePage\fP"
+This option specifies the character set to be used for the session.
+This is the same as the \fB\-cs\fP command-line option.
+.IP "\fBpterm.NoRemoteCharset\fP"
+This option disables the terminal's ability to change its character
+set when it receives escape sequences telling it to. You might need
+to do this to interoperate with programs which incorrectly change
+the character set to something they think is sensible.
 .IP "\fBpterm.BCE\fP"
 This option should be set to either 0 or 1; the default is 1. When
 set to 1, the various control sequences that erase parts of the
--- a/unix/pterm.c
+++ b/unix/pterm.c
@ -24,6 +24,7 @@
 #include <X11/Xutil.h>

 #define PUTTY_DO_GLOBALS	       /* actually _define_ globals */
+
 #include "putty.h"
 #include "terminal.h"

@ -39,18 +40,22 @@ struct gui_data {
    GtkAdjustment *sbar_adjust;
    GdkPixmap *pixmap;
    GdkFont *fonts[2];                 /* normal and bold (for now!) */
+    struct {
+	int charset;
+	int is_wide;
+    } fontinfo[2];
    GdkCursor *rawcursor, *textcursor, *blankcursor, *currcursor;
    GdkColor cols[NCOLOURS];
    GdkColormap *colmap;
    wchar_t *pastein_data;
    int pastein_data_len;
-    char *pasteout_data;
-    int pasteout_data_len;
+    char *pasteout_data, *pasteout_data_utf8;
+    int pasteout_data_len, pasteout_data_utf8_len;
    int font_width, font_height;
    int ignore_sbar;
    int mouseptr_visible;
    guint term_paste_idle_id;
-    GdkAtom compound_text_atom;
+    GdkAtom compound_text_atom, utf8_string_atom;
    int alt_keycode;
    int alt_digits;
    char wintitle[sizeof(((Config *)0)->wintitle)];
@ -831,7 +836,19 @@ gint key_event(GtkWidget *widget, GdkEventKey *event, gpointer data)
 	printf("\n");
 #endif

-	ldisc_send(inst->ldisc, output+start, end-start, 1);
+	/*
+	 * The stuff we've just generated is assumed to be
+	 * ISO-8859-1! This sounds insane, but `man XLookupString'
+	 * agrees: strings of this type returned from the X server
+	 * are hardcoded to 8859-1. Strictly speaking we should be
+	 * doing this using some sort of GtkIMContext, which (if
+	 * we're lucky) would give us our data directly in Unicode;
+	 * but that's not supported in GTK 1.2 as far as I can
+	 * tell, and it's poorly documented even in 2.0, so it'll
+	 * have to wait.
+	 */
+	lpage_send(inst->ldisc, CS_ISO8859_1, output+start, end-start, 1);
+
 	show_mouseptr(inst, 0);
 	term_seen_key_event(inst->term);
 	term_out(inst->term);
@ -1198,9 +1215,26 @@ void write_clip(void *frontend, wchar_t * data, int len, int must_deselect)
    struct gui_data *inst = (struct gui_data *)frontend;
    if (inst->pasteout_data)
 	sfree(inst->pasteout_data);
+    if (inst->pasteout_data_utf8)
+	sfree(inst->pasteout_data_utf8);
+
+    inst->pasteout_data_utf8 = smalloc(len*6);
+    inst->pasteout_data_utf8_len = len*6;
+    {
+	wchar_t *tmp = data;
+	int tmplen = len;
+	inst->pasteout_data_utf8_len =
+	    charset_from_unicode(&tmp, &tmplen, inst->pasteout_data_utf8,
+				 inst->pasteout_data_utf8_len,
+				 CS_UTF8, NULL, NULL, 0);
+	inst->pasteout_data_utf8 =
+	    srealloc(inst->pasteout_data_utf8, inst->pasteout_data_utf8_len);
+    }
+
    inst->pasteout_data = smalloc(len);
    inst->pasteout_data_len = len;
-    wc_to_mb(0, 0, data, len, inst->pasteout_data, inst->pasteout_data_len,
+    wc_to_mb(line_codepage, 0, data, len,
+	     inst->pasteout_data, inst->pasteout_data_len,
 	     NULL, NULL);

    if (gtk_selection_owner_set(inst->area, GDK_SELECTION_PRIMARY,
@ -1209,6 +1243,8 @@ void write_clip(void *frontend, wchar_t * data, int len, int must_deselect)
 				 GDK_SELECTION_TYPE_STRING, 1);
 	gtk_selection_add_target(inst->area, GDK_SELECTION_PRIMARY,
 				 inst->compound_text_atom, 1);
+	gtk_selection_add_target(inst->area, GDK_SELECTION_PRIMARY,
+				 inst->utf8_string_atom, 1);
    }
 }

@ -1216,8 +1252,13 @@ void selection_get(GtkWidget *widget, GtkSelectionData *seldata,
 		   guint info, guint time_stamp, gpointer data)
 {
    struct gui_data *inst = (struct gui_data *)data;
-    gtk_selection_data_set(seldata, GDK_SELECTION_TYPE_STRING, 8,
-			   inst->pasteout_data, inst->pasteout_data_len);
+    if (seldata->target == inst->utf8_string_atom)
+	gtk_selection_data_set(seldata, seldata->target, 8,
+			       inst->pasteout_data_utf8,
+			       inst->pasteout_data_utf8_len);
+    else
+	gtk_selection_data_set(seldata, seldata->target, 8,
+			       inst->pasteout_data, inst->pasteout_data_len);
 }

 gint selection_clear(GtkWidget *widget, GdkEventSelection *seldata,
@ -1227,8 +1268,12 @@ gint selection_clear(GtkWidget *widget, GdkEventSelection *seldata,
    term_deselect(inst->term);
    if (inst->pasteout_data)
 	sfree(inst->pasteout_data);
+    if (inst->pasteout_data_utf8)
+	sfree(inst->pasteout_data_utf8);
    inst->pasteout_data = NULL;
    inst->pasteout_data_len = 0;
+    inst->pasteout_data_utf8 = NULL;
+    inst->pasteout_data_utf8_len = 0;
    return TRUE;
 }

@ -1240,8 +1285,16 @@ void request_paste(void *frontend)
     * moment is to call gtk_selection_convert(), and when the data
     * comes back _then_ we can call term_do_paste().
     */
+
+    /*
+     * First we attempt to retrieve the selection as a UTF-8 string
+     * (which we will convert to the correct code page before
+     * sending to the session, of course). If that fails,
+     * selection_received() will be informed and will fall back to
+     * an ordinary string.
+     */
    gtk_selection_convert(inst->area, GDK_SELECTION_PRIMARY,
-			  GDK_SELECTION_TYPE_STRING, GDK_CURRENT_TIME);
+			  inst->utf8_string_atom, GDK_CURRENT_TIME);
 }

 gint idle_paste_func(gpointer data);   /* forward ref */
@ -1251,8 +1304,22 @@ void selection_received(GtkWidget *widget, GtkSelectionData *seldata,
 {
    struct gui_data *inst = (struct gui_data *)data;

+    if (seldata->target == inst->utf8_string_atom && seldata->length <= 0) {
+	/*
+	 * Failed to get a UTF-8 selection string. Try an ordinary
+	 * string.
+	 */
+	gtk_selection_convert(inst->area, GDK_SELECTION_PRIMARY,
+			      GDK_SELECTION_TYPE_STRING, GDK_CURRENT_TIME);
+	return;
+    }
+
+    /*
+     * Any other failure should just go foom.
+     */
    if (seldata->length <= 0 ||
-	seldata->type != GDK_SELECTION_TYPE_STRING)
+	(seldata->type != GDK_SELECTION_TYPE_STRING &&
+	 seldata->type != inst->utf8_string_atom))
 	return;			       /* Nothing happens. */

    if (inst->pastein_data)
@ -1260,8 +1327,11 @@ void selection_received(GtkWidget *widget, GtkSelectionData *seldata,

    inst->pastein_data = smalloc(seldata->length * sizeof(wchar_t));
    inst->pastein_data_len = seldata->length;
-    mb_to_wc(0, 0, seldata->data, seldata->length,
-	     inst->pastein_data, inst->pastein_data_len);
+    inst->pastein_data_len =
+	mb_to_wc((seldata->type == inst->utf8_string_atom ?
+		  CS_UTF8 : line_codepage),
+		 0, seldata->data, seldata->length,
+		 inst->pastein_data, inst->pastein_data_len);

    term_do_paste(inst->term);

@ -1457,10 +1527,45 @@ void do_text_internal(Context ctx, int x, int y, char *text, int len,
 		       rlen*inst->font_width, inst->font_height);

    gdk_gc_set_foreground(gc, &inst->cols[nfg]);
-    gdk_draw_text(inst->pixmap, inst->fonts[fontid], gc,
-		  x*inst->font_width+cfg.window_border,
-		  y*inst->font_height+cfg.window_border+inst->fonts[0]->ascent,
-		  text, len);
+    {
+	GdkWChar *gwcs;
+	gchar *gcs;
+	wchar_t *wcs;
+	int i;
+
+	wcs = smalloc(sizeof(wchar_t) * (len+1));
+	for (i = 0; i < len; i++) {
+	    wcs[i] = (wchar_t) ((attr & CSET_MASK) + (text[i] & CHAR_MASK));
+	}
+
+	if (inst->fontinfo[fontid].is_wide) {
+	    gwcs = smalloc(sizeof(GdkWChar) * (len+1));
+	    /*
+	     * FIXME: when we have a wide-char equivalent of
+	     * from_unicode, use it instead of this.
+	     */
+	    for (i = 0; i <= len; i++)
+		gwcs[i] = wcs[i];
+	    gdk_draw_text_wc(inst->pixmap, inst->fonts[fontid], gc,
+			     x*inst->font_width+cfg.window_border,
+			     y*inst->font_height+cfg.window_border+inst->fonts[0]->ascent,
+			     gwcs, len*2);
+	    sfree(gwcs);
+	} else {
+	    wchar_t *wcstmp = wcs;
+	    int lentmp = len;
+	    gcs = smalloc(sizeof(GdkWChar) * (len+1));
+	    charset_from_unicode(&wcstmp, &lentmp, gcs, len,
+				 inst->fontinfo[fontid].charset,
+				 NULL, ".", 1);
+	    gdk_draw_text(inst->pixmap, inst->fonts[fontid], gc,
+			  x*inst->font_width+cfg.window_border,
+			  y*inst->font_height+cfg.window_border+inst->fonts[0]->ascent,
+			  gcs, len);
+	    sfree(gcs);
+	}
+	sfree(wcs);
+    }

    if (shadow) {
 	gdk_draw_text(inst->pixmap, inst->fonts[fontid], gc,
@ -1818,6 +1923,12 @@ int do_cmdline(int argc, char **argv, int do_everything)
 	    strncpy(cfg.boldfont, val, sizeof(cfg.boldfont));
 	    cfg.boldfont[sizeof(cfg.boldfont)-1] = '\0';

+	} else if (!strcmp(p, "-cs")) {
+	    EXPECTS_ARG;
+	    SECOND_PASS_ONLY;
+	    strncpy(cfg.line_codepage, val, sizeof(cfg.line_codepage));
+	    cfg.line_codepage[sizeof(cfg.line_codepage)-1] = '\0';
+
 	} else if (!strcmp(p, "-geometry")) {
 	    int flags, x, y, w, h;
 	    EXPECTS_ARG;
@ -1955,6 +2066,68 @@ static void block_signal(int sig, int block_it) {
  }
 }

+/*
+ * Work out the character encoding of the X font inst->fonts[fontid]
+ * and record it in inst->fontinfo[fontid].
+ *
+ * The encoding is read from the font's CHARSET_REGISTRY and
+ * CHARSET_ENCODING properties, joined as "registry-encoding" and
+ * looked up with charset_from_xenc(). If either property is missing,
+ * or either atom name cannot be retrieved, the entry is left as
+ * charset CS_NONE with is_wide 0.
+ */
+static void set_font_info(struct gui_data *inst, int fontid)
+{
+    GdkFont *font = inst->fonts[fontid];
+    XFontStruct *xfs = GDK_FONT_XFONT(font);
+    Display *disp = GDK_FONT_XDISPLAY(font);
+    Atom charset_registry, charset_encoding;
+    unsigned long registry_ret, encoding_ret;
+    charset_registry = XInternAtom(disp, "CHARSET_REGISTRY", False);
+    charset_encoding = XInternAtom(disp, "CHARSET_ENCODING", False);
+    inst->fontinfo[fontid].charset = CS_NONE;
+    inst->fontinfo[fontid].is_wide = 0;
+    if (XGetFontProperty(xfs, charset_registry, &registry_ret) &&
+	XGetFontProperty(xfs, charset_encoding, &encoding_ret)) {
+	char *reg, *enc;
+	reg = XGetAtomName(disp, (Atom)registry_ret);
+	enc = XGetAtomName(disp, (Atom)encoding_ret);
+	if (reg && enc) {
+	    char *encoding = dupcat(reg, "-", enc, NULL);
+	    inst->fontinfo[fontid].charset = charset_from_xenc(encoding);
+	    /* FIXME: when libcharset supports wide encodings fix this. */
+	    if (!strcasecmp(encoding, "iso10646-1"))
+		inst->fontinfo[fontid].is_wide = 1;
+
+	    /*
+	     * Hack for X line-drawing characters: if the primary
+	     * font is encoded as ISO-8859-anything, and has valid
+	     * glyphs in the first 32 char positions, it is assumed
+	     * that those glyphs are the VT100 line-drawing
+	     * character set.
+	     *
+	     * Actually, we'll hack even harder by only checking
+	     * position 0x19 (vertical line, VT100 linedrawing
+	     * `x'). Then we can check it easily by seeing if the
+	     * ascent and descent differ.
+	     *
+	     * NOTE(review): the probe below measures '\x12', not
+	     * 0x19 -- confirm which position is intended.
+	     */
+	    if (inst->fontinfo[fontid].charset == CS_ISO8859_1) {
+		int lb, rb, wid, asc, desc;
+		gchar text[2];
+
+		text[1] = '\0';
+		text[0] = '\x12';
+		gdk_string_extents(inst->fonts[fontid], text,
+				   &lb, &rb, &wid, &asc, &desc);
+		if (asc != desc)
+		    inst->fontinfo[fontid].charset = CS_ISO8859_1_X11;
+	    }
+
+	    /*
+	     * FIXME: this is a hack. Currently fonts with
+	     * incomprehensible encodings are dealt with by
+	     * pretending they're 8859-1. It's ugly, but it's good
+	     * enough to stop things crashing. Should do something
+	     * better here.
+	     */
+	    if (inst->fontinfo[fontid].charset == CS_NONE)
+		inst->fontinfo[fontid].charset = CS_ISO8859_1;
+
+	    sfree(encoding);
+	}
+
+	/*
+	 * The strings returned by XGetAtomName belong to us and have
+	 * to be released with XFree, which must not be handed NULL.
+	 * Free each one independently so that nothing leaks when only
+	 * one of the two lookups succeeded.
+	 */
+	if (reg)
+	    XFree(reg);
+	if (enc)
+	    XFree(enc);
+    }
+}
+
 int main(int argc, char **argv)
 {
    extern int pty_master_fd;	       /* declared in pty.c */
@ -1987,6 +2160,7 @@ int main(int argc, char **argv)
 	fprintf(stderr, "pterm: unable to load font \"%s\"\n", cfg.font);
 	exit(1);
    }
+    set_font_info(inst, 0);
    if (cfg.boldfont[0]) {
 	inst->fonts[1] = gdk_font_load(cfg.boldfont);
 	if (!inst->fonts[1]) {
@ -1994,6 +2168,7 @@ int main(int argc, char **argv)
 		    cfg.boldfont);
 	    exit(1);
 	}
+	set_font_info(inst, 1);
    } else
 	inst->fonts[1] = NULL;

@ -2001,6 +2176,7 @@ int main(int argc, char **argv)
    inst->font_height = inst->fonts[0]->ascent + inst->fonts[0]->descent;

    inst->compound_text_atom = gdk_atom_intern("COMPOUND_TEXT", FALSE);
+    inst->utf8_string_atom = gdk_atom_intern("UTF8_STRING", FALSE);

    init_ucs();

--- a/unix/unix.h
+++ b/unix/unix.h
@ -1,6 +1,8 @@
 #ifndef PUTTY_UNIX_H
 #define PUTTY_UNIX_H

+#include "charset.h"
+
 typedef void *Context;                 /* FIXME: probably needs changing */

 extern Backend pty_backend;
@ -47,7 +49,16 @@ int select_result(int fd, int event);
 int first_socket(int *state, int *rwx);
 int next_socket(int *state, int *rwx);

-#define DEFAULT_CODEPAGE 0	       /* FIXME: no idea how to do this */
+/*
+ * In the Unix Unicode layer, DEFAULT_CODEPAGE is a special value
+ * which causes mb_to_wc and wc_to_mb to call _libc_ rather than
+ * libcharset. That way, we can interface the various charsets
+ * supported by libcharset with the one supported by mbstowcs and
+ * wcstombs (which will be the character set in which stuff read
+ * from the command line or config files is assumed to be encoded).
+ */
+#define DEFAULT_CODEPAGE 0xFFFF
+#define CP_UTF8 CS_UTF8		       /* from libcharset */

 #define strnicmp strncasecmp
 #define stricmp strcasecmp
--- a/unix/uxucs.c
+++ b/unix/uxucs.c
@ -1,17 +1,18 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <ctype.h>
+#include <locale.h>
+#include <limits.h>
+#include <wchar.h>

 #include <time.h>
+
 #include "putty.h"
 #include "terminal.h"
 #include "misc.h"

 /*
 * Unix Unicode-handling routines.
- * 
- * FIXME: currently trivial stub versions assuming all codepages
- * are ISO8859-1.
 */

 int is_dbcs_leadbyte(int codepage, char byte)
@ -22,48 +23,151 @@ int is_dbcs_leadbyte(int codepage, char byte)
 int mb_to_wc(int codepage, int flags, char *mbstr, int mblen,
 	     wchar_t *wcstr, int wclen)
 {
-    int ret = 0;
-    while (mblen > 0 && wclen > 0) {
-	*wcstr++ = (unsigned char) *mbstr++;
-	mblen--, wclen--, ret++;
-    }
-    return ret;			       /* FIXME: check error codes! */
+    if (codepage == DEFAULT_CODEPAGE) {
+	int n = 0;
+	mbstate_t state = { 0 };
+
+	setlocale(LC_CTYPE, "");
+
+	while (mblen > 0) {
+	    size_t i = mbrtowc(wcstr+n, mbstr, (size_t)mblen, &state);
+	    if (i == (size_t)-1 || i == (size_t)-2)
+		break;
+	    n++;
+	    mbstr += i;
+	    mblen -= i;
+	}
+
+	setlocale(LC_CTYPE, "C");
+
+	return n;
+    } else
+	return charset_to_unicode(&mbstr, &mblen, wcstr, wclen, codepage,
+				  NULL, NULL, 0);
 }

 int wc_to_mb(int codepage, int flags, wchar_t *wcstr, int wclen,
 	     char *mbstr, int mblen, char *defchr, int *defused)
 {
-    int ret = 0;
+    /* FIXME: we should remove the defused param completely... */
    if (defused)
 	*defused = 0;
-    while (mblen > 0 && wclen > 0) {
-	if (*wcstr >= 0x100) {
-	    if (defchr)
-		*mbstr++ = *defchr;
-	    else
-		*mbstr++ = '.';
-	    if (defused)
-		*defused = 1;
-	} else
-	    *mbstr++ = (unsigned char) *wcstr;
-	wcstr++;
-	mblen--, wclen--, ret++;
-    }
-    return ret;			       /* FIXME: check error codes! */
+
+    if (codepage == DEFAULT_CODEPAGE) {
+	char output[MB_LEN_MAX];
+	mbstate_t state = { 0 };
+	int n = 0;
+
+	setlocale(LC_CTYPE, "");
+
+	while (wclen > 0) {
+	    int i = wcrtomb(output, wcstr[0], &state);
+	    if (i == (size_t)-1 || i > n - mblen)
+		break;
+	    memcpy(mbstr+n, output, i);
+	    n += i;
+	    wcstr++;
+	    wclen--;
+	}
+
+	setlocale(LC_CTYPE, "C");
+
+	return n;
+    } else
+	return charset_from_unicode(&wcstr, &wclen, mbstr, mblen, codepage,
+				    NULL, NULL, 0);
 }

+/*
+ * Set up the character-set state used by the terminal:
+ *
+ *  - font_codepage is set to -1;
+ *  - line_codepage is decoded from cfg.line_codepage, first as a
+ *    MIME charset name and then as an X encoding name, falling
+ *    back to CS_ISO8859_1 if neither lookup recognises it;
+ *  - unitab_line, unitab_xterm, unitab_scoacs and unitab_ctrl are
+ *    filled in, one entry per byte value 0..255.
+ */
 void init_ucs(void)
 {
     int i;
-    /* Find the line control characters. FIXME: this is not right. */
+
+    /*
+     * In the platform-independent parts of the code, font_codepage
+     * is used only for system DBCS support - which we don't
+     * support at all. So we set this to something which will never
+     * be used.
+     */
+    font_codepage = -1;
+
+    /*
+     * line_codepage should be decoded from the specification in
+     * cfg.
+     */
+    line_codepage = charset_from_mimeenc(cfg.line_codepage);
+    if (line_codepage == CS_NONE)
+	line_codepage = charset_from_xenc(cfg.line_codepage);
+    /* If it's still CS_NONE, we should assume direct-to-font. */
+
+    /* FIXME: this is a hack. Currently fonts with incomprehensible
+     * encodings are dealt with by pretending they're 8859-1. It's
+     * ugly, but it's good enough to stop things crashing. Should do
+     * something better here. */
+    if (line_codepage == CS_NONE)
+	line_codepage = CS_ISO8859_1;
+
+    /*
+     * Set up unitab_line, by translating each individual character
+     * in the line codepage into Unicode. A byte that does not
+     * convert to exactly one character is recorded as 0xFFFD.
+     */
+    for (i = 0; i < 256; i++) {
+	char c[1], *p;
+	wchar_t wc[1];
+	int len;
+	c[0] = i;
+	p = c;
+	len = 1;
+	if (1 == charset_to_unicode(&p,&len,wc,1,line_codepage,NULL,L"",0))
+	    unitab_line[i] = wc[0];
+	else
+	    unitab_line[i] = 0xFFFD;
+    }
+
+    /*
+     * Set up unitab_xterm. This is the same as unitab_line except
+     * in the line-drawing regions, where it follows the Unicode
+     * encoding.
+     * 
+     * (Note that the strange X encoding of line-drawing characters
+     * in the bottom 32 glyphs of ISO8859-1 fonts is taken care of
+     * by the font encoding, which will spot such a font and act as
+     * if it were in a variant encoding of ISO8859-1.)
+     */
+    for (i = 0; i < 256; i++) {
+	/* Replacements for bytes 0x5F..0x7E, indexed by (byte & 0x1F). */
+	static const wchar_t unitab_xterm_std[32] = {
+	    0x2666, 0x2592, 0x2409, 0x240c, 0x240d, 0x240a, 0x00b0, 0x00b1,
+	    0x2424, 0x240b, 0x2518, 0x2510, 0x250c, 0x2514, 0x253c, 0x23ba,
+	    0x23bb, 0x2500, 0x23bc, 0x23bd, 0x251c, 0x2524, 0x2534, 0x252c,
+	    0x2502, 0x2264, 0x2265, 0x03c0, 0x2260, 0x00a3, 0x00b7, 0x0020
+	};
+	if (i >= 0x5F && i < 0x7F)
+	    unitab_xterm[i] = unitab_xterm_std[i & 0x1F];
+	else
+	    unitab_xterm[i] = unitab_line[i];
+    }
+
+    /*
+     * Set up unitab_scoacs. The SCO Alternate Character Set is
+     * simply CP437. A byte that does not convert to exactly one
+     * character is recorded as 0xFFFD.
+     */
+    for (i = 0; i < 256; i++) {
+	char c[1], *p;
+	wchar_t wc[1];
+	int len;
+	c[0] = i;
+	p = c;
+	len = 1;
+	if (1 == charset_to_unicode(&p,&len,wc,1,CS_CP437,NULL,L"",0))
+	    unitab_scoacs[i] = wc[0];
+	else
+	    unitab_scoacs[i] = 0xFFFD;
+    }
+
+    /*
+     * Find the line control characters: a byte whose Unicode value
+     * (per unitab_line) is below space, or in the range 0x7F..0x9F,
+     * maps to itself in unitab_ctrl; every other byte maps to 0xFF.
+     */
     for (i = 0; i < 256; i++)
-	if (i < ' ' || (i >= 0x7F && i < 0xA0))
+	if (unitab_line[i] < ' '
+	    || (unitab_line[i] >= 0x7F && unitab_line[i] < 0xA0))
 	    unitab_ctrl[i] = i;
 	else
 	    unitab_ctrl[i] = 0xFF;
-
-    for (i = 0; i < 256; i++) {
-	unitab_line[i] = unitab_scoacs[i] = i;
-	unitab_xterm[i] = (i >= 0x5F && i < 0x7F) ? ((i+1) & 0x1F) : i;
-    }
 }