mirror of
https://git.tartarus.org/simon/putty.git
synced 2025-06-30 11:02:48 -05:00
First draft of Unicode support in pterm. It's pretty complete: it
does UTF-8 copy and paste (falling back to normal strings if necessary), it understands X font encodings and translates things accordingly so that if you have a Unicode font you can ask for virtually any single-byte encoding and get it (Mac-Roman pterm, anyone?), and so on. There's work left to be done (wide fonts for CJK spring to mind), but I reckon this is a pretty good start. [originally from svn r2395]
This commit is contained in:
22
unix/pterm.1
22
unix/pterm.1
@ -90,6 +90,20 @@ to specify it explicitly if you have changed the default using the
|
||||
.IP "\fB\-log\fP \fIfilename\fP"
|
||||
This option makes \fIpterm\fP log all the terminal output to a file
|
||||
as well as displaying it in the terminal.
|
||||
.IP "\fB\-cs\fP \fIcharset\fP"
|
||||
This option specifies the character set in which \fIpterm\fP should
|
||||
assume the session is operating. This character set will be used to
|
||||
interpret all the data received from the session, and all input you
|
||||
type or paste into \fIpterm\fP will be converted into this character
|
||||
set before being sent to the session.
|
||||
|
||||
Any character set name which is valid in a MIME header (and
|
||||
supported by \fIpterm\fP) should be valid here (examples are
|
||||
"ISO-8859-1", "windows-1252" or "UTF-8"). Also, any character
|
||||
encoding which is valid in an X logical font description should be
|
||||
valid ("ibm-cp437", for example).
|
||||
|
||||
Character set names are case-insensitive.
|
||||
.IP "\fB\-nethack\fP"
|
||||
Tells \fIpterm\fP to enable NetHack keypad mode, in which the
|
||||
numeric keypad generates the NetHack "hjklyubn" direction keys. This
|
||||
@ -385,6 +399,14 @@ reset to the very bottom.
|
||||
This option should be set to either 0 or 1; the default is 1. When
|
||||
set to 1, any activity in the display causes the position of the
|
||||
scrollback to be reset to the very bottom.
|
||||
.IP "\fBpterm.LineCodePage\fP"
|
||||
This option specifies the character set to be used for the session.
|
||||
This is the same as the \fI\-cs\fP command-line option.
|
||||
.IP "\fBpterm.NoRemoteCharset\fP"
|
||||
This option disables the terminal's ability to change its character
|
||||
set when it receives escape sequences telling it to. You might need
|
||||
to do this to interoperate with programs which incorrectly change
|
||||
the character set to something they think is sensible.
|
||||
.IP "\fBpterm.BCE\fP"
|
||||
This option should be set to either 0 or 1; the default is 1. When
|
||||
set to 1, the various control sequences that erase parts of the
|
||||
|
206
unix/pterm.c
206
unix/pterm.c
@ -24,6 +24,7 @@
|
||||
#include <X11/Xutil.h>
|
||||
|
||||
#define PUTTY_DO_GLOBALS /* actually _define_ globals */
|
||||
|
||||
#include "putty.h"
|
||||
#include "terminal.h"
|
||||
|
||||
@ -39,18 +40,22 @@ struct gui_data {
|
||||
GtkAdjustment *sbar_adjust;
|
||||
GdkPixmap *pixmap;
|
||||
GdkFont *fonts[2]; /* normal and bold (for now!) */
|
||||
struct {
|
||||
int charset;
|
||||
int is_wide;
|
||||
} fontinfo[2];
|
||||
GdkCursor *rawcursor, *textcursor, *blankcursor, *currcursor;
|
||||
GdkColor cols[NCOLOURS];
|
||||
GdkColormap *colmap;
|
||||
wchar_t *pastein_data;
|
||||
int pastein_data_len;
|
||||
char *pasteout_data;
|
||||
int pasteout_data_len;
|
||||
char *pasteout_data, *pasteout_data_utf8;
|
||||
int pasteout_data_len, pasteout_data_utf8_len;
|
||||
int font_width, font_height;
|
||||
int ignore_sbar;
|
||||
int mouseptr_visible;
|
||||
guint term_paste_idle_id;
|
||||
GdkAtom compound_text_atom;
|
||||
GdkAtom compound_text_atom, utf8_string_atom;
|
||||
int alt_keycode;
|
||||
int alt_digits;
|
||||
char wintitle[sizeof(((Config *)0)->wintitle)];
|
||||
@ -831,7 +836,19 @@ gint key_event(GtkWidget *widget, GdkEventKey *event, gpointer data)
|
||||
printf("\n");
|
||||
#endif
|
||||
|
||||
ldisc_send(inst->ldisc, output+start, end-start, 1);
|
||||
/*
|
||||
* The stuff we've just generated is assumed to be
|
||||
* ISO-8859-1! This sounds insane, but `man XLookupString'
|
||||
* agrees: strings of this type returned from the X server
|
||||
* are hardcoded to 8859-1. Strictly speaking we should be
|
||||
* doing this using some sort of GtkIMContext, which (if
|
||||
* we're lucky) would give us our data directly in Unicode;
|
||||
* but that's not supported in GTK 1.2 as far as I can
|
||||
* tell, and it's poorly documented even in 2.0, so it'll
|
||||
* have to wait.
|
||||
*/
|
||||
lpage_send(inst->ldisc, CS_ISO8859_1, output+start, end-start, 1);
|
||||
|
||||
show_mouseptr(inst, 0);
|
||||
term_seen_key_event(inst->term);
|
||||
term_out(inst->term);
|
||||
@ -1198,9 +1215,26 @@ void write_clip(void *frontend, wchar_t * data, int len, int must_deselect)
|
||||
struct gui_data *inst = (struct gui_data *)frontend;
|
||||
if (inst->pasteout_data)
|
||||
sfree(inst->pasteout_data);
|
||||
if (inst->pasteout_data_utf8)
|
||||
sfree(inst->pasteout_data_utf8);
|
||||
|
||||
inst->pasteout_data_utf8 = smalloc(len*6);
|
||||
inst->pasteout_data_utf8_len = len*6;
|
||||
{
|
||||
wchar_t *tmp = data;
|
||||
int tmplen = len;
|
||||
inst->pasteout_data_utf8_len =
|
||||
charset_from_unicode(&tmp, &tmplen, inst->pasteout_data_utf8,
|
||||
inst->pasteout_data_utf8_len,
|
||||
CS_UTF8, NULL, NULL, 0);
|
||||
inst->pasteout_data_utf8 =
|
||||
srealloc(inst->pasteout_data_utf8, inst->pasteout_data_utf8_len);
|
||||
}
|
||||
|
||||
inst->pasteout_data = smalloc(len);
|
||||
inst->pasteout_data_len = len;
|
||||
wc_to_mb(0, 0, data, len, inst->pasteout_data, inst->pasteout_data_len,
|
||||
wc_to_mb(line_codepage, 0, data, len,
|
||||
inst->pasteout_data, inst->pasteout_data_len,
|
||||
NULL, NULL);
|
||||
|
||||
if (gtk_selection_owner_set(inst->area, GDK_SELECTION_PRIMARY,
|
||||
@ -1209,6 +1243,8 @@ void write_clip(void *frontend, wchar_t * data, int len, int must_deselect)
|
||||
GDK_SELECTION_TYPE_STRING, 1);
|
||||
gtk_selection_add_target(inst->area, GDK_SELECTION_PRIMARY,
|
||||
inst->compound_text_atom, 1);
|
||||
gtk_selection_add_target(inst->area, GDK_SELECTION_PRIMARY,
|
||||
inst->utf8_string_atom, 1);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1216,8 +1252,13 @@ void selection_get(GtkWidget *widget, GtkSelectionData *seldata,
|
||||
guint info, guint time_stamp, gpointer data)
|
||||
{
|
||||
struct gui_data *inst = (struct gui_data *)data;
|
||||
gtk_selection_data_set(seldata, GDK_SELECTION_TYPE_STRING, 8,
|
||||
inst->pasteout_data, inst->pasteout_data_len);
|
||||
if (seldata->target == inst->utf8_string_atom)
|
||||
gtk_selection_data_set(seldata, seldata->target, 8,
|
||||
inst->pasteout_data_utf8,
|
||||
inst->pasteout_data_utf8_len);
|
||||
else
|
||||
gtk_selection_data_set(seldata, seldata->target, 8,
|
||||
inst->pasteout_data, inst->pasteout_data_len);
|
||||
}
|
||||
|
||||
gint selection_clear(GtkWidget *widget, GdkEventSelection *seldata,
|
||||
@ -1227,8 +1268,12 @@ gint selection_clear(GtkWidget *widget, GdkEventSelection *seldata,
|
||||
term_deselect(inst->term);
|
||||
if (inst->pasteout_data)
|
||||
sfree(inst->pasteout_data);
|
||||
if (inst->pasteout_data_utf8)
|
||||
sfree(inst->pasteout_data_utf8);
|
||||
inst->pasteout_data = NULL;
|
||||
inst->pasteout_data_len = 0;
|
||||
inst->pasteout_data_utf8 = NULL;
|
||||
inst->pasteout_data_utf8_len = 0;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
@ -1240,8 +1285,16 @@ void request_paste(void *frontend)
|
||||
* moment is to call gtk_selection_convert(), and when the data
|
||||
* comes back _then_ we can call term_do_paste().
|
||||
*/
|
||||
|
||||
/*
|
||||
* First we attempt to retrieve the selection as a UTF-8 string
|
||||
* (which we will convert to the correct code page before
|
||||
* sending to the session, of course). If that fails,
|
||||
* selection_received() will be informed and will fall back to
|
||||
* an ordinary string.
|
||||
*/
|
||||
gtk_selection_convert(inst->area, GDK_SELECTION_PRIMARY,
|
||||
GDK_SELECTION_TYPE_STRING, GDK_CURRENT_TIME);
|
||||
inst->utf8_string_atom, GDK_CURRENT_TIME);
|
||||
}
|
||||
|
||||
gint idle_paste_func(gpointer data); /* forward ref */
|
||||
@ -1251,8 +1304,22 @@ void selection_received(GtkWidget *widget, GtkSelectionData *seldata,
|
||||
{
|
||||
struct gui_data *inst = (struct gui_data *)data;
|
||||
|
||||
if (seldata->target == inst->utf8_string_atom && seldata->length <= 0) {
|
||||
/*
|
||||
* Failed to get a UTF-8 selection string. Try an ordinary
|
||||
* string.
|
||||
*/
|
||||
gtk_selection_convert(inst->area, GDK_SELECTION_PRIMARY,
|
||||
GDK_SELECTION_TYPE_STRING, GDK_CURRENT_TIME);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Any other failure should just go foom.
|
||||
*/
|
||||
if (seldata->length <= 0 ||
|
||||
seldata->type != GDK_SELECTION_TYPE_STRING)
|
||||
(seldata->type != GDK_SELECTION_TYPE_STRING &&
|
||||
seldata->type != inst->utf8_string_atom))
|
||||
return; /* Nothing happens. */
|
||||
|
||||
if (inst->pastein_data)
|
||||
@ -1260,8 +1327,11 @@ void selection_received(GtkWidget *widget, GtkSelectionData *seldata,
|
||||
|
||||
inst->pastein_data = smalloc(seldata->length * sizeof(wchar_t));
|
||||
inst->pastein_data_len = seldata->length;
|
||||
mb_to_wc(0, 0, seldata->data, seldata->length,
|
||||
inst->pastein_data, inst->pastein_data_len);
|
||||
inst->pastein_data_len =
|
||||
mb_to_wc((seldata->type == inst->utf8_string_atom ?
|
||||
CS_UTF8 : line_codepage),
|
||||
0, seldata->data, seldata->length,
|
||||
inst->pastein_data, inst->pastein_data_len);
|
||||
|
||||
term_do_paste(inst->term);
|
||||
|
||||
@ -1457,10 +1527,45 @@ void do_text_internal(Context ctx, int x, int y, char *text, int len,
|
||||
rlen*inst->font_width, inst->font_height);
|
||||
|
||||
gdk_gc_set_foreground(gc, &inst->cols[nfg]);
|
||||
gdk_draw_text(inst->pixmap, inst->fonts[fontid], gc,
|
||||
x*inst->font_width+cfg.window_border,
|
||||
y*inst->font_height+cfg.window_border+inst->fonts[0]->ascent,
|
||||
text, len);
|
||||
{
|
||||
GdkWChar *gwcs;
|
||||
gchar *gcs;
|
||||
wchar_t *wcs;
|
||||
int i;
|
||||
|
||||
wcs = smalloc(sizeof(wchar_t) * (len+1));
|
||||
for (i = 0; i < len; i++) {
|
||||
wcs[i] = (wchar_t) ((attr & CSET_MASK) + (text[i] & CHAR_MASK));
|
||||
}
|
||||
|
||||
if (inst->fontinfo[fontid].is_wide) {
|
||||
gwcs = smalloc(sizeof(GdkWChar) * (len+1));
|
||||
/*
|
||||
* FIXME: when we have a wide-char equivalent of
|
||||
* from_unicode, use it instead of this.
|
||||
*/
|
||||
for (i = 0; i <= len; i++)
|
||||
gwcs[i] = wcs[i];
|
||||
gdk_draw_text_wc(inst->pixmap, inst->fonts[fontid], gc,
|
||||
x*inst->font_width+cfg.window_border,
|
||||
y*inst->font_height+cfg.window_border+inst->fonts[0]->ascent,
|
||||
gwcs, len*2);
|
||||
sfree(gwcs);
|
||||
} else {
|
||||
wchar_t *wcstmp = wcs;
|
||||
int lentmp = len;
|
||||
gcs = smalloc(sizeof(GdkWChar) * (len+1));
|
||||
charset_from_unicode(&wcstmp, &lentmp, gcs, len,
|
||||
inst->fontinfo[fontid].charset,
|
||||
NULL, ".", 1);
|
||||
gdk_draw_text(inst->pixmap, inst->fonts[fontid], gc,
|
||||
x*inst->font_width+cfg.window_border,
|
||||
y*inst->font_height+cfg.window_border+inst->fonts[0]->ascent,
|
||||
gcs, len);
|
||||
sfree(gcs);
|
||||
}
|
||||
sfree(wcs);
|
||||
}
|
||||
|
||||
if (shadow) {
|
||||
gdk_draw_text(inst->pixmap, inst->fonts[fontid], gc,
|
||||
@ -1818,6 +1923,12 @@ int do_cmdline(int argc, char **argv, int do_everything)
|
||||
strncpy(cfg.boldfont, val, sizeof(cfg.boldfont));
|
||||
cfg.boldfont[sizeof(cfg.boldfont)-1] = '\0';
|
||||
|
||||
} else if (!strcmp(p, "-cs")) {
|
||||
EXPECTS_ARG;
|
||||
SECOND_PASS_ONLY;
|
||||
strncpy(cfg.line_codepage, val, sizeof(cfg.line_codepage));
|
||||
cfg.line_codepage[sizeof(cfg.line_codepage)-1] = '\0';
|
||||
|
||||
} else if (!strcmp(p, "-geometry")) {
|
||||
int flags, x, y, w, h;
|
||||
EXPECTS_ARG;
|
||||
@ -1955,6 +2066,68 @@ static void block_signal(int sig, int block_it) {
|
||||
}
|
||||
}
|
||||
|
||||
static void set_font_info(struct gui_data *inst, int fontid)
|
||||
{
|
||||
GdkFont *font = inst->fonts[fontid];
|
||||
XFontStruct *xfs = GDK_FONT_XFONT(font);
|
||||
Display *disp = GDK_FONT_XDISPLAY(font);
|
||||
Atom charset_registry, charset_encoding;
|
||||
unsigned long registry_ret, encoding_ret;
|
||||
charset_registry = XInternAtom(disp, "CHARSET_REGISTRY", False);
|
||||
charset_encoding = XInternAtom(disp, "CHARSET_ENCODING", False);
|
||||
inst->fontinfo[fontid].charset = CS_NONE;
|
||||
inst->fontinfo[fontid].is_wide = 0;
|
||||
if (XGetFontProperty(xfs, charset_registry, ®istry_ret) &&
|
||||
XGetFontProperty(xfs, charset_encoding, &encoding_ret)) {
|
||||
char *reg, *enc;
|
||||
reg = XGetAtomName(disp, (Atom)registry_ret);
|
||||
enc = XGetAtomName(disp, (Atom)encoding_ret);
|
||||
if (reg && enc) {
|
||||
char *encoding = dupcat(reg, "-", enc, NULL);
|
||||
inst->fontinfo[fontid].charset = charset_from_xenc(encoding);
|
||||
/* FIXME: when libcharset supports wide encodings fix this. */
|
||||
if (!strcasecmp(encoding, "iso10646-1"))
|
||||
inst->fontinfo[fontid].is_wide = 1;
|
||||
|
||||
/*
|
||||
* Hack for X line-drawing characters: if the primary
|
||||
* font is encoded as ISO-8859-anything, and has valid
|
||||
* glyphs in the first 32 char positions, it is assumed
|
||||
* that those glyphs are the VT100 line-drawing
|
||||
* character set.
|
||||
*
|
||||
* Actually, we'll hack even harder by only checking
|
||||
* position 0x19 (vertical line, VT100 linedrawing
|
||||
* `x'). Then we can check it easily by seeing if the
|
||||
* ascent and descent differ.
|
||||
*/
|
||||
if (inst->fontinfo[fontid].charset == CS_ISO8859_1) {
|
||||
int lb, rb, wid, asc, desc;
|
||||
gchar text[2];
|
||||
|
||||
text[1] = '\0';
|
||||
text[0] = '\x12';
|
||||
gdk_string_extents(inst->fonts[fontid], text,
|
||||
&lb, &rb, &wid, &asc, &desc);
|
||||
if (asc != desc)
|
||||
inst->fontinfo[fontid].charset = CS_ISO8859_1_X11;
|
||||
}
|
||||
|
||||
/*
|
||||
* FIXME: this is a hack. Currently fonts with
|
||||
* incomprehensible encodings are dealt with by
|
||||
* pretending they're 8859-1. It's ugly, but it's good
|
||||
* enough to stop things crashing. Should do something
|
||||
* better here.
|
||||
*/
|
||||
if (inst->fontinfo[fontid].charset == CS_NONE)
|
||||
inst->fontinfo[fontid].charset = CS_ISO8859_1;
|
||||
|
||||
sfree(encoding);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
extern int pty_master_fd; /* declared in pty.c */
|
||||
@ -1987,6 +2160,7 @@ int main(int argc, char **argv)
|
||||
fprintf(stderr, "pterm: unable to load font \"%s\"\n", cfg.font);
|
||||
exit(1);
|
||||
}
|
||||
set_font_info(inst, 0);
|
||||
if (cfg.boldfont[0]) {
|
||||
inst->fonts[1] = gdk_font_load(cfg.boldfont);
|
||||
if (!inst->fonts[1]) {
|
||||
@ -1994,6 +2168,7 @@ int main(int argc, char **argv)
|
||||
cfg.boldfont);
|
||||
exit(1);
|
||||
}
|
||||
set_font_info(inst, 1);
|
||||
} else
|
||||
inst->fonts[1] = NULL;
|
||||
|
||||
@ -2001,6 +2176,7 @@ int main(int argc, char **argv)
|
||||
inst->font_height = inst->fonts[0]->ascent + inst->fonts[0]->descent;
|
||||
|
||||
inst->compound_text_atom = gdk_atom_intern("COMPOUND_TEXT", FALSE);
|
||||
inst->utf8_string_atom = gdk_atom_intern("UTF8_STRING", FALSE);
|
||||
|
||||
init_ucs();
|
||||
|
||||
|
13
unix/unix.h
13
unix/unix.h
@ -1,6 +1,8 @@
|
||||
#ifndef PUTTY_UNIX_H
|
||||
#define PUTTY_UNIX_H
|
||||
|
||||
#include "charset.h"
|
||||
|
||||
typedef void *Context; /* FIXME: probably needs changing */
|
||||
|
||||
extern Backend pty_backend;
|
||||
@ -47,7 +49,16 @@ int select_result(int fd, int event);
|
||||
int first_socket(int *state, int *rwx);
|
||||
int next_socket(int *state, int *rwx);
|
||||
|
||||
#define DEFAULT_CODEPAGE 0 /* FIXME: no idea how to do this */
|
||||
/*
|
||||
* In the Unix Unicode layer, DEFAULT_CODEPAGE is a special value
|
||||
* which causes mb_to_wc and wc_to_mb to call _libc_ rather than
|
||||
* libcharset. That way, we can interface the various charsets
|
||||
* supported by libcharset with the one supported by mbstowcs and
|
||||
* wcstombs (which will be the character set in which stuff read
|
||||
* from the command line or config files is assumed to be encoded).
|
||||
*/
|
||||
#define DEFAULT_CODEPAGE 0xFFFF
|
||||
#define CP_UTF8 CS_UTF8 /* from libcharset */
|
||||
|
||||
#define strnicmp strncasecmp
|
||||
#define stricmp strcasecmp
|
||||
|
166
unix/uxucs.c
166
unix/uxucs.c
@ -1,17 +1,18 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <ctype.h>
|
||||
#include <locale.h>
|
||||
#include <limits.h>
|
||||
#include <wchar.h>
|
||||
|
||||
#include <time.h>
|
||||
|
||||
#include "putty.h"
|
||||
#include "terminal.h"
|
||||
#include "misc.h"
|
||||
|
||||
/*
|
||||
* Unix Unicode-handling routines.
|
||||
*
|
||||
* FIXME: currently trivial stub versions assuming all codepages
|
||||
* are ISO8859-1.
|
||||
*/
|
||||
|
||||
int is_dbcs_leadbyte(int codepage, char byte)
|
||||
@ -22,48 +23,151 @@ int is_dbcs_leadbyte(int codepage, char byte)
|
||||
int mb_to_wc(int codepage, int flags, char *mbstr, int mblen,
|
||||
wchar_t *wcstr, int wclen)
|
||||
{
|
||||
int ret = 0;
|
||||
while (mblen > 0 && wclen > 0) {
|
||||
*wcstr++ = (unsigned char) *mbstr++;
|
||||
mblen--, wclen--, ret++;
|
||||
}
|
||||
return ret; /* FIXME: check error codes! */
|
||||
if (codepage == DEFAULT_CODEPAGE) {
|
||||
int n = 0;
|
||||
mbstate_t state = { 0 };
|
||||
|
||||
setlocale(LC_CTYPE, "");
|
||||
|
||||
while (mblen > 0) {
|
||||
size_t i = mbrtowc(wcstr+n, mbstr, (size_t)mblen, &state);
|
||||
if (i == (size_t)-1 || i == (size_t)-2)
|
||||
break;
|
||||
n++;
|
||||
mbstr += i;
|
||||
mblen -= i;
|
||||
}
|
||||
|
||||
setlocale(LC_CTYPE, "C");
|
||||
|
||||
return n;
|
||||
} else
|
||||
return charset_to_unicode(&mbstr, &mblen, wcstr, wclen, codepage,
|
||||
NULL, NULL, 0);
|
||||
}
|
||||
|
||||
int wc_to_mb(int codepage, int flags, wchar_t *wcstr, int wclen,
|
||||
char *mbstr, int mblen, char *defchr, int *defused)
|
||||
{
|
||||
int ret = 0;
|
||||
/* FIXME: we should remove the defused param completely... */
|
||||
if (defused)
|
||||
*defused = 0;
|
||||
while (mblen > 0 && wclen > 0) {
|
||||
if (*wcstr >= 0x100) {
|
||||
if (defchr)
|
||||
*mbstr++ = *defchr;
|
||||
else
|
||||
*mbstr++ = '.';
|
||||
if (defused)
|
||||
*defused = 1;
|
||||
} else
|
||||
*mbstr++ = (unsigned char) *wcstr;
|
||||
wcstr++;
|
||||
mblen--, wclen--, ret++;
|
||||
}
|
||||
return ret; /* FIXME: check error codes! */
|
||||
|
||||
if (codepage == DEFAULT_CODEPAGE) {
|
||||
char output[MB_LEN_MAX];
|
||||
mbstate_t state = { 0 };
|
||||
int n = 0;
|
||||
|
||||
setlocale(LC_CTYPE, "");
|
||||
|
||||
while (wclen > 0) {
|
||||
int i = wcrtomb(output, wcstr[0], &state);
|
||||
if (i == (size_t)-1 || i > n - mblen)
|
||||
break;
|
||||
memcpy(mbstr+n, output, i);
|
||||
n += i;
|
||||
wcstr++;
|
||||
wclen--;
|
||||
}
|
||||
|
||||
setlocale(LC_CTYPE, "C");
|
||||
|
||||
return n;
|
||||
} else
|
||||
return charset_from_unicode(&wcstr, &wclen, mbstr, mblen, codepage,
|
||||
NULL, NULL, 0);
|
||||
}
|
||||
|
||||
void init_ucs(void)
|
||||
{
|
||||
int i;
|
||||
/* Find the line control characters. FIXME: this is not right. */
|
||||
|
||||
/*
|
||||
* In the platform-independent parts of the code, font_codepage
|
||||
* is used only for system DBCS support - which we don't
|
||||
* support at all. So we set this to something which will never
|
||||
* be used.
|
||||
*/
|
||||
font_codepage = -1;
|
||||
|
||||
/*
|
||||
* line_codepage should be decoded from the specification in
|
||||
* cfg.
|
||||
*/
|
||||
line_codepage = charset_from_mimeenc(cfg.line_codepage);
|
||||
if (line_codepage == CS_NONE)
|
||||
line_codepage = charset_from_xenc(cfg.line_codepage);
|
||||
/* If it's still CS_NONE, we should assume direct-to-font. */
|
||||
|
||||
/* FIXME: this is a hack. Currently fonts with incomprehensible
|
||||
* encodings are dealt with by pretending they're 8859-1. It's
|
||||
* ugly, but it's good enough to stop things crashing. Should do
|
||||
* something better here. */
|
||||
if (line_codepage == CS_NONE)
|
||||
line_codepage = CS_ISO8859_1;
|
||||
|
||||
/*
|
||||
* Set up unitab_line, by translating each individual character
|
||||
* in the line codepage into Unicode.
|
||||
*/
|
||||
for (i = 0; i < 256; i++) {
|
||||
char c[1], *p;
|
||||
wchar_t wc[1];
|
||||
int len;
|
||||
c[0] = i;
|
||||
p = c;
|
||||
len = 1;
|
||||
if (1 == charset_to_unicode(&p,&len,wc,1,line_codepage,NULL,L"",0))
|
||||
unitab_line[i] = wc[0];
|
||||
else
|
||||
unitab_line[i] = 0xFFFD;
|
||||
}
|
||||
|
||||
/*
|
||||
* Set up unitab_xterm. This is the same as unitab_line except
|
||||
* in the line-drawing regions, where it follows the Unicode
|
||||
* encoding.
|
||||
*
|
||||
* (Note that the strange X encoding of line-drawing characters
|
||||
* in the bottom 32 glyphs of ISO8859-1 fonts is taken care of
|
||||
* by the font encoding, which will spot such a font and act as
|
||||
* if it were in a variant encoding of ISO8859-1.)
|
||||
*/
|
||||
for (i = 0; i < 256; i++) {
|
||||
static const wchar_t unitab_xterm_std[32] = {
|
||||
0x2666, 0x2592, 0x2409, 0x240c, 0x240d, 0x240a, 0x00b0, 0x00b1,
|
||||
0x2424, 0x240b, 0x2518, 0x2510, 0x250c, 0x2514, 0x253c, 0x23ba,
|
||||
0x23bb, 0x2500, 0x23bc, 0x23bd, 0x251c, 0x2524, 0x2534, 0x252c,
|
||||
0x2502, 0x2264, 0x2265, 0x03c0, 0x2260, 0x00a3, 0x00b7, 0x0020
|
||||
};
|
||||
if (i >= 0x5F && i < 0x7F)
|
||||
unitab_xterm[i] = unitab_xterm_std[i & 0x1F];
|
||||
else
|
||||
unitab_xterm[i] = unitab_line[i];
|
||||
}
|
||||
|
||||
/*
|
||||
* Set up unitab_scoacs. The SCO Alternate Character Set is
|
||||
* simply CP437.
|
||||
*/
|
||||
for (i = 0; i < 256; i++) {
|
||||
char c[1], *p;
|
||||
wchar_t wc[1];
|
||||
int len;
|
||||
c[0] = i;
|
||||
p = c;
|
||||
len = 1;
|
||||
if (1 == charset_to_unicode(&p,&len,wc,1,CS_CP437,NULL,L"",0))
|
||||
unitab_scoacs[i] = wc[0];
|
||||
else
|
||||
unitab_scoacs[i] = 0xFFFD;
|
||||
}
|
||||
|
||||
/* Find the line control characters. */
|
||||
for (i = 0; i < 256; i++)
|
||||
if (i < ' ' || (i >= 0x7F && i < 0xA0))
|
||||
if (unitab_line[i] < ' '
|
||||
|| (unitab_line[i] >= 0x7F && unitab_line[i] < 0xA0))
|
||||
unitab_ctrl[i] = i;
|
||||
else
|
||||
unitab_ctrl[i] = 0xFF;
|
||||
|
||||
for (i = 0; i < 256; i++) {
|
||||
unitab_line[i] = unitab_scoacs[i] = i;
|
||||
unitab_xterm[i] = (i >= 0x5F && i < 0x7F) ? ((i+1) & 0x1F) : i;
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user