mirror of
https://git.tartarus.org/simon/putty.git
synced 2025-02-04 06:02:24 +00:00
Factor out term_out's character set translation.
I've moved it into a subfunction term_translate(), which I'm about to reuse elsewhere. No functional change intended.
This commit is contained in:
parent
deafaa811e
commit
3cb846e70f
280
terminal.c
280
terminal.c
@ -2866,6 +2866,153 @@ static void term_display_graphic_char(Terminal *term, unsigned long c)
|
|||||||
seen_disp_event(term);
|
seen_disp_event(term);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* UCSINCOMPLETE is returned from term_translate if it's successfully
|
||||||
|
* absorbed a byte but not emitted a complete character yet.
|
||||||
|
* UCSTRUNCATED indicates a truncated multibyte sequence (so the
|
||||||
|
* caller emits an error character and then calls term_translate again
|
||||||
|
* with the same input byte). UCSINVALID indicates some other invalid
|
||||||
|
* multibyte sequence, such as an overlong synonym, or a standalone
|
||||||
|
* continuation byte, or a completely illegal thing like 0xFE. These
|
||||||
|
* values are not stored in the terminal data structures at all.
|
||||||
|
*/
|
||||||
|
#define UCSINCOMPLETE 0x8000003FU /* '?' */
|
||||||
|
#define UCSTRUNCATED 0x80000021U /* '!' */
|
||||||
|
#define UCSINVALID 0x8000002AU /* '*' */
|
||||||
|
|
||||||
|
static unsigned long term_translate(Terminal *term, unsigned char c)
|
||||||
|
{
|
||||||
|
if (in_utf(term)) {
|
||||||
|
switch (term->utf_state) {
|
||||||
|
case 0:
|
||||||
|
if (c < 0x80) {
|
||||||
|
/* UTF-8 must be stateless so we ignore iso2022. */
|
||||||
|
if (term->ucsdata->unitab_ctrl[c] != 0xFF) {
|
||||||
|
return term->ucsdata->unitab_ctrl[c];
|
||||||
|
} else if ((term->utf8linedraw) &&
|
||||||
|
(term->cset_attr[term->cset] == CSET_LINEDRW)) {
|
||||||
|
/* Linedraw characters are explicitly enabled */
|
||||||
|
return c | CSET_LINEDRW;
|
||||||
|
} else {
|
||||||
|
return c | CSET_ASCII;
|
||||||
|
}
|
||||||
|
} else if ((c & 0xe0) == 0xc0) {
|
||||||
|
term->utf_size = term->utf_state = 1;
|
||||||
|
term->utf_char = (c & 0x1f);
|
||||||
|
} else if ((c & 0xf0) == 0xe0) {
|
||||||
|
term->utf_size = term->utf_state = 2;
|
||||||
|
term->utf_char = (c & 0x0f);
|
||||||
|
} else if ((c & 0xf8) == 0xf0) {
|
||||||
|
term->utf_size = term->utf_state = 3;
|
||||||
|
term->utf_char = (c & 0x07);
|
||||||
|
} else if ((c & 0xfc) == 0xf8) {
|
||||||
|
term->utf_size = term->utf_state = 4;
|
||||||
|
term->utf_char = (c & 0x03);
|
||||||
|
} else if ((c & 0xfe) == 0xfc) {
|
||||||
|
term->utf_size = term->utf_state = 5;
|
||||||
|
term->utf_char = (c & 0x01);
|
||||||
|
} else {
|
||||||
|
return UCSINVALID;
|
||||||
|
}
|
||||||
|
return UCSINCOMPLETE;
|
||||||
|
case 1:
|
||||||
|
case 2:
|
||||||
|
case 3:
|
||||||
|
case 4:
|
||||||
|
case 5:
|
||||||
|
if ((c & 0xC0) != 0x80) {
|
||||||
|
term->utf_state = 0;
|
||||||
|
return UCSTRUNCATED; /* caller will then give us the
|
||||||
|
* same byte again */
|
||||||
|
}
|
||||||
|
term->utf_char = (term->utf_char << 6) | (c & 0x3f);
|
||||||
|
if (--term->utf_state)
|
||||||
|
return UCSINCOMPLETE;
|
||||||
|
|
||||||
|
unsigned long t = term->utf_char;
|
||||||
|
|
||||||
|
/* Is somebody trying to be evil! */
|
||||||
|
if (t < 0x80 ||
|
||||||
|
(t < 0x800 && term->utf_size >= 2) ||
|
||||||
|
(t < 0x10000 && term->utf_size >= 3) ||
|
||||||
|
(t < 0x200000 && term->utf_size >= 4) ||
|
||||||
|
(t < 0x4000000 && term->utf_size >= 5))
|
||||||
|
return UCSINVALID;
|
||||||
|
|
||||||
|
/* Unicode line separator and paragraph separator are CR-LF */
|
||||||
|
if (t == 0x2028 || t == 0x2029)
|
||||||
|
return 0x85;
|
||||||
|
|
||||||
|
/* High controls are probably a Baaad idea too. */
|
||||||
|
if (t < 0xA0)
|
||||||
|
return 0xFFFD;
|
||||||
|
|
||||||
|
/* The UTF-16 surrogates are not nice either. */
|
||||||
|
/* The standard give the option of decoding these:
|
||||||
|
* I don't want to! */
|
||||||
|
if (t >= 0xD800 && t < 0xE000)
|
||||||
|
return UCSINVALID;
|
||||||
|
|
||||||
|
/* ISO 10646 characters now limited to UTF-16 range. */
|
||||||
|
if (t > 0x10FFFF)
|
||||||
|
return UCSINVALID;
|
||||||
|
|
||||||
|
/* This is currently a TagPhobic application.. */
|
||||||
|
if (t >= 0xE0000 && t <= 0xE007F)
|
||||||
|
return UCSINCOMPLETE;
|
||||||
|
|
||||||
|
/* U+FEFF is best seen as a null. */
|
||||||
|
if (t == 0xFEFF)
|
||||||
|
return UCSINCOMPLETE;
|
||||||
|
/* But U+FFFE is an error. */
|
||||||
|
if (t == 0xFFFE || t == 0xFFFF)
|
||||||
|
return UCSINVALID;
|
||||||
|
|
||||||
|
return t;
|
||||||
|
}
|
||||||
|
} else if (term->sco_acs &&
|
||||||
|
(c!='\033' && c!='\012' && c!='\015' && c!='\b')) {
|
||||||
|
/* Are we in the nasty ACS mode? Note: no sco in utf mode. */
|
||||||
|
if (term->sco_acs == 2)
|
||||||
|
c |= 0x80;
|
||||||
|
|
||||||
|
return c | CSET_SCOACS;
|
||||||
|
} else {
|
||||||
|
switch (term->cset_attr[term->cset]) {
|
||||||
|
/*
|
||||||
|
* Linedraw characters are different from 'ESC ( B'
|
||||||
|
* only for a small range. For ones outside that
|
||||||
|
* range, make sure we use the same font as well as
|
||||||
|
* the same encoding.
|
||||||
|
*/
|
||||||
|
case CSET_LINEDRW:
|
||||||
|
if (term->ucsdata->unitab_ctrl[c] != 0xFF)
|
||||||
|
return term->ucsdata->unitab_ctrl[c];
|
||||||
|
else
|
||||||
|
return c | CSET_LINEDRW;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case CSET_GBCHR:
|
||||||
|
/* If UK-ASCII, make the '#' a LineDraw Pound */
|
||||||
|
if (c == '#')
|
||||||
|
return '}' | CSET_LINEDRW;
|
||||||
|
/* fall through */
|
||||||
|
|
||||||
|
case CSET_ASCII:
|
||||||
|
if (term->ucsdata->unitab_ctrl[c] != 0xFF)
|
||||||
|
return term->ucsdata->unitab_ctrl[c];
|
||||||
|
else
|
||||||
|
return c | CSET_ASCII;
|
||||||
|
break;
|
||||||
|
case CSET_SCOACS:
|
||||||
|
if (c >= ' ')
|
||||||
|
return c | CSET_SCOACS;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return c;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Remove everything currently in `inbuf' and stick it up on the
|
* Remove everything currently in `inbuf' and stick it up on the
|
||||||
* in-memory display. There's a big state machine in here to
|
* in-memory display. There's a big state machine in here to
|
||||||
@ -2945,135 +3092,22 @@ static void term_out(Terminal *term)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* First see about all those translations. */
|
/* Do character-set translation. */
|
||||||
if (term->termstate == TOPLEVEL) {
|
if (term->termstate == TOPLEVEL) {
|
||||||
if (in_utf(term))
|
unsigned long t = term_translate(term, c);
|
||||||
switch (term->utf_state) {
|
switch (t) {
|
||||||
case 0:
|
case UCSINCOMPLETE:
|
||||||
if (c < 0x80) {
|
continue; /* didn't complete a multibyte char */
|
||||||
/* UTF-8 must be stateless so we ignore iso2022. */
|
case UCSTRUNCATED:
|
||||||
if (term->ucsdata->unitab_ctrl[c] != 0xFF)
|
|
||||||
c = term->ucsdata->unitab_ctrl[c];
|
|
||||||
else if ((term->utf8linedraw) &&
|
|
||||||
(term->cset_attr[term->cset] == CSET_LINEDRW))
|
|
||||||
/* Linedraw characters are explicitly enabled */
|
|
||||||
c = ((unsigned char) c) | CSET_LINEDRW;
|
|
||||||
else c = ((unsigned char)c) | CSET_ASCII;
|
|
||||||
break;
|
|
||||||
} else if ((c & 0xe0) == 0xc0) {
|
|
||||||
term->utf_size = term->utf_state = 1;
|
|
||||||
term->utf_char = (c & 0x1f);
|
|
||||||
} else if ((c & 0xf0) == 0xe0) {
|
|
||||||
term->utf_size = term->utf_state = 2;
|
|
||||||
term->utf_char = (c & 0x0f);
|
|
||||||
} else if ((c & 0xf8) == 0xf0) {
|
|
||||||
term->utf_size = term->utf_state = 3;
|
|
||||||
term->utf_char = (c & 0x07);
|
|
||||||
} else if ((c & 0xfc) == 0xf8) {
|
|
||||||
term->utf_size = term->utf_state = 4;
|
|
||||||
term->utf_char = (c & 0x03);
|
|
||||||
} else if ((c & 0xfe) == 0xfc) {
|
|
||||||
term->utf_size = term->utf_state = 5;
|
|
||||||
term->utf_char = (c & 0x01);
|
|
||||||
} else {
|
|
||||||
c = UCSERR;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
continue;
|
|
||||||
case 1:
|
|
||||||
case 2:
|
|
||||||
case 3:
|
|
||||||
case 4:
|
|
||||||
case 5:
|
|
||||||
if ((c & 0xC0) != 0x80) {
|
|
||||||
unget = c;
|
unget = c;
|
||||||
|
/* fall through */
|
||||||
|
case UCSINVALID:
|
||||||
c = UCSERR;
|
c = UCSERR;
|
||||||
term->utf_state = 0;
|
|
||||||
break;
|
break;
|
||||||
}
|
default:
|
||||||
term->utf_char = (term->utf_char << 6) | (c & 0x3f);
|
c = t;
|
||||||
if (--term->utf_state)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
c = term->utf_char;
|
|
||||||
|
|
||||||
/* Is somebody trying to be evil! */
|
|
||||||
if (c < 0x80 ||
|
|
||||||
(c < 0x800 && term->utf_size >= 2) ||
|
|
||||||
(c < 0x10000 && term->utf_size >= 3) ||
|
|
||||||
(c < 0x200000 && term->utf_size >= 4) ||
|
|
||||||
(c < 0x4000000 && term->utf_size >= 5))
|
|
||||||
c = UCSERR;
|
|
||||||
|
|
||||||
/* Unicode line separator and paragraph separator are CR-LF */
|
|
||||||
if (c == 0x2028 || c == 0x2029)
|
|
||||||
c = 0x85;
|
|
||||||
|
|
||||||
/* High controls are probably a Baaad idea too. */
|
|
||||||
if (c < 0xA0)
|
|
||||||
c = 0xFFFD;
|
|
||||||
|
|
||||||
/* The UTF-16 surrogates are not nice either. */
|
|
||||||
/* The standard give the option of decoding these:
|
|
||||||
* I don't want to! */
|
|
||||||
if (c >= 0xD800 && c < 0xE000)
|
|
||||||
c = UCSERR;
|
|
||||||
|
|
||||||
/* ISO 10646 characters now limited to UTF-16 range. */
|
|
||||||
if (c > 0x10FFFF)
|
|
||||||
c = UCSERR;
|
|
||||||
|
|
||||||
/* This is currently a TagPhobic application.. */
|
|
||||||
if (c >= 0xE0000 && c <= 0xE007F)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
/* U+FEFF is best seen as a null. */
|
|
||||||
if (c == 0xFEFF)
|
|
||||||
continue;
|
|
||||||
/* But U+FFFE is an error. */
|
|
||||||
if (c == 0xFFFE || c == 0xFFFF)
|
|
||||||
c = UCSERR;
|
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
/* Are we in the nasty ACS mode? Note: no sco in utf mode. */
|
|
||||||
else if(term->sco_acs &&
|
|
||||||
(c!='\033' && c!='\012' && c!='\015' && c!='\b'))
|
|
||||||
{
|
|
||||||
if (term->sco_acs == 2) c |= 0x80;
|
|
||||||
c |= CSET_SCOACS;
|
|
||||||
} else {
|
|
||||||
switch (term->cset_attr[term->cset]) {
|
|
||||||
/*
|
|
||||||
* Linedraw characters are different from 'ESC ( B'
|
|
||||||
* only for a small range. For ones outside that
|
|
||||||
* range, make sure we use the same font as well as
|
|
||||||
* the same encoding.
|
|
||||||
*/
|
|
||||||
case CSET_LINEDRW:
|
|
||||||
if (term->ucsdata->unitab_ctrl[c] != 0xFF)
|
|
||||||
c = term->ucsdata->unitab_ctrl[c];
|
|
||||||
else
|
|
||||||
c = ((unsigned char) c) | CSET_LINEDRW;
|
|
||||||
break;
|
|
||||||
|
|
||||||
case CSET_GBCHR:
|
|
||||||
/* If UK-ASCII, make the '#' a LineDraw Pound */
|
|
||||||
if (c == '#') {
|
|
||||||
c = '}' | CSET_LINEDRW;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
/*FALLTHROUGH*/ case CSET_ASCII:
|
|
||||||
if (term->ucsdata->unitab_ctrl[c] != 0xFF)
|
|
||||||
c = term->ucsdata->unitab_ctrl[c];
|
|
||||||
else
|
|
||||||
c = ((unsigned char) c) | CSET_ASCII;
|
|
||||||
break;
|
|
||||||
case CSET_SCOACS:
|
|
||||||
if (c>=' ') c = ((unsigned char)c) | CSET_SCOACS;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
Loading…
Reference in New Issue
Block a user