1
0
mirror of https://git.tartarus.org/simon/putty.git synced 2025-01-25 01:02:24 +00:00

Make encode_utf8() output to a BinarySink.

Previously it output to an ordinary char buffer, and returned the
number of bytes it had written. But three out of the four call sites
immediately chucked the resulting bytes into a BinarySink anyway. The
fourth, in windows/unicode.c, really is writing into successive
locations of a fixed-size buffer - but we can make that into a
BinarySink too, using the buffer_sink added in the previous commit.

So now encode_utf8() is renamed put_utf8_char(), and the call sites all
look simpler than they started out.
This commit is contained in:
Simon Tatham 2022-11-09 18:56:51 +00:00
parent 991e22c9bb
commit 834b58e39b
7 changed files with 27 additions and 45 deletions

View File

@ -156,6 +156,10 @@ struct BinarySink {
#define put_c_string_literal(bs, str) \ #define put_c_string_literal(bs, str) \
BinarySink_put_c_string_literal(BinarySink_UPCAST(bs), str) BinarySink_put_c_string_literal(BinarySink_UPCAST(bs), str)
/* More complicated function implemented in encode_utf8.c */
#define put_utf8_char(bs, c) \
BinarySink_put_utf8_char(BinarySink_UPCAST(bs), c)
/* /*
* The underlying real C functions that implement most of those * The underlying real C functions that implement most of those
* macros. Generally you won't want to call these directly, because * macros. Generally you won't want to call these directly, because
@ -185,6 +189,7 @@ void BinarySink_put_mp_ssh2(BinarySink *bs, mp_int *x);
void BinarySink_put_fmt(BinarySink *, const char *fmt, ...) PRINTF_LIKE(2, 3); void BinarySink_put_fmt(BinarySink *, const char *fmt, ...) PRINTF_LIKE(2, 3);
void BinarySink_put_fmtv(BinarySink *, const char *fmt, va_list ap); void BinarySink_put_fmtv(BinarySink *, const char *fmt, va_list ap);
void BinarySink_put_c_string_literal(BinarySink *, ptrlen); void BinarySink_put_c_string_literal(BinarySink *, ptrlen);
void BinarySink_put_utf8_char(BinarySink *, unsigned);
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */

5
misc.h
View File

@ -248,11 +248,6 @@ void smemclr(void *b, size_t len);
* by the 'eq' in the name. */ * by the 'eq' in the name. */
unsigned smemeq(const void *av, const void *bv, size_t len); unsigned smemeq(const void *av, const void *bv, size_t len);
/* Encode a single UTF-8 character. Assumes that illegal characters
* (such as things in the surrogate range, or > 0x10FFFF) have already
* been removed. */
size_t encode_utf8(void *output, unsigned long ch);
/* Encode a wide-character string into UTF-8. Tolerates surrogates if /* Encode a wide-character string into UTF-8. Tolerates surrogates if
* sizeof(wchar_t) == 2, assuming that in that case the wide string is * sizeof(wchar_t) == 2, assuming that in that case the wide string is
* encoded in UTF-16. */ * encoded in UTF-16. */

View File

@ -3398,8 +3398,7 @@ static strbuf *term_input_data_from_unicode(
} }
} }
char utf8_chr[6]; put_utf8_char(buf, ch);
put_data(buf, utf8_chr, encode_utf8(utf8_chr, ch));
} }
} else { } else {
/* /*

View File

@ -5,25 +5,22 @@
#include "defs.h" #include "defs.h"
#include "misc.h" #include "misc.h"
size_t encode_utf8(void *output, unsigned long ch) void BinarySink_put_utf8_char(BinarySink *output, unsigned ch)
{ {
unsigned char *start = (unsigned char *)output, *p = start;
if (ch < 0x80) { if (ch < 0x80) {
*p++ = ch; put_byte(output, ch);
} else if (ch < 0x800) { } else if (ch < 0x800) {
*p++ = 0xC0 | (ch >> 6); put_byte(output, 0xC0 | (ch >> 6));
*p++ = 0x80 | (ch & 0x3F); put_byte(output, 0x80 | (ch & 0x3F));
} else if (ch < 0x10000) { } else if (ch < 0x10000) {
*p++ = 0xE0 | (ch >> 12); put_byte(output, 0xE0 | (ch >> 12));
*p++ = 0x80 | ((ch >> 6) & 0x3F); put_byte(output, 0x80 | ((ch >> 6) & 0x3F));
*p++ = 0x80 | (ch & 0x3F); put_byte(output, 0x80 | (ch & 0x3F));
} else { } else {
assert(ch <= 0x10FFFF); assert(ch <= 0x10FFFF);
*p++ = 0xF0 | (ch >> 18); put_byte(output, 0xF0 | (ch >> 18));
*p++ = 0x80 | ((ch >> 12) & 0x3F); put_byte(output, 0x80 | ((ch >> 12) & 0x3F));
*p++ = 0x80 | ((ch >> 6) & 0x3F); put_byte(output, 0x80 | ((ch >> 6) & 0x3F));
*p++ = 0x80 | (ch & 0x3F); put_byte(output, 0x80 | (ch & 0x3F));
} }
return p - start;
} }

View File

@ -17,9 +17,7 @@ char *encode_wide_string_as_utf8(const wchar_t *ws)
} else if (IS_SURROGATE(ch)) { } else if (IS_SURROGATE(ch)) {
ch = 0xfffd; /* illegal UTF-16 -> REPLACEMENT CHARACTER */ ch = 0xfffd; /* illegal UTF-16 -> REPLACEMENT CHARACTER */
} }
char utf8[6]; put_utf8_char(sb, ch);
size_t size = encode_utf8(utf8, ch);
put_data(sb, utf8, size);
} }
return strbuf_to_str(sb); return strbuf_to_str(sb);
} }

View File

@ -217,9 +217,6 @@ static inline void stripctrl_term_put_wc(
if (prefix.len) if (prefix.len)
put_datapl(scc->bs_out, prefix); put_datapl(scc->bs_out, prefix);
char outbuf[6];
size_t produced;
/* /*
* The Terminal implementation encodes 7-bit ASCII characters in * The Terminal implementation encodes 7-bit ASCII characters in
* UTF-8 mode, and all printing characters in non-UTF-8 (i.e. * UTF-8 mode, and all printing characters in non-UTF-8 (i.e.
@ -232,14 +229,10 @@ static inline void stripctrl_term_put_wc(
wc &= 0xFF; wc &= 0xFF;
if (in_utf(scc->term)) { if (in_utf(scc->term)) {
produced = encode_utf8(outbuf, wc); put_utf8_char(scc->bs_out, wc);
} else { } else {
outbuf[0] = wc; put_byte(scc->bs_out, wc);
produced = 1;
} }
if (produced > 0)
put_data(scc->bs_out, outbuf, produced);
} }
static inline size_t stripctrl_locale_try_consume( static inline size_t stripctrl_locale_try_consume(

View File

@ -1290,8 +1290,8 @@ int wc_to_mb(int codepage, int flags, const wchar_t *wcstr, int wclen,
* the codepage is UTF-8, we can do the translation ourselves. * the codepage is UTF-8, we can do the translation ourselves.
*/ */
if (codepage == CP_UTF8 && mblen > 0 && wclen > 0) { if (codepage == CP_UTF8 && mblen > 0 && wclen > 0) {
size_t remaining = mblen; buffer_sink bs[1];
char *p = mbstr; buffer_sink_init(bs, mbstr, mblen);
while (wclen > 0) { while (wclen > 0) {
unsigned long wc = (wclen--, *wcstr++); unsigned long wc = (wclen--, *wcstr++);
@ -1300,18 +1300,13 @@ int wc_to_mb(int codepage, int flags, const wchar_t *wcstr, int wclen,
wclen--, wcstr++; wclen--, wcstr++;
} }
char utfbuf[6]; const char *prev_ptr = bs->out;
size_t utflen = encode_utf8(utfbuf, wc); put_utf8_char(bs, wc);
if (utflen <= remaining) { if (bs->overflowed)
memcpy(p, utfbuf, utflen); return prev_ptr - mbstr;
p += utflen;
remaining -= utflen;
} else {
return p - mbstr;
}
} }
return p - mbstr; return bs->out - mbstr;
} }
#endif #endif