Rework Unicode conversion APIs to use a BinarySink.

The previous mb_to_wc and wc_to_mb had horrible and also buggy APIs. This commit introduces a fresh pair of functions to replace them, which generate output by writing to a BinarySink. So it's now up to the caller to decide whether it wants the output written to a fixed-size buffer with overflow checking (via buffer_sink), or dynamically allocated, or even written directly to some other output channel.

Nothing uses the new functions yet. I plan to migrate things over in upcoming commits.

What was wrong with the old APIs: they had that awkward undocumented Windows-specific 'flags' parameter that I described in the previous commit and took out of the dup_X_to_Y wrappers. But much worse, the semantics for buffer overflow were not just undocumented but actually inconsistent. dup_wc_to_mb() in utils assumed that the underlying wc_to_mb would fill the buffer nearly full and return the size of data it wrote. In fact, this was untrue in the case where wc_to_mb called WideCharToMultiByte: that returns straight-up failure, setting the Windows error code to ERROR_INSUFFICIENT_BUFFER. It _does_ partially fill the output buffer, but doesn't tell you how much it wrote!

What's wrong with the new API: it's a bit awkward to write a sequence of wchar_t in native byte order to a byte-oriented BinarySink, so people using put_mb_to_wc directly have to do some annoying pointer casting. But I think that's less horrible than the previous APIs.

Another change: in the new API for wc_to_mb, defchr can be "", but not NULL.
2025-07-01 19:42:48 -05:00 · 2024-09-24 08:18:48 +01:00
parent 32b8da1177
commit 4f756d2a4d
10 changed files with 267 additions and 239 deletions
--- a/terminal/terminal.c
+++ b/terminal/terminal.c
@@ -3497,11 +3497,7 @@ static strbuf *term_input_data_from_unicode(
         * (But also we must allow space for the trailing NUL that
         * wc_to_mb will write.)
         */
-        char *bufptr = strbuf_append(buf, len + 1);
-        int rv;
-        rv = wc_to_mb(term->ucsdata->line_codepage, 0, widebuf, len,
-                      bufptr, len + 1, NULL);
-        strbuf_shrink_to(buf, rv < 0 ? 0 : rv);
+        put_wc_to_mb(buf, term->ucsdata->line_codepage, widebuf, len, "");
    }

    return buf;
@@ -3510,18 +3506,12 @@ static strbuf *term_input_data_from_unicode(
 static strbuf *term_input_data_from_charset(
    Terminal *term, int codepage, const char *str, size_t len)
 {
-    strbuf *buf;
+    strbuf *buf = strbuf_new();

-    if (codepage < 0) {
-        buf = strbuf_new();
+    if (codepage < 0)
        put_data(buf, str, len);
-    } else {
-        size_t widesize = len * 2;        /* allow for UTF-16 surrogates */
-        wchar_t *widebuf = snewn(widesize, wchar_t);
-        int widelen = mb_to_wc(codepage, 0, str, len, widebuf, widesize);
-        buf = term_input_data_from_unicode(term, widebuf, widelen);
-        sfree(widebuf);
-    }
+    else
+        put_mb_to_wc(buf, codepage, str, len);

    return buf;
 }
@@ -6734,23 +6724,24 @@ static void clipme(Terminal *term, pos top, pos bottom, bool rect, bool desel,

                if (DIRECT_FONT(uc)) {
                    if (c >= ' ' && c != 0x7F) {
-                        char buf[4];
-                        WCHAR wbuf[4];
-                        int rv;
+                        char buf[2];
+                        buffer_sink bs[1];
+                        buffer_sink_init(bs, cbuf,
+                                         sizeof(cbuf) - sizeof(wchar_t));
                        if (is_dbcs_leadbyte(term->ucsdata->font_codepage, (BYTE) c)) {
                            buf[0] = c;
                            buf[1] = (char) (0xFF & ldata->chars[top.x + 1].chr);
-                            rv = mb_to_wc(term->ucsdata->font_codepage, 0, buf, 2, wbuf, 4);
+                            put_mb_to_wc(bs, term->ucsdata->font_codepage,
+                                         buf, 2);
                            top.x++;
                        } else {
                            buf[0] = c;
-                            rv = mb_to_wc(term->ucsdata->font_codepage, 0, buf, 1, wbuf, 4);
+                            put_mb_to_wc(bs, term->ucsdata->font_codepage,
+                                         buf, 1);
                        }

-                        if (rv > 0) {
-                            memcpy(cbuf, wbuf, rv * sizeof(wchar_t));
-                            cbuf[rv] = 0;
-                        }
+                        assert(!bs->overflowed);
+                        *(wchar_t *)bs->out = L'\0';
                    }
                }