From 08d58fe13e84a929c72c5d4aa98001279463a79f Mon Sep 17 00:00:00 2001
From: Simon Tatham
Date: Sun, 19 Jun 2022 10:36:50 +0100
Subject: [PATCH] Routines for %-encoding and %-decoding.

These make a good storage format for mostly-textual data in
configuration, if it can't afford to reserve any character as a
delimiter. Assuming very few characters need to be escaped, the space
cost is lower than base64, and also you can read it by eye.
---
 misc.h                 |  7 +++++++
 utils/CMakeLists.txt   |  2 ++
 utils/percent_decode.c | 41 +++++++++++++++++++++++++++++++++++++++++
 utils/percent_encode.c | 34 ++++++++++++++++++++++++++++++++++
 4 files changed, 84 insertions(+)
 create mode 100644 utils/percent_decode.c
 create mode 100644 utils/percent_encode.c

diff --git a/misc.h b/misc.h
index afde1502..9cf656fd 100644
--- a/misc.h
+++ b/misc.h
@@ -116,6 +116,13 @@ void base64_encode_fp(FILE *fp, ptrlen data, int cpl);
 strbuf *base64_encode_sb(ptrlen data, int cpl);
 bool base64_valid(ptrlen data);
 
+void percent_encode_bs(BinarySink *bs, ptrlen data, const char *badchars);
+void percent_encode_fp(FILE *fp, ptrlen data, const char *badchars);
+strbuf *percent_encode_sb(ptrlen data, const char *badchars);
+void percent_decode_bs(BinarySink *bs, ptrlen data);
+void percent_decode_fp(FILE *fp, ptrlen data);
+strbuf *percent_decode_sb(ptrlen data);
+
 struct bufchain_granule;
 struct bufchain_tag {
     struct bufchain_granule *head, *tail;
diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt
index 957b861c..56cc3c1b 100644
--- a/utils/CMakeLists.txt
+++ b/utils/CMakeLists.txt
@@ -46,6 +46,8 @@ add_sources_from_current_dir(utils
   nullstrcmp.c
   out_of_memory.c
   parse_blocksize.c
+  percent_decode.c
+  percent_encode.c
   prompts.c
   ptrlen.c
   read_file_into.c
diff --git a/utils/percent_decode.c b/utils/percent_decode.c
new file mode 100644
index 00000000..dff2c233
--- /dev/null
+++ b/utils/percent_decode.c
@@ -0,0 +1,41 @@
+/*
+ * Decode %-encoding in URL style.
+ */
+
+#include <ctype.h>
+
+#include "misc.h"
+
+/* Write 'data' to 'bs', replacing each '%' that is followed by two hex
+ * digits with the byte of that value. All else is copied unchanged. */
+void percent_decode_bs(BinarySink *bs, ptrlen data)
+{
+    for (const char *p = data.ptr, *e = ptrlen_end(data); p < e; p++) {
+        char c = *p;
+        if (c == '%' && e-p >= 3 &&
+            isxdigit((unsigned char)p[1]) &&
+            isxdigit((unsigned char)p[2])) {
+            /* strtoul needs a NUL-terminated string, and the input is
+             * delimited by length, so copy the two digits out first. */
+            char hex[3] = { p[1], p[2], '\0' };
+            put_byte(bs, strtoul(hex, NULL, 16));
+            p += 2; /* with the loop's own p++, skips all of "%XX" */
+        } else {
+            put_byte(bs, c);
+        }
+    }
+}
+
+void percent_decode_fp(FILE *fp, ptrlen data)
+{
+    stdio_sink ss; /* lets 'fp' be written to through a BinarySink */
+    stdio_sink_init(&ss, fp);
+    percent_decode_bs(BinarySink_UPCAST(&ss), data);
+}
+
+strbuf *percent_decode_sb(ptrlen data)
+{
+    strbuf *sb = strbuf_new(); /* returned to the caller, not freed here */
+    percent_decode_bs(BinarySink_UPCAST(sb), data);
+    return sb;
+}
diff --git a/utils/percent_encode.c b/utils/percent_encode.c
new file mode 100644
index 00000000..ea04b0ac
--- /dev/null
+++ b/utils/percent_encode.c
@@ -0,0 +1,34 @@
+/*
+ * %-encoding in URL style. Escapes % itself (necessary for decoding
+ * to even work), any C0 escape character, and any further bad
+ * characters provided in 'badchars' (which may be NULL). Bytes are
+ * examined as unsigned char, so that 0x80-0xFF do not compare below
+ * ' ' (and get escaped) only where plain char happens to be signed.
+ */
+
+#include "misc.h"
+
+void percent_encode_bs(BinarySink *bs, ptrlen data, const char *badchars)
+{
+    for (const char *p = data.ptr, *e = ptrlen_end(data); p < e; p++) {
+        unsigned char c = *p;
+        if (c == '%' || c < ' ' || (badchars && strchr(badchars, c)))
+            put_fmt(bs, "%%%02X", (unsigned)c);
+        else
+            put_byte(bs, c);
+    }
+}
+
+void percent_encode_fp(FILE *fp, ptrlen data, const char *badchars)
+{
+    stdio_sink ss; /* lets 'fp' be written to through a BinarySink */
+    stdio_sink_init(&ss, fp);
+    percent_encode_bs(BinarySink_UPCAST(&ss), data, badchars);
+}
+
+strbuf *percent_encode_sb(ptrlen data, const char *badchars)
+{
+    strbuf *sb = strbuf_new(); /* returned to the caller, not freed here */
+    percent_encode_bs(BinarySink_UPCAST(sb), data, badchars);
+    return sb;
+}