From 93ba74579a22a8976e49bbb1c9e45b5bbd0d35bf Mon Sep 17 00:00:00 2001
From: Simon Tatham
Date: Sun, 10 Oct 2021 14:52:17 +0100
Subject: [PATCH] Test rig for the new bidi algorithm.

This standalone CLI program runs the UCD bidi tests in the form
provided in Unicode 14.0.0. You can run it by just saying

    bidi_test --class BidiTest.txt --char BidiCharacterTest.txt

assuming those two UCD files are in the current directory.
---
[Reviewer note: this copy of the patch is damaged. Text that sat between
angle brackets appears to have been stripped, which removed the author's
e-mail address, the remainder of the terminal/bidi.h hunk, and the diff
header, leading comment and first #include of terminal/bidi_test.c. Those
parts are left exactly as received. The diffstat below is the one from the
original commit and does not account for the review changes made to
terminal/bidi_test.c.]

 CMakeLists.txt       |   4 +
 terminal/bidi.c      |   8 +
 terminal/bidi.h      |  13 ++
 terminal/bidi_test.c | 365 +++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 390 insertions(+)
 create mode 100644 terminal/bidi_test.c

diff --git a/CMakeLists.txt b/CMakeLists.txt
index debd6e1d..299f04c4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -86,6 +86,10 @@ add_executable(bidi_gettype
   terminal/bidi_gettype.c)
 target_link_libraries(bidi_gettype guiterminal utils ${platform_libraries})
 
+add_executable(bidi_test
+  terminal/bidi_test.c)
+target_link_libraries(bidi_test guiterminal utils ${platform_libraries})
+
 add_executable(plink
   ${platform}/plink.c
   be_all_s.c)
diff --git a/terminal/bidi.c b/terminal/bidi.c
index ea05e2bd..0e72b49d 100644
--- a/terminal/bidi.c
+++ b/terminal/bidi.c
@@ -3589,6 +3589,14 @@ void do_bidi_new(BidiContext *ctx, bidi_char *text, size_t textlen)
     reverse_sequences(ctx);
 }
 
+size_t do_bidi_test(BidiContext *ctx, bidi_char *text, size_t textlen,
+                    int override)
+{
+    ctx->paragraphOverride = (override > 0 ? L : override < 0 ? R : ON);
+    do_bidi_new(ctx, text, textlen);
+    return ctx->textlen;
+}
+
 void do_bidi(BidiContext *ctx, bidi_char *text, size_t textlen)
 {
 #ifdef REMOVE_FORMATTING_CHARACTERS
diff --git a/terminal/bidi.h b/terminal/bidi.h
index dd488e1f..90d68e5b 100644
--- a/terminal/bidi.h
+++ b/terminal/bidi.h
@@ -131,4 +131,17 @@ static inline bool typeIsETOrBN(BidiType t)
     return ((1<
+
+#include "putty.h"
+#include "misc.h"
+#include "bidi.h"
+
+/* Running totals of test cases that passed and failed, over all files. */
+static int pass = 0, fail = 0;
+
+/* The single bidi context used by every test; created in main(). */
+static BidiContext *ctx;
+
+/*
+ * Split the next whitespace-delimited word off the front of a mutable
+ * string.
+ *
+ * On entry, *ptr points at the text still to be parsed. Leading
+ * whitespace is skipped, the word is NUL-terminated in place, and *ptr
+ * is advanced past any whitespace that follows it. The return value
+ * points at the word, or at an empty string if no words remain.
+ */
+static const char *extract_word(char **ptr)
+{
+    char *p = *ptr;
+    while (*p && isspace((unsigned char)*p)) p++;
+
+    char *start = p;
+    while (*p && !isspace((unsigned char)*p)) p++;
+
+    if (*p) {
+        *p++ = '\0';
+        while (*p && isspace((unsigned char)*p)) p++;
+    }
+
+    *ptr = p;
+    return start;
+}
+
+/* Printable name of each bidi type, indexed by its BidiType value. */
+#define TYPETONAME(X) #X,
+static const char *const typenames[] = { BIDI_CHAR_TYPE_LIST(TYPETONAME) };
+#undef TYPETONAME
+
+/*
+ * Run one test case and record the result in the pass/fail counters.
+ *
+ * bcs[0..bcs_len) is the input text, and order[0..order_len) is the
+ * expected visual ordering, given as indices into the input.
+ * 'override' selects the paragraph direction: positive for LTR,
+ * negative for RTL, zero to let the algorithm decide. On failure, a
+ * diagnostic prefixed with filename:lineno is printed to standard
+ * output.
+ *
+ * bcs is modified while the test runs, but its original contents are
+ * put back before returning, so the caller can reuse the same array
+ * with a different override.
+ */
+static void run_test(const char *filename, unsigned lineno,
+                     bidi_char *bcs, size_t bcs_len,
+                     const unsigned *order, size_t order_len,
+                     int override)
+{
+    size_t bcs_orig_len = bcs_len;
+    bidi_char *bcs_orig = snewn(bcs_orig_len, bidi_char);
+    memcpy(bcs_orig, bcs, bcs_orig_len * sizeof(bidi_char));
+
+    bcs_len = do_bidi_test(ctx, bcs, bcs_len, override);
+
+    /*
+     * TR9 revision 44 rule X9 says we remove explicit embedding
+     * controls and BN characters. So the test cases don't list them
+     * in the expected outputs. Do the same to our own output - unless
+     * we're testing the standard version of the algorithm, in which
+     * case, we expect the output to be exactly as the test cases say.
+     *
+     * The macro tested here has to be the one bidi.c tests
+     * (REMOVE_FORMATTING_CHARACTERS); under any other spelling the
+     * 'unless' case above could never be selected.
+     */
+    unsigned *our_order = snewn(bcs_len, unsigned);
+    size_t our_order_len = 0;
+    for (size_t i = 0; i < bcs_len; i++) {
+#ifndef REMOVE_FORMATTING_CHARACTERS
+        if (typeIsRemovedDuringProcessing(bidi_getType(bcs[i].wc)))
+            continue;
+#endif
+        our_order[our_order_len++] = bcs[i].index;
+    }
+
+    bool ok = false;
+    if (our_order_len == order_len) {
+        ok = true;
+        for (size_t i = 0; i < our_order_len; i++)
+            if (our_order[i] != order[i])
+                ok = false;
+    }
+    if (ok) {
+        pass++;
+    } else {
+        fail++;
+        printf("%s:%u: failed order\n", filename, lineno);
+        printf(" input chars:");
+        for (size_t i = 0; i < bcs_orig_len; i++)
+            printf(" %04x", bcs_orig[i].wc);
+        printf("\n");
+        printf(" classes: ");
+        for (size_t i = 0; i < bcs_orig_len; i++)
+            printf(" %-4s", typenames[bidi_getType(bcs_orig[i].wc)]);
+        printf("\n");
+        printf(" para level = %s\n",
+               override > 0 ? "LTR" : override < 0 ? "RTL" : "auto");
+        printf(" expected:");
+        for (size_t i = 0; i < order_len; i++)
+            printf(" %u", order[i]);
+        printf("\n");
+        printf(" got: ");
+        for (size_t i = 0; i < our_order_len; i++)
+            printf(" %u", our_order[i]);
+        printf("\n");
+    }
+
+    /* Put the original data back so we can re-test with another override */
+    memcpy(bcs, bcs_orig, bcs_orig_len * sizeof(bidi_char));
+
+    sfree(bcs_orig);
+    sfree(our_order);
+}
+
+/*
+ * Run every test in a file of the kind selected by --class (the usage
+ * message gives BidiTest.txt as the example).
+ *
+ * Each test line is a list of bidi type names, then a semicolon, then
+ * a bitmask of paragraph directions to test (1 = auto, 2 = LTR,
+ * 4 = RTL). Every type name is replaced by one representative
+ * character of that type. The expected output order comes from the
+ * most recent preceding "@Reorder:" line. 'filename' is used only in
+ * diagnostics; the caller keeps ownership of fp.
+ */
+static void class_test(const char *filename, FILE *fp)
+{
+    unsigned lineno = 0;
+    size_t bcs_size = 0, bcs_len = 0;
+    bidi_char *bcs = NULL;
+    size_t order_size = 0, order_len = 0;
+    unsigned *order = NULL;
+
+    /* Preliminary: find a representative character of every bidi
+     * type. Prefer positive-width ones if available. */
+    unsigned representatives[N_BIDI_TYPES];
+    for (size_t i = 0; i < N_BIDI_TYPES; i++)
+        representatives[i] = 0;
+    for (unsigned uc = 1; uc < 0x110000; uc++) {
+        unsigned type = bidi_getType(uc);
+        if (!representatives[type] ||
+            (mk_wcwidth(representatives[type]) <= 0 && mk_wcwidth(uc) > 0))
+            representatives[type] = uc;
+    }
+
+    while (true) {
+        lineno++;
+        char *line = chomp(fgetline(fp));
+        if (!line)
+            break;
+
+        /* Skip blank lines and comments */
+        if (!line[0] || line[0] == '#') {
+            sfree(line);
+            continue;
+        }
+
+        /* Parse @Reorder lines, which tell us the expected output
+         * order for all following test cases (until superseded) */
+        if (strstartswith(line, "@Reorder:")) {
+            char *p = line;
+            extract_word(&p); /* eat the "@Reorder:" header itself */
+            order_len = 0;
+            while (1) {
+                const char *word = extract_word(&p);
+                if (!*word)
+                    break;
+                sgrowarray(order, order_size, order_len);
+                order[order_len++] = strtoul(word, NULL, 0);
+            }
+
+            sfree(line);
+            continue;
+        }
+
+        /* Skip @Levels lines, which we don't (yet?) do anything with */
+        if (strstartswith(line, "@Levels:")) {
+            sfree(line);
+            continue;
+        }
+
+        /* Everything remaining should be an actual test */
+        char *semicolon = strchr(line, ';');
+        if (!semicolon) {
+            printf("%s:%u: bad test line: no bitmap\n", filename, lineno);
+            sfree(line);
+            continue;
+        }
+        *semicolon++ = '\0';
+        unsigned bitmask = strtoul(semicolon, NULL, 0);
+        char *p = line;
+        bcs_len = 0;
+        bool test_ok = true;
+        while (1) {
+            const char *word = extract_word(&p);
+            if (!*word)
+                break;
+            unsigned type;
+            for (type = 0; type < N_BIDI_TYPES; type++)
+                if (!strcmp(word, typenames[type]))
+                    break;
+            if (type == N_BIDI_TYPES) {
+                printf("%s:%u: bad test line: bad bidi type '%s'\n",
+                       filename, lineno, word);
+                test_ok = false;
+                break;
+            }
+            sgrowarray(bcs, bcs_size, bcs_len);
+            bcs[bcs_len].wc = representatives[type];
+            bcs[bcs_len].origwc = bcs[bcs_len].wc;
+            bcs[bcs_len].index = bcs_len;
+            bcs[bcs_len].nchars = 1;
+            bcs_len++;
+        }
+
+        if (!test_ok) {
+            sfree(line);
+            continue;
+        }
+
+        /* One run per paragraph direction requested by the bitmask */
+        if (bitmask & 1)
+            run_test(filename, lineno, bcs, bcs_len, order, order_len, 0);
+        if (bitmask & 2)
+            run_test(filename, lineno, bcs, bcs_len, order, order_len, +1);
+        if (bitmask & 4)
+            run_test(filename, lineno, bcs, bcs_len, order, order_len, -1);
+
+        sfree(line);
+    }
+
+    sfree(bcs);
+    sfree(order);
+}
+
+/*
+ * Run every test in a file of the kind selected by --char (the usage
+ * message gives BidiCharacterTest.txt as the example).
+ *
+ * Each test line has five semicolon-separated fields: the input as
+ * space-separated hex code points, the paragraph direction (0 = LTR,
+ * 1 = RTL, anything else = auto), the paragraph level, the per-character
+ * levels, and the expected output order. The paragraph level and
+ * per-character levels are parsed off but not checked. 'filename' is
+ * used only in diagnostics; the caller keeps ownership of fp.
+ */
+static void char_test(const char *filename, FILE *fp)
+{
+    unsigned lineno = 0;
+    size_t bcs_size = 0, bcs_len = 0;
+    bidi_char *bcs = NULL;
+    size_t order_size = 0, order_len = 0;
+    unsigned *order = NULL;
+
+    while (true) {
+        lineno++;
+        char *line = chomp(fgetline(fp));
+        if (!line)
+            break;
+
+        /* Skip blank lines and comments */
+        if (!line[0] || line[0] == '#') {
+            sfree(line);
+            continue;
+        }
+
+        /* Break each test line up into its main fields */
+        ptrlen input_pl, para_dir_pl, para_level_pl, levels_pl, order_pl;
+        {
+            ptrlen pl = ptrlen_from_asciz(line);
+            input_pl = ptrlen_get_word(&pl, ";");
+            para_dir_pl = ptrlen_get_word(&pl, ";");
+            para_level_pl = ptrlen_get_word(&pl, ";");
+            levels_pl = ptrlen_get_word(&pl, ";");
+            order_pl = ptrlen_get_word(&pl, ";");
+        }
+
+        int override;
+        {
+            char *para_dir_str = mkstr(para_dir_pl);
+            unsigned para_dir = strtoul(para_dir_str, NULL, 0);
+            sfree(para_dir_str);
+
+            override = (para_dir == 0 ? +1 : para_dir == 1 ? -1 : 0);
+        }
+
+        /* Break up the input into Unicode characters */
+        bcs_len = 0;
+        {
+            ptrlen pl = input_pl;
+            while (pl.len) {
+                ptrlen chr = ptrlen_get_word(&pl, " ");
+                char *chrstr = mkstr(chr);
+                sgrowarray(bcs, bcs_size, bcs_len);
+                bcs[bcs_len].wc = strtoul(chrstr, NULL, 16);
+                bcs[bcs_len].origwc = bcs[bcs_len].wc;
+                bcs[bcs_len].index = bcs_len;
+                bcs[bcs_len].nchars = 1;
+                bcs_len++;
+                sfree(chrstr);
+            }
+        }
+
+        /* Ditto the expected output order */
+        order_len = 0;
+        {
+            ptrlen pl = order_pl;
+            while (pl.len) {
+                ptrlen chr = ptrlen_get_word(&pl, " ");
+                char *chrstr = mkstr(chr);
+                sgrowarray(order, order_size, order_len);
+                order[order_len++] = strtoul(chrstr, NULL, 0);
+                sfree(chrstr);
+            }
+        }
+
+        run_test(filename, lineno, bcs, bcs_len, order, order_len, override);
+        sfree(line);
+    }
+
+    sfree(bcs);
+    sfree(order);
+}
+
+/*
+ * Report allocation failure and exit with status 2. Deliberately not
+ * static, unlike the rest of this file.
+ * NOTE(review): presumably this is the hook the linked utils library
+ * calls on allocation failure - confirm against its headers.
+ */
+void out_of_memory(void)
+{
+    fprintf(stderr, "out of memory!\n");
+    exit(2);
+}
+
+/* Print the command-line help text to fp. */
+static void usage(FILE *fp)
+{
+    fprintf(fp, "\
+usage: bidi_test ( ( --class | --char ) infile... )...\n\
+e.g.: bidi_test --class BidiTest.txt --char BidiCharacterTest.txt\n\
+also: --help display this text\n\
+");
+}
+
+/*
+ * Entry point. Arguments are processed left to right: --class and
+ * --char choose the parser applied to the file names that follow, "-"
+ * as a file name means standard input, and "--" ends option
+ * processing.
+ *
+ * Returns 0 if every test passed (or --help was given), and 1 if any
+ * test failed or the command line was unusable.
+ */
+int main(int argc, char **argv)
+{
+    void (*testfn)(const char *, FILE *) = NULL;
+    bool doing_opts = true;
+    bool done_something = false;
+
+    ctx = bidi_new_context();
+
+    while (--argc > 0) {
+        const char *arg = *++argv;
+        if (doing_opts && arg[0] == '-' && arg[1]) {
+            if (!strcmp(arg, "--")) {
+                doing_opts = false;
+            } else if (!strcmp(arg, "--class")) {
+                testfn = class_test;
+            } else if (!strcmp(arg, "--char")) {
+                testfn = char_test;
+            } else if (!strcmp(arg, "--help")) {
+                usage(stdout);
+                return 0;
+            } else {
+                fprintf(stderr, "unrecognised option '%s'\n", arg);
+                return 1;
+            }
+        } else {
+            const char *filename = arg;
+
+            /* An input file means nothing until we know which format
+             * to parse it as, and calling through a null testfn would
+             * crash, so insist on --class or --char coming first. */
+            if (!testfn) {
+                fprintf(stderr, "no test type (--class or --char) given "
+                        "before input file '%s'\n", filename);
+                usage(stderr);
+                return 1;
+            }
+
+            if (!strcmp(filename, "-")) {
+                /* Give standard input a visible name in diagnostics */
+                testfn("<stdin>", stdin);
+            } else {
+                FILE *fp = fopen(filename, "r");
+                if (!fp) {
+                    fprintf(stderr, "unable to open '%s'\n", filename);
+                    return 1;
+                }
+                testfn(filename, fp);
+                fclose(fp);
+            }
+            done_something = true;
+        }
+    }
+
+    if (!done_something) {
+        usage(stderr);
+        return 1;
+    }
+
+    printf("pass %d fail %d total %d\n", pass, fail, pass + fail);
+
+    bidi_free_context(ctx);
+    return fail != 0;
+}