diff --git a/CMakeLists.txt b/CMakeLists.txt
index debd6e1d..299f04c4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -86,6 +86,10 @@ add_executable(bidi_gettype
   terminal/bidi_gettype.c)
 target_link_libraries(bidi_gettype guiterminal utils ${platform_libraries})
 
+add_executable(bidi_test
+  terminal/bidi_test.c)
+target_link_libraries(bidi_test guiterminal utils ${platform_libraries})
+
 add_executable(plink
   ${platform}/plink.c
   be_all_s.c)
diff --git a/terminal/bidi.c b/terminal/bidi.c
index ea05e2bd..0e72b49d 100644
--- a/terminal/bidi.c
+++ b/terminal/bidi.c
@@ -3589,6 +3589,20 @@ void do_bidi_new(BidiContext *ctx, bidi_char *text, size_t textlen)
     reverse_sequences(ctx);
 }
 
+/*
+ * Entry point for the bidi_test program: run do_bidi_new() on the
+ * given text with the paragraph direction forced to L (override > 0),
+ * R (override < 0) or left as ON (override == 0), and return the
+ * context's resulting textlen.
+ */
+size_t do_bidi_test(BidiContext *ctx, bidi_char *text, size_t textlen,
+                    int override)
+{
+    ctx->paragraphOverride = (override > 0 ? L : override < 0 ? R : ON);
+    do_bidi_new(ctx, text, textlen);
+    return ctx->textlen;
+}
+
 void do_bidi(BidiContext *ctx, bidi_char *text, size_t textlen)
 {
 #ifdef REMOVE_FORMATTING_CHARACTERS
diff --git a/terminal/bidi.h b/terminal/bidi.h
index dd488e1f..90d68e5b 100644
--- a/terminal/bidi.h
+++ b/terminal/bidi.h
@@ -131,4 +131,17 @@ static inline bool typeIsETOrBN(BidiType t)
     return ((1<
+
+#include "putty.h"
+#include "misc.h"
+#include "bidi.h"
+
+/* Running totals of test cases, reported by main() at the end. */
+static int pass = 0, fail = 0;
+
+/* Single bidi context shared by every test case; created in main(). */
+static BidiContext *ctx;
+
+/*
+ * Split the next whitespace-delimited word off the string at *ptr,
+ * modifying the string in place.
+ *
+ * Leading whitespace is skipped; the word is NUL-terminated by
+ * overwriting the first whitespace character that follows it; and
+ * *ptr is advanced past any further whitespace, ready for the next
+ * call.
+ *
+ * Returns a pointer to the start of the word. If nothing but
+ * whitespace remained, the returned string is empty.
+ */
+static const char *extract_word(char **ptr)
+{
+    char *p = *ptr;
+    while (*p && isspace((unsigned char)*p)) p++;
+
+    char *start = p;
+    while (*p && !isspace((unsigned char)*p)) p++;
+
+    if (*p) {
+        *p++ = '\0';
+        while (*p && isspace((unsigned char)*p)) p++;
+    }
+
+    *ptr = p;
+    return start;
+}
+
+/* Table of bidi type names as strings, indexed by bidi type value. */
+#define TYPETONAME(X) #X,
+static const char *const typenames[] = { BIDI_CHAR_TYPE_LIST(TYPETONAME) };
+#undef TYPETONAME
+
+/*
+ * Run one test case and record the result in pass/fail.
+ *
+ * filename, lineno: where the test case came from, for diagnostics.
+ * bcs, bcs_len:     the input text. It is processed in place, then
+ *                   restored to its original contents before
+ *                   returning, so the caller can reuse it.
+ * order, order_len: the expected sequence of original character
+ *                   indices in the output.
+ * override:         paragraph direction passed to do_bidi_test():
+ *                   > 0 is reported as LTR, < 0 as RTL, 0 as auto.
+ *
+ * On a mismatch, details of the input, expected order and actual
+ * order are printed to standard output.
+ */
+static void run_test(const char *filename, unsigned lineno,
+                     bidi_char *bcs, size_t bcs_len,
+                     const unsigned *order, size_t order_len,
+                     int override)
+{
+    /* Keep a pristine copy of the input, for diagnostics and restore */
+    size_t bcs_orig_len = bcs_len;
+    bidi_char *bcs_orig = snewn(bcs_orig_len, bidi_char);
+    memcpy(bcs_orig, bcs, bcs_orig_len * sizeof(bidi_char));
+
+    bcs_len = do_bidi_test(ctx, bcs, bcs_len, override);
+
+    /*
+     * TR9 revision 44 rule X9 says we remove explicit embedding
+     * controls and BN characters. So the test cases don't list them
+     * in the expected outputs. Do the same to our own output - unless
+     * we're testing the standard version of the algorithm, in which
+     * case, we expect the output to be exactly as the test cases say.
+     *
+     * NOTE(review): the macro tested here is REMOVE_FORMATTING_CHARS,
+     * whereas do_bidi() in bidi.c tests REMOVE_FORMATTING_CHARACTERS.
+     * Confirm whether the two names are meant to differ.
+     */
+    unsigned *our_order = snewn(bcs_len, unsigned);
+    size_t our_order_len = 0;
+    for (size_t i = 0; i < bcs_len; i++) {
+        BidiType t = bidi_getType(bcs[i].wc);
+#ifndef REMOVE_FORMATTING_CHARS
+        if (typeIsRemovedDuringProcessing(t))
+            continue;
+#endif
+        our_order[our_order_len++] = bcs[i].index;
+    }
+
+    /* The test passes only if both length and every index agree */
+    bool ok = false;
+    if (our_order_len == order_len) {
+        ok = true;
+        for (size_t i = 0; i < our_order_len; i++)
+            if (our_order[i] != order[i])
+                ok = false;
+    }
+    if (ok) {
+        pass++;
+    } else {
+        fail++;
+        printf("%s:%u: failed order\n", filename, lineno);
+        printf(" input chars:");
+        for (size_t i = 0; i < bcs_orig_len; i++)
+            printf(" %04x", bcs_orig[i].wc);
+        printf("\n");
+        printf(" classes: ");
+        for (size_t i = 0; i < bcs_orig_len; i++)
+            printf(" %-4s", typenames[bidi_getType(bcs_orig[i].wc)]);
+        printf("\n");
+        printf(" para level = %s\n",
+               override > 0 ? "LTR" : override < 0 ? "RTL" : "auto");
+        printf(" expected:");
+        for (size_t i = 0; i < order_len; i++)
+            printf(" %u", order[i]);
+        printf("\n");
+        printf(" got: ");
+        for (size_t i = 0; i < our_order_len; i++)
+            printf(" %u", our_order[i]);
+        printf("\n");
+    }
+
+    /* Put the original data back so we can re-test with another override */
+    memcpy(bcs, bcs_orig, bcs_orig_len * sizeof(bidi_char));
+
+    sfree(bcs_orig);
+    sfree(our_order);
+}
+
+/*
+ * Run the tests in a file of bidi-class-based test cases (selected
+ * by --class; the usage text gives BidiTest.txt as the example).
+ *
+ * Each test line is a list of bidi type names, then a semicolon,
+ * then a bitmask saying which paragraph directions to test: bit 0
+ * runs the test with override 0, bit 1 with +1 and bit 2 with -1.
+ * The expected output order comes from the most recent "@Reorder:"
+ * line. Each type name is replaced by a representative character of
+ * that type before being fed to the algorithm.
+ */
+static void class_test(const char *filename, FILE *fp)
+{
+    unsigned lineno = 0;
+    size_t bcs_size = 0, bcs_len = 0;
+    bidi_char *bcs = NULL;
+    size_t order_size = 0, order_len = 0;
+    unsigned *order = NULL;
+
+    /* Preliminary: find a representative character of every bidi
+     * type. Prefer positive-width ones if available. */
+    unsigned representatives[N_BIDI_TYPES];
+    for (size_t i = 0; i < N_BIDI_TYPES; i++)
+        representatives[i] = 0;
+    for (unsigned uc = 1; uc < 0x110000; uc++) {
+        unsigned type = bidi_getType(uc);
+        if (!representatives[type] ||
+            (mk_wcwidth(representatives[type]) <= 0 && mk_wcwidth(uc) > 0))
+            representatives[type] = uc;
+    }
+
+    while (true) {
+        lineno++;
+        char *line = chomp(fgetline(fp));
+        if (!line)
+            break;
+
+        /* Skip blank lines and comments */
+        if (!line[0] || line[0] == '#') {
+            sfree(line);
+            continue;
+        }
+
+        /* Parse @Reorder lines, which tell us the expected output
+         * order for all following test cases (until superseded) */
+        if (strstartswith(line, "@Reorder:")) {
+            char *p = line;
+            extract_word(&p); /* eat the "@Reorder:" header itself */
+            order_len = 0;
+            while (1) {
+                const char *word = extract_word(&p);
+                if (!*word)
+                    break;
+                sgrowarray(order, order_size, order_len);
+                order[order_len++] = strtoul(word, NULL, 0);
+            }
+
+            sfree(line);
+            continue;
+        }
+
+        /* Skip @Levels lines, which we don't (yet?) do anything with */
+        if (strstartswith(line, "@Levels:")) {
+            sfree(line);
+            continue;
+        }
+
+        /* Everything remaining should be an actual test */
+        char *semicolon = strchr(line, ';');
+        if (!semicolon) {
+            printf("%s:%u: bad test line: no bitmap\n", filename, lineno);
+            sfree(line);
+            continue;
+        }
+        *semicolon++ = '\0';
+        unsigned bitmask = strtoul(semicolon, NULL, 0);
+        char *p = line;
+        bcs_len = 0;
+        bool test_ok = true;
+        while (1) {
+            const char *word = extract_word(&p);
+            if (!*word)
+                break;
+            /* Look the type name up in the table of known names */
+            unsigned type;
+            for (type = 0; type < N_BIDI_TYPES; type++)
+                if (!strcmp(word, typenames[type]))
+                    break;
+            if (type == N_BIDI_TYPES) {
+                printf("%s:%u: bad test line: bad bidi type '%s'\n",
+                       filename, lineno, word);
+                test_ok = false;
+                break;
+            }
+            sgrowarray(bcs, bcs_size, bcs_len);
+            bcs[bcs_len].wc = representatives[type];
+            bcs[bcs_len].origwc = bcs[bcs_len].wc;
+            bcs[bcs_len].index = bcs_len;
+            bcs[bcs_len].nchars = 1;
+            bcs_len++;
+        }
+
+        if (!test_ok) {
+            sfree(line);
+            continue;
+        }
+
+        /* One run per paragraph direction requested by the bitmask */
+        if (bitmask & 1)
+            run_test(filename, lineno, bcs, bcs_len, order, order_len, 0);
+        if (bitmask & 2)
+            run_test(filename, lineno, bcs, bcs_len, order, order_len, +1);
+        if (bitmask & 4)
+            run_test(filename, lineno, bcs, bcs_len, order, order_len, -1);
+
+        sfree(line);
+    }
+
+    sfree(bcs);
+    sfree(order);
+}
+
+/*
+ * Run the tests in a file of character-based test cases (selected by
+ * --char; the usage text gives BidiCharacterTest.txt as the example).
+ *
+ * Each test line has five semicolon-separated fields, of which this
+ * function uses three: the input as space-separated hex code points
+ * (field 1), the paragraph direction (field 2: 0 maps to override
+ * +1, 1 to override -1, anything else to override 0), and the
+ * expected output order as space-separated indices (field 5).
+ */
+static void char_test(const char *filename, FILE *fp)
+{
+    unsigned lineno = 0;
+    size_t bcs_size = 0, bcs_len = 0;
+    bidi_char *bcs = NULL;
+    size_t order_size = 0, order_len = 0;
+    unsigned *order = NULL;
+
+    while (true) {
+        lineno++;
+        char *line = chomp(fgetline(fp));
+        if (!line)
+            break;
+
+        /* Skip blank lines and comments */
+        if (!line[0] || line[0] == '#') {
+            sfree(line);
+            continue;
+        }
+
+        /* Break each test line up into its main fields. The paragraph
+         * level and levels fields are split off only to step past
+         * them; nothing below reads them. */
+        ptrlen input_pl, para_dir_pl, para_level_pl, levels_pl, order_pl;
+        {
+            ptrlen pl = ptrlen_from_asciz(line);
+            input_pl = ptrlen_get_word(&pl, ";");
+            para_dir_pl = ptrlen_get_word(&pl, ";");
+            para_level_pl = ptrlen_get_word(&pl, ";");
+            levels_pl = ptrlen_get_word(&pl, ";");
+            order_pl = ptrlen_get_word(&pl, ";");
+        }
+
+        int override;
+        {
+            char *para_dir_str = mkstr(para_dir_pl);
+            unsigned para_dir = strtoul(para_dir_str, NULL, 0);
+            sfree(para_dir_str);
+
+            override = (para_dir == 0 ? +1 : para_dir == 1 ? -1 : 0);
+        }
+
+        /* Break up the input into Unicode characters */
+        bcs_len = 0;
+        {
+            ptrlen pl = input_pl;
+            while (pl.len) {
+                ptrlen chr = ptrlen_get_word(&pl, " ");
+                char *chrstr = mkstr(chr);
+                sgrowarray(bcs, bcs_size, bcs_len);
+                bcs[bcs_len].wc = strtoul(chrstr, NULL, 16);
+                bcs[bcs_len].origwc = bcs[bcs_len].wc;
+                bcs[bcs_len].index = bcs_len;
+                bcs[bcs_len].nchars = 1;
+                bcs_len++;
+                sfree(chrstr);
+            }
+        }
+
+        /* Ditto the expected output order */
+        order_len = 0;
+        {
+            ptrlen pl = order_pl;
+            while (pl.len) {
+                ptrlen chr = ptrlen_get_word(&pl, " ");
+                char *chrstr = mkstr(chr);
+                sgrowarray(order, order_size, order_len);
+                order[order_len++] = strtoul(chrstr, NULL, 0);
+                sfree(chrstr);
+            }
+        }
+
+        run_test(filename, lineno, bcs, bcs_len, order, order_len, override);
+        sfree(line);
+    }
+
+    sfree(bcs);
+    sfree(order);
+}
+
+/* Report memory exhaustion on standard error and exit with status 2. */
+void out_of_memory(void)
+{
+    fprintf(stderr, "out of memory!\n");
+    exit(2);
+}
+
+/* Print the command-line help text to the given stream. */
+static void usage(FILE *fp)
+{
+    fprintf(fp, "\
+usage: bidi_test ( ( --class | --char ) infile... )...\n\
+e.g.: bidi_test --class BidiTest.txt --char BidiCharacterTest.txt\n\
+also: --help display this text\n\
+");
+}
+
+/*
+ * Command-line driver. Arguments are processed left to right:
+ * --class and --char select the test function applied to every
+ * following input file (until changed), "--" ends option processing,
+ * and "-" as a file name means standard input.
+ *
+ * Exit status is 0 if every test passed, and 1 if any test failed or
+ * the command line could not be processed.
+ */
+int main(int argc, char **argv)
+{
+    void (*testfn)(const char *, FILE *) = NULL;
+    bool doing_opts = true;
+    bool done_something = false;
+
+    ctx = bidi_new_context();
+
+    while (--argc > 0) {
+        const char *arg = *++argv;
+        if (doing_opts && arg[0] == '-' && arg[1]) {
+            if (!strcmp(arg, "--")) {
+                doing_opts = false;
+            } else if (!strcmp(arg, "--class")) {
+                testfn = class_test;
+            } else if (!strcmp(arg, "--char")) {
+                testfn = char_test;
+            } else if (!strcmp(arg, "--help")) {
+                usage(stdout);
+                return 0;
+            } else {
+                fprintf(stderr, "unrecognised option '%s'\n", arg);
+                return 1;
+            }
+        } else {
+            const char *filename = arg;
+
+            /* A file named before any --class/--char would otherwise
+             * be handed to a null function pointer */
+            if (!testfn) {
+                fprintf(stderr, "no test type given before input file "
+                        "'%s' (use --class or --char)\n", filename);
+                return 1;
+            }
+
+            if (!strcmp(filename, "-")) {
+                /* Give standard input a visible name, so failure
+                 * reports don't begin with a bare colon */
+                testfn("<stdin>", stdin);
+            } else {
+                FILE *fp = fopen(filename, "r");
+                if (!fp) {
+                    fprintf(stderr, "unable to open '%s'\n", filename);
+                    return 1;
+                }
+                testfn(filename, fp);
+                fclose(fp);
+            }
+            done_something = true;
+        }
+    }
+
+    if (!done_something) {
+        usage(stderr);
+        return 1;
+    }
+
+    printf("pass %d fail %d total %d\n", pass, fail, pass + fail);
+
+    bidi_free_context(ctx);
+    return fail != 0;
+}