1
0
mirror of https://git.tartarus.org/simon/putty.git synced 2025-01-25 01:02:24 +00:00
putty-source/terminal/bidi_test.c
Simon Tatham 93ba74579a Test rig for the new bidi algorithm.
This standalone CLI program runs the UCD bidi tests in the form
provided in Unicode 14.0.0. You can run it by just saying

  bidi_test --class BidiTest.txt --char BidiCharacterTest.txt

assuming those two UCD files are in the current directory.
2021-10-10 15:00:30 +01:00

366 lines
11 KiB
C

/*
* Test program that reads the Unicode bidi algorithm test case lists
* that form part of the Unicode Character Database:
*
* https://www.unicode.org/Public/UCD/latest/ucd/BidiTest.txt
* https://www.unicode.org/Public/UCD/latest/ucd/BidiCharacterTest.txt
*/
#include <ctype.h>
#include "putty.h"
#include "misc.h"
#include "bidi.h"
/* Running totals of test outcomes: run_test() bumps exactly one of
 * these per test case, and main() prints them at the end. */
static int pass = 0;
static int fail = 0;

/* Bidi context shared by every test; created and freed in main(),
 * and handed to do_bidi_test() by run_test(). */
static BidiContext *ctx;
/*
 * Split the next whitespace-delimited token off the string at *ptr.
 *
 * The token is NUL-terminated in place, so the caller's buffer is
 * modified. On return, *ptr has been advanced past the token and any
 * whitespace that followed it. The return value points at the start
 * of the token; it is an empty string once the input is exhausted.
 */
static const char *extract_word(char **ptr)
{
    char *cursor = *ptr;

    /* Leading whitespace is not part of the token */
    for (; *cursor != '\0' && isspace((unsigned char)*cursor); cursor++)
        ;
    char *word = cursor;

    /* Scan to the end of the token */
    for (; *cursor != '\0' && !isspace((unsigned char)*cursor); cursor++)
        ;

    if (*cursor != '\0') {
        /* Terminate the token, then step over the separator and any
         * further whitespace so the next call starts on a token */
        *cursor = '\0';
        do {
            cursor++;
        } while (*cursor != '\0' && isspace((unsigned char)*cursor));
    }

    *ptr = cursor;
    return word;
}
#define TYPETONAME(X) #X,
static const char *const typenames[] = { BIDI_CHAR_TYPE_LIST(TYPETONAME) };
#undef TYPETONAME
/*
 * Run a single bidi test case and record whether it passed.
 *
 * filename, lineno: where the test case came from, for diagnostics.
 * bcs, bcs_len:     the input characters. They are processed in place
 *                   by do_bidi_test(), but the original contents are
 *                   copied back before returning, so the caller can
 *                   reuse the array with a different override.
 * order, order_len: the expected sequence of input indices.
 * override:         passed through to do_bidi_test(); reported in
 *                   diagnostics as LTR if positive, RTL if negative,
 *                   and auto if zero.
 *
 * Increments the global 'pass' or 'fail' counter, and on failure
 * prints the input, the expected order and the order we produced.
 */
static void run_test(const char *filename, unsigned lineno,
                     bidi_char *bcs, size_t bcs_len,
                     const unsigned *order, size_t order_len,
                     int override)
{
    /* Snapshot the input before the algorithm rearranges it */
    const size_t input_len = bcs_len;
    const size_t input_bytes = input_len * sizeof(bidi_char);
    bidi_char *input = snewn(input_len, bidi_char);
    memcpy(input, bcs, input_bytes);

    bcs_len = do_bidi_test(ctx, bcs, bcs_len, override);

    /*
     * Collect the index of each output character, in output order.
     * Unless REMOVE_FORMATTING_CHARS is defined, characters whose
     * type satisfies typeIsRemovedDuringProcessing() are left out,
     * because the expected orders in the test files do not list the
     * characters that rule X9 of the bidi algorithm removes.
     */
    unsigned *actual = snewn(bcs_len, unsigned);
    size_t actual_len = 0;
    for (size_t pos = 0; pos < bcs_len; pos++) {
        BidiType t = bidi_getType(bcs[pos].wc);
#ifndef REMOVE_FORMATTING_CHARS
        if (typeIsRemovedDuringProcessing(t))
            continue;
#endif
        actual[actual_len++] = bcs[pos].index;
    }

    /* The test passes only if both length and every element agree */
    bool matches = (actual_len == order_len);
    for (size_t pos = 0; matches && pos < actual_len; pos++)
        matches = (actual[pos] == order[pos]);

    if (matches) {
        pass++;
    } else {
        const char *level_name;
        if (override > 0)
            level_name = "LTR";
        else if (override < 0)
            level_name = "RTL";
        else
            level_name = "auto";

        fail++;
        printf("%s:%u: failed order\n", filename, lineno);

        printf(" input chars:");
        for (size_t pos = 0; pos < input_len; pos++)
            printf(" %04x", input[pos].wc);
        printf("\n");

        printf(" classes: ");
        for (size_t pos = 0; pos < input_len; pos++)
            printf(" %-4s", typenames[bidi_getType(input[pos].wc)]);
        printf("\n");

        printf(" para level = %s\n", level_name);

        printf(" expected:");
        for (size_t pos = 0; pos < order_len; pos++)
            printf(" %u", order[pos]);
        printf("\n");

        printf(" got: ");
        for (size_t pos = 0; pos < actual_len; pos++)
            printf(" %u", actual[pos]);
        printf("\n");
    }

    /* Restore the caller's array so it can be re-tested with another
     * override */
    memcpy(bcs, input, input_bytes);

    sfree(input);
    sfree(actual);
}
/*
 * Run every test case in a file laid out like the UCD's BidiTest.txt.
 *
 * filename: name used in diagnostics.
 * fp:       open stream to read the tests from (not closed here).
 *
 * Each test line is a list of bidi type names, a semicolon, and a
 * bitmask choosing which paragraph directions to test. The expected
 * output order comes from the most recent "@Reorder:" line. Each type
 * name is replaced by a representative character of that type, and
 * the resulting string is handed to run_test() once per selected
 * direction. Malformed lines are reported on stdout and skipped.
 */
static void class_test(const char *filename, FILE *fp)
{
    unsigned lineno = 0;
    size_t bcs_size = 0, bcs_len = 0;
    bidi_char *bcs = NULL;
    size_t order_size = 0, order_len = 0;
    unsigned *order = NULL;

    /* Preliminary: find a representative character of every bidi
     * type. Prefer positive-width ones if available. A stored value
     * of 0 means "nothing found yet", which is why the scan starts
     * at code point 1. */
    unsigned representatives[N_BIDI_TYPES];
    for (size_t i = 0; i < N_BIDI_TYPES; i++)
        representatives[i] = 0;
    for (unsigned uc = 1; uc < 0x110000; uc++) {
        unsigned type = bidi_getType(uc);
        if (!representatives[type] ||
            (mk_wcwidth(representatives[type]) <= 0 && mk_wcwidth(uc) > 0))
            representatives[type] = uc;
    }

    while (true) {
        lineno++;
        char *line = chomp(fgetline(fp));
        if (!line)
            break;

        /* Skip blank lines and comments */
        if (!line[0] || line[0] == '#') {
            sfree(line);
            continue;
        }

        /* Parse @Reorder lines, which tell us the expected output
         * order for all following test cases (until superseded) */
        if (strstartswith(line, "@Reorder:")) {
            char *p = line;
            extract_word(&p); /* eat the "@Reorder:" header itself */
            order_len = 0;
            while (1) {
                const char *word = extract_word(&p);
                if (!*word)
                    break;
                sgrowarray(order, order_size, order_len);
                order[order_len++] = strtoul(word, NULL, 0);
            }

            sfree(line);
            continue;
        }

        /* Skip @Levels lines, which we don't (yet?) do anything with */
        if (strstartswith(line, "@Levels:")) {
            sfree(line);
            continue;
        }

        /* Everything remaining should be an actual test */
        char *semicolon = strchr(line, ';');
        if (!semicolon) {
            printf("%s:%u: bad test line: no bitmap\n", filename, lineno);
            sfree(line);
            continue;
        }

        /* Split the line at the semicolon: type names before it, the
         * direction bitmask after it */
        *semicolon++ = '\0';
        unsigned bitmask = strtoul(semicolon, NULL, 0);

        char *p = line;
        bcs_len = 0;
        bool test_ok = true;
        while (1) {
            const char *word = extract_word(&p);
            if (!*word)
                break;

            /* Look the type name up in the typenames table */
            unsigned type;
            for (type = 0; type < N_BIDI_TYPES; type++)
                if (!strcmp(word, typenames[type]))
                    break;
            if (type == N_BIDI_TYPES) {
                printf("%s:%u: bad test line: bad bidi type '%s'\n",
                       filename, lineno, word);
                test_ok = false;
                break;
            }

            sgrowarray(bcs, bcs_size, bcs_len);
            bcs[bcs_len].wc = representatives[type];
            bcs[bcs_len].origwc = bcs[bcs_len].wc;
            bcs[bcs_len].index = bcs_len;
            bcs[bcs_len].nchars = 1;
            bcs_len++;
        }

        if (!test_ok) {
            sfree(line);
            continue;
        }

        /* Bits 0, 1 and 2 of the bitmask select the override values
         * 0, +1 and -1 respectively, which run_test() reports as
         * auto, LTR and RTL */
        if (bitmask & 1)
            run_test(filename, lineno, bcs, bcs_len, order, order_len, 0);
        if (bitmask & 2)
            run_test(filename, lineno, bcs, bcs_len, order, order_len, +1);
        if (bitmask & 4)
            run_test(filename, lineno, bcs, bcs_len, order, order_len, -1);

        sfree(line);
    }

    sfree(bcs);
    sfree(order);
}
/*
 * Run every test case in a file laid out like the UCD's
 * BidiCharacterTest.txt.
 *
 * filename: name used in diagnostics.
 * fp:       open stream to read the tests from (not closed here).
 *
 * Each test line holds five semicolon-separated fields. The first is
 * a space-separated list of hex code points, the second a paragraph
 * direction code, and the fifth the expected output order. The third
 * and fourth are read but not used. Every line produces exactly one
 * run_test() call.
 */
static void char_test(const char *filename, FILE *fp)
{
    bidi_char *bcs = NULL;
    size_t bcs_size = 0, bcs_len = 0;
    unsigned *order = NULL;
    size_t order_size = 0, order_len = 0;

    for (unsigned lineno = 1 ;; lineno++) {
        char *line = chomp(fgetline(fp));
        if (!line)
            break;

        /* Blank lines and '#' comments carry no test */
        if (line[0] == '\0' || line[0] == '#') {
            sfree(line);
            continue;
        }

        /* Carve the line into its semicolon-separated fields. The
         * third and fourth must still be consumed to reach the
         * fifth, even though nothing here looks at them. */
        ptrlen fields = ptrlen_from_asciz(line);
        ptrlen input_pl = ptrlen_get_word(&fields, ";");
        ptrlen para_dir_pl = ptrlen_get_word(&fields, ";");
        (void)ptrlen_get_word(&fields, ";");
        (void)ptrlen_get_word(&fields, ";");
        ptrlen order_pl = ptrlen_get_word(&fields, ";");

        /* Translate the direction code into the override value that
         * run_test() expects: 0 gives +1, 1 gives -1, anything else
         * gives 0 */
        char *para_dir_str = mkstr(para_dir_pl);
        unsigned para_dir = strtoul(para_dir_str, NULL, 0);
        sfree(para_dir_str);

        int override;
        switch (para_dir) {
          case 0:
            override = +1;
            break;
          case 1:
            override = -1;
            break;
          default:
            override = 0;
            break;
        }

        /* Turn the list of hex code points into bidi_char entries */
        bcs_len = 0;
        for (ptrlen rest = input_pl; rest.len != 0 ;) {
            ptrlen token = ptrlen_get_word(&rest, " ");
            char *text = mkstr(token);

            sgrowarray(bcs, bcs_size, bcs_len);
            bidi_char *bc = &bcs[bcs_len];
            bc->wc = strtoul(text, NULL, 16);
            bc->origwc = bc->wc;
            bc->index = bcs_len;
            bc->nchars = 1;
            bcs_len++;

            sfree(text);
        }

        /* Likewise the expected output order, a list of indices */
        order_len = 0;
        for (ptrlen rest = order_pl; rest.len != 0 ;) {
            ptrlen token = ptrlen_get_word(&rest, " ");
            char *text = mkstr(token);

            sgrowarray(order, order_size, order_len);
            order[order_len++] = strtoul(text, NULL, 0);

            sfree(text);
        }

        run_test(filename, lineno, bcs, bcs_len, order, order_len, override);

        sfree(line);
    }

    sfree(bcs);
    sfree(order);
}
/*
 * Report an out-of-memory condition on stderr and terminate the
 * program with exit status 2. Does not return.
 */
void out_of_memory(void)
{
    fputs("out of memory!\n", stderr);
    exit(2);
}
/*
 * Write the command-line usage summary to the given stream.
 */
static void usage(FILE *fp)
{
    static const char help_text[] =
        "usage: bidi_test ( ( --class | --char ) infile... )...\n"
        "e.g.: bidi_test --class BidiTest.txt --char BidiCharacterTest.txt\n"
        "also: --help display this text\n";

    fputs(help_text, fp);
}
/*
 * Entry point: parse the command line and run the requested tests.
 *
 * "--class" and "--char" choose which parser (class_test or
 * char_test) applies to the input files that follow; "--" ends
 * option processing; "-" as a file name means standard input;
 * "--help" prints the usage text and exits successfully.
 *
 * Returns 0 if every test passed, and 1 if any test failed or the
 * command line was unusable (unknown option, file that cannot be
 * opened, file named before any test type was chosen, or no input
 * files at all).
 */
int main(int argc, char **argv)
{
    void (*testfn)(const char *, FILE *) = NULL;
    bool doing_opts = true;
    bool done_something = false;
    int exitcode = 1;

    ctx = bidi_new_context();

    while (--argc > 0) {
        const char *arg = *++argv;
        if (doing_opts && arg[0] == '-' && arg[1]) {
            if (!strcmp(arg, "--")) {
                doing_opts = false;
            } else if (!strcmp(arg, "--class")) {
                testfn = class_test;
            } else if (!strcmp(arg, "--char")) {
                testfn = char_test;
            } else if (!strcmp(arg, "--help")) {
                usage(stdout);
                exitcode = 0;
                goto out;
            } else {
                fprintf(stderr, "unrecognised option '%s'\n", arg);
                goto out;
            }
        } else {
            const char *filename = arg;

            /* Without a preceding --class or --char we have no parser
             * to call, so refuse rather than jump through a null
             * function pointer */
            if (!testfn) {
                fprintf(stderr, "no test type specified before '%s' "
                        "(expected --class or --char)\n", filename);
                goto out;
            }

            if (!strcmp(filename, "-")) {
                testfn("<standard input>", stdin);
            } else {
                FILE *fp = fopen(filename, "r");
                if (!fp) {
                    fprintf(stderr, "unable to open '%s'\n", filename);
                    goto out;
                }
                testfn(filename, fp);
                fclose(fp);
            }
            done_something = true;
        }
    }

    if (!done_something) {
        usage(stderr);
        goto out;
    }

    printf("pass %d fail %d total %d\n", pass, fail, pass + fail);
    exitcode = (fail != 0);

  out:
    /* Single exit path, so the context is released however we leave */
    bidi_free_context(ctx);
    return exitcode;
}