1
0
mirror of https://git.tartarus.org/simon/putty.git synced 2025-01-25 01:02:24 +00:00

Test rig for the new bidi algorithm.

This standalone CLI program runs the UCD bidi tests in the form
provided in Unicode 14.0.0. You can run it by just saying

  bidi_test --class BidiTest.txt --char BidiCharacterTest.txt

assuming those two UCD files are in the current directory.
This commit is contained in:
Simon Tatham 2021-10-10 14:52:17 +01:00
parent b8be01adca
commit 93ba74579a
4 changed files with 390 additions and 0 deletions

View File

@ -86,6 +86,10 @@ add_executable(bidi_gettype
terminal/bidi_gettype.c) terminal/bidi_gettype.c)
target_link_libraries(bidi_gettype guiterminal utils ${platform_libraries}) target_link_libraries(bidi_gettype guiterminal utils ${platform_libraries})
add_executable(bidi_test
terminal/bidi_test.c)
target_link_libraries(bidi_test guiterminal utils ${platform_libraries})
add_executable(plink add_executable(plink
${platform}/plink.c ${platform}/plink.c
be_all_s.c) be_all_s.c)

View File

@ -3589,6 +3589,14 @@ void do_bidi_new(BidiContext *ctx, bidi_char *text, size_t textlen)
reverse_sequences(ctx); reverse_sequences(ctx);
} }
/*
 * Test-harness entry point to the bidi algorithm.
 *
 * Sets the context's paragraph-direction override from 'override'
 * (positive selects L, negative selects R, zero selects ON), runs
 * do_bidi_new() over the text, and returns the text length recorded
 * in the context afterwards, which may differ from the 'textlen'
 * passed in.
 */
size_t do_bidi_test(BidiContext *ctx, bidi_char *text, size_t textlen,
                    int override)
{
    if (override > 0)
        ctx->paragraphOverride = L;
    else if (override < 0)
        ctx->paragraphOverride = R;
    else
        ctx->paragraphOverride = ON;

    do_bidi_new(ctx, text, textlen);

    return ctx->textlen;
}
void do_bidi(BidiContext *ctx, bidi_char *text, size_t textlen) void do_bidi(BidiContext *ctx, bidi_char *text, size_t textlen)
{ {
#ifdef REMOVE_FORMATTING_CHARACTERS #ifdef REMOVE_FORMATTING_CHARACTERS

View File

@ -131,4 +131,17 @@ static inline bool typeIsETOrBN(BidiType t)
return ((1<<ET) | (1<<BN)) & (1 << t); return ((1<<ET) | (1<<BN)) & (1 << t);
} }
/*
* More featureful interface to the bidi code, for use in bidi_test.c.
* It returns a potentially different value of textlen (in case we're
* compiling in REMOVE_FORMATTING_CHARACTERS mode), and also permits
* you to pass in an override to the paragraph direction (because many
* of the UCD conformance tests use one).
*
* 'override' is 0 for no override, +1 for left-to-right, -1 for
* right-to-left.
*/
size_t do_bidi_test(BidiContext *ctx, bidi_char *text, size_t textlen,
int override);
#endif /* PUTTY_BIDI_H */ #endif /* PUTTY_BIDI_H */

365
terminal/bidi_test.c Normal file
View File

@ -0,0 +1,365 @@
/*
* Test program that reads the Unicode bidi algorithm test case lists
* that form part of the Unicode Character Database:
*
* https://www.unicode.org/Public/UCD/latest/ucd/BidiTest.txt
* https://www.unicode.org/Public/UCD/latest/ucd/BidiCharacterTest.txt
*/
#include <ctype.h>
#include "putty.h"
#include "misc.h"
#include "bidi.h"
/* Running totals of test cases that passed and failed; run_test()
 * increments one of them per case and main() prints them at the end. */
static int pass = 0, fail = 0;
/* Bidi context shared by every test case; created and freed in main(). */
static BidiContext *ctx;
/*
 * Destructively split the next whitespace-delimited word off a
 * mutable string.
 *
 * On entry *ptr points at the text still to be parsed. Leading
 * whitespace is skipped, the word that follows is NUL-terminated in
 * place, and *ptr is advanced past the word and any whitespace after
 * it. Returns a pointer to the start of the word, which is an empty
 * string once the input is exhausted.
 */
static const char *extract_word(char **ptr)
{
    char *cursor = *ptr;

    /* Step over any whitespace before the word. */
    for (; *cursor != '\0' && isspace((unsigned char)*cursor); cursor++)
        ;
    char *word = cursor;

    /* Find the end of the word. */
    for (; *cursor != '\0' && !isspace((unsigned char)*cursor); cursor++)
        ;

    if (*cursor != '\0') {
        /* Terminate the word, then leave the cursor on the next one. */
        *cursor = '\0';
        cursor++;
        while (*cursor != '\0' && isspace((unsigned char)*cursor))
            cursor++;
    }

    *ptr = cursor;
    return word;
}
/*
 * Table of bidi type names as strings, built by stringifying each
 * entry of BIDI_CHAR_TYPE_LIST. Elsewhere in this file it is indexed
 * by the value returned from bidi_getType(), so the list is presumably
 * in the same order as the BidiType enumeration (defined outside this
 * file - confirm there).
 */
#define TYPETONAME(X) #X,
static const char *const typenames[] = { BIDI_CHAR_TYPE_LIST(TYPETONAME) };
#undef TYPETONAME
/*
 * Run a single bidi test case and record whether it passed.
 *
 * filename, lineno: where the test case came from, used only in the
 *                   failure report.
 * bcs, bcs_len:     the input text. It is reordered in place by the
 *                   bidi code during the test, and restored to its
 *                   original contents before this function returns,
 *                   so the caller can re-run it with another override.
 * order, order_len: the expected output, as a list of input indices
 *                   in display order.
 * override:         paragraph direction passed to do_bidi_test():
 *                   0 for none, +1 for left-to-right, -1 for
 *                   right-to-left.
 *
 * Increments the global 'pass' or 'fail' counter, and on failure
 * prints a description of the test case and both orderings to stdout.
 */
static void run_test(const char *filename, unsigned lineno,
                     bidi_char *bcs, size_t bcs_len,
                     const unsigned *order, size_t order_len,
                     int override)
{
    /* Save the input, both for the failure report and to restore it. */
    size_t bcs_orig_len = bcs_len;
    bidi_char *bcs_orig = snewn(bcs_orig_len, bidi_char);
    memcpy(bcs_orig, bcs, bcs_orig_len * sizeof(bidi_char));

    bcs_len = do_bidi_test(ctx, bcs, bcs_len, override);

    /*
     * TR9 revision 44 rule X9 says we remove explicit embedding
     * controls and BN characters. So the test cases don't list them
     * in the expected outputs. Do the same to our own output - unless
     * we're testing the standard version of the algorithm, in which
     * case, we expect the output to be exactly as the test cases say.
     *
     * The macro tested here must be the same one the bidi code itself
     * is conditionalised on, i.e. REMOVE_FORMATTING_CHARACTERS.
     */
    unsigned *our_order = snewn(bcs_len, unsigned);
    size_t our_order_len = 0;
    for (size_t i = 0; i < bcs_len; i++) {
#ifndef REMOVE_FORMATTING_CHARACTERS
        BidiType t = bidi_getType(bcs[i].wc);
        if (typeIsRemovedDuringProcessing(t))
            continue;
#endif
        our_order[our_order_len++] = bcs[i].index;
    }

    /* The test passes only if both orderings are identical. */
    bool ok = false;
    if (our_order_len == order_len) {
        ok = true;
        for (size_t i = 0; i < our_order_len; i++) {
            if (our_order[i] != order[i]) {
                ok = false;
                break;
            }
        }
    }

    if (ok) {
        pass++;
    } else {
        fail++;
        printf("%s:%u: failed order\n", filename, lineno);

        printf(" input chars:");
        for (size_t i = 0; i < bcs_orig_len; i++)
            printf(" %04x", bcs_orig[i].wc);
        printf("\n");

        printf(" classes: ");
        for (size_t i = 0; i < bcs_orig_len; i++)
            printf(" %-4s", typenames[bidi_getType(bcs_orig[i].wc)]);
        printf("\n");

        printf(" para level = %s\n",
               override > 0 ? "LTR" : override < 0 ? "RTL" : "auto");

        printf(" expected:");
        for (size_t i = 0; i < order_len; i++)
            printf(" %u", order[i]);
        printf("\n");

        printf(" got: ");
        for (size_t i = 0; i < our_order_len; i++)
            printf(" %u", our_order[i]);
        printf("\n");
    }

    /* Put the original data back so we can re-test with another override */
    memcpy(bcs, bcs_orig, bcs_orig_len * sizeof(bidi_char));

    sfree(bcs_orig);
    sfree(our_order);
}
/*
 * Run every test case in a file in the format of the UCD's
 * BidiTest.txt, reading from 'fp'. 'filename' is used only in
 * diagnostics.
 *
 * Lines handled:
 *  - blank lines and lines starting with '#' are ignored;
 *  - "@Reorder:" lines set the expected output order for all
 *    following test cases, until the next such line;
 *  - "@Levels:" lines are ignored;
 *  - anything else is a test case: a whitespace-separated list of
 *    bidi type names, then ';', then a bitmask saying which paragraph
 *    directions to test (bit 1: no override, 2: left-to-right,
 *    4: right-to-left).
 *
 * Each type name is turned into a real character of that type and the
 * resulting text is handed to run_test() once per requested
 * direction. Malformed test lines are reported on stdout and skipped.
 */
static void class_test(const char *filename, FILE *fp)
{
    unsigned lineno = 0;
    size_t bcs_size = 0, bcs_len = 0;
    bidi_char *bcs = NULL;
    size_t order_size = 0, order_len = 0;
    unsigned *order = NULL;

    /* Preliminary: find a representative character of every bidi
     * type. Prefer positive-width ones if available. */
    unsigned representatives[N_BIDI_TYPES];
    for (size_t i = 0; i < N_BIDI_TYPES; i++)
        representatives[i] = 0;
    for (unsigned uc = 1; uc < 0x110000; uc++) {
        unsigned type = bidi_getType(uc);
        if (!representatives[type] ||
            (mk_wcwidth(representatives[type]) <= 0 && mk_wcwidth(uc) > 0))
            representatives[type] = uc;
    }

    while (true) {
        lineno++;
        char *line = chomp(fgetline(fp));
        if (!line)
            break;

        /* Skip blank lines and comments */
        if (!line[0] || line[0] == '#') {
            sfree(line);
            continue;
        }

        /* Parse @Reorder lines, which tell us the expected output
         * order for all following test cases (until superseded) */
        if (strstartswith(line, "@Reorder:")) {
            char *p = line;
            extract_word(&p); /* eat the "@Reorder:" header itself */
            order_len = 0;
            while (1) {
                const char *word = extract_word(&p);
                if (!*word)
                    break;
                sgrowarray(order, order_size, order_len);
                order[order_len++] = strtoul(word, NULL, 0);
            }

            sfree(line);
            continue;
        }

        /* Skip @Levels lines, which we don't (yet?) do anything with */
        if (strstartswith(line, "@Levels:")) {
            sfree(line);
            continue;
        }

        /* Everything remaining should be an actual test */
        char *semicolon = strchr(line, ';');
        if (!semicolon) {
            printf("%s:%u: bad test line: no bitmap\n", filename, lineno);
            sfree(line);
            continue;
        }
        /* Split the line at the semicolon: type names before it,
         * direction bitmask after it. */
        *semicolon++ = '\0';
        unsigned bitmask = strtoul(semicolon, NULL, 0);

        /* Translate each type name into its representative character */
        char *p = line;
        bcs_len = 0;
        bool test_ok = true;
        while (1) {
            const char *word = extract_word(&p);
            if (!*word)
                break;

            unsigned type;
            for (type = 0; type < N_BIDI_TYPES; type++)
                if (!strcmp(word, typenames[type]))
                    break;
            if (type == N_BIDI_TYPES) {
                printf("%s:%u: bad test line: bad bidi type '%s'\n",
                       filename, lineno, word);
                test_ok = false;
                break;
            }

            sgrowarray(bcs, bcs_size, bcs_len);
            bcs[bcs_len].wc = representatives[type];
            bcs[bcs_len].origwc = bcs[bcs_len].wc;
            bcs[bcs_len].index = bcs_len;
            bcs[bcs_len].nchars = 1;
            bcs_len++;
        }

        if (!test_ok) {
            sfree(line);
            continue;
        }

        /* Run the test once for each paragraph direction requested */
        if (bitmask & 1)
            run_test(filename, lineno, bcs, bcs_len, order, order_len, 0);
        if (bitmask & 2)
            run_test(filename, lineno, bcs, bcs_len, order, order_len, +1);
        if (bitmask & 4)
            run_test(filename, lineno, bcs, bcs_len, order, order_len, -1);

        sfree(line);
    }

    sfree(bcs);
    sfree(order);
}
/*
 * Run every test case in a file in the format of the UCD's
 * BidiCharacterTest.txt, reading from 'fp'. 'filename' is used only
 * when reporting failures.
 *
 * Blank lines and lines starting with '#' are ignored. Every other
 * line is split into five ';'-separated fields, of which three are
 * used: the input text as space-separated hex code points, the
 * paragraph direction (0 = left-to-right, 1 = right-to-left, anything
 * else = no override), and the expected output order as
 * space-separated indices. The third and fourth fields are read but
 * not checked.
 */
static void char_test(const char *filename, FILE *fp)
{
    bidi_char *chars = NULL;
    size_t chars_size = 0;
    unsigned *expected = NULL;
    size_t expected_size = 0;

    for (unsigned lineno = 1;; lineno++) {
        char *line = chomp(fgetline(fp));
        if (!line)
            break;

        /* Nothing to do for blank lines and comments */
        if (line[0] == '\0' || line[0] == '#') {
            sfree(line);
            continue;
        }

        /* Carve the line up into its semicolon-separated fields */
        ptrlen rest = ptrlen_from_asciz(line);
        ptrlen input_pl = ptrlen_get_word(&rest, ";");
        ptrlen para_dir_pl = ptrlen_get_word(&rest, ";");
        (void)ptrlen_get_word(&rest, ";"); /* paragraph level: unused */
        (void)ptrlen_get_word(&rest, ";"); /* per-char levels: unused */
        ptrlen order_pl = ptrlen_get_word(&rest, ";");

        /* Map the file's paragraph direction to our override value */
        char *para_dir_str = mkstr(para_dir_pl);
        unsigned para_dir = strtoul(para_dir_str, NULL, 0);
        sfree(para_dir_str);

        int override;
        switch (para_dir) {
          case 0:
            override = +1;
            break;
          case 1:
            override = -1;
            break;
          default:
            override = 0;
            break;
        }

        /* Convert the hex code points into the input text */
        size_t nchars = 0;
        for (ptrlen pl = input_pl; pl.len;) {
            ptrlen tok = ptrlen_get_word(&pl, " ");
            char *tokstr = mkstr(tok);
            sgrowarray(chars, chars_size, nchars);
            chars[nchars].wc = strtoul(tokstr, NULL, 16);
            chars[nchars].origwc = chars[nchars].wc;
            chars[nchars].index = nchars;
            chars[nchars].nchars = 1;
            nchars++;
            sfree(tokstr);
        }

        /* Likewise collect the expected output order */
        size_t nexpected = 0;
        for (ptrlen pl = order_pl; pl.len;) {
            ptrlen tok = ptrlen_get_word(&pl, " ");
            char *tokstr = mkstr(tok);
            sgrowarray(expected, expected_size, nexpected);
            expected[nexpected++] = strtoul(tokstr, NULL, 0);
            sfree(tokstr);
        }

        run_test(filename, lineno, chars, nchars,
                 expected, nexpected, override);

        sfree(line);
    }

    sfree(chars);
    sfree(expected);
}
/*
 * Report an allocation failure on standard error and exit with
 * status 2. Never returns.
 */
void out_of_memory(void)
{
    fputs("out of memory!\n", stderr);
    exit(2);
}
/*
 * Write the command-line help text to 'fp'.
 */
static void usage(FILE *fp)
{
    static const char help_text[] =
        "usage: bidi_test ( ( --class | --char ) infile... )...\n"
        "e.g.: bidi_test --class BidiTest.txt --char BidiCharacterTest.txt\n"
        "also: --help display this text\n";

    fputs(help_text, fp);
}
/*
 * Entry point. Arguments are processed left to right: "--class" and
 * "--char" select which kind of test file the following filenames
 * contain, "--" ends option processing, "--help" prints the usage
 * text, and anything else is a test file to run ("-" meaning
 * standard input).
 *
 * Prints a pass/fail summary on stdout. Returns 0 if every test
 * passed, or 1 if any test failed or the command line was unusable.
 */
int main(int argc, char **argv)
{
    void (*testfn)(const char *, FILE *) = NULL;
    bool doing_opts = true;
    bool done_something = false;

    ctx = bidi_new_context();

    while (--argc > 0) {
        const char *arg = *++argv;
        if (doing_opts && arg[0] == '-' && arg[1]) {
            if (!strcmp(arg, "--")) {
                doing_opts = false;
            } else if (!strcmp(arg, "--class")) {
                testfn = class_test;
            } else if (!strcmp(arg, "--char")) {
                testfn = char_test;
            } else if (!strcmp(arg, "--help")) {
                usage(stdout);
                return 0;
            } else {
                fprintf(stderr, "unrecognised option '%s'\n", arg);
                return 1;
            }
        } else {
            const char *filename = arg;

            /* A filename is meaningless until a mode has been chosen;
             * without this check we would call a null function pointer. */
            if (!testfn) {
                fprintf(stderr, "no mode option (--class or --char) given "
                        "before filename '%s'\n", filename);
                return 1;
            }

            if (!strcmp(filename, "-")) {
                testfn("<standard input>", stdin);
            } else {
                FILE *fp = fopen(filename, "r");
                if (!fp) {
                    fprintf(stderr, "unable to open '%s'\n", filename);
                    return 1;
                }
                testfn(filename, fp);
                fclose(fp);
            }
            done_something = true;
        }
    }

    if (!done_something) {
        usage(stderr);
        return 1;
    }

    printf("pass %d fail %d total %d\n", pass, fail, pass + fail);

    bidi_free_context(ctx);
    return fail != 0;
}