diff --git a/.gitignore b/.gitignore
index ec0f7bf6..6c56a061 100644
--- a/.gitignore
+++ b/.gitignore
@@ -43,6 +43,7 @@
 /unix/Pterm.app
 /fuzzterm
 /testcrypt
+/testsc
 /testzlib
 /cgtest
 /*.DSA
diff --git a/Recipe b/Recipe
index 2ce432c1..84953f87 100644
--- a/Recipe
+++ b/Recipe
@@ -263,8 +263,8 @@ SSHCRYPTO = ARITH sshmd5 sshsha sshsh256 sshsh512
 	 + sshrsa sshdss sshecc
          + sshdes sshblowf sshaes sshccp ssharcf
          + sshdh sshcrc sshcrcda sshauxcrypt
-         + sshhmac sshprng
-SSHCOMMON = sshcommon sshrand SSHCRYPTO
+         + sshhmac
+SSHCOMMON = sshcommon sshprng sshrand SSHCRYPTO
          + sshverstring
          + sshpubk sshzlib
          + sshmac marshal nullplug
@@ -389,10 +389,12 @@ osxlaunch : [UT] osxlaunch
 
 fuzzterm : [UT] UXTERM CHARSET MISC version uxmisc uxucs fuzzterm time settings
 	 + uxstore be_none uxnogtk memory
-testcrypt : [UT] testcrypt SSHCRYPTO sshprime marshal utils memory tree234
-          + uxutils
-testcrypt : [C] testcrypt SSHCRYPTO sshprime marshal utils memory tree234
-          + winmiscs
+testcrypt : [UT] testcrypt SSHCRYPTO sshprng sshprime marshal utils
+          + memory tree234 uxutils
+testcrypt : [C] testcrypt SSHCRYPTO sshprng sshprime marshal utils
+          + memory tree234 winmiscs
+testsc    : [UT] testsc SSHCRYPTO marshal utils memory tree234 wildcard
+          + sshmac uxutils
 testzlib : [UT] testzlib sshzlib memory
 
 uppity   : [UT] uxserver SSHSERVER UXMISC uxsignal uxnoise uxgss uxnogtk
diff --git a/test/sclog/.gitignore b/test/sclog/.gitignore
new file mode 100644
index 00000000..75512ebb
--- /dev/null
+++ b/test/sclog/.gitignore
@@ -0,0 +1,7 @@
+/.ninja_*
+/CMakeFiles
+/CMakeCache.txt
+/cmake_install.cmake
+/*.ninja
+/*.ldscript
+/libsclog.so
diff --git a/test/sclog/CMakeLists.txt b/test/sclog/CMakeLists.txt
new file mode 100644
index 00000000..6ac0ddfa
--- /dev/null
+++ b/test/sclog/CMakeLists.txt
@@ -0,0 +1,16 @@
+# CMake script for the 'sclog' DynamoRIO instrumentation system that
+# goes with the PuTTY test binary 'testsc'. For build instructions see
+# the comment at the top of testsc.c.
+
+cmake_minimum_required(VERSION 3.5)
+
+find_package(DynamoRIO)
+if (NOT DynamoRIO_FOUND)
+  message(FATAL_ERROR "DynamoRIO not found")
+endif()
+
+add_library(sclog SHARED sclog.c)
+configure_DynamoRIO_client(sclog)
+foreach(extension drmgr drsyms drreg drutil drwrap)
+  use_DynamoRIO_extension(sclog ${extension})
+endforeach()
diff --git a/test/sclog/sclog.c b/test/sclog/sclog.c
new file mode 100644
index 00000000..3512fade
--- /dev/null
+++ b/test/sclog/sclog.c
@@ -0,0 +1,592 @@
+/*
+ * sclog: the DynamoRIO instrumentation system that goes with the
+ * PuTTY test binary 'testsc'.
+ *
+ * For general discussion and build instructions, see the comment at
+ * the top of testsc.c.
+ */
+
+#include <inttypes.h>
+#include <string.h>
+
+#include "dr_api.h"
+#include "drmgr.h"
+#include "drsyms.h"
+#include "drreg.h"
+#include "drutil.h"
+#include "drwrap.h"
+
+/*
+ * The file we're currently logging to, if any.
+ */
+static file_t outfile = INVALID_FILE;
+
+/*
+ * A counter which we can increment and decrement around any library
+ * function we don't want to log the details of what happens inside.
+ * Mainly this is for memory allocation functions, which will diverge
+ * control depending on the progress of their search for something
+ * they can allocate.
+ */
+size_t logging_paused = 0;
+
+/*
+ * This log message appears at the start of whatever DynamoRIO
+ * considers a 'basic block', i.e. a sequence of instructions with no
+ * branches. Logging these is cheaper than logging every single
+ * instruction, and should still be adequate to detect any divergence
+ * of control flow.
+ */
+static void log_pc(const char *loc)
+{
+    if (outfile == INVALID_FILE || logging_paused)
+        return;
+    dr_fprintf(outfile, "%s: start basic block\n", loc);
+}
+
+/*
+ * Hardware division instructions are unlikely to run in time
+ * independent of the data, so we log both their parameters.
+ */
+static void log_div(uint n, uint d, const char *loc)
+{
+    if (outfile == INVALID_FILE || logging_paused)
+        return;
+    dr_fprintf(outfile, "%s: divide %"PRIuMAX" / %"PRIuMAX"\n",
+               loc, (uintmax_t)n, (uintmax_t)d);
+}
+
+/*
+ * Register-controlled shift instructions are not reliably one cycle
+ * long on all platforms, so we log the shift couhnt.
+ */
+static void log_var_shift(uint sh, const char *loc)
+{
+    if (outfile == INVALID_FILE || logging_paused)
+        return;
+    dr_fprintf(outfile, "%s: var shift by %"PRIuMAX"\n", loc, (uintmax_t)sh);
+}
+
+/*
+ * We need to log memory accesses, so as to detect data-dependent
+ * changes in the access pattern (e.g. incautious use of a lookup
+ * table). But one thing we _can't_ control for perfectly is that in
+ * two successive runs of the same crypto primitive, malloc may be
+ * called, and may return different addresses - which of course is not
+ * dependent on the data (unless the size of the allocated block
+ * does).
+ *
+ * So we track all the memory allocations that happen during logging,
+ * and any addresses accessed within those blocks are logged as
+ * something along the lines of 'n bytes from the start of the mth
+ * allocation'.
+ *
+ * Allocations that happened before a given log file was opened are
+ * not tracked. The program under test will ensure that any of those
+ * used by the primitive are at the same address in all runs anyway.
+ */
+struct allocation {
+    /*
+     * We store the list of allocations in a linked list, so we can
+     * look them up by address, and delete them as they're freed.
+     *
+     * A balanced binary search tree would be faster, but this is
+     * easier to get right first time!
+     */
+    struct allocation *prev, *next;
+    uintptr_t start, size, index;
+};
+static struct allocation alloc_ends[1] = { alloc_ends, alloc_ends, 0, 0, 0 };
+static uintptr_t next_alloc_index = 0;
+
+static void free_allocation(struct allocation *alloc)
+{
+    alloc->next->prev = alloc->prev;
+    alloc->prev->next = alloc->next;
+    dr_global_free(alloc, sizeof(struct allocation));
+}
+
+/*
+ * Wrap the log_set_file() function in testsc.c, and respond to it by
+ * opening or closing log files.
+ */
+static void wrap_logsetfile(void *wrapctx, void **user_data)
+{
+    if (outfile) {
+        dr_close_file(outfile);
+        outfile = INVALID_FILE;
+    }
+
+    const char *outfilename = drwrap_get_arg(wrapctx, 0);
+    if (outfilename) {
+        outfile = dr_open_file(outfilename, DR_FILE_WRITE_OVERWRITE);
+        DR_ASSERT(outfile != INVALID_FILE);
+    }
+
+    /*
+     * Reset the allocation list to empty, whenever we open or close a
+     * log file.
+     */
+    while (alloc_ends->next != alloc_ends)
+        free_allocation(alloc_ends->next);
+    next_alloc_index = 0;
+}
+
+/*
+ * Wrap the dry_run() function in testsc.c, to tell it we're here.
+ */
+static void wrap_dryrun(void *wrapctx, void *user_data)
+{
+    drwrap_set_retval(wrapctx, (void *)0);
+}
+
+/*
+ * Look up the memory allocation record corresponding to an address.
+ */
+static struct allocation *find_allocation(const void *ptr)
+{
+    uintptr_t address = (uintptr_t)ptr;
+    for (struct allocation *alloc = alloc_ends->next;
+         alloc != alloc_ends; alloc = alloc->next) {
+        if (alloc && address - alloc->start < alloc->size)
+            return alloc;
+    }
+    return NULL;
+}
+
+/*
+ * Log a memory access.
+ */
+static void log_mem(app_pc addr, uint size, uint write, const char *loc)
+{
+    if (outfile == INVALID_FILE || logging_paused)
+        return;
+
+    struct allocation *alloc = find_allocation((const void *)addr);
+    if (!alloc) {
+        dr_fprintf(outfile, "%s: %s %"PRIuMAX" @ %"PRIxMAX"\n",
+                   loc, write ? "store" : "load", (uintmax_t)size,
+                   (uintmax_t)addr);
+    } else {
+        dr_fprintf(outfile, "%s: %s %"PRIuMAX" @ allocations[%"PRIuPTR"]"
+                   " + %"PRIxMAX"\n",
+                   loc, write ? "store" : "load", (uintmax_t)size,
+                   alloc->index, (uintmax_t)(addr - alloc->start));
+    }
+}
+
+/*
+ * Record the allocation of some memory. (Common code between malloc
+ * and realloc.)
+ */
+static void allocated(void *ptr, size_t size)
+{
+    if (outfile == INVALID_FILE)
+        return; /* no need to track allocations outside a logging interval */
+
+    struct allocation *alloc = dr_global_alloc(sizeof(struct allocation));
+    alloc->start = (uintptr_t)ptr;
+    alloc->size = size;
+    alloc->index = next_alloc_index++;
+    alloc->prev = alloc_ends->prev;
+    alloc->next = alloc_ends;
+    alloc->prev->next = alloc->next->prev = alloc;
+}
+
+/*
+ * Record that memory has been freed. Note that we may free something
+ * that was allocated when we weren't logging, so we must cope with
+ * find_allocation returning NULL.
+ */
+static void freed(void *ptr)
+{
+    struct allocation *alloc = find_allocation(ptr);
+    if (alloc)
+        free_allocation(alloc);
+}
+
+/*
+ * The actual wrapper functions for malloc, realloc and free.
+ */
+static void wrap_malloc_pre(void *wrapctx, void **user_data)
+{
+    logging_paused++;
+    *user_data = drwrap_get_arg(wrapctx, 0);
+}
+static void wrap_free_pre(void *wrapctx, void **user_data)
+{
+    logging_paused++;
+    void *ptr = drwrap_get_arg(wrapctx, 0);
+    freed(ptr);
+}
+static void wrap_realloc_pre(void *wrapctx, void **user_data)
+{
+    logging_paused++;
+    void *ptr = drwrap_get_arg(wrapctx, 0);
+    freed(ptr);
+    *user_data = drwrap_get_arg(wrapctx, 1);
+}
+static void wrap_alloc_post(void *wrapctx, void *user_data)
+{
+    void *ptr = drwrap_get_retval(wrapctx);
+    if (!ptr)
+        return;
+    size_t size = (size_t)user_data;
+    allocated(ptr, size);
+    logging_paused--;
+}
+
+/*
+ * We wrap the C library function memset, because I've noticed that at
+ * least one optimised implementation of it diverges control flow
+ * internally based on what appears to be the _alignment_ of the input
+ * pointer - and that alignment check can vary depending on the
+ * addresses of allocated blocks. So I can't guarantee no divergence
+ * of control flow inside memset if malloc doesn't return the same
+ * values, and instead I just have to trust that memset isn't reading
+ * the contents of the block and basing control flow decisions on that.
+ */
+static void wrap_memset_pre(void *wrapctx, void **user_data)
+{
+    uint was_already_paused = logging_paused++;
+
+    if (outfile == INVALID_FILE || was_already_paused)
+        return;
+
+    const void *addr = drwrap_get_arg(wrapctx, 0);
+    size_t size = (size_t)drwrap_get_arg(wrapctx, 2);
+
+    struct allocation *alloc = find_allocation(addr);
+    if (!alloc) {
+        dr_fprintf(outfile, "memset %"PRIuMAX" @ %"PRIxMAX"\n",
+                   (uintmax_t)size, (uintmax_t)addr);
+    } else {
+        dr_fprintf(outfile, "memset %"PRIuMAX" @ allocations[%"PRIuPTR"]"
+                   " + %"PRIxMAX"\n", (uintmax_t)size, alloc->index,
+                   (uintmax_t)(addr - alloc->start));
+    }
+}
+
+/*
+ * Common post-wrapper function for memset and free, whose entire
+ * function is to unpause the logging.
+ */
+static void unpause_post(void *wrapctx, void *user_data)
+{
+    logging_paused--;
+}
+
+/*
+ * Make a string representation of the address of an instruction,
+ * including a function name and/or a file+line combination if
+ * possible. These will be logged alongside every act of interest
+ * where we can make one.
+ */
+static void instr_format_location(instr_t *instr, char **outloc)
+{
+    app_pc addr = (app_pc)instr_get_app_pc(instr);
+    char location[2048], symbol[512], fileline[1024];
+    bool got_sym = false, got_line = false;
+
+    if (*outloc)
+        return;
+
+    symbol[0] = '\0';
+    fileline[0] = '\0';
+
+    module_data_t *data = dr_lookup_module(addr);
+    if (data) {
+        drsym_info_t sym;
+        char file[MAXIMUM_PATH];
+
+        sym.struct_size = sizeof(sym);
+        sym.name = symbol;
+        sym.name_size = sizeof(symbol);
+        sym.file = file;
+        sym.file_size = sizeof(file);
+
+        drsym_error_t status = drsym_lookup_address(
+            data->full_path, addr - data->start, &sym, DRSYM_DEFAULT_FLAGS);
+
+        got_line = (status == DRSYM_SUCCESS);
+        got_sym = got_line || status == DRSYM_ERROR_LINE_NOT_AVAILABLE;
+
+        if (got_line)
+            snprintf(fileline, sizeof(fileline), " = %s:%"PRIu64,
+                     file, (uint64_t)sym.line);
+    }
+
+    snprintf(location, sizeof(location),
+             "%"PRIx64"%s%s%s",
+             (uint64_t)addr, got_sym ? " = " : "", got_sym ? symbol : "",
+             fileline);
+    size_t len = strlen(location) + 1;
+    char *loc = dr_global_alloc(len);
+    memcpy(loc, location, len);
+    *outloc = loc;
+}
+
+/*
+ * Function that tests a single operand of an instruction to see if
+ * it's a memory reference, and if so, adds a call to log_mem.
+ */
+static void try_mem_opnd(
+    void *drcontext, instrlist_t *bb, instr_t *instr, char **loc,
+    opnd_t opnd, bool write)
+{
+    if (!opnd_is_memory_reference(opnd))
+        return;
+
+    instr_format_location(instr, loc);
+
+    reg_id_t r0, r1;
+    drreg_status_t st;
+    st = drreg_reserve_register(drcontext, bb, instr, NULL, &r0);
+    DR_ASSERT(st == DRREG_SUCCESS);
+    st = drreg_reserve_register(drcontext, bb, instr, NULL, &r1);
+    DR_ASSERT(st == DRREG_SUCCESS);
+
+    bool ok = drutil_insert_get_mem_addr(drcontext, bb, instr, opnd, r0, r1);
+    DR_ASSERT(ok);
+
+    uint size = drutil_opnd_mem_size_in_bytes(opnd, instr);
+
+    dr_insert_clean_call(
+        drcontext, bb, instr, (void *)log_mem, false,
+        4, opnd_create_reg(r0), OPND_CREATE_INT32(size),
+        OPND_CREATE_INT32(write), OPND_CREATE_INTPTR(*loc));
+
+    st = drreg_unreserve_register(drcontext, bb, instr, r1);
+    DR_ASSERT(st == DRREG_SUCCESS);
+    st = drreg_unreserve_register(drcontext, bb, instr, r0);
+    DR_ASSERT(st == DRREG_SUCCESS);
+}
+
+/*
+ * The main function called to instrument each machine instruction.
+ */
+static dr_emit_flags_t instrument_instr(
+    void *drcontext, void *tag, instrlist_t *bb, instr_t *instr,
+    bool for_trace, bool translating, void *user_data)
+{
+    char *loc = NULL;
+
+    /*
+     * If this instruction is the first in its basic block, call
+     * log_pc to record that we're executing this block at all.
+     */
+    if (drmgr_is_first_instr(drcontext, instr)) {
+        instr_format_location(instr, &loc);
+        dr_insert_clean_call(
+            drcontext, bb, instr, (void *)log_pc, false,
+            1, OPND_CREATE_INTPTR(loc));
+    }
+
+    /*
+     * If the instruction reads or writes memory, log its access.
+     */
+    if (instr_reads_memory(instr) || instr_writes_memory(instr)) {
+        for (int i = 0, limit = instr_num_srcs(instr); i < limit; i++)
+            try_mem_opnd(drcontext, bb, instr, &loc,
+                         instr_get_src(instr, i), false);
+        for (int i = 0, limit = instr_num_dsts(instr); i < limit; i++)
+            try_mem_opnd(drcontext, bb, instr, &loc,
+                         instr_get_dst(instr, i), false);
+    }
+
+    /*
+     * Now do opcode-specific checks.
+     */
+    int opcode = instr_get_opcode(instr);
+
+    switch (opcode) {
+      case OP_div:
+      case OP_idiv:
+        /*
+         * x86 hardware divisions. The operand order for DR's
+         * representation of these seem to be: 0 = denominator, 1 =
+         * numerator MSW, 2 = numerator LSW.
+         */
+        instr_format_location(instr, &loc);
+        dr_insert_clean_call(
+            drcontext, bb, instr, (void *)log_div, false,
+            3, instr_get_src(instr, 2), instr_get_src(instr, 0),
+            OPND_CREATE_INTPTR(loc));
+        break;
+      case OP_shl:
+      case OP_shr:
+      case OP_sar:
+      case OP_shlx:
+      case OP_shrx:
+      case OP_sarx:
+      case OP_rol:
+      case OP_ror:
+      case OP_rcl:
+      case OP_rcr:
+        /*
+         * Shift instructions. If they're register-controlled, log the
+         * shift count.
+         */
+        {
+            opnd_t shiftcount = instr_get_src(instr, 0);
+            if (!opnd_is_immed(shiftcount)) {
+                reg_id_t r0;
+                drreg_status_t st;
+                st = drreg_reserve_register(drcontext, bb, instr, NULL, &r0);
+                DR_ASSERT(st == DRREG_SUCCESS);
+                opnd_t op_r0 = opnd_create_reg(r0);
+                instrlist_preinsert(bb, instr, INSTR_CREATE_movzx(
+                                        drcontext, op_r0, shiftcount));
+                instr_format_location(instr, &loc);
+                dr_insert_clean_call(
+                    drcontext, bb, instr, (void *)log_var_shift, false,
+                    2, op_r0, OPND_CREATE_INTPTR(loc));
+                st = drreg_unreserve_register(drcontext, bb, instr, r0);
+                DR_ASSERT(st == DRREG_SUCCESS);
+            }
+        }
+        break;
+    }
+
+    return DR_EMIT_DEFAULT;
+}
+
+static void exit_event(void)
+{
+    if (outfile != INVALID_FILE) {
+        dr_fprintf(outfile, "exit while recording enabled\n");
+        dr_close_file(outfile);
+        outfile = INVALID_FILE;
+    }
+    drsym_exit();
+    drreg_exit();
+    drwrap_exit();
+    drutil_exit();
+    drmgr_exit();
+}
+
+/*
+ * We ask DR to expand any x86 string instructions like REP MOVSB, so
+ * that we can log all the individual memory accesses without getting
+ * confused.
+ */
+static dr_emit_flags_t expand_rep_movsb(
+    void *drcontext, void *tag, instrlist_t *bb, bool for_trace,
+    bool translating)
+{
+    bool ok = drutil_expand_rep_string(drcontext, bb);
+    DR_ASSERT(ok);
+    return DR_EMIT_DEFAULT;
+}
+
+typedef void (*prewrapper_t)(void *wrapctx, void **user_data);
+typedef void (*postwrapper_t)(void *wrapctx, void *user_data);
+
+/*
+ * Helper function for bulk use of drwrap.
+ */
+static void try_wrap_fn(const module_data_t *module, const char *name,
+                        prewrapper_t pre, postwrapper_t post, bool *done)
+{
+    if (*done)
+        return;
+
+    size_t offset;
+    drsym_error_t status = drsym_lookup_symbol(
+        module->full_path, name, &offset, DRSYM_DEFAULT_FLAGS);
+    if (status == DRSYM_SUCCESS) {
+        app_pc notify_fn = module->start + offset;
+        bool ok = drwrap_wrap(notify_fn, pre, post);
+        DR_ASSERT(ok);
+        *done = true;
+    }
+}
+
+/*
+ * When each module (e.g. shared library) is loaded, try to wrap all
+ * the functions we care about. For each one, we keep a static bool
+ * that will stop us trying again once we've found it the first time.
+ */
+static void load_module(
+    void *drcontext, const module_data_t *module, bool loaded)
+{
+
+#define TRY_WRAP(fn, pre, post) do                              \
+    {                                                           \
+        static bool done_this_one = false;                      \
+        try_wrap_fn(module, fn, pre, post, &done_this_one);    \
+    } while (0)
+
+    if (loaded) {
+        TRY_WRAP("log_to_file_real", wrap_logsetfile, NULL);
+        TRY_WRAP("dry_run_real", NULL, wrap_dryrun);
+        TRY_WRAP("malloc", wrap_malloc_pre, wrap_alloc_post);
+        TRY_WRAP("realloc", wrap_realloc_pre, wrap_alloc_post);
+        TRY_WRAP("free", wrap_free_pre, unpause_post);
+        TRY_WRAP("memset", wrap_memset_pre, unpause_post);
+
+        /*
+         * More strangely named versions of standard C library
+         * functions, which I've observed in practice to be where the
+         * calls end up. I think these are probably selected by
+         * STT_IFUNC in libc.so, so that the normally named version of
+         * the function is never reached at all.
+         *
+         * This list is not expected to be complete. If you re-run
+         * this test on a different platform and find control flow
+         * diverging inside some libc function that looks as if it's
+         * another name for malloc or memset or whatever, then you may
+         * need to add more aliases here to stop the test failing.
+         */
+        TRY_WRAP("__GI___libc_malloc", wrap_malloc_pre, wrap_alloc_post);
+        TRY_WRAP("__GI___libc_realloc", wrap_realloc_pre, wrap_alloc_post);
+        TRY_WRAP("__GI___libc_free", wrap_free_pre, unpause_post);
+        TRY_WRAP("__memset_sse2_unaligned", wrap_memset_pre, unpause_post);
+    }
+}
+
+/*
+ * Main entry point that sets up all the facilities we need.
+ */
+DR_EXPORT void dr_client_main(client_id_t id, int argc, const char **argv)
+{
+    dr_set_client_name(
+        "Time-sensitive activity logger for PuTTY crypto testing",
+        "https://www.chiark.greenend.org.uk/~sgtatham/putty/");
+
+    outfile = INVALID_FILE;
+
+    bool ok = drmgr_init();
+    DR_ASSERT(ok);
+
+    /*
+     * Run our main instrumentation pass with lower priority than
+     * drwrap, so that we don't start logging the inside of a function
+     * whose drwrap pre-wrapper would have wanted to disable logging.
+     */
+    drmgr_priority_t pri = {sizeof(pri), "sclog", NULL, NULL,
+                            DRMGR_PRIORITY_INSERT_DRWRAP+1};
+    ok = drmgr_register_bb_instrumentation_event(
+        NULL, instrument_instr, &pri);
+    DR_ASSERT(ok);
+
+    ok = drutil_init();
+    DR_ASSERT(ok);
+
+    ok = drwrap_init();
+    DR_ASSERT(ok);
+
+    drsym_error_t symstatus = drsym_init(0);
+    DR_ASSERT(symstatus == DRSYM_SUCCESS);
+
+    dr_register_exit_event(exit_event);
+
+    drreg_options_t ops = { sizeof(ops), 3, false };
+    drreg_status_t regstatus = drreg_init(&ops);
+    DR_ASSERT(regstatus == DRREG_SUCCESS);
+
+    drmgr_register_module_load_event(load_module);
+
+    ok = drmgr_register_bb_app2app_event(expand_rep_movsb, NULL);
+    DR_ASSERT(ok);
+}
diff --git a/testsc.c b/testsc.c
new file mode 100644
index 00000000..ab60d9ab
--- /dev/null
+++ b/testsc.c
@@ -0,0 +1,1558 @@
+/*
+ * testsc: run PuTTY's crypto primitives under instrumentation that
+ * checks for cache and timing side channels.
+ *
+ * The idea is: cryptographic code should avoid leaking secret data
+ * through timing information, or through traces of its activity left
+ * in the caches.
+ *
+ * (This property is sometimes called 'constant-time', although really
+ * that's a misnomer. It would be impossible to avoid the execution
+ * time varying for any number of reasons outside the code's control,
+ * such as the prior contents of caches and branch predictors,
+ * temperature-based CPU throttling, system load, etc. And in any case
+ * you don't _need_ the execution time to be literally constant: you
+ * just need it to be independent of your secrets. It can vary as much
+ * as it likes based on anything else.)
+ *
+ * To avoid this, you need to ensure that various aspects of the
+ * code's behaviour do not depend on the secret data. The control
+ * flow, for a start - no conditional branches based on secrets - and
+ * also the memory access pattern (no using secret data as an index
+ * into a lookup table). A couple of other kinds of CPU instruction
+ * also can't be trusted to run in constant time: we check for
+ * register-controlled shifts and hardware divisions. (But, again,
+ * it's perfectly fine to _use_ those instructions in the course of
+ * crypto code. You just can't use a secret as any time-affecting
+ * operand.)
+ *
+ * This test program works by running the same crypto primitive
+ * multiple times, with different secret input data. The relevant
+ * details of each run is logged to a file via the DynamoRIO-based
+ * instrumentation system living in the subdirectory test/sclog. Then
+ * we check over all the files and ensure they're identical.
+ *
+ * This program itself (testsc) is built by the ordinary PuTTY
+ * makefiles. But run by itself, it will do nothing useful: it needs
+ * to be run under DynamoRIO, with the sclog instrumentation library.
+ *
+ * Here's an example of how I built it:
+ *
+ * Download the DynamoRIO source. I did this by cloning
+ * https://github.com/DynamoRIO/dynamorio.git, and at the time of
+ * writing this, 259c182a75ce80112bcad329c97ada8d56ba854d was the head
+ * commit.
+ *
+ * In the DynamoRIO checkout:
+ *
+ *   mkdir build
+ *   cd build
+ *   cmake -G Ninja ..
+ *   ninja
+ *
+ * Now set the shell variable DRBUILD to be the location of the build
+ * directory you did that in. (Or not, if you prefer, but the example
+ * build commands below will assume that that's where the DynamoRIO
+ * libraries, headers and runtime can be found.)
+ *
+ * Then, in test/sclog:
+ *
+ *   cmake -G Ninja -DCMAKE_PREFIX_PATH=$DRBUILD/cmake .
+ *   ninja
+ *
+ * Finally, to run the actual test, set SCTMP to some temp directory
+ * you don't mind filling with large temp files (several GB at a
+ * time), and in the main PuTTY source directory (assuming that's
+ * where testsc has been built):
+ *
+ *   $DRBUILD/bin64/drrun -c test/sclog/libsclog.so -- ./testsc -O $SCTMP
+ */
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "defs.h"
+#include "putty.h"
+#include "ssh.h"
+#include "misc.h"
+#include "mpint.h"
+#include "ecc.h"
+
+static NORETURN void fatal_error(const char *p, ...)
+{
+    va_list ap;
+    fprintf(stderr, "testsc: ");
+    va_start(ap, p);
+    vfprintf(stderr, p, ap);
+    va_end(ap);
+    fputc('\n', stderr);
+    exit(1);
+}
+
+void out_of_memory(void) { fatal_error("out of memory"); }
+
+/*
+ * A simple deterministic PRNG, without any of the Fortuna
+ * complexities, for generating test inputs in a way that's repeatable
+ * between runs of the program, even if only a subset of test cases is
+ * run.
+ */
+static uint64_t random_counter = 0;
+static const char *random_seedstr = NULL;
+static uint8_t random_buf[MAX_HASH_LEN];
+static size_t random_buf_limit = 0;
+
+static void random_seed(const char *seedstr)
+{
+    random_seedstr = seedstr;
+    random_counter = 0;
+    random_buf_limit = 0;
+}
+
+void random_read(void *vbuf, size_t size)
+{
+    assert(random_seedstr);
+    uint8_t *buf = (uint8_t *)vbuf;
+    while (size-- > 0) {
+        if (random_buf_limit == 0) {
+            ssh_hash *h = ssh_hash_new(&ssh_sha256);
+            put_asciz(h, random_seedstr);
+            put_uint64(h, random_counter);
+            random_counter++;
+            random_buf_limit = ssh_hash_alg(h)->hlen;
+            ssh_hash_final(h, random_buf);
+        }
+        *buf++ = random_buf[random_buf_limit--];
+    }
+}
+
+/*
+ * Macro that defines a function, and also a volatile function pointer
+ * pointing to it. Callers indirect through the function pointer
+ * instead of directly calling the function, to ensure that the
+ * compiler doesn't try to get clever by eliminating the call
+ * completely, or inlining it.
+ *
+ * This is used to mark functions that DynamoRIO will look for to
+ * intercept, and also to inhibit inlining and unrolling where they'd
+ * cause a failure of experimental control in the main test.
+ */
+#define VOLATILE_WRAPPED_DEFN(qualifier, rettype, fn, params)   \
+    qualifier rettype fn##_real params;                         \
+    qualifier rettype (*volatile fn) params = fn##_real;        \
+    qualifier rettype fn##_real params
+
+VOLATILE_WRAPPED_DEFN(, void, log_to_file, (const char *filename))
+{
+    /*
+     * This function is intercepted by the DynamoRIO side of the
+     * mechanism. We use it to send instructions to the DR wrapper,
+     * namely, 'please start logging to this file' or 'please stop
+     * logging' (if filename == NULL). But we don't have to actually
+     * do anything in _this_ program - all the functionality is in the
+     * DR wrapper.
+     */
+}
+
+static const char *outdir = NULL;
+char *log_filename(const char *basename, size_t index)
+{
+    return dupprintf("%s/%s.%04zu", outdir, basename, index);
+}
+
+static char *last_filename;
+static const char *test_basename;
+static size_t test_index = 0;
+void log_start(void)
+{
+    last_filename = log_filename(test_basename, test_index++);
+    log_to_file(last_filename);
+}
+void log_end(void)
+{
+    log_to_file(NULL);
+    sfree(last_filename);
+}
+
+static bool test_skipped = false;
+
+VOLATILE_WRAPPED_DEFN(, intptr_t, dry_run, (void))
+{
+    /*
+     * This is another function intercepted by DynamoRIO. In this
+     * case, DR overrides this function to return 0 rather than 1, so
+     * we can use it as a check for whether we're running under
+     * instrumentation, or whether this is just a dry run which goes
+     * through the motions but doesn't expect to find any log files
+     * created.
+     */
+    return 1;
+}
+
+static void mp_random_bits_into(mp_int *r, size_t bits)
+{
+    mp_int *x = mp_random_bits(bits);
+    mp_copy_into(r, x);
+    mp_free(x);
+}
+
+static void mp_random_fill(mp_int *r)
+{
+    mp_random_bits_into(r, mp_max_bits(r));
+}
+
+VOLATILE_WRAPPED_DEFN(static, size_t, looplimit, (size_t x))
+{
+    /*
+     * looplimit() is the identity function on size_t, but the
+     * compiler isn't allowed to rely on it being that. I use it to
+     * make loops in the test functions look less attractive to
+     * compilers' unrolling heuristics.
+     */
+    return x;
+}
+
+/* Ciphers that we expect to pass this test. Blowfish and Arcfour are
+ * intentionally omitted, because we already know they don't. */
+#define CIPHERS(X, Y)                           \
+    X(Y, ssh_3des_ssh1)                         \
+    X(Y, ssh_3des_ssh2_ctr)                     \
+    X(Y, ssh_3des_ssh2)                         \
+    X(Y, ssh_des)                               \
+    X(Y, ssh_des_sshcom_ssh2)                   \
+    X(Y, ssh_aes256_sdctr)                      \
+    X(Y, ssh_aes256_sdctr_hw)                   \
+    X(Y, ssh_aes256_sdctr_sw)                   \
+    X(Y, ssh_aes256_cbc)                        \
+    X(Y, ssh_aes256_cbc_hw)                     \
+    X(Y, ssh_aes256_cbc_sw)                     \
+    X(Y, ssh_aes192_sdctr)                      \
+    X(Y, ssh_aes192_sdctr_hw)                   \
+    X(Y, ssh_aes192_sdctr_sw)                   \
+    X(Y, ssh_aes192_cbc)                        \
+    X(Y, ssh_aes192_cbc_hw)                     \
+    X(Y, ssh_aes192_cbc_sw)                     \
+    X(Y, ssh_aes128_sdctr)                      \
+    X(Y, ssh_aes128_sdctr_hw)                   \
+    X(Y, ssh_aes128_sdctr_sw)                   \
+    X(Y, ssh_aes128_cbc)                        \
+    X(Y, ssh_aes128_cbc_hw)                     \
+    X(Y, ssh_aes128_cbc_sw)                     \
+    X(Y, ssh2_chacha20_poly1305)                \
+    /* end of list */
+
+#define CIPHER_TESTLIST(X, name) X(cipher_ ## name)
+
+#define MACS(X, Y)                              \
+    X(Y, ssh_hmac_md5)                          \
+    X(Y, ssh_hmac_sha1)                         \
+    X(Y, ssh_hmac_sha1_buggy)                   \
+    X(Y, ssh_hmac_sha1_96)                      \
+    X(Y, ssh_hmac_sha1_96_buggy)                \
+    X(Y, ssh_hmac_sha256)                       \
+    /* end of list */
+
+#define MAC_TESTLIST(X, name) X(mac_ ## name)
+
+#define HASHES(X, Y)                            \
+    X(Y, ssh_md5)                               \
+    X(Y, ssh_sha1)                              \
+    X(Y, ssh_sha1_hw)                           \
+    X(Y, ssh_sha1_sw)                           \
+    X(Y, ssh_sha256)                            \
+    X(Y, ssh_sha256_hw)                         \
+    X(Y, ssh_sha256_sw)                         \
+    X(Y, ssh_sha384)                            \
+    X(Y, ssh_sha512)                            \
+    /* end of list */
+
+#define HASH_TESTLIST(X, name) X(hash_ ## name)
+
+#define TESTLIST(X)                             \
+    X(mp_get_nbits)                             \
+    X(mp_from_decimal)                          \
+    X(mp_from_hex)                              \
+    X(mp_get_decimal)                           \
+    X(mp_get_hex)                               \
+    X(mp_cmp_hs)                                \
+    X(mp_cmp_eq)                                \
+    X(mp_min)                                   \
+    X(mp_max)                                   \
+    X(mp_select_into)                           \
+    X(mp_cond_swap)                             \
+    X(mp_cond_clear)                            \
+    X(mp_add)                                   \
+    X(mp_sub)                                   \
+    X(mp_mul)                                   \
+    X(mp_rshift_safe)                           \
+    X(mp_divmod)                                \
+    X(mp_modadd)                                \
+    X(mp_modsub)                                \
+    X(mp_modmul)                                \
+    X(mp_modpow)                                \
+    X(mp_invert_mod_2to)                        \
+    X(mp_invert)                                \
+    X(mp_modsqrt)                               \
+    X(ecc_weierstrass_add)                      \
+    X(ecc_weierstrass_double)                   \
+    X(ecc_weierstrass_add_general)              \
+    X(ecc_weierstrass_multiply)                 \
+    X(ecc_weierstrass_is_identity)              \
+    X(ecc_weierstrass_get_affine)               \
+    X(ecc_weierstrass_decompress)               \
+    X(ecc_montgomery_diff_add)                  \
+    X(ecc_montgomery_double)                    \
+    X(ecc_montgomery_multiply)                  \
+    X(ecc_montgomery_get_affine)                \
+    X(ecc_edwards_add)                          \
+    X(ecc_edwards_multiply)                     \
+    X(ecc_edwards_eq)                           \
+    X(ecc_edwards_get_affine)                   \
+    X(ecc_edwards_decompress)                   \
+    CIPHERS(CIPHER_TESTLIST, X)                 \
+    MACS(MAC_TESTLIST, X)                       \
+    HASHES(HASH_TESTLIST, X)                    \
+    /* end of list */
+
+static void test_mp_get_nbits(void)
+{
+    mp_int *z = mp_new(512);
+    static const size_t bitposns[] = {
+        0, 1, 5, 16, 23, 32, 67, 123, 234, 511
+    };
+    mp_int *prev = mp_from_integer(0);
+    for (size_t i = 0; i < looplimit(lenof(bitposns)); i++) {
+        mp_int *x = mp_power_2(bitposns[i]);
+        mp_add_into(z, x, prev);
+        mp_free(prev);
+        prev = x;
+        log_start();
+        mp_get_nbits(z);
+        log_end();
+    }
+    mp_free(prev);
+}
+
+static void test_mp_from_decimal(void)
+{
+    char dec[64];
+    static const size_t starts[] = { 0, 1, 5, 16, 23, 32, 63, 64 };
+    for (size_t i = 0; i < looplimit(lenof(starts)); i++) {
+        memset(dec, '0', lenof(dec));
+        for (size_t j = starts[i]; j < lenof(dec); j++) {
+            uint8_t r[4];
+            random_read(r, 4);
+            dec[j] = '0' + GET_32BIT_MSB_FIRST(r) % 10;
+        }
+        log_start();
+        mp_int *x = mp_from_decimal_pl(make_ptrlen(dec, lenof(dec)));
+        log_end();
+        mp_free(x);
+    }
+}
+
+static void test_mp_from_hex(void)
+{
+    char hex[64];
+    static const size_t starts[] = { 0, 1, 5, 16, 23, 32, 63, 64 };
+    static const char digits[] = "0123456789abcdefABCDEF";
+    for (size_t i = 0; i < looplimit(lenof(starts)); i++) {
+        memset(hex, '0', lenof(hex));
+        for (size_t j = starts[i]; j < lenof(hex); j++) {
+            uint8_t r[4];
+            random_read(r, 4);
+            hex[j] = digits[GET_32BIT_MSB_FIRST(r) % lenof(digits)];
+        }
+        log_start();
+        mp_int *x = mp_from_hex_pl(make_ptrlen(hex, lenof(hex)));
+        log_end();
+        mp_free(x);
+    }
+}
+
+static void test_mp_string_format(char *(*mp_format)(mp_int *x))
+{
+    mp_int *z = mp_new(512);
+    static const size_t bitposns[] = {
+        0, 1, 5, 16, 23, 32, 67, 123, 234, 511
+    };
+    for (size_t i = 0; i < looplimit(lenof(bitposns)); i++) {
+        mp_random_bits_into(z, bitposns[i]);
+        log_start();
+        char *formatted = mp_format(z);
+        log_end();
+        sfree(formatted);
+    }
+}
+
+static void test_mp_get_decimal(void)
+{
+    test_mp_string_format(mp_get_decimal);
+}
+
+static void test_mp_get_hex(void)
+{
+    test_mp_string_format(mp_get_hex);
+}
+
+static void test_mp_cmp(unsigned (*mp_cmp)(mp_int *a, mp_int *b))
+{
+    mp_int *a = mp_new(512), *b = mp_new(512);
+    static const size_t bitposns[] = {
+        0, 1, 5, 16, 23, 32, 67, 123, 234, 511
+    };
+    for (size_t i = 0; i < looplimit(lenof(bitposns)); i++) {
+        mp_random_fill(b);
+        mp_int *x = mp_random_bits(bitposns[i]);
+        mp_xor_into(a, b, x);
+        mp_free(x);
+        log_start();
+        mp_cmp(a, b);
+        log_end();
+    }
+    mp_free(a);
+    mp_free(b);
+}
+
+static void test_mp_cmp_hs(void)
+{
+    test_mp_cmp(mp_cmp_hs);
+}
+
+static void test_mp_cmp_eq(void)
+{
+    test_mp_cmp(mp_cmp_eq);
+}
+
+static void test_mp_minmax(
+    void (*mp_minmax_into)(mp_int *r, mp_int *x, mp_int *y))
+{
+    mp_int *a = mp_new(256), *b = mp_new(256);
+    for (size_t i = 0; i < looplimit(10); i++) {
+        uint8_t lens[2];
+        random_read(lens, 2);
+        mp_int *x = mp_random_bits(lens[0]);
+        mp_copy_into(a, x);
+        mp_free(x);
+        mp_int *y = mp_random_bits(lens[1]);
+        mp_copy_into(a, y);
+        mp_free(y);
+        log_start();
+        mp_minmax_into(a, a, b);
+        log_end();
+    }
+    mp_free(a);
+    mp_free(b);
+}
+
+static void test_mp_max(void)
+{
+    test_mp_minmax(mp_max_into);
+}
+
+static void test_mp_min(void)
+{
+    test_mp_minmax(mp_min_into);
+}
+
+static void test_mp_select_into(void)
+{
+    mp_int *a = mp_random_bits(256);
+    mp_int *b = mp_random_bits(512);
+    mp_int *r = mp_new(384);
+    for (size_t i = 0; i < looplimit(16); i++) {
+        log_start();
+        mp_select_into(r, a, b, i & 1);
+        log_end();
+    }
+    mp_free(a);
+    mp_free(b);
+    mp_free(r);
+}
+
+static void test_mp_cond_swap(void)
+{
+    mp_int *a = mp_random_bits(512);
+    mp_int *b = mp_random_bits(512);
+    for (size_t i = 0; i < looplimit(16); i++) {
+        log_start();
+        mp_cond_swap(a, b, i & 1);
+        log_end();
+    }
+    mp_free(a);
+    mp_free(b);
+}
+
+static void test_mp_cond_clear(void)
+{
+    mp_int *a = mp_random_bits(512);
+    mp_int *x = mp_copy(a);
+    for (size_t i = 0; i < looplimit(16); i++) {
+        mp_copy_into(x, a);
+        log_start();
+        mp_cond_clear(a, i & 1);
+        log_end();
+    }
+    mp_free(a);
+    mp_free(x);
+}
+
+static void test_mp_arithmetic(mp_int *(*mp_arith)(mp_int *x, mp_int *y))
+{
+    mp_int *a = mp_new(256), *b = mp_new(512);
+    for (size_t i = 0; i < looplimit(16); i++) {
+        mp_random_fill(a);
+        mp_random_fill(b);
+        log_start();
+        mp_int *r = mp_arith(a, b);
+        log_end();
+        mp_free(r);
+    }
+    mp_free(a);
+    mp_free(b);
+}
+
+static void test_mp_add(void)
+{
+    test_mp_arithmetic(mp_add);
+}
+
+static void test_mp_sub(void)
+{
+    test_mp_arithmetic(mp_sub);
+}
+
+static void test_mp_mul(void)
+{
+    test_mp_arithmetic(mp_mul);
+}
+
+static void test_mp_invert(void)
+{
+    test_mp_arithmetic(mp_invert);
+}
+
+static void test_mp_rshift_safe(void)
+{
+    mp_int *x = mp_random_bits(256);
+
+    for (size_t i = 0; i < looplimit(mp_max_bits(x)+1); i++) {
+        log_start();
+        mp_int *r = mp_rshift_safe(x, i);
+        log_end();
+        mp_free(r);
+    }
+
+    mp_free(x);
+}
+
+static void test_mp_divmod(void)
+{
+    mp_int *n = mp_new(256), *d = mp_new(256);
+    mp_int *q = mp_new(256), *r = mp_new(256);
+
+    for (size_t i = 0; i < looplimit(32); i++) {
+        uint8_t sizes[2];
+        random_read(sizes, 2);
+        mp_random_bits_into(n, sizes[0]);
+        mp_random_bits_into(d, sizes[1]);
+        log_start();
+        mp_divmod_into(n, d, q, r);
+        log_end();
+    }
+
+    mp_free(n);
+    mp_free(d);
+    mp_free(q);
+    mp_free(r);
+}
+
+static void test_mp_modarith(
+    mp_int *(*mp_modarith)(mp_int *x, mp_int *y, mp_int *modulus))
+{
+    mp_int *base = mp_new(256);
+    mp_int *exponent = mp_new(256);
+    mp_int *modulus = mp_new(256);
+
+    for (size_t i = 0; i < looplimit(8); i++) {
+        mp_random_fill(base);
+        mp_random_fill(exponent);
+        mp_random_fill(modulus);
+        mp_set_bit(modulus, 0, 1);    /* we only support odd moduli */
+
+        log_start();
+        mp_int *out = mp_modarith(base, exponent, modulus);
+        log_end();
+
+        mp_free(out);
+    }
+}
+
+static void test_mp_modadd(void)
+{
+    test_mp_modarith(mp_modadd);
+}
+
+static void test_mp_modsub(void)
+{
+    test_mp_modarith(mp_modsub);
+}
+
+static void test_mp_modmul(void)
+{
+    test_mp_modarith(mp_modmul);
+}
+
+static void test_mp_modpow(void)
+{
+    test_mp_modarith(mp_modpow);
+}
+
+static void test_mp_invert_mod_2to(void)
+{
+    mp_int *x = mp_new(512);
+
+    for (size_t i = 0; i < looplimit(32); i++) {
+        mp_random_fill(x);
+        mp_set_bit(x, 0, 1);           /* input should be odd */
+
+        log_start();
+        mp_int *out = mp_invert_mod_2to(x, 511);
+        log_end();
+
+        mp_free(out);
+    }
+}
+
+static void test_mp_modsqrt(void)
+{
+    /* The prime isn't secret in this function (and in any case
+     * finding a non-square on the fly would be prohibitively
+     * annoying), so I hardcode a fixed one, selected to have a lot of
+     * factors of two in p-1 so as to exercise lots of choices in the
+     * algorithm. */
+    mp_int *p =
+        MP_LITERAL(0xb56a517b206a88c73cfa9ec6f704c7030d18212cace82401);
+    mp_int *nonsquare = MP_LITERAL(0x5);
+    size_t bits = mp_max_bits(p);
+    ModsqrtContext *sc = modsqrt_new(p, nonsquare);
+    mp_free(p);
+    mp_free(nonsquare);
+
+    mp_int *x = mp_new(bits);
+    unsigned success;
+
+    /* Do one initial call to cause the lazily initialised sub-context
+     * to be set up. This will take a while, but it can't be helped. */
+    mp_modsqrt(sc, x, &success);
+
+    for (size_t i = 0; i < looplimit(8); i++) {
+        mp_random_bits_into(x, bits - 1);
+        log_start();
+        mp_int *out = mp_modsqrt(sc, x, &success);
+        log_end();
+        mp_free(out);
+    }
+
+    mp_free(x);
+}
+
+static WeierstrassCurve *wcurve(void)
+{
+    mp_int *p = MP_LITERAL(0xc19337603dc856acf31e01375a696fdf5451);
+    mp_int *a = MP_LITERAL(0x864946f50eecca4cde7abad4865e34be8f67);
+    mp_int *b = MP_LITERAL(0x6a5bf56db3a03ba91cfbf3241916c90feeca);
+    mp_int *nonsquare = mp_from_integer(3);
+    WeierstrassCurve *wc = ecc_weierstrass_curve(p, a, b, nonsquare);
+    mp_free(p);
+    mp_free(a);
+    mp_free(b);
+    mp_free(nonsquare);
+    return wc;
+}
+
+static WeierstrassPoint *wpoint(WeierstrassCurve *wc, size_t index)
+{
+    mp_int *x = NULL, *y = NULL;
+    WeierstrassPoint *wp;
+    switch (index) {
+      case 0:
+        break;
+      case 1:
+        x = MP_LITERAL(0x12345);
+        y = MP_LITERAL(0x3c2c799a365b53d003ef37dab65860bf80ae);
+        break;
+      case 2:
+        x = MP_LITERAL(0x4e1c77e3c00f7c3b15869e6a4e5f86b3ee53);
+        y = MP_LITERAL(0x5bde01693130591400b5c9d257d8325a44a5);
+        break;
+      case 3:
+        x = MP_LITERAL(0xb5f0e722b2f0f7e729f55ba9f15511e3b399);
+        y = MP_LITERAL(0x033d636b855c931cfe679f0b18db164a0d64);
+        break;
+      case 4:
+        x = MP_LITERAL(0xb5f0e722b2f0f7e729f55ba9f15511e3b399);
+        y = MP_LITERAL(0xbe55d3f4b86bc38ff4b6622c418e599546ed);
+        break;
+      default:
+        unreachable("only 5 example Weierstrass points defined");
+    }
+    if (x && y) {
+        wp = ecc_weierstrass_point_new(wc, x, y);
+    } else {
+        wp = ecc_weierstrass_point_new_identity(wc);
+    }
+    if (x)
+        mp_free(x);
+    if (y)
+        mp_free(y);
+    return wp;
+}
+
+static void test_ecc_weierstrass_add(void)
+{
+    WeierstrassCurve *wc = wcurve();
+    WeierstrassPoint *a = ecc_weierstrass_point_new_identity(wc);
+    WeierstrassPoint *b = ecc_weierstrass_point_new_identity(wc);
+    for (size_t i = 0; i < looplimit(5); i++) {
+        for (size_t j = 0; j < looplimit(5); j++) {
+            if (i == 0 || j == 0 || i == j ||
+                (i==3 && j==4) || (i==4 && j==3))
+                continue;              /* difficult cases */
+
+            WeierstrassPoint *A = wpoint(wc, i), *B = wpoint(wc, j);
+            ecc_weierstrass_point_copy_into(a, A);
+            ecc_weierstrass_point_copy_into(b, B);
+            ecc_weierstrass_point_free(A);
+            ecc_weierstrass_point_free(B);
+
+            log_start();
+            WeierstrassPoint *r = ecc_weierstrass_add(a, b);
+            log_end();
+            ecc_weierstrass_point_free(r);
+        }
+    }
+    ecc_weierstrass_point_free(a);
+    ecc_weierstrass_point_free(b);
+    ecc_weierstrass_curve_free(wc);
+}
+
+static void test_ecc_weierstrass_double(void)
+{
+    WeierstrassCurve *wc = wcurve();
+    WeierstrassPoint *a = ecc_weierstrass_point_new_identity(wc);
+    for (size_t i = 0; i < looplimit(5); i++) {
+        WeierstrassPoint *A = wpoint(wc, i);
+        ecc_weierstrass_point_copy_into(a, A);
+        ecc_weierstrass_point_free(A);
+
+        log_start();
+        WeierstrassPoint *r = ecc_weierstrass_double(a);
+        log_end();
+        ecc_weierstrass_point_free(r);
+    }
+    ecc_weierstrass_point_free(a);
+    ecc_weierstrass_curve_free(wc);
+}
+
+static void test_ecc_weierstrass_add_general(void)
+{
+    WeierstrassCurve *wc = wcurve();
+    WeierstrassPoint *a = ecc_weierstrass_point_new_identity(wc);
+    WeierstrassPoint *b = ecc_weierstrass_point_new_identity(wc);
+    for (size_t i = 0; i < looplimit(5); i++) {
+        for (size_t j = 0; j < looplimit(5); j++) {
+            WeierstrassPoint *A = wpoint(wc, i), *B = wpoint(wc, j);
+            ecc_weierstrass_point_copy_into(a, A);
+            ecc_weierstrass_point_copy_into(b, B);
+            ecc_weierstrass_point_free(A);
+            ecc_weierstrass_point_free(B);
+
+            log_start();
+            WeierstrassPoint *r = ecc_weierstrass_add_general(a, b);
+            log_end();
+            ecc_weierstrass_point_free(r);
+        }
+    }
+    ecc_weierstrass_point_free(a);
+    ecc_weierstrass_point_free(b);
+    ecc_weierstrass_curve_free(wc);
+}
+
+static void test_ecc_weierstrass_multiply(void)
+{
+    WeierstrassCurve *wc = wcurve();
+    WeierstrassPoint *a = ecc_weierstrass_point_new_identity(wc);
+    mp_int *exponent = mp_new(56);
+    for (size_t i = 1; i < looplimit(5); i++) {
+        WeierstrassPoint *A = wpoint(wc, i);
+        ecc_weierstrass_point_copy_into(a, A);
+        ecc_weierstrass_point_free(A);
+        mp_random_fill(exponent);
+
+        log_start();
+        WeierstrassPoint *r = ecc_weierstrass_multiply(a, exponent);
+        log_end();
+
+        ecc_weierstrass_point_free(r);
+    }
+    ecc_weierstrass_point_free(a);
+    ecc_weierstrass_curve_free(wc);
+}
+
+static void test_ecc_weierstrass_is_identity(void)
+{
+    WeierstrassCurve *wc = wcurve();
+    WeierstrassPoint *a = ecc_weierstrass_point_new_identity(wc);
+    for (size_t i = 1; i < looplimit(5); i++) {
+        WeierstrassPoint *A = wpoint(wc, i);
+        ecc_weierstrass_point_copy_into(a, A);
+        ecc_weierstrass_point_free(A);
+
+        log_start();
+        ecc_weierstrass_is_identity(a);
+        log_end();
+    }
+    ecc_weierstrass_point_free(a);
+    ecc_weierstrass_curve_free(wc);
+}
+
+static void test_ecc_weierstrass_get_affine(void)
+{
+    WeierstrassCurve *wc = wcurve();
+    WeierstrassPoint *r = ecc_weierstrass_point_new_identity(wc);
+    for (size_t i = 0; i < looplimit(4); i++) {
+        WeierstrassPoint *A = wpoint(wc, i), *B = wpoint(wc, i+1);
+        WeierstrassPoint *R = ecc_weierstrass_add_general(A, B);
+        ecc_weierstrass_point_copy_into(r, R);
+        ecc_weierstrass_point_free(A);
+        ecc_weierstrass_point_free(B);
+        ecc_weierstrass_point_free(R);
+
+        log_start();
+        mp_int *x, *y;
+        ecc_weierstrass_get_affine(r, &x, &y);
+        log_end();
+        mp_free(x);
+        mp_free(y);
+    }
+    ecc_weierstrass_point_free(r);
+    ecc_weierstrass_curve_free(wc);
+}
+
+static void test_ecc_weierstrass_decompress(void)
+{
+    WeierstrassCurve *wc = wcurve();
+
+    /* As in the mp_modsqrt test, prime the lazy initialisation of the
+     * ModsqrtContext */
+    mp_int *x = mp_new(144);
+    WeierstrassPoint *a = ecc_weierstrass_point_new_from_x(wc, x, 0);
+    if (a)                 /* don't care whether this one succeeded */
+        ecc_weierstrass_point_free(a);
+
+    for (size_t p = 0; p < looplimit(2); p++) {
+        for (size_t i = 1; i < looplimit(5); i++) {
+            WeierstrassPoint *A = wpoint(wc, i);
+            mp_int *X;
+            ecc_weierstrass_get_affine(A, &X, NULL);
+            mp_copy_into(x, X);
+            mp_free(X);
+            ecc_weierstrass_point_free(A);
+
+            log_start();
+            WeierstrassPoint *a = ecc_weierstrass_point_new_from_x(wc, x, p);
+            log_end();
+
+            ecc_weierstrass_point_free(a);
+        }
+    }
+    mp_free(x);
+    ecc_weierstrass_curve_free(wc);
+}
+
+static MontgomeryCurve *mcurve(void)
+{
+    mp_int *p = MP_LITERAL(0xde978eb1db35236a5792e9f0c04d86000659);
+    mp_int *a = MP_LITERAL(0x799b62a612b1b30e1c23cea6d67b2e33c51a);
+    mp_int *b = MP_LITERAL(0x944bf9042b56821a8c9e0b49b636c2502b2b);
+    MontgomeryCurve *mc = ecc_montgomery_curve(p, a, b);
+    mp_free(p);
+    mp_free(a);
+    mp_free(b);
+    return mc;
+}
+
+static MontgomeryPoint *mpoint(MontgomeryCurve *wc, size_t index)
+{
+    mp_int *x = NULL;
+    MontgomeryPoint *mp;
+    switch (index) {
+      case 0:
+        x = MP_LITERAL(31415);
+        break;
+      case 1:
+        x = MP_LITERAL(0x4d352c654c06eecfe19104118857b38398e8);
+        break;
+      case 2:
+        x = MP_LITERAL(0x03fca2a73983bc3434caae3134599cd69cce);
+        break;
+      case 3:
+        x = MP_LITERAL(0xa0fd735ce9b3406498b5f035ee655bda4e15);
+        break;
+      case 4:
+        x = MP_LITERAL(0x7c7f46a00cc286dbe47db39b6d8f5efd920e);
+        break;
+      case 5:
+        x = MP_LITERAL(0x07a6dc30d3b320448e6f8999be417e6b7c6b);
+        break;
+      case 6:
+        x = MP_LITERAL(0x7832da5fc16dfbd358170b2b96896cd3cd06);
+        break;
+      default:
+        unreachable("only 7 example Weierstrass points defined");
+    }
+    mp = ecc_montgomery_point_new(wc, x);
+    mp_free(x);
+    return mp;
+}
+
+static void test_ecc_montgomery_diff_add(void)
+{
+    MontgomeryCurve *wc = mcurve();
+    MontgomeryPoint *a = NULL, *b = NULL, *c = NULL;
+    for (size_t i = 0; i < looplimit(5); i++) {
+        MontgomeryPoint *A = mpoint(wc, i);
+        MontgomeryPoint *B = mpoint(wc, i);
+        MontgomeryPoint *C = mpoint(wc, i);
+        if (!a) {
+            a = A;
+            b = B;
+            c = C;
+        } else {
+            ecc_montgomery_point_copy_into(a, A);
+            ecc_montgomery_point_copy_into(b, B);
+            ecc_montgomery_point_copy_into(c, C);
+            ecc_montgomery_point_free(A);
+            ecc_montgomery_point_free(B);
+            ecc_montgomery_point_free(C);
+        }
+
+        log_start();
+        MontgomeryPoint *r = ecc_montgomery_diff_add(b, c, a);
+        log_end();
+
+        ecc_montgomery_point_free(r);
+    }
+    ecc_montgomery_point_free(a);
+    ecc_montgomery_point_free(b);
+    ecc_montgomery_point_free(c);
+    ecc_montgomery_curve_free(wc);
+}
+
+static void test_ecc_montgomery_double(void)
+{
+    MontgomeryCurve *wc = mcurve();
+    MontgomeryPoint *a = NULL;
+    for (size_t i = 0; i < looplimit(7); i++) {
+        MontgomeryPoint *A = mpoint(wc, i);
+        if (!a) {
+            a = A;
+        } else {
+            ecc_montgomery_point_copy_into(a, A);
+            ecc_montgomery_point_free(A);
+        }
+
+        log_start();
+        MontgomeryPoint *r = ecc_montgomery_double(a);
+        log_end();
+
+        ecc_montgomery_point_free(r);
+    }
+    ecc_montgomery_point_free(a);
+    ecc_montgomery_curve_free(wc);
+}
+
+static void test_ecc_montgomery_multiply(void)
+{
+    MontgomeryCurve *wc = mcurve();
+    MontgomeryPoint *a = NULL;
+    mp_int *exponent = mp_new(56);
+    for (size_t i = 0; i < looplimit(7); i++) {
+        MontgomeryPoint *A = mpoint(wc, i);
+        if (!a) {
+            a = A;
+        } else {
+            ecc_montgomery_point_copy_into(a, A);
+            ecc_montgomery_point_free(A);
+        }
+        mp_random_fill(exponent);
+
+        log_start();
+        MontgomeryPoint *r = ecc_montgomery_multiply(a, exponent);
+        log_end();
+
+        ecc_montgomery_point_free(r);
+    }
+    ecc_montgomery_point_free(a);
+    ecc_montgomery_curve_free(wc);
+}
+
+static void test_ecc_montgomery_get_affine(void)
+{
+    MontgomeryCurve *wc = mcurve();
+    MontgomeryPoint *r = NULL;
+    for (size_t i = 0; i < looplimit(5); i++) {
+        MontgomeryPoint *A = mpoint(wc, i);
+        MontgomeryPoint *B = mpoint(wc, i);
+        MontgomeryPoint *C = mpoint(wc, i);
+        MontgomeryPoint *R = ecc_montgomery_diff_add(B, C, A);
+        ecc_montgomery_point_free(A);
+        ecc_montgomery_point_free(B);
+        ecc_montgomery_point_free(C);
+        if (!r) {
+            r = R;
+        } else {
+            ecc_montgomery_point_copy_into(r, R);
+            ecc_montgomery_point_free(R);
+        }
+
+        log_start();
+        mp_int *x;
+        ecc_montgomery_get_affine(r, &x);
+        log_end();
+
+        mp_free(x);
+    }
+    ecc_montgomery_point_free(r);
+    ecc_montgomery_curve_free(wc);
+}
+
+static EdwardsCurve *ecurve(void)
+{
+    mp_int *p = MP_LITERAL(0xfce2dac1704095de0b5c48876c45063cd475);
+    mp_int *d = MP_LITERAL(0xbd4f77401c3b14ae1742a7d1d367adac8f3e);
+    mp_int *a = MP_LITERAL(0x51d0845da3fa871aaac4341adea53b861919);
+    mp_int *nonsquare = mp_from_integer(2);
+    EdwardsCurve *ec = ecc_edwards_curve(p, d, a, nonsquare);
+    mp_free(p);
+    mp_free(d);
+    mp_free(a);
+    mp_free(nonsquare);
+    return ec;
+}
+
+static EdwardsPoint *epoint(EdwardsCurve *wc, size_t index)
+{
+    mp_int *x, *y;
+    EdwardsPoint *ep;
+    switch (index) {
+      case 0:
+        x = MP_LITERAL(0x0);
+        y = MP_LITERAL(0x1);
+        break;
+      case 1:
+        x = MP_LITERAL(0x3d8aef0294a67c1c7e8e185d987716250d7c);
+        y = MP_LITERAL(0x27184);
+        break;
+      case 2:
+        x = MP_LITERAL(0xf44ed5b8a6debfd3ab24b7874cd2589fd672);
+        y = MP_LITERAL(0xd635d8d15d367881c8a3af472c8fe487bf40);
+        break;
+      case 3:
+        x = MP_LITERAL(0xde114ecc8b944684415ef81126a07269cd30);
+        y = MP_LITERAL(0xbe0fd45ff67ebba047ed0ec5a85d22e688a1);
+        break;
+      case 4:
+        x = MP_LITERAL(0x76bd2f90898d271b492c9c20dd7bbfe39fe5);
+        y = MP_LITERAL(0xbf1c82698b4a5a12c1057631c1ebdc216ae2);
+        break;
+      default:
+        unreachable("only 5 example Edwards points defined");
+    }
+    ep = ecc_edwards_point_new(wc, x, y);
+    mp_free(x);
+    mp_free(y);
+    return ep;
+}
+
+static void test_ecc_edwards_add(void)
+{
+    EdwardsCurve *ec = ecurve();
+    EdwardsPoint *a = NULL, *b = NULL;
+    for (size_t i = 0; i < looplimit(5); i++) {
+        for (size_t j = 0; j < looplimit(5); j++) {
+            EdwardsPoint *A = epoint(ec, i), *B = epoint(ec, j);
+            if (!a) {
+                a = A;
+                b = B;
+            } else {
+                ecc_edwards_point_copy_into(a, A);
+                ecc_edwards_point_copy_into(b, B);
+                ecc_edwards_point_free(A);
+                ecc_edwards_point_free(B);
+            }
+
+            log_start();
+            EdwardsPoint *r = ecc_edwards_add(a, b);
+            log_end();
+
+            ecc_edwards_point_free(r);
+        }
+    }
+    ecc_edwards_point_free(a);
+    ecc_edwards_point_free(b);
+    ecc_edwards_curve_free(ec);
+}
+
+static void test_ecc_edwards_multiply(void)
+{
+    EdwardsCurve *ec = ecurve();
+    EdwardsPoint *a = NULL;
+    mp_int *exponent = mp_new(56);
+    for (size_t i = 1; i < looplimit(5); i++) {
+        EdwardsPoint *A = epoint(ec, i);
+        if (!a) {
+            a = A;
+        } else {
+            ecc_edwards_point_copy_into(a, A);
+            ecc_edwards_point_free(A);
+        }
+        mp_random_fill(exponent);
+
+        log_start();
+        EdwardsPoint *r = ecc_edwards_multiply(a, exponent);
+        log_end();
+
+        ecc_edwards_point_free(r);
+    }
+    ecc_edwards_point_free(a);
+    ecc_edwards_curve_free(ec);
+}
+
+static void test_ecc_edwards_eq(void)
+{
+    EdwardsCurve *ec = ecurve();
+    EdwardsPoint *a = NULL, *b = NULL;
+    for (size_t i = 0; i < looplimit(5); i++) {
+        for (size_t j = 0; j < looplimit(5); j++) {
+            EdwardsPoint *A = epoint(ec, i), *B = epoint(ec, j);
+            if (!a) {
+                a = A;
+                b = B;
+            } else {
+                ecc_edwards_point_copy_into(a, A);
+                ecc_edwards_point_copy_into(b, B);
+                ecc_edwards_point_free(A);
+                ecc_edwards_point_free(B);
+            }
+
+            log_start();
+            ecc_edwards_eq(a, b);
+            log_end();
+        }
+    }
+    ecc_edwards_point_free(a);
+    ecc_edwards_point_free(b);
+    ecc_edwards_curve_free(ec);
+}
+
+static void test_ecc_edwards_get_affine(void)
+{
+    EdwardsCurve *ec = ecurve();
+    EdwardsPoint *r = NULL;
+    for (size_t i = 0; i < looplimit(4); i++) {
+        EdwardsPoint *A = epoint(ec, i), *B = epoint(ec, i+1);
+        EdwardsPoint *R = ecc_edwards_add(A, B);
+        ecc_edwards_point_free(A);
+        ecc_edwards_point_free(B);
+        if (!r) {
+            r = R;
+        } else {
+            ecc_edwards_point_copy_into(r, R);
+            ecc_edwards_point_free(R);
+        }
+
+        log_start();
+        mp_int *x, *y;
+        ecc_edwards_get_affine(r, &x, &y);
+        log_end();
+
+        mp_free(x);
+        mp_free(y);
+    }
+    ecc_edwards_point_free(r);
+    ecc_edwards_curve_free(ec);
+}
+
+static void test_ecc_edwards_decompress(void)
+{
+    EdwardsCurve *ec = ecurve();
+
+    /* As in the mp_modsqrt test, prime the lazy initialisation of the
+     * ModsqrtContext */
+    mp_int *y = mp_new(144);
+    EdwardsPoint *a = ecc_edwards_point_new_from_y(ec, y, 0);
+    if (a)                 /* don't care whether this one succeeded */
+        ecc_edwards_point_free(a);
+
+    for (size_t p = 0; p < looplimit(2); p++) {
+        for (size_t i = 0; i < looplimit(5); i++) {
+            EdwardsPoint *A = epoint(ec, i);
+            mp_int *Y;
+            ecc_edwards_get_affine(A, NULL, &Y);
+            mp_copy_into(y, Y);
+            mp_free(Y);
+            ecc_edwards_point_free(A);
+
+            log_start();
+            EdwardsPoint *a = ecc_edwards_point_new_from_y(ec, y, p);
+            log_end();
+
+            ecc_edwards_point_free(a);
+        }
+    }
+    mp_free(y);
+    ecc_edwards_curve_free(ec);
+}
+
+static void test_cipher(const ssh_cipheralg *calg)
+{
+    ssh_cipher *c = ssh_cipher_new(calg);
+    if (!c) {
+        test_skipped = true;
+        return;
+    }
+    const ssh2_macalg *malg = calg->required_mac;
+    ssh2_mac *m = NULL;
+    if (malg) {
+        m = ssh2_mac_new(malg, c);
+        if (!m) {
+            ssh_cipher_free(c);
+            test_skipped = true;
+            return;
+        }
+    }
+
+    uint8_t *ckey = snewn(calg->padded_keybytes, uint8_t);
+    uint8_t *civ = snewn(calg->blksize, uint8_t);
+    uint8_t *mkey = malg ? snewn(malg->keylen, uint8_t) : NULL;
+    size_t datalen = calg->blksize * 8;
+    size_t maclen = malg ? malg->len : 0;
+    uint8_t *data = snewn(datalen + maclen, uint8_t);
+    size_t lenlen = 4;
+    uint8_t *lendata = snewn(lenlen, uint8_t);
+
+    for (size_t i = 0; i < looplimit(16); i++) {
+        random_read(ckey, calg->padded_keybytes);
+        if (malg)
+            random_read(mkey, malg->keylen);
+        random_read(data, datalen);
+        random_read(lendata, lenlen);
+        if (i == 0) {
+            /* Ensure one of our test IVs will cause SDCTR wraparound */
+            memset(civ, 0xFF, calg->blksize);
+        } else {
+            random_read(civ, calg->blksize);
+        }
+        uint8_t seqbuf[4];
+        random_read(seqbuf, 4);
+        uint32_t seq = GET_32BIT_MSB_FIRST(seqbuf);
+
+        log_start();
+        ssh_cipher_setkey(c, ckey);
+        ssh_cipher_setiv(c, civ);
+        if (m)
+            ssh2_mac_setkey(m, make_ptrlen(mkey, malg->keylen));
+        if (calg->flags & SSH_CIPHER_SEPARATE_LENGTH)
+            ssh_cipher_encrypt_length(c, data, datalen, seq);
+        ssh_cipher_encrypt(c, data, datalen);
+        if (m) {
+            ssh2_mac_generate(m, data, datalen, seq);
+            ssh2_mac_verify(m, data, datalen, seq);
+        }
+        if (calg->flags & SSH_CIPHER_SEPARATE_LENGTH)
+            ssh_cipher_decrypt_length(c, data, datalen, seq);
+        ssh_cipher_decrypt(c, data, datalen);
+        log_end();
+    }
+
+    sfree(ckey);
+    sfree(civ);
+    sfree(mkey);
+    sfree(data);
+    sfree(lendata);
+    if (m)
+        ssh2_mac_free(m);
+    ssh_cipher_free(c);
+}
+
+#define CIPHER_TESTFN(Y_unused, cipher)                                 \
+    static void test_cipher_##cipher(void) { test_cipher(&cipher); }
+CIPHERS(CIPHER_TESTFN, Y_unused)
+
+static void test_mac(const ssh2_macalg *malg)
+{
+    ssh2_mac *m = ssh2_mac_new(malg, NULL);
+    if (!m) {
+        test_skipped = true;
+        return;
+    }
+
+    uint8_t *mkey = malg ? snewn(malg->keylen, uint8_t) : NULL;
+    size_t datalen = 256;
+    size_t maclen = malg ? malg->len : 0;
+    uint8_t *data = snewn(datalen + maclen, uint8_t);
+
+    /* Preliminarily key the MAC, to avoid the divergence of control
+     * flow in which hmac_key() avoids some free()s the first time
+     * through */
+    random_read(mkey, malg->keylen);
+    ssh2_mac_setkey(m, make_ptrlen(mkey, malg->keylen));
+
+    for (size_t i = 0; i < looplimit(16); i++) {
+        random_read(mkey, malg->keylen);
+        random_read(data, datalen);
+        uint8_t seqbuf[4];
+        random_read(seqbuf, 4);
+        uint32_t seq = GET_32BIT_MSB_FIRST(seqbuf);
+
+        log_start();
+        ssh2_mac_setkey(m, make_ptrlen(mkey, malg->keylen));
+        ssh2_mac_generate(m, data, datalen, seq);
+        ssh2_mac_verify(m, data, datalen, seq);
+        log_end();
+    }
+
+    sfree(mkey);
+    sfree(data);
+    ssh2_mac_free(m);
+}
+
+#define MAC_TESTFN(Y_unused, mac)                                 \
+    static void test_mac_##mac(void) { test_mac(&mac); }
+MACS(MAC_TESTFN, Y_unused)
+
+static void test_hash(const ssh_hashalg *halg)
+{
+    ssh_hash *h = ssh_hash_new(halg);
+    if (!h) {
+        test_skipped = true;
+        return;
+    }
+
+    size_t datalen = 256;
+    uint8_t *data = snewn(datalen, uint8_t);
+    uint8_t *hash = snewn(halg->hlen, uint8_t);
+
+    for (size_t i = 0; i < looplimit(16); i++) {
+        random_read(data, datalen);
+
+        log_start();
+        put_data(h, data, datalen);
+        ssh_hash_final(h, hash);
+        log_end();
+
+        h = ssh_hash_new(halg);
+    }
+
+    sfree(data);
+    sfree(hash);
+    ssh_hash_free(h);
+}
+
+#define HASH_TESTFN(Y_unused, hash)                             \
+    static void test_hash_##hash(void) { test_hash(&hash); }
+HASHES(HASH_TESTFN, Y_unused)
+
+struct test {
+    const char *testname;
+    void (*testfn)(void);
+};
+
+static const struct test tests[] = {
+#define STRUCT_TEST(X) { #X, test_##X },
+TESTLIST(STRUCT_TEST)
+#undef STRUCT_TEST
+};
+
+int main(int argc, char **argv)
+{
+    bool doing_opts = true;
+    const char *pname = argv[0];
+    uint8_t tests_to_run[lenof(tests)];
+    bool keep_outfiles = false;
+    bool test_names_given = false;
+
+    memset(tests_to_run, 1, sizeof(tests_to_run));
+
+    while (--argc > 0) {
+        char *p = *++argv;
+
+        if (p[0] == '-' && doing_opts) {
+            if (!strcmp(p, "-O")) {
+                if (--argc <= 0) {
+                    fprintf(stderr, "'-O' expects a directory name\n");
+                    return 1;
+                }
+                outdir = *++argv;
+            } else if (!strcmp(p, "-k") || !strcmp(p, "--keep")) {
+                keep_outfiles = true;
+            } else if (!strcmp(p, "--")) {
+                doing_opts = false;
+            } else if (!strcmp(p, "--help")) {
+                printf("  usage: drrun -c test/sclog/libsclog.so -- "
+                       "%s -O <outdir>\n", pname);
+                printf("options: -O <outdir>           "
+                       "put log files in the specified directory\n");
+                printf("         -k, --keep            "
+                       "do not delete log files for tests that passed\n");
+                printf("   also: --help                "
+                       "display this text\n");
+                return 0;
+            } else {
+                fprintf(stderr, "unknown command line option '%s'\n", p);
+                return 1;
+            }
+        } else {
+            if (!test_names_given) {
+                test_names_given = true;
+                memset(tests_to_run, 0, sizeof(tests_to_run));
+            }
+            bool found_one = false;
+            for (size_t i = 0; i < lenof(tests); i++) {
+                if (wc_match(p, tests[i].testname)) {
+                    tests_to_run[i] = 1;
+                    found_one = true;
+                }
+            }
+            if (!found_one) {
+                fprintf(stderr, "no test name matched '%s'\n", p);
+                return 1;
+            }
+        }
+    }
+
+    bool is_dry_run = dry_run();
+
+    if (is_dry_run) {
+        printf("Dry run (DynamoRIO instrumentation not detected)\n");
+    } else {
+        if (!outdir) {
+            fprintf(stderr, "expected -O <outdir> option\n");
+            return 1;
+        }
+        printf("Will write log files to %s\n", outdir);
+    }
+
+    size_t nrun = 0, npass = 0;
+
+    for (size_t i = 0; i < lenof(tests); i++) {
+        bool keep_these_outfiles = true;
+
+        if (!tests_to_run[i])
+            continue;
+        const struct test *test = &tests[i];
+        printf("Running test %s ... ", test->testname);
+        fflush(stdout);
+
+        test_skipped = false;
+        random_seed(test->testname);
+        test_basename = test->testname;
+        test_index = 0;
+
+        test->testfn();
+
+        if (test_skipped) {
+            /* Used for e.g. tests of hardware-accelerated crypto when
+             * the hardware acceleration isn't available */
+            printf("skipped\n");
+            continue;
+        }
+
+        nrun++;
+
+        if (is_dry_run) {
+            printf("dry run done\n");
+            continue;                  /* test files won't exist anyway */
+        }
+
+        if (test_index < 2) {
+            printf("FAIL: test did not generate multiple output files\n");
+            goto test_done;
+        }
+
+        char *firstfile = log_filename(test_basename, 0);
+        FILE *firstfp = fopen(firstfile, "rb");
+        if (!firstfp) {
+            printf("ERR: %s: open: %s\n", firstfile, strerror(errno));
+            goto test_done;
+        }
+        for (size_t i = 1; i < test_index; i++) {
+            char *nextfile = log_filename(test_basename, i);
+            FILE *nextfp = fopen(nextfile, "rb");
+            if (!nextfp) {
+                printf("ERR: %s: open: %s\n", nextfile, strerror(errno));
+                goto test_done;
+            }
+
+            rewind(firstfp);
+            char buf1[4096], bufn[4096];
+            bool compare_ok = false;
+            while (true) {
+                size_t r1 = fread(buf1, 1, sizeof(buf1), firstfp);
+                size_t rn = fread(bufn, 1, sizeof(bufn), nextfp);
+                if (r1 != rn) {
+                    printf("FAIL: %s %s: different lengths\n",
+                           firstfile, nextfile);
+                    break;
+                }
+                if (r1 == 0) {
+                    if (feof(firstfp) && feof(nextfp)) {
+                        compare_ok = true;
+                    } else {
+                        printf("FAIL: %s %s: error at end of file\n",
+                               firstfile, nextfile);
+                    }
+                    break;
+                }
+                if (memcmp(buf1, bufn, r1) != 0) {
+                    printf("FAIL: %s %s: different content\n",
+                           firstfile, nextfile);
+                    break;
+                }
+            }
+            fclose(nextfp);
+            sfree(nextfile);
+            if (!compare_ok) {
+                goto test_done;
+            }
+        }
+        fclose(firstfp);
+        sfree(firstfile);
+
+        printf("pass\n");
+        npass++;
+        keep_these_outfiles = keep_outfiles;
+
+      test_done:
+        if (!keep_these_outfiles) {
+            for (size_t i = 0; i < test_index; i++) {
+                char *file = log_filename(test_basename, i);
+                remove(file);
+                sfree(file);
+            }
+        }
+    }
+
+    if (npass == nrun) {
+        printf("All tests passed\n");
+        return 0;
+    } else {
+        printf("%zu tests failed\n", nrun - npass);
+        return 1;
+    }
+}