diff --git a/Recipe b/Recipe
index ef2c36a6..a4c5e828 100644
--- a/Recipe
+++ b/Recipe
@@ -250,10 +250,11 @@ GTKMAIN  = gtkmain cmdline
 NONSSH   = telnet raw rlogin ldisc pinger
 
 # SSH back end (putty, plink, pscp, psftp).
+ARITH    = mpint ecc
 SSHCOMMON = sshcommon sshrand
          + sshverstring sshcrc sshdes sshmd5 sshrsa sshsha sshblowf
          + sshdh sshcrcda sshpubk sshzlib sshdss ssharcf
-         + sshaes sshccp sshsh256 sshsh512 sshbn sshmac marshal nullplug
+         + sshaes sshccp sshsh256 sshsh512 ARITH sshmac marshal nullplug
          + sshgssc pgssapi sshecc wildcard ssh1censor ssh2censor ssh2bpp
 	 + ssh2transport ssh2transhk ssh2connection portfwd x11fwd
          + ssh1connection ssh1bpp
@@ -325,11 +326,11 @@ pscp     : [C] pscp winsftp wincons WINSSH BE_SSH SFTP wildcard WINMISC
 psftp    : [C] psftp winsftp wincons WINSSH BE_SSH SFTP wildcard WINMISC
          + psftp.res winnojmp LIBS
 
-pageant  : [G] winpgnt pageant sshrsa sshpubk sshdes sshbn sshmd5 version
+pageant  : [G] winpgnt pageant sshrsa sshpubk sshdes ARITH sshmd5 version
 	 + tree234 MISC sshaes sshsha winsecur winpgntc aqsync sshdss sshsh256
 	 + sshsh512 winutils sshecc winmisc winhelp conf pageant.res LIBS
 
-puttygen : [G] winpgen sshrsag sshdssg sshprime sshdes sshbn sshmd5 version
+puttygen : [G] winpgen sshrsag sshdssg sshprime sshdes ARITH sshmd5 version
          + sshrand winnoise sshsha winstore MISC winctrls sshrsa sshdss winmisc
          + sshpubk sshaes sshsh256 sshsh512 IMPORT winutils puttygen.res
          + tree234 notiming winhelp winnojmp CONF LIBS wintime sshecc
@@ -348,7 +349,7 @@ puttytel : [X] GTKTERM uxmisc misc ldisc settings uxsel U_BE_NOSSH
 plink    : [U] uxplink uxcons NONSSH UXSSH U_BE_ALL logging UXMISC uxsignal
          + ux_x11 noterm uxnogtk sessprep cmdline
 
-PUTTYGEN_UNIX = sshrsag sshdssg sshprime sshdes sshbn sshmd5 version
+PUTTYGEN_UNIX = sshrsag sshdssg sshprime sshdes ARITH sshmd5 version
          + sshrand uxnoise sshsha MISC sshrsa sshdss uxcons uxstore uxmisc
          + sshpubk sshaes sshsh256 sshsh512 IMPORT puttygen.res time tree234
          + uxgen notiming CONF sshecc sshecdsag uxnogtk
@@ -358,7 +359,7 @@ cgtest   : [UT] cgtest PUTTYGEN_UNIX
 pscp     : [U] pscp uxsftp uxcons UXSSH BE_SSH SFTP wildcard UXMISC uxnogtk
 psftp    : [U] psftp uxsftp uxcons UXSSH BE_SSH SFTP wildcard UXMISC uxnogtk
 
-pageant  : [X] uxpgnt uxagentc aqsync pageant sshrsa sshpubk sshdes sshbn
+pageant  : [X] uxpgnt uxagentc aqsync pageant sshrsa sshpubk sshdes ARITH
 	 + sshmd5 version tree234 misc sshaes sshsha sshdss sshsh256 sshsh512
 	 + sshecc CONF uxsignal nocproxy nogss be_none x11fwd ux_x11 uxcons
          + gtkask gtkmisc nullplug logging UXMISC uxagentsock memory
@@ -373,8 +374,6 @@ osxlaunch : [UT] osxlaunch
 
 fuzzterm : [UT] UXTERM CHARSET misc version uxmisc uxucs fuzzterm time settings
 	 + uxstore be_none uxnogtk memory
-testbn   : [UT] testbn sshbn MISC version CONF tree234 uxmisc uxnogtk
-testbn   : [C] testbn sshbn MISC version CONF tree234 winmisc LIBS
 testzlib : [UT] testzlib sshzlib memory
 
 uppity   : [UT] uxserver SSHSERVER UXMISC uxsignal uxnoise uxgss uxnogtk
diff --git a/cmdgen.c b/cmdgen.c
index 6730eeff..da2f98de 100644
--- a/cmdgen.c
+++ b/cmdgen.c
@@ -704,16 +704,16 @@ int main(int argc, char **argv)
 	    ssh2key->key = &dsskey->sshk;
 	    ssh1key = NULL;
         } else if (keytype == ECDSA) {
-            struct ec_key *ec = snew(struct ec_key);
-            ec_generate(ec, bits, progressfn, &prog);
+            struct ecdsa_key *ek = snew(struct ecdsa_key);
+            ecdsa_generate(ek, bits, progressfn, &prog);
             ssh2key = snew(struct ssh2_userkey);
-            ssh2key->key = &ec->sshk;
+            ssh2key->key = &ek->sshk;
             ssh1key = NULL;
         } else if (keytype == ED25519) {
-            struct ec_key *ec = snew(struct ec_key);
-            ec_edgenerate(ec, bits, progressfn, &prog);
+            struct eddsa_key *ek = snew(struct eddsa_key);
+            eddsa_generate(ek, bits, progressfn, &prog);
             ssh2key = snew(struct ssh2_userkey);
-            ssh2key->key = &ec->sshk;
+            ssh2key->key = &ek->sshk;
             ssh1key = NULL;
 	} else {
 	    struct RSAKey *rsakey = snew(struct RSAKey);
diff --git a/contrib/eccref.py b/contrib/eccref.py
new file mode 100644
index 00000000..55dfa042
--- /dev/null
+++ b/contrib/eccref.py
@@ -0,0 +1,401 @@
+import numbers
+import itertools
+
+def jacobi(n,m):
+    """Compute the Jacobi symbol.
+
+    The special case of this when m is prime is the Legendre symbol,
+    which is 0 if n is congruent to 0 mod m; 1 if n is congruent to a
+    non-zero square number mod m; -1 if n is not congruent to any
+    square mod m.
+
+    """
+    assert m & 1  # m must be odd
+    acc = 1  # running product of the sign flips collected so far
+    while True:
+        n %= m
+        if n == 0:
+            return 0
+        while not (n & 1):  # strip factors of 2 from n one at a time
+            n >>= 1
+            if (m & 7) not in {1,7}:  # 2 is a non-residue unless m is 1 or 7 mod 8
+                acc *= -1
+        if n == 1:
+            return acc
+        if (n & 3) == 3 and (m & 3) == 3:  # reciprocity: flip if both are 3 mod 4
+            acc *= -1
+        n, m = m, n
+
+class SqrtModP(object):
+    """Class for finding square roots of numbers mod p.
+
+    p must be an odd prime (but its primality is not checked)."""
+
+    def __init__(self, p):
+        p = abs(p)
+        assert p & 1
+        self.p = p
+
+        # Decompose p as 2^e k + 1 for odd k.
+        self.k = p-1
+        self.e = 0
+        while not (self.k & 1):
+            self.k >>= 1
+            self.e += 1
+
+        # Find a non-square mod p.
+        for self.z in itertools.count(1):  # ends up as the smallest positive non-square
+            if jacobi(self.z, self.p) == -1:
+                break
+        self.zinv = ModP(self.p, self.z).invert()  # a plain integer, not a ModP
+    # Recursive worker for sqrt(), which only ever passes it non-zero squares.
+    def sqrt_recurse(self, a):
+        ak = pow(a, self.k, self.p)
+        for i in range(self.e, -1, -1):  # keep squaring a^k until it becomes 1
+            if ak == 1:
+                break
+            ak = ak*ak % self.p
+        assert i > 0
+        if i == self.e:  # a^k was already 1, so a^((k+1)/2) squares to a
+            return pow(a, (self.k+1) // 2, self.p)
+        r_prime = self.sqrt_recurse(a * pow(self.z, 2**i, self.p))
+        return r_prime * pow(self.zinv, 2**(i-1), self.p) % self.p  # divide z^(2^(i-1)) back out
+    # Return the smaller square root of a mod p (0 if a is 0 mod p); ValueError if none.
+    def sqrt(self, a):
+        j = jacobi(a, self.p)
+        if j == 0:
+            return 0
+        if j < 0:
+            raise ValueError("{} has no square root mod {}".format(a, self.p))
+        a %= self.p
+        r = self.sqrt_recurse(a)
+        assert r*r % self.p == a
+        # Normalise to the smaller (or 'positive') one of the two roots.
+        return min(r, self.p - r)
+
+    def __str__(self):
+        return "{}({})".format(type(self).__name__, self.p)
+    def __repr__(self):
+        return self.__str__()
+
+class ModP(object):
+    """Class that represents integers mod p as a field.
+
+    All the usual arithmetic operations are supported directly,
+    including division, so you can write formulas in a natural way
+    without having to keep saying '% p' everywhere or call a
+    cumbersome modular_inverse() function.
+    The '/' operator works under both Python 2 and Python 3.
+    """
+    def __init__(self, p, n=0):
+        self.p = p
+        if isinstance(n, type(self)):  # copying another ModP: moduli must agree
+            self.check(n)
+            n = n.n
+        self.n = n % p  # always stored as the residue reduced mod p
+    def check(self, other):
+        assert isinstance(other, type(self))
+        assert isinstance(self, type(other))
+        assert self.p == other.p
+    def coerce_to(self, other):
+        if not isinstance(other, type(self)):  # e.g. a plain int operand
+            other = type(self)(self.p, other)
+        else:
+            self.check(other)
+        return other
+    def invert(self):
+        "Internal routine which returns the bare inverse."
+        if self.n % self.p == 0:
+            raise ZeroDivisionError("division by {!r}".format(self))
+        a = self.n, 1, 0  # each row is (value, coefficient of n, coefficient of p)
+        b = self.p, 0, 1
+        while b[0]:  # extended Euclid on the two rows
+            q = a[0] // b[0]
+            a = a[0] - q*b[0], a[1] - q*b[1], a[2] - q*b[2]
+            b, a = a, b
+        assert abs(a[0]) == 1
+        return a[1]*a[0]  # plain int (possibly negative); callers reduce it mod p
+    def __add__(self, rhs):
+        rhs = self.coerce_to(rhs)
+        return type(self)(self.p, (self.n + rhs.n) % self.p)
+    def __neg__(self):
+        return type(self)(self.p, -self.n % self.p)
+    __radd__ = __add__  # addition commutes, so the reflected form is identical
+    def __sub__(self, rhs):
+        rhs = self.coerce_to(rhs)
+        return type(self)(self.p, (self.n - rhs.n) % self.p)
+    def __rsub__(self, rhs):
+        rhs = self.coerce_to(rhs)
+        return type(self)(self.p, (rhs.n - self.n) % self.p)
+    def __mul__(self, rhs):
+        rhs = self.coerce_to(rhs)
+        return type(self)(self.p, (self.n * rhs.n) % self.p)
+    __rmul__ = __mul__  # likewise for multiplication
+    def __div__(self, rhs):
+        rhs = self.coerce_to(rhs)
+        return type(self)(self.p, (self.n * rhs.invert()) % self.p)
+    def __rdiv__(self, rhs):
+        rhs = self.coerce_to(rhs)
+        return type(self)(self.p, (rhs.n * self.invert()) % self.p)
+    # Python 3 looks up '/' under these names rather than __div__ and
+    # __rdiv__; alias them so division works on both major versions.
+    __truediv__ = __div__
+    __rtruediv__ = __rdiv__
+    def __pow__(self, exponent):
+        assert exponent >= 0
+        n, b_to_n = 1, self
+        total = type(self)(self.p, 1)
+        while True:  # square-and-multiply, consuming exponent bits from the bottom
+            if exponent & n:
+                exponent -= n
+                total *= b_to_n
+            n *= 2
+            if n > exponent:
+                break
+            b_to_n *= b_to_n
+        return total
+    def __cmp__(self, rhs):
+        rhs = self.coerce_to(rhs)
+        return cmp(self.n, rhs.n)
+    def __eq__(self, rhs):
+        rhs = self.coerce_to(rhs)
+        return self.n == rhs.n
+    def __ne__(self, rhs):
+        rhs = self.coerce_to(rhs)
+        return self.n != rhs.n
+    def __lt__(self, rhs):
+        raise ValueError("Elements of a modular ring have no ordering")
+    def __le__(self, rhs):
+        raise ValueError("Elements of a modular ring have no ordering")
+    def __gt__(self, rhs):
+        raise ValueError("Elements of a modular ring have no ordering")
+    def __ge__(self, rhs):
+        raise ValueError("Elements of a modular ring have no ordering")
+    def __str__(self):
+        return "0x{:x}".format(self.n)
+    def __repr__(self):
+        return "{}(0x{:x},0x{:x})".format(type(self).__name__, self.p, self.n)
+
+class AffinePoint(object):
+    """Base class for points on an elliptic curve."""
+
+    def __init__(self, curve, *args):
+        self.curve = curve
+        if len(args) == 0:  # no coordinates: the point at infinity
+            self.infinite = True
+            self.x = self.y = None
+        else:
+            assert len(args) == 2
+            self.infinite = False
+            self.x = ModP(self.curve.p, args[0])
+            self.y = ModP(self.curve.p, args[1])
+            self.check_equation()  # supplied by the per-curve Point subclass
+    def __neg__(self):
+        if self.infinite:
+            return self
+        return type(self)(self.curve, self.x, -self.y)
+    def __mul__(self, rhs):
+        if not isinstance(rhs, numbers.Integral):
+            raise ValueError("Elliptic curve points can only be multiplied by integers")
+        P = self
+        if rhs < 0:  # (-k)P is computed as k(-P)
+            rhs = -rhs
+            P = -P
+        toret = self.curve.point()
+        n = 1
+        nP = P
+        while rhs != 0:  # double-and-add, lowest bit of rhs first
+            if rhs & n:
+                rhs -= n
+                toret += nP
+            n += n
+            nP += nP
+        return toret
+    def __rmul__(self, rhs):
+        return self * rhs
+    def __sub__(self, rhs):
+        return self + (-rhs)
+    def __rsub__(self, rhs):
+        return (-self) + rhs
+    def __str__(self):
+        if self.infinite:
+            return "inf"
+        else:
+            return "({},{})".format(self.x, self.y)
+    def __repr__(self):
+        if self.infinite:
+            args = ""
+        else:
+            args = ", {}, {}".format(self.x, self.y)
+        return "{}.Point({}{})".format(type(self.curve).__name__,
+                                       self.curve, args)
+    def __eq__(self, rhs):
+        if self.infinite or rhs.infinite:
+            return self.infinite and rhs.infinite
+        return (self.x, self.y) == (rhs.x, rhs.y)
+    def __ne__(self, rhs):
+        return not (self == rhs)
+    def __lt__(self, rhs):
+        raise ValueError("Elliptic curve points have no ordering")
+    def __le__(self, rhs):
+        raise ValueError("Elliptic curve points have no ordering")
+    def __gt__(self, rhs):
+        raise ValueError("Elliptic curve points have no ordering")
+    def __ge__(self, rhs):
+        raise ValueError("Elliptic curve points have no ordering")
+    def __hash__(self):  # must agree with __eq__, which compares residues
+        if self.infinite:
+            return hash((True,))
+        else:  # hash the integers: ModP defines __eq__ but no value-based hash
+            return hash((False, self.x.n, self.y.n))
+
+class CurveBase(object):  # base for curve classes that define a nested Point class
+    def point(self, *args):  # no arguments: point at infinity; (x, y): a finite point
+        return self.Point(self, *args)
+
+class WeierstrassCurve(CurveBase):
+    class Point(AffinePoint):
+        def check_equation(self):
+            assert (self.y*self.y ==
+                    self.x*self.x*self.x +
+                    self.curve.a*self.x + self.curve.b)
+        def __add__(self, rhs):
+            if self.infinite:
+                return rhs
+            if rhs.infinite:
+                return self
+            if self.x == rhs.x and (self.y != rhs.y or self.y == 0):
+                return self.curve.point()  # vertical line (incl. doubling a y=0 point)
+            x1, x2, y1, y2 = self.x, rhs.x, self.y, rhs.y
+            xdiff = x2-x1
+            if xdiff != 0:
+                slope = (y2-y1) / xdiff  # chord through two distinct points
+            else:
+                assert y1 == y2
+                slope = (3*x1*x1 + self.curve.a) / (2*y1)  # tangent, for doubling
+            xp = slope*slope - x1 - x2
+            yp = -(y1 + slope * (xp-x1))
+            return self.curve.point(xp, yp)
+    # Curve y^2 = x^3 + a*x + b over the integers mod p.
+    def __init__(self, p, a, b):
+        self.p = p
+        self.a = ModP(p, a)
+        self.b = ModP(p, b)
+    # Build the point with the given x whose y has the requested low bit.
+    def cpoint(self, x, yparity=0):
+        if not hasattr(self, 'sqrtmodp'):  # created lazily on first use
+            self.sqrtmodp = SqrtModP(self.p)
+        rhs = x**3 + self.a.n * x + self.b.n
+        y = self.sqrtmodp.sqrt(rhs)  # raises ValueError if x is not on the curve
+        if (y - yparity) % 2:
+            y = -y  # reduced to p - y by point(), which has the other parity
+        return self.point(x, y)
+
+    def __repr__(self):
+        return "{}(0x{:x}, {}, {})".format(
+            type(self).__name__, self.p, self.a, self.b)
+
+class MontgomeryCurve(CurveBase):
+    class Point(AffinePoint):
+        def check_equation(self):
+            assert (self.curve.b*self.y*self.y ==
+                    self.x*self.x*self.x +
+                    self.curve.a*self.x*self.x + self.x)
+        def __add__(self, rhs):
+            if self.infinite:
+                return rhs
+            if rhs.infinite:
+                return self
+            if self.x == rhs.x and (self.y != rhs.y or self.y == 0):
+                return self.curve.point()  # vertical line (incl. doubling a y=0 point)
+            x1, x2, y1, y2 = self.x, rhs.x, self.y, rhs.y
+            xdiff = x2-x1
+            if xdiff != 0:
+                slope = (y2-y1) / xdiff  # chord through two distinct points
+            else:
+                assert y1 == y2
+                slope = (3*x1*x1 + 2*self.curve.a*x1 + 1) / (2*self.curve.b*y1)
+            xp = self.curve.b*slope*slope - self.curve.a - x1 - x2
+            yp = -(y1 + slope * (xp-x1))
+            return self.curve.point(xp, yp)
+    # Curve b*y^2 = x^3 + a*x^2 + x over the integers mod p.
+    def __init__(self, p, a, b):
+        self.p = p
+        self.a = ModP(p, a)
+        self.b = ModP(p, b)
+    # Build the point with the given x whose y has the requested low bit.
+    def cpoint(self, x, yparity=0):
+        if not hasattr(self, 'sqrtmodp'):  # created lazily on first use
+            self.sqrtmodp = SqrtModP(self.p)
+        rhs = (x**3 + self.a.n * x**2 + x) * self.b.invert()  # y^2 = (x^3+ax^2+x)/b
+        y = self.sqrtmodp.sqrt(rhs)  # raises ValueError if x is not on the curve
+        if (y - yparity) % 2:
+            y = -y  # reduced to p - y by point(), which has the other parity
+        return self.point(x, y)
+
+    def __repr__(self):
+        return "{}(0x{:x}, {}, {})".format(
+            type(self).__name__, self.p, self.a, self.b)
+
+class TwistedEdwardsCurve(CurveBase):
+    class Point(AffinePoint):
+        def check_equation(self):
+            x2, y2 = self.x*self.x, self.y*self.y
+            assert (self.curve.a*x2 + y2 == 1 + self.curve.d*x2*y2)
+        def __neg__(self):  # negation flips x here, not y as in the base class
+            return type(self)(self.curve, -self.x, self.y)
+        def __add__(self, rhs):  # one formula for every case, doubling included
+            x1, x2, y1, y2 = self.x, rhs.x, self.y, rhs.y
+            x1y2, y1x2, y1y2, x1x2 = x1*y2, y1*x2, y1*y2, x1*x2
+            dxxyy = self.curve.d*x1x2*y1y2
+            return self.curve.point((x1y2+y1x2)/(1+dxxyy),
+                                    (y1y2-self.curve.a*x1x2)/(1-dxxyy))
+    # Curve a*x^2 + y^2 = 1 + d*x^2*y^2 mod p; note that d comes before a.
+    def __init__(self, p, d, a):
+        self.p = p
+        self.d = ModP(p, d)
+        self.a = ModP(p, a)
+
+    def point(self, *args):
+        # This curve form represents the identity using finite
+        # numbers, so it doesn't need the special infinity flag.
+        # Detect a no-argument call to point() and substitute the pair
+        # of integers that gives the identity.
+        if len(args) == 0:
+            args = [0, 1]
+        return super(TwistedEdwardsCurve, self).point(*args)
+    # Build the point with the given y whose x has the requested low bit.
+    def cpoint(self, y, xparity=0):
+        if not hasattr(self, 'sqrtmodp'):  # created lazily on first use
+            self.sqrtmodp = SqrtModP(self.p)
+        y = ModP(self.p, y)
+        y2 = y**2
+        radicand = (y2 - 1) / (self.d * y2 - self.a)  # the curve equation solved for x^2
+        x = self.sqrtmodp.sqrt(radicand.n)  # raises ValueError if y is not on the curve
+        if (x - xparity) % 2:
+            x = -x  # reduced to p - x by point(), which has the other parity
+        return self.point(x, y)
+
+    def __repr__(self):
+        return "{}(0x{:x}, {}, {})".format(
+            type(self).__name__, self.p, self.d, self.a)
+
+p256 = WeierstrassCurve(0xffffffff00000001000000000000000000000000ffffffffffffffffffffffff, -3, 0x5ac635d8aa3a93e7b3ebbd55769886bc651d06b0cc53b0f63bce3c3e27d2604b)
+p256.G = p256.point(0x6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296,0x4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5)
+p256.G_order = 0xffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632551
+
+p384 = WeierstrassCurve(0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000ffffffff, -3, 0xb3312fa7e23ee7e4988e056be3f82d19181d9c6efe8141120314088f5013875ac656398d8a2ed19d2a85c8edd3ec2aef)
+p384.G = p384.point(0xaa87ca22be8b05378eb1c71ef320ad746e1d3b628ba79b9859f741e082542a385502f25dbf55296c3a545e3872760ab7, 0x3617de4a96262c6f5d9e98bf9292dc29f8f41dbd289a147ce9da3113b5f0b8c00a60b1ce1d7e819d7a431d7c90ea0e5f)
+p384.G_order = 0xffffffffffffffffffffffffffffffffffffffffffffffffc7634d81f4372ddf581a0db248b0a77aecec196accc52973
+
+p521 = WeierstrassCurve(0x01ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff, -3, 0x0051953eb9618e1c9a1f929a21a0b68540eea2da725b99b315f3b8b489918ef109e156193951ec7e937b1652c0bd3bb1bf073573df883d2c34f1ef451fd46b503f00)
+p521.G = p521.point(0x00c6858e06b70404e9cd9e3ecb662395b4429c648139053fb521f828af606b4d3dbaa14b5e77efe75928fe1dc127a2ffa8de3348b3c1856a429bf97e7e31c2e5bd66,0x011839296a789a3bc0045c8a5fb42c7d1bd998f54449579b446817afbd17273e662c97ee72995ef42640c550b9013fad0761353c7086a272c24088be94769fd16650)
+p521.G_order = 0x01fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffa51868783bf2f966b7fcc0148f709a5d03bb5c9b8899c47aebb6fb71e91386409
+
+curve25519 = MontgomeryCurve(2**255-19, 0x76d06, 1)
+curve25519.G = curve25519.cpoint(9)
+
+ed25519 = TwistedEdwardsCurve(2**255-19, 0x52036cee2b6ffe738cc740797779e89800700a4d4141d8ab75eb4dca135978a3, -1)
+ed25519.G = ed25519.point(0x216936d3cd6e53fec0a4e231fdd6dc5c692cc7609525a7b2c9562d608f25d51a,0x6666666666666666666666666666666666666666666666666666666666666658)
+ed25519.G_order = 0x1000000000000000000000000000000014def9dea2f79cd65812631a5cf5d3ed
+
diff --git a/contrib/gdb.py b/contrib/gdb.py
index 9bfd9584..34bbb0ec 100644
--- a/contrib/gdb.py
+++ b/contrib/gdb.py
@@ -2,39 +2,43 @@ import gdb
 import re
 import gdb.printing
 
-class PuTTYBignumPrettyPrinter(gdb.printing.PrettyPrinter):
-    "Pretty-print PuTTY's Bignum type."
-    name = "Bignum"
+class PuTTYMpintPrettyPrinter(gdb.printing.PrettyPrinter):
+    "Pretty-print PuTTY's mp_int type."
+    name = "mp_int"
 
     def __init__(self, val):
-        super(PuTTYBignumPrettyPrinter, self).__init__(self.name)
+        super(PuTTYMpintPrettyPrinter, self).__init__(self.name)
         self.val = val
 
     def to_string(self):
         type_BignumInt = gdb.lookup_type("BignumInt")
         type_BignumIntPtr = type_BignumInt.pointer()
         BIGNUM_INT_BITS = 8 * type_BignumInt.sizeof
-        array = self.val.cast(type_BignumIntPtr)
+        array = self.val["w"]
         aget = lambda i: int(array[i]) & ((1 << BIGNUM_INT_BITS)-1)
 
         try:
-            length = aget(0)
+            length = int(self.val["nw"])
             value = 0
             for i in range(length):
-                value |= aget(i+1) << (BIGNUM_INT_BITS * i)
-            return "Bignum({:#x})".format(value)
+                value |= aget(i) << (BIGNUM_INT_BITS * i)
+            return "mp_int({:#x})".format(value)
 
         except gdb.MemoryError:
-            address = int(array)
+            address = int(self.val)
             if address == 0:
-                return "Bignum(NULL)".format(address)
-            return "Bignum(invalid @ {:#x})".format(address)
+                return "mp_int(NULL)".format(address)
+            return "mp_int(invalid @ {:#x})".format(address)
 
-rcpp = gdb.printing.RegexpCollectionPrettyPrinter("PuTTY")
-rcpp.add_printer(PuTTYBignumPrettyPrinter.name, "^Bignum$",
-                 PuTTYBignumPrettyPrinter)
+class PuTTYPrinterSelector(gdb.printing.PrettyPrinter):  # chooses a printer per value
+    def __init__(self):
+        super(PuTTYPrinterSelector, self).__init__("PuTTY")
+    def __call__(self, val):
+        if str(val.type) == "mp_int *":  # exact match on the type's printed name
+            return PuTTYMpintPrettyPrinter(val)
+        return None  # no printer offered for any other type
 
-gdb.printing.register_pretty_printer(None, rcpp)
+gdb.printing.register_pretty_printer(None, PuTTYPrinterSelector())
 
 class MemDumpCommand(gdb.Command):
     """Print a hex+ASCII dump of object EXP.
diff --git a/defs.h b/defs.h
index 19d501a5..ff76655b 100644
--- a/defs.h
+++ b/defs.h
@@ -63,6 +63,16 @@ typedef struct TermWinVtable TermWinVtable;
 
 typedef struct Ssh Ssh;
 
+typedef struct mp_int mp_int;
+typedef struct MontyContext MontyContext;
+
+typedef struct WeierstrassCurve WeierstrassCurve;
+typedef struct WeierstrassPoint WeierstrassPoint;
+typedef struct MontgomeryCurve MontgomeryCurve;
+typedef struct MontgomeryPoint MontgomeryPoint;
+typedef struct EdwardsCurve EdwardsCurve;
+typedef struct EdwardsPoint EdwardsPoint;
+
 typedef struct SftpServer SftpServer;
 typedef struct SftpServerVtable SftpServerVtable;
 
diff --git a/ecc.c b/ecc.c
new file mode 100644
index 00000000..753616bd
--- /dev/null
+++ b/ecc.c
@@ -0,0 +1,1112 @@
+#include <assert.h>
+
+#include "ssh.h"
+#include "mpint.h"
+#include "ecc.h"
+
+/* ----------------------------------------------------------------------
+ * Weierstrass curves.
+ */
+
+struct WeierstrassPoint {
+    /*
+     * Internally, we represent a point using 'Jacobian coordinates',
+     * which are three values X,Y,Z whose relation to the affine
+     * coordinates x,y is that x = X/Z^2 and y = Y/Z^3.
+     *
+     * This allows us to do most of our calculations without having to
+     * take an inverse mod p: every time the obvious affine formulae
+     * would need you to divide by something, you instead multiply it
+     * into the 'denominator' coordinate Z. You only have to actually
+     * take the inverse of Z when you need to get the affine
+     * coordinates back out, which means you do it once after your
+     * entire computation instead of at every intermediate step.
+     *
+     * The point at infinity is represented by setting all three
+     * coordinates to zero.
+     *
+     * These values are also stored in the Montgomery-multiplication
+     * transformed representation.
+     */
+    mp_int *X, *Y, *Z;
+
+    WeierstrassCurve *wc;
+};
+
+struct WeierstrassCurve {
+    /* Prime modulus of the finite field. */
+    mp_int *p;
+
+    /* Persistent Montgomery context for doing arithmetic mod p. */
+    MontyContext *mc;
+
+    /* Modsqrt context for point decompression. NULL if this curve was
+     * constructed without providing nonsquare_mod_p. */
+    ModsqrtContext *sc;
+
+    /* Parameters of the curve, in Montgomery-multiplication
+     * transformed form. */
+    mp_int *a, *b;
+};
+
+WeierstrassCurve *ecc_weierstrass_curve(
+    mp_int *p, mp_int *a, mp_int *b, mp_int *nonsquare_mod_p)
+{   /* Allocate a curve object; release it with ecc_weierstrass_curve_free. */
+    WeierstrassCurve *wc = snew(WeierstrassCurve);
+    wc->p = mp_copy(p);                 /* the curve keeps its own copy */
+    wc->mc = monty_new(p);
+    wc->a = monty_import(wc->mc, a);    /* stored in Montgomery form */
+    wc->b = monty_import(wc->mc, b);
+    /* The modsqrt context is optional: NULL nonsquare_mod_p means none. */
+    if (nonsquare_mod_p)
+        wc->sc = modsqrt_new(p, nonsquare_mod_p);
+    else
+        wc->sc = NULL;
+
+    return wc;
+}
+
+void ecc_weierstrass_curve_free(WeierstrassCurve *wc)
+{   /* Release a curve and everything it owns; wc is dereferenced unconditionally. */
+    mp_free(wc->p);
+    mp_free(wc->a);
+    mp_free(wc->b);
+    monty_free(wc->mc);
+    if (wc->sc)                 /* NULL if built without nonsquare_mod_p */
+        modsqrt_free(wc->sc);
+    sfree(wc);
+}
+
+static WeierstrassPoint *ecc_weierstrass_point_new_empty(WeierstrassCurve *wc)
+{   /* Allocate a point shell with no coordinates yet. */
+    WeierstrassPoint *wp = snew(WeierstrassPoint);
+    wp->wc = wc;                        /* pointer is stored; the curve is not copied */
+    wp->X = wp->Y = wp->Z = NULL;       /* left for the caller to fill in */
+    return wp;
+}
+
+static WeierstrassPoint *ecc_weierstrass_point_new_imported(
+    WeierstrassCurve *wc, mp_int *monty_x, mp_int *monty_y)
+{   /* Both callers in this file pass values fresh from monty_import. */
+    WeierstrassPoint *wp = ecc_weierstrass_point_new_empty(wc);
+    wp->X = monty_x;    /* stored directly, not copied: the point now owns it */
+    wp->Y = monty_y;    /* likewise */
+    wp->Z = mp_copy(monty_identity(wc->mc));  /* presumably 1 in Montgomery form */
+    return wp;
+}
+
+WeierstrassPoint *ecc_weierstrass_point_new(
+    WeierstrassCurve *wc, mp_int *x, mp_int *y)
+{   /* x and y are passed through monty_import before being stored. */
+    return ecc_weierstrass_point_new_imported(
+        wc, monty_import(wc->mc, x), monty_import(wc->mc, y));
+}
+
+WeierstrassPoint *ecc_weierstrass_point_new_identity(WeierstrassCurve *wc)
+{   /* NOTE(review): relies on mp_new() returning zero, giving X=Y=Z=0 - confirm. */
+    WeierstrassPoint *wp = ecc_weierstrass_point_new_empty(wc);
+    size_t bits = mp_max_bits(wc->p);   /* size each coordinate like the modulus */
+    wp->X = mp_new(bits);
+    wp->Y = mp_new(bits);
+    wp->Z = mp_new(bits);
+    return wp;
+}
+
+WeierstrassPoint *ecc_weierstrass_point_copy(WeierstrassPoint *orig)
+{   /* New point on the same curve with its own copies of X, Y and Z. */
+    WeierstrassPoint *wp = ecc_weierstrass_point_new_empty(orig->wc);
+    wp->X = mp_copy(orig->X);
+    wp->Y = mp_copy(orig->Y);
+    wp->Z = mp_copy(orig->Z);
+    return wp;
+}
+
+void ecc_weierstrass_point_free(WeierstrassPoint *wp)
+{   /* Frees the coordinates and the struct, but not the curve it points at. */
+    mp_free(wp->X);
+    mp_free(wp->Y);
+    mp_free(wp->Z);
+    smemclr(wp, sizeof(*wp));   /* wipe the stale pointers before releasing */
+    sfree(wp);
+}
+
+static mp_int *ecc_weierstrass_equation_rhs(
+    WeierstrassCurve *wc, mp_int *monty_x)
+{   /* Returns x^3 + ax + b as a new mp_int, evaluated as (x^2 + a)x + b. */
+    mp_int *x2 = monty_mul(wc->mc, monty_x, monty_x);
+    mp_int *x2_plus_a = monty_add(wc->mc, x2, wc->a);
+    mp_int *x3_plus_ax = monty_mul(wc->mc, x2_plus_a, monty_x);
+    mp_int *rhs = monty_add(wc->mc, x3_plus_ax, wc->b);
+    mp_free(x2);                /* intermediates are released; only rhs survives */
+    mp_free(x2_plus_a);
+    mp_free(x3_plus_ax);
+    return rhs;
+}
+
+WeierstrassPoint *ecc_weierstrass_point_new_from_x(
+    WeierstrassCurve *wc, mp_int *xorig, unsigned desired_y_parity)
+{
+    assert(wc->sc);     /* curve must have been built with nonsquare_mod_p */
+
+    /*
+     * The curve equation is y^2 = x^3 + ax + b, which is already
+     * conveniently in a form where we can compute the RHS and take
+     * the square root of it to get y.
+     */
+    unsigned success;
+
+    mp_int *x = monty_import(wc->mc, xorig);
+    mp_int *rhs = ecc_weierstrass_equation_rhs(wc, x);
+    mp_int *y = monty_modsqrt(wc->sc, rhs, &success);
+    mp_free(rhs);
+
+    if (!success) {
+        /* Failure! x^3+ax+b worked out to be a number that has no
+         * square root mod p. In this situation there's no point in
+         * trying to be time-constant, since the protocol sequence is
+         * going to diverge anyway when we complain to whoever gave us
+         * this bogus value. */
+        mp_free(x);
+        mp_free(y);
+        return NULL;    /* callers must check for this */
+    }
+
+    /*
+     * Choose whichever of y and p-y has the specified parity (of its
+     * lowest positive residue mod p).
+     */
+    mp_int *tmp = monty_export(wc->mc, y);
+    unsigned flip = (mp_get_bit(tmp, 0) ^ desired_y_parity) & 1;
+    mp_sub_into(tmp, wc->p, y);         /* tmp is reused to hold p - y */
+    mp_select_into(y, y, tmp, flip);    /* presumably y = flip ? tmp : y */
+    mp_free(tmp);
+
+    return ecc_weierstrass_point_new_imported(wc, x, y);  /* x and y now owned by the point */
+}
+
+static void ecc_weierstrass_cond_overwrite(
+    WeierstrassPoint *dest, WeierstrassPoint *src, unsigned overwrite)
+{   /* Presumably dest = overwrite ? src : dest, coordinate by coordinate. */
+    mp_select_into(dest->X, dest->X, src->X, overwrite);
+    mp_select_into(dest->Y, dest->Y, src->Y, overwrite);
+    mp_select_into(dest->Z, dest->Z, src->Z, overwrite);
+}
+
+static void ecc_weierstrass_cond_swap(
+    WeierstrassPoint *P, WeierstrassPoint *Q, unsigned swap)
+{   /* Presumably exchanges P and Q when swap is set, coordinate by coordinate. */
+    mp_cond_swap(P->X, Q->X, swap);
+    mp_cond_swap(P->Y, Q->Y, swap);
+    mp_cond_swap(P->Z, Q->Z, swap);
+}
+
+/*
+ * Shared code between all three of the basic arithmetic functions:
+ * once we've determined the slope of the line that we're intersecting
+ * the curve with, this takes care of finding the coordinates of the
+ * third intersection point (given the two input x-coordinates and one
+ * of the y-coords) and negating it to generate the output.
+ */
+static inline void ecc_weierstrass_epilogue(
+    mp_int *Px, mp_int *Qx, mp_int *Py, mp_int *common_Z,
+    mp_int *lambda_n, mp_int *lambda_d, WeierstrassPoint *out)
+{   /* out->X, out->Y and out->Z are overwritten with new mp_ints. */
+    WeierstrassCurve *wc = out->wc;
+
+    /* Powers of the numerator and denominator of the slope lambda */
+    mp_int *lambda_n2 = monty_mul(wc->mc, lambda_n, lambda_n);
+    mp_int *lambda_d2 = monty_mul(wc->mc, lambda_d, lambda_d);
+    mp_int *lambda_d3 = monty_mul(wc->mc, lambda_d, lambda_d2);
+
+    /* Make the output x-coordinate */
+    mp_int *xsum = monty_add(wc->mc, Px, Qx);
+    mp_int *lambda_d2_xsum = monty_mul(wc->mc, lambda_d2, xsum);
+    out->X = monty_sub(wc->mc, lambda_n2, lambda_d2_xsum);  /* n^2 - d^2 (Px+Qx) */
+
+    /* Make the output y-coordinate */
+    mp_int *lambda_d2_Px = monty_mul(wc->mc, lambda_d2, Px);
+    mp_int *xdiff = monty_sub(wc->mc, lambda_d2_Px, out->X);
+    mp_int *lambda_n_xdiff = monty_mul(wc->mc, lambda_n, xdiff);
+    mp_int *lambda_d3_Py = monty_mul(wc->mc, lambda_d3, Py);
+    out->Y = monty_sub(wc->mc, lambda_n_xdiff, lambda_d3_Py);  /* n (d^2 Px - X) - d^3 Py */
+
+    /* Make the output z-coordinate */
+    out->Z = monty_mul(wc->mc, common_Z, lambda_d);
+
+    mp_free(lambda_n2);         /* only the three outputs outlive this call */
+    mp_free(lambda_d2);
+    mp_free(lambda_d3);
+    mp_free(xsum);
+    mp_free(xdiff);
+    mp_free(lambda_d2_xsum);
+    mp_free(lambda_n_xdiff);
+    mp_free(lambda_d2_Px);
+    mp_free(lambda_d3_Py);
+}
+
+/*
+ * Shared code between add and add_general: put the two input points
+ * over a common denominator, and determine the slope lambda of the
+ * line through both of them. If the points have the same
+ * x-coordinate, then the slope will be returned with a zero
+ * denominator.
+ */
+static inline void ecc_weierstrass_add_prologue(
+    WeierstrassPoint *P, WeierstrassPoint *Q,
+    mp_int **Px, mp_int **Py, mp_int **Qx, mp_int **denom,
+    mp_int **lambda_n, mp_int **lambda_d)
+{   /* All six outputs are new mp_ints; ecc_weierstrass_add frees them after use. */
+    WeierstrassCurve *wc = P->wc;
+
+    /* Powers of the points' denominators */
+    mp_int *Pz2 = monty_mul(wc->mc, P->Z, P->Z);
+    mp_int *Pz3 = monty_mul(wc->mc, Pz2, P->Z);
+    mp_int *Qz2 = monty_mul(wc->mc, Q->Z, Q->Z);
+    mp_int *Qz3 = monty_mul(wc->mc, Qz2, Q->Z);
+
+    /* Points' x,y coordinates scaled by the other one's denominator
+     * (raised to the appropriate power) */
+    *Px = monty_mul(wc->mc, P->X, Qz2);
+    *Py = monty_mul(wc->mc, P->Y, Qz3);
+    *Qx = monty_mul(wc->mc, Q->X, Pz2);
+    mp_int *Qy = monty_mul(wc->mc, Q->Y, Pz3);  /* local only: not returned */
+
+    /* Common denominator */
+    *denom = monty_mul(wc->mc, P->Z, Q->Z);
+
+    /* Slope of the line through the two points, if P != Q */
+    *lambda_n = monty_sub(wc->mc, Qy, *Py);     /* difference of scaled y values */
+    *lambda_d = monty_sub(wc->mc, *Qx, *Px);    /* difference of scaled x values */
+
+    mp_free(Pz2);
+    mp_free(Pz3);
+    mp_free(Qz2);
+    mp_free(Qz3);
+    mp_free(Qy);
+}
+
+WeierstrassPoint *ecc_weierstrass_add(WeierstrassPoint *P, WeierstrassPoint *Q)
+{   /* Returns a new point P+Q; only for inputs with distinct x-coordinates. */
+    WeierstrassCurve *wc = P->wc;
+    assert(Q->wc == wc);        /* both points must be on the same curve */
+
+    WeierstrassPoint *S = ecc_weierstrass_point_new_empty(wc);
+
+    mp_int *Px, *Py, *Qx, *denom, *lambda_n, *lambda_d;
+    ecc_weierstrass_add_prologue(
+        P, Q, &Px, &Py, &Qx, &denom, &lambda_n, &lambda_d);
+
+    /* Never expect mutually inverse inputs, or identical ones (that
+     * would be a doubling): the x-coordinates over the common
+     * denominator must differ, i.e. lambda_d = Qx - Px is nonzero.
+     * (lambda_n may validly be zero: distinct x can share a y.) */
+    assert(!mp_eq_integer(lambda_d, 0));
+
+    /* Now go to the common epilogue code. */
+    ecc_weierstrass_epilogue(Px, Qx, Py, denom, lambda_n, lambda_d, S);
+
+    mp_free(Px);                /* prologue outputs are ours to release */
+    mp_free(Py);
+    mp_free(Qx);
+    mp_free(denom);
+    mp_free(lambda_n);
+    mp_free(lambda_d);
+
+    return S;
+}
+
+/*
+ * Code to determine the slope of the line you need to intersect with
+ * the curve in the case where you're adding a point to itself. In
+ * this situation you can't just say "the line through both input
+ * points" because that's under-determined; instead, you have to take
+ * the _tangent_ to the curve at the given point, by differentiating
+ * the curve equation y^2=x^3+ax+b to get 2y dy/dx = 3x^2+a.
+ */
+static inline void ecc_weierstrass_tangent_slope(
+    WeierstrassPoint *P, mp_int **lambda_n, mp_int **lambda_d)
+{
+    MontyContext *mc = P->wc->mc;
+
+    mp_int *z_sq = monty_mul(mc, P->Z, P->Z);
+    mp_int *z_4th = monty_mul(mc, z_sq, z_sq);
+    mp_int *a_z4 = monty_mul(mc, P->wc->a, z_4th);
+    mp_int *x_sq = monty_mul(mc, P->X, P->X);
+    mp_int *x_sq_x2 = monty_add(mc, x_sq, x_sq);
+    mp_int *x_sq_x3 = monty_add(mc, x_sq_x2, x_sq);
+
+    *lambda_d = monty_add(mc, P->Y, P->Y);         /* 2Y */
+    *lambda_n = monty_add(mc, x_sq_x3, a_z4);      /* 3X^2 + aZ^4 */
+
+    mp_free(x_sq_x3);
+    mp_free(x_sq_x2);
+    mp_free(x_sq);
+    mp_free(a_z4);
+    mp_free(z_4th);
+    mp_free(z_sq);
+}
+
+WeierstrassPoint *ecc_weierstrass_double(WeierstrassPoint *P)
+{
+    /* Tangent slope at P, as a numerator/denominator pair. */
+    mp_int *num, *den;
+    ecc_weierstrass_tangent_slope(P, &num, &den);
+
+    /* Feed P in as both inputs of the shared epilogue. */
+    WeierstrassPoint *doubled = ecc_weierstrass_point_new_empty(P->wc);
+    ecc_weierstrass_epilogue(P->X, P->X, P->Y, P->Z, num, den, doubled);
+    mp_free(den);
+    mp_free(num);
+    return doubled;
+}
+
+static inline void ecc_weierstrass_select_into(
+    WeierstrassPoint *dest, WeierstrassPoint *P, WeierstrassPoint *Q,
+    unsigned choose_Q)
+{
+    mp_select_into(dest->Z, P->Z, Q->Z, choose_Q);  /* one select per */
+    mp_select_into(dest->Y, P->Y, Q->Y, choose_Q);  /* coordinate, all */
+    mp_select_into(dest->X, P->X, Q->X, choose_Q);  /* on the same flag */
+}
+
+WeierstrassPoint *ecc_weierstrass_add_general(
+    WeierstrassPoint *P, WeierstrassPoint *Q)
+{
+    WeierstrassCurve *wc = P->wc;
+    assert(Q->wc == wc);
+
+    WeierstrassPoint *S = ecc_weierstrass_point_new_empty(wc);
+
+    /* Epilogue inputs plus the chord slope, which applies if P != Q */
+    mp_int *Px, *Py, *Qx, *denom, *chord_n, *chord_d;
+    ecc_weierstrass_add_prologue(
+        P, Q, &Px, &Py, &Qx, &denom, &chord_n, &chord_d);
+
+    /* The tangent slope, which applies if P == Q */
+    mp_int *tangent_n, *tangent_d;
+    ecc_weierstrass_tangent_slope(P, &tangent_n, &tangent_d);
+
+    /* Treat the inputs as equal when both parts of the chord slope
+     * are zero, and in that case swap in the tangent slope instead */
+    unsigned x_match = mp_eq_integer(chord_d, 0);
+    unsigned y_match = mp_eq_integer(chord_n, 0);
+    unsigned use_tangent = x_match & y_match;
+    mp_select_into(chord_d, chord_d, tangent_d, use_tangent);
+    mp_select_into(chord_n, chord_n, tangent_n, use_tangent);
+
+    /* From here on, addition and doubling share the same code */
+    ecc_weierstrass_epilogue(Px, Qx, Py, denom, chord_n, chord_d, S);
+
+    /* If either input was the identity (Z == 0), the answer is simply
+     * the other input, so overwrite the output with it. */
+    ecc_weierstrass_select_into(S, S, Q, mp_eq_integer(P->Z, 0));
+    ecc_weierstrass_select_into(S, S, P, mp_eq_integer(Q->Z, 0));
+
+    /*
+     * If P == -Q, the chord denominator was zero, so the output
+     * already has z==0 and hence represents the identity. Spot that
+     * case and normalise the other two coordinates to zero as well.
+     */
+    unsigned result_is_id = mp_eq_integer(S->Z, 0);
+    mp_cond_clear(S->Y, result_is_id);
+    mp_cond_clear(S->X, result_is_id);
+
+    mp_free(tangent_d);
+    mp_free(tangent_n);
+    mp_free(chord_d);
+    mp_free(chord_n);
+    mp_free(denom);
+    mp_free(Qx);
+    mp_free(Py);
+    mp_free(Px);
+
+    return S;
+}
+
+WeierstrassPoint *ecc_weierstrass_multiply(WeierstrassPoint *B, mp_int *n)
+{
+    /*
+     * Ladder of the same shape as ecc_montgomery_multiply (see the
+     * comment there). The pair (lo, hi) always holds (kB, (k+1)B);
+     * each bit of n, taken from the top, turns k into 2k or 2k+1.
+     * While no set bit has been seen yet, every iteration ends by
+     * resetting the pair to (B, 2B).
+     */
+    WeierstrassPoint *two_B = ecc_weierstrass_double(B);
+    WeierstrassPoint *lo = ecc_weierstrass_point_copy(B);
+    WeierstrassPoint *hi = ecc_weierstrass_point_copy(two_B);
+    unsigned awaiting_first_bit = 1;
+
+    size_t pos = mp_max_bits(n);
+    while (pos-- > 0) {
+        unsigned bit = mp_get_bit(n, pos);
+
+        WeierstrassPoint *sum = ecc_weierstrass_add(lo, hi);
+        ecc_weierstrass_cond_swap(lo, hi, bit);
+        WeierstrassPoint *dbl = ecc_weierstrass_double(lo);
+        ecc_weierstrass_point_free(lo);
+        ecc_weierstrass_point_free(hi);
+        lo = dbl;
+        hi = sum;
+        ecc_weierstrass_cond_swap(lo, hi, bit);
+
+        ecc_weierstrass_cond_overwrite(lo, B, awaiting_first_bit);
+        ecc_weierstrass_cond_overwrite(hi, two_B, awaiting_first_bit);
+        awaiting_first_bit &= ~bit;
+    }
+
+    ecc_weierstrass_point_free(two_B);
+    ecc_weierstrass_point_free(hi);
+    return lo;
+}
+
+unsigned ecc_weierstrass_is_identity(WeierstrassPoint *wp)
+{
+    return mp_eq_integer(wp->Z, 0);    /* the identity is marked by Z == 0 */
+}
+
+/*
+ * Normalise a point by scaling its Jacobian coordinates so that Z=1.
+ * This doesn't change what point is represented by the triple, but it
+ * means the affine x,y can now be easily recovered from X and Y.
+ */
+static void ecc_weierstrass_normalise(WeierstrassPoint *wp)
+{
+    MontyContext *mc = wp->wc->mc;
+    mp_int *z_recip = monty_invert(mc, wp->Z);
+    mp_int *z_recip_sq = monty_mul(mc, z_recip, z_recip);
+    mp_int *z_recip_cu = monty_mul(mc, z_recip_sq, z_recip);
+    monty_mul_into(mc, wp->Y, wp->Y, z_recip_cu);    /* Y /= Z^3 */
+    monty_mul_into(mc, wp->X, wp->X, z_recip_sq);    /* X /= Z^2 */
+    mp_copy_into(wp->Z, monty_identity(mc));         /* Z = 1 */
+    mp_free(z_recip_cu);
+    mp_free(z_recip_sq);
+    mp_free(z_recip);
+}
+
+void ecc_weierstrass_get_affine(
+    WeierstrassPoint *wp, mp_int **x, mp_int **y)
+{
+    /* Rescale wp in place first; a NULL output pointer is skipped. */
+    ecc_weierstrass_normalise(wp);
+
+    MontyContext *mc = wp->wc->mc;
+    if (x)
+        *x = monty_export(mc, wp->X);
+    if (y)
+        *y = monty_export(mc, wp->Y);
+}
+
+unsigned ecc_weierstrass_point_valid(WeierstrassPoint *P)
+{
+    mp_int *y_sq = monty_mul(P->wc->mc, P->Y, P->Y);
+    mp_int *expected = ecc_weierstrass_equation_rhs(P->wc, P->X);
+    unsigned on_curve = mp_cmp_eq(y_sq, expected);  /* only X, Y are read */
+    mp_free(expected);
+    mp_free(y_sq);
+    return on_curve;
+}
+
+/* ----------------------------------------------------------------------
+ * Montgomery curves.
+ */
+
+struct MontgomeryPoint {
+    /* Projective X:Z pair standing for the affine x-coordinate X/Z.
+     * Both are kept in the curve's Montgomery-multiplication form. */
+    mp_int *X, *Z;
+    /* The curve this point lives on; not freed with the point. */
+    MontgomeryCurve *mc;
+};
+
+struct MontgomeryCurve {
+    /* Prime modulus of the finite field (the curve's own copy). */
+    mp_int *p;
+
+    /* Montgomery context for arithmetic mod p. */
+    MontyContext *mc;
+
+    /* Parameters of the curve, in Montgomery-multiplication
+     * transformed form. */
+    mp_int *a, *b;
+
+    /* (a+2)/4 mod p, precomputed in Montgomery-multiplication form. */
+    mp_int *aplus2over4;
+};
+
+MontgomeryCurve *ecc_montgomery_curve(
+    mp_int *p, mp_int *a, mp_int *b)
+{
+    MontgomeryCurve *curve = snew(MontgomeryCurve);
+    curve->p = mp_copy(p);
+    curve->mc = monty_new(p);
+    curve->a = monty_import(curve->mc, a);
+    curve->b = monty_import(curve->mc, b);
+
+    /* Precompute (a+2)/4 mod p and store it in Montgomery form. */
+    mp_int *a_plus_2 = mp_copy(a);
+    mp_add_integer_into(a_plus_2, a_plus_2, 2);
+    mp_int *four = mp_from_integer(4);
+    mp_int *quarter = mp_invert(four, curve->p);
+    mp_int *ratio = mp_modmul(a_plus_2, quarter, curve->p);
+    curve->aplus2over4 = monty_import(curve->mc, ratio);
+    mp_free(ratio);
+    mp_free(quarter);
+    mp_free(four);
+    mp_free(a_plus_2);
+    return curve;
+}
+
+void ecc_montgomery_curve_free(MontgomeryCurve *mc)
+{
+    monty_free(mc->mc);
+    mp_free(mc->aplus2over4);
+    mp_free(mc->b);
+    mp_free(mc->a);
+    mp_free(mc->p);
+    sfree(mc);    /* finally release the container itself */
+}
+
+static MontgomeryPoint *ecc_montgomery_point_new_empty(MontgomeryCurve *mc)
+{
+    MontgomeryPoint *blank = snew(MontgomeryPoint);
+    blank->X = blank->Z = NULL;    /* coordinates are left unset here */
+    blank->mc = mc;
+    return blank;
+}
+
+MontgomeryPoint *ecc_montgomery_point_new(MontgomeryCurve *mc, mp_int *x)
+{
+    MontgomeryPoint *pt = ecc_montgomery_point_new_empty(mc);
+    pt->Z = mp_copy(monty_identity(mc->mc));    /* affine input: Z = 1 */
+    pt->X = monty_import(mc->mc, x);
+    return pt;
+}
+
+MontgomeryPoint *ecc_montgomery_point_copy(MontgomeryPoint *orig)
+{
+    MontgomeryPoint *copy = ecc_montgomery_point_new_empty(orig->mc);
+    copy->Z = mp_copy(orig->Z);    /* coordinates are duplicated; */
+    copy->X = mp_copy(orig->X);    /* the curve pointer is shared */
+    return copy;
+}
+
+void ecc_montgomery_point_free(MontgomeryPoint *mp)
+{
+    mp_free(mp->Z);
+    mp_free(mp->X);
+    smemclr(mp, sizeof(*mp));    /* scrub the struct before releasing it */
+    sfree(mp);
+}
+
+static void ecc_montgomery_cond_overwrite(
+    MontgomeryPoint *dest, MontgomeryPoint *src, unsigned overwrite)
+{
+    mp_select_into(dest->Z, dest->Z, src->Z, overwrite);  /* same flag */
+    mp_select_into(dest->X, dest->X, src->X, overwrite);  /* for both */
+}
+
+static void ecc_montgomery_cond_swap(
+    MontgomeryPoint *P, MontgomeryPoint *Q, unsigned swap)
+{
+    mp_cond_swap(P->Z, Q->Z, swap);    /* same flag for both */
+    mp_cond_swap(P->X, Q->X, swap);    /* coordinates */
+}
+
+MontgomeryPoint *ecc_montgomery_diff_add(
+    MontgomeryPoint *P, MontgomeryPoint *Q, MontgomeryPoint *PminusQ)
+{
+    MontyContext *ctx = P->mc->mc;
+    assert(Q->mc == P->mc);
+    assert(PminusQ->mc == P->mc);
+
+    /*
+     * Differential addition rests on this identity between the
+     * affine x-coordinates of P, Q, P+Q and P-Q:
+     *
+     * x(P+Q) x(P-Q) (x(Q)-x(P))^2 = (x(P)x(Q) - 1)^2
+     *
+     * Just as for the Weierstrass code, what follows is a projective
+     * restatement of that affine relation, so that the main
+     * arithmetic never has to perform a division.
+     */
+
+    MontgomeryPoint *S = ecc_montgomery_point_new_empty(P->mc);
+
+    mp_int *P_dif = monty_sub(ctx, P->X, P->Z);
+    mp_int *P_sum = monty_add(ctx, P->X, P->Z);
+    mp_int *Q_dif = monty_sub(ctx, Q->X, Q->Z);
+    mp_int *Q_sum = monty_add(ctx, Q->X, Q->Z);
+    mp_int *cross_a = monty_mul(ctx, P_dif, Q_sum);
+    mp_int *cross_b = monty_mul(ctx, P_sum, Q_dif);
+    mp_int *x_root = monty_add(ctx, cross_a, cross_b);
+    mp_int *z_root = monty_sub(ctx, cross_a, cross_b);
+    mp_int *x_root_sq = monty_mul(ctx, x_root, x_root);
+    mp_int *z_root_sq = monty_mul(ctx, z_root, z_root);
+    S->X = monty_mul(ctx, x_root_sq, PminusQ->Z);
+    S->Z = monty_mul(ctx, z_root_sq, PminusQ->X);
+
+    mp_free(z_root_sq);
+    mp_free(x_root_sq);
+    mp_free(z_root);
+    mp_free(x_root);
+    mp_free(cross_b);
+    mp_free(cross_a);
+    mp_free(Q_sum);
+    mp_free(Q_dif);
+    mp_free(P_sum);
+    mp_free(P_dif);
+
+    return S;
+}
+
+MontgomeryPoint *ecc_montgomery_double(MontgomeryPoint *P)
+{
+    MontyContext *ctx = P->mc->mc;
+    MontgomeryPoint *D = ecc_montgomery_point_new_empty(P->mc);
+
+    /*
+     * In affine terms, doubling could in principle follow the same
+     * recipe as for Weierstrass curves: differentiate the curve
+     * equation to obtain the tangent line at the input point, turn
+     * that into an expression for y, substitute it back into the
+     * curve equation, and subtract the two known roots (which in
+     * this case coincide) from the x^2 coefficient of the cubic
+     * that results.
+     *
+     * There is no input y-coordinate available here, though, so the
+     * formula has to be massaged into one that does without it. The
+     * tangent slope is (3x^2 + 2ax + 1)/(2y), and it only appears in
+     * the final formula squared - which means the y^2 in its
+     * denominator can be replaced by the RHS of the curve equation.
+     * Put together, that gives
+     *
+     *   x_out = (x+1)^2 (x-1)^2 / 4(x^3+ax^2+x)
+     *
+     * which, as usual, the code below evaluates in projective form
+     * so as to avoid the division.
+     */
+
+    mp_int *dif = monty_sub(ctx, P->X, P->Z);
+    mp_int *sum = monty_add(ctx, P->X, P->Z);
+    mp_int *dif_sq = monty_mul(ctx, dif, dif);
+    mp_int *sum_sq = monty_mul(ctx, sum, sum);
+    D->X = monty_mul(ctx, dif_sq, sum_sq);
+    mp_int *xz = monty_mul(ctx, P->X, P->Z);
+    mp_int *xz_x2 = monty_add(ctx, xz, xz);
+    mp_int *xz_x4 = monty_add(ctx, xz_x2, xz_x2);
+    mp_int *xz_x4_scaled = monty_mul(ctx, xz_x4, P->mc->aplus2over4);
+    mp_int *z_factor = monty_add(ctx, dif_sq, xz_x4_scaled);
+    D->Z = monty_mul(ctx, xz_x4, z_factor);
+
+    mp_free(z_factor);
+    mp_free(xz_x4_scaled);
+    mp_free(xz_x4);
+    mp_free(xz_x2);
+    mp_free(xz);
+    mp_free(sum_sq);
+    mp_free(dif_sq);
+    mp_free(sum);
+    mp_free(dif);
+
+    return D;
+}
+
+static void ecc_montgomery_normalise(MontgomeryPoint *mp)
+{
+    MontyContext *ctx = mp->mc->mc;
+    mp_int *z_recip = monty_invert(ctx, mp->Z);
+    monty_mul_into(ctx, mp->X, mp->X, z_recip);    /* X becomes X/Z */
+    mp_copy_into(mp->Z, monty_identity(ctx));      /* Z becomes 1 */
+    mp_free(z_recip);
+}
+
+MontgomeryPoint *ecc_montgomery_multiply(MontgomeryPoint *B, mp_int *n)
+{
+    /*
+     * This is the 'Montgomery ladder': a way of computing any integer
+     * multiple of B when two distinct points can only be added if
+     * their difference is known as well.
+     *
+     * Two curve points are carried along, the second always equal to
+     * the first plus B. Name them kB and (k+1)B, with k an integer
+     * that changes as the loop proceeds. Initially they are B itself
+     * and the result of doubling B, i.e. B and 2B, which corresponds
+     * to k=1.
+     *
+     * Every step adds kB to (k+1)B, yielding (2k+1)B; differential
+     * addition permits that because the two inputs always differ by
+     * exactly B. The step also doubles one of kB and (k+1)B, giving
+     * (2k)B or (2k+2)B respectively, and whichever of those it is,
+     * it sits at distance B from the sum just computed. Hence one
+     * iteration costs one diff-add and one doubling, together with a
+     * pair of conditional swaps that pick which input gets doubled
+     * and in which order the two outputs are stored. The net effect
+     * is that k turns into 2k or 2k+1, and the choice between those
+     * is dictated by the corresponding bit of the multiplier n, read
+     * from the most significant end.
+     *
+     * The position of the highest set bit of n is not assumed to be
+     * known. To stay constant-time, the loop therefore runs once for
+     * every bit position n could possibly occupy, from the top down,
+     * and for as long as no set bit has been seen yet it finishes
+     * each iteration by overwriting the two points with B and 2B
+     * again.
+     */
+
+    MontgomeryPoint *two_B = ecc_montgomery_double(B);
+    MontgomeryPoint *lo = ecc_montgomery_point_copy(B);
+    MontgomeryPoint *hi = ecc_montgomery_point_copy(two_B);
+
+    unsigned awaiting_first_bit = 1;
+    for (size_t pos = mp_max_bits(n); pos-- > 0 ;) {
+        unsigned bit = mp_get_bit(n, pos);
+
+        MontgomeryPoint *sum = ecc_montgomery_diff_add(lo, hi, B);
+        ecc_montgomery_cond_swap(lo, hi, bit);
+        MontgomeryPoint *dbl = ecc_montgomery_double(lo);
+        ecc_montgomery_point_free(lo);
+        ecc_montgomery_point_free(hi);
+        lo = dbl;
+        hi = sum;
+        ecc_montgomery_cond_swap(lo, hi, bit);
+
+        ecc_montgomery_cond_overwrite(lo, B, awaiting_first_bit);
+        ecc_montgomery_cond_overwrite(hi, two_B, awaiting_first_bit);
+        awaiting_first_bit &= ~bit;
+    }
+
+    ecc_montgomery_point_free(two_B);
+    ecc_montgomery_point_free(hi);
+    return lo;
+}
+
+void ecc_montgomery_get_affine(MontgomeryPoint *mp, mp_int **x)
+{
+    /* Normalising modifies mp in place; afterwards X can be exported
+     * as it stands. */
+    ecc_montgomery_normalise(mp);
+
+    if (x)
+        *x = monty_export(mp->mc->mc, mp->X);
+}
+
+/* ----------------------------------------------------------------------
+ * Twisted Edwards curves.
+ */
+
+struct EdwardsPoint {
+    /*
+     * Points are held in 'extended coordinates' (a name that,
+     * unfortunately, is applied to more than one coordinate system).
+     * In this variant X,Y,Z are ordinary projective coordinates, so
+     * that x=X/Z and y=Y/Z, and the additional value T = xyZ = XY/Z
+     * is stored alongside them.
+     */
+    mp_int *X, *Y, *Z, *T;
+    /* The curve this point lives on; not freed with the point. */
+    EdwardsCurve *ec;
+};
+
+struct EdwardsCurve {
+    /* Prime modulus of the finite field (the curve's own copy). */
+    mp_int *p;
+
+    /* Montgomery context for arithmetic mod p. */
+    MontyContext *mc;
+
+    /* Modsqrt context for point decompression; NULL when the curve
+     * was constructed without a non-square. */
+    ModsqrtContext *sc;
+
+    /* Parameters of the curve, in Montgomery-multiplication form. */
+    mp_int *d, *a;
+};
+
+EdwardsCurve *ecc_edwards_curve(mp_int *p, mp_int *d, mp_int *a,
+                                mp_int *nonsquare_mod_p)
+{
+    EdwardsCurve *curve = snew(EdwardsCurve);
+    curve->p = mp_copy(p);
+    curve->mc = monty_new(p);
+    curve->a = monty_import(curve->mc, a);
+    curve->d = monty_import(curve->mc, d);
+
+    /* The square-root context exists only if a non-square was given. */
+    curve->sc = NULL;
+    if (nonsquare_mod_p)
+        curve->sc = modsqrt_new(p, nonsquare_mod_p);
+
+    return curve;
+}
+
+void ecc_edwards_curve_free(EdwardsCurve *ec)
+{
+    if (ec->sc)    /* absent if the curve was built without a non-square */
+        modsqrt_free(ec->sc);
+    monty_free(ec->mc);
+    mp_free(ec->a);
+    mp_free(ec->d);
+    mp_free(ec->p);
+    sfree(ec);
+}
+
+static EdwardsPoint *ecc_edwards_point_new_empty(EdwardsCurve *ec)
+{
+    EdwardsPoint *blank = snew(EdwardsPoint);
+    blank->X = blank->Y = blank->Z = blank->T = NULL;    /* all unset */
+    blank->ec = ec;
+    return blank;
+}
+
+static EdwardsPoint *ecc_edwards_point_new_imported(
+    EdwardsCurve *ec, mp_int *monty_x, mp_int *monty_y)
+{
+    EdwardsPoint *pt = ecc_edwards_point_new_empty(ec);
+    pt->Z = mp_copy(monty_identity(ec->mc));
+    pt->T = monty_mul(ec->mc, monty_x, monty_y);
+    pt->X = monty_x;    /* both inputs are stored directly, not copied, */
+    pt->Y = monty_y;    /* so the new point takes them over */
+    return pt;
+}
+
+EdwardsPoint *ecc_edwards_point_new(
+    EdwardsCurve *ec, mp_int *x, mp_int *y)
+{
+    mp_int *mx = monty_import(ec->mc, x), *my = monty_import(ec->mc, y);
+    return ecc_edwards_point_new_imported(ec, mx, my);
+}
+
+EdwardsPoint *ecc_edwards_point_copy(EdwardsPoint *orig)
+{
+    EdwardsPoint *copy = ecc_edwards_point_new_empty(orig->ec);
+    copy->T = mp_copy(orig->T);    /* all four coordinates are */
+    copy->Z = mp_copy(orig->Z);    /* duplicated; the curve pointer */
+    copy->Y = mp_copy(orig->Y);    /* is shared with the original */
+    copy->X = mp_copy(orig->X);
+    return copy;
+}
+
+void ecc_edwards_point_free(EdwardsPoint *ep)
+{
+    mp_free(ep->T);
+    mp_free(ep->Z);
+    mp_free(ep->Y);
+    mp_free(ep->X);
+    smemclr(ep, sizeof(*ep));    /* scrub the struct before releasing it */
+    sfree(ep);
+}
+
+EdwardsPoint *ecc_edwards_point_new_from_y(
+    EdwardsCurve *ec, mp_int *yorig, unsigned desired_x_parity)
+{
+    assert(ec->sc);
+
+    /*
+     * Rearranging the curve equation ax^2 + y^2 = 1 + dx^2y^2 gives
+     * x^2(dy^2-a) = y^2-1, so x is a square root of the quotient
+     * (y^2-1)/(dy^2-a), which is what gets computed here.
+     */
+    unsigned have_root;
+
+    mp_int *y = monty_import(ec->mc, yorig);
+    mp_int *y_sq = monty_mul(ec->mc, y, y);
+    mp_int *numer = monty_sub(ec->mc, y_sq, monty_identity(ec->mc));
+    mp_int *d_y_sq = monty_mul(ec->mc, ec->d, y_sq);
+    mp_int *denom = monty_sub(ec->mc, d_y_sq, ec->a);
+    mp_int *denom_recip = monty_invert(ec->mc, denom);
+    mp_int *x_sq = monty_mul(ec->mc, numer, denom_recip);
+    mp_int *x = monty_modsqrt(ec->sc, x_sq, &have_root);
+    mp_free(x_sq);
+    mp_free(denom_recip);
+    mp_free(denom);
+    mp_free(d_y_sq);
+    mp_free(numer);
+    mp_free(y_sq);
+
+    if (!have_root) {
+        /* No square root of x^2 exists mod p, so the input y was
+         * bogus. There's nothing to gain from staying constant-time
+         * on this path: once the failure is reported to whoever
+         * supplied the value, the protocol sequence diverges
+         * regardless. */
+        mp_free(y);
+        mp_free(x);
+        return NULL;
+    }
+
+    /*
+     * Pick x or p-x, whichever has the requested parity when reduced
+     * to its lowest positive residue mod p.
+     */
+    mp_int *other = monty_export(ec->mc, x);
+    unsigned negate = (mp_get_bit(other, 0) ^ desired_x_parity) & 1;
+    mp_sub_into(other, ec->p, x);
+    mp_select_into(x, x, other, negate);
+    mp_free(other);
+
+    return ecc_edwards_point_new_imported(ec, x, y);
+}
+
+static void ecc_edwards_cond_overwrite(
+    EdwardsPoint *dest, EdwardsPoint *src, unsigned overwrite)
+{
+    mp_select_into(dest->T, dest->T, src->T, overwrite);  /* one flag */
+    mp_select_into(dest->Z, dest->Z, src->Z, overwrite);  /* governs */
+    mp_select_into(dest->Y, dest->Y, src->Y, overwrite);  /* all four */
+    mp_select_into(dest->X, dest->X, src->X, overwrite);
+}
+
+static void ecc_edwards_cond_swap(
+    EdwardsPoint *P, EdwardsPoint *Q, unsigned swap)
+{
+    mp_cond_swap(P->T, Q->T, swap);    /* one flag governs all */
+    mp_cond_swap(P->Z, Q->Z, swap);    /* four coordinates */
+    mp_cond_swap(P->Y, Q->Y, swap);
+    mp_cond_swap(P->X, Q->X, swap);
+}
+
+EdwardsPoint *ecc_edwards_add(EdwardsPoint *P, EdwardsPoint *Q)
+{
+    EdwardsCurve *ec = P->ec;
+    assert(Q->ec == ec);
+
+    EdwardsPoint *S = ecc_edwards_point_new_empty(ec);
+
+    /*
+     * In affine coordinates, the Edwards sum of (x1,y1) and (x2,y2) is
+     *
+     *   x_out = (x1 y2 +   y1 x2) / (1 + d x1 x2 y1 y2)
+     *   y_out = (y1 y2 - a x1 x2) / (1 - d x1 x2 y1 y2)
+     *
+     * The code uses the formulae listed as 'add-2008-hwcd' at
+     * https://hyperelliptic.org/EFD/g1p/auto-twisted-extended.html
+     *
+     * which, once their careful optimisation is unpicked, turn out
+     * to be computing
+     *
+     *   X_out = (X1 Y2 +   Y1 X2) (Z1 Z2 - d T1 T2)
+     *   Y_out = (Y1 Y2 - a X1 X2) (Z1 Z2 + d T1 T2)
+     *   Z_out = (Z1 Z2 - d T1 T2) (Z1 Z2 + d T1 T2)
+     *   T_out = (X1 Y2 +   Y1 X2) (Y1 Y2 - a X1 X2)
+     */
+    mp_int *zz = monty_mul(ec->mc, P->Z, Q->Z);
+    mp_int *tt = monty_mul(ec->mc, P->T, Q->T);
+    mp_int *yy = monty_mul(ec->mc, P->Y, Q->Y);
+    mp_int *xx = monty_mul(ec->mc, P->X, Q->X);
+    mp_int *d_tt = monty_mul(ec->mc, ec->d, tt);
+    mp_int *a_xx = monty_mul(ec->mc, ec->a, xx);
+    mp_int *P_xy = monty_add(ec->mc, P->X, P->Y);
+    mp_int *Q_xy = monty_add(ec->mc, Q->X, Q->Y);
+    mp_int *xy_prod = monty_mul(ec->mc, P_xy, Q_xy);
+    mp_int *xx_yy = monty_add(ec->mc, xx, yy);
+    mp_int *E = monty_sub(ec->mc, xy_prod, xx_yy);
+    mp_int *F = monty_sub(ec->mc, zz, d_tt);
+    mp_int *G = monty_add(ec->mc, zz, d_tt);
+    mp_int *H = monty_sub(ec->mc, yy, a_xx);
+    S->T = monty_mul(ec->mc, H, E);
+    S->Z = monty_mul(ec->mc, F, G);
+    S->Y = monty_mul(ec->mc, G, H);
+    S->X = monty_mul(ec->mc, E, F);
+
+    mp_free(H);
+    mp_free(G);
+    mp_free(F);
+    mp_free(E);
+    mp_free(xx_yy);
+    mp_free(xy_prod);
+    mp_free(Q_xy);
+    mp_free(P_xy);
+    mp_free(a_xx);
+    mp_free(d_tt);
+    mp_free(xx);
+    mp_free(yy);
+    mp_free(tt);
+    mp_free(zz);
+
+    return S;
+}
+
+static void ecc_edwards_normalise(EdwardsPoint *ep)
+{
+    MontyContext *mc = ep->ec->mc;
+    mp_int *z_recip = monty_invert(mc, ep->Z);
+    monty_mul_into(mc, ep->Y, ep->Y, z_recip);
+    monty_mul_into(mc, ep->X, ep->X, z_recip);
+    mp_copy_into(ep->Z, monty_identity(mc));
+    monty_mul_into(mc, ep->T, ep->X, ep->Y);    /* T = XY as Z = 1 */
+    mp_free(z_recip);
+}
+
+EdwardsPoint *ecc_edwards_multiply(EdwardsPoint *B, mp_int *n)
+{
+    EdwardsPoint *two_B = ecc_edwards_add(B, B);
+    EdwardsPoint *lo = ecc_edwards_point_copy(B);
+    EdwardsPoint *hi = ecc_edwards_point_copy(two_B);
+
+    /*
+     * The exponentiation loop is once more shaped like the
+     * Montgomery ladder: lo and hi take the roles of kB and (k+1)B,
+     * and doubling is done by adding a point to itself. See the
+     * comment in ecc_montgomery_multiply for how the ladder works.
+     */
+
+    unsigned awaiting_first_bit = 1;
+    for (size_t pos = mp_max_bits(n); pos-- > 0 ;) {
+        unsigned bit = mp_get_bit(n, pos);
+
+        EdwardsPoint *sum = ecc_edwards_add(lo, hi);
+        ecc_edwards_cond_swap(lo, hi, bit);
+        EdwardsPoint *dbl = ecc_edwards_add(lo, lo);
+        ecc_edwards_point_free(lo);
+        ecc_edwards_point_free(hi);
+        lo = dbl;
+        hi = sum;
+        ecc_edwards_cond_swap(lo, hi, bit);
+
+        ecc_edwards_cond_overwrite(lo, B, awaiting_first_bit);
+        ecc_edwards_cond_overwrite(hi, two_B, awaiting_first_bit);
+        awaiting_first_bit &= ~bit;
+    }
+
+    ecc_edwards_point_free(two_B);
+    ecc_edwards_point_free(hi);
+    return lo;
+}
+
+/*
+ * Helper routine to determine whether two values each given as a pair
+ * of projective coordinates represent the same affine value.
+ */
+static inline unsigned projective_eq(
+    MontyContext *mc, mp_int *An, mp_int *Ad, mp_int *Bn, mp_int *Bd)
+{
+    /* An/Ad == Bn/Bd is tested as An*Bd == Bn*Ad, avoiding division. */
+    mp_int *lhs = monty_mul(mc, An, Bd);
+    mp_int *rhs = monty_mul(mc, Bn, Ad);
+    unsigned equal = mp_cmp_eq(lhs, rhs);
+    mp_free(rhs);
+    mp_free(lhs);
+    return equal;
+}
+
+unsigned ecc_edwards_eq(EdwardsPoint *P, EdwardsPoint *Q)
+{
+    MontyContext *mc = P->ec->mc;
+    assert(Q->ec == P->ec);
+    unsigned same_x = projective_eq(mc, P->X, P->Z, Q->X, Q->Z);  /* X/Z */
+    unsigned same_y = projective_eq(mc, P->Y, P->Z, Q->Y, Q->Z);  /* Y/Z */
+    return same_x & same_y;
+}
+
+void ecc_edwards_get_affine(EdwardsPoint *ep, mp_int **x, mp_int **y)
+{
+    /* Rescale ep in place first; a NULL output pointer is skipped. */
+    ecc_edwards_normalise(ep);
+
+    MontyContext *mc = ep->ec->mc;
+    if (x)
+        *x = monty_export(mc, ep->X);
+    if (y)
+        *y = monty_export(mc, ep->Y);
+}
diff --git a/ecc.h b/ecc.h
new file mode 100644
index 00000000..d9e12424
--- /dev/null
+++ b/ecc.h
@@ -0,0 +1,233 @@
+#ifndef PUTTY_ECC_H
+#define PUTTY_ECC_H
+
+/*
+ * Arithmetic functions for the various kinds of elliptic curves used
+ * by PuTTY's public-key cryptography.
+ *
+ * All of these elliptic curves are over the finite field whose order
+ * is a large prime p. (Elliptic curves over a field of order 2^n are
+ * also known, but PuTTY currently has no need of them.)
+ */
+
+/* ----------------------------------------------------------------------
+ * Weierstrass curves (or rather, 'short form' Weierstrass curves).
+ *
+ * A curve in this form is defined by two parameters a,b, and the
+ * non-identity points on the curve are represented by (x,y) (the
+ * 'affine coordinates') such that y^2 = x^3 + ax + b.
+ *
+ * The identity element of the curve's group is an additional 'point
+ * at infinity', which is considered to be the third point on the
+ * intersection of the curve with any vertical line. Hence, the
+ * inverse of the point (x,y) is (x,-y).
+ */
+
+/*
+ * Create and destroy Weierstrass curve data structures. The mandatory
+ * parameters to the constructor are the prime modulus p, and the
+ * curve parameters a,b.
+ *
+ * 'nonsquare_mod_p' is an optional extra parameter, only needed by
+ * ecc_weierstrass_point_new_from_x which has to take a modular
+ * square root. You can pass it as NULL if you don't need that function.
+ */
+WeierstrassCurve *ecc_weierstrass_curve(
+    mp_int *p, mp_int *a, mp_int *b, mp_int *nonsquare_mod_p);
+void ecc_weierstrass_curve_free(WeierstrassCurve *);
+
+/*
+ * Create points on a Weierstrass curve, given the curve.
+ *
+ * point_new_identity returns the special identity point.
+ * point_new(x,y) returns the non-identity point with the given affine
+ * coordinates.
+ *
+ * point_new_from_x constructs a non-identity point given only the
+ * x-coordinate, by using the curve equation to work out what y has to
+ * be. Of course the equation only tells you y^2, so it only
+ * determines y up to sign; the parameter desired_y_parity controls
+ * which of the two values of y you get, by saying whether you'd like
+ * its minimal non-negative residue mod p to be even or odd. (Of
+ * course, since p itself is odd, exactly one of y and p-y is odd.)
+ * This function has to take a modular square root, so it will only
+ * work if you passed in a non-square mod p when constructing the
+ * curve.
+ */
+WeierstrassPoint *ecc_weierstrass_point_new_identity(WeierstrassCurve *curve);
+WeierstrassPoint *ecc_weierstrass_point_new(
+    WeierstrassCurve *curve, mp_int *x, mp_int *y);
+WeierstrassPoint *ecc_weierstrass_point_new_from_x(
+    WeierstrassCurve *curve, mp_int *x, unsigned desired_y_parity);
+
+/* Memory management: copy and free points. */
+WeierstrassPoint *ecc_weierstrass_point_copy(WeierstrassPoint *wc);
+void ecc_weierstrass_point_free(WeierstrassPoint *point);
+
+/* Check whether a point is actually on the curve. */
+unsigned ecc_weierstrass_point_valid(WeierstrassPoint *);
+
+/*
+ * Add two points and return their sum. This function is fully
+ * general: it should do the right thing if the two inputs are the
+ * same, or if either (or both) of the input points is the identity,
+ * or if the two input points are inverses so the output is the
+ * identity. However, it pays for that generality by being slower than
+ * the special-purpose functions below.
+ */
+WeierstrassPoint *ecc_weierstrass_add_general(
+    WeierstrassPoint *, WeierstrassPoint *);
+
+/*
+ * Fast but less general arithmetic functions: add two points on the
+ * condition that they are not equal and neither is the identity, and
+ * add a point to itself.
+ */
+WeierstrassPoint *ecc_weierstrass_add(WeierstrassPoint *, WeierstrassPoint *);
+WeierstrassPoint *ecc_weierstrass_double(WeierstrassPoint *);
+
+/*
+ * Compute an integer multiple of a point. Not guaranteed to work
+ * unless the integer argument is less than the order of the point in
+ * the group (because it won't cope if an identity element shows up in
+ * any intermediate product).
+ */
+WeierstrassPoint *ecc_weierstrass_multiply(WeierstrassPoint *, mp_int *);
+
+/*
+ * Query functions to get the value of a point back out. is_identity
+ * tells you whether the point is the identity; if it isn't, then
+ * get_affine will retrieve one or both of its affine coordinates.
+ * (You can pass NULL as either output pointer, if you don't need that
+ * coordinate as output.)
+ */
+unsigned ecc_weierstrass_is_identity(WeierstrassPoint *wp);
+void ecc_weierstrass_get_affine(WeierstrassPoint *wp, mp_int **x, mp_int **y);
+
+/* ----------------------------------------------------------------------
+ * Montgomery curves.
+ *
+ * A curve in this form is defined by two parameters a,b, and the
+ * curve equation is y^2 = x^3 + ax^2 + bx.
+ *
+ * As with Weierstrass curves, there's an additional point at infinity
+ * that is the identity element, and the inverse of (x,y) is (x,-y).
+ *
+ * However, we don't actually work with full (x,y) pairs. We just
+ * store the x-coordinate (so what we're really representing is not a
+ * specific point on the curve but a two-point set {P,-P}). This means
+ * you can't quite do point addition, because if you're given {P,-P}
+ * and {Q,-Q} as input, you can work out a pair of x-coordinates that
+ * are those of P-Q and P+Q, but you don't know which is which.
+ *
+ * Instead, the basic operation is 'differential addition', in which
+ * you are given three parameters P, Q and P-Q and you return P+Q. (As
+ * well as disambiguating which of the possible answers you want, that
+ * extra input also enables a fast formula for computing it. This
+ * fast formula is more or less why Montgomery curves are useful in
+ * the first place.)
+ *
+ * Doubling a point is still possible to do unambiguously, so you can
+ * still compute an integer multiple of P if you start by making 2P
+ * and then doing a series of differential additions.
+ */
+
+/*
+ * Create and destroy Montgomery curve data structures.
+ */
+MontgomeryCurve *ecc_montgomery_curve(mp_int *p, mp_int *a, mp_int *b);
+void ecc_montgomery_curve_free(MontgomeryCurve *);
+
+/*
+ * Create, copy and free points on the curve. We don't need to
+ * explicitly represent the identity for this application.
+ */
+MontgomeryPoint *ecc_montgomery_point_new(MontgomeryCurve *mc, mp_int *x);
+MontgomeryPoint *ecc_montgomery_point_copy(MontgomeryPoint *orig);
+void ecc_montgomery_point_free(MontgomeryPoint *mp);
+
+/*
+ * Basic arithmetic routines: differential addition and point-
+ * doubling. Each of these assumes that no special cases come up - no
+ * input or output point should be the identity, and in diff_add, P
+ * and Q shouldn't be the same.
+ */
+MontgomeryPoint *ecc_montgomery_diff_add(
+    MontgomeryPoint *P, MontgomeryPoint *Q, MontgomeryPoint *PminusQ);
+MontgomeryPoint *ecc_montgomery_double(MontgomeryPoint *P);
+
+/*
+ * Compute an integer multiple of a point.
+ */
+MontgomeryPoint *ecc_montgomery_multiply(MontgomeryPoint *, mp_int *);
+
+/*
+ * Return the affine x-coordinate of a point.
+ */
+void ecc_montgomery_get_affine(MontgomeryPoint *mp, mp_int **x);
+
+/* ----------------------------------------------------------------------
+ * Twisted Edwards curves.
+ *
+ * A curve in this form is defined by two parameters d,a, and the
+ * curve equation is a x^2 + y^2 = 1 + d x^2 y^2.
+ *
+ * Apparently if you ask a proper algebraic geometer they'll tell you
+ * that this is technically not an actual elliptic curve. Certainly it
+ * doesn't work quite the same way as the other kinds: in this form,
+ * there is no need for a point at infinity, because the identity
+ * element is represented by the affine coordinates (0,1). And you
+ * invert a point by negating its x rather than y coordinate: the
+ * inverse of (x,y) is (-x,y).
+ *
+ * The usefulness of this representation is that the addition formula
+ * is 'strongly unified', meaning that the same formula works for any
+ * input and output points, without needing special cases for the
+ * identity or for doubling.
+ */
+
+/*
+ * Create and destroy Edwards curve data structures.
+ *
+ * Similarly to ecc_weierstrass_curve, you don't have to provide
+ * nonsquare_mod_p if you don't need ecc_edwards_point_new_from_y.
+ */
+EdwardsCurve *ecc_edwards_curve(
+    mp_int *p, mp_int *d, mp_int *a, mp_int *nonsquare_mod_p);
+void ecc_edwards_curve_free(EdwardsCurve *);
+
+/*
+ * Create points.
+ *
+ * There's no need to have a separate function to create the identity
+ * point, because you can just pass x=0 and y=1 to the usual function.
+ *
+ * Similarly to the Weierstrass curve, ecc_edwards_point_new_from_y
+ * creates a point given only its y-coordinate and the desired parity
+ * of its x-coordinate, and you can only call it if you provided the
+ * optional nonsquare_mod_p argument when creating the curve.
+ */
+EdwardsPoint *ecc_edwards_point_new(
+    EdwardsCurve *curve, mp_int *x, mp_int *y);
+EdwardsPoint *ecc_edwards_point_new_from_y(
+    EdwardsCurve *curve, mp_int *y, unsigned desired_x_parity);
+
+/* Copy and free points. */
+EdwardsPoint *ecc_edwards_point_copy(EdwardsPoint *ec);
+void ecc_edwards_point_free(EdwardsPoint *point);
+
+/*
+ * Arithmetic: add two points, and calculate an integer multiple of a
+ * point.
+ */
+EdwardsPoint *ecc_edwards_add(EdwardsPoint *, EdwardsPoint *);
+EdwardsPoint *ecc_edwards_multiply(EdwardsPoint *, mp_int *);
+
+/*
+ * Query functions: compare two points for equality, and return the
+ * affine coordinates of a point.
+ */
+unsigned ecc_edwards_eq(EdwardsPoint *, EdwardsPoint *);
+void ecc_edwards_get_affine(EdwardsPoint *wp, mp_int **x, mp_int **y);
+
+#endif /* PUTTY_ECC_H */
diff --git a/import.c b/import.c
index e3ba53c4..71caa859 100644
--- a/import.c
+++ b/import.c
@@ -10,6 +10,7 @@
 
 #include "putty.h"
 #include "ssh.h"
+#include "mpint.h"
 #include "misc.h"
 
 static bool openssh_pem_encrypted(const Filename *file);
@@ -815,7 +816,7 @@ static bool openssh_pem_write(
          */
         if (ssh_key_alg(key->key) == &ssh_rsa) {
             ptrlen n, e, d, p, q, iqmp, dmp1, dmq1;
-            Bignum bd, bp, bq, bdmp1, bdmq1;
+            mp_int *bd, *bp, *bq, *bdmp1, *bdmq1;
 
             /*
              * These blobs were generated from inside PuTTY, so we needn't
@@ -834,29 +835,29 @@ static bool openssh_pem_write(
             assert(!get_err(src));     /* can't go wrong */
 
             /* We also need d mod (p-1) and d mod (q-1). */
-            bd = bignum_from_bytes(d.ptr, d.len);
-            bp = bignum_from_bytes(p.ptr, p.len);
-            bq = bignum_from_bytes(q.ptr, q.len);
-            decbn(bp);
-            decbn(bq);
-            bdmp1 = bigmod(bd, bp);
-            bdmq1 = bigmod(bd, bq);
-            freebn(bd);
-            freebn(bp);
-            freebn(bq);
+            bd = mp_from_bytes_be(d);
+            bp = mp_from_bytes_be(p);
+            bq = mp_from_bytes_be(q);
+            mp_sub_integer_into(bp, bp, 1);
+            mp_sub_integer_into(bq, bq, 1);
+            bdmp1 = mp_mod(bd, bp);
+            bdmq1 = mp_mod(bd, bq);
+            mp_free(bd);
+            mp_free(bp);
+            mp_free(bq);
 
-            dmp1.len = (bignum_bitcount(bdmp1)+8)/8;
-            dmq1.len = (bignum_bitcount(bdmq1)+8)/8;
+            dmp1.len = (mp_get_nbits(bdmp1)+8)/8;
+            dmq1.len = (mp_get_nbits(bdmq1)+8)/8;
             sparelen = dmp1.len + dmq1.len;
             spareblob = snewn(sparelen, unsigned char);
             dmp1.ptr = spareblob;
             dmq1.ptr = spareblob + dmp1.len;
             for (i = 0; i < dmp1.len; i++)
-                spareblob[i] = bignum_byte(bdmp1, dmp1.len-1 - i);
+                spareblob[i] = mp_get_byte(bdmp1, dmp1.len-1 - i);
             for (i = 0; i < dmq1.len; i++)
-                spareblob[i+dmp1.len] = bignum_byte(bdmq1, dmq1.len-1 - i);
-            freebn(bdmp1);
-            freebn(bdmq1);
+                spareblob[i+dmp1.len] = mp_get_byte(bdmq1, dmq1.len-1 - i);
+            mp_free(bdmp1);
+            mp_free(bdmq1);
 
             numbers[0] = make_ptrlen(zero, 1); zero[0] = '\0';
             numbers[1] = n;
@@ -913,7 +914,7 @@ static bool openssh_pem_write(
                ssh_key_alg(key->key) == &ssh_ecdsa_nistp384 ||
                ssh_key_alg(key->key) == &ssh_ecdsa_nistp521) {
         const unsigned char *oid;
-        struct ec_key *ec = container_of(key->key, struct ec_key, sshk);
+        struct ecdsa_key *ec = container_of(key->key, struct ecdsa_key, sshk);
         int oidlen;
         int pointlen;
         strbuf *seq, *sub;
@@ -929,7 +930,7 @@ static bool openssh_pem_write(
          *     BIT STRING (0x00 public key point)
          */
         oid = ec_alg_oid(ssh_key_alg(key->key), &oidlen);
-        pointlen = (ec->publicKey.curve->fieldBits + 7) / 8 * 2;
+        pointlen = (ec->curve->fieldBits + 7) / 8 * 2;
 
         seq = strbuf_new();
 
diff --git a/marshal.h b/marshal.h
index e19cd0b9..400354c4 100644
--- a/marshal.h
+++ b/marshal.h
@@ -153,6 +153,8 @@ struct strbuf;
 void BinarySink_put_stringsb(BinarySink *, struct strbuf *);
 void BinarySink_put_asciz(BinarySink *, const char *str);
 bool BinarySink_put_pstring(BinarySink *, const char *str);
+void BinarySink_put_mp_ssh1(BinarySink *bs, mp_int *x);
+void BinarySink_put_mp_ssh2(BinarySink *bs, mp_int *x);
 
 /* ---------------------------------------------------------------------- */
 
@@ -195,7 +197,7 @@ struct BinarySource {
      * types.
      *
      * If the usual return value is dynamically allocated (e.g. a
-     * Bignum, or a normal C 'char *' string), then the error value is
+     * bignum, or a normal C 'char *' string), then the error value is
      * also dynamic in the same way. So you have to free exactly the
      * same set of things whether or not there was a decoding error,
      * which simplifies exit paths - for example, you could call a big
@@ -281,5 +283,7 @@ uint64_t BinarySource_get_uint64(BinarySource *);
 ptrlen BinarySource_get_string(BinarySource *);
 const char *BinarySource_get_asciz(BinarySource *);
 ptrlen BinarySource_get_pstring(BinarySource *);
+mp_int *BinarySource_get_mp_ssh1(BinarySource *src);
+mp_int *BinarySource_get_mp_ssh2(BinarySource *src);
 
 #endif /* PUTTY_MARSHAL_H */
diff --git a/mpint.c b/mpint.c
new file mode 100644
index 00000000..f317b58f
--- /dev/null
+++ b/mpint.c
@@ -0,0 +1,2340 @@
+#include <assert.h>
+#include <stdio.h>
+
+#include "defs.h"
+#include "putty.h"
+
+#include "mpint.h"
+#include "mpint_i.h"
+
+/*
+ * Inline helpers to take min and max of size_t values, used
+ * throughout this code.
+ */
+static inline size_t size_t_min(size_t a, size_t b)
+{
+    return a < b ? a : b;
+}
+static inline size_t size_t_max(size_t a, size_t b)
+{
+    return a > b ? a : b;
+}
+
+/*
+ * Helper to fetch a word of data from x with array overflow checking.
+ * If x is too short to have that word, 0 is returned.
+ */
+static inline BignumInt mp_word(mp_int *x, size_t i)
+{
+    return i < x->nw ? x->w[i] : 0;
+}
+
+static mp_int *mp_make_sized(size_t nw)
+{
+    mp_int *x = snew_plus(mp_int, nw * sizeof(BignumInt));
+    x->nw = nw;
+    x->w = snew_plus_get_aux(x);
+    mp_clear(x);
+    return x;
+}
+
+mp_int *mp_new(size_t maxbits)
+{
+    size_t words = (maxbits + BIGNUM_INT_BITS - 1) / BIGNUM_INT_BITS;
+    return mp_make_sized(words);
+}
+
+mp_int *mp_from_integer(uintmax_t n)
+{
+    mp_int *x = mp_make_sized(
+        (sizeof(n) + BIGNUM_INT_BYTES - 1) / BIGNUM_INT_BYTES);
+    for (size_t i = 0; i < x->nw; i++)
+        x->w[i] = n >> (i * BIGNUM_INT_BITS);
+    return x;
+}
+
+size_t mp_max_bytes(mp_int *x)
+{
+    return x->nw * BIGNUM_INT_BYTES;
+}
+
+size_t mp_max_bits(mp_int *x)
+{
+    return x->nw * BIGNUM_INT_BITS;
+}
+
+void mp_free(mp_int *x)
+{
+    mp_clear(x);
+    smemclr(x, sizeof(*x));
+    sfree(x);
+}
+
+void mp_dump(FILE *fp, const char *prefix, mp_int *x, const char *suffix)
+{
+    fprintf(fp, "%s0x", prefix);
+    for (size_t i = mp_max_bytes(x); i-- > 0 ;)
+        fprintf(fp, "%02X", mp_get_byte(x, i));
+    fputs(suffix, fp);
+}
+
+void mp_copy_into(mp_int *dest, mp_int *src)
+{
+    size_t copy_nw = size_t_min(dest->nw, src->nw);
+    memmove(dest->w, src->w, copy_nw * sizeof(BignumInt));
+    smemclr(dest->w + copy_nw, (dest->nw - copy_nw) * sizeof(BignumInt));
+}
+
+/*
+ * Conditional selection is done by negating 'which', to give a mask
+ * word which is all 1s if which==1 and all 0s if which==0. Then you
+ * can select between two inputs a,b without data-dependent control
+ * flow by XORing them to get their difference; ANDing with the mask
+ * word to replace that difference with 0 if which==0; and XORing that
+ * into a, which will either turn it into b or leave it alone.
+ *
+ * This trick will be used throughout this code and taken as read the
+ * rest of the time (or else I'd be here all week typing comments),
+ * but I felt I ought to explain it in words _once_.
+ */
+void mp_select_into(mp_int *dest, mp_int *src0, mp_int *src1,
+                    unsigned which)
+{
+    BignumInt mask = -(BignumInt)(1 & which);
+    for (size_t i = 0; i < dest->nw; i++) {
+        BignumInt srcword0 = mp_word(src0, i), srcword1 = mp_word(src1, i);
+        dest->w[i] = srcword0 ^ ((srcword1 ^ srcword0) & mask);
+    }
+}
+
+void mp_cond_swap(mp_int *x0, mp_int *x1, unsigned swap)
+{
+    assert(x0->nw == x1->nw);
+    BignumInt mask = -(BignumInt)(1 & swap);
+    for (size_t i = 0; i < x0->nw; i++) {
+        BignumInt diff = (x0->w[i] ^ x1->w[i]) & mask;
+        x0->w[i] ^= diff;
+        x1->w[i] ^= diff;
+    }
+}
+
+void mp_clear(mp_int *x)
+{
+    smemclr(x->w, x->nw * sizeof(BignumInt));
+}
+
+void mp_cond_clear(mp_int *x, unsigned clear)
+{
+    BignumInt mask = ~-(BignumInt)(1 & clear);
+    for (size_t i = 0; i < x->nw; i++)
+        x->w[i] &= mask;
+}
+
+/*
+ * Common code between mp_from_bytes_{le,be} which reads bytes in an
+ * arithmetic progression m*i+c. Always allocates at least one word.
+ */
+static mp_int *mp_from_bytes_int(ptrlen bytes, size_t m, size_t c)
+{
+    mp_int *n = mp_make_sized(size_t_max(
+        (bytes.len + BIGNUM_INT_BYTES - 1) / BIGNUM_INT_BYTES, 1));
+    for (size_t i = 0; i < bytes.len; i++)
+        n->w[i / BIGNUM_INT_BYTES] |=
+            (BignumInt)(((const unsigned char *)bytes.ptr)[m*i+c]) <<
+            (8 * (i % BIGNUM_INT_BYTES));
+    return n;
+}
+
+mp_int *mp_from_bytes_le(ptrlen bytes)
+{
+    return mp_from_bytes_int(bytes, 1, 0);
+}
+
+mp_int *mp_from_bytes_be(ptrlen bytes)
+{
+    return mp_from_bytes_int(bytes, -1, bytes.len - 1);
+}
+
+static mp_int *mp_from_words(size_t nw, const BignumInt *w)
+{
+    mp_int *x = mp_make_sized(nw);
+    memcpy(x->w, w, x->nw * sizeof(BignumInt));
+    return x;
+}
+
+/*
+ * Decimal-to-binary conversion: just go through the input string
+ * adding on the decimal value of each digit, and then multiplying the
+ * number so far by 10. An empty input string converts to zero.
+ */
+mp_int *mp_from_decimal_pl(ptrlen decimal)
+{
+    /* 196/59 is an upper bound (and also a continued-fraction
+     * convergent) for log2(10), so this conservatively estimates the
+     * number of bits that will be needed to store any number that can
+     * be written in this many decimal digits. */
+    assert(decimal.len < (~(size_t)0) / 196);
+    size_t bits = 196 * decimal.len / 59;
+
+    /* Now round that up to words (always at least one word). */
+    size_t words = bits / BIGNUM_INT_BITS + 1;
+
+    mp_int *x = mp_make_sized(words);
+    for (size_t i = 0; i < decimal.len; i++) {
+        mp_add_integer_into(x, x, ((const char *)decimal.ptr)[i] - '0');
+
+        if (i+1 == decimal.len)
+            break;
+
+        mp_mul_integer_into(x, x, 10);
+    }
+    return x;
+}
+
+mp_int *mp_from_decimal(const char *decimal)
+{
+    return mp_from_decimal_pl(ptrlen_from_asciz(decimal));
+}
+
+/*
+ * Hex-to-binary conversion: _algorithmically_ simpler than decimal
+ * (none of those multiplications by 10), but there's some fiddly
+ * bit-twiddling needed to process each hex digit without diverging
+ * control flow depending on whether it's a letter or a number.
+ */
+mp_int *mp_from_hex_pl(ptrlen hex)
+{
+    assert(hex.len <= (~(size_t)0) / 4);
+    size_t bits = hex.len * 4;
+    size_t words = (bits + BIGNUM_INT_BITS - 1) / BIGNUM_INT_BITS;
+    mp_int *x = mp_make_sized(words);
+    for (size_t nibble = 0; nibble < hex.len; nibble++) {
+        BignumInt digit = ((char *)hex.ptr)[hex.len-1 - nibble];
+
+        BignumInt lmask = ~-(((digit-'a')|('f'-digit)) >> (BIGNUM_INT_BITS-1));
+        BignumInt umask = ~-(((digit-'A')|('F'-digit)) >> (BIGNUM_INT_BITS-1));
+
+        BignumInt digitval = digit - '0';
+        digitval ^= (digitval ^ (digit - 'a' + 10)) & lmask;
+        digitval ^= (digitval ^ (digit - 'A' + 10)) & umask;
+        digitval &= 0xF; /* at least be _slightly_ nice about weird input */
+
+        size_t word_idx = nibble / (BIGNUM_INT_BYTES*2);
+        size_t nibble_within_word = nibble % (BIGNUM_INT_BYTES*2);
+        x->w[word_idx] |= digitval << (nibble_within_word * 4);
+    }
+    return x;
+}
+
+mp_int *mp_from_hex(const char *hex)
+{
+    return mp_from_hex_pl(ptrlen_from_asciz(hex));
+}
+
+mp_int *mp_copy(mp_int *x)
+{
+    return mp_from_words(x->nw, x->w);
+}
+
+uint8_t mp_get_byte(mp_int *x, size_t byte)
+{
+    return 0xFF & (mp_word(x, byte / BIGNUM_INT_BYTES) >>
+                   (8 * (byte % BIGNUM_INT_BYTES)));
+}
+
+unsigned mp_get_bit(mp_int *x, size_t bit)
+{
+    return 1 & (mp_word(x, bit / BIGNUM_INT_BITS) >>
+                (bit % BIGNUM_INT_BITS));
+}
+
+void mp_set_bit(mp_int *x, size_t bit, unsigned val)
+{
+    size_t word = bit / BIGNUM_INT_BITS;
+    assert(word < x->nw);
+
+    unsigned shift = (bit % BIGNUM_INT_BITS);
+
+    x->w[word] &= ~((BignumInt)1 << shift);
+    x->w[word] |= (BignumInt)(val & 1) << shift;
+}
+
+/*
+ * Helper function used here and there to normalise any nonzero input
+ * value to 1.
+ */
+static inline unsigned normalise_to_1(BignumInt n)
+{
+    n = (n >> 1) | (n & 1);            /* ensure top bit is clear */
+    n = (-n) >> (BIGNUM_INT_BITS - 1); /* normalise to 0 or 1 */
+    return n;
+}
+
+/*
+ * Find the highest nonzero word in a number. Returns the index of the
+ * word in x->w, and also a pair of output uint64_t in which that word
+ * appears in the high one shifted left by 'shift_wanted' bits, the
+ * words immediately below it occupy the space to the right, and the
+ * words below _that_ fill up the low one.
+ *
+ * If there is no nonzero word at all, the passed-by-reference output
+ * variables retain their original values.
+ */
+static inline void mp_find_highest_nonzero_word_pair(
+    mp_int *x, size_t shift_wanted, size_t *index,
+    uint64_t *hi, uint64_t *lo)
+{
+    uint64_t curr_hi = 0, curr_lo = 0;
+
+    for (size_t curr_index = 0; curr_index < x->nw; curr_index++) {
+        BignumInt curr_word = x->w[curr_index];
+        unsigned indicator = normalise_to_1(curr_word);
+
+        curr_lo = (BIGNUM_INT_BITS < 64 ? (curr_lo >> BIGNUM_INT_BITS) : 0) |
+            (curr_hi << (64 - BIGNUM_INT_BITS));
+        curr_hi = (BIGNUM_INT_BITS < 64 ? (curr_hi >> BIGNUM_INT_BITS) : 0) |
+            ((uint64_t)curr_word << shift_wanted);
+
+        if (hi)    *hi    ^= (curr_hi    ^ *hi   ) & -(uint64_t)indicator;
+        if (lo)    *lo    ^= (curr_lo    ^ *lo   ) & -(uint64_t)indicator;
+        if (index) *index ^= (curr_index ^ *index) & -(size_t)  indicator;
+    }
+}
+
+size_t mp_get_nbits(mp_int *x)
+{
+    /* Sentinel values in case there are no bits set at all: we
+     * imagine that there's a word at position -1 (i.e. the topmost
+     * fraction word) which is all 1s, because that way, we handle a
+     * zero input by considering its highest set bit to be the top one
+     * of that word, i.e. just below the units digit, i.e. at bit
+     * index -1, i.e. so we'll return 0 on output. */
+    size_t hiword_index = -(size_t)1;
+    uint64_t hiword64 = ~(BignumInt)0;
+
+    /*
+     * Find the highest nonzero word and its index.
+     */
+    mp_find_highest_nonzero_word_pair(x, 0, &hiword_index, &hiword64, NULL);
+    BignumInt hiword = hiword64; /* in case BignumInt is a narrower type */
+
+    /*
+     * Find the index of the highest set bit within hiword.
+     */
+    BignumInt hibit_index = 0;
+    for (size_t i = (1 << (BIGNUM_INT_BITS_BITS-1)); i != 0; i >>= 1) {
+        BignumInt shifted_word = hiword >> i;
+        BignumInt indicator = (-shifted_word) >> (BIGNUM_INT_BITS-1);
+        hiword ^= (shifted_word ^ hiword ) & -indicator;
+        hibit_index += i & -(size_t)indicator;
+    }
+
+    /*
+     * Put together the result.
+     */
+    return (hiword_index << BIGNUM_INT_BITS_BITS) + hibit_index + 1;
+}
+
+/*
+ * Shared code between the hex and decimal output functions to get rid
+ * of leading zeroes on the output string. The idea is that we wrote
+ * out a fixed number of digits and a trailing \0 byte into 'buf', and
+ * now we want to shift it all left so that the first nonzero digit
+ * moves to buf[0] (or, if there are no nonzero digits at all, we move
+ * up by 'maxtrim', so that we return 0 as "0" instead of "").
+ */
+static void trim_leading_zeroes(char *buf, size_t bufsize, size_t maxtrim)
+{
+    size_t trim = maxtrim;
+
+    /*
+     * Look for the first character not equal to '0', to find the
+     * shift count. (The mask shift must use size_t's own bit width.)
+     */
+    if (trim > 0) {
+        for (size_t pos = trim; pos-- > 0 ;) {
+            uint8_t diff = buf[pos] ^ '0';
+            size_t mask = -((((size_t)diff) - 1) >> (sizeof(size_t) * 8 - 1));
+            trim ^= (trim ^ pos) & ~mask;
+        }
+    }
+
+    /*
+     * Now do the shift, in log n passes each of which does a
+     * conditional shift by 2^i bytes if bit i is set in the shift
+     * count.
+     */
+    uint8_t *ubuf = (uint8_t *)buf;
+    for (size_t logd = 0; bufsize >> logd; logd++) {
+        uint8_t mask = -(uint8_t)((trim >> logd) & 1);
+        size_t d = (size_t)1 << logd;
+        for (size_t i = 0; i+d < bufsize; i++) {
+            uint8_t diff = mask & (ubuf[i] ^ ubuf[i+d]);
+            ubuf[i] ^= diff;
+            ubuf[i+d] ^= diff;
+        }
+    }
+}
+
+/*
+ * Binary to decimal conversion. Our strategy here is to extract each
+ * decimal digit by finding the input number's residue mod 10, then
+ * subtract that off to give an exact multiple of 10, which then means
+ * you can safely divide by 10 by means of shifting right one bit and
+ * then multiplying by the inverse of 5 mod 2^n.
+ */
+char *mp_get_decimal(mp_int *x_orig)
+{
+    mp_int *x = mp_copy(x_orig), *y = mp_make_sized(x->nw);
+
+    /*
+     * The inverse of 5 mod 2^lots is 0xccccccccccccccccccccd, for an
+     * appropriate number of 'c's. Manually construct an integer the
+     * right size.
+     */
+    mp_int *inv5 = mp_make_sized(x->nw);
+    assert(BIGNUM_INT_BITS % 8 == 0);
+    for (size_t i = 0; i < inv5->nw; i++)
+        inv5->w[i] = BIGNUM_INT_MASK / 5 * 4;
+    inv5->w[0]++;
+
+    /*
+     * 146/485 is an upper bound (and also a continued-fraction
+     * convergent) of log10(2), so this is a conservative estimate of
+     * the number of decimal digits needed to store a value that fits
+     * in this many binary bits.
+     */
+    assert(x->nw < (~(size_t)1) / (146 * BIGNUM_INT_BITS));
+    size_t bufsize = size_t_max(x->nw * (146 * BIGNUM_INT_BITS) / 485, 1) + 2;
+    char *outbuf = snewn(bufsize, char);
+    outbuf[bufsize - 1] = '\0';
+
+    /*
+     * Loop over the number generating digits from the least
+     * significant upwards, so that we write to outbuf in reverse
+     * order.
+     */
+    for (size_t pos = bufsize - 1; pos-- > 0 ;) {
+        /*
+         * Find the current residue mod 10. We do this by first
+         * summing the bytes of the number, with all but the lowest
+         * one multiplied by 6 (because 256^i == 6 mod 10 for all
+         * i>0). That gives us a single word congruent mod 10 to the
+         * input number, and then we reduce it further by manual
+         * multiplication and shifting, just in case the compiler
+         * target implements the C division operator in a way that has
+         * input-dependent timing.
+         */
+        uint32_t low_digit = 0, maxval = 0, mult = 1;
+        for (size_t i = 0; i < x->nw; i++) {
+            for (unsigned j = 0; j < BIGNUM_INT_BYTES; j++) {
+                low_digit += mult * (0xFF & (x->w[i] >> (8*j)));
+                maxval += mult * 0xFF;
+                mult = 6;
+            }
+            /*
+             * For _really_ big numbers, prevent overflow of low_digit by
+             * periodically folding the top half of the accumulator
+             * into the bottom half, using the same rule 'multiply by
+             * 6 when shifting down by one or more whole bytes'.
+             */
+            if (maxval > UINT32_MAX - (6 * 0xFF * BIGNUM_INT_BYTES)) {
+                low_digit = (low_digit & 0xFFFF) + 6 * (low_digit >> 16);
+                maxval = (maxval & 0xFFFF) + 6 * (maxval >> 16);
+            }
+        }
+
+        /*
+         * Final reduction of low_digit. We multiply by 2^32 / 10
+         * (that's the constant 0x19999999) to get a 64-bit value
+         * whose top 32 bits are the approximate quotient
+         * low_digit/10; then we subtract off 10 times that; and
+         * finally we do one last trial subtraction of 10 by adding 6
+         * (which sets bit 4 if the number was just over 10) and then
+         * testing bit 4.
+         */
+        low_digit -= 10 * ((0x19999999ULL * low_digit) >> 32);
+        low_digit -= 10 * ((low_digit + 6) >> 4);
+
+        assert(low_digit < 10);        /* make sure we did reduce fully */
+        outbuf[pos] = '0' + low_digit;
+
+        /*
+         * Now subtract off that digit, divide by 2 (using a right
+         * shift) and by 5 (using the modular inverse), to get the
+         * next output digit into the units position.
+         */
+        mp_sub_integer_into(x, x, low_digit);
+        mp_rshift_fixed_into(y, x, 1);
+        mp_mul_into(x, y, inv5);
+    }
+
+    mp_free(x);
+    mp_free(y);
+    mp_free(inv5);
+
+    trim_leading_zeroes(outbuf, bufsize, bufsize - 2);
+    return outbuf;
+}
+
+/*
+ * Binary to hex conversion. Reasonably simple (only a spot of bit
+ * twiddling to choose whether to output a digit or a letter for each
+ * nibble).
+ */
+static char *mp_get_hex_internal(mp_int *x, uint8_t letter_offset)
+{
+    size_t nibbles = x->nw * BIGNUM_INT_BYTES * 2;
+    size_t bufsize = nibbles + 1;
+    char *outbuf = snewn(bufsize, char);
+    outbuf[nibbles] = '\0';
+
+    for (size_t nibble = 0; nibble < nibbles; nibble++) {
+        size_t word_idx = nibble / (BIGNUM_INT_BYTES*2);
+        size_t nibble_within_word = nibble % (BIGNUM_INT_BYTES*2);
+        uint8_t digitval = 0xF & (x->w[word_idx] >> (nibble_within_word * 4));
+
+        uint8_t mask = -((digitval + 6) >> 4);
+        char digit = digitval + '0' + (letter_offset & mask);
+        outbuf[nibbles-1 - nibble] = digit;
+    }
+
+    trim_leading_zeroes(outbuf, bufsize, nibbles - 1);
+    return outbuf;
+}
+
+char *mp_get_hex(mp_int *x)
+{
+    return mp_get_hex_internal(x, 'a' - ('0'+10));
+}
+
+char *mp_get_hex_uppercase(mp_int *x)
+{
+    return mp_get_hex_internal(x, 'A' - ('0'+10));
+}
+
+/*
+ * Routines for reading and writing the SSH-1 and SSH-2 wire formats
+ * for multiprecision integers, declared in marshal.h.
+ *
+ * These can't avoid having control flow dependent on the true bit
+ * size of the number, because the wire format requires the number of
+ * output bytes to depend on that.
+ */
+void BinarySink_put_mp_ssh1(BinarySink *bs, mp_int *x)
+{
+    size_t bits = mp_get_nbits(x);
+    size_t bytes = (bits + 7) / 8;
+
+    assert(bits < 0x10000);
+    put_uint16(bs, bits);
+    for (size_t i = bytes; i-- > 0 ;)
+        put_byte(bs, mp_get_byte(x, i));
+}
+
+void BinarySink_put_mp_ssh2(BinarySink *bs, mp_int *x)
+{
+    size_t bytes = (mp_get_nbits(x) + 8) / 8;
+
+    put_uint32(bs, bytes);
+    for (size_t i = bytes; i-- > 0 ;)
+        put_byte(bs, mp_get_byte(x, i));
+}
+
+mp_int *BinarySource_get_mp_ssh1(BinarySource *src)
+{
+    unsigned bitc = get_uint16(src);
+    ptrlen bytes = get_data(src, (bitc + 7) / 8);
+    if (get_err(src)) {
+        return mp_from_integer(0);
+    } else {
+        mp_int *toret = mp_from_bytes_be(bytes);
+        /* SSH-1.5 spec says that it's OK for the prefix uint16 to be
+         * _greater_ than the actual number of bits */
+        if (mp_get_nbits(toret) > bitc) {
+            src->err = BSE_INVALID;
+            mp_free(toret);
+            toret = mp_from_integer(0);
+        }
+        return toret;
+    }
+}
+
+mp_int *BinarySource_get_mp_ssh2(BinarySource *src)
+{
+    ptrlen bytes = get_string(src);
+    if (get_err(src)) {
+        return mp_from_integer(0);
+    } else {
+        const unsigned char *p = bytes.ptr;
+        if ((bytes.len > 0 &&
+             ((p[0] & 0x80) ||
+              (p[0] == 0 && (bytes.len <= 1 || !(p[1] & 0x80)))))) {
+            src->err = BSE_INVALID;
+            return mp_from_integer(0);
+        }
+        return mp_from_bytes_be(bytes);
+    }
+}
+
+/*
+ * Make an mp_int structure whose words array aliases a subinterval of
+ * some other mp_int. This makes it easy to read or write just the low
+ * or high words of a number, e.g. to add a number starting from a
+ * high bit position, or to reduce mod 2^{n*BIGNUM_INT_BITS}.
+ *
+ * The convention throughout this code is that when we store an mp_int
+ * directly by value, we always expect it to be an alias of some kind,
+ * so its words array won't ever need freeing. Whereas an 'mp_int *'
+ * has an owner, who knows whether it needs freeing or whether it was
+ * created by address-taking an alias.
+ */
+static mp_int mp_make_alias(mp_int *in, size_t offset, size_t len)
+{
+    /*
+     * Bounds-check the offset and length so that we always return
+     * something valid, even if it's not necessarily the length the
+     * caller asked for.
+     */
+    if (offset > in->nw)
+        offset = in->nw;
+    if (len > in->nw - offset)
+        len = in->nw - offset;
+
+    mp_int toret;
+    toret.nw = len;
+    toret.w = in->w + offset;
+    return toret;
+}
+
+/*
+ * A special case of mp_make_alias: in some cases we preallocate a
+ * large mp_int to use as scratch space (to avoid pointless
+ * malloc/free churn in recursive or iterative work).
+ *
+ * mp_alloc_from_scratch creates an alias of size 'len' to part of
+ * 'pool', and adjusts 'pool' itself so that further allocations won't
+ * overwrite that space.
+ *
+ * There's no free function to go with this. Typically you just copy
+ * the pool mp_int by value, allocate from the copy, and when you're
+ * done with those allocations, throw the copy away and go back to the
+ * original value of pool. (A mark/release system.)
+ */
+static mp_int mp_alloc_from_scratch(mp_int *pool, size_t len)
+{
+    assert(len <= pool->nw);
+    mp_int toret = mp_make_alias(pool, 0, len);
+    *pool = mp_make_alias(pool, len, pool->nw);
+    return toret;
+}
+
+/*
+ * Internal component common to lots of assorted add/subtract code.
+ * Reads words from a,b; writes into w_out (which might be NULL if the
+ * output isn't even needed). Takes an input carry flag in 'carry',
+ * and returns the output carry. Each word read from b is ANDed with
+ * b_and and then XORed with b_xor.
+ *
+ * So you can implement addition by setting b_and to all 1s and b_xor
+ * to 0; you can subtract by making b_xor all 1s too (effectively
+ * bit-flipping b) and also passing 1 as the input carry (to turn
+ * one's complement into two's complement). And you can do conditional
+ * add/subtract by choosing b_and to be all 1s or all 0s based on a
+ * condition, because the value of b will be totally ignored if b_and
+ * == 0.
+ */
+static BignumCarry mp_add_masked_into(
+    BignumInt *w_out, size_t rw, mp_int *a, mp_int *b,
+    BignumInt b_and, BignumInt b_xor, BignumCarry carry)
+{
+    for (size_t i = 0; i < rw; i++) {
+        BignumInt aword = mp_word(a, i), bword = mp_word(b, i), out;
+        bword = (bword & b_and) ^ b_xor;
+        BignumADC(out, carry, aword, bword, carry);
+        if (w_out)
+            w_out[i] = out;
+    }
+    return carry;
+}
+
+/*
+ * Like the public mp_add_into except that it returns the output carry.
+ */
+static inline BignumCarry mp_add_into_internal(mp_int *r, mp_int *a, mp_int *b)
+{
+    return mp_add_masked_into(r->w, r->nw, a, b, ~(BignumInt)0, 0, 0);
+}
+
+void mp_add_into(mp_int *r, mp_int *a, mp_int *b)
+{
+    mp_add_into_internal(r, a, b);
+}
+
+void mp_sub_into(mp_int *r, mp_int *a, mp_int *b)
+{
+    mp_add_masked_into(r->w, r->nw, a, b, ~(BignumInt)0, ~(BignumInt)0, 1);
+}
+
+static void mp_cond_negate(mp_int *r, mp_int *x, unsigned yes)
+{
+    BignumCarry carry = yes;
+    BignumInt flip = -(BignumInt)yes;
+    for (size_t i = 0; i < r->nw; i++) {
+        BignumInt xword = mp_word(x, i);
+        xword ^= flip;
+        BignumADC(r->w[i], carry, 0, xword, carry);
+    }
+}
+
+/*
+ * Similar to mp_add_masked_into, but takes a C integer instead of an
+ * mp_int as the masked operand, consumed one word per iteration.
+ */
+static BignumCarry mp_add_masked_integer_into(
+    BignumInt *w_out, size_t rw, mp_int *a, uintmax_t b,
+    BignumInt b_and, BignumInt b_xor, BignumCarry carry)
+{
+    for (size_t i = 0; i < rw; i++) {
+        BignumInt aword = mp_word(a, i);
+        size_t shift = i * BIGNUM_INT_BITS;
+        BignumInt bword = shift < 8 * sizeof(b) ? b >> shift : 0;
+        BignumInt out;
+        bword = (bword ^ b_xor) & b_and;
+        BignumADC(out, carry, aword, bword, carry);
+        if (w_out)
+            w_out[i] = out;
+    }
+    return carry;
+}
+
+/* Store a + n in r (r->nw words); the carry out is discarded. */
+void mp_add_integer_into(mp_int *r, mp_int *a, uintmax_t n)
+{
+    const BignumInt keep_all_bits = ~(BignumInt)0;
+    (void)mp_add_masked_integer_into(r->w, r->nw, a, n,
+                                     keep_all_bits, 0, 0);
+}
+
+/*
+ * Store a - n in r (r->nw words), computed as a + ~n + 1; the carry
+ * out is discarded.
+ */
+void mp_sub_integer_into(mp_int *r, mp_int *a, uintmax_t n)
+{
+    const BignumInt all_ones = ~(BignumInt)0;
+    (void)mp_add_masked_integer_into(r->w, r->nw, a, n,
+                                     all_ones, all_ones, 1);
+}
+
+/*
+ * Sets r to a + (n << (word_index * BIGNUM_INT_BITS)), treating
+ * word_index as secret data: every word of r is processed in the
+ * same way, with masks rather than branches selecting where the
+ * words of n are added in. Any carry out of the top word of r is
+ * discarded.
+ */
+static void mp_add_integer_into_shifted_by_words(
+    mp_int *r, mp_int *a, uintmax_t n, size_t word_index)
+{
+    unsigned indicator = 0;
+    BignumCarry carry = 0;
+
+    for (size_t i = 0; i < r->nw; i++) {
+        /* indicator becomes 1 when we reach the index that the least
+         * significant bits of n want to be placed at, and it stays 1
+         * thereafter. */
+        indicator |= 1 ^ normalise_to_1(i ^ word_index);
+
+        /* If indicator is 1, we add the low bits of n into r, and
+         * shift n down. If it's 0, we add zero bits into r, and
+         * leave n alone. */
+        BignumInt bword = n & -(BignumInt)indicator;
+        /* The ?: avoids shifting n by 64 or more bits when a
+         * BignumInt is that wide (NOTE(review): this presumably
+         * takes uintmax_t to be 64 bits - confirm). */
+        uintmax_t new_n = (BIGNUM_INT_BITS < 64 ? n >> BIGNUM_INT_BITS : 0);
+        /* Branch-free select: n becomes new_n only if indicator is 1 */
+        n ^= (n ^ new_n) & -(uintmax_t)indicator;
+
+        BignumInt aword = mp_word(a, i);
+        BignumInt out;
+        BignumADC(out, carry, aword, bword, carry);
+        r->w[i] = out;
+    }
+}
+
+/*
+ * Store a * n in r (r->nw words). Asserts that no carry is left over
+ * once every word of r has been written.
+ */
+void mp_mul_integer_into(mp_int *r, mp_int *a, uint16_t n)
+{
+    const BignumInt factor = n;
+    BignumInt carry = 0;
+    size_t idx = 0;
+
+    while (idx < r->nw) {
+        /* The carry from each word's multiply feeds into the next */
+        BignumInt a_word = mp_word(a, idx);
+        BignumMULADD(carry, r->w[idx], a_word, factor, carry);
+        idx++;
+    }
+    assert(!carry);
+}
+
+/*
+ * Store a + b in r if the low bit of 'yes' is set, or just a
+ * otherwise, without branching on 'yes'.
+ */
+void mp_cond_add_into(mp_int *r, mp_int *a, mp_int *b, unsigned yes)
+{
+    /* All 1s to let b through, all 0s to replace it with zero */
+    BignumInt b_enable = -(BignumInt)(yes & 1);
+    mp_add_masked_into(r->w, r->nw, a, b, b_enable, 0, 0);
+}
+
+/*
+ * Store a - b in r if the low bit of 'yes' is set, or just a
+ * otherwise, without branching on 'yes'.
+ */
+void mp_cond_sub_into(mp_int *r, mp_int *a, mp_int *b, unsigned yes)
+{
+    BignumInt all_or_nothing = -(BignumInt)(yes & 1);
+    /* When enabled: a + ~b + 1. When disabled: a + 0 + 0. */
+    BignumCarry carry_in = 1 & all_or_nothing;
+    mp_add_masked_into(r->w, r->nw, a, b,
+                       all_or_nothing, all_or_nothing, carry_in);
+}
+
+/*
+ * Unsigned 'higher or same' test: returns 1 if a >= b and 0 if not.
+ * Computes a - b (as a + ~b + 1) over the wider of the two inputs,
+ * throws the difference away, and returns the carry out.
+ */
+unsigned mp_cmp_hs(mp_int *a, mp_int *b)
+{
+    const BignumInt all_ones = ~(BignumInt)0;
+    size_t width = size_t_max(a->nw, b->nw);
+    return mp_add_masked_into(NULL, width, a, b, all_ones, all_ones, 1);
+}
+
+/*
+ * Unsigned comparison of an mp_int against a C integer: returns 1 if
+ * x >= n and 0 otherwise, via the carry out of x + ~n + 1.
+ */
+unsigned mp_hs_integer(mp_int *x, uintmax_t n)
+{
+    BignumInt carry = 1;
+    /* Cover every word of n even if x is shorter than that: stopping
+     * at x->nw would ignore the high bits of n and could report a
+     * small x as being >= a large n. */
+    size_t nwords = (sizeof(n) + BIGNUM_INT_BYTES - 1) / BIGNUM_INT_BYTES;
+    for (size_t i = 0, e = size_t_max(x->nw, nwords); i < e; i++) {
+        size_t shift = i * BIGNUM_INT_BITS;
+        /* shift is in bits, so bound it by the bit width of n */
+        BignumInt nword = shift < CHAR_BIT * sizeof(n) ? n >> shift : 0;
+        BignumInt dummy_out;
+        BignumADC(dummy_out, carry, mp_word(x, i), ~nword, carry);
+        (void)dummy_out;
+    }
+    return carry;
+}
+
+/*
+ * Branch-free equality test: returns 1 if a == b and 0 otherwise.
+ * XORs corresponding words (over the longer of the two inputs), ORs
+ * all the results together, and normalises the outcome with
+ * normalise_to_1.
+ */
+unsigned mp_cmp_eq(mp_int *a, mp_int *b)
+{
+    size_t width = size_t_max(a->nw, b->nw);
+    BignumInt mismatch = 0;
+    size_t idx = 0;
+
+    while (idx < width) {
+        mismatch |= mp_word(a, idx) ^ mp_word(b, idx);
+        idx++;
+    }
+
+    /* normalise_to_1 reports 'nonzero'; invert it to mean 'equal' */
+    return 1 ^ normalise_to_1(mismatch);
+}
+
+/*
+ * Equality test against a C integer: returns 1 if x == n and 0
+ * otherwise. XORs each word of x with the matching word of n, ORs
+ * the results together and normalises with normalise_to_1.
+ */
+unsigned mp_eq_integer(mp_int *x, uintmax_t n)
+{
+    BignumInt diff = 0;
+    /* Compare every word of n, even beyond the end of x, so that a
+     * short x can't compare equal to an n with extra high bits set. */
+    size_t nwords = (sizeof(n) + BIGNUM_INT_BYTES - 1) / BIGNUM_INT_BYTES;
+    for (size_t i = 0, e = size_t_max(x->nw, nwords); i < e; i++) {
+        size_t shift = i * BIGNUM_INT_BITS;
+        /* shift is in bits, so bound it by the bit width of n */
+        BignumInt nword = shift < CHAR_BIT * sizeof(n) ? n >> shift : 0;
+        diff |= mp_word(x, i) ^ nword;
+    }
+    return 1 ^ normalise_to_1(diff);   /* return 1 if diff _is_ zero */
+}
+
+/* Store 0 - a in r, over r->nw words. */
+void mp_neg_into(mp_int *r, mp_int *a)
+{
+    /* A zero-word mp_int stands in for the constant 0; only its
+     * word count is set. */
+    mp_int nothing;
+    nothing.nw = 0;
+    mp_sub_into(r, &nothing, a);
+}
+
+/*
+ * Return a newly allocated x + y, one word wider than the wider
+ * input so that the sum cannot overflow.
+ */
+mp_int *mp_add(mp_int *x, mp_int *y)
+{
+    size_t width = size_t_max(x->nw, y->nw) + 1;
+    mp_int *sum = mp_make_sized(width);
+    mp_add_into(sum, x, y);
+    return sum;
+}
+
+/*
+ * Return a newly allocated x - y, as wide as the wider input. The
+ * subtraction is done modulo that width, so it wraps if y > x.
+ */
+mp_int *mp_sub(mp_int *x, mp_int *y)
+{
+    size_t width = size_t_max(x->nw, y->nw);
+    mp_int *difference = mp_make_sized(width);
+    mp_sub_into(difference, x, y);
+    return difference;
+}
+
+/* Return a newly allocated 0 - a, with the same number of words as a. */
+mp_int *mp_neg(mp_int *a)
+{
+    size_t width = a->nw;
+    mp_int *negated = mp_make_sized(width);
+    mp_neg_into(negated, a);
+    return negated;
+}
+
+/*
+ * Internal routine: schoolbook O(N^2) multiply-and-accumulate,
+ * r <- r + a*b. Anything that would land beyond the top word of r is
+ * dropped.
+ */
+static void mp_mul_add_simple(mp_int *r, mp_int *a, mp_int *b)
+{
+    const size_t alen = a->nw, blen = b->nw, rlen = r->nw;
+
+    for (size_t i = 0; i < alen && i < rlen; i++) {
+        BignumInt adata = a->w[i], carry = 0;
+        size_t k = i;       /* output position, starting at word i */
+
+        /* Add adata * b into r at word offset i */
+        for (size_t j = 0; j < blen && k < rlen; j++, k++)
+            BignumMULADD2(carry, r->w[k], adata, b->w[j], r->w[k], carry);
+
+        /* Ripple the leftover carry up through the rest of r */
+        for (; k < rlen; k++)
+            BignumADC(r->w[k], carry, 0, r->w[k], carry);
+    }
+}
+
+/*
+ * Multiplications whose effective input length (in words) is below
+ * this threshold skip Karatsuba and use the simple O(N^2) routine.
+ */
+#ifndef KARATSUBA_THRESHOLD      /* allow redefinition via -D for testing */
+#define KARATSUBA_THRESHOLD 50
+#endif
+
+/*
+ * Upper bound, in words, on the scratch space mp_mul_internal needs
+ * for a multiplication of input length n.
+ */
+static inline size_t mp_mul_scratchspace_unary(size_t n)
+{
+    /*
+     * This is a deliberately crude overestimate.
+     *
+     * The hungriest path through mp_mul_internal is the main
+     * Karatsuba branch. That allocates (a0+a1) and (b0+b1), each a
+     * little over n/2 words, plus their product, a little over n
+     * words, and then recurses to multiply those two sums together.
+     *
+     * Ignoring every 'a little over', the space M(n) would obey
+     * M(n) = 2n + M(n/2), whose solution is M(n) = 4n. With a small
+     * constant w (a word or two) of slop in each place, and assuming
+     * some k exists with M(n) <= kn, the recurrence becomes
+     * kn = 2n + w + k(n/2 + w), i.e. kn/2 = 2n + (k+1)w. Scratch
+     * space is only needed at all once n reaches 50, so
+     * 2n + (k+1)w can be bounded above by 3n, which gives k = 6.
+     *
+     * Hence 6n words is claimed to be enough, and mp_mul_internal
+     * checks that claim with an assertion at every level of the
+     * recursion.
+     */
+    const size_t words_per_input_word = 6;
+    return n * words_per_input_word;
+}
+
+/*
+ * Scratch words needed to multiply an aw-word number by a bw-word one
+ * into an rw-word output. The effective input length is the longer
+ * input, capped at the output length.
+ */
+static size_t mp_mul_scratchspace(size_t rw, size_t aw, size_t bw)
+{
+    size_t longer_input = size_t_max(aw, bw);
+    return mp_mul_scratchspace_unary(size_t_min(rw, longer_input));
+}
+
+/*
+ * Recursive multiplication: sets r to a*b, truncated to r->nw words.
+ *
+ * 'scratch' is passed by value, so pieces allocated from it in one
+ * call are handed out again by the next sibling call. It must hold at
+ * least mp_mul_scratchspace_unary(inlen) words, where inlen is the
+ * longer input length capped at r->nw; this is checked by assertion.
+ */
+static void mp_mul_internal(mp_int *r, mp_int *a, mp_int *b, mp_int scratch)
+{
+    size_t inlen = size_t_min(r->nw, size_t_max(a->nw, b->nw));
+    assert(scratch.nw >= mp_mul_scratchspace_unary(inlen));
+
+    /* The simple routine accumulates into r, so start from zero */
+    mp_clear(r);
+
+    if (inlen < KARATSUBA_THRESHOLD || a->nw == 0 || b->nw == 0) {
+        /*
+         * The input numbers are too small to bother optimising. Go
+         * straight to the simple primitive approach.
+         */
+        mp_mul_add_simple(r, a, b);
+        return;
+    }
+
+    /*
+     * Karatsuba divide-and-conquer algorithm. We cut each input in
+     * half, so that it's expressed as two big 'digits' in a giant
+     * base D:
+     *
+     *   a = a_1 D + a_0
+     *   b = b_1 D + b_0
+     *
+     * Then the product is of course
+     *
+     *   ab = a_1 b_1 D^2 + (a_1 b_0 + a_0 b_1) D + a_0 b_0
+     *
+     * and we compute the three coefficients by recursively calling
+     * ourself to do half-length multiplications.
+     *
+     * The clever bit that makes this worth doing is that we only need
+     * _one_ half-length multiplication for the central coefficient
+     * rather than the two that it obviously looks like, because we can
+     * use a single multiplication to compute
+     *
+     *   (a_1 + a_0) (b_1 + b_0) = a_1 b_1 + a_1 b_0 + a_0 b_1 + a_0 b_0
+     *
+     * and then we subtract the other two coefficients (a_1 b_1 and
+     * a_0 b_0) which we were computing anyway.
+     *
+     * Hence we get to multiply two numbers of length N in about three
+     * times as much work as it takes to multiply numbers of length
+     * N/2, which is obviously better than the four times as much work
+     * it would take if we just did a long conventional multiply.
+     */
+
+    /* Break up the input as botlen + toplen, with botlen >= toplen.
+     * The 'base' D is equal to 2^{botlen * BIGNUM_INT_BITS}. */
+    size_t toplen = inlen / 2;
+    size_t botlen = inlen - toplen;
+
+    /* Alias bignums that address the two halves of a,b, and useful
+     * pieces of r. r1 and r2 ask for r->nw words, more than remain
+     * after their offsets. NOTE(review): this presumably relies on
+     * mp_make_alias clamping the length - confirm. */
+    mp_int a0 = mp_make_alias(a, 0, botlen);
+    mp_int b0 = mp_make_alias(b, 0, botlen);
+    mp_int a1 = mp_make_alias(a, botlen, toplen);
+    mp_int b1 = mp_make_alias(b, botlen, toplen);
+    mp_int r0 = mp_make_alias(r, 0, botlen*2);
+    mp_int r1 = mp_make_alias(r, botlen, r->nw);
+    mp_int r2 = mp_make_alias(r, botlen*2, r->nw);
+
+    /* Recurse to compute a0*b0 and a1*b1, in their correct positions
+     * in the output bignum. They can't overlap. */
+    mp_mul_internal(&r0, &a0, &b0, scratch);
+    mp_mul_internal(&r2, &a1, &b1, scratch);
+
+    if (r->nw < inlen*2) {
+        /*
+         * The output buffer isn't large enough to require the whole
+         * product, so some of a1*b1 won't have been stored. In that
+         * case we won't try to do the full Karatsuba optimisation;
+         * we'll just recurse again to compute a0*b1 and a1*b0 - or at
+         * least as much of them as the output buffer size requires -
+         * and add each one in.
+         */
+        mp_int s = mp_alloc_from_scratch(
+            &scratch, size_t_min(botlen+toplen, r1.nw));
+
+        mp_mul_internal(&s, &a0, &b1, scratch);
+        mp_add_into(&r1, &r1, &s);
+        mp_mul_internal(&s, &a1, &b0, scratch);
+        mp_add_into(&r1, &r1, &s);
+        return;
+    }
+
+    /* a0+a1 and b0+b1: one word longer than botlen, to hold the
+     * carry out of each addition */
+    mp_int asum = mp_alloc_from_scratch(&scratch, botlen+1);
+    mp_int bsum = mp_alloc_from_scratch(&scratch, botlen+1);
+    mp_add_into(&asum, &a0, &a1);
+    mp_add_into(&bsum, &b0, &b1);
+
+    /* Their product */
+    mp_int product = mp_alloc_from_scratch(&scratch, botlen*2+1);
+    mp_mul_internal(&product, &asum, &bsum, scratch);
+
+    /* Subtract off the outer terms we already have */
+    mp_sub_into(&product, &product, &r0);
+    mp_sub_into(&product, &product, &r2);
+
+    /* And add it in with the right offset. */
+    mp_add_into(&r1, &r1, &product);
+}
+
+/*
+ * Store a * b in r, truncated to r->nw words. Scratch space is
+ * allocated for the duration of the call and freed afterwards.
+ */
+void mp_mul_into(mp_int *r, mp_int *a, mp_int *b)
+{
+    size_t scratch_words = mp_mul_scratchspace(r->nw, a->nw, b->nw);
+    mp_int *workspace = mp_make_sized(scratch_words);
+    mp_mul_internal(r, a, b, *workspace);
+    mp_free(workspace);
+}
+
+/* Return a newly allocated x * y, wide enough for the full product. */
+mp_int *mp_mul(mp_int *x, mp_int *y)
+{
+    size_t width = x->nw + y->nw;
+    mp_int *product = mp_make_sized(width);
+    mp_mul_into(product, x, y);
+    return product;
+}
+
+/*
+ * Store a << bits in r, truncated to r->nw words. The control flow
+ * depends on 'bits', so the shift count is not hidden.
+ *
+ * The words of r are written from the top down. Each output word
+ * depends only on input words at the same or lower indices, so going
+ * downwards means nothing is overwritten before it has been read,
+ * and r may safely be the same mp_int as a.
+ */
+void mp_lshift_fixed_into(mp_int *r, mp_int *a, size_t bits)
+{
+    size_t words = bits / BIGNUM_INT_BITS;
+    size_t bitoff = bits % BIGNUM_INT_BITS;
+
+    for (size_t i = r->nw; i-- > 0 ;) {
+        if (i < words) {
+            /* Below the shift distance: nothing but zeroes arrive */
+            r->w[i] = 0;
+        } else {
+            r->w[i] = mp_word(a, i - words);
+            if (bitoff != 0) {
+                /* Merge in the bits shifted up out of the word below.
+                 * Skipped when bitoff is 0, since the complementary
+                 * shift count would then be a full word width. */
+                r->w[i] <<= bitoff;
+                if (i > words)
+                    r->w[i] |= mp_word(a, i - words - 1) >>
+                        (BIGNUM_INT_BITS - bitoff);
+            }
+        }
+    }
+}
+
+/*
+ * Store a >> bits in r, truncated to r->nw words. The control flow
+ * depends on 'bits', so the shift count is not hidden.
+ */
+void mp_rshift_fixed_into(mp_int *r, mp_int *a, size_t bits)
+{
+    const size_t word_skip = bits / BIGNUM_INT_BITS;
+    const size_t bit_skip = bits % BIGNUM_INT_BITS;
+
+    for (size_t i = 0; i < r->nw; i++) {
+        BignumInt *dst = &r->w[i];
+        *dst = mp_word(a, i + word_skip);
+        if (bit_skip == 0)
+            continue;          /* whole-word shift: nothing to merge */
+
+        /* Low part from this input word, high part from the bits
+         * that fall off the bottom of the next one up */
+        *dst >>= bit_skip;
+        *dst |= mp_word(a, i + word_skip + 1) <<
+            (BIGNUM_INT_BITS - bit_skip);
+    }
+}
+
+/*
+ * Return a newly allocated x >> bits. The result is shorter than x by
+ * the number of whole words shifted out (down to zero words).
+ */
+mp_int *mp_rshift_fixed(mp_int *x, size_t bits)
+{
+    size_t dropped = size_t_min(x->nw, bits / BIGNUM_INT_BITS);
+    mp_int *shifted = mp_make_sized(x->nw - dropped);
+    mp_rshift_fixed_into(shifted, x, bits);
+    return shifted;
+}
+
+/*
+ * Safe right shift is done using the same technique as
+ * trim_leading_zeroes above: you make an n-word shift by composing
+ * an appropriate subset of power-of-2-sized shifts, so it takes
+ * log_2(n) loop iterations each of which does a different shift by
+ * a power of 2 words, using the usual bit twiddling to make the
+ * whole shift conditional on the appropriate bit of n.
+ *
+ * Returns x >> bits in a new mp_int obtained from mp_copy(x).
+ */
+mp_int *mp_rshift_safe(mp_int *x, size_t bits)
+{
+    size_t wordshift = bits / BIGNUM_INT_BITS;
+    size_t bitshift = bits % BIGNUM_INT_BITS;
+
+    mp_int *r = mp_copy(x);
+
+    /* If wordshift exceeds r->nw, the subtraction wraps round and
+     * sets the top bit, which becomes the flag passed to
+     * mp_cond_clear. */
+    unsigned clear = (r->nw - wordshift) >> (CHAR_BIT * sizeof(size_t) - 1);
+    mp_cond_clear(r, clear);
+
+    for (unsigned bit = 0; r->nw >> bit; bit++) {
+        /* Do the shift in size_t: a plain '1 << bit' is an int shift,
+         * which is undefined once bit reaches the width of int. */
+        size_t word_offset = (size_t)1 << bit;
+        /* All 1s if this power of 2 is part of wordshift, else 0 */
+        BignumInt mask = -(BignumInt)((wordshift >> bit) & 1);
+        for (size_t i = 0; i < r->nw; i++) {
+            BignumInt w = mp_word(r, i + word_offset);
+            r->w[i] ^= (r->w[i] ^ w) & mask;
+        }
+    }
+
+    /*
+     * That's done the shifting by words; now we do the shifting by
+     * bits.
+     *
+     * I assume here that register-controlled right shifts are
+     * time-constant. If they're not, I could replace this with
+     * another loop over bit positions.
+     *
+     * When bitshift is 0, upshift would be a whole word width, which
+     * is not a usable shift count. In that case no_shift is 1: the
+     * upshift count is forced to 0 and the upshifted word is masked
+     * away completely. (NOTE(review): this presumably relies on
+     * BIGNUM_INT_BITS_BITS being log2(BIGNUM_INT_BITS) - confirm.)
+     */
+    size_t upshift = BIGNUM_INT_BITS - bitshift;
+    size_t no_shift = (upshift >> BIGNUM_INT_BITS_BITS);
+    upshift &= ~-(size_t)no_shift;
+    BignumInt upshifted_mask = ~-(BignumInt)no_shift;
+
+    for (size_t i = 0; i < r->nw; i++) {
+        r->w[i] = (r->w[i] >> bitshift) |
+            ((mp_word(r, i+1) << upshift) & upshifted_mask);
+    }
+
+    return r;
+}
+
+/*
+ * Reduce x mod 2^p in place: keep the low p bits of x and clear
+ * everything above them. The control flow depends on p, so p is not
+ * hidden.
+ */
+void mp_reduce_mod_2to(mp_int *x, size_t p)
+{
+    size_t word = p / BIGNUM_INT_BITS;
+    /* Mask for the word containing bit p: keeps only the bits below
+     * it. Built in BignumInt so the shift is valid for every bit
+     * position within a word. */
+    BignumInt mask = ((BignumInt)1 << (p % BIGNUM_INT_BITS)) - 1;
+    for (; word < x->nw; word++) {
+        x->w[word] &= mask;
+        /* Every word after the first lies wholly at or above bit p,
+         * so it must be zeroed, not preserved. */
+        mask = 0;
+    }
+}
+
+/*
+ * Inverse mod 2^n is computed by an iterative technique which doubles
+ * the number of bits at each step.
+ *
+ * Returns the inverse of x mod 2^p in a newly allocated mp_int of
+ * ceil(p / BIGNUM_INT_BITS) words. x must be odd and p nonzero; both
+ * are checked by assertion.
+ */
+mp_int *mp_invert_mod_2to(mp_int *x, size_t p)
+{
+    /* Input checks: x must be coprime to the modulus, i.e. odd, and p
+     * can't be zero */
+    assert(x->nw > 0);
+    assert(x->w[0] & 1);
+    assert(p > 0);
+
+    size_t rw = (p + BIGNUM_INT_BITS - 1) / BIGNUM_INT_BITS;
+    mp_int *r = mp_make_sized(rw);
+
+    /* One allocation serves both purposes: mul_scratch is carved off
+     * for the multiplications, and what is left in scratch_per_iter
+     * is handed out afresh on every pass through the loop. */
+    size_t mul_scratchsize = mp_mul_scratchspace(2*rw, rw, rw);
+    mp_int *scratch_orig = mp_make_sized(6 * rw + mul_scratchsize);
+    mp_int scratch_per_iter = *scratch_orig;
+    mp_int mul_scratch = mp_alloc_from_scratch(
+        &scratch_per_iter, mul_scratchsize);
+
+    /* The inverse of any odd number mod 2^1 is 1 */
+    r->w[0] = 1;
+
+    for (size_t b = 1; b < p; b <<= 1) {
+        /*
+         * In each step of this iteration, we have the inverse of x
+         * mod 2^b, and we want the inverse of x mod 2^{2b}.
+         *
+         * Write B = 2^b for convenience, so we want x^{-1} mod B^2.
+         * Let x = x_0 + B x_1 + k B^2, with 0 <= x_0,x_1 < B.
+         *
+         * We want to find r_0 and r_1 such that
+         *    (r_1 B + r_0) (x_1 B + x_0) == 1 (mod B^2)
+         *
+         * To begin with, we know r_0 must be the inverse mod B of
+         * x_0, i.e. of x, i.e. it is the inverse we computed in the
+         * previous iteration. So now all we need is r_1.
+         *
+         * Multiplying out, neglecting multiples of B^2, and writing
+         * x_0 r_0 = K B + 1, we have
+         *
+         *    r_1 x_0 B + r_0 x_1 B + K B == 0                    (mod B^2)
+         * =>                   r_1 x_0 B == - r_0 x_1 B - K B    (mod B^2)
+         * =>                     r_1 x_0 == - r_0 x_1 - K        (mod B)
+         * =>                         r_1 == r_0 (- r_0 x_1 - K)  (mod B)
+         *
+         * (the last step because we multiply through by the inverse
+         * of x_0, which we already know is r_0).
+         */
+
+        /* Bw and B2w are the word counts needed for b and 2b bits */
+        mp_int scratch_this_iter = scratch_per_iter;
+        size_t Bw = (b + BIGNUM_INT_BITS - 1) / BIGNUM_INT_BITS;
+        size_t B2w = (2*b + BIGNUM_INT_BITS - 1) / BIGNUM_INT_BITS;
+
+        /* Start by finding K: multiply x_0 by r_0, and shift down. */
+        mp_int x0 = mp_alloc_from_scratch(&scratch_this_iter, Bw);
+        mp_copy_into(&x0, x);
+        mp_reduce_mod_2to(&x0, b);
+        mp_int r0 = mp_make_alias(r, 0, Bw);
+        mp_int Kshift = mp_alloc_from_scratch(&scratch_this_iter, B2w);
+        mp_mul_internal(&Kshift, &x0, &r0, mul_scratch);
+        mp_int K = mp_alloc_from_scratch(&scratch_this_iter, Bw);
+        mp_rshift_fixed_into(&K, &Kshift, b);
+
+        /* Now compute the product r_0 x_1, reusing the space of Kshift. */
+        mp_int x1 = mp_alloc_from_scratch(&scratch_this_iter, Bw);
+        mp_rshift_fixed_into(&x1, x, b);
+        mp_reduce_mod_2to(&x1, b);
+        mp_int r0x1 = mp_make_alias(&Kshift, 0, Bw);
+        mp_mul_internal(&r0x1, &r0, &x1, mul_scratch);
+
+        /* Add K to that. */
+        mp_add_into(&r0x1, &r0x1, &K);
+
+        /* Negate it. */
+        mp_neg_into(&r0x1, &r0x1);
+
+        /* Multiply by r_0. */
+        mp_int r1 = mp_alloc_from_scratch(&scratch_this_iter, Bw);
+        mp_mul_internal(&r1, &r0, &r0x1, mul_scratch);
+        mp_reduce_mod_2to(&r1, b);
+
+        /* That's our r_1, so add it on to r_0 to get the full inverse
+         * output from this iteration. K's storage is reused to hold
+         * r_1 shifted up by the sub-word part of b; the whole-word
+         * part is handled by aliasing r from word Bpos upwards. */
+        mp_lshift_fixed_into(&K, &r1, (b % BIGNUM_INT_BITS));
+        size_t Bpos = b / BIGNUM_INT_BITS;
+        mp_int r1_position = mp_make_alias(r, Bpos, B2w-Bpos);
+        mp_add_into(&r1_position, &r1_position, &K);
+    }
+
+    /* Finally, reduce mod the precise desired number of bits. */
+    mp_reduce_mod_2to(r, p);
+
+    mp_free(scratch_orig);
+    return r;
+}
+
+/*
+ * Number of scratch words a MontyContext keeps for its own use:
+ * 3*rw + pw words for intermediate values, plus whatever the
+ * multiplication routine needs for a pw-word product of two rw-word
+ * inputs.
+ */
+static size_t monty_scratch_size(MontyContext *mc)
+{
+    size_t for_values = 3*mc->rw + mc->pw;
+    size_t for_multiply = mp_mul_scratchspace(mc->pw, mc->rw, mc->rw);
+    return for_values + for_multiply;
+}
+
+/*
+ * Build a Montgomery context for the given modulus. The modulus is
+ * copied into the context. The Montgomery radix r is 2^rbits, where
+ * rbits is the full bit width of the modulus's word array.
+ */
+MontyContext *monty_new(mp_int *modulus)
+{
+    MontyContext *mc = snew(MontyContext);
+
+    mc->rw = modulus->nw;
+    mc->rbits = mc->rw * BIGNUM_INT_BITS;
+    mc->pw = mc->rw * 2 + 1;
+
+    mc->m = mp_copy(modulus);
+
+    /* -m^{-1} mod r: invert first, then negate in place */
+    mc->minus_minv_mod_r = mp_invert_mod_2to(mc->m, mc->rbits);
+    mp_neg_into(mc->minus_minv_mod_r, mc->minus_minv_mod_r);
+
+    /* powers_of_r_mod_m[0] is r mod m, with r built as a 1 in word rw */
+    mp_int *radix = mp_make_sized(mc->rw + 1);
+    radix->w[mc->rw] = 1;
+    mc->powers_of_r_mod_m[0] = mp_mod(radix, mc->m);
+    mp_free(radix);
+
+    /* Each later entry is the previous one times r, mod m */
+    size_t npowers = lenof(mc->powers_of_r_mod_m);
+    for (size_t k = 1; k < npowers; k++) {
+        mp_int *r_mod_m = mc->powers_of_r_mod_m[0];
+        mp_int *previous = mc->powers_of_r_mod_m[k-1];
+        mc->powers_of_r_mod_m[k] = mp_modmul(r_mod_m, previous, mc->m);
+    }
+
+    mc->scratch = mp_make_sized(monty_scratch_size(mc));
+
+    return mc;
+}
+
+/*
+ * Make an independent copy of a Montgomery context: every mp_int in
+ * it is duplicated with mp_copy, and the copy gets its own freshly
+ * allocated scratch space.
+ */
+MontyContext *monty_copy(MontyContext *orig)
+{
+    MontyContext *mc = snew(MontyContext);
+
+    mc->rw = orig->rw;
+    mc->pw = orig->pw;
+    mc->rbits = orig->rbits;
+    mc->m = mp_copy(orig->m);
+    mc->minus_minv_mod_r = mp_copy(orig->minus_minv_mod_r);
+    /* Use the array's real length, as monty_new does, rather than a
+     * hard-coded count that could drift out of step with it */
+    for (size_t j = 0; j < lenof(mc->powers_of_r_mod_m); j++)
+        mc->powers_of_r_mod_m[j] = mp_copy(orig->powers_of_r_mod_m[j]);
+    mc->scratch = mp_make_sized(monty_scratch_size(mc));
+    return mc;
+}
+
+/*
+ * Free a Montgomery context and every mp_int it holds. The structure
+ * itself is wiped with smemclr before being released.
+ */
+void monty_free(MontyContext *mc)
+{
+    mp_free(mc->m);
+    /* Walk the whole array, sized from its declaration as monty_new
+     * does, rather than relying on a hard-coded count */
+    for (size_t j = 0; j < lenof(mc->powers_of_r_mod_m); j++)
+        mp_free(mc->powers_of_r_mod_m[j]);
+    mp_free(mc->minus_minv_mod_r);
+    mp_free(mc->scratch);
+    smemclr(mc, sizeof(*mc));
+    sfree(mc);
+}
+
+/*
+ * The main Montgomery reduction step: divides x by r modulo m.
+ *
+ * The returned mp_int is an alias into 'scratch', not a fresh
+ * allocation, so the caller must copy it out before that scratch
+ * space is cleared or reused.
+ */
+static mp_int monty_reduce_internal(MontyContext *mc, mp_int *x, mp_int scratch)
+{
+    /*
+     * The trick with Montgomery reduction is that on the one hand we
+     * want to reduce the size of the input by a factor of about r,
+     * and on the other hand, the two numbers we just multiplied were
+     * both stored with an extra factor of r multiplied in. So we
+     * computed ar*br = ab r^2, but we want to return abr, so we need
+     * to divide by r - and if we can do that by _actually dividing_
+     * by r then this also reduces the size of the number.
+     *
+     * But we can only do that if the number we're dividing by r is a
+     * multiple of r. So first we must add an adjustment to it which
+     * clears its bottom 'rbits' bits. That adjustment must be a
+     * multiple of m in order to leave the residue mod m unchanged, so
+     * the question is, what multiple of m can we add to x to make it
+     * congruent to 0 mod r? And the answer is, x * (-m)^{-1} mod r.
+     */
+
+    /* x mod r, i.e. the low rw words of x. The length passed to
+     * mp_make_alias is a word count, so it is rw, not rbits. */
+    mp_int x_lo = mp_make_alias(x, 0, mc->rw);
+
+    /* x * (-m)^{-1}, i.e. the number we want to multiply by m. Only
+     * rw words are kept, which performs the reduction mod r. */
+    mp_int k = mp_alloc_from_scratch(&scratch, mc->rw);
+    mp_mul_internal(&k, &x_lo, mc->minus_minv_mod_r, scratch);
+
+    /* m times that, i.e. the number we want to add to x */
+    mp_int mk = mp_alloc_from_scratch(&scratch, mc->pw);
+    mp_mul_internal(&mk, mc->m, &k, scratch);
+
+    /* Add it to x */
+    mp_add_into(&mk, x, &mk);
+
+    /* Divide by r, by simply making an alias to the upper words of
+     * the sum (whose low rw words are now zero) */
+    mp_int toret = mp_make_alias(&mk, mc->rw, mk.nw - mc->rw);
+
+    /*
+     * We'll generally be doing this after a multiplication of two
+     * fully reduced values. So our input could be anything up to m^2,
+     * and then we added up to rm to it. Hence, the maximum value is
+     * rm+m^2, and after dividing by r, that becomes r + m(m/r) < 2r.
+     * So a single trial-subtraction will finish reducing to the
+     * interval [0,m).
+     */
+    mp_cond_sub_into(&toret, &toret, mc->m, mp_cmp_hs(&toret, mc->m));
+    return toret;
+}
+
+/*
+ * Montgomery multiplication: r receives the Montgomery reduction of
+ * x*y. Both inputs must fit in mc->rw words.
+ */
+void monty_mul_into(MontyContext *mc, mp_int *r, mp_int *x, mp_int *y)
+{
+    assert(x->nw <= mc->rw);
+    assert(y->nw <= mc->rw);
+
+    /* Allocate from a local copy of the scratch descriptor rather
+     * than from mc->scratch itself */
+    mp_int workspace = *mc->scratch;
+    mp_int full_product = mp_alloc_from_scratch(&workspace, 2*mc->rw);
+    mp_mul_into(&full_product, x, y);
+
+    mp_int result = monty_reduce_internal(mc, &full_product, workspace);
+    mp_copy_into(r, &result);
+
+    /* Wipe the intermediate values before returning */
+    mp_clear(mc->scratch);
+}
+
+/* As monty_mul_into, but returns the result in a new rw-word mp_int. */
+mp_int *monty_mul(MontyContext *mc, mp_int *x, mp_int *y)
+{
+    mp_int *product = mp_make_sized(mc->rw);
+    monty_mul_into(mc, product, x, y);
+    return product;
+}
+
+/* Return the modulus held inside the context (not a new allocation). */
+mp_int *monty_modulus(MontyContext *mc)
+{
+    mp_int *modulus = mc->m;
+    return modulus;
+}
+
+/*
+ * Return the Montgomery representation of 1, i.e. r mod m. The
+ * pointer refers to the context's own storage, not a new allocation.
+ */
+mp_int *monty_identity(MontyContext *mc)
+{
+    mp_int *r_mod_m = mc->powers_of_r_mod_m[0];
+    return r_mod_m;
+}
+
+/*
+ * Invert a value held in Montgomery representation, returning the
+ * inverse (also in Montgomery representation) as a new mp_int.
+ */
+mp_int *monty_invert(MontyContext *mc, mp_int *x)
+{
+    /* The input is xr and the desired output is x^{-1} r, which
+     * equals (xr)^{-1} r^2. monty_mul divides by r as it reduces, so
+     * multiply the plain inverse by r^3 (powers_of_r_mod_m[2]). */
+    mp_int *plain_inverse = mp_invert(x, mc->m);
+    mp_int *r_cubed = mc->powers_of_r_mod_m[2];
+    mp_int *result = monty_mul(mc, plain_inverse, r_cubed);
+    mp_free(plain_inverse);
+    return result;
+}
+
+/*
+ * Convert x into Montgomery representation, i.e. compute x*r mod m,
+ * storing the result in r. Rather than a general mp_modmul, this
+ * multiplies x by the stored residue of r^2 mod m and lets the
+ * Montgomery reduction strip off one factor of r, so no division is
+ * needed.
+ */
+void monty_import_into(MontyContext *mc, mp_int *r, mp_int *x)
+{
+    mp_int *r_squared = mc->powers_of_r_mod_m[1];
+    monty_mul_into(mc, r, x, r_squared);
+}
+
+/*
+ * Convert x into Montgomery representation and return it as a new
+ * mp_int, by Montgomery-multiplying it with r^2 mod m.
+ */
+mp_int *monty_import(MontyContext *mc, mp_int *x)
+{
+    mp_int *r_squared = mc->powers_of_r_mod_m[1];
+    return monty_mul(mc, x, r_squared);
+}
+
+/*
+ * Convert x out of Montgomery representation into r. That means
+ * multiplying by r^{-1}, which is precisely what a Montgomery
+ * reduction does, so one reduction is all that's needed. x may be at
+ * most 2*rw words long.
+ */
+void monty_export_into(MontyContext *mc, mp_int *r, mp_int *x)
+{
+    assert(x->nw <= 2*mc->rw);
+
+    mp_int workspace = *mc->scratch;
+    mp_int plain = monty_reduce_internal(mc, x, workspace);
+    mp_copy_into(r, &plain);
+
+    /* Wipe the intermediate values before returning */
+    mp_clear(mc->scratch);
+}
+
+/* As monty_export_into, but returns the result in a new rw-word mp_int. */
+mp_int *monty_export(MontyContext *mc, mp_int *x)
+{
+    mp_int *plain = mp_make_sized(mc->rw);
+    monty_export_into(mc, plain, x);
+    return plain;
+}
+
+/*
+ * In-place Montgomery reduction: x is overwritten with its reduced
+ * value, and the context's scratch space is wiped afterwards.
+ */
+static void monty_reduce(MontyContext *mc, mp_int *x)
+{
+    mp_int workspace = *mc->scratch;
+    mp_int result = monty_reduce_internal(mc, x, workspace);
+    mp_copy_into(x, &result);
+    mp_clear(mc->scratch);
+}
+
+/*
+ * Modular exponentiation in Montgomery representation. 'base' is in
+ * Montgomery form and so is the newly allocated result; 'exponent'
+ * is an ordinary integer. Every bit position in the exponent's word
+ * array is processed identically, with mp_select_into deciding
+ * whether each product is kept.
+ */
+mp_int *monty_pow(MontyContext *mc, mp_int *base, mp_int *exponent)
+{
+    const size_t nbits = exponent->nw * BIGNUM_INT_BITS;
+
+    /* Running value of base^(2^i) */
+    mp_int *power = mp_copy(base);
+    /* Accumulated result: starts at 1 in Montgomery form */
+    mp_int *out = mp_copy(mc->powers_of_r_mod_m[0]);
+    /* Double-width buffer for each product before reduction */
+    mp_int *product = mp_make_sized(mc->rw * 2);
+
+    for (size_t i = 0 ;;) {
+        /* Always compute out * power; keep it only if bit i is set */
+        mp_mul_into(product, out, power);
+        monty_reduce(mc, product);
+        mp_select_into(out, out, product, mp_get_bit(exponent, i));
+
+        /* Stop before the final, unneeded squaring */
+        if (++i >= nbits)
+            break;
+
+        mp_mul_into(product, power, power);
+        monty_reduce(mc, product);
+        mp_copy_into(power, product);
+    }
+
+    mp_free(power);
+    mp_free(product);
+    mp_clear(mc->scratch);
+    return out;
+}
+
+/*
+ * Return base^exponent mod modulus as a new mp_int. The modulus must
+ * be odd and nonempty, and base must not have more words than the
+ * modulus. A temporary Montgomery context does the actual work.
+ */
+mp_int *mp_modpow(mp_int *base, mp_int *exponent, mp_int *modulus)
+{
+    assert(base->nw <= modulus->nw);
+    assert(modulus->nw > 0);
+    assert(modulus->w[0] & 1);
+
+    MontyContext *ctx = monty_new(modulus);
+
+    mp_int *base_monty = monty_import(ctx, base);
+    mp_int *result_monty = monty_pow(ctx, base_monty, exponent);
+    mp_int *result = monty_export(ctx, result_monty);
+
+    mp_free(base_monty);
+    mp_free(result_monty);
+    monty_free(ctx);
+    return result;
+}
+
+/*
+ * Given two coprime nonzero input integers a,b, returns two integers
+ * A,B such that A*a - B*b = 1. A,B will be the minimal non-negative
+ * pair satisfying that criterion, which is equivalent to saying that
+ * 0<=A<b and 0<=B<a.
+ *
+ * This algorithm is an adapted form of Stein's algorithm, which
+ * computes gcd(a,b) using only addition and bit shifts (i.e. without
+ * needing general division), using the following rules:
+ *
+ *  - if both of a,b are even, divide off a common factor of 2
+ *  - if one of a,b (WLOG a) is even, then gcd(a,b) = gcd(a/2,b), so
+ *    just divide a by 2
+ *  - if both of a,b are odd, then WLOG a>b, and gcd(a,b) =
+ *    gcd(b,(a-b)/2).
+ *
+ * For this application, I always expect the inputs to be coprime
+ * (gcd 1), so we can rule out the 'both even' case. For simplicity
+ * I've changed the 'both odd' case to turn (a,b) into (b,a-b) without
+ * the division by 2 (the next iteration would divide by 2 anyway).
+ *
+ * But the big change is that we need the Bezout coefficients as
+ * output, not just the gcd. So we need to know how to generate those
+ * in each case, based on the coefficients from the reduced pair of
+ * numbers:
+ *
+ *  - If a,b are both odd, and u,v are such that u*b + v*(a-b) = 1,
+ *    then v*a + (u-v)*b = 1.
+ *
+ *  - If a is even, and u,v are such that u*(a/2) + v*b = 1:
+ *     + if u is also even, then this is just (u/2)*a + v*b = 1
+ *     + otherwise, (u+b)*(a/2) + (v-a/2)*b is also equal to 1, and
+ *       since u and b are both odd, (u+b)/2 is an integer, so we have
+ *       ((u+b)/2)*a + (v-a/2)*b = 1.
+ *
+ * The code below transforms this from a recursive to an iterative
+ * algorithm. We first reduce a,b to 0,1, recording at each stage
+ * whether one of them was even, and whether we had to swap them; then
+ * we iterate backwards over that record of what we did, applying the
+ * above rules for building up the Bezout coefficients as we go. Of
+ * course, all the case analysis is done by the usual bit-twiddling
+ * conditionalisation to avoid data-dependent control flow.
+ *
+ * Also, since these mp_ints are generally treated as unsigned, we
+ * store the coefficients by absolute value, with the semantics that
+ * they always have opposite sign, and in the unwinding loop we keep a
+ * bit indicating whether Aa-Bb is currently expected to be +1 or -1,
+ * so that we can do one final conditional adjustment if it's -1.
+ *
+ * Once the reduction rules have managed to reduce the input numbers
+ * to (0,1), then they are stable (the next reduction will always
+ * divide the even one by 2, which maps 0 to 0). So it doesn't matter
+ * if we do more steps of the algorithm than necessary; hence, for
+ * constant time, we just need to find the maximum number we could
+ * _possibly_ require, and do that many.
+ *
+ * If a,b < 2^n, at most 3n iterations are required. Proof: consider
+ * the quantity Q = log_2(min(a,b)) + 2 log_2(max(a,b)).
+ *  - If the smaller number is even, then the next iteration halves
+ *    it, decreasing Q by 1.
+ *  - If the larger number is even, then the next iteration halves
+ *    it, decreasing Q by 2.
+ *  - If the two numbers are both odd, then the combined effect of the
+ *    next two steps will be to replace the larger number with
+ *    something less than half its original value.
+ * In any of these cases, the effect is that in k steps (where k = 1
+ * or 2 depending on the case) Q decreases by at least k. So on
+ * average it decreases by at least 1 per step, and since it starts
+ * off at 3n, that's how many steps it might take.
+ *
+ * The worst case inputs (I think) are where a=2^{n-1} and b=2^n-1
+ * (i.e. a is a power of 2 and b is all 1s). In that situation, the
+ * first n-1 steps repeatedly halve a until it's 1, and then there are
+ * n pairs of steps each of which subtracts 1 from b and then halves
+ * it.
+ */
+static void mp_bezout_into(mp_int *a_coeff_out, mp_int *b_coeff_out,
+                           mp_int *a_in, mp_int *b_in)
+{
+    size_t nw = size_t_max(1, size_t_max(a_in->nw, b_in->nw));
+
+    /* Make mutable copies of the input numbers */
+    mp_int *a = mp_make_sized(nw), *b = mp_make_sized(nw);
+    mp_copy_into(a, a_in);
+    mp_copy_into(b, b_in);
+
+    /* Space to build up the output coefficients, with an extra word
+     * so that intermediate values can overflow off the top and still
+     * right-shift back down to the correct value */
+    mp_int *ac = mp_make_sized(nw + 1), *bc = mp_make_sized(nw + 1);
+
+    /* And a general-purpose temp register */
+    mp_int *tmp = mp_make_sized(nw);
+
+    /* Space to record the sequence of reduction steps to unwind. We
+     * make it a BignumInt for no particular reason except that (a)
+     * mp_make_sized conveniently zeroes the allocation and mp_free
+     * wipes it, and (b) this way I can use mp_dump() if I have to
+     * debug this code. */
+    size_t steps = 3 * nw * BIGNUM_INT_BITS;
+    mp_int *record = mp_make_sized(
+        (steps*2 + BIGNUM_INT_BITS - 1) / BIGNUM_INT_BITS);
+
+    for (size_t step = 0; step < steps; step++) {
+        /*
+         * If a and b are both odd, we want to sort them so that a is
+         * larger. But if one is even, we want to sort them so that a
+         * is the even one.
+         */
+        unsigned swap_if_both_odd = mp_cmp_hs(b, a);
+        unsigned swap_if_one_even = a->w[0] & 1;
+        unsigned both_odd = a->w[0] & b->w[0] & 1;
+        unsigned swap = swap_if_one_even ^ (
+            (swap_if_both_odd ^ swap_if_one_even) & both_odd);
+
+        mp_cond_swap(a, b, swap);
+
+        /*
+         * Now, if we've made a the even number, divide it by two; if
+         * we've made it the larger of two odd numbers, subtract the
+         * smaller one from it.
+         */
+        mp_rshift_fixed_into(tmp, a, 1);
+        mp_sub_into(a, a, b);
+        mp_select_into(a, tmp, a, both_odd);
+
+        /*
+         * Record the two 1-bit values both_odd and swap.
+         */
+        mp_set_bit(record, step*2, both_odd);
+        mp_set_bit(record, step*2+1, swap);
+    }
+
+    /*
+     * Now we expect to have reduced the two numbers to 0 and 1,
+     * although we don't know which way round. (But we avoid checking
+     * this by assertion; sometimes we'll need to do this computation
+     * without giving away that we already know the inputs were bogus.
+     * So we'd prefer to just press on and return nonsense.)
+     */
+
+    /*
+     * So their Bezout coefficients at this point are simply
+     * themselves.
+     */
+    mp_copy_into(ac, a);
+    mp_copy_into(bc, b);
+
+    /*
+     * We'll maintain the invariant as we unwind that ac * a - bc * b
+     * is either +1 or -1, and we'll remember which. (We _could_ keep
+     * it at +1 the whole time, but it would cost more work every time
+     * round the loop, so it's cheaper to fix that up once at the
+     * end.)
+     *
+     * Initially, the result is +1 if a was the nonzero value after
+     * reduction, and -1 if b was.
+     */
+    unsigned minus_one = b->w[0];
+
+    for (size_t step = steps; step-- > 0 ;) {
+        /*
+         * Recover the data from the step we're unwinding.
+         */
+        unsigned both_odd = mp_get_bit(record, step*2);
+        unsigned swap = mp_get_bit(record, step*2+1);
+
+        /*
+         * If this was a division step (!both_odd), and our
+         * coefficient of a is not the even one, we need to adjust the
+         * coefficients by +b and +a respectively.
+         */
+        unsigned adjust = (ac->w[0] & 1) & ~both_odd;
+        mp_cond_add_into(ac, ac, b, adjust);
+        mp_cond_add_into(bc, bc, a, adjust);
+
+        /*
+         * Now, if it was a division step, then ac is even, and we
+         * divide it by two.
+         */
+        mp_rshift_fixed_into(tmp, ac, 1);
+        mp_select_into(ac, tmp, ac, both_odd);
+
+        /*
+         * But if it was a subtraction step, we add ac to bc instead.
+         */
+        mp_cond_add_into(bc, bc, ac, both_odd);
+
+        /*
+         * Undo the transformation of the input numbers, by adding b
+         * to a (if both_odd) or multiplying a by 2 (otherwise).
+         */
+        mp_lshift_fixed_into(tmp, a, 1);
+        mp_add_into(a, a, b);
+        mp_select_into(a, tmp, a, both_odd);
+
+        /*
+         * Finally, undo the swap. If we do swap, this also reverses
+         * the sign of the current result ac*a-bc*b.
+         */
+        mp_cond_swap(a, b, swap);
+        mp_cond_swap(ac, bc, swap);
+        minus_one ^= swap;
+    }
+
+    /*
+     * Now we expect to have recovered the input a,b.
+     */
+    assert(mp_cmp_eq(a, a_in) & mp_cmp_eq(b, b_in));
+
+    /*
+     * But we might find that our current result is -1 instead of +1,
+     * that is, we have A',B' such that A'a - B'b = -1.
+     *
+     * In that situation, we set A = b-A' and B = a-B', giving us
+     * Aa-Bb = ab - A'a - ab + B'b = +1.
+     */
+    mp_sub_into(tmp, b, ac);
+    mp_select_into(ac, ac, tmp, minus_one);
+    mp_sub_into(tmp, a, bc);
+    mp_select_into(bc, bc, tmp, minus_one);
+
+    /*
+     * Now we really are done. Return the outputs.
+     */
+    if (a_coeff_out)
+        mp_copy_into(a_coeff_out, ac);
+    if (b_coeff_out)
+        mp_copy_into(b_coeff_out, bc);
+
+    mp_free(a);
+    mp_free(b);
+    mp_free(ac);
+    mp_free(bc);
+    mp_free(tmp);
+    mp_free(record);
+}
+
+mp_int *mp_invert(mp_int *x, mp_int *m)
+{
+    mp_int *inverse = mp_make_sized(m->nw);  /* as many words as m */
+    mp_bezout_into(inverse, NULL, x, m);  /* NULL: second output unwanted */
+    return inverse;
+}
+
+static uint32_t recip_approx_32(uint32_t x)
+{
+    /*
+     * Approximates 2^63/x for a uint32_t x whose top bit is set, i.e.
+     * x in [2^31,2^32).
+     *
+     * Only multiplications and shifts are used, never the C divide
+     * operator, because division might not run in constant time:
+     * the machine instruction may be data-dependent, or on a CPU
+     * with no hardware divider the operator may turn into a call to
+     * a library routine.
+     *
+     * The constants below come from the degree-9 minimax-optimal
+     * polynomial approximation to that function over the interval
+     * (found with the Remez algorithm). They have been converted to
+     * integer arithmetic, with the shift after each multiplication
+     * chosen to keep as many significant bits as possible at every
+     * stage. Think of it as 'static floating point': the exponent at
+     * each point is known when the code is written, so it is never
+     * stored or examined at run time.
+     *
+     * An exhaustive sweep of the input space found a maximum error
+     * of 1686.54, attained at 4226800006 == 0xfbefd986: the true
+     * reciprocal is 2182116973.540... == 0x8210766d.8a6..., and this
+     * function returns 2182115287 == 0x82106fd7.
+     */
+    uint64_t acc = 0x92db03d6ULL;
+    acc = 0xf63e71eaULL - ((acc * x) >> 34);
+    acc = 0xb63721e8ULL - ((acc * x) >> 34);
+    acc = 0x9c2da00eULL - ((acc * x) >> 33);
+    acc = 0xaada0bb8ULL - ((acc * x) >> 32);
+    acc = 0xf75cd403ULL - ((acc * x) >> 31);
+    acc = 0xecf97a41ULL - ((acc * x) >> 31);
+    acc = 0x90d876cdULL - ((acc * x) >> 31);
+    acc = 0x6682799a0ULL - ((acc * x) >> 26);
+    return (uint32_t)acc;
+}
+
+void mp_divmod_into(mp_int *n, mp_int *d, mp_int *q_out, mp_int *r_out)
+{
+    assert(!mp_eq_integer(d, 0));
+
+    /*
+     * We do division by using Newton-Raphson iteration to converge to
+     * the reciprocal of d (or rather, R/d for R a sufficiently large
+     * power of 2); then we multiply that reciprocal by n; and we
+     * finish up with conditional subtraction.
+     *
+     * But we have to do it in a fixed number of N-R iterations, so we
+     * need some error analysis to know how many we might need.
+     *
+     * The iteration is derived by defining f(r) = d - R/r.
+     * Differentiating gives f'(r) = R/r^2, and the Newton-Raphson
+     * formula applied to those functions gives
+     *
+     *      r_{i+1} = r_i - f(r_i) / f'(r_i)
+     *              = r_i - (d - R/r_i) r_i^2 / R
+     *              = r_i (2 R - d r_i) / R
+     *
+     * Now let e_i be the error in a given iteration, in the sense
+     * that
+     *
+     *        d r_i = R + e_i
+     *  i.e.  e_i/R = (r_i - r_true) / r_true
+     *
+     * so e_i is the _relative_ error in r_i.
+     *
+     * We must also introduce a rounding-error term, because the
+     * division by R always gives an integer. This might make the
+     * output off by up to 1 (in the negative direction, because
+     * right-shifting gives floor of the true quotient). So when we
+     * divide by R, we must imagine adding some f in [0,1). Then we
+     * have
+     *
+     *    d r_{i+1} = d r_i (2 R - d r_i) / R - d f
+     *              = (R + e_i) (R - e_i) / R - d f
+     *              = (R^2 - e_i^2) / R - d f
+     *              = R - (e_i^2 / R + d f)
+     * =>   e_{i+1} = - (e_i^2 / R + d f)
+     *
+     * The sum of two positive quantities is bounded above by twice
+     * their max, and max |f| = 1, so we can bound this as follows:
+     *
+     *               |e_{i+1}| <= 2 max (e_i^2/R, d)
+     *             |e_{i+1}/R| <= 2 max ((e_i/R)^2, d/R)
+     *        log2 |R/e_{i+1}| >= min (2 log2 |R/e_i|, log2 |R/d|) - 1
+     *
+     * which tells us that the number of 'good' bits - i.e.
+     * log2(R/e_i) - very nearly doubles at every iteration (apart
+     * from that subtraction of 1), until it gets to the same size as
+     * log2(R/d). In other words, the size of R in bits has to be the
+     * size of denominator we're putting in, _plus_ the amount of
+     * precision we want to get back out.
+     *
+     * So when we multiply n (the input numerator) by our final
+     * reciprocal approximation r, but actually r differs from R/d by
+     * up to 2, then it follows that
+     *
+     *   n/d - nr/R = n/d - [ n (R/d + e) ] / R
+     *              = n/d - [ (n/d) R + n e ] / R
+     *              = -ne/R
+     *      =>   0 <= n/d - nr/R < 2n/R
+     *
+     * so our computed quotient can differ from the true n/d by up to
+     * 2n/R. Hence, as long as we also choose R large enough that 2n/R
+     * is bounded above by a constant, we can guarantee a bounded
+     * number of final conditional-subtraction steps.
+     */
+
+    /*
+     * Get at least 32 of the most significant bits of the input
+     * number.
+     */
+    size_t hiword_index = 0;
+    uint64_t hibits = 0, lobits = 0;
+    mp_find_highest_nonzero_word_pair(d, 64 - BIGNUM_INT_BITS,
+                                      &hiword_index, &hibits, &lobits);
+
+    /*
+     * Make a shifted combination of those two words which puts the
+     * topmost bit of the number at bit 63.
+     */
+    size_t shift_up = 0;
+    for (size_t i = BIGNUM_INT_BITS_BITS; i-- > 0;) {
+        size_t sl = (size_t)1 << i;       /* left shift count */
+        size_t sr = 64 - sl;              /* complementary shift in a uint64_t */
+
+        /* Should we shift up? */
+        unsigned indicator = 1 ^ normalise_to_1(hibits >> sr);
+
+        /* If we do, what will we get? */
+        uint64_t new_hibits = (hibits << sl) | (lobits >> sr);
+        uint64_t new_lobits = lobits << sl;
+        size_t new_shift_up = shift_up + sl;
+
+        /* Conditionally swap those values in. */
+        hibits    ^= (hibits    ^ new_hibits   ) & -(uint64_t)indicator;
+        lobits    ^= (lobits    ^ new_lobits   ) & -(uint64_t)indicator;
+        shift_up  ^= (shift_up  ^ new_shift_up ) & -(size_t)  indicator;
+    }
+
+    /*
+     * So now we know the most significant 32 bits of d are at the top
+     * of hibits. Approximate the reciprocal of those bits.
+     */
+    lobits = (uint64_t)recip_approx_32(hibits >> 32) << 32;
+    hibits = 0;
+
+    /*
+     * And shift that up by as many bits as the input was shifted up
+     * just now, so that the product of this approximation and the
+     * actual input will be close to a fixed power of two regardless
+     * of where the MSB was.
+     *
+     * I do this in another log n individual passes, not so much
+     * because I'm worried about the time-invariance of the CPU's
+     * register-controlled shift operation, but in case the compiler
+     * code-generates uint64_t shifts out of a variable number of
+     * smaller-word shift instructions, e.g. by splitting up into
+     * cases.
+     */
+    for (size_t i = BIGNUM_INT_BITS_BITS; i-- > 0;) {
+        size_t sl = (size_t)1 << i;       /* left shift count */
+        size_t sr = 64 - sl;              /* complementary shift in a uint64_t */
+
+        /* Should we shift up? */
+        unsigned indicator = 1 & (shift_up >> i);
+
+        /* If we do, what will we get? */
+        uint64_t new_hibits = (hibits << sl) | (lobits >> sr);
+        uint64_t new_lobits = lobits << sl;
+
+        /* Conditionally swap those values in. */
+        hibits    ^= (hibits    ^ new_hibits   ) & -(uint64_t)indicator;
+        lobits    ^= (lobits    ^ new_lobits   ) & -(uint64_t)indicator;
+    }
+
+    /*
+     * The product of the 128-bit value now in hibits:lobits with the
+     * 128-bit value we originally retrieved in the same variables
+     * will be in the vicinity of 2^191. So we'll take log2(R) to be
+     * 191, plus a multiple of BIGNUM_INT_BITS large enough to allow R
+     * to hold the combined sizes of n and d.
+     */
+    size_t log2_R;
+    {
+        size_t max_log2_n = (n->nw + d->nw) * BIGNUM_INT_BITS;
+        log2_R = max_log2_n + 3;
+        log2_R -= size_t_min(191, log2_R);
+        log2_R = (log2_R + BIGNUM_INT_BITS - 1) & ~(BIGNUM_INT_BITS - 1);
+        log2_R += 191;
+    }
+
+    /* Number of words in a bignum capable of holding numbers the size
+     * of twice R. */
+    size_t rw = ((log2_R+2) + BIGNUM_INT_BITS - 1) / BIGNUM_INT_BITS;
+
+    /*
+     * Now construct our full-sized starting reciprocal approximation.
+     */
+    mp_int *r_approx = mp_make_sized(rw);
+    size_t output_bit_index;
+    {
+        /* Where in the input number did the input 128-bit value come from? */
+        size_t input_bit_index =
+            (hiword_index * BIGNUM_INT_BITS) - (128 - BIGNUM_INT_BITS);
+
+        /* So how far do we need to shift our 64-bit output, if the
+         * product of those two fixed-size values is 2^191 and we want
+         * to make it 2^log2_R instead? */
+        output_bit_index = log2_R - 191 - input_bit_index;
+
+        /* If we've done all that right, it should be a whole number
+         * of words. */
+        assert(output_bit_index % BIGNUM_INT_BITS == 0);
+        size_t output_word_index = output_bit_index / BIGNUM_INT_BITS;
+
+        mp_add_integer_into_shifted_by_words(
+            r_approx, r_approx, lobits, output_word_index);
+        mp_add_integer_into_shifted_by_words(
+            r_approx, r_approx, hibits,
+            output_word_index + 64 / BIGNUM_INT_BITS);
+    }
+
+    /*
+     * Make the constant 2*R, which we'll need in the iteration.
+     */
+    mp_int *two_R = mp_make_sized(rw);
+    mp_add_integer_into_shifted_by_words(
+        two_R, two_R, (BignumInt)1 << ((log2_R+1) % BIGNUM_INT_BITS),
+        (log2_R+1) / BIGNUM_INT_BITS);
+
+    /*
+     * Scratch space.
+     */
+    mp_int *dr = mp_make_sized(rw + d->nw);
+    mp_int *diff = mp_make_sized(size_t_max(rw, dr->nw));
+    mp_int *product = mp_make_sized(rw + diff->nw);
+    size_t scratchsize = size_t_max(
+        mp_mul_scratchspace(dr->nw, r_approx->nw, d->nw),
+        mp_mul_scratchspace(product->nw, r_approx->nw, diff->nw));
+    mp_int *scratch = mp_make_sized(scratchsize);
+    mp_int product_shifted = mp_make_alias(
+        product, log2_R / BIGNUM_INT_BITS, product->nw);
+
+    /*
+     * Initial error estimate: the 32-bit output of recip_approx_32
+     * differs by less than 2048 (== 2^11) from the true top 32 bits
+     * of the reciprocal, so the relative error is at most 2^11
+     * divided by the 32-bit reciprocal, which at worst is 2^11/2^31 =
+     * 2^-20. So even in the worst case, we have 20 good bits of
+     * reciprocal to start with.
+     */
+    size_t good_bits = 31 - 11;
+    size_t good_bits_needed = BIGNUM_INT_BITS * n->nw + 4; /* add a few */
+
+    /*
+     * Now do Newton-Raphson iterations until we have reason to think
+     * they're not converging any more.
+     */
+    while (good_bits < good_bits_needed) {
+        /*
+         * Compute the next iterate.
+         */
+        mp_mul_internal(dr, r_approx, d, *scratch);
+        mp_sub_into(diff, two_R, dr);
+        mp_mul_internal(product, r_approx, diff, *scratch);
+        mp_rshift_fixed_into(r_approx, &product_shifted,
+                             log2_R % BIGNUM_INT_BITS);
+
+        /*
+         * Adjust the error estimate.
+         */
+        good_bits = good_bits * 2 - 1;
+    }
+
+    mp_free(dr);
+    mp_free(diff);
+    mp_free(product);
+    mp_free(scratch);
+
+    /*
+     * Now we've got our reciprocal, we can compute the quotient, by
+     * multiplying in n and then shifting down by log2_R bits.
+     */
+    mp_int *quotient_full = mp_mul(r_approx, n);
+    mp_int quotient_alias = mp_make_alias(
+        quotient_full, log2_R / BIGNUM_INT_BITS, quotient_full->nw);
+    mp_int *quotient = mp_make_sized(n->nw);
+    mp_rshift_fixed_into(quotient, &quotient_alias, log2_R % BIGNUM_INT_BITS);
+
+    /*
+     * Next, compute the remainder.
+     */
+    mp_int *remainder = mp_make_sized(d->nw);
+    mp_mul_into(remainder, quotient, d);
+    mp_sub_into(remainder, n, remainder);
+
+    /*
+     * Finally, two conditional subtractions to fix up any remaining
+     * rounding error. (I _think_ one should be enough, but this
+     * routine isn't time-critical enough to take chances.)
+     */
+    unsigned q_correction = 0;
+    for (unsigned iter = 0; iter < 2; iter++) {
+        unsigned need_correction = mp_cmp_hs(remainder, d);
+        mp_cond_sub_into(remainder, remainder, d, need_correction);
+        q_correction += need_correction;
+    }
+    mp_add_integer_into(quotient, quotient, q_correction);
+
+    /*
+     * Now we should have a perfect answer, i.e. 0 <= r < d.
+     */
+    assert(!mp_cmp_hs(remainder, d));
+
+    if (q_out)
+        mp_copy_into(q_out, quotient);
+    if (r_out)
+        mp_copy_into(r_out, remainder);
+
+    mp_free(r_approx);
+    mp_free(two_R);
+    mp_free(quotient_full);
+    mp_free(quotient);
+    mp_free(remainder);
+}
+
+mp_int *mp_div(mp_int *n, mp_int *d)
+{
+    mp_int *quotient = mp_make_sized(n->nw); /* as many words as n */
+    mp_divmod_into(n, d, quotient, NULL);    /* NULL: remainder unwanted */
+    return quotient;
+}
+
+mp_int *mp_mod(mp_int *n, mp_int *d)
+{
+    mp_int *remainder = mp_make_sized(d->nw); /* as many words as d */
+    mp_divmod_into(n, d, NULL, remainder);    /* NULL: quotient unwanted */
+    return remainder;
+}
+
+mp_int *mp_modmul(mp_int *x, mp_int *y, mp_int *modulus)
+{
+    mp_int *xy = mp_mul(x, y);              /* unreduced product */
+    mp_int *residue = mp_mod(xy, modulus);
+    mp_free(xy);
+    return residue;
+}
+
+mp_int *mp_modadd(mp_int *x, mp_int *y, mp_int *modulus)
+{
+    mp_int *total = mp_add(x, y);           /* unreduced sum */
+    mp_int *residue = mp_mod(total, modulus);
+    mp_free(total);
+    return residue;
+}
+
+mp_int *mp_modsub(mp_int *x, mp_int *y, mp_int *modulus)
+{
+    mp_int *diff = mp_make_sized(size_t_max(x->nw, y->nw));
+    mp_sub_into(diff, x, y);
+    unsigned negate = mp_cmp_hs(y, x); /* y >= x: reduce y-x instead */
+    mp_cond_negate(diff, diff, negate);
+    mp_int *res = mp_mod(diff, modulus);
+    mp_cond_negate(res, res, negate); /* if negated: 0, or needs +modulus */
+    mp_cond_add_into(res, res, modulus, negate & ~mp_eq_integer(res, 0));
+    mp_free(diff);
+    return res;
+}
+
+static mp_int *mp_modadd_in_range(mp_int *x, mp_int *y, mp_int *modulus)
+{
+    mp_int *res = mp_make_sized(modulus->nw);
+    unsigned wrapped = mp_add_into_internal(res, x, y);
+    mp_cond_sub_into(res, res, modulus, mp_cmp_hs(res, modulus) | wrapped);
+    return res;
+}
+
+static mp_int *mp_modsub_in_range(mp_int *x, mp_int *y, mp_int *modulus)
+{
+    mp_int *res = mp_make_sized(modulus->nw);
+    mp_sub_into(res, x, y);
+    mp_cond_add_into(res, res, modulus, mp_cmp_hs(x, y) ^ 1); /* if x < y */
+    return res;
+}
+
+mp_int *monty_add(MontyContext *mc, mp_int *x, mp_int *y)
+{
+    return mp_modadd_in_range(x, y, mc->m); /* x+y, less mc->m if >= it */
+}
+
+mp_int *monty_sub(MontyContext *mc, mp_int *x, mp_int *y)
+{
+    return mp_modsub_in_range(x, y, mc->m); /* x-y, plus mc->m if x < y */
+}
+
+void mp_min_into(mp_int *r, mp_int *x, mp_int *y)
+{
+    mp_select_into(r, x, y, mp_cmp_hs(x, y)); /* x >= y selects y */
+}
+
+mp_int *mp_min(mp_int *x, mp_int *y)
+{
+    mp_int *smaller = mp_make_sized(size_t_min(x->nw, y->nw));
+    mp_min_into(smaller, x, y);
+    return smaller;
+}
+
+mp_int *mp_power_2(size_t power)
+{
+    mp_int *pow2 = mp_new(power + 1);   /* room for bits 0..power */
+    mp_set_bit(pow2, power, 1);
+    return pow2;
+}
+
+struct ModsqrtContext {
+    mp_int *p;                      /* the prime (modsqrt_new's own copy) */
+    MontyContext *mc;                  /* for doing arithmetic mod p */
+
+    /* Decompose p-1 as 2^e k, for positive integer e and odd k */
+    size_t e;                       /* index of lowest set bit of p-1 */
+    mp_int *k;                      /* p shifted right by e bits */
+    mp_int *km1o2;                  /* (k-1)/2 */
+
+    /* z: user-provided non-residue mod p; zk: z^k, NULL until lazily
+     * computed by modsqrt_lazy_setup. Both in Montgomery form. */
+    mp_int *z, *zk;
+};
+
+ModsqrtContext *modsqrt_new(mp_int *p, mp_int *any_nonsquare_mod_p)
+{
+    ModsqrtContext *ctx = snew(ModsqrtContext);
+    memset(ctx, 0, sizeof(*ctx));
+
+    ctx->p = mp_copy(p);
+    ctx->mc = monty_new(ctx->p);
+    ctx->z = monty_import(ctx->mc, any_nonsquare_mod_p);
+
+    /* Locate the lowest set bit of p-1 by scanning p upwards from bit
+     * 1. A plain data-dependent loop is fine for once, because p is
+     * expected to be public (e.g. a standard elliptic curve prime). */
+    ctx->e = 1;
+    while (ctx->e < BIGNUM_INT_BITS * p->nw && !mp_get_bit(p, ctx->e))
+        ctx->e++;
+
+    ctx->k = mp_rshift_fixed(p, ctx->e);
+    ctx->km1o2 = mp_rshift_fixed(ctx->k, 1);
+
+    /* zk = z^k is deliberately left NULL here: it is the expensive
+     * part of the setup, so it is computed lazily on first use and
+     * costs nothing if this context is never actually used. */
+
+    return ctx;
+}
+
+static void modsqrt_lazy_setup(ModsqrtContext *sc)
+{
+    if (!sc->zk)                /* first use only; cached afterwards */
+        sc->zk = monty_pow(sc->mc, sc->z, sc->k);   /* zk = z^k */
+}
+
+void modsqrt_free(ModsqrtContext *sc)
+{
+    /* zk is filled in lazily, so it may never have been allocated. */
+    if (sc->zk)
+        mp_free(sc->zk);
+
+    monty_free(sc->mc);
+    mp_free(sc->km1o2);
+    mp_free(sc->k);
+    mp_free(sc->z);
+    mp_free(sc->p);
+    sfree(sc);
+}
+
+mp_int *mp_modsqrt(ModsqrtContext *sc, mp_int *x, unsigned *success)
+{
+    mp_int *x_monty = monty_import(sc->mc, x);
+    mp_int *root_monty = monty_modsqrt(sc, x_monty, success);
+    mp_free(x_monty);
+    mp_int *result = monty_export(sc->mc, root_monty);
+    mp_free(root_monty);
+    return result;
+}
+
+/*
+ * Modular square root, using an algorithm more or less similar to
+ * Tonelli-Shanks but adapted for constant time.
+ *
+ * The basic idea is to write p-1 = k 2^e, where k is odd and e > 0.
+ * Then the multiplicative group mod p (call it G) has a sequence of
+ * e+1 nested subgroups G = G_0 > G_1 > G_2 > ... > G_e, where each
+ * G_i is exactly half the size of G_{i-1} and consists of all the
+ * squares of elements in G_{i-1}. So the innermost group G_e has
+ * order k, which is odd, and hence within that group you can take a
+ * square root by raising to the power (k+1)/2.
+ *
+ * Our strategy is to iterate over these groups one by one and make
+ * sure the number x we're trying to take the square root of is inside
+ * each one, by adjusting it if it isn't.
+ *
+ * Suppose g is a primitive root of p, i.e. a generator of G_0. (We
+ * don't actually need to know what g _is_; we just imagine it for the
+ * sake of understanding.) Then G_i consists of precisely the (2^i)th
+ * powers of g, and hence, you can tell if a number is in G_i if
+ * raising it to the power k 2^{e-i} gives 1. So the conceptual
+ * algorithm goes: for each i, test whether x is in G_i by that
+ * method. If it isn't, then the previous iteration ensured it's in
+ * G_{i-1}, so it will be an odd power of g^{2^{i-1}}, and hence
+ * multiplying by any other odd power of g^{2^{i-1}} will give x' in
+ * G_i. And we have one of those, because our non-square z is an odd
+ * power of g, so z^{2^{i-1}} is an odd power of g^{2^{i-1}}.
+ *
+ * (There's a special case in the very first iteration, where we don't
+ * have a G_{i-1}. If it turns out that x is not even in G_1, that
+ * means it's not a square, so we set *success to 0. We still run the
+ * rest of the algorithm anyway, for the sake of constant time, but we
+ * don't give a hoot what it returns.)
+ *
+ * When we get to the end and have x in G_e, then we can take its
+ * square root by raising to (k+1)/2. But of course that's not the
+ * square root of the original input - it's only the square root of
+ * the adjusted version we produced during the algorithm. To get the
+ * true output answer we also have to multiply by a power of z,
+ * namely, z to the power of _half_ whatever we've been multiplying in
+ * as we go along. (The power of z we multiplied in must have been
+ * even, because the case in which we would have multiplied in an odd
+ * power of z is the i=0 case, in which we instead set the failure
+ * flag.)
+ *
+ * The code below is an optimised version of that basic idea, in which
+ * we _start_ by computing x^k so as to be able to test membership in
+ * G_i by only a few squarings rather than a full from-scratch modpow
+ * every time; we also start by computing our candidate output value
+ * x^{(k+1)/2}. So when the above description says 'adjust x by z^i'
+ * for some i, we have to adjust our running values of x^k and
+ * x^{(k+1)/2} by z^{ik} and z^{ik/2} respectively (the latter is safe
+ * because, as above, i is always even). And it turns out that we
+ * don't actually have to store the adjusted version of x itself at
+ * all - we _only_ keep those two powers of it.
+ */
+mp_int *monty_modsqrt(ModsqrtContext *sc, mp_int *x, unsigned *success)
+{
+    modsqrt_lazy_setup(sc);
+
+    mp_int *scratch_to_free = mp_make_sized(3 * sc->mc->rw);
+    mp_int scratch = *scratch_to_free;
+
+    /*
+     * Compute toret = x^{(k+1)/2}, our starting point for the output
+     * square root, and also xk = x^k which we'll use as we go along
+     * for knowing when to apply correction factors. We do this by
+     * first computing x^{(k-1)/2}, then multiplying it by x, then
+     * multiplying the two together.
+     */
+    mp_int *toret = monty_pow(sc->mc, x, sc->km1o2);
+    mp_int xk = mp_alloc_from_scratch(&scratch, sc->mc->rw);
+    mp_copy_into(&xk, toret);
+    monty_mul_into(sc->mc, toret, toret, x);
+    monty_mul_into(sc->mc, &xk, toret, &xk);
+
+    mp_int tmp = mp_alloc_from_scratch(&scratch, sc->mc->rw);
+    mp_int power_of_zk = mp_alloc_from_scratch(&scratch, sc->mc->rw);
+    mp_copy_into(&power_of_zk, sc->zk);
+
+    for (size_t i = 0; i < sc->e; i++) {
+        mp_copy_into(&tmp, &xk);
+        for (size_t j = i+1; j < sc->e; j++)
+            monty_mul_into(sc->mc, &tmp, &tmp, &tmp);
+        unsigned eq1 = mp_cmp_eq(&tmp, monty_identity(sc->mc));
+
+        if (i == 0) {
+            /* x = 0 never has a power equal to 1, yet 0 is its own
+             * square root mod p, so report success for it too. */
+            *success = eq1 | mp_eq_integer(x, 0);
+        } else {
+            monty_mul_into(sc->mc, &tmp, toret, &power_of_zk);
+            mp_select_into(toret, &tmp, toret, eq1);
+
+            monty_mul_into(sc->mc, &power_of_zk, &power_of_zk, &power_of_zk);
+
+            monty_mul_into(sc->mc, &tmp, &xk, &power_of_zk);
+            mp_select_into(&xk, &tmp, &xk, eq1);
+        }
+    }
+
+    mp_free(scratch_to_free);
+
+    return toret;
+}
+
+mp_int *mp_random_bits_fn(size_t bits, int (*gen_byte)(void))
+{
+    size_t nbytes = (bits + 7) / 8;
+    size_t nwords = (bits + BIGNUM_INT_BITS - 1) / BIGNUM_INT_BITS;
+    mp_int *out = mp_make_sized(nwords);
+    for (size_t pos = 0; pos < nbytes; pos++) {
+        BignumInt rnd = gen_byte();  /* byte pos: least significant first */
+        unsigned keep = (1 << size_t_min(8, bits - pos*8)) - 1;
+        out->w[pos / BIGNUM_INT_BYTES] |=
+            (rnd & keep) << (8*(pos % BIGNUM_INT_BYTES));
+    }
+    return out;
+}
+
+mp_int *mp_random_in_range_fn(mp_int *lo, mp_int *hi, int (*gen_byte)(void))
+{
+    mp_int *span = mp_sub(hi, lo);
+
+    /*
+     * Ideally every outcome would be exactly equiprobable, but that
+     * can't be achieved in constant time. Instead, draw 128 more
+     * random bits than the size of the range calls for and reduce
+     * modulo the range. The probabilities of any two outcomes then
+     * differ by a factor of at most (1+2^-128), which is taken to be
+     * acceptable because detecting it would need an absurd number of
+     * samples.
+     */
+    mp_int *wide = mp_random_bits_fn(
+        mp_max_bits(span) + 128, gen_byte);
+    mp_int *result = mp_mod(wide, span);
+    mp_free(wide);
+    mp_add_into(result, result, lo);
+    mp_free(span);
+    return result;
+}
diff --git a/mpint.h b/mpint.h
new file mode 100644
index 00000000..34b175d9
--- /dev/null
+++ b/mpint.h
@@ -0,0 +1,386 @@
+#ifndef PUTTY_MPINT_H
+#define PUTTY_MPINT_H
+
+/*
+ * PuTTY's multiprecision integer library.
+ *
+ * This library is written with the aim of avoiding leaking the input
+ * numbers via timing and cache side channels. This means avoiding
+ * making any control flow change, or deciding the address of any
+ * memory access, based on the value of potentially secret input data.
+ *
+ * But in a library that has to handle numbers of arbitrary size, you
+ * can't avoid your control flow depending on the _size_ of the input!
+ * So the rule is that an mp_int has a nominal size that need not be
+ * its mathematical size: i.e. if you call (say) mp_from_bytes_be to
+ * turn an array of 256 bytes into an integer, and all but the last of
+ * those bytes is zero, then you get an mp_int which has space for 256
+ * bytes of data but just happens to store the value 1. So the
+ * _nominal_ sizes of input data - e.g. the size in bits of some
+ * public-key modulus - are not considered secret, and control flow is
+ * allowed to do what it likes based on those sizes. But the same
+ * function, called with the same _nominally sized_ arguments
+ * containing different values, should run in the same length of time.
+ *
+ * When a function returns an 'mp_int *', it is newly allocated to an
+ * appropriate nominal size (which, again, depends only on the nominal
+ * sizes of the inputs). Other functions have 'into' in their name,
+ * and they instead overwrite the contents of an existing mp_int.
+ *
+ * Functions in this API which return values that are logically
+ * boolean return them as 'unsigned' rather than the C99 bool type.
+ * That's because C99 bool does an implicit test for non-zero-ness
+ * when converting any other integer type to it, which compilers might
+ * well implement using data-dependent control flow.
+ */
+
+/*
+ * Create and destroy mp_ints. A newly created one is initialised to
+ * zero. mp_clear also resets an existing number to zero.
+ */
+mp_int *mp_new(size_t maxbits);
+void mp_free(mp_int *);
+void mp_clear(mp_int *x);
+
+/*
+ * Create mp_ints from various sources: little- and big-endian binary
+ * data, an ordinary C unsigned integer type, a decimal or hex string
+ * (given either as a ptrlen or a C NUL-terminated string), and
+ * another mp_int.
+ *
+ * The decimal and hex conversion functions have running time
+ * dependent on the length of the input data, of course.
+ */
+mp_int *mp_from_bytes_le(ptrlen bytes);
+mp_int *mp_from_bytes_be(ptrlen bytes);
+mp_int *mp_from_integer(uintmax_t n);
+mp_int *mp_from_decimal_pl(ptrlen decimal);
+mp_int *mp_from_decimal(const char *decimal);
+mp_int *mp_from_hex_pl(ptrlen hex);
+mp_int *mp_from_hex(const char *hex);
+mp_int *mp_copy(mp_int *x);
+
+/*
+ * A macro for declaring large fixed numbers in source code (such as
+ * elliptic curve parameters, or standard Diffie-Hellman moduli). The
+ * idea is that you just write something like
+ *
+ *   mp_int *value = MP_LITERAL(0x19284376283754638745693467245);
+ *
+ * and it newly allocates you an mp_int containing that number.
+ *
+ * Internally, the macro argument is stringified and passed to
+ * mp_from_hex. That's not as fast as it could be if I had instead set
+ * up some kind of mp_from_array_of_uint64_t() function, but I think
+ * this system is valuable for the fact that the literal integers
+ * appear in a very natural syntax that can be pasted directly out
+ * into, say, Python if you want to cross-check a calculation.
+ */
+static inline mp_int *mp__from_string_literal(const char *lit)
+{
+    /* Internal helper for MP_LITERAL only: not safe on hostile data.
+     * Treated as hex iff it starts with the C prefix "0x" or "0X". */
+    if (lit[0] == '0' && (lit[1] == 'x' || lit[1] == 'X'))
+        return mp_from_hex(lit+2);
+    else
+        return mp_from_decimal(lit);
+}
+#define MP_LITERAL(number) mp__from_string_literal(#number)
+
+/*
+ * Create an mp_int with the value 2^power.
+ */
+mp_int *mp_power_2(size_t power);
+
+/*
+ * Retrieve the value of a particular bit or byte of an mp_int. The
+ * byte / bit index is not considered to be secret data. Out-of-range
+ * byte/bit indices are handled cleanly and return zero.
+ */
+uint8_t mp_get_byte(mp_int *x, size_t byte);
+unsigned mp_get_bit(mp_int *x, size_t bit);
+
+/*
+ * Set an mp_int bit. Again, the bit index is not considered secret.
+ * Do not pass an out-of-range index, on pain of assertion failure.
+ */
+void mp_set_bit(mp_int *x, size_t bit, unsigned val);
+
+/*
+ * Return the nominal size of an mp_int, in terms of the maximum
+ * number of bytes or bits that can fit in it.
+ */
+size_t mp_max_bytes(mp_int *x);
+size_t mp_max_bits(mp_int *x);
+
+/*
+ * Return the _mathematical_ bit count of an mp_int (not its nominal
+ * size), i.e. a value n such that 2^{n-1} <= x < 2^n.
+ *
+ * This function is supposed to run in constant time for a given
+ * nominal input size. Of course it's likely that clients of this
+ * function will promptly need to use the result as the limit of some
+ * loop (e.g. marshalling an mp_int into an SSH packet, which doesn't
+ * permit extra prefix zero bytes). But that's up to the caller to
+ * decide the safety of.
+ */
+size_t mp_get_nbits(mp_int *x);
+
+/*
+ * Return the value of an mp_int as a decimal or hex string. The
+ * result is dynamically allocated, and the caller is responsible for
+ * freeing it.
+ *
+ * These functions should run in constant time for a given nominal
+ * input size, even though the exact number of digits returned is
+ * variable. They always allocate enough space for the largest output
+ * that might be needed, but they don't always fill it.
+ */
+char *mp_get_decimal(mp_int *x);
+char *mp_get_hex(mp_int *x);
+char *mp_get_hex_uppercase(mp_int *x);
+
+/*
+ * Compare two mp_ints, or compare one mp_int against a C integer. The
+ * 'eq' functions return 1 if the two inputs are equal, or 0
+ * otherwise; the 'hs' functions return 1 if the first input is >= the
+ * second, and 0 otherwise.
+ */
+unsigned mp_cmp_hs(mp_int *a, mp_int *b);
+unsigned mp_cmp_eq(mp_int *a, mp_int *b);
+unsigned mp_hs_integer(mp_int *x, uintmax_t n);
+unsigned mp_eq_integer(mp_int *x, uintmax_t n);
+
+/*
+ * Take the minimum of two mp_ints, without using a conditional branch.
+ */
+void mp_min_into(mp_int *r, mp_int *x, mp_int *y);
+mp_int *mp_min(mp_int *x, mp_int *y);
+
+/*
+ * Diagnostic function. Writes out x in hex to the supplied stdio
+ * stream, preceded by the string 'prefix' and followed by 'suffix'.
+ *
+ * This is useful to put temporarily into code, but it's also
+ * potentially useful to call from a debugger.
+ */
+void mp_dump(FILE *fp, const char *prefix, mp_int *x, const char *suffix);
+
+/*
+ * Overwrite one mp_int with another.
+ */
+void mp_copy_into(mp_int *dest, mp_int *src);
+
+/*
+ * Conditional selection. Overwrites dest with either src0 or src1,
+ * according to the value of 'choose_src1'. choose_src1 should be 0 or
+ * 1; if it's 1, then dest is set to src1, otherwise src0.
+ *
+ * The value of choose_src1 is considered to be secret data, so
+ * control flow and memory access should not depend on it.
+ */
+void mp_select_into(mp_int *dest, mp_int *src0, mp_int *src1,
+                    unsigned choose_src1);
+
+/*
+ * Addition, subtraction and multiplication, either targeting an
+ * existing mp_int or making a new one large enough to hold whatever
+ * the output might be.
+ */
+void mp_add_into(mp_int *r, mp_int *a, mp_int *b);
+void mp_sub_into(mp_int *r, mp_int *a, mp_int *b);
+void mp_mul_into(mp_int *r, mp_int *a, mp_int *b);
+mp_int *mp_add(mp_int *x, mp_int *y);
+mp_int *mp_sub(mp_int *x, mp_int *y);
+mp_int *mp_mul(mp_int *x, mp_int *y);
+
+/*
+ * Addition, subtraction and multiplication with one argument small
+ * enough to fit in a C integer. For mp_mul_integer_into, it has to be
+ * even smaller than that.
+ */
+void mp_add_integer_into(mp_int *r, mp_int *a, uintmax_t n);
+void mp_sub_integer_into(mp_int *r, mp_int *a, uintmax_t n);
+void mp_mul_integer_into(mp_int *r, mp_int *a, uint16_t n);
+
+/*
+ * Conditional addition/subtraction. If yes == 1, sets r to a+b or a-b
+ * (respectively). If yes == 0, sets r to just a. 'yes' is considered
+ * secret data.
+ */
+void mp_cond_add_into(mp_int *r, mp_int *a, mp_int *b, unsigned yes);
+void mp_cond_sub_into(mp_int *r, mp_int *a, mp_int *b, unsigned yes);
+
+/*
+ * Swap x0 and x1 if swap == 1, and not if swap == 0. 'swap' is
+ * considered secret.
+ */
+void mp_cond_swap(mp_int *x0, mp_int *x1, unsigned swap);
+
+/*
+ * Set x to 0 if clear == 1, and otherwise leave it unchanged. 'clear'
+ * is considered secret.
+ */
+void mp_cond_clear(mp_int *x, unsigned clear);
+
+/*
+ * Division. mp_divmod_into divides n by d, and writes the quotient
+ * into q and the remainder into r. You can pass either of q and r as
+ * NULL if you don't need one of the outputs.
+ *
+ * mp_div and mp_mod are wrappers that return one or other of those
+ * outputs as a freshly allocated mp_int of the appropriate size.
+ *
+ * Division by zero gives no error, and returns a quotient of 0 and a
+ * remainder of n (so as to still satisfy the division identity that
+ * n=qd+r).
+ */
+void mp_divmod_into(mp_int *n, mp_int *d, mp_int *q, mp_int *r);
+mp_int *mp_div(mp_int *n, mp_int *d);
+mp_int *mp_mod(mp_int *x, mp_int *modulus);
+
+/*
+ * Trivially easy special case of mp_mod: reduce a number mod a power
+ * of two.
+ */
+void mp_reduce_mod_2to(mp_int *x, size_t p);
+
+/*
+ * Modular inverses. mp_invert computes the inverse of x mod modulus
+ * (and will expect the two to be coprime). mp_invert_mod_2to computes
+ * the inverse of x mod 2^p, and is a great deal faster.
+ */
+mp_int *mp_invert_mod_2to(mp_int *x, size_t p);
+mp_int *mp_invert(mp_int *x, mp_int *modulus);
+
+/*
+ * System for taking square roots modulo an odd prime.
+ *
+ * In order to do this efficiently, you need to provide an extra piece
+ * of information at setup time, namely a number which is not
+ * congruent mod p to any square. Given p and that non-square, you can
+ * use modsqrt_new to make a context containing all the necessary
+ * equipment for actually calculating the square roots, and then you
+ * can call mp_modsqrt as many times as you like on that context
+ * before freeing it.
+ *
+ * The output parameter '*success' will be filled in with 1 if the
+ * operation was successful, or 0 if the input number doesn't have a
+ * square root mod p at all. In the latter case, the returned mp_int
+ * will be nonsense and you shouldn't depend on it.
+ *
+ * ==== WARNING ====
+ *
+ * This function DOES NOT TREAT THE PRIME MODULUS AS SECRET DATA! It
+ * will protect the number you're taking the square root _of_, but not
+ * the number you're taking the root of it _mod_.
+ *
+ * (This is because the algorithm requires a number of loop iterations
+ * equal to the number of factors of 2 in p-1. And the expected use of
+ * this function is for elliptic-curve point decompression, in which
+ * the modulus is always a well-known one written down in standards
+ * documents.)
+ */
+typedef struct ModsqrtContext ModsqrtContext;
+ModsqrtContext *modsqrt_new(mp_int *p, mp_int *any_nonsquare_mod_p);
+void modsqrt_free(ModsqrtContext *);
+mp_int *mp_modsqrt(ModsqrtContext *sc, mp_int *x, unsigned *success);
+
+/*
+ * Functions for Montgomery multiplication, a fast technique for doing
+ * a long series of modular multiplications all with the same modulus
+ * (which has to be odd).
+ *
+ * You start by calling monty_new to set up a context structure
+ * containing all the precomputed bits and pieces needed by the
+ * algorithm. Then, any numbers you want to work with must first be
+ * transformed into the internal Montgomery representation using
+ * monty_import; having done that, you can use monty_mul and monty_pow
+ * to operate on them efficiently; and finally, monty_export will
+ * convert numbers back out of Montgomery representation to give their
+ * ordinary values.
+ *
+ * Addition and subtraction are not optimised by the Montgomery trick,
+ * but monty_add and monty_sub are provided anyway for convenience.
+ *
+ * There are also monty_invert and monty_modsqrt, which are analogues
+ * of mp_invert and mp_modsqrt which take their inputs in Montgomery
+ * representation. For monty_modsqrt, the prime modulus of the
+ * ModsqrtContext must be the same as the modulus of the MontyContext.
+ *
+ * The query functions monty_modulus and monty_identity return numbers
+ * stored inside the MontyContext, without copying them. The returned
+ * pointers are still owned by the MontyContext, so don't free them!
+ */
+MontyContext *monty_new(mp_int *modulus);
+MontyContext *monty_copy(MontyContext *mc);
+void monty_free(MontyContext *mc);
+mp_int *monty_modulus(MontyContext *mc); /* doesn't transfer ownership */
+mp_int *monty_identity(MontyContext *mc); /* doesn't transfer ownership */
+void monty_import_into(MontyContext *mc, mp_int *r, mp_int *x);
+mp_int *monty_import(MontyContext *mc, mp_int *x);
+void monty_export_into(MontyContext *mc, mp_int *r, mp_int *x);
+mp_int *monty_export(MontyContext *mc, mp_int *x);
+void monty_mul_into(MontyContext *, mp_int *r, mp_int *, mp_int *);
+mp_int *monty_add(MontyContext *, mp_int *, mp_int *);
+mp_int *monty_sub(MontyContext *, mp_int *, mp_int *);
+mp_int *monty_mul(MontyContext *, mp_int *, mp_int *);
+mp_int *monty_pow(MontyContext *, mp_int *base, mp_int *exponent);
+mp_int *monty_invert(MontyContext *, mp_int *);
+mp_int *monty_modsqrt(ModsqrtContext *sc, mp_int *mx, unsigned *success);
+
+/*
+ * Modular arithmetic functions which don't use an explicit
+ * MontyContext. mp_modpow will use one internally (on the assumption
+ * that the exponent is likely to be large enough to make it
+ * worthwhile); the other three will just do ordinary non-Montgomery-
+ * optimised modular reduction. Use mp_modmul if you only have one
+ * product to compute; if you have a lot, consider using a
+ * MontyContext in the client code.
+ */
+mp_int *mp_modpow(mp_int *base, mp_int *exponent, mp_int *modulus);
+mp_int *mp_modmul(mp_int *x, mp_int *y, mp_int *modulus);
+mp_int *mp_modadd(mp_int *x, mp_int *y, mp_int *modulus);
+mp_int *mp_modsub(mp_int *x, mp_int *y, mp_int *modulus);
+
+/*
+ * Shift an mp_int right by a given number of bits. The shift count is
+ * considered to be secret data, and as a result, the algorithm takes
+ * O(n log n) time instead of the obvious O(n).
+ */
+mp_int *mp_rshift_safe(mp_int *x, size_t shift);
+
+/*
+ * Shift an mp_int left or right by a fixed number of bits. The shift
+ * count is NOT considered to be secret data! Use this if you're
+ * always dividing by 2, for example, but don't use it to shift by a
+ * variable amount derived from another secret number.
+ *
+ * The upside is that these functions run in sensible linear time.
+ */
+void mp_lshift_fixed_into(mp_int *r, mp_int *a, size_t shift);
+void mp_rshift_fixed_into(mp_int *r, mp_int *x, size_t shift);
+mp_int *mp_rshift_fixed(mp_int *x, size_t shift);
+
+/*
+ * Generate a random mp_int.
+ *
+ * The _function_ definitions here will expect to be given a gen_byte
+ * function that provides random data. Normally you'd pass
+ * random_byte() from random.c, and the macro wrappers automate that.
+ *
+ * (This is a bit of a dodge to avoid mpint.c having a link-time
+ * dependency on random.c, so that programs can link against one but
+ * not the other: if a client of this header uses one of these macros
+ * then _they_ have link-time dependencies on both modules.)
+ *
+ * mp_random_bits[_fn] returns an integer 0 <= n < 2^bits.
+ * mp_random_in_range[_fn](lo,hi) returns an integer lo <= n < hi.
+ */
+mp_int *mp_random_bits_fn(size_t bits, int (*gen_byte)(void));
+mp_int *mp_random_in_range_fn(
+    mp_int *lo_inclusive, mp_int *hi_exclusive, int (*gen_byte)(void));
+#define mp_random_bits(bits) mp_random_bits_fn(bits, random_byte)
+#define mp_random_in_range(lo, hi) mp_random_in_range_fn(lo, hi, random_byte)
+
+#endif /* PUTTY_MPINT_H */
diff --git a/sshbn.h b/mpint_i.h
similarity index 78%
rename from sshbn.h
rename to mpint_i.h
index 6ee97ee6..45060b7e 100644
--- a/sshbn.h
+++ b/mpint_i.h
@@ -1,10 +1,15 @@
 /*
- * sshbn.h: the assorted conditional definitions of BignumInt and
- * multiply macros used throughout the bignum code to treat numbers as
- * arrays of the most conveniently sized word for the target machine.
+ * mpint_i.h: definitions used internally by the bignum code, and
+ * also a few other vaguely-bignum-like places.
+ */
+
+/* ----------------------------------------------------------------------
+ * The assorted conditional definitions of BignumInt and multiply
+ * macros used throughout the bignum code to treat numbers as arrays
+ * of the most conveniently sized word for the target machine.
  * Exported so that other code (e.g. poly1305) can use it too.
  *
- * This file must export, in whatever ifdef branch it ends up in:
+ * This code must export, in whatever ifdef branch it ends up in:
  *
  *  - two types: 'BignumInt' and 'BignumCarry'. BignumInt is an
  *    unsigned integer type which will be used as the base word size
@@ -64,7 +69,7 @@
    */
 
   typedef unsigned long long BignumInt;
-  #define BIGNUM_INT_BITS 64
+  #define BIGNUM_INT_BITS_BITS 6
   #define DEFINE_BIGNUMDBLINT typedef __uint128_t BignumDblInt
 
 #elif defined _MSC_VER && defined _M_AMD64
@@ -85,7 +90,7 @@
   #include <intrin.h>
   typedef unsigned char BignumCarry; /* the type _addcarry_u64 likes to use */
   typedef unsigned __int64 BignumInt;
-  #define BIGNUM_INT_BITS 64
+  #define BIGNUM_INT_BITS_BITS 6
   #define BignumADC(ret, retc, a, b, c) do                \
       {                                                   \
           BignumInt ADC_tmp;                              \
@@ -119,7 +124,7 @@
   /* 32-bit BignumInt, using C99 unsigned long long as BignumDblInt */
 
   typedef unsigned int BignumInt;
-  #define BIGNUM_INT_BITS 32
+  #define BIGNUM_INT_BITS_BITS 5
   #define DEFINE_BIGNUMDBLINT typedef unsigned long long BignumDblInt
 
 #elif defined _MSC_VER && defined _M_IX86
@@ -127,7 +132,7 @@
   /* 32-bit BignumInt, using Visual Studio __int64 as BignumDblInt */
 
   typedef unsigned int BignumInt;
-  #define BIGNUM_INT_BITS  32
+  #define BIGNUM_INT_BITS_BITS 5
   #define DEFINE_BIGNUMDBLINT typedef unsigned __int64 BignumDblInt
 
 #elif defined _LP64
@@ -139,7 +144,7 @@
    */
 
   typedef unsigned int BignumInt;
-  #define BIGNUM_INT_BITS  32
+  #define BIGNUM_INT_BITS_BITS 5
   #define DEFINE_BIGNUMDBLINT typedef unsigned long BignumDblInt
 
 #else
@@ -155,15 +160,16 @@
    */
 
   typedef unsigned short BignumInt;
-  #define BIGNUM_INT_BITS  16
+  #define BIGNUM_INT_BITS_BITS 4
   #define DEFINE_BIGNUMDBLINT typedef unsigned long BignumDblInt
 
 #endif
 
 /*
- * Common code across all branches of that ifdef: define the three
- * easy constant macros in terms of BIGNUM_INT_BITS.
+ * Common code across all branches of that ifdef: define all the
+ * easy constant macros in terms of BIGNUM_INT_BITS_BITS.
  */
+#define BIGNUM_INT_BITS (1 << BIGNUM_INT_BITS_BITS)
 #define BIGNUM_INT_BYTES (BIGNUM_INT_BITS / 8)
 #define BIGNUM_TOP_BIT (((BignumInt)1) << (BIGNUM_INT_BITS-1))
 #define BIGNUM_INT_MASK (BIGNUM_TOP_BIT | (BIGNUM_TOP_BIT-1))
@@ -218,3 +224,58 @@
       } while (0)
 
 #endif /* DEFINE_BIGNUMDBLINT */
+
+/* ----------------------------------------------------------------------
+ * Data structures used inside mpint.c.
+ */
+
+struct mp_int {
+    size_t nw;      /* presumably the number of words in w[] -- confirm */
+    BignumInt *w;   /* the words; w[0] appears to be least significant */
+};
+
+struct MontyContext {
+    /*
+     * The actual modulus m, which has to be odd (so it's coprime to r).
+     */
+    mp_int *m;
+
+    /*
+     * Montgomery multiplication works by selecting a value r > m,
+     * coprime to m, which is really easy to divide by. In binary
+     * arithmetic, that means making it a power of 2; in fact we make
+     * it a whole number of BignumInts, so BIGNUM_INT_BITS divides rbits.
+     *
+     * We don't store r directly as an mp_int (there's no need). But
+     * its value is 2^rbits; we also store rw = rbits/BIGNUM_INT_BITS
+     * (the corresponding word offset within an mp_int).
+     *
+     * pw is the number of words needed to store an mp_int you're
+     * doing reduction on: it has to be big enough to hold the sum of
+     * an input value up to m^2 plus an extra addend up to m*r.
+     */
+    size_t rbits, rw, pw;
+
+    /*
+     * The key step in Montgomery reduction requires the inverse of -m
+     * mod r, i.e. the value x < r satisfying m*x == -1 (mod r).
+     */
+    mp_int *minus_minv_mod_r;
+
+    /*
+     * r^1, r^2 and r^3 mod m, which are used for various purposes.
+     *
+     * (Annoyingly, this is one of the rare cases where it would have
+     * been nicer to have a Pascal-style 1-indexed array. I couldn't
+     * _quite_ bring myself to put a gratuitous zero element in here.
+     * So you just have to live with getting r^k by taking the [k-1]th
+     * element of this array.)
+     */
+    mp_int *powers_of_r_mod_m[3];
+
+    /*
+     * Persistent scratch space from which monty_* functions can
+     * allocate storage for intermediate values.
+     */
+    mp_int *scratch;
+};
diff --git a/pageant.c b/pageant.c
index 3da719e9..ebd56b03 100644
--- a/pageant.c
+++ b/pageant.c
@@ -7,6 +7,7 @@
 #include <assert.h>
 
 #include "putty.h"
+#include "mpint.h"
 #include "ssh.h"
 #include "pageant.h"
 
@@ -41,37 +42,9 @@ static int cmpkeys_rsa(void *av, void *bv)
 {
     struct RSAKey *a = (struct RSAKey *) av;
     struct RSAKey *b = (struct RSAKey *) bv;
-    Bignum am, bm;
-    int alen, blen;
 
-    am = a->modulus;
-    bm = b->modulus;
-    /*
-     * Compare by length of moduli.
-     */
-    alen = bignum_bitcount(am);
-    blen = bignum_bitcount(bm);
-    if (alen > blen)
-	return +1;
-    else if (alen < blen)
-	return -1;
-    /*
-     * Now compare by moduli themselves.
-     */
-    alen = (alen + 7) / 8;	       /* byte count */
-    while (alen-- > 0) {
-	int abyte, bbyte;
-	abyte = bignum_byte(am, alen);
-	bbyte = bignum_byte(bm, alen);
-	if (abyte > bbyte)
-	    return +1;
-	else if (abyte < bbyte)
-	    return -1;
-    }
-    /*
-     * Give up.
-     */
-    return 0;
+    return ((int)mp_cmp_hs(a->modulus, b->modulus) -
+            (int)mp_cmp_hs(b->modulus, a->modulus));
 }
 
 /*
@@ -251,7 +224,7 @@ void pageant_handle_msg(BinarySink *bs,
 	 */
 	{
 	    struct RSAKey reqkey, *key;
-	    Bignum challenge, response;
+	    mp_int *challenge, *response;
             ptrlen session_id;
             unsigned response_type;
 	    unsigned char response_md5[16];
@@ -295,7 +268,7 @@ void pageant_handle_msg(BinarySink *bs,
 
 	    MD5Init(&md5c);
 	    for (i = 0; i < 32; i++)
-		put_byte(&md5c, bignum_byte(response, 31 - i));
+		put_byte(&md5c, mp_get_byte(response, 31 - i));
 	    put_data(&md5c, session_id.ptr, session_id.len);
 	    MD5Final(response_md5, &md5c);
 
@@ -306,8 +279,8 @@ void pageant_handle_msg(BinarySink *bs,
 
           challenge1_cleanup:
             if (response)
-                freebn(response);
-            freebn(challenge);
+                mp_free(response);
+            mp_free(challenge);
             freersakey(&reqkey);
 	}
 	break;
@@ -1275,7 +1248,7 @@ int pageant_add_keyfile(Filename *filename, const char *passphrase,
 
 	    request = strbuf_new_for_agent_query();
 	    put_byte(request, SSH1_AGENTC_ADD_RSA_IDENTITY);
-	    put_uint32(request, bignum_bitcount(rkey->modulus));
+	    put_uint32(request, mp_get_nbits(rkey->modulus));
 	    put_mp_ssh1(request, rkey->modulus);
 	    put_mp_ssh1(request, rkey->exponent);
 	    put_mp_ssh1(request, rkey->private_exponent);
diff --git a/ssh.h b/ssh.h
index ec5bbb4a..47feb92f 100644
--- a/ssh.h
+++ b/ssh.h
@@ -390,10 +390,6 @@ void ssh_user_close(Ssh *ssh, const char *fmt, ...);
 #define SSH_CIPHER_3DES		3
 #define SSH_CIPHER_BLOWFISH	6
 
-#ifndef BIGNUM_INTERNAL
-typedef void *Bignum;
-#endif
-
 typedef struct ssh_keyalg ssh_keyalg;
 typedef struct ssh_key {
     const struct ssh_keyalg *vt;
@@ -402,57 +398,52 @@ typedef struct ssh_key {
 struct RSAKey {
     int bits;
     int bytes;
-    Bignum modulus;
-    Bignum exponent;
-    Bignum private_exponent;
-    Bignum p;
-    Bignum q;
-    Bignum iqmp;
+    mp_int *modulus;
+    mp_int *exponent;
+    mp_int *private_exponent;
+    mp_int *p;
+    mp_int *q;
+    mp_int *iqmp;
     char *comment;
     ssh_key sshk;
 };
 
 struct dss_key {
-    Bignum p, q, g, y, x;
+    mp_int *p, *q, *g, *y, *x;
     ssh_key sshk;
 };
 
 struct ec_curve;
 
-struct ec_point {
-    const struct ec_curve *curve;
-    Bignum x, y;
-    Bignum z;  /* Jacobian denominator */
-    bool infinity;
-};
-
-/* A couple of ECC functions exported for use outside sshecc.c */
-struct ec_point *ecp_mul(const struct ec_point *a, const Bignum b);
-void ec_point_free(struct ec_point *point);
-
 /* Weierstrass form curve */
 struct ec_wcurve
 {
-    Bignum a, b, n;
-    struct ec_point G;
+    WeierstrassCurve *wc;
+    WeierstrassPoint *G;
+    mp_int *G_order;
 };
 
 /* Montgomery form curve */
 struct ec_mcurve
 {
-    Bignum a, b;
-    struct ec_point G;
+    MontgomeryCurve *mc;
+    MontgomeryPoint *G;
 };
 
 /* Edwards form curve */
 struct ec_ecurve
 {
-    Bignum l, d;
-    struct ec_point B;
+    EdwardsCurve *ec;
+    EdwardsPoint *G;
+    mp_int *G_order;
 };
 
+typedef enum EllipticCurveType {
+    EC_WEIERSTRASS, EC_MONTGOMERY, EC_EDWARDS
+} EllipticCurveType;
+
 struct ec_curve {
-    enum { EC_WEIERSTRASS, EC_MONTGOMERY, EC_EDWARDS } type;
+    EllipticCurveType type;
     /* 'name' is the identifier of the curve when it has to appear in
      * wire protocol encodings, as it does in e.g. the public key and
      * signature formats for NIST curves. Curves which do not format
@@ -461,8 +452,8 @@ struct ec_curve {
      * 'textname' is non-NULL for all curves, and is a human-readable
      * identification suitable for putting in log messages. */
     const char *name, *textname;
-    unsigned int fieldBits;
-    Bignum p;
+    size_t fieldBits, fieldBytes;
+    mp_int *p;
     union {
         struct ec_wcurve w;
         struct ec_mcurve m;
@@ -481,13 +472,21 @@ bool ec_ed_alg_and_curve_by_bits(int bits,
                                  const struct ec_curve **curve,
                                  const ssh_keyalg **alg);
 
-struct ec_key {
-    struct ec_point publicKey;
-    Bignum privateKey;
+struct ecdsa_key {
+    const struct ec_curve *curve;
+    WeierstrassPoint *publicKey;
+    mp_int *privateKey;
+    ssh_key sshk;
+};
+struct eddsa_key {
+    const struct ec_curve *curve;
+    EdwardsPoint *publicKey;
+    mp_int *privateKey;
     ssh_key sshk;
 };
 
-struct ec_point *ec_public(const Bignum privateKey, const struct ec_curve *curve);
+WeierstrassPoint *ecdsa_public(mp_int *private_key, const ssh_keyalg *alg);
+EdwardsPoint *eddsa_public(mp_int *private_key, const ssh_keyalg *alg);
 
 /*
  * SSH-1 never quite decided which order to store the two components
@@ -504,8 +503,9 @@ void BinarySource_get_rsa_ssh1_pub(
 void BinarySource_get_rsa_ssh1_priv(
     BinarySource *src, struct RSAKey *rsa);
 bool rsa_ssh1_encrypt(unsigned char *data, int length, struct RSAKey *key);
-Bignum rsa_ssh1_decrypt(Bignum input, struct RSAKey *key);
-bool rsa_ssh1_decrypt_pkcs1(Bignum input, struct RSAKey *key, strbuf *outbuf);
+mp_int *rsa_ssh1_decrypt(mp_int *input, struct RSAKey *key);
+bool rsa_ssh1_decrypt_pkcs1(mp_int *input, struct RSAKey *key,
+                            strbuf *outbuf);
 char *rsastr_fmt(struct RSAKey *key);
 char *rsa_ssh1_fingerprint(struct RSAKey *key);
 bool rsa_verify(struct RSAKey *key);
@@ -538,25 +538,26 @@ int ssh_rsakex_klen(struct RSAKey *key);
 void ssh_rsakex_encrypt(const struct ssh_hashalg *h,
                         unsigned char *in, int inlen,
                         unsigned char *out, int outlen, struct RSAKey *key);
-Bignum ssh_rsakex_decrypt(const struct ssh_hashalg *h, ptrlen ciphertext,
-                          struct RSAKey *rsa);
+mp_int *ssh_rsakex_decrypt(const struct ssh_hashalg *h, ptrlen ciphertext,
+                              struct RSAKey *rsa);
 
 /*
  * SSH2 ECDH key exchange functions
  */
 struct ssh_kex;
+typedef struct ecdh_key ecdh_key;
 const char *ssh_ecdhkex_curve_textname(const struct ssh_kex *kex);
-struct ec_key *ssh_ecdhkex_newkey(const struct ssh_kex *kex);
-void ssh_ecdhkex_freekey(struct ec_key *key);
-void ssh_ecdhkex_getpublic(struct ec_key *key, BinarySink *bs);
-Bignum ssh_ecdhkex_getkey(struct ec_key *key,
-                          const void *remoteKey, int remoteKeyLen);
+ecdh_key *ssh_ecdhkex_newkey(const struct ssh_kex *kex);
+void ssh_ecdhkex_freekey(ecdh_key *key);
+void ssh_ecdhkex_getpublic(ecdh_key *key, BinarySink *bs);
+mp_int *ssh_ecdhkex_getkey(ecdh_key *key, ptrlen remoteKey);
 
 /*
  * Helper function for k generation in DSA, reused in ECDSA
  */
-Bignum *dss_gen_k(const char *id_string, Bignum modulus, Bignum private_key,
-                  unsigned char *digest, int digest_len);
+mp_int *dss_gen_k(const char *id_string,
+                     mp_int *modulus, mp_int *private_key,
+                     unsigned char *digest, int digest_len);
 
 struct ssh2_cipheralg;
 typedef struct ssh2_cipher {
@@ -740,14 +741,14 @@ typedef struct ssh_hash {
     BinarySink_DELEGATE_IMPLEMENTATION;
 } ssh_hash;
 
-struct ssh_hashalg {
+typedef struct ssh_hashalg {
     ssh_hash *(*new)(const struct ssh_hashalg *alg);
     ssh_hash *(*copy)(ssh_hash *);
     void (*final)(ssh_hash *, unsigned char *); /* ALSO FREES THE ssh_hash! */
     void (*free)(ssh_hash *);
     int hlen; /* output length in bytes */
     const char *text_name;
-};   
+} ssh_hashalg;
 
 #define ssh_hash_new(alg) ((alg)->new(alg))
 #define ssh_hash_copy(ctx) ((ctx)->vt->copy(ctx))
@@ -1053,58 +1054,15 @@ void *x11_dehexify(ptrlen hex, int *outlen);
 
 Channel *agentf_new(SshChannel *c);
 
-Bignum copybn(Bignum b);
-Bignum bn_power_2(int n);
-void bn_restore_invariant(Bignum b);
-Bignum bignum_from_long(unsigned long n);
-void freebn(Bignum b);
-Bignum modpow(Bignum base, Bignum exp, Bignum mod);
-Bignum modmul(Bignum a, Bignum b, Bignum mod);
-Bignum modsub(const Bignum a, const Bignum b, const Bignum n);
-void decbn(Bignum n);
-extern Bignum Zero, One;
-Bignum bignum_from_bytes(const void *data, int nbytes);
-Bignum bignum_from_bytes_le(const void *data, int nbytes);
-Bignum bignum_random_in_range(const Bignum lower, const Bignum upper);
-int bignum_bitcount(Bignum bn);
-int bignum_byte(Bignum bn, int i);
-int bignum_bit(Bignum bn, int i);
-void bignum_set_bit(Bignum bn, int i, int value);
-Bignum biggcd(Bignum a, Bignum b);
-unsigned short bignum_mod_short(Bignum number, unsigned short modulus);
-Bignum bignum_add_long(Bignum number, unsigned long addend);
-Bignum bigadd(Bignum a, Bignum b);
-Bignum bigsub(Bignum a, Bignum b);
-Bignum bigmul(Bignum a, Bignum b);
-Bignum bigmuladd(Bignum a, Bignum b, Bignum addend);
-Bignum bigdiv(Bignum a, Bignum b);
-Bignum bigmod(Bignum a, Bignum b);
-Bignum modinv(Bignum number, Bignum modulus);
-Bignum bignum_bitmask(Bignum number);
-Bignum bignum_rshift(Bignum number, int shift);
-Bignum bignum_lshift(Bignum number, int shift);
-int bignum_cmp(Bignum a, Bignum b);
-char *bignum_decimal(Bignum x);
-Bignum bignum_from_decimal(const char *decimal);
-
-void BinarySink_put_mp_ssh1(BinarySink *, Bignum);
-void BinarySink_put_mp_ssh2(BinarySink *, Bignum);
-Bignum BinarySource_get_mp_ssh1(BinarySource *);
-Bignum BinarySource_get_mp_ssh2(BinarySource *);
-
-#ifdef DEBUG
-void diagbn(char *prefix, Bignum md);
-#endif
-
 bool dh_is_gex(const struct ssh_kex *kex);
 struct dh_ctx;
 struct dh_ctx *dh_setup_group(const struct ssh_kex *kex);
-struct dh_ctx *dh_setup_gex(Bignum pval, Bignum gval);
+struct dh_ctx *dh_setup_gex(mp_int *pval, mp_int *gval);
 int dh_modulus_bit_size(const struct dh_ctx *ctx);
 void dh_cleanup(struct dh_ctx *);
-Bignum dh_create_e(struct dh_ctx *, int nbits);
-const char *dh_validate_f(struct dh_ctx *, Bignum f);
-Bignum dh_find_K(struct dh_ctx *, Bignum f);
+mp_int *dh_create_e(struct dh_ctx *, int nbits);
+const char *dh_validate_f(struct dh_ctx *, mp_int *f);
+mp_int *dh_find_K(struct dh_ctx *, mp_int *f);
 
 bool rsa_ssh1_encrypted(const Filename *filename, char **comment);
 int rsa_ssh1_loadpub(const Filename *filename, BinarySink *bs,
@@ -1114,6 +1072,14 @@ int rsa_ssh1_loadkey(const Filename *filename, struct RSAKey *key,
 bool rsa_ssh1_savekey(const Filename *filename, struct RSAKey *key,
                       char *passphrase);
 
+static inline bool is_base64_char(char c)
+{
+    /* Base64 alphabet: letters, digits, '+' and '/', plus the '=' pad. */
+    bool alnum = ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
+                  (c >= '0' && c <= '9'));
+    return alnum || c == '+' || c == '/' || c == '=';
+}
+
 extern int base64_decode_atom(const char *atom, unsigned char *out);
 extern int base64_lines(int datalen);
 extern void base64_encode_atom(const unsigned char *data, int n, char *out);
@@ -1233,12 +1199,13 @@ int rsa_generate(struct RSAKey *key, int bits, progfn_t pfn,
 		 void *pfnparam);
 int dsa_generate(struct dss_key *key, int bits, progfn_t pfn,
 		 void *pfnparam);
-int ec_generate(struct ec_key *key, int bits, progfn_t pfn,
-                void *pfnparam);
-int ec_edgenerate(struct ec_key *key, int bits, progfn_t pfn,
-                  void *pfnparam);
-Bignum primegen(int bits, int modulus, int residue, Bignum factor,
-		int phase, progfn_t pfn, void *pfnparam, unsigned firstbits);
+int ecdsa_generate(struct ecdsa_key *key, int bits, progfn_t pfn,
+                   void *pfnparam);
+int eddsa_generate(struct eddsa_key *key, int bits, progfn_t pfn,
+                   void *pfnparam);
+mp_int *primegen(
+    int bits, int modulus, int residue, mp_int *factor,
+    int phase, progfn_t pfn, void *pfnparam, unsigned firstbits);
 void invent_firstbits(unsigned *one, unsigned *two);
 
 /*
diff --git a/ssh1login-server.c b/ssh1login-server.c
index 1dd374a7..1100d169 100644
--- a/ssh1login-server.c
+++ b/ssh1login-server.c
@@ -5,6 +5,7 @@
 #include <assert.h>
 
 #include "putty.h"
+#include "mpint.h"
 #include "ssh.h"
 #include "sshbpp.h"
 #include "sshppl.h"
@@ -29,7 +30,7 @@ struct ssh1_login_server_state {
 
     struct RSAKey *servkey, *hostkey;
     bool servkey_generated_here;
-    Bignum sesskey;
+    mp_int *sesskey;
 
     AuthPolicy *authpolicy;
     unsigned ap_methods, current_method;
@@ -206,8 +207,8 @@ static void ssh1_login_server_process_queue(PacketProtocolLayer *ppl)
         struct RSAKey *smaller, *larger;
         strbuf *data = strbuf_new();
 
-        if (bignum_bitcount(s->hostkey->modulus) >
-            bignum_bitcount(s->servkey->modulus)) {
+        if (mp_get_nbits(s->hostkey->modulus) >
+            mp_get_nbits(s->servkey->modulus)) {
             larger = s->hostkey;
             smaller = s->servkey;
         } else {
@@ -216,13 +217,13 @@ static void ssh1_login_server_process_queue(PacketProtocolLayer *ppl)
         }
 
         if (rsa_ssh1_decrypt_pkcs1(s->sesskey, larger, data)) {
-            freebn(s->sesskey);
-            s->sesskey = bignum_from_bytes(data->u, data->len);
+            mp_free(s->sesskey);
+            s->sesskey = mp_from_bytes_be(ptrlen_from_strbuf(data));
             data->len = 0;
             if (rsa_ssh1_decrypt_pkcs1(s->sesskey, smaller, data) &&
                 data->len == sizeof(s->session_key)) {
                 memcpy(s->session_key, data->u, sizeof(s->session_key));
-                freebn(s->sesskey);
+                mp_free(s->sesskey);
                 s->sesskey = NULL;     /* indicates success */
             }
         }
@@ -288,10 +289,10 @@ static void ssh1_login_server_process_queue(PacketProtocolLayer *ppl)
                 continue;
 
             {
-                Bignum modulus = get_mp_ssh1(pktin);
+                mp_int *modulus = get_mp_ssh1(pktin);
                 s->authkey = auth_publickey_ssh1(
                     s->authpolicy, s->username, modulus);
-                freebn(modulus);
+                mp_free(modulus);
             }
 
             if (!s->authkey)
@@ -321,7 +322,8 @@ static void ssh1_login_server_process_queue(PacketProtocolLayer *ppl)
                     continue;
                 }
 
-                Bignum bn = bignum_from_bytes(rsabuf, s->authkey->bytes);
+                mp_int *bn = mp_from_bytes_be(
+                    make_ptrlen(rsabuf, s->authkey->bytes));
                 smemclr(rsabuf, s->authkey->bytes);
                 sfree(rsabuf);
 
@@ -330,7 +332,7 @@ static void ssh1_login_server_process_queue(PacketProtocolLayer *ppl)
                 put_mp_ssh1(pktout, bn);
                 pq_push(s->ppl.out_pq, pktout);
 
-                freebn(bn);
+                mp_free(bn);
             }
 
             crMaybeWaitUntilV((pktin = ssh1_login_server_pop(s)) != NULL);
diff --git a/ssh1login.c b/ssh1login.c
index e5308d0b..790ffa90 100644
--- a/ssh1login.c
+++ b/ssh1login.c
@@ -7,6 +7,7 @@
 
 #include "putty.h"
 #include "ssh.h"
+#include "mpint.h"
 #include "sshbpp.h"
 #include "sshppl.h"
 #include "sshcr.h"
@@ -49,7 +50,7 @@ struct ssh1_login_state {
     int keyi, nkeys;
     bool authed;
     struct RSAKey key;
-    Bignum challenge;
+    mp_int *challenge;
     ptrlen comment;
     int dlgret;
     Filename *keyfile;
@@ -537,7 +538,7 @@ static void ssh1_login_process_queue(PacketProtocolLayer *ppl)
                     ppl_logevent("Received RSA challenge");
                     s->challenge = get_mp_ssh1(pktin);
                     if (get_err(pktin)) {
-                        freebn(s->challenge);
+                        mp_free(s->challenge);
                         ssh_proto_error(s->ppl.ssh, "Server's RSA challenge "
                                         "was badly formatted");
                         return;
@@ -549,7 +550,7 @@ static void ssh1_login_process_queue(PacketProtocolLayer *ppl)
 
                         agentreq = strbuf_new_for_agent_query();
                         put_byte(agentreq, SSH1_AGENTC_RSA_CHALLENGE);
-                        put_uint32(agentreq, bignum_bitcount(s->key.modulus));
+                        put_uint32(agentreq, mp_get_nbits(s->key.modulus));
                         put_mp_ssh1(agentreq, s->key.exponent);
                         put_mp_ssh1(agentreq, s->key.modulus);
                         put_mp_ssh1(agentreq, s->challenge);
@@ -594,9 +595,9 @@ static void ssh1_login_process_queue(PacketProtocolLayer *ppl)
                             ppl_logevent("No reply received from Pageant");
                         }
                     }
-                    freebn(s->key.exponent);
-                    freebn(s->key.modulus);
-                    freebn(s->challenge);
+                    mp_free(s->key.exponent);
+                    mp_free(s->key.modulus);
+                    mp_free(s->challenge);
                     if (s->authed)
                         break;
                 }
@@ -719,11 +720,11 @@ static void ssh1_login_process_queue(PacketProtocolLayer *ppl)
                 {
                     int i;
                     unsigned char buffer[32];
-                    Bignum challenge, response;
+                    mp_int *challenge, *response;
 
                     challenge = get_mp_ssh1(pktin);
                     if (get_err(pktin)) {
-                        freebn(challenge);
+                        mp_free(challenge);
                         ssh_proto_error(s->ppl.ssh, "Server's RSA challenge "
                                         "was badly formatted");
                         return;
@@ -732,7 +733,7 @@ static void ssh1_login_process_queue(PacketProtocolLayer *ppl)
                     freersapriv(&s->key);   /* burn the evidence */
 
                     for (i = 0; i < 32; i++) {
-                        buffer[i] = bignum_byte(response, 31 - i);
+                        buffer[i] = mp_get_byte(response, 31 - i);
                     }
 
                     {
@@ -748,8 +749,8 @@ static void ssh1_login_process_queue(PacketProtocolLayer *ppl)
                     put_data(pkt, buffer, 16);
                     pq_push(s->ppl.out_pq, pkt);
 
-                    freebn(challenge);
-                    freebn(response);
+                    mp_free(challenge);
+                    mp_free(response);
                 }
 
                 crMaybeWaitUntilV((pktin = ssh1_login_pop(s))
diff --git a/ssh2kex-client.c b/ssh2kex-client.c
index 0a17c0fd..865397df 100644
--- a/ssh2kex-client.c
+++ b/ssh2kex-client.c
@@ -11,6 +11,7 @@
 #include "sshcr.h"
 #include "storage.h"
 #include "ssh2transport.h"
+#include "mpint.h"
 
 void ssh2kex_coroutine(struct ssh2_transport_state *s, bool *aborted)
 {
@@ -170,10 +171,10 @@ void ssh2kex_coroutine(struct ssh2_transport_state *s, bool *aborted)
 
         dh_cleanup(s->dh_ctx);
         s->dh_ctx = NULL;
-        freebn(s->f); s->f = NULL;
+        mp_free(s->f); s->f = NULL;
         if (dh_is_gex(s->kex_alg)) {
-            freebn(s->g); s->g = NULL;
-            freebn(s->p); s->p = NULL;
+            mp_free(s->g); s->g = NULL;
+            mp_free(s->p); s->p = NULL;
         }
     } else if (s->kex_alg->main_type == KEXTYPE_ECDH) {
 
@@ -223,7 +224,7 @@ void ssh2kex_coroutine(struct ssh2_transport_state *s, bool *aborted)
         {
             ptrlen keydata = get_string(pktin);
             put_stringpl(s->exhash, keydata);
-            s->K = ssh_ecdhkex_getkey(s->ecdh_key, keydata.ptr, keydata.len);
+            s->K = ssh_ecdhkex_getkey(s->ecdh_key, keydata);
             if (!get_err(pktin) && !s->K) {
                 ssh_proto_error(s->ppl.ssh, "Received invalid elliptic curve "
                                 "point in ECDH reply");
@@ -501,10 +502,10 @@ void ssh2kex_coroutine(struct ssh2_transport_state *s, bool *aborted)
 
         dh_cleanup(s->dh_ctx);
         s->dh_ctx = NULL;
-        freebn(s->f); s->f = NULL;
+        mp_free(s->f); s->f = NULL;
         if (dh_is_gex(s->kex_alg)) {
-            freebn(s->g); s->g = NULL;
-            freebn(s->p); s->p = NULL;
+            mp_free(s->g); s->g = NULL;
+            mp_free(s->p); s->p = NULL;
         }
 #endif
     } else {
@@ -560,13 +561,13 @@ void ssh2kex_coroutine(struct ssh2_transport_state *s, bool *aborted)
             unsigned char *outstr;
             int outstrlen;
 
-            s->K = bn_power_2(nbits - 1);
+            s->K = mp_power_2(nbits - 1);
 
             for (i = 0; i < nbits; i++) {
                 if ((i & 7) == 0) {
                     byte = random_byte();
                 }
-                bignum_set_bit(s->K, i, (byte >> (i & 7)) & 1);
+                mp_set_bit(s->K, i, (byte >> (i & 7)) & 1);
             }
 
             /*
diff --git a/ssh2kex-server.c b/ssh2kex-server.c
index 0b94c996..22b26961 100644
--- a/ssh2kex-server.c
+++ b/ssh2kex-server.c
@@ -11,6 +11,7 @@
 #include "sshcr.h"
 #include "storage.h"
 #include "ssh2transport.h"
+#include "mpint.h"
 
 void ssh2_transport_provide_hostkeys(PacketProtocolLayer *ppl,
                                      ssh_key *const *hostkeys, int nhostkeys)
@@ -98,7 +99,7 @@ void ssh2kex_coroutine(struct ssh2_transport_state *s, bool *aborted)
              * but not for serious use.
              */
             s->p = primegen(s->pbits, 2, 2, NULL, 1, no_progress, NULL, 1);
-            s->g = bignum_from_long(2);
+            s->g = mp_from_integer(2);
             s->dh_ctx = dh_setup_gex(s->p, s->g);
             s->kex_init_value = SSH2_MSG_KEX_DH_GEX_INIT;
             s->kex_reply_value = SSH2_MSG_KEX_DH_GEX_REPLY;
@@ -177,10 +178,10 @@ void ssh2kex_coroutine(struct ssh2_transport_state *s, bool *aborted)
 
         dh_cleanup(s->dh_ctx);
         s->dh_ctx = NULL;
-        freebn(s->f); s->f = NULL;
+        mp_free(s->f); s->f = NULL;
         if (dh_is_gex(s->kex_alg)) {
-            freebn(s->g); s->g = NULL;
-            freebn(s->p); s->p = NULL;
+            mp_free(s->g); s->g = NULL;
+            mp_free(s->p); s->p = NULL;
         }
     } else if (s->kex_alg->main_type == KEXTYPE_ECDH) {
         ppl_logevent("Doing ECDH key exchange with curve %s and hash %s",
@@ -211,7 +212,7 @@ void ssh2kex_coroutine(struct ssh2_transport_state *s, bool *aborted)
             ptrlen keydata = get_string(pktin);
             put_stringpl(s->exhash, keydata);
 
-            s->K = ssh_ecdhkex_getkey(s->ecdh_key, keydata.ptr, keydata.len);
+            s->K = ssh_ecdhkex_getkey(s->ecdh_key, keydata);
             if (!get_err(pktin) && !s->K) {
                 ssh_proto_error(s->ppl.ssh, "Received invalid elliptic curve "
                                 "point in ECDH initial packet");
diff --git a/ssh2transport.c b/ssh2transport.c
index 4393d7ef..ab813ac2 100644
--- a/ssh2transport.c
+++ b/ssh2transport.c
@@ -11,6 +11,7 @@
 #include "sshcr.h"
 #include "storage.h"
 #include "ssh2transport.h"
+#include "mpint.h"
 
 const struct ssh_signkey_with_user_pref_id ssh2_hostkey_algs[] = {
     #define ARRAYENT_HOSTKEY_ALGORITHM(type, alg) { &alg, type },
@@ -200,10 +201,10 @@ static void ssh2_transport_free(PacketProtocolLayer *ppl)
         ssh_key_free(s->hkey);
         s->hkey = NULL;
     }
-    if (s->f) freebn(s->f);
-    if (s->p) freebn(s->p);
-    if (s->g) freebn(s->g);
-    if (s->K) freebn(s->K);
+    if (s->f) mp_free(s->f);
+    if (s->p) mp_free(s->p);
+    if (s->g) mp_free(s->g);
+    if (s->K) mp_free(s->K);
     if (s->dh_ctx)
         dh_cleanup(s->dh_ctx);
     if (s->rsa_kex_key)
@@ -225,7 +226,7 @@ static void ssh2_transport_free(PacketProtocolLayer *ppl)
  */
 static void ssh2_mkkey(
     struct ssh2_transport_state *s, strbuf *out,
-    Bignum K, unsigned char *H, char chr, int keylen)
+    mp_int *K, unsigned char *H, char chr, int keylen)
 {
     int hlen = s->kex_alg->hash->hlen;
     int keylen_padded;
@@ -1365,7 +1366,7 @@ static void ssh2_transport_process_queue(PacketProtocolLayer *ppl)
     /*
      * Free shared secret.
      */
-    freebn(s->K); s->K = NULL;
+    mp_free(s->K); s->K = NULL;
 
     /*
      * Update the specials menu to list the remaining uncertified host
diff --git a/ssh2transport.h b/ssh2transport.h
index 2f527e69..6b80b6cb 100644
--- a/ssh2transport.h
+++ b/ssh2transport.h
@@ -166,7 +166,7 @@ struct ssh2_transport_state {
 
     int nbits, pbits;
     bool warn_kex, warn_hk, warn_cscipher, warn_sccipher;
-    Bignum p, g, e, f, K;
+    mp_int *p, *g, *e, *f, *K;
     strbuf *outgoing_kexinit, *incoming_kexinit;
     strbuf *client_kexinit, *server_kexinit; /* aliases to the above */
     int kex_init_value, kex_reply_value;
@@ -176,7 +176,7 @@ struct ssh2_transport_state {
     char *keystr, *fingerprint;
     ssh_key *hkey;                     /* actual host key */
     struct RSAKey *rsa_kex_key;             /* for RSA kex */
-    struct ec_key *ecdh_key;              /* for ECDH kex */
+    ecdh_key *ecdh_key;                     /* for ECDH kex */
     unsigned char exchange_hash[SSH2_KEX_MAX_HASH_LEN];
     bool can_gssapi_keyex;
     bool need_gss_transient_hostkey;
diff --git a/sshbn.c b/sshbn.c
deleted file mode 100644
index c3cb5544..00000000
--- a/sshbn.c
+++ /dev/null
@@ -1,2180 +0,0 @@
-/*
- * Bignum routines for RSA and DH and stuff.
- */
-
-#include <stdio.h>
-#include <assert.h>
-#include <stdlib.h>
-#include <string.h>
-#include <limits.h>
-#include <ctype.h>
-
-#include "misc.h"
-
-#include "sshbn.h"
-
-#define BIGNUM_INTERNAL
-typedef BignumInt *Bignum;
-
-#include "ssh.h"
-#include "marshal.h"
-
-BignumInt bnZero[1] = { 0 };
-BignumInt bnOne[2] = { 1, 1 };
-BignumInt bnTen[2] = { 1, 10 };
-
-/*
- * The Bignum format is an array of `BignumInt'. The first
- * element of the array counts the remaining elements. The
- * remaining elements express the actual number, base 2^BIGNUM_INT_BITS, _least_
- * significant digit first. (So it's trivial to extract the bit
- * with value 2^n for any n.)
- *
- * All Bignums in this module are positive. Negative numbers must
- * be dealt with outside it.
- *
- * INVARIANT: the most significant word of any Bignum must be
- * nonzero.
- */
-
-Bignum Zero = bnZero, One = bnOne, Ten = bnTen;
-
-static Bignum newbn(int length)
-{
-    Bignum b;
-
-    assert(length >= 0 && length < INT_MAX / BIGNUM_INT_BITS);
-
-    b = snewn(length + 1, BignumInt);
-    memset(b, 0, (length + 1) * sizeof(*b));
-    b[0] = length;
-    return b;
-}
-
-void bn_restore_invariant(Bignum b)
-{
-    while (b[0] > 1 && b[b[0]] == 0)
-	b[0]--;
-}
-
-Bignum copybn(Bignum orig)
-{
-    Bignum b = snewn(orig[0] + 1, BignumInt);
-    if (!b)
-	abort();		       /* FIXME */
-    memcpy(b, orig, (orig[0] + 1) * sizeof(*b));
-    return b;
-}
-
-void freebn(Bignum b)
-{
-    /*
-     * Burn the evidence, just in case.
-     */
-    smemclr(b, sizeof(b[0]) * (b[0] + 1));
-    sfree(b);
-}
-
-Bignum bn_power_2(int n)
-{
-    Bignum ret;
-
-    assert(n >= 0);
-
-    ret = newbn(n / BIGNUM_INT_BITS + 1);
-    bignum_set_bit(ret, n, 1);
-    return ret;
-}
-
-/*
- * Internal addition. Sets c = a - b, where 'a', 'b' and 'c' are all
- * big-endian arrays of 'len' BignumInts. Returns the carry off the
- * top.
- */
-static BignumCarry internal_add(const BignumInt *a, const BignumInt *b,
-                                BignumInt *c, int len)
-{
-    int i;
-    BignumCarry carry = 0;
-
-    for (i = len-1; i >= 0; i--)
-        BignumADC(c[i], carry, a[i], b[i], carry);
-
-    return (BignumInt)carry;
-}
-
-/*
- * Internal subtraction. Sets c = a - b, where 'a', 'b' and 'c' are
- * all big-endian arrays of 'len' BignumInts. Any borrow from the top
- * is ignored.
- */
-static void internal_sub(const BignumInt *a, const BignumInt *b,
-                         BignumInt *c, int len)
-{
-    int i;
-    BignumCarry carry = 1;
-
-    for (i = len-1; i >= 0; i--)
-        BignumADC(c[i], carry, a[i], ~b[i], carry);
-}
-
-/*
- * Compute c = a * b.
- * Input is in the first len words of a and b.
- * Result is returned in the first 2*len words of c.
- *
- * 'scratch' must point to an array of BignumInt of size at least
- * mul_compute_scratch(len). (This covers the needs of internal_mul
- * and all its recursive calls to itself.)
- */
-#define KARATSUBA_THRESHOLD 50
-static int mul_compute_scratch(int len)
-{
-    int ret = 0;
-    while (len > KARATSUBA_THRESHOLD) {
-        int toplen = len/2, botlen = len - toplen; /* botlen is the bigger */
-        int midlen = botlen + 1;
-        ret += 4*midlen;
-        len = midlen;
-    }
-    return ret;
-}
-static void internal_mul(const BignumInt *a, const BignumInt *b,
-			 BignumInt *c, int len, BignumInt *scratch)
-{
-    if (len > KARATSUBA_THRESHOLD) {
-        int i;
-
-        /*
-         * Karatsuba divide-and-conquer algorithm. Cut each input in
-         * half, so that it's expressed as two big 'digits' in a giant
-         * base D:
-         *
-         *   a = a_1 D + a_0
-         *   b = b_1 D + b_0
-         *
-         * Then the product is of course
-         *
-         *  ab = a_1 b_1 D^2 + (a_1 b_0 + a_0 b_1) D + a_0 b_0
-         *
-         * and we compute the three coefficients by recursively
-         * calling ourself to do half-length multiplications.
-         *
-         * The clever bit that makes this worth doing is that we only
-         * need _one_ half-length multiplication for the central
-         * coefficient rather than the two that it obviouly looks
-         * like, because we can use a single multiplication to compute
-         *
-         *   (a_1 + a_0) (b_1 + b_0) = a_1 b_1 + a_1 b_0 + a_0 b_1 + a_0 b_0
-         *
-         * and then we subtract the other two coefficients (a_1 b_1
-         * and a_0 b_0) which we were computing anyway.
-         *
-         * Hence we get to multiply two numbers of length N in about
-         * three times as much work as it takes to multiply numbers of
-         * length N/2, which is obviously better than the four times
-         * as much work it would take if we just did a long
-         * conventional multiply.
-         */
-
-        int toplen = len/2, botlen = len - toplen; /* botlen is the bigger */
-        int midlen = botlen + 1;
-        BignumCarry carry;
-#ifdef KARA_DEBUG
-        int i;
-#endif
-
-        /*
-         * The coefficients a_1 b_1 and a_0 b_0 just avoid overlapping
-         * in the output array, so we can compute them immediately in
-         * place.
-         */
-
-#ifdef KARA_DEBUG
-        printf("a1,a0 = 0x");
-        for (i = 0; i < len; i++) {
-            if (i == toplen) printf(", 0x");
-            printf("%0*x", BIGNUM_INT_BITS/4, a[i]);
-        }
-        printf("\n");
-        printf("b1,b0 = 0x");
-        for (i = 0; i < len; i++) {
-            if (i == toplen) printf(", 0x");
-            printf("%0*x", BIGNUM_INT_BITS/4, b[i]);
-        }
-        printf("\n");
-#endif
-
-        /* a_1 b_1 */
-        internal_mul(a, b, c, toplen, scratch);
-#ifdef KARA_DEBUG
-        printf("a1b1 = 0x");
-        for (i = 0; i < 2*toplen; i++) {
-            printf("%0*x", BIGNUM_INT_BITS/4, c[i]);
-        }
-        printf("\n");
-#endif
-
-        /* a_0 b_0 */
-        internal_mul(a + toplen, b + toplen, c + 2*toplen, botlen, scratch);
-#ifdef KARA_DEBUG
-        printf("a0b0 = 0x");
-        for (i = 0; i < 2*botlen; i++) {
-            printf("%0*x", BIGNUM_INT_BITS/4, c[2*toplen+i]);
-        }
-        printf("\n");
-#endif
-
-        /* Zero padding. midlen exceeds toplen by at most 2, so just
-         * zero the first two words of each input and the rest will be
-         * copied over. */
-        scratch[0] = scratch[1] = scratch[midlen] = scratch[midlen+1] = 0;
-
-        for (i = 0; i < toplen; i++) {
-            scratch[midlen - toplen + i] = a[i]; /* a_1 */
-            scratch[2*midlen - toplen + i] = b[i]; /* b_1 */
-        }
-
-        /* compute a_1 + a_0 */
-        scratch[0] = internal_add(scratch+1, a+toplen, scratch+1, botlen);
-#ifdef KARA_DEBUG
-        printf("a1plusa0 = 0x");
-        for (i = 0; i < midlen; i++) {
-            printf("%0*x", BIGNUM_INT_BITS/4, scratch[i]);
-        }
-        printf("\n");
-#endif
-        /* compute b_1 + b_0 */
-        scratch[midlen] = internal_add(scratch+midlen+1, b+toplen,
-                                       scratch+midlen+1, botlen);
-#ifdef KARA_DEBUG
-        printf("b1plusb0 = 0x");
-        for (i = 0; i < midlen; i++) {
-            printf("%0*x", BIGNUM_INT_BITS/4, scratch[midlen+i]);
-        }
-        printf("\n");
-#endif
-
-        /*
-         * Now we can do the third multiplication.
-         */
-        internal_mul(scratch, scratch + midlen, scratch + 2*midlen, midlen,
-                     scratch + 4*midlen);
-#ifdef KARA_DEBUG
-        printf("a1plusa0timesb1plusb0 = 0x");
-        for (i = 0; i < 2*midlen; i++) {
-            printf("%0*x", BIGNUM_INT_BITS/4, scratch[2*midlen+i]);
-        }
-        printf("\n");
-#endif
-
-        /*
-         * Now we can reuse the first half of 'scratch' to compute the
-         * sum of the outer two coefficients, to subtract from that
-         * product to obtain the middle one.
-         */
-        scratch[0] = scratch[1] = scratch[2] = scratch[3] = 0;
-        for (i = 0; i < 2*toplen; i++)
-            scratch[2*midlen - 2*toplen + i] = c[i];
-        scratch[1] = internal_add(scratch+2, c + 2*toplen,
-                                  scratch+2, 2*botlen);
-#ifdef KARA_DEBUG
-        printf("a1b1plusa0b0 = 0x");
-        for (i = 0; i < 2*midlen; i++) {
-            printf("%0*x", BIGNUM_INT_BITS/4, scratch[i]);
-        }
-        printf("\n");
-#endif
-
-        internal_sub(scratch + 2*midlen, scratch,
-                     scratch + 2*midlen, 2*midlen);
-#ifdef KARA_DEBUG
-        printf("a1b0plusa0b1 = 0x");
-        for (i = 0; i < 2*midlen; i++) {
-            printf("%0*x", BIGNUM_INT_BITS/4, scratch[2*midlen+i]);
-        }
-        printf("\n");
-#endif
-
-        /*
-         * And now all we need to do is to add that middle coefficient
-         * back into the output. We may have to propagate a carry
-         * further up the output, but we can be sure it won't
-         * propagate right the way off the top.
-         */
-        carry = internal_add(c + 2*len - botlen - 2*midlen,
-                             scratch + 2*midlen,
-                             c + 2*len - botlen - 2*midlen, 2*midlen);
-        i = 2*len - botlen - 2*midlen - 1;
-        while (carry) {
-            assert(i >= 0);
-            BignumADC(c[i], carry, c[i], 0, carry);
-            i--;
-        }
-#ifdef KARA_DEBUG
-        printf("ab = 0x");
-        for (i = 0; i < 2*len; i++) {
-            printf("%0*x", BIGNUM_INT_BITS/4, c[i]);
-        }
-        printf("\n");
-#endif
-
-    } else {
-        int i;
-        BignumInt carry;
-        const BignumInt *ap, *bp;
-        BignumInt *cp, *cps;
-
-        /*
-         * Multiply in the ordinary O(N^2) way.
-         */
-
-        for (i = 0; i < 2 * len; i++)
-            c[i] = 0;
-
-        for (cps = c + 2*len, ap = a + len; ap-- > a; cps--) {
-            carry = 0;
-            for (cp = cps, bp = b + len; cp--, bp-- > b ;)
-                BignumMULADD2(carry, *cp, *ap, *bp, *cp, carry);
-            *cp = carry;
-        }
-    }
-}
-
-/*
- * Variant form of internal_mul used for the initial step of
- * Montgomery reduction. Only bothers outputting 'len' words
- * (everything above that is thrown away).
- */
-static void internal_mul_low(const BignumInt *a, const BignumInt *b,
-                             BignumInt *c, int len, BignumInt *scratch)
-{
-    if (len > KARATSUBA_THRESHOLD) {
-        int i;
-
-        /*
-         * Karatsuba-aware version of internal_mul_low. As before, we
-         * express each input value as a shifted combination of two
-         * halves:
-         *
-         *   a = a_1 D + a_0
-         *   b = b_1 D + b_0
-         *
-         * Then the full product is, as before,
-         *
-         *  ab = a_1 b_1 D^2 + (a_1 b_0 + a_0 b_1) D + a_0 b_0
-         *
-         * Provided we choose D on the large side (so that a_0 and b_0
-         * are _at least_ as long as a_1 and b_1), we don't need the
-         * topmost term at all, and we only need half of the middle
-         * term. So there's no point in doing the proper Karatsuba
-         * optimisation which computes the middle term using the top
-         * one, because we'd take as long computing the top one as
-         * just computing the middle one directly.
-         *
-         * So instead, we do a much more obvious thing: we call the
-         * fully optimised internal_mul to compute a_0 b_0, and we
-         * recursively call ourself to compute the _bottom halves_ of
-         * a_1 b_0 and a_0 b_1, each of which we add into the result
-         * in the obvious way.
-         *
-         * In other words, there's no actual Karatsuba _optimisation_
-         * in this function; the only benefit in doing it this way is
-         * that we call internal_mul proper for a large part of the
-         * work, and _that_ can optimise its operation.
-         */
-
-        int toplen = len/2, botlen = len - toplen; /* botlen is the bigger */
-
-        /*
-         * Scratch space for the various bits and pieces we're going
-         * to be adding together: we need botlen*2 words for a_0 b_0
-         * (though we may end up throwing away its topmost word), and
-         * toplen words for each of a_1 b_0 and a_0 b_1. That adds up
-         * to exactly 2*len.
-         */
-
-        /* a_0 b_0 */
-        internal_mul(a + toplen, b + toplen, scratch + 2*toplen, botlen,
-                     scratch + 2*len);
-
-        /* a_1 b_0 */
-        internal_mul_low(a, b + len - toplen, scratch + toplen, toplen,
-                         scratch + 2*len);
-
-        /* a_0 b_1 */
-        internal_mul_low(a + len - toplen, b, scratch, toplen,
-                         scratch + 2*len);
-
-        /* Copy the bottom half of the big coefficient into place */
-        for (i = 0; i < botlen; i++)
-            c[toplen + i] = scratch[2*toplen + botlen + i];
-
-        /* Add the two small coefficients, throwing away the returned carry */
-        internal_add(scratch, scratch + toplen, scratch, toplen);
-
-        /* And add that to the large coefficient, leaving the result in c. */
-        internal_add(scratch, scratch + 2*toplen + botlen - toplen,
-                     c, toplen);
-
-    } else {
-        int i;
-        BignumInt carry;
-        const BignumInt *ap, *bp;
-        BignumInt *cp, *cps;
-
-        /*
-         * Multiply in the ordinary O(N^2) way.
-         */
-
-        for (i = 0; i < len; i++)
-            c[i] = 0;
-
-        for (cps = c + len, ap = a + len; ap-- > a; cps--) {
-            carry = 0;
-            for (cp = cps, bp = b + len; bp--, cp-- > c ;)
-                BignumMULADD2(carry, *cp, *ap, *bp, *cp, carry);
-        }
-    }
-}
-
-/*
- * Montgomery reduction. Expects x to be a big-endian array of 2*len
- * BignumInts whose value satisfies 0 <= x < rn (where r = 2^(len *
- * BIGNUM_INT_BITS) is the Montgomery base). Returns in the same array
- * a value x' which is congruent to xr^{-1} mod n, and satisfies 0 <=
- * x' < n.
- *
- * 'n' and 'mninv' should be big-endian arrays of 'len' BignumInts
- * each, containing respectively n and the multiplicative inverse of
- * -n mod r.
- *
- * 'tmp' is an array of BignumInt used as scratch space, of length at
- * least 3*len + mul_compute_scratch(len).
- */
-static void monty_reduce(BignumInt *x, const BignumInt *n,
-                         const BignumInt *mninv, BignumInt *tmp, int len)
-{
-    int i;
-    BignumInt carry;
-
-    /*
-     * Multiply x by (-n)^{-1} mod r. This gives us a value m such
-     * that mn is congruent to -x mod r. Hence, mn+x is an exact
-     * multiple of r, and is also (obviously) congruent to x mod n.
-     */
-    internal_mul_low(x + len, mninv, tmp, len, tmp + 3*len);
-
-    /*
-     * Compute t = (mn+x)/r in ordinary, non-modular, integer
-     * arithmetic. By construction this is exact, and is congruent mod
-     * n to x * r^{-1}, i.e. the answer we want.
-     *
-     * The following multiply leaves that answer in the _most_
-     * significant half of the 'x' array, so then we must shift it
-     * down.
-     */
-    internal_mul(tmp, n, tmp+len, len, tmp + 3*len);
-    carry = internal_add(x, tmp+len, x, 2*len);
-    for (i = 0; i < len; i++)
-        x[len + i] = x[i], x[i] = 0;
-
-    /*
-     * Reduce t mod n. This doesn't require a full-on division by n,
-     * but merely a test and single optional subtraction, since we can
-     * show that 0 <= t < 2n.
-     *
-     * Proof:
-     *  + we computed m mod r, so 0 <= m < r.
-     *  + so 0 <= mn < rn, obviously
-     *  + hence we only need 0 <= x < rn to guarantee that 0 <= mn+x < 2rn
-     *  + yielding 0 <= (mn+x)/r < 2n as required.
-     */
-    if (!carry) {
-        for (i = 0; i < len; i++)
-            if (x[len + i] != n[i])
-                break;
-    }
-    if (carry || i >= len || x[len + i] > n[i])
-        internal_sub(x+len, n, x+len, len);
-}
-
-static void internal_add_shifted(BignumInt *number,
-				 BignumInt n, int shift)
-{
-    int word = 1 + (shift / BIGNUM_INT_BITS);
-    int bshift = shift % BIGNUM_INT_BITS;
-    BignumInt addendh, addendl;
-    BignumCarry carry;
-
-    addendl = n << bshift;
-    addendh = (bshift == 0 ? 0 : n >> (BIGNUM_INT_BITS - bshift));
-
-    assert(word <= number[0]);
-    BignumADC(number[word], carry, number[word], addendl, 0);
-    word++;
-    if (!addendh && !carry)
-        return;
-    assert(word <= number[0]);
-    BignumADC(number[word], carry, number[word], addendh, carry);
-    word++;
-    while (carry) {
-        assert(word <= number[0]);
-        BignumADC(number[word], carry, number[word], 0, carry);
-	word++;
-    }
-}
-
-static int bn_clz(BignumInt x)
-{
-    /*
-     * Count the leading zero bits in x. Equivalently, how far left
-     * would we need to shift x to make its top bit set?
-     *
-     * Precondition: x != 0.
-     */
-
-    /* FIXME: would be nice to put in some compiler intrinsics under
-     * ifdef here */
-    int i, ret = 0;
-    for (i = BIGNUM_INT_BITS / 2; i != 0; i >>= 1) {
-        if ((x >> (BIGNUM_INT_BITS-i)) == 0) {
-            x <<= i;
-            ret += i;
-        }
-    }
-    return ret;
-}
-
-static BignumInt reciprocal_word(BignumInt d)
-{
-    BignumInt dshort, recip, prodh, prodl;
-    int corrections;
-
-    /*
-     * Input: a BignumInt value d, with its top bit set.
-     */
-    assert(d >> (BIGNUM_INT_BITS-1) == 1);
-
-    /*
-     * Output: a value, shifted to fill a BignumInt, which is strictly
-     * less than 1/(d+1), i.e. is an *under*-estimate (but by as
-     * little as possible within the constraints) of the reciprocal of
-     * any number whose first BIGNUM_INT_BITS bits match d.
-     *
-     * Ideally we'd like to _totally_ fill BignumInt, i.e. always
-     * return a value with the top bit set. Unfortunately we can't
-     * quite guarantee that for all inputs and also return a fixed
-     * exponent. So instead we take our reciprocal to be
-     * 2^(BIGNUM_INT_BITS*2-1) / d, so that it has the top bit clear
-     * only in the exceptional case where d takes exactly the maximum
-     * value BIGNUM_INT_MASK; in that case, the top bit is clear and
-     * the next bit down is set.
-     */
-
-    /*
-     * Start by computing a half-length version of the answer, by
-     * straightforward division within a BignumInt.
-     */
-    dshort = (d >> (BIGNUM_INT_BITS/2)) + 1;
-    recip = (BIGNUM_TOP_BIT + dshort - 1) / dshort;
-    recip <<= BIGNUM_INT_BITS - BIGNUM_INT_BITS/2;
-
-    /*
-     * Newton-Raphson iteration to improve that starting reciprocal
-     * estimate: take f(x) = d - 1/x, and then the N-R formula gives
-     * x_new = x - f(x)/f'(x) = x - (d-1/x)/(1/x^2) = x(2-d*x). Or,
-     * taking our fixed-point representation into account, take f(x)
-     * to be d - K/x (where K = 2^(BIGNUM_INT_BITS*2-1) as discussed
-     * above) and then we get (2K - d*x) * x/K.
-     *
-     * Newton-Raphson doubles the number of correct bits at every
-     * iteration, and the initial division above already gave us half
-     * the output word, so it's only worth doing one iteration.
-     */
-    BignumMULADD(prodh, prodl, recip, d, recip);
-    prodl = ~prodl;
-    prodh = ~prodh;
-    {
-        BignumCarry c;
-        BignumADC(prodl, c, prodl, 1, 0);
-        prodh += c;
-    }
-    BignumMUL(prodh, prodl, prodh, recip);
-    recip = (prodh << 1) | (prodl >> (BIGNUM_INT_BITS-1));
-
-    /*
-     * Now make sure we have the best possible reciprocal estimate,
-     * before we return it. We might have been off by a handful either
-     * way - not enough to bother with any better-thought-out kind of
-     * correction loop.
-     */
-    BignumMULADD(prodh, prodl, recip, d, recip);
-    corrections = 0;
-    if (prodh >= BIGNUM_TOP_BIT) {
-        do {
-            BignumCarry c = 1;
-            BignumADC(prodl, c, prodl, ~d, c); prodh += BIGNUM_INT_MASK + c;
-            recip--;
-            corrections++;
-        } while (prodh >= ((BignumInt)1 << (BIGNUM_INT_BITS-1)));
-    } else {
-        while (1) {
-            BignumInt newprodh, newprodl;
-            BignumCarry c = 0;
-            BignumADC(newprodl, c, prodl, d, c); newprodh = prodh + c;
-            if (newprodh >= BIGNUM_TOP_BIT)
-                break;
-            prodh = newprodh;
-            prodl = newprodl;
-            recip++;
-            corrections++;
-        }
-    }
-
-    return recip;
-}
-
-/*
- * Compute a = a % m.
- * Input in first alen words of a and first mlen words of m.
- * Output in first alen words of a
- * (of which first alen-mlen words will be zero).
- * Quotient is accumulated in the `quotient' array, which is a Bignum
- * rather than the internal bigendian format.
- *
- * 'recip' must be the result of calling reciprocal_word() on the top
- * BIGNUM_INT_BITS of the modulus (denoted m0 in comments below), with
- * the topmost set bit normalised to the MSB of the input to
- * reciprocal_word. 'rshift' is how far left the top nonzero word of
- * the modulus had to be shifted to set that top bit.
- */
-static void internal_mod(BignumInt *a, int alen,
-			 BignumInt *m, int mlen,
-			 BignumInt *quot, BignumInt recip, int rshift)
-{
-    int i, k;
-
-#ifdef DIVISION_DEBUG
-    {
-        int d;
-        printf("start division, m=0x");
-        for (d = 0; d < mlen; d++)
-            printf("%0*llx", BIGNUM_INT_BITS/4, (unsigned long long)m[d]);
-        printf(", recip=%#0*llx, rshift=%d\n",
-               BIGNUM_INT_BITS/4, (unsigned long long)recip, rshift);
-    }
-#endif
-
-    /*
-     * Repeatedly use that reciprocal estimate to get a decent number
-     * of quotient bits, and subtract off the resulting multiple of m.
-     *
-     * Normally we expect to terminate this loop by means of finding
-     * out q=0 part way through, but one way in which we might not get
-     * that far in the first place is if the input a is actually zero,
-     * in which case we'll discard zero words from the front of a
-     * until we reach the termination condition in the for statement
-     * here.
-     */
-    for (i = 0; i <= alen - mlen ;) {
-	BignumInt product;
-        BignumInt aword, q;
-        int shift, full_bitoffset, bitoffset, wordoffset;
-
-#ifdef DIVISION_DEBUG
-        {
-            int d;
-            printf("main loop, a=0x");
-            for (d = 0; d < alen; d++)
-                printf("%0*llx", BIGNUM_INT_BITS/4, (unsigned long long)a[d]);
-            printf("\n");
-        }
-#endif
-
-        if (a[i] == 0) {
-#ifdef DIVISION_DEBUG
-            printf("zero word at i=%d\n", i);
-#endif
-            i++;
-            continue;
-        }
-
-        aword = a[i];
-        shift = bn_clz(aword);
-        aword <<= shift;
-        if (shift > 0 && i+1 < alen)
-            aword |= a[i+1] >> (BIGNUM_INT_BITS - shift);
-
-        {
-            BignumInt unused;
-            BignumMUL(q, unused, recip, aword);
-            (void)unused;
-        }
-
-#ifdef DIVISION_DEBUG
-        printf("i=%d, aword=%#0*llx, shift=%d, q=%#0*llx\n",
-               i, BIGNUM_INT_BITS/4, (unsigned long long)aword,
-               shift, BIGNUM_INT_BITS/4, (unsigned long long)q);
-#endif
-
-        /*
-         * Work out the right bit and word offsets to use when
-         * subtracting q*m from a.
-         *
-         * aword was taken from a[i], which means its LSB was at bit
-         * position (alen-1-i) * BIGNUM_INT_BITS. But then we shifted
-         * it left by 'shift', so now the low bit of aword corresponds
-         * to bit position (alen-1-i) * BIGNUM_INT_BITS - shift, i.e.
-         * aword is approximately equal to a / 2^(that).
-         *
-         * m0 comes from the top word of mod, so its LSB is at bit
-         * position (mlen-1) * BIGNUM_INT_BITS - rshift, i.e. it can
-         * be considered to be m / 2^(that power). 'recip' is the
-         * reciprocal of m0, times 2^(BIGNUM_INT_BITS*2-1), i.e. it's
-         * about 2^((mlen+1) * BIGNUM_INT_BITS - rshift - 1) / m.
-         *
-         * Hence, recip * aword is approximately equal to the product
-         * of those, which simplifies to
-         *
-         * a/m * 2^((mlen+2+i-alen)*BIGNUM_INT_BITS + shift - rshift - 1)
-         *
-         * But we've also shifted recip*aword down by BIGNUM_INT_BITS
-         * to form q, so we have
-         *
-         * q ~= a/m * 2^((mlen+1+i-alen)*BIGNUM_INT_BITS + shift - rshift - 1)
-         *
-         * and hence, when we now compute q*m, it will be about
-         * a*2^(all that lot), i.e. the negation of that expression is
-         * how far left we have to shift the product q*m to make it
-         * approximately equal to a.
-         */
-        full_bitoffset = -((mlen+1+i-alen)*BIGNUM_INT_BITS + shift-rshift-1);
-#ifdef DIVISION_DEBUG
-        printf("full_bitoffset=%d\n", full_bitoffset);
-#endif
-
-        if (full_bitoffset < 0) {
-            /*
-             * If we find ourselves needing to shift q*m _right_, that
-             * means we've reached the bottom of the quotient. Clip q
-             * so that its right shift becomes zero, and if that means
-             * q becomes _actually_ zero, this loop is done.
-             */
-            if (full_bitoffset <= -BIGNUM_INT_BITS)
-                break;
-            q >>= -full_bitoffset;
-            full_bitoffset = 0;
-            if (!q)
-                break;
-#ifdef DIVISION_DEBUG
-            printf("now full_bitoffset=%d, q=%#0*llx\n",
-                   full_bitoffset, BIGNUM_INT_BITS/4, (unsigned long long)q);
-#endif
-        }
-
-        wordoffset = full_bitoffset / BIGNUM_INT_BITS;
-        bitoffset = full_bitoffset % BIGNUM_INT_BITS;
-#ifdef DIVISION_DEBUG
-        printf("wordoffset=%d, bitoffset=%d\n", wordoffset, bitoffset);
-#endif
-
-        /* wordoffset as computed above is the offset between the LSWs
-         * of m and a. But in fact m and a are stored MSW-first, so we
-         * need to adjust it to be the offset between the actual array
-         * indices, and flip the sign too. */
-        wordoffset = alen - mlen - wordoffset;
-
-        if (bitoffset == 0) {
-            BignumCarry c = 1;
-            BignumInt prev_hi_word = 0;
-            for (k = mlen - 1; wordoffset+k >= i; k--) {
-                BignumInt mword = k<0 ? 0 : m[k];
-                BignumMULADD(prev_hi_word, product, q, mword, prev_hi_word);
-#ifdef DIVISION_DEBUG
-                printf("  aligned sub: product word for m[%d] = %#0*llx\n",
-                       k, BIGNUM_INT_BITS/4,
-                       (unsigned long long)product);
-#endif
-#ifdef DIVISION_DEBUG
-                printf("  aligned sub: subtrahend for a[%d] = %#0*llx\n",
-                       wordoffset+k, BIGNUM_INT_BITS/4,
-                       (unsigned long long)product);
-#endif
-                BignumADC(a[wordoffset+k], c, a[wordoffset+k], ~product, c);
-            }
-        } else {
-            BignumInt add_word = 0;
-            BignumInt c = 1;
-            BignumInt prev_hi_word = 0;
-            for (k = mlen - 1; wordoffset+k >= i; k--) {
-                BignumInt mword = k<0 ? 0 : m[k];
-                BignumMULADD(prev_hi_word, product, q, mword, prev_hi_word);
-#ifdef DIVISION_DEBUG
-                printf("  unaligned sub: product word for m[%d] = %#0*llx\n",
-                       k, BIGNUM_INT_BITS/4,
-                       (unsigned long long)product);
-#endif
-
-                add_word |= product << bitoffset;
-
-#ifdef DIVISION_DEBUG
-                printf("  unaligned sub: subtrahend for a[%d] = %#0*llx\n",
-                       wordoffset+k,
-                       BIGNUM_INT_BITS/4, (unsigned long long)add_word);
-#endif
-                BignumADC(a[wordoffset+k], c, a[wordoffset+k], ~add_word, c);
-
-                add_word = product >> (BIGNUM_INT_BITS - bitoffset);
-            }
-        }
-
-	if (quot) {
-#ifdef DIVISION_DEBUG
-            printf("adding quotient word %#0*llx << %d\n",
-                   BIGNUM_INT_BITS/4, (unsigned long long)q, full_bitoffset);
-#endif
-	    internal_add_shifted(quot, q, full_bitoffset);
-#ifdef DIVISION_DEBUG
-            {
-                int d;
-                printf("now quot=0x");
-                for (d = quot[0]; d > 0; d--)
-                    printf("%0*llx", BIGNUM_INT_BITS/4,
-                           (unsigned long long)quot[d]);
-                printf("\n");
-            }
-#endif
-        }
-    }
-
-#ifdef DIVISION_DEBUG
-    {
-        int d;
-        printf("end main loop, a=0x");
-        for (d = 0; d < alen; d++)
-            printf("%0*llx", BIGNUM_INT_BITS/4, (unsigned long long)a[d]);
-        if (quot) {
-            printf(", quot=0x");
-            for (d = quot[0]; d > 0; d--)
-                printf("%0*llx", BIGNUM_INT_BITS/4,
-                       (unsigned long long)quot[d]);
-        }
-        printf("\n");
-    }
-#endif
-
-    /*
-     * The above loop should terminate with the remaining value in a
-     * being strictly less than 2*m (if a >= 2*m then we should always
-     * have managed to get a nonzero q word), but we can't guarantee
-     * that it will be strictly less than m: consider a case where the
-     * remainder is 1, and another where the remainder is m-1. By the
-     * time a contains a value that's _about m_, you clearly can't
-     * distinguish those cases by looking at only the top word of a -
-     * you have to go all the way down to the bottom before you find
-     * out whether it's just less or just more than m.
-     *
-     * Hence, we now do a final fixup in which we subtract one last
-     * copy of m, or don't, accordingly. We should never have to
-     * subtract more than one copy of m here.
-     */
-    for (i = 0; i < alen; i++) {
-        /* Compare a with m, word by word, from the MSW down. As soon
-         * as we encounter a difference, we know whether we need the
-         * fixup. */
-        int mindex = mlen-alen+i;
-        BignumInt mword = mindex < 0 ? 0 : m[mindex];
-        if (a[i] < mword) {
-#ifdef DIVISION_DEBUG
-            printf("final fixup not needed, a < m\n");
-#endif
-            return;
-        } else if (a[i] > mword) {
-#ifdef DIVISION_DEBUG
-            printf("final fixup is needed, a > m\n");
-#endif
-            break;
-        }
-        /* If neither of those cases happened, the words are the same,
-         * so keep going and look at the next one. */
-    }
-#ifdef DIVISION_DEBUG
-    if (i == mlen) /* if we printed neither of the above diagnostics */
-        printf("final fixup is needed, a == m\n");
-#endif
-
-    /*
-     * If we got here without returning, then a >= m, so we must
-     * subtract m, and increment the quotient.
-     */
-    {
-        BignumCarry c = 1;
-        for (i = alen - 1; i >= 0; i--) {
-            int mindex = mlen-alen+i;
-            BignumInt mword = mindex < 0 ? 0 : m[mindex];
-            BignumADC(a[i], c, a[i], ~mword, c);
-        }
-    }
-    if (quot)
-        internal_add_shifted(quot, 1, 0);
-
-#ifdef DIVISION_DEBUG
-    {
-        int d;
-        printf("after final fixup, a=0x");
-        for (d = 0; d < alen; d++)
-            printf("%0*llx", BIGNUM_INT_BITS/4, (unsigned long long)a[d]);
-        if (quot) {
-            printf(", quot=0x");
-            for (d = quot[0]; d > 0; d--)
-                printf("%0*llx", BIGNUM_INT_BITS/4,
-                       (unsigned long long)quot[d]);
-        }
-        printf("\n");
-    }
-#endif
-}
-
-/*
- * Compute (base ^ exp) % mod, the pedestrian way.
- */
-Bignum modpow_simple(Bignum base_in, Bignum exp, Bignum mod)
-{
-    BignumInt *a, *b, *n, *m, *scratch;
-    BignumInt recip;
-    int rshift;
-    int mlen, scratchlen, i, j;
-    Bignum base, result;
-
-    /*
-     * The most significant word of mod needs to be non-zero. It
-     * should already be, but let's make sure.
-     */
-    assert(mod[mod[0]] != 0);
-
-    /*
-     * Make sure the base is smaller than the modulus, by reducing
-     * it modulo the modulus if not.
-     */
-    base = bigmod(base_in, mod);
-
-    /* Allocate m of size mlen, copy mod to m */
-    /* We use big endian internally */
-    mlen = mod[0];
-    m = snewn(mlen, BignumInt);
-    for (j = 0; j < mlen; j++)
-	m[j] = mod[mod[0] - j];
-
-    /* Allocate n of size mlen, copy base to n */
-    n = snewn(mlen, BignumInt);
-    i = mlen - base[0];
-    for (j = 0; j < i; j++)
-	n[j] = 0;
-    for (j = 0; j < (int)base[0]; j++)
-	n[i + j] = base[base[0] - j];
-
-    /* Allocate a and b of size 2*mlen. Set a = 1 */
-    a = snewn(2 * mlen, BignumInt);
-    b = snewn(2 * mlen, BignumInt);
-    for (i = 0; i < 2 * mlen; i++)
-	a[i] = 0;
-    a[2 * mlen - 1] = 1;
-
-    /* Scratch space for multiplies */
-    scratchlen = mul_compute_scratch(mlen);
-    scratch = snewn(scratchlen, BignumInt);
-
-    /* Skip leading zero bits of exp. */
-    i = 0;
-    j = BIGNUM_INT_BITS-1;
-    while (i < (int)exp[0] && (exp[exp[0] - i] & ((BignumInt)1 << j)) == 0) {
-	j--;
-	if (j < 0) {
-	    i++;
-	    j = BIGNUM_INT_BITS-1;
-	}
-    }
-
-    /* Compute reciprocal of the top full word of the modulus */
-    {
-        BignumInt m0 = m[0];
-        rshift = bn_clz(m0);
-        if (rshift) {
-            m0 <<= rshift;
-            if (mlen > 1)
-                m0 |= m[1] >> (BIGNUM_INT_BITS - rshift);
-        }
-        recip = reciprocal_word(m0);
-    }
-
-    /* Main computation */
-    while (i < (int)exp[0]) {
-	while (j >= 0) {
-	    internal_mul(a + mlen, a + mlen, b, mlen, scratch);
-	    internal_mod(b, mlen * 2, m, mlen, NULL, recip, rshift);
-	    if ((exp[exp[0] - i] & ((BignumInt)1 << j)) != 0) {
-		internal_mul(b + mlen, n, a, mlen, scratch);
-		internal_mod(a, mlen * 2, m, mlen, NULL, recip, rshift);
-	    } else {
-		BignumInt *t;
-		t = a;
-		a = b;
-		b = t;
-	    }
-	    j--;
-	}
-	i++;
-	j = BIGNUM_INT_BITS-1;
-    }
-
-    /* Copy result to buffer */
-    result = newbn(mod[0]);
-    for (i = 0; i < mlen; i++)
-	result[result[0] - i] = a[i + mlen];
-    while (result[0] > 1 && result[result[0]] == 0)
-	result[0]--;
-
-    /* Free temporary arrays */
-    smemclr(a, 2 * mlen * sizeof(*a));
-    sfree(a);
-    smemclr(scratch, scratchlen * sizeof(*scratch));
-    sfree(scratch);
-    smemclr(b, 2 * mlen * sizeof(*b));
-    sfree(b);
-    smemclr(m, mlen * sizeof(*m));
-    sfree(m);
-    smemclr(n, mlen * sizeof(*n));
-    sfree(n);
-
-    freebn(base);
-
-    return result;
-}
-
-/*
- * Compute (base ^ exp) % mod. Uses the Montgomery multiplication
- * technique where possible, falling back to modpow_simple otherwise.
- */
-Bignum modpow(Bignum base_in, Bignum exp, Bignum mod)
-{
-    BignumInt *a, *b, *x, *n, *mninv, *scratch;
-    int len, scratchlen, i, j;
-    Bignum base, base2, r, rn, inv, result;
-
-    /*
-     * The most significant word of mod needs to be non-zero. It
-     * should already be, but let's make sure.
-     */
-    assert(mod[mod[0]] != 0);
-
-    /*
-     * mod had better be odd, or we can't do Montgomery multiplication
-     * using a power of two at all.
-     */
-    if (!(mod[1] & 1))
-        return modpow_simple(base_in, exp, mod);
-
-    /*
-     * Make sure the base is smaller than the modulus, by reducing
-     * it modulo the modulus if not.
-     */
-    base = bigmod(base_in, mod);
-
-    /*
-     * Compute the inverse of n mod r, for monty_reduce. (In fact we
-     * want the inverse of _minus_ n mod r, but we'll sort that out
-     * below.)
-     */
-    len = mod[0];
-    r = bn_power_2(BIGNUM_INT_BITS * len);
-    inv = modinv(mod, r);
-    assert(inv); /* cannot fail, since mod is odd and r is a power of 2 */
-
-    /*
-     * Multiply the base by r mod n, to get it into Montgomery
-     * representation.
-     */
-    base2 = modmul(base, r, mod);
-    freebn(base);
-    base = base2;
-
-    rn = bigmod(r, mod);               /* r mod n, i.e. Montgomerified 1 */
-
-    freebn(r);                         /* won't need this any more */
-
-    /*
-     * Set up internal arrays of the right lengths, in big-endian
-     * format, containing the base, the modulus, and the modulus's
-     * inverse.
-     */
-    n = snewn(len, BignumInt);
-    for (j = 0; j < len; j++)
-	n[len - 1 - j] = mod[j + 1];
-
-    mninv = snewn(len, BignumInt);
-    for (j = 0; j < len; j++)
-	mninv[len - 1 - j] = (j < (int)inv[0] ? inv[j + 1] : 0);
-    freebn(inv);         /* we don't need this copy of it any more */
-    /* Now negate mninv mod r, so it's the inverse of -n rather than +n. */
-    x = snewn(len, BignumInt);
-    for (j = 0; j < len; j++)
-        x[j] = 0;
-    internal_sub(x, mninv, mninv, len);
-
-    /* x = snewn(len, BignumInt); */ /* already done above */
-    for (j = 0; j < len; j++)
-	x[len - 1 - j] = (j < (int)base[0] ? base[j + 1] : 0);
-    freebn(base);        /* we don't need this copy of it any more */
-
-    a = snewn(2*len, BignumInt);
-    b = snewn(2*len, BignumInt);
-    for (j = 0; j < len; j++)
-	a[2*len - 1 - j] = (j < (int)rn[0] ? rn[j + 1] : 0);
-    freebn(rn);
-
-    /* Scratch space for multiplies */
-    scratchlen = 3*len + mul_compute_scratch(len);
-    scratch = snewn(scratchlen, BignumInt);
-
-    /* Skip leading zero bits of exp. */
-    i = 0;
-    j = BIGNUM_INT_BITS-1;
-    while (i < (int)exp[0] && (exp[exp[0] - i] & ((BignumInt)1 << j)) == 0) {
-	j--;
-	if (j < 0) {
-	    i++;
-	    j = BIGNUM_INT_BITS-1;
-	}
-    }
-
-    /* Main computation */
-    while (i < (int)exp[0]) {
-	while (j >= 0) {
-	    internal_mul(a + len, a + len, b, len, scratch);
-            monty_reduce(b, n, mninv, scratch, len);
-	    if ((exp[exp[0] - i] & ((BignumInt)1 << j)) != 0) {
-                internal_mul(b + len, x, a, len,  scratch);
-                monty_reduce(a, n, mninv, scratch, len);
-	    } else {
-		BignumInt *t;
-		t = a;
-		a = b;
-		b = t;
-	    }
-	    j--;
-	}
-	i++;
-	j = BIGNUM_INT_BITS-1;
-    }
-
-    /*
-     * Final monty_reduce to get back from the adjusted Montgomery
-     * representation.
-     */
-    monty_reduce(a, n, mninv, scratch, len);
-
-    /* Copy result to buffer */
-    result = newbn(mod[0]);
-    for (i = 0; i < len; i++)
-	result[result[0] - i] = a[i + len];
-    while (result[0] > 1 && result[result[0]] == 0)
-	result[0]--;
-
-    /* Free temporary arrays */
-    smemclr(scratch, scratchlen * sizeof(*scratch));
-    sfree(scratch);
-    smemclr(a, 2 * len * sizeof(*a));
-    sfree(a);
-    smemclr(b, 2 * len * sizeof(*b));
-    sfree(b);
-    smemclr(mninv, len * sizeof(*mninv));
-    sfree(mninv);
-    smemclr(n, len * sizeof(*n));
-    sfree(n);
-    smemclr(x, len * sizeof(*x));
-    sfree(x);
-
-    return result;
-}
-
-/*
- * Compute (p * q) % mod.
- * The most significant word of mod MUST be non-zero.
- * We assume that the result array is the same size as the mod array.
- */
-Bignum modmul(Bignum p, Bignum q, Bignum mod)
-{
-    BignumInt *a, *n, *m, *o, *scratch;
-    BignumInt recip;
-    int rshift, scratchlen;
-    int pqlen, mlen, rlen, i, j;
-    Bignum result;
-
-    /*
-     * The most significant word of mod needs to be non-zero. It
-     * should already be, but let's make sure.
-     */
-    assert(mod[mod[0]] != 0);
-
-    /* Allocate m of size mlen, copy mod to m */
-    /* We use big endian internally */
-    mlen = mod[0];
-    m = snewn(mlen, BignumInt);
-    for (j = 0; j < mlen; j++)
-	m[j] = mod[mod[0] - j];
-
-    pqlen = (p[0] > q[0] ? p[0] : q[0]);
-
-    /*
-     * Make sure that we're allowing enough space. The shifting below
-     * will underflow the vectors we allocate if pqlen is too small.
-     */
-    if (2*pqlen <= mlen)
-        pqlen = mlen/2 + 1;
-
-    /* Allocate n of size pqlen, copy p to n */
-    n = snewn(pqlen, BignumInt);
-    i = pqlen - p[0];
-    for (j = 0; j < i; j++)
-	n[j] = 0;
-    for (j = 0; j < (int)p[0]; j++)
-	n[i + j] = p[p[0] - j];
-
-    /* Allocate o of size pqlen, copy q to o */
-    o = snewn(pqlen, BignumInt);
-    i = pqlen - q[0];
-    for (j = 0; j < i; j++)
-	o[j] = 0;
-    for (j = 0; j < (int)q[0]; j++)
-	o[i + j] = q[q[0] - j];
-
-    /* Allocate a of size 2*pqlen for result */
-    a = snewn(2 * pqlen, BignumInt);
-
-    /* Scratch space for multiplies */
-    scratchlen = mul_compute_scratch(pqlen);
-    scratch = snewn(scratchlen, BignumInt);
-
-    /* Compute reciprocal of the top full word of the modulus */
-    {
-        BignumInt m0 = m[0];
-        rshift = bn_clz(m0);
-        if (rshift) {
-            m0 <<= rshift;
-            if (mlen > 1)
-                m0 |= m[1] >> (BIGNUM_INT_BITS - rshift);
-        }
-        recip = reciprocal_word(m0);
-    }
-
-    /* Main computation */
-    internal_mul(n, o, a, pqlen, scratch);
-    internal_mod(a, pqlen * 2, m, mlen, NULL, recip, rshift);
-
-    /* Copy result to buffer */
-    rlen = (mlen < pqlen * 2 ? mlen : pqlen * 2);
-    result = newbn(rlen);
-    for (i = 0; i < rlen; i++)
-	result[result[0] - i] = a[i + 2 * pqlen - rlen];
-    while (result[0] > 1 && result[result[0]] == 0)
-	result[0]--;
-
-    /* Free temporary arrays */
-    smemclr(scratch, scratchlen * sizeof(*scratch));
-    sfree(scratch);
-    smemclr(a, 2 * pqlen * sizeof(*a));
-    sfree(a);
-    smemclr(m, mlen * sizeof(*m));
-    sfree(m);
-    smemclr(n, pqlen * sizeof(*n));
-    sfree(n);
-    smemclr(o, pqlen * sizeof(*o));
-    sfree(o);
-
-    return result;
-}
-
-Bignum modsub(const Bignum a, const Bignum b, const Bignum n)
-{
-    Bignum a1, b1, ret;
-
-    if (bignum_cmp(a, n) >= 0) a1 = bigmod(a, n);
-    else a1 = a;
-    if (bignum_cmp(b, n) >= 0) b1 = bigmod(b, n);
-    else b1 = b;
-
-    if (bignum_cmp(a1, b1) >= 0) /* a >= b */
-    {
-        ret = bigsub(a1, b1);
-    }
-    else
-    {
-        /* Handle going round the corner of the modulus without having
-         * negative support in Bignum */
-        Bignum tmp = bigsub(n, b1);
-        assert(tmp);
-        ret = bigadd(tmp, a1);
-        freebn(tmp);
-    }
-
-    if (a != a1) freebn(a1);
-    if (b != b1) freebn(b1);
-
-    return ret;
-}
-
-/*
- * Compute p % mod.
- * The most significant word of mod MUST be non-zero.
- * We assume that the result array is the same size as the mod array.
- * We optionally write out a quotient if `quotient' is non-NULL.
- * We can avoid writing out the result if `result' is NULL.
- */
-static void bigdivmod(Bignum p, Bignum mod, Bignum result, Bignum quotient)
-{
-    BignumInt *n, *m;
-    BignumInt recip;
-    int rshift;
-    int plen, mlen, i, j;
-
-    /*
-     * The most significant word of mod needs to be non-zero. It
-     * should already be, but let's make sure.
-     */
-    assert(mod[mod[0]] != 0);
-
-    /* Allocate m of size mlen, copy mod to m */
-    /* We use big endian internally */
-    mlen = mod[0];
-    m = snewn(mlen, BignumInt);
-    for (j = 0; j < mlen; j++)
-	m[j] = mod[mod[0] - j];
-
-    plen = p[0];
-    /* Ensure plen > mlen */
-    if (plen <= mlen)
-	plen = mlen + 1;
-
-    /* Allocate n of size plen, copy p to n */
-    n = snewn(plen, BignumInt);
-    for (j = 0; j < plen; j++)
-	n[j] = 0;
-    for (j = 1; j <= (int)p[0]; j++)
-	n[plen - j] = p[j];
-
-    /* Compute reciprocal of the top full word of the modulus */
-    {
-        BignumInt m0 = m[0];
-        rshift = bn_clz(m0);
-        if (rshift) {
-            m0 <<= rshift;
-            if (mlen > 1)
-                m0 |= m[1] >> (BIGNUM_INT_BITS - rshift);
-        }
-        recip = reciprocal_word(m0);
-    }
-
-    /* Main computation */
-    internal_mod(n, plen, m, mlen, quotient, recip, rshift);
-
-    /* Copy result to buffer */
-    if (result) {
-	for (i = 1; i <= (int)result[0]; i++) {
-	    int j = plen - i;
-	    result[i] = j >= 0 ? n[j] : 0;
-	}
-    }
-
-    /* Free temporary arrays */
-    smemclr(m, mlen * sizeof(*m));
-    sfree(m);
-    smemclr(n, plen * sizeof(*n));
-    sfree(n);
-}
-
-/*
- * Decrement a number.
- */
-void decbn(Bignum bn)
-{
-    int i = 1;
-    while (i < (int)bn[0] && bn[i] == 0)
-	bn[i++] = BIGNUM_INT_MASK;
-    bn[i]--;
-}
-
-Bignum bignum_from_bytes(const void *vdata, int nbytes)
-{
-    const unsigned char *data = (const unsigned char *)vdata;
-    Bignum result;
-    int w, i;
-
-    assert(nbytes >= 0 && nbytes < INT_MAX/8);
-
-    w = (nbytes + BIGNUM_INT_BYTES - 1) / BIGNUM_INT_BYTES; /* bytes->words */
-
-    result = newbn(w);
-    for (i = 1; i <= w; i++)
-	result[i] = 0;
-    for (i = nbytes; i--;) {
-	unsigned char byte = *data++;
-	result[1 + i / BIGNUM_INT_BYTES] |=
-            (BignumInt)byte << (8*i % BIGNUM_INT_BITS);
-    }
-
-    bn_restore_invariant(result);
-    return result;
-}
-
-Bignum bignum_from_bytes_le(const void *vdata, int nbytes)
-{
-    const unsigned char *data = (const unsigned char *)vdata;
-    Bignum result;
-    int w, i;
-
-    assert(nbytes >= 0 && nbytes < INT_MAX/8);
-
-    w = (nbytes + BIGNUM_INT_BYTES - 1) / BIGNUM_INT_BYTES; /* bytes->words */
-
-    result = newbn(w);
-    for (i = 1; i <= w; i++)
-        result[i] = 0;
-    for (i = 0; i < nbytes; ++i) {
-        unsigned char byte = *data++;
-        result[1 + i / BIGNUM_INT_BYTES] |=
-            (BignumInt)byte << (8*i % BIGNUM_INT_BITS);
-    }
-
-    bn_restore_invariant(result);
-    return result;
-}
-
-Bignum bignum_from_decimal(const char *decimal)
-{
-    Bignum result = copybn(Zero);
-
-    while (*decimal) {
-        Bignum tmp, tmp2;
-
-        if (!isdigit((unsigned char)*decimal)) {
-            freebn(result);
-            return 0;
-        }
-
-        tmp = bigmul(result, Ten);
-        tmp2 = bignum_from_long(*decimal - '0');
-        freebn(result);
-        result = bigadd(tmp, tmp2);
-        freebn(tmp);
-        freebn(tmp2);
-
-        decimal++;
-    }
-
-    return result;
-}
-
-Bignum bignum_random_in_range(const Bignum lower, const Bignum upper)
-{
-    Bignum ret = NULL;
-    unsigned char *bytes;
-    int upper_len = bignum_bitcount(upper);
-    int upper_bytes = upper_len / 8;
-    int upper_bits = upper_len % 8;
-    if (upper_bits) ++upper_bytes;
-
-    bytes = snewn(upper_bytes, unsigned char);
-    do {
-        int i;
-
-        if (ret) freebn(ret);
-
-        for (i = 0; i < upper_bytes; ++i)
-        {
-            bytes[i] = (unsigned char)random_byte();
-        }
-        /* Mask the top to reduce failure rate to 50/50 */
-        if (upper_bits)
-        {
-            bytes[i - 1] &= 0xFF >> (8 - upper_bits);
-        }
-
-        ret = bignum_from_bytes(bytes, upper_bytes);
-    } while (bignum_cmp(ret, lower) < 0 || bignum_cmp(ret, upper) > 0);
-    smemclr(bytes, upper_bytes);
-    sfree(bytes);
-
-    return ret;
-}
-
-/*
- * Return the bit count of a bignum.
- */
-int bignum_bitcount(Bignum bn)
-{
-    int bitcount = bn[0] * BIGNUM_INT_BITS - 1;
-    while (bitcount >= 0
-	   && (bn[bitcount / BIGNUM_INT_BITS + 1] >> (bitcount % BIGNUM_INT_BITS)) == 0) bitcount--;
-    return bitcount + 1;
-}
-
-/*
- * Return a byte from a bignum; 0 is least significant, etc.
- */
-int bignum_byte(Bignum bn, int i)
-{
-    if (i < 0 || i >= (int)(BIGNUM_INT_BYTES * bn[0]))
-	return 0;		       /* beyond the end */
-    else
-	return (bn[i / BIGNUM_INT_BYTES + 1] >>
-		((i % BIGNUM_INT_BYTES)*8)) & 0xFF;
-}
-
-/*
- * Return a bit from a bignum; 0 is least significant, etc.
- */
-int bignum_bit(Bignum bn, int i)
-{
-    if (i < 0 || i >= (int)(BIGNUM_INT_BITS * bn[0]))
-	return 0;		       /* beyond the end */
-    else
-	return (bn[i / BIGNUM_INT_BITS + 1] >> (i % BIGNUM_INT_BITS)) & 1;
-}
-
-/*
- * Set a bit in a bignum; 0 is least significant, etc.
- */
-void bignum_set_bit(Bignum bn, int bitnum, int value)
-{
-    if (bitnum < 0 || bitnum >= (int)(BIGNUM_INT_BITS * bn[0])) {
-        if (value) abort();		       /* beyond the end */
-    } else {
-	int v = bitnum / BIGNUM_INT_BITS + 1;
-	BignumInt mask = (BignumInt)1 << (bitnum % BIGNUM_INT_BITS);
-	if (value)
-	    bn[v] |= mask;
-	else
-	    bn[v] &= ~mask;
-    }
-}
-
-void BinarySink_put_mp_ssh1(BinarySink *bs, Bignum bn)
-{
-    int bits = bignum_bitcount(bn);
-    int bytes = (bits + 7) / 8;
-    int i;
-
-    put_uint16(bs, bits);
-    for (i = bytes; i--;)
-        put_byte(bs, bignum_byte(bn, i));
-}
-
-void BinarySink_put_mp_ssh2(BinarySink *bs, Bignum bn)
-{
-    int bytes = (bignum_bitcount(bn) + 8) / 8;
-    int i;
-
-    put_uint32(bs, bytes);
-    for (i = bytes; i--;)
-        put_byte(bs, bignum_byte(bn, i));
-}
-
-Bignum BinarySource_get_mp_ssh1(BinarySource *src)
-{
-    unsigned bitc = get_uint16(src);
-    ptrlen bytes = get_data(src, (bitc + 7) / 8);
-    if (get_err(src)) {
-        return bignum_from_long(0);
-    } else {
-        Bignum toret = bignum_from_bytes(bytes.ptr, bytes.len);
-        /* SSH-1.5 spec says that it's OK for the prefix uint16 to be
-         * _greater_ than the actual number of bits */
-        if (bignum_bitcount(toret) > bitc) {
-            src->err = BSE_INVALID;
-            freebn(toret);
-            toret = bignum_from_long(0);
-        }
-        return toret;
-    }
-}
-
-Bignum BinarySource_get_mp_ssh2(BinarySource *src)
-{
-    ptrlen bytes = get_string(src);
-    if (get_err(src)) {
-        return bignum_from_long(0);
-    } else {
-        const unsigned char *p = bytes.ptr;
-        if ((bytes.len > 0 &&
-             ((p[0] & 0x80) ||
-              (p[0] == 0 && (bytes.len <= 1 || !(p[1] & 0x80)))))) {
-            src->err = BSE_INVALID;
-            return bignum_from_long(0);
-        }
-        return bignum_from_bytes(bytes.ptr, bytes.len);
-    }
-}
-
-/*
- * Compare two bignums. Returns like strcmp.
- */
-int bignum_cmp(Bignum a, Bignum b)
-{
-    int amax = a[0], bmax = b[0];
-    int i;
-
-    /* Annoyingly we have two representations of zero */
-    if (amax == 1 && a[amax] == 0)
-        amax = 0;
-    if (bmax == 1 && b[bmax] == 0)
-        bmax = 0;
-
-    assert(amax == 0 || a[amax] != 0);
-    assert(bmax == 0 || b[bmax] != 0);
-
-    i = (amax > bmax ? amax : bmax);
-    while (i) {
-	BignumInt aval = (i > amax ? 0 : a[i]);
-	BignumInt bval = (i > bmax ? 0 : b[i]);
-	if (aval < bval)
-	    return -1;
-	if (aval > bval)
-	    return +1;
-	i--;
-    }
-    return 0;
-}
-
-/*
- * Right-shift one bignum to form another.
- */
-Bignum bignum_rshift(Bignum a, int shift)
-{
-    Bignum ret;
-    int i, shiftw, shiftb, shiftbb, bits;
-    BignumInt ai, ai1;
-
-    assert(shift >= 0);
-
-    bits = bignum_bitcount(a) - shift;
-    ret = newbn((bits + BIGNUM_INT_BITS - 1) / BIGNUM_INT_BITS);
-
-    if (ret) {
-	shiftw = shift / BIGNUM_INT_BITS;
-	shiftb = shift % BIGNUM_INT_BITS;
-	shiftbb = BIGNUM_INT_BITS - shiftb;
-
-	ai1 = a[shiftw + 1];
-	for (i = 1; i <= (int)ret[0]; i++) {
-	    ai = ai1;
-	    ai1 = (i + shiftw + 1 <= (int)a[0] ? a[i + shiftw + 1] : 0);
-	    ret[i] = ((ai >> shiftb) | (ai1 << shiftbb)) & BIGNUM_INT_MASK;
-	}
-    }
-
-    return ret;
-}
-
-/*
- * Left-shift one bignum to form another.
- */
-Bignum bignum_lshift(Bignum a, int shift)
-{
-    Bignum ret;
-    int bits, shiftWords, shiftBits;
-
-    assert(shift >= 0);
-
-    bits = bignum_bitcount(a) + shift;
-    ret = newbn((bits + BIGNUM_INT_BITS - 1) / BIGNUM_INT_BITS);
-
-    shiftWords = shift / BIGNUM_INT_BITS;
-    shiftBits = shift % BIGNUM_INT_BITS;
-
-    if (shiftBits == 0)
-    {
-        memcpy(&ret[1 + shiftWords], &a[1], sizeof(BignumInt) * a[0]);
-    }
-    else
-    {
-        int i;
-        BignumInt carry = 0;
-
-        /* Remember that Bignum[0] is length, so add 1 */
-        for (i = shiftWords + 1; i < ((int)a[0]) + shiftWords + 1; ++i)
-        {
-            BignumInt from = a[i - shiftWords];
-            ret[i] = (from << shiftBits) | carry;
-            carry = from >> (BIGNUM_INT_BITS - shiftBits);
-        }
-        if (carry) ret[i] = carry;
-    }
-
-    return ret;
-}
-
-/*
- * Non-modular multiplication and addition.
- */
-Bignum bigmuladd(Bignum a, Bignum b, Bignum addend)
-{
-    int alen = a[0], blen = b[0];
-    int mlen = (alen > blen ? alen : blen);
-    int rlen, i, maxspot;
-    int wslen;
-    BignumInt *workspace;
-    Bignum ret;
-
-    /* mlen space for a, mlen space for b, 2*mlen for result,
-     * plus scratch space for multiplication */
-    wslen = mlen * 4 + mul_compute_scratch(mlen);
-    workspace = snewn(wslen, BignumInt);
-    for (i = 0; i < mlen; i++) {
-	workspace[0 * mlen + i] = (mlen - i <= (int)a[0] ? a[mlen - i] : 0);
-	workspace[1 * mlen + i] = (mlen - i <= (int)b[0] ? b[mlen - i] : 0);
-    }
-
-    internal_mul(workspace + 0 * mlen, workspace + 1 * mlen,
-		 workspace + 2 * mlen, mlen, workspace + 4 * mlen);
-
-    /* now just copy the result back */
-    rlen = alen + blen + 1;
-    if (addend && rlen <= (int)addend[0])
-	rlen = addend[0] + 1;
-    ret = newbn(rlen);
-    maxspot = 0;
-    for (i = 1; i <= (int)ret[0]; i++) {
-	ret[i] = (i <= 2 * mlen ? workspace[4 * mlen - i] : 0);
-	if (ret[i] != 0)
-	    maxspot = i;
-    }
-    ret[0] = maxspot;
-
-    /* now add in the addend, if any */
-    if (addend) {
-	BignumCarry carry = 0;
-	for (i = 1; i <= rlen; i++) {
-            BignumInt retword = (i <= (int)ret[0] ? ret[i] : 0);
-            BignumInt addword = (i <= (int)addend[0] ? addend[i] : 0);
-            BignumADC(ret[i], carry, retword, addword, carry);
-	    if (ret[i] != 0 && i > maxspot)
-		maxspot = i;
-	}
-    }
-    ret[0] = maxspot;
-
-    smemclr(workspace, wslen * sizeof(*workspace));
-    sfree(workspace);
-    return ret;
-}
-
-/*
- * Non-modular multiplication.
- */
-Bignum bigmul(Bignum a, Bignum b)
-{
-    return bigmuladd(a, b, NULL);
-}
-
-/*
- * Simple addition.
- */
-Bignum bigadd(Bignum a, Bignum b)
-{
-    int alen = a[0], blen = b[0];
-    int rlen = (alen > blen ? alen : blen) + 1;
-    int i, maxspot;
-    Bignum ret;
-    BignumCarry carry;
-
-    ret = newbn(rlen);
-
-    carry = 0;
-    maxspot = 0;
-    for (i = 1; i <= rlen; i++) {
-        BignumInt aword = (i <= (int)a[0] ? a[i] : 0);
-        BignumInt bword = (i <= (int)b[0] ? b[i] : 0);
-        BignumADC(ret[i], carry, aword, bword, carry);
-        if (ret[i] != 0 && i > maxspot)
-            maxspot = i;
-    }
-    ret[0] = maxspot;
-
-    return ret;
-}
-
-/*
- * Subtraction. Returns a-b, or NULL if the result would come out
- * negative (recall that this entire bignum module only handles
- * positive numbers).
- */
-Bignum bigsub(Bignum a, Bignum b)
-{
-    int alen = a[0], blen = b[0];
-    int rlen = (alen > blen ? alen : blen);
-    int i, maxspot;
-    Bignum ret;
-    BignumCarry carry;
-
-    ret = newbn(rlen);
-
-    carry = 1;
-    maxspot = 0;
-    for (i = 1; i <= rlen; i++) {
-        BignumInt aword = (i <= (int)a[0] ? a[i] : 0);
-        BignumInt bword = (i <= (int)b[0] ? b[i] : 0);
-        BignumADC(ret[i], carry, aword, ~bword, carry);
-        if (ret[i] != 0 && i > maxspot)
-            maxspot = i;
-    }
-    ret[0] = maxspot;
-
-    if (!carry) {
-        freebn(ret);
-        return NULL;
-    }
-
-    return ret;
-}
-
-/*
- * Create a bignum which is the bitmask covering another one. That
- * is, the smallest integer which is >= N and is also one less than
- * a power of two.
- */
-Bignum bignum_bitmask(Bignum n)
-{
-    Bignum ret = copybn(n);
-    int i;
-    BignumInt j;
-
-    i = ret[0];
-    while (n[i] == 0 && i > 0)
-	i--;
-    if (i <= 0)
-	return ret;		       /* input was zero */
-    j = 1;
-    while (j < n[i])
-	j = 2 * j + 1;
-    ret[i] = j;
-    while (--i > 0)
-	ret[i] = BIGNUM_INT_MASK;
-    return ret;
-}
-
-/*
- * Convert an unsigned long into a bignum.
- */
-Bignum bignum_from_long(unsigned long n)
-{
-    const int maxwords =
-        (sizeof(unsigned long) + sizeof(BignumInt) - 1) / sizeof(BignumInt);
-    Bignum ret;
-    int i;
-
-    ret = newbn(maxwords);
-    ret[0] = 0;
-    for (i = 0; i < maxwords; i++) {
-        ret[i+1] = n >> (i * BIGNUM_INT_BITS);
-        if (ret[i+1] != 0)
-            ret[0] = i+1;
-    }
-
-    return ret;
-}
-
-/*
- * Add a long to a bignum.
- */
-Bignum bignum_add_long(Bignum number, unsigned long n)
-{
-    const int maxwords =
-        (sizeof(unsigned long) + sizeof(BignumInt) - 1) / sizeof(BignumInt);
-    Bignum ret;
-    int words, i;
-    BignumCarry carry;
-
-    words = number[0];
-    if (words < maxwords)
-        words = maxwords;
-    words++;
-    ret = newbn(words);
-
-    carry = 0;
-    ret[0] = 0;
-    for (i = 0; i < words; i++) {
-        BignumInt nword = (i < maxwords ? n >> (i * BIGNUM_INT_BITS) : 0);
-        BignumInt numword = (i < number[0] ? number[i+1] : 0);
-        BignumADC(ret[i+1], carry, numword, nword, carry);
-	if (ret[i+1] != 0)
-            ret[0] = i+1;
-    }
-    return ret;
-}
-
-/*
- * Compute the residue of a bignum, modulo a (max 16-bit) short.
- */
-unsigned short bignum_mod_short(Bignum number, unsigned short modulus)
-{
-    unsigned long mod = modulus, r = 0;
-    /* Precompute (BIGNUM_INT_MASK+1) % mod */
-    unsigned long base_r = (BIGNUM_INT_MASK - modulus + 1) % mod;
-    int i;
-
-    for (i = number[0]; i > 0; i--) {
-        /*
-         * Conceptually, ((r << BIGNUM_INT_BITS) + number[i]) % mod
-         */
-        r = ((r * base_r) + (number[i] % mod)) % mod;
-    }
-    return (unsigned short) r;
-}
-
-#ifdef DEBUG
-void diagbn(char *prefix, Bignum md)
-{
-    int i, nibbles, morenibbles;
-    static const char hex[] = "0123456789ABCDEF";
-
-    debug("%s0x", prefix ? prefix : "");
-
-    nibbles = (3 + bignum_bitcount(md)) / 4;
-    if (nibbles < 1)
-	nibbles = 1;
-    morenibbles = 4 * md[0] - nibbles;
-    for (i = 0; i < morenibbles; i++)
-	debug("-");
-    for (i = nibbles; i--;)
-        debug("%c", hex[(bignum_byte(md, i / 2) >> (4 * (i % 2))) & 0xF]);
-
-    if (prefix)
-	debug("\n");
-}
-#endif
-
-/*
- * Simple division.
- */
-Bignum bigdiv(Bignum a, Bignum b)
-{
-    Bignum q = newbn(a[0]);
-    bigdivmod(a, b, NULL, q);
-    while (q[0] > 1 && q[q[0]] == 0)
-        q[0]--;
-    return q;
-}
-
-/*
- * Simple remainder.
- */
-Bignum bigmod(Bignum a, Bignum b)
-{
-    Bignum r = newbn(b[0]);
-    bigdivmod(a, b, r, NULL);
-    while (r[0] > 1 && r[r[0]] == 0)
-        r[0]--;
-    return r;
-}
-
-/*
- * Greatest common divisor.
- */
-Bignum biggcd(Bignum av, Bignum bv)
-{
-    Bignum a = copybn(av);
-    Bignum b = copybn(bv);
-
-    while (bignum_cmp(b, Zero) != 0) {
-	Bignum t = newbn(b[0]);
-	bigdivmod(a, b, t, NULL);
-	while (t[0] > 1 && t[t[0]] == 0)
-	    t[0]--;
-	freebn(a);
-	a = b;
-	b = t;
-    }
-
-    freebn(b);
-    return a;
-}
-
-/*
- * Modular inverse, using Euclid's extended algorithm.
- */
-Bignum modinv(Bignum number, Bignum modulus)
-{
-    Bignum a = copybn(modulus);
-    Bignum b = copybn(number);
-    Bignum xp = copybn(Zero);
-    Bignum x = copybn(One);
-    int sign = +1;
-
-    assert(number[number[0]] != 0);
-    assert(modulus[modulus[0]] != 0);
-
-    while (bignum_cmp(b, One) != 0) {
-	Bignum t, q;
-
-        if (bignum_cmp(b, Zero) == 0) {
-            /*
-             * Found a common factor between the inputs, so we cannot
-             * return a modular inverse at all.
-             */
-            freebn(b);
-            freebn(a);
-            freebn(xp);
-            freebn(x);
-            return NULL;
-        }
-
-        t = newbn(b[0]);
-	q = newbn(a[0]);
-	bigdivmod(a, b, t, q);
-	while (t[0] > 1 && t[t[0]] == 0)
-	    t[0]--;
-	while (q[0] > 1 && q[q[0]] == 0)
-	    q[0]--;
-	freebn(a);
-	a = b;
-	b = t;
-	t = xp;
-	xp = x;
-	x = bigmuladd(q, xp, t);
-	sign = -sign;
-	freebn(t);
-	freebn(q);
-    }
-
-    freebn(b);
-    freebn(a);
-    freebn(xp);
-
-    /* now we know that sign * x == 1, and that x < modulus */
-    if (sign < 0) {
-	/* set a new x to be modulus - x */
-	Bignum newx = newbn(modulus[0]);
-	BignumInt carry = 0;
-	int maxspot = 1;
-	int i;
-
-	for (i = 1; i <= (int)newx[0]; i++) {
-	    BignumInt aword = (i <= (int)modulus[0] ? modulus[i] : 0);
-	    BignumInt bword = (i <= (int)x[0] ? x[i] : 0);
-	    newx[i] = aword - bword - carry;
-	    bword = ~bword;
-	    carry = carry ? (newx[i] >= bword) : (newx[i] > bword);
-	    if (newx[i] != 0)
-		maxspot = i;
-	}
-	newx[0] = maxspot;
-	freebn(x);
-	x = newx;
-    }
-
-    /* and return. */
-    return x;
-}
-
-/*
- * Render a bignum into decimal. Return a malloced string holding
- * the decimal representation.
- */
-char *bignum_decimal(Bignum x)
-{
-    int ndigits, ndigit;
-    int i;
-    bool iszero;
-    BignumInt carry;
-    char *ret;
-    BignumInt *workspace;
-
-    /*
-     * First, estimate the number of digits. Since log(10)/log(2)
-     * is just greater than 93/28 (the joys of continued fraction
-     * approximations...) we know that for every 93 bits, we need
-     * at most 28 digits. This will tell us how much to malloc.
-     *
-     * Formally: if x has i bits, that means x is strictly less
-     * than 2^i. Since 2 is less than 10^(28/93), this is less than
-     * 10^(28i/93). We need an integer power of ten, so we must
-     * round up (rounding down might make it less than x again).
-     * Therefore if we multiply the bit count by 28/93, rounding
-     * up, we will have enough digits.
-     *
-     * i=0 (i.e., x=0) is an irritating special case.
-     */
-    i = bignum_bitcount(x);
-    if (!i)
-	ndigits = 1;		       /* x = 0 */
-    else
-	ndigits = (28 * i + 92) / 93;  /* multiply by 28/93 and round up */
-    ndigits++;			       /* allow for trailing \0 */
-    ret = snewn(ndigits, char);
-
-    /*
-     * Now allocate some workspace to hold the binary form as we
-     * repeatedly divide it by ten. Initialise this to the
-     * big-endian form of the number.
-     */
-    workspace = snewn(x[0], BignumInt);
-    for (i = 0; i < (int)x[0]; i++)
-	workspace[i] = x[x[0] - i];
-
-    /*
-     * Next, write the decimal number starting with the last digit.
-     * We use ordinary short division, dividing 10 into the
-     * workspace.
-     */
-    ndigit = ndigits - 1;
-    ret[ndigit] = '\0';
-    do {
-	iszero = true;
-	carry = 0;
-	for (i = 0; i < (int)x[0]; i++) {
-            /*
-             * Conceptually, we want to compute
-             *
-             *   (carry << BIGNUM_INT_BITS) + workspace[i]
-             *   -----------------------------------------
-             *                      10
-             *
-             * but we don't have an integer type longer than BignumInt
-             * to work with. So we have to do it in pieces.
-             */
-
-            BignumInt q, r;
-            q = workspace[i] / 10;
-            r = workspace[i] % 10;
-
-            /* I want (BIGNUM_INT_MASK+1)/10 but can't say so directly! */
-            q += carry * ((BIGNUM_INT_MASK-9) / 10 + 1);
-            r += carry * ((BIGNUM_INT_MASK-9) % 10);
-
-            q += r / 10;
-            r %= 10;
-
-	    workspace[i] = q;
-	    carry = r;
-
-	    if (workspace[i])
-		iszero = false;
-	}
-	ret[--ndigit] = (char) (carry + '0');
-    } while (!iszero);
-
-    /*
-     * There's a chance we've fallen short of the start of the
-     * string. Correct if so.
-     */
-    if (ndigit > 0)
-	memmove(ret, ret + ndigit, ndigits - ndigit);
-
-    /*
-     * Done.
-     */
-    smemclr(workspace, x[0] * sizeof(*workspace));
-    sfree(workspace);
-    return ret;
-}
diff --git a/sshccp.c b/sshccp.c
index 176e6094..81ae45b6 100644
--- a/sshccp.c
+++ b/sshccp.c
@@ -30,7 +30,7 @@
  */
 
 #include "ssh.h"
-#include "sshbn.h"
+#include "mpint_i.h"
 
 #ifndef INLINE
 #define INLINE
diff --git a/sshcommon.c b/sshcommon.c
index 337c3293..c541e9d3 100644
--- a/sshcommon.c
+++ b/sshcommon.c
@@ -7,6 +7,7 @@
 #include <stdlib.h>
 
 #include "putty.h"
+#include "mpint.h"
 #include "ssh.h"
 #include "sshbpp.h"
 #include "sshppl.h"
@@ -1008,13 +1009,12 @@ void ssh1_compute_session_id(
     struct RSAKey *hostkey, struct RSAKey *servkey)
 {
     struct MD5Context md5c;
-    int i;
 
     MD5Init(&md5c);
-    for (i = (bignum_bitcount(hostkey->modulus) + 7) / 8; i-- ;)
-        put_byte(&md5c, bignum_byte(hostkey->modulus, i));
-    for (i = (bignum_bitcount(servkey->modulus) + 7) / 8; i-- ;)
-        put_byte(&md5c, bignum_byte(servkey->modulus, i));
+    for (size_t i = (mp_get_nbits(hostkey->modulus) + 7) / 8; i-- ;)
+        put_byte(&md5c, mp_get_byte(hostkey->modulus, i));
+    for (size_t i = (mp_get_nbits(servkey->modulus) + 7) / 8; i-- ;)
+        put_byte(&md5c, mp_get_byte(servkey->modulus, i));
     put_data(&md5c, cookie, 8);
     MD5Final(session_id, &md5c);
 }
diff --git a/sshdh.c b/sshdh.c
index 1bd226a0..b7c4b136 100644
--- a/sshdh.c
+++ b/sshdh.c
@@ -2,61 +2,35 @@
  * Diffie-Hellman implementation for PuTTY.
  */
 
+#include <assert.h>
+
 #include "ssh.h"
+#include "misc.h"
+#include "mpint.h"
 
-/*
- * The primes used in the group1 and group14 key exchange.
- */
-static const unsigned char P1[] = {
-    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xC9, 0x0F, 0xDA, 0xA2,
-    0x21, 0x68, 0xC2, 0x34, 0xC4, 0xC6, 0x62, 0x8B, 0x80, 0xDC, 0x1C, 0xD1,
-    0x29, 0x02, 0x4E, 0x08, 0x8A, 0x67, 0xCC, 0x74, 0x02, 0x0B, 0xBE, 0xA6,
-    0x3B, 0x13, 0x9B, 0x22, 0x51, 0x4A, 0x08, 0x79, 0x8E, 0x34, 0x04, 0xDD,
-    0xEF, 0x95, 0x19, 0xB3, 0xCD, 0x3A, 0x43, 0x1B, 0x30, 0x2B, 0x0A, 0x6D,
-    0xF2, 0x5F, 0x14, 0x37, 0x4F, 0xE1, 0x35, 0x6D, 0x6D, 0x51, 0xC2, 0x45,
-    0xE4, 0x85, 0xB5, 0x76, 0x62, 0x5E, 0x7E, 0xC6, 0xF4, 0x4C, 0x42, 0xE9,
-    0xA6, 0x37, 0xED, 0x6B, 0x0B, 0xFF, 0x5C, 0xB6, 0xF4, 0x06, 0xB7, 0xED,
-    0xEE, 0x38, 0x6B, 0xFB, 0x5A, 0x89, 0x9F, 0xA5, 0xAE, 0x9F, 0x24, 0x11,
-    0x7C, 0x4B, 0x1F, 0xE6, 0x49, 0x28, 0x66, 0x51, 0xEC, 0xE6, 0x53, 0x81,
-    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
+struct dh_ctx {
+    mp_int *x, *e, *p, *q, *g;  /* e = g^x mod p (x random); q = p >> 1 */
 };
-static const unsigned char P14[] = {
-    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xC9, 0x0F, 0xDA, 0xA2,
-    0x21, 0x68, 0xC2, 0x34, 0xC4, 0xC6, 0x62, 0x8B, 0x80, 0xDC, 0x1C, 0xD1,
-    0x29, 0x02, 0x4E, 0x08, 0x8A, 0x67, 0xCC, 0x74, 0x02, 0x0B, 0xBE, 0xA6,
-    0x3B, 0x13, 0x9B, 0x22, 0x51, 0x4A, 0x08, 0x79, 0x8E, 0x34, 0x04, 0xDD,
-    0xEF, 0x95, 0x19, 0xB3, 0xCD, 0x3A, 0x43, 0x1B, 0x30, 0x2B, 0x0A, 0x6D,
-    0xF2, 0x5F, 0x14, 0x37, 0x4F, 0xE1, 0x35, 0x6D, 0x6D, 0x51, 0xC2, 0x45,
-    0xE4, 0x85, 0xB5, 0x76, 0x62, 0x5E, 0x7E, 0xC6, 0xF4, 0x4C, 0x42, 0xE9,
-    0xA6, 0x37, 0xED, 0x6B, 0x0B, 0xFF, 0x5C, 0xB6, 0xF4, 0x06, 0xB7, 0xED,
-    0xEE, 0x38, 0x6B, 0xFB, 0x5A, 0x89, 0x9F, 0xA5, 0xAE, 0x9F, 0x24, 0x11,
-    0x7C, 0x4B, 0x1F, 0xE6, 0x49, 0x28, 0x66, 0x51, 0xEC, 0xE4, 0x5B, 0x3D,
-    0xC2, 0x00, 0x7C, 0xB8, 0xA1, 0x63, 0xBF, 0x05, 0x98, 0xDA, 0x48, 0x36,
-    0x1C, 0x55, 0xD3, 0x9A, 0x69, 0x16, 0x3F, 0xA8, 0xFD, 0x24, 0xCF, 0x5F,
-    0x83, 0x65, 0x5D, 0x23, 0xDC, 0xA3, 0xAD, 0x96, 0x1C, 0x62, 0xF3, 0x56,
-    0x20, 0x85, 0x52, 0xBB, 0x9E, 0xD5, 0x29, 0x07, 0x70, 0x96, 0x96, 0x6D,
-    0x67, 0x0C, 0x35, 0x4E, 0x4A, 0xBC, 0x98, 0x04, 0xF1, 0x74, 0x6C, 0x08,
-    0xCA, 0x18, 0x21, 0x7C, 0x32, 0x90, 0x5E, 0x46, 0x2E, 0x36, 0xCE, 0x3B,
-    0xE3, 0x9E, 0x77, 0x2C, 0x18, 0x0E, 0x86, 0x03, 0x9B, 0x27, 0x83, 0xA2,
-    0xEC, 0x07, 0xA2, 0x8F, 0xB5, 0xC5, 0x5D, 0xF0, 0x6F, 0x4C, 0x52, 0xC9,
-    0xDE, 0x2B, 0xCB, 0xF6, 0x95, 0x58, 0x17, 0x18, 0x39, 0x95, 0x49, 0x7C,
-    0xEA, 0x95, 0x6A, 0xE5, 0x15, 0xD2, 0x26, 0x18, 0x98, 0xFA, 0x05, 0x10,
-    0x15, 0x72, 0x8E, 0x5A, 0x8A, 0xAC, 0xAA, 0x68, 0xFF, 0xFF, 0xFF, 0xFF,
-    0xFF, 0xFF, 0xFF, 0xFF
-};
-
-/*
- * The generator g = 2 (used for both group1 and group14).
- */
-static const unsigned char G[] = { 2 };
 
 struct dh_extra {
-    const unsigned char *pdata, *gdata; /* NULL means group exchange */
-    int plen, glen;
+    bool gex;                              /* true means group exchange */
+    void (*construct)(struct dh_ctx *ctx); /* fills in p and g; NULL if gex */
 };
 
+static void dh_group1_construct(struct dh_ctx *ctx)
+{
+    ctx->p = MP_LITERAL(0xFFFFFFFFFFFFFFFFC90FDAA22168C234C4C6628B80DC1CD129024E088A67CC74020BBEA63B139B22514A08798E3404DDEF9519B3CD3A431B302B0A6DF25F14374FE1356D6D51C245E485B576625E7EC6F44C42E9A637ED6B0BFF5CB6F406B7EDEE386BFB5A899FA5AE9F24117C4B1FE649286651ECE65381FFFFFFFFFFFFFFFF);
+    ctx->g = mp_from_integer(2);
+}
+
+static void dh_group14_construct(struct dh_ctx *ctx)
+{
+    ctx->p = MP_LITERAL(0xFFFFFFFFFFFFFFFFC90FDAA22168C234C4C6628B80DC1CD129024E088A67CC74020BBEA63B139B22514A08798E3404DDEF9519B3CD3A431B302B0A6DF25F14374FE1356D6D51C245E485B576625E7EC6F44C42E9A637ED6B0BFF5CB6F406B7EDEE386BFB5A899FA5AE9F24117C4B1FE649286651ECE45B3DC2007CB8A163BF0598DA48361C55D39A69163FA8FD24CF5F83655D23DCA3AD961C62F356208552BB9ED529077096966D670C354E4ABC9804F1746C08CA18217C32905E462E36CE3BE39E772C180E86039B2783A2EC07A28FB5C55DF06F4C52C9DE2BCBF6955817183995497CEA956AE515D2261898FA051015728E5A8AACAA68FFFFFFFFFFFFFFFF);
+    ctx->g = mp_from_integer(2);
+}
+
 static const struct dh_extra extra_group1 = {
-    P1, G, lenof(P1), lenof(G),
+    false, dh_group1_construct,
 };
 
 static const struct ssh_kex ssh_diffiehellman_group1_sha1 = {
@@ -74,7 +48,7 @@ const struct ssh_kexes ssh_diffiehellman_group1 = {
 };
 
 static const struct dh_extra extra_group14 = {
-    P14, G, lenof(P14), lenof(G),
+    false, dh_group14_construct,
 };
 
 static const struct ssh_kex ssh_diffiehellman_group14_sha256 = {
@@ -97,9 +71,7 @@ const struct ssh_kexes ssh_diffiehellman_group14 = {
     group14_list
 };
 
-static const struct dh_extra extra_gex = {
-    NULL, NULL, 0, 0,
-};
+static const struct dh_extra extra_gex = { true }; /* construct left NULL */
 
 static const struct ssh_kex ssh_diffiehellman_gex_sha256 = {
     "diffie-hellman-group-exchange-sha256", NULL,
@@ -161,27 +133,19 @@ const struct ssh_kexes ssh_gssk5_sha1_kex = {
     gssk5_sha1_kex_list
 };
 
-/*
- * Variables.
- */
-struct dh_ctx {
-    Bignum x, e, p, q, qmask, g;
-};
-
 /*
  * Common DH initialisation.
  */
 static void dh_init(struct dh_ctx *ctx)
 {
-    ctx->q = bignum_rshift(ctx->p, 1);
-    ctx->qmask = bignum_bitmask(ctx->q);
+    ctx->q = mp_rshift_fixed(ctx->p, 1);
     ctx->x = ctx->e = NULL;
 }
 
 bool dh_is_gex(const struct ssh_kex *kex)
 {
     const struct dh_extra *extra = (const struct dh_extra *)kex->extra;
-    return extra->pdata == NULL;
+    return extra->gex;
 }
 
 /*
@@ -190,9 +154,9 @@ bool dh_is_gex(const struct ssh_kex *kex)
 struct dh_ctx *dh_setup_group(const struct ssh_kex *kex)
 {
     const struct dh_extra *extra = (const struct dh_extra *)kex->extra;
+    assert(!extra->gex);          /* gex entries have no construct() */
     struct dh_ctx *ctx = snew(struct dh_ctx);
-    ctx->p = bignum_from_bytes(extra->pdata, extra->plen);
-    ctx->g = bignum_from_bytes(extra->gdata, extra->glen);
+    extra->construct(ctx);        /* fills in ctx->p and ctx->g */
     dh_init(ctx);
     return ctx;
 }
@@ -200,11 +164,11 @@ struct dh_ctx *dh_setup_group(const struct ssh_kex *kex)
 /*
  * Initialise DH for a server-supplied group.
  */
-struct dh_ctx *dh_setup_gex(Bignum pval, Bignum gval)
+struct dh_ctx *dh_setup_gex(mp_int *pval, mp_int *gval)
 {
     struct dh_ctx *ctx = snew(struct dh_ctx);
-    ctx->p = copybn(pval);
-    ctx->g = copybn(gval);
+    ctx->p = mp_copy(pval);
+    ctx->g = mp_copy(gval);
     dh_init(ctx);
     return ctx;
 }
@@ -214,7 +178,7 @@ struct dh_ctx *dh_setup_gex(Bignum pval, Bignum gval)
  */
 int dh_modulus_bit_size(const struct dh_ctx *ctx)
 {
-    return bignum_bitcount(ctx->p);
+    return mp_get_nbits(ctx->p);
 }
 
 /*
@@ -222,12 +186,11 @@ int dh_modulus_bit_size(const struct dh_ctx *ctx)
  */
 void dh_cleanup(struct dh_ctx *ctx)
 {
-    freebn(ctx->x);
-    freebn(ctx->e);
-    freebn(ctx->p);
-    freebn(ctx->g);
-    freebn(ctx->q);
-    freebn(ctx->qmask);
+    mp_free(ctx->x);
+    mp_free(ctx->e);
+    mp_free(ctx->p);
+    mp_free(ctx->g);
+    mp_free(ctx->q);
     sfree(ctx);
 }
 
@@ -246,49 +209,36 @@ void dh_cleanup(struct dh_ctx *ctx)
  * Advances in Cryptology: Proceedings of Eurocrypt '96
  * Springer-Verlag, May 1996.
  */
-Bignum dh_create_e(struct dh_ctx *ctx, int nbits)
+mp_int *dh_create_e(struct dh_ctx *ctx, int nbits)
 {
-    int i;
-
-    int nbytes;
-    unsigned char *buf;
-
-    nbytes = (bignum_bitcount(ctx->qmask) + 7) / 8;
-    buf = snewn(nbytes, unsigned char);
-
-    do {
-	/*
-	 * Create a potential x, by ANDing a string of random bytes
-	 * with qmask.
-	 */
-	if (ctx->x)
-	    freebn(ctx->x);
-	if (nbits == 0 || nbits > bignum_bitcount(ctx->qmask)) {
-	    for (i = 0; i < nbytes; i++)
-		buf[i] = bignum_byte(ctx->qmask, i) & random_byte();
-	    ctx->x = bignum_from_bytes(buf, nbytes);
-	} else {
-	    int b, nb;
-	    ctx->x = bn_power_2(nbits);
-	    b = nb = 0;
-	    for (i = 0; i < nbits; i++) {
-		if (nb == 0) {
-		    nb = 8;
-		    b = random_byte();
-		}
-		bignum_set_bit(ctx->x, i, b & 1);
-		b >>= 1;
-		nb--;
-	    }
-	}
-    } while (bignum_cmp(ctx->x, One) <= 0 || bignum_cmp(ctx->x, ctx->q) >= 0);
-
-    sfree(buf);
+    /*
+     * Lower limit is just 2.
+     */
+    mp_int *lo = mp_from_integer(2);
 
     /*
-     * Done. Now compute e = g^x mod p.
+     * Upper limit: q-1, capped by mp_power_2(nbits+1) when nbits is nonzero.
      */
-    ctx->e = modpow(ctx->g, ctx->x, ctx->p);
+    mp_int *hi = mp_copy(ctx->q);
+    mp_sub_integer_into(hi, hi, 1);
+    if (nbits) {
+        mp_int *pow2 = mp_power_2(nbits+1);
+        mp_min_into(pow2, pow2, hi);
+        mp_free(hi);
+        hi = pow2;
+    }
+
+    /*
+     * Make a random number in that range.
+     */
+    ctx->x = mp_random_in_range(lo, hi);
+    mp_free(lo);
+    mp_free(hi);
+
+    /*
+     * Now compute e = g^x mod p.
+     */
+    ctx->e = mp_modpow(ctx->g, ctx->x, ctx->p);
 
     return ctx->e;
 }
@@ -301,15 +251,16 @@ Bignum dh_create_e(struct dh_ctx *ctx, int nbits)
  * they lead to obviously weak keys that even a passive eavesdropper
  * can figure out.)
  */
-const char *dh_validate_f(struct dh_ctx *ctx, Bignum f)
+const char *dh_validate_f(struct dh_ctx *ctx, mp_int *f)
 {
-    if (bignum_cmp(f, One) <= 0) {
+    if (!mp_hs_integer(f, 2)) {
         return "f value received is too small";
     } else {
-        Bignum pm1 = bigsub(ctx->p, One);
-        int cmp = bignum_cmp(f, pm1);
-        freebn(pm1);
-        if (cmp >= 0)
+        mp_int *pm1 = mp_copy(ctx->p);
+        mp_sub_integer_into(pm1, pm1, 1);
+        unsigned cmp = mp_cmp_hs(f, pm1);
+        mp_free(pm1);
+        if (cmp)
             return "f value received is too large";
     }
     return NULL;
@@ -318,9 +269,7 @@ const char *dh_validate_f(struct dh_ctx *ctx, Bignum f)
 /*
  * DH stage 2: given a number f, compute K = f^x mod p.
  */
-Bignum dh_find_K(struct dh_ctx *ctx, Bignum f)
+mp_int *dh_find_K(struct dh_ctx *ctx, mp_int *f)
 {
-    Bignum ret;
-    ret = modpow(f, ctx->x, ctx->p);
-    return ret;
+    return mp_modpow(f, ctx->x, ctx->p);
 }
diff --git a/sshdss.c b/sshdss.c
index cac40af7..21bb7f82 100644
--- a/sshdss.c
+++ b/sshdss.c
@@ -7,6 +7,7 @@
 #include <assert.h>
 
 #include "ssh.h"
+#include "mpint.h"
 #include "misc.h"
 
 static void dss_freekey(ssh_key *key);    /* forward reference */
@@ -29,7 +30,7 @@ static ssh_key *dss_new_pub(const ssh_keyalg *self, ptrlen data)
     dss->x = NULL;
 
     if (get_err(src) ||
-        !bignum_cmp(dss->q, Zero) || !bignum_cmp(dss->p, Zero)) {
+        mp_eq_integer(dss->p, 0) || mp_eq_integer(dss->q, 0)) {
         /* Invalid key. */
         dss_freekey(&dss->sshk);
         return NULL;
@@ -42,29 +43,28 @@ static void dss_freekey(ssh_key *key)
 {
     struct dss_key *dss = container_of(key, struct dss_key, sshk);
     if (dss->p)
-        freebn(dss->p);
+        mp_free(dss->p);
     if (dss->q)
-        freebn(dss->q);
+        mp_free(dss->q);
     if (dss->g)
-        freebn(dss->g);
+        mp_free(dss->g);
     if (dss->y)
-        freebn(dss->y);
+        mp_free(dss->y);
     if (dss->x)
-        freebn(dss->x);
+        mp_free(dss->x);
     sfree(dss);
 }
 
-static void append_hex_to_strbuf(strbuf *sb, Bignum *x)
+static void append_hex_to_strbuf(strbuf *sb, mp_int *x)
 {
     if (sb->len > 0)
         put_byte(sb, ',');
     put_data(sb, "0x", 2);
-    int nibbles = (3 + bignum_bitcount(x)) / 4;
-    if (nibbles < 1)
-	nibbles = 1;
-    static const char hex[] = "0123456789abcdef";
-    for (int i = nibbles; i--;)
-	put_byte(sb, hex[(bignum_byte(x, i / 2) >> (4 * (i % 2))) & 0xF]);
+    char *hex = mp_get_hex(x);
+    size_t hexlen = strlen(hex);
+    put_data(sb, hex, hexlen);
+    smemclr(hex, hexlen);
+    sfree(hex);
 }
 
 static char *dss_cache_str(ssh_key *key)
@@ -88,7 +88,6 @@ static bool dss_verify(ssh_key *key, ptrlen sig, ptrlen data)
     struct dss_key *dss = container_of(key, struct dss_key, sshk);
     BinarySource src[1];
     unsigned char hash[20];
-    Bignum r, s, w, gu1p, yu2p, gu1yu2p, u1, u2, sha, v;
     bool toret;
 
     if (!dss->p)
@@ -117,29 +116,29 @@ static bool dss_verify(ssh_key *key, ptrlen sig, ptrlen data)
     }
 
     /* Now we're sitting on a 40-byte string for sure. */
-    r = bignum_from_bytes(sig.ptr, 20);
-    s = bignum_from_bytes((const char *)sig.ptr + 20, 20);
+    mp_int *r = mp_from_bytes_be(make_ptrlen(sig.ptr, 20));
+    mp_int *s = mp_from_bytes_be(make_ptrlen((const char *)sig.ptr + 20, 20));
     if (!r || !s) {
         if (r)
-            freebn(r);
+            mp_free(r);
         if (s)
-            freebn(s);
+            mp_free(s);
 	return false;
     }
 
-    if (!bignum_cmp(s, Zero)) {
-        freebn(r);
-        freebn(s);
+    if (mp_eq_integer(s, 0)) {
+        mp_free(r);
+        mp_free(s);
         return false;
     }
 
     /*
      * Step 1. w <- s^-1 mod q.
      */
-    w = modinv(s, dss->q);
+    mp_int *w = mp_invert(s, dss->q);
     if (!w) {
-        freebn(r);
-        freebn(s);
+        mp_free(r);
+        mp_free(s);
         return false;
     }
 
@@ -147,38 +146,38 @@ static bool dss_verify(ssh_key *key, ptrlen sig, ptrlen data)
      * Step 2. u1 <- SHA(message) * w mod q.
      */
     SHA_Simple(data.ptr, data.len, hash);
-    sha = bignum_from_bytes(hash, 20);
-    u1 = modmul(sha, w, dss->q);
+    mp_int *sha = mp_from_bytes_be(make_ptrlen(hash, 20));
+    mp_int *u1 = mp_modmul(sha, w, dss->q);
 
     /*
      * Step 3. u2 <- r * w mod q.
      */
-    u2 = modmul(r, w, dss->q);
+    mp_int *u2 = mp_modmul(r, w, dss->q);
 
     /*
      * Step 4. v <- (g^u1 * y^u2 mod p) mod q.
      */
-    gu1p = modpow(dss->g, u1, dss->p);
-    yu2p = modpow(dss->y, u2, dss->p);
-    gu1yu2p = modmul(gu1p, yu2p, dss->p);
-    v = modmul(gu1yu2p, One, dss->q);
+    mp_int *gu1p = mp_modpow(dss->g, u1, dss->p);
+    mp_int *yu2p = mp_modpow(dss->y, u2, dss->p);
+    mp_int *gu1yu2p = mp_modmul(gu1p, yu2p, dss->p);
+    mp_int *v = mp_mod(gu1yu2p, dss->q);
 
     /*
      * Step 5. v should now be equal to r.
      */
 
-    toret = !bignum_cmp(v, r);
+    toret = mp_cmp_eq(v, r);
 
-    freebn(w);
-    freebn(sha);
-    freebn(u1);
-    freebn(u2);
-    freebn(gu1p);
-    freebn(yu2p);
-    freebn(gu1yu2p);
-    freebn(v);
-    freebn(r);
-    freebn(s);
+    mp_free(w);
+    mp_free(sha);
+    mp_free(u1);
+    mp_free(u2);
+    mp_free(gu1p);
+    mp_free(yu2p);
+    mp_free(gu1yu2p);
+    mp_free(v);
+    mp_free(r);
+    mp_free(s);
 
     return toret;
 }
@@ -209,7 +208,7 @@ static ssh_key *dss_new_priv(const ssh_keyalg *self, ptrlen pub, ptrlen priv)
     ptrlen hash;
     SHA_State s;
     unsigned char digest[20];
-    Bignum ytest;
+    mp_int *ytest;
 
     sshk = dss_new_pub(self, pub);
     if (!sshk)
@@ -233,7 +232,7 @@ static ssh_key *dss_new_priv(const ssh_keyalg *self, ptrlen pub, ptrlen priv)
 	put_mp_ssh2(&s, dss->q);
 	put_mp_ssh2(&s, dss->g);
 	SHA_Final(&s, digest);
-	if (0 != memcmp(hash.ptr, digest, 20)) {
+	if (!smemeq(hash.ptr, digest, 20)) {
 	    dss_freekey(&dss->sshk);
 	    return NULL;
 	}
@@ -242,13 +241,13 @@ static ssh_key *dss_new_priv(const ssh_keyalg *self, ptrlen pub, ptrlen priv)
     /*
      * Now ensure g^x mod p really is y.
      */
-    ytest = modpow(dss->g, dss->x, dss->p);
-    if (0 != bignum_cmp(ytest, dss->y)) {
+    ytest = mp_modpow(dss->g, dss->x, dss->p);
+    if (!mp_cmp_eq(ytest, dss->y)) {
+        mp_free(ytest);
 	dss_freekey(&dss->sshk);
-        freebn(ytest);
 	return NULL;
     }
-    freebn(ytest);
+    mp_free(ytest);
 
     return &dss->sshk;
 }
@@ -268,7 +267,7 @@ static ssh_key *dss_new_priv_openssh(const ssh_keyalg *self,
     dss->x = get_mp_ssh2(src);
 
     if (get_err(src) ||
-        !bignum_cmp(dss->q, Zero) || !bignum_cmp(dss->p, Zero)) {
+        mp_eq_integer(dss->q, 0) || mp_eq_integer(dss->p, 0)) {
         /* Invalid key. */
         dss_freekey(&dss->sshk);
         return NULL;
@@ -299,14 +298,15 @@ static int dss_pubkey_bits(const ssh_keyalg *self, ptrlen pub)
         return -1;
 
     dss = container_of(sshk, struct dss_key, sshk);
-    ret = bignum_bitcount(dss->p);
+    ret = mp_get_nbits(dss->p);
     dss_freekey(&dss->sshk);
 
     return ret;
 }
 
-Bignum *dss_gen_k(const char *id_string, Bignum modulus, Bignum private_key,
-                  unsigned char *digest, int digest_len)
+mp_int *dss_gen_k(const char *id_string, mp_int *modulus,
+                     mp_int *private_key,
+                     unsigned char *digest, int digest_len)
 {
     /*
      * The basic DSS signing algorithm is:
@@ -381,7 +381,6 @@ Bignum *dss_gen_k(const char *id_string, Bignum modulus, Bignum private_key,
      */
     SHA512_State ss;
     unsigned char digest512[64];
-    Bignum proto_k, k;
 
     /*
      * Hash some identifying text plus x.
@@ -397,72 +396,63 @@ Bignum *dss_gen_k(const char *id_string, Bignum modulus, Bignum private_key,
     SHA512_Init(&ss);
     put_data(&ss, digest512, sizeof(digest512));
     put_data(&ss, digest, digest_len);
+    SHA512_Final(&ss, digest512);
 
-    while (1) {
-        SHA512_State ss2 = ss;         /* structure copy */
-        SHA512_Final(&ss2, digest512);
+    /*
+     * Now convert the result into a bignum, and coerce it to the
+     * range [2,q), which we do by reducing it mod q-2 and adding 2.
+     */
+    mp_int *modminus2 = mp_copy(modulus);
+    mp_sub_integer_into(modminus2, modminus2, 2);
+    mp_int *proto_k = mp_from_bytes_be(make_ptrlen(digest512, 64));
+    mp_int *k = mp_mod(proto_k, modminus2);
+    mp_free(proto_k);
+    mp_free(modminus2);
+    mp_add_integer_into(k, k, 2);
 
-        smemclr(&ss2, sizeof(ss2));
+    smemclr(&ss, sizeof(ss));
+    smemclr(digest512, sizeof(digest512));
 
-        /*
-         * Now convert the result into a bignum, and reduce it mod q.
-         */
-        proto_k = bignum_from_bytes(digest512, 64);
-        k = bigmod(proto_k, modulus);
-        freebn(proto_k);
-
-        if (bignum_cmp(k, One) != 0 && bignum_cmp(k, Zero) != 0) {
-            smemclr(&ss, sizeof(ss));
-            smemclr(digest512, sizeof(digest512));
-            return k;
-        }
-
-        /* Very unlikely we get here, but if so, k was unsuitable. */
-        freebn(k);
-        /* Perturb the hash to think of a different k. */
-        put_byte(&ss, 'x');
-        /* Go round and try again. */
-    }
+    return k;
 }
 
 static void dss_sign(ssh_key *key, const void *data, int datalen,
                      unsigned flags, BinarySink *bs)
 {
     struct dss_key *dss = container_of(key, struct dss_key, sshk);
-    Bignum k, gkp, hash, kinv, hxr, r, s;
     unsigned char digest[20];
     int i;
 
     SHA_Simple(data, datalen, digest);
 
-    k = dss_gen_k("DSA deterministic k generator", dss->q, dss->x,
-                  digest, sizeof(digest));
-    kinv = modinv(k, dss->q);	       /* k^-1 mod q */
-    assert(kinv);
+    mp_int *k = dss_gen_k("DSA deterministic k generator", dss->q, dss->x,
+                          digest, sizeof(digest));
+    mp_int *kinv = mp_invert(k, dss->q);       /* k^-1 mod q */
 
     /*
      * Now we have k, so just go ahead and compute the signature.
      */
-    gkp = modpow(dss->g, k, dss->p);   /* g^k mod p */
-    r = bigmod(gkp, dss->q);	       /* r = (g^k mod p) mod q */
-    freebn(gkp);
+    mp_int *gkp = mp_modpow(dss->g, k, dss->p); /* g^k mod p */
+    mp_int *r = mp_mod(gkp, dss->q);        /* r = (g^k mod p) mod q */
+    mp_free(gkp);
 
-    hash = bignum_from_bytes(digest, 20);
-    hxr = bigmuladd(dss->x, r, hash);  /* hash + x*r */
-    s = modmul(kinv, hxr, dss->q);     /* s = k^-1 * (hash + x*r) mod q */
-    freebn(hxr);
-    freebn(kinv);
-    freebn(k);
-    freebn(hash);
+    mp_int *hash = mp_from_bytes_be(make_ptrlen(digest, 20));
+    mp_int *hxr = mp_mul(dss->x, r);
+    mp_add_into(hxr, hxr, hash);         /* hash + x*r */
+    mp_int *s = mp_modmul(kinv, hxr, dss->q); /* s = k^-1 * (hash+x*r) mod q */
+    mp_free(hxr);
+    mp_free(kinv);
+    mp_free(k);
+    mp_free(hash);
 
     put_stringz(bs, "ssh-dss");
     put_uint32(bs, 40);
     for (i = 0; i < 20; i++)
-	put_byte(bs, bignum_byte(r, 19 - i));
+	put_byte(bs, mp_get_byte(r, 19 - i));
     for (i = 0; i < 20; i++)
-        put_byte(bs, bignum_byte(s, 19 - i));
-    freebn(r);
-    freebn(s);
+        put_byte(bs, mp_get_byte(s, 19 - i));
+    mp_free(r);
+    mp_free(s);
 }
 
 const ssh_keyalg ssh_dss = {
diff --git a/sshdssg.c b/sshdssg.c
index f6905b8d..cece7b0f 100644
--- a/sshdssg.c
+++ b/sshdssg.c
@@ -4,16 +4,11 @@
 
 #include "misc.h"
 #include "ssh.h"
+#include "mpint.h"
 
 int dsa_generate(struct dss_key *key, int bits, progfn_t pfn,
 		 void *pfnparam)
 {
-    Bignum qm1, power, g, h, tmp;
-    unsigned pfirst, qfirst;
-    int progress;
-
-    key->sshk.vt = &ssh_dss;
-
     /*
      * Set up the phase limits for the progress report. We do this
      * by passing minus the phase number.
@@ -59,30 +54,19 @@ int dsa_generate(struct dss_key *key, int bits, progfn_t pfn,
     pfn(pfnparam, PROGFN_PHASE_EXTENT, 3, 0x2000);
     pfn(pfnparam, PROGFN_EXP_PHASE, 3, -32768);
 
-    /*
-     * In phase four we are finding an element x between 1 and q-1
-     * (exclusive), by inventing 160 random bits and hoping they
-     * come out to a plausible number; so assuming q is uniformly
-     * distributed between 2^159 and 2^160, the chance of any given
-     * attempt succeeding is somewhere between 0.5 and 1. Lacking
-     * the energy to arrange to be able to specify this probability
-     * _after_ generating q, we'll just set it to 0.75.
-     */
-    pfn(pfnparam, PROGFN_PHASE_EXTENT, 4, 0x2000);
-    pfn(pfnparam, PROGFN_EXP_PHASE, 4, -49152);
-
     pfn(pfnparam, PROGFN_READY, 0, 0);
 
+    unsigned pfirst, qfirst;
     invent_firstbits(&pfirst, &qfirst);
     /*
      * Generate q: a prime of length 160.
      */
-    key->q = primegen(160, 2, 2, NULL, 1, pfn, pfnparam, qfirst);
+    mp_int *q = primegen(160, 2, 2, NULL, 1, pfn, pfnparam, qfirst);
     /*
      * Now generate p: a prime of length `bits', such that p-1 is
      * divisible by q.
      */
-    key->p = primegen(bits-160, 2, 2, key->q, 2, pfn, pfnparam, pfirst);
+    mp_int *p = primegen(bits-160, 2, 2, q, 2, pfn, pfnparam, pfirst);
 
     /*
      * Next we need g. Raise 2 to the power (p-1)/q modulo p, and
@@ -90,58 +74,40 @@ int dsa_generate(struct dss_key *key, int bits, progfn_t pfn,
      * soon as we hit a non-unit (and non-zero!) one, that'll do
      * for g.
      */
-    power = bigdiv(key->p, key->q);    /* this is floor(p/q) == (p-1)/q */
-    h = bignum_from_long(1);
-    progress = 0;
+    mp_int *power = mp_div(p, q); /* this is floor(p/q) == (p-1)/q */
+    mp_int *h = mp_from_integer(1);
+    int progress = 0;
+    mp_int *g;
     while (1) {
 	pfn(pfnparam, PROGFN_PROGRESS, 3, ++progress);
-	g = modpow(h, power, key->p);
-	if (bignum_cmp(g, One) > 0)
+	g = mp_modpow(h, power, p);
+	if (mp_hs_integer(g, 2))
 	    break;		       /* got one */
-	tmp = h;
-	h = bignum_add_long(h, 1);
-	freebn(tmp);
+        mp_free(g);
+        mp_add_integer_into(h, h, 1);
     }
-    key->g = g;
-    freebn(h);
+    mp_free(h);
+    mp_free(power);
 
     /*
      * Now we're nearly done. All we need now is our private key x,
      * which should be a number between 1 and q-1 exclusive, and
      * our public key y = g^x mod p.
      */
-    qm1 = copybn(key->q);
-    decbn(qm1);
-    progress = 0;
-    while (1) {
-	int i, v, byte, bitsleft;
-	Bignum x;
+    mp_int *two = mp_from_integer(2);
+    mp_int *qm1 = mp_copy(q);
+    mp_sub_integer_into(qm1, qm1, 1);
+    mp_int *x = mp_random_in_range(two, qm1);
+    mp_free(two);
+    mp_free(qm1);
 
-	pfn(pfnparam, PROGFN_PROGRESS, 4, ++progress);
-	x = bn_power_2(159);
-	byte = 0;
-	bitsleft = 0;
+    key->sshk.vt = &ssh_dss;
 
-	for (i = 0; i < 160; i++) {
-	    if (bitsleft <= 0)
-		bitsleft = 8, byte = random_byte();
-	    v = byte & 1;
-	    byte >>= 1;
-	    bitsleft--;
-	    bignum_set_bit(x, i, v);
-	}
-
-	if (bignum_cmp(x, One) <= 0 || bignum_cmp(x, qm1) >= 0) {
-	    freebn(x);
-	    continue;
-	} else {
-	    key->x = x;
-	    break;
-	}
-    }
-    freebn(qm1);
-
-    key->y = modpow(key->g, key->x, key->p);
+    key->p = p;
+    key->q = q;
+    key->g = g;
+    key->x = x;
+    key->y = mp_modpow(key->g, key->x, key->p);
 
     return 1;
 }
diff --git a/sshecc.c b/sshecc.c
index 02271969..4f0b9217 100644
--- a/sshecc.c
+++ b/sshecc.c
@@ -36,84 +36,55 @@
 #include <assert.h>
 
 #include "ssh.h"
+#include "mpint.h"
+#include "ecc.h"
 
 /* ----------------------------------------------------------------------
  * Elliptic curve definitions
  */
 
-static void initialise_wcurve(struct ec_curve *curve, int bits,
-                              const unsigned char *p,
-                              const unsigned char *a, const unsigned char *b,
-                              const unsigned char *n, const unsigned char *Gx,
-                              const unsigned char *Gy)
+static void initialise_common(
+    struct ec_curve *curve, EllipticCurveType type, mp_int *p)
 {
-    int length = bits / 8;
-    if (bits % 8) ++length;
-
-    curve->type = EC_WEIERSTRASS;
-
-    curve->fieldBits = bits;
-    curve->p = bignum_from_bytes(p, length);
-
-    /* Curve co-efficients */
-    curve->w.a = bignum_from_bytes(a, length);
-    curve->w.b = bignum_from_bytes(b, length);
-
-    /* Group order and generator */
-    curve->w.n = bignum_from_bytes(n, length);
-    curve->w.G.x = bignum_from_bytes(Gx, length);
-    curve->w.G.y = bignum_from_bytes(Gy, length);
-    curve->w.G.curve = curve;
-    curve->w.G.infinity = false;
+    curve->type = type;
+    curve->p = mp_copy(p);
+    curve->fieldBits = mp_get_nbits(p);
+    curve->fieldBytes = (curve->fieldBits + 7) / 8;
 }
 
-static void initialise_mcurve(struct ec_curve *curve, int bits,
-                              const unsigned char *p,
-                              const unsigned char *a, const unsigned char *b,
-                              const unsigned char *Gx)
+static void initialise_wcurve(
+    struct ec_curve *curve, mp_int *p, mp_int *a, mp_int *b,
+    mp_int *nonsquare, mp_int *G_x, mp_int *G_y, mp_int *G_order)
 {
-    int length = bits / 8;
-    if (bits % 8) ++length;
+    initialise_common(curve, EC_WEIERSTRASS, p);
 
-    curve->type = EC_MONTGOMERY;
+    curve->w.wc = ecc_weierstrass_curve(p, a, b, nonsquare);
 
-    curve->fieldBits = bits;
-    curve->p = bignum_from_bytes(p, length);
-
-    /* Curve co-efficients */
-    curve->m.a = bignum_from_bytes(a, length);
-    curve->m.b = bignum_from_bytes(b, length);
-
-    /* Generator */
-    curve->m.G.x = bignum_from_bytes(Gx, length);
-    curve->m.G.y = NULL;
-    curve->m.G.z = NULL;
-    curve->m.G.curve = curve;
-    curve->m.G.infinity = false;
+    curve->w.G = ecc_weierstrass_point_new(curve->w.wc, G_x, G_y);
+    curve->w.G_order = mp_copy(G_order);
 }
 
-static void initialise_ecurve(struct ec_curve *curve, int bits,
-                              const unsigned char *p,
-                              const unsigned char *l, const unsigned char *d,
-                              const unsigned char *Bx, const unsigned char *By)
+static void initialise_mcurve(
+    struct ec_curve *curve, mp_int *p, mp_int *a, mp_int *b,
+    mp_int *G_x)
 {
-    int length = bits / 8;
-    if (bits % 8) ++length;
+    initialise_common(curve, EC_MONTGOMERY, p);
 
-    curve->type = EC_EDWARDS;
+    curve->m.mc = ecc_montgomery_curve(p, a, b);
 
-    curve->fieldBits = bits;
-    curve->p = bignum_from_bytes(p, length);
+    curve->m.G = ecc_montgomery_point_new(curve->m.mc, G_x);
+}
 
-    /* Curve co-efficients */
-    curve->e.l = bignum_from_bytes(l, length);
-    curve->e.d = bignum_from_bytes(d, length);
+static void initialise_ecurve(
+    struct ec_curve *curve, mp_int *p, mp_int *d, mp_int *a,
+    mp_int *nonsquare, mp_int *G_x, mp_int *G_y, mp_int *G_order)
+{
+    initialise_common(curve, EC_EDWARDS, p);
 
-    /* Group order and generator */
-    curve->e.B.x = bignum_from_bytes(Bx, length);
-    curve->e.B.y = bignum_from_bytes(By, length);
-    curve->e.B.curve = curve;
-    curve->e.B.infinity = false;
+    curve->e.ec = ecc_edwards_curve(p, d, a, nonsquare);
+
+    curve->e.G = ecc_edwards_point_new(curve->e.ec, G_x, G_y);
+    curve->e.G_order = mp_copy(G_order);
 }
 
 static struct ec_curve *ec_p256(void)
@@ -123,44 +94,22 @@ static struct ec_curve *ec_p256(void)
 
     if (!initialised)
     {
-        static const unsigned char p[] = {
-            0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01,
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
-        };
-        static const unsigned char a[] = {
-            0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01,
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc
-        };
-        static const unsigned char b[] = {
-            0x5a, 0xc6, 0x35, 0xd8, 0xaa, 0x3a, 0x93, 0xe7,
-            0xb3, 0xeb, 0xbd, 0x55, 0x76, 0x98, 0x86, 0xbc,
-            0x65, 0x1d, 0x06, 0xb0, 0xcc, 0x53, 0xb0, 0xf6,
-            0x3b, 0xce, 0x3c, 0x3e, 0x27, 0xd2, 0x60, 0x4b
-        };
-        static const unsigned char n[] = {
-            0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-            0xbc, 0xe6, 0xfa, 0xad, 0xa7, 0x17, 0x9e, 0x84,
-            0xf3, 0xb9, 0xca, 0xc2, 0xfc, 0x63, 0x25, 0x51
-        };
-        static const unsigned char Gx[] = {
-            0x6b, 0x17, 0xd1, 0xf2, 0xe1, 0x2c, 0x42, 0x47,
-            0xf8, 0xbc, 0xe6, 0xe5, 0x63, 0xa4, 0x40, 0xf2,
-            0x77, 0x03, 0x7d, 0x81, 0x2d, 0xeb, 0x33, 0xa0,
-            0xf4, 0xa1, 0x39, 0x45, 0xd8, 0x98, 0xc2, 0x96
-        };
-        static const unsigned char Gy[] = {
-            0x4f, 0xe3, 0x42, 0xe2, 0xfe, 0x1a, 0x7f, 0x9b,
-            0x8e, 0xe7, 0xeb, 0x4a, 0x7c, 0x0f, 0x9e, 0x16,
-            0x2b, 0xce, 0x33, 0x57, 0x6b, 0x31, 0x5e, 0xce,
-            0xcb, 0xb6, 0x40, 0x68, 0x37, 0xbf, 0x51, 0xf5
-        };
+        mp_int *p = MP_LITERAL(0xffffffff00000001000000000000000000000000ffffffffffffffffffffffff);
+        mp_int *a = MP_LITERAL(0xffffffff00000001000000000000000000000000fffffffffffffffffffffffc);
+        mp_int *b = MP_LITERAL(0x5ac635d8aa3a93e7b3ebbd55769886bc651d06b0cc53b0f63bce3c3e27d2604b);
+        mp_int *G_x = MP_LITERAL(0x6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296);
+        mp_int *G_y = MP_LITERAL(0x4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5);
+        mp_int *G_order = MP_LITERAL(0xffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632551);
+        mp_int *nonsquare_mod_p = mp_from_integer(3);
+        initialise_wcurve(&curve, p, a, b, nonsquare_mod_p, G_x, G_y, G_order);
+        mp_free(p);
+        mp_free(a);
+        mp_free(b);
+        mp_free(G_x);
+        mp_free(G_y);
+        mp_free(G_order);
+        mp_free(nonsquare_mod_p);
 
-        initialise_wcurve(&curve, 256, p, a, b, n, Gx, Gy);
         curve.textname = curve.name = "nistp256";
 
         /* Now initialised, no need to do it again */
@@ -177,56 +126,22 @@ static struct ec_curve *ec_p384(void)
 
     if (!initialised)
     {
-        static const unsigned char p[] = {
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe,
-            0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff
-        };
-        static const unsigned char a[] = {
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe,
-            0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xfc
-        };
-        static const unsigned char b[] = {
-            0xb3, 0x31, 0x2f, 0xa7, 0xe2, 0x3e, 0xe7, 0xe4,
-            0x98, 0x8e, 0x05, 0x6b, 0xe3, 0xf8, 0x2d, 0x19,
-            0x18, 0x1d, 0x9c, 0x6e, 0xfe, 0x81, 0x41, 0x12,
-            0x03, 0x14, 0x08, 0x8f, 0x50, 0x13, 0x87, 0x5a,
-            0xc6, 0x56, 0x39, 0x8d, 0x8a, 0x2e, 0xd1, 0x9d,
-            0x2a, 0x85, 0xc8, 0xed, 0xd3, 0xec, 0x2a, 0xef
-        };
-        static const unsigned char n[] = {
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-            0xc7, 0x63, 0x4d, 0x81, 0xf4, 0x37, 0x2d, 0xdf,
-            0x58, 0x1a, 0x0d, 0xb2, 0x48, 0xb0, 0xa7, 0x7a,
-            0xec, 0xec, 0x19, 0x6a, 0xcc, 0xc5, 0x29, 0x73
-        };
-        static const unsigned char Gx[] = {
-            0xaa, 0x87, 0xca, 0x22, 0xbe, 0x8b, 0x05, 0x37,
-            0x8e, 0xb1, 0xc7, 0x1e, 0xf3, 0x20, 0xad, 0x74,
-            0x6e, 0x1d, 0x3b, 0x62, 0x8b, 0xa7, 0x9b, 0x98,
-            0x59, 0xf7, 0x41, 0xe0, 0x82, 0x54, 0x2a, 0x38,
-            0x55, 0x02, 0xf2, 0x5d, 0xbf, 0x55, 0x29, 0x6c,
-            0x3a, 0x54, 0x5e, 0x38, 0x72, 0x76, 0x0a, 0xb7
-        };
-        static const unsigned char Gy[] = {
-            0x36, 0x17, 0xde, 0x4a, 0x96, 0x26, 0x2c, 0x6f,
-            0x5d, 0x9e, 0x98, 0xbf, 0x92, 0x92, 0xdc, 0x29,
-            0xf8, 0xf4, 0x1d, 0xbd, 0x28, 0x9a, 0x14, 0x7c,
-            0xe9, 0xda, 0x31, 0x13, 0xb5, 0xf0, 0xb8, 0xc0,
-            0x0a, 0x60, 0xb1, 0xce, 0x1d, 0x7e, 0x81, 0x9d,
-            0x7a, 0x43, 0x1d, 0x7c, 0x90, 0xea, 0x0e, 0x5f
-        };
+        mp_int *p = MP_LITERAL(0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000ffffffff);
+        mp_int *a = MP_LITERAL(0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000fffffffc);
+        mp_int *b = MP_LITERAL(0xb3312fa7e23ee7e4988e056be3f82d19181d9c6efe8141120314088f5013875ac656398d8a2ed19d2a85c8edd3ec2aef);
+        mp_int *G_x = MP_LITERAL(0xaa87ca22be8b05378eb1c71ef320ad746e1d3b628ba79b9859f741e082542a385502f25dbf55296c3a545e3872760ab7);
+        mp_int *G_y = MP_LITERAL(0x3617de4a96262c6f5d9e98bf9292dc29f8f41dbd289a147ce9da3113b5f0b8c00a60b1ce1d7e819d7a431d7c90ea0e5f);
+        mp_int *G_order = MP_LITERAL(0xffffffffffffffffffffffffffffffffffffffffffffffffc7634d81f4372ddf581a0db248b0a77aecec196accc52973);
+        mp_int *nonsquare_mod_p = mp_from_integer(19);
+        initialise_wcurve(&curve, p, a, b, nonsquare_mod_p, G_x, G_y, G_order);
+        mp_free(p);
+        mp_free(a);
+        mp_free(b);
+        mp_free(G_x);
+        mp_free(G_y);
+        mp_free(G_order);
+        mp_free(nonsquare_mod_p);
 
-        initialise_wcurve(&curve, 384, p, a, b, n, Gx, Gy);
         curve.textname = curve.name = "nistp384";
 
         /* Now initialised, no need to do it again */
@@ -243,74 +158,22 @@ static struct ec_curve *ec_p521(void)
 
     if (!initialised)
     {
-        static const unsigned char p[] = {
-            0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-            0xff, 0xff
-        };
-        static const unsigned char a[] = {
-            0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-            0xff, 0xfc
-        };
-        static const unsigned char b[] = {
-            0x00, 0x51, 0x95, 0x3e, 0xb9, 0x61, 0x8e, 0x1c,
-            0x9a, 0x1f, 0x92, 0x9a, 0x21, 0xa0, 0xb6, 0x85,
-            0x40, 0xee, 0xa2, 0xda, 0x72, 0x5b, 0x99, 0xb3,
-            0x15, 0xf3, 0xb8, 0xb4, 0x89, 0x91, 0x8e, 0xf1,
-            0x09, 0xe1, 0x56, 0x19, 0x39, 0x51, 0xec, 0x7e,
-            0x93, 0x7b, 0x16, 0x52, 0xc0, 0xbd, 0x3b, 0xb1,
-            0xbf, 0x07, 0x35, 0x73, 0xdf, 0x88, 0x3d, 0x2c,
-            0x34, 0xf1, 0xef, 0x45, 0x1f, 0xd4, 0x6b, 0x50,
-            0x3f, 0x00
-        };
-        static const unsigned char n[] = {
-            0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-            0xff, 0xfa, 0x51, 0x86, 0x87, 0x83, 0xbf, 0x2f,
-            0x96, 0x6b, 0x7f, 0xcc, 0x01, 0x48, 0xf7, 0x09,
-            0xa5, 0xd0, 0x3b, 0xb5, 0xc9, 0xb8, 0x89, 0x9c,
-            0x47, 0xae, 0xbb, 0x6f, 0xb7, 0x1e, 0x91, 0x38,
-            0x64, 0x09
-        };
-        static const unsigned char Gx[] = {
-            0x00, 0xc6, 0x85, 0x8e, 0x06, 0xb7, 0x04, 0x04,
-            0xe9, 0xcd, 0x9e, 0x3e, 0xcb, 0x66, 0x23, 0x95,
-            0xb4, 0x42, 0x9c, 0x64, 0x81, 0x39, 0x05, 0x3f,
-            0xb5, 0x21, 0xf8, 0x28, 0xaf, 0x60, 0x6b, 0x4d,
-            0x3d, 0xba, 0xa1, 0x4b, 0x5e, 0x77, 0xef, 0xe7,
-            0x59, 0x28, 0xfe, 0x1d, 0xc1, 0x27, 0xa2, 0xff,
-            0xa8, 0xde, 0x33, 0x48, 0xb3, 0xc1, 0x85, 0x6a,
-            0x42, 0x9b, 0xf9, 0x7e, 0x7e, 0x31, 0xc2, 0xe5,
-            0xbd, 0x66
-        };
-        static const unsigned char Gy[] = {
-            0x01, 0x18, 0x39, 0x29, 0x6a, 0x78, 0x9a, 0x3b,
-            0xc0, 0x04, 0x5c, 0x8a, 0x5f, 0xb4, 0x2c, 0x7d,
-            0x1b, 0xd9, 0x98, 0xf5, 0x44, 0x49, 0x57, 0x9b,
-            0x44, 0x68, 0x17, 0xaf, 0xbd, 0x17, 0x27, 0x3e,
-            0x66, 0x2c, 0x97, 0xee, 0x72, 0x99, 0x5e, 0xf4,
-            0x26, 0x40, 0xc5, 0x50, 0xb9, 0x01, 0x3f, 0xad,
-            0x07, 0x61, 0x35, 0x3c, 0x70, 0x86, 0xa2, 0x72,
-            0xc2, 0x40, 0x88, 0xbe, 0x94, 0x76, 0x9f, 0xd1,
-            0x66, 0x50
-        };
+        mp_int *p = MP_LITERAL(0x01ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff);
+        mp_int *a = MP_LITERAL(0x01fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffc);
+        mp_int *b = MP_LITERAL(0x0051953eb9618e1c9a1f929a21a0b68540eea2da725b99b315f3b8b489918ef109e156193951ec7e937b1652c0bd3bb1bf073573df883d2c34f1ef451fd46b503f00);
+        mp_int *G_x = MP_LITERAL(0x00c6858e06b70404e9cd9e3ecb662395b4429c648139053fb521f828af606b4d3dbaa14b5e77efe75928fe1dc127a2ffa8de3348b3c1856a429bf97e7e31c2e5bd66);
+        mp_int *G_y = MP_LITERAL(0x011839296a789a3bc0045c8a5fb42c7d1bd998f54449579b446817afbd17273e662c97ee72995ef42640c550b9013fad0761353c7086a272c24088be94769fd16650);
+        mp_int *G_order = MP_LITERAL(0x01fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffa51868783bf2f966b7fcc0148f709a5d03bb5c9b8899c47aebb6fb71e91386409);
+        mp_int *nonsquare_mod_p = mp_from_integer(3);
+        initialise_wcurve(&curve, p, a, b, nonsquare_mod_p, G_x, G_y, G_order);
+        mp_free(p);
+        mp_free(a);
+        mp_free(b);
+        mp_free(G_x);
+        mp_free(G_y);
+        mp_free(G_order);
+        mp_free(nonsquare_mod_p);
 
-        initialise_wcurve(&curve, 521, p, a, b, n, Gx, Gy);
         curve.textname = curve.name = "nistp521";
 
         /* Now initialised, no need to do it again */
@@ -327,32 +190,16 @@ static struct ec_curve *ec_curve25519(void)
 
     if (!initialised)
     {
-        static const unsigned char p[] = {
-            0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xed
-        };
-        static const unsigned char a[] = {
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x6d, 0x06
-        };
-        static const unsigned char b[] = {
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01
-        };
-        static const unsigned char gx[32] = {
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09
-        };
+        mp_int *p = MP_LITERAL(0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffed);
+        mp_int *a = MP_LITERAL(0x0000000000000000000000000000000000000000000000000000000000076d06);
+        mp_int *b = MP_LITERAL(0x0000000000000000000000000000000000000000000000000000000000000001);
+        mp_int *G_x = MP_LITERAL(0x0000000000000000000000000000000000000000000000000000000000000009);
+        initialise_mcurve(&curve, p, a, b, G_x);
+        mp_free(p);
+        mp_free(a);
+        mp_free(b);
+        mp_free(G_x);
 
-        initialise_mcurve(&curve, 256, p, a, b, gx);
         /* This curve doesn't need a name, because it's never used in
          * any format that embeds the curve name */
         curve.name = NULL;
@@ -372,42 +219,26 @@ static struct ec_curve *ec_ed25519(void)
 
     if (!initialised)
     {
-        static const unsigned char q[] = {
-            0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xed
-        };
-        static const unsigned char l[32] = {
-            0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-            0x14, 0xde, 0xf9, 0xde, 0xa2, 0xf7, 0x9c, 0xd6,
-            0x58, 0x12, 0x63, 0x1a, 0x5c, 0xf5, 0xd3, 0xed
-        };
-        static const unsigned char d[32] = {
-            0x52, 0x03, 0x6c, 0xee, 0x2b, 0x6f, 0xfe, 0x73,
-            0x8c, 0xc7, 0x40, 0x79, 0x77, 0x79, 0xe8, 0x98,
-            0x00, 0x70, 0x0a, 0x4d, 0x41, 0x41, 0xd8, 0xab,
-            0x75, 0xeb, 0x4d, 0xca, 0x13, 0x59, 0x78, 0xa3
-        };
-        static const unsigned char Bx[32] = {
-            0x21, 0x69, 0x36, 0xd3, 0xcd, 0x6e, 0x53, 0xfe,
-            0xc0, 0xa4, 0xe2, 0x31, 0xfd, 0xd6, 0xdc, 0x5c,
-            0x69, 0x2c, 0xc7, 0x60, 0x95, 0x25, 0xa7, 0xb2,
-            0xc9, 0x56, 0x2d, 0x60, 0x8f, 0x25, 0xd5, 0x1a
-        };
-        static const unsigned char By[32] = {
-            0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66,
-            0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66,
-            0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66,
-            0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x58
-        };
+        mp_int *p = MP_LITERAL(0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffed);
+        mp_int *d = MP_LITERAL(0x52036cee2b6ffe738cc740797779e89800700a4d4141d8ab75eb4dca135978a3);
+        mp_int *a = MP_LITERAL(0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffec); /* == p-1 */
+        mp_int *G_x = MP_LITERAL(0x216936d3cd6e53fec0a4e231fdd6dc5c692cc7609525a7b2c9562d608f25d51a);
+        mp_int *G_y = MP_LITERAL(0x6666666666666666666666666666666666666666666666666666666666666658);
+        mp_int *G_order = MP_LITERAL(0x1000000000000000000000000000000014def9dea2f79cd65812631a5cf5d3ed);
+        mp_int *nonsquare_mod_p = mp_from_integer(2);
+        initialise_ecurve(&curve, p, d, a, nonsquare_mod_p, G_x, G_y, G_order);
+        mp_free(p);
+        mp_free(d);
+        mp_free(a);
+        mp_free(G_x);
+        mp_free(G_y);
+        mp_free(G_order);
+        mp_free(nonsquare_mod_p);
 
         /* This curve doesn't need a name, because it's never used in
          * any format that embeds the curve name */
         curve.name = NULL;
 
-        initialise_ecurve(&curve, 256, q, l, d, Bx, By);
         curve.textname = "Ed25519";
 
         /* Now initialised, no need to do it again */
@@ -417,1276 +248,10 @@ static struct ec_curve *ec_ed25519(void)
     return &curve;
 }
 
-/* Return 1 if a is -3 % p, otherwise return 0
- * This is used because there are some maths optimisations */
-static bool ec_aminus3(const struct ec_curve *curve)
-{
-    bool ret;
-    Bignum _p;
-
-    if (curve->type != EC_WEIERSTRASS) {
-        return false;
-    }
-
-    _p = bignum_add_long(curve->w.a, 3);
-
-    ret = !bignum_cmp(curve->p, _p);
-    freebn(_p);
-    return ret;
-}
-
-/* ----------------------------------------------------------------------
- * Elliptic curve field maths
- */
-
-static Bignum ecf_add(const Bignum a, const Bignum b,
-                      const struct ec_curve *curve)
-{
-    Bignum a1, b1, ab, ret;
-
-    a1 = bigmod(a, curve->p);
-    b1 = bigmod(b, curve->p);
-
-    ab = bigadd(a1, b1);
-    freebn(a1);
-    freebn(b1);
-
-    ret = bigmod(ab, curve->p);
-    freebn(ab);
-
-    return ret;
-}
-
-static Bignum ecf_square(const Bignum a, const struct ec_curve *curve)
-{
-    return modmul(a, a, curve->p);
-}
-
-static Bignum ecf_treble(const Bignum a, const struct ec_curve *curve)
-{
-    Bignum ret, tmp;
-
-    /* Double */
-    tmp = bignum_lshift(a, 1);
-
-    /* Add itself (i.e. treble) */
-    ret = bigadd(tmp, a);
-    freebn(tmp);
-
-    /* Normalise */
-    while (bignum_cmp(ret, curve->p) >= 0)
-    {
-        tmp = bigsub(ret, curve->p);
-        assert(tmp);
-        freebn(ret);
-        ret = tmp;
-    }
-
-    return ret;
-}
-
-static Bignum ecf_double(const Bignum a, const struct ec_curve *curve)
-{
-    Bignum ret = bignum_lshift(a, 1);
-    if (bignum_cmp(ret, curve->p) >= 0)
-    {
-        Bignum tmp = bigsub(ret, curve->p);
-        assert(tmp);
-        freebn(ret);
-        return tmp;
-    }
-    else
-    {
-        return ret;
-    }
-}
-
-/* ----------------------------------------------------------------------
- * Memory functions
- */
-
-void ec_point_free(struct ec_point *point)
-{
-    if (point == NULL) return;
-    point->curve = 0;
-    if (point->x) freebn(point->x);
-    if (point->y) freebn(point->y);
-    if (point->z) freebn(point->z);
-    point->infinity = false;
-    sfree(point);
-}
-
-static struct ec_point *ec_point_new(const struct ec_curve *curve,
-                                     const Bignum x, const Bignum y, const Bignum z,
-                                     bool infinity)
-{
-    struct ec_point *point = snewn(1, struct ec_point);
-    point->curve = curve;
-    point->x = x;
-    point->y = y;
-    point->z = z;
-    point->infinity = infinity;
-    return point;
-}
-
-static struct ec_point *ec_point_copy(const struct ec_point *a)
-{
-    if (a == NULL) return NULL;
-    return ec_point_new(a->curve,
-                        a->x ? copybn(a->x) : NULL,
-                        a->y ? copybn(a->y) : NULL,
-                        a->z ? copybn(a->z) : NULL,
-                        a->infinity);
-}
-
-static bool ec_point_verify(const struct ec_point *a)
-{
-    if (a->infinity) {
-        return true;
-    } else if (a->curve->type == EC_EDWARDS) {
-        /* Check y^2 - x^2 - 1 - d * x^2 * y^2 == 0 */
-        Bignum y2, x2, tmp, tmp2, tmp3;
-        bool ret;
-
-        y2 = ecf_square(a->y, a->curve);
-        x2 = ecf_square(a->x, a->curve);
-        tmp = modmul(a->curve->e.d, x2, a->curve->p);
-        tmp2 = modmul(tmp, y2, a->curve->p);
-        freebn(tmp);
-        tmp = modsub(y2, x2, a->curve->p);
-        freebn(y2);
-        freebn(x2);
-        tmp3 = modsub(tmp, tmp2, a->curve->p);
-        freebn(tmp);
-        freebn(tmp2);
-        ret = !bignum_cmp(tmp3, One);
-        freebn(tmp3);
-        return ret;
-    } else if (a->curve->type == EC_WEIERSTRASS) {
-        /* Verify y^2 = x^3 + ax + b */
-        bool ret = false;
-
-        Bignum lhs = NULL, x3 = NULL, ax = NULL, x3ax = NULL, x3axm = NULL, x3axb = NULL, rhs = NULL;
-
-        Bignum Three = bignum_from_long(3);
-
-        lhs = modmul(a->y, a->y, a->curve->p);
-
-        /* This uses montgomery multiplication to optimise */
-        x3 = modpow(a->x, Three, a->curve->p);
-        freebn(Three);
-        ax = modmul(a->curve->w.a, a->x, a->curve->p);
-        x3ax = bigadd(x3, ax);
-        freebn(x3); x3 = NULL;
-        freebn(ax); ax = NULL;
-        x3axm = bigmod(x3ax, a->curve->p);
-        freebn(x3ax); x3ax = NULL;
-        x3axb = bigadd(x3axm, a->curve->w.b);
-        freebn(x3axm); x3axm = NULL;
-        rhs = bigmod(x3axb, a->curve->p);
-        freebn(x3axb);
-
-        ret = !bignum_cmp(lhs, rhs);
-        freebn(lhs);
-        freebn(rhs);
-
-        return ret;
-    } else {
-        return false;
-    }
-}
-
-/* ----------------------------------------------------------------------
- * Elliptic curve point maths
- */
-
-/* Returns true on success and false on memory error */
-static bool ecp_normalise(struct ec_point *a)
-{
-    if (!a) {
-        /* No point */
-        return false;
-    }
-
-    if (a->infinity) {
-        /* Point is at infinity - i.e. normalised */
-        return true;
-    }
-
-    if (a->curve->type == EC_WEIERSTRASS) {
-        /* In Jacobian Coordinates the triple (X, Y, Z) represents
-           the affine point (X / Z^2, Y / Z^3) */
-
-        Bignum Z2, Z2inv, Z3, Z3inv, tx, ty;
-
-        if (!a->x || !a->y) {
-            /* No point defined */
-            return false;
-        } else if (!a->z) {
-            /* Already normalised */
-            return true;
-        }
-
-        Z2 = ecf_square(a->z, a->curve);
-        Z2inv = modinv(Z2, a->curve->p);
-        if (!Z2inv) {
-            freebn(Z2);
-            return false;
-        }
-        tx = modmul(a->x, Z2inv, a->curve->p);
-        freebn(Z2inv);
-
-        Z3 = modmul(Z2, a->z, a->curve->p);
-        freebn(Z2);
-        Z3inv = modinv(Z3, a->curve->p);
-        freebn(Z3);
-        if (!Z3inv) {
-            freebn(tx);
-            return false;
-        }
-        ty = modmul(a->y, Z3inv, a->curve->p);
-        freebn(Z3inv);
-
-        freebn(a->x);
-        a->x = tx;
-        freebn(a->y);
-        a->y = ty;
-        freebn(a->z);
-        a->z = NULL;
-        return true;
-    } else if (a->curve->type == EC_MONTGOMERY) {
-        /* In Montgomery (X : Z) represents the x co-ord (X / Z, ?) */
-
-        Bignum tmp, tmp2;
-
-        if (!a->x) {
-            /* No point defined */
-            return false;
-        } else if (!a->z) {
-            /* Already normalised */
-            return true;
-        }
-
-        tmp = modinv(a->z, a->curve->p);
-        if (!tmp) {
-            return false;
-        }
-        tmp2 = modmul(a->x, tmp, a->curve->p);
-        freebn(tmp);
-
-        freebn(a->z);
-        a->z = NULL;
-        freebn(a->x);
-        a->x = tmp2;
-        return true;
-    } else if (a->curve->type == EC_EDWARDS) {
-        /* Always normalised */
-        return true;
-    } else {
-        return false;
-    }
-}
-
-static struct ec_point *ecp_doublew(const struct ec_point *a, bool aminus3)
-{
-    Bignum S, M, outx, outy, outz;
-
-    if (bignum_cmp(a->y, Zero) == 0)
-    {
-        /* Identity */
-        return ec_point_new(a->curve, NULL, NULL, NULL, true);
-    }
-
-    /* S = 4*X*Y^2 */
-    {
-        Bignum Y2, XY2, _2XY2;
-
-        Y2 = ecf_square(a->y, a->curve);
-        XY2 = modmul(a->x, Y2, a->curve->p);
-        freebn(Y2);
-
-        _2XY2 = ecf_double(XY2, a->curve);
-        freebn(XY2);
-        S = ecf_double(_2XY2, a->curve);
-        freebn(_2XY2);
-    }
-
-    /* Faster calculation if a = -3 */
-    if (aminus3) {
-        /* if a = -3, then M can also be calculated as M = 3*(X + Z^2)*(X - Z^2) */
-        Bignum Z2, XpZ2, XmZ2, second;
-
-        if (a->z == NULL) {
-            Z2 = copybn(One);
-        } else {
-            Z2 = ecf_square(a->z, a->curve);
-        }
-
-        XpZ2 = ecf_add(a->x, Z2, a->curve);
-        XmZ2 = modsub(a->x, Z2, a->curve->p);
-        freebn(Z2);
-
-        second = modmul(XpZ2, XmZ2, a->curve->p);
-        freebn(XpZ2);
-        freebn(XmZ2);
-
-        M = ecf_treble(second, a->curve);
-        freebn(second);
-    } else {
-        /* M = 3*X^2 + a*Z^4 */
-        Bignum _3X2, X2, aZ4;
-
-        if (a->z == NULL) {
-            aZ4 = copybn(a->curve->w.a);
-        } else {
-            Bignum Z2, Z4;
-
-            Z2 = ecf_square(a->z, a->curve);
-            Z4 = ecf_square(Z2, a->curve);
-            freebn(Z2);
-            aZ4 = modmul(a->curve->w.a, Z4, a->curve->p);
-            freebn(Z4);
-        }
-
-        X2 = modmul(a->x, a->x, a->curve->p);
-        _3X2 = ecf_treble(X2, a->curve);
-        freebn(X2);
-        M = ecf_add(_3X2, aZ4, a->curve);
-        freebn(_3X2);
-        freebn(aZ4);
-    }
-
-    /* X' = M^2 - 2*S */
-    {
-        Bignum M2, _2S;
-
-        M2 = ecf_square(M, a->curve);
-        _2S = ecf_double(S, a->curve);
-        outx = modsub(M2, _2S, a->curve->p);
-        freebn(M2);
-        freebn(_2S);
-    }
-
-    /* Y' = M*(S - X') - 8*Y^4 */
-    {
-        Bignum SX, MSX, Eight, Y2, Y4, _8Y4;
-
-        SX = modsub(S, outx, a->curve->p);
-        freebn(S);
-        MSX = modmul(M, SX, a->curve->p);
-        freebn(SX);
-        freebn(M);
-        Y2 = ecf_square(a->y, a->curve);
-        Y4 = ecf_square(Y2, a->curve);
-        freebn(Y2);
-        Eight = bignum_from_long(8);
-        _8Y4 = modmul(Eight, Y4, a->curve->p);
-        freebn(Eight);
-        freebn(Y4);
-        outy = modsub(MSX, _8Y4, a->curve->p);
-        freebn(MSX);
-        freebn(_8Y4);
-    }
-
-    /* Z' = 2*Y*Z */
-    {
-        Bignum YZ;
-
-        if (a->z == NULL) {
-            YZ = copybn(a->y);
-        } else {
-            YZ = modmul(a->y, a->z, a->curve->p);
-        }
-
-        outz = ecf_double(YZ, a->curve);
-        freebn(YZ);
-    }
-
-    return ec_point_new(a->curve, outx, outy, outz, false);
-}
-
-static struct ec_point *ecp_doublem(const struct ec_point *a)
-{
-    Bignum z, outx, outz, xpz, xmz;
-
-    z = a->z;
-    if (!z) {
-        z = One;
-    }
-
-    /* 4xz = (x + z)^2 - (x - z)^2 */
-    {
-        Bignum tmp;
-
-        tmp = ecf_add(a->x, z, a->curve);
-        xpz = ecf_square(tmp, a->curve);
-        freebn(tmp);
-
-        tmp = modsub(a->x, z, a->curve->p);
-        xmz = ecf_square(tmp, a->curve);
-        freebn(tmp);
-    }
-
-    /* outx = (x + z)^2 * (x - z)^2 */
-    outx = modmul(xpz, xmz, a->curve->p);
-
-    /* outz = 4xz * ((x - z)^2 + ((A + 2) / 4)*4xz) */
-    {
-        Bignum _4xz, tmp, tmp2, tmp3;
-
-        tmp = bignum_from_long(2);
-        tmp2 = ecf_add(a->curve->m.a, tmp, a->curve);
-        freebn(tmp);
-
-        _4xz = modsub(xpz, xmz, a->curve->p);
-        freebn(xpz);
-        tmp = modmul(tmp2, _4xz, a->curve->p);
-        freebn(tmp2);
-
-        tmp2 = bignum_from_long(4);
-        tmp3 = modinv(tmp2, a->curve->p);
-        freebn(tmp2);
-        if (!tmp3) {
-            freebn(tmp);
-            freebn(_4xz);
-            freebn(outx);
-            freebn(xmz);
-            return NULL;
-        }
-        tmp2 = modmul(tmp, tmp3, a->curve->p);
-        freebn(tmp);
-        freebn(tmp3);
-
-        tmp = ecf_add(xmz, tmp2, a->curve);
-        freebn(xmz);
-        freebn(tmp2);
-        outz = modmul(_4xz, tmp, a->curve->p);
-        freebn(_4xz);
-        freebn(tmp);
-    }
-
-    return ec_point_new(a->curve, outx, NULL, outz, false);
-}
-
-/* Forward declaration for Edwards curve doubling */
-static struct ec_point *ecp_add(const struct ec_point *a,
-                                const struct ec_point *b,
-                                bool aminus3);
-
-static struct ec_point *ecp_double(const struct ec_point *a, bool aminus3)
-{
-    if (a->infinity)
-    {
-        /* Identity */
-        return ec_point_new(a->curve, NULL, NULL, NULL, true);
-    }
-
-    if (a->curve->type == EC_EDWARDS)
-    {
-        return ecp_add(a, a, aminus3);
-    }
-    else if (a->curve->type == EC_WEIERSTRASS)
-    {
-        return ecp_doublew(a, aminus3);
-    }
-    else
-    {
-        return ecp_doublem(a);
-    }
-}
-
-static struct ec_point *ecp_addw(const struct ec_point *a,
-                                 const struct ec_point *b,
-                                 bool aminus3)
-{
-    Bignum U1, U2, S1, S2, outx, outy, outz;
-
-    /* U1 = X1*Z2^2 */
-    /* S1 = Y1*Z2^3 */
-    if (b->z) {
-        Bignum Z2, Z3;
-
-        Z2 = ecf_square(b->z, a->curve);
-        U1 = modmul(a->x, Z2, a->curve->p);
-        Z3 = modmul(Z2, b->z, a->curve->p);
-        freebn(Z2);
-        S1 = modmul(a->y, Z3, a->curve->p);
-        freebn(Z3);
-    } else {
-        U1 = copybn(a->x);
-        S1 = copybn(a->y);
-    }
-
-    /* U2 = X2*Z1^2 */
-    /* S2 = Y2*Z1^3 */
-    if (a->z) {
-        Bignum Z2, Z3;
-
-        Z2 = ecf_square(a->z, b->curve);
-        U2 = modmul(b->x, Z2, b->curve->p);
-        Z3 = modmul(Z2, a->z, b->curve->p);
-        freebn(Z2);
-        S2 = modmul(b->y, Z3, b->curve->p);
-        freebn(Z3);
-    } else {
-        U2 = copybn(b->x);
-        S2 = copybn(b->y);
-    }
-
-    /* Check if multiplying by self */
-    if (bignum_cmp(U1, U2) == 0)
-    {
-        freebn(U1);
-        freebn(U2);
-        if (bignum_cmp(S1, S2) == 0)
-        {
-            freebn(S1);
-            freebn(S2);
-            return ecp_double(a, aminus3);
-        }
-        else
-        {
-            freebn(S1);
-            freebn(S2);
-            /* Infinity */
-            return ec_point_new(a->curve, NULL, NULL, NULL, true);
-        }
-    }
-
-    {
-        Bignum H, R, UH2, H3;
-
-        /* H = U2 - U1 */
-        H = modsub(U2, U1, a->curve->p);
-        freebn(U2);
-
-        /* R = S2 - S1 */
-        R = modsub(S2, S1, a->curve->p);
-        freebn(S2);
-
-        /* X3 = R^2 - H^3 - 2*U1*H^2 */
-        {
-            Bignum R2, H2, _2UH2, first;
-
-            H2 = ecf_square(H, a->curve);
-            UH2 = modmul(U1, H2, a->curve->p);
-            freebn(U1);
-            H3 = modmul(H2, H, a->curve->p);
-            freebn(H2);
-            R2 = ecf_square(R, a->curve);
-            _2UH2 = ecf_double(UH2, a->curve);
-            first = modsub(R2, H3, a->curve->p);
-            freebn(R2);
-            outx = modsub(first, _2UH2, a->curve->p);
-            freebn(first);
-            freebn(_2UH2);
-        }
-
-        /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
-        {
-            Bignum RUH2mX, UH2mX, SH3;
-
-            UH2mX = modsub(UH2, outx, a->curve->p);
-            freebn(UH2);
-            RUH2mX = modmul(R, UH2mX, a->curve->p);
-            freebn(UH2mX);
-            freebn(R);
-            SH3 = modmul(S1, H3, a->curve->p);
-            freebn(S1);
-            freebn(H3);
-
-            outy = modsub(RUH2mX, SH3, a->curve->p);
-            freebn(RUH2mX);
-            freebn(SH3);
-        }
-
-        /* Z3 = H*Z1*Z2 */
-        if (a->z && b->z) {
-            Bignum ZZ;
-
-            ZZ = modmul(a->z, b->z, a->curve->p);
-            outz = modmul(H, ZZ, a->curve->p);
-            freebn(H);
-            freebn(ZZ);
-        } else if (a->z) {
-            outz = modmul(H, a->z, a->curve->p);
-            freebn(H);
-        } else if (b->z) {
-            outz = modmul(H, b->z, a->curve->p);
-            freebn(H);
-        } else {
-            outz = H;
-        }
-    }
-
-    return ec_point_new(a->curve, outx, outy, outz, false);
-}
-
-static struct ec_point *ecp_addm(const struct ec_point *a,
-                                 const struct ec_point *b,
-                                 const struct ec_point *base)
-{
-    Bignum outx, outz, az, bz;
-
-    az = a->z;
-    if (!az) {
-        az = One;
-    }
-    bz = b->z;
-    if (!bz) {
-        bz = One;
-    }
-
-    /* a-b is maintained at 1 due to Montgomery ladder implementation */
-    /* Xa+b = Za-b * ((Xa - Za)*(Xb + Zb) + (Xa + Za)*(Xb - Zb))^2 */
-    /* Za+b = Xa-b * ((Xa - Za)*(Xb + Zb) - (Xa + Za)*(Xb - Zb))^2 */
-    {
-        Bignum tmp, tmp2, tmp3, tmp4;
-
-        /* (Xa + Za) * (Xb - Zb) */
-        tmp = ecf_add(a->x, az, a->curve);
-        tmp2 = modsub(b->x, bz, a->curve->p);
-        tmp3 = modmul(tmp, tmp2, a->curve->p);
-        freebn(tmp);
-        freebn(tmp2);
-
-        /* (Xa - Za) * (Xb + Zb) */
-        tmp = modsub(a->x, az, a->curve->p);
-        tmp2 = ecf_add(b->x, bz, a->curve);
-        tmp4 = modmul(tmp, tmp2, a->curve->p);
-        freebn(tmp);
-        freebn(tmp2);
-
-        tmp = ecf_add(tmp3, tmp4, a->curve);
-        outx = ecf_square(tmp, a->curve);
-        freebn(tmp);
-
-        tmp = modsub(tmp3, tmp4, a->curve->p);
-        freebn(tmp3);
-        freebn(tmp4);
-        tmp2 = ecf_square(tmp, a->curve);
-        freebn(tmp);
-        outz = modmul(base->x, tmp2, a->curve->p);
-        freebn(tmp2);
-    }
-
-    return ec_point_new(a->curve, outx, NULL, outz, false);
-}
-
-static struct ec_point *ecp_adde(const struct ec_point *a,
-                                 const struct ec_point *b)
-{
-    Bignum outx, outy, dmul;
-
-    /* outx = (a->x * b->y + b->x * a->y) /
-     *        (1 + a->curve->e.d * a->x * b->x * a->y * b->y) */
-    {
-        Bignum tmp, tmp2, tmp3, tmp4;
-
-        tmp = modmul(a->x, b->y, a->curve->p);
-        tmp2 = modmul(b->x, a->y, a->curve->p);
-        tmp3 = ecf_add(tmp, tmp2, a->curve);
-
-        tmp4 = modmul(tmp, tmp2, a->curve->p);
-        freebn(tmp);
-        freebn(tmp2);
-        dmul = modmul(a->curve->e.d, tmp4, a->curve->p);
-        freebn(tmp4);
-
-        tmp = ecf_add(One, dmul, a->curve);
-        tmp2 = modinv(tmp, a->curve->p);
-        freebn(tmp);
-        if (!tmp2)
-        {
-            freebn(tmp3);
-            freebn(dmul);
-            return NULL;
-        }
-
-        outx = modmul(tmp3, tmp2, a->curve->p);
-        freebn(tmp3);
-        freebn(tmp2);
-    }
-
-    /* outy = (a->y * b->y + a->x * b->x) /
-     *        (1 - a->curve->e.d * a->x * b->x * a->y * b->y) */
-    {
-        Bignum tmp, tmp2, tmp3, tmp4;
-
-        tmp = modsub(One, dmul, a->curve->p);
-        freebn(dmul);
-
-        tmp2 = modinv(tmp, a->curve->p);
-        freebn(tmp);
-        if (!tmp2)
-        {
-            freebn(outx);
-            return NULL;
-        }
-
-        tmp = modmul(a->y, b->y, a->curve->p);
-        tmp3 = modmul(a->x, b->x, a->curve->p);
-        tmp4 = ecf_add(tmp, tmp3, a->curve);
-        freebn(tmp);
-        freebn(tmp3);
-
-        outy = modmul(tmp4, tmp2, a->curve->p);
-        freebn(tmp4);
-        freebn(tmp2);
-    }
-
-    return ec_point_new(a->curve, outx, outy, NULL, false);
-}
-
-static struct ec_point *ecp_add(const struct ec_point *a,
-                                const struct ec_point *b,
-                                bool aminus3)
-{
-    if (a->curve != b->curve) {
-        return NULL;
-    }
-
-    /* Check if multiplying by infinity */
-    if (a->infinity) return ec_point_copy(b);
-    if (b->infinity) return ec_point_copy(a);
-
-    if (a->curve->type == EC_EDWARDS)
-    {
-        return ecp_adde(a, b);
-    }
-
-    if (a->curve->type == EC_WEIERSTRASS)
-    {
-        return ecp_addw(a, b, aminus3);
-    }
-
-    return NULL;
-}
-
-static struct ec_point *ecp_mul_(
-    const struct ec_point *a, const Bignum b, bool aminus3)
-{
-    struct ec_point *A, *ret;
-    int bits, i;
-
-    A = ec_point_copy(a);
-    ret = ec_point_new(a->curve, NULL, NULL, NULL, true);
-
-    bits = bignum_bitcount(b);
-    for (i = 0; i < bits; ++i)
-    {
-        if (bignum_bit(b, i))
-        {
-            struct ec_point *tmp = ecp_add(ret, A, aminus3);
-            ec_point_free(ret);
-            ret = tmp;
-        }
-        if (i+1 != bits)
-        {
-            struct ec_point *tmp = ecp_double(A, aminus3);
-            ec_point_free(A);
-            A = tmp;
-        }
-    }
-
-    ec_point_free(A);
-    return ret;
-}
-
-static struct ec_point *ecp_mulw(const struct ec_point *a, const Bignum b)
-{
-    struct ec_point *ret = ecp_mul_(a, b, ec_aminus3(a->curve));
-
-    if (!ecp_normalise(ret)) {
-        ec_point_free(ret);
-        return NULL;
-    }
-
-    return ret;
-}
-
-static struct ec_point *ecp_mule(const struct ec_point *a, const Bignum b)
-{
-    int i;
-    struct ec_point *ret;
-
-    ret = ec_point_new(a->curve, NULL, NULL, NULL, true);
-
-    for (i = bignum_bitcount(b); i >= 0 && ret; --i)
-    {
-        {
-            struct ec_point *tmp = ecp_double(ret, false);
-            ec_point_free(ret);
-            ret = tmp;
-        }
-        if (ret && bignum_bit(b, i))
-        {
-            struct ec_point *tmp = ecp_add(ret, a, false);
-            ec_point_free(ret);
-            ret = tmp;
-        }
-    }
-
-    return ret;
-}
-
-static struct ec_point *ecp_mulm(const struct ec_point *p, const Bignum n)
-{
-    struct ec_point *P1, *P2;
-    int bits, i;
-
-    /* P1 <- P and P2 <- [2]P */
-    P2 = ecp_double(p, false);
-    P1 = ec_point_copy(p);
-
-    /* for i = bits − 2 down to 0 */
-    bits = bignum_bitcount(n);
-    for (i = bits - 2; i >= 0; --i)
-    {
-        if (!bignum_bit(n, i))
-        {
-            /* P2 <- P1 + P2 */
-            struct ec_point *tmp = ecp_addm(P1, P2, p);
-            ec_point_free(P2);
-            P2 = tmp;
-
-            /* P1 <- [2]P1 */
-            tmp = ecp_double(P1, false);
-            ec_point_free(P1);
-            P1 = tmp;
-        }
-        else
-        {
-            /* P1 <- P1 + P2 */
-            struct ec_point *tmp = ecp_addm(P1, P2, p);
-            ec_point_free(P1);
-            P1 = tmp;
-
-            /* P2 <- [2]P2 */
-            tmp = ecp_double(P2, false);
-            ec_point_free(P2);
-            P2 = tmp;
-        }
-    }
-
-    ec_point_free(P2);
-
-    if (!ecp_normalise(P1)) {
-        ec_point_free(P1);
-        return NULL;
-    }
-
-    return P1;
-}
-
-/* Not static because it is used by sshecdsag.c to generate a new key */
-struct ec_point *ecp_mul(const struct ec_point *a, const Bignum b)
-{
-    if (a->curve->type == EC_WEIERSTRASS) {
-        return ecp_mulw(a, b);
-    } else if (a->curve->type == EC_EDWARDS) {
-        return ecp_mule(a, b);
-    } else {
-        return ecp_mulm(a, b);
-    }
-}
-
-static struct ec_point *ecp_summul(const Bignum a, const Bignum b,
-                                   const struct ec_point *point)
-{
-    struct ec_point *aG, *bP, *ret;
-    bool aminus3;
-
-    if (point->curve->type != EC_WEIERSTRASS) {
-        return NULL;
-    }
-
-    aminus3 = ec_aminus3(point->curve);
-
-    aG = ecp_mul_(&point->curve->w.G, a, aminus3);
-    if (!aG) return NULL;
-    bP = ecp_mul_(point, b, aminus3);
-    if (!bP) {
-        ec_point_free(aG);
-        return NULL;
-    }
-
-    ret = ecp_add(aG, bP, aminus3);
-
-    ec_point_free(aG);
-    ec_point_free(bP);
-
-    if (!ecp_normalise(ret)) {
-        ec_point_free(ret);
-        return NULL;
-    }
-
-    return ret;
-}
-static Bignum *ecp_edx(const struct ec_curve *curve, const Bignum y)
-{
-    /* Get the x value on the given Edwards curve for a given y */
-    Bignum x, xx;
-
-    /* xx = (y^2 - 1) / (d * y^2 + 1) */
-    {
-        Bignum tmp, tmp2, tmp3;
-
-        tmp = ecf_square(y, curve);
-        tmp2 = modmul(curve->e.d, tmp, curve->p);
-        tmp3 = ecf_add(tmp2, One, curve);
-        freebn(tmp2);
-        tmp2 = modinv(tmp3, curve->p);
-        freebn(tmp3);
-        if (!tmp2) {
-            freebn(tmp);
-            return NULL;
-        }
-
-        tmp3 = modsub(tmp, One, curve->p);
-        freebn(tmp);
-        xx = modmul(tmp3, tmp2, curve->p);
-        freebn(tmp3);
-        freebn(tmp2);
-    }
-
-    /* x = xx^((p + 3) / 8) */
-    {
-        Bignum tmp, tmp2;
-
-        tmp = bignum_add_long(curve->p, 3);
-        tmp2 = bignum_rshift(tmp, 3);
-        freebn(tmp);
-        x = modpow(xx, tmp2, curve->p);
-        freebn(tmp2);
-    }
-
-    /* if x^2 - xx != 0 then x = x*(2^((p - 1) / 4)) */
-    {
-        Bignum tmp, tmp2;
-
-        tmp = ecf_square(x, curve);
-        tmp2 = modsub(tmp, xx, curve->p);
-        freebn(tmp);
-        freebn(xx);
-        if (bignum_cmp(tmp2, Zero)) {
-            Bignum tmp3;
-
-            freebn(tmp2);
-
-            tmp = modsub(curve->p, One, curve->p);
-            tmp2 = bignum_rshift(tmp, 2);
-            freebn(tmp);
-            tmp = bignum_from_long(2);
-            tmp3 = modpow(tmp, tmp2, curve->p);
-            freebn(tmp);
-            freebn(tmp2);
-
-            tmp = modmul(x, tmp3, curve->p);
-            freebn(x);
-            freebn(tmp3);
-            x = tmp;
-        } else {
-            freebn(tmp2);
-        }
-    }
-
-    /* if x % 2 != 0 then x = p - x */
-    if (bignum_bit(x, 0)) {
-        Bignum tmp = modsub(curve->p, x, curve->p);
-        freebn(x);
-        x = tmp;
-    }
-
-    return x;
-}
-
 /* ----------------------------------------------------------------------
  * Public point from private
  */
 
-struct ec_point *ec_public(const Bignum privateKey, const struct ec_curve *curve)
-{
-    if (curve->type == EC_WEIERSTRASS) {
-        return ecp_mul(&curve->w.G, privateKey);
-    } else if (curve->type == EC_EDWARDS) {
-        /* hash = H(sk) (where hash creates 2 * fieldBits)
-         * b = fieldBits
-         * a = 2^(b-2) + SUM(2^i * h_i) for i = 2 -> b-2
-         * publicKey = aB */
-        struct ec_point *ret;
-        unsigned char hash[512/8];
-        Bignum a;
-        int i, keylen;
-        SHA512_State s;
-        SHA512_Init(&s);
-
-        keylen = curve->fieldBits / 8;
-        for (i = 0; i < keylen; ++i)
-            put_byte(&s, bignum_byte(privateKey, i));
-        SHA512_Final(&s, hash);
-
-        /* The second part is simply turning the hash into a Bignum,
-         * however the 2^(b-2) bit *must* be set, and the bottom 3
-         * bits *must* not be */
-        hash[0] &= 0xf8; /* Unset bottom 3 bits (if set) */
-        hash[31] &= 0x7f; /* Unset above (b-2) */
-        hash[31] |= 0x40; /* Set 2^(b-2) */
-        /* Chop off the top part and convert to int */
-        a = bignum_from_bytes_le(hash, 32);
-
-        ret = ecp_mul(&curve->e.B, a);
-        freebn(a);
-        return ret;
-    } else {
-        return NULL;
-    }
-}
-
-/* ----------------------------------------------------------------------
- * Basic sign and verify routines
- */
-
-static bool _ecdsa_verify(const struct ec_point *publicKey,
-                          const unsigned char *data, const int dataLen,
-                          const Bignum r, const Bignum s)
-{
-    int z_bits, n_bits;
-    Bignum z;
-    bool valid = false;
-
-    if (publicKey->curve->type != EC_WEIERSTRASS) {
-        return false;
-    }
-
-    /* Sanity checks */
-    if (bignum_cmp(r, Zero) == 0 || bignum_cmp(r, publicKey->curve->w.n) >= 0
-        || bignum_cmp(s, Zero) == 0 || bignum_cmp(s, publicKey->curve->w.n) >= 0)
-    {
-        return false;
-    }
-
-    /* z = left most bitlen(curve->n) of data */
-    z = bignum_from_bytes(data, dataLen);
-    n_bits = bignum_bitcount(publicKey->curve->w.n);
-    z_bits = bignum_bitcount(z);
-    if (z_bits > n_bits)
-    {
-        Bignum tmp = bignum_rshift(z, z_bits - n_bits);
-        freebn(z);
-        z = tmp;
-    }
-
-    /* Ensure z in range of n */
-    {
-        Bignum tmp = bigmod(z, publicKey->curve->w.n);
-        freebn(z);
-        z = tmp;
-    }
-
-    /* Calculate signature */
-    {
-        Bignum w, x, u1, u2;
-        struct ec_point *tmp;
-
-        w = modinv(s, publicKey->curve->w.n);
-        if (!w) {
-            freebn(z);
-            return false;
-        }
-        u1 = modmul(z, w, publicKey->curve->w.n);
-        u2 = modmul(r, w, publicKey->curve->w.n);
-        freebn(w);
-
-        tmp = ecp_summul(u1, u2, publicKey);
-        freebn(u1);
-        freebn(u2);
-        if (!tmp) {
-            freebn(z);
-            return false;
-        }
-
-        x = bigmod(tmp->x, publicKey->curve->w.n);
-        ec_point_free(tmp);
-
-        valid = (bignum_cmp(r, x) == 0);
-        freebn(x);
-    }
-
-    freebn(z);
-
-    return valid;
-}
-
-static void _ecdsa_sign(const Bignum privateKey, const struct ec_curve *curve,
-                        const unsigned char *data, const int dataLen,
-                        Bignum *r, Bignum *s)
-{
-    unsigned char digest[20];
-    int z_bits, n_bits;
-    Bignum z, k;
-    struct ec_point *kG;
-
-    *r = NULL;
-    *s = NULL;
-
-    if (curve->type != EC_WEIERSTRASS) {
-        return;
-    }
-
-    /* z = left most bitlen(curve->n) of data */
-    z = bignum_from_bytes(data, dataLen);
-    n_bits = bignum_bitcount(curve->w.n);
-    z_bits = bignum_bitcount(z);
-    if (z_bits > n_bits)
-    {
-        Bignum tmp;
-        tmp = bignum_rshift(z, z_bits - n_bits);
-        freebn(z);
-        z = tmp;
-    }
-
-    /* Generate k between 1 and curve->n, using the same deterministic
-     * k generation system we use for conventional DSA. */
-    SHA_Simple(data, dataLen, digest);
-    k = dss_gen_k("ECDSA deterministic k generator", curve->w.n, privateKey,
-                  digest, sizeof(digest));
-
-    kG = ecp_mul(&curve->w.G, k);
-    if (!kG) {
-        freebn(z);
-        freebn(k);
-        return;
-    }
-
-    /* r = kG.x mod n */
-    *r = bigmod(kG->x, curve->w.n);
-    ec_point_free(kG);
-
-    /* s = (z + r * priv)/k mod n */
-    {
-        Bignum rPriv, zMod, first, firstMod, kInv;
-        rPriv = modmul(*r, privateKey, curve->w.n);
-        zMod = bigmod(z, curve->w.n);
-        freebn(z);
-        first = bigadd(rPriv, zMod);
-        freebn(rPriv);
-        freebn(zMod);
-        firstMod = bigmod(first, curve->w.n);
-        freebn(first);
-        kInv = modinv(k, curve->w.n);
-        freebn(k);
-        if (!kInv) {
-            freebn(firstMod);
-            freebn(*r);
-            return;
-        }
-        *s = modmul(firstMod, kInv, curve->w.n);
-        freebn(firstMod);
-        freebn(kInv);
-    }
-}
-
-/* ----------------------------------------------------------------------
- * Misc functions
- */
-
-static Bignum BinarySource_get_mp_le(BinarySource *src)
-{
-    ptrlen mp_str = get_string(src);
-    return bignum_from_bytes_le(mp_str.ptr, mp_str.len);
-}
-#define get_mp_le(src) BinarySource_get_mp_le(BinarySource_UPCAST(src))
-
-static bool decodepoint_ed(const char *p, int length, struct ec_point *point)
-{
-    /* Got some conversion to do, first read in the y co-ord */
-    bool negative;
-
-    point->y = bignum_from_bytes_le((const unsigned char*)p, length);
-    if ((unsigned)bignum_bitcount(point->y) > point->curve->fieldBits) {
-        freebn(point->y);
-        point->y = NULL;
-        return false;
-    }
-    /* Read x bit and then reset it */
-    negative = bignum_bit(point->y, point->curve->fieldBits - 1);
-    bignum_set_bit(point->y, point->curve->fieldBits - 1, 0);
-    bn_restore_invariant(point->y);
-
-    /* Get the x from the y */
-    point->x = ecp_edx(point->curve, point->y);
-    if (!point->x) {
-        freebn(point->y);
-        point->y = NULL;
-        return false;
-    }
-    if (negative) {
-        Bignum tmp = modsub(point->curve->p, point->x, point->curve->p);
-        freebn(point->x);
-        point->x = tmp;
-    }
-
-    /* Verify the point is on the curve */
-    if (!ec_point_verify(point)) {
-        freebn(point->x);
-        point->x = NULL;
-        freebn(point->y);
-        point->y = NULL;
-        return false;
-    }
-
-    return true;
-}
-
-static bool decodepoint(const char *p, int length, struct ec_point *point)
-{
-    if (point->curve->type == EC_EDWARDS) {
-        return decodepoint_ed(p, length, point);
-    }
-
-    if (length < 1 || p[0] != 0x04) /* Only support uncompressed point */
-        return false;
-    /* Skip compression flag */
-    ++p;
-    --length;
-    /* The two values must be equal length */
-    if (length % 2 != 0) {
-        point->x = NULL;
-        point->y = NULL;
-        point->z = NULL;
-        return false;
-    }
-    length = length / 2;
-    point->x = bignum_from_bytes(p, length);
-    p += length;
-    point->y = bignum_from_bytes(p, length);
-    point->z = NULL;
-
-    /* Verify the point is on the curve */
-    if (!ec_point_verify(point)) {
-        freebn(point->x);
-        point->x = NULL;
-        freebn(point->y);
-        point->y = NULL;
-        return false;
-    }
-
-    return true;
-}
-
-static bool BinarySource_get_point(BinarySource *src, struct ec_point *point)
-{
-    ptrlen str = get_string(src);
-    if (get_err(src)) return false;
-    return decodepoint(str.ptr, str.len, point);
-}
-#define get_point(src, pt) BinarySource_get_point(BinarySource_UPCAST(src), pt)
-
-/* ----------------------------------------------------------------------
- * Exposed ECDSA interface
- */
-
 struct ecsign_extra {
     struct ec_curve *(*curve)(void);
     const struct ssh_hashalg *hash;
@@ -1696,719 +261,880 @@ struct ecsign_extra {
     int oidlen;
 };
 
-static void ecdsa_freekey(ssh_key *key)
-{
-    struct ec_key *ec;
-
-    if (!key) return;
-    ec = container_of(key, struct ec_key, sshk);
-
-    if (ec->publicKey.x)
-        freebn(ec->publicKey.x);
-    if (ec->publicKey.y)
-        freebn(ec->publicKey.y);
-    if (ec->publicKey.z)
-        freebn(ec->publicKey.z);
-    if (ec->privateKey)
-        freebn(ec->privateKey);
-    sfree(ec);
-}
-
-static ssh_key *ecdsa_new_pub(const ssh_keyalg *self, ptrlen data)
+WeierstrassPoint *ecdsa_public(mp_int *private_key, const ssh_keyalg *alg)
 {
     const struct ecsign_extra *extra =
-        (const struct ecsign_extra *)self->extra;
+        (const struct ecsign_extra *)alg->extra;
+    struct ec_curve *curve = extra->curve();
+    assert(curve->type == EC_WEIERSTRASS);
+
+    mp_int *priv_reduced = mp_mod(private_key, curve->p);
+    WeierstrassPoint *toret = ecc_weierstrass_multiply(
+        curve->w.G, priv_reduced);
+    mp_free(priv_reduced);
+    return toret;
+}
+
+static mp_int *eddsa_exponent_from_hash(
+    ptrlen hash, const struct ec_curve *curve)
+{
+    /*
+     * Make an integer out of the hash data, little-endian.
+     */
+    assert(hash.len >= curve->fieldBytes);
+    mp_int *e = mp_from_bytes_le(make_ptrlen(hash.ptr, curve->fieldBytes));
+
+    /*
+     * Set the highest bit that fits in the modulus, and clear any
+     * above that.
+     */
+    mp_set_bit(e, curve->fieldBits - 1, 1);
+    mp_reduce_mod_2to(e, curve->fieldBits);
+
+    /*
+     * Clear exactly three low bits.
+     */
+    for (size_t bit = 0; bit < 3; bit++)
+        mp_set_bit(e, bit, 0);
+
+    return e;
+}
+
+EdwardsPoint *eddsa_public(mp_int *private_key, const ssh_keyalg *alg)
+{
+    const struct ecsign_extra *extra =
+        (const struct ecsign_extra *)alg->extra;
+    struct ec_curve *curve = extra->curve();
+    assert(curve->type == EC_EDWARDS);
+
+    ssh_hash *h = ssh_hash_new(extra->hash);
+    for (size_t i = 0; i < curve->fieldBytes; ++i)
+        put_byte(h, mp_get_byte(private_key, i));
+
+    unsigned char hash[extra->hash->hlen];
+    ssh_hash_final(h, hash);
+
+    mp_int *exponent = eddsa_exponent_from_hash(
+        make_ptrlen(hash, extra->hash->hlen), curve);
+
+    EdwardsPoint *toret = ecc_edwards_multiply(curve->e.G, exponent);
+    mp_free(exponent);
+
+    return toret;
+}
+
+/* ----------------------------------------------------------------------
+ * Marshalling and unmarshalling functions
+ */
+
+static mp_int *BinarySource_get_mp_le(BinarySource *src)
+{
+    return mp_from_bytes_le(get_string(src));
+}
+#define get_mp_le(src) BinarySource_get_mp_le(BinarySource_UPCAST(src))
+
+static void BinarySink_put_mp_le_unsigned(BinarySink *bs, mp_int *x)
+{
+    size_t bytes = (mp_get_nbits(x) + 7) / 8;
+
+    put_uint32(bs, bytes);
+    for (size_t i = 0; i < bytes; ++i)
+        put_byte(bs, mp_get_byte(x, i));
+}
+#define put_mp_le_unsigned(bs, x) \
+    BinarySink_put_mp_le_unsigned(BinarySink_UPCAST(bs), x)
+
+static WeierstrassPoint *ecdsa_decode(
+    ptrlen encoded, const struct ec_curve *curve)
+{
+    assert(curve->type == EC_WEIERSTRASS);
     BinarySource src[1];
-    struct ec_key *ec;
-    struct ec_curve *curve;
 
-    BinarySource_BARE_INIT(src, data.ptr, data.len);
-    get_string(src);
+    BinarySource_BARE_INIT(src, encoded.ptr, encoded.len);
+    unsigned char format_type = get_byte(src);
 
-    curve = extra->curve();
-    assert(curve->type == EC_WEIERSTRASS || curve->type == EC_EDWARDS);
+    WeierstrassPoint *P;
 
-    /* Curve name is duplicated for Weierstrass form */
-    if (curve->type == EC_WEIERSTRASS) {
-        if (!ptrlen_eq_string(get_string(src), curve->name))
+    size_t len = get_avail(src);
+    mp_int *x;
+    mp_int *y;
+
+    switch (format_type) {
+      case 0:
+        /* The identity. */
+        P = ecc_weierstrass_point_new_identity(curve->w.wc);
+        break;
+      case 2:
+      case 3:
+        /* A compressed point, in which the x-coordinate is stored in
+         * full, and y is deduced from that and a single bit
+         * indicating its parity (stored in the format type byte). */
+        x = mp_from_bytes_be(get_data(src, len));
+        P = ecc_weierstrass_point_new_from_x(curve->w.wc, x, format_type & 1);
+        mp_free(x);
+        if (!P)            /* this can fail if the input is invalid */
             return NULL;
-    }
-
-    ec = snew(struct ec_key);
-    ec->sshk.vt = self;
-
-    ec->publicKey.curve = curve;
-    ec->publicKey.infinity = false;
-    ec->publicKey.x = NULL;
-    ec->publicKey.y = NULL;
-    ec->publicKey.z = NULL;
-    ec->privateKey = NULL;
-    if (!get_point(src, &ec->publicKey)) {
-        ecdsa_freekey(&ec->sshk);
+        break;
+      case 4:
+        /* An uncompressed point: the x,y coordinates are stored in
+         * full. We expect the rest of the string to have even length,
+         * and be divided half and half between the two values. */
+        if (len % 2 != 0)
+            return NULL;
+        len /= 2;
+        x = mp_from_bytes_be(get_data(src, len));
+        y = mp_from_bytes_be(get_data(src, len));
+        P = ecc_weierstrass_point_new(curve->w.wc, x, y);
+        mp_free(x);
+        mp_free(y);
+        break;
+      default:
+        /* An unrecognised type byte. */
         return NULL;
     }
 
-    if (!ec->publicKey.x || !ec->publicKey.y ||
-        bignum_cmp(ec->publicKey.x, curve->p) >= 0 ||
-        bignum_cmp(ec->publicKey.y, curve->p) >= 0)
-    {
-        ecdsa_freekey(&ec->sshk);
-        ec = NULL;
+    /* Verify the point is on the curve */
+    if (!ecc_weierstrass_point_valid(P)) {
+        ecc_weierstrass_point_free(P);
+        return NULL;
     }
 
-    return &ec->sshk;
+    return P;
+}
+
+static WeierstrassPoint *BinarySource_get_wpoint(
+    BinarySource *src, const struct ec_curve *curve)
+{
+    ptrlen str = get_string(src);
+    if (get_err(src))
+        return NULL;
+    return ecdsa_decode(str, curve);
+}
+#define get_wpoint(src, curve) \
+    BinarySource_get_wpoint(BinarySource_UPCAST(src), curve)
+
+static void BinarySink_put_wpoint(
+    BinarySink *bs, WeierstrassPoint *point, const struct ec_curve *curve,
+    bool bare)
+{
+    strbuf *sb;
+    BinarySink *bs_inner;
+
+    if (!bare) {
+        /*
+         * Encapsulate the raw data inside an outermost string layer.
+         */
+        sb = strbuf_new();
+        bs_inner = BinarySink_UPCAST(sb);
+    } else {
+        /*
+         * Just write the data directly to the output.
+         */
+        bs_inner = bs;
+    }
+
+    if (ecc_weierstrass_is_identity(point)) {
+        put_byte(bs_inner, 0);
+    } else {
+        mp_int *x, *y;
+        ecc_weierstrass_get_affine(point, &x, &y);
+
+        /*
+         * For ECDSA, we only ever output uncompressed points.
+         */
+        put_byte(bs_inner, 0x04);
+        for (size_t i = curve->fieldBytes; i--;)
+            put_byte(bs_inner, mp_get_byte(x, i));
+        for (size_t i = curve->fieldBytes; i--;)
+            put_byte(bs_inner, mp_get_byte(y, i));
+
+        mp_free(x);
+        mp_free(y);
+    }
+
+    if (!bare)
+        put_stringsb(bs, sb);
+}
+#define put_wpoint(bs, point, curve, bare)                              \
+    BinarySink_put_wpoint(BinarySink_UPCAST(bs), point, curve, bare)
+
+static EdwardsPoint *eddsa_decode(ptrlen encoded, const struct ec_curve *curve)
+{
+    assert(curve->type == EC_EDWARDS);
+    assert(curve->fieldBits % 8 == 7);
+
+    mp_int *y = mp_from_bytes_le(encoded);
+
+    if (mp_get_nbits(y) > curve->fieldBits+1) {
+        mp_free(y);
+        return NULL;
+    }
+
+    /* The topmost bit of the encoding isn't part of y, so it stores
+     * the bottom bit of x. Extract it, and zero that bit in y. */
+    unsigned desired_x_parity = mp_get_bit(y, curve->fieldBits);
+    mp_set_bit(y, curve->fieldBits, 0);
+
+    EdwardsPoint *P = ecc_edwards_point_new_from_y(
+        curve->e.ec, y, desired_x_parity);
+    mp_free(y);
+
+    /* A point constructed in this way will always satisfy the curve
+     * equation, unless ecc.c wasn't able to construct one at all, in
+     * which case P is now NULL. Either way, return it. */
+    return P;
+}
+
+static EdwardsPoint *BinarySource_get_epoint(
+    BinarySource *src, const struct ec_curve *curve)
+{
+    ptrlen str = get_string(src);
+    if (get_err(src))
+        return NULL;
+    return eddsa_decode(str, curve);
+}
+#define get_epoint(src, curve) \
+    BinarySource_get_epoint(BinarySource_UPCAST(src), curve)
+
+static void BinarySink_put_epoint(
+    BinarySink *bs, EdwardsPoint *point, const struct ec_curve *curve,
+    bool bare)
+{
+    mp_int *x, *y;
+    ecc_edwards_get_affine(point, &x, &y);
+
+    assert(curve->fieldBytes >= 2);
+
+    /*
+     * EdDSA requires point compression. We store a single integer,
+     * with bytes in little-endian order, which mostly contains y but
+     * in which the topmost bit is the low bit of x.
+     */
+    if (!bare)
+        put_uint32(bs, curve->fieldBytes);   /* string length field */
+    for (size_t i = 0; i < curve->fieldBytes - 1; i++)
+        put_byte(bs, mp_get_byte(y, i));
+    put_byte(bs, (mp_get_byte(y, curve->fieldBytes - 1) & 0x7F) |
+             (mp_get_bit(x, 0) << 7));
+
+    mp_free(x);
+    mp_free(y);
+}
+#define put_epoint(bs, point, curve, bare)                      \
+    BinarySink_put_epoint(BinarySink_UPCAST(bs), point, curve, bare)
+
+/* ----------------------------------------------------------------------
+ * Exposed ECDSA interface
+ */
+
+static void ecdsa_freekey(ssh_key *key)
+{
+    struct ecdsa_key *ek = container_of(key, struct ecdsa_key, sshk);
+
+    if (ek->publicKey)
+        ecc_weierstrass_point_free(ek->publicKey);
+    if (ek->privateKey)
+        mp_free(ek->privateKey);
+    sfree(ek);
+}
+
+static void eddsa_freekey(ssh_key *key)
+{
+    struct eddsa_key *ek = container_of(key, struct eddsa_key, sshk);
+
+    if (ek->publicKey)
+        ecc_edwards_point_free(ek->publicKey);
+    if (ek->privateKey)
+        mp_free(ek->privateKey);
+    sfree(ek);
+}
+
+/* Build a public-only ECDSA key from a blob; NULL on bad curve or point. */
+static ssh_key *ecdsa_new_pub(const ssh_keyalg *alg, ptrlen data)
+{
+    const struct ecsign_extra *extra =
+        (const struct ecsign_extra *)alg->extra;
+    struct ec_curve *curve = extra->curve();
+    assert(curve->type == EC_WEIERSTRASS);
+
+    BinarySource src[1];
+    BinarySource_BARE_INIT(src, data.ptr, data.len);
+    get_string(src);
+
+    /* Curve name is duplicated for Weierstrass form */
+    if (!ptrlen_eq_string(get_string(src), curve->name))
+        return NULL;
+
+    struct ecdsa_key *ek = snew(struct ecdsa_key);
+    ek->sshk.vt = alg;
+    ek->curve = curve;
+    ek->privateKey = NULL;     /* ecdsa_freekey inspects this on failure */
+
+    ek->publicKey = get_wpoint(src, curve);
+    if (!ek->publicKey) {
+        ecdsa_freekey(&ek->sshk);
+        return NULL;
+    }
+
+    return &ek->sshk;
+}
+
+static ssh_key *eddsa_new_pub(const ssh_keyalg *alg, ptrlen data)
+{
+    const struct ecsign_extra *extra =
+        (const struct ecsign_extra *)alg->extra;
+    struct ec_curve *curve = extra->curve();
+    assert(curve->type == EC_EDWARDS);
+
+    BinarySource src[1];
+    BinarySource_BARE_INIT(src, data.ptr, data.len);
+    get_string(src);
+
+    struct eddsa_key *ek = snew(struct eddsa_key);
+    ek->sshk.vt = alg;
+    ek->curve = curve;
+    ek->privateKey = NULL;
+
+    ek->publicKey = get_epoint(src, curve);
+    if (!ek->publicKey) {
+        eddsa_freekey(&ek->sshk);
+        return NULL;
+    }
+
+    return &ek->sshk;
+}
+
+static char *ecc_cache_str_shared(
+    const char *curve_name, mp_int *x, mp_int *y)
+{
+    strbuf *sb = strbuf_new();
+
+    if (curve_name)
+        strbuf_catf(sb, "%s,", curve_name);
+
+    char *hx = mp_get_hex(x);
+    char *hy = mp_get_hex(y);
+    strbuf_catf(sb, "0x%s,0x%s", hx, hy);
+    sfree(hx);
+    sfree(hy);
+
+    return strbuf_to_str(sb);
 }
 
 static char *ecdsa_cache_str(ssh_key *key)
 {
-    struct ec_key *ec = container_of(key, struct ec_key, sshk);
-    char *p;
-    int len, i, pos, nibbles;
-    static const char hex[] = "0123456789abcdef";
-    if (!ec->publicKey.x || !ec->publicKey.y || !ec->publicKey.curve)
-        return NULL;
+    struct ecdsa_key *ek = container_of(key, struct ecdsa_key, sshk);
+    mp_int *x, *y;
 
-    len = 4 + 2 + 1;                  /* 2 x "0x", punctuation, \0 */
-    if (ec->publicKey.curve->name)
-        len += strlen(ec->publicKey.curve->name); /* Curve name */
-    len += 4 * (bignum_bitcount(ec->publicKey.x) + 15) / 16;
-    len += 4 * (bignum_bitcount(ec->publicKey.y) + 15) / 16;
-    p = snewn(len, char);
+    ecc_weierstrass_get_affine(ek->publicKey, &x, &y);
+    char *toret = ecc_cache_str_shared(ek->curve->name, x, y);
+    mp_free(x);
+    mp_free(y);
+    return toret;
+}
 
-    pos = 0;
-    if (ec->publicKey.curve->name)
-        pos += sprintf(p + pos, "%s,", ec->publicKey.curve->name);
-    pos += sprintf(p + pos, "0x");
-    nibbles = (3 + bignum_bitcount(ec->publicKey.x)) / 4;
-    if (nibbles < 1)
-        nibbles = 1;
-    for (i = nibbles; i--;) {
-        p[pos++] =
-            hex[(bignum_byte(ec->publicKey.x, i / 2) >> (4 * (i % 2))) & 0xF];
-    }
-    pos += sprintf(p + pos, ",0x");
-    nibbles = (3 + bignum_bitcount(ec->publicKey.y)) / 4;
-    if (nibbles < 1)
-        nibbles = 1;
-    for (i = nibbles; i--;) {
-        p[pos++] =
-            hex[(bignum_byte(ec->publicKey.y, i / 2) >> (4 * (i % 2))) & 0xF];
-    }
-    p[pos] = '\0';
-    return p;
+static char *eddsa_cache_str(ssh_key *key)
+{
+    struct eddsa_key *ek = container_of(key, struct eddsa_key, sshk);
+    mp_int *x, *y;
+
+    ecc_edwards_get_affine(ek->publicKey, &x, &y);
+    char *toret = ecc_cache_str_shared(ek->curve->name, x, y);
+    mp_free(x);
+    mp_free(y);
+    return toret;
 }
 
 static void ecdsa_public_blob(ssh_key *key, BinarySink *bs)
 {
-    struct ec_key *ec = container_of(key, struct ec_key, sshk);
-    int pointlen;
-    int i;
+    struct ecdsa_key *ek = container_of(key, struct ecdsa_key, sshk);
 
-    if (ec->publicKey.curve->type == EC_EDWARDS) {
-        /* Edwards compressed form "ssh-ed25519" point y[:-1] + x[0:1] */
+    put_stringz(bs, ek->sshk.vt->ssh_id);
+    put_stringz(bs, ek->curve->name);
+    put_wpoint(bs, ek->publicKey, ek->curve, false);
+}
 
-        pointlen = ec->publicKey.curve->fieldBits / 8;
+static void eddsa_public_blob(ssh_key *key, BinarySink *bs)
+{
+    struct eddsa_key *ek = container_of(key, struct eddsa_key, sshk);
 
-        assert(pointlen >= 2);
-
-        put_stringz(bs, ec->sshk.vt->ssh_id);
-        put_uint32(bs, pointlen);
-
-        /* Unset last bit of y and set first bit of x in its place */
-        for (i = 0; i < pointlen - 1; ++i)
-            put_byte(bs, bignum_byte(ec->publicKey.y, i));
-        /* Unset last bit of y and set first bit of x in its place */
-        put_byte(bs, ((bignum_byte(ec->publicKey.y, i) & 0x7f) |
-                                (bignum_bit(ec->publicKey.x, 0) << 7)));
-    } else if (ec->publicKey.curve->type == EC_WEIERSTRASS) {
-        assert(ec->publicKey.curve->name);
-
-        pointlen = (bignum_bitcount(ec->publicKey.curve->p) + 7) / 8;
-
-        put_stringz(bs, ec->sshk.vt->ssh_id);
-        put_stringz(bs, ec->publicKey.curve->name);
-        put_uint32(bs, (2 * pointlen) + 1);
-        put_byte(bs, 0x04);
-        for (i = pointlen; i--;)
-            put_byte(bs, bignum_byte(ec->publicKey.x, i));
-        for (i = pointlen; i--;)
-            put_byte(bs, bignum_byte(ec->publicKey.y, i));
-    } else {
-        assert(0 && "Bad key type in ecdsa_public_blob");
-    }
+    put_stringz(bs, ek->sshk.vt->ssh_id);
+    put_epoint(bs, ek->publicKey, ek->curve, false);
 }
 
 static void ecdsa_private_blob(ssh_key *key, BinarySink *bs)
 {
-    struct ec_key *ec = container_of(key, struct ec_key, sshk);
-    int keylen;
-    int i;
+    struct ecdsa_key *ek = container_of(key, struct ecdsa_key, sshk);
 
-    assert(ec->privateKey);
-
-    if (ec->publicKey.curve->type == EC_EDWARDS) {
-        /* Unsigned */
-        keylen = (bignum_bitcount(ec->privateKey) + 7) / 8;
-    } else {
-        /* Signed */
-        keylen = (bignum_bitcount(ec->privateKey) + 8) / 8;
-    }
-
-    put_uint32(bs, keylen);
-    if (ec->publicKey.curve->type == EC_EDWARDS) {
-        /* Little endian */
-        for (i = 0; i < keylen; ++i)
-            put_byte(bs, bignum_byte(ec->privateKey, i));
-    } else {
-        for (i = keylen; i--;)
-            put_byte(bs, bignum_byte(ec->privateKey, i));
-    }
+    /* ECDSA uses ordinary SSH-2 mpint format to store the private key */
+    assert(ek->privateKey);
+    put_mp_ssh2(bs, ek->privateKey);
 }
 
-static ssh_key *ecdsa_new_priv(const ssh_keyalg *self, ptrlen pub, ptrlen priv)
+static void eddsa_private_blob(ssh_key *key, BinarySink *bs)
 {
-    BinarySource src[1];
-    ssh_key *sshk;
-    struct ec_key *ec;
-    struct ec_point *publicKey;
+    struct eddsa_key *ek = container_of(key, struct eddsa_key, sshk);
 
-    sshk = ecdsa_new_pub(self, pub);
+    /* EdDSA stores the private key integer little-endian and unsigned */
+    assert(ek->privateKey);
+    put_mp_le_unsigned(bs, ek->privateKey);
+}
+
+static ssh_key *ecdsa_new_priv(const ssh_keyalg *alg, ptrlen pub, ptrlen priv)
+{
+    ssh_key *sshk = ecdsa_new_pub(alg, pub);
     if (!sshk)
         return NULL;
+    struct ecdsa_key *ek = container_of(sshk, struct ecdsa_key, sshk);
 
-    ec = container_of(sshk, struct ec_key, sshk);
+    BinarySource src[1];
     BinarySource_BARE_INIT(src, priv.ptr, priv.len);
+    ek->privateKey = get_mp_ssh2(src);
 
-    if (ec->publicKey.curve->type != EC_WEIERSTRASS
-        && ec->publicKey.curve->type != EC_EDWARDS) {
-        ecdsa_freekey(&ec->sshk);
-        return NULL;
-    }
-
-    if (ec->publicKey.curve->type == EC_EDWARDS) {
-        ec->privateKey = get_mp_le(src);
-    } else {
-        ec->privateKey = get_mp_ssh2(src);
-    }
-    if (!ec->privateKey) {
-        ecdsa_freekey(&ec->sshk);
-        return NULL;
-    }
-
-    /* Check that private key generates public key */
-    publicKey = ec_public(ec->privateKey, ec->publicKey.curve);
-
-    if (!publicKey ||
-        bignum_cmp(publicKey->x, ec->publicKey.x) ||
-        bignum_cmp(publicKey->y, ec->publicKey.y))
-    {
-        ecdsa_freekey(&ec->sshk);
-        ec = NULL;
-    }
-    ec_point_free(publicKey);
-
-    return &ec->sshk;
+    return &ek->sshk;
 }
 
-static ssh_key *ed25519_new_priv_openssh(const ssh_keyalg *self,
-                                         BinarySource *src)
+static ssh_key *eddsa_new_priv(const ssh_keyalg *alg, ptrlen pub, ptrlen priv)
 {
-    struct ec_key *ec;
-    struct ec_point *publicKey;
-    ptrlen p, q;
+    ssh_key *sshk = eddsa_new_pub(alg, pub);
+    if (!sshk)
+        return NULL;
+    struct eddsa_key *ek = container_of(sshk, struct eddsa_key, sshk);
 
-    p = get_string(src);
-    q = get_string(src);
-    if (get_err(src) || p.len != 32 || q.len != 64)
+    BinarySource src[1];
+    BinarySource_BARE_INIT(src, priv.ptr, priv.len);
+    ek->privateKey = get_mp_le(src);
+
+    return &ek->sshk;
+}
+
+static ssh_key *eddsa_new_priv_openssh(
+    const ssh_keyalg *alg, BinarySource *src)
+{
+    const struct ecsign_extra *extra =
+        (const struct ecsign_extra *)alg->extra;
+    struct ec_curve *curve = extra->curve();
+    assert(curve->type == EC_EDWARDS);
+
+    ptrlen pubkey_pl = get_string(src);
+    ptrlen privkey_extended_pl = get_string(src);
+    if (get_err(src) || pubkey_pl.len != curve->fieldBytes)
         return NULL;
 
-    ec = snew(struct ec_key);
-    ec->sshk.vt = self;
-
-    ec->publicKey.curve = ec_ed25519();
-    ec->publicKey.infinity = false;
-    ec->privateKey = NULL;
-    ec->publicKey.x = NULL;
-    ec->publicKey.z = NULL;
-    ec->publicKey.y = NULL;
-
-    if (!decodepoint_ed(p.ptr, p.len, &ec->publicKey))
-    {
-        ecdsa_freekey(&ec->sshk);
-        return NULL;
-    }
-
-    ec->privateKey = bignum_from_bytes_le(q.ptr, 32);
-
-    /* Check that private key generates public key */
-    publicKey = ec_public(ec->privateKey, ec->publicKey.curve);
-
-    if (!publicKey ||
-        bignum_cmp(publicKey->x, ec->publicKey.x) ||
-        bignum_cmp(publicKey->y, ec->publicKey.y))
-    {
-        ecdsa_freekey(&ec->sshk);
-        ec = NULL;
-    }
-    ec_point_free(publicKey);
-
-    /* The OpenSSH format for ed25519 private keys also for some
+    /*
+     * The OpenSSH format for ed25519 private keys also for some
      * reason encodes an extra copy of the public key in the second
      * half of the secret-key string. Check that that's present and
      * correct as well, otherwise the key we think we've imported
      * won't behave identically to the way OpenSSH would have treated
-     * it. */
-    if (0 != memcmp((const char *)q.ptr + 32, p.ptr, 32)) {
-        ecdsa_freekey(&ec->sshk);
+     * it.
+     */
+    BinarySource subsrc[1];
+    BinarySource_BARE_INIT(
+        subsrc, privkey_extended_pl.ptr, privkey_extended_pl.len);
+    ptrlen privkey_pl = get_data(subsrc, curve->fieldBytes);
+    ptrlen pubkey_copy_pl = get_data(subsrc, curve->fieldBytes);
+    if (get_err(subsrc) || get_avail(subsrc))
+        return NULL;
+    if (!ptrlen_eq_ptrlen(pubkey_pl, pubkey_copy_pl))
+        return NULL;
+
+    struct eddsa_key *ek = snew(struct eddsa_key);
+    ek->sshk.vt = alg;
+    ek->curve = curve;
+    ek->privateKey = NULL;     /* eddsa_freekey inspects this on failure */
+    ek->publicKey = eddsa_decode(pubkey_pl, curve);
+    if (!ek->publicKey) {
+        eddsa_freekey(&ek->sshk);
         return NULL;
     }
 
-    return &ec->sshk;
+    ek->privateKey = mp_from_bytes_le(privkey_pl);
+
+    return &ek->sshk;
 }
 
-static void ed25519_openssh_blob(ssh_key *key, BinarySink *bs)
+static void eddsa_openssh_blob(ssh_key *key, BinarySink *bs)
 {
-    struct ec_key *ec = container_of(key, struct ec_key, sshk);
-    strbuf *pub;
+    struct eddsa_key *ek = container_of(key, struct eddsa_key, sshk);
+    assert(ek->curve->type == EC_EDWARDS);
 
-    int pointlen;
-    int keylen;
-    int i;
+    /* Encode the public point; 'pub' skips its 4-byte length prefix */
+    strbuf *pub_sb = strbuf_new();
+    put_epoint(pub_sb, ek->publicKey, ek->curve, false);
+    ptrlen pub = make_ptrlen(pub_sb->s + 4, pub_sb->len - 4);
 
-    assert(ec->publicKey.curve->type == EC_EDWARDS);
+    strbuf *priv_sb = strbuf_new();
+    for (size_t i = 0; i < ek->curve->fieldBytes; ++i)
+        put_byte(priv_sb, mp_get_byte(ek->privateKey, i));
 
-    pointlen = (bignum_bitcount(ec->publicKey.y) + 7) / 8;
-    keylen = (bignum_bitcount(ec->privateKey) + 7) / 8;
+    put_stringpl(bs, pub);
 
-    /* Encode the public point */
-    pub = strbuf_new();
-    put_uint32(pub, pointlen);
-    for (i = 0; i < pointlen - 1; ++i)
-        put_byte(pub, bignum_byte(ec->publicKey.y, i));
-    /* Unset last bit of y and set first bit of x in its place */
-    put_byte(pub, ((bignum_byte(ec->publicKey.y, i) & 0x7f) |
-                   (bignum_bit(ec->publicKey.x, 0) << 7)));
+    /* Private-key string: the key integer padded to exactly fieldBytes
+     * little-endian bytes, then a second copy of the public key */
+    put_uint32(bs, priv_sb->len + pub.len);
+    put_data(bs, priv_sb->s, priv_sb->len);
+    put_data(bs, pub.ptr, pub.len);
 
-    put_data(bs, pub->s, pub->len);
-
-    put_uint32(bs, keylen + pointlen);
-    for (i = 0; i < keylen; ++i)
-        put_byte(bs, bignum_byte(ec->privateKey, i));
-    /* Now encode an extra copy of the public point as the second half
-     * of the private key string, as the OpenSSH format for some
-     * reason requires */
-    put_data(bs, pub->s + 4, pub->len - 4);
-
-    strbuf_free(pub);
+    strbuf_free(pub_sb);
+    strbuf_free(priv_sb);
 }
 
-static ssh_key *ecdsa_new_priv_openssh(const ssh_keyalg *self,
-                                       BinarySource *src)
+static ssh_key *ecdsa_new_priv_openssh(
+    const ssh_keyalg *alg, BinarySource *src)
 {
     const struct ecsign_extra *extra =
-        (const struct ecsign_extra *)self->extra;
-    struct ec_key *ec;
-    struct ec_curve *curve;
-    struct ec_point *publicKey;
+        (const struct ecsign_extra *)alg->extra;
+    struct ec_curve *curve = extra->curve();
+    assert(curve->type == EC_WEIERSTRASS);
 
     get_string(src);
 
-    curve = extra->curve();
-    assert(curve->type == EC_WEIERSTRASS);
+    struct ecdsa_key *ek = snew(struct ecdsa_key);
+    ek->sshk.vt = alg;
+    ek->curve = curve;
 
-    ec = snew(struct ec_key);
-    ec->sshk.vt = self;
-
-    ec->publicKey.curve = curve;
-    ec->publicKey.infinity = false;
-    ec->publicKey.x = NULL;
-    ec->publicKey.y = NULL;
-    ec->publicKey.z = NULL;
-    if (!get_point(src, &ec->publicKey)) {
-        ecdsa_freekey(&ec->sshk);
-        return NULL;
-    }
-    ec->privateKey = NULL;
-
-    if (!ec->publicKey.x || !ec->publicKey.y ||
-        bignum_cmp(ec->publicKey.x, curve->p) >= 0 ||
-        bignum_cmp(ec->publicKey.y, curve->p) >= 0)
-    {
-        ecdsa_freekey(&ec->sshk);
+    ek->privateKey = NULL;     /* ecdsa_freekey inspects this on failure */
+    if (!(ek->publicKey = get_wpoint(src, curve))) {
+        ecdsa_freekey(&ek->sshk);
         return NULL;
     }
 
-    ec->privateKey = get_mp_ssh2(src);
-    if (ec->privateKey == NULL)
-    {
-        ecdsa_freekey(&ec->sshk);
-        return NULL;
-    }
+    ek->privateKey = get_mp_ssh2(src);
 
-    /* Now check that the private key makes the public key */
-    publicKey = ec_public(ec->privateKey, ec->publicKey.curve);
-    if (!publicKey)
-    {
-        ecdsa_freekey(&ec->sshk);
-        return NULL;
-    }
-
-    if (bignum_cmp(ec->publicKey.x, publicKey->x) ||
-        bignum_cmp(ec->publicKey.y, publicKey->y))
-    {
-        /* Private key doesn't make the public key on the given curve */
-        ecdsa_freekey(&ec->sshk);
-        ec_point_free(publicKey);
-        return NULL;
-    }
-
-    ec_point_free(publicKey);
-
-    return &ec->sshk;
+    return &ek->sshk;
 }
 
 static void ecdsa_openssh_blob(ssh_key *key, BinarySink *bs)
 {
-    struct ec_key *ec = container_of(key, struct ec_key, sshk);
-
-    int pointlen;
-    int i;
-
-    assert(ec->publicKey.curve->type == EC_WEIERSTRASS);
-
-    pointlen = (bignum_bitcount(ec->publicKey.curve->p) + 7) / 8;
-
-    put_stringz(bs, ec->publicKey.curve->name);
-
-    put_uint32(bs, 1 + (pointlen * 2));
-    put_byte(bs, 0x04);
-    for (i = pointlen; i--; )
-        put_byte(bs, bignum_byte(ec->publicKey.x, i));
-    for (i = pointlen; i--; )
-        put_byte(bs, bignum_byte(ec->publicKey.y, i));
-
-    put_mp_ssh2(bs, ec->privateKey);
+    struct ecdsa_key *ek = container_of(key, struct ecdsa_key, sshk);
+    put_stringz(bs, ek->curve->name);
+    put_wpoint(bs, ek->publicKey, ek->curve, false);
+    put_mp_ssh2(bs, ek->privateKey);
 }
 
-static int ecdsa_pubkey_bits(const ssh_keyalg *self, ptrlen blob)
+static int ec_shared_pubkey_bits(const ssh_keyalg *alg, ptrlen blob)
 {
-    ssh_key *sshk;
-    struct ec_key *ec;
-    int ret;
+    const struct ecsign_extra *extra =
+        (const struct ecsign_extra *)alg->extra;
+    struct ec_curve *curve = extra->curve();
+    return curve->fieldBits;
+}
 
-    sshk = ecdsa_new_pub(self, blob);
-    if (!sshk)
-        return -1;
+static mp_int *ecdsa_signing_exponent_from_data(
+    const struct ec_curve *curve, const struct ecsign_extra *extra,
+    ptrlen data)
+{
+    /* Hash the data being signed. */
+    unsigned char hash[extra->hash->hlen];
+    ssh_hash *h = ssh_hash_new(extra->hash);
+    put_data(h, data.ptr, data.len);
+    ssh_hash_final(h, hash);
 
-    ec = container_of(sshk, struct ec_key, sshk);
-    ret = ec->publicKey.curve->fieldBits;
-    ecdsa_freekey(&ec->sshk);
+    /*
+     * Take the leftmost b bits of the hash of the signed data (where
+     * b is the number of bits in order(G)), interpreted big-endian.
+     */
+    mp_int *z = mp_from_bytes_be(make_ptrlen(hash, extra->hash->hlen));
+    size_t zbits = mp_get_nbits(z);
+    size_t nbits = mp_get_nbits(curve->w.G_order);
+    size_t shift = zbits - nbits;
+    /* Bound the shift count below at 0, using bit twiddling to avoid
+     * a conditional branch */
+    shift &= ~-(shift >> (CHAR_BIT * sizeof(size_t) - 1));
+    mp_int *toret = mp_rshift_safe(z, shift);
+    mp_free(z);
 
-    return ret;
+    return toret;
 }
 
 static bool ecdsa_verify(ssh_key *key, ptrlen sig, ptrlen data)
 {
-    struct ec_key *ec = container_of(key, struct ec_key, sshk);
+    struct ecdsa_key *ek = container_of(key, struct ecdsa_key, sshk);
     const struct ecsign_extra *extra =
-        (const struct ecsign_extra *)ec->sshk.vt->extra;
+        (const struct ecsign_extra *)ek->sshk.vt->extra;
+
     BinarySource src[1];
-    ptrlen sigstr;
-    bool ret;
-
-    if (!ec->publicKey.x || !ec->publicKey.y || !ec->publicKey.curve)
-        return false;
-
     BinarySource_BARE_INIT(src, sig.ptr, sig.len);
 
     /* Check the signature starts with the algorithm name */
-    if (!ptrlen_eq_string(get_string(src), ec->sshk.vt->ssh_id))
+    if (!ptrlen_eq_string(get_string(src), ek->sshk.vt->ssh_id))
         return false;
 
-    sigstr = get_string(src);
+    /* Everything else is nested inside a sub-string. Descend into that. */
+    ptrlen sigstr = get_string(src);
     if (get_err(src))
         return false;
+    BinarySource_BARE_INIT(src, sigstr.ptr, sigstr.len);
 
-    if (ec->publicKey.curve->type == EC_EDWARDS) {
-        struct ec_point *r;
-        int pointlen = ec->publicKey.curve->fieldBits / 8;
-        Bignum s, h;
-
-        /* Check that the signature is two times the length of a point */
-        if (sigstr.len != pointlen * 2) {
-            return false;
-        }
-
-        /* Check it's the 256 bit field so that SHA512 is the correct hash */
-        if (ec->publicKey.curve->fieldBits != 256) {
-            return false;
-        }
-
-        /* Get the signature */
-        r = ec_point_new(ec->publicKey.curve, NULL, NULL, NULL, false);
-        if (!r) {
-            return false;
-        }
-        if (!decodepoint(sigstr.ptr, pointlen, r)) {
-            ec_point_free(r);
-            return false;
-        }
-        s = bignum_from_bytes_le(
-            (const char *)sigstr.ptr + pointlen, pointlen);
-
-        /* Get the hash of the encoded value of R + encoded value of pk + message */
-        {
-            int i;
-            unsigned char digest[512 / 8];
-            SHA512_State hs;
-            SHA512_Init(&hs);
-
-            /* Add encoded r (no need to encode it again, it was in
-             * the signature) */
-            put_data(&hs, sigstr.ptr, pointlen);
-
-            /* Encode pk and add it */
-            for (i = 0; i < pointlen - 1; ++i)
-                put_byte(&hs, bignum_byte(ec->publicKey.y, i));
-            /* Unset last bit of y and set first bit of x in its place */
-            put_byte(&hs, ((bignum_byte(ec->publicKey.y, i) & 0x7f) |
-                           (bignum_bit(ec->publicKey.x, 0) << 7)));
-
-            /* Add the message itself */
-            put_data(&hs, data.ptr, data.len);
-
-            /* Get the hash */
-            SHA512_Final(&hs, digest);
-
-            /* Convert to Bignum */
-            h = bignum_from_bytes_le(digest, sizeof(digest));
-        }
-
-        /* Verify sB == r + h*publicKey */
-        {
-            struct ec_point *lhs, *rhs, *tmp;
-
-            /* lhs = sB */
-            lhs = ecp_mul(&ec->publicKey.curve->e.B, s);
-            freebn(s);
-            if (!lhs) {
-                ec_point_free(r);
-                freebn(h);
-                return false;
-            }
-
-            /* rhs = r + h*publicKey */
-            tmp = ecp_mul(&ec->publicKey, h);
-            freebn(h);
-            if (!tmp) {
-                ec_point_free(lhs);
-                ec_point_free(r);
-                return false;
-            }
-            rhs = ecp_add(r, tmp, false);
-            ec_point_free(r);
-            ec_point_free(tmp);
-            if (!rhs) {
-                ec_point_free(lhs);
-                return false;
-            }
-
-            /* Check the point is the same */
-            ret = !bignum_cmp(lhs->x, rhs->x);
-            if (ret) {
-                ret = !bignum_cmp(lhs->y, rhs->y);
-                if (ret) {
-                    ret = true;
-                }
-            }
-            ec_point_free(lhs);
-            ec_point_free(rhs);
-        }
-    } else {
-        Bignum r, s;
-        unsigned char digest[512 / 8];
-        int digestLen;
-        ssh_hash *hashctx;
-
-        BinarySource_BARE_INIT(src, sigstr.ptr, sigstr.len);
-
-        r = get_mp_ssh2(src);
-        s = get_mp_ssh2(src);
-        if (get_err(src)) {
-            freebn(r);
-            freebn(s);
-            return false;
-        }
-
-        digestLen = extra->hash->hlen;
-        assert(digestLen <= sizeof(digest));
-        hashctx = ssh_hash_new(extra->hash);
-        put_data(hashctx, data.ptr, data.len);
-        ssh_hash_final(hashctx, digest);
-
-        /* Verify the signature */
-        ret = _ecdsa_verify(&ec->publicKey, digest, digestLen, r, s);
-
-        freebn(r);
-        freebn(s);
+    /* Extract the signature integers r,s */
+    mp_int *r = get_mp_ssh2(src);
+    mp_int *s = get_mp_ssh2(src);
+    if (get_err(src)) {
+        mp_free(r);
+        mp_free(s);
+        return false;
     }
 
-    return ret;
+    /* Basic sanity checks: 0 < r,s < order(G) */
+    unsigned invalid = 0;
+    invalid |= mp_eq_integer(r, 0);
+    invalid |= mp_eq_integer(s, 0);
+    invalid |= mp_cmp_hs(r, ek->curve->w.G_order);
+    invalid |= mp_cmp_hs(s, ek->curve->w.G_order);
+
+    /* Get the hash of the signed data, converted to an integer */
+    mp_int *z = ecdsa_signing_exponent_from_data(ek->curve, extra, data);
+
+    /* Verify the signature integers against the hash */
+    mp_int *w = mp_invert(s, ek->curve->w.G_order);
+    mp_int *u1 = mp_modmul(z, w, ek->curve->w.G_order);
+    mp_free(z);
+    mp_int *u2 = mp_modmul(r, w, ek->curve->w.G_order);
+    mp_free(w);
+    WeierstrassPoint *u1G = ecc_weierstrass_multiply(ek->curve->w.G, u1);
+    mp_free(u1);
+    WeierstrassPoint *u2P = ecc_weierstrass_multiply(ek->publicKey, u2);
+    mp_free(u2);
+    WeierstrassPoint *sum = ecc_weierstrass_add_general(u1G, u2P);
+    ecc_weierstrass_point_free(u1G);
+    ecc_weierstrass_point_free(u2P);
+
+    mp_int *x;
+    ecc_weierstrass_get_affine(sum, &x, NULL);
+    ecc_weierstrass_point_free(sum);
+
+    mp_divmod_into(x, ek->curve->w.G_order, NULL, x);
+    invalid |= (1 ^ mp_cmp_eq(r, x));
+    mp_free(x);
+
+    mp_free(r);
+    mp_free(s);
+
+    return !invalid;
+}
+
+/* EdDSA challenge: little-endian integer of H(r || public key || data). */
+static mp_int *eddsa_signing_exponent_from_data(
+    struct eddsa_key *ek, const struct ecsign_extra *extra,
+    ptrlen r_encoded, ptrlen data)
+{
+    const size_t hashlen = extra->hash->hlen;
+    unsigned char digest[hashlen];
+
+    ssh_hash *ctx = ssh_hash_new(extra->hash);
+    put_data(ctx, r_encoded.ptr, r_encoded.len);
+    put_epoint(ctx, ek->publicKey, ek->curve, true); /* no string header */
+    put_data(ctx, data.ptr, data.len);
+    ssh_hash_final(ctx, digest);
+
+    mp_int *exponent = mp_from_bytes_le(make_ptrlen(digest, hashlen));
+    smemclr(digest, hashlen);
+    return exponent;
+}
+
+/* Verify an EdDSA signature: accept iff s*G == r + H*publicKey. */
+static bool eddsa_verify(ssh_key *key, ptrlen sig, ptrlen data)
+{
+    struct eddsa_key *ek = container_of(key, struct eddsa_key, sshk);
+    const struct ecsign_extra *extra =
+        (const struct ecsign_extra *)ek->sshk.vt->extra;
+
+    /* Outer layer: the algorithm name, then one string holding (r, s) */
+    BinarySource src[1];
+    BinarySource_BARE_INIT(src, sig.ptr, sig.len);
+    ptrlen algname = get_string(src);
+    if (!ptrlen_eq_string(algname, ek->sshk.vt->ssh_id))
+        return false;
+    ptrlen body = get_string(src);
+    if (get_err(src))
+        return false;
+
+    /* Inner layer: encoded point r then integer s, fieldBytes each */
+    BinarySource_BARE_INIT(src, body.ptr, body.len);
+    ptrlen r_enc = get_data(src, ek->curve->fieldBytes);
+    ptrlen s_enc = get_data(src, ek->curve->fieldBytes);
+    if (get_err(src) || get_avail(src) != 0)
+        return false;
+
+    EdwardsPoint *r = eddsa_decode(r_enc, ek->curve);
+    if (!r)
+        return false;
+
+    /* Left-hand side: s*G */
+    mp_int *s = mp_from_bytes_le(s_enc);
+    EdwardsPoint *sG = ecc_edwards_multiply(ek->curve->e.G, s);
+    mp_free(s);
+
+    /* Right-hand side: r + H*publicKey */
+    mp_int *H = eddsa_signing_exponent_from_data(ek, extra, r_enc, data);
+    EdwardsPoint *H_pub = ecc_edwards_multiply(ek->publicKey, H);
+    mp_free(H);
+    EdwardsPoint *sum = ecc_edwards_add(r, H_pub);
+    ecc_edwards_point_free(H_pub);
+    ecc_edwards_point_free(r);
+    unsigned match = ecc_edwards_eq(sG, sum);
+    ecc_edwards_point_free(sG);
+    ecc_edwards_point_free(sum);
+    return match;
 }
 
 static void ecdsa_sign(ssh_key *key, const void *data, int datalen,
                        unsigned flags, BinarySink *bs)
 {
-    struct ec_key *ec = container_of(key, struct ec_key, sshk);
+    struct ecdsa_key *ek = container_of(key, struct ecdsa_key, sshk);
     const struct ecsign_extra *extra =
-        (const struct ecsign_extra *)ec->sshk.vt->extra;
-    unsigned char digest[512 / 8];
-    int digestLen;
-    Bignum r = NULL, s = NULL;
-    int i;
+        (const struct ecsign_extra *)ek->sshk.vt->extra;
+    assert(ek->privateKey);
 
-    assert(ec->privateKey);
-    assert(ec->publicKey.curve);
+    mp_int *z = ecdsa_signing_exponent_from_data(
+        ek->curve, extra, make_ptrlen(data, datalen));
 
-    if (ec->publicKey.curve->type == EC_EDWARDS) {
-        struct ec_point *rp;
-        int pointlen = ec->publicKey.curve->fieldBits / 8;
-
-        /* hash = H(sk) (where hash creates 2 * fieldBits)
-         * b = fieldBits
-         * a = 2^(b-2) + SUM(2^i * h_i) for i = 2 -> b-2
-         * r = H(h[b/8:b/4] + m)
-         * R = rB
-         * S = (r + H(encodepoint(R) + encodepoint(pk) + m) * a) % l */
-        {
-            unsigned char hash[512/8];
-            Bignum a;
-            SHA512_State hs;
-            SHA512_Init(&hs);
-
-            for (i = 0; i < pointlen; ++i)
-                put_byte(&hs, bignum_byte(ec->privateKey, i));
-
-            SHA512_Final(&hs, hash);
-
-            /* The second part is simply turning the hash into a
-             * Bignum, however the 2^(b-2) bit *must* be set, and the
-             * bottom 3 bits *must* not be */
-            hash[0] &= 0xf8; /* Unset bottom 3 bits (if set) */
-            hash[31] &= 0x7f; /* Unset above (b-2) */
-            hash[31] |= 0x40; /* Set 2^(b-2) */
-            /* Chop off the top part and convert to int */
-            a = bignum_from_bytes_le(hash, 32);
-
-            SHA512_Init(&hs);
-            put_data(&hs, hash+(ec->publicKey.curve->fieldBits / 8),
-                     ((ec->publicKey.curve->fieldBits / 4) -
-                      (ec->publicKey.curve->fieldBits / 8)));
-            put_data(&hs, data, datalen);
-            SHA512_Final(&hs, hash);
-
-            r = bignum_from_bytes_le(hash, 512/8);
-            rp = ecp_mul(&ec->publicKey.curve->e.B, r);
-            assert(rp);
-
-            /* Now calculate s */
-            SHA512_Init(&hs);
-            /* Encode the point R */
-            for (i = 0; i < pointlen - 1; ++i)
-                put_byte(&hs, bignum_byte(rp->y, i));
-            /* Unset last bit of y and set first bit of x in its place */
-            put_byte(&hs, ((bignum_byte(rp->y, i) & 0x7f) |
-                           (bignum_bit(rp->x, 0) << 7)));
-
-            /* Encode the point pk */
-            for (i = 0; i < pointlen - 1; ++i)
-                put_byte(&hs, bignum_byte(ec->publicKey.y, i));
-            /* Unset last bit of y and set first bit of x in its place */
-            put_byte(&hs, ((bignum_byte(ec->publicKey.y, i) & 0x7f) |
-                           (bignum_bit(ec->publicKey.x, 0) << 7)));
-
-            /* Add the message */
-            put_data(&hs, data, datalen);
-            SHA512_Final(&hs, hash);
-
-            {
-                Bignum tmp, tmp2;
-
-                tmp = bignum_from_bytes_le(hash, 512/8);
-                tmp2 = modmul(tmp, a, ec->publicKey.curve->e.l);
-                freebn(a);
-                freebn(tmp);
-                tmp = bigadd(r, tmp2);
-                freebn(r);
-                freebn(tmp2);
-                s = bigmod(tmp, ec->publicKey.curve->e.l);
-                freebn(tmp);
-            }
-        }
-
-        /* Format the output */
-        put_stringz(bs, ec->sshk.vt->ssh_id);
-        pointlen = ec->publicKey.curve->fieldBits / 8;
-        put_uint32(bs, pointlen * 2);
-
-        /* Encode the point */
-        for (i = 0; i < pointlen - 1; ++i)
-            put_byte(bs, bignum_byte(rp->y, i));
-        /* Unset last bit of y and set first bit of x in its place */
-        put_byte(bs, ((bignum_byte(rp->y, i) & 0x7f) |
-                                (bignum_bit(rp->x, 0) << 7)));
-        ec_point_free(rp);
-
-        /* Encode the int */
-        for (i = 0; i < pointlen; ++i)
-            put_byte(bs, bignum_byte(s, i));
-        freebn(s);
-    } else {
-        ssh_hash *hashctx;
-        strbuf *substr;
-
-        digestLen = extra->hash->hlen;
-        assert(digestLen <= sizeof(digest));
-        hashctx = ssh_hash_new(extra->hash);
-        put_data(hashctx, data, datalen);
-        ssh_hash_final(hashctx, digest);
-
-        /* Do the signature */
-        _ecdsa_sign(ec->privateKey, ec->publicKey.curve, digest, digestLen, &r, &s);
-        assert(r);
-        assert(s);
-
-        /* Format the output */
-        put_stringz(bs, ec->sshk.vt->ssh_id);
-
-        substr = strbuf_new();
-        put_mp_ssh2(substr, r);
-        put_mp_ssh2(substr, s);
-        put_stringsb(bs, substr);
-
-        freebn(r);
-        freebn(s);
+    /* Generate k between 1 and curve->n, using the same deterministic
+     * k generation system we use for conventional DSA. */
+    mp_int *k;
+    {
+        unsigned char digest[20];
+        SHA_Simple(data, datalen, digest);
+        k = dss_gen_k(
+            "ECDSA deterministic k generator", ek->curve->w.G_order,
+            ek->privateKey, digest, sizeof(digest));
     }
+
+    WeierstrassPoint *kG = ecc_weierstrass_multiply(ek->curve->w.G, k);
+    mp_int *x;
+    ecc_weierstrass_get_affine(kG, &x, NULL);
+    ecc_weierstrass_point_free(kG);
+
+    /* r = kG.x mod order(G) */
+    mp_int *r = mp_mod(x, ek->curve->w.G_order);
+    mp_free(x);
+
+    /* s = (z + r * priv)/k mod n */
+    mp_int *rPriv = mp_modmul(r, ek->privateKey, ek->curve->w.G_order);
+    mp_int *numerator = mp_modadd(z, rPriv, ek->curve->w.G_order);
+    mp_free(z);
+    mp_free(rPriv);
+    mp_int *kInv = mp_invert(k, ek->curve->w.G_order);
+    mp_free(k);
+    mp_int *s = mp_modmul(numerator, kInv, ek->curve->w.G_order);
+    mp_free(numerator);
+    mp_free(kInv);
+
+    /* Format the output */
+    put_stringz(bs, ek->sshk.vt->ssh_id);
+
+    strbuf *substr = strbuf_new();
+    put_mp_ssh2(substr, r);
+    put_mp_ssh2(substr, s);
+    put_stringsb(bs, substr);
+
+    mp_free(r);
+    mp_free(s);
+}
+
+/*
+ * Sign 'data' with an EdDSA private key. Output appended to 'bs':
+ * the algorithm name as a string, then a second string containing
+ * the encoded point r followed by the integer s as fieldBytes
+ * little-endian bytes. 'flags' is not consulted.
+ */
+static void eddsa_sign(ssh_key *key, const void *data, int datalen,
+                       unsigned flags, BinarySink *bs)
+{
+    struct eddsa_key *ek = container_of(key, struct eddsa_key, sshk);
+    const struct ecsign_extra *extra =
+        (const struct ecsign_extra *)ek->sshk.vt->extra;
+    assert(ek->privateKey);
+
+    /*
+     * EdDSA prescribes a specific method of generating the nonce
+     * integer for the signature. (A verifier can't tell whether you
+     * followed that method, but it's important anyway: test vectors
+     * want a specific signature for a given message, and it keeps
+     * signatures deterministic even across different software.)
+     */
+
+    /* First, hash the private key integer (bare, little-endian,
+     * fieldBytes long) into a hash giving 2*fieldBytes of output. */
+    unsigned char hash[extra->hash->hlen];
+    ssh_hash *h = ssh_hash_new(extra->hash);
+    for (size_t i = 0; i < ek->curve->fieldBytes; ++i)
+        put_byte(h, mp_get_byte(ek->privateKey, i));
+    ssh_hash_final(h, hash);
+
+    /* The first half of the output hash is converted into an
+     * integer a, by the standard EdDSA transformation. */
+    mp_int *a = eddsa_exponent_from_hash(
+        make_ptrlen(hash, ek->curve->fieldBytes), ek->curve);
+
+    /*
+     * The second half of the hash of the private key is hashed again
+     * with the message to be signed, and used (reduced mod the order
+     * of G) as the exponent generating the signature point r.
+     */
+    h = ssh_hash_new(extra->hash);
+    put_data(h, hash + ek->curve->fieldBytes,
+             extra->hash->hlen - ek->curve->fieldBytes);
+    put_data(h, data, datalen);
+    ssh_hash_final(h, hash);
+    mp_int *log_r_unreduced = mp_from_bytes_le(
+        make_ptrlen(hash, extra->hash->hlen));
+
+    /* 'hash' has held key-derived secrets (the material for a, then
+     * the unreduced nonce); wipe it now that we're done with it. */
+    smemclr(hash, extra->hash->hlen);
+
+    mp_int *log_r = mp_mod(log_r_unreduced, ek->curve->e.G_order);
+    mp_free(log_r_unreduced);
+    EdwardsPoint *r = ecc_edwards_multiply(ek->curve->e.G, log_r);
+
+    /* Encode r now: its encoding is needed both for the next hashing
+     * step and to write into the actual signature. */
+    strbuf *r_enc = strbuf_new();
+    put_epoint(r_enc, r, ek->curve, true); /* omit string header */
+    ecc_edwards_point_free(r);
+
+    /* Hash (r || public key || message), just as eddsa_verify does */
+    mp_int *H = eddsa_signing_exponent_from_data(
+        ek, extra, ptrlen_from_strbuf(r_enc), make_ptrlen(data, datalen));
+
+    /* And then s = (log(r) + H*a) mod order(G). */
+    mp_int *Ha = mp_modmul(H, a, ek->curve->e.G_order);
+    mp_int *s = mp_modadd(log_r, Ha, ek->curve->e.G_order);
+    mp_free(H);
+    mp_free(a);
+    mp_free(Ha);
+    mp_free(log_r);
+
+    /* Format the output */
+    put_stringz(bs, ek->sshk.vt->ssh_id);
+    put_uint32(bs, r_enc->len + ek->curve->fieldBytes);
+    put_data(bs, r_enc->u, r_enc->len);
+    strbuf_free(r_enc);
+    for (size_t i = 0; i < ek->curve->fieldBytes; ++i)
+        put_byte(bs, mp_get_byte(s, i));
+    mp_free(s);
 }
 
 const struct ecsign_extra sign_extra_ed25519 = {
-    ec_ed25519, NULL,
+    ec_ed25519, &ssh_sha512,
     NULL, 0,
 };
 const ssh_keyalg ssh_ecdsa_ed25519 = {
-    ecdsa_new_pub,
-    ecdsa_new_priv,
-    ed25519_new_priv_openssh,
+    eddsa_new_pub,
+    eddsa_new_priv,
+    eddsa_new_priv_openssh,
 
-    ecdsa_freekey,
-    ecdsa_sign,
-    ecdsa_verify,
-    ecdsa_public_blob,
-    ecdsa_private_blob,
-    ed25519_openssh_blob,
-    ecdsa_cache_str,
+    eddsa_freekey,
+    eddsa_sign,
+    eddsa_verify,
+    eddsa_public_blob,
+    eddsa_private_blob,
+    eddsa_openssh_blob,
+    eddsa_cache_str,
 
-    ecdsa_pubkey_bits,
+    ec_shared_pubkey_bits,
 
     "ssh-ed25519",
     "ssh-ed25519",
@@ -2437,7 +1163,7 @@ const ssh_keyalg ssh_ecdsa_nistp256 = {
     ecdsa_openssh_blob,
     ecdsa_cache_str,
 
-    ecdsa_pubkey_bits,
+    ec_shared_pubkey_bits,
 
     "ecdsa-sha2-nistp256",
     "ecdsa-sha2-nistp256",
@@ -2466,7 +1192,7 @@ const ssh_keyalg ssh_ecdsa_nistp384 = {
     ecdsa_openssh_blob,
     ecdsa_cache_str,
 
-    ecdsa_pubkey_bits,
+    ec_shared_pubkey_bits,
 
     "ecdsa-sha2-nistp384",
     "ecdsa-sha2-nistp384",
@@ -2495,7 +1221,7 @@ const ssh_keyalg ssh_ecdsa_nistp521 = {
     ecdsa_openssh_blob,
     ecdsa_cache_str,
 
-    ecdsa_pubkey_bits,
+    ec_shared_pubkey_bits,
 
     "ecdsa-sha2-nistp521",
     "ecdsa-sha2-nistp521",
@@ -2509,49 +1235,21 @@ const ssh_keyalg ssh_ecdsa_nistp521 = {
 
 struct eckex_extra {
     struct ec_curve *(*curve)(void);
+    void (*setup)(ecdh_key *dh);
+    void (*cleanup)(ecdh_key *dh);
+    void (*getpublic)(ecdh_key *dh, BinarySink *bs);
+    mp_int *(*getkey)(ecdh_key *dh, ptrlen remoteKey);
 };
 
-static Bignum ecdh_calculate(const Bignum private,
-                             const struct ec_point *public)
-{
-    struct ec_point *p;
-    Bignum ret;
-    p = ecp_mul(public, private);
-    if (!p) return NULL;
-    ret = p->x;
-    p->x = NULL;
-
-    if (p->curve->type == EC_MONTGOMERY) {
-        /*
-         * Endianness-swap. The Curve25519 algorithm definition
-         * assumes you were doing your computation in arrays of 32
-         * little-endian bytes, and now specifies that you take your
-         * final one of those and convert it into a bignum in
-         * _network_ byte order, i.e. big-endian.
-         *
-         * In particular, the spec says, you convert the _whole_ 32
-         * bytes into a bignum. That is, on the rare occasions that
-         * p->x has come out with the most significant 8 bits zero, we
-         * have to imagine that being represented by a 32-byte string
-         * with the last byte being zero, so that has to be converted
-         * into an SSH-2 bignum with the _low_ byte zero, i.e. a
-         * multiple of 256.
-         */
-        int i;
-        int bytes = (p->curve->fieldBits+7) / 8;
-        unsigned char *byteorder = snewn(bytes, unsigned char);
-        for (i = 0; i < bytes; ++i) {
-            byteorder[i] = bignum_byte(ret, i);
-        }
-        freebn(ret);
-        ret = bignum_from_bytes(byteorder, bytes);
-        smemclr(byteorder, bytes);
-        sfree(byteorder);
-    }
-
-    ec_point_free(p);
-    return ret;
-}
+struct ecdh_key {
+    const struct eckex_extra *extra;  /* per-curve-type method table */
+    const struct ec_curve *curve;     /* result of extra->curve() */
+    mp_int *private;                  /* our private value, set by setup() */
+    union {                           /* our public point, per curve type */
+        WeierstrassPoint *w_public;
+        MontgomeryPoint *m_public;
+    };
+};
 
 const char *ssh_ecdhkex_curve_textname(const struct ssh_kex *kex)
 {
@@ -2560,142 +1258,181 @@ const char *ssh_ecdhkex_curve_textname(const struct ssh_kex *kex)
     return curve->textname;
 }
 
-struct ec_key *ssh_ecdhkex_newkey(const struct ssh_kex *kex)
+/* Weierstrass setup: pick a private exponent, derive the public point */
+static void ssh_ecdhkex_w_setup(ecdh_key *dh)
+{
+    mp_int *lower_bound = mp_from_integer(1);
+    dh->private = mp_random_in_range(lower_bound, dh->curve->w.G_order);
+    mp_free(lower_bound);
+    dh->w_public = ecc_weierstrass_multiply(dh->curve->w.G, dh->private);
+}
+
+/* Montgomery setup: clamped random private scalar, then public = G*priv */
+static void ssh_ecdhkex_m_setup(ecdh_key *dh)
+{
+    const size_t len = dh->curve->fieldBytes;
+    unsigned char scalar[len];
+    for (size_t k = 0; k < len; k++)
+        scalar[k] = random_byte();
+    /* Clamp: clear the low 3 bits and the top bit, set the next-to-top */
+    scalar[0] &= 0xF8;
+    scalar[len-1] = (scalar[len-1] & 0x7F) | 0x40;
+    dh->private = mp_from_bytes_le(make_ptrlen(scalar, len));
+    smemclr(scalar, len);
+    dh->m_public = ecc_montgomery_multiply(dh->curve->m.G, dh->private);
+}
+
+ecdh_key *ssh_ecdhkex_newkey(const struct ssh_kex *kex)
 {
     const struct eckex_extra *extra = (const struct eckex_extra *)kex->extra;
-    struct ec_curve *curve;
-    struct ec_key *key;
-    struct ec_point *publicKey;
+    const struct ec_curve *curve = extra->curve();
 
-    curve = extra->curve();
-
-    key = snew(struct ec_key);
-
-    key->sshk.vt = NULL;
-    key->publicKey.curve = curve;
-
-    if (curve->type == EC_MONTGOMERY) {
-        unsigned char bytes[32] = {0};
-        int i;
-
-        for (i = 0; i < sizeof(bytes); ++i)
-        {
-            bytes[i] = (unsigned char)random_byte();
-        }
-        bytes[0] &= 248;
-        bytes[31] &= 127;
-        bytes[31] |= 64;
-        key->privateKey = bignum_from_bytes_le(bytes, sizeof(bytes));
-        smemclr(bytes, sizeof(bytes));
-        if (!key->privateKey) {
-            sfree(key);
-            return NULL;
-        }
-        publicKey = ecp_mul(&key->publicKey.curve->m.G, key->privateKey);
-        if (!publicKey) {
-            freebn(key->privateKey);
-            sfree(key);
-            return NULL;
-        }
-        key->publicKey.x = publicKey->x;
-        key->publicKey.y = publicKey->y;
-        key->publicKey.z = NULL;
-        sfree(publicKey);
-    } else {
-        key->privateKey = bignum_random_in_range(One, key->publicKey.curve->w.n);
-        if (!key->privateKey) {
-            sfree(key);
-            return NULL;
-        }
-        publicKey = ecp_mul(&key->publicKey.curve->w.G, key->privateKey);
-        if (!publicKey) {
-            freebn(key->privateKey);
-            sfree(key);
-            return NULL;
-        }
-        key->publicKey.x = publicKey->x;
-        key->publicKey.y = publicKey->y;
-        key->publicKey.z = NULL;
-        sfree(publicKey);
-    }
-    return key;
+    ecdh_key *dh = snew(ecdh_key);
+    dh->extra = extra;
+    dh->curve = curve;
+    dh->extra->setup(dh);
+    return dh;
 }
 
-void ssh_ecdhkex_getpublic(struct ec_key *ec, BinarySink *bs)
+static void ssh_ecdhkex_w_getpublic(ecdh_key *dh, BinarySink *bs)
 {
-    int i;
-    int pointlen;
-
-    pointlen = (bignum_bitcount(ec->publicKey.curve->p) + 7) / 8;
-
-    if (ec->publicKey.curve->type == EC_WEIERSTRASS) {
-        put_byte(bs, 0x04);
-        for (i = pointlen; i--;)
-            put_byte(bs, bignum_byte(ec->publicKey.x, i));
-        for (i = pointlen; i--;)
-            put_byte(bs, bignum_byte(ec->publicKey.y, i));
-    } else {
-        for (i = 0; i < pointlen; ++i)
-            put_byte(bs, bignum_byte(ec->publicKey.x, i));
-    }
+    put_wpoint(bs, dh->w_public, dh->curve, true);
 }
 
-Bignum ssh_ecdhkex_getkey(struct ec_key *ec,
-                          const void *remoteKey, int remoteKeyLen)
+static void ssh_ecdhkex_m_getpublic(ecdh_key *dh, BinarySink *bs)
 {
-    struct ec_point remote;
-    Bignum ret;
-
-    if (ec->publicKey.curve->type == EC_WEIERSTRASS) {
-        remote.curve = ec->publicKey.curve;
-        remote.infinity = false;
-        if (!decodepoint(remoteKey, remoteKeyLen, &remote)) {
-            return NULL;
-        }
-    } else {
-        /* Point length has to be the same length */
-        if (remoteKeyLen != (bignum_bitcount(ec->publicKey.curve->p) + 7) / 8) {
-            return NULL;
-        }
-
-        remote.curve = ec->publicKey.curve;
-        remote.infinity = false;
-        remote.x = bignum_from_bytes_le((const unsigned char *)remoteKey,
-                                        remoteKeyLen);
-        remote.y = NULL;
-        remote.z = NULL;
-    }
-
-    ret = ecdh_calculate(ec->privateKey, &remote);
-    if (remote.x) freebn(remote.x);
-    if (remote.y) freebn(remote.y);
-    return ret;
+    mp_int *x;
+    ecc_montgomery_get_affine(dh->m_public, &x);
+    for (size_t i = 0; i < dh->curve->fieldBytes; ++i)
+        put_byte(bs, mp_get_byte(x, i));
+    mp_free(x);
 }
 
-void ssh_ecdhkex_freekey(struct ec_key *key)
+void ssh_ecdhkex_getpublic(ecdh_key *dh, BinarySink *bs)
 {
-    ecdsa_freekey(&key->sshk);
+    dh->extra->getpublic(dh, bs);
 }
 
-static const struct eckex_extra kex_extra_curve25519 = { ec_curve25519 };
+/* Weierstrass ECDH: x-coordinate of (our private value * remoteKey), or
+ * NULL if ecdsa_decode rejects remoteKey. */
+static mp_int *ssh_ecdhkex_w_getkey(ecdh_key *dh, ptrlen remoteKey)
+{
+    WeierstrassPoint *remote = ecdsa_decode(remoteKey, dh->curve);
+    if (remote == NULL)
+        return NULL;
+
+    WeierstrassPoint *shared = ecc_weierstrass_multiply(remote, dh->private);
+    ecc_weierstrass_point_free(remote);
+
+    mp_int *shared_x;
+    ecc_weierstrass_get_affine(shared, &shared_x, NULL);
+    ecc_weierstrass_point_free(shared);
+    return shared_x;
+}
+
+/* Montgomery ECDH: shared secret from our private scalar and the peer's
+ * public value (a bare little-endian x-coordinate). Returns NULL if
+ * remoteKey is not exactly fieldBytes long. */
+static mp_int *ssh_ecdhkex_m_getkey(ecdh_key *dh, ptrlen remoteKey)
+{
+    /* Reject a wrong-length public value before doing any arithmetic */
+    if (remoteKey.len != dh->curve->fieldBytes)
+        return NULL;
+
+    mp_int *remote_x = mp_from_bytes_le(remoteKey);
+    MontgomeryPoint *remote_p = ecc_montgomery_point_new(
+        dh->curve->m.mc, remote_x);
+    mp_free(remote_x);
+
+    MontgomeryPoint *p = ecc_montgomery_multiply(remote_p, dh->private);
+    mp_int *x;
+    ecc_montgomery_get_affine(p, &x);
+    ecc_montgomery_point_free(remote_p);
+    ecc_montgomery_point_free(p);
+
+    /*
+     * Endianness-swap. The Curve25519 definition computes in arrays
+     * of little-endian bytes, then says to read the whole final array
+     * as a bignum in _network_ byte order, i.e. big-endian. So if x
+     * has its most significant byte zero, that zero byte lands at the
+     * _low_ end of the result, which is then a multiple of 256.
+     */
+    strbuf *sb = strbuf_new();
+    for (size_t i = 0; i < dh->curve->fieldBytes; ++i)
+        put_byte(sb, mp_get_byte(x, i));
+    mp_free(x);
+    x = mp_from_bytes_be(ptrlen_from_strbuf(sb));
+    smemclr(sb->u, sb->len);  /* don't leave the secret in freed memory */
+    strbuf_free(sb);
+
+    return x;
+}
+
+mp_int *ssh_ecdhkex_getkey(ecdh_key *dh, ptrlen remoteKey)
+{   /* Public entry point: defer to the getkey method in dh->extra */
+    return dh->extra->getkey(dh, remoteKey);
+}
+
+static void ssh_ecdhkex_w_cleanup(ecdh_key *dh)
+{
+    ecc_weierstrass_point_free(dh->w_public); /* set by ssh_ecdhkex_w_setup */
+}
+
+static void ssh_ecdhkex_m_cleanup(ecdh_key *dh)
+{
+    ecc_montgomery_point_free(dh->m_public); /* set by ssh_ecdhkex_m_setup */
+}
+
+void ssh_ecdhkex_freekey(ecdh_key *dh)
+{
+    mp_free(dh->private);
+    dh->extra->cleanup(dh);    /* frees the curve-type-specific public point */
+    sfree(dh);
+}
+
+static const struct eckex_extra kex_extra_curve25519 = {
+    ec_curve25519,
+    ssh_ecdhkex_m_setup,
+    ssh_ecdhkex_m_cleanup,
+    ssh_ecdhkex_m_getpublic,
+    ssh_ecdhkex_m_getkey,
+};
 static const struct ssh_kex ssh_ec_kex_curve25519 = {
     "curve25519-sha256@libssh.org", NULL, KEXTYPE_ECDH,
     &ssh_sha256, &kex_extra_curve25519,
 };
 
-const struct eckex_extra kex_extra_nistp256 = { ec_p256 };
+const struct eckex_extra kex_extra_nistp256 = {
+    ec_p256,
+    ssh_ecdhkex_w_setup,
+    ssh_ecdhkex_w_cleanup,
+    ssh_ecdhkex_w_getpublic,
+    ssh_ecdhkex_w_getkey,
+};
 static const struct ssh_kex ssh_ec_kex_nistp256 = {
     "ecdh-sha2-nistp256", NULL, KEXTYPE_ECDH,
     &ssh_sha256, &kex_extra_nistp256,
 };
 
-const struct eckex_extra kex_extra_nistp384 = { ec_p384 };
+const struct eckex_extra kex_extra_nistp384 = {
+    ec_p384,
+    ssh_ecdhkex_w_setup,
+    ssh_ecdhkex_w_cleanup,
+    ssh_ecdhkex_w_getpublic,
+    ssh_ecdhkex_w_getkey,
+};
 static const struct ssh_kex ssh_ec_kex_nistp384 = {
     "ecdh-sha2-nistp384", NULL, KEXTYPE_ECDH,
     &ssh_sha384, &kex_extra_nistp384,
 };
 
-const struct eckex_extra kex_extra_nistp521 = { ec_p521 };
+const struct eckex_extra kex_extra_nistp521 = {
+    ec_p521,
+    ssh_ecdhkex_w_setup,
+    ssh_ecdhkex_w_cleanup,
+    ssh_ecdhkex_w_getpublic,
+    ssh_ecdhkex_w_getkey,
+};
 static const struct ssh_kex ssh_ec_kex_nistp521 = {
     "ecdh-sha2-nistp521", NULL, KEXTYPE_ECDH,
     &ssh_sha512, &kex_extra_nistp521,
diff --git a/sshecdsag.c b/sshecdsag.c
index 24547ca5..37048ea6 100644
--- a/sshecdsag.c
+++ b/sshecdsag.c
@@ -3,66 +3,36 @@
  */
 
 #include "ssh.h"
+#include "mpint.h"
 
-int ec_generate(struct ec_key *key, int bits, progfn_t pfn,
-                void *pfnparam)
+int ecdsa_generate(struct ecdsa_key *ek, int bits,
+                   progfn_t pfn, void *pfnparam)
 {
-    struct ec_point *publicKey;
-
-    if (!ec_nist_alg_and_curve_by_bits(bits, &key->publicKey.curve,
-                                       &key->sshk.vt))
+    if (!ec_nist_alg_and_curve_by_bits(bits, &ek->curve, &ek->sshk.vt))
         return 0;
 
-    key->privateKey = bignum_random_in_range(One, key->publicKey.curve->w.n);
-    if (!key->privateKey) return 0;
+    mp_int *one = mp_from_integer(1);
+    ek->privateKey = mp_random_in_range(one, ek->curve->w.G_order);
+    mp_free(one);
 
-    publicKey = ec_public(key->privateKey, key->publicKey.curve);
-    if (!publicKey) {
-        freebn(key->privateKey);
-        key->privateKey = NULL;
-        return 0;
-    }
-
-    key->publicKey.x = publicKey->x;
-    key->publicKey.y = publicKey->y;
-    key->publicKey.z = NULL;
-    sfree(publicKey);
+    ek->publicKey = ecdsa_public(ek->privateKey, ek->sshk.vt);
 
     return 1;
 }
 
-int ec_edgenerate(struct ec_key *key, int bits, progfn_t pfn,
-                  void *pfnparam)
+int eddsa_generate(struct eddsa_key *ek, int bits,
+                   progfn_t pfn, void *pfnparam)
 {
-    struct ec_point *publicKey;
-
-    if (!ec_ed_alg_and_curve_by_bits(bits, &key->publicKey.curve,
-                                     &key->sshk.vt))
+    if (!ec_ed_alg_and_curve_by_bits(bits, &ek->curve, &ek->sshk.vt))
         return 0;
 
-    {
-        /* EdDSA secret keys are just 32 bytes of hash preimage; the
-         * 64-byte SHA-512 hash of that key will be used when signing,
-         * but the form of the key stored on disk is the preimage
-         * only. */
-        Bignum privMax = bn_power_2(bits);
-        if (!privMax) return 0;
-        key->privateKey = bignum_random_in_range(Zero, privMax);
-        freebn(privMax);
-        if (!key->privateKey) return 0;
-    }
+    /* EdDSA secret keys are just 32 bytes of hash preimage; the
+     * 64-byte SHA-512 hash of that key will be used when signing,
+     * but the form of the key stored on disk is the preimage
+     * only. */
+    ek->privateKey = mp_random_bits(bits);
 
-    publicKey = ec_public(key->privateKey, key->publicKey.curve);
-    if (!publicKey) {
-        freebn(key->privateKey);
-        key->privateKey = NULL;
-        return 0;
-    }
-
-    key->publicKey.x = publicKey->x;
-    key->publicKey.y = publicKey->y;
-    key->publicKey.z = NULL;
-    sfree(publicKey);
+    ek->publicKey = eddsa_public(ek->privateKey, ek->sshk.vt);
 
     return 1;
 }
diff --git a/sshprime.c b/sshprime.c
index cf36975e..65380e0e 100644
--- a/sshprime.c
+++ b/sshprime.c
@@ -4,6 +4,7 @@
 
 #include <assert.h>
 #include "ssh.h"
+#include "mpint.h"
 
 /*
  * This prime generation algorithm is pretty much cribbed from
@@ -134,6 +135,23 @@ static void init_primes_array(void)
     assert(pos == NPRIMES);
 }
 
+/*
+ * Reduce x modulo a small modulus by Horner's rule over its bytes,
+ * most significant first. Kept here rather than in mpint.c partly
+ * because this is its only user, but mostly because it makes no
+ * attempt at constant running time, which looks like a lost cause
+ * for key generation anyway.
+ */
+static unsigned short mp_mod_short(mp_int *x, unsigned short modulus)
+{
+    unsigned rem = 0;
+    size_t pos = mp_max_bytes(x);
+    while (pos-- > 0) {
+        rem = (0x100 * rem + mp_get_byte(x, pos)) % modulus;
+    }
+    return rem;
+}
+
 /*
  * Generate a prime. We can deal with various extra properties of
  * the prime:
@@ -154,23 +172,15 @@ static void init_primes_array(void)
  *    'firstbits' is not needed, specifying it to either 0 or 1 is
  *    an adequate no-op.
  */
-Bignum primegen(int bits, int modulus, int residue, Bignum factor,
-		int phase, progfn_t pfn, void *pfnparam, unsigned firstbits)
+mp_int *primegen(
+    int bits, int modulus, int residue, mp_int *factor,
+    int phase, progfn_t pfn, void *pfnparam, unsigned firstbits)
 {
-    int i, k, v, byte, bitsleft, check, checks, fbsize;
-    unsigned long delta;
-    unsigned long moduli[NPRIMES + 1];
-    unsigned long residues[NPRIMES + 1];
-    unsigned long multipliers[NPRIMES + 1];
-    Bignum p, pm1, q, wqp, wqp2;
-    int progress = 0;
-
     init_primes_array();
 
-    byte = 0;
-    bitsleft = 0;
+    int progress = 0;
 
-    fbsize = 0;
+    size_t fbsize = 0;
     while (firstbits >> fbsize)        /* work out how to align this */
         fbsize++;
 
@@ -184,184 +194,172 @@ Bignum primegen(int bits, int modulus, int residue, Bignum factor,
      * random number with the top bit set and the bottom bit clear,
      * multiply it by `factor', and add one.
      */
-    p = bn_power_2(bits - 1);
-    for (i = 0; i < bits; i++) {
-	if (i == 0 || i == bits - 1) {
-	    v = (i != 0 || !factor) ? 1 : 0;
-        } else if (i >= bits - fbsize) {
-            v = (firstbits >> (i - (bits - fbsize))) & 1;
-        } else {
-	    if (bitsleft <= 0)
-		bitsleft = 8, byte = random_byte();
-	    v = byte & 1;
-	    byte >>= 1;
-	    bitsleft--;
-	}
-	bignum_set_bit(p, i, v);
-    }
+    mp_int *p = mp_random_bits(bits - 1);
+
+    mp_set_bit(p, 0, factor ? 0 : 1);  /* bottom bit */
+    mp_set_bit(p, bits-1, 1);          /* top bit */
+    for (size_t i = 0; i < fbsize; i++)
+        mp_set_bit(p, bits-fbsize + i, 1 & (firstbits >> i));
+
     if (factor) {
-	Bignum tmp = p;
-	p = bigmul(tmp, factor);
-	freebn(tmp);
-	assert(bignum_bit(p, 0) == 0);
-	bignum_set_bit(p, 0, 1);
+	mp_int *tmp = p;
+	p = mp_mul(tmp, factor);
+	mp_free(tmp);
+	assert(mp_get_bit(p, 0) == 0);
+        mp_set_bit(p, 0, 1);
     }
 
     /*
-     * Ensure this random number is coprime to the first few
-     * primes, by repeatedly adding either 2 or 2*factor to it
-     * until it is.
+     * We need to ensure this random number is coprime to the first
+     * few primes, by repeatedly adding either 2 or 2*factor to it
+     * until it is. To do this we make a list of (modulus, residue)
+     * pairs to avoid, and we also add to that list the extra pair our
+     * caller wants to avoid.
      */
-    for (i = 0; i < NPRIMES; i++) {
+
+    /* List the moduli */
+    unsigned long moduli[NPRIMES + 1];
+    for (size_t i = 0; i < NPRIMES; i++)
 	moduli[i] = primes[i];
-	residues[i] = bignum_mod_short(p, primes[i]);
+    moduli[NPRIMES] = modulus;
+
+    /* Find the residue of our starting number mod each of them. Also
+     * set up the multipliers array which tells us how each one will
+     * change when we increment the number (which isn't just 1 if
+     * we're incrementing by multiples of factor). */
+    unsigned long residues[NPRIMES + 1], multipliers[NPRIMES + 1];
+    for (size_t i = 0; i < lenof(moduli); i++) {
+	residues[i] = mp_mod_short(p, moduli[i]);
 	if (factor)
-	    multipliers[i] = bignum_mod_short(factor, primes[i]);
+	    multipliers[i] = mp_mod_short(factor, moduli[i]);
 	else
 	    multipliers[i] = 1;
     }
-    moduli[NPRIMES] = modulus;
-    residues[NPRIMES] = (bignum_mod_short(p, (unsigned short) modulus)
-			 + modulus - residue);
-    if (factor)
-	multipliers[NPRIMES] = bignum_mod_short(factor, modulus);
-    else
-	multipliers[NPRIMES] = 1;
-    delta = 0;
+
+    /* Adjust the last entry so that it avoids a residue other than zero */
+    residues[NPRIMES] = (residues[NPRIMES] + modulus - residue) % modulus;
+
+    /*
+     * Now loop until no residue in that list is zero, to find a
+     * sensible increment. We maintain the increment in an ordinary
+     * integer, so if it gets too big, we'll have to give up and go
+     * back to making up a fresh random large integer.
+     */
+    unsigned delta = 0;
     while (1) {
-	for (i = 0; i < (sizeof(moduli) / sizeof(*moduli)); i++)
+	for (size_t i = 0; i < lenof(moduli); i++)
 	    if (!((residues[i] + delta * multipliers[i]) % moduli[i]))
-		break;
-	if (i < (sizeof(moduli) / sizeof(*moduli))) {	/* we broke */
-	    delta += 2;
-	    if (delta > 65536) {
-		freebn(p);
-		goto STARTOVER;
-	    }
-	    continue;
-	}
-	break;
+		goto found_a_zero;
+
+        /* If we didn't exit that loop by goto, we've got our candidate. */
+        break;
+
+      found_a_zero:
+        delta += 2;
+        if (delta > 65536) {
+            mp_free(p);
+            goto STARTOVER;
+        }
     }
-    q = p;
+
+    /*
+     * Having found a plausible increment, actually add it on.
+     */
     if (factor) {
-	Bignum tmp;
-	tmp = bignum_from_long(delta);
-	p = bigmuladd(tmp, factor, q);
-	freebn(tmp);
+	mp_int *d = mp_from_integer(delta);
+        mp_int *df = mp_mul(d, factor);
+        mp_add_into(p, p, df);
+        mp_free(d);
+        mp_free(df);
     } else {
-	p = bignum_add_long(q, delta);
+        mp_add_integer_into(p, p, delta);
     }
-    freebn(q);
 
     /*
      * Now apply the Miller-Rabin primality test a few times. First
      * work out how many checks are needed.
      */
-    checks = 27;
-    if (bits >= 150)
-	checks = 18;
-    if (bits >= 200)
-	checks = 15;
-    if (bits >= 250)
-	checks = 12;
-    if (bits >= 300)
-	checks = 9;
-    if (bits >= 350)
-	checks = 8;
-    if (bits >= 400)
-	checks = 7;
-    if (bits >= 450)
-	checks = 6;
-    if (bits >= 550)
-	checks = 5;
-    if (bits >= 650)
-	checks = 4;
-    if (bits >= 850)
-	checks = 3;
-    if (bits >= 1300)
-	checks = 2;
+    unsigned checks =
+        bits >= 1300 ?  2 : bits >= 850 ?  3 : bits >= 650 ?  4 :
+        bits >=  550 ?  5 : bits >= 450 ?  6 : bits >= 400 ?  7 :
+        bits >=  350 ?  8 : bits >= 300 ?  9 : bits >= 250 ? 12 :
+        bits >=  200 ? 15 : bits >= 150 ? 18 : 27;
 
     /*
      * Next, write p-1 as q*2^k.
      */
-    for (k = 0; bignum_bit(p, k) == !k; k++)
+    size_t k;
+    for (k = 0; mp_get_bit(p, k) == !k; k++)
 	continue;	/* find first 1 bit in p-1 */
-    q = bignum_rshift(p, k);
-    /* And store p-1 itself, which we'll need. */
-    pm1 = copybn(p);
-    decbn(pm1);
+    mp_int *q = mp_rshift_safe(p, k);
+
+    /*
+     * Set up stuff for the Miller-Rabin checks.
+     */
+    mp_int *two = mp_from_integer(2);
+    mp_int *pm1 = mp_copy(p);
+    mp_sub_integer_into(pm1, pm1, 1);
+    MontyContext *mc = monty_new(p);
+    mp_int *m_pm1 = monty_import(mc, pm1);
+
+    bool known_bad = false;
 
     /*
      * Now, for each check ...
      */
-    for (check = 0; check < checks; check++) {
-	Bignum w;
-
+    for (unsigned check = 0; check < checks && !known_bad; check++) {
 	/*
-	 * Invent a random number between 1 and p-1 inclusive.
+	 * Invent a random witness: at least 2, bounded above by p-1.
 	 */
-	while (1) {
-	    w = bn_power_2(bits - 1);
-	    for (i = 0; i < bits; i++) {
-		if (bitsleft <= 0)
-		    bitsleft = 8, byte = random_byte();
-		v = byte & 1;
-		byte >>= 1;
-		bitsleft--;
-		bignum_set_bit(w, i, v);
-	    }
-	    bn_restore_invariant(w);
-	    if (bignum_cmp(w, p) >= 0 || bignum_cmp(w, Zero) == 0) {
-		freebn(w);
-		continue;
-	    }
-	    break;
-	}
+        mp_int *w = mp_random_in_range(two, pm1);
+        monty_import_into(mc, w, w);
 
 	pfn(pfnparam, PROGFN_PROGRESS, phase, ++progress);
 
 	/*
 	 * Compute w^q mod p.
 	 */
-	wqp = modpow(w, q, p);
-	freebn(w);
+	mp_int *wqp = monty_pow(mc, w, q);
+	mp_free(w);
 
 	/*
 	 * See if this is 1, or if it is -1, or if it becomes -1
 	 * when squared at most k-1 times.
 	 */
-	if (bignum_cmp(wqp, One) == 0 || bignum_cmp(wqp, pm1) == 0) {
-	    freebn(wqp);
-	    continue;
-	}
-	for (i = 0; i < k - 1; i++) {
-	    wqp2 = modmul(wqp, wqp, p);
-	    freebn(wqp);
-	    wqp = wqp2;
-	    if (bignum_cmp(wqp, pm1) == 0)
-		break;
-	}
-	if (i < k - 1) {
-	    freebn(wqp);
-	    continue;
+        bool passed = false;
+
+	if (mp_cmp_eq(wqp, monty_identity(mc)) || mp_cmp_eq(wqp, m_pm1)) {
+            passed = true;
+        } else {
+            for (size_t i = 0; i < k - 1; i++) {
+                monty_mul_into(mc, wqp, wqp, wqp);
+                if (mp_cmp_eq(wqp, m_pm1)) {
+                    passed = true;
+                    break;
+                }
+            }
 	}
 
-	/*
-	 * It didn't. Therefore, w is a witness for the
-	 * compositeness of p.
-	 */
-	freebn(wqp);
-	freebn(p);
-	freebn(pm1);
-	freebn(q);
-	goto STARTOVER;
+        if (!passed)
+            known_bad = true;
+
+	mp_free(wqp);
+    }
+
+    mp_free(q);
+    mp_free(two);
+    mp_free(pm1);
+    monty_free(mc);
+    mp_free(m_pm1);
+
+    if (known_bad) {
+        mp_free(p);
+        goto STARTOVER;
     }
 
     /*
      * We have a prime!
      */
-    freebn(q);
-    freebn(pm1);
     return p;
 }
 
diff --git a/sshpubk.c b/sshpubk.c
index 5072ff63..9a187b5c 100644
--- a/sshpubk.c
+++ b/sshpubk.c
@@ -10,6 +10,7 @@
 #include <assert.h>
 
 #include "putty.h"
+#include "mpint.h"
 #include "ssh.h"
 #include "misc.h"
 
@@ -276,11 +277,11 @@ int rsa_ssh1_loadpub(const Filename *filename, BinarySink *bs,
         }
 
 	memset(&key, 0, sizeof(key));
-        key.exponent = bignum_from_decimal(expp);
-        key.modulus = bignum_from_decimal(modp);
-        if (atoi(bitsp) != bignum_bitcount(key.modulus)) {
-            freebn(key.exponent);
-            freebn(key.modulus);
+        key.exponent = mp_from_decimal(expp);
+        key.modulus = mp_from_decimal(modp);
+        if (atoi(bitsp) != mp_get_nbits(key.modulus)) {
+            mp_free(key.exponent);
+            mp_free(key.modulus);
             sfree(line);
             error = "key bit count does not match in SSH-1 public key file";
             goto end;
@@ -1360,10 +1361,9 @@ char *ssh1_pubkey_str(struct RSAKey *key)
     char *buffer;
     char *dec1, *dec2;
 
-    dec1 = bignum_decimal(key->exponent);
-    dec2 = bignum_decimal(key->modulus);
-    buffer = dupprintf("%d %s %s%s%s", bignum_bitcount(key->modulus),
-		       dec1, dec2,
+    dec1 = mp_get_decimal(key->exponent);
+    dec2 = mp_get_decimal(key->modulus);
+    buffer = dupprintf("%zu %s %s%s%s", mp_get_nbits(key->modulus), dec1, dec2,
                        key->comment ? " " : "",
                        key->comment ? key->comment : "");
     sfree(dec1);
diff --git a/sshrsa.c b/sshrsa.c
index afddbb7d..53767aae 100644
--- a/sshrsa.c
+++ b/sshrsa.c
@@ -8,13 +8,14 @@
 #include <assert.h>
 
 #include "ssh.h"
+#include "mpint.h"
 #include "misc.h"
 
 void BinarySource_get_rsa_ssh1_pub(
     BinarySource *src, struct RSAKey *rsa, RsaSsh1Order order)
 {
     unsigned bits;
-    Bignum e, m;
+    mp_int *e, *m;
 
     bits = get_uint32(src);
     if (order == RSA_SSH1_EXPONENT_FIRST) {
@@ -29,10 +30,10 @@ void BinarySource_get_rsa_ssh1_pub(
         rsa->bits = bits;
         rsa->exponent = e;
         rsa->modulus = m;
-        rsa->bytes = (bignum_bitcount(m) + 7) / 8;
+        rsa->bytes = (mp_get_nbits(m) + 7) / 8;
     } else {
-        freebn(e);
-        freebn(m);
+        mp_free(e);
+        mp_free(m);
     }
 }
 
@@ -44,7 +45,7 @@ void BinarySource_get_rsa_ssh1_priv(
 
 bool rsa_ssh1_encrypt(unsigned char *data, int length, struct RSAKey *key)
 {
-    Bignum b1, b2;
+    mp_int *b1, *b2;
     int i;
     unsigned char *p;
 
@@ -62,17 +63,17 @@ bool rsa_ssh1_encrypt(unsigned char *data, int length, struct RSAKey *key)
     }
     data[key->bytes - length - 1] = 0;
 
-    b1 = bignum_from_bytes(data, key->bytes);
+    b1 = mp_from_bytes_be(make_ptrlen(data, key->bytes));
 
-    b2 = modpow(b1, key->exponent, key->modulus);
+    b2 = mp_modpow(b1, key->exponent, key->modulus);
 
     p = data;
     for (i = key->bytes; i--;) {
-	*p++ = bignum_byte(b2, i);
+	*p++ = mp_get_byte(b2, i);
     }
 
-    freebn(b1);
-    freebn(b2);
+    mp_free(b1);
+    mp_free(b2);
 
     return true;
 }
@@ -83,28 +84,33 @@ bool rsa_ssh1_encrypt(unsigned char *data, int length, struct RSAKey *key)
  * Uses Chinese Remainder Theorem to speed computation up over the
  * obvious implementation of a single big modpow.
  */
-Bignum crt_modpow(Bignum base, Bignum exp, Bignum mod,
-                  Bignum p, Bignum q, Bignum iqmp)
+mp_int *crt_modpow(mp_int *base, mp_int *exp, mp_int *mod,
+                      mp_int *p, mp_int *q, mp_int *iqmp)
 {
-    Bignum pm1, qm1, pexp, qexp, presult, qresult, diff, multiplier, ret0, ret;
+    mp_int *pm1, *qm1, *pexp, *qexp, *presult, *qresult;
+    mp_int *diff, *multiplier, *ret0, *ret;
 
     /*
      * Reduce the exponent mod phi(p) and phi(q), to save time when
      * exponentiating mod p and mod q respectively. Of course, since p
      * and q are prime, phi(p) == p-1 and similarly for q.
      */
-    pm1 = copybn(p);
-    decbn(pm1);
-    qm1 = copybn(q);
-    decbn(qm1);
-    pexp = bigmod(exp, pm1);
-    qexp = bigmod(exp, qm1);
+    pm1 = mp_copy(p);
+    mp_sub_integer_into(pm1, pm1, 1);
+    qm1 = mp_copy(q);
+    mp_sub_integer_into(qm1, qm1, 1);
+    pexp = mp_mod(exp, pm1);
+    qexp = mp_mod(exp, qm1);
 
     /*
      * Do the two modpows.
      */
-    presult = modpow(base, pexp, p);
-    qresult = modpow(base, qexp, q);
+    mp_int *base_mod_p = mp_mod(base, p);
+    presult = mp_modpow(base_mod_p, pexp, p);
+    mp_free(base_mod_p);
+    mp_int *base_mod_q = mp_mod(base, q);
+    qresult = mp_modpow(base_mod_q, qexp, q);
+    mp_free(base_mod_q);
 
     /*
      * Recombine the results. We want a value which is congruent to
@@ -115,189 +121,66 @@ Bignum crt_modpow(Bignum base, Bignum exp, Bignum mod,
      * (which is congruent to qresult mod both primes), and add on
      * (presult-qresult) * (iqmp * q) which adjusts it to be congruent
      * to presult mod p without affecting its value mod q.
+     *
+     * (If presult-qresult < 0, we add p to it to keep it positive.)
      */
-    if (bignum_cmp(presult, qresult) < 0) {
-        /*
-         * Can't subtract presult from qresult without first adding on
-         * p.
-         */
-        Bignum tmp = presult;
-        presult = bigadd(presult, p);
-        freebn(tmp);
-    }
-    diff = bigsub(presult, qresult);
-    multiplier = bigmul(iqmp, q);
-    ret0 = bigmuladd(multiplier, diff, qresult);
+    unsigned presult_too_small = mp_cmp_hs(qresult, presult);
+    mp_cond_add_into(presult, presult, p, presult_too_small);
+
+    diff = mp_sub(presult, qresult);
+    multiplier = mp_mul(iqmp, q);
+    ret0 = mp_mul(multiplier, diff);
+    mp_add_into(ret0, ret0, qresult);
 
     /*
      * Finally, reduce the result mod n.
      */
-    ret = bigmod(ret0, mod);
+    ret = mp_mod(ret0, mod);
 
     /*
      * Free all the intermediate results before returning.
      */
-    freebn(pm1);
-    freebn(qm1);
-    freebn(pexp);
-    freebn(qexp);
-    freebn(presult);
-    freebn(qresult);
-    freebn(diff);
-    freebn(multiplier);
-    freebn(ret0);
+    mp_free(pm1);
+    mp_free(qm1);
+    mp_free(pexp);
+    mp_free(qexp);
+    mp_free(presult);
+    mp_free(qresult);
+    mp_free(diff);
+    mp_free(multiplier);
+    mp_free(ret0);
 
     return ret;
 }
 
 /*
- * This function is a wrapper on modpow(). It has the same effect as
- * modpow(), but employs RSA blinding to protect against timing
- * attacks and also uses the Chinese Remainder Theorem (implemented
- * above, in crt_modpow()) to speed up the main operation.
+ * Wrapper on crt_modpow that looks up all the right values from an
+ * RSAKey.
  */
-static Bignum rsa_privkey_op(Bignum input, struct RSAKey *key)
+static mp_int *rsa_privkey_op(mp_int *input, struct RSAKey *key)
 {
-    Bignum random, random_encrypted, random_inverse;
-    Bignum input_blinded, ret_blinded;
-    Bignum ret;
-
-    SHA512_State ss;
-    unsigned char digest512[64];
-    int digestused = lenof(digest512);
-    int hashseq = 0;
-
-    /*
-     * Start by inventing a random number chosen uniformly from the
-     * range 2..modulus-1. (We do this by preparing a random number
-     * of the right length and retrying if it's greater than the
-     * modulus, to prevent any potential Bleichenbacher-like
-     * attacks making use of the uneven distribution within the
-     * range that would arise from just reducing our number mod n.
-     * There are timing implications to the potential retries, of
-     * course, but all they tell you is the modulus, which you
-     * already knew.)
-     * 
-     * To preserve determinism and avoid Pageant needing to share
-     * the random number pool, we actually generate this `random'
-     * number by hashing stuff with the private key.
-     */
-    while (1) {
-	int bits, byte, bitsleft, v;
-	random = copybn(key->modulus);
-	/*
-	 * Find the topmost set bit. (This function will return its
-	 * index plus one.) Then we'll set all bits from that one
-	 * downwards randomly.
-	 */
-	bits = bignum_bitcount(random);
-	byte = 0;
-	bitsleft = 0;
-	while (bits--) {
-	    if (bitsleft <= 0) {
-		bitsleft = 8;
-		/*
-		 * Conceptually the following few lines are equivalent to
-		 *    byte = random_byte();
-		 */
-		if (digestused >= lenof(digest512)) {
-		    SHA512_Init(&ss);
-		    put_data(&ss, "RSA deterministic blinding", 26);
-		    put_uint32(&ss, hashseq);
-		    put_mp_ssh2(&ss, key->private_exponent);
-		    SHA512_Final(&ss, digest512);
-		    hashseq++;
-
-		    /*
-		     * Now hash that digest plus the signature
-		     * input.
-		     */
-		    SHA512_Init(&ss);
-		    put_data(&ss, digest512, sizeof(digest512));
-		    put_mp_ssh2(&ss, input);
-		    SHA512_Final(&ss, digest512);
-
-		    digestused = 0;
-		}
-		byte = digest512[digestused++];
-	    }
-	    v = byte & 1;
-	    byte >>= 1;
-	    bitsleft--;
-	    bignum_set_bit(random, bits, v);
-	}
-        bn_restore_invariant(random);
-
-	/*
-	 * Now check that this number is strictly greater than
-	 * zero, and strictly less than modulus.
-	 */
-	if (bignum_cmp(random, Zero) <= 0 ||
-	    bignum_cmp(random, key->modulus) >= 0) {
-	    freebn(random);
-	    continue;
-	}
-
-        /*
-         * Also, make sure it has an inverse mod modulus.
-         */
-        random_inverse = modinv(random, key->modulus);
-        if (!random_inverse) {
-	    freebn(random);
-	    continue;
-        }
-
-        break;
-    }
-
-    /*
-     * RSA blinding relies on the fact that (xy)^d mod n is equal
-     * to (x^d mod n) * (y^d mod n) mod n. We invent a random pair
-     * y and y^d; then we multiply x by y, raise to the power d mod
-     * n as usual, and divide by y^d to recover x^d. Thus an
-     * attacker can't correlate the timing of the modpow with the
-     * input, because they don't know anything about the number
-     * that was input to the actual modpow.
-     * 
-     * The clever bit is that we don't have to do a huge modpow to
-     * get y and y^d; we will use the number we just invented as
-     * _y^d_, and use the _public_ exponent to compute (y^d)^e = y
-     * from it, which is much faster to do.
-     */
-    random_encrypted = crt_modpow(random, key->exponent,
-                                  key->modulus, key->p, key->q, key->iqmp);
-    input_blinded = modmul(input, random_encrypted, key->modulus);
-    ret_blinded = crt_modpow(input_blinded, key->private_exponent,
-                             key->modulus, key->p, key->q, key->iqmp);
-    ret = modmul(ret_blinded, random_inverse, key->modulus);
-
-    freebn(ret_blinded);
-    freebn(input_blinded);
-    freebn(random_inverse);
-    freebn(random_encrypted);
-    freebn(random);
-
-    return ret;
+    return crt_modpow(input, key->private_exponent,
+                      key->modulus, key->p, key->q, key->iqmp);
 }
 
-Bignum rsa_ssh1_decrypt(Bignum input, struct RSAKey *key)
+mp_int *rsa_ssh1_decrypt(mp_int *input, struct RSAKey *key)
 {
     return rsa_privkey_op(input, key);
 }
 
-bool rsa_ssh1_decrypt_pkcs1(Bignum input, struct RSAKey *key, strbuf *outbuf)
+bool rsa_ssh1_decrypt_pkcs1(mp_int *input, struct RSAKey *key,
+                            strbuf *outbuf)
 {
     strbuf *data = strbuf_new();
     bool success = false;
     BinarySource src[1];
 
     {
-        Bignum *b = rsa_ssh1_decrypt(input, key);
-        int i;
-        for (i = (bignum_bitcount(key->modulus) + 7) / 8; i-- > 0 ;) {
-            put_byte(data, bignum_byte(b, i));
+        mp_int *b = rsa_ssh1_decrypt(input, key);
+        for (size_t i = (mp_get_nbits(key->modulus) + 7) / 8; i-- > 0 ;) {
+            put_byte(data, mp_get_byte(b, i));
         }
-        freebn(b);
+        mp_free(b);
     }
 
     BinarySource_BARE_INIT(src, data->u, data->len);
@@ -321,17 +204,16 @@ bool rsa_ssh1_decrypt_pkcs1(Bignum input, struct RSAKey *key, strbuf *outbuf)
     return success;
 }
 
-static void append_hex_to_strbuf(strbuf *sb, Bignum *x)
+static void append_hex_to_strbuf(strbuf *sb, mp_int *x)
 {
     if (sb->len > 0)
         put_byte(sb, ',');
     put_data(sb, "0x", 2);
-    int nibbles = (3 + bignum_bitcount(x)) / 4;
-    if (nibbles < 1)
-	nibbles = 1;
-    static const char hex[] = "0123456789abcdef";
-    for (int i = nibbles; i--;)
-	put_byte(sb, hex[(bignum_byte(x, i / 2) >> (4 * (i % 2))) & 0xF]);
+    char *hex = mp_get_hex(x);
+    size_t hexlen = strlen(hex);
+    put_data(sb, hex, hexlen);
+    smemclr(hex, hexlen);
+    sfree(hex);
 }
 
 char *rsastr_fmt(struct RSAKey *key)
@@ -361,7 +243,7 @@ char *rsa_ssh1_fingerprint(struct RSAKey *key)
     MD5Final(digest, &md5c);
 
     out = strbuf_new();
-    strbuf_catf(out, "%d ", bignum_bitcount(key->modulus));
+    strbuf_catf(out, "%zu ", mp_get_nbits(key->modulus));
     for (i = 0; i < 16; i++)
 	strbuf_catf(out, "%s%02x", i ? ":" : "", digest[i]);
     if (key->comment)
@@ -376,34 +258,32 @@ char *rsa_ssh1_fingerprint(struct RSAKey *key)
  */
 bool rsa_verify(struct RSAKey *key)
 {
-    Bignum n, ed, pm1, qm1;
-    int cmp;
+    mp_int *n, *ed, *pm1, *qm1;
+    unsigned ok = 1;
+
+    /* Preliminary checks: p,q must actually be nonzero. */
+    if (mp_eq_integer(key->p, 0) | mp_eq_integer(key->q, 0))
+        return false;
 
     /* n must equal pq. */
-    n = bigmul(key->p, key->q);
-    cmp = bignum_cmp(n, key->modulus);
-    freebn(n);
-    if (cmp != 0)
-	return false;
+    n = mp_mul(key->p, key->q);
+    ok &= mp_cmp_eq(n, key->modulus);
+    mp_free(n);
 
     /* e * d must be congruent to 1, modulo (p-1) and modulo (q-1). */
-    pm1 = copybn(key->p);
-    decbn(pm1);
-    ed = modmul(key->exponent, key->private_exponent, pm1);
-    freebn(pm1);
-    cmp = bignum_cmp(ed, One);
-    freebn(ed);
-    if (cmp != 0)
-	return false;
+    pm1 = mp_copy(key->p);
+    mp_sub_integer_into(pm1, pm1, 1);
+    ed = mp_modmul(key->exponent, key->private_exponent, pm1);
+    mp_free(pm1);
+    ok &= mp_eq_integer(ed, 1);
+    mp_free(ed);
 
-    qm1 = copybn(key->q);
-    decbn(qm1);
-    ed = modmul(key->exponent, key->private_exponent, qm1);
-    freebn(qm1);
-    cmp = bignum_cmp(ed, One);
-    freebn(ed);
-    if (cmp != 0)
-	return false;
+    qm1 = mp_copy(key->q);
+    mp_sub_integer_into(qm1, qm1, 1);
+    ed = mp_modmul(key->exponent, key->private_exponent, qm1);
+    mp_free(qm1);
+    ok &= mp_eq_integer(ed, 1);
+    mp_free(ed);
 
     /*
      * Ensure p > q.
@@ -413,33 +293,18 @@ bool rsa_verify(struct RSAKey *key)
      * should instead flip them round into the canonical order of
      * p > q. This also involves regenerating iqmp.
      */
-    if (bignum_cmp(key->p, key->q) <= 0) {
-	Bignum tmp = key->p;
-	key->p = key->q;
-	key->q = tmp;
+    unsigned swap_pq = mp_cmp_hs(key->q, key->p);
+    mp_cond_swap(key->p, key->q, swap_pq);
+    mp_free(key->iqmp);
+    key->iqmp = mp_invert(key->q, key->p);
 
-	freebn(key->iqmp);
-	key->iqmp = modinv(key->q, key->p);
-        if (!key->iqmp)
-            return false;
-    }
-
-    /*
-     * Ensure iqmp * q is congruent to 1, modulo p.
-     */
-    n = modmul(key->iqmp, key->q, key->p);
-    cmp = bignum_cmp(n, One);
-    freebn(n);
-    if (cmp != 0)
-	return false;
-
-    return true;
+    return ok;
 }
 
 void rsa_ssh1_public_blob(BinarySink *bs, struct RSAKey *key,
                           RsaSsh1Order order)
 {
-    put_uint32(bs, bignum_bitcount(key->modulus));
+    put_uint32(bs, mp_get_nbits(key->modulus));
     if (order == RSA_SSH1_EXPONENT_FIRST) {
         put_mp_ssh1(bs, key->exponent);
         put_mp_ssh1(bs, key->modulus);
@@ -459,8 +324,8 @@ int rsa_ssh1_public_blob_len(void *data, int maxlen)
     /* Expect a length word, then exponent and modulus. (It doesn't
      * even matter which order.) */
     get_uint32(src);
-    freebn(get_mp_ssh1(src));
-    freebn(get_mp_ssh1(src));
+    mp_free(get_mp_ssh1(src));
+    mp_free(get_mp_ssh1(src));
 
     if (get_err(src))
 	return -1;
@@ -472,19 +337,19 @@ int rsa_ssh1_public_blob_len(void *data, int maxlen)
 void freersapriv(struct RSAKey *key)
 {
     if (key->private_exponent) {
-	freebn(key->private_exponent);
+	mp_free(key->private_exponent);
         key->private_exponent = NULL;
     }
     if (key->p) {
-	freebn(key->p);
+	mp_free(key->p);
         key->p = NULL;
     }
     if (key->q) {
-	freebn(key->q);
+	mp_free(key->q);
         key->q = NULL;
     }
     if (key->iqmp) {
-	freebn(key->iqmp);
+	mp_free(key->iqmp);
         key->iqmp = NULL;
     }
 }
@@ -493,11 +358,11 @@ void freersakey(struct RSAKey *key)
 {
     freersapriv(key);
     if (key->modulus) {
-	freebn(key->modulus);
+	mp_free(key->modulus);
         key->modulus = NULL;
     }
     if (key->exponent) {
-	freebn(key->exponent);
+	mp_free(key->exponent);
         key->exponent = NULL;
     }
     if (key->comment) {
@@ -642,7 +507,7 @@ static int rsa2_pubkey_bits(const ssh_keyalg *self, ptrlen pub)
         return -1;
 
     rsa = container_of(sshk, struct RSAKey, sshk);
-    ret = bignum_bitcount(rsa->modulus);
+    ret = mp_get_nbits(rsa->modulus);
     rsa2_freekey(&rsa->sshk);
 
     return ret;
@@ -738,8 +603,7 @@ static bool rsa2_verify(ssh_key *key, ptrlen sig, ptrlen data)
     struct RSAKey *rsa = container_of(key, struct RSAKey, sshk);
     BinarySource src[1];
     ptrlen type, in_pl;
-    Bignum in, out;
-    bool toret;
+    mp_int *in, *out;
 
     BinarySource_BARE_INIT(src, sig.ptr, sig.len);
     type = get_string(src);
@@ -751,28 +615,27 @@ static bool rsa2_verify(ssh_key *key, ptrlen sig, ptrlen data)
      * BUG_SSH2_RSA_PADDING at the other end, we tolerate it if it's
      * there.) So we can't use get_mp_ssh2, which enforces that
      * leading-byte scheme; instead we use get_string and
-     * bignum_from_bytes, which will tolerate anything.
+     * mp_from_bytes_be, which will tolerate anything.
      */
     in_pl = get_string(src);
     if (get_err(src) || !ptrlen_eq_string(type, "ssh-rsa"))
 	return false;
 
-    in = bignum_from_bytes(in_pl.ptr, in_pl.len);
-    out = modpow(in, rsa->exponent, rsa->modulus);
-    freebn(in);
+    in = mp_from_bytes_be(in_pl);
+    out = mp_modpow(in, rsa->exponent, rsa->modulus);
+    mp_free(in);
 
-    toret = true;
+    unsigned diff = 0;
 
-    size_t nbytes = (bignum_bitcount(rsa->modulus) + 7) / 8;
+    size_t nbytes = (mp_get_nbits(rsa->modulus) + 7) / 8;
     unsigned char *bytes = rsa_pkcs1_signature_string(nbytes, &ssh_sha1, data);
     for (size_t i = 0; i < nbytes; i++)
-	if (bytes[nbytes-1 - i] != bignum_byte(out, i))
-	    toret = false;
+        diff |= bytes[nbytes-1 - i] ^ mp_get_byte(out, i);
     smemclr(bytes, nbytes);
     sfree(bytes);
-    freebn(out);
+    mp_free(out);
 
-    return toret;
+    return diff == 0;
 }
 
 static void rsa2_sign(ssh_key *key, const void *data, int datalen,
@@ -780,8 +643,8 @@ static void rsa2_sign(ssh_key *key, const void *data, int datalen,
 {
     struct RSAKey *rsa = container_of(key, struct RSAKey, sshk);
     unsigned char *bytes;
-    int nbytes;
-    Bignum in, out;
+    size_t nbytes;
+    mp_int *in, *out;
     const struct ssh_hashalg *halg;
     const char *sign_alg_name;
 
@@ -796,24 +659,24 @@ static void rsa2_sign(ssh_key *key, const void *data, int datalen,
         sign_alg_name = "ssh-rsa";
     }
 
-    nbytes = (bignum_bitcount(rsa->modulus) + 7) / 8;
+    nbytes = (mp_get_nbits(rsa->modulus) + 7) / 8;
 
     bytes = rsa_pkcs1_signature_string(
         nbytes, halg, make_ptrlen(data, datalen));
-    in = bignum_from_bytes(bytes, nbytes);
+    in = mp_from_bytes_be(make_ptrlen(bytes, nbytes));
     smemclr(bytes, nbytes);
     sfree(bytes);
 
     out = rsa_privkey_op(in, rsa);
-    freebn(in);
+    mp_free(in);
 
     put_stringz(bs, sign_alg_name);
-    nbytes = (bignum_bitcount(out) + 7) / 8;
+    nbytes = (mp_get_nbits(out) + 7) / 8;
     put_uint32(bs, nbytes);
     for (size_t i = 0; i < nbytes; i++)
-	put_byte(bs, bignum_byte(out, nbytes - 1 - i));
+	put_byte(bs, mp_get_byte(out, nbytes - 1 - i));
 
-    freebn(out);
+    mp_free(out);
 }
 
 const ssh_keyalg ssh_rsa = {
@@ -852,7 +715,7 @@ void ssh_rsakex_freekey(struct RSAKey *key)
 
 int ssh_rsakex_klen(struct RSAKey *rsa)
 {
-    return bignum_bitcount(rsa->modulus);
+    return mp_get_nbits(rsa->modulus);
 }
 
 static void oaep_mask(const struct ssh_hashalg *h, void *seed, int seedlen,
@@ -885,7 +748,7 @@ void ssh_rsakex_encrypt(const struct ssh_hashalg *h,
                         unsigned char *in, int inlen,
                         unsigned char *out, int outlen, struct RSAKey *rsa)
 {
-    Bignum b1, b2;
+    mp_int *b1, *b2;
     int k, i;
     char *p;
     const int HLEN = h->hlen;
@@ -918,7 +781,7 @@ void ssh_rsakex_encrypt(const struct ssh_hashalg *h,
      */
 
     /* k denotes the length in octets of the RSA modulus. */
-    k = (7 + bignum_bitcount(rsa->modulus)) / 8;
+    k = (7 + mp_get_nbits(rsa->modulus)) / 8;
 
     /* The length of the input data must be at most k - 2hLen - 2. */
     assert(inlen > 0 && inlen <= k - 2*HLEN - 2);
@@ -961,24 +824,24 @@ void ssh_rsakex_encrypt(const struct ssh_hashalg *h,
      * Now `out' contains precisely the data we want to
      * RSA-encrypt.
      */
-    b1 = bignum_from_bytes(out, outlen);
-    b2 = modpow(b1, rsa->exponent, rsa->modulus);
+    b1 = mp_from_bytes_be(make_ptrlen(out, outlen));
+    b2 = mp_modpow(b1, rsa->exponent, rsa->modulus);
     p = (char *)out;
     for (i = outlen; i--;) {
-	*p++ = bignum_byte(b2, i);
+	*p++ = mp_get_byte(b2, i);
     }
-    freebn(b1);
-    freebn(b2);
+    mp_free(b1);
+    mp_free(b2);
 
     /*
      * And we're done.
      */
 }
 
-Bignum ssh_rsakex_decrypt(const struct ssh_hashalg *h, ptrlen ciphertext,
-                          struct RSAKey *rsa)
+mp_int *ssh_rsakex_decrypt(const struct ssh_hashalg *h, ptrlen ciphertext,
+                              struct RSAKey *rsa)
 {
-    Bignum b1, b2;
+    mp_int *b1, *b2;
     int outlen, i;
     unsigned char *out;
     unsigned char labelhash[64];
@@ -992,18 +855,18 @@ Bignum ssh_rsakex_decrypt(const struct ssh_hashalg *h, ptrlen ciphertext,
 
     /* The length of the encrypted data should be exactly the length
      * in octets of the RSA modulus.. */
-    outlen = (7 + bignum_bitcount(rsa->modulus)) / 8;
+    outlen = (7 + mp_get_nbits(rsa->modulus)) / 8;
     if (ciphertext.len != outlen)
         return NULL;
 
     /* Do the RSA decryption, and extract the result into a byte array. */
-    b1 = bignum_from_bytes(ciphertext.ptr, ciphertext.len);
+    b1 = mp_from_bytes_be(ciphertext);
     b2 = rsa_privkey_op(b1, rsa);
     out = snewn(outlen, unsigned char);
     for (i = 0; i < outlen; i++)
-        out[i] = bignum_byte(b2, outlen-1-i);
-    freebn(b1);
-    freebn(b2);
+        out[i] = mp_get_byte(b2, outlen-1-i);
+    mp_free(b1);
+    mp_free(b2);
 
     /* Do the OAEP masking operations, in the reverse order from encryption */
     oaep_mask(h, out+HLEN+1, outlen-HLEN-1, out+1, HLEN);
@@ -1038,7 +901,7 @@ Bignum ssh_rsakex_decrypt(const struct ssh_hashalg *h, ptrlen ciphertext,
     b1 = get_mp_ssh2(src);
     sfree(out);
     if (get_err(src) || get_avail(src) != 0) {
-        freebn(b1);
+        mp_free(b1);
         return NULL;
     }
 
diff --git a/sshrsag.c b/sshrsag.c
index fad23d1a..55868fbd 100644
--- a/sshrsag.c
+++ b/sshrsag.c
@@ -5,13 +5,13 @@
 #include <assert.h>
 
 #include "ssh.h"
+#include "mpint.h"
 
 #define RSA_EXPONENT 37		       /* we like this prime */
 
 int rsa_generate(struct RSAKey *key, int bits, progfn_t pfn,
 		 void *pfnparam)
 {
-    Bignum pm1, qm1, phi_n;
     unsigned pfirst, qfirst;
 
     key->sshk.vt = &ssh_rsa;
@@ -55,7 +55,7 @@ int rsa_generate(struct RSAKey *key, int bits, progfn_t pfn,
     /*
      * We don't generate e; we just use a standard one always.
      */
-    key->exponent = bignum_from_long(RSA_EXPONENT);
+    mp_int *exponent = mp_from_integer(RSA_EXPONENT);
 
     /*
      * Generate p and q: primes with combined length `bits', not
@@ -65,19 +65,15 @@ int rsa_generate(struct RSAKey *key, int bits, progfn_t pfn,
      * a prime e, we can simplify the criterion.)
      */
     invent_firstbits(&pfirst, &qfirst);
-    key->p = primegen(bits / 2, RSA_EXPONENT, 1, NULL,
-		      1, pfn, pfnparam, pfirst);
-    key->q = primegen(bits - bits / 2, RSA_EXPONENT, 1, NULL,
-		      2, pfn, pfnparam, qfirst);
+    mp_int *p = primegen(bits / 2, RSA_EXPONENT, 1, NULL,
+                            1, pfn, pfnparam, pfirst);
+    mp_int *q = primegen(bits - bits / 2, RSA_EXPONENT, 1, NULL,
+                            2, pfn, pfnparam, qfirst);
 
     /*
      * Ensure p > q, by swapping them if not.
      */
-    if (bignum_cmp(key->p, key->q) < 0) {
-	Bignum t = key->p;
-	key->p = key->q;
-	key->q = t;
-    }
+    mp_cond_swap(p, q, mp_cmp_hs(q, p));
 
     /*
      * Now we have p, q and e. All we need to do now is work out
@@ -85,27 +81,31 @@ int rsa_generate(struct RSAKey *key, int bits, progfn_t pfn,
      * and (q^-1 mod p).
      */
     pfn(pfnparam, PROGFN_PROGRESS, 3, 1);
-    key->modulus = bigmul(key->p, key->q);
+    mp_int *modulus = mp_mul(p, q);
     pfn(pfnparam, PROGFN_PROGRESS, 3, 2);
-    pm1 = copybn(key->p);
-    decbn(pm1);
-    qm1 = copybn(key->q);
-    decbn(qm1);
-    phi_n = bigmul(pm1, qm1);
+    mp_int *pm1 = mp_copy(p);
+    mp_sub_integer_into(pm1, pm1, 1);
+    mp_int *qm1 = mp_copy(q);
+    mp_sub_integer_into(qm1, qm1, 1);
+    mp_int *phi_n = mp_mul(pm1, qm1);
     pfn(pfnparam, PROGFN_PROGRESS, 3, 3);
-    freebn(pm1);
-    freebn(qm1);
-    key->private_exponent = modinv(key->exponent, phi_n);
-    assert(key->private_exponent);
+    mp_free(pm1);
+    mp_free(qm1);
+    mp_int *private_exponent = mp_invert(exponent, phi_n);
     pfn(pfnparam, PROGFN_PROGRESS, 3, 4);
-    key->iqmp = modinv(key->q, key->p);
-    assert(key->iqmp);
+    mp_free(phi_n);
+    mp_int *iqmp = mp_invert(q, p);
     pfn(pfnparam, PROGFN_PROGRESS, 3, 5);
 
     /*
-     * Clean up temporary numbers.
+     * Populate the returned structure.
      */
-    freebn(phi_n);
+    key->modulus = modulus;
+    key->exponent = exponent;
+    key->private_exponent = private_exponent;
+    key->p = p;
+    key->q = q;
+    key->iqmp = iqmp;
 
     return 1;
 }
diff --git a/sshserver.h b/sshserver.h
index 2dc43d95..4f720061 100644
--- a/sshserver.h
+++ b/sshserver.h
@@ -62,7 +62,7 @@ char *auth_ssh1int_challenge(AuthPolicy *, unsigned method, ptrlen username);
 bool auth_ssh1int_response(AuthPolicy *, ptrlen response);
 
 struct RSAKey *auth_publickey_ssh1(
-    AuthPolicy *ap, ptrlen username, Bignum rsa_modulus);
+    AuthPolicy *ap, ptrlen username, mp_int *rsa_modulus);
 /* auth_successful returns false if further authentication is needed */
 bool auth_successful(AuthPolicy *, ptrlen username, unsigned method);
 
diff --git a/testbn.c b/testbn.c
deleted file mode 100644
index 32b1b77c..00000000
--- a/testbn.c
+++ /dev/null
@@ -1,275 +0,0 @@
-/*
- * testbn.c: standalone test program for the bignum code.
- */
-
-/*
- * Accepts input on standard input, in the form generated by
- * testdata/bignum.py.
- */
-
-#include <assert.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <ctype.h>
-
-#include "ssh.h"
-#include "sshbn.h"
-
-void modalfatalbox(const char *p, ...)
-{
-    va_list ap;
-    fprintf(stderr, "FATAL ERROR: ");
-    va_start(ap, p);
-    vfprintf(stderr, p, ap);
-    va_end(ap);
-    fputc('\n', stderr);
-    exit(1);
-}
-
-int random_byte(void)
-{
-    modalfatalbox("random_byte called in testbn");
-    return 0;
-}
-
-void queue_idempotent_callback(IdempotentCallback *ic) { assert(0); }
-
-#define fromxdigit(c) ( (c)>'9' ? ((c)&0xDF) - 'A' + 10 : (c) - '0' )
-
-/* For Unix in particular, but harmless if this main() is reused elsewhere */
-const bool buildinfo_gtk_relevant = false;
-
-int main(int argc, char **argv)
-{
-    char *buf;
-    int line = 0;
-    int passes = 0, fails = 0;
-
-    printf("BIGNUM_INT_BITS = %d\n", (int)BIGNUM_INT_BITS);
-
-    while ((buf = fgetline(stdin)) != NULL) {
-        int maxlen = strlen(buf);
-        unsigned char *data = snewn(maxlen, unsigned char);
-        unsigned char *ptrs[5], *q;
-        int ptrnum;
-        char *bufp = buf;
-
-        line++;
-
-        q = data;
-        ptrnum = 0;
-
-        while (*bufp && !isspace((unsigned char)*bufp))
-            bufp++;
-        if (*bufp)
-            *bufp++ = '\0';
-
-        while (*bufp) {
-            char *start, *end;
-            int i;
-
-            while (*bufp && !isxdigit((unsigned char)*bufp))
-                bufp++;
-            start = bufp;
-
-            if (!*bufp)
-                break;
-
-            while (*bufp && isxdigit((unsigned char)*bufp))
-                bufp++;
-            end = bufp;
-
-            if (ptrnum >= lenof(ptrs))
-                break;
-            ptrs[ptrnum++] = q;
-            
-            for (i = -((end - start) & 1); i < end-start; i += 2) {
-                unsigned char val = (i < 0 ? 0 : fromxdigit(start[i]));
-                val = val * 16 + fromxdigit(start[i+1]);
-                *q++ = val;
-            }
-        }
-
-        if (ptrnum < lenof(ptrs))
-            ptrs[ptrnum] = q;
-
-        if (!strcmp(buf, "mul")) {
-            Bignum a, b, c, p;
-
-            if (ptrnum != 3) {
-                printf("%d: mul with %d parameters, expected 3\n", line, ptrnum);
-                exit(1);
-            }
-            a = bignum_from_bytes(ptrs[0], ptrs[1]-ptrs[0]);
-            b = bignum_from_bytes(ptrs[1], ptrs[2]-ptrs[1]);
-            c = bignum_from_bytes(ptrs[2], ptrs[3]-ptrs[2]);
-            p = bigmul(a, b);
-
-            if (bignum_cmp(c, p) == 0) {
-                passes++;
-            } else {
-                char *as = bignum_decimal(a);
-                char *bs = bignum_decimal(b);
-                char *cs = bignum_decimal(c);
-                char *ps = bignum_decimal(p);
-                
-                printf("%d: fail: %s * %s gave %s expected %s\n",
-                       line, as, bs, ps, cs);
-                fails++;
-
-                sfree(as);
-                sfree(bs);
-                sfree(cs);
-                sfree(ps);
-            }
-            freebn(a);
-            freebn(b);
-            freebn(c);
-            freebn(p);
-        } else if (!strcmp(buf, "modmul")) {
-            Bignum a, b, m, c, p;
-
-            if (ptrnum != 4) {
-                printf("%d: modmul with %d parameters, expected 4\n",
-                       line, ptrnum);
-                exit(1);
-            }
-            a = bignum_from_bytes(ptrs[0], ptrs[1]-ptrs[0]);
-            b = bignum_from_bytes(ptrs[1], ptrs[2]-ptrs[1]);
-            m = bignum_from_bytes(ptrs[2], ptrs[3]-ptrs[2]);
-            c = bignum_from_bytes(ptrs[3], ptrs[4]-ptrs[3]);
-            p = modmul(a, b, m);
-
-            if (bignum_cmp(c, p) == 0) {
-                passes++;
-            } else {
-                char *as = bignum_decimal(a);
-                char *bs = bignum_decimal(b);
-                char *ms = bignum_decimal(m);
-                char *cs = bignum_decimal(c);
-                char *ps = bignum_decimal(p);
-                
-                printf("%d: fail: %s * %s mod %s gave %s expected %s\n",
-                       line, as, bs, ms, ps, cs);
-                fails++;
-
-                sfree(as);
-                sfree(bs);
-                sfree(ms);
-                sfree(cs);
-                sfree(ps);
-            }
-            freebn(a);
-            freebn(b);
-            freebn(m);
-            freebn(c);
-            freebn(p);
-        } else if (!strcmp(buf, "pow")) {
-            Bignum base, expt, modulus, expected, answer;
-
-            if (ptrnum != 4) {
-                printf("%d: pow with %d parameters, expected 4\n", line, ptrnum);
-                exit(1);
-            }
-
-            base = bignum_from_bytes(ptrs[0], ptrs[1]-ptrs[0]);
-            expt = bignum_from_bytes(ptrs[1], ptrs[2]-ptrs[1]);
-            modulus = bignum_from_bytes(ptrs[2], ptrs[3]-ptrs[2]);
-            expected = bignum_from_bytes(ptrs[3], ptrs[4]-ptrs[3]);
-            answer = modpow(base, expt, modulus);
-
-            if (bignum_cmp(expected, answer) == 0) {
-                passes++;
-            } else {
-                char *as = bignum_decimal(base);
-                char *bs = bignum_decimal(expt);
-                char *cs = bignum_decimal(modulus);
-                char *ds = bignum_decimal(answer);
-                char *ps = bignum_decimal(expected);
-                
-                printf("%d: fail: %s ^ %s mod %s gave %s expected %s\n",
-                       line, as, bs, cs, ds, ps);
-                fails++;
-
-                sfree(as);
-                sfree(bs);
-                sfree(cs);
-                sfree(ds);
-                sfree(ps);
-            }
-            freebn(base);
-            freebn(expt);
-            freebn(modulus);
-            freebn(expected);
-            freebn(answer);
-        } else if (!strcmp(buf, "divmod")) {
-            Bignum n, d, expect_q, expect_r, answer_q, answer_r;
-            bool fail;
-
-            if (ptrnum != 4) {
-                printf("%d: divmod with %d parameters, expected 4\n", line, ptrnum);
-                exit(1);
-            }
-
-            n = bignum_from_bytes(ptrs[0], ptrs[1]-ptrs[0]);
-            d = bignum_from_bytes(ptrs[1], ptrs[2]-ptrs[1]);
-            expect_q = bignum_from_bytes(ptrs[2], ptrs[3]-ptrs[2]);
-            expect_r = bignum_from_bytes(ptrs[3], ptrs[4]-ptrs[3]);
-            answer_q = bigdiv(n, d);
-            answer_r = bigmod(n, d);
-
-            fail = false;
-            if (bignum_cmp(expect_q, answer_q) != 0) {
-                char *as = bignum_decimal(n);
-                char *bs = bignum_decimal(d);
-                char *cs = bignum_decimal(answer_q);
-                char *ds = bignum_decimal(expect_q);
-
-                printf("%d: fail: %s / %s gave %s expected %s\n",
-                       line, as, bs, cs, ds);
-                fail = true;
-
-                sfree(as);
-                sfree(bs);
-                sfree(cs);
-                sfree(ds);
-            }
-            if (bignum_cmp(expect_r, answer_r) != 0) {
-                char *as = bignum_decimal(n);
-                char *bs = bignum_decimal(d);
-                char *cs = bignum_decimal(answer_r);
-                char *ds = bignum_decimal(expect_r);
-
-                printf("%d: fail: %s mod %s gave %s expected %s\n",
-                       line, as, bs, cs, ds);
-                fail = true;
-
-                sfree(as);
-                sfree(bs);
-                sfree(cs);
-                sfree(ds);
-            }
-
-            freebn(n);
-            freebn(d);
-            freebn(expect_q);
-            freebn(expect_r);
-            freebn(answer_q);
-            freebn(answer_r);
-
-            if (fail)
-                fails++;
-            else
-                passes++;
-        } else {
-            printf("%d: unrecognised test keyword: '%s'\n", line, buf);
-            exit(1);
-        }
-
-        sfree(buf);
-        sfree(data);
-    }
-
-    printf("passed %d failed %d total %d\n", passes, fails, passes+fails);
-    return fails != 0;
-}
diff --git a/testdata/bignum.py b/testdata/bignum.py
deleted file mode 100644
index 15ffe319..00000000
--- a/testdata/bignum.py
+++ /dev/null
@@ -1,140 +0,0 @@
-# Generate test cases for a bignum implementation.
-
-import sys
-
-# integer square roots
-def sqrt(n):
-    d = long(n)
-    a = 0L
-    # b must start off as a power of 4 at least as large as n
-    ndigits = len(hex(long(n)))
-    b = 1L << (ndigits*4)
-    while 1:
-        a = a >> 1
-        di = 2*a + b
-        if di <= d:
-            d = d - di
-            a = a + b
-        b = b >> 2
-        if b == 0: break
-    return a
-
-# continued fraction convergents of a rational
-def confrac(n, d):
-    coeffs = [(1,0),(0,1)]
-    while d != 0:
-        i = n / d
-        n, d = d, n % d
-        coeffs.append((coeffs[-2][0]-i*coeffs[-1][0],
-                       coeffs[-2][1]-i*coeffs[-1][1]))
-    return coeffs
-
-def findprod(target, dir = +1, ratio=(1,1)):
-    # Return two numbers whose product is as close as we can get to
-    # 'target', with any deviation having the sign of 'dir', and in
-    # the same approximate ratio as 'ratio'.
-
-    r = sqrt(target * ratio[0] * ratio[1])
-    a = r / ratio[1]
-    b = r / ratio[0]
-    if a*b * dir < target * dir:
-        a = a + 1
-        b = b + 1
-    assert a*b * dir >= target * dir
-
-    best = (a,b,a*b)
-
-    while 1:
-        improved = 0
-        a, b = best[:2]
-
-        coeffs = confrac(a, b)
-        for c in coeffs:
-            # a*c[0]+b*c[1] is as close as we can get it to zero. So
-            # if we replace a and b with a+c[1] and b+c[0], then that
-            # will be added to our product, along with c[0]*c[1].
-            da, db = c[1], c[0]
-
-            # Flip signs as appropriate.
-            if (a+da) * (b+db) * dir < target * dir:
-                da, db = -da, -db
-
-            # Multiply up. We want to get as close as we can to a
-            # solution of the quadratic equation in n
-            #
-            #    (a + n da) (b + n db) = target
-            # => n^2 da db + n (b da + a db) + (a b - target) = 0
-            A,B,C = da*db, b*da+a*db, a*b-target
-            discrim = B^2-4*A*C
-            if discrim > 0 and A != 0:
-                root = sqrt(discrim)
-                vals = []
-                vals.append((-B + root) / (2*A))
-                vals.append((-B - root) / (2*A))
-                if root * root != discrim:
-                    root = root + 1
-                    vals.append((-B + root) / (2*A))
-                    vals.append((-B - root) / (2*A))
-
-                for n in vals:
-                    ap = a + da*n
-                    bp = b + db*n
-                    pp = ap*bp
-                    if pp * dir >= target * dir and pp * dir < best[2]*dir:
-                        best = (ap, bp, pp)
-                        improved = 1
-
-        if not improved:
-            break
-
-    return best
-
-def hexstr(n):
-    s = hex(n)
-    if s[:2] == "0x": s = s[2:]
-    if s[-1:] == "L": s = s[:-1]
-    return s
-
-# Tests of multiplication which exercise the propagation of the last
-# carry to the very top of the number.
-for i in range(1,4200):
-    a, b, p = findprod((1<<i)+1, +1, (i, i*i+1))
-    print "mul", hexstr(a), hexstr(b), hexstr(p)
-    a, b, p = findprod((1<<i)+1, +1, (i, i+1))
-    print "mul", hexstr(a), hexstr(b), hexstr(p)
-
-# Bare tests of division/modulo.
-prefixes = [2**63, int(2**63.5), 2**64-1]
-for nsize in range(20, 200):
-    for dsize in range(20, 200):
-        for dprefix in prefixes:
-            d = sqrt(3<<(2*dsize)) + (dprefix<<dsize)
-            for nprefix in prefixes:
-                nbase = sqrt(3<<(2*nsize)) + (nprefix<<nsize)
-                for modulus in sorted({-1, 0, +1, d/2, nbase % d}):
-                    n = nbase - (nbase % d) + modulus
-                    if n < 0:
-                        n += d
-                        assert n >= 0
-                    print "divmod", hexstr(n), hexstr(d), hexstr(n/d), hexstr(n%d)
-
-# Simple tests of modmul.
-for ai in range(20, 200, 60):
-    a = sqrt(3<<(2*ai-1))
-    for bi in range(20, 200, 60):
-        b = sqrt(5<<(2*bi-1))
-        for m in range(20, 600, 32):
-            m = sqrt(2**(m+1))
-            print "modmul", hexstr(a), hexstr(b), hexstr(m), hexstr((a*b) % m)
-
-# Simple tests of modpow.
-for i in range(64, 4097, 63):
-    modulus = sqrt(1<<(2*i-1)) | 1
-    base = sqrt(3*modulus*modulus) % modulus
-    expt = sqrt(modulus*modulus*2/5)
-    print "pow", hexstr(base), hexstr(expt), hexstr(modulus), hexstr(pow(base, expt, modulus))
-    if i <= 1024:
-        # Test even moduli, which can't be done by Montgomery.
-        modulus = modulus - 1
-        print "pow", hexstr(base), hexstr(expt), hexstr(modulus), hexstr(pow(base, expt, modulus))
-        print "pow", hexstr(i), hexstr(expt), hexstr(modulus), hexstr(pow(i, expt, modulus))
diff --git a/testdata/bignumtests.txt b/testdata/bignumtests.txt
deleted file mode 100644
index 4cb7b0d4..00000000
--- a/testdata/bignumtests.txt
+++ /dev/null
@@ -1,205 +0,0 @@
-mul 6fcb0ed13247be24ded416f0d08612eb67d81017568e424698c442e4d7454d64315ffb51ce7af0bc6450c372d95c35967fde3adf6cec11a5c1e60847eccc8b4329b600bf917dbe4cab07ab82fbc439b5300000000000000000000000000000000000000000000000000000000030000000000000000000000000000000000000000000adf6cec11a5c1e60847eccc8b4329b2e4d7454d64315ffb51ce7af0bc6450c372d95c35967fde3adf6cec11a5c1e60847eccc8b4329b600bf917dbe4cab07ab82fbc439b5300000000000000000000000000000000000000000000000000000000030000000000000000000000000000000000000000000adf6cec11a5c1e60847eccc8b4329b600bf917dbe4cab07ab82fbc439b5300000000000000000000000000600bf917dbe4cab07ab82fbc439b5300000000000000000000000000000000000000000000000000000000030000000000000000000000000000000000000005967fde3adf6cec11a5c1e60847eccc8b4329b600bf917dbe4cab07ab82fbc439b53000000000000000000000000000000000d0000000000000000000000030000000000000000000000000000000000000000000adf6cec11a5c1e60847eccc8b4329b2e4d7454d64315ffb51ce7af0bc6450c372d95c35967fde3adf6cec11a5c1e60847eccc8b4329b600bf917dbe4cab07ab82fbc439b530d000000000000000000000000000000000000000000000000000000030000000000000000000000000000000000000000000adf6cec11a5c1e60847eccc8b4329b600bf917dbe4cab07ab82fbc439b5300000000000000000000000000600bf917dbe4cab07ab82fbc439b53000000000000000000000000000000000000000000000000000000000300000000000000000000000000000000000000000000000000000000000000000000000000d2b00000000000000000000000000000000000000000d2b000000000000000000000000 5472abe25fd603c76d0790f25654cfcdad1c78b8d78f0043b544a82bd2f00000000000000000000000000000000000d2b00000000000000000000000000000000000000000d2b000000000000000000000000 
24e0b458bbaa8f7f910bb243b2d8072f7c19f6b6b5da853b24621fe88c2833151e92cc3e22d3127aa16eeda3bf38eb59768e3b212f87e19fc0a18bf71e12baa8322778957ba93757abb8f584595e6510d943b3bb1ca9de1f034a2a0c31ab11156d1da7181ba0163c761ce7bb7def818e7900f8dd1cdd5b5943111bcc50b9b7a5845a1da04c70edf907604814320c59c0cb2ca1171de6c5c3e74e1a9628397f2de04459f13ceda25b1e3e3e102cd59a09d74f61151af91514689bb5120cba14ce64981190c6641e440e5d757f352f2814605cf8a9d0e0d710a1da7181ba0163c761ce7bb7def818e7900f8dd1cdd5b5943111bcc50b9b7a5845a1da04c70edf907604814320c59c0cb2cb17fe9b421f43acbe48d7ddceecd1d544f07fcbbed77175dd4342c31fd7b27c18fe581d6d4205dd52575894ecd965741a727d5a3c0ca1900553a89d5beacce763a00a290ebc3588f5e2accd1bb7a9b9ed93326d9e18438a9eb3a493c5e8fbb1fc57d1b057019c415a1f71e1b8e9f23e387990c1108dbf518a6218d207ed544f07fcbbed77175dd4342c322761217bc596aa22891716241df1342c9d73f75885cf0720c7f897a146394f4551aa5845a1da04c70edf907604814320c59c1765ba1171de6c5c3e74e1a9628397f2de04459f13cee4d8a1e3e3e102cd59a09d74f61151af91514689bb5120cba14ce64981190c6641e4412a74839b40d064580e65b061f43259e13a4e3a31cf3a6cad202f84218ae48e7900f8dd1cdd5b5943111bcc50b9b7a62f891da04c70edf907604814320c59c0cb2cb17fe9b4cd233acbe48d7ddceecd1d544f07fcbbed77175dd4342c31fd7b27c18fe581d6d4205dd52575894ecd965741a727d5a3c0ca1900553a89d5beacce763a00a290ebc3588f5e2accd1bb7a9b9ed93326d9e18438a9eb3a493a86ad00cb201d045e09a2c8db901dff75c9866404b4ee3f723c308da22a716cecc595685e9a9fe7c607d914cf670366158da22a716cecc595685e9a9fe78087b9ec56609129e5000000000000000000000000000000015aca7200000000000000000000000000000000000000ad6539000000000000000000000000000000000000000000000000
-pow 4240f7064c1a41a5ec4dc53f528552ea5ec963dd373c59ca03b2f2a161cdf531d1cd5c30cc48280deedb3656dcca416 393e4b8b7fdbb26aca528ce01295f4d736806e48ca53b076cb48e2039026b61dd4ae6356aa5d4be633db00df1263807 5a827999fcef32422cbec4d9baa55f4f8eb7b05d449dd426768bd642c199cc8aa57e41821d5c5161d458ff37ee41ed8 54b00f9069773b7bb477cf039383e812f645d2afa949378e86fb3a9576dbabe44e4ebc35cb7e3e3a566083dc4f258c8
-mul 169d8e69f29bd8497ec8ca970f390d39fca5aed949a931e1cf9f0a9177bd4d7156612a1841593fbc28aed186f471dc905dfe46114e392df944034cf 16a3aecd3581bb879be79d9072635f52b33ada60b6faeb0b5821e1eebd60000000000000000000000000000000000000000000000000000000000000000000000000024e5a0729c792d783819919713a5161cfaeb69da6faeb0b5821e1eebd600000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000007ef527ae46e531d 2000000000000000000000000000000000000000069b4e4f331f6204e5bd7808dc9b8107a41d5c358230b15a39b9e342c17f3ea02e43ca6f5293550a6d7b38497443725b83d5dbb4b20862b4375bcbb494a7891dc7248727bda5a258b93b3c95bb633a3d26d7f6074ed0bf947b23a29ca9d546ae4e3bfe71a15d583c9b4e5f82f8f022218291fb6d5aa4eb6f30aefa9398a5a75e415d150682a0a000000000000000000000000000000000000000000000000000000000000000000000000000000b373462d6da27dab3fffe3684d57a1835d8e5d3384bdde0887e1827a8a512c47c8eb28686773a45b009aaffdf5cf2d57b1fe1e16122b6ef02c6c990457ba9a9571873
-pow 8483e4834c 727c9716ffb764d5 88888888888888888888888888888888cec888888888888888888888888888888888888888883978888888d8888888888888888888888888d8888888888d8888888b504f333f9de6485 5ad35993ffd525c7f0e2fdd30c77eca0ea7a7f328a6a998bcb63271c1f52ad472e33958ef17584e1c7a2b8f9e4942ccb7594e0b38c331a69f89ae7b42fbcfba9509aa610a6b34c5e2ef
-pow 8483ee0c9834834c 727c9716ffb764d5 888888888888888888888888888888880068888888888888888888888888888888888888883978888888d8888888888888888888888888d8888888888d88888888888888889de6485 1c04e7e3a7129c5ef42180d2f8fbe0804b8f3a35b8f2d593b4ec0ce478fd78c4e22170c03d631e91c4e8d84d7606716b0576afa60dade3fa4fb77f0fd75fb4f0e2863a749ef875a7f
-pow 8483ee0c98e4834c 727c9716ffb764d5 88888888888888888888888888888888e91888888888888839778888888d88888888888888888888888888d8888888888888888888888888d8888888888d8888888b504f333f9de6485 17ff9b5d91ce08685fdd51715f0aff77216ac4911e4fb1c22bdb17609f754340d1e901c7e38f14c5569b13136b7991fcdb23d82adcb67a485b628eb01b5d1274a5eef39fd5a42dddd3d
-pow 8483ee0c9834834c 727c9716ffb764d5 88888888888888888888888888888888d86dd488888888888888888888888888888888888888888888888888d8888888d8888888b504f333f9de6485 cd6d82f283b830e0e4f42243e661fc09cf379d7ade2ee39eedd941fd93db77f9ccebec1ca7ed7b321b2378dbf1238ffa5f2be38bf9d7688eb1652f7
-pow 840000888888888888888888888888888888880000000000 727c9716ffb764d5 88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888d888888d8888888b504f333f9de6485 6ede8a488a46e535a5c25c17236cc66c4e49e5de6bd31399366b19a9346ec95b5b36126726d63241a3031e41c0c1345fa5a17a3c48f141e951f4152c9ecdc8
-pow 8483ee0c9834834c 727c9716ffb764d5 888888888888888888888888888888888888888888888888888888888888888888888888888888888888d8888888888d8888888b504f333f9d88888888888e6485 19090849092b2f94c22605d47301625de4b2a34f57c35796f6619d33b73b15ed917f776577f9f219adb6b4c7b5dce0ed6e4963d38150ad104b8bf1a03a8da74590
-pow 8483e4834c 727c9716ffb764d5 88888888888888888888888888888888888888888888888888888888888888888888888888883978888888d8888888888888888888888888d8888888888d8888888b504f333f9de6485 60ca03e099dbf17c93db528e84638037419e28fff7639cc1d9af02d62407908207a3d622c4358491b93878580d8eca7b2a1fe89bcafa1ae32cba9248162ccb65cd86c717c6ad76369ac
-pow 8483ee0c9834834c 727c9716ffb764d5 888888888888888888888888888888888888888888888888888888888888888888888888883978888888d8888888888888888888888888d8888888888d88888888888888889de6485 16e28809be035b05923c747a4ee9d75306a8529aa954028057efe5d72a09666d51afcac7ce03ddd6fbebad1e1ec67f64b7826d00123ca472abde81b56e8a4c9e532f759e60e9f1054
-pow 8483ee0c9834834c 727c9716ffb764d5 88888888888888888888888888888888888888888888886839788888888888888888888d8888888888d888888d8888888888888888888888888d8888888888d8888888b504f333f9de6485 11a1c98f64b4d3a2a926fd1237e7f39c98eb6ef7a67ed9ccfd6d6ade87e0198469ea21c3fa8fe67e87dce8e4d59da5a7f2623a074377352f7a4246612254b1672e68a8f7fb5e6574cc84ac
-pow 8483ee0c98e4834c 727c9716ffb764d5 88888888888888888888888888888888888888888888888839778888888d88888888888888888888888888d8888888888888888888888888d8888888888d8888888b504f333f9de6485 215d10c7bc86820b2c783e4b5ff4d2d6e57f3715d24cb9a7b2902b7513c261ef0c089929b26a52fcf6897c75519e83782761ef8f0ff816cf61e45de89da6a148cbab7aa93ea5d65e67b
-pow 8483ee0c9834834c 727c9716ffb764d5 888888888888888888888888888888888888888888888888888888888888888888888883978888888d8888888888888888888888888d8888888888d8888888b504f333f9de6485 2ce9a51423d4223564b726e77575aab190e26cb0d1ce2bdc29c6f0e87599808df40a2e7062d9e9e410e54b02e7ef92bed6561b72b7b0b0fc662c7d04a242aeb7229dae5f1a8af1
-pow 84 727c9716ffb764d5 8888888888888888888888888888888888888888d888888d8888888888888888888888d888888d88888888888888888888b504f333f9de6485 5c42fe284175b01a14b34e5c89b69097ec1d5dc9373285b82a946ea41899593ae43b7a482cb3cdcc5e21f97cfcc5ebdbffb5cafb60d252a2a9
-pow 8483ee0c9834834c 727c9716ffb764d5 8888888888888888888888888888888888888888888888880888888888888888888d888888d8888888b504f38888888888888888888888888888888833f9de6485 75ced9c9d94e6ffc16cfa101b9aa8dd547b25e986cc26a297ed2a4ad88e6f6e4a0e31484600194248d8e5154466191ddd67daaf3514cbfce78e0d47be37d1bf81b
-pow 8483ee0c9834834c 727c9716ffb764d5 888888888888888888888888888888888888888888888888d888888d888888888888888888888888888d888888d8888888b504f333f9de6485 537529b1dbacd7ae8e63e0356947a8ed5a7e9e770290a38e135b46183731d6276f80c68817076fae903b6c7cbba7bdcef90a70004b5a94ca1e
-pow 8483c 727c9716ffb764d5 888888888888888888888888888888888888888888d888888d8888888b504f333f9de64888d8888888b504f333f9de6485 53244b21a7a17ff24160c5b96a0a98515a52296f44e0bbfbc48980a08a1f89182ae3977ba35b0e1d7d57075dc63002e640
-pow 8483ee0c9834834c 727c9716ffb764d5 888f9de6488888888888888888888888888888888888888888888888888888888888888888d888888d8888888b504f333f9de6485 14bf546b6c806f8b9b84671e7a46096966f1fe1e8de4c450fe89b57fd5408c36c8060eb169a57e76aa7ae76f330ef137bd9566834
-pow 8483ee0c9834834c 727c9716ffb764d5 888888888888888888888888888888888888888888888888888888888888888888888888888888888888888d8888888b504f333f9de6485 511219ae4968000dae7b6d4ae111f7e8fc19dc33020ecaba3a7f45b1c1ddfcc4cbd2539307ec619481afb4d449ed8ed7a76f668558533
-pow 8483ee0c9834834c 727c9716ffb764d5 8888888888888888888888888888888888888888888888d8888888b504f333f9de888888888888888888888888888d888888d8888888b504f333f9de64de88888888888888888885 8135bbedf63d766808ea9e043323609d85eed687eebb6803271304ec68dc87de8ce362c3f16c9ac31e6dcb9542c9e7e2518e1fee06950d139aaebf9c2a7f827c26a9889491605470
-pow 8483ee0c9834834c 727c9716ffb764d5 88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888d888888d8888888b504f333f9de6485 44ed50d437512d99e38b02141a5d98fe47217f6cf5ebd237c8e79772eeac3ac2f33c13bbd80e9dbf3925147dfd135486611066110c4e1bbfe9655639b419941600032a2f9af21
-pow 34c 727c9716ffb764d5 8888888888888888888888888888888888888888888888888888888d888888d8888888b504f333f9de6485 2928a55555c4efca16738b016cb559173f877bed11969bc281440c13b7caa65ce9af4067e7f1c21d0b21f1
-pow 8483ee0c9834834c 727c9716ffb764d5 88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888d888888d8888888b504f6485 4c3ef21c414369560eab90c986b78a156045825bcdeb9d9bcea091916549f8f58ba49d0241cda890a4e2a9177f12bc71c4deec9a31b0f927582c0b5091
-pow 8483ee0c9834834c 727c9716ffb764d5 8888888888888888888888888888888888888888886888888888888888888888888d888888d8888888b504f333f9de6488b504f333f9de648585 6f03a4f2c77bec9c99564930f7972eae6f7f63c44a96e3e4747870d16aca5552300da1b7c705fa23b31e612f16275686e43b73a899a65d0e13a6
-pow 8483ec 727c97c6ffb764d5 8888888888888888888888888888888888888888888888888888888888888888888d888888d8888888b504f333f9de6485 64b634614732913628140a72e9f4e2b59f8d75427ccc5c990e989aa358d7e8ee067a0f13f2fba3f7e747b1ee3c5f105515
-pow 8483be0c9834834c 727c9716ffb764d5 88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888d888888d8888888b504f333f9de6485 2a7e622ef5d374e587a48251eac21656ad40922326ee152ff20d9c8746a879e27a4fd414938da900c0d62480ae857e17b4a8fd9b404f7d6f1f13059313b073eff
-pow 8483ee0c9834834c 727c9716ffb764a5 8888888888888888888888888888888888888888888888888888888888888888888888888888888888d888f333f9de6485 c97e716de0974491160fb8b2a2cc16ad5abbc07266e1e6cf25ca3a595981fd49cf1c526e7c04f5ed77df591ec327549e3
-pow 8483ee0c9834834c 727c9716ffb764d5 8888888888888888888888888888888888888888888888888888888888888888888d888888d8888d8888888b504f333f9de888888b504f333f9de6485 48b31ef21735201bbe4d2e6bae6cd76b1c90ddbaeb639369150b010f1b170edc966893c76d6d7edf21078529530f5232731ed399b19cd4dedcf9f8c69
-pow 8483ee0c9834834c 727c9716ffb764d5 8888888888888888888888888888888888888888888888888888888888888888888d888888d88888888888888888888888888888888888888b504f333f9de64888888888885 2ce54ff4a8a9dce961c4cfaf5a5a05b94b27cf215cd60d6b6c97da184582b90986bcfe25c0245a1a04967cc4dbebb78645fc060c58128a8252676d435c0927b987829d07e21
-pow 8483ee0c9834834c 727c9716ffb764d5 8888888888888888888888888888888888888888888888888888888888888888888888888888888888888888d8888888d8888888b504f333f9de6485 109e5bfc2d7df8823079a2dfe19129d443268f91fecb08d0b34aaa704fce9d085656a5e90bcda51c496e0b652a8aa8d4230286f8f3e0ccf9ade06fd2
-pow 8483ee0c9834834c 727c9716ffb764d5 8888888888888888888888888888888888888888888888888888888888888888e1ed888888d8888888b504f333f9de6485 7a4973c5e0e6bb423afc9f42658e6a9325485e0a9291c3805d5a3d766795746fc6f7270c79442d8c90888427d5793e9808
-pow 8483ee0c9834834c 727c9716ffb764d5 8888888888888888888888888888888888888888888888888888888888888888d08d888888d8888888b504f333f9de6485 6a862f8f33f51553d95fe131ece8cff25b79c052de028cf66b485fd46b5f4cf913962978b56850a8e5ec9bb07fd6f5f2c
-pow 8400000000000000 727c9716ffb764d5 8888888888888888888888888888888888888888888888888888888888888888888d888888d8888888b504f333f9de6485 5851978fe5efd8a522ac7c2d0e17f7aeaebc9f8c4a77a6c7bd3d734c47f247a2b9b291961aea0b69b53bbc9906889e3912
-pow 8483ee0c9834834c 727c9716ffb764d5 8888888888888888888888888888888888888888888888888888888888888889888d888888d8888888b504f333f9de6485 1e7770c5546c601df48ccb9c929dfc9f36eaab78911c089e0b3459f0d80c80bfce2eded75c3d205a6d998aa628f3e9396b
-pow 8483ee0c9834834c 727c9716ffb764d5 8888888888888888888888888888888888888888888888880888888888888888888d888888d8888888b504f333f9de6485 2909fa1be537ec2cfd106f3f11664492283789e3f376e6179c170a4e080adc6a8a6850b77d35811c86b8dba0e6a98768c4
-modmul 13988e 194bd642cccccccccccccccccccccccccccccccccccccccccccccccccc22cbec4d97999fcef32b7b05d449dd426768bd6462cbec4d9ba58 5a827999fcef32422cbec4d97999fcef32422cbec4d9baa55baa55f4f8eb7b05d449dd426768bd642cccccccccccccccccccccccccccccccc1999fcef32422cbec4d97999fcef324229cc8aa57 1efb3f63db5da66666666666666666666666666666666666666666666596306eb2d65742a759166d49874c30845d5019afffd7a6b2db4bf19cd0
-modmul 139dddddddddddddddddddddddddddddd88e 194bd642ccccccccccccccccccccccc22cbec4d97999fcef32462cbec4d9ba58 5a8fcef32422cbec4d9baa55baa55f4f8eb7b05d449dd426768bd642cccccccccccccccccccccccccccccccc1999fcef32422cbec4d97999fcef324229cc8aa57 1f03a55c086d3a06d3a06d3a06d3a0602aba523c81830925e79c8b3b98eb9562b9d3f70790b415a26ba251983b4bbf79cd0
-pow 8483ee0c9834834c 727c9716ffb764d5 8888888888888888888888888888888888888888888888883978888888d8888888888888888888888888d8888888888d8888888b504f333f9de6485 4a0d96e0c39349b7377b15b5bf7b49f447a37349045d96c7e9f5122852162c9762d8381161f8db975e9e820b66a7042c850734679de7a7f548c1ed6
-pow 8483ee0c9834834c 727c9716ffb764d5 88888c888888888888888888888888888888888d888888d888888192ea3333f9de6485 6434b339504f053e2a5ff58f64998f96f47aa632d6343459a9d2c1f77a0f2c2e942f77
-pow 8483ee0c9834834c 727c9716ffb764d5 888888888888888888888888888888888324888d888888d8888888b504faaaaaaaaaaaaaaaaaaaaa333f9de6485 27a8e8c85285125d5617ebab1f839ecd5e20a606b4bd2e997a338399feb25fd6144045ffc50aa71167c7203b8e1
-pow 8483ee0c9834834c 727c9716ffb764d5 8888888888888888888888888888888888888888888888888888888888888888d888888d8888888b504f333f9de6485 2c6b37a2df4595500b7460400ce897817970cfadaaf1c386eeba15e08aaa6ce24019713a30e1a2c459eb2e1dacb768b
-pow 8483ee0c9834834c 727c9716ffb764d5 88888888888888888888888d88888888888888888888888888888888888888888888888d88888888888888888888d888888d8888888b504f333f9de6485 526b82e86fed0f1046b22b974a17b14d5a76f8c5c91f886e9aabf47d38c30d2dcd55f16391b4b719308169b0a6ca6e96ccf689fa6d9866dd88f36118472
-pow 8483ee0c9834834c 727c9716ffb764d5 888888888888888888888888888888888888888888888888888888888888888f88888d888888d8888888b504f333f9de6485 156e2c168bf9cdbc4d37f0863b0748cf244afc19db0ff3e8d2801efc574dc9ffe094500fc828cc6349b9f9c64b1c6ba4d7ec
-pow 8483ee0c9834834c 727c9716ffb764d5 888888888888888888888888888888888888888888888888888d888888d5 7d604b4171f1ef090338e494f47645ee6ce9b7d410cc52894bd4f25ab281
-pow c 727c9716ffb764d5 888888888888888888888888888888888888888d888888d8888888b504888d8888888b504f333f9de648f333f9de6485 74231cb11902ffe7ee56597e45e007732106d1565aee8d97cbba145ecbb034e7dfecacdf37b942b1d1c6db411b8f1ad5
-pow 8483884d9834834c 727c9716ffb764d5 888888888888888888888888888888888888888d888888d8888888b504f333f98b8d888888888888888888888888d888888d85 5adc4c88b8c112c2089468b51a545d218a07874bd9402923db6d8ba76f5f0e6e681396838574843fb46638b54e8a2f9ac71651
-pow 8483ee0a9834834c 727c9716ffb764d5 888888888888888888888888888888888888888888888888888888888888d888888d8888888b504f333f9de6485 848898833195df7201ea83c1fd5b865de18b1f7f3535407f8721c743c68c0d1e119342dc99250d516a142e8b436
-pow 8483ee0c9834834c 227c9716ffb764d5 888888888888888888888888888888888888888888888888888888b504f333f9de6485 12131509fa930f74d86f4da97b87c7c752320130b8dcc83c2bff6218a9b722def1636b
-pow 8483ee0c9834834c 727c9716ffb764d5 8888888888888888888888888888888888888888888888888888888888888d888f333f9de6485 2be812603b8a9707dadde37bb5a61dd0444eb6f30eb721057009cc7adbcc8d6667011c0504e68
-pow 8483ee0c9834834c 727c9716ffb764d5 8888888888888888888888888888888888888888888888888888888888888888888d888888d8888888b504f333f9de6485 3ad25628fe61d80cd41e987f9325b8b74725dad97c564df113d78169d6bfcf643656caa89b7156faab66c982f61b9957ad
-pow 8483ee0c9838dd4c 727c9716ffb764d5 888888888888888888888888888888888888888d888888888d888888d8888888b88d8888888b504f333f9de6485 30e87a722c8f3086881e32a662cc14a75b1a9122aebe5b4f615d4e03df5b59db82a2e1425cc2434afab8b0550b2
-pow 8483ee0c9834834c 727c9716ffb764d5 888804888888888888888888888888888888888d888888d8888888b504f333f9de6485 32c13f9ea9459a14779564d65f101dd1f8640196a875f6dbff2cd4b7b5f7542520b044
-pow 8483ee0c9834834c 727c9716ffb764d5 888888888888888888888888888888888888888d888888d8888888d504f333f9de6485 28d0c477fd109217949a66291f4e9816908ccad97823991e4179f5a503bd53ebeb364c
-pow 8483ee0c9834834c 727c9716ffb764d5 888888888888888888888888888888888888888d888888d8888888b104f333f9de6485 6985a06f4a1c1eebf0304de7f55d13a09ccb9419448e56f213f5bb4e8c369f8e9bdd4f
-modmul 13988e 194bd642ccccccccccccccccccccccc22cbec4d97999fcef32462cbec4d9ba58 5a827999fcef32422cbec4d97999fcef32422cbec4d9baa55baa55f4f8eb7b05d449dd426768bd642cccccccccccccccccccccccccccccccc1999fcef32422cbec4d97999fcef324229cc8aa57 1efb3f63db5da66666666666666666596306eb2d65742a7590dbcdfa6b2db4bf19cd0
-pow 8483ee0c9834834c 727c9716ffb764d5 8888888888888888888888888888888888d88888888888d88888888888d8888888b504f333f9de6485 5152a5e418fd58e41ca43b901cd36f7c99566568f9ff9b1e20389574908fea406f4e5ca8dd32918a07
-pow 8483ee0c9834834c 727c9716ffb764d5 888888888888888888888888888888888888888d888888d8888888b504f333f9de6485 8296d34d3b715f19a9aa3a412b6f685f476201c389f11761eab46a627b44ab5ff640b0
-pow 8483ee0c9834834c 6275 8888888888888888888888888888888888d8888888b5485 6a603ec3b6c06541519dda19c60de2767dbf3e8e68ca54d
-pow 8483ee0c9834834c 727c9716ffb764d5 88888888888888888888888888888888d8888888b50485 3e299644f713778d7d21067fea7ee62cd137f93ee3dfe
-pow 8483ee0c9834834c 727c9716ffb764d5 8888888888888888888888d7888888b504f333f9de6485 732b121547e7602f411f5b39f04de7cc39b3561431abcd
-modmul 13988e 194c22cbec4d97999fcef32422cbec4d9ba58 5a827999fcef32422cbec4d97999fcef32422cbec4d9baa55baa55f4f8eb7b05d449dd426768bd642cccccccccccccccccccccccccccccccc1999fcef32422cbec4d97999fcef324229cc8aa57 1efb9d206eb2d65742a7590d6e7d6eb2db4bf19cd0
-modmul 13988e 194c58 5a827999fcef32422cbec4d97999fcef32422cbec4d9baa55baa55f4f8eb7b05dccccccc449dd426768bd642ccccccccccccccccccccccccccccc1cc199cc8aa57 1efbde498d0
-modmul 13988e 194c58 5a827999fcef3cccccccccccccccccc199cc8aa2422cbec4d97999fcef32422cbec4d9baa55baa55f4f8eb7b05d449dd426768bd642cccccccccccccccccccccccccccccccc199cc8aa57 1efbde498d0
-pow 1ba 1c9f25c5bfedd935654670094afa6b9b4037246529d83b65a4294670094afa6b9b4037246529d83b65a47101c8135b0eea5731ab552ea5f319ed806f8931c03fe68da4b41da004c 2d413cccfe779921165f626cdd52afa7c75bd82ea24eea128b0ea2c7f9bf720f6ce43dd2a1790e71ec b816f83baec979e163d1daccc1ed4a4779ec06ae40af4dd7f3f45bc8ced54c8626ee4adef4fbdcff0
-pow 1ba f25c5bfedd93565294670094afa6b9b4037246529d83b65a47101c8135b0eea5731ab552ea5f319ed806f8931c03fe68da4b41da004c 2d413cccfe779921165f626cdd52afa7c75bd82ea24eea1338b8db2160c10eae28b0ea2c7f9bf720f6ce43cce64552bf20c10eae28b0ea2c7f9bf720f6ce43dd2a1790e71ec 23254b53a1cd644a5a998acd7e2c38f65b81909837ccc446a615a73a583ca5c92ce358370acbe6dce93031e26ce1fada16a133bf29c3143b45ecd8bb47f8383b88e8b109bdc
-pow 1ba 1c9f25c5bfedd93565294670094afa6b9b4037246529d83b65a47101c8135b0eea5731ab552ea5f319d413cccfe779921165f626cdd52afae43dd2a1724eea133b45eb2160cce64552bf20c10eae28b0ea2c7f9bf720f6ce43dd2a1790e71ec 7625b7f3f6eb39f032c9212b5b62deda285df73ec12c5f00acecd70c678d010ea35311ce8f9efbc7 b5943aa9528f0519e72ccd24686656291dab86ff5916a589b9abafc14486c820b10557eba884e79
-pow 1ba 1c9f25c5bfedd93565294670094afa6b9b4037246529d83b65a47101c8135b0eea5731ab152ea5f319ed806f8931c03fe68da4b41da004c 2d413cccfe779921165f626cdd52afa7c75bd82ea24eea133b45eb2160cce64552bf20c10eae28b0ea2c7f9bf720f6ce43dd2a1790e713fe68da4bec 10dd194113310a6cc9121c1e94db2410406ab41712c45b11ac10783e1f6ddae8ec2a2a4868db3ac8a74a13b525ccbb9469c9431e98b0fdb39e575fc
-pow 10907dc1930690697b1371 fd4a14eabe7c7b369ec448e52aa97eb1fda3d54ba97b258f74dcf167280ecbca820000000004f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e509ad87 16a09e667f3bcc908b2fb1366ea957d3e3adec17512775099da2f590b0667322 11d53e8bf0e94f38a15acad76c89426e4282a01e21a5c8cb40363b3660fb6f51
-pow 10907dc1930690697b13714fd957d3e3adec175a2f590b054ba97b258f74dcf167280ecbca858737d5c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 16a09e667f3bcc908b2fb1366ea957d3e3adec17512775099d366ea957d3e3adec175a2f590b0667323 12b697e76f7294ee42a4630ae19ebda51c4d1a40669a86ceeb78adbdf58cb9faf3320fd3cd2cfb1a886
-pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 16a09e667f3bcc908b2fb136e06cf924c93ae6ea957d3e3adec17512775099da2f590b0667323 518f9910b7b9847e43ba7344c4868177aed30d7b277e0c0ad64a3dc1f992e865af889a0dccbe
-pow 10907dc1930690497b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33800907dc14a57d35cda01b923294ec1db2d23880e40908b2fb1366ea957d3e3adec17512775099da2f590b0667323 1 0
-pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b9409ad87 16a031d9e667f3bcc908b2fb1366ea957d3e3adec17512775099da2f590b0667323 14cc00562f66093169240d19685705e073c96c7547e5797c768440c5044c0e43566
-pow 10907dc193b2d23880e409ad87 16a09e660690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ecc908b2fb1366ea957d3e3adec17512775099da2f590b0667323 a03af414acae1a25bd359e46a395895fc8f3b5c96a3c25c3c1f290062cdb4972275c3eba22a21316f3339459ae54a6b5
-pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 16a09e667f3bcc966e08b2fb1366ea957d3e3adec17512775099da2f590b0667323 16869520abf32acaeac583b5781802b8b5dbce8aeed2c0b3a0db0e0359f3328a278
-pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b97d35cda01b923294ec1db2d23880e409667f3bcc908b2fb1366ea957d3e3adec17512775099da3f590b0667323 13e06cf924c93da01b923294ec1db2d23880eae2d00206c4813c36a0aac31d2db5e2148af132b6a099da2f590b0667323 da20997b7a493f76368d529949067b3639469e0c2d681588f7f76fbd12e1aa92b1eafa89a07b7159b8d5a4b17fcb3123
-pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923297 16a09e657d35cda01b923294ec1db2d2388067f3bcc908b2fb1366ea957d3e3adec17512775099da2f590b0667323 115eec036127564768648670be45c3b1b7696d51d2ec76a657a5e090dd674e8a31cc0829ff63b3aa0150c5b591933
-pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858 37d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923267f3bcc908b2fb1366ea957d3e3adec17512775099da2f590b0667323 450ce71a8eec9fcecab453e1566cf6525c70f6a69aa16a47c7d1ed754b0967c4764f6da6cf092fd49589912bc2bd3337f4
-pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 16a09e667f3bcc908b2fb1366ea957d3e3adec175127750af132b6a7c9c8a399da2f590b0667323 10bea717321c68ab600971b66e7e59047ad3682b4dbc1b64258affbe7b125af26b62c994ec365a3
-pow 10907dc19306906e4f92e14fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 16a09e667f3bcc908b2fb1366ea957d3e3adec175127bbbbbbbbbbbbbbbbbbbbbbbbb537bbb75099da2f590b0667323 aafd00a40f14e94e5427612c7c8a2342576108dde4e32964fd4d100e60426e210f63ac4d884850cc8035e904d44e38
-pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c 4a33804a57d35cda01b923294ec1db2d23880e409ad87 16a09e667f3bcc908b2fb1366ea957d3e3adec17512775099da2f590b0667323 1462290f3612e19fc67b888f7419d278efed8ef32fd37ec435410cd4601a3e4c
-pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 16a09e667f3bcc94a154ba97b258f74dcf167280e08b2fb1366ea957d3e3adec17512775099da2f590b0667323 14c84cc12c5d71de79fd96dcedf077c050baca2420aa20e33f38dcd5fbc46853150257db771c31dbda3bb53905
-pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c 39792e2dff6ec9ab294a33804a57d35cda01b923294ed23880e409ad87 16a09e667f3bcc908b2fb1366ea957d3e3adec17512775099da2f590b0667323 12d750a696d901ffb58cc9c2004431a44d2abd08923cf585e49dd2d95c70a001
-pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 16a09e667f3bc366ea957d3e3adec17512775099da2f54ba97b258f74dcf167280ecbc90b0667323 145fe455eb9d42bc17ab30f1ba224fd7e58fef09ddf0f8e62baef7b8b01bdd636b5f648097c1e913
-pow 10907dc1930690697b13714fd4a154ba97b258f74d8737d4c75099da2f590b0667323 13e06cf924ccdf01b923294ec1db2d23880e409ad87 16a09e667f3bcc908b2fb1366908b2fb1366ea957d3ea957d3e3adec17512775099da2f590b0667323 b3566d563d6ba5fcc24ed18bc5f9a63e9d9b7e073d9b684156b4acc463ac9c89fe5507471d7347819
-pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 16a09e667ff6ec9ab294a33804a57df3bcc908b2fb1366ea957d3e3adec17512775099da2f590b0667323 9cfdd8d180de912e9490385a14cd642f05e258745625c0cd9d403ebdaa7db90109ca1d771476a5b842a5
-pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 16a09e667f3bcc908b923b2fb1366ea957d3e3adec17512775099da2f590b0667323 17c1f8e9e0fc930930fb5a1cbfd90f44d95eb3cf8c7918c98ec47f3ebd4cd9bd234
-pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f9c36a0aac31d2db5e2248af132e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 16a09e667f3bcc908b2fb1366ea957d3e3adec17512775099da2f590b0667323 2ba528582bdd37fe979ba13589832fd094dbd1ba79aa31b1a3fc15ea00d8908
-pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad930690697b13714fd4a154ba9787 16a09e667f3bcc366ea957d3e3adec17512775099da2f590b0667323 100b983f254cd0503a68b707952fd4e3930e57e48ef7521dc749302c
-pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 16a09e64d23bcc908b2fb1366ea957d3e3adec17512775099da2f590b0667323 32edca42bb85d3a7964256b95d4c12db0a3735c8501b7d24e35c17e6471aff1
-pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 16a092047f3bcc908b2fb1366ea957d3e3adec17512775099da2f590b0667323 100fa51ec3e14bbb7918eb18a4619b71a9c0445e1a9677387f982911df821403
-pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 18449e667f3bcc908b2fb1366ea957d3e3adec17512775099da2f590b0667323 16e9c0ed5254e4d5fa00a8e6630cc4174d69982c67d26303805ef489c332f713
-pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 16a09e6c7f3bcc908b2fb1366ea957d3e3adec17512775099da2f590b0667323 13928b672d49ab1c419847ae5c2bc6957a2792445b58bb5de8aaf35b11ec51b2
-pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 d6a09e667f3bcc908b2fb1366ea957d3e3adec17512775099da2f590b0667323 d3634464bd781a4f3c3de1bb2d575988d5d861944bbcfb2d5b5fb9cb5f641b62
-pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 16a09e667f3bcc908b2fb1366ea957d3e3adec17512775099db2f590b0667323 139a26f4ce368bcc132437fa3fdff9393e529e9ad810507065eae0fe1eae75ab
-pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 16a09e667f3bcc908b2fb1366ea957d3e3adeb17512775099da2f590b0667323 13cb39d2966a6cda4a61a7bcac4d54fc1e07f9a5911a6c9e418f424e1d690d88
-pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 16a09e667f3bcc908b2fb1366ea957d2e3adec17512775099da2f590b0667323 1045060ef67d3e02805fc6c7c73e78d9515773cebfb94a2361bb562de44f0cef
-pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 16a09e667f3bcc908b2fb1366ea955d3e3adec17512775099da2f590b0667323 e9f79f25a51263752bb6801ea2ea69f1bfe72eb29841a18fe897344e432778b
-pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 16a09e667f3bcc908b2fb1366ea857d3e3adec17512775099da2f590b0667323 1603254e45e8cea74b33e0fea929a14f8031fec82f60e443d5da2177dc5d9aa3
-pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 16a09e667f3bcc908b2fb1366da957d3e3adec17512775099da2f590b0667323 82799d2dc787e49afd44173d4373cc1e5e4c5f338639b0876da8b60f139032f
-pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 16a09e667f3bcc908b2fb1166ea957d3e3adec17512775099da2f590b0667323 550bb6f10292ca7bda8ee8ec2d849ca328123ff90f2bd85795c509a12d99e0a
-pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 12a09e667f3bcc908b2fb1366ea957d3e3adec17512775099da2f590b0667323 106e2d72f0365650111866243cfc5a675f4d0677452da7d86354717813c4b705
-pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 56a09e667f3bcc908b2fb1366ea957d3e3adec17512775099da2f590b0667323 34af9ebb1c936ee57965f89c005ea5dc91d70aefef9a3181363e39b7547856e4
-pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 96a09e667f3bcc908b2fb1366ea957d3e3adec17512775099da2f590b0667323 735eb1132cd9f59c79500163d423f3ecb1f802aa65ec6327612302f7d96ae917
-pow 11907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 16a09e667f3bcc908b2fb1366ea957d3e3adec17512775099da2f590b0667323 328fb6559104e886880c03bdc59572cc2dddb075bc405b8e37e681fdf6f14b6
-pow 12907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 16a09e667f3bcc908b2fb1366ea957d3e3adec17512775099da2f590b0667323 11d815b4b3c473a014845a6c46d4f3e0fbe1079a046d52a999a0efa4457d904e
-pow 50907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 16a09e667f3bcc908b2fb1366ea957d3e3adec17512775099da2f590b0667323 12cf3bbf47b42cd6d73e7c59aa66efe37c82efd92eadb5d3166c2514fb521ec4
-pow 4241f7064c1a41a5ec4dc53f528552e9 393e4b8b7fd5b26aca528ce01295f4d6 5a827999fcef32422cbec4d9baa55f41a5ec4dc53f528552e9 337f28ff252aa48023556ab30189e1eba23c1f2d451136b029
-pow 4241f7064c1a41a552e9 393e4b3b55fa3d1aaf5b8bb26aca528ce01295f4d6 5a827999fcef32422cbec4d9baa55f4e 46aa926b3d484810eccca4e964ad5f8f
-pow 4241f7064c1a41a5ec4dc53f528552e9 393e4b8b7fdb2cbec4d9baa55f4e 3e73738d7999fcef32422cbec4d9baa55f4e 70ca5cba1b58bdc4c907d349716609a3749
-pow 4241f7064c1a41a5ec4dc53f528552e9 393e4b8b7fdbb26aca528ce01295f4d6 5a827999fcef32422cbec4d9baa55f44dc53f528552e9 3b76731b825ed64be2fdd42579bf2c10a666b49b347ed
-pow 4241f7064c1a41c51c4dc53f528552e9 393e4b8b7f73738ddbb26aca528ce01295f4d6 5a827999fcef32422cbec4d9baa55f4e 3dc6a608deca791097e423ce76dc7c3f
-pow 4241f7064c1a41a5ec4dc53f528552e9 393e4b8b7fdbb26aca528ce01295f4d6 5a827999fcef32422cbec4d9baa55179 4b5b2ee754f65abca70d2ee02783bb84
-pow 4241f7064c1a41a5ec4dc53f528552e9 393e4b8b7fdbb26aca528ce01295f4d6 5a827999fcef32422cbec4d9baa55ee5 13a5660d889b6c2b7679e4804cfb33b0
-pow 4241f7064c1a41a5ec4dc53f528552e9 393e4b8b7fdbb26aca528ce01295f4d6 5a827999fcef32422cbec4d9baa55171 b2bee6670600e5cdca742a65fd1ff8d
-pow 4241f7064c1a41a5ec4dc53f528552e9 393e4b8b7fdbb26aca528ce01295f4d6 5a827999fcef32422cbec4d9baa5c5e5 209018f064798d5de50dca68e1a67bbd
-pow 4241f7064c1a41a5ec4dc53f528552e9 393e4b8b7fdbb26aca528ce01295f4d6 5a827999fcef32422cbec4d9baa311c9 19480fc0e92332649012fe282ac6507b
-pow 4241f7064c1a41a5ec4dc53f528552e9 393e4b8b7fdbb26aca528ce01295f4d6 5a827999fcef3bb06cbec4d9baa55f4e 4b140ce53242560dbd6e997fd2c75683
-pow 2241f7064c1a41a5ec4dc53f528552e9 393e4b8b7fdbb26aca528ce01295f4d6 5a827999fcef32422cbec4d9baa55f4e 3c503d9f72a8809af1eef9883798b963
-pow 4241f7064c1a41a5ec4dc53f528552e9 393e4b8b7fdbb26aca528ce01295f4d6 5a827999fcef32422cbec4d9baa55f4d b507582651e7cd31dac1fe4f6e22f4b
-pow 4241f7064c1a41a5ec4dc53f528552e9 793e4b8b7fdbb26aca528ce01295f4d6 5a827999fcef32422cbec4d9baa55f4e 59acb6241548663232e48f1bce0780e7
-pow 40 727c9716484 870e7550437c3f9de6484 812319144f0d00b0c71c0
-pow 8483 b504f333f9de6485 3192ea3e99c8b504f333f9de6485 2b83edad87fec41bdd2a2c67757
-pow 8403ee0c9834834c 727c9716ffb764d5 b504f333f9de64504f333f9de6485 72de72a69fb7be9db98df5ff0666e
-pow 8483179c984c 727c9716ffb764d5 b504f333f333f9de6485 45f63d048e8bf05fe15
-pow 848334764d5 b504f333f9de6485 3192ea3e99c84934c 846cdce061d08961
-pow 8483ee0c98348346dd00267168dd764d5 b504f333f9de6485 31b504f333f9de6485 1d3c563ca7a8355ee2
-pow 8483ee0c9834834c 727c9716ffb764d5 8888888888888888888888d8888888b504f333f9de6485 262b90699d5729bbd17f13a5d5f72b8a37d53c5bc6b862
-pow 8483ee0c94c 72 b504f333f9de6485e6485 873c770cd74ff4fff6c16
-pow 8483e834c 727c9716ffb7644e0c9834834c 727c9716ffb7644246504f333f9de6485 98af64208d351df230c13e918ae5a012
-pow 8483ee0c9834834c 727c9716ffbf333f9de648834c 727c9716ffbf333f9de645 1d7d26536d52e6422b5b81
-pow 8483ee0c9834834c 727c9716ffb764d5 b504f333f9dde6485 ac208abc45050b286
-pow 8483ee0c9834834c 727c9716ffb764d5 b504f333f504f333f9d9de6485 5da8095154c4248389a323559c
-pow 8483ee0c9834834c 3f9de6485 82bd2a3e99c872e99c849de504f337c92ea3e99c849de504f333f9de6485 5cc8c6fc66f6da8d4fc03cf14157915bf0a46062ee2b87007a1ba5efd0ef
-pow 8483ee0c9834834c 727c9716ffb764d5 b504f33333f9de6485 9b7fbbd74f27281103
-pow 8483ee0c98345 b504f333f9 e6485 809bf
-pow 8483ee0c9834834c 727c9716ffb764d5 b5026823f9de6485 380c222e7b1c958
-pow 8483ee0c9834834c 727c9716ffb764d5 4d24f333f9de6485 2e7a29f446106c06
-pow 8483ee0c9834834c 727c9716ffb764d5 1714f333f9de6485 38687f6a43fd553
-pow 8483ee0c9834834c 727c9716ffb764d5 e914f333f9de6485 5f8173a3832a148b
-pow 8483ee0c9834834c 727c9716ffb764d5 1754f333f9de6485 eee8599db0a878e
-pow 8483ee0c9834834c 727c9716ffb764d5 2044f333f9de6485 1e559d32dc6c4d92
-pow 8604ee0c9834834c 727c9716ffb764d5 b504f333f9de6485 5341d938b4b2112e
-pow 37c3ee0c9834834c 727c9716ffb764d5 b504f333f9de6485 2783dee829d253f2
-pow 8483ee0c9834834c 727c9716ffb764d5 b504fa33f9de6485 1b81c5c38e77c229
-pow 8483ee0c9834834c 727c9716ffb764d5 b5c4f333f9de6485 6c231b78114a63d1
-pow 8483ee0c9834834c 7f7c9716ffb764d5 b504f333f9de6485 2cc9b662148976d8
-pow 8483ee0c9834834c a27c9716ffb764d5 b504f333f9de6485 9833d32827c56876
-pow 8483ee0c9834834c 827c9716ffb764d5 b504f333f9de6485 44640cc39fd4788
-pow 7483ee0c9834834c 727c9716ffb764d5 b504f333f9de6485 55086f87b6043f40
-pow 8483ee0c9834834c 727c9716ffb764d5 b504f333f9de6489 5de3cc538a4912b2
-pow 8483ee0c9834834c 727c9716ffb764d5 b504f333f9de7485 7dee09f7103356f7
-pow 8483ee0c9834834c 727c9716ffb764d5 b504f332f9de6485 31e66ba6b6f01dbc
-pow 8483ee0c9834834c 727c9716ffb764d5 b504f233f9de6485 36a16ab51897078d
-pow 8483ee0c9834834c 727c9716ffb764d5 b524f333f9de6485 50139fa7019a79b2
-pow 8483ee0c9834834c 727c9716ffb764d5 b544f333f9de6485 218964ebf2bf02bc
-pow 8483ee0c9834834c 727c9716ffb764d5 b704f333f9de6485 60f74c122162ed95
-pow 8483ee0c9834834c 727c9716ffb764d5 b104f333f9de6485 16855ca50047d45e
-pow 8483ee0c9834834c 727c9716ffb764d5 c504f333f9de6485 2d27db08d15c5e30
-pow 8483ee0c9834834c 727c9716ffb764d5 f504f333f9de6485 43c59cdd83e4cb0e
-pow 8493ee0c9834834c 727c9716ffb764d5 b504f333f9de6485 b8f2e35b869b538
-pow 8083ee0c9834834c 727c9716ffb764d5 b504f333f9de6485 709ba83061fbedec
-pow 9483ee0c9834834c 727c9716ffb764d5 b504f333f9de6485 74a637bb3a6686ab
-modmul 13988e 194c583ada5b529204a2bc83eb7b05d44d9baa55f4f890cd9bfea55a7 5a827999fcef32422cbec4d9baa55f4f8eb7b05d44d9baa55f4f89dd426768bd64 1efbde91a17127884d7d32ffd5f1f18750c651ddcb394371cfa91ff23baaa2
-modmul 13988e 194c583ada1b529204a2bc830cd9bfea55a7 5a827999fcef32422cbec4d9baa55f4f8eb7b05d449dd426768bd64 1efbde91a122c5504d7d32fec547ba91ff23baaa2
-modmul 13988e 194c583ade5b529204a2bc830cd9bfea55a7 5a827999fcef32422cbec4d9baa55f4f8eb7b05d449dd426768bd64 1efbde91a6574b084d7d32fec547ba91ff23baaa2
-modmul 13988e 194c583ada5b529204a2bc830cd9bfea55a7 3a827999fcef32422cbec4d9baa55f4 3168752910948165510fe90bd7ab5d6
-modmul 13988e 194c583ada7b529204a2b 5a827999fcef32422cb 564931ea2a366069fa0
-modmul 13988e 194c583ada1b529204a2b 5a827999fcef32422cb 5641d8b4ea366069fa0
-modmul 13988e 194c58 5a827999fcef32422cbec4d97999fcef32422cbec4d9baa55baa55f4f8eb7b05d449dd426768bd642cccccccccccccccccccccccccccccccc199cc8aa57 1efbde498d0
-modmul 13988e 194c58 5a827999fcef32422cbec4d9baa55f4f8eb7b05d449dd426768bf65f4f8eb7b05d449dd426768bd642c142c199cc8aa57 1efbde498d0
-modmul 13988e 194c58 5a827999fcef32422cbec827999fcef32422cbec4d99fcef32422cbe32422cbec8279aa55f4f8e4d9baa55f4f8eb 1efbde498d0
-modmul 13988e 194c58ee5827999fcef32422cbec4d9baa55f4f2422cbec4d9baa55f4f8ed 1efbde48eb 17247c498b
-modmul 13988e 194242b 1efbde4aa55f4f8eb 1eef84d697da
-modmul 6d28e 194c58 5a82799 4b07f1e
-modmul 13988e 194c58 3a8 90
-modmul 13988e 194c58 9a8 438
-modmul 93988e 194c58 5a8 418
-mul 16c2d9895a204eb19a1bb5b9b3c06d262c4bf90b74bfe8e1e 400000000000000000000000d294c6e5844edbe8b97f145b4ffd73ef7617d71a92e8006ee548e1e 5b0b662568813ac6686ed6e7fa92c1116b9e82723896e0c88f73a9faa96b8ac9b4c6efa28706e2696b0addabd81c3c9ff6167d1b5e537339bee8dfca0814b84
-mul 16c2d9895a204eb19a1bb5b9b3c06d262c4bf90b040fec72b389cf6fd375533 2cfd2d7d74d29145b4ffd73ef7617d71a92e8006ee548e1d 3ffffffffffffb03af375d6c1c311c6e51e081bfe99f6b41a84698ec8806beb21ddb37330f58d39c67989aa2ebdd2300fecefebf42f0c7
-mul 16c2d9895a204eb00000000000000000004bf90b74bfec72b389cf6fd375533 2cfd2d7d74d294c6e5844edbe85b4ffd73ef7617d71a92e8006ee548e1e 3ffffffffffffffb7eda681a2eca0a66bebccfbd4ca3fa67c9ba58e8735028859d32a47c8411726411fd5c7779d7ce5b04076dbea2c376bf5bc7a45fa
-mul 16c2d9895a204eb19a1bb5b9b3c06d262c4bf90b74bfec72b389cfd2d7d74d294c6e5844edbe8b97f145b4ffd73ef7617d71a92e8006ee548e1e 40000000000000 5b0b662568813ac6686ed6e6cf01b498b12fe42dd2ffb1cace273f4b5f5d34a531b96113b6fa2e5fc516d3ff5cfbdd85f5c6a4ba001bb95238780000000000000
-mul 16c2d9895a2bf90b74bfec72b389cf6fd375533 2cfd2d7d74d294c6e5844edbe8b97f145b4ffd73ef7617d71a92e8006ee548e1e 400000000020cd0df0a99c9bcec9106a845f2589fb29d491cf9909dc7c53bc3dcad9a6eca00e5b04076dbea2c376bf5bc7a45fa
-mul 16c2d9895a204eb19a1bb5b9b3c06d262c4bf90b74bfec72b389cf6fd375533 2cfd2d7d74d294c6e5844edbe8b97f145b4ffd73ef7617d71a92e8006ee548e9e 400000000000000000000000000000000000000000000000000000000000000b616cc4ad102758cd0ddadcd9e037357125cae0be6763f7fc885e7745824df7a
-mul 16c2d9895a204eb19a1bb5b9b3c06d262c4bf90b74bfec72b389cf6fd375533 cfd2d7d74d294c6e5844edbe8b97f145b4ffd73ef7617d71a92e8006ee548e1e 127a4ced4bbf629ccbc8948c987f25b3a7680de91680271a98ec6120591559a00000000000000000000000000000a25affce5b04076dbea2c376bf5bc7a45fa
-mul 16c2d9895a204eb19a1bb5b9b3c06d262c4bf90b74bfec76b389cf6fd375533 2cfd2d7d74d294c6e5844edbe8b97f145b4ffd73ef7617d71a92e8006ee548e1e 40000000000000000000000000000000000000000000000b3f4b5f5d34a531b96113b6fa2e5fc516d3ff5cfbdd869821a4885b1fc0bff71ac376bf5bc7a45fa
-mul 16c2d9895a204eb19a1bb5b9b3c06d272c4bf90b74bfec72b389cf6fd375533 2cfd2d7d74d294c6e5844edbe8b97f145b4ffd73ef7617d71a92e8006ee548e1e 40000000000000000000000000000002cfd2d7d74d294c6e5844edbe8b97f145b4ffd73ef7617d71a92e8006ee553078ffce5b04076dbea2c376bf5bc7a45fa
-mul 16c2d9895a204eb59a1bb5b9b3c06d262c4bf90b74bfec72b389cf6fd375533 2cfd2d7d74d294c6e5844edbe8b97f145b4ffd73ef7617d71a92e8006ee548e1e 400000000000000b3f4b5f5d34a531b96113b6fa2e5fc516d3ff5cfbdd85f5c6a4ba001bb9523878000000000000a25affce5b04076dbea2c376bf5bc7a45fa
-mul 12c2d9895a204eb19a1bb5b9b3c06d262c4bf90b74bfec72b389cf6fd375533 2cfd2d7d74d294c6e5844edbe8b97f145b4ffd73ef7617d71a92e8006ee548e1e 34c0b4a0a2cb5ace469eec4905d1a03ae92c00a304227a0a395b45ffe446adc78800000000000000000000000000a25affce5b04076dbea2c376bf5bc7a45fa
-mul 36c2d9895a204eb19a1bb5b9b3c06d262c4bf90b74bfec72b389cf6fd375533 2cfd2d7d74d294c6e5844edbe8b97f145b4ffd73ef7617d71a92e8006ee548e1e 99fa5afae9a5298dcb089db7d172fe28b69ffae7deec2fae3525d000ddca91c3c000000000000000000000000000a25affce5b04076dbea2c376bf5bc7a45fa
-mul 56c2d9895a204eb19a1bb5b9b3c06d262c4bf90b74bfec72b389cf6fd375533 2cfd2d7d74d294c6e5844edbe8b97f145b4ffd73ef7617d71a92e8006ee548e1e f3f4b5f5d34a531b96113b6fa2e5fc516d3ff5cfbdd85f5c6a4ba001bb9523878000000000000000000000000000a25affce5b04076dbea2c376bf5bc7a45fa
-mul 96c2d9895a204eb19a1bb5b9b3c06d262c4bf90b74bfec72b389cf6fd375533 2cfd2d7d74d294c6e5844edbe8b97f145b4ffd73ef7617d71a92e8006ee548e1e 1a7e96beba694a6372c2276df45cbf8a2da7feb9f7bb0beb8d4974003772a470f0000000000000000000000000000a25affce5b04076dbea2c376bf5bc7a45fa
-mul 1 bbbbbbbbb0 bbbbbbbbb0
-mul 1 a a
-mul 1 e e
-mul a c 78
-mul 1 1 1
-mul a 4 28
-mul 1 2 2
-mul 1 8 8
-mul 1 0 0
-mul 0 4 0
-mul 3 4 c
-mul 5 4 14
-mul 9 4 24
diff --git a/unix/uxserver.c b/unix/uxserver.c
index c0ee251a..897ff865 100644
--- a/unix/uxserver.c
+++ b/unix/uxserver.c
@@ -43,6 +43,7 @@
 
 #define PUTTY_DO_GLOBALS	       /* actually _define_ globals */
 #include "putty.h"
+#include "mpint.h"
 #include "ssh.h"
 #include "sshserver.h"
 
@@ -221,11 +222,11 @@ bool auth_publickey(AuthPolicy *ap, ptrlen username, ptrlen public_blob)
     return false;
 }
 struct RSAKey *auth_publickey_ssh1(
-    AuthPolicy *ap, ptrlen username, Bignum rsa_modulus)
+    AuthPolicy *ap, ptrlen username, mp_int *rsa_modulus)
 {
     struct AuthPolicy_ssh1_pubkey *iter;
     for (iter = ap->ssh1keys; iter; iter = iter->next) {
-        if (!bignum_cmp(rsa_modulus, iter->key.modulus))
+        if (mp_cmp_eq(rsa_modulus, iter->key.modulus))
             return &iter->key;
     }
     return NULL;
diff --git a/windows/winpgen.c b/windows/winpgen.c
index 833ef393..005cd818 100644
--- a/windows/winpgen.c
+++ b/windows/winpgen.c
@@ -349,7 +349,8 @@ struct rsa_key_thread_params {
     union {
         struct RSAKey *key;
         struct dss_key *dsskey;
-        struct ec_key *eckey;
+        struct ecdsa_key *eckey;
+        struct eddsa_key *edkey;
     };
 };
 static DWORD WINAPI generate_key_thread(void *param)
@@ -364,9 +365,10 @@ static DWORD WINAPI generate_key_thread(void *param)
     if (params->keytype == DSA)
 	dsa_generate(params->dsskey, params->key_bits, progress_update, &prog);
     else if (params->keytype == ECDSA)
-        ec_generate(params->eckey, params->curve_bits, progress_update, &prog);
+        ecdsa_generate(params->eckey, params->curve_bits,
+                       progress_update, &prog);
     else if (params->keytype == ED25519)
-        ec_edgenerate(params->eckey, 256, progress_update, &prog);
+        eddsa_generate(params->edkey, 256, progress_update, &prog);
     else
 	rsa_generate(params->key, params->key_bits, progress_update, &prog);
 
@@ -390,7 +392,8 @@ struct MainDlgState {
     union {
         struct RSAKey key;
         struct dss_key dsskey;
-        struct ec_key eckey;
+        struct ecdsa_key eckey;
+        struct eddsa_key edkey;
     };
     HMENU filemenu, keymenu, cvtmenu;
 };
@@ -1401,7 +1404,7 @@ static INT_PTR CALLBACK MainDlgProc(HWND hwnd, UINT msg,
             } else if (state->keytype == ECDSA) {
                 state->ssh2key.key = &state->eckey.sshk;
             } else if (state->keytype == ED25519) {
-                state->ssh2key.key = &state->eckey.sshk;
+                state->ssh2key.key = &state->edkey.sshk;
 	    } else {
 		state->ssh2key.key = &state->key.sshk;
 	    }