From 25b034ee39f557cab6e6e7b79591ef46c72cba92 Mon Sep 17 00:00:00 2001 From: Simon Tatham Date: Mon, 31 Dec 2018 13:53:41 +0000 Subject: [PATCH] Complete rewrite of PuTTY's bignum library. The old 'Bignum' data type is gone completely, and so is sshbn.c. In its place is a new thing called 'mp_int', handled by an entirely new library module mpint.c, with API differences both large and small. The main aim of this change is that the new library should be free of timing- and cache-related side channels. I've written the code so that it _should_ - assuming I haven't made any mistakes - do all of its work without either control flow or memory addressing depending on the data words of the input numbers. (Though, being an _arbitrary_ precision library, it does have to at least depend on the sizes of the numbers - but there's a 'formal' size that can vary separately from the actual magnitude of the represented integer, so if you want to keep it secret that your number is actually small, it should work fine to have a very long mp_int and just happen to store 23 in it.) So I've done all my conditionalisation by means of computing both answers and doing bit-masking to swap the right one into place, and all loops over the words of an mp_int go up to the formal size rather than the actual size. I haven't actually tested the constant-time property in any rigorous way yet (I'm still considering the best way to do it). But this code is surely at the very least a big improvement on the old version, even if I later find a few more things to fix. I've also completely rewritten the low-level elliptic curve arithmetic from sshecc.c; the new ecc.c is closer to being an adjunct of mpint.c than it is to the SSH end of the code. The new elliptic curve code keeps all coordinates in Montgomery-multiplication transformed form to speed up all the multiplications mod the same prime, and only converts them back when you ask for the affine coordinates. 
Also, I adopted extended coordinates for the Edwards curve implementation. sshecc.c has also had a near-total rewrite in the course of switching it over to the new system. While I was there, I've separated ECDSA and EdDSA more completely - they now have separate vtables, instead of a single vtable in which nearly every function had a big if statement in it - and also made the externally exposed types for an ECDSA key and an ECDH context different. A minor new feature: since the new arithmetic code includes a modular square root function, we can now support the compressed point representation for the NIST curves. We seem to have been getting along fine without that so far, but it seemed a shame not to put it in, since it was suddenly easy. In sshrsa.c, one major change is that I've removed the RSA blinding step in rsa_privkey_op, in which we randomise the ciphertext before doing the decryption. The purpose of that was to avoid timing leaks giving away the plaintext - but the new arithmetic code should take that in its stride in the course of also being careful enough to avoid leaking the _private key_, which RSA blinding had no way to do anything about in any case. Apart from those specific points, most of the rest of the changes are more or less mechanical, just changing type names and translating code into the new API. 
--- Recipe | 13 +- cmdgen.c | 12 +- contrib/eccref.py | 401 +++++ contrib/gdb.py | 34 +- defs.h | 10 + ecc.c | 1112 +++++++++++++ ecc.h | 233 +++ import.c | 39 +- marshal.h | 6 +- mpint.c | 2340 +++++++++++++++++++++++++++ mpint.h | 386 +++++ sshbn.h => mpint_i.h | 85 +- pageant.c | 43 +- ssh.h | 169 +- ssh1login-server.c | 22 +- ssh1login.c | 23 +- ssh2kex-client.c | 19 +- ssh2kex-server.c | 11 +- ssh2transport.c | 13 +- ssh2transport.h | 4 +- sshbn.c | 2180 ------------------------- sshccp.c | 2 +- sshcommon.c | 10 +- sshdh.c | 193 +-- sshdss.c | 180 +-- sshdssg.c | 86 +- sshecc.c | 3305 ++++++++++++-------------------------- sshecdsag.c | 64 +- sshprime.c | 280 ++-- sshpubk.c | 18 +- sshrsa.c | 409 ++--- sshrsag.c | 50 +- sshserver.h | 2 +- testbn.c | 275 ---- testdata/bignum.py | 140 -- testdata/bignumtests.txt | 205 --- unix/uxserver.c | 5 +- windows/winpgen.c | 13 +- 38 files changed, 6283 insertions(+), 6109 deletions(-) create mode 100644 contrib/eccref.py create mode 100644 ecc.c create mode 100644 ecc.h create mode 100644 mpint.c create mode 100644 mpint.h rename sshbn.h => mpint_i.h (78%) delete mode 100644 sshbn.c delete mode 100644 testbn.c delete mode 100644 testdata/bignum.py delete mode 100644 testdata/bignumtests.txt diff --git a/Recipe b/Recipe index ef2c36a6..a4c5e828 100644 --- a/Recipe +++ b/Recipe @@ -250,10 +250,11 @@ GTKMAIN = gtkmain cmdline NONSSH = telnet raw rlogin ldisc pinger # SSH back end (putty, plink, pscp, psftp). 
+ARITH = mpint ecc SSHCOMMON = sshcommon sshrand + sshverstring sshcrc sshdes sshmd5 sshrsa sshsha sshblowf + sshdh sshcrcda sshpubk sshzlib sshdss ssharcf - + sshaes sshccp sshsh256 sshsh512 sshbn sshmac marshal nullplug + + sshaes sshccp sshsh256 sshsh512 ARITH sshmac marshal nullplug + sshgssc pgssapi sshecc wildcard ssh1censor ssh2censor ssh2bpp + ssh2transport ssh2transhk ssh2connection portfwd x11fwd + ssh1connection ssh1bpp @@ -325,11 +326,11 @@ pscp : [C] pscp winsftp wincons WINSSH BE_SSH SFTP wildcard WINMISC psftp : [C] psftp winsftp wincons WINSSH BE_SSH SFTP wildcard WINMISC + psftp.res winnojmp LIBS -pageant : [G] winpgnt pageant sshrsa sshpubk sshdes sshbn sshmd5 version +pageant : [G] winpgnt pageant sshrsa sshpubk sshdes ARITH sshmd5 version + tree234 MISC sshaes sshsha winsecur winpgntc aqsync sshdss sshsh256 + sshsh512 winutils sshecc winmisc winhelp conf pageant.res LIBS -puttygen : [G] winpgen sshrsag sshdssg sshprime sshdes sshbn sshmd5 version +puttygen : [G] winpgen sshrsag sshdssg sshprime sshdes ARITH sshmd5 version + sshrand winnoise sshsha winstore MISC winctrls sshrsa sshdss winmisc + sshpubk sshaes sshsh256 sshsh512 IMPORT winutils puttygen.res + tree234 notiming winhelp winnojmp CONF LIBS wintime sshecc @@ -348,7 +349,7 @@ puttytel : [X] GTKTERM uxmisc misc ldisc settings uxsel U_BE_NOSSH plink : [U] uxplink uxcons NONSSH UXSSH U_BE_ALL logging UXMISC uxsignal + ux_x11 noterm uxnogtk sessprep cmdline -PUTTYGEN_UNIX = sshrsag sshdssg sshprime sshdes sshbn sshmd5 version +PUTTYGEN_UNIX = sshrsag sshdssg sshprime sshdes ARITH sshmd5 version + sshrand uxnoise sshsha MISC sshrsa sshdss uxcons uxstore uxmisc + sshpubk sshaes sshsh256 sshsh512 IMPORT puttygen.res time tree234 + uxgen notiming CONF sshecc sshecdsag uxnogtk @@ -358,7 +359,7 @@ cgtest : [UT] cgtest PUTTYGEN_UNIX pscp : [U] pscp uxsftp uxcons UXSSH BE_SSH SFTP wildcard UXMISC uxnogtk psftp : [U] psftp uxsftp uxcons UXSSH BE_SSH SFTP wildcard UXMISC uxnogtk -pageant : [X] uxpgnt 
uxagentc aqsync pageant sshrsa sshpubk sshdes sshbn +pageant : [X] uxpgnt uxagentc aqsync pageant sshrsa sshpubk sshdes ARITH + sshmd5 version tree234 misc sshaes sshsha sshdss sshsh256 sshsh512 + sshecc CONF uxsignal nocproxy nogss be_none x11fwd ux_x11 uxcons + gtkask gtkmisc nullplug logging UXMISC uxagentsock memory @@ -373,8 +374,6 @@ osxlaunch : [UT] osxlaunch fuzzterm : [UT] UXTERM CHARSET misc version uxmisc uxucs fuzzterm time settings + uxstore be_none uxnogtk memory -testbn : [UT] testbn sshbn MISC version CONF tree234 uxmisc uxnogtk -testbn : [C] testbn sshbn MISC version CONF tree234 winmisc LIBS testzlib : [UT] testzlib sshzlib memory uppity : [UT] uxserver SSHSERVER UXMISC uxsignal uxnoise uxgss uxnogtk diff --git a/cmdgen.c b/cmdgen.c index 6730eeff..da2f98de 100644 --- a/cmdgen.c +++ b/cmdgen.c @@ -704,16 +704,16 @@ int main(int argc, char **argv) ssh2key->key = &dsskey->sshk; ssh1key = NULL; } else if (keytype == ECDSA) { - struct ec_key *ec = snew(struct ec_key); - ec_generate(ec, bits, progressfn, &prog); + struct ecdsa_key *ek = snew(struct ecdsa_key); + ecdsa_generate(ek, bits, progressfn, &prog); ssh2key = snew(struct ssh2_userkey); - ssh2key->key = &ec->sshk; + ssh2key->key = &ek->sshk; ssh1key = NULL; } else if (keytype == ED25519) { - struct ec_key *ec = snew(struct ec_key); - ec_edgenerate(ec, bits, progressfn, &prog); + struct eddsa_key *ek = snew(struct eddsa_key); + eddsa_generate(ek, bits, progressfn, &prog); ssh2key = snew(struct ssh2_userkey); - ssh2key->key = &ec->sshk; + ssh2key->key = &ek->sshk; ssh1key = NULL; } else { struct RSAKey *rsakey = snew(struct RSAKey); diff --git a/contrib/eccref.py b/contrib/eccref.py new file mode 100644 index 00000000..55dfa042 --- /dev/null +++ b/contrib/eccref.py @@ -0,0 +1,401 @@ +import numbers +import itertools + +def jacobi(n,m): + """Compute the Jacobi symbol. 
+ + The special case of this when m is prime is the Legendre symbol, + which is 0 if n is congruent to 0 mod m; 1 if n is congruent to a + non-zero square number mod m; -1 if n is not congruent to any + square mod m. + + """ + assert m & 1 + acc = 1 + while True: + n %= m + if n == 0: + return 0 + while not (n & 1): + n >>= 1 + if (m & 7) not in {1,7}: + acc *= -1 + if n == 1: + return acc + if (n & 3) == 3 and (m & 3) == 3: + acc *= -1 + n, m = m, n + +class SqrtModP(object): + """Class for finding square roots of numbers mod p. + + p must be an odd prime (but its primality is not checked).""" + + def __init__(self, p): + p = abs(p) + assert p & 1 + self.p = p + + # Decompose p as 2^e k + 1 for odd k. + self.k = p-1 + self.e = 0 + while not (self.k & 1): + self.k >>= 1 + self.e += 1 + + # Find a non-square mod p. + for self.z in itertools.count(1): + if jacobi(self.z, self.p) == -1: + break + self.zinv = ModP(self.p, self.z).invert() + + def sqrt_recurse(self, a): + ak = pow(a, self.k, self.p) + for i in range(self.e, -1, -1): + if ak == 1: + break + ak = ak*ak % self.p + assert i > 0 + if i == self.e: + return pow(a, (self.k+1) // 2, self.p) + r_prime = self.sqrt_recurse(a * pow(self.z, 2**i, self.p)) + return r_prime * pow(self.zinv, 2**(i-1), self.p) % self.p + + def sqrt(self, a): + j = jacobi(a, self.p) + if j == 0: + return 0 + if j < 0: + raise ValueError("{} has no square root mod {}".format(a, self.p)) + a %= self.p + r = self.sqrt_recurse(a) + assert r*r % self.p == a + # Normalise to the smaller (or 'positive') one of the two roots. + return min(r, self.p - r) + + def __str__(self): + return "{}({})".format(type(self).__name__, self.p) + def __repr__(self): + return self.__str__() + +class ModP(object): + """Class that represents integers mod p as a field. 
+ + All the usual arithmetic operations are supported directly, + including division, so you can write formulas in a natural way + without having to keep saying '% p' everywhere or call a + cumbersome modular_inverse() function. + + """ + def __init__(self, p, n=0): + self.p = p + if isinstance(n, type(self)): + self.check(n) + n = n.n + self.n = n % p + def check(self, other): + assert isinstance(other, type(self)) + assert isinstance(self, type(other)) + assert self.p == other.p + def coerce_to(self, other): + if not isinstance(other, type(self)): + other = type(self)(self.p, other) + else: + self.check(other) + return other + def invert(self): + "Internal routine which returns the bare inverse." + if self.n % self.p == 0: + raise ZeroDivisionError("division by {!r}".format(self)) + a = self.n, 1, 0 + b = self.p, 0, 1 + while b[0]: + q = a[0] // b[0] + a = a[0] - q*b[0], a[1] - q*b[1], a[2] - q*b[2] + b, a = a, b + assert abs(a[0]) == 1 + return a[1]*a[0] + def __add__(self, rhs): + rhs = self.coerce_to(rhs) + return type(self)(self.p, (self.n + rhs.n) % self.p) + def __neg__(self): + return type(self)(self.p, -self.n % self.p) + def __radd__(self, rhs): + rhs = self.coerce_to(rhs) + return type(self)(self.p, (self.n + rhs.n) % self.p) + def __sub__(self, rhs): + rhs = self.coerce_to(rhs) + return type(self)(self.p, (self.n - rhs.n) % self.p) + def __rsub__(self, rhs): + rhs = self.coerce_to(rhs) + return type(self)(self.p, (rhs.n - self.n) % self.p) + def __mul__(self, rhs): + rhs = self.coerce_to(rhs) + return type(self)(self.p, (self.n * rhs.n) % self.p) + def __rmul__(self, rhs): + rhs = self.coerce_to(rhs) + return type(self)(self.p, (self.n * rhs.n) % self.p) + def __div__(self, rhs): + rhs = self.coerce_to(rhs) + return type(self)(self.p, (self.n * rhs.invert()) % self.p) + def __rdiv__(self, rhs): + rhs = self.coerce_to(rhs) + return type(self)(self.p, (rhs.n * self.invert()) % self.p) + def __pow__(self, exponent): + assert exponent >= 0 + n, b_to_n = 1, 
self + total = type(self)(self.p, 1) + while True: + if exponent & n: + exponent -= n + total *= b_to_n + n *= 2 + if n > exponent: + break + b_to_n *= b_to_n + return total + def __cmp__(self, rhs): + rhs = self.coerce_to(rhs) + return cmp(self.n, rhs.n) + def __eq__(self, rhs): + rhs = self.coerce_to(rhs) + return self.n == rhs.n + def __ne__(self, rhs): + rhs = self.coerce_to(rhs) + return self.n != rhs.n + def __lt__(self, rhs): + raise ValueError("Elements of a modular ring have no ordering") + def __le__(self, rhs): + raise ValueError("Elements of a modular ring have no ordering") + def __gt__(self, rhs): + raise ValueError("Elements of a modular ring have no ordering") + def __ge__(self, rhs): + raise ValueError("Elements of a modular ring have no ordering") + def __str__(self): + return "0x{:x}".format(self.n) + def __repr__(self): + return "{}(0x{:x},0x{:x})".format(type(self).__name__, self.p, self.n) + +class AffinePoint(object): + """Base class for points on an elliptic curve.""" + + def __init__(self, curve, *args): + self.curve = curve + if len(args) == 0: + self.infinite = True + self.x = self.y = None + else: + assert len(args) == 2 + self.infinite = False + self.x = ModP(self.curve.p, args[0]) + self.y = ModP(self.curve.p, args[1]) + self.check_equation() + def __neg__(self): + if self.infinite: + return self + return type(self)(self.curve, self.x, -self.y) + def __mul__(self, rhs): + if not isinstance(rhs, numbers.Integral): + raise ValueError("Elliptic curve points can only be multiplied by integers") + P = self + if rhs < 0: + rhs = -rhs + P = -P + toret = self.curve.point() + n = 1 + nP = P + while rhs != 0: + if rhs & n: + rhs -= n + toret += nP + n += n + nP += nP + return toret + def __rmul__(self, rhs): + return self * rhs + def __sub__(self, rhs): + return self + (-rhs) + def __rsub__(self, rhs): + return (-self) + rhs + def __str__(self): + if self.infinite: + return "inf" + else: + return "({},{})".format(self.x, self.y) + def 
__repr__(self): + if self.infinite: + args = "" + else: + args = ", {}, {}".format(self.x, self.y) + return "{}.Point({}{})".format(type(self.curve).__name__, + self.curve, args) + def __eq__(self, rhs): + if self.infinite or rhs.infinite: + return self.infinite and rhs.infinite + return (self.x, self.y) == (rhs.x, rhs.y) + def __ne__(self, rhs): + return not (self == rhs) + def __lt__(self, rhs): + raise ValueError("Elliptic curve points have no ordering") + def __le__(self, rhs): + raise ValueError("Elliptic curve points have no ordering") + def __gt__(self, rhs): + raise ValueError("Elliptic curve points have no ordering") + def __ge__(self, rhs): + raise ValueError("Elliptic curve points have no ordering") + def __hash__(self): + if self.infinite: + return hash((True,)) + else: + return hash((False, self.x, self.y)) + +class CurveBase(object): + def point(self, *args): + return self.Point(self, *args) + +class WeierstrassCurve(CurveBase): + class Point(AffinePoint): + def check_equation(self): + assert (self.y*self.y == + self.x*self.x*self.x + + self.curve.a*self.x + self.curve.b) + def __add__(self, rhs): + if self.infinite: + return rhs + if rhs.infinite: + return self + if self.x == rhs.x and self.y != rhs.y: + return self.curve.point() + x1, x2, y1, y2 = self.x, rhs.x, self.y, rhs.y + xdiff = x2-x1 + if xdiff != 0: + slope = (y2-y1) / xdiff + else: + assert y1 == y2 + slope = (3*x1*x1 + self.curve.a) / (2*y1) + xp = slope*slope - x1 - x2 + yp = -(y1 + slope * (xp-x1)) + return self.curve.point(xp, yp) + + def __init__(self, p, a, b): + self.p = p + self.a = ModP(p, a) + self.b = ModP(p, b) + + def cpoint(self, x, yparity=0): + if not hasattr(self, 'sqrtmodp'): + self.sqrtmodp = SqrtModP(self.p) + rhs = x**3 + self.a.n * x + self.b.n + y = self.sqrtmodp.sqrt(rhs) + if (y - yparity) % 2: + y = -y + return self.point(x, y) + + def __repr__(self): + return "{}(0x{:x}, {}, {})".format( + type(self).__name__, self.p, self.a, self.b) + +class 
MontgomeryCurve(CurveBase): + class Point(AffinePoint): + def check_equation(self): + assert (self.curve.b*self.y*self.y == + self.x*self.x*self.x + + self.curve.a*self.x*self.x + self.x) + def __add__(self, rhs): + if self.infinite: + return rhs + if rhs.infinite: + return self + if self.x == rhs.x and self.y != rhs.y: + return self.curve.point() + x1, x2, y1, y2 = self.x, rhs.x, self.y, rhs.y + xdiff = x2-x1 + if xdiff != 0: + slope = (y2-y1) / xdiff + else: + assert y1 == y2 + slope = (3*x1*x1 + 2*self.curve.a*x1 + 1) / (2*self.curve.b*y1) + xp = self.curve.b*slope*slope - self.curve.a - x1 - x2 + yp = -(y1 + slope * (xp-x1)) + return self.curve.point(xp, yp) + + def __init__(self, p, a, b): + self.p = p + self.a = ModP(p, a) + self.b = ModP(p, b) + + def cpoint(self, x, yparity=0): + if not hasattr(self, 'sqrtmodp'): + self.sqrtmodp = SqrtModP(self.p) + rhs = (x**3 + self.a.n * x**2 + x) * self.b.invert() + y = self.sqrtmodp.sqrt(rhs) + if (y - yparity) % 2: + y = -y + return self.point(x, y) + + def __repr__(self): + return "{}(0x{:x}, {}, {})".format( + type(self).__name__, self.p, self.a, self.b) + +class TwistedEdwardsCurve(CurveBase): + class Point(AffinePoint): + def check_equation(self): + x2, y2 = self.x*self.x, self.y*self.y + assert (self.curve.a*x2 + y2 == 1 + self.curve.d*x2*y2) + def __neg__(self): + return type(self)(self.curve, -self.x, self.y) + def __add__(self, rhs): + x1, x2, y1, y2 = self.x, rhs.x, self.y, rhs.y + x1y2, y1x2, y1y2, x1x2 = x1*y2, y1*x2, y1*y2, x1*x2 + dxxyy = self.curve.d*x1x2*y1y2 + return self.curve.point((x1y2+y1x2)/(1+dxxyy), + (y1y2-self.curve.a*x1x2)/(1-dxxyy)) + + def __init__(self, p, d, a): + self.p = p + self.d = ModP(p, d) + self.a = ModP(p, a) + + def point(self, *args): + # This curve form represents the identity using finite + # numbers, so it doesn't need the special infinity flag. + # Detect a no-argument call to point() and substitute the pair + # of integers that gives the identity. 
+ if len(args) == 0: + args = [0, 1] + return super(TwistedEdwardsCurve, self).point(*args) + + def cpoint(self, y, xparity=0): + if not hasattr(self, 'sqrtmodp'): + self.sqrtmodp = SqrtModP(self.p) + y = ModP(self.p, y) + y2 = y**2 + radicand = (y2 - 1) / (self.d * y2 - self.a) + x = self.sqrtmodp.sqrt(radicand.n) + if (x - xparity) % 2: + x = -x + return self.point(x, y) + + def __repr__(self): + return "{}(0x{:x}, {}, {})".format( + type(self).__name__, self.p, self.d, self.a) + +p256 = WeierstrassCurve(0xffffffff00000001000000000000000000000000ffffffffffffffffffffffff, -3, 0x5ac635d8aa3a93e7b3ebbd55769886bc651d06b0cc53b0f63bce3c3e27d2604b) +p256.G = p256.point(0x6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296,0x4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5) +p256.G_order = 0xffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632551 + +p384 = WeierstrassCurve(0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000ffffffff, -3, 0xb3312fa7e23ee7e4988e056be3f82d19181d9c6efe8141120314088f5013875ac656398d8a2ed19d2a85c8edd3ec2aef) +p384.G = p384.point(0xaa87ca22be8b05378eb1c71ef320ad746e1d3b628ba79b9859f741e082542a385502f25dbf55296c3a545e3872760ab7, 0x3617de4a96262c6f5d9e98bf9292dc29f8f41dbd289a147ce9da3113b5f0b8c00a60b1ce1d7e819d7a431d7c90ea0e5f) +p384.G_order = 0xffffffffffffffffffffffffffffffffffffffffffffffffc7634d81f4372ddf581a0db248b0a77aecec196accc52973 + +p521 = WeierstrassCurve(0x01ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff, -3, 0x0051953eb9618e1c9a1f929a21a0b68540eea2da725b99b315f3b8b489918ef109e156193951ec7e937b1652c0bd3bb1bf073573df883d2c34f1ef451fd46b503f00) +p521.G = 
p521.point(0x00c6858e06b70404e9cd9e3ecb662395b4429c648139053fb521f828af606b4d3dbaa14b5e77efe75928fe1dc127a2ffa8de3348b3c1856a429bf97e7e31c2e5bd66,0x011839296a789a3bc0045c8a5fb42c7d1bd998f54449579b446817afbd17273e662c97ee72995ef42640c550b9013fad0761353c7086a272c24088be94769fd16650) +p521.G_order = 0x01fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffa51868783bf2f966b7fcc0148f709a5d03bb5c9b8899c47aebb6fb71e91386409 + +curve25519 = MontgomeryCurve(2**255-19, 0x76d06, 1) +curve25519.G = curve25519.cpoint(9) + +ed25519 = TwistedEdwardsCurve(2**255-19, 0x52036cee2b6ffe738cc740797779e89800700a4d4141d8ab75eb4dca135978a3, -1) +ed25519.G = ed25519.point(0x216936d3cd6e53fec0a4e231fdd6dc5c692cc7609525a7b2c9562d608f25d51a,0x6666666666666666666666666666666666666666666666666666666666666658) +ed25519.G_order = 0x1000000000000000000000000000000014def9dea2f79cd65812631a5cf5d3ed + diff --git a/contrib/gdb.py b/contrib/gdb.py index 9bfd9584..34bbb0ec 100644 --- a/contrib/gdb.py +++ b/contrib/gdb.py @@ -2,39 +2,43 @@ import gdb import re import gdb.printing -class PuTTYBignumPrettyPrinter(gdb.printing.PrettyPrinter): - "Pretty-print PuTTY's Bignum type." - name = "Bignum" +class PuTTYMpintPrettyPrinter(gdb.printing.PrettyPrinter): + "Pretty-print PuTTY's mp_int type." 
+ name = "mp_int" def __init__(self, val): - super(PuTTYBignumPrettyPrinter, self).__init__(self.name) + super(PuTTYMpintPrettyPrinter, self).__init__(self.name) self.val = val def to_string(self): type_BignumInt = gdb.lookup_type("BignumInt") type_BignumIntPtr = type_BignumInt.pointer() BIGNUM_INT_BITS = 8 * type_BignumInt.sizeof - array = self.val.cast(type_BignumIntPtr) + array = self.val["w"] aget = lambda i: int(array[i]) & ((1 << BIGNUM_INT_BITS)-1) try: - length = aget(0) + length = int(self.val["nw"]) value = 0 for i in range(length): - value |= aget(i+1) << (BIGNUM_INT_BITS * i) - return "Bignum({:#x})".format(value) + value |= aget(i) << (BIGNUM_INT_BITS * i) + return "mp_int({:#x})".format(value) except gdb.MemoryError: - address = int(array) + address = int(self.val) if address == 0: - return "Bignum(NULL)".format(address) - return "Bignum(invalid @ {:#x})".format(address) + return "mp_int(NULL)".format(address) + return "mp_int(invalid @ {:#x})".format(address) -rcpp = gdb.printing.RegexpCollectionPrettyPrinter("PuTTY") -rcpp.add_printer(PuTTYBignumPrettyPrinter.name, "^Bignum$", - PuTTYBignumPrettyPrinter) +class PuTTYPrinterSelector(gdb.printing.PrettyPrinter): + def __init__(self): + super(PuTTYPrinterSelector, self).__init__("PuTTY") + def __call__(self, val): + if str(val.type) == "mp_int *": + return PuTTYMpintPrettyPrinter(val) + return None -gdb.printing.register_pretty_printer(None, rcpp) +gdb.printing.register_pretty_printer(None, PuTTYPrinterSelector()) class MemDumpCommand(gdb.Command): """Print a hex+ASCII dump of object EXP. 
diff --git a/defs.h b/defs.h index 19d501a5..ff76655b 100644 --- a/defs.h +++ b/defs.h @@ -63,6 +63,16 @@ typedef struct TermWinVtable TermWinVtable; typedef struct Ssh Ssh; +typedef struct mp_int mp_int; +typedef struct MontyContext MontyContext; + +typedef struct WeierstrassCurve WeierstrassCurve; +typedef struct WeierstrassPoint WeierstrassPoint; +typedef struct MontgomeryCurve MontgomeryCurve; +typedef struct MontgomeryPoint MontgomeryPoint; +typedef struct EdwardsCurve EdwardsCurve; +typedef struct EdwardsPoint EdwardsPoint; + typedef struct SftpServer SftpServer; typedef struct SftpServerVtable SftpServerVtable; diff --git a/ecc.c b/ecc.c new file mode 100644 index 00000000..753616bd --- /dev/null +++ b/ecc.c @@ -0,0 +1,1112 @@ +#include <assert.h> + +#include "ssh.h" +#include "mpint.h" +#include "ecc.h" + +/* ---------------------------------------------------------------------- + * Weierstrass curves. + */ + +struct WeierstrassPoint { + /* + * Internally, we represent a point using 'Jacobian coordinates', + * which are three values X,Y,Z whose relation to the affine + * coordinates x,y is that x = X/Z^2 and y = Y/Z^3. + * + * This allows us to do most of our calculations without having to + * take an inverse mod p: every time the obvious affine formulae + * would need you to divide by something, you instead multiply it + * into the 'denominator' coordinate Z. You only have to actually + * take the inverse of Z when you need to get the affine + * coordinates back out, which means you do it once after your + * entire computation instead of at every intermediate step. + * + * The point at infinity is represented by setting all three + * coordinates to zero. + * + * These values are also stored in the Montgomery-multiplication + * transformed representation. + */ + mp_int *X, *Y, *Z; + + WeierstrassCurve *wc; +}; + +struct WeierstrassCurve { + /* Prime modulus of the finite field. */ + mp_int *p; + + /* Persistent Montgomery context for doing arithmetic mod p. 
*/ + MontyContext *mc; + + /* Modsqrt context for point decompression. NULL if this curve was + * constructed without providing nonsquare_mod_p. */ + ModsqrtContext *sc; + + /* Parameters of the curve, in Montgomery-multiplication + * transformed form. */ + mp_int *a, *b; +}; + +WeierstrassCurve *ecc_weierstrass_curve( + mp_int *p, mp_int *a, mp_int *b, mp_int *nonsquare_mod_p) +{ + WeierstrassCurve *wc = snew(WeierstrassCurve); + wc->p = mp_copy(p); + wc->mc = monty_new(p); + wc->a = monty_import(wc->mc, a); + wc->b = monty_import(wc->mc, b); + + if (nonsquare_mod_p) + wc->sc = modsqrt_new(p, nonsquare_mod_p); + else + wc->sc = NULL; + + return wc; +} + +void ecc_weierstrass_curve_free(WeierstrassCurve *wc) +{ + mp_free(wc->p); + mp_free(wc->a); + mp_free(wc->b); + monty_free(wc->mc); + if (wc->sc) + modsqrt_free(wc->sc); + sfree(wc); +} + +static WeierstrassPoint *ecc_weierstrass_point_new_empty(WeierstrassCurve *wc) +{ + WeierstrassPoint *wp = snew(WeierstrassPoint); + wp->wc = wc; + wp->X = wp->Y = wp->Z = NULL; + return wp; +} + +static WeierstrassPoint *ecc_weierstrass_point_new_imported( + WeierstrassCurve *wc, mp_int *monty_x, mp_int *monty_y) +{ + WeierstrassPoint *wp = ecc_weierstrass_point_new_empty(wc); + wp->X = monty_x; + wp->Y = monty_y; + wp->Z = mp_copy(monty_identity(wc->mc)); + return wp; +} + +WeierstrassPoint *ecc_weierstrass_point_new( + WeierstrassCurve *wc, mp_int *x, mp_int *y) +{ + return ecc_weierstrass_point_new_imported( + wc, monty_import(wc->mc, x), monty_import(wc->mc, y)); +} + +WeierstrassPoint *ecc_weierstrass_point_new_identity(WeierstrassCurve *wc) +{ + WeierstrassPoint *wp = ecc_weierstrass_point_new_empty(wc); + size_t bits = mp_max_bits(wc->p); + wp->X = mp_new(bits); + wp->Y = mp_new(bits); + wp->Z = mp_new(bits); + return wp; +} + +WeierstrassPoint *ecc_weierstrass_point_copy(WeierstrassPoint *orig) +{ + WeierstrassPoint *wp = ecc_weierstrass_point_new_empty(orig->wc); + wp->X = mp_copy(orig->X); + wp->Y = mp_copy(orig->Y); 
+ wp->Z = mp_copy(orig->Z); + return wp; +} + +void ecc_weierstrass_point_free(WeierstrassPoint *wp) +{ + mp_free(wp->X); + mp_free(wp->Y); + mp_free(wp->Z); + smemclr(wp, sizeof(*wp)); + sfree(wp); +} + +static mp_int *ecc_weierstrass_equation_rhs( + WeierstrassCurve *wc, mp_int *monty_x) +{ + mp_int *x2 = monty_mul(wc->mc, monty_x, monty_x); + mp_int *x2_plus_a = monty_add(wc->mc, x2, wc->a); + mp_int *x3_plus_ax = monty_mul(wc->mc, x2_plus_a, monty_x); + mp_int *rhs = monty_add(wc->mc, x3_plus_ax, wc->b); + mp_free(x2); + mp_free(x2_plus_a); + mp_free(x3_plus_ax); + return rhs; +} + +WeierstrassPoint *ecc_weierstrass_point_new_from_x( + WeierstrassCurve *wc, mp_int *xorig, unsigned desired_y_parity) +{ + assert(wc->sc); + + /* + * The curve equation is y^2 = x^3 + ax + b, which is already + * conveniently in a form where we can compute the RHS and take + * the square root of it to get y. + */ + unsigned success; + + mp_int *x = monty_import(wc->mc, xorig); + mp_int *rhs = ecc_weierstrass_equation_rhs(wc, x); + mp_int *y = monty_modsqrt(wc->sc, rhs, &success); + mp_free(rhs); + + if (!success) { + /* Failure! x^3+ax+b worked out to be a number that has no + * square root mod p. In this situation there's no point in + * trying to be time-constant, since the protocol sequence is + * going to diverge anyway when we complain to whoever gave us + * this bogus value. */ + mp_free(x); + mp_free(y); + return NULL; + } + + /* + * Choose whichever of y and p-y has the specified parity (of its + * lowest positive residue mod p). 
+ */ + mp_int *tmp = monty_export(wc->mc, y); + unsigned flip = (mp_get_bit(tmp, 0) ^ desired_y_parity) & 1; + mp_sub_into(tmp, wc->p, y); + mp_select_into(y, y, tmp, flip); + mp_free(tmp); + + return ecc_weierstrass_point_new_imported(wc, x, y); +} + +static void ecc_weierstrass_cond_overwrite( + WeierstrassPoint *dest, WeierstrassPoint *src, unsigned overwrite) +{ + mp_select_into(dest->X, dest->X, src->X, overwrite); + mp_select_into(dest->Y, dest->Y, src->Y, overwrite); + mp_select_into(dest->Z, dest->Z, src->Z, overwrite); +} + +static void ecc_weierstrass_cond_swap( + WeierstrassPoint *P, WeierstrassPoint *Q, unsigned swap) +{ + mp_cond_swap(P->X, Q->X, swap); + mp_cond_swap(P->Y, Q->Y, swap); + mp_cond_swap(P->Z, Q->Z, swap); +} + +/* + * Shared code between all three of the basic arithmetic functions: + * once we've determined the slope of the line that we're intersecting + * the curve with, this takes care of finding the coordinates of the + * third intersection point (given the two input x-coordinates and one + * of the y-coords) and negating it to generate the output. 
+ */ +static inline void ecc_weierstrass_epilogue( + mp_int *Px, mp_int *Qx, mp_int *Py, mp_int *common_Z, + mp_int *lambda_n, mp_int *lambda_d, WeierstrassPoint *out) +{ + WeierstrassCurve *wc = out->wc; + + /* Powers of the numerator and denominator of the slope lambda */ + mp_int *lambda_n2 = monty_mul(wc->mc, lambda_n, lambda_n); + mp_int *lambda_d2 = monty_mul(wc->mc, lambda_d, lambda_d); + mp_int *lambda_d3 = monty_mul(wc->mc, lambda_d, lambda_d2); + + /* Make the output x-coordinate */ + mp_int *xsum = monty_add(wc->mc, Px, Qx); + mp_int *lambda_d2_xsum = monty_mul(wc->mc, lambda_d2, xsum); + out->X = monty_sub(wc->mc, lambda_n2, lambda_d2_xsum); + + /* Make the output y-coordinate */ + mp_int *lambda_d2_Px = monty_mul(wc->mc, lambda_d2, Px); + mp_int *xdiff = monty_sub(wc->mc, lambda_d2_Px, out->X); + mp_int *lambda_n_xdiff = monty_mul(wc->mc, lambda_n, xdiff); + mp_int *lambda_d3_Py = monty_mul(wc->mc, lambda_d3, Py); + out->Y = monty_sub(wc->mc, lambda_n_xdiff, lambda_d3_Py); + + /* Make the output z-coordinate */ + out->Z = monty_mul(wc->mc, common_Z, lambda_d); + + mp_free(lambda_n2); + mp_free(lambda_d2); + mp_free(lambda_d3); + mp_free(xsum); + mp_free(xdiff); + mp_free(lambda_d2_xsum); + mp_free(lambda_n_xdiff); + mp_free(lambda_d2_Px); + mp_free(lambda_d3_Py); +} + +/* + * Shared code between add and add_general: put the two input points + * over a common denominator, and determine the slope lambda of the + * line through both of them. If the points have the same + * x-coordinate, then the slope will be returned with a zero + * denominator. 
+ */ +static inline void ecc_weierstrass_add_prologue( + WeierstrassPoint *P, WeierstrassPoint *Q, + mp_int **Px, mp_int **Py, mp_int **Qx, mp_int **denom, + mp_int **lambda_n, mp_int **lambda_d) +{ + WeierstrassCurve *wc = P->wc; + + /* Powers of the points' denominators */ + mp_int *Pz2 = monty_mul(wc->mc, P->Z, P->Z); + mp_int *Pz3 = monty_mul(wc->mc, Pz2, P->Z); + mp_int *Qz2 = monty_mul(wc->mc, Q->Z, Q->Z); + mp_int *Qz3 = monty_mul(wc->mc, Qz2, Q->Z); + + /* Points' x,y coordinates scaled by the other one's denominator + * (raised to the appropriate power) */ + *Px = monty_mul(wc->mc, P->X, Qz2); + *Py = monty_mul(wc->mc, P->Y, Qz3); + *Qx = monty_mul(wc->mc, Q->X, Pz2); + mp_int *Qy = monty_mul(wc->mc, Q->Y, Pz3); + + /* Common denominator */ + *denom = monty_mul(wc->mc, P->Z, Q->Z); + + /* Slope of the line through the two points, if P != Q */ + *lambda_n = monty_sub(wc->mc, Qy, *Py); + *lambda_d = monty_sub(wc->mc, *Qx, *Px); + + mp_free(Pz2); + mp_free(Pz3); + mp_free(Qz2); + mp_free(Qz3); + mp_free(Qy); +} + +WeierstrassPoint *ecc_weierstrass_add(WeierstrassPoint *P, WeierstrassPoint *Q) +{ + WeierstrassCurve *wc = P->wc; + assert(Q->wc == wc); + + WeierstrassPoint *S = ecc_weierstrass_point_new_empty(wc); + + mp_int *Px, *Py, *Qx, *denom, *lambda_n, *lambda_d; + ecc_weierstrass_add_prologue( + P, Q, &Px, &Py, &Qx, &denom, &lambda_n, &lambda_d); + + /* Never expect to have received two mutually inverse inputs, or + * two identical ones (which would make this a doubling). In other + * words, the two input x-coordinates (after putting over a common + * denominator) should never have been equal. */ + assert(!mp_eq_integer(lambda_d, 0)); + + /* Now go to the common epilogue code. 
*/ + ecc_weierstrass_epilogue(Px, Qx, Py, denom, lambda_n, lambda_d, S); + + mp_free(Px); + mp_free(Py); + mp_free(Qx); + mp_free(denom); + mp_free(lambda_n); + mp_free(lambda_d); + + return S; +} + +/* + * Code to determine the slope of the line you need to intersect with + * the curve in the case where you're adding a point to itself. In + * this situation you can't just say "the line through both input + * points" because that's under-determined; instead, you have to take + * the _tangent_ to the curve at the given point, by differentiating + * the curve equation y^2=x^3+ax+b to get 2y dy/dx = 3x^2+a. + */ +static inline void ecc_weierstrass_tangent_slope( + WeierstrassPoint *P, mp_int **lambda_n, mp_int **lambda_d) +{ + WeierstrassCurve *wc = P->wc; + + mp_int *X2 = monty_mul(wc->mc, P->X, P->X); + mp_int *twoX2 = monty_add(wc->mc, X2, X2); + mp_int *threeX2 = monty_add(wc->mc, twoX2, X2); + mp_int *Z2 = monty_mul(wc->mc, P->Z, P->Z); + mp_int *Z4 = monty_mul(wc->mc, Z2, Z2); + mp_int *aZ4 = monty_mul(wc->mc, wc->a, Z4); + + *lambda_n = monty_add(wc->mc, threeX2, aZ4); + *lambda_d = monty_add(wc->mc, P->Y, P->Y); + + mp_free(X2); + mp_free(twoX2); + mp_free(threeX2); + mp_free(Z2); + mp_free(Z4); + mp_free(aZ4); +} + +WeierstrassPoint *ecc_weierstrass_double(WeierstrassPoint *P) +{ + WeierstrassCurve *wc = P->wc; + WeierstrassPoint *D = ecc_weierstrass_point_new_empty(wc); + + mp_int *lambda_n, *lambda_d; + ecc_weierstrass_tangent_slope(P, &lambda_n, &lambda_d); + ecc_weierstrass_epilogue(P->X, P->X, P->Y, P->Z, lambda_n, lambda_d, D); + mp_free(lambda_n); + mp_free(lambda_d); + + return D; +} + +static inline void ecc_weierstrass_select_into( + WeierstrassPoint *dest, WeierstrassPoint *P, WeierstrassPoint *Q, + unsigned choose_Q) +{ + mp_select_into(dest->X, P->X, Q->X, choose_Q); + mp_select_into(dest->Y, P->Y, Q->Y, choose_Q); + mp_select_into(dest->Z, P->Z, Q->Z, choose_Q); +} + +WeierstrassPoint *ecc_weierstrass_add_general( + WeierstrassPoint *P, 
WeierstrassPoint *Q) +{ + WeierstrassCurve *wc = P->wc; + assert(Q->wc == wc); + + WeierstrassPoint *S = ecc_weierstrass_point_new_empty(wc); + + /* Parameters for the epilogue, and slope of the line if P != Q */ + mp_int *Px, *Py, *Qx, *denom, *lambda_n, *lambda_d; + ecc_weierstrass_add_prologue( + P, Q, &Px, &Py, &Qx, &denom, &lambda_n, &lambda_d); + + /* Slope if P == Q */ + mp_int *lambda_n_tangent, *lambda_d_tangent; + ecc_weierstrass_tangent_slope(P, &lambda_n_tangent, &lambda_d_tangent); + + /* Select between those slopes depending on whether P == Q */ + unsigned same_x_coord = mp_eq_integer(lambda_d, 0); + unsigned same_y_coord = mp_eq_integer(lambda_n, 0); + unsigned equality = same_x_coord & same_y_coord; + mp_select_into(lambda_n, lambda_n, lambda_n_tangent, equality); + mp_select_into(lambda_d, lambda_d, lambda_d_tangent, equality); + + /* Now go to the common code between addition and doubling */ + ecc_weierstrass_epilogue(Px, Qx, Py, denom, lambda_n, lambda_d, S); + + /* Check for the input identity cases, and overwrite the output if + * necessary. */ + ecc_weierstrass_select_into(S, S, Q, mp_eq_integer(P->Z, 0)); + ecc_weierstrass_select_into(S, S, P, mp_eq_integer(Q->Z, 0)); + + /* + * In the case where P == -Q and so the output is the identity, + * we'll have calculated lambda_d = 0 and so the output will have + * z==0 already. Detect that and use it to normalise the other two + * coordinates to zero. 
+ */ + unsigned output_id = mp_eq_integer(S->Z, 0); + mp_cond_clear(S->X, output_id); + mp_cond_clear(S->Y, output_id); + + mp_free(Px); + mp_free(Py); + mp_free(Qx); + mp_free(denom); + mp_free(lambda_n); + mp_free(lambda_d); + mp_free(lambda_n_tangent); + mp_free(lambda_d_tangent); + + return S; +} + +WeierstrassPoint *ecc_weierstrass_multiply(WeierstrassPoint *B, mp_int *n) +{ + WeierstrassPoint *two_B = ecc_weierstrass_double(B); + WeierstrassPoint *k_B = ecc_weierstrass_point_copy(B); + WeierstrassPoint *kplus1_B = ecc_weierstrass_point_copy(two_B); + + /* + * This multiply routine more or less follows the shape of the + * 'Montgomery ladder' technique that you have to use under the + * extra constraint on addition in Montgomery curves, because it + * was fresh in my mind and easier to just do it the same way. See + * the comment in ecc_montgomery_multiply. + */ + + unsigned not_started_yet = 1; + for (size_t bitindex = mp_max_bits(n); bitindex-- > 0 ;) { + unsigned nbit = mp_get_bit(n, bitindex); + + WeierstrassPoint *sum = ecc_weierstrass_add(k_B, kplus1_B); + ecc_weierstrass_cond_swap(k_B, kplus1_B, nbit); + WeierstrassPoint *other = ecc_weierstrass_double(k_B); + ecc_weierstrass_point_free(k_B); + ecc_weierstrass_point_free(kplus1_B); + k_B = other; + kplus1_B = sum; + ecc_weierstrass_cond_swap(k_B, kplus1_B, nbit); + + ecc_weierstrass_cond_overwrite(k_B, B, not_started_yet); + ecc_weierstrass_cond_overwrite(kplus1_B, two_B, not_started_yet); + not_started_yet &= ~nbit; + } + + ecc_weierstrass_point_free(two_B); + ecc_weierstrass_point_free(kplus1_B); + return k_B; +} + +unsigned ecc_weierstrass_is_identity(WeierstrassPoint *wp) +{ + return mp_eq_integer(wp->Z, 0); +} + +/* + * Normalise a point by scaling its Jacobian coordinates so that Z=1. + * This doesn't change what point is represented by the triple, but it + * means the affine x,y can now be easily recovered from X and Y. 
+ */ +static void ecc_weierstrass_normalise(WeierstrassPoint *wp) +{ + WeierstrassCurve *wc = wp->wc; + mp_int *zinv = monty_invert(wc->mc, wp->Z); + mp_int *zinv2 = monty_mul(wc->mc, zinv, zinv); + mp_int *zinv3 = monty_mul(wc->mc, zinv2, zinv); + monty_mul_into(wc->mc, wp->X, wp->X, zinv2); + monty_mul_into(wc->mc, wp->Y, wp->Y, zinv3); + mp_free(zinv); + mp_free(zinv2); + mp_free(zinv3); + mp_copy_into(wp->Z, monty_identity(wc->mc)); +} + +void ecc_weierstrass_get_affine( + WeierstrassPoint *wp, mp_int **x, mp_int **y) +{ + WeierstrassCurve *wc = wp->wc; + + ecc_weierstrass_normalise(wp); + + if (x) + *x = monty_export(wc->mc, wp->X); + if (y) + *y = monty_export(wc->mc, wp->Y); +} + +unsigned ecc_weierstrass_point_valid(WeierstrassPoint *P) +{ + mp_int *rhs = ecc_weierstrass_equation_rhs(P->wc, P->X); + mp_int *lhs = monty_mul(P->wc->mc, P->Y, P->Y); + unsigned valid = mp_cmp_eq(lhs, rhs); + mp_free(lhs); + mp_free(rhs); + return valid; +} + +/* ---------------------------------------------------------------------- + * Montgomery curves. + */ + +struct MontgomeryPoint { + /* XZ coordinates. These represent the affine x coordinate by the + * relationship x = X/Z. */ + mp_int *X, *Z; + + MontgomeryCurve *mc; +}; + +struct MontgomeryCurve { + /* Prime modulus of the finite field. */ + mp_int *p; + + /* Montgomery context for arithmetic mod p. */ + MontyContext *mc; + + /* Parameters of the curve, in Montgomery-multiplication + * transformed form. */ + mp_int *a, *b; + + /* (a+2)/4, also in Montgomery-multiplication form. 
*/ + mp_int *aplus2over4; +}; + +MontgomeryCurve *ecc_montgomery_curve( + mp_int *p, mp_int *a, mp_int *b) +{ + MontgomeryCurve *mc = snew(MontgomeryCurve); + mc->p = mp_copy(p); + mc->mc = monty_new(p); + mc->a = monty_import(mc->mc, a); + mc->b = monty_import(mc->mc, b); + + mp_int *four = mp_from_integer(4); + mp_int *fourinverse = mp_invert(four, mc->p); + mp_int *aplus2 = mp_copy(a); + mp_add_integer_into(aplus2, aplus2, 2); + mp_int *aplus2over4 = mp_modmul(aplus2, fourinverse, mc->p); + mc->aplus2over4 = monty_import(mc->mc, aplus2over4); + mp_free(four); + mp_free(fourinverse); + mp_free(aplus2); + mp_free(aplus2over4); + + return mc; +} + +void ecc_montgomery_curve_free(MontgomeryCurve *mc) +{ + mp_free(mc->p); + mp_free(mc->a); + mp_free(mc->b); + mp_free(mc->aplus2over4); + monty_free(mc->mc); + sfree(mc); +} + +static MontgomeryPoint *ecc_montgomery_point_new_empty(MontgomeryCurve *mc) +{ + MontgomeryPoint *mp = snew(MontgomeryPoint); + mp->mc = mc; + mp->X = mp->Z = NULL; + return mp; +} + +MontgomeryPoint *ecc_montgomery_point_new(MontgomeryCurve *mc, mp_int *x) +{ + MontgomeryPoint *mp = ecc_montgomery_point_new_empty(mc); + mp->X = monty_import(mc->mc, x); + mp->Z = mp_copy(monty_identity(mc->mc)); + return mp; +} + +MontgomeryPoint *ecc_montgomery_point_copy(MontgomeryPoint *orig) +{ + MontgomeryPoint *mp = ecc_montgomery_point_new_empty(orig->mc); + mp->X = mp_copy(orig->X); + mp->Z = mp_copy(orig->Z); + return mp; +} + +void ecc_montgomery_point_free(MontgomeryPoint *mp) +{ + mp_free(mp->X); + mp_free(mp->Z); + smemclr(mp, sizeof(*mp)); + sfree(mp); +} + +static void ecc_montgomery_cond_overwrite( + MontgomeryPoint *dest, MontgomeryPoint *src, unsigned overwrite) +{ + mp_select_into(dest->X, dest->X, src->X, overwrite); + mp_select_into(dest->Z, dest->Z, src->Z, overwrite); +} + +static void ecc_montgomery_cond_swap( + MontgomeryPoint *P, MontgomeryPoint *Q, unsigned swap) +{ + mp_cond_swap(P->X, Q->X, swap); + mp_cond_swap(P->Z, Q->Z, swap); +} 
+ +MontgomeryPoint *ecc_montgomery_diff_add( + MontgomeryPoint *P, MontgomeryPoint *Q, MontgomeryPoint *PminusQ) +{ + MontgomeryCurve *mc = P->mc; + assert(Q->mc == mc); + assert(PminusQ->mc == mc); + + /* + * Differential addition is achieved using the following formula + * that relates the affine x-coordinates of P, Q, P+Q and P-Q: + * + * x(P+Q) x(P-Q) (x(Q)-x(P))^2 = (x(P)x(Q) - 1)^2 + * + * As with the Weierstrass coordinates, the code below transforms + * that affine relation into a projective one to avoid having to + * do a division during the main arithmetic. + */ + + MontgomeryPoint *S = ecc_montgomery_point_new_empty(mc); + + mp_int *Px_m_Pz = monty_sub(mc->mc, P->X, P->Z); + mp_int *Px_p_Pz = monty_add(mc->mc, P->X, P->Z); + mp_int *Qx_m_Qz = monty_sub(mc->mc, Q->X, Q->Z); + mp_int *Qx_p_Qz = monty_add(mc->mc, Q->X, Q->Z); + mp_int *PmQp = monty_mul(mc->mc, Px_m_Pz, Qx_p_Qz); + mp_int *PpQm = monty_mul(mc->mc, Px_p_Pz, Qx_m_Qz); + mp_int *Xpre = monty_add(mc->mc, PmQp, PpQm); + mp_int *Zpre = monty_sub(mc->mc, PmQp, PpQm); + mp_int *Xpre2 = monty_mul(mc->mc, Xpre, Xpre); + mp_int *Zpre2 = monty_mul(mc->mc, Zpre, Zpre); + S->X = monty_mul(mc->mc, Xpre2, PminusQ->Z); + S->Z = monty_mul(mc->mc, Zpre2, PminusQ->X); + + mp_free(Px_m_Pz); + mp_free(Px_p_Pz); + mp_free(Qx_m_Qz); + mp_free(Qx_p_Qz); + mp_free(PmQp); + mp_free(PpQm); + mp_free(Xpre); + mp_free(Zpre); + mp_free(Xpre2); + mp_free(Zpre2); + + return S; +} + +MontgomeryPoint *ecc_montgomery_double(MontgomeryPoint *P) +{ + MontgomeryCurve *mc = P->mc; + MontgomeryPoint *D = ecc_montgomery_point_new_empty(mc); + + /* + * To double a point in affine coordinates, in principle you can + * use the same technique as for Weierstrass: differentiate the + * curve equation to get the tangent line at the input point, use + * that to get an expression for y which you substitute back into + * the curve equation, and subtract the known two roots (in this + * case both the same) from the x^2 coefficient of the 
resulting + * cubic. + * + * In this case, we don't have an input y-coordinate, so you have + * to do a bit of extra transformation to find a formula that can + * work without it. The tangent formula is (3x^2 + 2ax + 1)/(2y), + * and when that appears in the final formula it will be squared - + * so we can substitute the y^2 in the denominator for the RHS of + * the curve equation. Put together, that gives + * + * x_out = (x+1)^2 (x-1)^2 / 4(x^3+ax^2+x) + * + * and, as usual, the code below transforms that into projective + * form to avoid the division. + */ + + mp_int *Px_m_Pz = monty_sub(mc->mc, P->X, P->Z); + mp_int *Px_p_Pz = monty_add(mc->mc, P->X, P->Z); + mp_int *Px_m_Pz_2 = monty_mul(mc->mc, Px_m_Pz, Px_m_Pz); + mp_int *Px_p_Pz_2 = monty_mul(mc->mc, Px_p_Pz, Px_p_Pz); + D->X = monty_mul(mc->mc, Px_m_Pz_2, Px_p_Pz_2); + mp_int *XZ = monty_mul(mc->mc, P->X, P->Z); + mp_int *twoXZ = monty_add(mc->mc, XZ, XZ); + mp_int *fourXZ = monty_add(mc->mc, twoXZ, twoXZ); + mp_int *fourXZ_scaled = monty_mul(mc->mc, fourXZ, mc->aplus2over4); + mp_int *Zpre = monty_add(mc->mc, Px_m_Pz_2, fourXZ_scaled); + D->Z = monty_mul(mc->mc, fourXZ, Zpre); + + mp_free(Px_m_Pz); + mp_free(Px_p_Pz); + mp_free(Px_m_Pz_2); + mp_free(Px_p_Pz_2); + mp_free(XZ); + mp_free(twoXZ); + mp_free(fourXZ); + mp_free(fourXZ_scaled); + mp_free(Zpre); + + return D; +} + +static void ecc_montgomery_normalise(MontgomeryPoint *mp) +{ + MontgomeryCurve *mc = mp->mc; + mp_int *zinv = monty_invert(mc->mc, mp->Z); + monty_mul_into(mc->mc, mp->X, mp->X, zinv); + mp_free(zinv); + mp_copy_into(mp->Z, monty_identity(mc->mc)); +} + +MontgomeryPoint *ecc_montgomery_multiply(MontgomeryPoint *B, mp_int *n) +{ + /* + * 'Montgomery ladder' technique, to compute an arbitrary integer + * multiple of B under the constraint that you can only add two + * unequal points if you also know their difference. + * + * The setup is that you maintain two curve points one of which is + * always the other one plus B. 
Call them kB and (k+1)B, where k
+     * is some integer that evolves as we go along. We begin by
+     * doubling the input B, to initialise those points to B and 2B,
+     * so that k=1.
+     *
+     * At each stage, we add kB and (k+1)B together - which we can do
+     * under the differential-addition constraint because we know
+     * their difference is always just B - to give us (2k+1)B. Then we
+     * double one of kB or (k+1)B, and depending on which one we
+     * choose, we end up with (2k)B or (2k+2)B. Either way, that
+     * differs by B from the other value we've just computed. So in
+     * each iteration, we do one diff-add and one doubling, plus a
+     * couple of conditional swaps to choose which value we double and
+     * which way round we put the output points, and the effect is to
+     * replace k with either 2k or 2k+1, which we choose based on the
+     * appropriate bit of the desired exponent.
+     *
+     * This routine doesn't assume we know the exact location of the
+     * topmost set bit of the exponent. So to maintain constant time
+     * it does an iteration for every _potential_ bit, starting from
+     * the top downwards; after each iteration in which we haven't
+     * seen a set exponent bit yet, we just overwrite the two points
+     * with B and 2B again.
+     */
+
+    MontgomeryPoint *two_B = ecc_montgomery_double(B);
+    MontgomeryPoint *k_B = ecc_montgomery_point_copy(B);
+    MontgomeryPoint *kplus1_B = ecc_montgomery_point_copy(two_B);
+
+    /* Stays 1 until an iteration has processed a set bit of n */
+    unsigned not_started_yet = 1;
+    for (size_t bitindex = mp_max_bits(n); bitindex-- > 0 ;) {
+        unsigned nbit = mp_get_bit(n, bitindex);
+
+        /* (2k+1)B, computed before the swap disturbs the inputs */
+        MontgomeryPoint *sum = ecc_montgomery_diff_add(k_B, kplus1_B, B);
+        /* If nbit is set, bring (k+1)B into k_B so that it is the
+         * one that gets doubled */
+        ecc_montgomery_cond_swap(k_B, kplus1_B, nbit);
+        MontgomeryPoint *other = ecc_montgomery_double(k_B);
+        ecc_montgomery_point_free(k_B);
+        ecc_montgomery_point_free(kplus1_B);
+        k_B = other;
+        kplus1_B = sum;
+        /* Swap again if nbit is set, so that k_B ends up holding
+         * the smaller multiple */
+        ecc_montgomery_cond_swap(k_B, kplus1_B, nbit);
+
+        /* Until a set bit has been processed, reset to k=1 */
+        ecc_montgomery_cond_overwrite(k_B, B, not_started_yet);
+        ecc_montgomery_cond_overwrite(kplus1_B, two_B,
not_started_yet); + not_started_yet &= ~nbit; + } + + ecc_montgomery_point_free(two_B); + ecc_montgomery_point_free(kplus1_B); + return k_B; +} + +void ecc_montgomery_get_affine(MontgomeryPoint *mp, mp_int **x) +{ + MontgomeryCurve *mc = mp->mc; + + ecc_montgomery_normalise(mp); + + if (x) + *x = monty_export(mc->mc, mp->X); +} + +/* ---------------------------------------------------------------------- + * Twisted Edwards curves. + */ + +struct EdwardsPoint { + /* + * We represent an Edwards curve point in 'extended coordinates'. + * There's more than one coordinate system going by that name, + * unfortunately. These ones have the semantics that X,Y,Z are + * ordinary projective coordinates (so x=X/Z and y=Y/Z), but also, + * we store the extra value T = xyZ = XY/Z. + */ + mp_int *X, *Y, *Z, *T; + + EdwardsCurve *ec; +}; + +struct EdwardsCurve { + /* Prime modulus of the finite field. */ + mp_int *p; + + /* Montgomery context for arithmetic mod p. */ + MontyContext *mc; + + /* Modsqrt context for point decompression. */ + ModsqrtContext *sc; + + /* Parameters of the curve, in Montgomery-multiplication + * transformed form. 
*/ + mp_int *d, *a; +}; + +EdwardsCurve *ecc_edwards_curve(mp_int *p, mp_int *d, mp_int *a, + mp_int *nonsquare_mod_p) +{ + EdwardsCurve *ec = snew(EdwardsCurve); + ec->p = mp_copy(p); + ec->mc = monty_new(p); + ec->d = monty_import(ec->mc, d); + ec->a = monty_import(ec->mc, a); + + if (nonsquare_mod_p) + ec->sc = modsqrt_new(p, nonsquare_mod_p); + else + ec->sc = NULL; + + return ec; +} + +void ecc_edwards_curve_free(EdwardsCurve *ec) +{ + mp_free(ec->p); + mp_free(ec->d); + mp_free(ec->a); + monty_free(ec->mc); + if (ec->sc) + modsqrt_free(ec->sc); + sfree(ec); +} + +static EdwardsPoint *ecc_edwards_point_new_empty(EdwardsCurve *ec) +{ + EdwardsPoint *ep = snew(EdwardsPoint); + ep->ec = ec; + ep->X = ep->Y = ep->Z = ep->T = NULL; + return ep; +} + +static EdwardsPoint *ecc_edwards_point_new_imported( + EdwardsCurve *ec, mp_int *monty_x, mp_int *monty_y) +{ + EdwardsPoint *ep = ecc_edwards_point_new_empty(ec); + ep->X = monty_x; + ep->Y = monty_y; + ep->T = monty_mul(ec->mc, ep->X, ep->Y); + ep->Z = mp_copy(monty_identity(ec->mc)); + return ep; +} + +EdwardsPoint *ecc_edwards_point_new( + EdwardsCurve *ec, mp_int *x, mp_int *y) +{ + return ecc_edwards_point_new_imported( + ec, monty_import(ec->mc, x), monty_import(ec->mc, y)); +} + +EdwardsPoint *ecc_edwards_point_copy(EdwardsPoint *orig) +{ + EdwardsPoint *ep = ecc_edwards_point_new_empty(orig->ec); + ep->X = mp_copy(orig->X); + ep->Y = mp_copy(orig->Y); + ep->Z = mp_copy(orig->Z); + ep->T = mp_copy(orig->T); + return ep; +} + +void ecc_edwards_point_free(EdwardsPoint *ep) +{ + mp_free(ep->X); + mp_free(ep->Y); + mp_free(ep->Z); + mp_free(ep->T); + smemclr(ep, sizeof(*ep)); + sfree(ep); +} + +EdwardsPoint *ecc_edwards_point_new_from_y( + EdwardsCurve *ec, mp_int *yorig, unsigned desired_x_parity) +{ + assert(ec->sc); + + /* + * The curve equation is ax^2 + y^2 = 1 + dx^2y^2, which + * rearranges to x^2(dy^2-a) = y^2-1. So we compute + * (y^2-1)/(dy^2-a) and take its square root. 
+ */ + unsigned success; + + mp_int *y = monty_import(ec->mc, yorig); + mp_int *y2 = monty_mul(ec->mc, y, y); + mp_int *dy2 = monty_mul(ec->mc, ec->d, y2); + mp_int *dy2ma = monty_sub(ec->mc, dy2, ec->a); + mp_int *y2m1 = monty_sub(ec->mc, y2, monty_identity(ec->mc)); + mp_int *recip_denominator = monty_invert(ec->mc, dy2ma); + mp_int *radicand = monty_mul(ec->mc, y2m1, recip_denominator); + mp_int *x = monty_modsqrt(ec->sc, radicand, &success); + mp_free(y2); + mp_free(dy2); + mp_free(dy2ma); + mp_free(y2m1); + mp_free(recip_denominator); + mp_free(radicand); + + if (!success) { + /* Failure! x^2 worked out to be a number that has no square + * root mod p. In this situation there's no point in trying to + * be time-constant, since the protocol sequence is going to + * diverge anyway when we complain to whoever gave us this + * bogus value. */ + mp_free(x); + mp_free(y); + return NULL; + } + + /* + * Choose whichever of x and p-x has the specified parity (of its + * lowest positive residue mod p). 
+ */ + mp_int *tmp = monty_export(ec->mc, x); + unsigned flip = (mp_get_bit(tmp, 0) ^ desired_x_parity) & 1; + mp_sub_into(tmp, ec->p, x); + mp_select_into(x, x, tmp, flip); + mp_free(tmp); + + return ecc_edwards_point_new_imported(ec, x, y); +} + +static void ecc_edwards_cond_overwrite( + EdwardsPoint *dest, EdwardsPoint *src, unsigned overwrite) +{ + mp_select_into(dest->X, dest->X, src->X, overwrite); + mp_select_into(dest->Y, dest->Y, src->Y, overwrite); + mp_select_into(dest->Z, dest->Z, src->Z, overwrite); + mp_select_into(dest->T, dest->T, src->T, overwrite); +} + +static void ecc_edwards_cond_swap( + EdwardsPoint *P, EdwardsPoint *Q, unsigned swap) +{ + mp_cond_swap(P->X, Q->X, swap); + mp_cond_swap(P->Y, Q->Y, swap); + mp_cond_swap(P->Z, Q->Z, swap); + mp_cond_swap(P->T, Q->T, swap); +} + +EdwardsPoint *ecc_edwards_add(EdwardsPoint *P, EdwardsPoint *Q) +{ + EdwardsCurve *ec = P->ec; + assert(Q->ec == ec); + + EdwardsPoint *S = ecc_edwards_point_new_empty(ec); + + /* + * The affine rule for Edwards addition of (x1,y1) and (x2,y2) is + * + * x_out = (x1 y2 + y1 x2) / (1 + d x1 x2 y1 y2) + * y_out = (y1 y2 - a x1 x2) / (1 - d x1 x2 y1 y2) + * + * The formulae below are listed as 'add-2008-hwcd' in + * https://hyperelliptic.org/EFD/g1p/auto-twisted-extended.html + * + * and if you undo the careful optimisation to find out what + * they're actually computing, it comes out to + * + * X_out = (X1 Y2 + Y1 X2) (Z1 Z2 - d T1 T2) + * Y_out = (Y1 Y2 - a X1 X2) (Z1 Z2 + d T1 T2) + * Z_out = (Z1 Z2 - d T1 T2) (Z1 Z2 + d T1 T2) + * T_out = (X1 Y2 + Y1 X2) (Y1 Y2 - a X1 X2) + */ + mp_int *PxQx = monty_mul(ec->mc, P->X, Q->X); + mp_int *PyQy = monty_mul(ec->mc, P->Y, Q->Y); + mp_int *PtQt = monty_mul(ec->mc, P->T, Q->T); + mp_int *PzQz = monty_mul(ec->mc, P->Z, Q->Z); + mp_int *Psum = monty_add(ec->mc, P->X, P->Y); + mp_int *Qsum = monty_add(ec->mc, Q->X, Q->Y); + mp_int *aPxQx = monty_mul(ec->mc, ec->a, PxQx); + mp_int *dPtQt = monty_mul(ec->mc, ec->d, PtQt); + mp_int 
*sumprod = monty_mul(ec->mc, Psum, Qsum); + mp_int *xx_p_yy = monty_add(ec->mc, PxQx, PyQy); + mp_int *E = monty_sub(ec->mc, sumprod, xx_p_yy); + mp_int *F = monty_sub(ec->mc, PzQz, dPtQt); + mp_int *G = monty_add(ec->mc, PzQz, dPtQt); + mp_int *H = monty_sub(ec->mc, PyQy, aPxQx); + S->X = monty_mul(ec->mc, E, F); + S->Z = monty_mul(ec->mc, F, G); + S->Y = monty_mul(ec->mc, G, H); + S->T = monty_mul(ec->mc, H, E); + + mp_free(PxQx); + mp_free(PyQy); + mp_free(PtQt); + mp_free(PzQz); + mp_free(Psum); + mp_free(Qsum); + mp_free(aPxQx); + mp_free(dPtQt); + mp_free(sumprod); + mp_free(xx_p_yy); + mp_free(E); + mp_free(F); + mp_free(G); + mp_free(H); + + return S; +} + +static void ecc_edwards_normalise(EdwardsPoint *ep) +{ + EdwardsCurve *ec = ep->ec; + mp_int *zinv = monty_invert(ec->mc, ep->Z); + monty_mul_into(ec->mc, ep->X, ep->X, zinv); + monty_mul_into(ec->mc, ep->Y, ep->Y, zinv); + mp_free(zinv); + mp_copy_into(ep->Z, monty_identity(ec->mc)); + monty_mul_into(ec->mc, ep->T, ep->X, ep->Y); +} + +EdwardsPoint *ecc_edwards_multiply(EdwardsPoint *B, mp_int *n) +{ + EdwardsPoint *two_B = ecc_edwards_add(B, B); + EdwardsPoint *k_B = ecc_edwards_point_copy(B); + EdwardsPoint *kplus1_B = ecc_edwards_point_copy(two_B); + + /* + * Another copy of the same exponentiation routine following the + * pattern of the Montgomery ladder, because it works as well as + * any other technique and this way I didn't have to debug two of + * them. 
+ */ + + unsigned not_started_yet = 1; + for (size_t bitindex = mp_max_bits(n); bitindex-- > 0 ;) { + unsigned nbit = mp_get_bit(n, bitindex); + + EdwardsPoint *sum = ecc_edwards_add(k_B, kplus1_B); + ecc_edwards_cond_swap(k_B, kplus1_B, nbit); + EdwardsPoint *other = ecc_edwards_add(k_B, k_B); + ecc_edwards_point_free(k_B); + ecc_edwards_point_free(kplus1_B); + k_B = other; + kplus1_B = sum; + ecc_edwards_cond_swap(k_B, kplus1_B, nbit); + + ecc_edwards_cond_overwrite(k_B, B, not_started_yet); + ecc_edwards_cond_overwrite(kplus1_B, two_B, not_started_yet); + not_started_yet &= ~nbit; + } + + ecc_edwards_point_free(two_B); + ecc_edwards_point_free(kplus1_B); + return k_B; +} + +/* + * Helper routine to determine whether two values each given as a pair + * of projective coordinates represent the same affine value. + */ +static inline unsigned projective_eq( + MontyContext *mc, mp_int *An, mp_int *Ad, + mp_int *Bn, mp_int *Bd) +{ + mp_int *AnBd = monty_mul(mc, An, Bd); + mp_int *BnAd = monty_mul(mc, Bn, Ad); + unsigned toret = mp_cmp_eq(AnBd, BnAd); + mp_free(AnBd); + mp_free(BnAd); + return toret; +} + +unsigned ecc_edwards_eq(EdwardsPoint *P, EdwardsPoint *Q) +{ + EdwardsCurve *ec = P->ec; + assert(Q->ec == ec); + + return (projective_eq(ec->mc, P->X, P->Z, Q->X, Q->Z) & + projective_eq(ec->mc, P->Y, P->Z, Q->Y, Q->Z)); +} + +void ecc_edwards_get_affine(EdwardsPoint *ep, mp_int **x, mp_int **y) +{ + EdwardsCurve *ec = ep->ec; + + ecc_edwards_normalise(ep); + + if (x) + *x = monty_export(ec->mc, ep->X); + if (y) + *y = monty_export(ec->mc, ep->Y); +} diff --git a/ecc.h b/ecc.h new file mode 100644 index 00000000..d9e12424 --- /dev/null +++ b/ecc.h @@ -0,0 +1,233 @@ +#ifndef PUTTY_ECC_H +#define PUTTY_ECC_H + +/* + * Arithmetic functions for the various kinds of elliptic curves used + * by PuTTY's public-key cryptography. + * + * All of these elliptic curves are over the finite field whose order + * is a large prime p. 
(Elliptic curves over a field of order 2^n are
+ * also known, but PuTTY currently has no need of them.)
+ */
+
+/* ----------------------------------------------------------------------
+ * Weierstrass curves (or rather, 'short form' Weierstrass curves).
+ *
+ * A curve in this form is defined by two parameters a,b, and the
+ * non-identity points on the curve are represented by (x,y) (the
+ * 'affine coordinates') such that y^2 = x^3 + ax + b.
+ *
+ * The identity element of the curve's group is an additional 'point
+ * at infinity', which is considered to be the third point on the
+ * intersection of the curve with any vertical line. Hence, the
+ * inverse of the point (x,y) is (x,-y).
+ */
+
+/*
+ * Create and destroy Weierstrass curve data structures. The mandatory
+ * parameters to the constructor are the prime modulus p, and the
+ * curve parameters a,b.
+ *
+ * 'nonsquare_mod_p' is an optional extra parameter, only needed by
+ * ecc_weierstrass_point_new_from_x which has to take a modular square
+ * root. You can pass it as NULL if you don't need that function.
+ */
+WeierstrassCurve *ecc_weierstrass_curve(
+    mp_int *p, mp_int *a, mp_int *b, mp_int *nonsquare_mod_p);
+void ecc_weierstrass_curve_free(WeierstrassCurve *);
+
+/*
+ * Create points on a Weierstrass curve, given the curve.
+ *
+ * point_new_identity returns the special identity point.
+ * point_new(x,y) returns the non-identity point with the given affine
+ * coordinates.
+ *
+ * point_new_from_x constructs a non-identity point given only the
+ * x-coordinate, by using the curve equation to work out what y has to
+ * be. Of course the equation only tells you y^2, so it only
+ * determines y up to sign; the parameter desired_y_parity controls
+ * which of the two values of y you get, by saying whether you'd like
+ * its minimal non-negative residue mod p to be even or odd. (Of
+ * course, since p itself is odd, exactly one of y and p-y is odd.)
+ * This function has to take a modular square root, so it will only
+ * work if you passed in a non-square mod p when constructing the
+ * curve.
+ */
+WeierstrassPoint *ecc_weierstrass_point_new_identity(WeierstrassCurve *curve);
+WeierstrassPoint *ecc_weierstrass_point_new(
+    WeierstrassCurve *curve, mp_int *x, mp_int *y);
+WeierstrassPoint *ecc_weierstrass_point_new_from_x(
+    WeierstrassCurve *curve, mp_int *x, unsigned desired_y_parity);
+
+/* Memory management: copy and free points. */
+WeierstrassPoint *ecc_weierstrass_point_copy(WeierstrassPoint *wc);
+void ecc_weierstrass_point_free(WeierstrassPoint *point);
+
+/* Check whether a point is actually on the curve. */
+unsigned ecc_weierstrass_point_valid(WeierstrassPoint *);
+
+/*
+ * Add two points and return their sum. This function is fully
+ * general: it should do the right thing if the two inputs are the
+ * same, or if either (or both) of the input points is the identity,
+ * or if the two input points are inverses so the output is the
+ * identity. However, it pays for that generality by being slower than
+ * the special-purpose functions below.
+ */
+WeierstrassPoint *ecc_weierstrass_add_general(
+    WeierstrassPoint *, WeierstrassPoint *);
+
+/*
+ * Fast but less general arithmetic functions: add two points on the
+ * condition that they are not equal, are not inverses of each other,
+ * and neither is the identity; and add a point to itself.
+ */
+WeierstrassPoint *ecc_weierstrass_add(WeierstrassPoint *, WeierstrassPoint *);
+WeierstrassPoint *ecc_weierstrass_double(WeierstrassPoint *);
+
+/*
+ * Compute an integer multiple of a point. Not guaranteed to work
+ * unless the integer argument is less than the order of the point in
+ * the group (because it won't cope if an identity element shows up in
+ * any intermediate product).
+ */
+WeierstrassPoint *ecc_weierstrass_multiply(WeierstrassPoint *, mp_int *);
+
+/*
+ * Query functions to get the value of a point back out.
is_identity
+ * tells you whether the point is the identity; if it isn't, then
+ * get_affine will retrieve one or both of its affine coordinates.
+ * (You can pass NULL as either output pointer, if you don't need that
+ * coordinate as output.)
+ *
+ * Note that get_affine normalises the point's stored coordinates in
+ * place as a side effect, although the point it represents is
+ * unchanged.
+ */
+unsigned ecc_weierstrass_is_identity(WeierstrassPoint *wp);
+void ecc_weierstrass_get_affine(WeierstrassPoint *wp, mp_int **x, mp_int **y);
+
+/* ----------------------------------------------------------------------
+ * Montgomery curves.
+ *
+ * A curve in this form is defined by two parameters a,b, and the
+ * curve equation is by^2 = x^3 + ax^2 + x.
+ *
+ * As with Weierstrass curves, there's an additional point at infinity
+ * that is the identity element, and the inverse of (x,y) is (x,-y).
+ *
+ * However, we don't actually work with full (x,y) pairs. We just
+ * store the x-coordinate (so what we're really representing is not a
+ * specific point on the curve but a two-point set {P,-P}). This means
+ * you can't quite do point addition, because if you're given {P,-P}
+ * and {Q,-Q} as input, you can work out a pair of x-coordinates that
+ * are those of P-Q and P+Q, but you don't know which is which.
+ *
+ * Instead, the basic operation is 'differential addition', in which
+ * you are given three parameters P, Q and P-Q and you return P+Q. (As
+ * well as disambiguating which of the possible answers you want, that
+ * extra input also enables a fast formula for computing it. This
+ * fast formula is more or less why Montgomery curves are useful in
+ * the first place.)
+ *
+ * Doubling a point is still possible to do unambiguously, so you can
+ * still compute an integer multiple of P if you start by making 2P
+ * and then doing a series of differential additions.
+ */
+
+/*
+ * Create and destroy Montgomery curve data structures.
+ */ +MontgomeryCurve *ecc_montgomery_curve(mp_int *p, mp_int *a, mp_int *b); +void ecc_montgomery_curve_free(MontgomeryCurve *); + +/* + * Create, copy and free points on the curve. We don't need to + * explicitly represent the identity for this application. + */ +MontgomeryPoint *ecc_montgomery_point_new(MontgomeryCurve *mc, mp_int *x); +MontgomeryPoint *ecc_montgomery_point_copy(MontgomeryPoint *orig); +void ecc_montgomery_point_free(MontgomeryPoint *mp); + +/* + * Basic arithmetic routines: differential addition and point- + * doubling. Each of these assumes that no special cases come up - no + * input or output point should be the identity, and in diff_add, P + * and Q shouldn't be the same. + */ +MontgomeryPoint *ecc_montgomery_diff_add( + MontgomeryPoint *P, MontgomeryPoint *Q, MontgomeryPoint *PminusQ); +MontgomeryPoint *ecc_montgomery_double(MontgomeryPoint *P); + +/* + * Compute an integer multiple of a point. + */ +MontgomeryPoint *ecc_montgomery_multiply(MontgomeryPoint *, mp_int *); + +/* + * Return the affine x-coordinate of a point. + */ +void ecc_montgomery_get_affine(MontgomeryPoint *mp, mp_int **x); + +/* ---------------------------------------------------------------------- + * Twisted Edwards curves. + * + * A curve in this form is defined by two parameters d,a, and the + * curve equation is a x^2 + y^2 = 1 + d x^2 y^2. + * + * Apparently if you ask a proper algebraic geometer they'll tell you + * that this is technically not an actual elliptic curve. Certainly it + * doesn't work quite the same way as the other kinds: in this form, + * there is no need for a point at infinity, because the identity + * element is represented by the affine coordinates (0,1). And you + * invert a point by negating its x rather than y coordinate: the + * inverse of (x,y) is (-x,y). 
+ * + * The usefulness of this representation is that the addition formula + * is 'strongly unified', meaning that the same formula works for any + * input and output points, without needing special cases for the + * identity or for doubling. + */ + +/* + * Create and destroy Edwards curve data structures. + * + * Similarly to ecc_weierstrass_curve, you don't have to provide + * nonsquare_mod_p if you don't need ecc_edwards_point_new_from_y. + */ +EdwardsCurve *ecc_edwards_curve( + mp_int *p, mp_int *d, mp_int *a, mp_int *nonsquare_mod_p); +void ecc_edwards_curve_free(EdwardsCurve *); + +/* + * Create points. + * + * There's no need to have a separate function to create the identity + * point, because you can just pass x=0 and y=1 to the usual function. + * + * Similarly to the Weierstrass curve, ecc_edwards_point_new_from_y + * creates a point given only its y-coordinate and the desired parity + * of its x-coordinate, and you can only call it if you provided the + * optional nonsquare_mod_p argument when creating the curve. + */ +EdwardsPoint *ecc_edwards_point_new( + EdwardsCurve *curve, mp_int *x, mp_int *y); +EdwardsPoint *ecc_edwards_point_new_from_y( + EdwardsCurve *curve, mp_int *y, unsigned desired_x_parity); + +/* Copy and free points. */ +EdwardsPoint *ecc_edwards_point_copy(EdwardsPoint *ec); +void ecc_edwards_point_free(EdwardsPoint *point); + +/* + * Arithmetic: add two points, and calculate an integer multiple of a + * point. + */ +EdwardsPoint *ecc_edwards_add(EdwardsPoint *, EdwardsPoint *); +EdwardsPoint *ecc_edwards_multiply(EdwardsPoint *, mp_int *); + +/* + * Query functions: compare two points for equality, and return the + * affine coordinates of a point. 
+ */ +unsigned ecc_edwards_eq(EdwardsPoint *, EdwardsPoint *); +void ecc_edwards_get_affine(EdwardsPoint *wp, mp_int **x, mp_int **y); + +#endif /* PUTTY_ECC_H */ diff --git a/import.c b/import.c index e3ba53c4..71caa859 100644 --- a/import.c +++ b/import.c @@ -10,6 +10,7 @@ #include "putty.h" #include "ssh.h" +#include "mpint.h" #include "misc.h" static bool openssh_pem_encrypted(const Filename *file); @@ -815,7 +816,7 @@ static bool openssh_pem_write( */ if (ssh_key_alg(key->key) == &ssh_rsa) { ptrlen n, e, d, p, q, iqmp, dmp1, dmq1; - Bignum bd, bp, bq, bdmp1, bdmq1; + mp_int *bd, *bp, *bq, *bdmp1, *bdmq1; /* * These blobs were generated from inside PuTTY, so we needn't @@ -834,29 +835,29 @@ static bool openssh_pem_write( assert(!get_err(src)); /* can't go wrong */ /* We also need d mod (p-1) and d mod (q-1). */ - bd = bignum_from_bytes(d.ptr, d.len); - bp = bignum_from_bytes(p.ptr, p.len); - bq = bignum_from_bytes(q.ptr, q.len); - decbn(bp); - decbn(bq); - bdmp1 = bigmod(bd, bp); - bdmq1 = bigmod(bd, bq); - freebn(bd); - freebn(bp); - freebn(bq); + bd = mp_from_bytes_be(d); + bp = mp_from_bytes_be(p); + bq = mp_from_bytes_be(q); + mp_sub_integer_into(bp, bp, 1); + mp_sub_integer_into(bq, bq, 1); + bdmp1 = mp_mod(bd, bp); + bdmq1 = mp_mod(bd, bq); + mp_free(bd); + mp_free(bp); + mp_free(bq); - dmp1.len = (bignum_bitcount(bdmp1)+8)/8; - dmq1.len = (bignum_bitcount(bdmq1)+8)/8; + dmp1.len = (mp_get_nbits(bdmp1)+8)/8; + dmq1.len = (mp_get_nbits(bdmq1)+8)/8; sparelen = dmp1.len + dmq1.len; spareblob = snewn(sparelen, unsigned char); dmp1.ptr = spareblob; dmq1.ptr = spareblob + dmp1.len; for (i = 0; i < dmp1.len; i++) - spareblob[i] = bignum_byte(bdmp1, dmp1.len-1 - i); + spareblob[i] = mp_get_byte(bdmp1, dmp1.len-1 - i); for (i = 0; i < dmq1.len; i++) - spareblob[i+dmp1.len] = bignum_byte(bdmq1, dmq1.len-1 - i); - freebn(bdmp1); - freebn(bdmq1); + spareblob[i+dmp1.len] = mp_get_byte(bdmq1, dmq1.len-1 - i); + mp_free(bdmp1); + mp_free(bdmq1); numbers[0] = 
make_ptrlen(zero, 1); zero[0] = '\0'; numbers[1] = n; @@ -913,7 +914,7 @@ static bool openssh_pem_write( ssh_key_alg(key->key) == &ssh_ecdsa_nistp384 || ssh_key_alg(key->key) == &ssh_ecdsa_nistp521) { const unsigned char *oid; - struct ec_key *ec = container_of(key->key, struct ec_key, sshk); + struct ecdsa_key *ec = container_of(key->key, struct ecdsa_key, sshk); int oidlen; int pointlen; strbuf *seq, *sub; @@ -929,7 +930,7 @@ static bool openssh_pem_write( * BIT STRING (0x00 public key point) */ oid = ec_alg_oid(ssh_key_alg(key->key), &oidlen); - pointlen = (ec->publicKey.curve->fieldBits + 7) / 8 * 2; + pointlen = (ec->curve->fieldBits + 7) / 8 * 2; seq = strbuf_new(); diff --git a/marshal.h b/marshal.h index e19cd0b9..400354c4 100644 --- a/marshal.h +++ b/marshal.h @@ -153,6 +153,8 @@ struct strbuf; void BinarySink_put_stringsb(BinarySink *, struct strbuf *); void BinarySink_put_asciz(BinarySink *, const char *str); bool BinarySink_put_pstring(BinarySink *, const char *str); +void BinarySink_put_mp_ssh1(BinarySink *bs, mp_int *x); +void BinarySink_put_mp_ssh2(BinarySink *bs, mp_int *x); /* ---------------------------------------------------------------------- */ @@ -195,7 +197,7 @@ struct BinarySource { * types. * * If the usual return value is dynamically allocated (e.g. a - * Bignum, or a normal C 'char *' string), then the error value is + * bignum, or a normal C 'char *' string), then the error value is * also dynamic in the same way. 
/*
 * Two tiny helpers for picking the lesser or the greater of a pair of
 * size_t values; the rest of this file leans on them heavily. If the
 * two arguments are equal, that common value is returned.
 */
static inline size_t size_t_min(size_t a, size_t b)
{
    if (b < a)
        return b;
    return a;
}
static inline size_t size_t_max(size_t a, size_t b)
{
    if (b > a)
        return b;
    return a;
}
x->w[i] : 0; +} + +static mp_int *mp_make_sized(size_t nw) +{ + mp_int *x = snew_plus(mp_int, nw * sizeof(BignumInt)); + x->nw = nw; + x->w = snew_plus_get_aux(x); + mp_clear(x); + return x; +} + +mp_int *mp_new(size_t maxbits) +{ + size_t words = (maxbits + BIGNUM_INT_BITS - 1) / BIGNUM_INT_BITS; + return mp_make_sized(words); +} + +mp_int *mp_from_integer(uintmax_t n) +{ + mp_int *x = mp_make_sized( + (sizeof(n) + BIGNUM_INT_BYTES - 1) / BIGNUM_INT_BYTES); + for (size_t i = 0; i < x->nw; i++) + x->w[i] = n >> (i * BIGNUM_INT_BITS); + return x; +} + +size_t mp_max_bytes(mp_int *x) +{ + return x->nw * BIGNUM_INT_BYTES; +} + +size_t mp_max_bits(mp_int *x) +{ + return x->nw * BIGNUM_INT_BITS; +} + +void mp_free(mp_int *x) +{ + mp_clear(x); + smemclr(x, sizeof(*x)); + sfree(x); +} + +void mp_dump(FILE *fp, const char *prefix, mp_int *x, const char *suffix) +{ + fprintf(fp, "%s0x", prefix); + for (size_t i = mp_max_bytes(x); i-- > 0 ;) + fprintf(fp, "%02X", mp_get_byte(x, i)); + fputs(suffix, fp); +} + +void mp_copy_into(mp_int *dest, mp_int *src) +{ + size_t copy_nw = size_t_min(dest->nw, src->nw); + memmove(dest->w, src->w, copy_nw * sizeof(BignumInt)); + smemclr(dest->w + copy_nw, (dest->nw - copy_nw) * sizeof(BignumInt)); +} + +/* + * Conditional selection is done by negating 'which', to give a mask + * word which is all 1s if which==1 and all 0s if which==0. Then you + * can select between two inputs a,b without data-dependent control + * flow by XORing them to get their difference; ANDing with the mask + * word to replace that difference with 0 if which==0; and XORing that + * into a, which will either turn it into b or leave it alone. + * + * This trick will be used throughout this code and taken as read the + * rest of the time (or else I'd be here all week typing comments), + * but I felt I ought to explain it in words _once_. 
+ */ +void mp_select_into(mp_int *dest, mp_int *src0, mp_int *src1, + unsigned which) +{ + BignumInt mask = -(BignumInt)(1 & which); + for (size_t i = 0; i < dest->nw; i++) { + BignumInt srcword0 = mp_word(src0, i), srcword1 = mp_word(src1, i); + dest->w[i] = srcword0 ^ ((srcword1 ^ srcword0) & mask); + } +} + +void mp_cond_swap(mp_int *x0, mp_int *x1, unsigned swap) +{ + assert(x0->nw == x1->nw); + BignumInt mask = -(BignumInt)(1 & swap); + for (size_t i = 0; i < x0->nw; i++) { + BignumInt diff = (x0->w[i] ^ x1->w[i]) & mask; + x0->w[i] ^= diff; + x1->w[i] ^= diff; + } +} + +void mp_clear(mp_int *x) +{ + smemclr(x->w, x->nw * sizeof(BignumInt)); +} + +void mp_cond_clear(mp_int *x, unsigned clear) +{ + BignumInt mask = ~-(BignumInt)(1 & clear); + for (size_t i = 0; i < x->nw; i++) + x->w[i] &= mask; +} + +/* + * Common code between mp_from_bytes_{le,be} which reads bytes in an + * arbitrary arithmetic progression. + */ +static mp_int *mp_from_bytes_int(ptrlen bytes, size_t m, size_t c) +{ + mp_int *n = mp_make_sized( + (bytes.len + BIGNUM_INT_BYTES - 1) / BIGNUM_INT_BYTES); + for (size_t i = 0; i < bytes.len; i++) + n->w[i / BIGNUM_INT_BYTES] |= + (BignumInt)(((const unsigned char *)bytes.ptr)[m*i+c]) << + (8 * (i % BIGNUM_INT_BYTES)); + return n; +} + +mp_int *mp_from_bytes_le(ptrlen bytes) +{ + return mp_from_bytes_int(bytes, 1, 0); +} + +mp_int *mp_from_bytes_be(ptrlen bytes) +{ + return mp_from_bytes_int(bytes, -1, bytes.len - 1); +} + +static mp_int *mp_from_words(size_t nw, const BignumInt *w) +{ + mp_int *x = mp_make_sized(nw); + memcpy(x->w, w, x->nw * sizeof(BignumInt)); + return x; +} + +/* + * Decimal-to-binary conversion: just go through the input string + * adding on the decimal value of each digit, and then multiplying the + * number so far by 10. 
+ */ +mp_int *mp_from_decimal_pl(ptrlen decimal) +{ + /* 196/59 is an upper bound (and also a continued-fraction + * convergent) for log2(10), so this conservatively estimates the + * number of bits that will be needed to store any number that can + * be written in this many decimal digits. */ + assert(decimal.len < (~(size_t)0) / 196); + size_t bits = 196 * decimal.len / 59; + + /* Now round that up to words. */ + size_t words = bits / BIGNUM_INT_BITS + 1; + + mp_int *x = mp_make_sized(words); + for (size_t i = 0;; i++) { + mp_add_integer_into(x, x, ((char *)decimal.ptr)[i] - '0'); + + if (i+1 == decimal.len) + break; + + mp_mul_integer_into(x, x, 10); + } + return x; +} + +mp_int *mp_from_decimal(const char *decimal) +{ + return mp_from_decimal_pl(ptrlen_from_asciz(decimal)); +} + +/* + * Hex-to-binary conversion: _algorithmically_ simpler than decimal + * (none of those multiplications by 10), but there's some fiddly + * bit-twiddling needed to process each hex digit without diverging + * control flow depending on whether it's a letter or a number. 
+ */ +mp_int *mp_from_hex_pl(ptrlen hex) +{ + assert(hex.len <= (~(size_t)0) / 4); + size_t bits = hex.len * 4; + size_t words = (bits + BIGNUM_INT_BITS - 1) / BIGNUM_INT_BITS; + mp_int *x = mp_make_sized(words); + for (size_t nibble = 0; nibble < hex.len; nibble++) { + BignumInt digit = ((char *)hex.ptr)[hex.len-1 - nibble]; + + BignumInt lmask = ~-(((digit-'a')|('f'-digit)) >> (BIGNUM_INT_BITS-1)); + BignumInt umask = ~-(((digit-'A')|('F'-digit)) >> (BIGNUM_INT_BITS-1)); + + BignumInt digitval = digit - '0'; + digitval ^= (digitval ^ (digit - 'a' + 10)) & lmask; + digitval ^= (digitval ^ (digit - 'A' + 10)) & umask; + digitval &= 0xF; /* at least be _slightly_ nice about weird input */ + + size_t word_idx = nibble / (BIGNUM_INT_BYTES*2); + size_t nibble_within_word = nibble % (BIGNUM_INT_BYTES*2); + x->w[word_idx] |= digitval << (nibble_within_word * 4); + } + return x; +} + +mp_int *mp_from_hex(const char *hex) +{ + return mp_from_hex_pl(ptrlen_from_asciz(hex)); +} + +mp_int *mp_copy(mp_int *x) +{ + return mp_from_words(x->nw, x->w); +} + +uint8_t mp_get_byte(mp_int *x, size_t byte) +{ + return 0xFF & (mp_word(x, byte / BIGNUM_INT_BYTES) >> + (8 * (byte % BIGNUM_INT_BYTES))); +} + +unsigned mp_get_bit(mp_int *x, size_t bit) +{ + return 1 & (mp_word(x, bit / BIGNUM_INT_BITS) >> + (bit % BIGNUM_INT_BITS)); +} + +void mp_set_bit(mp_int *x, size_t bit, unsigned val) +{ + size_t word = bit / BIGNUM_INT_BITS; + assert(word < x->nw); + + unsigned shift = (bit % BIGNUM_INT_BITS); + + x->w[word] &= ~((BignumInt)1 << shift); + x->w[word] |= (BignumInt)(val & 1) << shift; +} + +/* + * Helper function used here and there to normalise any nonzero input + * value to 1. + */ +static inline unsigned normalise_to_1(BignumInt n) +{ + n = (n >> 1) | (n & 1); /* ensure top bit is clear */ + n = (-n) >> (BIGNUM_INT_BITS - 1); /* normalise to 0 or 1 */ + return n; +} + +/* + * Find the highest nonzero word in a number. 
Returns the index of the + * word in x->w, and also a pair of output uint64_t in which that word + * appears in the high one shifted left by 'shift_wanted' bits, the + * words immediately below it occupy the space to the right, and the + * words below _that_ fill up the low one. + * + * If there is no nonzero word at all, the passed-by-reference output + * variables retain their original values. + */ +static inline void mp_find_highest_nonzero_word_pair( + mp_int *x, size_t shift_wanted, size_t *index, + uint64_t *hi, uint64_t *lo) +{ + uint64_t curr_hi = 0, curr_lo = 0; + + for (size_t curr_index = 0; curr_index < x->nw; curr_index++) { + BignumInt curr_word = x->w[curr_index]; + unsigned indicator = normalise_to_1(curr_word); + + curr_lo = (BIGNUM_INT_BITS < 64 ? (curr_lo >> BIGNUM_INT_BITS) : 0) | + (curr_hi << (64 - BIGNUM_INT_BITS)); + curr_hi = (BIGNUM_INT_BITS < 64 ? (curr_hi >> BIGNUM_INT_BITS) : 0) | + ((uint64_t)curr_word << shift_wanted); + + if (hi) *hi ^= (curr_hi ^ *hi ) & -(uint64_t)indicator; + if (lo) *lo ^= (curr_lo ^ *lo ) & -(uint64_t)indicator; + if (index) *index ^= (curr_index ^ *index) & -(size_t) indicator; + } +} + +size_t mp_get_nbits(mp_int *x) +{ + /* Sentinel values in case there are no bits set at all: we + * imagine that there's a word at position -1 (i.e. the topmost + * fraction word) which is all 1s, because that way, we handle a + * zero input by considering its highest set bit to be the top one + * of that word, i.e. just below the units digit, i.e. at bit + * index -1, i.e. so we'll return 0 on output. */ + size_t hiword_index = -(size_t)1; + uint64_t hiword64 = ~(BignumInt)0; + + /* + * Find the highest nonzero word and its index. + */ + mp_find_highest_nonzero_word_pair(x, 0, &hiword_index, &hiword64, NULL); + BignumInt hiword = hiword64; /* in case BignumInt is a narrower type */ + + /* + * Find the index of the highest set bit within hiword. 
/*
 * Shared code between the hex and decimal output functions to get rid
 * of leading zeroes on the output string.
 *
 * 'buf' holds 'bufsize' bytes: a fixed number of digits followed by a
 * trailing \0. The contents are shifted left so that the first
 * character that isn't '0' lands in buf[0]. Only the first 'maxtrim'
 * characters are examined, so if they are all '0' the shift is by
 * exactly 'maxtrim'. Callers pass one less than the digit count, so
 * that zero comes out as "0" instead of "".
 *
 * Neither phase branches on, or indexes memory by, the contents of
 * the buffer.
 */
static void trim_leading_zeroes(char *buf, size_t bufsize, size_t maxtrim)
{
    size_t trim = maxtrim;

    /*
     * Look for the first character not equal to '0', to find the
     * shift count. We scan downwards, so the last position that
     * overwrites 'trim' is the lowest-indexed non-'0' character.
     */
    if (trim > 0) {
        for (size_t pos = trim; pos-- > 0 ;) {
            uint8_t diff = buf[pos] ^ '0';

            /*
             * Build an all-ones mask if diff == 0, else all-zeroes.
             * diff is at most 0xFF, so (unsigned)diff - 1 has bit 8
             * set exactly when diff was zero and the subtraction
             * wrapped round. Testing that fixed bit keeps the
             * result independent of how wide size_t is, whereas
             * shifting by a bignum-word width only works if the two
             * types happen to be the same size.
             */
            size_t mask = -(size_t)((((unsigned)diff - 1) >> 8) & 1);
            trim ^= (trim ^ pos) & ~mask;
        }
    }

    /*
     * Now do the shift, in log n passes each of which does a
     * conditional shift by 2^i bytes if bit i is set in the shift
     * count. Each pass is a run of masked swaps, so the bytes being
     * shifted out (all '0') end up parked beyond the terminating \0.
     */
    uint8_t *ubuf = (uint8_t *)buf;
    for (size_t logd = 0; bufsize >> logd; logd++) {
        uint8_t mask = -(uint8_t)((trim >> logd) & 1);
        size_t d = (size_t)1 << logd;
        for (size_t i = 0; i+d < bufsize; i++) {
            uint8_t diff = mask & (ubuf[i] ^ ubuf[i+d]);
            ubuf[i] ^= diff;
            ubuf[i+d] ^= diff;
        }
    }
}
Our strategy here is to extract each + * decimal digit by finding the input number's residue mod 10, then + * subtract that off to give an exact multiple of 10, which then means + * you can safely divide by 10 by means of shifting right one bit and + * then multiplying by the inverse of 5 mod 2^n. + */ +char *mp_get_decimal(mp_int *x_orig) +{ + mp_int *x = mp_copy(x_orig), *y = mp_make_sized(x->nw); + + /* + * The inverse of 5 mod 2^lots is 0xccccccccccccccccccccd, for an + * appropriate number of 'c's. Manually construct an integer the + * right size. + */ + mp_int *inv5 = mp_make_sized(x->nw); + assert(BIGNUM_INT_BITS % 8 == 0); + for (size_t i = 0; i < inv5->nw; i++) + inv5->w[i] = BIGNUM_INT_MASK / 5 * 4; + inv5->w[0]++; + + /* + * 146/485 is an upper bound (and also a continued-fraction + * convergent) of log10(2), so this is a conservative estimate of + * the number of decimal digits needed to store a value that fits + * in this many binary bits. + */ + assert(x->nw < (~(size_t)1) / (146 * BIGNUM_INT_BITS)); + size_t bufsize = size_t_max(x->nw * (146 * BIGNUM_INT_BITS) / 485, 1) + 2; + char *outbuf = snewn(bufsize, char); + outbuf[bufsize - 1] = '\0'; + + /* + * Loop over the number generating digits from the least + * significant upwards, so that we write to outbuf in reverse + * order. + */ + for (size_t pos = bufsize - 1; pos-- > 0 ;) { + /* + * Find the current residue mod 10. We do this by first + * summing the bytes of the number, with all but the lowest + * one multiplied by 6 (because 256^i == 6 mod 10 for all + * i>0). That gives us a single word congruent mod 10 to the + * input number, and then we reduce it further by manual + * multiplication and shifting, just in case the compiler + * target implements the C division operator in a way that has + * input-dependent timing. 
+ */ + uint32_t low_digit = 0, maxval = 0, mult = 1; + for (size_t i = 0; i < x->nw; i++) { + for (unsigned j = 0; j < BIGNUM_INT_BYTES; j++) { + low_digit += mult * (0xFF & (x->w[i] >> (8*j))); + maxval += mult * 0xFF; + mult = 6; + } + /* + * For _really_ big numbers, prevent overflow of t by + * periodically folding the top half of the accumulator + * into the bottom half, using the same rule 'multiply by + * 6 when shifting down by one or more whole bytes'. + */ + if (maxval > UINT32_MAX - (6 * 0xFF * BIGNUM_INT_BYTES)) { + low_digit = (low_digit & 0xFFFF) + 6 * (low_digit >> 16); + maxval = (maxval & 0xFFFF) + 6 * (maxval >> 16); + } + } + + /* + * Final reduction of low_digit. We multiply by 2^32 / 10 + * (that's the constant 0x19999999) to get a 64-bit value + * whose top 32 bits are the approximate quotient + * low_digit/10; then we subtract off 10 times that; and + * finally we do one last trial subtraction of 10 by adding 6 + * (which sets bit 4 if the number was just over 10) and then + * testing bit 4. + */ + low_digit -= 10 * ((0x19999999ULL * low_digit) >> 32); + low_digit -= 10 * ((low_digit + 6) >> 4); + + assert(low_digit < 10); /* make sure we did reduce fully */ + outbuf[pos] = '0' + low_digit; + + /* + * Now subtract off that digit, divide by 2 (using a right + * shift) and by 5 (using the modular inverse), to get the + * next output digit into the units position. + */ + mp_sub_integer_into(x, x, low_digit); + mp_rshift_fixed_into(y, x, 1); + mp_mul_into(x, y, inv5); + } + + mp_free(x); + mp_free(y); + mp_free(inv5); + + trim_leading_zeroes(outbuf, bufsize, bufsize - 2); + return outbuf; +} + +/* + * Binary to hex conversion. Reasonably simple (only a spot of bit + * twiddling to choose whether to output a digit or a letter for each + * nibble). 
+ */ +static char *mp_get_hex_internal(mp_int *x, uint8_t letter_offset) +{ + size_t nibbles = x->nw * BIGNUM_INT_BYTES * 2; + size_t bufsize = nibbles + 1; + char *outbuf = snewn(bufsize, char); + outbuf[nibbles] = '\0'; + + for (size_t nibble = 0; nibble < nibbles; nibble++) { + size_t word_idx = nibble / (BIGNUM_INT_BYTES*2); + size_t nibble_within_word = nibble % (BIGNUM_INT_BYTES*2); + uint8_t digitval = 0xF & (x->w[word_idx] >> (nibble_within_word * 4)); + + uint8_t mask = -((digitval + 6) >> 4); + char digit = digitval + '0' + (letter_offset & mask); + outbuf[nibbles-1 - nibble] = digit; + } + + trim_leading_zeroes(outbuf, bufsize, nibbles - 1); + return outbuf; +} + +char *mp_get_hex(mp_int *x) +{ + return mp_get_hex_internal(x, 'a' - ('0'+10)); +} + +char *mp_get_hex_uppercase(mp_int *x) +{ + return mp_get_hex_internal(x, 'A' - ('0'+10)); +} + +/* + * Routines for reading and writing the SSH-1 and SSH-2 wire formats + * for multiprecision integers, declared in marshal.h. + * + * These can't avoid having control flow dependent on the true bit + * size of the number, because the wire format requires the number of + * output bytes to depend on that. 
+ */ +void BinarySink_put_mp_ssh1(BinarySink *bs, mp_int *x) +{ + size_t bits = mp_get_nbits(x); + size_t bytes = (bits + 7) / 8; + + assert(bits < 0x10000); + put_uint16(bs, bits); + for (size_t i = bytes; i-- > 0 ;) + put_byte(bs, mp_get_byte(x, i)); +} + +void BinarySink_put_mp_ssh2(BinarySink *bs, mp_int *x) +{ + size_t bytes = (mp_get_nbits(x) + 8) / 8; + + put_uint32(bs, bytes); + for (size_t i = bytes; i-- > 0 ;) + put_byte(bs, mp_get_byte(x, i)); +} + +mp_int *BinarySource_get_mp_ssh1(BinarySource *src) +{ + unsigned bitc = get_uint16(src); + ptrlen bytes = get_data(src, (bitc + 7) / 8); + if (get_err(src)) { + return mp_from_integer(0); + } else { + mp_int *toret = mp_from_bytes_be(bytes); + /* SSH-1.5 spec says that it's OK for the prefix uint16 to be + * _greater_ than the actual number of bits */ + if (mp_get_nbits(toret) > bitc) { + src->err = BSE_INVALID; + mp_free(toret); + toret = mp_from_integer(0); + } + return toret; + } +} + +mp_int *BinarySource_get_mp_ssh2(BinarySource *src) +{ + ptrlen bytes = get_string(src); + if (get_err(src)) { + return mp_from_integer(0); + } else { + const unsigned char *p = bytes.ptr; + if ((bytes.len > 0 && + ((p[0] & 0x80) || + (p[0] == 0 && (bytes.len <= 1 || !(p[1] & 0x80)))))) { + src->err = BSE_INVALID; + return mp_from_integer(0); + } + return mp_from_bytes_be(bytes); + } +} + +/* + * Make an mp_int structure whose words array aliases a subinterval of + * some other mp_int. This makes it easy to read or write just the low + * or high words of a number, e.g. to add a number starting from a + * high bit position, or to reduce mod 2^{n*BIGNUM_INT_BITS}. + * + * The convention throughout this code is that when we store an mp_int + * directly by value, we always expect it to be an alias of some kind, + * so its words array won't ever need freeing. Whereas an 'mp_int *' + * has an owner, who knows whether it needs freeing or whether it was + * created by address-taking an alias. 
+ */ +static mp_int mp_make_alias(mp_int *in, size_t offset, size_t len) +{ + /* + * Bounds-check the offset and length so that we always return + * something valid, even if it's not necessarily the length the + * caller asked for. + */ + if (offset > in->nw) + offset = in->nw; + if (len > in->nw - offset) + len = in->nw - offset; + + mp_int toret; + toret.nw = len; + toret.w = in->w + offset; + return toret; +} + +/* + * A special case of mp_make_alias: in some cases we preallocate a + * large mp_int to use as scratch space (to avoid pointless + * malloc/free churn in recursive or iterative work). + * + * mp_alloc_from_scratch creates an alias of size 'len' to part of + * 'pool', and adjusts 'pool' itself so that further allocations won't + * overwrite that space. + * + * There's no free function to go with this. Typically you just copy + * the pool mp_int by value, allocate from the copy, and when you're + * done with those allocations, throw the copy away and go back to the + * original value of pool. (A mark/release system.) + */ +static mp_int mp_alloc_from_scratch(mp_int *pool, size_t len) +{ + assert(len <= pool->nw); + mp_int toret = mp_make_alias(pool, 0, len); + *pool = mp_make_alias(pool, len, pool->nw); + return toret; +} + +/* + * Internal component common to lots of assorted add/subtract code. + * Reads words from a,b; writes into w_out (which might be NULL if the + * output isn't even needed). Takes an input carry flag in 'carry', + * and returns the output carry. Each word read from b is ANDed with + * b_and and then XORed with b_xor. + * + * So you can implement addition by setting b_and to all 1s and b_xor + * to 0; you can subtract by making b_xor all 1s too (effectively + * bit-flipping b) and also passing 1 as the input carry (to turn + * one's complement into two's complement). 
And you can do conditional + * add/subtract by choosing b_and to be all 1s or all 0s based on a + * condition, because the value of b will be totally ignored if b_and + * == 0. + */ +static BignumCarry mp_add_masked_into( + BignumInt *w_out, size_t rw, mp_int *a, mp_int *b, + BignumInt b_and, BignumInt b_xor, BignumCarry carry) +{ + for (size_t i = 0; i < rw; i++) { + BignumInt aword = mp_word(a, i), bword = mp_word(b, i), out; + bword = (bword & b_and) ^ b_xor; + BignumADC(out, carry, aword, bword, carry); + if (w_out) + w_out[i] = out; + } + return carry; +} + +/* + * Like the public mp_add_into except that it returns the output carry. + */ +static inline BignumCarry mp_add_into_internal(mp_int *r, mp_int *a, mp_int *b) +{ + return mp_add_masked_into(r->w, r->nw, a, b, ~(BignumInt)0, 0, 0); +} + +void mp_add_into(mp_int *r, mp_int *a, mp_int *b) +{ + mp_add_into_internal(r, a, b); +} + +void mp_sub_into(mp_int *r, mp_int *a, mp_int *b) +{ + mp_add_masked_into(r->w, r->nw, a, b, ~(BignumInt)0, ~(BignumInt)0, 1); +} + +static void mp_cond_negate(mp_int *r, mp_int *x, unsigned yes) +{ + BignumCarry carry = yes; + BignumInt flip = -(BignumInt)yes; + for (size_t i = 0; i < r->nw; i++) { + BignumInt xword = mp_word(x, i); + xword ^= flip; + BignumADC(r->w[i], carry, 0, xword, carry); + } +} + +/* + * Similar to mp_add_masked_into, but takes a C integer instead of an + * mp_int as the masked operand. + */ +static BignumCarry mp_add_masked_integer_into( + BignumInt *w_out, size_t rw, mp_int *a, uintmax_t b, + BignumInt b_and, BignumInt b_xor, BignumCarry carry) +{ + for (size_t i = 0; i < rw; i++) { + BignumInt aword = mp_word(a, i); + size_t shift = i * BIGNUM_INT_BITS; + BignumInt bword = shift < BIGNUM_INT_BYTES ? 
b >> shift : 0; + BignumInt out; + bword = (bword ^ b_xor) & b_and; + BignumADC(out, carry, aword, bword, carry); + if (w_out) + w_out[i] = out; + } + return carry; +} + +void mp_add_integer_into(mp_int *r, mp_int *a, uintmax_t n) +{ + mp_add_masked_integer_into(r->w, r->nw, a, n, ~(BignumInt)0, 0, 0); +} + +void mp_sub_integer_into(mp_int *r, mp_int *a, uintmax_t n) +{ + mp_add_masked_integer_into(r->w, r->nw, a, n, + ~(BignumInt)0, ~(BignumInt)0, 1); +} + +/* + * Sets r to a + n << (word_index * BIGNUM_INT_BITS), treating + * word_index as secret data. + */ +static void mp_add_integer_into_shifted_by_words( + mp_int *r, mp_int *a, uintmax_t n, size_t word_index) +{ + unsigned indicator = 0; + BignumCarry carry = 0; + + for (size_t i = 0; i < r->nw; i++) { + /* indicator becomes 1 when we reach the index that the least + * significant bits of n want to be placed at, and it stays 1 + * thereafter. */ + indicator |= 1 ^ normalise_to_1(i ^ word_index); + + /* If indicator is 1, we add the low bits of n into r, and + * shift n down. If it's 0, we add zero bits into r, and + * leave n alone. */ + BignumInt bword = n & -(BignumInt)indicator; + uintmax_t new_n = (BIGNUM_INT_BITS < 64 ? 
n >> BIGNUM_INT_BITS : 0); + n ^= (n ^ new_n) & -(uintmax_t)indicator; + + BignumInt aword = mp_word(a, i); + BignumInt out; + BignumADC(out, carry, aword, bword, carry); + r->w[i] = out; + } +} + +void mp_mul_integer_into(mp_int *r, mp_int *a, uint16_t n) +{ + BignumInt carry = 0, mult = n; + for (size_t i = 0; i < r->nw; i++) { + BignumInt aword = mp_word(a, i); + BignumMULADD(carry, r->w[i], aword, mult, carry); + } + assert(!carry); +} + +void mp_cond_add_into(mp_int *r, mp_int *a, mp_int *b, unsigned yes) +{ + BignumInt mask = -(BignumInt)(yes & 1); + mp_add_masked_into(r->w, r->nw, a, b, mask, 0, 0); +} + +void mp_cond_sub_into(mp_int *r, mp_int *a, mp_int *b, unsigned yes) +{ + BignumInt mask = -(BignumInt)(yes & 1); + mp_add_masked_into(r->w, r->nw, a, b, mask, mask, 1 & mask); +} + +/* + * Ordered comparison between unsigned numbers is done by subtracting + * one from the other and looking at the output carry. + */ +unsigned mp_cmp_hs(mp_int *a, mp_int *b) +{ + size_t rw = size_t_max(a->nw, b->nw); + return mp_add_masked_into(NULL, rw, a, b, ~(BignumInt)0, ~(BignumInt)0, 1); +} + +unsigned mp_hs_integer(mp_int *x, uintmax_t n) +{ + BignumInt carry = 1; + for (size_t i = 0; i < x->nw; i++) { + size_t shift = i * BIGNUM_INT_BITS; + BignumInt nword = shift < BIGNUM_INT_BYTES ? n >> shift : 0; + BignumInt dummy_out; + BignumADC(dummy_out, carry, x->w[i], ~nword, carry); + (void)dummy_out; + } + return carry; +} + +/* + * Equality comparison is done by bitwise XOR of the input numbers, + * ORing together all the output words, and normalising the result + * using our careful normalise_to_1 helper function. 
+ */ +unsigned mp_cmp_eq(mp_int *a, mp_int *b) +{ + BignumInt diff = 0; + for (size_t i = 0, limit = size_t_max(a->nw, b->nw); i < limit; i++) + diff |= mp_word(a, i) ^ mp_word(b, i); + return 1 ^ normalise_to_1(diff); /* return 1 if diff _is_ zero */ +} + +unsigned mp_eq_integer(mp_int *x, uintmax_t n) +{ + BignumInt diff = 0; + for (size_t i = 0; i < x->nw; i++) { + size_t shift = i * BIGNUM_INT_BITS; + BignumInt nword = shift < BIGNUM_INT_BYTES ? n >> shift : 0; + diff |= x->w[i] ^ nword; + } + return 1 ^ normalise_to_1(diff); /* return 1 if diff _is_ zero */ +} + +void mp_neg_into(mp_int *r, mp_int *a) +{ + mp_int zero; + zero.nw = 0; + mp_sub_into(r, &zero, a); +} + +mp_int *mp_add(mp_int *x, mp_int *y) +{ + mp_int *r = mp_make_sized(size_t_max(x->nw, y->nw) + 1); + mp_add_into(r, x, y); + return r; +} + +mp_int *mp_sub(mp_int *x, mp_int *y) +{ + mp_int *r = mp_make_sized(size_t_max(x->nw, y->nw)); + mp_sub_into(r, x, y); + return r; +} + +mp_int *mp_neg(mp_int *a) +{ + mp_int *r = mp_make_sized(a->nw); + mp_neg_into(r, a); + return r; +} + +/* + * Internal routine: multiply and accumulate in the trivial O(N^2) + * way. Sets r <- r + a*b. + */ +static void mp_mul_add_simple(mp_int *r, mp_int *a, mp_int *b) +{ + BignumInt *aend = a->w + a->nw, *bend = b->w + b->nw, *rend = r->w + r->nw; + + for (BignumInt *ap = a->w, *rp = r->w; + ap < aend && rp < rend; ap++, rp++) { + + BignumInt adata = *ap, carry = 0, *rq = rp; + + for (BignumInt *bp = b->w; bp < bend && rq < rend; bp++, rq++) { + BignumInt bdata = bp < bend ? *bp : 0; + BignumMULADD2(carry, *rq, adata, bdata, *rq, carry); + } + + for (; rq < rend; rq++) + BignumADC(*rq, carry, 0, *rq, carry); + } +} + +#ifndef KARATSUBA_THRESHOLD /* allow redefinition via -D for testing */ +#define KARATSUBA_THRESHOLD 50 +#endif + +static inline size_t mp_mul_scratchspace_unary(size_t n) +{ + /* + * Simplistic and overcautious bound on the amount of scratch + * space that the recursive multiply function will need. 
+ * + * The rationale is: on the main Karatsuba branch of + * mp_mul_internal, which is the most space-intensive one, we + * allocate space for (a0+a1) and (b0+b1) (each just over half the + * input length n) and their product (the sum of those sizes, i.e. + * just over n itself). Then in order to actually compute the + * product, we do a recursive multiplication of size just over n. + * + * If all those 'just over' weren't there, and everything was + * _exactly_ half the length, you'd get the amount of space for a + * size-n multiply defined by the recurrence M(n) = 2n + M(n/2), + * which is satisfied by M(n) = 4n. But instead it's (2n plus a + * word or two) and M(n/2 plus a word or two). On the assumption + * that there's still some constant k such that M(n) <= kn, this + * gives us kn = 2n + w + k(n/2 + w), where w is a small constant + * (one or two words). That simplifies to kn/2 = 2n + (k+1)w, and + * since we don't even _start_ needing scratch space until n is at + * least 50, we can bound 2n + (k+1)w above by 3n, giving k=6. + * + * So I claim that 6n words of scratch space will suffice, and I + * check that by assertion at every stage of the recursion. + */ + return n * 6; +} + +static size_t mp_mul_scratchspace(size_t rw, size_t aw, size_t bw) +{ + size_t inlen = size_t_min(rw, size_t_max(aw, bw)); + return mp_mul_scratchspace_unary(inlen); +} + +static void mp_mul_internal(mp_int *r, mp_int *a, mp_int *b, mp_int scratch) +{ + size_t inlen = size_t_min(r->nw, size_t_max(a->nw, b->nw)); + assert(scratch.nw >= mp_mul_scratchspace_unary(inlen)); + + mp_clear(r); + + if (inlen < KARATSUBA_THRESHOLD || a->nw == 0 || b->nw == 0) { + /* + * The input numbers are too small to bother optimising. Go + * straight to the simple primitive approach. + */ + mp_mul_add_simple(r, a, b); + return; + } + + /* + * Karatsuba divide-and-conquer algorithm. 
We cut each input in + * half, so that it's expressed as two big 'digits' in a giant + * base D: + * + * a = a_1 D + a_0 + * b = b_1 D + b_0 + * + * Then the product is of course + * + * ab = a_1 b_1 D^2 + (a_1 b_0 + a_0 b_1) D + a_0 b_0 + * + * and we compute the three coefficients by recursively calling + * ourself to do half-length multiplications. + * + * The clever bit that makes this worth doing is that we only need + * _one_ half-length multiplication for the central coefficient + * rather than the two that it obviously looks like, because we can + * use a single multiplication to compute + * + * (a_1 + a_0) (b_1 + b_0) = a_1 b_1 + a_1 b_0 + a_0 b_1 + a_0 b_0 + * + * and then we subtract the other two coefficients (a_1 b_1 and + * a_0 b_0) which we were computing anyway. + * + * Hence we get to multiply two numbers of length N in about three + * times as much work as it takes to multiply numbers of length + * N/2, which is obviously better than the four times as much work + * it would take if we just did a long conventional multiply. + */ + + /* Break up the input as botlen + toplen, with botlen >= toplen. + * The 'base' D is equal to 2^{botlen * BIGNUM_INT_BITS}. */ + size_t toplen = inlen / 2; + size_t botlen = inlen - toplen; + + /* Alias bignums that address the two halves of a,b, and useful + * pieces of r. */ + mp_int a0 = mp_make_alias(a, 0, botlen); + mp_int b0 = mp_make_alias(b, 0, botlen); + mp_int a1 = mp_make_alias(a, botlen, toplen); + mp_int b1 = mp_make_alias(b, botlen, toplen); + mp_int r0 = mp_make_alias(r, 0, botlen*2); + mp_int r1 = mp_make_alias(r, botlen, r->nw); + mp_int r2 = mp_make_alias(r, botlen*2, r->nw); + + /* Recurse to compute a0*b0 and a1*b1, in their correct positions + * in the output bignum. They can't overlap. 
*/ + mp_mul_internal(&r0, &a0, &b0, scratch); + mp_mul_internal(&r2, &a1, &b1, scratch); + + if (r->nw < inlen*2) { + /* + * The output buffer isn't large enough to require the whole + * product, so some of a1*b1 won't have been stored. In that + * case we won't try to do the full Karatsuba optimisation; + * we'll just recurse again to compute a0*b1 and a1*b0 - or at + * least as much of them as the output buffer size requires - + * and add each one in. + */ + mp_int s = mp_alloc_from_scratch( + &scratch, size_t_min(botlen+toplen, r1.nw)); + + mp_mul_internal(&s, &a0, &b1, scratch); + mp_add_into(&r1, &r1, &s); + mp_mul_internal(&s, &a1, &b0, scratch); + mp_add_into(&r1, &r1, &s); + return; + } + + /* a0+a1 and b0+b1 */ + mp_int asum = mp_alloc_from_scratch(&scratch, botlen+1); + mp_int bsum = mp_alloc_from_scratch(&scratch, botlen+1); + mp_add_into(&asum, &a0, &a1); + mp_add_into(&bsum, &b0, &b1); + + /* Their product */ + mp_int product = mp_alloc_from_scratch(&scratch, botlen*2+1); + mp_mul_internal(&product, &asum, &bsum, scratch); + + /* Subtract off the outer terms we already have */ + mp_sub_into(&product, &product, &r0); + mp_sub_into(&product, &product, &r2); + + /* And add it in with the right offset. 
*/ + mp_add_into(&r1, &r1, &product); +} + +void mp_mul_into(mp_int *r, mp_int *a, mp_int *b) +{ + mp_int *scratch = mp_make_sized(mp_mul_scratchspace(r->nw, a->nw, b->nw)); + mp_mul_internal(r, a, b, *scratch); + mp_free(scratch); +} + +mp_int *mp_mul(mp_int *x, mp_int *y) +{ + mp_int *r = mp_make_sized(x->nw + y->nw); + mp_mul_into(r, x, y); + return r; +} + +void mp_lshift_fixed_into(mp_int *r, mp_int *a, size_t bits) +{ + size_t words = bits / BIGNUM_INT_BITS; + size_t bitoff = bits % BIGNUM_INT_BITS; + + for (size_t i = 0; i < r->nw; i++) { + if (i < words) { + r->w[i] = 0; + } else { + r->w[i] = mp_word(a, i - words); + if (bitoff != 0) { + r->w[i] <<= bitoff; + if (i > words) + r->w[i] |= mp_word(a, i - words - 1) >> + (BIGNUM_INT_BITS - bitoff); + } + } + } +} + +void mp_rshift_fixed_into(mp_int *r, mp_int *a, size_t bits) +{ + size_t words = bits / BIGNUM_INT_BITS; + size_t bitoff = bits % BIGNUM_INT_BITS; + + for (size_t i = 0; i < r->nw; i++) { + r->w[i] = mp_word(a, i + words); + if (bitoff != 0) { + r->w[i] >>= bitoff; + r->w[i] |= mp_word(a, i + words + 1) << (BIGNUM_INT_BITS - bitoff); + } + } +} + +mp_int *mp_rshift_fixed(mp_int *x, size_t bits) +{ + size_t words = bits / BIGNUM_INT_BITS; + mp_int *r = mp_make_sized(x->nw - size_t_min(x->nw, words)); + mp_rshift_fixed_into(r, x, bits); + return r; +} + +/* + * Safe right shift is done using the same technique as + * trim_leading_zeroes above: you make an n-word left shift by + * composing an appropriate subset of power-of-2-sized shifts, so it + * takes log_2(n) loop iterations each of which does a different shift + * by a power of 2 words, using the usual bit twiddling to make the + * whole shift conditional on the appropriate bit of n. 
+ */ +mp_int *mp_rshift_safe(mp_int *x, size_t bits) +{ + size_t wordshift = bits / BIGNUM_INT_BITS; + size_t bitshift = bits % BIGNUM_INT_BITS; + + mp_int *r = mp_copy(x); + + unsigned clear = (r->nw - wordshift) >> (CHAR_BIT * sizeof(size_t) - 1); + mp_cond_clear(r, clear); + + for (unsigned bit = 0; r->nw >> bit; bit++) { + /* Form the power of two in size_t, not int, so that it cannot + * overflow before being widened. */ + size_t word_offset = (size_t)1 << bit; + BignumInt mask = -(BignumInt)((wordshift >> bit) & 1); + for (size_t i = 0; i < r->nw; i++) { + BignumInt w = mp_word(r, i + word_offset); + r->w[i] ^= (r->w[i] ^ w) & mask; + } + } + + /* + * That's done the shifting by words; now we do the shifting by + * bits. + * + * I assume here that register-controlled right shifts are + * time-constant. If they're not, I could replace this with + * another loop over bit positions. + */ + size_t upshift = BIGNUM_INT_BITS - bitshift; + size_t no_shift = (upshift >> BIGNUM_INT_BITS_BITS); + upshift &= ~-(size_t)no_shift; + BignumInt upshifted_mask = ~-(BignumInt)no_shift; + + for (size_t i = 0; i < r->nw; i++) { + r->w[i] = (r->w[i] >> bitshift) | + ((mp_word(r, i+1) << upshift) & upshifted_mask); + } + + return r; +} + +/* + * Reduce x mod 2^p in place: keep the low p bits of x and clear every + * bit above them. + */ +void mp_reduce_mod_2to(mp_int *x, size_t p) +{ + size_t word = p / BIGNUM_INT_BITS; + /* The word containing bit p keeps only its low (p % BIGNUM_INT_BITS) + * bits. The mask has the same type as the words it is applied to. */ + BignumInt mask = ((BignumInt)1 << (p % BIGNUM_INT_BITS)) - 1; + for (; word < x->nw; word++) { + x->w[word] &= mask; + mask = 0; /* every higher word is cleared entirely */ + } +} + +/* + * Inverse mod 2^n is computed by an iterative technique which doubles + * the number of bits at each step. + */ +mp_int *mp_invert_mod_2to(mp_int *x, size_t p) +{ + /* Input checks: x must be coprime to the modulus, i.e. 
odd, and p + * can't be zero */ + assert(x->nw > 0); + assert(x->w[0] & 1); + assert(p > 0); + + size_t rw = (p + BIGNUM_INT_BITS - 1) / BIGNUM_INT_BITS; + mp_int *r = mp_make_sized(rw); + + size_t mul_scratchsize = mp_mul_scratchspace(2*rw, rw, rw); + mp_int *scratch_orig = mp_make_sized(6 * rw + mul_scratchsize); + mp_int scratch_per_iter = *scratch_orig; + mp_int mul_scratch = mp_alloc_from_scratch( + &scratch_per_iter, mul_scratchsize); + + r->w[0] = 1; + + for (size_t b = 1; b < p; b <<= 1) { + /* + * In each step of this iteration, we have the inverse of x + * mod 2^b, and we want the inverse of x mod 2^{2b}. + * + * Write B = 2^b for convenience, so we want x^{-1} mod B^2. + * Let x = x_0 + B x_1 + k B^2, with 0 <= x_0,x_1 < B. + * + * We want to find r_0 and r_1 such that + * (r_1 B + r_0) (x_1 B + x_0) == 1 (mod B^2) + * + * To begin with, we know r_0 must be the inverse mod B of + * x_0, i.e. of x, i.e. it is the inverse we computed in the + * previous iteration. So now all we need is r_1. + * + * Multiplying out, neglecting multiples of B^2, and writing + * x_0 r_0 = K B + 1, we have + * + * r_1 x_0 B + r_0 x_1 B + K B == 0 (mod B^2) + * => r_1 x_0 B == - r_0 x_1 B - K B (mod B^2) + * => r_1 x_0 == - r_0 x_1 - K (mod B) + * => r_1 == r_0 (- r_0 x_1 - K) (mod B) + * + * (the last step because we multiply through by the inverse + * of x_0, which we already know is r_0). + */ + + mp_int scratch_this_iter = scratch_per_iter; + size_t Bw = (b + BIGNUM_INT_BITS - 1) / BIGNUM_INT_BITS; + size_t B2w = (2*b + BIGNUM_INT_BITS - 1) / BIGNUM_INT_BITS; + + /* Start by finding K: multiply x_0 by r_0, and shift down. 
*/ + mp_int x0 = mp_alloc_from_scratch(&scratch_this_iter, Bw); + mp_copy_into(&x0, x); + mp_reduce_mod_2to(&x0, b); + mp_int r0 = mp_make_alias(r, 0, Bw); + mp_int Kshift = mp_alloc_from_scratch(&scratch_this_iter, B2w); + mp_mul_internal(&Kshift, &x0, &r0, mul_scratch); + mp_int K = mp_alloc_from_scratch(&scratch_this_iter, Bw); + mp_rshift_fixed_into(&K, &Kshift, b); + + /* Now compute the product r_0 x_1, reusing the space of Kshift. */ + mp_int x1 = mp_alloc_from_scratch(&scratch_this_iter, Bw); + mp_rshift_fixed_into(&x1, x, b); + mp_reduce_mod_2to(&x1, b); + mp_int r0x1 = mp_make_alias(&Kshift, 0, Bw); + mp_mul_internal(&r0x1, &r0, &x1, mul_scratch); + + /* Add K to that. */ + mp_add_into(&r0x1, &r0x1, &K); + + /* Negate it. */ + mp_neg_into(&r0x1, &r0x1); + + /* Multiply by r_0. */ + mp_int r1 = mp_alloc_from_scratch(&scratch_this_iter, Bw); + mp_mul_internal(&r1, &r0, &r0x1, mul_scratch); + mp_reduce_mod_2to(&r1, b); + + /* That's our r_1, so add it on to r_0 to get the full inverse + * output from this iteration. */ + mp_lshift_fixed_into(&K, &r1, (b % BIGNUM_INT_BITS)); + size_t Bpos = b / BIGNUM_INT_BITS; + mp_int r1_position = mp_make_alias(r, Bpos, B2w-Bpos); + mp_add_into(&r1_position, &r1_position, &K); + } + + /* Finally, reduce mod the precise desired number of bits. 
+ */ + mp_reduce_mod_2to(r, p); + + mp_free(scratch_orig); + return r; +} + +static size_t monty_scratch_size(MontyContext *mc) +{ + return 3*mc->rw + mc->pw + mp_mul_scratchspace(mc->pw, mc->rw, mc->rw); +} + +MontyContext *monty_new(mp_int *modulus) +{ + MontyContext *mc = snew(MontyContext); + + mc->rw = modulus->nw; + mc->rbits = mc->rw * BIGNUM_INT_BITS; + mc->pw = mc->rw * 2 + 1; + + mc->m = mp_copy(modulus); + + mc->minus_minv_mod_r = mp_invert_mod_2to(mc->m, mc->rbits); + mp_neg_into(mc->minus_minv_mod_r, mc->minus_minv_mod_r); + + mp_int *r = mp_make_sized(mc->rw + 1); + r->w[mc->rw] = 1; + mc->powers_of_r_mod_m[0] = mp_mod(r, mc->m); + mp_free(r); + + for (size_t j = 1; j < lenof(mc->powers_of_r_mod_m); j++) + mc->powers_of_r_mod_m[j] = mp_modmul( + mc->powers_of_r_mod_m[0], mc->powers_of_r_mod_m[j-1], mc->m); + + mc->scratch = mp_make_sized(monty_scratch_size(mc)); + + return mc; +} + +/* + * Duplicate a MontyContext. Every mp_int it holds is copied, and the + * copy gets its own freshly allocated scratch space. + */ +MontyContext *monty_copy(MontyContext *orig) +{ + MontyContext *mc = snew(MontyContext); + + mc->rw = orig->rw; + mc->pw = orig->pw; + mc->rbits = orig->rbits; + mc->m = mp_copy(orig->m); + mc->minus_minv_mod_r = mp_copy(orig->minus_minv_mod_r); + /* Same bound as the loop in monty_new that fills this array. */ + for (size_t j = 0; j < lenof(mc->powers_of_r_mod_m); j++) + mc->powers_of_r_mod_m[j] = mp_copy(orig->powers_of_r_mod_m[j]); + mc->scratch = mp_make_sized(monty_scratch_size(mc)); + return mc; +} + +/* + * Free a MontyContext and every mp_int it holds, wiping the structure + * itself before releasing it. + */ +void monty_free(MontyContext *mc) +{ + mp_free(mc->m); + /* Same bound as the loop in monty_new that fills this array. */ + for (size_t j = 0; j < lenof(mc->powers_of_r_mod_m); j++) + mp_free(mc->powers_of_r_mod_m[j]); + mp_free(mc->minus_minv_mod_r); + mp_free(mc->scratch); + smemclr(mc, sizeof(*mc)); + sfree(mc); +} + +/* + * The main Montgomery reduction step. + */ +static mp_int monty_reduce_internal(MontyContext *mc, mp_int *x, mp_int scratch) +{ + /* + * The trick with Montgomery reduction is that on the one hand we + * want to reduce the size of the input by a factor of about r, + * and on the other hand, the two numbers we just multiplied were + * both stored with an extra factor of r multiplied in. 
So we + * computed ar*br = ab r^2, but we want to return abr, so we need + * to divide by r - and if we can do that by _actually dividing_ + * by r then this also reduces the size of the number. + * + * But we can only do that if the number we're dividing by r is a + * multiple of r. So first we must add an adjustment to it which + * clears its bottom 'rbits' bits. That adjustment must be a + * multiple of m in order to leave the residue mod m unchanged, so + * the question is, what multiple of m can we add to x to make it + * congruent to 0 mod r? And the answer is, x * (-m)^{-1} mod r. + */ + + /* x mod r: the low rw words of x (mp_make_alias takes its length + * in words, not bits) */ + mp_int x_lo = mp_make_alias(x, 0, mc->rw); + + /* x * (-m)^{-1}, i.e. the number we want to multiply by m */ + mp_int k = mp_alloc_from_scratch(&scratch, mc->rw); + mp_mul_internal(&k, &x_lo, mc->minus_minv_mod_r, scratch); + + /* m times that, i.e. the number we want to add to x */ + mp_int mk = mp_alloc_from_scratch(&scratch, mc->pw); + mp_mul_internal(&mk, mc->m, &k, scratch); + + /* Add it to x */ + mp_add_into(&mk, x, &mk); + + /* Divide by r, by simply making an alias to the upper words of the + * sum we just stored in mk */ + mp_int toret = mp_make_alias(&mk, mc->rw, mk.nw - mc->rw); + + /* + * We'll generally be doing this after a multiplication of two + * fully reduced values. So our input could be anything up to m^2, + * and then we added up to rm to it. Hence, the maximum value is + * rm+m^2, and after dividing by r, that becomes m + m(m/r) < 2m. + * So a single trial-subtraction will finish reducing to the + * interval [0,m). 
+ */ + mp_cond_sub_into(&toret, &toret, mc->m, mp_cmp_hs(&toret, mc->m)); + return toret; +} + +void monty_mul_into(MontyContext *mc, mp_int *r, mp_int *x, mp_int *y) +{ + assert(x->nw <= mc->rw); + assert(y->nw <= mc->rw); + + mp_int scratch = *mc->scratch; + mp_int tmp = mp_alloc_from_scratch(&scratch, 2*mc->rw); + mp_mul_into(&tmp, x, y); + mp_int reduced = monty_reduce_internal(mc, &tmp, scratch); + mp_copy_into(r, &reduced); + mp_clear(mc->scratch); +} + +mp_int *monty_mul(MontyContext *mc, mp_int *x, mp_int *y) +{ + mp_int *toret = mp_make_sized(mc->rw); + monty_mul_into(mc, toret, x, y); + return toret; +} + +mp_int *monty_modulus(MontyContext *mc) +{ + return mc->m; +} + +mp_int *monty_identity(MontyContext *mc) +{ + return mc->powers_of_r_mod_m[0]; +} + +mp_int *monty_invert(MontyContext *mc, mp_int *x) +{ + /* Given xr, we want to return x^{-1}r = (xr)^{-1} r^2 = + * monty_reduce((xr)^{-1} r^3) */ + mp_int *tmp = mp_invert(x, mc->m); + mp_int *toret = monty_mul(mc, tmp, mc->powers_of_r_mod_m[2]); + mp_free(tmp); + return toret; +} + +/* + * Importing a number into Montgomery representation involves + * multiplying it by r and reducing mod m. We could do this using the + * straightforward mp_modmul, but since we have the machinery to avoid + * division, why don't we use it? If we multiply the number not by r + * itself, but by the residue of r^2 mod m, then we can do an actual + * Montgomery reduction to reduce the result and remove the extra + * factor of r. + */ +void monty_import_into(MontyContext *mc, mp_int *r, mp_int *x) +{ + monty_mul_into(mc, r, x, mc->powers_of_r_mod_m[1]); +} + +mp_int *monty_import(MontyContext *mc, mp_int *x) +{ + return monty_mul(mc, x, mc->powers_of_r_mod_m[1]); +} + +/* + * Exporting a number means multiplying it by r^{-1}, which is exactly + * what monty_reduce does anyway, so we just do that. 
+ */ +void monty_export_into(MontyContext *mc, mp_int *r, mp_int *x) +{ + assert(x->nw <= 2*mc->rw); + mp_int reduced = monty_reduce_internal(mc, x, *mc->scratch); + mp_copy_into(r, &reduced); + mp_clear(mc->scratch); +} + +mp_int *monty_export(MontyContext *mc, mp_int *x) +{ + mp_int *toret = mp_make_sized(mc->rw); + monty_export_into(mc, toret, x); + return toret; +} + +static void monty_reduce(MontyContext *mc, mp_int *x) +{ + mp_int reduced = monty_reduce_internal(mc, x, *mc->scratch); + mp_copy_into(x, &reduced); + mp_clear(mc->scratch); +} + +mp_int *monty_pow(MontyContext *mc, mp_int *base, mp_int *exponent) +{ + /* square builds up powers of the form base^{2^i}. */ + mp_int *square = mp_copy(base); + size_t i = 0; + + /* out accumulates the output value. Starts at 1 (in Montgomery + * representation) and we multiply in each base^{2^i}. */ + mp_int *out = mp_copy(mc->powers_of_r_mod_m[0]); + + /* tmp holds each product we compute and reduce. */ + mp_int *tmp = mp_make_sized(mc->rw * 2); + + while (true) { + mp_mul_into(tmp, out, square); + monty_reduce(mc, tmp); + mp_select_into(out, out, tmp, mp_get_bit(exponent, i)); + + if (++i >= exponent->nw * BIGNUM_INT_BITS) + break; + + mp_mul_into(tmp, square, square); + monty_reduce(mc, tmp); + mp_copy_into(square, tmp); + } + + mp_free(square); + mp_free(tmp); + mp_clear(mc->scratch); + return out; +} + +mp_int *mp_modpow(mp_int *base, mp_int *exponent, mp_int *modulus) +{ + assert(base->nw <= modulus->nw); + assert(modulus->nw > 0); + assert(modulus->w[0] & 1); + + MontyContext *mc = monty_new(modulus); + mp_int *m_base = monty_import(mc, base); + mp_int *m_out = monty_pow(mc, m_base, exponent); + mp_int *out = monty_export(mc, m_out); + mp_free(m_base); + mp_free(m_out); + monty_free(mc); + return out; +} + +/* + * Given two coprime nonzero input integers a,b, returns two integers + * A,B such that A*a - B*b = 1. 
A,B will be the minimal non-negative + * pair satisfying that criterion, which is equivalent to saying that + * 0 <= A < b and 0 <= B < a. + * + * This algorithm is an adapted form of Stein's algorithm, which + * computes gcd(a,b) using only addition and bit shifts (i.e. without + * needing general division), using the following rules: + * + * - if both of a,b are even, divide off a common factor of 2 + * - if one of a,b (WLOG a) is even, then gcd(a,b) = gcd(a/2,b) + * - if both of a,b are odd, then WLOG a > b, and gcd(a,b) = + * gcd(b,(a-b)/2). + * + * For this application, I always expect the inputs to be coprime, + * so we can rule out the 'both even' initial case. For simplicity + * I've changed the 'both odd' case to turn (a,b) into (b,a-b) without + * the division by 2 (the next iteration would divide by 2 anyway). + * + * But the big change is that we need the Bezout coefficients as + * output, not just the gcd. So we need to know how to generate those + * in each case, based on the coefficients from the reduced pair of + * numbers: + * + * - If a,b are both odd, and u,v are such that u*b + v*(a-b) = 1, + * then v*a + (u-v)*b = 1. + * + * - If a is even, and u,v are such that u*(a/2) + v*b = 1: + * + if u is also even, then this is just (u/2)*a + v*b = 1 + * + otherwise, (u+b)*(a/2) + (v-a/2)*b is also equal to 1, and + * since u and b are both odd, (u+b)/2 is an integer, so we have + * ((u+b)/2)*a + (v-a/2)*b = 1. + * + * The code below transforms this from a recursive to an iterative + * algorithm. We first reduce a,b to 0,1, recording at each stage + * whether one of them was even, and whether we had to swap them; then + * we iterate backwards over that record of what we did, applying the + * above rules for building up the Bezout coefficients as we go. Of + * course, all the case analysis is done by the usual bit-twiddling + * conditionalisation to avoid data-dependent control flow. + * + * Also, since these mp_ints are generally treated as unsigned, we + * store the coefficients by absolute value, with the semantics that + * they always have opposite sign, and in the unwinding loop we keep a + * bit indicating whether Aa-Bb is currently expected to be +1 or -1, + * so that we can do one final conditional adjustment if it's -1. 
+ * + * Once the reduction rules have managed to reduce the input numbers + * to (0,1), then they are stable (the next reduction will always + * divide the even one by 2, which maps 0 to 0). So it doesn't matter + * if we do more steps of the algorithm than necessary; hence, for + * constant time, we just need to find the maximum number we could + * _possibly_ require, and do that many. + * + * If a,b < 2^n, at most 3n iterations are required. Proof: consider + * the quantity Q = log_2(min(a,b)) + 2 log_2(max(a,b)). + * - If the smaller number is even, then the next iteration halves + * it, decreasing Q by 1. + * - If the larger number is even, then the next iteration halves + * it, decreasing Q by 2. + * - If the two numbers are both odd, then the combined effect of the + * next two steps will be to replace the larger number with + * something less than half its original value. + * In any of these cases, the effect is that in k steps (where k = 1 + * or 2 depending on the case) Q decreases by at least k. So on + * average it decreases by at least 1 per step, and since it starts + * off at 3n, that's how many steps it might take. + * + * The worst case inputs (I think) are where x=2^{n-1} and y=2^n-1 + * (i.e. x is a power of 2 and y is all 1s). In that situation, the + * first n-1 steps repeatedly halve x until it's 1, and then there are + * n pairs of steps each of which subtracts 1 from y and then halves + * it. 
+ */ +static void mp_bezout_into(mp_int *a_coeff_out, mp_int *b_coeff_out, + mp_int *a_in, mp_int *b_in) +{ + size_t nw = size_t_max(1, size_t_max(a_in->nw, b_in->nw)); + + /* Make mutable copies of the input numbers */ + mp_int *a = mp_make_sized(nw), *b = mp_make_sized(nw); + mp_copy_into(a, a_in); + mp_copy_into(b, b_in); + + /* Space to build up the output coefficients, with an extra word + * so that intermediate values can overflow off the top and still + * right-shift back down to the correct value */ + mp_int *ac = mp_make_sized(nw + 1), *bc = mp_make_sized(nw + 1); + + /* And a general-purpose temp register */ + mp_int *tmp = mp_make_sized(nw); + + /* Space to record the sequence of reduction steps to unwind. We + * make it a BignumInt for no particular reason except that (a) + * mp_make_sized conveniently zeroes the allocation and mp_free + * wipes it, and (b) this way I can use mp_dump() if I have to + * debug this code. */ + size_t steps = 3 * nw * BIGNUM_INT_BITS; + mp_int *record = mp_make_sized( + (steps*2 + BIGNUM_INT_BITS - 1) / BIGNUM_INT_BITS); + + for (size_t step = 0; step < steps; step++) { + /* + * If a and b are both odd, we want to sort them so that a is + * larger. But if one is even, we want to sort them so that a + * is the even one. + */ + unsigned swap_if_both_odd = mp_cmp_hs(b, a); + unsigned swap_if_one_even = a->w[0] & 1; + unsigned both_odd = a->w[0] & b->w[0] & 1; + unsigned swap = swap_if_one_even ^ ( + (swap_if_both_odd ^ swap_if_one_even) & both_odd); + + mp_cond_swap(a, b, swap); + + /* + * Now, if we've made a the even number, divide it by two; if + * we've made it the larger of two odd numbers, subtract the + * smaller one from it. + */ + mp_rshift_fixed_into(tmp, a, 1); + mp_sub_into(a, a, b); + mp_select_into(a, tmp, a, both_odd); + + /* + * Record the two 1-bit values both_odd and swap. 
+ */ + mp_set_bit(record, step*2, both_odd); + mp_set_bit(record, step*2+1, swap); + } + + /* + * Now we expect to have reduced the two numbers to 0 and 1, + * although we don't know which way round. (But we avoid checking + * this by assertion; sometimes we'll need to do this computation + * without giving away that we already know the inputs were bogus. + * So we'd prefer to just press on and return nonsense.) + */ + + /* + * So their Bezout coefficients at this point are simply + * themselves. + */ + mp_copy_into(ac, a); + mp_copy_into(bc, b); + + /* + * We'll maintain the invariant as we unwind that ac * a - bc * b + * is either +1 or -1, and we'll remember which. (We _could_ keep + * it at +1 the whole time, but it would cost more work every time + * round the loop, so it's cheaper to fix that up once at the + * end.) + * + * Initially, the result is +1 if a was the nonzero value after + * reduction, and -1 if b was. + */ + unsigned minus_one = b->w[0]; + + for (size_t step = steps; step-- > 0 ;) { + /* + * Recover the data from the step we're unwinding. + */ + unsigned both_odd = mp_get_bit(record, step*2); + unsigned swap = mp_get_bit(record, step*2+1); + + /* + * If this was a division step (!both_odd), and our + * coefficient of a is not the even one, we need to adjust the + * coefficients by +b and +a respectively. + */ + unsigned adjust = (ac->w[0] & 1) & ~both_odd; + mp_cond_add_into(ac, ac, b, adjust); + mp_cond_add_into(bc, bc, a, adjust); + + /* + * Now, if it was a division step, then ac is even, and we + * divide it by two. + */ + mp_rshift_fixed_into(tmp, ac, 1); + mp_select_into(ac, tmp, ac, both_odd); + + /* + * But if it was a subtraction step, we add ac to bc instead. + */ + mp_cond_add_into(bc, bc, ac, both_odd); + + /* + * Undo the transformation of the input numbers, by adding b + * to a (if both_odd) or multiplying a by 2 (otherwise). 
+ */ + mp_lshift_fixed_into(tmp, a, 1); + mp_add_into(a, a, b); + mp_select_into(a, tmp, a, both_odd); + + /* + * Finally, undo the swap. If we do swap, this also reverses + * the sign of the current result ac*a+bc*b. + */ + mp_cond_swap(a, b, swap); + mp_cond_swap(ac, bc, swap); + minus_one ^= swap; + } + + /* + * Now we expect to have recovered the input a,b. + */ + assert(mp_cmp_eq(a, a_in) & mp_cmp_eq(b, b_in)); + + /* + * But we might find that our current result is -1 instead of +1, + * that is, we have A',B' such that A'a - B'b = -1. + * + * In that situation, we set A = b-A' and B = a-B', giving us + * Aa-Bb = ab - A'a - ab + B'b = +1. + */ + mp_sub_into(tmp, b, ac); + mp_select_into(ac, ac, tmp, minus_one); + mp_sub_into(tmp, a, bc); + mp_select_into(bc, bc, tmp, minus_one); + + /* + * Now we really are done. Return the outputs. + */ + if (a_coeff_out) + mp_copy_into(a_coeff_out, ac); + if (b_coeff_out) + mp_copy_into(b_coeff_out, bc); + + mp_free(a); + mp_free(b); + mp_free(ac); + mp_free(bc); + mp_free(tmp); + mp_free(record); +} + +mp_int *mp_invert(mp_int *x, mp_int *m) +{ + mp_int *result = mp_make_sized(m->nw); + mp_bezout_into(result, NULL, x, m); + return result; +} + +static uint32_t recip_approx_32(uint32_t x) +{ + /* + * Given an input x in [2^31,2^32), i.e. a uint32_t with its high + * bit set, this function returns an approximation to 2^63/x, + * computed using only multiplications and bit shifts just in case + * the C divide operator has non-constant time (either because the + * underlying machine instruction does, or because the operator + * expands to a library function on a CPU without hardware + * division). + * + * The coefficients are derived from those of the degree-9 + * polynomial which is the minimax-optimal approximation to that + * function on the given interval (generated using the Remez + * algorithm), converted into integer arithmetic with shifts used + * to maximise the number of significant bits at every state. 
(A + * sort of 'static floating point' - the exponent is statically + * known at every point in the code, so it never needs to be + * stored at run time or to influence runtime decisions.) + * + * Exhaustive iteration over the whole input space shows the + * largest possible error to be 1686.54. (The input value + * attaining that bound is 4226800006 == 0xfbefd986, whose true + * reciprocal is 2182116973.540... == 0x8210766d.8a6..., whereas + * this function returns 2182115287 == 0x82106fd7.) + */ + uint64_t r = 0x92db03d6ULL; + r = 0xf63e71eaULL - ((r*x) >> 34); + r = 0xb63721e8ULL - ((r*x) >> 34); + r = 0x9c2da00eULL - ((r*x) >> 33); + r = 0xaada0bb8ULL - ((r*x) >> 32); + r = 0xf75cd403ULL - ((r*x) >> 31); + r = 0xecf97a41ULL - ((r*x) >> 31); + r = 0x90d876cdULL - ((r*x) >> 31); + r = 0x6682799a0ULL - ((r*x) >> 26); + return r; +} + +void mp_divmod_into(mp_int *n, mp_int *d, mp_int *q_out, mp_int *r_out) +{ + assert(!mp_eq_integer(d, 0)); + + /* + * We do division by using Newton-Raphson iteration to converge to + * the reciprocal of d (or rather, R/d for R a sufficiently large + * power of 2); then we multiply that reciprocal by n; and we + * finish up with conditional subtraction. + * + * But we have to do it in a fixed number of N-R iterations, so we + * need some error analysis to know how many we might need. + * + * The iteration is derived by defining f(r) = d - R/r. + * Differentiating gives f'(r) = R/r^2, and the Newton-Raphson + * formula applied to those functions gives + * + * r_{i+1} = r_i - f(r_i) / f'(r_i) + * = r_i - (d - R/r_i) r_i^2 / R + * = r_i (2 R - d r_i) / R + * + * Now let e_i be the error in a given iteration, in the sense + * that + * + * d r_i = R + e_i + * i.e. e_i/R = (r_i - r_true) / r_true + * + * so e_i is the _relative_ error in r_i. + * + * We must also introduce a rounding-error term, because the + * division by R always gives an integer. 
This might make the + * output off by up to 1 (in the negative direction, because + * right-shifting gives floor of the true quotient). So when we + * divide by R, we must imagine adding some f in [0,1). Then we + * have + * + * d r_{i+1} = d r_i (2 R - d r_i) / R - d f + * = (R + e_i) (R - e_i) / R - d f + * = (R^2 - e_i^2) / R - d f + * = R - (e_i^2 / R + d f) + * => e_{i+1} = - (e_i^2 / R + d f) + * + * The sum of two positive quantities is bounded above by twice + * their max, and max |f| = 1, so we can bound this as follows: + * + * |e_{i+1}| <= 2 max (e_i^2/R, d) + * |e_{i+1}/R| <= 2 max ((e_i/R)^2, d/R) + * log2 |R/e_{i+1}| <= min (2 log2 |R/e_i|, log2 |R/d|) - 1 + * + * which tells us that the number of 'good' bits - i.e. + * log2(R/e_i) - very nearly doubles at every iteration (apart + * from that subtraction of 1), until it gets to the same size as + * log2(R/d). In other words, the size of R in bits has to be the + * size of denominator we're putting in, _plus_ the amount of + * precision we want to get back out. + * + * So when we multiply n (the input numerator) by our final + * reciprocal approximation r, but actually r differs from R/d by + * up to 2, then it follows that + * + * n/d - nr/R = n/d - [ n (R/d + e) ] / R + * = n/d - [ (n/d) R + n e ] / R + * = -ne/R + * => 0 <= n/d - nr/R < 2n/R + * + * so our computed quotient can differ from the true n/d by up to + * 2n/R. Hence, as long as we also choose R large enough that 2n/R + * is bounded above by a constant, we can guarantee a bounded + * number of final conditional-subtraction steps. + */ + + /* + * Get at least 32 of the most significant bits of the input + * number. + */ + size_t hiword_index = 0; + uint64_t hibits = 0, lobits = 0; + mp_find_highest_nonzero_word_pair(d, 64 - BIGNUM_INT_BITS, + &hiword_index, &hibits, &lobits); + + /* + * Make a shifted combination of those two words which puts the + * topmost bit of the number at bit 63. 
+ */ + size_t shift_up = 0; + for (size_t i = BIGNUM_INT_BITS_BITS; i-- > 0;) { + size_t sl = 1 << i; /* left shift count */ + size_t sr = BIGNUM_INT_BITS - sl; /* complementary right-shift count */ + + /* Should we shift up? */ + unsigned indicator = 1 ^ normalise_to_1(hibits >> sr); + + /* If we do, what will we get? */ + uint64_t new_hibits = (hibits << sl) | (lobits >> sr); + uint64_t new_lobits = lobits << sl; + size_t new_shift_up = shift_up + sl; + + /* Conditionally swap those values in. Each mask is built at the + * width of the variable it selects into: hibits and lobits are + * uint64_t, so a mask of type BignumInt would be too short + * whenever BignumInt is narrower than 64 bits. */ + hibits ^= (hibits ^ new_hibits ) & -(uint64_t)indicator; + lobits ^= (lobits ^ new_lobits ) & -(uint64_t)indicator; + shift_up ^= (shift_up ^ new_shift_up ) & -(size_t) indicator; + } + + /* + * So now we know the most significant 32 bits of d are at the top + * of hibits. Approximate the reciprocal of those bits. + */ + lobits = (uint64_t)recip_approx_32(hibits >> 32) << 32; + hibits = 0; + + /* + * And shift that up by as many bits as the input was shifted up + * just now, so that the product of this approximation and the + * actual input will be close to a fixed power of two regardless + * of where the MSB was. + * + * I do this in another log n individual passes, not so much + * because I'm worried about the time-invariance of the CPU's + * register-controlled shift operation, but in case the compiler + * code-generates uint64_t shifts out of a variable number of + * smaller-word shift instructions, e.g. by splitting up into + * cases. + */ + for (size_t i = BIGNUM_INT_BITS_BITS; i-- > 0;) { + size_t sl = 1 << i; /* left shift count */ + size_t sr = BIGNUM_INT_BITS - sl; /* complementary right-shift count */ + + /* Should we shift up? */ + unsigned indicator = 1 & (shift_up >> i); + + /* If we do, what will we get? */ + uint64_t new_hibits = (hibits << sl) | (lobits >> sr); + uint64_t new_lobits = lobits << sl; + + /* Conditionally swap those values in, using a mask as wide as + * the uint64_t variables being selected into. 
*/ + hibits ^= (hibits ^ new_hibits ) & -(uint64_t)indicator; + lobits ^= (lobits ^ new_lobits ) & -(uint64_t)indicator; + } + + /* + * The product of the 128-bit value now in hibits:lobits with the + * 128-bit value we originally retrieved in the same variables + * will be in the vicinity of 2^191. So we'll take log2(R) to be + * 191, plus a multiple of BIGNUM_INT_BITS large enough to allow R + * to hold the combined sizes of n and d. + */ + size_t log2_R; + { + size_t max_log2_n = (n->nw + d->nw) * BIGNUM_INT_BITS; + log2_R = max_log2_n + 3; + log2_R -= size_t_min(191, log2_R); + log2_R = (log2_R + BIGNUM_INT_BITS - 1) & ~(BIGNUM_INT_BITS - 1); + log2_R += 191; + } + + /* Number of words in a bignum capable of holding numbers the size + * of twice R. */ + size_t rw = ((log2_R+2) + BIGNUM_INT_BITS - 1) / BIGNUM_INT_BITS; + + /* + * Now construct our full-sized starting reciprocal approximation. + */ + mp_int *r_approx = mp_make_sized(rw); + size_t output_bit_index; + { + /* Where in the input number did the input 128-bit value come from? */ + size_t input_bit_index = + (hiword_index * BIGNUM_INT_BITS) - (128 - BIGNUM_INT_BITS); + + /* So how far do we need to shift our 64-bit output, if the + * product of those two fixed-size values is 2^191 and we want + * to make it 2^log2_R instead? */ + output_bit_index = log2_R - 191 - input_bit_index; + + /* If we've done all that right, it should be a whole number + * of words. */ + assert(output_bit_index % BIGNUM_INT_BITS == 0); + size_t output_word_index = output_bit_index / BIGNUM_INT_BITS; + + mp_add_integer_into_shifted_by_words( + r_approx, r_approx, lobits, output_word_index); + mp_add_integer_into_shifted_by_words( + r_approx, r_approx, hibits, + output_word_index + 64 / BIGNUM_INT_BITS); + } + + /* + * Make the constant 2*R, which we'll need in the iteration. 
+ */ + mp_int *two_R = mp_make_sized(rw); + mp_add_integer_into_shifted_by_words( + two_R, two_R, (BignumInt)1 << ((log2_R+1) % BIGNUM_INT_BITS), + (log2_R+1) / BIGNUM_INT_BITS); + + /* + * Scratch space. + */ + mp_int *dr = mp_make_sized(rw + d->nw); + mp_int *diff = mp_make_sized(size_t_max(rw, dr->nw)); + mp_int *product = mp_make_sized(rw + diff->nw); + size_t scratchsize = size_t_max( + mp_mul_scratchspace(dr->nw, r_approx->nw, d->nw), + mp_mul_scratchspace(product->nw, r_approx->nw, diff->nw)); + mp_int *scratch = mp_make_sized(scratchsize); + mp_int product_shifted = mp_make_alias( + product, log2_R / BIGNUM_INT_BITS, product->nw); + + /* + * Initial error estimate: the 32-bit output of recip_approx_32 + * differs by less than 2048 (== 2^11) from the true top 32 bits + * of the reciprocal, so the relative error is at most 2^11 + * divided by the 32-bit reciprocal, which at worst is 2^11/2^31 = + * 2^-20. So even in the worst case, we have 20 good bits of + * reciprocal to start with. + */ + size_t good_bits = 31 - 11; + size_t good_bits_needed = BIGNUM_INT_BITS * n->nw + 4; /* add a few */ + + /* + * Now do Newton-Raphson iterations until we have reason to think + * they're not converging any more. + */ + while (good_bits < good_bits_needed) { + /* + * Compute the next iterate. + */ + mp_mul_internal(dr, r_approx, d, *scratch); + mp_sub_into(diff, two_R, dr); + mp_mul_internal(product, r_approx, diff, *scratch); + mp_rshift_fixed_into(r_approx, &product_shifted, + log2_R % BIGNUM_INT_BITS); + + /* + * Adjust the error estimate. + */ + good_bits = good_bits * 2 - 1; + } + + mp_free(dr); + mp_free(diff); + mp_free(product); + mp_free(scratch); + + /* + * Now we've got our reciprocal, we can compute the quotient, by + * multiplying in n and then shifting down by log2_R bits. 
+ */ + mp_int *quotient_full = mp_mul(r_approx, n); + mp_int quotient_alias = mp_make_alias( + quotient_full, log2_R / BIGNUM_INT_BITS, quotient_full->nw); + mp_int *quotient = mp_make_sized(n->nw); + mp_rshift_fixed_into(quotient, &quotient_alias, log2_R % BIGNUM_INT_BITS); + + /* + * Next, compute the remainder. + */ + mp_int *remainder = mp_make_sized(d->nw); + mp_mul_into(remainder, quotient, d); + mp_sub_into(remainder, n, remainder); + + /* + * Finally, two conditional subtractions to fix up any remaining + * rounding error. (I _think_ one should be enough, but this + * routine isn't time-critical enough to take chances.) + */ + unsigned q_correction = 0; + for (unsigned iter = 0; iter < 2; iter++) { + unsigned need_correction = mp_cmp_hs(remainder, d); + mp_cond_sub_into(remainder, remainder, d, need_correction); + q_correction += need_correction; + } + mp_add_integer_into(quotient, quotient, q_correction); + + /* + * Now we should have a perfect answer, i.e. 0 <= r < d. + */ + assert(!mp_cmp_hs(remainder, d)); + + if (q_out) + mp_copy_into(q_out, quotient); + if (r_out) + mp_copy_into(r_out, remainder); + + mp_free(r_approx); + mp_free(two_R); + mp_free(quotient_full); + mp_free(quotient); + mp_free(remainder); +} + +mp_int *mp_div(mp_int *n, mp_int *d) +{ + mp_int *q = mp_make_sized(n->nw); + mp_divmod_into(n, d, q, NULL); + return q; +} + +mp_int *mp_mod(mp_int *n, mp_int *d) +{ + mp_int *r = mp_make_sized(d->nw); + mp_divmod_into(n, d, NULL, r); + return r; +} + +mp_int *mp_modmul(mp_int *x, mp_int *y, mp_int *modulus) +{ + mp_int *product = mp_mul(x, y); + mp_int *reduced = mp_mod(product, modulus); + mp_free(product); + return reduced; +} + +mp_int *mp_modadd(mp_int *x, mp_int *y, mp_int *modulus) +{ + mp_int *sum = mp_add(x, y); + mp_int *reduced = mp_mod(sum, modulus); + mp_free(sum); + return reduced; +} + +mp_int *mp_modsub(mp_int *x, mp_int *y, mp_int *modulus) +{ + mp_int *diff = mp_make_sized(size_t_max(x->nw, y->nw)); + mp_sub_into(diff, x, y); + 
unsigned negate = mp_cmp_hs(y, x); + mp_cond_negate(diff, diff, negate); + mp_int *reduced = mp_mod(diff, modulus); + mp_cond_negate(reduced, reduced, negate); + mp_cond_add_into(reduced, reduced, modulus, negate); + mp_free(diff); + return reduced; +} + +static mp_int *mp_modadd_in_range(mp_int *x, mp_int *y, mp_int *modulus) +{ + mp_int *sum = mp_make_sized(modulus->nw); + unsigned carry = mp_add_into_internal(sum, x, y); + mp_cond_sub_into(sum, sum, modulus, carry | mp_cmp_hs(sum, modulus)); + return sum; +} + +static mp_int *mp_modsub_in_range(mp_int *x, mp_int *y, mp_int *modulus) +{ + mp_int *diff = mp_make_sized(modulus->nw); + mp_sub_into(diff, x, y); + mp_cond_add_into(diff, diff, modulus, 1 ^ mp_cmp_hs(x, y)); + return diff; +} + +mp_int *monty_add(MontyContext *mc, mp_int *x, mp_int *y) +{ + return mp_modadd_in_range(x, y, mc->m); +} + +mp_int *monty_sub(MontyContext *mc, mp_int *x, mp_int *y) +{ + return mp_modsub_in_range(x, y, mc->m); +} + +void mp_min_into(mp_int *r, mp_int *x, mp_int *y) +{ + mp_select_into(r, x, y, mp_cmp_hs(x, y)); +} + +mp_int *mp_min(mp_int *x, mp_int *y) +{ + mp_int *r = mp_make_sized(size_t_min(x->nw, y->nw)); + mp_min_into(r, x, y); + return r; +} + +mp_int *mp_power_2(size_t power) +{ + mp_int *x = mp_new(power + 1); + mp_set_bit(x, power, 1); + return x; +} + +struct ModsqrtContext { + mp_int *p; /* the prime */ + MontyContext *mc; /* for doing arithmetic mod p */ + + /* Decompose p-1 as 2^e k, for positive integer e and odd k */ + size_t e; + mp_int *k; + mp_int *km1o2; /* (k-1)/2 */ + + /* The user-provided value z which is not a quadratic residue mod + * p, and its kth power. Both in Montgomery form. 
*/ + mp_int *z, *zk; +}; + +ModsqrtContext *modsqrt_new(mp_int *p, mp_int *any_nonsquare_mod_p) +{ + ModsqrtContext *sc = snew(ModsqrtContext); + memset(sc, 0, sizeof(ModsqrtContext)); + + sc->p = mp_copy(p); + sc->mc = monty_new(sc->p); + sc->z = monty_import(sc->mc, any_nonsquare_mod_p); + + /* Find the lowest set bit in p-1. Since this routine expects p to + * be non-secret (typically a well-known standard elliptic curve + * parameter), for once we don't need clever bit tricks. */ + for (sc->e = 1; sc->e < BIGNUM_INT_BITS * p->nw; sc->e++) + if (mp_get_bit(p, sc->e)) + break; + + sc->k = mp_rshift_fixed(p, sc->e); + sc->km1o2 = mp_rshift_fixed(sc->k, 1); + + /* Leave zk to be filled in lazily, since it's more expensive to + * compute. If this context turns out never to be needed, we can + * save the bulk of the setup time this way. */ + + return sc; +} + +static void modsqrt_lazy_setup(ModsqrtContext *sc) +{ + if (!sc->zk) + sc->zk = monty_pow(sc->mc, sc->z, sc->k); +} + +void modsqrt_free(ModsqrtContext *sc) +{ + monty_free(sc->mc); + mp_free(sc->p); + mp_free(sc->z); + mp_free(sc->k); + mp_free(sc->km1o2); + + if (sc->zk) + mp_free(sc->zk); + + sfree(sc); +} + +mp_int *mp_modsqrt(ModsqrtContext *sc, mp_int *x, unsigned *success) +{ + mp_int *mx = monty_import(sc->mc, x); + mp_int *mroot = monty_modsqrt(sc, mx, success); + mp_free(mx); + mp_int *root = monty_export(sc->mc, mroot); + mp_free(mroot); + return root; +} + +/* + * Modular square root, using an algorithm more or less similar to + * Tonelli-Shanks but adapted for constant time. + * + * The basic idea is to write p-1 = k 2^e, where k is odd and e > 0. + * Then the multiplicative group mod p (call it G) has a sequence of + * e+1 nested subgroups G = G_0 > G_1 > G_2 > ... > G_e, where each + * G_i is exactly half the size of G_{i-1} and consists of all the + * squares of elements in G_{i-1}. 
So the innermost group G_e has + * order k, which is odd, and hence within that group you can take a + * square root by raising to the power (k+1)/2. + * + * Our strategy is to iterate over these groups one by one and make + * sure the number x we're trying to take the square root of is inside + * each one, by adjusting it if it isn't. + * + * Suppose g is a primitive root of p, i.e. a generator of G_0. (We + * don't actually need to know what g _is_; we just imagine it for the + * sake of understanding.) Then G_i consists of precisely the (2^i)th + * powers of g, and hence, you can tell if a number is in G_i if + * raising it to the power k 2^{e-i} gives 1. So the conceptual + * algorithm goes: for each i, test whether x is in G_i by that + * method. If it isn't, then the previous iteration ensured it's in + * G_{i-1}, so it will be an odd power of g^{2^{i-1}}, and hence + * multiplying by any other odd power of g^{2^{i-1}} will give x' in + * G_i. And we have one of those, because our non-square z is an odd + * power of g, so z^{2^{i-1}} is an odd power of g^{2^{i-1}}. + * + * (There's a special case in the very first iteration, where we don't + * have a G_{i-1}. If it turns out that x is not even in G_1, that + * means it's not a square, so we set *success to 0. We still run the + * rest of the algorithm anyway, for the sake of constant time, but we + * don't give a hoot what it returns.) + * + * When we get to the end and have x in G_e, then we can take its + * square root by raising to (k+1)/2. But of course that's not the + * square root of the original input - it's only the square root of + * the adjusted version we produced during the algorithm. To get the + * true output answer we also have to multiply by a power of z, + * namely, z to the power of _half_ whatever we've been multiplying in + * as we go along. 
(The power of z we multiplied in must have been + * even, because the case in which we would have multiplied in an odd + * power of z is the i=0 case, in which we instead set the failure + * flag.) + * + * The code below is an optimised version of that basic idea, in which + * we _start_ by computing x^k so as to be able to test membership in + * G_i by only a few squarings rather than a full from-scratch modpow + * every time; we also start by computing our candidate output value + * x^{(k+1)/2}. So when the above description says 'adjust x by z^i' + * for some i, we have to adjust our running values of x^k and + * x^{(k+1)/2} by z^{ik} and z^{ik/2} respectively (the latter is safe + * because, as above, i is always even). And it turns out that we + * don't actually have to store the adjusted version of x itself at + * all - we _only_ keep those two powers of it. + */ +mp_int *monty_modsqrt(ModsqrtContext *sc, mp_int *x, unsigned *success) +{ + modsqrt_lazy_setup(sc); + + mp_int *scratch_to_free = mp_make_sized(3 * sc->mc->rw); + mp_int scratch = *scratch_to_free; + + /* + * Compute toret = x^{(k+1)/2}, our starting point for the output + * square root, and also xk = x^k which we'll use as we go along + * for knowing when to apply correction factors. We do this by + * first computing x^{(k-1)/2}, then multiplying it by x, then + * multiplying the two together. 
+ */ + mp_int *toret = monty_pow(sc->mc, x, sc->km1o2); + mp_int xk = mp_alloc_from_scratch(&scratch, sc->mc->rw); + mp_copy_into(&xk, toret); + monty_mul_into(sc->mc, toret, toret, x); + monty_mul_into(sc->mc, &xk, toret, &xk); + + mp_int tmp = mp_alloc_from_scratch(&scratch, sc->mc->rw); + + mp_int power_of_zk = mp_alloc_from_scratch(&scratch, sc->mc->rw); + mp_copy_into(&power_of_zk, sc->zk); + + for (size_t i = 0; i < sc->e; i++) { + mp_copy_into(&tmp, &xk); + for (size_t j = i+1; j < sc->e; j++) + monty_mul_into(sc->mc, &tmp, &tmp, &tmp); + unsigned eq1 = mp_cmp_eq(&tmp, monty_identity(sc->mc)); + + if (i == 0) { + *success = eq1; + } else { + monty_mul_into(sc->mc, &tmp, toret, &power_of_zk); + mp_select_into(toret, &tmp, toret, eq1); + + monty_mul_into(sc->mc, &power_of_zk, + &power_of_zk, &power_of_zk); + + monty_mul_into(sc->mc, &tmp, &xk, &power_of_zk); + mp_select_into(&xk, &tmp, &xk, eq1); + } + } + + mp_free(scratch_to_free); + + return toret; +} + +mp_int *mp_random_bits_fn(size_t bits, int (*gen_byte)(void)) +{ + size_t bytes = (bits + 7) / 8; + size_t words = (bits + BIGNUM_INT_BITS - 1) / BIGNUM_INT_BITS; + mp_int *x = mp_make_sized(words); + for (size_t i = 0; i < bytes; i++) { + BignumInt byte = gen_byte(); + unsigned mask = (1 << size_t_min(8, bits-i*8)) - 1; + x->w[i / BIGNUM_INT_BYTES] |= + (byte & mask) << (8*(i % BIGNUM_INT_BYTES)); + } + return x; +} + +mp_int *mp_random_in_range_fn(mp_int *lo, mp_int *hi, int (*gen_byte)(void)) +{ + mp_int *n_outcomes = mp_sub(hi, lo); + + /* + * It would be nice to generate our random numbers in such a way + * as to make every possible outcome literally equiprobable. But + * we can't do that in constant time, so we have to go for a very + * close approximation instead. I'm going to take the view that a + * factor of (1+2^-128) between the probabilities of two outcomes + * is acceptable on the grounds that you'd have to examine so many + * outputs to even detect it. 
+ */ + mp_int *unreduced = mp_random_bits_fn( + mp_max_bits(n_outcomes) + 128, gen_byte); + mp_int *reduced = mp_mod(unreduced, n_outcomes); + mp_add_into(reduced, reduced, lo); + mp_free(unreduced); + mp_free(n_outcomes); + return reduced; +} diff --git a/mpint.h b/mpint.h new file mode 100644 index 00000000..34b175d9 --- /dev/null +++ b/mpint.h @@ -0,0 +1,386 @@ +#ifndef PUTTY_MPINT_H +#define PUTTY_MPINT_H + +/* + * PuTTY's multiprecision integer library. + * + * This library is written with the aim of avoiding leaking the input + * numbers via timing and cache side channels. This means avoiding + * making any control flow change, or deciding the address of any + * memory access, based on the value of potentially secret input data. + * + * But in a library that has to handle numbers of arbitrary size, you + * can't avoid your control flow depending on the _size_ of the input! + * So the rule is that an mp_int has a nominal size that need not be + * its mathematical size: i.e. if you call (say) mp_from_bytes_be to + * turn an array of 256 bytes into an integer, and all but the last of + * those bytes is zero, then you get an mp_int which has space for 256 + * bytes of data but just happens to store the value 1. So the + * _nominal_ sizes of input data - e.g. the size in bits of some + * public-key modulus - are not considered secret, and control flow is + * allowed to do what it likes based on those sizes. But the same + * function, called with the same _nominally sized_ arguments + * containing different values, should run in the same length of time. + * + * When a function returns an 'mp_int *', it is newly allocated to an + * appropriate nominal size (which, again, depends only on the nominal + * sizes of the inputs). Other functions have 'into' in their name, + * and they instead overwrite the contents of an existing mp_int. 
+ * + * Functions in this API which return values that are logically + * boolean return them as 'unsigned' rather than the C99 bool type. + * That's because C99 bool does an implicit test for non-zero-ness + * when converting any other integer type to it, which compilers might + * well implement using data-dependent control flow. + */ + +/* + * Create and destroy mp_ints. A newly created one is initialised to + * zero. mp_clear also resets an existing number to zero. + */ +mp_int *mp_new(size_t maxbits); +void mp_free(mp_int *); +void mp_clear(mp_int *x); + +/* + * Create mp_ints from various sources: little- and big-endian binary + * data, an ordinary C unsigned integer type, a decimal or hex string + * (given either as a ptrlen or a C NUL-terminated string), and + * another mp_int. + * + * The decimal and hex conversion functions have running time + * dependent on the length of the input data, of course. + */ +mp_int *mp_from_bytes_le(ptrlen bytes); +mp_int *mp_from_bytes_be(ptrlen bytes); +mp_int *mp_from_integer(uintmax_t n); +mp_int *mp_from_decimal_pl(ptrlen decimal); +mp_int *mp_from_decimal(const char *decimal); +mp_int *mp_from_hex_pl(ptrlen hex); +mp_int *mp_from_hex(const char *hex); +mp_int *mp_copy(mp_int *x); + +/* + * A macro for declaring large fixed numbers in source code (such as + * elliptic curve parameters, or standard Diffie-Hellman moduli). The + * idea is that you just write something like + * + * mp_int *value = MP_LITERAL(0x19284376283754638745693467245); + * + * and it newly allocates you an mp_int containing that number. + * + * Internally, the macro argument is stringified and passed to + * mp_from_hex. That's not as fast as it could be if I had instead set + * up some kind of mp_from_array_of_uint64_t() function, but I think + * this system is valuable for the fact that the literal integers + * appear in a very natural syntax that can be pasted directly out + * into, say, Python if you want to cross-check a calculation. 
+ */ +static inline mp_int *mp__from_string_literal(const char *lit) +{ + /* Don't call this directly; it's not equipped to deal with + * hostile data. Use only via the MP_LITERAL macro. */ + if (lit[0] && (lit[1] == 'x' || lit[1] == 'X')) + return mp_from_hex(lit+2); + else + return mp_from_decimal(lit); +} +#define MP_LITERAL(number) mp__from_string_literal(#number) + +/* + * Create an mp_int with the value 2^power. + */ +mp_int *mp_power_2(size_t power); + +/* + * Retrieve the value of a particular bit or byte of an mp_int. The + * byte / bit index is not considered to be secret data. Out-of-range + * byte/bit indices are handled cleanly and return zero. + */ +uint8_t mp_get_byte(mp_int *x, size_t byte); +unsigned mp_get_bit(mp_int *x, size_t bit); + +/* + * Set an mp_int bit. Again, the bit index is not considered secret. + * Do not pass an out-of-range index, on pain of assertion failure. + */ +void mp_set_bit(mp_int *x, size_t bit, unsigned val); + +/* + * Return the nominal size of an mp_int, in terms of the maximum + * number of bytes or bits that can fit in it. + */ +size_t mp_max_bytes(mp_int *x); +size_t mp_max_bits(mp_int *x); + +/* + * Return the _mathematical_ bit count of an mp_int (not its nominal + * size), i.e. a value n such that 2^{n-1} <= x < 2^n. + * + * This function is supposed to run in constant time for a given + * nominal input size. Of course it's likely that clients of this + * function will promptly need to use the result as the limit of some + * loop (e.g. marshalling an mp_int into an SSH packet, which doesn't + * permit extra prefix zero bytes). But that's up to the caller to + * decide the safety of. + */ +size_t mp_get_nbits(mp_int *x); + +/* + * Return the value of an mp_int as a decimal or hex string. The + * result is dynamically allocated, and the caller is responsible for + * freeing it. 
+ * + * These functions should run in constant time for a given nominal + * input size, even though the exact number of digits returned is + * variable. They always allocate enough space for the largest output + * that might be needed, but they don't always fill it. + */ +char *mp_get_decimal(mp_int *x); +char *mp_get_hex(mp_int *x); +char *mp_get_hex_uppercase(mp_int *x); + +/* + * Compare two mp_ints, or compare one mp_int against a C integer. The + * 'eq' functions return 1 if the two inputs are equal, or 0 + * otherwise; the 'hs' functions return 1 if the first input is >= the + * second, and 0 otherwise. + */ +unsigned mp_cmp_hs(mp_int *a, mp_int *b); +unsigned mp_cmp_eq(mp_int *a, mp_int *b); +unsigned mp_hs_integer(mp_int *x, uintmax_t n); +unsigned mp_eq_integer(mp_int *x, uintmax_t n); + +/* + * Take the minimum of two mp_ints, without using a conditional branch. + */ +void mp_min_into(mp_int *r, mp_int *x, mp_int *y); +mp_int *mp_min(mp_int *x, mp_int *y); + +/* + * Diagnostic function. Writes out x in hex to the supplied stdio + * stream, preceded by the string 'prefix' and followed by 'suffix'. + * + * This is useful to put temporarily into code, but it's also + * potentially useful to call from a debugger. + */ +void mp_dump(FILE *fp, const char *prefix, mp_int *x, const char *suffix); + +/* + * Overwrite one mp_int with another. + */ +void mp_copy_into(mp_int *dest, mp_int *src); + +/* + * Conditional selection. Overwrites dest with either src0 or src1, + * according to the value of 'choose_src1'. choose_src1 should be 0 or + * 1; if it's 1, then dest is set to src1, otherwise src0. + * + * The value of choose_src1 is considered to be secret data, so + * control flow and memory access should not depend on it. 
+ */ +void mp_select_into(mp_int *dest, mp_int *src0, mp_int *src1, + unsigned choose_src1); + +/* + * Addition, subtraction and multiplication, either targeting an + * existing mp_int or making a new one large enough to hold whatever + * the output might be. + */ +void mp_add_into(mp_int *r, mp_int *a, mp_int *b); +void mp_sub_into(mp_int *r, mp_int *a, mp_int *b); +void mp_mul_into(mp_int *r, mp_int *a, mp_int *b); +mp_int *mp_add(mp_int *x, mp_int *y); +mp_int *mp_sub(mp_int *x, mp_int *y); +mp_int *mp_mul(mp_int *x, mp_int *y); + +/* + * Addition, subtraction and multiplication with one argument small + * enough to fit in a C integer. For mp_mul_integer_into, it has to be + * even smaller than that. + */ +void mp_add_integer_into(mp_int *r, mp_int *a, uintmax_t n); +void mp_sub_integer_into(mp_int *r, mp_int *a, uintmax_t n); +void mp_mul_integer_into(mp_int *r, mp_int *a, uint16_t n); + +/* + * Conditional addition/subtraction. If yes == 1, sets r to a+b or a-b + * (respectively). If yes == 0, sets r to just a. 'yes' is considered + * secret data. + */ +void mp_cond_add_into(mp_int *r, mp_int *a, mp_int *b, unsigned yes); +void mp_cond_sub_into(mp_int *r, mp_int *a, mp_int *b, unsigned yes); + +/* + * Swap x0 and x1 if swap == 1, and not if swap == 0. 'swap' is + * considered secret. + */ +void mp_cond_swap(mp_int *x0, mp_int *x1, unsigned swap); + +/* + * Set x to 0 if clear == 1, and otherwise leave it unchanged. 'clear' + * is considered secret. + */ +void mp_cond_clear(mp_int *x, unsigned clear); + +/* + * Division. mp_divmod_into divides n by d, and writes the quotient + * into q and the remainder into r. You can pass either of q and r as + * NULL if you don't need one of the outputs. + * + * mp_div and mp_mod are wrappers that return one or other of those + * outputs as a freshly allocated mp_int of the appropriate size. 
+ * + * Division by zero gives no error, and returns a quotient of 0 and a + * remainder of n (so as to still satisfy the division identity that + * n=qd+r). + */ +void mp_divmod_into(mp_int *n, mp_int *d, mp_int *q, mp_int *r); +mp_int *mp_div(mp_int *n, mp_int *d); +mp_int *mp_mod(mp_int *x, mp_int *modulus); + +/* + * Trivially easy special case of mp_mod: reduce a number mod a power + * of two. + */ +void mp_reduce_mod_2to(mp_int *x, size_t p); + +/* + * Modular inverses. mp_invert computes the inverse of x mod modulus + * (and will expect the two to be coprime). mp_invert_mod_2to computes + * the inverse of x mod 2^p, and is a great deal faster. + */ +mp_int *mp_invert_mod_2to(mp_int *x, size_t p); +mp_int *mp_invert(mp_int *x, mp_int *modulus); + +/* + * System for taking square roots modulo an odd prime. + * + * In order to do this efficiently, you need to provide an extra piece + * of information at setup time, namely a number which is not + * congruent mod p to any square. Given p and that non-square, you can + * use modsqrt_new to make a context containing all the necessary + * equipment for actually calculating the square roots, and then you + * can call mp_modsqrt as many times as you like on that context + * before freeing it. + * + * The output parameter '*success' will be filled in with 1 if the + * operation was successful, or 0 if the input number doesn't have a + * square root mod p at all. In the latter case, the returned mp_int + * will be nonsense and you shouldn't depend on it. + * + * ==== WARNING ==== + * + * This function DOES NOT TREAT THE PRIME MODULUS AS SECRET DATA! It + * will protect the number you're taking the square root _of_, but not + * the number you're taking the root of it _mod_. + * + * (This is because the algorithm requires a number of loop iterations + * equal to the number of factors of 2 in p-1. 
And the expected use of + * this function is for elliptic-curve point decompression, in which + * the modulus is always a well-known one written down in standards + * documents.) + */ +typedef struct ModsqrtContext ModsqrtContext; +ModsqrtContext *modsqrt_new(mp_int *p, mp_int *any_nonsquare_mod_p); +void modsqrt_free(ModsqrtContext *); +mp_int *mp_modsqrt(ModsqrtContext *sc, mp_int *x, unsigned *success); + +/* + * Functions for Montgomery multiplication, a fast technique for doing + * a long series of modular multiplications all with the same modulus + * (which has to be odd). + * + * You start by calling monty_new to set up a context structure + * containing all the precomputed bits and pieces needed by the + * algorithm. Then, any numbers you want to work with must first be + * transformed into the internal Montgomery representation using + * monty_import; having done that, you can use monty_mul and monty_pow + * to operate on them efficiently; and finally, monty_export will + * convert numbers back out of Montgomery representation to give their + * ordinary values. + * + * Addition and subtraction are not optimised by the Montgomery trick, + * but monty_add and monty_sub are provided anyway for convenience. + * + * There are also monty_invert and monty_modsqrt, which are analogues + * of mp_invert and mp_modsqrt which take their inputs in Montgomery + * representation. For mp_modsqrt, the prime modulus of the + * ModsqrtContext must be the same as the modulus of the MontyContext. + * + * The query functions monty_modulus and monty_identity return numbers + * stored inside the MontyContext, without copying them. The returned + * pointers are still owned by the MontyContext, so don't free them! 
+ */ +MontyContext *monty_new(mp_int *modulus); +MontyContext *monty_copy(MontyContext *mc); +void monty_free(MontyContext *mc); +mp_int *monty_modulus(MontyContext *mc); /* doesn't transfer ownership */ +mp_int *monty_identity(MontyContext *mc); /* doesn't transfer ownership */ +void monty_import_into(MontyContext *mc, mp_int *r, mp_int *x); +mp_int *monty_import(MontyContext *mc, mp_int *x); +void monty_export_into(MontyContext *mc, mp_int *r, mp_int *x); +mp_int *monty_export(MontyContext *mc, mp_int *x); +void monty_mul_into(MontyContext *, mp_int *r, mp_int *, mp_int *); +mp_int *monty_add(MontyContext *, mp_int *, mp_int *); +mp_int *monty_sub(MontyContext *, mp_int *, mp_int *); +mp_int *monty_mul(MontyContext *, mp_int *, mp_int *); +mp_int *monty_pow(MontyContext *, mp_int *base, mp_int *exponent); +mp_int *monty_invert(MontyContext *, mp_int *); +mp_int *monty_modsqrt(ModsqrtContext *sc, mp_int *mx, unsigned *success); + +/* + * Modular arithmetic functions which don't use an explicit + * MontyContext. mp_modpow will use one internally (on the assumption + * that the exponent is likely to be large enough to make it + * worthwhile); the other three will just do ordinary non-Montgomery- + * optimised modular reduction. Use mp_modmul if you only have one + * product to compute; if you have a lot, consider using a + * MontyContext in the client code. + */ +mp_int *mp_modpow(mp_int *base, mp_int *exponent, mp_int *modulus); +mp_int *mp_modmul(mp_int *x, mp_int *y, mp_int *modulus); +mp_int *mp_modadd(mp_int *x, mp_int *y, mp_int *modulus); +mp_int *mp_modsub(mp_int *x, mp_int *y, mp_int *modulus); + +/* + * Shift an mp_int right by a given number of bits. The shift count is + * considered to be secret data, and as a result, the algorithm takes + * O(n log n) time instead of the obvious O(n). + */ +mp_int *mp_rshift_safe(mp_int *x, size_t shift); + +/* + * Shift an mp_int left or right by a fixed number of bits. 
The shift + * count is NOT considered to be secret data! Use this if you're + * always dividing by 2, for example, but don't use it to shift by a + * variable amount derived from another secret number. + * + * The upside is that these functions run in sensible linear time. + */ +void mp_lshift_fixed_into(mp_int *r, mp_int *a, size_t shift); +void mp_rshift_fixed_into(mp_int *r, mp_int *x, size_t shift); +mp_int *mp_rshift_fixed(mp_int *x, size_t shift); + +/* + * Generate a random mp_int. + * + * The _function_ definitions here will expect to be given a gen_byte + * function that provides random data. Normally you'd use this using + * random_byte() from random.c, and the macro wrappers automate that. + * + * (This is a bit of a dodge to avoid mpint.c having a link-time + * dependency on random.c, so that programs can link against one but + * not the other: if a client of this header uses one of these macros + * then _they_ have link-time dependencies on both modules.) + * + * mp_random_bits[_fn] returns an integer 0 <= n < 2^bits. + * mp_random_in_range[_fn](lo,hi) returns an integer lo <= n < hi. + */ +mp_int *mp_random_bits_fn(size_t bits, int (*gen_byte)(void)); +mp_int *mp_random_in_range_fn( + mp_int *lo_inclusive, mp_int *hi_exclusive, int (*gen_byte)(void)); +#define mp_random_bits(bits) mp_random_bits_fn(bits, random_byte) +#define mp_random_in_range(lo, hi) mp_random_in_range_fn(lo, hi, random_byte) + +#endif /* PUTTY_MPINT_H */ diff --git a/sshbn.h b/mpint_i.h similarity index 78% rename from sshbn.h rename to mpint_i.h index 6ee97ee6..45060b7e 100644 --- a/sshbn.h +++ b/mpint_i.h @@ -1,10 +1,15 @@ /* - * sshbn.h: the assorted conditional definitions of BignumInt and - * multiply macros used throughout the bignum code to treat numbers as - * arrays of the most conveniently sized word for the target machine. + * mpint_i.h: definitions used internally by the bignum code, and + * also a few other vaguely-bignum-like places. 
+ */ + +/* ---------------------------------------------------------------------- + * The assorted conditional definitions of BignumInt and multiply + * macros used throughout the bignum code to treat numbers as arrays + * of the most conveniently sized word for the target machine. * Exported so that other code (e.g. poly1305) can use it too. * - * This file must export, in whatever ifdef branch it ends up in: + * This code must export, in whatever ifdef branch it ends up in: * * - two types: 'BignumInt' and 'BignumCarry'. BignumInt is an * unsigned integer type which will be used as the base word size @@ -64,7 +69,7 @@ */ typedef unsigned long long BignumInt; - #define BIGNUM_INT_BITS 64 + #define BIGNUM_INT_BITS_BITS 6 #define DEFINE_BIGNUMDBLINT typedef __uint128_t BignumDblInt #elif defined _MSC_VER && defined _M_AMD64 @@ -85,7 +90,7 @@ #include <intrin.h> typedef unsigned char BignumCarry; /* the type _addcarry_u64 likes to use */ typedef unsigned __int64 BignumInt; - #define BIGNUM_INT_BITS 64 + #define BIGNUM_INT_BITS_BITS 6 #define BignumADC(ret, retc, a, b, c) do \ { \ BignumInt ADC_tmp; \ @@ -119,7 +124,7 @@ /* 32-bit BignumInt, using C99 unsigned long long as BignumDblInt */ typedef unsigned int BignumInt; - #define BIGNUM_INT_BITS 32 + #define BIGNUM_INT_BITS_BITS 5 #define DEFINE_BIGNUMDBLINT typedef unsigned long long BignumDblInt #elif defined _MSC_VER && defined _M_IX86 @@ -127,7 +132,7 @@ /* 32-bit BignumInt, using Visual Studio __int64 as BignumDblInt */ typedef unsigned int BignumInt; - #define BIGNUM_INT_BITS 32 + #define BIGNUM_INT_BITS_BITS 5 #define DEFINE_BIGNUMDBLINT typedef unsigned __int64 BignumDblInt #elif defined _LP64 @@ -139,7 +144,7 @@ */ typedef unsigned int BignumInt; - #define BIGNUM_INT_BITS 32 + #define BIGNUM_INT_BITS_BITS 5 #define DEFINE_BIGNUMDBLINT typedef unsigned long BignumDblInt #else @@ -155,15 +160,16 @@ */ typedef unsigned short BignumInt; - #define BIGNUM_INT_BITS 16 + #define BIGNUM_INT_BITS_BITS 4 #define 
DEFINE_BIGNUMDBLINT typedef unsigned long BignumDblInt #endif /* - * Common code across all branches of that ifdef: define the three - * easy constant macros in terms of BIGNUM_INT_BITS. + * Common code across all branches of that ifdef: define all the + * easy constant macros in terms of BIGNUM_INT_BITS_BITS. */ +#define BIGNUM_INT_BITS (1 << BIGNUM_INT_BITS_BITS) #define BIGNUM_INT_BYTES (BIGNUM_INT_BITS / 8) #define BIGNUM_TOP_BIT (((BignumInt)1) << (BIGNUM_INT_BITS-1)) #define BIGNUM_INT_MASK (BIGNUM_TOP_BIT | (BIGNUM_TOP_BIT-1)) @@ -218,3 +224,58 @@ } while (0) #endif /* DEFINE_BIGNUMDBLINT */ + +/* ---------------------------------------------------------------------- + * Data structures used inside mpint.c. + */ + +struct mp_int { + size_t nw; + BignumInt *w; +}; + +struct MontyContext { + /* + * The actual modulus. + */ + mp_int *m; + + /* + * Montgomery multiplication works by selecting a value r > m, + * coprime to m, which is really easy to divide by. In binary + * arithmetic, that means making it a power of 2; in fact we make + * it a whole number of BignumInt. + * + * We don't store r directly as an mp_int (there's no need). But + * its value is 2^rbits; we also store rw = rbits/BIGNUM_INT_BITS + * (the corresponding word offset within an mp_int). + * + * pw is the number of words needed to store an mp_int you're + * doing reduction on: it has to be big enough to hold the sum of + * an input value up to m^2 plus an extra addend up to m*r. + */ + size_t rbits, rw, pw; + + /* + * The key step in Montgomery reduction requires the inverse of -m + * mod r. + */ + mp_int *minus_minv_mod_r; + + /* + * r^1, r^2 and r^3 mod m, which are used for various purposes. + * + * (Annoyingly, this is one of the rare cases where it would have + * been nicer to have a Pascal-style 1-indexed array. I couldn't + * _quite_ bring myself to put a gratuitous zero element in here. + * So you just have to live with getting r^k by taking the [k-1]th + * element of this array.) 
+ */ + mp_int *powers_of_r_mod_m[3]; + + /* + * Persistent scratch space from which monty_* functions can + * allocate storage for intermediate values. + */ + mp_int *scratch; +}; diff --git a/pageant.c b/pageant.c index 3da719e9..ebd56b03 100644 --- a/pageant.c +++ b/pageant.c @@ -7,6 +7,7 @@ #include #include "putty.h" +#include "mpint.h" #include "ssh.h" #include "pageant.h" @@ -41,37 +42,9 @@ static int cmpkeys_rsa(void *av, void *bv) { struct RSAKey *a = (struct RSAKey *) av; struct RSAKey *b = (struct RSAKey *) bv; - Bignum am, bm; - int alen, blen; - am = a->modulus; - bm = b->modulus; - /* - * Compare by length of moduli. - */ - alen = bignum_bitcount(am); - blen = bignum_bitcount(bm); - if (alen > blen) - return +1; - else if (alen < blen) - return -1; - /* - * Now compare by moduli themselves. - */ - alen = (alen + 7) / 8; /* byte count */ - while (alen-- > 0) { - int abyte, bbyte; - abyte = bignum_byte(am, alen); - bbyte = bignum_byte(bm, alen); - if (abyte > bbyte) - return +1; - else if (abyte < bbyte) - return -1; - } - /* - * Give up. 
- */ - return 0; + return ((int)mp_cmp_hs(a->modulus, b->modulus) - + (int)mp_cmp_hs(b->modulus, a->modulus)); } /* @@ -251,7 +224,7 @@ void pageant_handle_msg(BinarySink *bs, */ { struct RSAKey reqkey, *key; - Bignum challenge, response; + mp_int *challenge, *response; ptrlen session_id; unsigned response_type; unsigned char response_md5[16]; @@ -295,7 +268,7 @@ void pageant_handle_msg(BinarySink *bs, MD5Init(&md5c); for (i = 0; i < 32; i++) - put_byte(&md5c, bignum_byte(response, 31 - i)); + put_byte(&md5c, mp_get_byte(response, 31 - i)); put_data(&md5c, session_id.ptr, session_id.len); MD5Final(response_md5, &md5c); @@ -306,8 +279,8 @@ void pageant_handle_msg(BinarySink *bs, challenge1_cleanup: if (response) - freebn(response); - freebn(challenge); + mp_free(response); + mp_free(challenge); freersakey(&reqkey); } break; @@ -1275,7 +1248,7 @@ int pageant_add_keyfile(Filename *filename, const char *passphrase, request = strbuf_new_for_agent_query(); put_byte(request, SSH1_AGENTC_ADD_RSA_IDENTITY); - put_uint32(request, bignum_bitcount(rkey->modulus)); + put_uint32(request, mp_get_nbits(rkey->modulus)); put_mp_ssh1(request, rkey->modulus); put_mp_ssh1(request, rkey->exponent); put_mp_ssh1(request, rkey->private_exponent); diff --git a/ssh.h b/ssh.h index ec5bbb4a..47feb92f 100644 --- a/ssh.h +++ b/ssh.h @@ -390,10 +390,6 @@ void ssh_user_close(Ssh *ssh, const char *fmt, ...); #define SSH_CIPHER_3DES 3 #define SSH_CIPHER_BLOWFISH 6 -#ifndef BIGNUM_INTERNAL -typedef void *Bignum; -#endif - typedef struct ssh_keyalg ssh_keyalg; typedef struct ssh_key { const struct ssh_keyalg *vt; @@ -402,57 +398,52 @@ typedef struct ssh_key { struct RSAKey { int bits; int bytes; - Bignum modulus; - Bignum exponent; - Bignum private_exponent; - Bignum p; - Bignum q; - Bignum iqmp; + mp_int *modulus; + mp_int *exponent; + mp_int *private_exponent; + mp_int *p; + mp_int *q; + mp_int *iqmp; char *comment; ssh_key sshk; }; struct dss_key { - Bignum p, q, g, y, x; + mp_int *p, *q, *g, *y, 
*x; ssh_key sshk; }; struct ec_curve; -struct ec_point { - const struct ec_curve *curve; - Bignum x, y; - Bignum z; /* Jacobian denominator */ - bool infinity; -}; - -/* A couple of ECC functions exported for use outside sshecc.c */ -struct ec_point *ecp_mul(const struct ec_point *a, const Bignum b); -void ec_point_free(struct ec_point *point); - /* Weierstrass form curve */ struct ec_wcurve { - Bignum a, b, n; - struct ec_point G; + WeierstrassCurve *wc; + WeierstrassPoint *G; + mp_int *G_order; }; /* Montgomery form curve */ struct ec_mcurve { - Bignum a, b; - struct ec_point G; + MontgomeryCurve *mc; + MontgomeryPoint *G; }; /* Edwards form curve */ struct ec_ecurve { - Bignum l, d; - struct ec_point B; + EdwardsCurve *ec; + EdwardsPoint *G; + mp_int *G_order; }; +typedef enum EllipticCurveType { + EC_WEIERSTRASS, EC_MONTGOMERY, EC_EDWARDS +} EllipticCurveType; + struct ec_curve { - enum { EC_WEIERSTRASS, EC_MONTGOMERY, EC_EDWARDS } type; + EllipticCurveType type; /* 'name' is the identifier of the curve when it has to appear in * wire protocol encodings, as it does in e.g. the public key and * signature formats for NIST curves. Curves which do not format @@ -461,8 +452,8 @@ struct ec_curve { * 'textname' is non-NULL for all curves, and is a human-readable * identification suitable for putting in log messages. 
*/ const char *name, *textname; - unsigned int fieldBits; - Bignum p; + size_t fieldBits, fieldBytes; + mp_int *p; union { struct ec_wcurve w; struct ec_mcurve m; @@ -481,13 +472,21 @@ bool ec_ed_alg_and_curve_by_bits(int bits, const struct ec_curve **curve, const ssh_keyalg **alg); -struct ec_key { - struct ec_point publicKey; - Bignum privateKey; +struct ecdsa_key { + const struct ec_curve *curve; + WeierstrassPoint *publicKey; + mp_int *privateKey; + ssh_key sshk; +}; +struct eddsa_key { + const struct ec_curve *curve; + EdwardsPoint *publicKey; + mp_int *privateKey; ssh_key sshk; }; -struct ec_point *ec_public(const Bignum privateKey, const struct ec_curve *curve); +WeierstrassPoint *ecdsa_public(mp_int *private_key, const ssh_keyalg *alg); +EdwardsPoint *eddsa_public(mp_int *private_key, const ssh_keyalg *alg); /* * SSH-1 never quite decided which order to store the two components @@ -504,8 +503,9 @@ void BinarySource_get_rsa_ssh1_pub( void BinarySource_get_rsa_ssh1_priv( BinarySource *src, struct RSAKey *rsa); bool rsa_ssh1_encrypt(unsigned char *data, int length, struct RSAKey *key); -Bignum rsa_ssh1_decrypt(Bignum input, struct RSAKey *key); -bool rsa_ssh1_decrypt_pkcs1(Bignum input, struct RSAKey *key, strbuf *outbuf); +mp_int *rsa_ssh1_decrypt(mp_int *input, struct RSAKey *key); +bool rsa_ssh1_decrypt_pkcs1(mp_int *input, struct RSAKey *key, + strbuf *outbuf); char *rsastr_fmt(struct RSAKey *key); char *rsa_ssh1_fingerprint(struct RSAKey *key); bool rsa_verify(struct RSAKey *key); @@ -538,25 +538,26 @@ int ssh_rsakex_klen(struct RSAKey *key); void ssh_rsakex_encrypt(const struct ssh_hashalg *h, unsigned char *in, int inlen, unsigned char *out, int outlen, struct RSAKey *key); -Bignum ssh_rsakex_decrypt(const struct ssh_hashalg *h, ptrlen ciphertext, - struct RSAKey *rsa); +mp_int *ssh_rsakex_decrypt(const struct ssh_hashalg *h, ptrlen ciphertext, + struct RSAKey *rsa); /* * SSH2 ECDH key exchange functions */ struct ssh_kex; +typedef struct ecdh_key 
ecdh_key; const char *ssh_ecdhkex_curve_textname(const struct ssh_kex *kex); -struct ec_key *ssh_ecdhkex_newkey(const struct ssh_kex *kex); -void ssh_ecdhkex_freekey(struct ec_key *key); -void ssh_ecdhkex_getpublic(struct ec_key *key, BinarySink *bs); -Bignum ssh_ecdhkex_getkey(struct ec_key *key, - const void *remoteKey, int remoteKeyLen); +ecdh_key *ssh_ecdhkex_newkey(const struct ssh_kex *kex); +void ssh_ecdhkex_freekey(ecdh_key *key); +void ssh_ecdhkex_getpublic(ecdh_key *key, BinarySink *bs); +mp_int *ssh_ecdhkex_getkey(ecdh_key *key, ptrlen remoteKey); /* * Helper function for k generation in DSA, reused in ECDSA */ -Bignum *dss_gen_k(const char *id_string, Bignum modulus, Bignum private_key, - unsigned char *digest, int digest_len); +mp_int *dss_gen_k(const char *id_string, + mp_int *modulus, mp_int *private_key, + unsigned char *digest, int digest_len); struct ssh2_cipheralg; typedef struct ssh2_cipher { @@ -740,14 +741,14 @@ typedef struct ssh_hash { BinarySink_DELEGATE_IMPLEMENTATION; } ssh_hash; -struct ssh_hashalg { +typedef struct ssh_hashalg { ssh_hash *(*new)(const struct ssh_hashalg *alg); ssh_hash *(*copy)(ssh_hash *); void (*final)(ssh_hash *, unsigned char *); /* ALSO FREES THE ssh_hash! 
*/ void (*free)(ssh_hash *); int hlen; /* output length in bytes */ const char *text_name; -}; +} ssh_hashalg; #define ssh_hash_new(alg) ((alg)->new(alg)) #define ssh_hash_copy(ctx) ((ctx)->vt->copy(ctx)) @@ -1053,58 +1054,15 @@ void *x11_dehexify(ptrlen hex, int *outlen); Channel *agentf_new(SshChannel *c); -Bignum copybn(Bignum b); -Bignum bn_power_2(int n); -void bn_restore_invariant(Bignum b); -Bignum bignum_from_long(unsigned long n); -void freebn(Bignum b); -Bignum modpow(Bignum base, Bignum exp, Bignum mod); -Bignum modmul(Bignum a, Bignum b, Bignum mod); -Bignum modsub(const Bignum a, const Bignum b, const Bignum n); -void decbn(Bignum n); -extern Bignum Zero, One; -Bignum bignum_from_bytes(const void *data, int nbytes); -Bignum bignum_from_bytes_le(const void *data, int nbytes); -Bignum bignum_random_in_range(const Bignum lower, const Bignum upper); -int bignum_bitcount(Bignum bn); -int bignum_byte(Bignum bn, int i); -int bignum_bit(Bignum bn, int i); -void bignum_set_bit(Bignum bn, int i, int value); -Bignum biggcd(Bignum a, Bignum b); -unsigned short bignum_mod_short(Bignum number, unsigned short modulus); -Bignum bignum_add_long(Bignum number, unsigned long addend); -Bignum bigadd(Bignum a, Bignum b); -Bignum bigsub(Bignum a, Bignum b); -Bignum bigmul(Bignum a, Bignum b); -Bignum bigmuladd(Bignum a, Bignum b, Bignum addend); -Bignum bigdiv(Bignum a, Bignum b); -Bignum bigmod(Bignum a, Bignum b); -Bignum modinv(Bignum number, Bignum modulus); -Bignum bignum_bitmask(Bignum number); -Bignum bignum_rshift(Bignum number, int shift); -Bignum bignum_lshift(Bignum number, int shift); -int bignum_cmp(Bignum a, Bignum b); -char *bignum_decimal(Bignum x); -Bignum bignum_from_decimal(const char *decimal); - -void BinarySink_put_mp_ssh1(BinarySink *, Bignum); -void BinarySink_put_mp_ssh2(BinarySink *, Bignum); -Bignum BinarySource_get_mp_ssh1(BinarySource *); -Bignum BinarySource_get_mp_ssh2(BinarySource *); - -#ifdef DEBUG -void diagbn(char *prefix, Bignum md); 
-#endif - bool dh_is_gex(const struct ssh_kex *kex); struct dh_ctx; struct dh_ctx *dh_setup_group(const struct ssh_kex *kex); -struct dh_ctx *dh_setup_gex(Bignum pval, Bignum gval); +struct dh_ctx *dh_setup_gex(mp_int *pval, mp_int *gval); int dh_modulus_bit_size(const struct dh_ctx *ctx); void dh_cleanup(struct dh_ctx *); -Bignum dh_create_e(struct dh_ctx *, int nbits); -const char *dh_validate_f(struct dh_ctx *, Bignum f); -Bignum dh_find_K(struct dh_ctx *, Bignum f); +mp_int *dh_create_e(struct dh_ctx *, int nbits); +const char *dh_validate_f(struct dh_ctx *, mp_int *f); +mp_int *dh_find_K(struct dh_ctx *, mp_int *f); bool rsa_ssh1_encrypted(const Filename *filename, char **comment); int rsa_ssh1_loadpub(const Filename *filename, BinarySink *bs, @@ -1114,6 +1072,14 @@ int rsa_ssh1_loadkey(const Filename *filename, struct RSAKey *key, bool rsa_ssh1_savekey(const Filename *filename, struct RSAKey *key, char *passphrase); +static inline bool is_base64_char(char c) +{ + return ((c >= '0' && c <= '9') || + (c >= 'a' && c <= 'z') || + (c >= 'A' && c <= 'Z') || + c == '+' || c == '/' || c == '='); +} + extern int base64_decode_atom(const char *atom, unsigned char *out); extern int base64_lines(int datalen); extern void base64_encode_atom(const unsigned char *data, int n, char *out); @@ -1233,12 +1199,13 @@ int rsa_generate(struct RSAKey *key, int bits, progfn_t pfn, void *pfnparam); int dsa_generate(struct dss_key *key, int bits, progfn_t pfn, void *pfnparam); -int ec_generate(struct ec_key *key, int bits, progfn_t pfn, - void *pfnparam); -int ec_edgenerate(struct ec_key *key, int bits, progfn_t pfn, - void *pfnparam); -Bignum primegen(int bits, int modulus, int residue, Bignum factor, - int phase, progfn_t pfn, void *pfnparam, unsigned firstbits); +int ecdsa_generate(struct ecdsa_key *key, int bits, progfn_t pfn, + void *pfnparam); +int eddsa_generate(struct eddsa_key *key, int bits, progfn_t pfn, + void *pfnparam); +mp_int *primegen( + int bits, int modulus, int 
residue, mp_int *factor, + int phase, progfn_t pfn, void *pfnparam, unsigned firstbits); void invent_firstbits(unsigned *one, unsigned *two); /* diff --git a/ssh1login-server.c b/ssh1login-server.c index 1dd374a7..1100d169 100644 --- a/ssh1login-server.c +++ b/ssh1login-server.c @@ -5,6 +5,7 @@ #include #include "putty.h" +#include "mpint.h" #include "ssh.h" #include "sshbpp.h" #include "sshppl.h" @@ -29,7 +30,7 @@ struct ssh1_login_server_state { struct RSAKey *servkey, *hostkey; bool servkey_generated_here; - Bignum sesskey; + mp_int *sesskey; AuthPolicy *authpolicy; unsigned ap_methods, current_method; @@ -206,8 +207,8 @@ static void ssh1_login_server_process_queue(PacketProtocolLayer *ppl) struct RSAKey *smaller, *larger; strbuf *data = strbuf_new(); - if (bignum_bitcount(s->hostkey->modulus) > - bignum_bitcount(s->servkey->modulus)) { + if (mp_get_nbits(s->hostkey->modulus) > + mp_get_nbits(s->servkey->modulus)) { larger = s->hostkey; smaller = s->servkey; } else { @@ -216,13 +217,13 @@ static void ssh1_login_server_process_queue(PacketProtocolLayer *ppl) } if (rsa_ssh1_decrypt_pkcs1(s->sesskey, larger, data)) { - freebn(s->sesskey); - s->sesskey = bignum_from_bytes(data->u, data->len); + mp_free(s->sesskey); + s->sesskey = mp_from_bytes_be(ptrlen_from_strbuf(data)); data->len = 0; if (rsa_ssh1_decrypt_pkcs1(s->sesskey, smaller, data) && data->len == sizeof(s->session_key)) { memcpy(s->session_key, data->u, sizeof(s->session_key)); - freebn(s->sesskey); + mp_free(s->sesskey); s->sesskey = NULL; /* indicates success */ } } @@ -288,10 +289,10 @@ static void ssh1_login_server_process_queue(PacketProtocolLayer *ppl) continue; { - Bignum modulus = get_mp_ssh1(pktin); + mp_int *modulus = get_mp_ssh1(pktin); s->authkey = auth_publickey_ssh1( s->authpolicy, s->username, modulus); - freebn(modulus); + mp_free(modulus); } if (!s->authkey) @@ -321,7 +322,8 @@ static void ssh1_login_server_process_queue(PacketProtocolLayer *ppl) continue; } - Bignum bn = 
bignum_from_bytes(rsabuf, s->authkey->bytes); + mp_int *bn = mp_from_bytes_be( + make_ptrlen(rsabuf, s->authkey->bytes)); smemclr(rsabuf, s->authkey->bytes); sfree(rsabuf); @@ -330,7 +332,7 @@ static void ssh1_login_server_process_queue(PacketProtocolLayer *ppl) put_mp_ssh1(pktout, bn); pq_push(s->ppl.out_pq, pktout); - freebn(bn); + mp_free(bn); } crMaybeWaitUntilV((pktin = ssh1_login_server_pop(s)) != NULL); diff --git a/ssh1login.c b/ssh1login.c index e5308d0b..790ffa90 100644 --- a/ssh1login.c +++ b/ssh1login.c @@ -7,6 +7,7 @@ #include "putty.h" #include "ssh.h" +#include "mpint.h" #include "sshbpp.h" #include "sshppl.h" #include "sshcr.h" @@ -49,7 +50,7 @@ struct ssh1_login_state { int keyi, nkeys; bool authed; struct RSAKey key; - Bignum challenge; + mp_int *challenge; ptrlen comment; int dlgret; Filename *keyfile; @@ -537,7 +538,7 @@ static void ssh1_login_process_queue(PacketProtocolLayer *ppl) ppl_logevent("Received RSA challenge"); s->challenge = get_mp_ssh1(pktin); if (get_err(pktin)) { - freebn(s->challenge); + mp_free(s->challenge); ssh_proto_error(s->ppl.ssh, "Server's RSA challenge " "was badly formatted"); return; @@ -549,7 +550,7 @@ static void ssh1_login_process_queue(PacketProtocolLayer *ppl) agentreq = strbuf_new_for_agent_query(); put_byte(agentreq, SSH1_AGENTC_RSA_CHALLENGE); - put_uint32(agentreq, bignum_bitcount(s->key.modulus)); + put_uint32(agentreq, mp_get_nbits(s->key.modulus)); put_mp_ssh1(agentreq, s->key.exponent); put_mp_ssh1(agentreq, s->key.modulus); put_mp_ssh1(agentreq, s->challenge); @@ -594,9 +595,9 @@ static void ssh1_login_process_queue(PacketProtocolLayer *ppl) ppl_logevent("No reply received from Pageant"); } } - freebn(s->key.exponent); - freebn(s->key.modulus); - freebn(s->challenge); + mp_free(s->key.exponent); + mp_free(s->key.modulus); + mp_free(s->challenge); if (s->authed) break; } @@ -719,11 +720,11 @@ static void ssh1_login_process_queue(PacketProtocolLayer *ppl) { int i; unsigned char buffer[32]; - Bignum 
challenge, response; + mp_int *challenge, *response; challenge = get_mp_ssh1(pktin); if (get_err(pktin)) { - freebn(challenge); + mp_free(challenge); ssh_proto_error(s->ppl.ssh, "Server's RSA challenge " "was badly formatted"); return; @@ -732,7 +733,7 @@ static void ssh1_login_process_queue(PacketProtocolLayer *ppl) freersapriv(&s->key); /* burn the evidence */ for (i = 0; i < 32; i++) { - buffer[i] = bignum_byte(response, 31 - i); + buffer[i] = mp_get_byte(response, 31 - i); } { @@ -748,8 +749,8 @@ static void ssh1_login_process_queue(PacketProtocolLayer *ppl) put_data(pkt, buffer, 16); pq_push(s->ppl.out_pq, pkt); - freebn(challenge); - freebn(response); + mp_free(challenge); + mp_free(response); } crMaybeWaitUntilV((pktin = ssh1_login_pop(s)) diff --git a/ssh2kex-client.c b/ssh2kex-client.c index 0a17c0fd..865397df 100644 --- a/ssh2kex-client.c +++ b/ssh2kex-client.c @@ -11,6 +11,7 @@ #include "sshcr.h" #include "storage.h" #include "ssh2transport.h" +#include "mpint.h" void ssh2kex_coroutine(struct ssh2_transport_state *s, bool *aborted) { @@ -170,10 +171,10 @@ void ssh2kex_coroutine(struct ssh2_transport_state *s, bool *aborted) dh_cleanup(s->dh_ctx); s->dh_ctx = NULL; - freebn(s->f); s->f = NULL; + mp_free(s->f); s->f = NULL; if (dh_is_gex(s->kex_alg)) { - freebn(s->g); s->g = NULL; - freebn(s->p); s->p = NULL; + mp_free(s->g); s->g = NULL; + mp_free(s->p); s->p = NULL; } } else if (s->kex_alg->main_type == KEXTYPE_ECDH) { @@ -223,7 +224,7 @@ void ssh2kex_coroutine(struct ssh2_transport_state *s, bool *aborted) { ptrlen keydata = get_string(pktin); put_stringpl(s->exhash, keydata); - s->K = ssh_ecdhkex_getkey(s->ecdh_key, keydata.ptr, keydata.len); + s->K = ssh_ecdhkex_getkey(s->ecdh_key, keydata); if (!get_err(pktin) && !s->K) { ssh_proto_error(s->ppl.ssh, "Received invalid elliptic curve " "point in ECDH reply"); @@ -501,10 +502,10 @@ void ssh2kex_coroutine(struct ssh2_transport_state *s, bool *aborted) dh_cleanup(s->dh_ctx); s->dh_ctx = NULL; - 
freebn(s->f); s->f = NULL; + mp_free(s->f); s->f = NULL; if (dh_is_gex(s->kex_alg)) { - freebn(s->g); s->g = NULL; - freebn(s->p); s->p = NULL; + mp_free(s->g); s->g = NULL; + mp_free(s->p); s->p = NULL; } #endif } else { @@ -560,13 +561,13 @@ void ssh2kex_coroutine(struct ssh2_transport_state *s, bool *aborted) unsigned char *outstr; int outstrlen; - s->K = bn_power_2(nbits - 1); + s->K = mp_power_2(nbits - 1); for (i = 0; i < nbits; i++) { if ((i & 7) == 0) { byte = random_byte(); } - bignum_set_bit(s->K, i, (byte >> (i & 7)) & 1); + mp_set_bit(s->K, i, (byte >> (i & 7)) & 1); } /* diff --git a/ssh2kex-server.c b/ssh2kex-server.c index 0b94c996..22b26961 100644 --- a/ssh2kex-server.c +++ b/ssh2kex-server.c @@ -11,6 +11,7 @@ #include "sshcr.h" #include "storage.h" #include "ssh2transport.h" +#include "mpint.h" void ssh2_transport_provide_hostkeys(PacketProtocolLayer *ppl, ssh_key *const *hostkeys, int nhostkeys) @@ -98,7 +99,7 @@ void ssh2kex_coroutine(struct ssh2_transport_state *s, bool *aborted) * but not for serious use. 
*/ s->p = primegen(s->pbits, 2, 2, NULL, 1, no_progress, NULL, 1); - s->g = bignum_from_long(2); + s->g = mp_from_integer(2); s->dh_ctx = dh_setup_gex(s->p, s->g); s->kex_init_value = SSH2_MSG_KEX_DH_GEX_INIT; s->kex_reply_value = SSH2_MSG_KEX_DH_GEX_REPLY; @@ -177,10 +178,10 @@ void ssh2kex_coroutine(struct ssh2_transport_state *s, bool *aborted) dh_cleanup(s->dh_ctx); s->dh_ctx = NULL; - freebn(s->f); s->f = NULL; + mp_free(s->f); s->f = NULL; if (dh_is_gex(s->kex_alg)) { - freebn(s->g); s->g = NULL; - freebn(s->p); s->p = NULL; + mp_free(s->g); s->g = NULL; + mp_free(s->p); s->p = NULL; } } else if (s->kex_alg->main_type == KEXTYPE_ECDH) { ppl_logevent("Doing ECDH key exchange with curve %s and hash %s", @@ -211,7 +212,7 @@ void ssh2kex_coroutine(struct ssh2_transport_state *s, bool *aborted) ptrlen keydata = get_string(pktin); put_stringpl(s->exhash, keydata); - s->K = ssh_ecdhkex_getkey(s->ecdh_key, keydata.ptr, keydata.len); + s->K = ssh_ecdhkex_getkey(s->ecdh_key, keydata); if (!get_err(pktin) && !s->K) { ssh_proto_error(s->ppl.ssh, "Received invalid elliptic curve " "point in ECDH initial packet"); diff --git a/ssh2transport.c b/ssh2transport.c index 4393d7ef..ab813ac2 100644 --- a/ssh2transport.c +++ b/ssh2transport.c @@ -11,6 +11,7 @@ #include "sshcr.h" #include "storage.h" #include "ssh2transport.h" +#include "mpint.h" const struct ssh_signkey_with_user_pref_id ssh2_hostkey_algs[] = { #define ARRAYENT_HOSTKEY_ALGORITHM(type, alg) { &alg, type }, @@ -200,10 +201,10 @@ static void ssh2_transport_free(PacketProtocolLayer *ppl) ssh_key_free(s->hkey); s->hkey = NULL; } - if (s->f) freebn(s->f); - if (s->p) freebn(s->p); - if (s->g) freebn(s->g); - if (s->K) freebn(s->K); + if (s->f) mp_free(s->f); + if (s->p) mp_free(s->p); + if (s->g) mp_free(s->g); + if (s->K) mp_free(s->K); if (s->dh_ctx) dh_cleanup(s->dh_ctx); if (s->rsa_kex_key) @@ -225,7 +226,7 @@ static void ssh2_transport_free(PacketProtocolLayer *ppl) */ static void ssh2_mkkey( struct 
ssh2_transport_state *s, strbuf *out, - Bignum K, unsigned char *H, char chr, int keylen) + mp_int *K, unsigned char *H, char chr, int keylen) { int hlen = s->kex_alg->hash->hlen; int keylen_padded; @@ -1365,7 +1366,7 @@ static void ssh2_transport_process_queue(PacketProtocolLayer *ppl) /* * Free shared secret. */ - freebn(s->K); s->K = NULL; + mp_free(s->K); s->K = NULL; /* * Update the specials menu to list the remaining uncertified host diff --git a/ssh2transport.h b/ssh2transport.h index 2f527e69..6b80b6cb 100644 --- a/ssh2transport.h +++ b/ssh2transport.h @@ -166,7 +166,7 @@ struct ssh2_transport_state { int nbits, pbits; bool warn_kex, warn_hk, warn_cscipher, warn_sccipher; - Bignum p, g, e, f, K; + mp_int *p, *g, *e, *f, *K; strbuf *outgoing_kexinit, *incoming_kexinit; strbuf *client_kexinit, *server_kexinit; /* aliases to the above */ int kex_init_value, kex_reply_value; @@ -176,7 +176,7 @@ struct ssh2_transport_state { char *keystr, *fingerprint; ssh_key *hkey; /* actual host key */ struct RSAKey *rsa_kex_key; /* for RSA kex */ - struct ec_key *ecdh_key; /* for ECDH kex */ + ecdh_key *ecdh_key; /* for ECDH kex */ unsigned char exchange_hash[SSH2_KEX_MAX_HASH_LEN]; bool can_gssapi_keyex; bool need_gss_transient_hostkey; diff --git a/sshbn.c b/sshbn.c deleted file mode 100644 index c3cb5544..00000000 --- a/sshbn.c +++ /dev/null @@ -1,2180 +0,0 @@ -/* - * Bignum routines for RSA and DH and stuff. - */ - -#include -#include -#include -#include -#include -#include - -#include "misc.h" - -#include "sshbn.h" - -#define BIGNUM_INTERNAL -typedef BignumInt *Bignum; - -#include "ssh.h" -#include "marshal.h" - -BignumInt bnZero[1] = { 0 }; -BignumInt bnOne[2] = { 1, 1 }; -BignumInt bnTen[2] = { 1, 10 }; - -/* - * The Bignum format is an array of `BignumInt'. The first - * element of the array counts the remaining elements. The - * remaining elements express the actual number, base 2^BIGNUM_INT_BITS, _least_ - * significant digit first. 
(So it's trivial to extract the bit - * with value 2^n for any n.) - * - * All Bignums in this module are positive. Negative numbers must - * be dealt with outside it. - * - * INVARIANT: the most significant word of any Bignum must be - * nonzero. - */ - -Bignum Zero = bnZero, One = bnOne, Ten = bnTen; - -static Bignum newbn(int length) -{ - Bignum b; - - assert(length >= 0 && length < INT_MAX / BIGNUM_INT_BITS); - - b = snewn(length + 1, BignumInt); - memset(b, 0, (length + 1) * sizeof(*b)); - b[0] = length; - return b; -} - -void bn_restore_invariant(Bignum b) -{ - while (b[0] > 1 && b[b[0]] == 0) - b[0]--; -} - -Bignum copybn(Bignum orig) -{ - Bignum b = snewn(orig[0] + 1, BignumInt); - if (!b) - abort(); /* FIXME */ - memcpy(b, orig, (orig[0] + 1) * sizeof(*b)); - return b; -} - -void freebn(Bignum b) -{ - /* - * Burn the evidence, just in case. - */ - smemclr(b, sizeof(b[0]) * (b[0] + 1)); - sfree(b); -} - -Bignum bn_power_2(int n) -{ - Bignum ret; - - assert(n >= 0); - - ret = newbn(n / BIGNUM_INT_BITS + 1); - bignum_set_bit(ret, n, 1); - return ret; -} - -/* - * Internal addition. Sets c = a - b, where 'a', 'b' and 'c' are all - * big-endian arrays of 'len' BignumInts. Returns the carry off the - * top. - */ -static BignumCarry internal_add(const BignumInt *a, const BignumInt *b, - BignumInt *c, int len) -{ - int i; - BignumCarry carry = 0; - - for (i = len-1; i >= 0; i--) - BignumADC(c[i], carry, a[i], b[i], carry); - - return (BignumInt)carry; -} - -/* - * Internal subtraction. Sets c = a - b, where 'a', 'b' and 'c' are - * all big-endian arrays of 'len' BignumInts. Any borrow from the top - * is ignored. - */ -static void internal_sub(const BignumInt *a, const BignumInt *b, - BignumInt *c, int len) -{ - int i; - BignumCarry carry = 1; - - for (i = len-1; i >= 0; i--) - BignumADC(c[i], carry, a[i], ~b[i], carry); -} - -/* - * Compute c = a * b. - * Input is in the first len words of a and b. - * Result is returned in the first 2*len words of c. 
- * - * 'scratch' must point to an array of BignumInt of size at least - * mul_compute_scratch(len). (This covers the needs of internal_mul - * and all its recursive calls to itself.) - */ -#define KARATSUBA_THRESHOLD 50 -static int mul_compute_scratch(int len) -{ - int ret = 0; - while (len > KARATSUBA_THRESHOLD) { - int toplen = len/2, botlen = len - toplen; /* botlen is the bigger */ - int midlen = botlen + 1; - ret += 4*midlen; - len = midlen; - } - return ret; -} -static void internal_mul(const BignumInt *a, const BignumInt *b, - BignumInt *c, int len, BignumInt *scratch) -{ - if (len > KARATSUBA_THRESHOLD) { - int i; - - /* - * Karatsuba divide-and-conquer algorithm. Cut each input in - * half, so that it's expressed as two big 'digits' in a giant - * base D: - * - * a = a_1 D + a_0 - * b = b_1 D + b_0 - * - * Then the product is of course - * - * ab = a_1 b_1 D^2 + (a_1 b_0 + a_0 b_1) D + a_0 b_0 - * - * and we compute the three coefficients by recursively - * calling ourself to do half-length multiplications. - * - * The clever bit that makes this worth doing is that we only - * need _one_ half-length multiplication for the central - * coefficient rather than the two that it obviouly looks - * like, because we can use a single multiplication to compute - * - * (a_1 + a_0) (b_1 + b_0) = a_1 b_1 + a_1 b_0 + a_0 b_1 + a_0 b_0 - * - * and then we subtract the other two coefficients (a_1 b_1 - * and a_0 b_0) which we were computing anyway. - * - * Hence we get to multiply two numbers of length N in about - * three times as much work as it takes to multiply numbers of - * length N/2, which is obviously better than the four times - * as much work it would take if we just did a long - * conventional multiply. 
- */ - - int toplen = len/2, botlen = len - toplen; /* botlen is the bigger */ - int midlen = botlen + 1; - BignumCarry carry; -#ifdef KARA_DEBUG - int i; -#endif - - /* - * The coefficients a_1 b_1 and a_0 b_0 just avoid overlapping - * in the output array, so we can compute them immediately in - * place. - */ - -#ifdef KARA_DEBUG - printf("a1,a0 = 0x"); - for (i = 0; i < len; i++) { - if (i == toplen) printf(", 0x"); - printf("%0*x", BIGNUM_INT_BITS/4, a[i]); - } - printf("\n"); - printf("b1,b0 = 0x"); - for (i = 0; i < len; i++) { - if (i == toplen) printf(", 0x"); - printf("%0*x", BIGNUM_INT_BITS/4, b[i]); - } - printf("\n"); -#endif - - /* a_1 b_1 */ - internal_mul(a, b, c, toplen, scratch); -#ifdef KARA_DEBUG - printf("a1b1 = 0x"); - for (i = 0; i < 2*toplen; i++) { - printf("%0*x", BIGNUM_INT_BITS/4, c[i]); - } - printf("\n"); -#endif - - /* a_0 b_0 */ - internal_mul(a + toplen, b + toplen, c + 2*toplen, botlen, scratch); -#ifdef KARA_DEBUG - printf("a0b0 = 0x"); - for (i = 0; i < 2*botlen; i++) { - printf("%0*x", BIGNUM_INT_BITS/4, c[2*toplen+i]); - } - printf("\n"); -#endif - - /* Zero padding. midlen exceeds toplen by at most 2, so just - * zero the first two words of each input and the rest will be - * copied over. 
*/ - scratch[0] = scratch[1] = scratch[midlen] = scratch[midlen+1] = 0; - - for (i = 0; i < toplen; i++) { - scratch[midlen - toplen + i] = a[i]; /* a_1 */ - scratch[2*midlen - toplen + i] = b[i]; /* b_1 */ - } - - /* compute a_1 + a_0 */ - scratch[0] = internal_add(scratch+1, a+toplen, scratch+1, botlen); -#ifdef KARA_DEBUG - printf("a1plusa0 = 0x"); - for (i = 0; i < midlen; i++) { - printf("%0*x", BIGNUM_INT_BITS/4, scratch[i]); - } - printf("\n"); -#endif - /* compute b_1 + b_0 */ - scratch[midlen] = internal_add(scratch+midlen+1, b+toplen, - scratch+midlen+1, botlen); -#ifdef KARA_DEBUG - printf("b1plusb0 = 0x"); - for (i = 0; i < midlen; i++) { - printf("%0*x", BIGNUM_INT_BITS/4, scratch[midlen+i]); - } - printf("\n"); -#endif - - /* - * Now we can do the third multiplication. - */ - internal_mul(scratch, scratch + midlen, scratch + 2*midlen, midlen, - scratch + 4*midlen); -#ifdef KARA_DEBUG - printf("a1plusa0timesb1plusb0 = 0x"); - for (i = 0; i < 2*midlen; i++) { - printf("%0*x", BIGNUM_INT_BITS/4, scratch[2*midlen+i]); - } - printf("\n"); -#endif - - /* - * Now we can reuse the first half of 'scratch' to compute the - * sum of the outer two coefficients, to subtract from that - * product to obtain the middle one. - */ - scratch[0] = scratch[1] = scratch[2] = scratch[3] = 0; - for (i = 0; i < 2*toplen; i++) - scratch[2*midlen - 2*toplen + i] = c[i]; - scratch[1] = internal_add(scratch+2, c + 2*toplen, - scratch+2, 2*botlen); -#ifdef KARA_DEBUG - printf("a1b1plusa0b0 = 0x"); - for (i = 0; i < 2*midlen; i++) { - printf("%0*x", BIGNUM_INT_BITS/4, scratch[i]); - } - printf("\n"); -#endif - - internal_sub(scratch + 2*midlen, scratch, - scratch + 2*midlen, 2*midlen); -#ifdef KARA_DEBUG - printf("a1b0plusa0b1 = 0x"); - for (i = 0; i < 2*midlen; i++) { - printf("%0*x", BIGNUM_INT_BITS/4, scratch[2*midlen+i]); - } - printf("\n"); -#endif - - /* - * And now all we need to do is to add that middle coefficient - * back into the output. 
We may have to propagate a carry - * further up the output, but we can be sure it won't - * propagate right the way off the top. - */ - carry = internal_add(c + 2*len - botlen - 2*midlen, - scratch + 2*midlen, - c + 2*len - botlen - 2*midlen, 2*midlen); - i = 2*len - botlen - 2*midlen - 1; - while (carry) { - assert(i >= 0); - BignumADC(c[i], carry, c[i], 0, carry); - i--; - } -#ifdef KARA_DEBUG - printf("ab = 0x"); - for (i = 0; i < 2*len; i++) { - printf("%0*x", BIGNUM_INT_BITS/4, c[i]); - } - printf("\n"); -#endif - - } else { - int i; - BignumInt carry; - const BignumInt *ap, *bp; - BignumInt *cp, *cps; - - /* - * Multiply in the ordinary O(N^2) way. - */ - - for (i = 0; i < 2 * len; i++) - c[i] = 0; - - for (cps = c + 2*len, ap = a + len; ap-- > a; cps--) { - carry = 0; - for (cp = cps, bp = b + len; cp--, bp-- > b ;) - BignumMULADD2(carry, *cp, *ap, *bp, *cp, carry); - *cp = carry; - } - } -} - -/* - * Variant form of internal_mul used for the initial step of - * Montgomery reduction. Only bothers outputting 'len' words - * (everything above that is thrown away). - */ -static void internal_mul_low(const BignumInt *a, const BignumInt *b, - BignumInt *c, int len, BignumInt *scratch) -{ - if (len > KARATSUBA_THRESHOLD) { - int i; - - /* - * Karatsuba-aware version of internal_mul_low. As before, we - * express each input value as a shifted combination of two - * halves: - * - * a = a_1 D + a_0 - * b = b_1 D + b_0 - * - * Then the full product is, as before, - * - * ab = a_1 b_1 D^2 + (a_1 b_0 + a_0 b_1) D + a_0 b_0 - * - * Provided we choose D on the large side (so that a_0 and b_0 - * are _at least_ as long as a_1 and b_1), we don't need the - * topmost term at all, and we only need half of the middle - * term. So there's no point in doing the proper Karatsuba - * optimisation which computes the middle term using the top - * one, because we'd take as long computing the top one as - * just computing the middle one directly. 
- * - * So instead, we do a much more obvious thing: we call the - * fully optimised internal_mul to compute a_0 b_0, and we - * recursively call ourself to compute the _bottom halves_ of - * a_1 b_0 and a_0 b_1, each of which we add into the result - * in the obvious way. - * - * In other words, there's no actual Karatsuba _optimisation_ - * in this function; the only benefit in doing it this way is - * that we call internal_mul proper for a large part of the - * work, and _that_ can optimise its operation. - */ - - int toplen = len/2, botlen = len - toplen; /* botlen is the bigger */ - - /* - * Scratch space for the various bits and pieces we're going - * to be adding together: we need botlen*2 words for a_0 b_0 - * (though we may end up throwing away its topmost word), and - * toplen words for each of a_1 b_0 and a_0 b_1. That adds up - * to exactly 2*len. - */ - - /* a_0 b_0 */ - internal_mul(a + toplen, b + toplen, scratch + 2*toplen, botlen, - scratch + 2*len); - - /* a_1 b_0 */ - internal_mul_low(a, b + len - toplen, scratch + toplen, toplen, - scratch + 2*len); - - /* a_0 b_1 */ - internal_mul_low(a + len - toplen, b, scratch, toplen, - scratch + 2*len); - - /* Copy the bottom half of the big coefficient into place */ - for (i = 0; i < botlen; i++) - c[toplen + i] = scratch[2*toplen + botlen + i]; - - /* Add the two small coefficients, throwing away the returned carry */ - internal_add(scratch, scratch + toplen, scratch, toplen); - - /* And add that to the large coefficient, leaving the result in c. */ - internal_add(scratch, scratch + 2*toplen + botlen - toplen, - c, toplen); - - } else { - int i; - BignumInt carry; - const BignumInt *ap, *bp; - BignumInt *cp, *cps; - - /* - * Multiply in the ordinary O(N^2) way. 
- */ - - for (i = 0; i < len; i++) - c[i] = 0; - - for (cps = c + len, ap = a + len; ap-- > a; cps--) { - carry = 0; - for (cp = cps, bp = b + len; bp--, cp-- > c ;) - BignumMULADD2(carry, *cp, *ap, *bp, *cp, carry); - } - } -} - -/* - * Montgomery reduction. Expects x to be a big-endian array of 2*len - * BignumInts whose value satisfies 0 <= x < rn (where r = 2^(len * - * BIGNUM_INT_BITS) is the Montgomery base). Returns in the same array - * a value x' which is congruent to xr^{-1} mod n, and satisfies 0 <= - * x' < n. - * - * 'n' and 'mninv' should be big-endian arrays of 'len' BignumInts - * each, containing respectively n and the multiplicative inverse of - * -n mod r. - * - * 'tmp' is an array of BignumInt used as scratch space, of length at - * least 3*len + mul_compute_scratch(len). - */ -static void monty_reduce(BignumInt *x, const BignumInt *n, - const BignumInt *mninv, BignumInt *tmp, int len) -{ - int i; - BignumInt carry; - - /* - * Multiply x by (-n)^{-1} mod r. This gives us a value m such - * that mn is congruent to -x mod r. Hence, mn+x is an exact - * multiple of r, and is also (obviously) congruent to x mod n. - */ - internal_mul_low(x + len, mninv, tmp, len, tmp + 3*len); - - /* - * Compute t = (mn+x)/r in ordinary, non-modular, integer - * arithmetic. By construction this is exact, and is congruent mod - * n to x * r^{-1}, i.e. the answer we want. - * - * The following multiply leaves that answer in the _most_ - * significant half of the 'x' array, so then we must shift it - * down. - */ - internal_mul(tmp, n, tmp+len, len, tmp + 3*len); - carry = internal_add(x, tmp+len, x, 2*len); - for (i = 0; i < len; i++) - x[len + i] = x[i], x[i] = 0; - - /* - * Reduce t mod n. This doesn't require a full-on division by n, - * but merely a test and single optional subtraction, since we can - * show that 0 <= t < 2n. - * - * Proof: - * + we computed m mod r, so 0 <= m < r. 
- * + so 0 <= mn < rn, obviously - * + hence we only need 0 <= x < rn to guarantee that 0 <= mn+x < 2rn - * + yielding 0 <= (mn+x)/r < 2n as required. - */ - if (!carry) { - for (i = 0; i < len; i++) - if (x[len + i] != n[i]) - break; - } - if (carry || i >= len || x[len + i] > n[i]) - internal_sub(x+len, n, x+len, len); -} - -static void internal_add_shifted(BignumInt *number, - BignumInt n, int shift) -{ - int word = 1 + (shift / BIGNUM_INT_BITS); - int bshift = shift % BIGNUM_INT_BITS; - BignumInt addendh, addendl; - BignumCarry carry; - - addendl = n << bshift; - addendh = (bshift == 0 ? 0 : n >> (BIGNUM_INT_BITS - bshift)); - - assert(word <= number[0]); - BignumADC(number[word], carry, number[word], addendl, 0); - word++; - if (!addendh && !carry) - return; - assert(word <= number[0]); - BignumADC(number[word], carry, number[word], addendh, carry); - word++; - while (carry) { - assert(word <= number[0]); - BignumADC(number[word], carry, number[word], 0, carry); - word++; - } -} - -static int bn_clz(BignumInt x) -{ - /* - * Count the leading zero bits in x. Equivalently, how far left - * would we need to shift x to make its top bit set? - * - * Precondition: x != 0. - */ - - /* FIXME: would be nice to put in some compiler intrinsics under - * ifdef here */ - int i, ret = 0; - for (i = BIGNUM_INT_BITS / 2; i != 0; i >>= 1) { - if ((x >> (BIGNUM_INT_BITS-i)) == 0) { - x <<= i; - ret += i; - } - } - return ret; -} - -static BignumInt reciprocal_word(BignumInt d) -{ - BignumInt dshort, recip, prodh, prodl; - int corrections; - - /* - * Input: a BignumInt value d, with its top bit set. - */ - assert(d >> (BIGNUM_INT_BITS-1) == 1); - - /* - * Output: a value, shifted to fill a BignumInt, which is strictly - * less than 1/(d+1), i.e. is an *under*-estimate (but by as - * little as possible within the constraints) of the reciprocal of - * any number whose first BIGNUM_INT_BITS bits match d. - * - * Ideally we'd like to _totally_ fill BignumInt, i.e. 
always - * return a value with the top bit set. Unfortunately we can't - * quite guarantee that for all inputs and also return a fixed - * exponent. So instead we take our reciprocal to be - * 2^(BIGNUM_INT_BITS*2-1) / d, so that it has the top bit clear - * only in the exceptional case where d takes exactly the maximum - * value BIGNUM_INT_MASK; in that case, the top bit is clear and - * the next bit down is set. - */ - - /* - * Start by computing a half-length version of the answer, by - * straightforward division within a BignumInt. - */ - dshort = (d >> (BIGNUM_INT_BITS/2)) + 1; - recip = (BIGNUM_TOP_BIT + dshort - 1) / dshort; - recip <<= BIGNUM_INT_BITS - BIGNUM_INT_BITS/2; - - /* - * Newton-Raphson iteration to improve that starting reciprocal - * estimate: take f(x) = d - 1/x, and then the N-R formula gives - * x_new = x - f(x)/f'(x) = x - (d-1/x)/(1/x^2) = x(2-d*x). Or, - * taking our fixed-point representation into account, take f(x) - * to be d - K/x (where K = 2^(BIGNUM_INT_BITS*2-1) as discussed - * above) and then we get (2K - d*x) * x/K. - * - * Newton-Raphson doubles the number of correct bits at every - * iteration, and the initial division above already gave us half - * the output word, so it's only worth doing one iteration. - */ - BignumMULADD(prodh, prodl, recip, d, recip); - prodl = ~prodl; - prodh = ~prodh; - { - BignumCarry c; - BignumADC(prodl, c, prodl, 1, 0); - prodh += c; - } - BignumMUL(prodh, prodl, prodh, recip); - recip = (prodh << 1) | (prodl >> (BIGNUM_INT_BITS-1)); - - /* - * Now make sure we have the best possible reciprocal estimate, - * before we return it. We might have been off by a handful either - * way - not enough to bother with any better-thought-out kind of - * correction loop. 
- */ - BignumMULADD(prodh, prodl, recip, d, recip); - corrections = 0; - if (prodh >= BIGNUM_TOP_BIT) { - do { - BignumCarry c = 1; - BignumADC(prodl, c, prodl, ~d, c); prodh += BIGNUM_INT_MASK + c; - recip--; - corrections++; - } while (prodh >= ((BignumInt)1 << (BIGNUM_INT_BITS-1))); - } else { - while (1) { - BignumInt newprodh, newprodl; - BignumCarry c = 0; - BignumADC(newprodl, c, prodl, d, c); newprodh = prodh + c; - if (newprodh >= BIGNUM_TOP_BIT) - break; - prodh = newprodh; - prodl = newprodl; - recip++; - corrections++; - } - } - - return recip; -} - -/* - * Compute a = a % m. - * Input in first alen words of a and first mlen words of m. - * Output in first alen words of a - * (of which first alen-mlen words will be zero). - * Quotient is accumulated in the `quotient' array, which is a Bignum - * rather than the internal bigendian format. - * - * 'recip' must be the result of calling reciprocal_word() on the top - * BIGNUM_INT_BITS of the modulus (denoted m0 in comments below), with - * the topmost set bit normalised to the MSB of the input to - * reciprocal_word. 'rshift' is how far left the top nonzero word of - * the modulus had to be shifted to set that top bit. - */ -static void internal_mod(BignumInt *a, int alen, - BignumInt *m, int mlen, - BignumInt *quot, BignumInt recip, int rshift) -{ - int i, k; - -#ifdef DIVISION_DEBUG - { - int d; - printf("start division, m=0x"); - for (d = 0; d < mlen; d++) - printf("%0*llx", BIGNUM_INT_BITS/4, (unsigned long long)m[d]); - printf(", recip=%#0*llx, rshift=%d\n", - BIGNUM_INT_BITS/4, (unsigned long long)recip, rshift); - } -#endif - - /* - * Repeatedly use that reciprocal estimate to get a decent number - * of quotient bits, and subtract off the resulting multiple of m. 
- * - * Normally we expect to terminate this loop by means of finding - * out q=0 part way through, but one way in which we might not get - * that far in the first place is if the input a is actually zero, - * in which case we'll discard zero words from the front of a - * until we reach the termination condition in the for statement - * here. - */ - for (i = 0; i <= alen - mlen ;) { - BignumInt product; - BignumInt aword, q; - int shift, full_bitoffset, bitoffset, wordoffset; - -#ifdef DIVISION_DEBUG - { - int d; - printf("main loop, a=0x"); - for (d = 0; d < alen; d++) - printf("%0*llx", BIGNUM_INT_BITS/4, (unsigned long long)a[d]); - printf("\n"); - } -#endif - - if (a[i] == 0) { -#ifdef DIVISION_DEBUG - printf("zero word at i=%d\n", i); -#endif - i++; - continue; - } - - aword = a[i]; - shift = bn_clz(aword); - aword <<= shift; - if (shift > 0 && i+1 < alen) - aword |= a[i+1] >> (BIGNUM_INT_BITS - shift); - - { - BignumInt unused; - BignumMUL(q, unused, recip, aword); - (void)unused; - } - -#ifdef DIVISION_DEBUG - printf("i=%d, aword=%#0*llx, shift=%d, q=%#0*llx\n", - i, BIGNUM_INT_BITS/4, (unsigned long long)aword, - shift, BIGNUM_INT_BITS/4, (unsigned long long)q); -#endif - - /* - * Work out the right bit and word offsets to use when - * subtracting q*m from a. - * - * aword was taken from a[i], which means its LSB was at bit - * position (alen-1-i) * BIGNUM_INT_BITS. But then we shifted - * it left by 'shift', so now the low bit of aword corresponds - * to bit position (alen-1-i) * BIGNUM_INT_BITS - shift, i.e. - * aword is approximately equal to a / 2^(that). - * - * m0 comes from the top word of mod, so its LSB is at bit - * position (mlen-1) * BIGNUM_INT_BITS - rshift, i.e. it can - * be considered to be m / 2^(that power). 'recip' is the - * reciprocal of m0, times 2^(BIGNUM_INT_BITS*2-1), i.e. it's - * about 2^((mlen+1) * BIGNUM_INT_BITS - rshift - 1) / m. 
- * - * Hence, recip * aword is approximately equal to the product - * of those, which simplifies to - * - * a/m * 2^((mlen+2+i-alen)*BIGNUM_INT_BITS + shift - rshift - 1) - * - * But we've also shifted recip*aword down by BIGNUM_INT_BITS - * to form q, so we have - * - * q ~= a/m * 2^((mlen+1+i-alen)*BIGNUM_INT_BITS + shift - rshift - 1) - * - * and hence, when we now compute q*m, it will be about - * a*2^(all that lot), i.e. the negation of that expression is - * how far left we have to shift the product q*m to make it - * approximately equal to a. - */ - full_bitoffset = -((mlen+1+i-alen)*BIGNUM_INT_BITS + shift-rshift-1); -#ifdef DIVISION_DEBUG - printf("full_bitoffset=%d\n", full_bitoffset); -#endif - - if (full_bitoffset < 0) { - /* - * If we find ourselves needing to shift q*m _right_, that - * means we've reached the bottom of the quotient. Clip q - * so that its right shift becomes zero, and if that means - * q becomes _actually_ zero, this loop is done. - */ - if (full_bitoffset <= -BIGNUM_INT_BITS) - break; - q >>= -full_bitoffset; - full_bitoffset = 0; - if (!q) - break; -#ifdef DIVISION_DEBUG - printf("now full_bitoffset=%d, q=%#0*llx\n", - full_bitoffset, BIGNUM_INT_BITS/4, (unsigned long long)q); -#endif - } - - wordoffset = full_bitoffset / BIGNUM_INT_BITS; - bitoffset = full_bitoffset % BIGNUM_INT_BITS; -#ifdef DIVISION_DEBUG - printf("wordoffset=%d, bitoffset=%d\n", wordoffset, bitoffset); -#endif - - /* wordoffset as computed above is the offset between the LSWs - * of m and a. But in fact m and a are stored MSW-first, so we - * need to adjust it to be the offset between the actual array - * indices, and flip the sign too. */ - wordoffset = alen - mlen - wordoffset; - - if (bitoffset == 0) { - BignumCarry c = 1; - BignumInt prev_hi_word = 0; - for (k = mlen - 1; wordoffset+k >= i; k--) { - BignumInt mword = k<0 ? 
0 : m[k]; - BignumMULADD(prev_hi_word, product, q, mword, prev_hi_word); -#ifdef DIVISION_DEBUG - printf(" aligned sub: product word for m[%d] = %#0*llx\n", - k, BIGNUM_INT_BITS/4, - (unsigned long long)product); -#endif -#ifdef DIVISION_DEBUG - printf(" aligned sub: subtrahend for a[%d] = %#0*llx\n", - wordoffset+k, BIGNUM_INT_BITS/4, - (unsigned long long)product); -#endif - BignumADC(a[wordoffset+k], c, a[wordoffset+k], ~product, c); - } - } else { - BignumInt add_word = 0; - BignumInt c = 1; - BignumInt prev_hi_word = 0; - for (k = mlen - 1; wordoffset+k >= i; k--) { - BignumInt mword = k<0 ? 0 : m[k]; - BignumMULADD(prev_hi_word, product, q, mword, prev_hi_word); -#ifdef DIVISION_DEBUG - printf(" unaligned sub: product word for m[%d] = %#0*llx\n", - k, BIGNUM_INT_BITS/4, - (unsigned long long)product); -#endif - - add_word |= product << bitoffset; - -#ifdef DIVISION_DEBUG - printf(" unaligned sub: subtrahend for a[%d] = %#0*llx\n", - wordoffset+k, - BIGNUM_INT_BITS/4, (unsigned long long)add_word); -#endif - BignumADC(a[wordoffset+k], c, a[wordoffset+k], ~add_word, c); - - add_word = product >> (BIGNUM_INT_BITS - bitoffset); - } - } - - if (quot) { -#ifdef DIVISION_DEBUG - printf("adding quotient word %#0*llx << %d\n", - BIGNUM_INT_BITS/4, (unsigned long long)q, full_bitoffset); -#endif - internal_add_shifted(quot, q, full_bitoffset); -#ifdef DIVISION_DEBUG - { - int d; - printf("now quot=0x"); - for (d = quot[0]; d > 0; d--) - printf("%0*llx", BIGNUM_INT_BITS/4, - (unsigned long long)quot[d]); - printf("\n"); - } -#endif - } - } - -#ifdef DIVISION_DEBUG - { - int d; - printf("end main loop, a=0x"); - for (d = 0; d < alen; d++) - printf("%0*llx", BIGNUM_INT_BITS/4, (unsigned long long)a[d]); - if (quot) { - printf(", quot=0x"); - for (d = quot[0]; d > 0; d--) - printf("%0*llx", BIGNUM_INT_BITS/4, - (unsigned long long)quot[d]); - } - printf("\n"); - } -#endif - - /* - * The above loop should terminate with the remaining value in a - * being strictly less than 
2*m (if a >= 2*m then we should always - * have managed to get a nonzero q word), but we can't guarantee - * that it will be strictly less than m: consider a case where the - * remainder is 1, and another where the remainder is m-1. By the - * time a contains a value that's _about m_, you clearly can't - * distinguish those cases by looking at only the top word of a - - * you have to go all the way down to the bottom before you find - * out whether it's just less or just more than m. - * - * Hence, we now do a final fixup in which we subtract one last - * copy of m, or don't, accordingly. We should never have to - * subtract more than one copy of m here. - */ - for (i = 0; i < alen; i++) { - /* Compare a with m, word by word, from the MSW down. As soon - * as we encounter a difference, we know whether we need the - * fixup. */ - int mindex = mlen-alen+i; - BignumInt mword = mindex < 0 ? 0 : m[mindex]; - if (a[i] < mword) { -#ifdef DIVISION_DEBUG - printf("final fixup not needed, a < m\n"); -#endif - return; - } else if (a[i] > mword) { -#ifdef DIVISION_DEBUG - printf("final fixup is needed, a > m\n"); -#endif - break; - } - /* If neither of those cases happened, the words are the same, - * so keep going and look at the next one. */ - } -#ifdef DIVISION_DEBUG - if (i == mlen) /* if we printed neither of the above diagnostics */ - printf("final fixup is needed, a == m\n"); -#endif - - /* - * If we got here without returning, then a >= m, so we must - * subtract m, and increment the quotient. - */ - { - BignumCarry c = 1; - for (i = alen - 1; i >= 0; i--) { - int mindex = mlen-alen+i; - BignumInt mword = mindex < 0 ? 
0 : m[mindex]; - BignumADC(a[i], c, a[i], ~mword, c); - } - } - if (quot) - internal_add_shifted(quot, 1, 0); - -#ifdef DIVISION_DEBUG - { - int d; - printf("after final fixup, a=0x"); - for (d = 0; d < alen; d++) - printf("%0*llx", BIGNUM_INT_BITS/4, (unsigned long long)a[d]); - if (quot) { - printf(", quot=0x"); - for (d = quot[0]; d > 0; d--) - printf("%0*llx", BIGNUM_INT_BITS/4, - (unsigned long long)quot[d]); - } - printf("\n"); - } -#endif -} - -/* - * Compute (base ^ exp) % mod, the pedestrian way. - */ -Bignum modpow_simple(Bignum base_in, Bignum exp, Bignum mod) -{ - BignumInt *a, *b, *n, *m, *scratch; - BignumInt recip; - int rshift; - int mlen, scratchlen, i, j; - Bignum base, result; - - /* - * The most significant word of mod needs to be non-zero. It - * should already be, but let's make sure. - */ - assert(mod[mod[0]] != 0); - - /* - * Make sure the base is smaller than the modulus, by reducing - * it modulo the modulus if not. - */ - base = bigmod(base_in, mod); - - /* Allocate m of size mlen, copy mod to m */ - /* We use big endian internally */ - mlen = mod[0]; - m = snewn(mlen, BignumInt); - for (j = 0; j < mlen; j++) - m[j] = mod[mod[0] - j]; - - /* Allocate n of size mlen, copy base to n */ - n = snewn(mlen, BignumInt); - i = mlen - base[0]; - for (j = 0; j < i; j++) - n[j] = 0; - for (j = 0; j < (int)base[0]; j++) - n[i + j] = base[base[0] - j]; - - /* Allocate a and b of size 2*mlen. Set a = 1 */ - a = snewn(2 * mlen, BignumInt); - b = snewn(2 * mlen, BignumInt); - for (i = 0; i < 2 * mlen; i++) - a[i] = 0; - a[2 * mlen - 1] = 1; - - /* Scratch space for multiplies */ - scratchlen = mul_compute_scratch(mlen); - scratch = snewn(scratchlen, BignumInt); - - /* Skip leading zero bits of exp. 
*/ - i = 0; - j = BIGNUM_INT_BITS-1; - while (i < (int)exp[0] && (exp[exp[0] - i] & ((BignumInt)1 << j)) == 0) { - j--; - if (j < 0) { - i++; - j = BIGNUM_INT_BITS-1; - } - } - - /* Compute reciprocal of the top full word of the modulus */ - { - BignumInt m0 = m[0]; - rshift = bn_clz(m0); - if (rshift) { - m0 <<= rshift; - if (mlen > 1) - m0 |= m[1] >> (BIGNUM_INT_BITS - rshift); - } - recip = reciprocal_word(m0); - } - - /* Main computation */ - while (i < (int)exp[0]) { - while (j >= 0) { - internal_mul(a + mlen, a + mlen, b, mlen, scratch); - internal_mod(b, mlen * 2, m, mlen, NULL, recip, rshift); - if ((exp[exp[0] - i] & ((BignumInt)1 << j)) != 0) { - internal_mul(b + mlen, n, a, mlen, scratch); - internal_mod(a, mlen * 2, m, mlen, NULL, recip, rshift); - } else { - BignumInt *t; - t = a; - a = b; - b = t; - } - j--; - } - i++; - j = BIGNUM_INT_BITS-1; - } - - /* Copy result to buffer */ - result = newbn(mod[0]); - for (i = 0; i < mlen; i++) - result[result[0] - i] = a[i + mlen]; - while (result[0] > 1 && result[result[0]] == 0) - result[0]--; - - /* Free temporary arrays */ - smemclr(a, 2 * mlen * sizeof(*a)); - sfree(a); - smemclr(scratch, scratchlen * sizeof(*scratch)); - sfree(scratch); - smemclr(b, 2 * mlen * sizeof(*b)); - sfree(b); - smemclr(m, mlen * sizeof(*m)); - sfree(m); - smemclr(n, mlen * sizeof(*n)); - sfree(n); - - freebn(base); - - return result; -} - -/* - * Compute (base ^ exp) % mod. Uses the Montgomery multiplication - * technique where possible, falling back to modpow_simple otherwise. - */ -Bignum modpow(Bignum base_in, Bignum exp, Bignum mod) -{ - BignumInt *a, *b, *x, *n, *mninv, *scratch; - int len, scratchlen, i, j; - Bignum base, base2, r, rn, inv, result; - - /* - * The most significant word of mod needs to be non-zero. It - * should already be, but let's make sure. - */ - assert(mod[mod[0]] != 0); - - /* - * mod had better be odd, or we can't do Montgomery multiplication - * using a power of two at all. 
- */ - if (!(mod[1] & 1)) - return modpow_simple(base_in, exp, mod); - - /* - * Make sure the base is smaller than the modulus, by reducing - * it modulo the modulus if not. - */ - base = bigmod(base_in, mod); - - /* - * Compute the inverse of n mod r, for monty_reduce. (In fact we - * want the inverse of _minus_ n mod r, but we'll sort that out - * below.) - */ - len = mod[0]; - r = bn_power_2(BIGNUM_INT_BITS * len); - inv = modinv(mod, r); - assert(inv); /* cannot fail, since mod is odd and r is a power of 2 */ - - /* - * Multiply the base by r mod n, to get it into Montgomery - * representation. - */ - base2 = modmul(base, r, mod); - freebn(base); - base = base2; - - rn = bigmod(r, mod); /* r mod n, i.e. Montgomerified 1 */ - - freebn(r); /* won't need this any more */ - - /* - * Set up internal arrays of the right lengths, in big-endian - * format, containing the base, the modulus, and the modulus's - * inverse. - */ - n = snewn(len, BignumInt); - for (j = 0; j < len; j++) - n[len - 1 - j] = mod[j + 1]; - - mninv = snewn(len, BignumInt); - for (j = 0; j < len; j++) - mninv[len - 1 - j] = (j < (int)inv[0] ? inv[j + 1] : 0); - freebn(inv); /* we don't need this copy of it any more */ - /* Now negate mninv mod r, so it's the inverse of -n rather than +n. */ - x = snewn(len, BignumInt); - for (j = 0; j < len; j++) - x[j] = 0; - internal_sub(x, mninv, mninv, len); - - /* x = snewn(len, BignumInt); */ /* already done above */ - for (j = 0; j < len; j++) - x[len - 1 - j] = (j < (int)base[0] ? base[j + 1] : 0); - freebn(base); /* we don't need this copy of it any more */ - - a = snewn(2*len, BignumInt); - b = snewn(2*len, BignumInt); - for (j = 0; j < len; j++) - a[2*len - 1 - j] = (j < (int)rn[0] ? rn[j + 1] : 0); - freebn(rn); - - /* Scratch space for multiplies */ - scratchlen = 3*len + mul_compute_scratch(len); - scratch = snewn(scratchlen, BignumInt); - - /* Skip leading zero bits of exp. 
*/ - i = 0; - j = BIGNUM_INT_BITS-1; - while (i < (int)exp[0] && (exp[exp[0] - i] & ((BignumInt)1 << j)) == 0) { - j--; - if (j < 0) { - i++; - j = BIGNUM_INT_BITS-1; - } - } - - /* Main computation */ - while (i < (int)exp[0]) { - while (j >= 0) { - internal_mul(a + len, a + len, b, len, scratch); - monty_reduce(b, n, mninv, scratch, len); - if ((exp[exp[0] - i] & ((BignumInt)1 << j)) != 0) { - internal_mul(b + len, x, a, len, scratch); - monty_reduce(a, n, mninv, scratch, len); - } else { - BignumInt *t; - t = a; - a = b; - b = t; - } - j--; - } - i++; - j = BIGNUM_INT_BITS-1; - } - - /* - * Final monty_reduce to get back from the adjusted Montgomery - * representation. - */ - monty_reduce(a, n, mninv, scratch, len); - - /* Copy result to buffer */ - result = newbn(mod[0]); - for (i = 0; i < len; i++) - result[result[0] - i] = a[i + len]; - while (result[0] > 1 && result[result[0]] == 0) - result[0]--; - - /* Free temporary arrays */ - smemclr(scratch, scratchlen * sizeof(*scratch)); - sfree(scratch); - smemclr(a, 2 * len * sizeof(*a)); - sfree(a); - smemclr(b, 2 * len * sizeof(*b)); - sfree(b); - smemclr(mninv, len * sizeof(*mninv)); - sfree(mninv); - smemclr(n, len * sizeof(*n)); - sfree(n); - smemclr(x, len * sizeof(*x)); - sfree(x); - - return result; -} - -/* - * Compute (p * q) % mod. - * The most significant word of mod MUST be non-zero. - * We assume that the result array is the same size as the mod array. - */ -Bignum modmul(Bignum p, Bignum q, Bignum mod) -{ - BignumInt *a, *n, *m, *o, *scratch; - BignumInt recip; - int rshift, scratchlen; - int pqlen, mlen, rlen, i, j; - Bignum result; - - /* - * The most significant word of mod needs to be non-zero. It - * should already be, but let's make sure. - */ - assert(mod[mod[0]] != 0); - - /* Allocate m of size mlen, copy mod to m */ - /* We use big endian internally */ - mlen = mod[0]; - m = snewn(mlen, BignumInt); - for (j = 0; j < mlen; j++) - m[j] = mod[mod[0] - j]; - - pqlen = (p[0] > q[0] ? 
p[0] : q[0]); - - /* - * Make sure that we're allowing enough space. The shifting below - * will underflow the vectors we allocate if pqlen is too small. - */ - if (2*pqlen <= mlen) - pqlen = mlen/2 + 1; - - /* Allocate n of size pqlen, copy p to n */ - n = snewn(pqlen, BignumInt); - i = pqlen - p[0]; - for (j = 0; j < i; j++) - n[j] = 0; - for (j = 0; j < (int)p[0]; j++) - n[i + j] = p[p[0] - j]; - - /* Allocate o of size pqlen, copy q to o */ - o = snewn(pqlen, BignumInt); - i = pqlen - q[0]; - for (j = 0; j < i; j++) - o[j] = 0; - for (j = 0; j < (int)q[0]; j++) - o[i + j] = q[q[0] - j]; - - /* Allocate a of size 2*pqlen for result */ - a = snewn(2 * pqlen, BignumInt); - - /* Scratch space for multiplies */ - scratchlen = mul_compute_scratch(pqlen); - scratch = snewn(scratchlen, BignumInt); - - /* Compute reciprocal of the top full word of the modulus */ - { - BignumInt m0 = m[0]; - rshift = bn_clz(m0); - if (rshift) { - m0 <<= rshift; - if (mlen > 1) - m0 |= m[1] >> (BIGNUM_INT_BITS - rshift); - } - recip = reciprocal_word(m0); - } - - /* Main computation */ - internal_mul(n, o, a, pqlen, scratch); - internal_mod(a, pqlen * 2, m, mlen, NULL, recip, rshift); - - /* Copy result to buffer */ - rlen = (mlen < pqlen * 2 ? 
mlen : pqlen * 2); - result = newbn(rlen); - for (i = 0; i < rlen; i++) - result[result[0] - i] = a[i + 2 * pqlen - rlen]; - while (result[0] > 1 && result[result[0]] == 0) - result[0]--; - - /* Free temporary arrays */ - smemclr(scratch, scratchlen * sizeof(*scratch)); - sfree(scratch); - smemclr(a, 2 * pqlen * sizeof(*a)); - sfree(a); - smemclr(m, mlen * sizeof(*m)); - sfree(m); - smemclr(n, pqlen * sizeof(*n)); - sfree(n); - smemclr(o, pqlen * sizeof(*o)); - sfree(o); - - return result; -} - -Bignum modsub(const Bignum a, const Bignum b, const Bignum n) -{ - Bignum a1, b1, ret; - - if (bignum_cmp(a, n) >= 0) a1 = bigmod(a, n); - else a1 = a; - if (bignum_cmp(b, n) >= 0) b1 = bigmod(b, n); - else b1 = b; - - if (bignum_cmp(a1, b1) >= 0) /* a >= b */ - { - ret = bigsub(a1, b1); - } - else - { - /* Handle going round the corner of the modulus without having - * negative support in Bignum */ - Bignum tmp = bigsub(n, b1); - assert(tmp); - ret = bigadd(tmp, a1); - freebn(tmp); - } - - if (a != a1) freebn(a1); - if (b != b1) freebn(b1); - - return ret; -} - -/* - * Compute p % mod. - * The most significant word of mod MUST be non-zero. - * We assume that the result array is the same size as the mod array. - * We optionally write out a quotient if `quotient' is non-NULL. - * We can avoid writing out the result if `result' is NULL. - */ -static void bigdivmod(Bignum p, Bignum mod, Bignum result, Bignum quotient) -{ - BignumInt *n, *m; - BignumInt recip; - int rshift; - int plen, mlen, i, j; - - /* - * The most significant word of mod needs to be non-zero. It - * should already be, but let's make sure. 
- */ - assert(mod[mod[0]] != 0); - - /* Allocate m of size mlen, copy mod to m */ - /* We use big endian internally */ - mlen = mod[0]; - m = snewn(mlen, BignumInt); - for (j = 0; j < mlen; j++) - m[j] = mod[mod[0] - j]; - - plen = p[0]; - /* Ensure plen > mlen */ - if (plen <= mlen) - plen = mlen + 1; - - /* Allocate n of size plen, copy p to n */ - n = snewn(plen, BignumInt); - for (j = 0; j < plen; j++) - n[j] = 0; - for (j = 1; j <= (int)p[0]; j++) - n[plen - j] = p[j]; - - /* Compute reciprocal of the top full word of the modulus */ - { - BignumInt m0 = m[0]; - rshift = bn_clz(m0); - if (rshift) { - m0 <<= rshift; - if (mlen > 1) - m0 |= m[1] >> (BIGNUM_INT_BITS - rshift); - } - recip = reciprocal_word(m0); - } - - /* Main computation */ - internal_mod(n, plen, m, mlen, quotient, recip, rshift); - - /* Copy result to buffer */ - if (result) { - for (i = 1; i <= (int)result[0]; i++) { - int j = plen - i; - result[i] = j >= 0 ? n[j] : 0; - } - } - - /* Free temporary arrays */ - smemclr(m, mlen * sizeof(*m)); - sfree(m); - smemclr(n, plen * sizeof(*n)); - sfree(n); -} - -/* - * Decrement a number. 
- */ -void decbn(Bignum bn) -{ - int i = 1; - while (i < (int)bn[0] && bn[i] == 0) - bn[i++] = BIGNUM_INT_MASK; - bn[i]--; -} - -Bignum bignum_from_bytes(const void *vdata, int nbytes) -{ - const unsigned char *data = (const unsigned char *)vdata; - Bignum result; - int w, i; - - assert(nbytes >= 0 && nbytes < INT_MAX/8); - - w = (nbytes + BIGNUM_INT_BYTES - 1) / BIGNUM_INT_BYTES; /* bytes->words */ - - result = newbn(w); - for (i = 1; i <= w; i++) - result[i] = 0; - for (i = nbytes; i--;) { - unsigned char byte = *data++; - result[1 + i / BIGNUM_INT_BYTES] |= - (BignumInt)byte << (8*i % BIGNUM_INT_BITS); - } - - bn_restore_invariant(result); - return result; -} - -Bignum bignum_from_bytes_le(const void *vdata, int nbytes) -{ - const unsigned char *data = (const unsigned char *)vdata; - Bignum result; - int w, i; - - assert(nbytes >= 0 && nbytes < INT_MAX/8); - - w = (nbytes + BIGNUM_INT_BYTES - 1) / BIGNUM_INT_BYTES; /* bytes->words */ - - result = newbn(w); - for (i = 1; i <= w; i++) - result[i] = 0; - for (i = 0; i < nbytes; ++i) { - unsigned char byte = *data++; - result[1 + i / BIGNUM_INT_BYTES] |= - (BignumInt)byte << (8*i % BIGNUM_INT_BITS); - } - - bn_restore_invariant(result); - return result; -} - -Bignum bignum_from_decimal(const char *decimal) -{ - Bignum result = copybn(Zero); - - while (*decimal) { - Bignum tmp, tmp2; - - if (!isdigit((unsigned char)*decimal)) { - freebn(result); - return 0; - } - - tmp = bigmul(result, Ten); - tmp2 = bignum_from_long(*decimal - '0'); - freebn(result); - result = bigadd(tmp, tmp2); - freebn(tmp); - freebn(tmp2); - - decimal++; - } - - return result; -} - -Bignum bignum_random_in_range(const Bignum lower, const Bignum upper) -{ - Bignum ret = NULL; - unsigned char *bytes; - int upper_len = bignum_bitcount(upper); - int upper_bytes = upper_len / 8; - int upper_bits = upper_len % 8; - if (upper_bits) ++upper_bytes; - - bytes = snewn(upper_bytes, unsigned char); - do { - int i; - - if (ret) freebn(ret); - - for (i = 0; i 
< upper_bytes; ++i) - { - bytes[i] = (unsigned char)random_byte(); - } - /* Mask the top to reduce failure rate to 50/50 */ - if (upper_bits) - { - bytes[i - 1] &= 0xFF >> (8 - upper_bits); - } - - ret = bignum_from_bytes(bytes, upper_bytes); - } while (bignum_cmp(ret, lower) < 0 || bignum_cmp(ret, upper) > 0); - smemclr(bytes, upper_bytes); - sfree(bytes); - - return ret; -} - -/* - * Return the bit count of a bignum. - */ -int bignum_bitcount(Bignum bn) -{ - int bitcount = bn[0] * BIGNUM_INT_BITS - 1; - while (bitcount >= 0 - && (bn[bitcount / BIGNUM_INT_BITS + 1] >> (bitcount % BIGNUM_INT_BITS)) == 0) bitcount--; - return bitcount + 1; -} - -/* - * Return a byte from a bignum; 0 is least significant, etc. - */ -int bignum_byte(Bignum bn, int i) -{ - if (i < 0 || i >= (int)(BIGNUM_INT_BYTES * bn[0])) - return 0; /* beyond the end */ - else - return (bn[i / BIGNUM_INT_BYTES + 1] >> - ((i % BIGNUM_INT_BYTES)*8)) & 0xFF; -} - -/* - * Return a bit from a bignum; 0 is least significant, etc. - */ -int bignum_bit(Bignum bn, int i) -{ - if (i < 0 || i >= (int)(BIGNUM_INT_BITS * bn[0])) - return 0; /* beyond the end */ - else - return (bn[i / BIGNUM_INT_BITS + 1] >> (i % BIGNUM_INT_BITS)) & 1; -} - -/* - * Set a bit in a bignum; 0 is least significant, etc. 
- */ -void bignum_set_bit(Bignum bn, int bitnum, int value) -{ - if (bitnum < 0 || bitnum >= (int)(BIGNUM_INT_BITS * bn[0])) { - if (value) abort(); /* beyond the end */ - } else { - int v = bitnum / BIGNUM_INT_BITS + 1; - BignumInt mask = (BignumInt)1 << (bitnum % BIGNUM_INT_BITS); - if (value) - bn[v] |= mask; - else - bn[v] &= ~mask; - } -} - -void BinarySink_put_mp_ssh1(BinarySink *bs, Bignum bn) -{ - int bits = bignum_bitcount(bn); - int bytes = (bits + 7) / 8; - int i; - - put_uint16(bs, bits); - for (i = bytes; i--;) - put_byte(bs, bignum_byte(bn, i)); -} - -void BinarySink_put_mp_ssh2(BinarySink *bs, Bignum bn) -{ - int bytes = (bignum_bitcount(bn) + 8) / 8; - int i; - - put_uint32(bs, bytes); - for (i = bytes; i--;) - put_byte(bs, bignum_byte(bn, i)); -} - -Bignum BinarySource_get_mp_ssh1(BinarySource *src) -{ - unsigned bitc = get_uint16(src); - ptrlen bytes = get_data(src, (bitc + 7) / 8); - if (get_err(src)) { - return bignum_from_long(0); - } else { - Bignum toret = bignum_from_bytes(bytes.ptr, bytes.len); - /* SSH-1.5 spec says that it's OK for the prefix uint16 to be - * _greater_ than the actual number of bits */ - if (bignum_bitcount(toret) > bitc) { - src->err = BSE_INVALID; - freebn(toret); - toret = bignum_from_long(0); - } - return toret; - } -} - -Bignum BinarySource_get_mp_ssh2(BinarySource *src) -{ - ptrlen bytes = get_string(src); - if (get_err(src)) { - return bignum_from_long(0); - } else { - const unsigned char *p = bytes.ptr; - if ((bytes.len > 0 && - ((p[0] & 0x80) || - (p[0] == 0 && (bytes.len <= 1 || !(p[1] & 0x80)))))) { - src->err = BSE_INVALID; - return bignum_from_long(0); - } - return bignum_from_bytes(bytes.ptr, bytes.len); - } -} - -/* - * Compare two bignums. Returns like strcmp. 
- */ -int bignum_cmp(Bignum a, Bignum b) -{ - int amax = a[0], bmax = b[0]; - int i; - - /* Annoyingly we have two representations of zero */ - if (amax == 1 && a[amax] == 0) - amax = 0; - if (bmax == 1 && b[bmax] == 0) - bmax = 0; - - assert(amax == 0 || a[amax] != 0); - assert(bmax == 0 || b[bmax] != 0); - - i = (amax > bmax ? amax : bmax); - while (i) { - BignumInt aval = (i > amax ? 0 : a[i]); - BignumInt bval = (i > bmax ? 0 : b[i]); - if (aval < bval) - return -1; - if (aval > bval) - return +1; - i--; - } - return 0; -} - -/* - * Right-shift one bignum to form another. - */ -Bignum bignum_rshift(Bignum a, int shift) -{ - Bignum ret; - int i, shiftw, shiftb, shiftbb, bits; - BignumInt ai, ai1; - - assert(shift >= 0); - - bits = bignum_bitcount(a) - shift; - ret = newbn((bits + BIGNUM_INT_BITS - 1) / BIGNUM_INT_BITS); - - if (ret) { - shiftw = shift / BIGNUM_INT_BITS; - shiftb = shift % BIGNUM_INT_BITS; - shiftbb = BIGNUM_INT_BITS - shiftb; - - ai1 = a[shiftw + 1]; - for (i = 1; i <= (int)ret[0]; i++) { - ai = ai1; - ai1 = (i + shiftw + 1 <= (int)a[0] ? a[i + shiftw + 1] : 0); - ret[i] = ((ai >> shiftb) | (ai1 << shiftbb)) & BIGNUM_INT_MASK; - } - } - - return ret; -} - -/* - * Left-shift one bignum to form another. 
- */ -Bignum bignum_lshift(Bignum a, int shift) -{ - Bignum ret; - int bits, shiftWords, shiftBits; - - assert(shift >= 0); - - bits = bignum_bitcount(a) + shift; - ret = newbn((bits + BIGNUM_INT_BITS - 1) / BIGNUM_INT_BITS); - - shiftWords = shift / BIGNUM_INT_BITS; - shiftBits = shift % BIGNUM_INT_BITS; - - if (shiftBits == 0) - { - memcpy(&ret[1 + shiftWords], &a[1], sizeof(BignumInt) * a[0]); - } - else - { - int i; - BignumInt carry = 0; - - /* Remember that Bignum[0] is length, so add 1 */ - for (i = shiftWords + 1; i < ((int)a[0]) + shiftWords + 1; ++i) - { - BignumInt from = a[i - shiftWords]; - ret[i] = (from << shiftBits) | carry; - carry = from >> (BIGNUM_INT_BITS - shiftBits); - } - if (carry) ret[i] = carry; - } - - return ret; -} - -/* - * Non-modular multiplication and addition. - */ -Bignum bigmuladd(Bignum a, Bignum b, Bignum addend) -{ - int alen = a[0], blen = b[0]; - int mlen = (alen > blen ? alen : blen); - int rlen, i, maxspot; - int wslen; - BignumInt *workspace; - Bignum ret; - - /* mlen space for a, mlen space for b, 2*mlen for result, - * plus scratch space for multiplication */ - wslen = mlen * 4 + mul_compute_scratch(mlen); - workspace = snewn(wslen, BignumInt); - for (i = 0; i < mlen; i++) { - workspace[0 * mlen + i] = (mlen - i <= (int)a[0] ? a[mlen - i] : 0); - workspace[1 * mlen + i] = (mlen - i <= (int)b[0] ? b[mlen - i] : 0); - } - - internal_mul(workspace + 0 * mlen, workspace + 1 * mlen, - workspace + 2 * mlen, mlen, workspace + 4 * mlen); - - /* now just copy the result back */ - rlen = alen + blen + 1; - if (addend && rlen <= (int)addend[0]) - rlen = addend[0] + 1; - ret = newbn(rlen); - maxspot = 0; - for (i = 1; i <= (int)ret[0]; i++) { - ret[i] = (i <= 2 * mlen ? workspace[4 * mlen - i] : 0); - if (ret[i] != 0) - maxspot = i; - } - ret[0] = maxspot; - - /* now add in the addend, if any */ - if (addend) { - BignumCarry carry = 0; - for (i = 1; i <= rlen; i++) { - BignumInt retword = (i <= (int)ret[0] ? 
ret[i] : 0); - BignumInt addword = (i <= (int)addend[0] ? addend[i] : 0); - BignumADC(ret[i], carry, retword, addword, carry); - if (ret[i] != 0 && i > maxspot) - maxspot = i; - } - } - ret[0] = maxspot; - - smemclr(workspace, wslen * sizeof(*workspace)); - sfree(workspace); - return ret; -} - -/* - * Non-modular multiplication. - */ -Bignum bigmul(Bignum a, Bignum b) -{ - return bigmuladd(a, b, NULL); -} - -/* - * Simple addition. - */ -Bignum bigadd(Bignum a, Bignum b) -{ - int alen = a[0], blen = b[0]; - int rlen = (alen > blen ? alen : blen) + 1; - int i, maxspot; - Bignum ret; - BignumCarry carry; - - ret = newbn(rlen); - - carry = 0; - maxspot = 0; - for (i = 1; i <= rlen; i++) { - BignumInt aword = (i <= (int)a[0] ? a[i] : 0); - BignumInt bword = (i <= (int)b[0] ? b[i] : 0); - BignumADC(ret[i], carry, aword, bword, carry); - if (ret[i] != 0 && i > maxspot) - maxspot = i; - } - ret[0] = maxspot; - - return ret; -} - -/* - * Subtraction. Returns a-b, or NULL if the result would come out - * negative (recall that this entire bignum module only handles - * positive numbers). - */ -Bignum bigsub(Bignum a, Bignum b) -{ - int alen = a[0], blen = b[0]; - int rlen = (alen > blen ? alen : blen); - int i, maxspot; - Bignum ret; - BignumCarry carry; - - ret = newbn(rlen); - - carry = 1; - maxspot = 0; - for (i = 1; i <= rlen; i++) { - BignumInt aword = (i <= (int)a[0] ? a[i] : 0); - BignumInt bword = (i <= (int)b[0] ? b[i] : 0); - BignumADC(ret[i], carry, aword, ~bword, carry); - if (ret[i] != 0 && i > maxspot) - maxspot = i; - } - ret[0] = maxspot; - - if (!carry) { - freebn(ret); - return NULL; - } - - return ret; -} - -/* - * Create a bignum which is the bitmask covering another one. That - * is, the smallest integer which is >= N and is also one less than - * a power of two. 
- */ -Bignum bignum_bitmask(Bignum n) -{ - Bignum ret = copybn(n); - int i; - BignumInt j; - - i = ret[0]; - while (n[i] == 0 && i > 0) - i--; - if (i <= 0) - return ret; /* input was zero */ - j = 1; - while (j < n[i]) - j = 2 * j + 1; - ret[i] = j; - while (--i > 0) - ret[i] = BIGNUM_INT_MASK; - return ret; -} - -/* - * Convert an unsigned long into a bignum. - */ -Bignum bignum_from_long(unsigned long n) -{ - const int maxwords = - (sizeof(unsigned long) + sizeof(BignumInt) - 1) / sizeof(BignumInt); - Bignum ret; - int i; - - ret = newbn(maxwords); - ret[0] = 0; - for (i = 0; i < maxwords; i++) { - ret[i+1] = n >> (i * BIGNUM_INT_BITS); - if (ret[i+1] != 0) - ret[0] = i+1; - } - - return ret; -} - -/* - * Add a long to a bignum. - */ -Bignum bignum_add_long(Bignum number, unsigned long n) -{ - const int maxwords = - (sizeof(unsigned long) + sizeof(BignumInt) - 1) / sizeof(BignumInt); - Bignum ret; - int words, i; - BignumCarry carry; - - words = number[0]; - if (words < maxwords) - words = maxwords; - words++; - ret = newbn(words); - - carry = 0; - ret[0] = 0; - for (i = 0; i < words; i++) { - BignumInt nword = (i < maxwords ? n >> (i * BIGNUM_INT_BITS) : 0); - BignumInt numword = (i < number[0] ? number[i+1] : 0); - BignumADC(ret[i+1], carry, numword, nword, carry); - if (ret[i+1] != 0) - ret[0] = i+1; - } - return ret; -} - -/* - * Compute the residue of a bignum, modulo a (max 16-bit) short. 
- */ -unsigned short bignum_mod_short(Bignum number, unsigned short modulus) -{ - unsigned long mod = modulus, r = 0; - /* Precompute (BIGNUM_INT_MASK+1) % mod */ - unsigned long base_r = (BIGNUM_INT_MASK - modulus + 1) % mod; - int i; - - for (i = number[0]; i > 0; i--) { - /* - * Conceptually, ((r << BIGNUM_INT_BITS) + number[i]) % mod - */ - r = ((r * base_r) + (number[i] % mod)) % mod; - } - return (unsigned short) r; -} - -#ifdef DEBUG -void diagbn(char *prefix, Bignum md) -{ - int i, nibbles, morenibbles; - static const char hex[] = "0123456789ABCDEF"; - - debug("%s0x", prefix ? prefix : ""); - - nibbles = (3 + bignum_bitcount(md)) / 4; - if (nibbles < 1) - nibbles = 1; - morenibbles = 4 * md[0] - nibbles; - for (i = 0; i < morenibbles; i++) - debug("-"); - for (i = nibbles; i--;) - debug("%c", hex[(bignum_byte(md, i / 2) >> (4 * (i % 2))) & 0xF]); - - if (prefix) - debug("\n"); -} -#endif - -/* - * Simple division. - */ -Bignum bigdiv(Bignum a, Bignum b) -{ - Bignum q = newbn(a[0]); - bigdivmod(a, b, NULL, q); - while (q[0] > 1 && q[q[0]] == 0) - q[0]--; - return q; -} - -/* - * Simple remainder. - */ -Bignum bigmod(Bignum a, Bignum b) -{ - Bignum r = newbn(b[0]); - bigdivmod(a, b, r, NULL); - while (r[0] > 1 && r[r[0]] == 0) - r[0]--; - return r; -} - -/* - * Greatest common divisor. - */ -Bignum biggcd(Bignum av, Bignum bv) -{ - Bignum a = copybn(av); - Bignum b = copybn(bv); - - while (bignum_cmp(b, Zero) != 0) { - Bignum t = newbn(b[0]); - bigdivmod(a, b, t, NULL); - while (t[0] > 1 && t[t[0]] == 0) - t[0]--; - freebn(a); - a = b; - b = t; - } - - freebn(b); - return a; -} - -/* - * Modular inverse, using Euclid's extended algorithm. 
- */ -Bignum modinv(Bignum number, Bignum modulus) -{ - Bignum a = copybn(modulus); - Bignum b = copybn(number); - Bignum xp = copybn(Zero); - Bignum x = copybn(One); - int sign = +1; - - assert(number[number[0]] != 0); - assert(modulus[modulus[0]] != 0); - - while (bignum_cmp(b, One) != 0) { - Bignum t, q; - - if (bignum_cmp(b, Zero) == 0) { - /* - * Found a common factor between the inputs, so we cannot - * return a modular inverse at all. - */ - freebn(b); - freebn(a); - freebn(xp); - freebn(x); - return NULL; - } - - t = newbn(b[0]); - q = newbn(a[0]); - bigdivmod(a, b, t, q); - while (t[0] > 1 && t[t[0]] == 0) - t[0]--; - while (q[0] > 1 && q[q[0]] == 0) - q[0]--; - freebn(a); - a = b; - b = t; - t = xp; - xp = x; - x = bigmuladd(q, xp, t); - sign = -sign; - freebn(t); - freebn(q); - } - - freebn(b); - freebn(a); - freebn(xp); - - /* now we know that sign * x == 1, and that x < modulus */ - if (sign < 0) { - /* set a new x to be modulus - x */ - Bignum newx = newbn(modulus[0]); - BignumInt carry = 0; - int maxspot = 1; - int i; - - for (i = 1; i <= (int)newx[0]; i++) { - BignumInt aword = (i <= (int)modulus[0] ? modulus[i] : 0); - BignumInt bword = (i <= (int)x[0] ? x[i] : 0); - newx[i] = aword - bword - carry; - bword = ~bword; - carry = carry ? (newx[i] >= bword) : (newx[i] > bword); - if (newx[i] != 0) - maxspot = i; - } - newx[0] = maxspot; - freebn(x); - x = newx; - } - - /* and return. */ - return x; -} - -/* - * Render a bignum into decimal. Return a malloced string holding - * the decimal representation. - */ -char *bignum_decimal(Bignum x) -{ - int ndigits, ndigit; - int i; - bool iszero; - BignumInt carry; - char *ret; - BignumInt *workspace; - - /* - * First, estimate the number of digits. Since log(10)/log(2) - * is just greater than 93/28 (the joys of continued fraction - * approximations...) we know that for every 93 bits, we need - * at most 28 digits. This will tell us how much to malloc. 
- * - * Formally: if x has i bits, that means x is strictly less - * than 2^i. Since 2 is less than 10^(28/93), this is less than - * 10^(28i/93). We need an integer power of ten, so we must - * round up (rounding down might make it less than x again). - * Therefore if we multiply the bit count by 28/93, rounding - * up, we will have enough digits. - * - * i=0 (i.e., x=0) is an irritating special case. - */ - i = bignum_bitcount(x); - if (!i) - ndigits = 1; /* x = 0 */ - else - ndigits = (28 * i + 92) / 93; /* multiply by 28/93 and round up */ - ndigits++; /* allow for trailing \0 */ - ret = snewn(ndigits, char); - - /* - * Now allocate some workspace to hold the binary form as we - * repeatedly divide it by ten. Initialise this to the - * big-endian form of the number. - */ - workspace = snewn(x[0], BignumInt); - for (i = 0; i < (int)x[0]; i++) - workspace[i] = x[x[0] - i]; - - /* - * Next, write the decimal number starting with the last digit. - * We use ordinary short division, dividing 10 into the - * workspace. - */ - ndigit = ndigits - 1; - ret[ndigit] = '\0'; - do { - iszero = true; - carry = 0; - for (i = 0; i < (int)x[0]; i++) { - /* - * Conceptually, we want to compute - * - * (carry << BIGNUM_INT_BITS) + workspace[i] - * ----------------------------------------- - * 10 - * - * but we don't have an integer type longer than BignumInt - * to work with. So we have to do it in pieces. - */ - - BignumInt q, r; - q = workspace[i] / 10; - r = workspace[i] % 10; - - /* I want (BIGNUM_INT_MASK+1)/10 but can't say so directly! */ - q += carry * ((BIGNUM_INT_MASK-9) / 10 + 1); - r += carry * ((BIGNUM_INT_MASK-9) % 10); - - q += r / 10; - r %= 10; - - workspace[i] = q; - carry = r; - - if (workspace[i]) - iszero = false; - } - ret[--ndigit] = (char) (carry + '0'); - } while (!iszero); - - /* - * There's a chance we've fallen short of the start of the - * string. Correct if so. - */ - if (ndigit > 0) - memmove(ret, ret + ndigit, ndigits - ndigit); - - /* - * Done. 
- */ - smemclr(workspace, x[0] * sizeof(*workspace)); - sfree(workspace); - return ret; -} diff --git a/sshccp.c b/sshccp.c index 176e6094..81ae45b6 100644 --- a/sshccp.c +++ b/sshccp.c @@ -30,7 +30,7 @@ */ #include "ssh.h" -#include "sshbn.h" +#include "mpint_i.h" #ifndef INLINE #define INLINE diff --git a/sshcommon.c b/sshcommon.c index 337c3293..c541e9d3 100644 --- a/sshcommon.c +++ b/sshcommon.c @@ -7,6 +7,7 @@ #include #include "putty.h" +#include "mpint.h" #include "ssh.h" #include "sshbpp.h" #include "sshppl.h" @@ -1008,13 +1009,12 @@ void ssh1_compute_session_id( struct RSAKey *hostkey, struct RSAKey *servkey) { struct MD5Context md5c; - int i; MD5Init(&md5c); - for (i = (bignum_bitcount(hostkey->modulus) + 7) / 8; i-- ;) - put_byte(&md5c, bignum_byte(hostkey->modulus, i)); - for (i = (bignum_bitcount(servkey->modulus) + 7) / 8; i-- ;) - put_byte(&md5c, bignum_byte(servkey->modulus, i)); + for (size_t i = (mp_get_nbits(hostkey->modulus) + 7) / 8; i-- ;) + put_byte(&md5c, mp_get_byte(hostkey->modulus, i)); + for (size_t i = (mp_get_nbits(servkey->modulus) + 7) / 8; i-- ;) + put_byte(&md5c, mp_get_byte(servkey->modulus, i)); put_data(&md5c, cookie, 8); MD5Final(session_id, &md5c); } diff --git a/sshdh.c b/sshdh.c index 1bd226a0..b7c4b136 100644 --- a/sshdh.c +++ b/sshdh.c @@ -2,61 +2,35 @@ * Diffie-Hellman implementation for PuTTY. */ +#include + #include "ssh.h" +#include "misc.h" +#include "mpint.h" -/* - * The primes used in the group1 and group14 key exchange. 
- */ -static const unsigned char P1[] = { - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xC9, 0x0F, 0xDA, 0xA2, - 0x21, 0x68, 0xC2, 0x34, 0xC4, 0xC6, 0x62, 0x8B, 0x80, 0xDC, 0x1C, 0xD1, - 0x29, 0x02, 0x4E, 0x08, 0x8A, 0x67, 0xCC, 0x74, 0x02, 0x0B, 0xBE, 0xA6, - 0x3B, 0x13, 0x9B, 0x22, 0x51, 0x4A, 0x08, 0x79, 0x8E, 0x34, 0x04, 0xDD, - 0xEF, 0x95, 0x19, 0xB3, 0xCD, 0x3A, 0x43, 0x1B, 0x30, 0x2B, 0x0A, 0x6D, - 0xF2, 0x5F, 0x14, 0x37, 0x4F, 0xE1, 0x35, 0x6D, 0x6D, 0x51, 0xC2, 0x45, - 0xE4, 0x85, 0xB5, 0x76, 0x62, 0x5E, 0x7E, 0xC6, 0xF4, 0x4C, 0x42, 0xE9, - 0xA6, 0x37, 0xED, 0x6B, 0x0B, 0xFF, 0x5C, 0xB6, 0xF4, 0x06, 0xB7, 0xED, - 0xEE, 0x38, 0x6B, 0xFB, 0x5A, 0x89, 0x9F, 0xA5, 0xAE, 0x9F, 0x24, 0x11, - 0x7C, 0x4B, 0x1F, 0xE6, 0x49, 0x28, 0x66, 0x51, 0xEC, 0xE6, 0x53, 0x81, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF +struct dh_ctx { + mp_int *x, *e, *p, *q, *g; }; -static const unsigned char P14[] = { - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xC9, 0x0F, 0xDA, 0xA2, - 0x21, 0x68, 0xC2, 0x34, 0xC4, 0xC6, 0x62, 0x8B, 0x80, 0xDC, 0x1C, 0xD1, - 0x29, 0x02, 0x4E, 0x08, 0x8A, 0x67, 0xCC, 0x74, 0x02, 0x0B, 0xBE, 0xA6, - 0x3B, 0x13, 0x9B, 0x22, 0x51, 0x4A, 0x08, 0x79, 0x8E, 0x34, 0x04, 0xDD, - 0xEF, 0x95, 0x19, 0xB3, 0xCD, 0x3A, 0x43, 0x1B, 0x30, 0x2B, 0x0A, 0x6D, - 0xF2, 0x5F, 0x14, 0x37, 0x4F, 0xE1, 0x35, 0x6D, 0x6D, 0x51, 0xC2, 0x45, - 0xE4, 0x85, 0xB5, 0x76, 0x62, 0x5E, 0x7E, 0xC6, 0xF4, 0x4C, 0x42, 0xE9, - 0xA6, 0x37, 0xED, 0x6B, 0x0B, 0xFF, 0x5C, 0xB6, 0xF4, 0x06, 0xB7, 0xED, - 0xEE, 0x38, 0x6B, 0xFB, 0x5A, 0x89, 0x9F, 0xA5, 0xAE, 0x9F, 0x24, 0x11, - 0x7C, 0x4B, 0x1F, 0xE6, 0x49, 0x28, 0x66, 0x51, 0xEC, 0xE4, 0x5B, 0x3D, - 0xC2, 0x00, 0x7C, 0xB8, 0xA1, 0x63, 0xBF, 0x05, 0x98, 0xDA, 0x48, 0x36, - 0x1C, 0x55, 0xD3, 0x9A, 0x69, 0x16, 0x3F, 0xA8, 0xFD, 0x24, 0xCF, 0x5F, - 0x83, 0x65, 0x5D, 0x23, 0xDC, 0xA3, 0xAD, 0x96, 0x1C, 0x62, 0xF3, 0x56, - 0x20, 0x85, 0x52, 0xBB, 0x9E, 0xD5, 0x29, 0x07, 0x70, 0x96, 0x96, 0x6D, - 0x67, 0x0C, 0x35, 0x4E, 0x4A, 0xBC, 0x98, 
0x04, 0xF1, 0x74, 0x6C, 0x08, - 0xCA, 0x18, 0x21, 0x7C, 0x32, 0x90, 0x5E, 0x46, 0x2E, 0x36, 0xCE, 0x3B, - 0xE3, 0x9E, 0x77, 0x2C, 0x18, 0x0E, 0x86, 0x03, 0x9B, 0x27, 0x83, 0xA2, - 0xEC, 0x07, 0xA2, 0x8F, 0xB5, 0xC5, 0x5D, 0xF0, 0x6F, 0x4C, 0x52, 0xC9, - 0xDE, 0x2B, 0xCB, 0xF6, 0x95, 0x58, 0x17, 0x18, 0x39, 0x95, 0x49, 0x7C, - 0xEA, 0x95, 0x6A, 0xE5, 0x15, 0xD2, 0x26, 0x18, 0x98, 0xFA, 0x05, 0x10, - 0x15, 0x72, 0x8E, 0x5A, 0x8A, 0xAC, 0xAA, 0x68, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF -}; - -/* - * The generator g = 2 (used for both group1 and group14). - */ -static const unsigned char G[] = { 2 }; struct dh_extra { - const unsigned char *pdata, *gdata; /* NULL means group exchange */ - int plen, glen; + bool gex; + void (*construct)(struct dh_ctx *ctx); }; +static void dh_group1_construct(struct dh_ctx *ctx) +{ + ctx->p = MP_LITERAL(0xFFFFFFFFFFFFFFFFC90FDAA22168C234C4C6628B80DC1CD129024E088A67CC74020BBEA63B139B22514A08798E3404DDEF9519B3CD3A431B302B0A6DF25F14374FE1356D6D51C245E485B576625E7EC6F44C42E9A637ED6B0BFF5CB6F406B7EDEE386BFB5A899FA5AE9F24117C4B1FE649286651ECE65381FFFFFFFFFFFFFFFF); + ctx->g = mp_from_integer(2); +} + +static void dh_group14_construct(struct dh_ctx *ctx) +{ + ctx->p = MP_LITERAL(0xFFFFFFFFFFFFFFFFC90FDAA22168C234C4C6628B80DC1CD129024E088A67CC74020BBEA63B139B22514A08798E3404DDEF9519B3CD3A431B302B0A6DF25F14374FE1356D6D51C245E485B576625E7EC6F44C42E9A637ED6B0BFF5CB6F406B7EDEE386BFB5A899FA5AE9F24117C4B1FE649286651ECE45B3DC2007CB8A163BF0598DA48361C55D39A69163FA8FD24CF5F83655D23DCA3AD961C62F356208552BB9ED529077096966D670C354E4ABC9804F1746C08CA18217C32905E462E36CE3BE39E772C180E86039B2783A2EC07A28FB5C55DF06F4C52C9DE2BCBF6955817183995497CEA956AE515D2261898FA051015728E5A8AACAA68FFFFFFFFFFFFFFFF); + ctx->g = mp_from_integer(2); +} + static const struct dh_extra extra_group1 = { - P1, G, lenof(P1), lenof(G), + false, dh_group1_construct, }; static const struct ssh_kex ssh_diffiehellman_group1_sha1 = { @@ -74,7 +48,7 @@ const struct ssh_kexes 
ssh_diffiehellman_group1 = { }; static const struct dh_extra extra_group14 = { - P14, G, lenof(P14), lenof(G), + false, dh_group14_construct, }; static const struct ssh_kex ssh_diffiehellman_group14_sha256 = { @@ -97,9 +71,7 @@ const struct ssh_kexes ssh_diffiehellman_group14 = { group14_list }; -static const struct dh_extra extra_gex = { - NULL, NULL, 0, 0, -}; +static const struct dh_extra extra_gex = { true }; static const struct ssh_kex ssh_diffiehellman_gex_sha256 = { "diffie-hellman-group-exchange-sha256", NULL, @@ -161,27 +133,19 @@ const struct ssh_kexes ssh_gssk5_sha1_kex = { gssk5_sha1_kex_list }; -/* - * Variables. - */ -struct dh_ctx { - Bignum x, e, p, q, qmask, g; -}; - /* * Common DH initialisation. */ static void dh_init(struct dh_ctx *ctx) { - ctx->q = bignum_rshift(ctx->p, 1); - ctx->qmask = bignum_bitmask(ctx->q); + ctx->q = mp_rshift_fixed(ctx->p, 1); ctx->x = ctx->e = NULL; } bool dh_is_gex(const struct ssh_kex *kex) { const struct dh_extra *extra = (const struct dh_extra *)kex->extra; - return extra->pdata == NULL; + return extra->gex; } /* @@ -190,9 +154,9 @@ bool dh_is_gex(const struct ssh_kex *kex) struct dh_ctx *dh_setup_group(const struct ssh_kex *kex) { const struct dh_extra *extra = (const struct dh_extra *)kex->extra; + assert(!extra->gex); struct dh_ctx *ctx = snew(struct dh_ctx); - ctx->p = bignum_from_bytes(extra->pdata, extra->plen); - ctx->g = bignum_from_bytes(extra->gdata, extra->glen); + extra->construct(ctx); dh_init(ctx); return ctx; } @@ -200,11 +164,11 @@ struct dh_ctx *dh_setup_group(const struct ssh_kex *kex) /* * Initialise DH for a server-supplied group. 
*/ -struct dh_ctx *dh_setup_gex(Bignum pval, Bignum gval) +struct dh_ctx *dh_setup_gex(mp_int *pval, mp_int *gval) { struct dh_ctx *ctx = snew(struct dh_ctx); - ctx->p = copybn(pval); - ctx->g = copybn(gval); + ctx->p = mp_copy(pval); + ctx->g = mp_copy(gval); dh_init(ctx); return ctx; } @@ -214,7 +178,7 @@ struct dh_ctx *dh_setup_gex(Bignum pval, Bignum gval) */ int dh_modulus_bit_size(const struct dh_ctx *ctx) { - return bignum_bitcount(ctx->p); + return mp_get_nbits(ctx->p); } /* @@ -222,12 +186,11 @@ int dh_modulus_bit_size(const struct dh_ctx *ctx) */ void dh_cleanup(struct dh_ctx *ctx) { - freebn(ctx->x); - freebn(ctx->e); - freebn(ctx->p); - freebn(ctx->g); - freebn(ctx->q); - freebn(ctx->qmask); + mp_free(ctx->x); + mp_free(ctx->e); + mp_free(ctx->p); + mp_free(ctx->g); + mp_free(ctx->q); sfree(ctx); } @@ -246,49 +209,36 @@ void dh_cleanup(struct dh_ctx *ctx) * Advances in Cryptology: Proceedings of Eurocrypt '96 * Springer-Verlag, May 1996. */ -Bignum dh_create_e(struct dh_ctx *ctx, int nbits) +mp_int *dh_create_e(struct dh_ctx *ctx, int nbits) { - int i; - - int nbytes; - unsigned char *buf; - - nbytes = (bignum_bitcount(ctx->qmask) + 7) / 8; - buf = snewn(nbytes, unsigned char); - - do { - /* - * Create a potential x, by ANDing a string of random bytes - * with qmask. - */ - if (ctx->x) - freebn(ctx->x); - if (nbits == 0 || nbits > bignum_bitcount(ctx->qmask)) { - for (i = 0; i < nbytes; i++) - buf[i] = bignum_byte(ctx->qmask, i) & random_byte(); - ctx->x = bignum_from_bytes(buf, nbytes); - } else { - int b, nb; - ctx->x = bn_power_2(nbits); - b = nb = 0; - for (i = 0; i < nbits; i++) { - if (nb == 0) { - nb = 8; - b = random_byte(); - } - bignum_set_bit(ctx->x, i, b & 1); - b >>= 1; - nb--; - } - } - } while (bignum_cmp(ctx->x, One) <= 0 || bignum_cmp(ctx->x, ctx->q) >= 0); - - sfree(buf); + /* + * Lower limit is just 2. + */ + mp_int *lo = mp_from_integer(2); /* - * Done. Now compute e = g^x mod p. + * Upper limit. 
*/ - ctx->e = modpow(ctx->g, ctx->x, ctx->p); + mp_int *hi = mp_copy(ctx->q); + mp_sub_integer_into(hi, hi, 1); + if (nbits) { + mp_int *pow2 = mp_power_2(nbits+1); + mp_min_into(pow2, pow2, hi); + mp_free(hi); + hi = pow2; + } + + /* + * Make a random number in that range. + */ + ctx->x = mp_random_in_range(lo, hi); + mp_free(lo); + mp_free(hi); + + /* + * Now compute e = g^x mod p. + */ + ctx->e = mp_modpow(ctx->g, ctx->x, ctx->p); return ctx->e; } @@ -301,15 +251,16 @@ Bignum dh_create_e(struct dh_ctx *ctx, int nbits) * they lead to obviously weak keys that even a passive eavesdropper * can figure out.) */ -const char *dh_validate_f(struct dh_ctx *ctx, Bignum f) +const char *dh_validate_f(struct dh_ctx *ctx, mp_int *f) { - if (bignum_cmp(f, One) <= 0) { + if (!mp_hs_integer(f, 2)) { return "f value received is too small"; } else { - Bignum pm1 = bigsub(ctx->p, One); - int cmp = bignum_cmp(f, pm1); - freebn(pm1); - if (cmp >= 0) + mp_int *pm1 = mp_copy(ctx->p); + mp_sub_integer_into(pm1, pm1, 1); + unsigned cmp = mp_cmp_hs(f, pm1); + mp_free(pm1); + if (cmp) return "f value received is too large"; } return NULL; @@ -318,9 +269,7 @@ const char *dh_validate_f(struct dh_ctx *ctx, Bignum f) /* * DH stage 2: given a number f, compute K = f^x mod p. */ -Bignum dh_find_K(struct dh_ctx *ctx, Bignum f) +mp_int *dh_find_K(struct dh_ctx *ctx, mp_int *f) { - Bignum ret; - ret = modpow(f, ctx->x, ctx->p); - return ret; + return mp_modpow(f, ctx->x, ctx->p); } diff --git a/sshdss.c b/sshdss.c index cac40af7..21bb7f82 100644 --- a/sshdss.c +++ b/sshdss.c @@ -7,6 +7,7 @@ #include #include "ssh.h" +#include "mpint.h" #include "misc.h" static void dss_freekey(ssh_key *key); /* forward reference */ @@ -29,7 +30,7 @@ static ssh_key *dss_new_pub(const ssh_keyalg *self, ptrlen data) dss->x = NULL; if (get_err(src) || - !bignum_cmp(dss->q, Zero) || !bignum_cmp(dss->p, Zero)) { + mp_eq_integer(dss->p, 0) || mp_eq_integer(dss->q, 0)) { /* Invalid key. 
*/ dss_freekey(&dss->sshk); return NULL; @@ -42,29 +43,28 @@ static void dss_freekey(ssh_key *key) { struct dss_key *dss = container_of(key, struct dss_key, sshk); if (dss->p) - freebn(dss->p); + mp_free(dss->p); if (dss->q) - freebn(dss->q); + mp_free(dss->q); if (dss->g) - freebn(dss->g); + mp_free(dss->g); if (dss->y) - freebn(dss->y); + mp_free(dss->y); if (dss->x) - freebn(dss->x); + mp_free(dss->x); sfree(dss); } -static void append_hex_to_strbuf(strbuf *sb, Bignum *x) +static void append_hex_to_strbuf(strbuf *sb, mp_int *x) { if (sb->len > 0) put_byte(sb, ','); put_data(sb, "0x", 2); - int nibbles = (3 + bignum_bitcount(x)) / 4; - if (nibbles < 1) - nibbles = 1; - static const char hex[] = "0123456789abcdef"; - for (int i = nibbles; i--;) - put_byte(sb, hex[(bignum_byte(x, i / 2) >> (4 * (i % 2))) & 0xF]); + char *hex = mp_get_hex(x); + size_t hexlen = strlen(hex); + put_data(sb, hex, hexlen); + smemclr(hex, hexlen); + sfree(hex); } static char *dss_cache_str(ssh_key *key) @@ -88,7 +88,6 @@ static bool dss_verify(ssh_key *key, ptrlen sig, ptrlen data) struct dss_key *dss = container_of(key, struct dss_key, sshk); BinarySource src[1]; unsigned char hash[20]; - Bignum r, s, w, gu1p, yu2p, gu1yu2p, u1, u2, sha, v; bool toret; if (!dss->p) @@ -117,29 +116,29 @@ static bool dss_verify(ssh_key *key, ptrlen sig, ptrlen data) } /* Now we're sitting on a 40-byte string for sure. */ - r = bignum_from_bytes(sig.ptr, 20); - s = bignum_from_bytes((const char *)sig.ptr + 20, 20); + mp_int *r = mp_from_bytes_be(make_ptrlen(sig.ptr, 20)); + mp_int *s = mp_from_bytes_be(make_ptrlen((const char *)sig.ptr + 20, 20)); if (!r || !s) { if (r) - freebn(r); + mp_free(r); if (s) - freebn(s); + mp_free(s); return false; } - if (!bignum_cmp(s, Zero)) { - freebn(r); - freebn(s); + if (mp_eq_integer(s, 0)) { + mp_free(r); + mp_free(s); return false; } /* * Step 1. w <- s^-1 mod q. 
*/ - w = modinv(s, dss->q); + mp_int *w = mp_invert(s, dss->q); if (!w) { - freebn(r); - freebn(s); + mp_free(r); + mp_free(s); return false; } @@ -147,38 +146,38 @@ static bool dss_verify(ssh_key *key, ptrlen sig, ptrlen data) * Step 2. u1 <- SHA(message) * w mod q. */ SHA_Simple(data.ptr, data.len, hash); - sha = bignum_from_bytes(hash, 20); - u1 = modmul(sha, w, dss->q); + mp_int *sha = mp_from_bytes_be(make_ptrlen(hash, 20)); + mp_int *u1 = mp_modmul(sha, w, dss->q); /* * Step 3. u2 <- r * w mod q. */ - u2 = modmul(r, w, dss->q); + mp_int *u2 = mp_modmul(r, w, dss->q); /* * Step 4. v <- (g^u1 * y^u2 mod p) mod q. */ - gu1p = modpow(dss->g, u1, dss->p); - yu2p = modpow(dss->y, u2, dss->p); - gu1yu2p = modmul(gu1p, yu2p, dss->p); - v = modmul(gu1yu2p, One, dss->q); + mp_int *gu1p = mp_modpow(dss->g, u1, dss->p); + mp_int *yu2p = mp_modpow(dss->y, u2, dss->p); + mp_int *gu1yu2p = mp_modmul(gu1p, yu2p, dss->p); + mp_int *v = mp_mod(gu1yu2p, dss->q); /* * Step 5. v should now be equal to r. 
*/ - toret = !bignum_cmp(v, r); + toret = mp_cmp_eq(v, r); - freebn(w); - freebn(sha); - freebn(u1); - freebn(u2); - freebn(gu1p); - freebn(yu2p); - freebn(gu1yu2p); - freebn(v); - freebn(r); - freebn(s); + mp_free(w); + mp_free(sha); + mp_free(u1); + mp_free(u2); + mp_free(gu1p); + mp_free(yu2p); + mp_free(gu1yu2p); + mp_free(v); + mp_free(r); + mp_free(s); return toret; } @@ -209,7 +208,7 @@ static ssh_key *dss_new_priv(const ssh_keyalg *self, ptrlen pub, ptrlen priv) ptrlen hash; SHA_State s; unsigned char digest[20]; - Bignum ytest; + mp_int *ytest; sshk = dss_new_pub(self, pub); if (!sshk) @@ -233,7 +232,7 @@ static ssh_key *dss_new_priv(const ssh_keyalg *self, ptrlen pub, ptrlen priv) put_mp_ssh2(&s, dss->q); put_mp_ssh2(&s, dss->g); SHA_Final(&s, digest); - if (0 != memcmp(hash.ptr, digest, 20)) { + if (!smemeq(hash.ptr, digest, 20)) { dss_freekey(&dss->sshk); return NULL; } @@ -242,13 +241,13 @@ static ssh_key *dss_new_priv(const ssh_keyalg *self, ptrlen pub, ptrlen priv) /* * Now ensure g^x mod p really is y. */ - ytest = modpow(dss->g, dss->x, dss->p); - if (0 != bignum_cmp(ytest, dss->y)) { + ytest = mp_modpow(dss->g, dss->x, dss->p); + if (!mp_cmp_eq(ytest, dss->y)) { + mp_free(ytest); dss_freekey(&dss->sshk); - freebn(ytest); return NULL; } - freebn(ytest); + mp_free(ytest); return &dss->sshk; } @@ -268,7 +267,7 @@ static ssh_key *dss_new_priv_openssh(const ssh_keyalg *self, dss->x = get_mp_ssh2(src); if (get_err(src) || - !bignum_cmp(dss->q, Zero) || !bignum_cmp(dss->p, Zero)) { + mp_eq_integer(dss->q, 0) || mp_eq_integer(dss->p, 0)) { /* Invalid key. 
*/ dss_freekey(&dss->sshk); return NULL; @@ -299,14 +298,15 @@ static int dss_pubkey_bits(const ssh_keyalg *self, ptrlen pub) return -1; dss = container_of(sshk, struct dss_key, sshk); - ret = bignum_bitcount(dss->p); + ret = mp_get_nbits(dss->p); dss_freekey(&dss->sshk); return ret; } -Bignum *dss_gen_k(const char *id_string, Bignum modulus, Bignum private_key, - unsigned char *digest, int digest_len) +mp_int *dss_gen_k(const char *id_string, mp_int *modulus, + mp_int *private_key, + unsigned char *digest, int digest_len) { /* * The basic DSS signing algorithm is: @@ -381,7 +381,6 @@ Bignum *dss_gen_k(const char *id_string, Bignum modulus, Bignum private_key, */ SHA512_State ss; unsigned char digest512[64]; - Bignum proto_k, k; /* * Hash some identifying text plus x. @@ -397,72 +396,63 @@ Bignum *dss_gen_k(const char *id_string, Bignum modulus, Bignum private_key, SHA512_Init(&ss); put_data(&ss, digest512, sizeof(digest512)); put_data(&ss, digest, digest_len); + SHA512_Final(&ss, digest512); - while (1) { - SHA512_State ss2 = ss; /* structure copy */ - SHA512_Final(&ss2, digest512); + /* + * Now convert the result into a bignum, and coerce it to the + * range [2,q), which we do by reducing it mod q-2 and adding 2. + */ + mp_int *modminus2 = mp_copy(modulus); + mp_sub_integer_into(modminus2, modminus2, 2); + mp_int *proto_k = mp_from_bytes_be(make_ptrlen(digest512, 64)); + mp_int *k = mp_mod(proto_k, modminus2); + mp_free(proto_k); + mp_free(modminus2); + mp_add_integer_into(k, k, 2); - smemclr(&ss2, sizeof(ss2)); + smemclr(&ss, sizeof(ss)); + smemclr(digest512, sizeof(digest512)); - /* - * Now convert the result into a bignum, and reduce it mod q. - */ - proto_k = bignum_from_bytes(digest512, 64); - k = bigmod(proto_k, modulus); - freebn(proto_k); - - if (bignum_cmp(k, One) != 0 && bignum_cmp(k, Zero) != 0) { - smemclr(&ss, sizeof(ss)); - smemclr(digest512, sizeof(digest512)); - return k; - } - - /* Very unlikely we get here, but if so, k was unsuitable. 
*/ - freebn(k); - /* Perturb the hash to think of a different k. */ - put_byte(&ss, 'x'); - /* Go round and try again. */ - } + return k; } static void dss_sign(ssh_key *key, const void *data, int datalen, unsigned flags, BinarySink *bs) { struct dss_key *dss = container_of(key, struct dss_key, sshk); - Bignum k, gkp, hash, kinv, hxr, r, s; unsigned char digest[20]; int i; SHA_Simple(data, datalen, digest); - k = dss_gen_k("DSA deterministic k generator", dss->q, dss->x, - digest, sizeof(digest)); - kinv = modinv(k, dss->q); /* k^-1 mod q */ - assert(kinv); + mp_int *k = dss_gen_k("DSA deterministic k generator", dss->q, dss->x, + digest, sizeof(digest)); + mp_int *kinv = mp_invert(k, dss->q); /* k^-1 mod q */ /* * Now we have k, so just go ahead and compute the signature. */ - gkp = modpow(dss->g, k, dss->p); /* g^k mod p */ - r = bigmod(gkp, dss->q); /* r = (g^k mod p) mod q */ - freebn(gkp); + mp_int *gkp = mp_modpow(dss->g, k, dss->p); /* g^k mod p */ + mp_int *r = mp_mod(gkp, dss->q); /* r = (g^k mod p) mod q */ + mp_free(gkp); - hash = bignum_from_bytes(digest, 20); - hxr = bigmuladd(dss->x, r, hash); /* hash + x*r */ - s = modmul(kinv, hxr, dss->q); /* s = k^-1 * (hash + x*r) mod q */ - freebn(hxr); - freebn(kinv); - freebn(k); - freebn(hash); + mp_int *hash = mp_from_bytes_be(make_ptrlen(digest, 20)); + mp_int *hxr = mp_mul(dss->x, r); + mp_add_into(hxr, hxr, hash); /* hash + x*r */ + mp_int *s = mp_modmul(kinv, hxr, dss->q); /* s = k^-1 * (hash+x*r) mod q */ + mp_free(hxr); + mp_free(kinv); + mp_free(k); + mp_free(hash); put_stringz(bs, "ssh-dss"); put_uint32(bs, 40); for (i = 0; i < 20; i++) - put_byte(bs, bignum_byte(r, 19 - i)); + put_byte(bs, mp_get_byte(r, 19 - i)); for (i = 0; i < 20; i++) - put_byte(bs, bignum_byte(s, 19 - i)); - freebn(r); - freebn(s); + put_byte(bs, mp_get_byte(s, 19 - i)); + mp_free(r); + mp_free(s); } const ssh_keyalg ssh_dss = { diff --git a/sshdssg.c b/sshdssg.c index f6905b8d..cece7b0f 100644 --- a/sshdssg.c +++ b/sshdssg.c 
@@ -4,16 +4,11 @@ #include "misc.h" #include "ssh.h" +#include "mpint.h" int dsa_generate(struct dss_key *key, int bits, progfn_t pfn, void *pfnparam) { - Bignum qm1, power, g, h, tmp; - unsigned pfirst, qfirst; - int progress; - - key->sshk.vt = &ssh_dss; - /* * Set up the phase limits for the progress report. We do this * by passing minus the phase number. @@ -59,30 +54,19 @@ int dsa_generate(struct dss_key *key, int bits, progfn_t pfn, pfn(pfnparam, PROGFN_PHASE_EXTENT, 3, 0x2000); pfn(pfnparam, PROGFN_EXP_PHASE, 3, -32768); - /* - * In phase four we are finding an element x between 1 and q-1 - * (exclusive), by inventing 160 random bits and hoping they - * come out to a plausible number; so assuming q is uniformly - * distributed between 2^159 and 2^160, the chance of any given - * attempt succeeding is somewhere between 0.5 and 1. Lacking - * the energy to arrange to be able to specify this probability - * _after_ generating q, we'll just set it to 0.75. - */ - pfn(pfnparam, PROGFN_PHASE_EXTENT, 4, 0x2000); - pfn(pfnparam, PROGFN_EXP_PHASE, 4, -49152); - pfn(pfnparam, PROGFN_READY, 0, 0); + unsigned pfirst, qfirst; invent_firstbits(&pfirst, &qfirst); /* * Generate q: a prime of length 160. */ - key->q = primegen(160, 2, 2, NULL, 1, pfn, pfnparam, qfirst); + mp_int *q = primegen(160, 2, 2, NULL, 1, pfn, pfnparam, qfirst); /* * Now generate p: a prime of length `bits', such that p-1 is * divisible by q. */ - key->p = primegen(bits-160, 2, 2, key->q, 2, pfn, pfnparam, pfirst); + mp_int *p = primegen(bits-160, 2, 2, q, 2, pfn, pfnparam, pfirst); /* * Next we need g. Raise 2 to the power (p-1)/q modulo p, and @@ -90,58 +74,40 @@ int dsa_generate(struct dss_key *key, int bits, progfn_t pfn, * soon as we hit a non-unit (and non-zero!) one, that'll do * for g. 
*/ - power = bigdiv(key->p, key->q); /* this is floor(p/q) == (p-1)/q */ - h = bignum_from_long(1); - progress = 0; + mp_int *power = mp_div(p, q); /* this is floor(p/q) == (p-1)/q */ + mp_int *h = mp_from_integer(1); + int progress = 0; + mp_int *g; while (1) { pfn(pfnparam, PROGFN_PROGRESS, 3, ++progress); - g = modpow(h, power, key->p); - if (bignum_cmp(g, One) > 0) + g = mp_modpow(h, power, p); + if (mp_hs_integer(g, 2)) break; /* got one */ - tmp = h; - h = bignum_add_long(h, 1); - freebn(tmp); + mp_free(g); + mp_add_integer_into(h, h, 1); } - key->g = g; - freebn(h); + mp_free(h); + mp_free(power); /* * Now we're nearly done. All we need now is our private key x, * which should be a number between 1 and q-1 exclusive, and * our public key y = g^x mod p. */ - qm1 = copybn(key->q); - decbn(qm1); - progress = 0; - while (1) { - int i, v, byte, bitsleft; - Bignum x; + mp_int *two = mp_from_integer(2); + mp_int *qm1 = mp_copy(q); + mp_sub_integer_into(qm1, qm1, 1); + mp_int *x = mp_random_in_range(two, qm1); + mp_free(two); + mp_free(qm1); - pfn(pfnparam, PROGFN_PROGRESS, 4, ++progress); - x = bn_power_2(159); - byte = 0; - bitsleft = 0; + key->sshk.vt = &ssh_dss; - for (i = 0; i < 160; i++) { - if (bitsleft <= 0) - bitsleft = 8, byte = random_byte(); - v = byte & 1; - byte >>= 1; - bitsleft--; - bignum_set_bit(x, i, v); - } - - if (bignum_cmp(x, One) <= 0 || bignum_cmp(x, qm1) >= 0) { - freebn(x); - continue; - } else { - key->x = x; - break; - } - } - freebn(qm1); - - key->y = modpow(key->g, key->x, key->p); + key->p = p; + key->q = q; + key->g = g; + key->x = x; + key->y = mp_modpow(key->g, key->x, key->p); return 1; } diff --git a/sshecc.c b/sshecc.c index 02271969..4f0b9217 100644 --- a/sshecc.c +++ b/sshecc.c @@ -36,84 +36,55 @@ #include #include "ssh.h" +#include "mpint.h" +#include "ecc.h" /* ---------------------------------------------------------------------- * Elliptic curve definitions */ -static void initialise_wcurve(struct ec_curve *curve, int 
bits, - const unsigned char *p, - const unsigned char *a, const unsigned char *b, - const unsigned char *n, const unsigned char *Gx, - const unsigned char *Gy) +static void initialise_common( + struct ec_curve *curve, EllipticCurveType type, mp_int *p) { - int length = bits / 8; - if (bits % 8) ++length; - - curve->type = EC_WEIERSTRASS; - - curve->fieldBits = bits; - curve->p = bignum_from_bytes(p, length); - - /* Curve co-efficients */ - curve->w.a = bignum_from_bytes(a, length); - curve->w.b = bignum_from_bytes(b, length); - - /* Group order and generator */ - curve->w.n = bignum_from_bytes(n, length); - curve->w.G.x = bignum_from_bytes(Gx, length); - curve->w.G.y = bignum_from_bytes(Gy, length); - curve->w.G.curve = curve; - curve->w.G.infinity = false; + curve->type = type; + curve->p = mp_copy(p); + curve->fieldBits = mp_get_nbits(p); + curve->fieldBytes = (curve->fieldBits + 7) / 8; } -static void initialise_mcurve(struct ec_curve *curve, int bits, - const unsigned char *p, - const unsigned char *a, const unsigned char *b, - const unsigned char *Gx) +static void initialise_wcurve( + struct ec_curve *curve, mp_int *p, mp_int *a, mp_int *b, + mp_int *nonsquare, mp_int *G_x, mp_int *G_y, mp_int *G_order) { - int length = bits / 8; - if (bits % 8) ++length; + initialise_common(curve, EC_WEIERSTRASS, p); - curve->type = EC_MONTGOMERY; + curve->w.wc = ecc_weierstrass_curve(p, a, b, nonsquare); - curve->fieldBits = bits; - curve->p = bignum_from_bytes(p, length); - - /* Curve co-efficients */ - curve->m.a = bignum_from_bytes(a, length); - curve->m.b = bignum_from_bytes(b, length); - - /* Generator */ - curve->m.G.x = bignum_from_bytes(Gx, length); - curve->m.G.y = NULL; - curve->m.G.z = NULL; - curve->m.G.curve = curve; - curve->m.G.infinity = false; + curve->w.G = ecc_weierstrass_point_new(curve->w.wc, G_x, G_y); + curve->w.G_order = mp_copy(G_order); } -static void initialise_ecurve(struct ec_curve *curve, int bits, - const unsigned char *p, - const unsigned char 
*l, const unsigned char *d, - const unsigned char *Bx, const unsigned char *By) +static void initialise_mcurve( + struct ec_curve *curve, mp_int *p, mp_int *a, mp_int *b, + mp_int *G_x) { - int length = bits / 8; - if (bits % 8) ++length; + initialise_common(curve, EC_MONTGOMERY, p); - curve->type = EC_EDWARDS; + curve->m.mc = ecc_montgomery_curve(p, a, b); - curve->fieldBits = bits; - curve->p = bignum_from_bytes(p, length); + curve->m.G = ecc_montgomery_point_new(curve->m.mc, G_x); +} - /* Curve co-efficients */ - curve->e.l = bignum_from_bytes(l, length); - curve->e.d = bignum_from_bytes(d, length); +static void initialise_ecurve( + struct ec_curve *curve, mp_int *p, mp_int *d, mp_int *a, + mp_int *nonsquare, mp_int *G_x, mp_int *G_y, mp_int *G_order) +{ + initialise_common(curve, EC_EDWARDS, p); - /* Group order and generator */ - curve->e.B.x = bignum_from_bytes(Bx, length); - curve->e.B.y = bignum_from_bytes(By, length); - curve->e.B.curve = curve; - curve->e.B.infinity = false; + curve->e.ec = ecc_edwards_curve(p, d, a, nonsquare); + + curve->e.G = ecc_edwards_point_new(curve->e.ec, G_x, G_y); + curve->e.G_order = mp_copy(G_order); } static struct ec_curve *ec_p256(void) @@ -123,44 +94,22 @@ static struct ec_curve *ec_p256(void) if (!initialised) { - static const unsigned char p[] = { - 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff - }; - static const unsigned char a[] = { - 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc - }; - static const unsigned char b[] = { - 0x5a, 0xc6, 0x35, 0xd8, 0xaa, 0x3a, 0x93, 0xe7, - 0xb3, 0xeb, 0xbd, 0x55, 0x76, 0x98, 0x86, 0xbc, - 0x65, 0x1d, 0x06, 0xb0, 0xcc, 0x53, 0xb0, 0xf6, - 0x3b, 0xce, 0x3c, 0x3e, 0x27, 0xd2, 0x60, 0x4b - }; 
- static const unsigned char n[] = { - 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xbc, 0xe6, 0xfa, 0xad, 0xa7, 0x17, 0x9e, 0x84, - 0xf3, 0xb9, 0xca, 0xc2, 0xfc, 0x63, 0x25, 0x51 - }; - static const unsigned char Gx[] = { - 0x6b, 0x17, 0xd1, 0xf2, 0xe1, 0x2c, 0x42, 0x47, - 0xf8, 0xbc, 0xe6, 0xe5, 0x63, 0xa4, 0x40, 0xf2, - 0x77, 0x03, 0x7d, 0x81, 0x2d, 0xeb, 0x33, 0xa0, - 0xf4, 0xa1, 0x39, 0x45, 0xd8, 0x98, 0xc2, 0x96 - }; - static const unsigned char Gy[] = { - 0x4f, 0xe3, 0x42, 0xe2, 0xfe, 0x1a, 0x7f, 0x9b, - 0x8e, 0xe7, 0xeb, 0x4a, 0x7c, 0x0f, 0x9e, 0x16, - 0x2b, 0xce, 0x33, 0x57, 0x6b, 0x31, 0x5e, 0xce, - 0xcb, 0xb6, 0x40, 0x68, 0x37, 0xbf, 0x51, 0xf5 - }; + mp_int *p = MP_LITERAL(0xffffffff00000001000000000000000000000000ffffffffffffffffffffffff); + mp_int *a = MP_LITERAL(0xffffffff00000001000000000000000000000000fffffffffffffffffffffffc); + mp_int *b = MP_LITERAL(0x5ac635d8aa3a93e7b3ebbd55769886bc651d06b0cc53b0f63bce3c3e27d2604b); + mp_int *G_x = MP_LITERAL(0x6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296); + mp_int *G_y = MP_LITERAL(0x4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5); + mp_int *G_order = MP_LITERAL(0xffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632551); + mp_int *nonsquare_mod_p = mp_from_integer(3); + initialise_wcurve(&curve, p, a, b, nonsquare_mod_p, G_x, G_y, G_order); + mp_free(p); + mp_free(a); + mp_free(b); + mp_free(G_x); + mp_free(G_y); + mp_free(G_order); + mp_free(nonsquare_mod_p); - initialise_wcurve(&curve, 256, p, a, b, n, Gx, Gy); curve.textname = curve.name = "nistp256"; /* Now initialised, no need to do it again */ @@ -177,56 +126,22 @@ static struct ec_curve *ec_p384(void) if (!initialised) { - static const unsigned char p[] = { - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
0xfe, - 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff - }; - static const unsigned char a[] = { - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe, - 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xfc - }; - static const unsigned char b[] = { - 0xb3, 0x31, 0x2f, 0xa7, 0xe2, 0x3e, 0xe7, 0xe4, - 0x98, 0x8e, 0x05, 0x6b, 0xe3, 0xf8, 0x2d, 0x19, - 0x18, 0x1d, 0x9c, 0x6e, 0xfe, 0x81, 0x41, 0x12, - 0x03, 0x14, 0x08, 0x8f, 0x50, 0x13, 0x87, 0x5a, - 0xc6, 0x56, 0x39, 0x8d, 0x8a, 0x2e, 0xd1, 0x9d, - 0x2a, 0x85, 0xc8, 0xed, 0xd3, 0xec, 0x2a, 0xef - }; - static const unsigned char n[] = { - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xc7, 0x63, 0x4d, 0x81, 0xf4, 0x37, 0x2d, 0xdf, - 0x58, 0x1a, 0x0d, 0xb2, 0x48, 0xb0, 0xa7, 0x7a, - 0xec, 0xec, 0x19, 0x6a, 0xcc, 0xc5, 0x29, 0x73 - }; - static const unsigned char Gx[] = { - 0xaa, 0x87, 0xca, 0x22, 0xbe, 0x8b, 0x05, 0x37, - 0x8e, 0xb1, 0xc7, 0x1e, 0xf3, 0x20, 0xad, 0x74, - 0x6e, 0x1d, 0x3b, 0x62, 0x8b, 0xa7, 0x9b, 0x98, - 0x59, 0xf7, 0x41, 0xe0, 0x82, 0x54, 0x2a, 0x38, - 0x55, 0x02, 0xf2, 0x5d, 0xbf, 0x55, 0x29, 0x6c, - 0x3a, 0x54, 0x5e, 0x38, 0x72, 0x76, 0x0a, 0xb7 - }; - static const unsigned char Gy[] = { - 0x36, 0x17, 0xde, 0x4a, 0x96, 0x26, 0x2c, 0x6f, - 0x5d, 0x9e, 0x98, 0xbf, 0x92, 0x92, 0xdc, 0x29, - 0xf8, 0xf4, 0x1d, 0xbd, 0x28, 0x9a, 0x14, 0x7c, - 0xe9, 0xda, 0x31, 0x13, 0xb5, 0xf0, 0xb8, 0xc0, - 0x0a, 0x60, 0xb1, 0xce, 0x1d, 0x7e, 0x81, 0x9d, - 0x7a, 0x43, 0x1d, 0x7c, 0x90, 0xea, 0x0e, 0x5f - }; + mp_int *p = MP_LITERAL(0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000ffffffff); + mp_int *a = 
MP_LITERAL(0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000fffffffc); + mp_int *b = MP_LITERAL(0xb3312fa7e23ee7e4988e056be3f82d19181d9c6efe8141120314088f5013875ac656398d8a2ed19d2a85c8edd3ec2aef); + mp_int *G_x = MP_LITERAL(0xaa87ca22be8b05378eb1c71ef320ad746e1d3b628ba79b9859f741e082542a385502f25dbf55296c3a545e3872760ab7); + mp_int *G_y = MP_LITERAL(0x3617de4a96262c6f5d9e98bf9292dc29f8f41dbd289a147ce9da3113b5f0b8c00a60b1ce1d7e819d7a431d7c90ea0e5f); + mp_int *G_order = MP_LITERAL(0xffffffffffffffffffffffffffffffffffffffffffffffffc7634d81f4372ddf581a0db248b0a77aecec196accc52973); + mp_int *nonsquare_mod_p = mp_from_integer(19); + initialise_wcurve(&curve, p, a, b, nonsquare_mod_p, G_x, G_y, G_order); + mp_free(p); + mp_free(a); + mp_free(b); + mp_free(G_x); + mp_free(G_y); + mp_free(G_order); + mp_free(nonsquare_mod_p); - initialise_wcurve(&curve, 384, p, a, b, n, Gx, Gy); curve.textname = curve.name = "nistp384"; /* Now initialised, no need to do it again */ @@ -243,74 +158,22 @@ static struct ec_curve *ec_p521(void) if (!initialised) { - static const unsigned char p[] = { - 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff - }; - static const unsigned char a[] = { - 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xfc - 
}; - static const unsigned char b[] = { - 0x00, 0x51, 0x95, 0x3e, 0xb9, 0x61, 0x8e, 0x1c, - 0x9a, 0x1f, 0x92, 0x9a, 0x21, 0xa0, 0xb6, 0x85, - 0x40, 0xee, 0xa2, 0xda, 0x72, 0x5b, 0x99, 0xb3, - 0x15, 0xf3, 0xb8, 0xb4, 0x89, 0x91, 0x8e, 0xf1, - 0x09, 0xe1, 0x56, 0x19, 0x39, 0x51, 0xec, 0x7e, - 0x93, 0x7b, 0x16, 0x52, 0xc0, 0xbd, 0x3b, 0xb1, - 0xbf, 0x07, 0x35, 0x73, 0xdf, 0x88, 0x3d, 0x2c, - 0x34, 0xf1, 0xef, 0x45, 0x1f, 0xd4, 0x6b, 0x50, - 0x3f, 0x00 - }; - static const unsigned char n[] = { - 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xfa, 0x51, 0x86, 0x87, 0x83, 0xbf, 0x2f, - 0x96, 0x6b, 0x7f, 0xcc, 0x01, 0x48, 0xf7, 0x09, - 0xa5, 0xd0, 0x3b, 0xb5, 0xc9, 0xb8, 0x89, 0x9c, - 0x47, 0xae, 0xbb, 0x6f, 0xb7, 0x1e, 0x91, 0x38, - 0x64, 0x09 - }; - static const unsigned char Gx[] = { - 0x00, 0xc6, 0x85, 0x8e, 0x06, 0xb7, 0x04, 0x04, - 0xe9, 0xcd, 0x9e, 0x3e, 0xcb, 0x66, 0x23, 0x95, - 0xb4, 0x42, 0x9c, 0x64, 0x81, 0x39, 0x05, 0x3f, - 0xb5, 0x21, 0xf8, 0x28, 0xaf, 0x60, 0x6b, 0x4d, - 0x3d, 0xba, 0xa1, 0x4b, 0x5e, 0x77, 0xef, 0xe7, - 0x59, 0x28, 0xfe, 0x1d, 0xc1, 0x27, 0xa2, 0xff, - 0xa8, 0xde, 0x33, 0x48, 0xb3, 0xc1, 0x85, 0x6a, - 0x42, 0x9b, 0xf9, 0x7e, 0x7e, 0x31, 0xc2, 0xe5, - 0xbd, 0x66 - }; - static const unsigned char Gy[] = { - 0x01, 0x18, 0x39, 0x29, 0x6a, 0x78, 0x9a, 0x3b, - 0xc0, 0x04, 0x5c, 0x8a, 0x5f, 0xb4, 0x2c, 0x7d, - 0x1b, 0xd9, 0x98, 0xf5, 0x44, 0x49, 0x57, 0x9b, - 0x44, 0x68, 0x17, 0xaf, 0xbd, 0x17, 0x27, 0x3e, - 0x66, 0x2c, 0x97, 0xee, 0x72, 0x99, 0x5e, 0xf4, - 0x26, 0x40, 0xc5, 0x50, 0xb9, 0x01, 0x3f, 0xad, - 0x07, 0x61, 0x35, 0x3c, 0x70, 0x86, 0xa2, 0x72, - 0xc2, 0x40, 0x88, 0xbe, 0x94, 0x76, 0x9f, 0xd1, - 0x66, 0x50 - }; + mp_int *p = MP_LITERAL(0x01ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff); + mp_int *a 
= MP_LITERAL(0x01fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffc); + mp_int *b = MP_LITERAL(0x0051953eb9618e1c9a1f929a21a0b68540eea2da725b99b315f3b8b489918ef109e156193951ec7e937b1652c0bd3bb1bf073573df883d2c34f1ef451fd46b503f00); + mp_int *G_x = MP_LITERAL(0x00c6858e06b70404e9cd9e3ecb662395b4429c648139053fb521f828af606b4d3dbaa14b5e77efe75928fe1dc127a2ffa8de3348b3c1856a429bf97e7e31c2e5bd66); + mp_int *G_y = MP_LITERAL(0x011839296a789a3bc0045c8a5fb42c7d1bd998f54449579b446817afbd17273e662c97ee72995ef42640c550b9013fad0761353c7086a272c24088be94769fd16650); + mp_int *G_order = MP_LITERAL(0x01fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffa51868783bf2f966b7fcc0148f709a5d03bb5c9b8899c47aebb6fb71e91386409); + mp_int *nonsquare_mod_p = mp_from_integer(3); + initialise_wcurve(&curve, p, a, b, nonsquare_mod_p, G_x, G_y, G_order); + mp_free(p); + mp_free(a); + mp_free(b); + mp_free(G_x); + mp_free(G_y); + mp_free(G_order); + mp_free(nonsquare_mod_p); - initialise_wcurve(&curve, 521, p, a, b, n, Gx, Gy); curve.textname = curve.name = "nistp521"; /* Now initialised, no need to do it again */ @@ -327,32 +190,16 @@ static struct ec_curve *ec_curve25519(void) if (!initialised) { - static const unsigned char p[] = { - 0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xed - }; - static const unsigned char a[] = { - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x6d, 0x06 - }; - static const unsigned char b[] = { - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 - 
}; - static const unsigned char gx[32] = { - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09 - }; + mp_int *p = MP_LITERAL(0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffed); + mp_int *a = MP_LITERAL(0x0000000000000000000000000000000000000000000000000000000000076d06); + mp_int *b = MP_LITERAL(0x0000000000000000000000000000000000000000000000000000000000000001); + mp_int *G_x = MP_LITERAL(0x0000000000000000000000000000000000000000000000000000000000000009); + initialise_mcurve(&curve, p, a, b, G_x); + mp_free(p); + mp_free(a); + mp_free(b); + mp_free(G_x); - initialise_mcurve(&curve, 256, p, a, b, gx); /* This curve doesn't need a name, because it's never used in * any format that embeds the curve name */ curve.name = NULL; @@ -372,42 +219,26 @@ static struct ec_curve *ec_ed25519(void) if (!initialised) { - static const unsigned char q[] = { - 0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xed - }; - static const unsigned char l[32] = { - 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x14, 0xde, 0xf9, 0xde, 0xa2, 0xf7, 0x9c, 0xd6, - 0x58, 0x12, 0x63, 0x1a, 0x5c, 0xf5, 0xd3, 0xed - }; - static const unsigned char d[32] = { - 0x52, 0x03, 0x6c, 0xee, 0x2b, 0x6f, 0xfe, 0x73, - 0x8c, 0xc7, 0x40, 0x79, 0x77, 0x79, 0xe8, 0x98, - 0x00, 0x70, 0x0a, 0x4d, 0x41, 0x41, 0xd8, 0xab, - 0x75, 0xeb, 0x4d, 0xca, 0x13, 0x59, 0x78, 0xa3 - }; - static const unsigned char Bx[32] = { - 0x21, 0x69, 0x36, 0xd3, 0xcd, 0x6e, 0x53, 0xfe, - 0xc0, 0xa4, 0xe2, 0x31, 0xfd, 0xd6, 0xdc, 0x5c, - 0x69, 0x2c, 0xc7, 0x60, 0x95, 0x25, 0xa7, 0xb2, - 0xc9, 0x56, 0x2d, 0x60, 0x8f, 0x25, 0xd5, 0x1a - }; - static const unsigned char By[32] = { - 0x66, 
0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, - 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, - 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, - 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x58 - }; + mp_int *p = MP_LITERAL(0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffed); + mp_int *d = MP_LITERAL(0x52036cee2b6ffe738cc740797779e89800700a4d4141d8ab75eb4dca135978a3); + mp_int *a = MP_LITERAL(0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffec); /* == p-1 */ + mp_int *G_x = MP_LITERAL(0x216936d3cd6e53fec0a4e231fdd6dc5c692cc7609525a7b2c9562d608f25d51a); + mp_int *G_y = MP_LITERAL(0x6666666666666666666666666666666666666666666666666666666666666658); + mp_int *G_order = MP_LITERAL(0x1000000000000000000000000000000014def9dea2f79cd65812631a5cf5d3ed); + mp_int *nonsquare_mod_p = mp_from_integer(2); + initialise_ecurve(&curve, p, d, a, nonsquare_mod_p, G_x, G_y, G_order); + mp_free(p); + mp_free(d); + mp_free(a); + mp_free(G_x); + mp_free(G_y); + mp_free(G_order); + mp_free(nonsquare_mod_p); /* This curve doesn't need a name, because it's never used in * any format that embeds the curve name */ curve.name = NULL; - initialise_ecurve(&curve, 256, q, l, d, Bx, By); curve.textname = "Ed25519"; /* Now initialised, no need to do it again */ @@ -417,1276 +248,10 @@ static struct ec_curve *ec_ed25519(void) return &curve; } -/* Return 1 if a is -3 % p, otherwise return 0 - * This is used because there are some maths optimisations */ -static bool ec_aminus3(const struct ec_curve *curve) -{ - bool ret; - Bignum _p; - - if (curve->type != EC_WEIERSTRASS) { - return false; - } - - _p = bignum_add_long(curve->w.a, 3); - - ret = !bignum_cmp(curve->p, _p); - freebn(_p); - return ret; -} - -/* ---------------------------------------------------------------------- - * Elliptic curve field maths - */ - -static Bignum ecf_add(const Bignum a, const Bignum b, - const struct ec_curve *curve) -{ - Bignum a1, b1, ab, ret; - - a1 = bigmod(a, curve->p); - b1 = bigmod(b, 
curve->p); - - ab = bigadd(a1, b1); - freebn(a1); - freebn(b1); - - ret = bigmod(ab, curve->p); - freebn(ab); - - return ret; -} - -static Bignum ecf_square(const Bignum a, const struct ec_curve *curve) -{ - return modmul(a, a, curve->p); -} - -static Bignum ecf_treble(const Bignum a, const struct ec_curve *curve) -{ - Bignum ret, tmp; - - /* Double */ - tmp = bignum_lshift(a, 1); - - /* Add itself (i.e. treble) */ - ret = bigadd(tmp, a); - freebn(tmp); - - /* Normalise */ - while (bignum_cmp(ret, curve->p) >= 0) - { - tmp = bigsub(ret, curve->p); - assert(tmp); - freebn(ret); - ret = tmp; - } - - return ret; -} - -static Bignum ecf_double(const Bignum a, const struct ec_curve *curve) -{ - Bignum ret = bignum_lshift(a, 1); - if (bignum_cmp(ret, curve->p) >= 0) - { - Bignum tmp = bigsub(ret, curve->p); - assert(tmp); - freebn(ret); - return tmp; - } - else - { - return ret; - } -} - -/* ---------------------------------------------------------------------- - * Memory functions - */ - -void ec_point_free(struct ec_point *point) -{ - if (point == NULL) return; - point->curve = 0; - if (point->x) freebn(point->x); - if (point->y) freebn(point->y); - if (point->z) freebn(point->z); - point->infinity = false; - sfree(point); -} - -static struct ec_point *ec_point_new(const struct ec_curve *curve, - const Bignum x, const Bignum y, const Bignum z, - bool infinity) -{ - struct ec_point *point = snewn(1, struct ec_point); - point->curve = curve; - point->x = x; - point->y = y; - point->z = z; - point->infinity = infinity; - return point; -} - -static struct ec_point *ec_point_copy(const struct ec_point *a) -{ - if (a == NULL) return NULL; - return ec_point_new(a->curve, - a->x ? copybn(a->x) : NULL, - a->y ? copybn(a->y) : NULL, - a->z ? 
copybn(a->z) : NULL, - a->infinity); -} - -static bool ec_point_verify(const struct ec_point *a) -{ - if (a->infinity) { - return true; - } else if (a->curve->type == EC_EDWARDS) { - /* Check y^2 - x^2 - 1 - d * x^2 * y^2 == 0 */ - Bignum y2, x2, tmp, tmp2, tmp3; - bool ret; - - y2 = ecf_square(a->y, a->curve); - x2 = ecf_square(a->x, a->curve); - tmp = modmul(a->curve->e.d, x2, a->curve->p); - tmp2 = modmul(tmp, y2, a->curve->p); - freebn(tmp); - tmp = modsub(y2, x2, a->curve->p); - freebn(y2); - freebn(x2); - tmp3 = modsub(tmp, tmp2, a->curve->p); - freebn(tmp); - freebn(tmp2); - ret = !bignum_cmp(tmp3, One); - freebn(tmp3); - return ret; - } else if (a->curve->type == EC_WEIERSTRASS) { - /* Verify y^2 = x^3 + ax + b */ - bool ret = false; - - Bignum lhs = NULL, x3 = NULL, ax = NULL, x3ax = NULL, x3axm = NULL, x3axb = NULL, rhs = NULL; - - Bignum Three = bignum_from_long(3); - - lhs = modmul(a->y, a->y, a->curve->p); - - /* This uses montgomery multiplication to optimise */ - x3 = modpow(a->x, Three, a->curve->p); - freebn(Three); - ax = modmul(a->curve->w.a, a->x, a->curve->p); - x3ax = bigadd(x3, ax); - freebn(x3); x3 = NULL; - freebn(ax); ax = NULL; - x3axm = bigmod(x3ax, a->curve->p); - freebn(x3ax); x3ax = NULL; - x3axb = bigadd(x3axm, a->curve->w.b); - freebn(x3axm); x3axm = NULL; - rhs = bigmod(x3axb, a->curve->p); - freebn(x3axb); - - ret = !bignum_cmp(lhs, rhs); - freebn(lhs); - freebn(rhs); - - return ret; - } else { - return false; - } -} - -/* ---------------------------------------------------------------------- - * Elliptic curve point maths - */ - -/* Returns true on success and false on memory error */ -static bool ecp_normalise(struct ec_point *a) -{ - if (!a) { - /* No point */ - return false; - } - - if (a->infinity) { - /* Point is at infinity - i.e. 
normalised */ - return true; - } - - if (a->curve->type == EC_WEIERSTRASS) { - /* In Jacobian Coordinates the triple (X, Y, Z) represents - the affine point (X / Z^2, Y / Z^3) */ - - Bignum Z2, Z2inv, Z3, Z3inv, tx, ty; - - if (!a->x || !a->y) { - /* No point defined */ - return false; - } else if (!a->z) { - /* Already normalised */ - return true; - } - - Z2 = ecf_square(a->z, a->curve); - Z2inv = modinv(Z2, a->curve->p); - if (!Z2inv) { - freebn(Z2); - return false; - } - tx = modmul(a->x, Z2inv, a->curve->p); - freebn(Z2inv); - - Z3 = modmul(Z2, a->z, a->curve->p); - freebn(Z2); - Z3inv = modinv(Z3, a->curve->p); - freebn(Z3); - if (!Z3inv) { - freebn(tx); - return false; - } - ty = modmul(a->y, Z3inv, a->curve->p); - freebn(Z3inv); - - freebn(a->x); - a->x = tx; - freebn(a->y); - a->y = ty; - freebn(a->z); - a->z = NULL; - return true; - } else if (a->curve->type == EC_MONTGOMERY) { - /* In Montgomery (X : Z) represents the x co-ord (X / Z, ?) */ - - Bignum tmp, tmp2; - - if (!a->x) { - /* No point defined */ - return false; - } else if (!a->z) { - /* Already normalised */ - return true; - } - - tmp = modinv(a->z, a->curve->p); - if (!tmp) { - return false; - } - tmp2 = modmul(a->x, tmp, a->curve->p); - freebn(tmp); - - freebn(a->z); - a->z = NULL; - freebn(a->x); - a->x = tmp2; - return true; - } else if (a->curve->type == EC_EDWARDS) { - /* Always normalised */ - return true; - } else { - return false; - } -} - -static struct ec_point *ecp_doublew(const struct ec_point *a, bool aminus3) -{ - Bignum S, M, outx, outy, outz; - - if (bignum_cmp(a->y, Zero) == 0) - { - /* Identity */ - return ec_point_new(a->curve, NULL, NULL, NULL, true); - } - - /* S = 4*X*Y^2 */ - { - Bignum Y2, XY2, _2XY2; - - Y2 = ecf_square(a->y, a->curve); - XY2 = modmul(a->x, Y2, a->curve->p); - freebn(Y2); - - _2XY2 = ecf_double(XY2, a->curve); - freebn(XY2); - S = ecf_double(_2XY2, a->curve); - freebn(_2XY2); - } - - /* Faster calculation if a = -3 */ - if (aminus3) { - /* if a = -3, 
then M can also be calculated as M = 3*(X + Z^2)*(X - Z^2) */ - Bignum Z2, XpZ2, XmZ2, second; - - if (a->z == NULL) { - Z2 = copybn(One); - } else { - Z2 = ecf_square(a->z, a->curve); - } - - XpZ2 = ecf_add(a->x, Z2, a->curve); - XmZ2 = modsub(a->x, Z2, a->curve->p); - freebn(Z2); - - second = modmul(XpZ2, XmZ2, a->curve->p); - freebn(XpZ2); - freebn(XmZ2); - - M = ecf_treble(second, a->curve); - freebn(second); - } else { - /* M = 3*X^2 + a*Z^4 */ - Bignum _3X2, X2, aZ4; - - if (a->z == NULL) { - aZ4 = copybn(a->curve->w.a); - } else { - Bignum Z2, Z4; - - Z2 = ecf_square(a->z, a->curve); - Z4 = ecf_square(Z2, a->curve); - freebn(Z2); - aZ4 = modmul(a->curve->w.a, Z4, a->curve->p); - freebn(Z4); - } - - X2 = modmul(a->x, a->x, a->curve->p); - _3X2 = ecf_treble(X2, a->curve); - freebn(X2); - M = ecf_add(_3X2, aZ4, a->curve); - freebn(_3X2); - freebn(aZ4); - } - - /* X' = M^2 - 2*S */ - { - Bignum M2, _2S; - - M2 = ecf_square(M, a->curve); - _2S = ecf_double(S, a->curve); - outx = modsub(M2, _2S, a->curve->p); - freebn(M2); - freebn(_2S); - } - - /* Y' = M*(S - X') - 8*Y^4 */ - { - Bignum SX, MSX, Eight, Y2, Y4, _8Y4; - - SX = modsub(S, outx, a->curve->p); - freebn(S); - MSX = modmul(M, SX, a->curve->p); - freebn(SX); - freebn(M); - Y2 = ecf_square(a->y, a->curve); - Y4 = ecf_square(Y2, a->curve); - freebn(Y2); - Eight = bignum_from_long(8); - _8Y4 = modmul(Eight, Y4, a->curve->p); - freebn(Eight); - freebn(Y4); - outy = modsub(MSX, _8Y4, a->curve->p); - freebn(MSX); - freebn(_8Y4); - } - - /* Z' = 2*Y*Z */ - { - Bignum YZ; - - if (a->z == NULL) { - YZ = copybn(a->y); - } else { - YZ = modmul(a->y, a->z, a->curve->p); - } - - outz = ecf_double(YZ, a->curve); - freebn(YZ); - } - - return ec_point_new(a->curve, outx, outy, outz, false); -} - -static struct ec_point *ecp_doublem(const struct ec_point *a) -{ - Bignum z, outx, outz, xpz, xmz; - - z = a->z; - if (!z) { - z = One; - } - - /* 4xz = (x + z)^2 - (x - z)^2 */ - { - Bignum tmp; - - tmp = ecf_add(a->x, z, 
a->curve); - xpz = ecf_square(tmp, a->curve); - freebn(tmp); - - tmp = modsub(a->x, z, a->curve->p); - xmz = ecf_square(tmp, a->curve); - freebn(tmp); - } - - /* outx = (x + z)^2 * (x - z)^2 */ - outx = modmul(xpz, xmz, a->curve->p); - - /* outz = 4xz * ((x - z)^2 + ((A + 2) / 4)*4xz) */ - { - Bignum _4xz, tmp, tmp2, tmp3; - - tmp = bignum_from_long(2); - tmp2 = ecf_add(a->curve->m.a, tmp, a->curve); - freebn(tmp); - - _4xz = modsub(xpz, xmz, a->curve->p); - freebn(xpz); - tmp = modmul(tmp2, _4xz, a->curve->p); - freebn(tmp2); - - tmp2 = bignum_from_long(4); - tmp3 = modinv(tmp2, a->curve->p); - freebn(tmp2); - if (!tmp3) { - freebn(tmp); - freebn(_4xz); - freebn(outx); - freebn(xmz); - return NULL; - } - tmp2 = modmul(tmp, tmp3, a->curve->p); - freebn(tmp); - freebn(tmp3); - - tmp = ecf_add(xmz, tmp2, a->curve); - freebn(xmz); - freebn(tmp2); - outz = modmul(_4xz, tmp, a->curve->p); - freebn(_4xz); - freebn(tmp); - } - - return ec_point_new(a->curve, outx, NULL, outz, false); -} - -/* Forward declaration for Edwards curve doubling */ -static struct ec_point *ecp_add(const struct ec_point *a, - const struct ec_point *b, - bool aminus3); - -static struct ec_point *ecp_double(const struct ec_point *a, bool aminus3) -{ - if (a->infinity) - { - /* Identity */ - return ec_point_new(a->curve, NULL, NULL, NULL, true); - } - - if (a->curve->type == EC_EDWARDS) - { - return ecp_add(a, a, aminus3); - } - else if (a->curve->type == EC_WEIERSTRASS) - { - return ecp_doublew(a, aminus3); - } - else - { - return ecp_doublem(a); - } -} - -static struct ec_point *ecp_addw(const struct ec_point *a, - const struct ec_point *b, - bool aminus3) -{ - Bignum U1, U2, S1, S2, outx, outy, outz; - - /* U1 = X1*Z2^2 */ - /* S1 = Y1*Z2^3 */ - if (b->z) { - Bignum Z2, Z3; - - Z2 = ecf_square(b->z, a->curve); - U1 = modmul(a->x, Z2, a->curve->p); - Z3 = modmul(Z2, b->z, a->curve->p); - freebn(Z2); - S1 = modmul(a->y, Z3, a->curve->p); - freebn(Z3); - } else { - U1 = copybn(a->x); - S1 = 
copybn(a->y); - } - - /* U2 = X2*Z1^2 */ - /* S2 = Y2*Z1^3 */ - if (a->z) { - Bignum Z2, Z3; - - Z2 = ecf_square(a->z, b->curve); - U2 = modmul(b->x, Z2, b->curve->p); - Z3 = modmul(Z2, a->z, b->curve->p); - freebn(Z2); - S2 = modmul(b->y, Z3, b->curve->p); - freebn(Z3); - } else { - U2 = copybn(b->x); - S2 = copybn(b->y); - } - - /* Check if multiplying by self */ - if (bignum_cmp(U1, U2) == 0) - { - freebn(U1); - freebn(U2); - if (bignum_cmp(S1, S2) == 0) - { - freebn(S1); - freebn(S2); - return ecp_double(a, aminus3); - } - else - { - freebn(S1); - freebn(S2); - /* Infinity */ - return ec_point_new(a->curve, NULL, NULL, NULL, true); - } - } - - { - Bignum H, R, UH2, H3; - - /* H = U2 - U1 */ - H = modsub(U2, U1, a->curve->p); - freebn(U2); - - /* R = S2 - S1 */ - R = modsub(S2, S1, a->curve->p); - freebn(S2); - - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - { - Bignum R2, H2, _2UH2, first; - - H2 = ecf_square(H, a->curve); - UH2 = modmul(U1, H2, a->curve->p); - freebn(U1); - H3 = modmul(H2, H, a->curve->p); - freebn(H2); - R2 = ecf_square(R, a->curve); - _2UH2 = ecf_double(UH2, a->curve); - first = modsub(R2, H3, a->curve->p); - freebn(R2); - outx = modsub(first, _2UH2, a->curve->p); - freebn(first); - freebn(_2UH2); - } - - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - { - Bignum RUH2mX, UH2mX, SH3; - - UH2mX = modsub(UH2, outx, a->curve->p); - freebn(UH2); - RUH2mX = modmul(R, UH2mX, a->curve->p); - freebn(UH2mX); - freebn(R); - SH3 = modmul(S1, H3, a->curve->p); - freebn(S1); - freebn(H3); - - outy = modsub(RUH2mX, SH3, a->curve->p); - freebn(RUH2mX); - freebn(SH3); - } - - /* Z3 = H*Z1*Z2 */ - if (a->z && b->z) { - Bignum ZZ; - - ZZ = modmul(a->z, b->z, a->curve->p); - outz = modmul(H, ZZ, a->curve->p); - freebn(H); - freebn(ZZ); - } else if (a->z) { - outz = modmul(H, a->z, a->curve->p); - freebn(H); - } else if (b->z) { - outz = modmul(H, b->z, a->curve->p); - freebn(H); - } else { - outz = H; - } - } - - return ec_point_new(a->curve, outx, outy, outz, false); -} - -static 
struct ec_point *ecp_addm(const struct ec_point *a, - const struct ec_point *b, - const struct ec_point *base) -{ - Bignum outx, outz, az, bz; - - az = a->z; - if (!az) { - az = One; - } - bz = b->z; - if (!bz) { - bz = One; - } - - /* a-b is maintained at 1 due to Montgomery ladder implementation */ - /* Xa+b = Za-b * ((Xa - Za)*(Xb + Zb) + (Xa + Za)*(Xb - Zb))^2 */ - /* Za+b = Xa-b * ((Xa - Za)*(Xb + Zb) - (Xa + Za)*(Xb - Zb))^2 */ - { - Bignum tmp, tmp2, tmp3, tmp4; - - /* (Xa + Za) * (Xb - Zb) */ - tmp = ecf_add(a->x, az, a->curve); - tmp2 = modsub(b->x, bz, a->curve->p); - tmp3 = modmul(tmp, tmp2, a->curve->p); - freebn(tmp); - freebn(tmp2); - - /* (Xa - Za) * (Xb + Zb) */ - tmp = modsub(a->x, az, a->curve->p); - tmp2 = ecf_add(b->x, bz, a->curve); - tmp4 = modmul(tmp, tmp2, a->curve->p); - freebn(tmp); - freebn(tmp2); - - tmp = ecf_add(tmp3, tmp4, a->curve); - outx = ecf_square(tmp, a->curve); - freebn(tmp); - - tmp = modsub(tmp3, tmp4, a->curve->p); - freebn(tmp3); - freebn(tmp4); - tmp2 = ecf_square(tmp, a->curve); - freebn(tmp); - outz = modmul(base->x, tmp2, a->curve->p); - freebn(tmp2); - } - - return ec_point_new(a->curve, outx, NULL, outz, false); -} - -static struct ec_point *ecp_adde(const struct ec_point *a, - const struct ec_point *b) -{ - Bignum outx, outy, dmul; - - /* outx = (a->x * b->y + b->x * a->y) / - * (1 + a->curve->e.d * a->x * b->x * a->y * b->y) */ - { - Bignum tmp, tmp2, tmp3, tmp4; - - tmp = modmul(a->x, b->y, a->curve->p); - tmp2 = modmul(b->x, a->y, a->curve->p); - tmp3 = ecf_add(tmp, tmp2, a->curve); - - tmp4 = modmul(tmp, tmp2, a->curve->p); - freebn(tmp); - freebn(tmp2); - dmul = modmul(a->curve->e.d, tmp4, a->curve->p); - freebn(tmp4); - - tmp = ecf_add(One, dmul, a->curve); - tmp2 = modinv(tmp, a->curve->p); - freebn(tmp); - if (!tmp2) - { - freebn(tmp3); - freebn(dmul); - return NULL; - } - - outx = modmul(tmp3, tmp2, a->curve->p); - freebn(tmp3); - freebn(tmp2); - } - - /* outy = (a->y * b->y + a->x * b->x) / - * (1 - 
a->curve->e.d * a->x * b->x * a->y * b->y) */ - { - Bignum tmp, tmp2, tmp3, tmp4; - - tmp = modsub(One, dmul, a->curve->p); - freebn(dmul); - - tmp2 = modinv(tmp, a->curve->p); - freebn(tmp); - if (!tmp2) - { - freebn(outx); - return NULL; - } - - tmp = modmul(a->y, b->y, a->curve->p); - tmp3 = modmul(a->x, b->x, a->curve->p); - tmp4 = ecf_add(tmp, tmp3, a->curve); - freebn(tmp); - freebn(tmp3); - - outy = modmul(tmp4, tmp2, a->curve->p); - freebn(tmp4); - freebn(tmp2); - } - - return ec_point_new(a->curve, outx, outy, NULL, false); -} - -static struct ec_point *ecp_add(const struct ec_point *a, - const struct ec_point *b, - bool aminus3) -{ - if (a->curve != b->curve) { - return NULL; - } - - /* Check if multiplying by infinity */ - if (a->infinity) return ec_point_copy(b); - if (b->infinity) return ec_point_copy(a); - - if (a->curve->type == EC_EDWARDS) - { - return ecp_adde(a, b); - } - - if (a->curve->type == EC_WEIERSTRASS) - { - return ecp_addw(a, b, aminus3); - } - - return NULL; -} - -static struct ec_point *ecp_mul_( - const struct ec_point *a, const Bignum b, bool aminus3) -{ - struct ec_point *A, *ret; - int bits, i; - - A = ec_point_copy(a); - ret = ec_point_new(a->curve, NULL, NULL, NULL, true); - - bits = bignum_bitcount(b); - for (i = 0; i < bits; ++i) - { - if (bignum_bit(b, i)) - { - struct ec_point *tmp = ecp_add(ret, A, aminus3); - ec_point_free(ret); - ret = tmp; - } - if (i+1 != bits) - { - struct ec_point *tmp = ecp_double(A, aminus3); - ec_point_free(A); - A = tmp; - } - } - - ec_point_free(A); - return ret; -} - -static struct ec_point *ecp_mulw(const struct ec_point *a, const Bignum b) -{ - struct ec_point *ret = ecp_mul_(a, b, ec_aminus3(a->curve)); - - if (!ecp_normalise(ret)) { - ec_point_free(ret); - return NULL; - } - - return ret; -} - -static struct ec_point *ecp_mule(const struct ec_point *a, const Bignum b) -{ - int i; - struct ec_point *ret; - - ret = ec_point_new(a->curve, NULL, NULL, NULL, true); - - for (i = bignum_bitcount(b); 
i >= 0 && ret; --i) - { - { - struct ec_point *tmp = ecp_double(ret, false); - ec_point_free(ret); - ret = tmp; - } - if (ret && bignum_bit(b, i)) - { - struct ec_point *tmp = ecp_add(ret, a, false); - ec_point_free(ret); - ret = tmp; - } - } - - return ret; -} - -static struct ec_point *ecp_mulm(const struct ec_point *p, const Bignum n) -{ - struct ec_point *P1, *P2; - int bits, i; - - /* P1 <- P and P2 <- [2]P */ - P2 = ecp_double(p, false); - P1 = ec_point_copy(p); - - /* for i = bits − 2 down to 0 */ - bits = bignum_bitcount(n); - for (i = bits - 2; i >= 0; --i) - { - if (!bignum_bit(n, i)) - { - /* P2 <- P1 + P2 */ - struct ec_point *tmp = ecp_addm(P1, P2, p); - ec_point_free(P2); - P2 = tmp; - - /* P1 <- [2]P1 */ - tmp = ecp_double(P1, false); - ec_point_free(P1); - P1 = tmp; - } - else - { - /* P1 <- P1 + P2 */ - struct ec_point *tmp = ecp_addm(P1, P2, p); - ec_point_free(P1); - P1 = tmp; - - /* P2 <- [2]P2 */ - tmp = ecp_double(P2, false); - ec_point_free(P2); - P2 = tmp; - } - } - - ec_point_free(P2); - - if (!ecp_normalise(P1)) { - ec_point_free(P1); - return NULL; - } - - return P1; -} - -/* Not static because it is used by sshecdsag.c to generate a new key */ -struct ec_point *ecp_mul(const struct ec_point *a, const Bignum b) -{ - if (a->curve->type == EC_WEIERSTRASS) { - return ecp_mulw(a, b); - } else if (a->curve->type == EC_EDWARDS) { - return ecp_mule(a, b); - } else { - return ecp_mulm(a, b); - } -} - -static struct ec_point *ecp_summul(const Bignum a, const Bignum b, - const struct ec_point *point) -{ - struct ec_point *aG, *bP, *ret; - bool aminus3; - - if (point->curve->type != EC_WEIERSTRASS) { - return NULL; - } - - aminus3 = ec_aminus3(point->curve); - - aG = ecp_mul_(&point->curve->w.G, a, aminus3); - if (!aG) return NULL; - bP = ecp_mul_(point, b, aminus3); - if (!bP) { - ec_point_free(aG); - return NULL; - } - - ret = ecp_add(aG, bP, aminus3); - - ec_point_free(aG); - ec_point_free(bP); - - if (!ecp_normalise(ret)) { - ec_point_free(ret); 
- return NULL; - } - - return ret; -} -static Bignum *ecp_edx(const struct ec_curve *curve, const Bignum y) -{ - /* Get the x value on the given Edwards curve for a given y */ - Bignum x, xx; - - /* xx = (y^2 - 1) / (d * y^2 + 1) */ - { - Bignum tmp, tmp2, tmp3; - - tmp = ecf_square(y, curve); - tmp2 = modmul(curve->e.d, tmp, curve->p); - tmp3 = ecf_add(tmp2, One, curve); - freebn(tmp2); - tmp2 = modinv(tmp3, curve->p); - freebn(tmp3); - if (!tmp2) { - freebn(tmp); - return NULL; - } - - tmp3 = modsub(tmp, One, curve->p); - freebn(tmp); - xx = modmul(tmp3, tmp2, curve->p); - freebn(tmp3); - freebn(tmp2); - } - - /* x = xx^((p + 3) / 8) */ - { - Bignum tmp, tmp2; - - tmp = bignum_add_long(curve->p, 3); - tmp2 = bignum_rshift(tmp, 3); - freebn(tmp); - x = modpow(xx, tmp2, curve->p); - freebn(tmp2); - } - - /* if x^2 - xx != 0 then x = x*(2^((p - 1) / 4)) */ - { - Bignum tmp, tmp2; - - tmp = ecf_square(x, curve); - tmp2 = modsub(tmp, xx, curve->p); - freebn(tmp); - freebn(xx); - if (bignum_cmp(tmp2, Zero)) { - Bignum tmp3; - - freebn(tmp2); - - tmp = modsub(curve->p, One, curve->p); - tmp2 = bignum_rshift(tmp, 2); - freebn(tmp); - tmp = bignum_from_long(2); - tmp3 = modpow(tmp, tmp2, curve->p); - freebn(tmp); - freebn(tmp2); - - tmp = modmul(x, tmp3, curve->p); - freebn(x); - freebn(tmp3); - x = tmp; - } else { - freebn(tmp2); - } - } - - /* if x % 2 != 0 then x = p - x */ - if (bignum_bit(x, 0)) { - Bignum tmp = modsub(curve->p, x, curve->p); - freebn(x); - x = tmp; - } - - return x; -} - /* ---------------------------------------------------------------------- * Public point from private */ -struct ec_point *ec_public(const Bignum privateKey, const struct ec_curve *curve) -{ - if (curve->type == EC_WEIERSTRASS) { - return ecp_mul(&curve->w.G, privateKey); - } else if (curve->type == EC_EDWARDS) { - /* hash = H(sk) (where hash creates 2 * fieldBits) - * b = fieldBits - * a = 2^(b-2) + SUM(2^i * h_i) for i = 2 -> b-2 - * publicKey = aB */ - struct ec_point *ret; - 
unsigned char hash[512/8]; - Bignum a; - int i, keylen; - SHA512_State s; - SHA512_Init(&s); - - keylen = curve->fieldBits / 8; - for (i = 0; i < keylen; ++i) - put_byte(&s, bignum_byte(privateKey, i)); - SHA512_Final(&s, hash); - - /* The second part is simply turning the hash into a Bignum, - * however the 2^(b-2) bit *must* be set, and the bottom 3 - * bits *must* not be */ - hash[0] &= 0xf8; /* Unset bottom 3 bits (if set) */ - hash[31] &= 0x7f; /* Unset above (b-2) */ - hash[31] |= 0x40; /* Set 2^(b-2) */ - /* Chop off the top part and convert to int */ - a = bignum_from_bytes_le(hash, 32); - - ret = ecp_mul(&curve->e.B, a); - freebn(a); - return ret; - } else { - return NULL; - } -} - -/* ---------------------------------------------------------------------- - * Basic sign and verify routines - */ - -static bool _ecdsa_verify(const struct ec_point *publicKey, - const unsigned char *data, const int dataLen, - const Bignum r, const Bignum s) -{ - int z_bits, n_bits; - Bignum z; - bool valid = false; - - if (publicKey->curve->type != EC_WEIERSTRASS) { - return false; - } - - /* Sanity checks */ - if (bignum_cmp(r, Zero) == 0 || bignum_cmp(r, publicKey->curve->w.n) >= 0 - || bignum_cmp(s, Zero) == 0 || bignum_cmp(s, publicKey->curve->w.n) >= 0) - { - return false; - } - - /* z = left most bitlen(curve->n) of data */ - z = bignum_from_bytes(data, dataLen); - n_bits = bignum_bitcount(publicKey->curve->w.n); - z_bits = bignum_bitcount(z); - if (z_bits > n_bits) - { - Bignum tmp = bignum_rshift(z, z_bits - n_bits); - freebn(z); - z = tmp; - } - - /* Ensure z in range of n */ - { - Bignum tmp = bigmod(z, publicKey->curve->w.n); - freebn(z); - z = tmp; - } - - /* Calculate signature */ - { - Bignum w, x, u1, u2; - struct ec_point *tmp; - - w = modinv(s, publicKey->curve->w.n); - if (!w) { - freebn(z); - return false; - } - u1 = modmul(z, w, publicKey->curve->w.n); - u2 = modmul(r, w, publicKey->curve->w.n); - freebn(w); - - tmp = ecp_summul(u1, u2, publicKey); - 
freebn(u1); - freebn(u2); - if (!tmp) { - freebn(z); - return false; - } - - x = bigmod(tmp->x, publicKey->curve->w.n); - ec_point_free(tmp); - - valid = (bignum_cmp(r, x) == 0); - freebn(x); - } - - freebn(z); - - return valid; -} - -static void _ecdsa_sign(const Bignum privateKey, const struct ec_curve *curve, - const unsigned char *data, const int dataLen, - Bignum *r, Bignum *s) -{ - unsigned char digest[20]; - int z_bits, n_bits; - Bignum z, k; - struct ec_point *kG; - - *r = NULL; - *s = NULL; - - if (curve->type != EC_WEIERSTRASS) { - return; - } - - /* z = left most bitlen(curve->n) of data */ - z = bignum_from_bytes(data, dataLen); - n_bits = bignum_bitcount(curve->w.n); - z_bits = bignum_bitcount(z); - if (z_bits > n_bits) - { - Bignum tmp; - tmp = bignum_rshift(z, z_bits - n_bits); - freebn(z); - z = tmp; - } - - /* Generate k between 1 and curve->n, using the same deterministic - * k generation system we use for conventional DSA. */ - SHA_Simple(data, dataLen, digest); - k = dss_gen_k("ECDSA deterministic k generator", curve->w.n, privateKey, - digest, sizeof(digest)); - - kG = ecp_mul(&curve->w.G, k); - if (!kG) { - freebn(z); - freebn(k); - return; - } - - /* r = kG.x mod n */ - *r = bigmod(kG->x, curve->w.n); - ec_point_free(kG); - - /* s = (z + r * priv)/k mod n */ - { - Bignum rPriv, zMod, first, firstMod, kInv; - rPriv = modmul(*r, privateKey, curve->w.n); - zMod = bigmod(z, curve->w.n); - freebn(z); - first = bigadd(rPriv, zMod); - freebn(rPriv); - freebn(zMod); - firstMod = bigmod(first, curve->w.n); - freebn(first); - kInv = modinv(k, curve->w.n); - freebn(k); - if (!kInv) { - freebn(firstMod); - freebn(*r); - return; - } - *s = modmul(firstMod, kInv, curve->w.n); - freebn(firstMod); - freebn(kInv); - } -} - -/* ---------------------------------------------------------------------- - * Misc functions - */ - -static Bignum BinarySource_get_mp_le(BinarySource *src) -{ - ptrlen mp_str = get_string(src); - return bignum_from_bytes_le(mp_str.ptr, 
mp_str.len); -} -#define get_mp_le(src) BinarySource_get_mp_le(BinarySource_UPCAST(src)) - -static bool decodepoint_ed(const char *p, int length, struct ec_point *point) -{ - /* Got some conversion to do, first read in the y co-ord */ - bool negative; - - point->y = bignum_from_bytes_le((const unsigned char*)p, length); - if ((unsigned)bignum_bitcount(point->y) > point->curve->fieldBits) { - freebn(point->y); - point->y = NULL; - return false; - } - /* Read x bit and then reset it */ - negative = bignum_bit(point->y, point->curve->fieldBits - 1); - bignum_set_bit(point->y, point->curve->fieldBits - 1, 0); - bn_restore_invariant(point->y); - - /* Get the x from the y */ - point->x = ecp_edx(point->curve, point->y); - if (!point->x) { - freebn(point->y); - point->y = NULL; - return false; - } - if (negative) { - Bignum tmp = modsub(point->curve->p, point->x, point->curve->p); - freebn(point->x); - point->x = tmp; - } - - /* Verify the point is on the curve */ - if (!ec_point_verify(point)) { - freebn(point->x); - point->x = NULL; - freebn(point->y); - point->y = NULL; - return false; - } - - return true; -} - -static bool decodepoint(const char *p, int length, struct ec_point *point) -{ - if (point->curve->type == EC_EDWARDS) { - return decodepoint_ed(p, length, point); - } - - if (length < 1 || p[0] != 0x04) /* Only support uncompressed point */ - return false; - /* Skip compression flag */ - ++p; - --length; - /* The two values must be equal length */ - if (length % 2 != 0) { - point->x = NULL; - point->y = NULL; - point->z = NULL; - return false; - } - length = length / 2; - point->x = bignum_from_bytes(p, length); - p += length; - point->y = bignum_from_bytes(p, length); - point->z = NULL; - - /* Verify the point is on the curve */ - if (!ec_point_verify(point)) { - freebn(point->x); - point->x = NULL; - freebn(point->y); - point->y = NULL; - return false; - } - - return true; -} - -static bool BinarySource_get_point(BinarySource *src, struct ec_point *point) -{ 
- ptrlen str = get_string(src); - if (get_err(src)) return false; - return decodepoint(str.ptr, str.len, point); -} -#define get_point(src, pt) BinarySource_get_point(BinarySource_UPCAST(src), pt) - -/* ---------------------------------------------------------------------- - * Exposed ECDSA interface - */ - struct ecsign_extra { struct ec_curve *(*curve)(void); const struct ssh_hashalg *hash; @@ -1696,719 +261,880 @@ struct ecsign_extra { int oidlen; }; -static void ecdsa_freekey(ssh_key *key) -{ - struct ec_key *ec; - - if (!key) return; - ec = container_of(key, struct ec_key, sshk); - - if (ec->publicKey.x) - freebn(ec->publicKey.x); - if (ec->publicKey.y) - freebn(ec->publicKey.y); - if (ec->publicKey.z) - freebn(ec->publicKey.z); - if (ec->privateKey) - freebn(ec->privateKey); - sfree(ec); -} - -static ssh_key *ecdsa_new_pub(const ssh_keyalg *self, ptrlen data) +WeierstrassPoint *ecdsa_public(mp_int *private_key, const ssh_keyalg *alg) { const struct ecsign_extra *extra = - (const struct ecsign_extra *)self->extra; + (const struct ecsign_extra *)alg->extra; + struct ec_curve *curve = extra->curve(); + assert(curve->type == EC_WEIERSTRASS); + + mp_int *priv_reduced = mp_mod(private_key, curve->p); + WeierstrassPoint *toret = ecc_weierstrass_multiply( + curve->w.G, priv_reduced); + mp_free(priv_reduced); + return toret; +} + +static mp_int *eddsa_exponent_from_hash( + ptrlen hash, const struct ec_curve *curve) +{ + /* + * Make an integer out of the hash data, little-endian. + */ + assert(hash.len >= curve->fieldBytes); + mp_int *e = mp_from_bytes_le(make_ptrlen(hash.ptr, curve->fieldBytes)); + + /* + * Set the highest bit that fits in the modulus, and clear any + * above that. + */ + mp_set_bit(e, curve->fieldBits - 1, 1); + mp_reduce_mod_2to(e, curve->fieldBits); + + /* + * Clear exactly three low bits. 
+ */ + for (size_t bit = 0; bit < 3; bit++) + mp_set_bit(e, bit, 0); + + return e; +} + +EdwardsPoint *eddsa_public(mp_int *private_key, const ssh_keyalg *alg) +{ + const struct ecsign_extra *extra = + (const struct ecsign_extra *)alg->extra; + struct ec_curve *curve = extra->curve(); + assert(curve->type == EC_EDWARDS); + + ssh_hash *h = ssh_hash_new(extra->hash); + for (size_t i = 0; i < curve->fieldBytes; ++i) + put_byte(h, mp_get_byte(private_key, i)); + + unsigned char hash[extra->hash->hlen]; + ssh_hash_final(h, hash); + + mp_int *exponent = eddsa_exponent_from_hash( + make_ptrlen(hash, extra->hash->hlen), curve); + + EdwardsPoint *toret = ecc_edwards_multiply(curve->e.G, exponent); + mp_free(exponent); + + return toret; +} + +/* ---------------------------------------------------------------------- + * Marshalling and unmarshalling functions + */ + +static mp_int *BinarySource_get_mp_le(BinarySource *src) +{ + return mp_from_bytes_le(get_string(src)); +} +#define get_mp_le(src) BinarySource_get_mp_le(BinarySource_UPCAST(src)) + +static void BinarySink_put_mp_le_unsigned(BinarySink *bs, mp_int *x) +{ + size_t bytes = (mp_get_nbits(x) + 7) / 8; + + put_uint32(bs, bytes); + for (size_t i = 0; i < bytes; ++i) + put_byte(bs, mp_get_byte(x, i)); +} +#define put_mp_le_unsigned(bs, x) \ + BinarySink_put_mp_le_unsigned(BinarySink_UPCAST(bs), x) + +static WeierstrassPoint *ecdsa_decode( + ptrlen encoded, const struct ec_curve *curve) +{ + assert(curve->type == EC_WEIERSTRASS); BinarySource src[1]; - struct ec_key *ec; - struct ec_curve *curve; - BinarySource_BARE_INIT(src, data.ptr, data.len); - get_string(src); + BinarySource_BARE_INIT(src, encoded.ptr, encoded.len); + unsigned char format_type = get_byte(src); - curve = extra->curve(); - assert(curve->type == EC_WEIERSTRASS || curve->type == EC_EDWARDS); + WeierstrassPoint *P; - /* Curve name is duplicated for Weierstrass form */ - if (curve->type == EC_WEIERSTRASS) { - if (!ptrlen_eq_string(get_string(src), 
curve->name)) + size_t len = get_avail(src); + mp_int *x; + mp_int *y; + + switch (format_type) { + case 0: + /* The identity. */ + P = ecc_weierstrass_point_new_identity(curve->w.wc); + break; + case 2: + case 3: + /* A compressed point, in which the x-coordinate is stored in + * full, and y is deduced from that and a single bit + * indicating its parity (stored in the format type byte). */ + x = mp_from_bytes_be(get_data(src, len)); + P = ecc_weierstrass_point_new_from_x(curve->w.wc, x, format_type & 1); + mp_free(x); + if (!P) /* this can fail if the input is invalid */ return NULL; - } - - ec = snew(struct ec_key); - ec->sshk.vt = self; - - ec->publicKey.curve = curve; - ec->publicKey.infinity = false; - ec->publicKey.x = NULL; - ec->publicKey.y = NULL; - ec->publicKey.z = NULL; - ec->privateKey = NULL; - if (!get_point(src, &ec->publicKey)) { - ecdsa_freekey(&ec->sshk); + break; + case 4: + /* An uncompressed point: the x,y coordinates are stored in + * full. We expect the rest of the string to have even length, + * and be divided half and half between the two values. */ + if (len % 2 != 0) + return NULL; + len /= 2; + x = mp_from_bytes_be(get_data(src, len)); + y = mp_from_bytes_be(get_data(src, len)); + P = ecc_weierstrass_point_new(curve->w.wc, x, y); + mp_free(x); + mp_free(y); + break; + default: + /* An unrecognised type byte. 
*/ return NULL; } - if (!ec->publicKey.x || !ec->publicKey.y || - bignum_cmp(ec->publicKey.x, curve->p) >= 0 || - bignum_cmp(ec->publicKey.y, curve->p) >= 0) - { - ecdsa_freekey(&ec->sshk); - ec = NULL; + /* Verify the point is on the curve */ + if (!ecc_weierstrass_point_valid(P)) { + ecc_weierstrass_point_free(P); + return NULL; } - return &ec->sshk; + return P; +} + +static WeierstrassPoint *BinarySource_get_wpoint( + BinarySource *src, const struct ec_curve *curve) +{ + ptrlen str = get_string(src); + if (get_err(src)) + return NULL; + return ecdsa_decode(str, curve); +} +#define get_wpoint(src, curve) \ + BinarySource_get_wpoint(BinarySource_UPCAST(src), curve) + +static void BinarySink_put_wpoint( + BinarySink *bs, WeierstrassPoint *point, const struct ec_curve *curve, + bool bare) +{ + strbuf *sb; + BinarySink *bs_inner; + + if (!bare) { + /* + * Encapsulate the raw data inside an outermost string layer. + */ + sb = strbuf_new(); + bs_inner = BinarySink_UPCAST(sb); + } else { + /* + * Just write the data directly to the output. + */ + bs_inner = bs; + } + + if (ecc_weierstrass_is_identity(point)) { + put_byte(bs_inner, 0); + } else { + mp_int *x, *y; + ecc_weierstrass_get_affine(point, &x, &y); + + /* + * For ECDSA, we only ever output uncompressed points. 
+ */ + put_byte(bs_inner, 0x04); + for (size_t i = curve->fieldBytes; i--;) + put_byte(bs_inner, mp_get_byte(x, i)); + for (size_t i = curve->fieldBytes; i--;) + put_byte(bs_inner, mp_get_byte(y, i)); + + mp_free(x); + mp_free(y); + } + + if (!bare) + put_stringsb(bs, sb); +} +#define put_wpoint(bs, point, curve, bare) \ + BinarySink_put_wpoint(BinarySink_UPCAST(bs), point, curve, bare) + +static EdwardsPoint *eddsa_decode(ptrlen encoded, const struct ec_curve *curve) +{ + assert(curve->type == EC_EDWARDS); + assert(curve->fieldBits % 8 == 7); + + mp_int *y = mp_from_bytes_le(encoded); + + if (mp_get_nbits(y) > curve->fieldBits+1) { + mp_free(y); + return NULL; + } + + /* The topmost bit of the encoding isn't part of y, so it stores + * the bottom bit of x. Extract it, and zero that bit in y. */ + unsigned desired_x_parity = mp_get_bit(y, curve->fieldBits); + mp_set_bit(y, curve->fieldBits, 0); + + EdwardsPoint *P = ecc_edwards_point_new_from_y( + curve->e.ec, y, desired_x_parity); + mp_free(y); + + /* A point constructed in this way will always satisfy the curve + * equation, unless ecc.c wasn't able to construct one at all, in + * which case P is now NULL. Either way, return it. */ + return P; +} + +static EdwardsPoint *BinarySource_get_epoint( + BinarySource *src, const struct ec_curve *curve) +{ + ptrlen str = get_string(src); + if (get_err(src)) + return NULL; + return eddsa_decode(str, curve); +} +#define get_epoint(src, curve) \ + BinarySource_get_epoint(BinarySource_UPCAST(src), curve) + +static void BinarySink_put_epoint( + BinarySink *bs, EdwardsPoint *point, const struct ec_curve *curve, + bool bare) +{ + mp_int *x, *y; + ecc_edwards_get_affine(point, &x, &y); + + assert(curve->fieldBytes >= 2); + + /* + * EdDSA requires point compression. We store a single integer, + * with bytes in little-endian order, which mostly contains y but + * in which the topmost bit is the low bit of x. 
+ */ + if (!bare) + put_uint32(bs, curve->fieldBytes); /* string length field */ + for (size_t i = 0; i < curve->fieldBytes - 1; i++) + put_byte(bs, mp_get_byte(y, i)); + put_byte(bs, (mp_get_byte(y, curve->fieldBytes - 1) & 0x7F) | + (mp_get_bit(x, 0) << 7)); + + mp_free(x); + mp_free(y); +} +#define put_epoint(bs, point, curve, bare) \ + BinarySink_put_epoint(BinarySink_UPCAST(bs), point, curve, bare) + +/* ---------------------------------------------------------------------- + * Exposed ECDSA interface + */ + +static void ecdsa_freekey(ssh_key *key) +{ + struct ecdsa_key *ek = container_of(key, struct ecdsa_key, sshk); + + if (ek->publicKey) + ecc_weierstrass_point_free(ek->publicKey); + if (ek->privateKey) + mp_free(ek->privateKey); + sfree(ek); +} + +static void eddsa_freekey(ssh_key *key) +{ + struct eddsa_key *ek = container_of(key, struct eddsa_key, sshk); + + if (ek->publicKey) + ecc_edwards_point_free(ek->publicKey); + if (ek->privateKey) + mp_free(ek->privateKey); + sfree(ek); +} + +static ssh_key *ecdsa_new_pub(const ssh_keyalg *alg, ptrlen data) +{ + const struct ecsign_extra *extra = + (const struct ecsign_extra *)alg->extra; + struct ec_curve *curve = extra->curve(); + assert(curve->type == EC_WEIERSTRASS); + + BinarySource src[1]; + BinarySource_BARE_INIT(src, data.ptr, data.len); + get_string(src); + + /* Curve name is duplicated for Weierstrass form */ + if (!ptrlen_eq_string(get_string(src), curve->name)) + return NULL; + + struct ecdsa_key *ek = snew(struct ecdsa_key); + ek->sshk.vt = alg; + ek->curve = curve; + /* Must be NULL before the failure path below: ecdsa_freekey reads it */ + ek->privateKey = NULL; + + ek->publicKey = get_wpoint(src, curve); + if (!ek->publicKey) { + ecdsa_freekey(&ek->sshk); + return NULL; + } + + return &ek->sshk; +} + +static ssh_key *eddsa_new_pub(const ssh_keyalg *alg, ptrlen data) +{ + const struct ecsign_extra *extra = + (const struct ecsign_extra *)alg->extra; + struct ec_curve *curve = extra->curve(); + assert(curve->type == EC_EDWARDS); + + BinarySource src[1]; +
BinarySource_BARE_INIT(src, data.ptr, data.len); + get_string(src); + + struct eddsa_key *ek = snew(struct eddsa_key); + ek->sshk.vt = alg; + ek->curve = curve; + ek->privateKey = NULL; + + ek->publicKey = get_epoint(src, curve); + if (!ek->publicKey) { + eddsa_freekey(&ek->sshk); + return NULL; + } + + return &ek->sshk; +} + +static char *ecc_cache_str_shared( + const char *curve_name, mp_int *x, mp_int *y) +{ + strbuf *sb = strbuf_new(); + + if (curve_name) + strbuf_catf(sb, "%s,", curve_name); + + char *hx = mp_get_hex(x); + char *hy = mp_get_hex(y); + strbuf_catf(sb, "0x%s,0x%s", hx, hy); + sfree(hx); + sfree(hy); + + return strbuf_to_str(sb); } static char *ecdsa_cache_str(ssh_key *key) { - struct ec_key *ec = container_of(key, struct ec_key, sshk); - char *p; - int len, i, pos, nibbles; - static const char hex[] = "0123456789abcdef"; - if (!ec->publicKey.x || !ec->publicKey.y || !ec->publicKey.curve) - return NULL; + struct ecdsa_key *ek = container_of(key, struct ecdsa_key, sshk); + mp_int *x, *y; - len = 4 + 2 + 1; /* 2 x "0x", punctuation, \0 */ - if (ec->publicKey.curve->name) - len += strlen(ec->publicKey.curve->name); /* Curve name */ - len += 4 * (bignum_bitcount(ec->publicKey.x) + 15) / 16; - len += 4 * (bignum_bitcount(ec->publicKey.y) + 15) / 16; - p = snewn(len, char); + ecc_weierstrass_get_affine(ek->publicKey, &x, &y); + char *toret = ecc_cache_str_shared(ek->curve->name, x, y); + mp_free(x); + mp_free(y); + return toret; +} - pos = 0; - if (ec->publicKey.curve->name) - pos += sprintf(p + pos, "%s,", ec->publicKey.curve->name); - pos += sprintf(p + pos, "0x"); - nibbles = (3 + bignum_bitcount(ec->publicKey.x)) / 4; - if (nibbles < 1) - nibbles = 1; - for (i = nibbles; i--;) { - p[pos++] = - hex[(bignum_byte(ec->publicKey.x, i / 2) >> (4 * (i % 2))) & 0xF]; - } - pos += sprintf(p + pos, ",0x"); - nibbles = (3 + bignum_bitcount(ec->publicKey.y)) / 4; - if (nibbles < 1) - nibbles = 1; - for (i = nibbles; i--;) { - p[pos++] = - 
hex[(bignum_byte(ec->publicKey.y, i / 2) >> (4 * (i % 2))) & 0xF]; - } - p[pos] = '\0'; - return p; +static char *eddsa_cache_str(ssh_key *key) +{ + struct eddsa_key *ek = container_of(key, struct eddsa_key, sshk); + mp_int *x, *y; + + ecc_edwards_get_affine(ek->publicKey, &x, &y); + char *toret = ecc_cache_str_shared(ek->curve->name, x, y); + mp_free(x); + mp_free(y); + return toret; } static void ecdsa_public_blob(ssh_key *key, BinarySink *bs) { - struct ec_key *ec = container_of(key, struct ec_key, sshk); - int pointlen; - int i; + struct ecdsa_key *ek = container_of(key, struct ecdsa_key, sshk); - if (ec->publicKey.curve->type == EC_EDWARDS) { - /* Edwards compressed form "ssh-ed25519" point y[:-1] + x[0:1] */ + put_stringz(bs, ek->sshk.vt->ssh_id); + put_stringz(bs, ek->curve->name); + put_wpoint(bs, ek->publicKey, ek->curve, false); +} - pointlen = ec->publicKey.curve->fieldBits / 8; +static void eddsa_public_blob(ssh_key *key, BinarySink *bs) +{ + struct eddsa_key *ek = container_of(key, struct eddsa_key, sshk); - assert(pointlen >= 2); - - put_stringz(bs, ec->sshk.vt->ssh_id); - put_uint32(bs, pointlen); - - /* Unset last bit of y and set first bit of x in its place */ - for (i = 0; i < pointlen - 1; ++i) - put_byte(bs, bignum_byte(ec->publicKey.y, i)); - /* Unset last bit of y and set first bit of x in its place */ - put_byte(bs, ((bignum_byte(ec->publicKey.y, i) & 0x7f) | - (bignum_bit(ec->publicKey.x, 0) << 7))); - } else if (ec->publicKey.curve->type == EC_WEIERSTRASS) { - assert(ec->publicKey.curve->name); - - pointlen = (bignum_bitcount(ec->publicKey.curve->p) + 7) / 8; - - put_stringz(bs, ec->sshk.vt->ssh_id); - put_stringz(bs, ec->publicKey.curve->name); - put_uint32(bs, (2 * pointlen) + 1); - put_byte(bs, 0x04); - for (i = pointlen; i--;) - put_byte(bs, bignum_byte(ec->publicKey.x, i)); - for (i = pointlen; i--;) - put_byte(bs, bignum_byte(ec->publicKey.y, i)); - } else { - assert(0 && "Bad key type in ecdsa_public_blob"); - } + put_stringz(bs, 
ek->sshk.vt->ssh_id); + put_epoint(bs, ek->publicKey, ek->curve, false); } static void ecdsa_private_blob(ssh_key *key, BinarySink *bs) { - struct ec_key *ec = container_of(key, struct ec_key, sshk); - int keylen; - int i; + struct ecdsa_key *ek = container_of(key, struct ecdsa_key, sshk); - assert(ec->privateKey); - - if (ec->publicKey.curve->type == EC_EDWARDS) { - /* Unsigned */ - keylen = (bignum_bitcount(ec->privateKey) + 7) / 8; - } else { - /* Signed */ - keylen = (bignum_bitcount(ec->privateKey) + 8) / 8; - } - - put_uint32(bs, keylen); - if (ec->publicKey.curve->type == EC_EDWARDS) { - /* Little endian */ - for (i = 0; i < keylen; ++i) - put_byte(bs, bignum_byte(ec->privateKey, i)); - } else { - for (i = keylen; i--;) - put_byte(bs, bignum_byte(ec->privateKey, i)); - } + /* ECDSA uses ordinary SSH-2 mpint format to store the private key */ + assert(ek->privateKey); + put_mp_ssh2(bs, ek->privateKey); } -static ssh_key *ecdsa_new_priv(const ssh_keyalg *self, ptrlen pub, ptrlen priv) +static void eddsa_private_blob(ssh_key *key, BinarySink *bs) { - BinarySource src[1]; - ssh_key *sshk; - struct ec_key *ec; - struct ec_point *publicKey; + struct eddsa_key *ek = container_of(key, struct eddsa_key, sshk); - sshk = ecdsa_new_pub(self, pub); + /* EdDSA stores the private key integer little-endian and unsigned */ + assert(ek->privateKey); + put_mp_le_unsigned(bs, ek->privateKey); +} + +static ssh_key *ecdsa_new_priv(const ssh_keyalg *alg, ptrlen pub, ptrlen priv) +{ + ssh_key *sshk = ecdsa_new_pub(alg, pub); if (!sshk) return NULL; + struct ecdsa_key *ek = container_of(sshk, struct ecdsa_key, sshk); - ec = container_of(sshk, struct ec_key, sshk); + BinarySource src[1]; BinarySource_BARE_INIT(src, priv.ptr, priv.len); + ek->privateKey = get_mp_ssh2(src); - if (ec->publicKey.curve->type != EC_WEIERSTRASS - && ec->publicKey.curve->type != EC_EDWARDS) { - ecdsa_freekey(&ec->sshk); - return NULL; - } - - if (ec->publicKey.curve->type == EC_EDWARDS) { - ec->privateKey = 
get_mp_le(src); - } else { - ec->privateKey = get_mp_ssh2(src); - } - if (!ec->privateKey) { - ecdsa_freekey(&ec->sshk); - return NULL; - } - - /* Check that private key generates public key */ - publicKey = ec_public(ec->privateKey, ec->publicKey.curve); - - if (!publicKey || - bignum_cmp(publicKey->x, ec->publicKey.x) || - bignum_cmp(publicKey->y, ec->publicKey.y)) - { - ecdsa_freekey(&ec->sshk); - ec = NULL; - } - ec_point_free(publicKey); - - return &ec->sshk; + return &ek->sshk; } -static ssh_key *ed25519_new_priv_openssh(const ssh_keyalg *self, - BinarySource *src) +static ssh_key *eddsa_new_priv(const ssh_keyalg *alg, ptrlen pub, ptrlen priv) { - struct ec_key *ec; - struct ec_point *publicKey; - ptrlen p, q; + ssh_key *sshk = eddsa_new_pub(alg, pub); + if (!sshk) + return NULL; + struct eddsa_key *ek = container_of(sshk, struct eddsa_key, sshk); - p = get_string(src); - q = get_string(src); - if (get_err(src) || p.len != 32 || q.len != 64) + BinarySource src[1]; + BinarySource_BARE_INIT(src, priv.ptr, priv.len); + ek->privateKey = get_mp_le(src); + + return &ek->sshk; +} + +static ssh_key *eddsa_new_priv_openssh( + const ssh_keyalg *alg, BinarySource *src) +{ + const struct ecsign_extra *extra = + (const struct ecsign_extra *)alg->extra; + struct ec_curve *curve = extra->curve(); + assert(curve->type == EC_EDWARDS); + + ptrlen pubkey_pl = get_string(src); + ptrlen privkey_extended_pl = get_string(src); + if (get_err(src) || pubkey_pl.len != curve->fieldBytes) return NULL; - ec = snew(struct ec_key); - ec->sshk.vt = self; - - ec->publicKey.curve = ec_ed25519(); - ec->publicKey.infinity = false; - ec->privateKey = NULL; - ec->publicKey.x = NULL; - ec->publicKey.z = NULL; - ec->publicKey.y = NULL; - - if (!decodepoint_ed(p.ptr, p.len, &ec->publicKey)) - { - ecdsa_freekey(&ec->sshk); - return NULL; - } - - ec->privateKey = bignum_from_bytes_le(q.ptr, 32); - - /* Check that private key generates public key */ - publicKey = ec_public(ec->privateKey, 
ec->publicKey.curve); - - if (!publicKey || - bignum_cmp(publicKey->x, ec->publicKey.x) || - bignum_cmp(publicKey->y, ec->publicKey.y)) - { - ecdsa_freekey(&ec->sshk); - ec = NULL; - } - ec_point_free(publicKey); - - /* The OpenSSH format for ed25519 private keys also for some + /* + * The OpenSSH format for ed25519 private keys also for some * reason encodes an extra copy of the public key in the second * half of the secret-key string. Check that that's present and * correct as well, otherwise the key we think we've imported * won't behave identically to the way OpenSSH would have treated - * it. */ - if (0 != memcmp((const char *)q.ptr + 32, p.ptr, 32)) { - ecdsa_freekey(&ec->sshk); + * it. + */ + BinarySource subsrc[1]; + BinarySource_BARE_INIT( + subsrc, privkey_extended_pl.ptr, privkey_extended_pl.len); + ptrlen privkey_pl = get_data(subsrc, curve->fieldBytes); + ptrlen pubkey_copy_pl = get_data(subsrc, curve->fieldBytes); + if (get_err(subsrc) || get_avail(subsrc)) + return NULL; + if (!ptrlen_eq_ptrlen(pubkey_pl, pubkey_copy_pl)) + return NULL; + + struct eddsa_key *ek = snew(struct eddsa_key); + ek->sshk.vt = alg; + ek->curve = curve; + + ek->publicKey = eddsa_decode(pubkey_pl, curve); + if (!ek->publicKey) { + eddsa_freekey(&ek->sshk); return NULL; } - return &ec->sshk; + ek->privateKey = mp_from_bytes_le(privkey_pl); + + return &ek->sshk; } -static void ed25519_openssh_blob(ssh_key *key, BinarySink *bs) +static void eddsa_openssh_blob(ssh_key *key, BinarySink *bs) { - struct ec_key *ec = container_of(key, struct ec_key, sshk); - strbuf *pub; + struct eddsa_key *ek = container_of(key, struct eddsa_key, sshk); + assert(ek->curve->type == EC_EDWARDS); - int pointlen; - int keylen; - int i; + /* Encode the public and private points as strings */ + strbuf *pub_sb = strbuf_new(); + put_epoint(pub_sb, ek->publicKey, ek->curve, false); + ptrlen pub = make_ptrlen(pub_sb->s + 4, pub_sb->len - 4); - assert(ec->publicKey.curve->type == EC_EDWARDS); + strbuf *priv_sb 
= strbuf_new(); + put_mp_le_unsigned(priv_sb, ek->privateKey); + ptrlen priv = make_ptrlen(priv_sb->s + 4, priv_sb->len - 4); - pointlen = (bignum_bitcount(ec->publicKey.y) + 7) / 8; - keylen = (bignum_bitcount(ec->privateKey) + 7) / 8; + put_stringpl(bs, pub); - /* Encode the public point */ - pub = strbuf_new(); - put_uint32(pub, pointlen); - for (i = 0; i < pointlen - 1; ++i) - put_byte(pub, bignum_byte(ec->publicKey.y, i)); - /* Unset last bit of y and set first bit of x in its place */ - put_byte(pub, ((bignum_byte(ec->publicKey.y, i) & 0x7f) | - (bignum_bit(ec->publicKey.x, 0) << 7))); + /* Encode the private key as the concatenation of the + * little-endian key integer and the public key again */ + put_uint32(bs, priv.len + pub.len); + put_data(bs, priv.ptr, priv.len); + put_data(bs, pub.ptr, pub.len); - put_data(bs, pub->s, pub->len); - - put_uint32(bs, keylen + pointlen); - for (i = 0; i < keylen; ++i) - put_byte(bs, bignum_byte(ec->privateKey, i)); - /* Now encode an extra copy of the public point as the second half - * of the private key string, as the OpenSSH format for some - * reason requires */ - put_data(bs, pub->s + 4, pub->len - 4); - - strbuf_free(pub); + strbuf_free(pub_sb); + strbuf_free(priv_sb); } -static ssh_key *ecdsa_new_priv_openssh(const ssh_keyalg *self, - BinarySource *src) +static ssh_key *ecdsa_new_priv_openssh( + const ssh_keyalg *alg, BinarySource *src) { const struct ecsign_extra *extra = - (const struct ecsign_extra *)self->extra; - struct ec_key *ec; - struct ec_curve *curve; - struct ec_point *publicKey; + (const struct ecsign_extra *)alg->extra; + struct ec_curve *curve = extra->curve(); + assert(curve->type == EC_WEIERSTRASS); get_string(src); - curve = extra->curve(); - assert(curve->type == EC_WEIERSTRASS); + struct eddsa_key *ek = snew(struct eddsa_key); + ek->sshk.vt = alg; + ek->curve = curve; - ec = snew(struct ec_key); - ec->sshk.vt = self; - - ec->publicKey.curve = curve; - ec->publicKey.infinity = false; - 
ec->publicKey.x = NULL; - ec->publicKey.y = NULL; - ec->publicKey.z = NULL; - if (!get_point(src, &ec->publicKey)) { - ecdsa_freekey(&ec->sshk); - return NULL; - } - ec->privateKey = NULL; - - if (!ec->publicKey.x || !ec->publicKey.y || - bignum_cmp(ec->publicKey.x, curve->p) >= 0 || - bignum_cmp(ec->publicKey.y, curve->p) >= 0) - { - ecdsa_freekey(&ec->sshk); + ek->publicKey = get_epoint(src, curve); + if (!ek->publicKey) { + eddsa_freekey(&ek->sshk); return NULL; } - ec->privateKey = get_mp_ssh2(src); - if (ec->privateKey == NULL) - { - ecdsa_freekey(&ec->sshk); - return NULL; - } + ek->privateKey = get_mp_ssh2(src); - /* Now check that the private key makes the public key */ - publicKey = ec_public(ec->privateKey, ec->publicKey.curve); - if (!publicKey) - { - ecdsa_freekey(&ec->sshk); - return NULL; - } - - if (bignum_cmp(ec->publicKey.x, publicKey->x) || - bignum_cmp(ec->publicKey.y, publicKey->y)) - { - /* Private key doesn't make the public key on the given curve */ - ecdsa_freekey(&ec->sshk); - ec_point_free(publicKey); - return NULL; - } - - ec_point_free(publicKey); - - return &ec->sshk; + return &ek->sshk; } static void ecdsa_openssh_blob(ssh_key *key, BinarySink *bs) { - struct ec_key *ec = container_of(key, struct ec_key, sshk); - - int pointlen; - int i; - - assert(ec->publicKey.curve->type == EC_WEIERSTRASS); - - pointlen = (bignum_bitcount(ec->publicKey.curve->p) + 7) / 8; - - put_stringz(bs, ec->publicKey.curve->name); - - put_uint32(bs, 1 + (pointlen * 2)); - put_byte(bs, 0x04); - for (i = pointlen; i--; ) - put_byte(bs, bignum_byte(ec->publicKey.x, i)); - for (i = pointlen; i--; ) - put_byte(bs, bignum_byte(ec->publicKey.y, i)); - - put_mp_ssh2(bs, ec->privateKey); + struct ecdsa_key *ek = container_of(key, struct ecdsa_key, sshk); + put_stringz(bs, ek->curve->name); + put_wpoint(bs, ek->publicKey, ek->curve, false); + put_mp_ssh2(bs, ek->privateKey); } -static int ecdsa_pubkey_bits(const ssh_keyalg *self, ptrlen blob) +static int 
ec_shared_pubkey_bits(const ssh_keyalg *alg, ptrlen blob) { - ssh_key *sshk; - struct ec_key *ec; - int ret; + const struct ecsign_extra *extra = + (const struct ecsign_extra *)alg->extra; + struct ec_curve *curve = extra->curve(); + return curve->fieldBits; +} - sshk = ecdsa_new_pub(self, blob); - if (!sshk) - return -1; +static mp_int *ecdsa_signing_exponent_from_data( + const struct ec_curve *curve, const struct ecsign_extra *extra, + ptrlen data) +{ + /* Hash the data being signed. */ + unsigned char hash[extra->hash->hlen]; + ssh_hash *h = ssh_hash_new(extra->hash); + put_data(h, data.ptr, data.len); + ssh_hash_final(h, hash); - ec = container_of(sshk, struct ec_key, sshk); - ret = ec->publicKey.curve->fieldBits; - ecdsa_freekey(&ec->sshk); + /* + * Take the leftmost b bits of the hash of the signed data (where + * b is the number of bits in order(G)), interpreted big-endian. + */ + mp_int *z = mp_from_bytes_be(make_ptrlen(hash, extra->hash->hlen)); + size_t zbits = mp_get_nbits(z); + size_t nbits = mp_get_nbits(curve->w.G_order); + size_t shift = zbits - nbits; + /* Bound the shift count below at 0, using bit twiddling to avoid + * a conditional branch */ + shift &= ~-(shift >> (CHAR_BIT * sizeof(size_t) - 1)); + mp_int *toret = mp_rshift_safe(z, shift); + mp_free(z); - return ret; + return toret; } static bool ecdsa_verify(ssh_key *key, ptrlen sig, ptrlen data) { - struct ec_key *ec = container_of(key, struct ec_key, sshk); + struct ecdsa_key *ek = container_of(key, struct ecdsa_key, sshk); const struct ecsign_extra *extra = - (const struct ecsign_extra *)ec->sshk.vt->extra; + (const struct ecsign_extra *)ek->sshk.vt->extra; + BinarySource src[1]; - ptrlen sigstr; - bool ret; - - if (!ec->publicKey.x || !ec->publicKey.y || !ec->publicKey.curve) - return false; - BinarySource_BARE_INIT(src, sig.ptr, sig.len); /* Check the signature starts with the algorithm name */ - if (!ptrlen_eq_string(get_string(src), ec->sshk.vt->ssh_id)) + if 
(!ptrlen_eq_string(get_string(src), ek->sshk.vt->ssh_id)) return false; - sigstr = get_string(src); + /* Everything else is nested inside a sub-string. Descend into that. */ + ptrlen sigstr = get_string(src); if (get_err(src)) return false; + BinarySource_BARE_INIT(src, sigstr.ptr, sigstr.len); - if (ec->publicKey.curve->type == EC_EDWARDS) { - struct ec_point *r; - int pointlen = ec->publicKey.curve->fieldBits / 8; - Bignum s, h; - - /* Check that the signature is two times the length of a point */ - if (sigstr.len != pointlen * 2) { - return false; - } - - /* Check it's the 256 bit field so that SHA512 is the correct hash */ - if (ec->publicKey.curve->fieldBits != 256) { - return false; - } - - /* Get the signature */ - r = ec_point_new(ec->publicKey.curve, NULL, NULL, NULL, false); - if (!r) { - return false; - } - if (!decodepoint(sigstr.ptr, pointlen, r)) { - ec_point_free(r); - return false; - } - s = bignum_from_bytes_le( - (const char *)sigstr.ptr + pointlen, pointlen); - - /* Get the hash of the encoded value of R + encoded value of pk + message */ - { - int i; - unsigned char digest[512 / 8]; - SHA512_State hs; - SHA512_Init(&hs); - - /* Add encoded r (no need to encode it again, it was in - * the signature) */ - put_data(&hs, sigstr.ptr, pointlen); - - /* Encode pk and add it */ - for (i = 0; i < pointlen - 1; ++i) - put_byte(&hs, bignum_byte(ec->publicKey.y, i)); - /* Unset last bit of y and set first bit of x in its place */ - put_byte(&hs, ((bignum_byte(ec->publicKey.y, i) & 0x7f) | - (bignum_bit(ec->publicKey.x, 0) << 7))); - - /* Add the message itself */ - put_data(&hs, data.ptr, data.len); - - /* Get the hash */ - SHA512_Final(&hs, digest); - - /* Convert to Bignum */ - h = bignum_from_bytes_le(digest, sizeof(digest)); - } - - /* Verify sB == r + h*publicKey */ - { - struct ec_point *lhs, *rhs, *tmp; - - /* lhs = sB */ - lhs = ecp_mul(&ec->publicKey.curve->e.B, s); - freebn(s); - if (!lhs) { - ec_point_free(r); - freebn(h); - return false; - } - - 
/* rhs = r + h*publicKey */ - tmp = ecp_mul(&ec->publicKey, h); - freebn(h); - if (!tmp) { - ec_point_free(lhs); - ec_point_free(r); - return false; - } - rhs = ecp_add(r, tmp, false); - ec_point_free(r); - ec_point_free(tmp); - if (!rhs) { - ec_point_free(lhs); - return false; - } - - /* Check the point is the same */ - ret = !bignum_cmp(lhs->x, rhs->x); - if (ret) { - ret = !bignum_cmp(lhs->y, rhs->y); - if (ret) { - ret = true; - } - } - ec_point_free(lhs); - ec_point_free(rhs); - } - } else { - Bignum r, s; - unsigned char digest[512 / 8]; - int digestLen; - ssh_hash *hashctx; - - BinarySource_BARE_INIT(src, sigstr.ptr, sigstr.len); - - r = get_mp_ssh2(src); - s = get_mp_ssh2(src); - if (get_err(src)) { - freebn(r); - freebn(s); - return false; - } - - digestLen = extra->hash->hlen; - assert(digestLen <= sizeof(digest)); - hashctx = ssh_hash_new(extra->hash); - put_data(hashctx, data.ptr, data.len); - ssh_hash_final(hashctx, digest); - - /* Verify the signature */ - ret = _ecdsa_verify(&ec->publicKey, digest, digestLen, r, s); - - freebn(r); - freebn(s); + /* Extract the signature integers r,s */ + mp_int *r = get_mp_ssh2(src); + mp_int *s = get_mp_ssh2(src); + if (get_err(src)) { + mp_free(r); + mp_free(s); + return false; } - return ret; + /* Basic sanity checks: 0 < r,s < order(G) */ + unsigned invalid = 0; + invalid |= mp_eq_integer(r, 0); + invalid |= mp_eq_integer(s, 0); + invalid |= mp_cmp_hs(r, ek->curve->w.G_order); + invalid |= mp_cmp_hs(s, ek->curve->w.G_order); + + /* Get the hash of the signed data, converted to an integer */ + mp_int *z = ecdsa_signing_exponent_from_data(ek->curve, extra, data); + + /* Verify the signature integers against the hash */ + mp_int *w = mp_invert(s, ek->curve->w.G_order); + mp_int *u1 = mp_modmul(z, w, ek->curve->w.G_order); + mp_free(z); + mp_int *u2 = mp_modmul(r, w, ek->curve->w.G_order); + mp_free(w); + WeierstrassPoint *u1G = ecc_weierstrass_multiply(ek->curve->w.G, u1); + mp_free(u1); + WeierstrassPoint *u2P = 
ecc_weierstrass_multiply(ek->publicKey, u2); + mp_free(u2); + WeierstrassPoint *sum = ecc_weierstrass_add_general(u1G, u2P); + ecc_weierstrass_point_free(u1G); + ecc_weierstrass_point_free(u2P); + + mp_int *x; + ecc_weierstrass_get_affine(sum, &x, NULL); + ecc_weierstrass_point_free(sum); + + mp_divmod_into(x, ek->curve->w.G_order, NULL, x); + invalid |= (1 ^ mp_cmp_eq(r, x)); + mp_free(x); + + mp_free(r); + mp_free(s); + + return !invalid; +} + +static mp_int *eddsa_signing_exponent_from_data( + struct eddsa_key *ek, const struct ecsign_extra *extra, + ptrlen r_encoded, ptrlen data) +{ + /* Hash (r || public key || message) */ + unsigned char hash[extra->hash->hlen]; + ssh_hash *h = ssh_hash_new(extra->hash); + put_data(h, r_encoded.ptr, r_encoded.len); + put_epoint(h, ek->publicKey, ek->curve, true); /* omit string header */ + put_data(h, data.ptr, data.len); + ssh_hash_final(h, hash); + + /* Convert to an integer */ + mp_int *toret = mp_from_bytes_le(make_ptrlen(hash, extra->hash->hlen)); + + smemclr(hash, extra->hash->hlen); + return toret; +} + +static bool eddsa_verify(ssh_key *key, ptrlen sig, ptrlen data) +{ + struct eddsa_key *ek = container_of(key, struct eddsa_key, sshk); + const struct ecsign_extra *extra = + (const struct ecsign_extra *)ek->sshk.vt->extra; + + BinarySource src[1]; + BinarySource_BARE_INIT(src, sig.ptr, sig.len); + + /* Check the signature starts with the algorithm name */ + if (!ptrlen_eq_string(get_string(src), ek->sshk.vt->ssh_id)) + return false; + + /* Now expect a single string which is the concatenation of an + * encoded curve point r and an integer s. 
*/ + ptrlen sigstr = get_string(src); + if (get_err(src)) + return false; + BinarySource_BARE_INIT(src, sigstr.ptr, sigstr.len); + ptrlen rstr = get_data(src, ek->curve->fieldBytes); + ptrlen sstr = get_data(src, ek->curve->fieldBytes); + if (get_err(src) || get_avail(src)) + return false; + + EdwardsPoint *r = eddsa_decode(rstr, ek->curve); + if (!r) + return false; + mp_int *s = mp_from_bytes_le(sstr); + + mp_int *H = eddsa_signing_exponent_from_data(ek, extra, rstr, data); + + /* Verify that s*G == r + H*publicKey */ + EdwardsPoint *lhs = ecc_edwards_multiply(ek->curve->e.G, s); + mp_free(s); + EdwardsPoint *hpk = ecc_edwards_multiply(ek->publicKey, H); + mp_free(H); + EdwardsPoint *rhs = ecc_edwards_add(r, hpk); + ecc_edwards_point_free(hpk); + unsigned valid = ecc_edwards_eq(lhs, rhs); + ecc_edwards_point_free(lhs); + ecc_edwards_point_free(rhs); + ecc_edwards_point_free(r); + + return valid; } static void ecdsa_sign(ssh_key *key, const void *data, int datalen, unsigned flags, BinarySink *bs) { - struct ec_key *ec = container_of(key, struct ec_key, sshk); + struct ecdsa_key *ek = container_of(key, struct ecdsa_key, sshk); const struct ecsign_extra *extra = - (const struct ecsign_extra *)ec->sshk.vt->extra; - unsigned char digest[512 / 8]; - int digestLen; - Bignum r = NULL, s = NULL; - int i; + (const struct ecsign_extra *)ek->sshk.vt->extra; + assert(ek->privateKey); - assert(ec->privateKey); - assert(ec->publicKey.curve); + mp_int *z = ecdsa_signing_exponent_from_data( + ek->curve, extra, make_ptrlen(data, datalen)); - if (ec->publicKey.curve->type == EC_EDWARDS) { - struct ec_point *rp; - int pointlen = ec->publicKey.curve->fieldBits / 8; - - /* hash = H(sk) (where hash creates 2 * fieldBits) - * b = fieldBits - * a = 2^(b-2) + SUM(2^i * h_i) for i = 2 -> b-2 - * r = H(h[b/8:b/4] + m) - * R = rB - * S = (r + H(encodepoint(R) + encodepoint(pk) + m) * a) % l */ - { - unsigned char hash[512/8]; - Bignum a; - SHA512_State hs; - SHA512_Init(&hs); - - for (i = 0; 
i < pointlen; ++i) - put_byte(&hs, bignum_byte(ec->privateKey, i)); - - SHA512_Final(&hs, hash); - - /* The second part is simply turning the hash into a - * Bignum, however the 2^(b-2) bit *must* be set, and the - * bottom 3 bits *must* not be */ - hash[0] &= 0xf8; /* Unset bottom 3 bits (if set) */ - hash[31] &= 0x7f; /* Unset above (b-2) */ - hash[31] |= 0x40; /* Set 2^(b-2) */ - /* Chop off the top part and convert to int */ - a = bignum_from_bytes_le(hash, 32); - - SHA512_Init(&hs); - put_data(&hs, hash+(ec->publicKey.curve->fieldBits / 8), - ((ec->publicKey.curve->fieldBits / 4) - - (ec->publicKey.curve->fieldBits / 8))); - put_data(&hs, data, datalen); - SHA512_Final(&hs, hash); - - r = bignum_from_bytes_le(hash, 512/8); - rp = ecp_mul(&ec->publicKey.curve->e.B, r); - assert(rp); - - /* Now calculate s */ - SHA512_Init(&hs); - /* Encode the point R */ - for (i = 0; i < pointlen - 1; ++i) - put_byte(&hs, bignum_byte(rp->y, i)); - /* Unset last bit of y and set first bit of x in its place */ - put_byte(&hs, ((bignum_byte(rp->y, i) & 0x7f) | - (bignum_bit(rp->x, 0) << 7))); - - /* Encode the point pk */ - for (i = 0; i < pointlen - 1; ++i) - put_byte(&hs, bignum_byte(ec->publicKey.y, i)); - /* Unset last bit of y and set first bit of x in its place */ - put_byte(&hs, ((bignum_byte(ec->publicKey.y, i) & 0x7f) | - (bignum_bit(ec->publicKey.x, 0) << 7))); - - /* Add the message */ - put_data(&hs, data, datalen); - SHA512_Final(&hs, hash); - - { - Bignum tmp, tmp2; - - tmp = bignum_from_bytes_le(hash, 512/8); - tmp2 = modmul(tmp, a, ec->publicKey.curve->e.l); - freebn(a); - freebn(tmp); - tmp = bigadd(r, tmp2); - freebn(r); - freebn(tmp2); - s = bigmod(tmp, ec->publicKey.curve->e.l); - freebn(tmp); - } - } - - /* Format the output */ - put_stringz(bs, ec->sshk.vt->ssh_id); - pointlen = ec->publicKey.curve->fieldBits / 8; - put_uint32(bs, pointlen * 2); - - /* Encode the point */ - for (i = 0; i < pointlen - 1; ++i) - put_byte(bs, bignum_byte(rp->y, i)); - /* Unset 
last bit of y and set first bit of x in its place */ - put_byte(bs, ((bignum_byte(rp->y, i) & 0x7f) | - (bignum_bit(rp->x, 0) << 7))); - ec_point_free(rp); - - /* Encode the int */ - for (i = 0; i < pointlen; ++i) - put_byte(bs, bignum_byte(s, i)); - freebn(s); - } else { - ssh_hash *hashctx; - strbuf *substr; - - digestLen = extra->hash->hlen; - assert(digestLen <= sizeof(digest)); - hashctx = ssh_hash_new(extra->hash); - put_data(hashctx, data, datalen); - ssh_hash_final(hashctx, digest); - - /* Do the signature */ - _ecdsa_sign(ec->privateKey, ec->publicKey.curve, digest, digestLen, &r, &s); - assert(r); - assert(s); - - /* Format the output */ - put_stringz(bs, ec->sshk.vt->ssh_id); - - substr = strbuf_new(); - put_mp_ssh2(substr, r); - put_mp_ssh2(substr, s); - put_stringsb(bs, substr); - - freebn(r); - freebn(s); + /* Generate k between 1 and curve->n, using the same deterministic + * k generation system we use for conventional DSA. */ + mp_int *k; + { + unsigned char digest[20]; + SHA_Simple(data, datalen, digest); + k = dss_gen_k( + "ECDSA deterministic k generator", ek->curve->w.G_order, + ek->privateKey, digest, sizeof(digest)); } + + WeierstrassPoint *kG = ecc_weierstrass_multiply(ek->curve->w.G, k); + mp_int *x; + ecc_weierstrass_get_affine(kG, &x, NULL); + ecc_weierstrass_point_free(kG); + + /* r = kG.x mod order(G) */ + mp_int *r = mp_mod(x, ek->curve->w.G_order); + mp_free(x); + + /* s = (z + r * priv)/k mod n */ + mp_int *rPriv = mp_modmul(r, ek->privateKey, ek->curve->w.G_order); + mp_int *numerator = mp_modadd(z, rPriv, ek->curve->w.G_order); + mp_free(z); + mp_free(rPriv); + mp_int *kInv = mp_invert(k, ek->curve->w.G_order); + mp_free(k); + mp_int *s = mp_modmul(numerator, kInv, ek->curve->w.G_order); + mp_free(numerator); + mp_free(kInv); + + /* Format the output */ + put_stringz(bs, ek->sshk.vt->ssh_id); + + strbuf *substr = strbuf_new(); + put_mp_ssh2(substr, r); + put_mp_ssh2(substr, s); + put_stringsb(bs, substr); + + mp_free(r); + 
mp_free(s); +} + +static void eddsa_sign(ssh_key *key, const void *data, int datalen, + unsigned flags, BinarySink *bs) +{ + struct eddsa_key *ek = container_of(key, struct eddsa_key, sshk); + const struct ecsign_extra *extra = + (const struct ecsign_extra *)ek->sshk.vt->extra; + assert(ek->privateKey); + + /* + * EdDSA prescribes a specific method of generating the random + * nonce integer for the signature. (A verifier can't tell + * whether you followed that method, but it's important to + * follow it anyway, because test vectors will want a specific + * signature for a given message, and because this preserves + * determinism of signatures even if the same signature were + * made twice by different software.) + */ + + /* + * First, we hash the private key integer (bare, little-endian) + * into a hash generating 2*fieldBytes of output. + */ + unsigned char hash[extra->hash->hlen]; + ssh_hash *h = ssh_hash_new(extra->hash); + for (size_t i = 0; i < ek->curve->fieldBytes; ++i) + put_byte(h, mp_get_byte(ek->privateKey, i)); + ssh_hash_final(h, hash); + + /* + * The first half of the output hash is converted into an + * integer a, by the standard EdDSA transformation. + */ + mp_int *a = eddsa_exponent_from_hash( + make_ptrlen(hash, ek->curve->fieldBytes), ek->curve); + + /* + * The second half of the hash of the private key is hashed again + * with the message to be signed, and used as an exponent to + * generate the signature point r. 
+ */ + h = ssh_hash_new(extra->hash); + put_data(h, hash + ek->curve->fieldBytes, + extra->hash->hlen - ek->curve->fieldBytes); + put_data(h, data, datalen); + ssh_hash_final(h, hash); + mp_int *log_r_unreduced = mp_from_bytes_le( + make_ptrlen(hash, extra->hash->hlen)); + mp_int *log_r = mp_mod(log_r_unreduced, ek->curve->e.G_order); + mp_free(log_r_unreduced); + EdwardsPoint *r = ecc_edwards_multiply(ek->curve->e.G, log_r); + + /* + * Encode r now, because we'll need its encoding for the next + * hashing step as well as to write into the actual signature. + */ + strbuf *r_enc = strbuf_new(); + put_epoint(r_enc, r, ek->curve, true); /* omit string header */ + ecc_edwards_point_free(r); + + /* + * Compute the hash of (r || public key || message) just as + * eddsa_verify does. + */ + mp_int *H = eddsa_signing_exponent_from_data( + ek, extra, ptrlen_from_strbuf(r_enc), make_ptrlen(data, datalen)); + + /* And then s = (log(r) + H*a) mod order(G). */ + mp_int *Ha = mp_modmul(H, a, ek->curve->e.G_order); + mp_int *s = mp_modadd(log_r, Ha, ek->curve->e.G_order); + mp_free(H); + mp_free(a); + mp_free(Ha); + mp_free(log_r); + + /* Format the output */ + put_stringz(bs, ek->sshk.vt->ssh_id); + put_uint32(bs, r_enc->len + ek->curve->fieldBytes); + put_data(bs, r_enc->u, r_enc->len); + strbuf_free(r_enc); + for (size_t i = 0; i < ek->curve->fieldBytes; ++i) + put_byte(bs, mp_get_byte(s, i)); + mp_free(s); } const struct ecsign_extra sign_extra_ed25519 = { - ec_ed25519, NULL, + ec_ed25519, &ssh_sha512, NULL, 0, }; const ssh_keyalg ssh_ecdsa_ed25519 = { - ecdsa_new_pub, - ecdsa_new_priv, - ed25519_new_priv_openssh, + eddsa_new_pub, + eddsa_new_priv, + eddsa_new_priv_openssh, - ecdsa_freekey, - ecdsa_sign, - ecdsa_verify, - ecdsa_public_blob, - ecdsa_private_blob, - ed25519_openssh_blob, - ecdsa_cache_str, + eddsa_freekey, + eddsa_sign, + eddsa_verify, + eddsa_public_blob, + eddsa_private_blob, + eddsa_openssh_blob, + eddsa_cache_str, - ecdsa_pubkey_bits, + 
ec_shared_pubkey_bits, "ssh-ed25519", "ssh-ed25519", @@ -2437,7 +1163,7 @@ const ssh_keyalg ssh_ecdsa_nistp256 = { ecdsa_openssh_blob, ecdsa_cache_str, - ecdsa_pubkey_bits, + ec_shared_pubkey_bits, "ecdsa-sha2-nistp256", "ecdsa-sha2-nistp256", @@ -2466,7 +1192,7 @@ const ssh_keyalg ssh_ecdsa_nistp384 = { ecdsa_openssh_blob, ecdsa_cache_str, - ecdsa_pubkey_bits, + ec_shared_pubkey_bits, "ecdsa-sha2-nistp384", "ecdsa-sha2-nistp384", @@ -2495,7 +1221,7 @@ const ssh_keyalg ssh_ecdsa_nistp521 = { ecdsa_openssh_blob, ecdsa_cache_str, - ecdsa_pubkey_bits, + ec_shared_pubkey_bits, "ecdsa-sha2-nistp521", "ecdsa-sha2-nistp521", @@ -2509,49 +1235,21 @@ const ssh_keyalg ssh_ecdsa_nistp521 = { struct eckex_extra { struct ec_curve *(*curve)(void); + void (*setup)(ecdh_key *dh); + void (*cleanup)(ecdh_key *dh); + void (*getpublic)(ecdh_key *dh, BinarySink *bs); + mp_int *(*getkey)(ecdh_key *dh, ptrlen remoteKey); }; -static Bignum ecdh_calculate(const Bignum private, - const struct ec_point *public) -{ - struct ec_point *p; - Bignum ret; - p = ecp_mul(public, private); - if (!p) return NULL; - ret = p->x; - p->x = NULL; - - if (p->curve->type == EC_MONTGOMERY) { - /* - * Endianness-swap. The Curve25519 algorithm definition - * assumes you were doing your computation in arrays of 32 - * little-endian bytes, and now specifies that you take your - * final one of those and convert it into a bignum in - * _network_ byte order, i.e. big-endian. - * - * In particular, the spec says, you convert the _whole_ 32 - * bytes into a bignum. That is, on the rare occasions that - * p->x has come out with the most significant 8 bits zero, we - * have to imagine that being represented by a 32-byte string - * with the last byte being zero, so that has to be converted - * into an SSH-2 bignum with the _low_ byte zero, i.e. a - * multiple of 256. 
- */ - int i; - int bytes = (p->curve->fieldBits+7) / 8; - unsigned char *byteorder = snewn(bytes, unsigned char); - for (i = 0; i < bytes; ++i) { - byteorder[i] = bignum_byte(ret, i); - } - freebn(ret); - ret = bignum_from_bytes(byteorder, bytes); - smemclr(byteorder, bytes); - sfree(byteorder); - } - - ec_point_free(p); - return ret; -} +struct ecdh_key { + const struct eckex_extra *extra; + const struct ec_curve *curve; + mp_int *private; + union { + WeierstrassPoint *w_public; + MontgomeryPoint *m_public; + }; +}; const char *ssh_ecdhkex_curve_textname(const struct ssh_kex *kex) { @@ -2560,142 +1258,181 @@ const char *ssh_ecdhkex_curve_textname(const struct ssh_kex *kex) return curve->textname; } -struct ec_key *ssh_ecdhkex_newkey(const struct ssh_kex *kex) +static void ssh_ecdhkex_w_setup(ecdh_key *dh) +{ + mp_int *one = mp_from_integer(1); + dh->private = mp_random_in_range(one, dh->curve->w.G_order); + mp_free(one); + + dh->w_public = ecc_weierstrass_multiply(dh->curve->w.G, dh->private); +} + +static void ssh_ecdhkex_m_setup(ecdh_key *dh) +{ + unsigned char bytes[dh->curve->fieldBytes]; + for (size_t i = 0; i < sizeof(bytes); ++i) + bytes[i] = random_byte(); + + bytes[0] &= 0xF8; + bytes[dh->curve->fieldBytes-1] &= 0x7F; + bytes[dh->curve->fieldBytes-1] |= 0x40; + dh->private = mp_from_bytes_le(make_ptrlen(bytes, dh->curve->fieldBytes)); + smemclr(bytes, sizeof(bytes)); + + dh->m_public = ecc_montgomery_multiply(dh->curve->m.G, dh->private); +} + +ecdh_key *ssh_ecdhkex_newkey(const struct ssh_kex *kex) { const struct eckex_extra *extra = (const struct eckex_extra *)kex->extra; - struct ec_curve *curve; - struct ec_key *key; - struct ec_point *publicKey; + const struct ec_curve *curve = extra->curve(); - curve = extra->curve(); - - key = snew(struct ec_key); - - key->sshk.vt = NULL; - key->publicKey.curve = curve; - - if (curve->type == EC_MONTGOMERY) { - unsigned char bytes[32] = {0}; - int i; - - for (i = 0; i < sizeof(bytes); ++i) - { - bytes[i] = 
(unsigned char)random_byte(); - } - bytes[0] &= 248; - bytes[31] &= 127; - bytes[31] |= 64; - key->privateKey = bignum_from_bytes_le(bytes, sizeof(bytes)); - smemclr(bytes, sizeof(bytes)); - if (!key->privateKey) { - sfree(key); - return NULL; - } - publicKey = ecp_mul(&key->publicKey.curve->m.G, key->privateKey); - if (!publicKey) { - freebn(key->privateKey); - sfree(key); - return NULL; - } - key->publicKey.x = publicKey->x; - key->publicKey.y = publicKey->y; - key->publicKey.z = NULL; - sfree(publicKey); - } else { - key->privateKey = bignum_random_in_range(One, key->publicKey.curve->w.n); - if (!key->privateKey) { - sfree(key); - return NULL; - } - publicKey = ecp_mul(&key->publicKey.curve->w.G, key->privateKey); - if (!publicKey) { - freebn(key->privateKey); - sfree(key); - return NULL; - } - key->publicKey.x = publicKey->x; - key->publicKey.y = publicKey->y; - key->publicKey.z = NULL; - sfree(publicKey); - } - return key; + ecdh_key *dh = snew(ecdh_key); + dh->extra = extra; + dh->curve = curve; + dh->extra->setup(dh); + return dh; } -void ssh_ecdhkex_getpublic(struct ec_key *ec, BinarySink *bs) +static void ssh_ecdhkex_w_getpublic(ecdh_key *dh, BinarySink *bs) { - int i; - int pointlen; - - pointlen = (bignum_bitcount(ec->publicKey.curve->p) + 7) / 8; - - if (ec->publicKey.curve->type == EC_WEIERSTRASS) { - put_byte(bs, 0x04); - for (i = pointlen; i--;) - put_byte(bs, bignum_byte(ec->publicKey.x, i)); - for (i = pointlen; i--;) - put_byte(bs, bignum_byte(ec->publicKey.y, i)); - } else { - for (i = 0; i < pointlen; ++i) - put_byte(bs, bignum_byte(ec->publicKey.x, i)); - } + put_wpoint(bs, dh->w_public, dh->curve, true); } -Bignum ssh_ecdhkex_getkey(struct ec_key *ec, - const void *remoteKey, int remoteKeyLen) +static void ssh_ecdhkex_m_getpublic(ecdh_key *dh, BinarySink *bs) { - struct ec_point remote; - Bignum ret; - - if (ec->publicKey.curve->type == EC_WEIERSTRASS) { - remote.curve = ec->publicKey.curve; - remote.infinity = false; - if 
(!decodepoint(remoteKey, remoteKeyLen, &remote)) { - return NULL; - } - } else { - /* Point length has to be the same length */ - if (remoteKeyLen != (bignum_bitcount(ec->publicKey.curve->p) + 7) / 8) { - return NULL; - } - - remote.curve = ec->publicKey.curve; - remote.infinity = false; - remote.x = bignum_from_bytes_le((const unsigned char *)remoteKey, - remoteKeyLen); - remote.y = NULL; - remote.z = NULL; - } - - ret = ecdh_calculate(ec->privateKey, &remote); - if (remote.x) freebn(remote.x); - if (remote.y) freebn(remote.y); - return ret; + mp_int *x; + ecc_montgomery_get_affine(dh->m_public, &x); + for (size_t i = 0; i < dh->curve->fieldBytes; ++i) + put_byte(bs, mp_get_byte(x, i)); + mp_free(x); } -void ssh_ecdhkex_freekey(struct ec_key *key) +void ssh_ecdhkex_getpublic(ecdh_key *dh, BinarySink *bs) { - ecdsa_freekey(&key->sshk); + dh->extra->getpublic(dh, bs); } -static const struct eckex_extra kex_extra_curve25519 = { ec_curve25519 }; +static mp_int *ssh_ecdhkex_w_getkey(ecdh_key *dh, ptrlen remoteKey) +{ + WeierstrassPoint *remote_p = ecdsa_decode(remoteKey, dh->curve); + if (!remote_p) + return NULL; + + WeierstrassPoint *p = ecc_weierstrass_multiply(remote_p, dh->private); + + mp_int *x; + ecc_weierstrass_get_affine(p, &x, NULL); + + ecc_weierstrass_point_free(remote_p); + ecc_weierstrass_point_free(p); + + return x; +} + +static mp_int *ssh_ecdhkex_m_getkey(ecdh_key *dh, ptrlen remoteKey) +{ + mp_int *remote_x = mp_from_bytes_le(remoteKey); + MontgomeryPoint *remote_p = ecc_montgomery_point_new( + dh->curve->m.mc, remote_x); + mp_free(remote_x); + + MontgomeryPoint *p = ecc_montgomery_multiply(remote_p, dh->private); + mp_int *x; + ecc_montgomery_get_affine(p, &x); + + ecc_montgomery_point_free(remote_p); + ecc_montgomery_point_free(p); + + /* + * Endianness-swap. 
The Curve25519 algorithm definition assumes + * you were doing your computation in arrays of 32 little-endian + * bytes, and now specifies that you take your final one of those + * and convert it into a bignum in _network_ byte order, i.e. + * big-endian. + * + * In particular, the spec says, you convert the _whole_ 32 bytes + * into a bignum. That is, on the rare occasions that x has come + * out with the most significant 8 bits zero, we have to imagine + * that being represented by a 32-byte string with the last byte + * being zero, so that has to be converted into an SSH-2 bignum + * with the _low_ byte zero, i.e. a multiple of 256. + */ + strbuf *sb = strbuf_new(); + for (size_t i = 0; i < dh->curve->fieldBytes; ++i) + put_byte(sb, mp_get_byte(x, i)); + mp_free(x); + x = mp_from_bytes_be(ptrlen_from_strbuf(sb)); + strbuf_free(sb); + + return x; +} + +mp_int *ssh_ecdhkex_getkey(ecdh_key *dh, ptrlen remoteKey) +{ + return dh->extra->getkey(dh, remoteKey); +} + +static void ssh_ecdhkex_w_cleanup(ecdh_key *dh) +{ + ecc_weierstrass_point_free(dh->w_public); +} + +static void ssh_ecdhkex_m_cleanup(ecdh_key *dh) +{ + ecc_montgomery_point_free(dh->m_public); +} + +void ssh_ecdhkex_freekey(ecdh_key *dh) +{ + mp_free(dh->private); + dh->extra->cleanup(dh); + sfree(dh); +} + +static const struct eckex_extra kex_extra_curve25519 = { + ec_curve25519, + ssh_ecdhkex_m_setup, + ssh_ecdhkex_m_cleanup, + ssh_ecdhkex_m_getpublic, + ssh_ecdhkex_m_getkey, +}; static const struct ssh_kex ssh_ec_kex_curve25519 = { "curve25519-sha256@libssh.org", NULL, KEXTYPE_ECDH, &ssh_sha256, &kex_extra_curve25519, }; -const struct eckex_extra kex_extra_nistp256 = { ec_p256 }; +const struct eckex_extra kex_extra_nistp256 = { + ec_p256, + ssh_ecdhkex_w_setup, + ssh_ecdhkex_w_cleanup, + ssh_ecdhkex_w_getpublic, + ssh_ecdhkex_w_getkey, +}; static const struct ssh_kex ssh_ec_kex_nistp256 = { "ecdh-sha2-nistp256", NULL, KEXTYPE_ECDH, &ssh_sha256, &kex_extra_nistp256, }; -const struct eckex_extra 
kex_extra_nistp384 = { ec_p384 }; +const struct eckex_extra kex_extra_nistp384 = { + ec_p384, + ssh_ecdhkex_w_setup, + ssh_ecdhkex_w_cleanup, + ssh_ecdhkex_w_getpublic, + ssh_ecdhkex_w_getkey, +}; static const struct ssh_kex ssh_ec_kex_nistp384 = { "ecdh-sha2-nistp384", NULL, KEXTYPE_ECDH, &ssh_sha384, &kex_extra_nistp384, }; -const struct eckex_extra kex_extra_nistp521 = { ec_p521 }; +const struct eckex_extra kex_extra_nistp521 = { + ec_p521, + ssh_ecdhkex_w_setup, + ssh_ecdhkex_w_cleanup, + ssh_ecdhkex_w_getpublic, + ssh_ecdhkex_w_getkey, +}; static const struct ssh_kex ssh_ec_kex_nistp521 = { "ecdh-sha2-nistp521", NULL, KEXTYPE_ECDH, &ssh_sha512, &kex_extra_nistp521, diff --git a/sshecdsag.c b/sshecdsag.c index 24547ca5..37048ea6 100644 --- a/sshecdsag.c +++ b/sshecdsag.c @@ -3,66 +3,36 @@ */ #include "ssh.h" +#include "mpint.h" -int ec_generate(struct ec_key *key, int bits, progfn_t pfn, - void *pfnparam) +int ecdsa_generate(struct ecdsa_key *ek, int bits, + progfn_t pfn, void *pfnparam) { - struct ec_point *publicKey; - - if (!ec_nist_alg_and_curve_by_bits(bits, &key->publicKey.curve, - &key->sshk.vt)) + if (!ec_nist_alg_and_curve_by_bits(bits, &ek->curve, &ek->sshk.vt)) return 0; - key->privateKey = bignum_random_in_range(One, key->publicKey.curve->w.n); - if (!key->privateKey) return 0; + mp_int *one = mp_from_integer(1); + ek->privateKey = mp_random_in_range(one, ek->curve->w.G_order); + mp_free(one); - publicKey = ec_public(key->privateKey, key->publicKey.curve); - if (!publicKey) { - freebn(key->privateKey); - key->privateKey = NULL; - return 0; - } - - key->publicKey.x = publicKey->x; - key->publicKey.y = publicKey->y; - key->publicKey.z = NULL; - sfree(publicKey); + ek->publicKey = ecdsa_public(ek->privateKey, ek->sshk.vt); return 1; } -int ec_edgenerate(struct ec_key *key, int bits, progfn_t pfn, - void *pfnparam) +int eddsa_generate(struct eddsa_key *ek, int bits, + progfn_t pfn, void *pfnparam) { - struct ec_point *publicKey; - - if 
(!ec_ed_alg_and_curve_by_bits(bits, &key->publicKey.curve, - &key->sshk.vt)) + if (!ec_ed_alg_and_curve_by_bits(bits, &ek->curve, &ek->sshk.vt)) return 0; - { - /* EdDSA secret keys are just 32 bytes of hash preimage; the - * 64-byte SHA-512 hash of that key will be used when signing, - * but the form of the key stored on disk is the preimage - * only. */ - Bignum privMax = bn_power_2(bits); - if (!privMax) return 0; - key->privateKey = bignum_random_in_range(Zero, privMax); - freebn(privMax); - if (!key->privateKey) return 0; - } + /* EdDSA secret keys are just 32 bytes of hash preimage; the + * 64-byte SHA-512 hash of that key will be used when signing, + * but the form of the key stored on disk is the preimage + * only. */ + ek->privateKey = mp_random_bits(bits); - publicKey = ec_public(key->privateKey, key->publicKey.curve); - if (!publicKey) { - freebn(key->privateKey); - key->privateKey = NULL; - return 0; - } - - key->publicKey.x = publicKey->x; - key->publicKey.y = publicKey->y; - key->publicKey.z = NULL; - sfree(publicKey); + ek->publicKey = eddsa_public(ek->privateKey, ek->sshk.vt); return 1; } diff --git a/sshprime.c b/sshprime.c index cf36975e..65380e0e 100644 --- a/sshprime.c +++ b/sshprime.c @@ -4,6 +4,7 @@ #include #include "ssh.h" +#include "mpint.h" /* * This prime generation algorithm is pretty much cribbed from @@ -134,6 +135,23 @@ static void init_primes_array(void) assert(pos == NPRIMES); } +static unsigned short mp_mod_short(mp_int *x, unsigned short modulus) +{ + /* + * This function lives here rather than in mpint.c partly because + * this is the only place it's needed, but mostly because it + * doesn't pay careful attention to constant running time, since + * as far as I can tell that's a lost cause for key generation + * anyway. 
+ */ + unsigned accumulator = 0; + for (size_t i = mp_max_bytes(x); i-- > 0 ;) { + accumulator = 0x100 * accumulator + mp_get_byte(x, i); + accumulator %= modulus; + } + return accumulator; +} + /* * Generate a prime. We can deal with various extra properties of * the prime: @@ -154,23 +172,15 @@ static void init_primes_array(void) * 'firstbits' is not needed, specifying it to either 0 or 1 is * an adequate no-op. */ -Bignum primegen(int bits, int modulus, int residue, Bignum factor, - int phase, progfn_t pfn, void *pfnparam, unsigned firstbits) +mp_int *primegen( + int bits, int modulus, int residue, mp_int *factor, + int phase, progfn_t pfn, void *pfnparam, unsigned firstbits) { - int i, k, v, byte, bitsleft, check, checks, fbsize; - unsigned long delta; - unsigned long moduli[NPRIMES + 1]; - unsigned long residues[NPRIMES + 1]; - unsigned long multipliers[NPRIMES + 1]; - Bignum p, pm1, q, wqp, wqp2; - int progress = 0; - init_primes_array(); - byte = 0; - bitsleft = 0; + int progress = 0; - fbsize = 0; + size_t fbsize = 0; while (firstbits >> fbsize) /* work out how to align this */ fbsize++; @@ -184,184 +194,172 @@ Bignum primegen(int bits, int modulus, int residue, Bignum factor, * random number with the top bit set and the bottom bit clear, * multiply it by `factor', and add one. */ - p = bn_power_2(bits - 1); - for (i = 0; i < bits; i++) { - if (i == 0 || i == bits - 1) { - v = (i != 0 || !factor) ? 1 : 0; - } else if (i >= bits - fbsize) { - v = (firstbits >> (i - (bits - fbsize))) & 1; - } else { - if (bitsleft <= 0) - bitsleft = 8, byte = random_byte(); - v = byte & 1; - byte >>= 1; - bitsleft--; - } - bignum_set_bit(p, i, v); - } + mp_int *p = mp_random_bits(bits - 1); + + mp_set_bit(p, 0, factor ? 
0 : 1); /* bottom bit */ + mp_set_bit(p, bits-1, 1); /* top bit */ + for (size_t i = 0; i < fbsize; i++) + mp_set_bit(p, bits-fbsize + i, 1 & (firstbits >> i)); + if (factor) { - Bignum tmp = p; - p = bigmul(tmp, factor); - freebn(tmp); - assert(bignum_bit(p, 0) == 0); - bignum_set_bit(p, 0, 1); + mp_int *tmp = p; + p = mp_mul(tmp, factor); + mp_free(tmp); + assert(mp_get_bit(p, 0) == 0); + mp_set_bit(p, 0, 1); } /* - * Ensure this random number is coprime to the first few - * primes, by repeatedly adding either 2 or 2*factor to it - * until it is. + * We need to ensure this random number is coprime to the first + * few primes, by repeatedly adding either 2 or 2*factor to it + * until it is. To do this we make a list of (modulus, residue) + * pairs to avoid, and we also add to that list the extra pair our + * caller wants to avoid. */ - for (i = 0; i < NPRIMES; i++) { + + /* List the moduli */ + unsigned long moduli[NPRIMES + 1]; + for (size_t i = 0; i < NPRIMES; i++) moduli[i] = primes[i]; - residues[i] = bignum_mod_short(p, primes[i]); + moduli[NPRIMES] = modulus; + + /* Find the residue of our starting number mod each of them. Also + * set up the multipliers array which tells us how each one will + * change when we increment the number (which isn't just 1 if + * we're incrementing by multiples of factor). 
*/ + unsigned long residues[NPRIMES + 1], multipliers[NPRIMES + 1]; + for (size_t i = 0; i < lenof(moduli); i++) { + residues[i] = mp_mod_short(p, moduli[i]); if (factor) - multipliers[i] = bignum_mod_short(factor, primes[i]); + multipliers[i] = mp_mod_short(factor, moduli[i]); else multipliers[i] = 1; } - moduli[NPRIMES] = modulus; - residues[NPRIMES] = (bignum_mod_short(p, (unsigned short) modulus) - + modulus - residue); - if (factor) - multipliers[NPRIMES] = bignum_mod_short(factor, modulus); - else - multipliers[NPRIMES] = 1; - delta = 0; + + /* Adjust the last entry so that it avoids a residue other than zero */ + residues[NPRIMES] = (residues[NPRIMES] + modulus - residue) % modulus; + + /* + * Now loop until no residue in that list is zero, to find a + * sensible increment. We maintain the increment in an ordinary + * integer, so if it gets too big, we'll have to give up and go + * back to making up a fresh random large integer. + */ + unsigned delta = 0; while (1) { - for (i = 0; i < (sizeof(moduli) / sizeof(*moduli)); i++) + for (size_t i = 0; i < lenof(moduli); i++) if (!((residues[i] + delta * multipliers[i]) % moduli[i])) - break; - if (i < (sizeof(moduli) / sizeof(*moduli))) { /* we broke */ - delta += 2; - if (delta > 65536) { - freebn(p); - goto STARTOVER; - } - continue; - } - break; + goto found_a_zero; + + /* If we didn't exit that loop by goto, we've got our candidate. */ + break; + + found_a_zero: + delta += 2; + if (delta > 65536) { + mp_free(p); + goto STARTOVER; + } } - q = p; + + /* + * Having found a plausible increment, actually add it on. + */ if (factor) { - Bignum tmp; - tmp = bignum_from_long(delta); - p = bigmuladd(tmp, factor, q); - freebn(tmp); + mp_int *d = mp_from_integer(delta); + mp_int *df = mp_mul(d, factor); + mp_add_into(p, p, df); + mp_free(d); + mp_free(df); } else { - p = bignum_add_long(q, delta); + mp_add_integer_into(p, p, delta); } - freebn(q); /* * Now apply the Miller-Rabin primality test a few times. 
First * work out how many checks are needed. */ - checks = 27; - if (bits >= 150) - checks = 18; - if (bits >= 200) - checks = 15; - if (bits >= 250) - checks = 12; - if (bits >= 300) - checks = 9; - if (bits >= 350) - checks = 8; - if (bits >= 400) - checks = 7; - if (bits >= 450) - checks = 6; - if (bits >= 550) - checks = 5; - if (bits >= 650) - checks = 4; - if (bits >= 850) - checks = 3; - if (bits >= 1300) - checks = 2; + unsigned checks = + bits >= 1300 ? 2 : bits >= 850 ? 3 : bits >= 650 ? 4 : + bits >= 550 ? 5 : bits >= 450 ? 6 : bits >= 400 ? 7 : + bits >= 350 ? 8 : bits >= 300 ? 9 : bits >= 250 ? 12 : + bits >= 200 ? 15 : bits >= 150 ? 18 : 27; /* * Next, write p-1 as q*2^k. */ - for (k = 0; bignum_bit(p, k) == !k; k++) + size_t k; + for (k = 0; mp_get_bit(p, k) == !k; k++) continue; /* find first 1 bit in p-1 */ - q = bignum_rshift(p, k); - /* And store p-1 itself, which we'll need. */ - pm1 = copybn(p); - decbn(pm1); + mp_int *q = mp_rshift_safe(p, k); + + /* + * Set up stuff for the Miller-Rabin checks. + */ + mp_int *two = mp_from_integer(2); + mp_int *pm1 = mp_copy(p); + mp_sub_integer_into(pm1, pm1, 1); + MontyContext *mc = monty_new(p); + mp_int *m_pm1 = monty_import(mc, pm1); + + bool known_bad = false; /* * Now, for each check ... */ - for (check = 0; check < checks; check++) { - Bignum w; - + for (unsigned check = 0; check < checks && !known_bad; check++) { /* - * Invent a random number between 1 and p-1 inclusive. + * Invent a random number between 1 and p-1. */ - while (1) { - w = bn_power_2(bits - 1); - for (i = 0; i < bits; i++) { - if (bitsleft <= 0) - bitsleft = 8, byte = random_byte(); - v = byte & 1; - byte >>= 1; - bitsleft--; - bignum_set_bit(w, i, v); - } - bn_restore_invariant(w); - if (bignum_cmp(w, p) >= 0 || bignum_cmp(w, Zero) == 0) { - freebn(w); - continue; - } - break; - } + mp_int *w = mp_random_in_range(two, pm1); + monty_import_into(mc, w, w); pfn(pfnparam, PROGFN_PROGRESS, phase, ++progress); /* * Compute w^q mod p. 
*/ - wqp = modpow(w, q, p); - freebn(w); + mp_int *wqp = monty_pow(mc, w, q); + mp_free(w); /* * See if this is 1, or if it is -1, or if it becomes -1 * when squared at most k-1 times. */ - if (bignum_cmp(wqp, One) == 0 || bignum_cmp(wqp, pm1) == 0) { - freebn(wqp); - continue; - } - for (i = 0; i < k - 1; i++) { - wqp2 = modmul(wqp, wqp, p); - freebn(wqp); - wqp = wqp2; - if (bignum_cmp(wqp, pm1) == 0) - break; - } - if (i < k - 1) { - freebn(wqp); - continue; + bool passed = false; + + if (mp_cmp_eq(wqp, monty_identity(mc)) || mp_cmp_eq(wqp, m_pm1)) { + passed = true; + } else { + for (size_t i = 0; i < k - 1; i++) { + monty_mul_into(mc, wqp, wqp, wqp); + if (mp_cmp_eq(wqp, m_pm1)) { + passed = true; + break; + } + } } - /* - * It didn't. Therefore, w is a witness for the - * compositeness of p. - */ - freebn(wqp); - freebn(p); - freebn(pm1); - freebn(q); - goto STARTOVER; + if (!passed) + known_bad = true; + + mp_free(wqp); + } + + mp_free(q); + mp_free(two); + mp_free(pm1); + monty_free(mc); + mp_free(m_pm1); + + if (known_bad) { + mp_free(p); + goto STARTOVER; } /* * We have a prime! 
*/ - freebn(q); - freebn(pm1); return p; } diff --git a/sshpubk.c b/sshpubk.c index 5072ff63..9a187b5c 100644 --- a/sshpubk.c +++ b/sshpubk.c @@ -10,6 +10,7 @@ #include #include "putty.h" +#include "mpint.h" #include "ssh.h" #include "misc.h" @@ -276,11 +277,11 @@ int rsa_ssh1_loadpub(const Filename *filename, BinarySink *bs, } memset(&key, 0, sizeof(key)); - key.exponent = bignum_from_decimal(expp); - key.modulus = bignum_from_decimal(modp); - if (atoi(bitsp) != bignum_bitcount(key.modulus)) { - freebn(key.exponent); - freebn(key.modulus); + key.exponent = mp_from_decimal(expp); + key.modulus = mp_from_decimal(modp); + if (atoi(bitsp) != mp_get_nbits(key.modulus)) { + mp_free(key.exponent); + mp_free(key.modulus); sfree(line); error = "key bit count does not match in SSH-1 public key file"; goto end; @@ -1360,10 +1361,9 @@ char *ssh1_pubkey_str(struct RSAKey *key) char *buffer; char *dec1, *dec2; - dec1 = bignum_decimal(key->exponent); - dec2 = bignum_decimal(key->modulus); - buffer = dupprintf("%d %s %s%s%s", bignum_bitcount(key->modulus), - dec1, dec2, + dec1 = mp_get_decimal(key->exponent); + dec2 = mp_get_decimal(key->modulus); + buffer = dupprintf("%zd %s %s%s%s", mp_get_nbits(key->modulus), dec1, dec2, key->comment ? " " : "", key->comment ? 
key->comment : ""); sfree(dec1); diff --git a/sshrsa.c b/sshrsa.c index afddbb7d..53767aae 100644 --- a/sshrsa.c +++ b/sshrsa.c @@ -8,13 +8,14 @@ #include #include "ssh.h" +#include "mpint.h" #include "misc.h" void BinarySource_get_rsa_ssh1_pub( BinarySource *src, struct RSAKey *rsa, RsaSsh1Order order) { unsigned bits; - Bignum e, m; + mp_int *e, *m; bits = get_uint32(src); if (order == RSA_SSH1_EXPONENT_FIRST) { @@ -29,10 +30,10 @@ void BinarySource_get_rsa_ssh1_pub( rsa->bits = bits; rsa->exponent = e; rsa->modulus = m; - rsa->bytes = (bignum_bitcount(m) + 7) / 8; + rsa->bytes = (mp_get_nbits(m) + 7) / 8; } else { - freebn(e); - freebn(m); + mp_free(e); + mp_free(m); } } @@ -44,7 +45,7 @@ void BinarySource_get_rsa_ssh1_priv( bool rsa_ssh1_encrypt(unsigned char *data, int length, struct RSAKey *key) { - Bignum b1, b2; + mp_int *b1, *b2; int i; unsigned char *p; @@ -62,17 +63,17 @@ bool rsa_ssh1_encrypt(unsigned char *data, int length, struct RSAKey *key) } data[key->bytes - length - 1] = 0; - b1 = bignum_from_bytes(data, key->bytes); + b1 = mp_from_bytes_be(make_ptrlen(data, key->bytes)); - b2 = modpow(b1, key->exponent, key->modulus); + b2 = mp_modpow(b1, key->exponent, key->modulus); p = data; for (i = key->bytes; i--;) { - *p++ = bignum_byte(b2, i); + *p++ = mp_get_byte(b2, i); } - freebn(b1); - freebn(b2); + mp_free(b1); + mp_free(b2); return true; } @@ -83,28 +84,33 @@ bool rsa_ssh1_encrypt(unsigned char *data, int length, struct RSAKey *key) * Uses Chinese Remainder Theorem to speed computation up over the * obvious implementation of a single big modpow. 
*/ -Bignum crt_modpow(Bignum base, Bignum exp, Bignum mod, - Bignum p, Bignum q, Bignum iqmp) +mp_int *crt_modpow(mp_int *base, mp_int *exp, mp_int *mod, + mp_int *p, mp_int *q, mp_int *iqmp) { - Bignum pm1, qm1, pexp, qexp, presult, qresult, diff, multiplier, ret0, ret; + mp_int *pm1, *qm1, *pexp, *qexp, *presult, *qresult; + mp_int *diff, *multiplier, *ret0, *ret; /* * Reduce the exponent mod phi(p) and phi(q), to save time when * exponentiating mod p and mod q respectively. Of course, since p * and q are prime, phi(p) == p-1 and similarly for q. */ - pm1 = copybn(p); - decbn(pm1); - qm1 = copybn(q); - decbn(qm1); - pexp = bigmod(exp, pm1); - qexp = bigmod(exp, qm1); + pm1 = mp_copy(p); + mp_sub_integer_into(pm1, pm1, 1); + qm1 = mp_copy(q); + mp_sub_integer_into(qm1, qm1, 1); + pexp = mp_mod(exp, pm1); + qexp = mp_mod(exp, qm1); /* * Do the two modpows. */ - presult = modpow(base, pexp, p); - qresult = modpow(base, qexp, q); + mp_int *base_mod_p = mp_mod(base, p); + presult = mp_modpow(base_mod_p, pexp, p); + mp_free(base_mod_p); + mp_int *base_mod_q = mp_mod(base, q); + qresult = mp_modpow(base_mod_q, qexp, q); + mp_free(base_mod_q); /* * Recombine the results. We want a value which is congruent to @@ -115,189 +121,66 @@ Bignum crt_modpow(Bignum base, Bignum exp, Bignum mod, * (which is congruent to qresult mod both primes), and add on * (presult-qresult) * (iqmp * q) which adjusts it to be congruent * to presult mod p without affecting its value mod q. + * + * (If presult-qresult < 0, we add p to it to keep it positive.) */ - if (bignum_cmp(presult, qresult) < 0) { - /* - * Can't subtract presult from qresult without first adding on - * p. 
- */ - Bignum tmp = presult; - presult = bigadd(presult, p); - freebn(tmp); - } - diff = bigsub(presult, qresult); - multiplier = bigmul(iqmp, q); - ret0 = bigmuladd(multiplier, diff, qresult); + unsigned presult_too_small = mp_cmp_hs(qresult, presult); + mp_cond_add_into(presult, presult, p, presult_too_small); + + diff = mp_sub(presult, qresult); + multiplier = mp_mul(iqmp, q); + ret0 = mp_mul(multiplier, diff); + mp_add_into(ret0, ret0, qresult); /* * Finally, reduce the result mod n. */ - ret = bigmod(ret0, mod); + ret = mp_mod(ret0, mod); /* * Free all the intermediate results before returning. */ - freebn(pm1); - freebn(qm1); - freebn(pexp); - freebn(qexp); - freebn(presult); - freebn(qresult); - freebn(diff); - freebn(multiplier); - freebn(ret0); + mp_free(pm1); + mp_free(qm1); + mp_free(pexp); + mp_free(qexp); + mp_free(presult); + mp_free(qresult); + mp_free(diff); + mp_free(multiplier); + mp_free(ret0); return ret; } /* - * This function is a wrapper on modpow(). It has the same effect as - * modpow(), but employs RSA blinding to protect against timing - * attacks and also uses the Chinese Remainder Theorem (implemented - * above, in crt_modpow()) to speed up the main operation. + * Wrapper on crt_modpow that looks up all the right values from an + * RSAKey. */ -static Bignum rsa_privkey_op(Bignum input, struct RSAKey *key) +static mp_int *rsa_privkey_op(mp_int *input, struct RSAKey *key) { - Bignum random, random_encrypted, random_inverse; - Bignum input_blinded, ret_blinded; - Bignum ret; - - SHA512_State ss; - unsigned char digest512[64]; - int digestused = lenof(digest512); - int hashseq = 0; - - /* - * Start by inventing a random number chosen uniformly from the - * range 2..modulus-1. 
(We do this by preparing a random number - * of the right length and retrying if it's greater than the - * modulus, to prevent any potential Bleichenbacher-like - * attacks making use of the uneven distribution within the - * range that would arise from just reducing our number mod n. - * There are timing implications to the potential retries, of - * course, but all they tell you is the modulus, which you - * already knew.) - * - * To preserve determinism and avoid Pageant needing to share - * the random number pool, we actually generate this `random' - * number by hashing stuff with the private key. - */ - while (1) { - int bits, byte, bitsleft, v; - random = copybn(key->modulus); - /* - * Find the topmost set bit. (This function will return its - * index plus one.) Then we'll set all bits from that one - * downwards randomly. - */ - bits = bignum_bitcount(random); - byte = 0; - bitsleft = 0; - while (bits--) { - if (bitsleft <= 0) { - bitsleft = 8; - /* - * Conceptually the following few lines are equivalent to - * byte = random_byte(); - */ - if (digestused >= lenof(digest512)) { - SHA512_Init(&ss); - put_data(&ss, "RSA deterministic blinding", 26); - put_uint32(&ss, hashseq); - put_mp_ssh2(&ss, key->private_exponent); - SHA512_Final(&ss, digest512); - hashseq++; - - /* - * Now hash that digest plus the signature - * input. - */ - SHA512_Init(&ss); - put_data(&ss, digest512, sizeof(digest512)); - put_mp_ssh2(&ss, input); - SHA512_Final(&ss, digest512); - - digestused = 0; - } - byte = digest512[digestused++]; - } - v = byte & 1; - byte >>= 1; - bitsleft--; - bignum_set_bit(random, bits, v); - } - bn_restore_invariant(random); - - /* - * Now check that this number is strictly greater than - * zero, and strictly less than modulus. - */ - if (bignum_cmp(random, Zero) <= 0 || - bignum_cmp(random, key->modulus) >= 0) { - freebn(random); - continue; - } - - /* - * Also, make sure it has an inverse mod modulus. 
- */ - random_inverse = modinv(random, key->modulus); - if (!random_inverse) { - freebn(random); - continue; - } - - break; - } - - /* - * RSA blinding relies on the fact that (xy)^d mod n is equal - * to (x^d mod n) * (y^d mod n) mod n. We invent a random pair - * y and y^d; then we multiply x by y, raise to the power d mod - * n as usual, and divide by y^d to recover x^d. Thus an - * attacker can't correlate the timing of the modpow with the - * input, because they don't know anything about the number - * that was input to the actual modpow. - * - * The clever bit is that we don't have to do a huge modpow to - * get y and y^d; we will use the number we just invented as - * _y^d_, and use the _public_ exponent to compute (y^d)^e = y - * from it, which is much faster to do. - */ - random_encrypted = crt_modpow(random, key->exponent, - key->modulus, key->p, key->q, key->iqmp); - input_blinded = modmul(input, random_encrypted, key->modulus); - ret_blinded = crt_modpow(input_blinded, key->private_exponent, - key->modulus, key->p, key->q, key->iqmp); - ret = modmul(ret_blinded, random_inverse, key->modulus); - - freebn(ret_blinded); - freebn(input_blinded); - freebn(random_inverse); - freebn(random_encrypted); - freebn(random); - - return ret; + return crt_modpow(input, key->private_exponent, + key->modulus, key->p, key->q, key->iqmp); } -Bignum rsa_ssh1_decrypt(Bignum input, struct RSAKey *key) +mp_int *rsa_ssh1_decrypt(mp_int *input, struct RSAKey *key) { return rsa_privkey_op(input, key); } -bool rsa_ssh1_decrypt_pkcs1(Bignum input, struct RSAKey *key, strbuf *outbuf) +bool rsa_ssh1_decrypt_pkcs1(mp_int *input, struct RSAKey *key, + strbuf *outbuf) { strbuf *data = strbuf_new(); bool success = false; BinarySource src[1]; { - Bignum *b = rsa_ssh1_decrypt(input, key); - int i; - for (i = (bignum_bitcount(key->modulus) + 7) / 8; i-- > 0 ;) { - put_byte(data, bignum_byte(b, i)); + mp_int *b = rsa_ssh1_decrypt(input, key); + for (size_t i = (mp_get_nbits(key->modulus) + 
7) / 8; i-- > 0 ;) { + put_byte(data, mp_get_byte(b, i)); } - freebn(b); + mp_free(b); } BinarySource_BARE_INIT(src, data->u, data->len); @@ -321,17 +204,16 @@ bool rsa_ssh1_decrypt_pkcs1(Bignum input, struct RSAKey *key, strbuf *outbuf) return success; } -static void append_hex_to_strbuf(strbuf *sb, Bignum *x) +static void append_hex_to_strbuf(strbuf *sb, mp_int *x) { if (sb->len > 0) put_byte(sb, ','); put_data(sb, "0x", 2); - int nibbles = (3 + bignum_bitcount(x)) / 4; - if (nibbles < 1) - nibbles = 1; - static const char hex[] = "0123456789abcdef"; - for (int i = nibbles; i--;) - put_byte(sb, hex[(bignum_byte(x, i / 2) >> (4 * (i % 2))) & 0xF]); + char *hex = mp_get_hex(x); + size_t hexlen = strlen(hex); + put_data(sb, hex, hexlen); + smemclr(hex, hexlen); + sfree(hex); } char *rsastr_fmt(struct RSAKey *key) @@ -361,7 +243,7 @@ char *rsa_ssh1_fingerprint(struct RSAKey *key) MD5Final(digest, &md5c); out = strbuf_new(); - strbuf_catf(out, "%d ", bignum_bitcount(key->modulus)); + strbuf_catf(out, "%d ", mp_get_nbits(key->modulus)); for (i = 0; i < 16; i++) strbuf_catf(out, "%s%02x", i ? ":" : "", digest[i]); if (key->comment) @@ -376,34 +258,32 @@ char *rsa_ssh1_fingerprint(struct RSAKey *key) */ bool rsa_verify(struct RSAKey *key) { - Bignum n, ed, pm1, qm1; - int cmp; + mp_int *n, *ed, *pm1, *qm1; + unsigned ok = 1; + + /* Preliminary checks: p,q must actually be nonzero. */ + if (mp_eq_integer(key->p, 0) | mp_eq_integer(key->q, 0)) + return false; /* n must equal pq. */ - n = bigmul(key->p, key->q); - cmp = bignum_cmp(n, key->modulus); - freebn(n); - if (cmp != 0) - return false; + n = mp_mul(key->p, key->q); + ok &= mp_cmp_eq(n, key->modulus); + mp_free(n); /* e * d must be congruent to 1, modulo (p-1) and modulo (q-1). 
*/ - pm1 = copybn(key->p); - decbn(pm1); - ed = modmul(key->exponent, key->private_exponent, pm1); - freebn(pm1); - cmp = bignum_cmp(ed, One); - freebn(ed); - if (cmp != 0) - return false; + pm1 = mp_copy(key->p); + mp_sub_integer_into(pm1, pm1, 1); + ed = mp_modmul(key->exponent, key->private_exponent, pm1); + mp_free(pm1); + ok &= mp_eq_integer(ed, 1); + mp_free(ed); - qm1 = copybn(key->q); - decbn(qm1); - ed = modmul(key->exponent, key->private_exponent, qm1); - freebn(qm1); - cmp = bignum_cmp(ed, One); - freebn(ed); - if (cmp != 0) - return false; + qm1 = mp_copy(key->q); + mp_sub_integer_into(qm1, qm1, 1); + ed = mp_modmul(key->exponent, key->private_exponent, qm1); + mp_free(qm1); + ok &= mp_eq_integer(ed, 1); + mp_free(ed); /* * Ensure p > q. @@ -413,33 +293,18 @@ bool rsa_verify(struct RSAKey *key) * should instead flip them round into the canonical order of * p > q. This also involves regenerating iqmp. */ - if (bignum_cmp(key->p, key->q) <= 0) { - Bignum tmp = key->p; - key->p = key->q; - key->q = tmp; + unsigned swap_pq = mp_cmp_hs(key->q, key->p); + mp_cond_swap(key->p, key->q, swap_pq); + mp_free(key->iqmp); + key->iqmp = mp_invert(key->q, key->p); - freebn(key->iqmp); - key->iqmp = modinv(key->q, key->p); - if (!key->iqmp) - return false; - } - - /* - * Ensure iqmp * q is congruent to 1, modulo p. - */ - n = modmul(key->iqmp, key->q, key->p); - cmp = bignum_cmp(n, One); - freebn(n); - if (cmp != 0) - return false; - - return true; + return ok; } void rsa_ssh1_public_blob(BinarySink *bs, struct RSAKey *key, RsaSsh1Order order) { - put_uint32(bs, bignum_bitcount(key->modulus)); + put_uint32(bs, mp_get_nbits(key->modulus)); if (order == RSA_SSH1_EXPONENT_FIRST) { put_mp_ssh1(bs, key->exponent); put_mp_ssh1(bs, key->modulus); @@ -459,8 +324,8 @@ int rsa_ssh1_public_blob_len(void *data, int maxlen) /* Expect a length word, then exponent and modulus. (It doesn't * even matter which order.) 
*/ get_uint32(src); - freebn(get_mp_ssh1(src)); - freebn(get_mp_ssh1(src)); + mp_free(get_mp_ssh1(src)); + mp_free(get_mp_ssh1(src)); if (get_err(src)) return -1; @@ -472,19 +337,19 @@ int rsa_ssh1_public_blob_len(void *data, int maxlen) void freersapriv(struct RSAKey *key) { if (key->private_exponent) { - freebn(key->private_exponent); + mp_free(key->private_exponent); key->private_exponent = NULL; } if (key->p) { - freebn(key->p); + mp_free(key->p); key->p = NULL; } if (key->q) { - freebn(key->q); + mp_free(key->q); key->q = NULL; } if (key->iqmp) { - freebn(key->iqmp); + mp_free(key->iqmp); key->iqmp = NULL; } } @@ -493,11 +358,11 @@ void freersakey(struct RSAKey *key) { freersapriv(key); if (key->modulus) { - freebn(key->modulus); + mp_free(key->modulus); key->modulus = NULL; } if (key->exponent) { - freebn(key->exponent); + mp_free(key->exponent); key->exponent = NULL; } if (key->comment) { @@ -642,7 +507,7 @@ static int rsa2_pubkey_bits(const ssh_keyalg *self, ptrlen pub) return -1; rsa = container_of(sshk, struct RSAKey, sshk); - ret = bignum_bitcount(rsa->modulus); + ret = mp_get_nbits(rsa->modulus); rsa2_freekey(&rsa->sshk); return ret; @@ -738,8 +603,7 @@ static bool rsa2_verify(ssh_key *key, ptrlen sig, ptrlen data) struct RSAKey *rsa = container_of(key, struct RSAKey, sshk); BinarySource src[1]; ptrlen type, in_pl; - Bignum in, out; - bool toret; + mp_int *in, *out; BinarySource_BARE_INIT(src, sig.ptr, sig.len); type = get_string(src); @@ -751,28 +615,27 @@ static bool rsa2_verify(ssh_key *key, ptrlen sig, ptrlen data) * BUG_SSH2_RSA_PADDING at the other end, we tolerate it if it's * there.) So we can't use get_mp_ssh2, which enforces that * leading-byte scheme; instead we use get_string and - * bignum_from_bytes, which will tolerate anything. + * mp_from_bytes_be, which will tolerate anything. 
*/ in_pl = get_string(src); if (get_err(src) || !ptrlen_eq_string(type, "ssh-rsa")) return false; - in = bignum_from_bytes(in_pl.ptr, in_pl.len); - out = modpow(in, rsa->exponent, rsa->modulus); - freebn(in); + in = mp_from_bytes_be(in_pl); + out = mp_modpow(in, rsa->exponent, rsa->modulus); + mp_free(in); - toret = true; + unsigned diff = 0; - size_t nbytes = (bignum_bitcount(rsa->modulus) + 7) / 8; + size_t nbytes = (mp_get_nbits(rsa->modulus) + 7) / 8; unsigned char *bytes = rsa_pkcs1_signature_string(nbytes, &ssh_sha1, data); for (size_t i = 0; i < nbytes; i++) - if (bytes[nbytes-1 - i] != bignum_byte(out, i)) - toret = false; + diff |= bytes[nbytes-1 - i] ^ mp_get_byte(out, i); smemclr(bytes, nbytes); sfree(bytes); - freebn(out); + mp_free(out); - return toret; + return diff == 0; } static void rsa2_sign(ssh_key *key, const void *data, int datalen, @@ -780,8 +643,8 @@ static void rsa2_sign(ssh_key *key, const void *data, int datalen, { struct RSAKey *rsa = container_of(key, struct RSAKey, sshk); unsigned char *bytes; - int nbytes; - Bignum in, out; + size_t nbytes; + mp_int *in, *out; const struct ssh_hashalg *halg; const char *sign_alg_name; @@ -796,24 +659,24 @@ static void rsa2_sign(ssh_key *key, const void *data, int datalen, sign_alg_name = "ssh-rsa"; } - nbytes = (bignum_bitcount(rsa->modulus) + 7) / 8; + nbytes = (mp_get_nbits(rsa->modulus) + 7) / 8; bytes = rsa_pkcs1_signature_string( nbytes, halg, make_ptrlen(data, datalen)); - in = bignum_from_bytes(bytes, nbytes); + in = mp_from_bytes_be(make_ptrlen(bytes, nbytes)); smemclr(bytes, nbytes); sfree(bytes); out = rsa_privkey_op(in, rsa); - freebn(in); + mp_free(in); put_stringz(bs, sign_alg_name); - nbytes = (bignum_bitcount(out) + 7) / 8; + nbytes = (mp_get_nbits(out) + 7) / 8; put_uint32(bs, nbytes); for (size_t i = 0; i < nbytes; i++) - put_byte(bs, bignum_byte(out, nbytes - 1 - i)); + put_byte(bs, mp_get_byte(out, nbytes - 1 - i)); - freebn(out); + mp_free(out); } const ssh_keyalg ssh_rsa = { @@ 
-852,7 +715,7 @@ void ssh_rsakex_freekey(struct RSAKey *key) int ssh_rsakex_klen(struct RSAKey *rsa) { - return bignum_bitcount(rsa->modulus); + return mp_get_nbits(rsa->modulus); } static void oaep_mask(const struct ssh_hashalg *h, void *seed, int seedlen, @@ -885,7 +748,7 @@ void ssh_rsakex_encrypt(const struct ssh_hashalg *h, unsigned char *in, int inlen, unsigned char *out, int outlen, struct RSAKey *rsa) { - Bignum b1, b2; + mp_int *b1, *b2; int k, i; char *p; const int HLEN = h->hlen; @@ -918,7 +781,7 @@ void ssh_rsakex_encrypt(const struct ssh_hashalg *h, */ /* k denotes the length in octets of the RSA modulus. */ - k = (7 + bignum_bitcount(rsa->modulus)) / 8; + k = (7 + mp_get_nbits(rsa->modulus)) / 8; /* The length of the input data must be at most k - 2hLen - 2. */ assert(inlen > 0 && inlen <= k - 2*HLEN - 2); @@ -961,24 +824,24 @@ void ssh_rsakex_encrypt(const struct ssh_hashalg *h, * Now `out' contains precisely the data we want to * RSA-encrypt. */ - b1 = bignum_from_bytes(out, outlen); - b2 = modpow(b1, rsa->exponent, rsa->modulus); + b1 = mp_from_bytes_be(make_ptrlen(out, outlen)); + b2 = mp_modpow(b1, rsa->exponent, rsa->modulus); p = (char *)out; for (i = outlen; i--;) { - *p++ = bignum_byte(b2, i); + *p++ = mp_get_byte(b2, i); } - freebn(b1); - freebn(b2); + mp_free(b1); + mp_free(b2); /* * And we're done. */ } -Bignum ssh_rsakex_decrypt(const struct ssh_hashalg *h, ptrlen ciphertext, - struct RSAKey *rsa) +mp_int *ssh_rsakex_decrypt(const struct ssh_hashalg *h, ptrlen ciphertext, + struct RSAKey *rsa) { - Bignum b1, b2; + mp_int *b1, *b2; int outlen, i; unsigned char *out; unsigned char labelhash[64]; @@ -992,18 +855,18 @@ Bignum ssh_rsakex_decrypt(const struct ssh_hashalg *h, ptrlen ciphertext, /* The length of the encrypted data should be exactly the length * in octets of the RSA modulus.. 
*/ - outlen = (7 + bignum_bitcount(rsa->modulus)) / 8; + outlen = (7 + mp_get_nbits(rsa->modulus)) / 8; if (ciphertext.len != outlen) return NULL; /* Do the RSA decryption, and extract the result into a byte array. */ - b1 = bignum_from_bytes(ciphertext.ptr, ciphertext.len); + b1 = mp_from_bytes_be(ciphertext); b2 = rsa_privkey_op(b1, rsa); out = snewn(outlen, unsigned char); for (i = 0; i < outlen; i++) - out[i] = bignum_byte(b2, outlen-1-i); - freebn(b1); - freebn(b2); + out[i] = mp_get_byte(b2, outlen-1-i); + mp_free(b1); + mp_free(b2); /* Do the OAEP masking operations, in the reverse order from encryption */ oaep_mask(h, out+HLEN+1, outlen-HLEN-1, out+1, HLEN); @@ -1038,7 +901,7 @@ Bignum ssh_rsakex_decrypt(const struct ssh_hashalg *h, ptrlen ciphertext, b1 = get_mp_ssh2(src); sfree(out); if (get_err(src) || get_avail(src) != 0) { - freebn(b1); + mp_free(b1); return NULL; } diff --git a/sshrsag.c b/sshrsag.c index fad23d1a..55868fbd 100644 --- a/sshrsag.c +++ b/sshrsag.c @@ -5,13 +5,13 @@ #include #include "ssh.h" +#include "mpint.h" #define RSA_EXPONENT 37 /* we like this prime */ int rsa_generate(struct RSAKey *key, int bits, progfn_t pfn, void *pfnparam) { - Bignum pm1, qm1, phi_n; unsigned pfirst, qfirst; key->sshk.vt = &ssh_rsa; @@ -55,7 +55,7 @@ int rsa_generate(struct RSAKey *key, int bits, progfn_t pfn, /* * We don't generate e; we just use a standard one always. */ - key->exponent = bignum_from_long(RSA_EXPONENT); + mp_int *exponent = mp_from_integer(RSA_EXPONENT); /* * Generate p and q: primes with combined length `bits', not @@ -65,19 +65,15 @@ int rsa_generate(struct RSAKey *key, int bits, progfn_t pfn, * a prime e, we can simplify the criterion.) 
*/ invent_firstbits(&pfirst, &qfirst); - key->p = primegen(bits / 2, RSA_EXPONENT, 1, NULL, - 1, pfn, pfnparam, pfirst); - key->q = primegen(bits - bits / 2, RSA_EXPONENT, 1, NULL, - 2, pfn, pfnparam, qfirst); + mp_int *p = primegen(bits / 2, RSA_EXPONENT, 1, NULL, + 1, pfn, pfnparam, pfirst); + mp_int *q = primegen(bits - bits / 2, RSA_EXPONENT, 1, NULL, + 2, pfn, pfnparam, qfirst); /* * Ensure p > q, by swapping them if not. */ - if (bignum_cmp(key->p, key->q) < 0) { - Bignum t = key->p; - key->p = key->q; - key->q = t; - } + mp_cond_swap(p, q, mp_cmp_hs(q, p)); /* * Now we have p, q and e. All we need to do now is work out @@ -85,27 +81,31 @@ int rsa_generate(struct RSAKey *key, int bits, progfn_t pfn, * and (q^-1 mod p). */ pfn(pfnparam, PROGFN_PROGRESS, 3, 1); - key->modulus = bigmul(key->p, key->q); + mp_int *modulus = mp_mul(p, q); pfn(pfnparam, PROGFN_PROGRESS, 3, 2); - pm1 = copybn(key->p); - decbn(pm1); - qm1 = copybn(key->q); - decbn(qm1); - phi_n = bigmul(pm1, qm1); + mp_int *pm1 = mp_copy(p); + mp_sub_integer_into(pm1, pm1, 1); + mp_int *qm1 = mp_copy(q); + mp_sub_integer_into(qm1, qm1, 1); + mp_int *phi_n = mp_mul(pm1, qm1); pfn(pfnparam, PROGFN_PROGRESS, 3, 3); - freebn(pm1); - freebn(qm1); - key->private_exponent = modinv(key->exponent, phi_n); - assert(key->private_exponent); + mp_free(pm1); + mp_free(qm1); + mp_int *private_exponent = mp_invert(exponent, phi_n); pfn(pfnparam, PROGFN_PROGRESS, 3, 4); - key->iqmp = modinv(key->q, key->p); - assert(key->iqmp); + mp_free(phi_n); + mp_int *iqmp = mp_invert(q, p); pfn(pfnparam, PROGFN_PROGRESS, 3, 5); /* - * Clean up temporary numbers. + * Populate the returned structure. 
*/ - freebn(phi_n); + key->modulus = modulus; + key->exponent = exponent; + key->private_exponent = private_exponent; + key->p = p; + key->q = q; + key->iqmp = iqmp; return 1; } diff --git a/sshserver.h b/sshserver.h index 2dc43d95..4f720061 100644 --- a/sshserver.h +++ b/sshserver.h @@ -62,7 +62,7 @@ char *auth_ssh1int_challenge(AuthPolicy *, unsigned method, ptrlen username); bool auth_ssh1int_response(AuthPolicy *, ptrlen response); struct RSAKey *auth_publickey_ssh1( - AuthPolicy *ap, ptrlen username, Bignum rsa_modulus); + AuthPolicy *ap, ptrlen username, mp_int *rsa_modulus); /* auth_successful returns false if further authentication is needed */ bool auth_successful(AuthPolicy *, ptrlen username, unsigned method); diff --git a/testbn.c b/testbn.c deleted file mode 100644 index 32b1b77c..00000000 --- a/testbn.c +++ /dev/null @@ -1,275 +0,0 @@ -/* - * testbn.c: standalone test program for the bignum code. - */ - -/* - * Accepts input on standard input, in the form generated by - * testdata/bignum.py. - */ - -#include -#include -#include -#include - -#include "ssh.h" -#include "sshbn.h" - -void modalfatalbox(const char *p, ...) -{ - va_list ap; - fprintf(stderr, "FATAL ERROR: "); - va_start(ap, p); - vfprintf(stderr, p, ap); - va_end(ap); - fputc('\n', stderr); - exit(1); -} - -int random_byte(void) -{ - modalfatalbox("random_byte called in testbn"); - return 0; -} - -void queue_idempotent_callback(IdempotentCallback *ic) { assert(0); } - -#define fromxdigit(c) ( (c)>'9' ? 
((c)&0xDF) - 'A' + 10 : (c) - '0' ) - -/* For Unix in particular, but harmless if this main() is reused elsewhere */ -const bool buildinfo_gtk_relevant = false; - -int main(int argc, char **argv) -{ - char *buf; - int line = 0; - int passes = 0, fails = 0; - - printf("BIGNUM_INT_BITS = %d\n", (int)BIGNUM_INT_BITS); - - while ((buf = fgetline(stdin)) != NULL) { - int maxlen = strlen(buf); - unsigned char *data = snewn(maxlen, unsigned char); - unsigned char *ptrs[5], *q; - int ptrnum; - char *bufp = buf; - - line++; - - q = data; - ptrnum = 0; - - while (*bufp && !isspace((unsigned char)*bufp)) - bufp++; - if (*bufp) - *bufp++ = '\0'; - - while (*bufp) { - char *start, *end; - int i; - - while (*bufp && !isxdigit((unsigned char)*bufp)) - bufp++; - start = bufp; - - if (!*bufp) - break; - - while (*bufp && isxdigit((unsigned char)*bufp)) - bufp++; - end = bufp; - - if (ptrnum >= lenof(ptrs)) - break; - ptrs[ptrnum++] = q; - - for (i = -((end - start) & 1); i < end-start; i += 2) { - unsigned char val = (i < 0 ? 
0 : fromxdigit(start[i])); - val = val * 16 + fromxdigit(start[i+1]); - *q++ = val; - } - } - - if (ptrnum < lenof(ptrs)) - ptrs[ptrnum] = q; - - if (!strcmp(buf, "mul")) { - Bignum a, b, c, p; - - if (ptrnum != 3) { - printf("%d: mul with %d parameters, expected 3\n", line, ptrnum); - exit(1); - } - a = bignum_from_bytes(ptrs[0], ptrs[1]-ptrs[0]); - b = bignum_from_bytes(ptrs[1], ptrs[2]-ptrs[1]); - c = bignum_from_bytes(ptrs[2], ptrs[3]-ptrs[2]); - p = bigmul(a, b); - - if (bignum_cmp(c, p) == 0) { - passes++; - } else { - char *as = bignum_decimal(a); - char *bs = bignum_decimal(b); - char *cs = bignum_decimal(c); - char *ps = bignum_decimal(p); - - printf("%d: fail: %s * %s gave %s expected %s\n", - line, as, bs, ps, cs); - fails++; - - sfree(as); - sfree(bs); - sfree(cs); - sfree(ps); - } - freebn(a); - freebn(b); - freebn(c); - freebn(p); - } else if (!strcmp(buf, "modmul")) { - Bignum a, b, m, c, p; - - if (ptrnum != 4) { - printf("%d: modmul with %d parameters, expected 4\n", - line, ptrnum); - exit(1); - } - a = bignum_from_bytes(ptrs[0], ptrs[1]-ptrs[0]); - b = bignum_from_bytes(ptrs[1], ptrs[2]-ptrs[1]); - m = bignum_from_bytes(ptrs[2], ptrs[3]-ptrs[2]); - c = bignum_from_bytes(ptrs[3], ptrs[4]-ptrs[3]); - p = modmul(a, b, m); - - if (bignum_cmp(c, p) == 0) { - passes++; - } else { - char *as = bignum_decimal(a); - char *bs = bignum_decimal(b); - char *ms = bignum_decimal(m); - char *cs = bignum_decimal(c); - char *ps = bignum_decimal(p); - - printf("%d: fail: %s * %s mod %s gave %s expected %s\n", - line, as, bs, ms, ps, cs); - fails++; - - sfree(as); - sfree(bs); - sfree(ms); - sfree(cs); - sfree(ps); - } - freebn(a); - freebn(b); - freebn(m); - freebn(c); - freebn(p); - } else if (!strcmp(buf, "pow")) { - Bignum base, expt, modulus, expected, answer; - - if (ptrnum != 4) { - printf("%d: pow with %d parameters, expected 4\n", line, ptrnum); - exit(1); - } - - base = bignum_from_bytes(ptrs[0], ptrs[1]-ptrs[0]); - expt = bignum_from_bytes(ptrs[1], 
ptrs[2]-ptrs[1]); - modulus = bignum_from_bytes(ptrs[2], ptrs[3]-ptrs[2]); - expected = bignum_from_bytes(ptrs[3], ptrs[4]-ptrs[3]); - answer = modpow(base, expt, modulus); - - if (bignum_cmp(expected, answer) == 0) { - passes++; - } else { - char *as = bignum_decimal(base); - char *bs = bignum_decimal(expt); - char *cs = bignum_decimal(modulus); - char *ds = bignum_decimal(answer); - char *ps = bignum_decimal(expected); - - printf("%d: fail: %s ^ %s mod %s gave %s expected %s\n", - line, as, bs, cs, ds, ps); - fails++; - - sfree(as); - sfree(bs); - sfree(cs); - sfree(ds); - sfree(ps); - } - freebn(base); - freebn(expt); - freebn(modulus); - freebn(expected); - freebn(answer); - } else if (!strcmp(buf, "divmod")) { - Bignum n, d, expect_q, expect_r, answer_q, answer_r; - bool fail; - - if (ptrnum != 4) { - printf("%d: divmod with %d parameters, expected 4\n", line, ptrnum); - exit(1); - } - - n = bignum_from_bytes(ptrs[0], ptrs[1]-ptrs[0]); - d = bignum_from_bytes(ptrs[1], ptrs[2]-ptrs[1]); - expect_q = bignum_from_bytes(ptrs[2], ptrs[3]-ptrs[2]); - expect_r = bignum_from_bytes(ptrs[3], ptrs[4]-ptrs[3]); - answer_q = bigdiv(n, d); - answer_r = bigmod(n, d); - - fail = false; - if (bignum_cmp(expect_q, answer_q) != 0) { - char *as = bignum_decimal(n); - char *bs = bignum_decimal(d); - char *cs = bignum_decimal(answer_q); - char *ds = bignum_decimal(expect_q); - - printf("%d: fail: %s / %s gave %s expected %s\n", - line, as, bs, cs, ds); - fail = true; - - sfree(as); - sfree(bs); - sfree(cs); - sfree(ds); - } - if (bignum_cmp(expect_r, answer_r) != 0) { - char *as = bignum_decimal(n); - char *bs = bignum_decimal(d); - char *cs = bignum_decimal(answer_r); - char *ds = bignum_decimal(expect_r); - - printf("%d: fail: %s mod %s gave %s expected %s\n", - line, as, bs, cs, ds); - fail = true; - - sfree(as); - sfree(bs); - sfree(cs); - sfree(ds); - } - - freebn(n); - freebn(d); - freebn(expect_q); - freebn(expect_r); - freebn(answer_q); - freebn(answer_r); - - if (fail) - 
fails++; - else - passes++; - } else { - printf("%d: unrecognised test keyword: '%s'\n", line, buf); - exit(1); - } - - sfree(buf); - sfree(data); - } - - printf("passed %d failed %d total %d\n", passes, fails, passes+fails); - return fails != 0; -} diff --git a/testdata/bignum.py b/testdata/bignum.py deleted file mode 100644 index 15ffe319..00000000 --- a/testdata/bignum.py +++ /dev/null @@ -1,140 +0,0 @@ -# Generate test cases for a bignum implementation. - -import sys - -# integer square roots -def sqrt(n): - d = long(n) - a = 0L - # b must start off as a power of 4 at least as large as n - ndigits = len(hex(long(n))) - b = 1L << (ndigits*4) - while 1: - a = a >> 1 - di = 2*a + b - if di <= d: - d = d - di - a = a + b - b = b >> 2 - if b == 0: break - return a - -# continued fraction convergents of a rational -def confrac(n, d): - coeffs = [(1,0),(0,1)] - while d != 0: - i = n / d - n, d = d, n % d - coeffs.append((coeffs[-2][0]-i*coeffs[-1][0], - coeffs[-2][1]-i*coeffs[-1][1])) - return coeffs - -def findprod(target, dir = +1, ratio=(1,1)): - # Return two numbers whose product is as close as we can get to - # 'target', with any deviation having the sign of 'dir', and in - # the same approximate ratio as 'ratio'. - - r = sqrt(target * ratio[0] * ratio[1]) - a = r / ratio[1] - b = r / ratio[0] - if a*b * dir < target * dir: - a = a + 1 - b = b + 1 - assert a*b * dir >= target * dir - - best = (a,b,a*b) - - while 1: - improved = 0 - a, b = best[:2] - - coeffs = confrac(a, b) - for c in coeffs: - # a*c[0]+b*c[1] is as close as we can get it to zero. So - # if we replace a and b with a+c[1] and b+c[0], then that - # will be added to our product, along with c[0]*c[1]. - da, db = c[1], c[0] - - # Flip signs as appropriate. - if (a+da) * (b+db) * dir < target * dir: - da, db = -da, -db - - # Multiply up. 
We want to get as close as we can to a - # solution of the quadratic equation in n - # - # (a + n da) (b + n db) = target - # => n^2 da db + n (b da + a db) + (a b - target) = 0 - A,B,C = da*db, b*da+a*db, a*b-target - discrim = B^2-4*A*C - if discrim > 0 and A != 0: - root = sqrt(discrim) - vals = [] - vals.append((-B + root) / (2*A)) - vals.append((-B - root) / (2*A)) - if root * root != discrim: - root = root + 1 - vals.append((-B + root) / (2*A)) - vals.append((-B - root) / (2*A)) - - for n in vals: - ap = a + da*n - bp = b + db*n - pp = ap*bp - if pp * dir >= target * dir and pp * dir < best[2]*dir: - best = (ap, bp, pp) - improved = 1 - - if not improved: - break - - return best - -def hexstr(n): - s = hex(n) - if s[:2] == "0x": s = s[2:] - if s[-1:] == "L": s = s[:-1] - return s - -# Tests of multiplication which exercise the propagation of the last -# carry to the very top of the number. -for i in range(1,4200): - a, b, p = findprod((1<= 0 - print "divmod", hexstr(n), hexstr(d), hexstr(n/d), hexstr(n%d) - -# Simple tests of modmul. -for ai in range(20, 200, 60): - a = sqrt(3<<(2*ai-1)) - for bi in range(20, 200, 60): - b = sqrt(5<<(2*bi-1)) - for m in range(20, 600, 32): - m = sqrt(2**(m+1)) - print "modmul", hexstr(a), hexstr(b), hexstr(m), hexstr((a*b) % m) - -# Simple tests of modpow. -for i in range(64, 4097, 63): - modulus = sqrt(1<<(2*i-1)) | 1 - base = sqrt(3*modulus*modulus) % modulus - expt = sqrt(modulus*modulus*2/5) - print "pow", hexstr(base), hexstr(expt), hexstr(modulus), hexstr(pow(base, expt, modulus)) - if i <= 1024: - # Test even moduli, which can't be done by Montgomery. 
- modulus = modulus - 1 - print "pow", hexstr(base), hexstr(expt), hexstr(modulus), hexstr(pow(base, expt, modulus)) - print "pow", hexstr(i), hexstr(expt), hexstr(modulus), hexstr(pow(i, expt, modulus)) diff --git a/testdata/bignumtests.txt b/testdata/bignumtests.txt deleted file mode 100644 index 4cb7b0d4..00000000 --- a/testdata/bignumtests.txt +++ /dev/null @@ -1,205 +0,0 @@ -mul 6fcb0ed13247be24ded416f0d08612eb67d81017568e424698c442e4d7454d64315ffb51ce7af0bc6450c372d95c35967fde3adf6cec11a5c1e60847eccc8b4329b600bf917dbe4cab07ab82fbc439b5300000000000000000000000000000000000000000000000000000000030000000000000000000000000000000000000000000adf6cec11a5c1e60847eccc8b4329b2e4d7454d64315ffb51ce7af0bc6450c372d95c35967fde3adf6cec11a5c1e60847eccc8b4329b600bf917dbe4cab07ab82fbc439b5300000000000000000000000000000000000000000000000000000000030000000000000000000000000000000000000000000adf6cec11a5c1e60847eccc8b4329b600bf917dbe4cab07ab82fbc439b5300000000000000000000000000600bf917dbe4cab07ab82fbc439b5300000000000000000000000000000000000000000000000000000000030000000000000000000000000000000000000005967fde3adf6cec11a5c1e60847eccc8b4329b600bf917dbe4cab07ab82fbc439b53000000000000000000000000000000000d0000000000000000000000030000000000000000000000000000000000000000000adf6cec11a5c1e60847eccc8b4329b2e4d7454d64315ffb51ce7af0bc6450c372d95c35967fde3adf6cec11a5c1e60847eccc8b4329b600bf917dbe4cab07ab82fbc439b530d000000000000000000000000000000000000000000000000000000030000000000000000000000000000000000000000000adf6cec11a5c1e60847eccc8b4329b600bf917dbe4cab07ab82fbc439b5300000000000000000000000000600bf917dbe4cab07ab82fbc439b53000000000000000000000000000000000000000000000000000000000300000000000000000000000000000000000000000000000000000000000000000000000000d2b00000000000000000000000000000000000000000d2b000000000000000000000000 5472abe25fd603c76d0790f25654cfcdad1c78b8d78f0043b544a82bd2f00000000000000000000000000000000000d2b00000000000000000000000000000000000000000d2b000000000000000000000000 
24e0b458bbaa8f7f910bb243b2d8072f7c19f6b6b5da853b24621fe88c2833151e92cc3e22d3127aa16eeda3bf38eb59768e3b212f87e19fc0a18bf71e12baa8322778957ba93757abb8f584595e6510d943b3bb1ca9de1f034a2a0c31ab11156d1da7181ba0163c761ce7bb7def818e7900f8dd1cdd5b5943111bcc50b9b7a5845a1da04c70edf907604814320c59c0cb2ca1171de6c5c3e74e1a9628397f2de04459f13ceda25b1e3e3e102cd59a09d74f61151af91514689bb5120cba14ce64981190c6641e440e5d757f352f2814605cf8a9d0e0d710a1da7181ba0163c761ce7bb7def818e7900f8dd1cdd5b5943111bcc50b9b7a5845a1da04c70edf907604814320c59c0cb2cb17fe9b421f43acbe48d7ddceecd1d544f07fcbbed77175dd4342c31fd7b27c18fe581d6d4205dd52575894ecd965741a727d5a3c0ca1900553a89d5beacce763a00a290ebc3588f5e2accd1bb7a9b9ed93326d9e18438a9eb3a493c5e8fbb1fc57d1b057019c415a1f71e1b8e9f23e387990c1108dbf518a6218d207ed544f07fcbbed77175dd4342c322761217bc596aa22891716241df1342c9d73f75885cf0720c7f897a146394f4551aa5845a1da04c70edf907604814320c59c1765ba1171de6c5c3e74e1a9628397f2de04459f13cee4d8a1e3e3e102cd59a09d74f61151af91514689bb5120cba14ce64981190c6641e4412a74839b40d064580e65b061f43259e13a4e3a31cf3a6cad202f84218ae48e7900f8dd1cdd5b5943111bcc50b9b7a62f891da04c70edf907604814320c59c0cb2cb17fe9b4cd233acbe48d7ddceecd1d544f07fcbbed77175dd4342c31fd7b27c18fe581d6d4205dd52575894ecd965741a727d5a3c0ca1900553a89d5beacce763a00a290ebc3588f5e2accd1bb7a9b9ed93326d9e18438a9eb3a493a86ad00cb201d045e09a2c8db901dff75c9866404b4ee3f723c308da22a716cecc595685e9a9fe7c607d914cf670366158da22a716cecc595685e9a9fe78087b9ec56609129e5000000000000000000000000000000015aca7200000000000000000000000000000000000000ad6539000000000000000000000000000000000000000000000000 -pow 4240f7064c1a41a5ec4dc53f528552ea5ec963dd373c59ca03b2f2a161cdf531d1cd5c30cc48280deedb3656dcca416 393e4b8b7fdbb26aca528ce01295f4d736806e48ca53b076cb48e2039026b61dd4ae6356aa5d4be633db00df1263807 5a827999fcef32422cbec4d9baa55f4f8eb7b05d449dd426768bd642c199cc8aa57e41821d5c5161d458ff37ee41ed8 54b00f9069773b7bb477cf039383e812f645d2afa949378e86fb3a9576dbabe44e4ebc35cb7e3e3a566083dc4f258c8 
-mul 169d8e69f29bd8497ec8ca970f390d39fca5aed949a931e1cf9f0a9177bd4d7156612a1841593fbc28aed186f471dc905dfe46114e392df944034cf 16a3aecd3581bb879be79d9072635f52b33ada60b6faeb0b5821e1eebd60000000000000000000000000000000000000000000000000000000000000000000000000024e5a0729c792d783819919713a5161cfaeb69da6faeb0b5821e1eebd600000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000007ef527ae46e531d 2000000000000000000000000000000000000000069b4e4f331f6204e5bd7808dc9b8107a41d5c358230b15a39b9e342c17f3ea02e43ca6f5293550a6d7b38497443725b83d5dbb4b20862b4375bcbb494a7891dc7248727bda5a258b93b3c95bb633a3d26d7f6074ed0bf947b23a29ca9d546ae4e3bfe71a15d583c9b4e5f82f8f022218291fb6d5aa4eb6f30aefa9398a5a75e415d150682a0a000000000000000000000000000000000000000000000000000000000000000000000000000000b373462d6da27dab3fffe3684d57a1835d8e5d3384bdde0887e1827a8a512c47c8eb28686773a45b009aaffdf5cf2d57b1fe1e16122b6ef02c6c990457ba9a9571873 -pow 8483e4834c 727c9716ffb764d5 88888888888888888888888888888888cec888888888888888888888888888888888888888883978888888d8888888888888888888888888d8888888888d8888888b504f333f9de6485 5ad35993ffd525c7f0e2fdd30c77eca0ea7a7f328a6a998bcb63271c1f52ad472e33958ef17584e1c7a2b8f9e4942ccb7594e0b38c331a69f89ae7b42fbcfba9509aa610a6b34c5e2ef -pow 8483ee0c9834834c 727c9716ffb764d5 888888888888888888888888888888880068888888888888888888888888888888888888883978888888d8888888888888888888888888d8888888888d88888888888888889de6485 1c04e7e3a7129c5ef42180d2f8fbe0804b8f3a35b8f2d593b4ec0ce478fd78c4e22170c03d631e91c4e8d84d7606716b0576afa60dade3fa4fb77f0fd75fb4f0e2863a749ef875a7f -pow 8483ee0c98e4834c 727c9716ffb764d5 88888888888888888888888888888888e91888888888888839778888888d88888888888888888888888888d8888888888888888888888888d8888888888d8888888b504f333f9de6485 
17ff9b5d91ce08685fdd51715f0aff77216ac4911e4fb1c22bdb17609f754340d1e901c7e38f14c5569b13136b7991fcdb23d82adcb67a485b628eb01b5d1274a5eef39fd5a42dddd3d -pow 8483ee0c9834834c 727c9716ffb764d5 88888888888888888888888888888888d86dd488888888888888888888888888888888888888888888888888d8888888d8888888b504f333f9de6485 cd6d82f283b830e0e4f42243e661fc09cf379d7ade2ee39eedd941fd93db77f9ccebec1ca7ed7b321b2378dbf1238ffa5f2be38bf9d7688eb1652f7 -pow 840000888888888888888888888888888888880000000000 727c9716ffb764d5 88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888d888888d8888888b504f333f9de6485 6ede8a488a46e535a5c25c17236cc66c4e49e5de6bd31399366b19a9346ec95b5b36126726d63241a3031e41c0c1345fa5a17a3c48f141e951f4152c9ecdc8 -pow 8483ee0c9834834c 727c9716ffb764d5 888888888888888888888888888888888888888888888888888888888888888888888888888888888888d8888888888d8888888b504f333f9d88888888888e6485 19090849092b2f94c22605d47301625de4b2a34f57c35796f6619d33b73b15ed917f776577f9f219adb6b4c7b5dce0ed6e4963d38150ad104b8bf1a03a8da74590 -pow 8483e4834c 727c9716ffb764d5 88888888888888888888888888888888888888888888888888888888888888888888888888883978888888d8888888888888888888888888d8888888888d8888888b504f333f9de6485 60ca03e099dbf17c93db528e84638037419e28fff7639cc1d9af02d62407908207a3d622c4358491b93878580d8eca7b2a1fe89bcafa1ae32cba9248162ccb65cd86c717c6ad76369ac -pow 8483ee0c9834834c 727c9716ffb764d5 888888888888888888888888888888888888888888888888888888888888888888888888883978888888d8888888888888888888888888d8888888888d88888888888888889de6485 16e28809be035b05923c747a4ee9d75306a8529aa954028057efe5d72a09666d51afcac7ce03ddd6fbebad1e1ec67f64b7826d00123ca472abde81b56e8a4c9e532f759e60e9f1054 -pow 8483ee0c9834834c 727c9716ffb764d5 88888888888888888888888888888888888888888888886839788888888888888888888d8888888888d888888d8888888888888888888888888d8888888888d8888888b504f333f9de6485 
11a1c98f64b4d3a2a926fd1237e7f39c98eb6ef7a67ed9ccfd6d6ade87e0198469ea21c3fa8fe67e87dce8e4d59da5a7f2623a074377352f7a4246612254b1672e68a8f7fb5e6574cc84ac -pow 8483ee0c98e4834c 727c9716ffb764d5 88888888888888888888888888888888888888888888888839778888888d88888888888888888888888888d8888888888888888888888888d8888888888d8888888b504f333f9de6485 215d10c7bc86820b2c783e4b5ff4d2d6e57f3715d24cb9a7b2902b7513c261ef0c089929b26a52fcf6897c75519e83782761ef8f0ff816cf61e45de89da6a148cbab7aa93ea5d65e67b -pow 8483ee0c9834834c 727c9716ffb764d5 888888888888888888888888888888888888888888888888888888888888888888888883978888888d8888888888888888888888888d8888888888d8888888b504f333f9de6485 2ce9a51423d4223564b726e77575aab190e26cb0d1ce2bdc29c6f0e87599808df40a2e7062d9e9e410e54b02e7ef92bed6561b72b7b0b0fc662c7d04a242aeb7229dae5f1a8af1 -pow 84 727c9716ffb764d5 8888888888888888888888888888888888888888d888888d8888888888888888888888d888888d88888888888888888888b504f333f9de6485 5c42fe284175b01a14b34e5c89b69097ec1d5dc9373285b82a946ea41899593ae43b7a482cb3cdcc5e21f97cfcc5ebdbffb5cafb60d252a2a9 -pow 8483ee0c9834834c 727c9716ffb764d5 8888888888888888888888888888888888888888888888880888888888888888888d888888d8888888b504f38888888888888888888888888888888833f9de6485 75ced9c9d94e6ffc16cfa101b9aa8dd547b25e986cc26a297ed2a4ad88e6f6e4a0e31484600194248d8e5154466191ddd67daaf3514cbfce78e0d47be37d1bf81b -pow 8483ee0c9834834c 727c9716ffb764d5 888888888888888888888888888888888888888888888888d888888d888888888888888888888888888d888888d8888888b504f333f9de6485 537529b1dbacd7ae8e63e0356947a8ed5a7e9e770290a38e135b46183731d6276f80c68817076fae903b6c7cbba7bdcef90a70004b5a94ca1e -pow 8483c 727c9716ffb764d5 888888888888888888888888888888888888888888d888888d8888888b504f333f9de64888d8888888b504f333f9de6485 53244b21a7a17ff24160c5b96a0a98515a52296f44e0bbfbc48980a08a1f89182ae3977ba35b0e1d7d57075dc63002e640 -pow 8483ee0c9834834c 727c9716ffb764d5 
888f9de6488888888888888888888888888888888888888888888888888888888888888888d888888d8888888b504f333f9de6485 14bf546b6c806f8b9b84671e7a46096966f1fe1e8de4c450fe89b57fd5408c36c8060eb169a57e76aa7ae76f330ef137bd9566834 -pow 8483ee0c9834834c 727c9716ffb764d5 888888888888888888888888888888888888888888888888888888888888888888888888888888888888888d8888888b504f333f9de6485 511219ae4968000dae7b6d4ae111f7e8fc19dc33020ecaba3a7f45b1c1ddfcc4cbd2539307ec619481afb4d449ed8ed7a76f668558533 -pow 8483ee0c9834834c 727c9716ffb764d5 8888888888888888888888888888888888888888888888d8888888b504f333f9de888888888888888888888888888d888888d8888888b504f333f9de64de88888888888888888885 8135bbedf63d766808ea9e043323609d85eed687eebb6803271304ec68dc87de8ce362c3f16c9ac31e6dcb9542c9e7e2518e1fee06950d139aaebf9c2a7f827c26a9889491605470 -pow 8483ee0c9834834c 727c9716ffb764d5 88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888d888888d8888888b504f333f9de6485 44ed50d437512d99e38b02141a5d98fe47217f6cf5ebd237c8e79772eeac3ac2f33c13bbd80e9dbf3925147dfd135486611066110c4e1bbfe9655639b419941600032a2f9af21 -pow 34c 727c9716ffb764d5 8888888888888888888888888888888888888888888888888888888d888888d8888888b504f333f9de6485 2928a55555c4efca16738b016cb559173f877bed11969bc281440c13b7caa65ce9af4067e7f1c21d0b21f1 -pow 8483ee0c9834834c 727c9716ffb764d5 88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888d888888d8888888b504f6485 4c3ef21c414369560eab90c986b78a156045825bcdeb9d9bcea091916549f8f58ba49d0241cda890a4e2a9177f12bc71c4deec9a31b0f927582c0b5091 -pow 8483ee0c9834834c 727c9716ffb764d5 8888888888888888888888888888888888888888886888888888888888888888888d888888d8888888b504f333f9de6488b504f333f9de648585 6f03a4f2c77bec9c99564930f7972eae6f7f63c44a96e3e4747870d16aca5552300da1b7c705fa23b31e612f16275686e43b73a899a65d0e13a6 -pow 8483ec 727c97c6ffb764d5 
8888888888888888888888888888888888888888888888888888888888888888888d888888d8888888b504f333f9de6485 64b634614732913628140a72e9f4e2b59f8d75427ccc5c990e989aa358d7e8ee067a0f13f2fba3f7e747b1ee3c5f105515 -pow 8483be0c9834834c 727c9716ffb764d5 88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888d888888d8888888b504f333f9de6485 2a7e622ef5d374e587a48251eac21656ad40922326ee152ff20d9c8746a879e27a4fd414938da900c0d62480ae857e17b4a8fd9b404f7d6f1f13059313b073eff -pow 8483ee0c9834834c 727c9716ffb764a5 8888888888888888888888888888888888888888888888888888888888888888888888888888888888d888f333f9de6485 c97e716de0974491160fb8b2a2cc16ad5abbc07266e1e6cf25ca3a595981fd49cf1c526e7c04f5ed77df591ec327549e3 -pow 8483ee0c9834834c 727c9716ffb764d5 8888888888888888888888888888888888888888888888888888888888888888888d888888d8888d8888888b504f333f9de888888b504f333f9de6485 48b31ef21735201bbe4d2e6bae6cd76b1c90ddbaeb639369150b010f1b170edc966893c76d6d7edf21078529530f5232731ed399b19cd4dedcf9f8c69 -pow 8483ee0c9834834c 727c9716ffb764d5 8888888888888888888888888888888888888888888888888888888888888888888d888888d88888888888888888888888888888888888888b504f333f9de64888888888885 2ce54ff4a8a9dce961c4cfaf5a5a05b94b27cf215cd60d6b6c97da184582b90986bcfe25c0245a1a04967cc4dbebb78645fc060c58128a8252676d435c0927b987829d07e21 -pow 8483ee0c9834834c 727c9716ffb764d5 8888888888888888888888888888888888888888888888888888888888888888888888888888888888888888d8888888d8888888b504f333f9de6485 109e5bfc2d7df8823079a2dfe19129d443268f91fecb08d0b34aaa704fce9d085656a5e90bcda51c496e0b652a8aa8d4230286f8f3e0ccf9ade06fd2 -pow 8483ee0c9834834c 727c9716ffb764d5 8888888888888888888888888888888888888888888888888888888888888888e1ed888888d8888888b504f333f9de6485 7a4973c5e0e6bb423afc9f42658e6a9325485e0a9291c3805d5a3d766795746fc6f7270c79442d8c90888427d5793e9808 -pow 8483ee0c9834834c 727c9716ffb764d5 8888888888888888888888888888888888888888888888888888888888888888d08d888888d8888888b504f333f9de6485 
6a862f8f33f51553d95fe131ece8cff25b79c052de028cf66b485fd46b5f4cf913962978b56850a8e5ec9bb07fd6f5f2c -pow 8400000000000000 727c9716ffb764d5 8888888888888888888888888888888888888888888888888888888888888888888d888888d8888888b504f333f9de6485 5851978fe5efd8a522ac7c2d0e17f7aeaebc9f8c4a77a6c7bd3d734c47f247a2b9b291961aea0b69b53bbc9906889e3912 -pow 8483ee0c9834834c 727c9716ffb764d5 8888888888888888888888888888888888888888888888888888888888888889888d888888d8888888b504f333f9de6485 1e7770c5546c601df48ccb9c929dfc9f36eaab78911c089e0b3459f0d80c80bfce2eded75c3d205a6d998aa628f3e9396b -pow 8483ee0c9834834c 727c9716ffb764d5 8888888888888888888888888888888888888888888888880888888888888888888d888888d8888888b504f333f9de6485 2909fa1be537ec2cfd106f3f11664492283789e3f376e6179c170a4e080adc6a8a6850b77d35811c86b8dba0e6a98768c4 -modmul 13988e 194bd642cccccccccccccccccccccccccccccccccccccccccccccccccc22cbec4d97999fcef32b7b05d449dd426768bd6462cbec4d9ba58 5a827999fcef32422cbec4d97999fcef32422cbec4d9baa55baa55f4f8eb7b05d449dd426768bd642cccccccccccccccccccccccccccccccc1999fcef32422cbec4d97999fcef324229cc8aa57 1efb3f63db5da66666666666666666666666666666666666666666666596306eb2d65742a759166d49874c30845d5019afffd7a6b2db4bf19cd0 -modmul 139dddddddddddddddddddddddddddddd88e 194bd642ccccccccccccccccccccccc22cbec4d97999fcef32462cbec4d9ba58 5a8fcef32422cbec4d9baa55baa55f4f8eb7b05d449dd426768bd642cccccccccccccccccccccccccccccccc1999fcef32422cbec4d97999fcef324229cc8aa57 1f03a55c086d3a06d3a06d3a06d3a0602aba523c81830925e79c8b3b98eb9562b9d3f70790b415a26ba251983b4bbf79cd0 -pow 8483ee0c9834834c 727c9716ffb764d5 8888888888888888888888888888888888888888888888883978888888d8888888888888888888888888d8888888888d8888888b504f333f9de6485 4a0d96e0c39349b7377b15b5bf7b49f447a37349045d96c7e9f5122852162c9762d8381161f8db975e9e820b66a7042c850734679de7a7f548c1ed6 -pow 8483ee0c9834834c 727c9716ffb764d5 88888c888888888888888888888888888888888d888888d888888192ea3333f9de6485 
6434b339504f053e2a5ff58f64998f96f47aa632d6343459a9d2c1f77a0f2c2e942f77 -pow 8483ee0c9834834c 727c9716ffb764d5 888888888888888888888888888888888324888d888888d8888888b504faaaaaaaaaaaaaaaaaaaaa333f9de6485 27a8e8c85285125d5617ebab1f839ecd5e20a606b4bd2e997a338399feb25fd6144045ffc50aa71167c7203b8e1 -pow 8483ee0c9834834c 727c9716ffb764d5 8888888888888888888888888888888888888888888888888888888888888888d888888d8888888b504f333f9de6485 2c6b37a2df4595500b7460400ce897817970cfadaaf1c386eeba15e08aaa6ce24019713a30e1a2c459eb2e1dacb768b -pow 8483ee0c9834834c 727c9716ffb764d5 88888888888888888888888d88888888888888888888888888888888888888888888888d88888888888888888888d888888d8888888b504f333f9de6485 526b82e86fed0f1046b22b974a17b14d5a76f8c5c91f886e9aabf47d38c30d2dcd55f16391b4b719308169b0a6ca6e96ccf689fa6d9866dd88f36118472 -pow 8483ee0c9834834c 727c9716ffb764d5 888888888888888888888888888888888888888888888888888888888888888f88888d888888d8888888b504f333f9de6485 156e2c168bf9cdbc4d37f0863b0748cf244afc19db0ff3e8d2801efc574dc9ffe094500fc828cc6349b9f9c64b1c6ba4d7ec -pow 8483ee0c9834834c 727c9716ffb764d5 888888888888888888888888888888888888888888888888888d888888d5 7d604b4171f1ef090338e494f47645ee6ce9b7d410cc52894bd4f25ab281 -pow c 727c9716ffb764d5 888888888888888888888888888888888888888d888888d8888888b504888d8888888b504f333f9de648f333f9de6485 74231cb11902ffe7ee56597e45e007732106d1565aee8d97cbba145ecbb034e7dfecacdf37b942b1d1c6db411b8f1ad5 -pow 8483884d9834834c 727c9716ffb764d5 888888888888888888888888888888888888888d888888d8888888b504f333f98b8d888888888888888888888888d888888d85 5adc4c88b8c112c2089468b51a545d218a07874bd9402923db6d8ba76f5f0e6e681396838574843fb46638b54e8a2f9ac71651 -pow 8483ee0a9834834c 727c9716ffb764d5 888888888888888888888888888888888888888888888888888888888888d888888d8888888b504f333f9de6485 848898833195df7201ea83c1fd5b865de18b1f7f3535407f8721c743c68c0d1e119342dc99250d516a142e8b436 -pow 8483ee0c9834834c 227c9716ffb764d5 
888888888888888888888888888888888888888888888888888888b504f333f9de6485 12131509fa930f74d86f4da97b87c7c752320130b8dcc83c2bff6218a9b722def1636b -pow 8483ee0c9834834c 727c9716ffb764d5 8888888888888888888888888888888888888888888888888888888888888d888f333f9de6485 2be812603b8a9707dadde37bb5a61dd0444eb6f30eb721057009cc7adbcc8d6667011c0504e68 -pow 8483ee0c9834834c 727c9716ffb764d5 8888888888888888888888888888888888888888888888888888888888888888888d888888d8888888b504f333f9de6485 3ad25628fe61d80cd41e987f9325b8b74725dad97c564df113d78169d6bfcf643656caa89b7156faab66c982f61b9957ad -pow 8483ee0c9838dd4c 727c9716ffb764d5 888888888888888888888888888888888888888d888888888d888888d8888888b88d8888888b504f333f9de6485 30e87a722c8f3086881e32a662cc14a75b1a9122aebe5b4f615d4e03df5b59db82a2e1425cc2434afab8b0550b2 -pow 8483ee0c9834834c 727c9716ffb764d5 888804888888888888888888888888888888888d888888d8888888b504f333f9de6485 32c13f9ea9459a14779564d65f101dd1f8640196a875f6dbff2cd4b7b5f7542520b044 -pow 8483ee0c9834834c 727c9716ffb764d5 888888888888888888888888888888888888888d888888d8888888d504f333f9de6485 28d0c477fd109217949a66291f4e9816908ccad97823991e4179f5a503bd53ebeb364c -pow 8483ee0c9834834c 727c9716ffb764d5 888888888888888888888888888888888888888d888888d8888888b104f333f9de6485 6985a06f4a1c1eebf0304de7f55d13a09ccb9419448e56f213f5bb4e8c369f8e9bdd4f -modmul 13988e 194bd642ccccccccccccccccccccccc22cbec4d97999fcef32462cbec4d9ba58 5a827999fcef32422cbec4d97999fcef32422cbec4d9baa55baa55f4f8eb7b05d449dd426768bd642cccccccccccccccccccccccccccccccc1999fcef32422cbec4d97999fcef324229cc8aa57 1efb3f63db5da66666666666666666596306eb2d65742a7590dbcdfa6b2db4bf19cd0 -pow 8483ee0c9834834c 727c9716ffb764d5 8888888888888888888888888888888888d88888888888d88888888888d8888888b504f333f9de6485 5152a5e418fd58e41ca43b901cd36f7c99566568f9ff9b1e20389574908fea406f4e5ca8dd32918a07 -pow 8483ee0c9834834c 727c9716ffb764d5 888888888888888888888888888888888888888d888888d8888888b504f333f9de6485 
8296d34d3b715f19a9aa3a412b6f685f476201c389f11761eab46a627b44ab5ff640b0 -pow 8483ee0c9834834c 6275 8888888888888888888888888888888888d8888888b5485 6a603ec3b6c06541519dda19c60de2767dbf3e8e68ca54d -pow 8483ee0c9834834c 727c9716ffb764d5 88888888888888888888888888888888d8888888b50485 3e299644f713778d7d21067fea7ee62cd137f93ee3dfe -pow 8483ee0c9834834c 727c9716ffb764d5 8888888888888888888888d7888888b504f333f9de6485 732b121547e7602f411f5b39f04de7cc39b3561431abcd -modmul 13988e 194c22cbec4d97999fcef32422cbec4d9ba58 5a827999fcef32422cbec4d97999fcef32422cbec4d9baa55baa55f4f8eb7b05d449dd426768bd642cccccccccccccccccccccccccccccccc1999fcef32422cbec4d97999fcef324229cc8aa57 1efb9d206eb2d65742a7590d6e7d6eb2db4bf19cd0 -modmul 13988e 194c58 5a827999fcef32422cbec4d97999fcef32422cbec4d9baa55baa55f4f8eb7b05dccccccc449dd426768bd642ccccccccccccccccccccccccccccc1cc199cc8aa57 1efbde498d0 -modmul 13988e 194c58 5a827999fcef3cccccccccccccccccc199cc8aa2422cbec4d97999fcef32422cbec4d9baa55baa55f4f8eb7b05d449dd426768bd642cccccccccccccccccccccccccccccccc199cc8aa57 1efbde498d0 -pow 1ba 1c9f25c5bfedd935654670094afa6b9b4037246529d83b65a4294670094afa6b9b4037246529d83b65a47101c8135b0eea5731ab552ea5f319ed806f8931c03fe68da4b41da004c 2d413cccfe779921165f626cdd52afa7c75bd82ea24eea128b0ea2c7f9bf720f6ce43dd2a1790e71ec b816f83baec979e163d1daccc1ed4a4779ec06ae40af4dd7f3f45bc8ced54c8626ee4adef4fbdcff0 -pow 1ba f25c5bfedd93565294670094afa6b9b4037246529d83b65a47101c8135b0eea5731ab552ea5f319ed806f8931c03fe68da4b41da004c 2d413cccfe779921165f626cdd52afa7c75bd82ea24eea1338b8db2160c10eae28b0ea2c7f9bf720f6ce43cce64552bf20c10eae28b0ea2c7f9bf720f6ce43dd2a1790e71ec 23254b53a1cd644a5a998acd7e2c38f65b81909837ccc446a615a73a583ca5c92ce358370acbe6dce93031e26ce1fada16a133bf29c3143b45ecd8bb47f8383b88e8b109bdc -pow 1ba 1c9f25c5bfedd93565294670094afa6b9b4037246529d83b65a47101c8135b0eea5731ab552ea5f319d413cccfe779921165f626cdd52afae43dd2a1724eea133b45eb2160cce64552bf20c10eae28b0ea2c7f9bf720f6ce43dd2a1790e71ec 
7625b7f3f6eb39f032c9212b5b62deda285df73ec12c5f00acecd70c678d010ea35311ce8f9efbc7 b5943aa9528f0519e72ccd24686656291dab86ff5916a589b9abafc14486c820b10557eba884e79 -pow 1ba 1c9f25c5bfedd93565294670094afa6b9b4037246529d83b65a47101c8135b0eea5731ab152ea5f319ed806f8931c03fe68da4b41da004c 2d413cccfe779921165f626cdd52afa7c75bd82ea24eea133b45eb2160cce64552bf20c10eae28b0ea2c7f9bf720f6ce43dd2a1790e713fe68da4bec 10dd194113310a6cc9121c1e94db2410406ab41712c45b11ac10783e1f6ddae8ec2a2a4868db3ac8a74a13b525ccbb9469c9431e98b0fdb39e575fc -pow 10907dc1930690697b1371 fd4a14eabe7c7b369ec448e52aa97eb1fda3d54ba97b258f74dcf167280ecbca820000000004f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e509ad87 16a09e667f3bcc908b2fb1366ea957d3e3adec17512775099da2f590b0667322 11d53e8bf0e94f38a15acad76c89426e4282a01e21a5c8cb40363b3660fb6f51 -pow 10907dc1930690697b13714fd957d3e3adec175a2f590b054ba97b258f74dcf167280ecbca858737d5c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 16a09e667f3bcc908b2fb1366ea957d3e3adec17512775099d366ea957d3e3adec175a2f590b0667323 12b697e76f7294ee42a4630ae19ebda51c4d1a40669a86ceeb78adbdf58cb9faf3320fd3cd2cfb1a886 -pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 16a09e667f3bcc908b2fb136e06cf924c93ae6ea957d3e3adec17512775099da2f590b0667323 518f9910b7b9847e43ba7344c4868177aed30d7b277e0c0ad64a3dc1f992e865af889a0dccbe -pow 10907dc1930690497b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33800907dc14a57d35cda01b923294ec1db2d23880e40908b2fb1366ea957d3e3adec17512775099da2f590b0667323 1 0 -pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b9409ad87 16a031d9e667f3bcc908b2fb1366ea957d3e3adec17512775099da2f590b0667323 14cc00562f66093169240d19685705e073c96c7547e5797c768440c5044c0e43566 -pow 10907dc193b2d23880e409ad87 16a09e660690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c 
e4f92e2dff6ec9ab294a33804a57d35cda01b923294ecc908b2fb1366ea957d3e3adec17512775099da2f590b0667323 a03af414acae1a25bd359e46a395895fc8f3b5c96a3c25c3c1f290062cdb4972275c3eba22a21316f3339459ae54a6b5 -pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 16a09e667f3bcc966e08b2fb1366ea957d3e3adec17512775099da2f590b0667323 16869520abf32acaeac583b5781802b8b5dbce8aeed2c0b3a0db0e0359f3328a278 -pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b97d35cda01b923294ec1db2d23880e409667f3bcc908b2fb1366ea957d3e3adec17512775099da3f590b0667323 13e06cf924c93da01b923294ec1db2d23880eae2d00206c4813c36a0aac31d2db5e2148af132b6a099da2f590b0667323 da20997b7a493f76368d529949067b3639469e0c2d681588f7f76fbd12e1aa92b1eafa89a07b7159b8d5a4b17fcb3123 -pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923297 16a09e657d35cda01b923294ec1db2d2388067f3bcc908b2fb1366ea957d3e3adec17512775099da2f590b0667323 115eec036127564768648670be45c3b1b7696d51d2ec76a657a5e090dd674e8a31cc0829ff63b3aa0150c5b591933 -pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858 37d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923267f3bcc908b2fb1366ea957d3e3adec17512775099da2f590b0667323 450ce71a8eec9fcecab453e1566cf6525c70f6a69aa16a47c7d1ed754b0967c4764f6da6cf092fd49589912bc2bd3337f4 -pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 16a09e667f3bcc908b2fb1366ea957d3e3adec175127750af132b6a7c9c8a399da2f590b0667323 10bea717321c68ab600971b66e7e59047ad3682b4dbc1b64258affbe7b125af26b62c994ec365a3 -pow 10907dc19306906e4f92e14fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 16a09e667f3bcc908b2fb1366ea957d3e3adec175127bbbbbbbbbbbbbbbbbbbbbbbbb537bbb75099da2f590b0667323 
aafd00a40f14e94e5427612c7c8a2342576108dde4e32964fd4d100e60426e210f63ac4d884850cc8035e904d44e38 -pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c 4a33804a57d35cda01b923294ec1db2d23880e409ad87 16a09e667f3bcc908b2fb1366ea957d3e3adec17512775099da2f590b0667323 1462290f3612e19fc67b888f7419d278efed8ef32fd37ec435410cd4601a3e4c -pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 16a09e667f3bcc94a154ba97b258f74dcf167280e08b2fb1366ea957d3e3adec17512775099da2f590b0667323 14c84cc12c5d71de79fd96dcedf077c050baca2420aa20e33f38dcd5fbc46853150257db771c31dbda3bb53905 -pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c 39792e2dff6ec9ab294a33804a57d35cda01b923294ed23880e409ad87 16a09e667f3bcc908b2fb1366ea957d3e3adec17512775099da2f590b0667323 12d750a696d901ffb58cc9c2004431a44d2abd08923cf585e49dd2d95c70a001 -pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 16a09e667f3bc366ea957d3e3adec17512775099da2f54ba97b258f74dcf167280ecbc90b0667323 145fe455eb9d42bc17ab30f1ba224fd7e58fef09ddf0f8e62baef7b8b01bdd636b5f648097c1e913 -pow 10907dc1930690697b13714fd4a154ba97b258f74d8737d4c75099da2f590b0667323 13e06cf924ccdf01b923294ec1db2d23880e409ad87 16a09e667f3bcc908b2fb1366908b2fb1366ea957d3ea957d3e3adec17512775099da2f590b0667323 b3566d563d6ba5fcc24ed18bc5f9a63e9d9b7e073d9b684156b4acc463ac9c89fe5507471d7347819 -pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 16a09e667ff6ec9ab294a33804a57df3bcc908b2fb1366ea957d3e3adec17512775099da2f590b0667323 9cfdd8d180de912e9490385a14cd642f05e258745625c0cd9d403ebdaa7db90109ca1d771476a5b842a5 -pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 
16a09e667f3bcc908b923b2fb1366ea957d3e3adec17512775099da2f590b0667323 17c1f8e9e0fc930930fb5a1cbfd90f44d95eb3cf8c7918c98ec47f3ebd4cd9bd234 -pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f9c36a0aac31d2db5e2248af132e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 16a09e667f3bcc908b2fb1366ea957d3e3adec17512775099da2f590b0667323 2ba528582bdd37fe979ba13589832fd094dbd1ba79aa31b1a3fc15ea00d8908 -pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad930690697b13714fd4a154ba9787 16a09e667f3bcc366ea957d3e3adec17512775099da2f590b0667323 100b983f254cd0503a68b707952fd4e3930e57e48ef7521dc749302c -pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 16a09e64d23bcc908b2fb1366ea957d3e3adec17512775099da2f590b0667323 32edca42bb85d3a7964256b95d4c12db0a3735c8501b7d24e35c17e6471aff1 -pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 16a092047f3bcc908b2fb1366ea957d3e3adec17512775099da2f590b0667323 100fa51ec3e14bbb7918eb18a4619b71a9c0445e1a9677387f982911df821403 -pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 18449e667f3bcc908b2fb1366ea957d3e3adec17512775099da2f590b0667323 16e9c0ed5254e4d5fa00a8e6630cc4174d69982c67d26303805ef489c332f713 -pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 16a09e6c7f3bcc908b2fb1366ea957d3e3adec17512775099da2f590b0667323 13928b672d49ab1c419847ae5c2bc6957a2792445b58bb5de8aaf35b11ec51b2 -pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 d6a09e667f3bcc908b2fb1366ea957d3e3adec17512775099da2f590b0667323 
d3634464bd781a4f3c3de1bb2d575988d5d861944bbcfb2d5b5fb9cb5f641b62 -pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 16a09e667f3bcc908b2fb1366ea957d3e3adec17512775099db2f590b0667323 139a26f4ce368bcc132437fa3fdff9393e529e9ad810507065eae0fe1eae75ab -pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 16a09e667f3bcc908b2fb1366ea957d3e3adeb17512775099da2f590b0667323 13cb39d2966a6cda4a61a7bcac4d54fc1e07f9a5911a6c9e418f424e1d690d88 -pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 16a09e667f3bcc908b2fb1366ea957d2e3adec17512775099da2f590b0667323 1045060ef67d3e02805fc6c7c73e78d9515773cebfb94a2361bb562de44f0cef -pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 16a09e667f3bcc908b2fb1366ea955d3e3adec17512775099da2f590b0667323 e9f79f25a51263752bb6801ea2ea69f1bfe72eb29841a18fe897344e432778b -pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 16a09e667f3bcc908b2fb1366ea857d3e3adec17512775099da2f590b0667323 1603254e45e8cea74b33e0fea929a14f8031fec82f60e443d5da2177dc5d9aa3 -pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 16a09e667f3bcc908b2fb1366da957d3e3adec17512775099da2f590b0667323 82799d2dc787e49afd44173d4373cc1e5e4c5f338639b0876da8b60f139032f -pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 16a09e667f3bcc908b2fb1166ea957d3e3adec17512775099da2f590b0667323 550bb6f10292ca7bda8ee8ec2d849ca328123ff90f2bd85795c509a12d99e0a -pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c 
e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 12a09e667f3bcc908b2fb1366ea957d3e3adec17512775099da2f590b0667323 106e2d72f0365650111866243cfc5a675f4d0677452da7d86354717813c4b705 -pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 56a09e667f3bcc908b2fb1366ea957d3e3adec17512775099da2f590b0667323 34af9ebb1c936ee57965f89c005ea5dc91d70aefef9a3181363e39b7547856e4 -pow 10907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 96a09e667f3bcc908b2fb1366ea957d3e3adec17512775099da2f590b0667323 735eb1132cd9f59c79500163d423f3ecb1f802aa65ec6327612302f7d96ae917 -pow 11907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 16a09e667f3bcc908b2fb1366ea957d3e3adec17512775099da2f590b0667323 328fb6559104e886880c03bdc59572cc2dddb075bc405b8e37e681fdf6f14b6 -pow 12907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 16a09e667f3bcc908b2fb1366ea957d3e3adec17512775099da2f590b0667323 11d815b4b3c473a014845a6c46d4f3e0fbe1079a046d52a999a0efa4457d904e -pow 50907dc1930690697b13714fd4a154ba97b258f74dcf167280ecbca858737d4c e4f92e2dff6ec9ab294a33804a57d35cda01b923294ec1db2d23880e409ad87 16a09e667f3bcc908b2fb1366ea957d3e3adec17512775099da2f590b0667323 12cf3bbf47b42cd6d73e7c59aa66efe37c82efd92eadb5d3166c2514fb521ec4 -pow 4241f7064c1a41a5ec4dc53f528552e9 393e4b8b7fd5b26aca528ce01295f4d6 5a827999fcef32422cbec4d9baa55f41a5ec4dc53f528552e9 337f28ff252aa48023556ab30189e1eba23c1f2d451136b029 -pow 4241f7064c1a41a552e9 393e4b3b55fa3d1aaf5b8bb26aca528ce01295f4d6 5a827999fcef32422cbec4d9baa55f4e 46aa926b3d484810eccca4e964ad5f8f -pow 4241f7064c1a41a5ec4dc53f528552e9 393e4b8b7fdb2cbec4d9baa55f4e 3e73738d7999fcef32422cbec4d9baa55f4e 70ca5cba1b58bdc4c907d349716609a3749 -pow 4241f7064c1a41a5ec4dc53f528552e9 
393e4b8b7fdbb26aca528ce01295f4d6 5a827999fcef32422cbec4d9baa55f44dc53f528552e9 3b76731b825ed64be2fdd42579bf2c10a666b49b347ed -pow 4241f7064c1a41c51c4dc53f528552e9 393e4b8b7f73738ddbb26aca528ce01295f4d6 5a827999fcef32422cbec4d9baa55f4e 3dc6a608deca791097e423ce76dc7c3f -pow 4241f7064c1a41a5ec4dc53f528552e9 393e4b8b7fdbb26aca528ce01295f4d6 5a827999fcef32422cbec4d9baa55179 4b5b2ee754f65abca70d2ee02783bb84 -pow 4241f7064c1a41a5ec4dc53f528552e9 393e4b8b7fdbb26aca528ce01295f4d6 5a827999fcef32422cbec4d9baa55ee5 13a5660d889b6c2b7679e4804cfb33b0 -pow 4241f7064c1a41a5ec4dc53f528552e9 393e4b8b7fdbb26aca528ce01295f4d6 5a827999fcef32422cbec4d9baa55171 b2bee6670600e5cdca742a65fd1ff8d -pow 4241f7064c1a41a5ec4dc53f528552e9 393e4b8b7fdbb26aca528ce01295f4d6 5a827999fcef32422cbec4d9baa5c5e5 209018f064798d5de50dca68e1a67bbd -pow 4241f7064c1a41a5ec4dc53f528552e9 393e4b8b7fdbb26aca528ce01295f4d6 5a827999fcef32422cbec4d9baa311c9 19480fc0e92332649012fe282ac6507b -pow 4241f7064c1a41a5ec4dc53f528552e9 393e4b8b7fdbb26aca528ce01295f4d6 5a827999fcef3bb06cbec4d9baa55f4e 4b140ce53242560dbd6e997fd2c75683 -pow 2241f7064c1a41a5ec4dc53f528552e9 393e4b8b7fdbb26aca528ce01295f4d6 5a827999fcef32422cbec4d9baa55f4e 3c503d9f72a8809af1eef9883798b963 -pow 4241f7064c1a41a5ec4dc53f528552e9 393e4b8b7fdbb26aca528ce01295f4d6 5a827999fcef32422cbec4d9baa55f4d b507582651e7cd31dac1fe4f6e22f4b -pow 4241f7064c1a41a5ec4dc53f528552e9 793e4b8b7fdbb26aca528ce01295f4d6 5a827999fcef32422cbec4d9baa55f4e 59acb6241548663232e48f1bce0780e7 -pow 40 727c9716484 870e7550437c3f9de6484 812319144f0d00b0c71c0 -pow 8483 b504f333f9de6485 3192ea3e99c8b504f333f9de6485 2b83edad87fec41bdd2a2c67757 -pow 8403ee0c9834834c 727c9716ffb764d5 b504f333f9de64504f333f9de6485 72de72a69fb7be9db98df5ff0666e -pow 8483179c984c 727c9716ffb764d5 b504f333f333f9de6485 45f63d048e8bf05fe15 -pow 848334764d5 b504f333f9de6485 3192ea3e99c84934c 846cdce061d08961 -pow 8483ee0c98348346dd00267168dd764d5 b504f333f9de6485 31b504f333f9de6485 1d3c563ca7a8355ee2 -pow 
8483ee0c9834834c 727c9716ffb764d5 8888888888888888888888d8888888b504f333f9de6485 262b90699d5729bbd17f13a5d5f72b8a37d53c5bc6b862 -pow 8483ee0c94c 72 b504f333f9de6485e6485 873c770cd74ff4fff6c16 -pow 8483e834c 727c9716ffb7644e0c9834834c 727c9716ffb7644246504f333f9de6485 98af64208d351df230c13e918ae5a012 -pow 8483ee0c9834834c 727c9716ffbf333f9de648834c 727c9716ffbf333f9de645 1d7d26536d52e6422b5b81 -pow 8483ee0c9834834c 727c9716ffb764d5 b504f333f9dde6485 ac208abc45050b286 -pow 8483ee0c9834834c 727c9716ffb764d5 b504f333f504f333f9d9de6485 5da8095154c4248389a323559c -pow 8483ee0c9834834c 3f9de6485 82bd2a3e99c872e99c849de504f337c92ea3e99c849de504f333f9de6485 5cc8c6fc66f6da8d4fc03cf14157915bf0a46062ee2b87007a1ba5efd0ef -pow 8483ee0c9834834c 727c9716ffb764d5 b504f33333f9de6485 9b7fbbd74f27281103 -pow 8483ee0c98345 b504f333f9 e6485 809bf -pow 8483ee0c9834834c 727c9716ffb764d5 b5026823f9de6485 380c222e7b1c958 -pow 8483ee0c9834834c 727c9716ffb764d5 4d24f333f9de6485 2e7a29f446106c06 -pow 8483ee0c9834834c 727c9716ffb764d5 1714f333f9de6485 38687f6a43fd553 -pow 8483ee0c9834834c 727c9716ffb764d5 e914f333f9de6485 5f8173a3832a148b -pow 8483ee0c9834834c 727c9716ffb764d5 1754f333f9de6485 eee8599db0a878e -pow 8483ee0c9834834c 727c9716ffb764d5 2044f333f9de6485 1e559d32dc6c4d92 -pow 8604ee0c9834834c 727c9716ffb764d5 b504f333f9de6485 5341d938b4b2112e -pow 37c3ee0c9834834c 727c9716ffb764d5 b504f333f9de6485 2783dee829d253f2 -pow 8483ee0c9834834c 727c9716ffb764d5 b504fa33f9de6485 1b81c5c38e77c229 -pow 8483ee0c9834834c 727c9716ffb764d5 b5c4f333f9de6485 6c231b78114a63d1 -pow 8483ee0c9834834c 7f7c9716ffb764d5 b504f333f9de6485 2cc9b662148976d8 -pow 8483ee0c9834834c a27c9716ffb764d5 b504f333f9de6485 9833d32827c56876 -pow 8483ee0c9834834c 827c9716ffb764d5 b504f333f9de6485 44640cc39fd4788 -pow 7483ee0c9834834c 727c9716ffb764d5 b504f333f9de6485 55086f87b6043f40 -pow 8483ee0c9834834c 727c9716ffb764d5 b504f333f9de6489 5de3cc538a4912b2 -pow 8483ee0c9834834c 727c9716ffb764d5 b504f333f9de7485 
7dee09f7103356f7 -pow 8483ee0c9834834c 727c9716ffb764d5 b504f332f9de6485 31e66ba6b6f01dbc -pow 8483ee0c9834834c 727c9716ffb764d5 b504f233f9de6485 36a16ab51897078d -pow 8483ee0c9834834c 727c9716ffb764d5 b524f333f9de6485 50139fa7019a79b2 -pow 8483ee0c9834834c 727c9716ffb764d5 b544f333f9de6485 218964ebf2bf02bc -pow 8483ee0c9834834c 727c9716ffb764d5 b704f333f9de6485 60f74c122162ed95 -pow 8483ee0c9834834c 727c9716ffb764d5 b104f333f9de6485 16855ca50047d45e -pow 8483ee0c9834834c 727c9716ffb764d5 c504f333f9de6485 2d27db08d15c5e30 -pow 8483ee0c9834834c 727c9716ffb764d5 f504f333f9de6485 43c59cdd83e4cb0e -pow 8493ee0c9834834c 727c9716ffb764d5 b504f333f9de6485 b8f2e35b869b538 -pow 8083ee0c9834834c 727c9716ffb764d5 b504f333f9de6485 709ba83061fbedec -pow 9483ee0c9834834c 727c9716ffb764d5 b504f333f9de6485 74a637bb3a6686ab -modmul 13988e 194c583ada5b529204a2bc83eb7b05d44d9baa55f4f890cd9bfea55a7 5a827999fcef32422cbec4d9baa55f4f8eb7b05d44d9baa55f4f89dd426768bd64 1efbde91a17127884d7d32ffd5f1f18750c651ddcb394371cfa91ff23baaa2 -modmul 13988e 194c583ada1b529204a2bc830cd9bfea55a7 5a827999fcef32422cbec4d9baa55f4f8eb7b05d449dd426768bd64 1efbde91a122c5504d7d32fec547ba91ff23baaa2 -modmul 13988e 194c583ade5b529204a2bc830cd9bfea55a7 5a827999fcef32422cbec4d9baa55f4f8eb7b05d449dd426768bd64 1efbde91a6574b084d7d32fec547ba91ff23baaa2 -modmul 13988e 194c583ada5b529204a2bc830cd9bfea55a7 3a827999fcef32422cbec4d9baa55f4 3168752910948165510fe90bd7ab5d6 -modmul 13988e 194c583ada7b529204a2b 5a827999fcef32422cb 564931ea2a366069fa0 -modmul 13988e 194c583ada1b529204a2b 5a827999fcef32422cb 5641d8b4ea366069fa0 -modmul 13988e 194c58 5a827999fcef32422cbec4d97999fcef32422cbec4d9baa55baa55f4f8eb7b05d449dd426768bd642cccccccccccccccccccccccccccccccc199cc8aa57 1efbde498d0 -modmul 13988e 194c58 5a827999fcef32422cbec4d9baa55f4f8eb7b05d449dd426768bf65f4f8eb7b05d449dd426768bd642c142c199cc8aa57 1efbde498d0 -modmul 13988e 194c58 5a827999fcef32422cbec827999fcef32422cbec4d99fcef32422cbe32422cbec8279aa55f4f8e4d9baa55f4f8eb 
1efbde498d0 -modmul 13988e 194c58ee5827999fcef32422cbec4d9baa55f4f2422cbec4d9baa55f4f8ed 1efbde48eb 17247c498b -modmul 13988e 194242b 1efbde4aa55f4f8eb 1eef84d697da -modmul 6d28e 194c58 5a82799 4b07f1e -modmul 13988e 194c58 3a8 90 -modmul 13988e 194c58 9a8 438 -modmul 93988e 194c58 5a8 418 -mul 16c2d9895a204eb19a1bb5b9b3c06d262c4bf90b74bfe8e1e 400000000000000000000000d294c6e5844edbe8b97f145b4ffd73ef7617d71a92e8006ee548e1e 5b0b662568813ac6686ed6e7fa92c1116b9e82723896e0c88f73a9faa96b8ac9b4c6efa28706e2696b0addabd81c3c9ff6167d1b5e537339bee8dfca0814b84 -mul 16c2d9895a204eb19a1bb5b9b3c06d262c4bf90b040fec72b389cf6fd375533 2cfd2d7d74d29145b4ffd73ef7617d71a92e8006ee548e1d 3ffffffffffffb03af375d6c1c311c6e51e081bfe99f6b41a84698ec8806beb21ddb37330f58d39c67989aa2ebdd2300fecefebf42f0c7 -mul 16c2d9895a204eb00000000000000000004bf90b74bfec72b389cf6fd375533 2cfd2d7d74d294c6e5844edbe85b4ffd73ef7617d71a92e8006ee548e1e 3ffffffffffffffb7eda681a2eca0a66bebccfbd4ca3fa67c9ba58e8735028859d32a47c8411726411fd5c7779d7ce5b04076dbea2c376bf5bc7a45fa -mul 16c2d9895a204eb19a1bb5b9b3c06d262c4bf90b74bfec72b389cfd2d7d74d294c6e5844edbe8b97f145b4ffd73ef7617d71a92e8006ee548e1e 40000000000000 5b0b662568813ac6686ed6e6cf01b498b12fe42dd2ffb1cace273f4b5f5d34a531b96113b6fa2e5fc516d3ff5cfbdd85f5c6a4ba001bb95238780000000000000 -mul 16c2d9895a2bf90b74bfec72b389cf6fd375533 2cfd2d7d74d294c6e5844edbe8b97f145b4ffd73ef7617d71a92e8006ee548e1e 400000000020cd0df0a99c9bcec9106a845f2589fb29d491cf9909dc7c53bc3dcad9a6eca00e5b04076dbea2c376bf5bc7a45fa -mul 16c2d9895a204eb19a1bb5b9b3c06d262c4bf90b74bfec72b389cf6fd375533 2cfd2d7d74d294c6e5844edbe8b97f145b4ffd73ef7617d71a92e8006ee548e9e 400000000000000000000000000000000000000000000000000000000000000b616cc4ad102758cd0ddadcd9e037357125cae0be6763f7fc885e7745824df7a -mul 16c2d9895a204eb19a1bb5b9b3c06d262c4bf90b74bfec72b389cf6fd375533 cfd2d7d74d294c6e5844edbe8b97f145b4ffd73ef7617d71a92e8006ee548e1e 
127a4ced4bbf629ccbc8948c987f25b3a7680de91680271a98ec6120591559a00000000000000000000000000000a25affce5b04076dbea2c376bf5bc7a45fa -mul 16c2d9895a204eb19a1bb5b9b3c06d262c4bf90b74bfec76b389cf6fd375533 2cfd2d7d74d294c6e5844edbe8b97f145b4ffd73ef7617d71a92e8006ee548e1e 40000000000000000000000000000000000000000000000b3f4b5f5d34a531b96113b6fa2e5fc516d3ff5cfbdd869821a4885b1fc0bff71ac376bf5bc7a45fa -mul 16c2d9895a204eb19a1bb5b9b3c06d272c4bf90b74bfec72b389cf6fd375533 2cfd2d7d74d294c6e5844edbe8b97f145b4ffd73ef7617d71a92e8006ee548e1e 40000000000000000000000000000002cfd2d7d74d294c6e5844edbe8b97f145b4ffd73ef7617d71a92e8006ee553078ffce5b04076dbea2c376bf5bc7a45fa -mul 16c2d9895a204eb59a1bb5b9b3c06d262c4bf90b74bfec72b389cf6fd375533 2cfd2d7d74d294c6e5844edbe8b97f145b4ffd73ef7617d71a92e8006ee548e1e 400000000000000b3f4b5f5d34a531b96113b6fa2e5fc516d3ff5cfbdd85f5c6a4ba001bb9523878000000000000a25affce5b04076dbea2c376bf5bc7a45fa -mul 12c2d9895a204eb19a1bb5b9b3c06d262c4bf90b74bfec72b389cf6fd375533 2cfd2d7d74d294c6e5844edbe8b97f145b4ffd73ef7617d71a92e8006ee548e1e 34c0b4a0a2cb5ace469eec4905d1a03ae92c00a304227a0a395b45ffe446adc78800000000000000000000000000a25affce5b04076dbea2c376bf5bc7a45fa -mul 36c2d9895a204eb19a1bb5b9b3c06d262c4bf90b74bfec72b389cf6fd375533 2cfd2d7d74d294c6e5844edbe8b97f145b4ffd73ef7617d71a92e8006ee548e1e 99fa5afae9a5298dcb089db7d172fe28b69ffae7deec2fae3525d000ddca91c3c000000000000000000000000000a25affce5b04076dbea2c376bf5bc7a45fa -mul 56c2d9895a204eb19a1bb5b9b3c06d262c4bf90b74bfec72b389cf6fd375533 2cfd2d7d74d294c6e5844edbe8b97f145b4ffd73ef7617d71a92e8006ee548e1e f3f4b5f5d34a531b96113b6fa2e5fc516d3ff5cfbdd85f5c6a4ba001bb9523878000000000000000000000000000a25affce5b04076dbea2c376bf5bc7a45fa -mul 96c2d9895a204eb19a1bb5b9b3c06d262c4bf90b74bfec72b389cf6fd375533 2cfd2d7d74d294c6e5844edbe8b97f145b4ffd73ef7617d71a92e8006ee548e1e 1a7e96beba694a6372c2276df45cbf8a2da7feb9f7bb0beb8d4974003772a470f0000000000000000000000000000a25affce5b04076dbea2c376bf5bc7a45fa -mul 1 bbbbbbbbb0 bbbbbbbbb0 
-mul 1 a a -mul 1 e e -mul a c 78 -mul 1 1 1 -mul a 4 28 -mul 1 2 2 -mul 1 8 8 -mul 1 0 0 -mul 0 4 0 -mul 3 4 c -mul 5 4 14 -mul 9 4 24 diff --git a/unix/uxserver.c b/unix/uxserver.c index c0ee251a..897ff865 100644 --- a/unix/uxserver.c +++ b/unix/uxserver.c @@ -43,6 +43,7 @@ #define PUTTY_DO_GLOBALS /* actually _define_ globals */ #include "putty.h" +#include "mpint.h" #include "ssh.h" #include "sshserver.h" @@ -221,11 +222,11 @@ bool auth_publickey(AuthPolicy *ap, ptrlen username, ptrlen public_blob) return false; } struct RSAKey *auth_publickey_ssh1( - AuthPolicy *ap, ptrlen username, Bignum rsa_modulus) + AuthPolicy *ap, ptrlen username, mp_int *rsa_modulus) { struct AuthPolicy_ssh1_pubkey *iter; for (iter = ap->ssh1keys; iter; iter = iter->next) { - if (!bignum_cmp(rsa_modulus, iter->key.modulus)) + if (mp_cmp_eq(rsa_modulus, iter->key.modulus)) return &iter->key; } return NULL; diff --git a/windows/winpgen.c b/windows/winpgen.c index 833ef393..005cd818 100644 --- a/windows/winpgen.c +++ b/windows/winpgen.c @@ -349,7 +349,8 @@ struct rsa_key_thread_params { union { struct RSAKey *key; struct dss_key *dsskey; - struct ec_key *eckey; + struct ecdsa_key *eckey; + struct eddsa_key *edkey; }; }; static DWORD WINAPI generate_key_thread(void *param) @@ -364,9 +365,10 @@ static DWORD WINAPI generate_key_thread(void *param) if (params->keytype == DSA) dsa_generate(params->dsskey, params->key_bits, progress_update, &prog); else if (params->keytype == ECDSA) - ec_generate(params->eckey, params->curve_bits, progress_update, &prog); + ecdsa_generate(params->eckey, params->curve_bits, + progress_update, &prog); else if (params->keytype == ED25519) - ec_edgenerate(params->eckey, 256, progress_update, &prog); + eddsa_generate(params->edkey, 256, progress_update, &prog); else rsa_generate(params->key, params->key_bits, progress_update, &prog); @@ -390,7 +392,8 @@ struct MainDlgState { union { struct RSAKey key; struct dss_key dsskey; - struct ec_key eckey; + struct ecdsa_key 
eckey; + struct eddsa_key edkey; }; HMENU filemenu, keymenu, cvtmenu; }; @@ -1401,7 +1404,7 @@ static INT_PTR CALLBACK MainDlgProc(HWND hwnd, UINT msg, } else if (state->keytype == ECDSA) { state->ssh2key.key = &state->eckey.sshk; } else if (state->keytype == ED25519) { - state->ssh2key.key = &state->eckey.sshk; + state->ssh2key.key = &state->edkey.sshk; } else { state->ssh2key.key = &state->key.sshk; }