/*
 * Code to generate 'nonce' values for DSA signature algorithms, in a
 * deterministic way.
 */

#include "ssh.h"
#include "mpint.h"
#include "misc.h"

/*
 * All DSA-type signature systems depend on a nonce - a random number
 * generated during the signing operation.
 *
 * This nonce is a weak point of DSA and needs careful protection,
 * for multiple reasons:
 *
 *  1. If an attacker in possession of your public key and a single
 *     signature can find out or guess the nonce you used in that
 *     signature, they can immediately recover your _private key_.
 *
 *  2. If you reuse the same nonce in two different signatures, this
 *     will be instantly obvious to the attacker (one of the two
 *     values making up the signature will match), and again, they can
 *     immediately recover the private key as soon as they notice this.
 *
 *  3. In at least one system, information about your private key is
 *     leaked merely by generating nonces with a significant bias.
 *
 * Attacks #1 and #2 work across all of integer DSA, NIST-style ECDSA,
 * and EdDSA. The details vary, but the headline effects are the same.
 *
 * So we must be very careful with our nonces. They must be generated
 * with uniform distribution, but also, they must avoid depending on
 * any random number generator that has the slightest doubt about its
 * reliability.
 *
 * In particular, PuTTY's policy is that for this purpose we don't
 * _even_ trust the PRNG we use for other cryptography. This is mostly
 * a concern because of Windows, where system entropy sources are
 * limited and we have doubts about their trustworthiness
 * - even CryptGenRandom. PuTTY compensates as best it can with its
 * own ongoing entropy collection, and we trust that for session keys,
 * but revealing the private key that goes with a long-term public key
 * is a far worse outcome than revealing one SSH session key, and for
 * keeping your private key safe, we don't think the available Windows
 * entropy gives us enough confidence.
 *
 * A common strategy these days (although <hipster>PuTTY was doing it
 * before it was cool</hipster>) is to avoid using a PRNG based on
 * system entropy at all. Instead, you use a deterministic PRNG that
 * starts from a fixed input seed, and in that input seed you include
 * the message to be signed and the _private key_.
 *
 * Including the private key in the seed is counterintuitive, but does
 * actually make sense. A deterministic nonce generation strategy must
 * use _some_ piece of input that the attacker doesn't have, or else
 * they'd be able to repeat the entire computation and construct the
 * same nonce you did. And the one thing they don't know is the
 * private key! So we include that in the seed data (under enough
 * layers of overcautious hashing to protect it against exposure), and
 * then they _can't_ repeat the same construction. Moreover, if they
 * _could_, they'd already know the private key, so they wouldn't need
 * to perform an attack of this kind at all!
 *
 * (This trick doesn't, _per se_, protect against reuse of nonces.
 * That is left to chance, which is enough, because the space of
 * nonces is large enough to make it adequately unlikely. But it
 * avoids escalating the reuse risk due to inadequate entropy.)
 *
 * For integer DSA and ECDSA, the system we use for deterministic
 * generation of k is exactly the one specified in RFC 6979. We
 * switched to this from the old system that PuTTY used to use before
 * that RFC came out. The old system had a critical bug: it did not
 * always generate _enough_ data to get uniform distribution, because
 * its output was a single SHA-512 hash. We could have fixed that
 * minimally, by concatenating multiple hashes, but it seemed more
 * sensible to switch to a system that comes with test vectors.
 *
 * One downside of RFC 6979 is that it's based on rejection sampling
 * (that is, you generate a random number and keep retrying until it's
 * in range). This makes it play badly with our side-channel test
 * system, which wants every execution trace of a supposedly
 * constant-time operation to be the same. To work around this
 * awkwardness, we break up the algorithm further, into a setup phase
 * and an 'attempt to generate an output' phase, each of which is
 * individually constant-time.
 */

/*
 * Working state for one RFC 6979 nonce generation. Filled in by
 * rfc6979_new(), seeded by rfc6979_setup(), advanced by each call to
 * rfc6979_attempt(), and destroyed by rfc6979_free().
 */
struct RFC6979 {
    /*
     * Order of the cyclic group the signature scheme works in: the
     * multiplicative order of g for integer DSA (which is the small
     * prime q from the key parameters), or the order of the curve's
     * base point for ECDSA.
     *
     * Borrowed, not owned: rfc6979_free() leaves it alone, and the
     * caller must keep it alive for as long as this structure is in
     * use.
     */
    mp_int *q;

    /*
     * The private key integer, i.e. the discrete log of the public
     * key with respect to the group generator.
     *
     * Borrowed, not owned, under the same rules as q above.
     */
    mp_int *x;

    /*
     * Size of q, cached in two units: bits, and bytes (rounded up).
     */
    size_t qbits;
    size_t qbytes;

    /*
     * Hash and MAC objects, allocated once and reused across the
     * setup and attempt phases.
     */
    ssh_hash *hash;
    ssh2_mac *mac;

    /*
     * Output length of the hash, cached.
     */
    size_t hlen;

    /*
     * The algorithm's byte string V. Only the first hlen bytes are
     * used.
     */
    unsigned char V[MAX_HASH_LEN];

    /*
     * Buffer T that each attempt fills with candidate output, and the
     * number of hash-sized blocks it holds (so T is T_nblocks * hlen
     * bytes long).
     */
    size_t T_nblocks;
    unsigned char *T;
};

/*
 * Convert a byte string into an integer of at most qbits bits, in the
 * manner of RFC 6979's bits2int: interpret the string as big-endian,
 * and if it is longer than q, keep only its leading qbits bits.
 *
 * Returns a new mp_int, which the caller is responsible for freeing
 * with mp_free(). The result is NOT reduced mod q.
 */
static mp_int *bits2int(ptrlen b, RFC6979 *s)
{
    /* Bytes beyond the first qbytes can never contribute. (b is our
     * own by-value copy, so trimming it doesn't affect the caller.) */
    if (s->qbytes < b.len)
        b.len = s->qbytes;

    mp_int *value = mp_from_bytes_be(b);
    size_t input_bits = b.len * 8;

    /*
     * If qbits isn't a multiple of 8, we may still have a few bits
     * too many, which are discarded from the low-order end.
     *
     * We use mp_rshift_fixed_into rather than mp_rshift_safe_into
     * because the shift count depends only on the _lengths_ of the
     * modulus q and of the input string. Lengths of protocol
     * quantities aren't secret (only the values of integers and bit
     * strings of those lengths are), and an attacker could work the
     * shift count out anyway just from knowing which DSA algorithm is
     * in use. So there's no harm in it being visible.
     */
    if (input_bits > s->qbits) {
        size_t excess_bits = input_bits - s->qbits;
        mp_rshift_fixed_into(value, value, excess_bits);
    }

    return value;
}

/*
 * Write the integer x, reduced mod q, to a BinarySink as a string of
 * exactly qbytes bytes, emitted from byte index qbytes-1 down to
 * index 0 of the reduced value.
 */
static void BinarySink_put_int2octets(BinarySink *bs, mp_int *x, RFC6979 *s)
{
    mp_int *reduced = mp_mod(x, s->q);

    size_t pos = s->qbytes;
    while (pos > 0) {
        pos--;
        put_byte(bs, mp_get_byte(reduced, pos));
    }

    /* The reduced value is a temporary of our own, so dispose of it. */
    mp_free(reduced);
}

/*
 * Write a byte string to a BinarySink after passing it through
 * bits2int and then int2octets, so that what is written is always
 * qbytes bytes long and represents a value reduced mod q.
 */
static void BinarySink_put_bits2octets(BinarySink *bs, ptrlen b, RFC6979 *s)
{
    mp_int *as_integer = bits2int(b, s);
    BinarySink_put_int2octets(bs, as_integer, s);
    mp_free(as_integer);
}

/*
 * Convenience wrappers for the two functions above, accepting
 * anything that BinarySink_UPCAST can turn into a BinarySink.
 */
#define put_int2octets(sink, value, state) \
    BinarySink_put_int2octets(BinarySink_UPCAST(sink), value, state)
#define put_bits2octets(sink, bytes, state) \
    BinarySink_put_bits2octets(BinarySink_UPCAST(sink), bytes, state)

/*
 * Allocate the state for an RFC 6979 nonce generation.
 *
 *  - hashalg: the hash function to use, both directly and as the
 *    basis of the HMAC.
 *  - q: the group order. Borrowed, not copied.
 *  - x: the private key. Borrowed, not copied.
 *
 * The returned structure must be seeded with rfc6979_setup() before
 * rfc6979_attempt() is called on it (this function does not
 * initialise V), and must eventually be passed to rfc6979_free().
 */
RFC6979 *rfc6979_new(const ssh_hashalg *hashalg, mp_int *q, mp_int *x)
{
    RFC6979 *s = snew(RFC6979);

    /* Caller-owned integers: we only keep the pointers. */
    s->q = q;
    s->x = x;

    /* Size of q, in bits and in whole bytes (rounded up). */
    s->qbits = mp_get_nbits(q);
    s->qbytes = (s->qbits + 7) / 8;

    /* Reusable hash and HMAC objects, plus the hash output length. */
    s->hash = ssh_hash_new(hashalg);
    s->mac = hmac_new_from_hash(hashalg);
    s->hlen = hashalg->hlen;

    /* Each attempt concatenates as many hash-sized blocks as it takes
     * to provide at least qbits bits of output. */
    size_t bits_per_block = s->hlen * 8;
    s->T_nblocks = (s->qbits + bits_per_block - 1) / bits_per_block;
    s->T = snewn(s->T_nblocks * s->hlen, unsigned char);

    return s;
}

/*
 * Setup phase of the algorithm: steps (a) to (g) of RFC 6979 section
 * 3.2. Seeds the HMAC key and the string V from the private key
 * stored in s and from the message to be signed.
 *
 * After this returns, rfc6979_attempt() can be called on s as many
 * times as necessary.
 */
void rfc6979_setup(RFC6979 *s, ptrlen message)
{
    unsigned char msg_hash[MAX_HASH_LEN];
    unsigned char key[MAX_HASH_LEN];

    /* 3.2 (a): h1 is the hash of the message. */
    ssh_hash_reset(s->hash);
    put_datapl(s->hash, message);
    ssh_hash_digest(s->hash, msg_hash);

    /* 3.2 (b): V starts out as one hash-output's worth of 0x01
     * bytes. */
    memset(s->V, 1, s->hlen);

    /* 3.2 (c): the HMAC key K starts out as the same number of zero
     * bytes. */
    memset(key, 0, s->hlen);
    ssh2_mac_setkey(s->mac, make_ptrlen(key, s->hlen));

    /*
     * 3.2 (d)-(g): two rounds of the same pair of operations, which
     * differ only in the separator byte in the middle of the first
     * MAC preimage. That byte is 0 in round one (steps d,e) and 1 in
     * round two (steps f,g).
     */
    for (unsigned char separator = 0; separator < 2; separator++) {
        /* (d)/(f): MAC together V, the separator, the private key and
         * h1 under the current key, and adopt the result as the new
         * key. */
        ssh2_mac_start(s->mac);
        put_data(s->mac, s->V, s->hlen);
        put_byte(s->mac, separator);
        put_int2octets(s->mac, s->x, s);
        put_bits2octets(s->mac, make_ptrlen(msg_hash, s->hlen), s);
        ssh2_mac_genresult(s->mac, key);
        ssh2_mac_setkey(s->mac, make_ptrlen(key, s->hlen));

        /* (e)/(g): replace V with its own MAC under that new key. */
        ssh2_mac_start(s->mac);
        put_data(s->mac, s->V, s->hlen);
        ssh2_mac_genresult(s->mac, s->V);
    }

    /* Don't leave the message hash or key material on the stack. */
    smemclr(msg_hash, sizeof(msg_hash));
    smemclr(key, sizeof(key));
}

/*
 * Make one attempt to generate a nonce: step (h) of RFC 6979 section
 * 3.2.
 *
 * Returns a structure containing a freshly allocated candidate k
 * (which the caller must free with mp_free() if they don't use it),
 * and a flag 'ok' which is nonzero if and only if k is in the
 * acceptable range [1, q-1]. Whatever the outcome, the internal state
 * is advanced so that the next call makes a fresh attempt.
 */
RFC6979Result rfc6979_attempt(RFC6979 *s)
{
    RFC6979Result result;
    unsigned char K[MAX_HASH_LEN];
    size_t T_len = s->T_nblocks * s->hlen;

    /* 3.2 (h) 1: T begins empty ... */
    /* 3.2 (h) 2: ... and is filled by repeatedly replacing V with its
     * own MAC and appending the new V each time. */
    unsigned char *block = s->T;
    for (size_t remaining = s->T_nblocks; remaining > 0; remaining--) {
        ssh2_mac_start(s->mac);
        put_data(s->mac, s->V, s->hlen);
        ssh2_mac_genresult(s->mac, s->V);
        memcpy(block, s->V, s->hlen);
        block += s->hlen;
    }

    /* 3.2 (h) 3: turn T into an integer, and accept it if it lies in
     * [1, q-1]. */
    result.k = bits2int(make_ptrlen(s->T, T_len), s);
    result.ok = mp_hs_integer(result.k, 1) & ~mp_cmp_hs(result.k, s->q);

    /*
     * Now perturb K and regenerate V, ready for the next attempt.
     *
     * This is done every time, even if the k we just made is fine and
     * there will be no next attempt. The cost is small next to the
     * public-key operation that comes afterwards (and next to the
     * greater number of these same operations already performed), and
     * side-channel testing is easier if this function is
     * constant-time from start to finish.
     *
     * Other rejection-sampling code, prime generation in particular,
     * is less careful than this. There, it's enough for _successful_
     * attempts to run in constant time. Failures can do as they
     * please, on the theory that all they can possibly leak through
     * side channels is information that was then thrown away without
     * being used for anything important. (So, for example, it's fine
     * for failures detected at different times to take different
     * early-exit paths.)
     *
     * The situation here is different. Prime generation attempts are
     * independent of each other; these are not. Every attempt works
     * from the _same_ secret data (the private key stored by
     * rfc6979_new(), and the K and V state seeded by rfc6979_setup()),
     * and the perturbation computed below feeds into the next attempt
     * if there is one. So it is definitely _not_ true that a failed
     * attempt only handles data that won't contribute to the eventual
     * output, and failures need as much care as successes.
     *
     * (Even so, it would be acceptable for successes and failures to
     * take different amounts of time, provided each amount was
     * consistent. Making them the same is just easier to test.)
     */
    ssh2_mac_start(s->mac);
    put_data(s->mac, s->V, s->hlen);
    put_byte(s->mac, 0);
    ssh2_mac_genresult(s->mac, K);
    ssh2_mac_setkey(s->mac, make_ptrlen(K, s->hlen));
    smemclr(K, sizeof(K));

    ssh2_mac_start(s->mac);
    put_data(s->mac, s->V, s->hlen);
    ssh2_mac_genresult(s->mac, s->V);

    return result;
}

/*
 * Destroy a state structure made by rfc6979_new(), wiping the
 * sensitive parts of it first.
 *
 * s->q and s->x are not freed, because they still belong to the
 * caller.
 */
void rfc6979_free(RFC6979 *s)
{
    size_t T_len = s->T_nblocks * s->hlen;

    ssh_hash_free(s->hash);
    ssh2_mac_free(s->mac);

    /* T held candidate nonce material, so wipe it before release. */
    smemclr(s->T, T_len);
    sfree(s->T);

    /* Of the structure's own fields, only V is really sensitive (the
     * rest are pointers and well-known lengths). But wiping the whole
     * thing is simpler than picking out which fields matter. */
    smemclr(s, sizeof(*s));

    sfree(s);
}

/*
 * All-in-one wrapper: generate the RFC 6979 nonce for signing
 * 'message' with private key x in a group of order q, using the
 * given hash function.
 *
 * Attempts are repeated until one is accepted. The returned integer
 * is newly allocated and belongs to the caller; q and x are only
 * borrowed for the duration of the call.
 */
mp_int *rfc6979(
    const ssh_hashalg *hashalg, mp_int *q, mp_int *x, ptrlen message)
{
    RFC6979 *s = rfc6979_new(hashalg, q, x);
    rfc6979_setup(s, message);

    RFC6979Result result = rfc6979_attempt(s);
    while (!result.ok) {
        /* Out of range: throw this candidate away and try again. */
        mp_free(result.k);
        result = rfc6979_attempt(s);
    }

    rfc6979_free(s);
    return result.k;
}