From fcdc804b4f4cee6c5b6a1b436fbc6ec7db06ea3e Mon Sep 17 00:00:00 2001
From: Simon Tatham
Date: Sun, 1 Dec 2024 09:55:39 +0000
Subject: [PATCH] Move some NTRU helper routines into a header file.

I'm going to want to use these again for ML-KEM, so let's put one copy
of them where both algorithms can use it.
---
 crypto/ntru.c        | 56 +++++---------------------------------------
 crypto/smallmoduli.h | 54 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 60 insertions(+), 50 deletions(-)
 create mode 100644 crypto/smallmoduli.h

diff --git a/crypto/ntru.c b/crypto/ntru.c
index 60c37e2e..a7e53122 100644
--- a/crypto/ntru.c
+++ b/crypto/ntru.c
@@ -79,58 +79,14 @@
 #include "ssh.h"
 #include "mpint.h"
 #include "ntru.h"
-
-/* ----------------------------------------------------------------------
- * Preliminaries: we're going to need to do modular arithmetic on
- * small values (considerably smaller than 2^16), and we need to do it
- * without using integer division which might not be time-safe.
- *
- * The strategy for this is the same as I used in
- * mp_mod_known_integer: see there for the proofs. The basic idea is
- * that we precompute the reciprocal of our modulus as a fixed-point
- * number, and use that to get an approximate quotient which we
- * subtract off. For these integer sizes, precomputing a fixed-point
- * reciprocal of the form (2^48 / modulus) leaves us at most off by 1
- * in the quotient, so there's a single (time-safe) trial subtraction
- * at the end.
- *
- * (It's possible that some speed could be gained by not reducing
- * fully at every step. But then you'd have to carefully identify all
- * the places in the algorithm where things are compared to zero. This
- * was the easiest way to get it all working in the first place.)
- */
-
-/* Precompute the reciprocal */
-static uint64_t reciprocal_for_reduction(uint16_t q)
-{
-    return ((uint64_t)1 << 48) / q;
-}
-
-/* Reduce x mod q, assuming qrecip == reciprocal_for_reduction(q) */
-static uint16_t reduce(uint32_t x, uint16_t q, uint64_t qrecip)
-{
-    uint64_t unshifted_quot = x * qrecip;
-    uint64_t quot = unshifted_quot >> 48;
-    uint16_t reduced = x - quot * q;
-    reduced -= q * (1 & ((q-1 - reduced) >> 15));
-    return reduced;
-}
-
-/* Reduce x mod q as above, but also return the quotient */
-static uint16_t reduce_with_quot(uint32_t x, uint32_t *quot_out,
-                                 uint16_t q, uint64_t qrecip)
-{
-    uint64_t unshifted_quot = x * qrecip;
-    uint64_t quot = unshifted_quot >> 48;
-    uint16_t reduced = x - quot * q;
-    uint64_t extraquot = (1 & ((q-1 - reduced) >> 15));
-    reduced -= extraquot * q;
-    *quot_out = quot + extraquot;
-    return reduced;
-}
+#include "smallmoduli.h"
 
 /* Invert x mod q, assuming it's nonzero. (For time-safety, no check
- * is made for zero; it just returns 0.) */
+ * is made for zero; it just returns 0.)
+ *
+ * Expects qrecip == reciprocal_for_reduction(q). (But it's passed in
+ * as a parameter to save recomputing it, on the theory that the
+ * caller will have had it lying around already in most cases.) */
 static uint16_t invert(uint16_t x, uint16_t q, uint64_t qrecip)
 {
     /* Fermat inversion: compute x^(q-2), since x^(q-1) == 1. */
diff --git a/crypto/smallmoduli.h b/crypto/smallmoduli.h
new file mode 100644
index 00000000..b452b410
--- /dev/null
+++ b/crypto/smallmoduli.h
@@ -0,0 +1,54 @@
+/*
+ * Shared code between algorithms whose state consists of a large
+ * collection of residues mod a small prime.
+ */
+
+/*
+ * We need to do modular arithmetic on small values (considerably
+ * smaller than 2^16), and we need to do it without using integer
+ * division which might not be time-safe. Input values might not fit
+ * in a 16-bit int, because we'll also be multiplying mod q.
+ *
+ * The strategy for this is the same as I used in
+ * mp_mod_known_integer: see there for the proofs. The basic idea is
+ * that we precompute the reciprocal of our modulus as a fixed-point
+ * number, and use that to get an approximate quotient which we
+ * subtract off. For these integer sizes, precomputing a fixed-point
+ * reciprocal of the form (2^48 / modulus) leaves us at most off by 1
+ * in the quotient, so there's a single (time-safe) trial subtraction
+ * at the end.
+ *
+ * (It's possible that some speed could be gained by not reducing
+ * fully at every step. But then you'd have to carefully identify all
+ * the places in the algorithm where things are compared to zero. This
+ * was the easiest way to get it all working in the first place.)
+ */
+
+/* Precompute the reciprocal */
+static inline uint64_t reciprocal_for_reduction(uint16_t q)
+{
+    return ((uint64_t)1 << 48) / q;
+}
+
+/* Reduce x mod q, assuming qrecip == reciprocal_for_reduction(q) */
+static inline uint16_t reduce(uint32_t x, uint16_t q, uint64_t qrecip)
+{
+    uint64_t unshifted_quot = x * qrecip;
+    uint64_t quot = unshifted_quot >> 48;
+    uint16_t reduced = x - quot * q;
+    reduced -= q * (1 & ((q-1 - reduced) >> 15));
+    return reduced;
+}
+
+/* Reduce x mod q as above, but also return the quotient */
+static inline uint16_t reduce_with_quot(uint32_t x, uint32_t *quot_out,
+                                        uint16_t q, uint64_t qrecip)
+{
+    uint64_t unshifted_quot = x * qrecip;
+    uint64_t quot = unshifted_quot >> 48;
+    uint16_t reduced = x - quot * q;
+    uint64_t extraquot = (1 & ((q-1 - reduced) >> 15));
+    reduced -= extraquot * q;
+    *quot_out = quot + extraquot;
+    return reduced;
+}