1
0
mirror of https://git.tartarus.org/simon/putty.git synced 2025-07-03 20:42:48 -05:00

Implement AES-GCM using the @openssh.com protocol IDs.

I only recently found out that OpenSSH defined their own protocol IDs
for AES-GCM, defined to work the same as the standard ones except that
they fixed the semantics for how you select the linked cipher+MAC pair
during key exchange.

(RFC 5647 defines protocol IDs for AES-GCM in both the cipher and MAC
namespaces, and requires that you MUST select both or neither - but
this contradicts the selection policy set out in the base SSH RFCs,
and there's no discussion of how you resolve a conflict between them!
OpenSSH's answer is to do it the same way ChaCha20-Poly1305 works,
because that will ensure the two suites don't fight.)

People do occasionally ask us for this linked cipher/MAC pair, and now
I know it's actually feasible, I've implemented it, including a pair
of vector implementations for x86 and Arm using their respective
architecture extensions for multiplying polynomials over GF(2).

Unlike ChaCha20-Poly1305, I've kept the cipher and MAC implementations
in separate objects, with an arm's-length link between them that the
MAC uses when it needs to encrypt single cipher blocks to use as the
inputs to the MAC algorithm. That enables the cipher and the MAC to be
independently selected from their hardware-accelerated versions, just
in case someone runs on a system that has polynomial multiplication
instructions but not AES acceleration, or vice versa.

There's a fourth implementation of the GCM MAC, which is a pure
software implementation of the same algorithm used in the vectorised
versions. It's too slow to use live, but I've kept it in the code for
future testing needs, and because it's a convenient place to dump my
design comments.

The vectorised implementations are fairly crude as far as optimisation
goes. I'm sure serious x86 _or_ Arm optimisation engineers would look
at them and laugh. But GCM is a fast MAC compared to HMAC-SHA-256
(indeed compared to HMAC-anything-at-all), so it should at least be
good enough to use. And we've got a working version with some tests
now, so if someone else wants to improve them, they can.
This commit is contained in:
Simon Tatham
2022-08-16 18:36:58 +01:00
parent fd840f0dfe
commit c1a2114b28
30 changed files with 2167 additions and 11 deletions

View File

@ -259,6 +259,12 @@ VOLATILE_WRAPPED_DEFN(static, size_t, looplimit, (size_t x))
#define IF_SHA_NI(x)
#endif
/* IF_CLMUL(x) expands to its argument when HAVE_CLMUL is nonzero and to
 * nothing otherwise, so that entries in the X-macro lists in this file
 * can be included only in builds where HAVE_CLMUL is set.
 * NOTE(review): HAVE_CLMUL presumably indicates the x86 polynomial
 * multiplication extension - confirm against the build configuration. */
#if HAVE_CLMUL
#define IF_CLMUL(x) x
#else
#define IF_CLMUL(x)
#endif
#if HAVE_NEON_CRYPTO
#define IF_NEON_CRYPTO(x) x
#else
@ -271,6 +277,12 @@ VOLATILE_WRAPPED_DEFN(static, size_t, looplimit, (size_t x))
#define IF_NEON_SHA512(x)
#endif
/* IF_NEON_PMULL(x) expands to its argument when HAVE_NEON_PMULL is
 * nonzero and to nothing otherwise, so that entries in the X-macro lists
 * in this file can be included only in builds where HAVE_NEON_PMULL is set.
 * NOTE(review): HAVE_NEON_PMULL presumably indicates the Arm polynomial
 * multiplication extension - confirm against the build configuration. */
#if HAVE_NEON_PMULL
#define IF_NEON_PMULL(x) x
#else
#define IF_NEON_PMULL(x)
#endif
/* Ciphers that we expect to pass this test. Blowfish and Arcfour are
* intentionally omitted, because we already know they don't. */
#define CIPHERS(X, Y) \
@ -280,28 +292,40 @@ VOLATILE_WRAPPED_DEFN(static, size_t, looplimit, (size_t x))
X(Y, ssh_des) \
X(Y, ssh_des_sshcom_ssh2) \
X(Y, ssh_aes256_sdctr) \
X(Y, ssh_aes256_gcm) \
X(Y, ssh_aes256_cbc) \
X(Y, ssh_aes192_sdctr) \
X(Y, ssh_aes192_gcm) \
X(Y, ssh_aes192_cbc) \
X(Y, ssh_aes128_sdctr) \
X(Y, ssh_aes128_gcm) \
X(Y, ssh_aes128_cbc) \
X(Y, ssh_aes256_sdctr_sw) \
X(Y, ssh_aes256_gcm_sw) \
X(Y, ssh_aes256_cbc_sw) \
X(Y, ssh_aes192_sdctr_sw) \
X(Y, ssh_aes192_gcm_sw) \
X(Y, ssh_aes192_cbc_sw) \
X(Y, ssh_aes128_sdctr_sw) \
X(Y, ssh_aes128_gcm_sw) \
X(Y, ssh_aes128_cbc_sw) \
IF_AES_NI(X(Y, ssh_aes256_sdctr_ni)) \
IF_AES_NI(X(Y, ssh_aes256_gcm_ni)) \
IF_AES_NI(X(Y, ssh_aes256_cbc_ni)) \
IF_AES_NI(X(Y, ssh_aes192_sdctr_ni)) \
IF_AES_NI(X(Y, ssh_aes192_gcm_ni)) \
IF_AES_NI(X(Y, ssh_aes192_cbc_ni)) \
IF_AES_NI(X(Y, ssh_aes128_sdctr_ni)) \
IF_AES_NI(X(Y, ssh_aes128_gcm_ni)) \
IF_AES_NI(X(Y, ssh_aes128_cbc_ni)) \
IF_NEON_CRYPTO(X(Y, ssh_aes256_sdctr_neon)) \
IF_NEON_CRYPTO(X(Y, ssh_aes256_gcm_neon)) \
IF_NEON_CRYPTO(X(Y, ssh_aes256_cbc_neon)) \
IF_NEON_CRYPTO(X(Y, ssh_aes192_sdctr_neon)) \
IF_NEON_CRYPTO(X(Y, ssh_aes192_gcm_neon)) \
IF_NEON_CRYPTO(X(Y, ssh_aes192_cbc_neon)) \
IF_NEON_CRYPTO(X(Y, ssh_aes128_sdctr_neon)) \
IF_NEON_CRYPTO(X(Y, ssh_aes128_gcm_neon)) \
IF_NEON_CRYPTO(X(Y, ssh_aes128_cbc_neon)) \
X(Y, ssh2_chacha20_poly1305) \
/* end of list */
@ -317,9 +341,17 @@ VOLATILE_WRAPPED_DEFN(static, size_t, looplimit, (size_t x))
X(Y, ssh_hmac_sha256) \
/* end of list */
#define ALL_MACS(X, Y) \
SIMPLE_MACS(X, Y) \
X(Y, poly1305) \
/* X-macro list of MAC test names: everything in SIMPLE_MACS, then
 * poly1305, then the AES-GCM combinations. Each entry is emitted as
 * X(Y, name). The AES-GCM entries are named aesgcm_<cipher>_<mac>, matching
 * the test_mac_aesgcm_* functions, where the first suffix identifies the
 * AES cipher implementation and the second the GCM MAC implementation.
 * Entries wrapped in IF_CLMUL / IF_NEON_PMULL disappear when the
 * corresponding HAVE_* macro is zero; IF_AES_NI and IF_NEON_CRYPTO are
 * presumably defined the same way (their definitions are not shown here). */
#define ALL_MACS(X, Y) \
SIMPLE_MACS(X, Y) \
X(Y, poly1305) \
X(Y, aesgcm_sw_sw) \
X(Y, aesgcm_sw_refpoly) \
IF_AES_NI(X(Y, aesgcm_ni_sw)) \
IF_NEON_CRYPTO(X(Y, aesgcm_neon_sw)) \
IF_CLMUL(X(Y, aesgcm_sw_clmul)) \
IF_NEON_PMULL(X(Y, aesgcm_sw_neon)) \
IF_AES_NI(IF_CLMUL(X(Y, aesgcm_ni_clmul))) \
IF_NEON_CRYPTO(IF_NEON_PMULL(X(Y, aesgcm_neon_neon))) \
/* end of list */
/* Pastes the prefix "mac_" onto name and passes the resulting identifier
 * to X. Its (X, name) parameter shape matches the X(Y, name) calls made
 * by ALL_MACS, so it is presumably intended to be used as
 * ALL_MACS(MAC_TESTLIST, something) - verify against the callers. */
#define MAC_TESTLIST(X, name) X(mac_ ## name)
@ -1473,6 +1505,58 @@ static void test_mac_poly1305(void)
test_mac(&ssh2_poly1305, &ssh2_chacha20_poly1305);
}
/* AES-GCM MAC test case: calls test_mac with ssh2_aesgcm_mac_sw as the
 * MAC and ssh_aes128_gcm_sw as the associated cipher. Built
 * unconditionally (no HAVE_* guard). */
static void test_mac_aesgcm_sw_sw(void)
{
test_mac(&ssh2_aesgcm_mac_sw, &ssh_aes128_gcm_sw);
}
/* AES-GCM MAC test case: calls test_mac with ssh2_aesgcm_mac_ref_poly as
 * the MAC and ssh_aes128_gcm_sw as the associated cipher. Built
 * unconditionally (no HAVE_* guard).
 * NOTE(review): ssh2_aesgcm_mac_ref_poly is presumably the slow pure
 * software reference implementation of the vectorised algorithm - confirm
 * against its definition. */
static void test_mac_aesgcm_sw_refpoly(void)
{
test_mac(&ssh2_aesgcm_mac_ref_poly, &ssh_aes128_gcm_sw);
}
/* AES-GCM MAC test case, compiled only when HAVE_AES_NI is nonzero:
 * calls test_mac with ssh2_aesgcm_mac_sw as the MAC and
 * ssh_aes128_gcm_ni as the associated cipher. */
#if HAVE_AES_NI
static void test_mac_aesgcm_ni_sw(void)
{
test_mac(&ssh2_aesgcm_mac_sw, &ssh_aes128_gcm_ni);
}
#endif
/* AES-GCM MAC test case, compiled only when HAVE_NEON_CRYPTO is nonzero:
 * calls test_mac with ssh2_aesgcm_mac_sw as the MAC and
 * ssh_aes128_gcm_neon as the associated cipher. */
#if HAVE_NEON_CRYPTO
static void test_mac_aesgcm_neon_sw(void)
{
test_mac(&ssh2_aesgcm_mac_sw, &ssh_aes128_gcm_neon);
}
#endif
/* AES-GCM MAC test case, compiled only when HAVE_CLMUL is nonzero:
 * calls test_mac with ssh2_aesgcm_mac_clmul as the MAC and
 * ssh_aes128_gcm_sw as the associated cipher. */
#if HAVE_CLMUL
static void test_mac_aesgcm_sw_clmul(void)
{
test_mac(&ssh2_aesgcm_mac_clmul, &ssh_aes128_gcm_sw);
}
#endif
/* AES-GCM MAC test case, compiled only when HAVE_NEON_PMULL is nonzero:
 * calls test_mac with ssh2_aesgcm_mac_neon as the MAC and
 * ssh_aes128_gcm_sw as the associated cipher. */
#if HAVE_NEON_PMULL
static void test_mac_aesgcm_sw_neon(void)
{
test_mac(&ssh2_aesgcm_mac_neon, &ssh_aes128_gcm_sw);
}
#endif
/* AES-GCM MAC test case, compiled only when both HAVE_AES_NI and
 * HAVE_CLMUL are nonzero: calls test_mac with ssh2_aesgcm_mac_clmul as
 * the MAC and ssh_aes128_gcm_ni as the associated cipher. */
#if HAVE_AES_NI && HAVE_CLMUL
static void test_mac_aesgcm_ni_clmul(void)
{
test_mac(&ssh2_aesgcm_mac_clmul, &ssh_aes128_gcm_ni);
}
#endif
/* AES-GCM MAC test case, compiled only when both HAVE_NEON_CRYPTO and
 * HAVE_NEON_PMULL are nonzero: calls test_mac with ssh2_aesgcm_mac_neon
 * as the MAC and ssh_aes128_gcm_neon as the associated cipher. */
#if HAVE_NEON_CRYPTO && HAVE_NEON_PMULL
static void test_mac_aesgcm_neon_neon(void)
{
test_mac(&ssh2_aesgcm_mac_neon, &ssh_aes128_gcm_neon);
}
#endif
static void test_hash(const ssh_hashalg *halg)
{
ssh_hash *h = ssh_hash_new(halg);