1
0
mirror of https://git.tartarus.org/simon/putty.git synced 2025-01-09 17:38:00 +00:00

AES-GCM NEON: cope with missing vaddq_p128.

In some compilers (I'm told clang 10, in particular), the NEON
intrinsic vaddq_p128 is missing, even though its input type poly128_t
is provided.

vaddq_p128 is just an XOR of two vector registers, so that's easy to
work around by casting to a more mundane type and back. Added a
configure-time test for that intrinsic, and a workaround to be used in
its absence.
This commit is contained in:
Simon Tatham 2022-10-12 12:54:36 +01:00
parent 0615767224
commit 2222cd104d
3 changed files with 17 additions and 0 deletions

View File

@ -54,6 +54,7 @@
#cmakedefine01 HAVE_CLMUL #cmakedefine01 HAVE_CLMUL
#cmakedefine01 HAVE_NEON_CRYPTO #cmakedefine01 HAVE_NEON_CRYPTO
#cmakedefine01 HAVE_NEON_PMULL #cmakedefine01 HAVE_NEON_PMULL
#cmakedefine01 HAVE_NEON_VADDQ_P128
#cmakedefine01 HAVE_NEON_SHA512 #cmakedefine01 HAVE_NEON_SHA512
#cmakedefine01 HAVE_NEON_SHA512_INTRINSICS #cmakedefine01 HAVE_NEON_SHA512_INTRINSICS
#cmakedefine01 USE_ARM64_NEON_H #cmakedefine01 USE_ARM64_NEON_H

View File

@ -195,6 +195,14 @@ if(neon)
int main(void) { r = vmull_p64(a, b); r = vmull_high_p64(u, v); }" int main(void) { r = vmull_p64(a, b); r = vmull_high_p64(u, v); }"
ADD_SOURCES_IF_SUCCESSFUL aesgcm-neon.c) ADD_SOURCES_IF_SUCCESSFUL aesgcm-neon.c)
# Some compilers provide the poly128_t type in their NEON header but
# not the vaddq_p128 intrinsic, so probe for that intrinsic on its
# own. The outcome is recorded under the name HAVE_NEON_VADDQ_P128;
# C code guarded by '#if !HAVE_NEON_VADDQ_P128' supplies a substitute
# when the probe fails to compile.
test_compile_with_flags(HAVE_NEON_VADDQ_P128
GNU_FLAGS -march=armv8-a+crypto
MSVC_FLAGS -D_ARM_USE_NEW_NEON_INTRINSICS
TEST_SOURCE "
#include <${neon_header}>
volatile poly128_t r;
int main(void) { r = vaddq_p128(r, r); }")
# The 'sha3' architecture extension, despite the name, includes # The 'sha3' architecture extension, despite the name, includes
# support for SHA-512 (from the SHA-2 standard) as well as SHA-3 # support for SHA-512 (from the SHA-2 standard) as well as SHA-3
# proper. # proper.

View File

@ -87,6 +87,14 @@ static inline void store_p128_be(void *p, poly128_t v)
vst1q_u8(p, vrev64q_u8(vreinterpretq_u8_p128(swapped))); vst1q_u8(p, vrev64q_u8(vreinterpretq_u8_p128(swapped)));
} }
#if !HAVE_NEON_VADDQ_P128
/*
 * Stand-in for the vaddq_p128 intrinsic, compiled only when
 * HAVE_NEON_VADDQ_P128 is zero, i.e. the toolchain's NEON header
 * defines poly128_t but not the intrinsic itself.
 *
 * Adding two 128-bit polynomials is just a bitwise XOR of the two
 * vector registers, so reinterpret both operands as vectors of 32-bit
 * lanes, XOR those, and reinterpret the result as a poly128_t again.
 */
static inline poly128_t vaddq_p128(poly128_t a, poly128_t b)
{
    poly128_t sum = vreinterpretq_p128_u32(
        veorq_u32(vreinterpretq_u32_p128(a), vreinterpretq_u32_p128(b)));
    return sum;
}
#endif
/* /*
* Key setup is just like in aesgcm-ref-poly.c. There's no point using * Key setup is just like in aesgcm-ref-poly.c. There's no point using
* vector registers to accelerate this, because it happens rarely. * vector registers to accelerate this, because it happens rarely.