mirror of
https://git.tartarus.org/simon/putty.git
synced 2025-01-10 01:48:00 +00:00
AES-GCM NEON: cope with missing vaddq_p128.
In some compilers (I'm told clang 10, in particular), the NEON intrinsic vaddq_p128 is missing, even though its input type poly128_t is provided. vaddq_p128 is just an XOR of two vector registers, so that's easy to work around by casting to a more mundane type and back. Added a configure-time test for that intrinsic, and a workaround to be used in its absence.
This commit is contained in:
parent
0615767224
commit
2222cd104d
@ -54,6 +54,7 @@
|
|||||||
#cmakedefine01 HAVE_CLMUL
|
#cmakedefine01 HAVE_CLMUL
|
||||||
#cmakedefine01 HAVE_NEON_CRYPTO
|
#cmakedefine01 HAVE_NEON_CRYPTO
|
||||||
#cmakedefine01 HAVE_NEON_PMULL
|
#cmakedefine01 HAVE_NEON_PMULL
|
||||||
|
#cmakedefine01 HAVE_NEON_VADDQ_P128
|
||||||
#cmakedefine01 HAVE_NEON_SHA512
|
#cmakedefine01 HAVE_NEON_SHA512
|
||||||
#cmakedefine01 HAVE_NEON_SHA512_INTRINSICS
|
#cmakedefine01 HAVE_NEON_SHA512_INTRINSICS
|
||||||
#cmakedefine01 USE_ARM64_NEON_H
|
#cmakedefine01 USE_ARM64_NEON_H
|
||||||
|
@ -195,6 +195,14 @@ if(neon)
|
|||||||
int main(void) { r = vmull_p64(a, b); r = vmull_high_p64(u, v); }"
|
int main(void) { r = vmull_p64(a, b); r = vmull_high_p64(u, v); }"
|
||||||
ADD_SOURCES_IF_SUCCESSFUL aesgcm-neon.c)
|
ADD_SOURCES_IF_SUCCESSFUL aesgcm-neon.c)
|
||||||
|
|
||||||
|
# Configure-time probe for the vaddq_p128 NEON intrinsic: the test
# program below only compiles if the NEON header declares it for
# poly128_t operands. Some compilers provide poly128_t without
# vaddq_p128, so this is probed separately from the other NEON tests.
# NOTE(review): presumably the outcome is recorded in
# HAVE_NEON_VADDQ_P128 (the first argument) -- confirm against the
# definition of test_compile_with_flags.
test_compile_with_flags(HAVE_NEON_VADDQ_P128
|
||||||
|
GNU_FLAGS -march=armv8-a+crypto
|
||||||
|
MSVC_FLAGS -D_ARM_USE_NEW_NEON_INTRINSICS
|
||||||
|
TEST_SOURCE "
|
||||||
|
#include <${neon_header}>
|
||||||
|
volatile poly128_t r;
|
||||||
|
int main(void) { r = vaddq_p128(r, r); }")
|
||||||
|
|
||||||
# The 'sha3' architecture extension, despite the name, includes
|
# The 'sha3' architecture extension, despite the name, includes
|
||||||
# support for SHA-512 (from the SHA-2 standard) as well as SHA-3
|
# support for SHA-512 (from the SHA-2 standard) as well as SHA-3
|
||||||
# proper.
|
# proper.
|
||||||
|
@ -87,6 +87,14 @@ static inline void store_p128_be(void *p, poly128_t v)
|
|||||||
vst1q_u8(p, vrev64q_u8(vreinterpretq_u8_p128(swapped)));
|
vst1q_u8(p, vrev64q_u8(vreinterpretq_u8_p128(swapped)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Fallback definition of vaddq_p128, compiled only when
 * HAVE_NEON_VADDQ_P128 is 0, i.e. for compilers that provide the
 * poly128_t type but not this intrinsic.
 *
 * Takes two poly128_t values and returns their bitwise XOR as a
 * poly128_t. The operands are reinterpreted as u32 vectors so that
 * the ordinary veorq_u32 intrinsic can do the XOR, and the result is
 * reinterpreted back; no bits are changed by the reinterpretation
 * itself.
 */
#if !HAVE_NEON_VADDQ_P128
|
||||||
|
static inline poly128_t vaddq_p128(poly128_t a, poly128_t b)
|
||||||
|
{
|
||||||
|
return vreinterpretq_p128_u32(veorq_u32(
|
||||||
|
vreinterpretq_u32_p128(a), vreinterpretq_u32_p128(b)));
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Key setup is just like in aesgcm-ref-poly.c. There's no point using
|
* Key setup is just like in aesgcm-ref-poly.c. There's no point using
|
||||||
* vector registers to accelerate this, because it happens rarely.
|
* vector registers to accelerate this, because it happens rarely.
|
||||||
|
Loading…
Reference in New Issue
Block a user