diff --git a/configure.ac b/configure.ac index b20c4fd9..7ae3ab43 100644 --- a/configure.ac +++ b/configure.ac @@ -171,7 +171,7 @@ AC_CHECK_LIB(X11, XOpenDisplay, AC_CHECK_FUNCS([getaddrinfo posix_openpt ptsname setresuid strsignal updwtmpx fstatat dirfd futimes setpwent endpwent getauxval elf_aux_info]) AC_CHECK_DECLS([CLOCK_MONOTONIC], [], [], [[#include ]]) -AC_CHECK_HEADERS([sys/auxv.h asm/hwcap.h sys/types.h glob.h]) +AC_CHECK_HEADERS([sys/auxv.h asm/hwcap.h sys/sysctl.h sys/types.h glob.h]) AC_SEARCH_LIBS([clock_gettime], [rt], [AC_DEFINE([HAVE_CLOCK_GETTIME],[],[Define if clock_gettime() is available])]) AC_CACHE_CHECK([for SO_PEERCRED and dependencies], [x_cv_linux_so_peercred], [ diff --git a/ssh.h b/ssh.h index 32de1cd6..ed3f81c8 100644 --- a/ssh.h +++ b/ssh.h @@ -976,7 +976,11 @@ extern const ssh_hashalg ssh_sha256; extern const ssh_hashalg ssh_sha256_hw; extern const ssh_hashalg ssh_sha256_sw; extern const ssh_hashalg ssh_sha384; +extern const ssh_hashalg ssh_sha384_hw; +extern const ssh_hashalg ssh_sha384_sw; extern const ssh_hashalg ssh_sha512; +extern const ssh_hashalg ssh_sha512_hw; +extern const ssh_hashalg ssh_sha512_sw; extern const ssh_hashalg ssh_sha3_224; extern const ssh_hashalg ssh_sha3_256; extern const ssh_hashalg ssh_sha3_384; @@ -1020,6 +1024,7 @@ extern const ssh_compression_alg ssh_zlib; bool platform_aes_hw_available(void); bool platform_sha256_hw_available(void); bool platform_sha1_hw_available(void); +bool platform_sha512_hw_available(void); /* * PuTTY version number formatted as an SSH version string. diff --git a/sshsh512.c b/sshsh512.c index 238eb2f0..a226d57d 100644 --- a/sshsh512.c +++ b/sshsh512.c @@ -9,6 +9,100 @@ #include #include "ssh.h" +/* + * Start by deciding whether we can support hardware SHA at all. + */ +#define HW_SHA512_NONE 0 +#define HW_SHA512_NEON 1 + +#ifdef _FORCE_SHA512_NEON +# define HW_SHA512 HW_SHA512_NEON +#elif defined __BYTE_ORDER__ && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + /* Arm can potentially support both endiannesses, but this code + * hasn't been tested on anything but little. If anyone wants to + * run big-endian, they'll need to fix it first. */ +#elif defined __ARM_FEATURE_SHA512 + /* If the Arm SHA-512 extension is available already, we can + * support NEON SHA without having to enable anything by hand */ +# define HW_SHA512 HW_SHA512_NEON +#elif defined(__clang__) +# if __has_attribute(target) && __has_include() && \ + (defined(__aarch64__)) + /* clang can enable the crypto extension in AArch64 using + * __attribute__((target)) */ +# define HW_SHA512 HW_SHA512_NEON +# define USE_CLANG_ATTR_TARGET_AARCH64 +# endif +#endif + +#if defined _FORCE_SOFTWARE_SHA || !defined HW_SHA512 +# undef HW_SHA512 +# define HW_SHA512 HW_SHA512_NONE +#endif + +/* + * The actual query function that asks if hardware acceleration is + * available. + */ +static bool sha512_hw_available(void); + +/* + * The top-level selection function, caching the results of + * sha512_hw_available() so it only has to run once. + */ +static bool sha512_hw_available_cached(void) +{ + static bool initialised = false; + static bool hw_available; + if (!initialised) { + hw_available = sha512_hw_available(); + initialised = true; + } + return hw_available; +} + +struct sha512_select_options { + const ssh_hashalg *hw, *sw; +}; + +static ssh_hash *sha512_select(const ssh_hashalg *alg) +{ + const struct sha512_select_options *options = + (const struct sha512_select_options *)alg->extra; + + const ssh_hashalg *real_alg = + sha512_hw_available_cached() ? options->hw : options->sw; + + return ssh_hash_new(real_alg); +} + +const struct sha512_select_options ssh_sha512_select_options = { + &ssh_sha512_hw, &ssh_sha512_sw, +}; +const struct sha512_select_options ssh_sha384_select_options = { + &ssh_sha384_hw, &ssh_sha384_sw, +}; + +const ssh_hashalg ssh_sha512 = { + .new = sha512_select, + .hlen = 64, + .blocklen = 128, + HASHALG_NAMES_ANNOTATED("SHA-512", "dummy selector vtable"), + .extra = &ssh_sha512_select_options, +}; + +const ssh_hashalg ssh_sha384 = { + .new = sha512_select, + .hlen = 48, + .blocklen = 128, + HASHALG_NAMES_ANNOTATED("SHA-384", "dummy selector vtable"), + .extra = &ssh_sha384_select_options, +}; + +/* ---------------------------------------------------------------------- + * Definitions likely to be helpful to multiple implementations. + */ + static const uint64_t sha512_initial_state[] = { 0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL, @@ -128,6 +222,10 @@ static inline void sha512_block_pad(sha512_block *blk, BinarySink *bs) assert(blk->used == 0 && "Should have exactly hit a block boundary"); } +/* ---------------------------------------------------------------------- + * Software implementation of SHA-512. + */ + static inline uint64_t ror(uint64_t x, unsigned y) { return (x << (63 & -y)) | (x >> (63 & y)); @@ -275,7 +373,7 @@ static void sha512_sw_digest(ssh_hash *hash, uint8_t *digest) PUT_64BIT_MSB_FIRST(digest + 8*i, s->core[i]); } -const ssh_hashalg ssh_sha512 = { +const ssh_hashalg ssh_sha512_sw = { .new = sha512_sw_new, .reset = sha512_sw_reset, .copyfrom = sha512_sw_copyfrom, @@ -287,7 +385,7 @@ const ssh_hashalg ssh_sha512 = { .extra = sha512_initial_state, }; -const ssh_hashalg ssh_sha384 = { +const ssh_hashalg ssh_sha384_sw = { .new = sha512_sw_new, .reset = sha512_sw_reset, .copyfrom = sha512_sw_copyfrom, @@ -298,3 +396,430 @@ const ssh_hashalg ssh_sha384 = { HASHALG_NAMES_ANNOTATED("SHA-384", "unaccelerated"), .extra = sha384_initial_state, }; + +/* ---------------------------------------------------------------------- + * Hardware-accelerated implementation of SHA-512 using Arm NEON. + */ + +#if HW_SHA512 == HW_SHA512_NEON + +/* + * Manually set the target architecture, if we decided above that we + * need to. + */ +#ifdef USE_CLANG_ATTR_TARGET_AARCH64 +/* + * A spot of cheating: redefine some ACLE feature macros before + * including arm_neon.h. Otherwise we won't get the SHA intrinsics + * defined by that header, because it will be looking at the settings + * for the whole translation unit rather than the ones we're going to + * put on some particular functions using __attribute__((target)). + */ +#define __ARM_NEON 1 +#define __ARM_FEATURE_CRYPTO 1 +#define FUNC_ISA __attribute__ ((target("neon,sha3"))) +#endif /* USE_CLANG_ATTR_TARGET_AARCH64 */ + +#ifndef FUNC_ISA +#define FUNC_ISA +#endif + +#ifdef USE_ARM64_NEON_H +#include +#else +#include +#endif + +static bool sha512_hw_available(void) +{ + /* + * For Arm, we delegate to a per-platform detection function (see + * explanation in sshaes.c). + */ + return platform_sha512_hw_available(); +} + +#if defined __clang__ +/* + * As of 2020-12-24, I've found that clang doesn't provide the SHA-512 + * NEON intrinsics. So I define my own set using inline assembler, and + * use #define to effectively rename them over the top of the standard + * names. + * + * The aim of that #define technique is that it should avoid a build + * failure if these intrinsics _are_ defined in . + * Obviously it would be better in that situation to switch back to + * using the real intrinsics, but until I see a version of clang that + * supports them, I won't know what version number to test in the + * ifdef. + */ +static inline FUNC_ISA +uint64x2_t vsha512su0q_u64_asm(uint64x2_t x, uint64x2_t y) { + __asm__("sha512su0 %0.2D,%1.2D" : "+w" (x) : "w" (y)); + return x; +} +static inline FUNC_ISA +uint64x2_t vsha512su1q_u64_asm(uint64x2_t x, uint64x2_t y, uint64x2_t z) { + __asm__("sha512su1 %0.2D,%1.2D,%2.2D" : "+w" (x) : "w" (y), "w" (z)); + return x; +} +static inline FUNC_ISA +uint64x2_t vsha512hq_u64_asm(uint64x2_t x, uint64x2_t y, uint64x2_t z) { + __asm__("sha512h %0,%1,%2.2D" : "+w" (x) : "w" (y), "w" (z)); + return x; +} +static inline FUNC_ISA +uint64x2_t vsha512h2q_u64_asm(uint64x2_t x, uint64x2_t y, uint64x2_t z) { + __asm__("sha512h2 %0,%1,%2.2D" : "+w" (x) : "w" (y), "w" (z)); + return x; +} +#undef vsha512su0q_u64 +#define vsha512su0q_u64 vsha512su0q_u64_asm +#undef vsha512su1q_u64 +#define vsha512su1q_u64 vsha512su1q_u64_asm +#undef vsha512hq_u64 +#define vsha512hq_u64 vsha512hq_u64_asm +#undef vsha512h2q_u64 +#define vsha512h2q_u64 vsha512h2q_u64_asm +#endif /* defined __clang__ */ + +typedef struct sha512_neon_core sha512_neon_core; +struct sha512_neon_core { + uint64x2_t ab, cd, ef, gh; +}; + +FUNC_ISA +static inline uint64x2_t sha512_neon_load_input(const uint8_t *p) +{ + return vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(p))); +} + +FUNC_ISA +static inline uint64x2_t sha512_neon_schedule_update( + uint64x2_t m8, uint64x2_t m7, uint64x2_t m4, uint64x2_t m3, uint64x2_t m1) +{ + /* + * vsha512su0q_u64() takes words from a long way back in the + * schedule and performs the sigma_0 half of the computation of + * the next two 64-bit message-schedule words. + * + * vsha512su1q_u64() combines the result of that with the sigma_1 + * steps, to output the finished version of those two words. The + * total amount of input data it requires fits nicely into three + * 128-bit vector registers, but one of those registers is + * misaligned compared to the 128-bit chunks that the message + * schedule is stored in. So we use vextq_u64 to make one of its + * input words out of the second half of m4 and the first half of + * m3. + */ + return vsha512su1q_u64(vsha512su0q_u64(m8, m7), m1, vextq_u64(m4, m3, 1)); +} + +FUNC_ISA +static inline void sha512_neon_round2( + unsigned round_index, uint64x2_t schedule_words, + uint64x2_t *ab, uint64x2_t *cd, uint64x2_t *ef, uint64x2_t *gh) +{ + /* + * vsha512hq_u64 performs the Sigma_1 and Ch half of the + * computation of two rounds of SHA-512 (including feeding back + * one of the outputs from the first of those half-rounds into the + * second one). + * + * vsha512h2q_u64 combines the result of that with the Sigma_0 and + * Maj steps, and outputs one 128-bit vector that replaces the gh + * piece of the input hash state, and a second that updates cd by + * addition. + * + * Similarly to vsha512su1q_u64 above, some of the input registers + * expected by these instructions are misaligned by 64 bits + * relative to the chunks we've divided the hash state into, so we + * have to start by making 'de' and 'fg' words out of our input + * cd,ef,gh, using vextq_u64. + * + * Also, one of the inputs to vsha512hq_u64 is expected to contain + * the results of summing gh + two round constants + two words of + * message schedule, but the two words of the message schedule + * have to be the opposite way round in the vector register from + * the way that vsha512su1q_u64 output them. Hence, there's + * another vextq_u64 in here that swaps the two halves of the + * initial_sum vector register. + * + * (This also means that I don't have to prepare a specially + * reordered version of the sha512_round_constants[] array: as + * long as I'm unavoidably doing a swap at run time _anyway_, I + * can load from the normally ordered version of that array, and + * just take care to fold in that data _before_ the swap rather + * than after.) + */ + + /* Load two round constants, with the first one in the low half */ + uint64x2_t round_constants = vld1q_u64( + sha512_round_constants + round_index); + + /* Add schedule words to round constants */ + uint64x2_t initial_sum = vaddq_u64(schedule_words, round_constants); + + /* Swap that sum around so the word used in the first of the two + * rounds is in the _high_ half of the vector, matching where h + * lives in the gh vector */ + uint64x2_t swapped_initial_sum = vextq_u64(initial_sum, initial_sum, 1); + + /* Add gh to that, now that they're matching ways round */ + uint64x2_t sum = vaddq_u64(swapped_initial_sum, *gh); + + /* Make the misaligned de and fg words */ + uint64x2_t de = vextq_u64(*cd, *ef, 1); + uint64x2_t fg = vextq_u64(*ef, *gh, 1); + + /* Now we're ready to put all the pieces together. The output from + * vsha512h2q_u64 can be used directly as the new gh, and the + * output from vsha512hq_u64 is simultaneously the intermediate + * value passed to h2 and the thing you have to add on to cd. */ + uint64x2_t intermed = vsha512hq_u64(sum, fg, de); + *gh = vsha512h2q_u64(intermed, *cd, *ab); + *cd = vaddq_u64(*cd, intermed); +} + +FUNC_ISA +static inline void sha512_neon_block(sha512_neon_core *core, const uint8_t *p) +{ + uint64x2_t s0, s1, s2, s3, s4, s5, s6, s7; + + uint64x2_t ab = core->ab, cd = core->cd, ef = core->ef, gh = core->gh; + + s0 = sha512_neon_load_input(p + 16*0); + sha512_neon_round2(0, s0, &ab, &cd, &ef, &gh); + s1 = sha512_neon_load_input(p + 16*1); + sha512_neon_round2(2, s1, &gh, &ab, &cd, &ef); + s2 = sha512_neon_load_input(p + 16*2); + sha512_neon_round2(4, s2, &ef, &gh, &ab, &cd); + s3 = sha512_neon_load_input(p + 16*3); + sha512_neon_round2(6, s3, &cd, &ef, &gh, &ab); + s4 = sha512_neon_load_input(p + 16*4); + sha512_neon_round2(8, s4, &ab, &cd, &ef, &gh); + s5 = sha512_neon_load_input(p + 16*5); + sha512_neon_round2(10, s5, &gh, &ab, &cd, &ef); + s6 = sha512_neon_load_input(p + 16*6); + sha512_neon_round2(12, s6, &ef, &gh, &ab, &cd); + s7 = sha512_neon_load_input(p + 16*7); + sha512_neon_round2(14, s7, &cd, &ef, &gh, &ab); + s0 = sha512_neon_schedule_update(s0, s1, s4, s5, s7); + sha512_neon_round2(16, s0, &ab, &cd, &ef, &gh); + s1 = sha512_neon_schedule_update(s1, s2, s5, s6, s0); + sha512_neon_round2(18, s1, &gh, &ab, &cd, &ef); + s2 = sha512_neon_schedule_update(s2, s3, s6, s7, s1); + sha512_neon_round2(20, s2, &ef, &gh, &ab, &cd); + s3 = sha512_neon_schedule_update(s3, s4, s7, s0, s2); + sha512_neon_round2(22, s3, &cd, &ef, &gh, &ab); + s4 = sha512_neon_schedule_update(s4, s5, s0, s1, s3); + sha512_neon_round2(24, s4, &ab, &cd, &ef, &gh); + s5 = sha512_neon_schedule_update(s5, s6, s1, s2, s4); + sha512_neon_round2(26, s5, &gh, &ab, &cd, &ef); + s6 = sha512_neon_schedule_update(s6, s7, s2, s3, s5); + sha512_neon_round2(28, s6, &ef, &gh, &ab, &cd); + s7 = sha512_neon_schedule_update(s7, s0, s3, s4, s6); + sha512_neon_round2(30, s7, &cd, &ef, &gh, &ab); + s0 = sha512_neon_schedule_update(s0, s1, s4, s5, s7); + sha512_neon_round2(32, s0, &ab, &cd, &ef, &gh); + s1 = sha512_neon_schedule_update(s1, s2, s5, s6, s0); + sha512_neon_round2(34, s1, &gh, &ab, &cd, &ef); + s2 = sha512_neon_schedule_update(s2, s3, s6, s7, s1); + sha512_neon_round2(36, s2, &ef, &gh, &ab, &cd); + s3 = sha512_neon_schedule_update(s3, s4, s7, s0, s2); + sha512_neon_round2(38, s3, &cd, &ef, &gh, &ab); + s4 = sha512_neon_schedule_update(s4, s5, s0, s1, s3); + sha512_neon_round2(40, s4, &ab, &cd, &ef, &gh); + s5 = sha512_neon_schedule_update(s5, s6, s1, s2, s4); + sha512_neon_round2(42, s5, &gh, &ab, &cd, &ef); + s6 = sha512_neon_schedule_update(s6, s7, s2, s3, s5); + sha512_neon_round2(44, s6, &ef, &gh, &ab, &cd); + s7 = sha512_neon_schedule_update(s7, s0, s3, s4, s6); + sha512_neon_round2(46, s7, &cd, &ef, &gh, &ab); + s0 = sha512_neon_schedule_update(s0, s1, s4, s5, s7); + sha512_neon_round2(48, s0, &ab, &cd, &ef, &gh); + s1 = sha512_neon_schedule_update(s1, s2, s5, s6, s0); + sha512_neon_round2(50, s1, &gh, &ab, &cd, &ef); + s2 = sha512_neon_schedule_update(s2, s3, s6, s7, s1); + sha512_neon_round2(52, s2, &ef, &gh, &ab, &cd); + s3 = sha512_neon_schedule_update(s3, s4, s7, s0, s2); + sha512_neon_round2(54, s3, &cd, &ef, &gh, &ab); + s4 = sha512_neon_schedule_update(s4, s5, s0, s1, s3); + sha512_neon_round2(56, s4, &ab, &cd, &ef, &gh); + s5 = sha512_neon_schedule_update(s5, s6, s1, s2, s4); + sha512_neon_round2(58, s5, &gh, &ab, &cd, &ef); + s6 = sha512_neon_schedule_update(s6, s7, s2, s3, s5); + sha512_neon_round2(60, s6, &ef, &gh, &ab, &cd); + s7 = sha512_neon_schedule_update(s7, s0, s3, s4, s6); + sha512_neon_round2(62, s7, &cd, &ef, &gh, &ab); + s0 = sha512_neon_schedule_update(s0, s1, s4, s5, s7); + sha512_neon_round2(64, s0, &ab, &cd, &ef, &gh); + s1 = sha512_neon_schedule_update(s1, s2, s5, s6, s0); + sha512_neon_round2(66, s1, &gh, &ab, &cd, &ef); + s2 = sha512_neon_schedule_update(s2, s3, s6, s7, s1); + sha512_neon_round2(68, s2, &ef, &gh, &ab, &cd); + s3 = sha512_neon_schedule_update(s3, s4, s7, s0, s2); + sha512_neon_round2(70, s3, &cd, &ef, &gh, &ab); + s4 = sha512_neon_schedule_update(s4, s5, s0, s1, s3); + sha512_neon_round2(72, s4, &ab, &cd, &ef, &gh); + s5 = sha512_neon_schedule_update(s5, s6, s1, s2, s4); + sha512_neon_round2(74, s5, &gh, &ab, &cd, &ef); + s6 = sha512_neon_schedule_update(s6, s7, s2, s3, s5); + sha512_neon_round2(76, s6, &ef, &gh, &ab, &cd); + s7 = sha512_neon_schedule_update(s7, s0, s3, s4, s6); + sha512_neon_round2(78, s7, &cd, &ef, &gh, &ab); + + core->ab = vaddq_u64(core->ab, ab); + core->cd = vaddq_u64(core->cd, cd); + core->ef = vaddq_u64(core->ef, ef); + core->gh = vaddq_u64(core->gh, gh); +} + +typedef struct sha512_neon { + sha512_neon_core core; + sha512_block blk; + BinarySink_IMPLEMENTATION; + ssh_hash hash; +} sha512_neon; + +static void sha512_neon_write(BinarySink *bs, const void *vp, size_t len); + +static ssh_hash *sha512_neon_new(const ssh_hashalg *alg) +{ + if (!sha512_hw_available_cached()) + return NULL; + + sha512_neon *s = snew(sha512_neon); + + s->hash.vt = alg; + BinarySink_INIT(s, sha512_neon_write); + BinarySink_DELEGATE_INIT(&s->hash, s); + return &s->hash; +} + +static void sha512_neon_reset(ssh_hash *hash) +{ + sha512_neon *s = container_of(hash, sha512_neon, hash); + const uint64_t *iv = (const uint64_t *)hash->vt->extra; + + s->core.ab = vld1q_u64(iv); + s->core.cd = vld1q_u64(iv+2); + s->core.ef = vld1q_u64(iv+4); + s->core.gh = vld1q_u64(iv+6); + + sha512_block_setup(&s->blk); +} + +static void sha512_neon_copyfrom(ssh_hash *hcopy, ssh_hash *horig) +{ + sha512_neon *copy = container_of(hcopy, sha512_neon, hash); + sha512_neon *orig = container_of(horig, sha512_neon, hash); + + *copy = *orig; /* structure copy */ + + BinarySink_COPIED(copy); + BinarySink_DELEGATE_INIT(©->hash, copy); +} + +static void sha512_neon_free(ssh_hash *hash) +{ + sha512_neon *s = container_of(hash, sha512_neon, hash); + smemclr(s, sizeof(*s)); + sfree(s); +} + +static void sha512_neon_write(BinarySink *bs, const void *vp, size_t len) +{ + sha512_neon *s = BinarySink_DOWNCAST(bs, sha512_neon); + + while (len > 0) + if (sha512_block_write(&s->blk, &vp, &len)) + sha512_neon_block(&s->core, s->blk.block); +} + +static void sha512_neon_digest(ssh_hash *hash, uint8_t *digest) +{ + sha512_neon *s = container_of(hash, sha512_neon, hash); + + sha512_block_pad(&s->blk, BinarySink_UPCAST(s)); + + vst1q_u8(digest, vrev64q_u8(vreinterpretq_u8_u64(s->core.ab))); + vst1q_u8(digest+16, vrev64q_u8(vreinterpretq_u8_u64(s->core.cd))); + vst1q_u8(digest+32, vrev64q_u8(vreinterpretq_u8_u64(s->core.ef))); + vst1q_u8(digest+48, vrev64q_u8(vreinterpretq_u8_u64(s->core.gh))); +} + +const ssh_hashalg ssh_sha512_hw = { + .new = sha512_neon_new, + .reset = sha512_neon_reset, + .copyfrom = sha512_neon_copyfrom, + .digest = sha512_neon_digest, + .free = sha512_neon_free, + .hlen = 64, + .blocklen = 128, + HASHALG_NAMES_ANNOTATED("SHA-512", "NEON accelerated"), + .extra = sha512_initial_state, +}; + +const ssh_hashalg ssh_sha384_hw = { + .new = sha512_neon_new, + .reset = sha512_neon_reset, + .copyfrom = sha512_neon_copyfrom, + .digest = sha512_neon_digest, + .free = sha512_neon_free, + .hlen = 48, + .blocklen = 128, + HASHALG_NAMES_ANNOTATED("SHA-384", "NEON accelerated"), + .extra = sha384_initial_state, +}; + +/* ---------------------------------------------------------------------- + * Stub functions if we have no hardware-accelerated SHA-512. In this + * case, sha512_hw_new returns NULL (though it should also never be + * selected by sha512_select, so the only thing that should even be + * _able_ to call it is testcrypt). As a result, the remaining vtable + * functions should never be called at all. + */ + +#elif HW_SHA512 == HW_SHA512_NONE + +static bool sha512_hw_available(void) +{ + return false; +} + +static ssh_hash *sha512_stub_new(const ssh_hashalg *alg) +{ + return NULL; +} + +#define STUB_BODY { unreachable("Should never be called"); } + +static void sha512_stub_reset(ssh_hash *hash) STUB_BODY +static void sha512_stub_copyfrom(ssh_hash *hash, ssh_hash *orig) STUB_BODY +static void sha512_stub_free(ssh_hash *hash) STUB_BODY +static void sha512_stub_digest(ssh_hash *hash, uint8_t *digest) STUB_BODY + +const ssh_hashalg ssh_sha512_hw = { + .new = sha512_stub_new, + .reset = sha512_stub_reset, + .copyfrom = sha512_stub_copyfrom, + .digest = sha512_stub_digest, + .free = sha512_stub_free, + .hlen = 64, + .blocklen = 128, + HASHALG_NAMES_ANNOTATED("SHA-512", "!NONEXISTENT ACCELERATED VERSION!"), +}; + +const ssh_hashalg ssh_sha384_hw = { + .new = sha512_stub_new, + .reset = sha512_stub_reset, + .copyfrom = sha512_stub_copyfrom, + .digest = sha512_stub_digest, + .free = sha512_stub_free, + .hlen = 48, + .blocklen = 128, + HASHALG_NAMES_ANNOTATED("SHA-384", "!NONEXISTENT ACCELERATED VERSION!"), +}; + +#endif /* HW_SHA512 */ diff --git a/test/cryptsuite.py b/test/cryptsuite.py index 5a4ae07e..78725a34 100755 --- a/test/cryptsuite.py +++ b/test/cryptsuite.py @@ -2289,80 +2289,95 @@ class standard_test_vectors(MyTestBase): "8ad3361763f7e9b2d95f4f0da6e1ccbc")) def testSHA384(self): - # Test cases from RFC 6234 section 8.5, omitting the ones - # whose input is not a multiple of 8 bits - self.assertEqualBin(hash_str('sha384', "abc"), unhex( - 'cb00753f45a35e8bb5a03d699ac65007272c32ab0eded163' - '1a8b605a43ff5bed8086072ba1e7cc2358baeca134c825a7')) - self.assertEqualBin(hash_str('sha384', - "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmn" - "hijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu"), unhex( - '09330c33f71147e83d192fc782cd1b4753111b173b3b05d2' - '2fa08086e3b0f712fcc7c71a557e2db966c3e9fa91746039')) - self.assertEqualBin(hash_str_iter('sha384', - ("a" * 1000 for _ in range(1000))), unhex( - '9d0e1809716474cb086e834e310a4a1ced149e9c00f24852' - '7972cec5704c2a5b07b8b3dc38ecc4ebae97ddd87f3d8985')) - self.assertEqualBin(hash_str('sha384', - "01234567012345670123456701234567" * 20), unhex( - '2fc64a4f500ddb6828f6a3430b8dd72a368eb7f3a8322a70' - 'bc84275b9c0b3ab00d27a5cc3c2d224aa6b61a0d79fb4596')) - self.assertEqualBin(hash_str('sha384', b"\xB9"), unhex( - 'bc8089a19007c0b14195f4ecc74094fec64f01f90929282c' - '2fb392881578208ad466828b1c6c283d2722cf0ad1ab6938')) - self.assertEqualBin(hash_str('sha384', - unhex("a41c497779c0375ff10a7f4e08591739")), unhex( - 'c9a68443a005812256b8ec76b00516f0dbb74fab26d66591' - '3f194b6ffb0e91ea9967566b58109cbc675cc208e4c823f7')) - self.assertEqualBin(hash_str('sha384', unhex( - "399669e28f6b9c6dbcbb6912ec10ffcf74790349b7dc8fbe4a8e7b3b5621db0f" - "3e7dc87f823264bbe40d1811c9ea2061e1c84ad10a23fac1727e7202fc3f5042" - "e6bf58cba8a2746e1f64f9b9ea352c711507053cf4e5339d52865f25cc22b5e8" - "7784a12fc961d66cb6e89573199a2ce6565cbdf13dca403832cfcb0e8b7211e8" - "3af32a11ac17929ff1c073a51cc027aaedeff85aad7c2b7c5a803e2404d96d2a" - "77357bda1a6daeed17151cb9bc5125a422e941de0ca0fc5011c23ecffefdd096" - "76711cf3db0a3440720e1615c1f22fbc3c721de521e1b99ba1bd557740864214" - "7ed096")), unhex( - '4f440db1e6edd2899fa335f09515aa025ee177a79f4b4aaf' - '38e42b5c4de660f5de8fb2a5b2fbd2a3cbffd20cff1288c0')) + for hashname in ['sha384_sw', 'sha384_hw']: + if ssh_hash_new(hashname) is None: + continue # skip testing of unavailable HW implementation + + # Test cases from RFC 6234 section 8.5, omitting the ones + # whose input is not a multiple of 8 bits + self.assertEqualBin(hash_str('sha384', "abc"), unhex( + 'cb00753f45a35e8bb5a03d699ac65007272c32ab0eded163' + '1a8b605a43ff5bed8086072ba1e7cc2358baeca134c825a7')) + self.assertEqualBin(hash_str('sha384', + "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmn" + "hijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu"), + unhex('09330c33f71147e83d192fc782cd1b4753111b173b3b05d2' + '2fa08086e3b0f712fcc7c71a557e2db966c3e9fa91746039')) + self.assertEqualBin(hash_str_iter('sha384', + ("a" * 1000 for _ in range(1000))), unhex( + '9d0e1809716474cb086e834e310a4a1ced149e9c00f24852' + '7972cec5704c2a5b07b8b3dc38ecc4ebae97ddd87f3d8985')) + self.assertEqualBin(hash_str('sha384', + "01234567012345670123456701234567" * 20), unhex( + '2fc64a4f500ddb6828f6a3430b8dd72a368eb7f3a8322a70' + 'bc84275b9c0b3ab00d27a5cc3c2d224aa6b61a0d79fb4596')) + self.assertEqualBin(hash_str('sha384', b"\xB9"), unhex( + 'bc8089a19007c0b14195f4ecc74094fec64f01f90929282c' + '2fb392881578208ad466828b1c6c283d2722cf0ad1ab6938')) + self.assertEqualBin(hash_str('sha384', + unhex("a41c497779c0375ff10a7f4e08591739")), unhex( + 'c9a68443a005812256b8ec76b00516f0dbb74fab26d66591' + '3f194b6ffb0e91ea9967566b58109cbc675cc208e4c823f7')) + self.assertEqualBin(hash_str('sha384', unhex( + "399669e28f6b9c6dbcbb6912ec10ffcf74790349b7dc8fbe4a8e7b3b5621" + "db0f3e7dc87f823264bbe40d1811c9ea2061e1c84ad10a23fac1727e7202" + "fc3f5042e6bf58cba8a2746e1f64f9b9ea352c711507053cf4e5339d5286" + "5f25cc22b5e87784a12fc961d66cb6e89573199a2ce6565cbdf13dca4038" + "32cfcb0e8b7211e83af32a11ac17929ff1c073a51cc027aaedeff85aad7c" + "2b7c5a803e2404d96d2a77357bda1a6daeed17151cb9bc5125a422e941de" + "0ca0fc5011c23ecffefdd09676711cf3db0a3440720e1615c1f22fbc3c72" + "1de521e1b99ba1bd5577408642147ed096")), unhex( + '4f440db1e6edd2899fa335f09515aa025ee177a79f4b4aaf' + '38e42b5c4de660f5de8fb2a5b2fbd2a3cbffd20cff1288c0')) def testSHA512(self): - # Test cases from RFC 6234 section 8.5, omitting the ones - # whose input is not a multiple of 8 bits - self.assertEqualBin(hash_str('sha512', "abc"), unhex( - 'ddaf35a193617abacc417349ae20413112e6fa4e89a97ea20a9eeee64b55d39a' - '2192992a274fc1a836ba3c23a3feebbd454d4423643ce80e2a9ac94fa54ca49f')) - self.assertEqualBin(hash_str('sha512', - "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmn" - "hijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu"), unhex( - '8e959b75dae313da8cf4f72814fc143f8f7779c6eb9f7fa17299aeadb6889018' - '501d289e4900f7e4331b99dec4b5433ac7d329eeb6dd26545e96e55b874be909')) - self.assertEqualBin(hash_str_iter('sha512', - ("a" * 1000 for _ in range(1000))), unhex( - 'e718483d0ce769644e2e42c7bc15b4638e1f98b13b2044285632a803afa973eb' - 'de0ff244877ea60a4cb0432ce577c31beb009c5c2c49aa2e4eadb217ad8cc09b')) - self.assertEqualBin(hash_str('sha512', - "01234567012345670123456701234567" * 20), unhex( - '89d05ba632c699c31231ded4ffc127d5a894dad412c0e024db872d1abd2ba814' - '1a0f85072a9be1e2aa04cf33c765cb510813a39cd5a84c4acaa64d3f3fb7bae9')) - self.assertEqualBin(hash_str('sha512', b"\xD0"), unhex( - '9992202938e882e73e20f6b69e68a0a7149090423d93c81bab3f21678d4aceee' - 'e50e4e8cafada4c85a54ea8306826c4ad6e74cece9631bfa8a549b4ab3fbba15')) - self.assertEqualBin(hash_str('sha512', - unhex("8d4e3c0e3889191491816e9d98bff0a0")), unhex( - 'cb0b67a4b8712cd73c9aabc0b199e9269b20844afb75acbdd1c153c9828924c3' - 'ddedaafe669c5fdd0bc66f630f6773988213eb1b16f517ad0de4b2f0c95c90f8')) - self.assertEqualBin(hash_str('sha512', unhex( - "a55f20c411aad132807a502d65824e31a2305432aa3d06d3e282a8d84e0de1de" - "6974bf495469fc7f338f8054d58c26c49360c3e87af56523acf6d89d03e56ff2" - "f868002bc3e431edc44df2f0223d4bb3b243586e1a7d924936694fcbbaf88d95" - "19e4eb50a644f8e4f95eb0ea95bc4465c8821aacd2fe15ab4981164bbb6dc32f" - "969087a145b0d9cc9c67c22b763299419cc4128be9a077b3ace634064e6d9928" - "3513dc06e7515d0d73132e9a0dc6d3b1f8b246f1a98a3fc72941b1e3bb2098e8" - "bf16f268d64f0b0f4707fe1ea1a1791ba2f3c0c758e5f551863a96c949ad47d7" - "fb40d2")), unhex( - 'c665befb36da189d78822d10528cbf3b12b3eef726039909c1a16a270d487193' - '77966b957a878e720584779a62825c18da26415e49a7176a894e7510fd1451f5')) + for hashname in ['sha512_sw', 'sha512_hw']: + if ssh_hash_new(hashname) is None: + continue # skip testing of unavailable HW implementation + + # Test cases from RFC 6234 section 8.5, omitting the ones + # whose input is not a multiple of 8 bits + self.assertEqualBin(hash_str('sha512', "abc"), unhex( + 'ddaf35a193617abacc417349ae20413112e6fa4e89a97ea20a9eeee64b55' + 'd39a2192992a274fc1a836ba3c23a3feebbd454d4423643ce80e2a9ac94f' + 'a54ca49f')) + self.assertEqualBin(hash_str('sha512', + "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmn" + "hijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu"), + unhex('8e959b75dae313da8cf4f72814fc143f8f7779c6eb9f7fa17299' + 'aeadb6889018501d289e4900f7e4331b99dec4b5433ac7d329eeb6dd26' + '545e96e55b874be909')) + self.assertEqualBin(hash_str_iter('sha512', + ("a" * 1000 for _ in range(1000))), unhex( + 'e718483d0ce769644e2e42c7bc15b4638e1f98b13b2044285632a803afa9' + '73ebde0ff244877ea60a4cb0432ce577c31beb009c5c2c49aa2e4eadb217' + 'ad8cc09b')) + self.assertEqualBin(hash_str('sha512', + "01234567012345670123456701234567" * 20), unhex( + '89d05ba632c699c31231ded4ffc127d5a894dad412c0e024db872d1abd2b' + 'a8141a0f85072a9be1e2aa04cf33c765cb510813a39cd5a84c4acaa64d3f' + '3fb7bae9')) + self.assertEqualBin(hash_str('sha512', b"\xD0"), unhex( + '9992202938e882e73e20f6b69e68a0a7149090423d93c81bab3f21678d4a' + 'ceeee50e4e8cafada4c85a54ea8306826c4ad6e74cece9631bfa8a549b4a' + 'b3fbba15')) + self.assertEqualBin(hash_str('sha512', + unhex("8d4e3c0e3889191491816e9d98bff0a0")), unhex( + 'cb0b67a4b8712cd73c9aabc0b199e9269b20844afb75acbdd1c153c98289' + '24c3ddedaafe669c5fdd0bc66f630f6773988213eb1b16f517ad0de4b2f0' + 'c95c90f8')) + self.assertEqualBin(hash_str('sha512', unhex( + "a55f20c411aad132807a502d65824e31a2305432aa3d06d3e282a8d84e0d" + "e1de6974bf495469fc7f338f8054d58c26c49360c3e87af56523acf6d89d" + "03e56ff2f868002bc3e431edc44df2f0223d4bb3b243586e1a7d92493669" + "4fcbbaf88d9519e4eb50a644f8e4f95eb0ea95bc4465c8821aacd2fe15ab" + "4981164bbb6dc32f969087a145b0d9cc9c67c22b763299419cc4128be9a0" + "77b3ace634064e6d99283513dc06e7515d0d73132e9a0dc6d3b1f8b246f1" + "a98a3fc72941b1e3bb2098e8bf16f268d64f0b0f4707fe1ea1a1791ba2f3" + "c0c758e5f551863a96c949ad47d7fb40d2")), unhex( + 'c665befb36da189d78822d10528cbf3b12b3eef726039909c1a16a270d48' + '719377966b957a878e720584779a62825c18da26415e49a7176a894e7510' + 'fd1451f5')) def testSHA3(self): # Source: all the SHA-3 test strings from diff --git a/testcrypt.c b/testcrypt.c index 4737442d..8eb1134b 100644 --- a/testcrypt.c +++ b/testcrypt.c @@ -215,7 +215,11 @@ static const ssh_hashalg *get_hashalg(BinarySource *in) {"sha256_sw", &ssh_sha256_sw}, {"sha256_hw", &ssh_sha256_hw}, {"sha384", &ssh_sha384}, + {"sha384_sw", &ssh_sha384_sw}, + {"sha384_hw", &ssh_sha384_hw}, {"sha512", &ssh_sha512}, + {"sha512_sw", &ssh_sha512_sw}, + {"sha512_hw", &ssh_sha512_hw}, {"sha3_224", &ssh_sha3_224}, {"sha3_256", &ssh_sha3_256}, {"sha3_384", &ssh_sha3_384}, diff --git a/unix/uxutils.c b/unix/uxutils.c index a1804479..3a04c1be 100644 --- a/unix/uxutils.c +++ b/unix/uxutils.c @@ -49,4 +49,17 @@ bool platform_sha1_hw_available(void) #endif } +bool platform_sha512_hw_available(void) +{ +#if defined HWCAP_SHA512 + return getauxval(AT_HWCAP) & HWCAP_SHA512; +#elif defined HWCAP2_SHA512 + return getauxval(AT_HWCAP2) & HWCAP2_SHA512; +#elif defined __APPLE__ + return test_sysctl_flag("hw.optional.armv8_2_sha512"); +#else + return false; +#endif +} + #endif /* defined __arm__ || defined __aarch64__ */ diff --git a/unix/uxutils.h b/unix/uxutils.h index 4e8dc808..05e0b9e2 100644 --- a/unix/uxutils.h +++ b/unix/uxutils.h @@ -24,6 +24,10 @@ #include #endif +#ifdef HAVE_SYS_SYSCTL_H +#include +#endif + #if defined HAVE_GETAUXVAL /* No code needed: getauxval has just the API we want already */ #elif defined HAVE_ELF_AUX_INFO @@ -42,4 +46,14 @@ static inline u_long getauxval(int which) { return 0; } #endif /* defined __arm__ || defined __aarch64__ */ +#if defined __APPLE__ +static inline bool test_sysctl_flag(const char *flagname) +{ + int value; + size_t size = sizeof(value); + return (sysctlbyname(flagname, &value, &size, NULL, 0) == 0 && + size == sizeof(value) && value != 0); +} +#endif /* defined __APPLE__ */ + #endif /* PUTTY_UXUTILS_H */ diff --git a/windows/winmiscs.c b/windows/winmiscs.c index 73e4f868..571a9122 100644 --- a/windows/winmiscs.c +++ b/windows/winmiscs.c @@ -266,6 +266,14 @@ bool platform_sha1_hw_available(void) return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE); } +bool platform_sha512_hw_available(void) +{ + /* As of 2020-12-24, as far as I can tell from docs.microsoft.com, + * Windows on Arm does not yet provide a PF_ARM_V8_* flag for the + * SHA-512 architecture extension. */ + return false; +} + #endif bool is_console_handle(HANDLE handle)