1
0
mirror of https://git.tartarus.org/simon/putty.git synced 2025-01-25 01:02:24 +00:00

Tidy up arithmetic in the SHA-512 implementation.

It was written in an awkward roundabout way involving all the
arithmetic being done in horrible macros looking like assembler
instructions, and lots of explicit temp variables. That's because,
when I originally wrote it, I needed it to compile on platforms
without a 64-bit integer type.

In commit a647f2ba11 I switched it over to using uint64_t, but I did
it in a way that made minimal change to the code structure, by
rewriting the insides of those macros to contain ordinary uint64_t
arithmetic instead of faffing about with 32-bit halves. So it worked,
but it still looked disgusting.

Now I've reworked it so that individual arithmetic operations are
written directly in the sensible way, and the more complicated
SHA-specific operations are written as inline functions instead of
macros.
This commit is contained in:
Simon Tatham 2020-12-24 10:52:48 +00:00
parent 092c51afed
commit 43cdc3d910

View File

@ -19,49 +19,71 @@ typedef struct {
BinarySink_IMPLEMENTATION; BinarySink_IMPLEMENTATION;
} SHA512_State; } SHA512_State;
/*
* Arithmetic implementations. Note that AND, XOR and NOT can
* overlap destination with one source, but the others can't.
*/
#define add(r,x,y) ( r = (x) + (y) )
#define rorB(r,x,y) ( r = ((x) >> (y)) | ((x) << (64-(y))) )
#define rorL(r,x,y) ( r = ((x) >> (y)) | ((x) << (64-(y))) )
#define shrB(r,x,y) ( r = (x) >> (y) )
#define shrL(r,x,y) ( r = (x) >> (y) )
#define and(r,x,y) ( r = (x) & (y) )
#define xor(r,x,y) ( r = (x) ^ (y) )
#define not(r,x) ( r = ~(x) )
#define INIT(h,l) ((((uint64_t)(h)) << 32) | (l))
#define BUILD(r,h,l) ( r = ((((uint64_t)(h)) << 32) | (l)) )
#define EXTRACT(h,l,r) ( h = (r) >> 32, l = (r) & 0xFFFFFFFFU )
/* ---------------------------------------------------------------------- /* ----------------------------------------------------------------------
* Core SHA512 algorithm: processes 16-doubleword blocks into a * Core SHA512 algorithm: processes 16-doubleword blocks into a
* message digest. * message digest.
*/ */
#define Ch(r,t,x,y,z) ( not(t,x), and(r,t,z), and(t,x,y), xor(r,r,t) ) static inline uint64_t ror(uint64_t x, unsigned y)
#define Maj(r,t,x,y,z) ( and(r,x,y), and(t,x,z), xor(r,r,t), \ {
and(t,y,z), xor(r,r,t) ) return (x << (63 & -y)) | (x >> (63 & y));
#define bigsigma0(r,t,x) ( rorL(r,x,28), rorB(t,x,34), xor(r,r,t), \ }
rorB(t,x,39), xor(r,r,t) )
#define bigsigma1(r,t,x) ( rorL(r,x,14), rorL(t,x,18), xor(r,r,t), \ static inline uint64_t Ch(uint64_t ctrl, uint64_t if1, uint64_t if0)
rorB(t,x,41), xor(r,r,t) ) {
#define smallsigma0(r,t,x) ( rorL(r,x,1), rorL(t,x,8), xor(r,r,t), \ return if0 ^ (ctrl & (if1 ^ if0));
shrL(t,x,7), xor(r,r,t) ) }
#define smallsigma1(r,t,x) ( rorL(r,x,19), rorB(t,x,61), xor(r,r,t), \
shrL(t,x,6), xor(r,r,t) ) static inline uint64_t Maj(uint64_t x, uint64_t y, uint64_t z)
{
return (x & y) | (z & (x | y));
}
/*
 * SHA-512 'big sigma 0' function: the XOR of the input word rotated
 * right by 28, 34 and 39 bits.
 */
static inline uint64_t Sigma_0(uint64_t x)
{
    uint64_t result = ror(x, 28);
    result ^= ror(x, 34);
    result ^= ror(x, 39);
    return result;
}
/*
 * SHA-512 'big sigma 1' function: the XOR of the input word rotated
 * right by 14, 18 and 41 bits.
 */
static inline uint64_t Sigma_1(uint64_t x)
{
    uint64_t result = ror(x, 14);
    result ^= ror(x, 18);
    result ^= ror(x, 41);
    return result;
}
/*
 * SHA-512 'small sigma 0' function: the XOR of the input word rotated
 * right by 1 and by 8 bits, and the input word shifted right (not
 * rotated) by 7 bits.
 */
static inline uint64_t sigma_0(uint64_t x)
{
    uint64_t result = x >> 7;          /* plain shift: top bits become 0 */
    result ^= ror(x, 1);
    result ^= ror(x, 8);
    return result;
}
/*
 * SHA-512 'small sigma 1' function: the XOR of the input word rotated
 * right by 19 and by 61 bits, and the input word shifted right (not
 * rotated) by 6 bits.
 */
static inline uint64_t sigma_1(uint64_t x)
{
    uint64_t result = x >> 6;          /* plain shift: top bits become 0 */
    result ^= ror(x, 19);
    result ^= ror(x, 61);
    return result;
}
/*
 * Perform one round of the SHA-512 compression function.
 *
 * round_index selects which element of both round_constants and
 * schedule is fed into this round.
 *
 * The eight working variables are passed by pointer. Only *d and *h
 * are modified: *d has t1 added to it, and *h is overwritten with
 * t1 + t2. All other variables are only read. This function does not
 * shuffle the variables along; the caller passes them in a rotated
 * order on each successive round instead.
 */
static inline void SHA512_Round(
    unsigned round_index, const uint64_t *round_constants,
    const uint64_t *schedule,
    uint64_t *a, uint64_t *b, uint64_t *c, uint64_t *d,
    uint64_t *e, uint64_t *f, uint64_t *g, uint64_t *h)
{
    uint64_t t1, t2;

    /* Both temporaries are fully computed before anything is written
     * back through the pointers. */
    t1 = *h;
    t1 += Sigma_1(*e);
    t1 += Ch(*e, *f, *g);
    t1 += round_constants[round_index];
    t1 += schedule[round_index];

    t2 = Sigma_0(*a);
    t2 += Maj(*a, *b, *c);

    *d += t1;
    *h = t1 + t2;
}
static void SHA512_Core_Init(SHA512_State *s) { static void SHA512_Core_Init(SHA512_State *s) {
static const uint64_t iv[] = { static const uint64_t iv[] = {
INIT(0x6a09e667, 0xf3bcc908), 0x6a09e667f3bcc908ULL,
INIT(0xbb67ae85, 0x84caa73b), 0xbb67ae8584caa73bULL,
INIT(0x3c6ef372, 0xfe94f82b), 0x3c6ef372fe94f82bULL,
INIT(0xa54ff53a, 0x5f1d36f1), 0xa54ff53a5f1d36f1ULL,
INIT(0x510e527f, 0xade682d1), 0x510e527fade682d1ULL,
INIT(0x9b05688c, 0x2b3e6c1f), 0x9b05688c2b3e6c1fULL,
INIT(0x1f83d9ab, 0xfb41bd6b), 0x1f83d9abfb41bd6bULL,
INIT(0x5be0cd19, 0x137e2179), 0x5be0cd19137e2179ULL,
}; };
int i; int i;
for (i = 0; i < 8; i++) for (i = 0; i < 8; i++)
@ -70,14 +92,14 @@ static void SHA512_Core_Init(SHA512_State *s) {
static void SHA384_Core_Init(SHA512_State *s) { static void SHA384_Core_Init(SHA512_State *s) {
static const uint64_t iv[] = { static const uint64_t iv[] = {
INIT(0xcbbb9d5d, 0xc1059ed8), 0xcbbb9d5dc1059ed8ULL,
INIT(0x629a292a, 0x367cd507), 0x629a292a367cd507ULL,
INIT(0x9159015a, 0x3070dd17), 0x9159015a3070dd17ULL,
INIT(0x152fecd8, 0xf70e5939), 0x152fecd8f70e5939ULL,
INIT(0x67332667, 0xffc00b31), 0x67332667ffc00b31ULL,
INIT(0x8eb44a87, 0x68581511), 0x8eb44a8768581511ULL,
INIT(0xdb0c2e0d, 0x64f98fa7), 0xdb0c2e0d64f98fa7ULL,
INIT(0x47b5481d, 0xbefa4fa4), 0x47b5481dbefa4fa4ULL,
}; };
int i; int i;
for (i = 0; i < 8; i++) for (i = 0; i < 8; i++)
@ -88,46 +110,46 @@ static void SHA512_Block(SHA512_State *s, uint64_t *block) {
uint64_t w[80]; uint64_t w[80];
uint64_t a,b,c,d,e,f,g,h; uint64_t a,b,c,d,e,f,g,h;
static const uint64_t k[] = { static const uint64_t k[] = {
INIT(0x428a2f98, 0xd728ae22), INIT(0x71374491, 0x23ef65cd), 0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL,
INIT(0xb5c0fbcf, 0xec4d3b2f), INIT(0xe9b5dba5, 0x8189dbbc), 0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL,
INIT(0x3956c25b, 0xf348b538), INIT(0x59f111f1, 0xb605d019), 0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL,
INIT(0x923f82a4, 0xaf194f9b), INIT(0xab1c5ed5, 0xda6d8118), 0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL,
INIT(0xd807aa98, 0xa3030242), INIT(0x12835b01, 0x45706fbe), 0xd807aa98a3030242ULL, 0x12835b0145706fbeULL,
INIT(0x243185be, 0x4ee4b28c), INIT(0x550c7dc3, 0xd5ffb4e2), 0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL,
INIT(0x72be5d74, 0xf27b896f), INIT(0x80deb1fe, 0x3b1696b1), 0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL,
INIT(0x9bdc06a7, 0x25c71235), INIT(0xc19bf174, 0xcf692694), 0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL,
INIT(0xe49b69c1, 0x9ef14ad2), INIT(0xefbe4786, 0x384f25e3), 0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL,
INIT(0x0fc19dc6, 0x8b8cd5b5), INIT(0x240ca1cc, 0x77ac9c65), 0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL,
INIT(0x2de92c6f, 0x592b0275), INIT(0x4a7484aa, 0x6ea6e483), 0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL,
INIT(0x5cb0a9dc, 0xbd41fbd4), INIT(0x76f988da, 0x831153b5), 0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL,
INIT(0x983e5152, 0xee66dfab), INIT(0xa831c66d, 0x2db43210), 0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL,
INIT(0xb00327c8, 0x98fb213f), INIT(0xbf597fc7, 0xbeef0ee4), 0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL,
INIT(0xc6e00bf3, 0x3da88fc2), INIT(0xd5a79147, 0x930aa725), 0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL,
INIT(0x06ca6351, 0xe003826f), INIT(0x14292967, 0x0a0e6e70), 0x06ca6351e003826fULL, 0x142929670a0e6e70ULL,
INIT(0x27b70a85, 0x46d22ffc), INIT(0x2e1b2138, 0x5c26c926), 0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL,
INIT(0x4d2c6dfc, 0x5ac42aed), INIT(0x53380d13, 0x9d95b3df), 0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL,
INIT(0x650a7354, 0x8baf63de), INIT(0x766a0abb, 0x3c77b2a8), 0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL,
INIT(0x81c2c92e, 0x47edaee6), INIT(0x92722c85, 0x1482353b), 0x81c2c92e47edaee6ULL, 0x92722c851482353bULL,
INIT(0xa2bfe8a1, 0x4cf10364), INIT(0xa81a664b, 0xbc423001), 0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL,
INIT(0xc24b8b70, 0xd0f89791), INIT(0xc76c51a3, 0x0654be30), 0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL,
INIT(0xd192e819, 0xd6ef5218), INIT(0xd6990624, 0x5565a910), 0xd192e819d6ef5218ULL, 0xd69906245565a910ULL,
INIT(0xf40e3585, 0x5771202a), INIT(0x106aa070, 0x32bbd1b8), 0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL,
INIT(0x19a4c116, 0xb8d2d0c8), INIT(0x1e376c08, 0x5141ab53), 0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL,
INIT(0x2748774c, 0xdf8eeb99), INIT(0x34b0bcb5, 0xe19b48a8), 0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL,
INIT(0x391c0cb3, 0xc5c95a63), INIT(0x4ed8aa4a, 0xe3418acb), 0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL,
INIT(0x5b9cca4f, 0x7763e373), INIT(0x682e6ff3, 0xd6b2b8a3), 0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL,
INIT(0x748f82ee, 0x5defb2fc), INIT(0x78a5636f, 0x43172f60), 0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL,
INIT(0x84c87814, 0xa1f0ab72), INIT(0x8cc70208, 0x1a6439ec), 0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL,
INIT(0x90befffa, 0x23631e28), INIT(0xa4506ceb, 0xde82bde9), 0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL,
INIT(0xbef9a3f7, 0xb2c67915), INIT(0xc67178f2, 0xe372532b), 0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL,
INIT(0xca273ece, 0xea26619c), INIT(0xd186b8c7, 0x21c0c207), 0xca273eceea26619cULL, 0xd186b8c721c0c207ULL,
INIT(0xeada7dd6, 0xcde0eb1e), INIT(0xf57d4f7f, 0xee6ed178), 0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL,
INIT(0x06f067aa, 0x72176fba), INIT(0x0a637dc5, 0xa2c898a6), 0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL,
INIT(0x113f9804, 0xbef90dae), INIT(0x1b710b35, 0x131c471b), 0x113f9804bef90daeULL, 0x1b710b35131c471bULL,
INIT(0x28db77f5, 0x23047d84), INIT(0x32caab7b, 0x40c72493), 0x28db77f523047d84ULL, 0x32caab7b40c72493ULL,
INIT(0x3c9ebe0a, 0x15c9bebc), INIT(0x431d67c4, 0x9c100d4c), 0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL,
INIT(0x4cc5d4be, 0xcb3e42b6), INIT(0x597f299c, 0xfc657e2a), 0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL,
INIT(0x5fcb6fab, 0x3ad6faec), INIT(0x6c44198c, 0x4a475817), 0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL,
}; };
int t; int t;
@ -135,54 +157,25 @@ static void SHA512_Block(SHA512_State *s, uint64_t *block) {
for (t = 0; t < 16; t++) for (t = 0; t < 16; t++)
w[t] = block[t]; w[t] = block[t];
for (t = 16; t < 80; t++) { for (t = 16; t < 80; t++)
uint64_t p, q, r, tmp; w[t] = w[t-16] + w[t-7] + sigma_0(w[t-15]) + sigma_1(w[t-2]);
smallsigma1(p, tmp, w[t-2]);
smallsigma0(q, tmp, w[t-15]);
add(r, p, q);
add(p, r, w[t-7]);
add(w[t], p, w[t-16]);
}
a = s->h[0]; b = s->h[1]; c = s->h[2]; d = s->h[3]; a = s->h[0]; b = s->h[1]; c = s->h[2]; d = s->h[3];
e = s->h[4]; f = s->h[5]; g = s->h[6]; h = s->h[7]; e = s->h[4]; f = s->h[5]; g = s->h[6]; h = s->h[7];
for (t = 0; t < 80; t+=8) { for (t = 0; t < 80; t+=8) {
uint64_t tmp, p, q, r; SHA512_Round(t+0, k,w, &a,&b,&c,&d,&e,&f,&g,&h);
SHA512_Round(t+1, k,w, &h,&a,&b,&c,&d,&e,&f,&g);
#define ROUND(j,a,b,c,d,e,f,g,h) do { \ SHA512_Round(t+2, k,w, &g,&h,&a,&b,&c,&d,&e,&f);
bigsigma1(p, tmp, e); \ SHA512_Round(t+3, k,w, &f,&g,&h,&a,&b,&c,&d,&e);
Ch(q, tmp, e, f, g); \ SHA512_Round(t+4, k,w, &e,&f,&g,&h,&a,&b,&c,&d);
add(r, p, q); \ SHA512_Round(t+5, k,w, &d,&e,&f,&g,&h,&a,&b,&c);
add(p, r, k[j]) ; \ SHA512_Round(t+6, k,w, &c,&d,&e,&f,&g,&h,&a,&b);
add(q, p, w[j]); \ SHA512_Round(t+7, k,w, &b,&c,&d,&e,&f,&g,&h,&a);
add(r, q, h); \
bigsigma0(p, tmp, a); \
Maj(tmp, q, a, b, c); \
add(q, tmp, p); \
add(p, r, d); \
d = p; \
add(h, q, r); \
} while (0)
ROUND(t+0, a,b,c,d,e,f,g,h);
ROUND(t+1, h,a,b,c,d,e,f,g);
ROUND(t+2, g,h,a,b,c,d,e,f);
ROUND(t+3, f,g,h,a,b,c,d,e);
ROUND(t+4, e,f,g,h,a,b,c,d);
ROUND(t+5, d,e,f,g,h,a,b,c);
ROUND(t+6, c,d,e,f,g,h,a,b);
ROUND(t+7, b,c,d,e,f,g,h,a);
} }
{ s->h[0] += a; s->h[1] += b; s->h[2] += c; s->h[3] += d;
uint64_t tmp; s->h[4] += e; s->h[5] += f; s->h[6] += g; s->h[7] += h;
#define UPDATE(state, local) ( tmp = state, add(state, tmp, local) )
UPDATE(s->h[0], a); UPDATE(s->h[1], b);
UPDATE(s->h[2], c); UPDATE(s->h[3], d);
UPDATE(s->h[4], e); UPDATE(s->h[5], f);
UPDATE(s->h[6], g); UPDATE(s->h[7], h);
}
} }
/* ---------------------------------------------------------------------- /* ----------------------------------------------------------------------