1
0
mirror of https://git.tartarus.org/simon/putty.git synced 2025-01-08 08:58:00 +00:00

Rewrite CRC implementation to be constant-time.

In SSH-1, the CRC is used on sensitive data, because it takes the
place of what ought to be a MAC. This is of course hopelessly bad
security and one of the major reasons SSH-1 was replaced, but even so,
there's no need to add timing and cache side channels _as well_ as all
the other problems with it!

So I've removed the 256-entry lookup table that's the usual way to
implement CRC (in particular, the implementation given in the RFC 1662
appendix shows the same table in full). The new strategy folds in four
bits at a time, using a multiply+XOR technique to replicate the
outgoing four bits in all the right places.

In a crude timing test this gave about a factor of 2 slowdown, which
seemed surprisingly good to me - six multiplies replacing a single
table lookup? But the multiplications in each 4-bit fold are
independent of each other, so I suspect the CPU is managing to
parallelise them.
This commit is contained in:
Simon Tatham 2019-01-14 20:46:12 +00:00
parent c330156259
commit 2e866e1fb7

180
sshcrc.c
View File

@ -37,156 +37,56 @@
#include "ssh.h"
/* ----------------------------------------------------------------------
* Multi-function module. Can be compiled three ways.
*
* - Compile with no special #defines. Will generate a table
* that's already initialised at compile time, and one function
* crc32_compute(buf,len) that uses it. Normal usage.
*
* - Compile with INITFUNC defined. Will generate an uninitialised
* array as the table, and as well as crc32_compute(buf,len) it
* will also generate void crc32_init(void) which sets up the
* table at run time. Useful if binary size is important.
*
* - Compile with GENPROGRAM defined. Will create a standalone
* program that does the initialisation and outputs the table as
* C code.
*/
#define POLY (0xEDB88320L)
#ifdef GENPROGRAM
#define INITFUNC /* the gen program needs the init func :-) */
#endif
#ifdef INITFUNC
/*
* This variant of the code generates the table at run-time from an
* init function.
* Multiply a CRC value by x^4. This implementation strategy avoids
* using a lookup table (which would be a side-channel hazard, since
* SSH-1 applies this CRC to decrypted session data).
*
* The basic idea is that you'd like to "multiply" the shifted-out 4
* bits by the CRC polynomial value 0xEDB88320, or rather by that
* value shifted right 3 bits (since you want the _last_ bit shifted
* out, i.e. the one originally at the 2^3 position, to generate
* 0xEDB88320 itself). But the scare-quoted "multiply" would have to
* be a multiplication of polynomials over GF(2), which differs from
* integer multiplication in that you don't have any carries. In other
* words, you make a copy of one input shifted left by the index of
* each set bit in the other, so that adding them all together would
* give you the ordinary integer product, and then you XOR them
* together instead.
*
* With a 4-bit multiplier, the two kinds of multiplication coincide
* provided the multiplicand has no two set bits at positions
* differing by less than 4, because then no two copies of the
* multiplier can overlap to generate a carry. So I break up the
* intended multiplicand K = 0xEDB88320 >> 3 into three sub-constants
* a,b,c with that property, such that a^b^c = K. Then I can multiply
* m by each of them separately, and XOR together the results.
*/
/*
 * NOTE(review): this span shows two function headers (crc32_init and
 * crc32_shift_4) sharing one brace pair, with a table-generating loop
 * followed by a table-free 4-bit shift. It looks like two versions of
 * the code interleaved by a diff rendering - confirm which lines are
 * live before compiling.
 */
static uint32_t crc32_table[256];
void crc32_init(void)
static inline uint32_t crc32_shift_4(uint32_t v)
{
uint32_t crcword;
int i;
/* Table generator: visit every possible byte value i. */
for (i = 0; i < 256; i++) {
uint32_t newbyte, x32term;
int j;
crcword = 0;
newbyte = i;
/* Feed the byte in one bit at a time, lowest bit first. */
for (j = 0; j < 8; j++) {
/* x32term is 0 or 1: the XOR of the low bits of the register and the input. */
x32term = (crcword ^ newbyte) & 1;
/* Shift right one place; XOR in POLY only when x32term is 1. */
crcword = (crcword >> 1) ^ (x32term * POLY);
newbyte >>= 1;
}
crc32_table[i] = crcword;
}
/*
 * Table-free 4-bit shift. a ^ b ^ c == 0x1DB71064 == 0xEDB88320 >> 3,
 * and within each constant any two set bits are at least 4 positions
 * apart, so multiplying by a 4-bit value cannot generate a carry.
 */
const uint32_t a = 0x11111044, b = 0x08840020, c = 0x04220000;
/* m is the low four bits of v, i.e. the bits about to be shifted out. */
uint32_t m = v & 0xF;
/* Shift v right by 4 and XOR in the three products of m with a, b and c. */
return (v >> 4) ^ (a*m) ^ (b*m) ^ (c*m);
}
#else
/*
 * Advance a CRC value by one whole byte. Absorbing an input byte needs
 * an 8-bit shift, which is done here as two successive 4-bit shifts.
 */
static inline uint32_t crc32_shift_8(uint32_t v)
{
    uint32_t after_first_nibble = crc32_shift_4(v);

    return crc32_shift_4(after_first_nibble);
}
/*
* This variant of the code has the data already prepared.
* Update an existing hash value with extra bytes of data.
*/
/*
 * Precomputed 256-entry lookup table for the CRC polynomial 0xEDB88320,
 * indexed by a byte value. The table-driven crc32_update indexes it with
 * the incoming byte XORed with the low 8 bits of the running CRC.
 * Entry 0x80 is the polynomial itself.
 */
static const uint32_t crc32_table[256] = {
0x00000000L, 0x77073096L, 0xEE0E612CL, 0x990951BAL,
0x076DC419L, 0x706AF48FL, 0xE963A535L, 0x9E6495A3L,
0x0EDB8832L, 0x79DCB8A4L, 0xE0D5E91EL, 0x97D2D988L,
0x09B64C2BL, 0x7EB17CBDL, 0xE7B82D07L, 0x90BF1D91L,
0x1DB71064L, 0x6AB020F2L, 0xF3B97148L, 0x84BE41DEL,
0x1ADAD47DL, 0x6DDDE4EBL, 0xF4D4B551L, 0x83D385C7L,
0x136C9856L, 0x646BA8C0L, 0xFD62F97AL, 0x8A65C9ECL,
0x14015C4FL, 0x63066CD9L, 0xFA0F3D63L, 0x8D080DF5L,
0x3B6E20C8L, 0x4C69105EL, 0xD56041E4L, 0xA2677172L,
0x3C03E4D1L, 0x4B04D447L, 0xD20D85FDL, 0xA50AB56BL,
0x35B5A8FAL, 0x42B2986CL, 0xDBBBC9D6L, 0xACBCF940L,
0x32D86CE3L, 0x45DF5C75L, 0xDCD60DCFL, 0xABD13D59L,
0x26D930ACL, 0x51DE003AL, 0xC8D75180L, 0xBFD06116L,
0x21B4F4B5L, 0x56B3C423L, 0xCFBA9599L, 0xB8BDA50FL,
0x2802B89EL, 0x5F058808L, 0xC60CD9B2L, 0xB10BE924L,
0x2F6F7C87L, 0x58684C11L, 0xC1611DABL, 0xB6662D3DL,
0x76DC4190L, 0x01DB7106L, 0x98D220BCL, 0xEFD5102AL,
0x71B18589L, 0x06B6B51FL, 0x9FBFE4A5L, 0xE8B8D433L,
0x7807C9A2L, 0x0F00F934L, 0x9609A88EL, 0xE10E9818L,
0x7F6A0DBBL, 0x086D3D2DL, 0x91646C97L, 0xE6635C01L,
0x6B6B51F4L, 0x1C6C6162L, 0x856530D8L, 0xF262004EL,
0x6C0695EDL, 0x1B01A57BL, 0x8208F4C1L, 0xF50FC457L,
0x65B0D9C6L, 0x12B7E950L, 0x8BBEB8EAL, 0xFCB9887CL,
0x62DD1DDFL, 0x15DA2D49L, 0x8CD37CF3L, 0xFBD44C65L,
0x4DB26158L, 0x3AB551CEL, 0xA3BC0074L, 0xD4BB30E2L,
0x4ADFA541L, 0x3DD895D7L, 0xA4D1C46DL, 0xD3D6F4FBL,
0x4369E96AL, 0x346ED9FCL, 0xAD678846L, 0xDA60B8D0L,
0x44042D73L, 0x33031DE5L, 0xAA0A4C5FL, 0xDD0D7CC9L,
0x5005713CL, 0x270241AAL, 0xBE0B1010L, 0xC90C2086L,
0x5768B525L, 0x206F85B3L, 0xB966D409L, 0xCE61E49FL,
0x5EDEF90EL, 0x29D9C998L, 0xB0D09822L, 0xC7D7A8B4L,
0x59B33D17L, 0x2EB40D81L, 0xB7BD5C3BL, 0xC0BA6CADL,
0xEDB88320L, 0x9ABFB3B6L, 0x03B6E20CL, 0x74B1D29AL,
0xEAD54739L, 0x9DD277AFL, 0x04DB2615L, 0x73DC1683L,
0xE3630B12L, 0x94643B84L, 0x0D6D6A3EL, 0x7A6A5AA8L,
0xE40ECF0BL, 0x9309FF9DL, 0x0A00AE27L, 0x7D079EB1L,
0xF00F9344L, 0x8708A3D2L, 0x1E01F268L, 0x6906C2FEL,
0xF762575DL, 0x806567CBL, 0x196C3671L, 0x6E6B06E7L,
0xFED41B76L, 0x89D32BE0L, 0x10DA7A5AL, 0x67DD4ACCL,
0xF9B9DF6FL, 0x8EBEEFF9L, 0x17B7BE43L, 0x60B08ED5L,
0xD6D6A3E8L, 0xA1D1937EL, 0x38D8C2C4L, 0x4FDFF252L,
0xD1BB67F1L, 0xA6BC5767L, 0x3FB506DDL, 0x48B2364BL,
0xD80D2BDAL, 0xAF0A1B4CL, 0x36034AF6L, 0x41047A60L,
0xDF60EFC3L, 0xA867DF55L, 0x316E8EEFL, 0x4669BE79L,
0xCB61B38CL, 0xBC66831AL, 0x256FD2A0L, 0x5268E236L,
0xCC0C7795L, 0xBB0B4703L, 0x220216B9L, 0x5505262FL,
0xC5BA3BBEL, 0xB2BD0B28L, 0x2BB45A92L, 0x5CB36A04L,
0xC2D7FFA7L, 0xB5D0CF31L, 0x2CD99E8BL, 0x5BDEAE1DL,
0x9B64C2B0L, 0xEC63F226L, 0x756AA39CL, 0x026D930AL,
0x9C0906A9L, 0xEB0E363FL, 0x72076785L, 0x05005713L,
0x95BF4A82L, 0xE2B87A14L, 0x7BB12BAEL, 0x0CB61B38L,
0x92D28E9BL, 0xE5D5BE0DL, 0x7CDCEFB7L, 0x0BDBDF21L,
0x86D3D2D4L, 0xF1D4E242L, 0x68DDB3F8L, 0x1FDA836EL,
0x81BE16CDL, 0xF6B9265BL, 0x6FB077E1L, 0x18B74777L,
0x88085AE6L, 0xFF0F6A70L, 0x66063BCAL, 0x11010B5CL,
0x8F659EFFL, 0xF862AE69L, 0x616BFFD3L, 0x166CCF45L,
0xA00AE278L, 0xD70DD2EEL, 0x4E048354L, 0x3903B3C2L,
0xA7672661L, 0xD06016F7L, 0x4969474DL, 0x3E6E77DBL,
0xAED16A4AL, 0xD9D65ADCL, 0x40DF0B66L, 0x37D83BF0L,
0xA9BCAE53L, 0xDEBB9EC5L, 0x47B2CF7FL, 0x30B5FFE9L,
0xBDBDF21CL, 0xCABAC28AL, 0x53B39330L, 0x24B4A3A6L,
0xBAD03605L, 0xCDD70693L, 0x54DE5729L, 0x23D967BFL,
0xB3667A2EL, 0xC4614AB8L, 0x5D681B02L, 0x2A6F2B94L,
0xB40BBE37L, 0xC30C8EA1L, 0x5A05DF1BL, 0x2D02EF8DL
};
#endif
#ifdef GENPROGRAM
/*
 * Standalone table generator, built only when GENPROGRAM is defined.
 * Fills crc32_table via crc32_init() and prints all 256 entries as C
 * initialiser text, four entries per output line.
 *
 * Returns 0 unconditionally.
 */
int main(void)
{
    int i;

    crc32_init();
    for (i = 0; i < 256; i++) {
        /*
         * crc32_table holds uint32_t, which need not be the same type
         * as unsigned long; convert explicitly so the argument matches
         * the %lX conversion.
         *
         * NOTE(review): both arms of the first conditional are the same
         * string in this copy; the line-leading indent may have lost
         * whitespace - confirm against upstream.
         */
        printf("%s0x%08lXL%s",
               (i % 4 == 0 ? " " : " "),
               (unsigned long)crc32_table[i],
               (i % 4 == 3 ? (i == 255 ? "\n" : ",\n") : ","));
    }
    return 0;
}
#endif
/*
 * NOTE(review): two signatures and two loop/return pairs appear here,
 * one written in terms of `crcword` and crc32_table, the other in terms
 * of `crc` and crc32_shift_8. It looks like two versions of the function
 * interleaved by a diff rendering - confirm which lines are live before
 * compiling.
 */
uint32_t crc32_update(uint32_t crcword, ptrlen data)
uint32_t crc32_update(uint32_t crc, ptrlen data)
{
/* View the input as raw bytes; data.len bytes are consumed starting at data.ptr. */
const uint8_t *p = (const uint8_t *)data.ptr;
/* Table-driven form: one table lookup per input byte. */
for (size_t len = data.len; len-- > 0 ;) {
uint32_t newbyte = *p++;
/* Combine the input byte with the low 8 bits of the running CRC to form the table index. */
newbyte ^= crcword & 0xFFL;
crcword = (crcword >> 8) ^ crc32_table[newbyte];
}
return crcword;
/* Table-free form: XOR each byte into the low bits of the CRC, then shift by 8 bits. */
for (size_t len = data.len; len-- > 0 ;)
crc = crc32_shift_8(crc ^ *p++);
return crc;
}
/*