mirror of
https://git.tartarus.org/simon/putty.git
synced 2025-01-09 17:38:00 +00:00
Rewrite CRC implementation to be constant-time.
In SSH-1, the CRC is used on sensitive data, because it takes the place of what ought to be a MAC. This is of course hopelessly bad security and one of the major reasons SSH-1 was replaced, but even so, there's no need to add timing and cache side channels _as well_ as all the other problems with it! So I've removed the 256-entry lookup table that's the usual way to implement CRC (in particular, the implementation given in the RFC 1662 appendix shows the same table in full). The new strategy folds in four bits at a time, using a multiply+XOR technique to replicate the outgoing four bits in all the right places. In a crude timing test this gave about a factor of 2 slowdown, which seemed surprisingly good to me - six multiplies replacing a single table lookup? But the multiplications in each 4-bit fold are independent of each other, so I suspect the CPU is managing to parallelise them.
This commit is contained in:
parent
c330156259
commit
2e866e1fb7
174
sshcrc.c
174
sshcrc.c
@ -37,156 +37,56 @@
|
|||||||
|
|
||||||
#include "ssh.h"
|
#include "ssh.h"
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/*
|
||||||
* Multi-function module. Can be compiled three ways.
|
* Multiply a CRC value by x^4. This implementation strategy avoids
|
||||||
|
* using a lookup table (which would be a side-channel hazard, since
|
||||||
|
* SSH-1 applies this CRC to decrypted session data).
|
||||||
*
|
*
|
||||||
* - Compile with no special #defines. Will generate a table
|
* The basic idea is that you'd like to "multiply" the shifted-out 4
|
||||||
* that's already initialised at compile time, and one function
|
* bits by the CRC polynomial value 0xEDB88320, or rather by that
|
||||||
* crc32_compute(buf,len) that uses it. Normal usage.
|
* value shifted right 3 bits (since you want the _last_ bit shifted
|
||||||
|
* out, i.e. the one originally at the 2^3 position, to generate
|
||||||
|
* 0xEDB88320 itself). But the scare-quoted "multiply" would have to
|
||||||
|
* be a multiplication of polynomials over GF(2), which differs from
|
||||||
|
* integer multiplication in that you don't have any carries. In other
|
||||||
|
* words, you make a copy of one input shifted left by the index of
|
||||||
|
* each set bit in the other, so that adding them all together would
|
||||||
|
* give you the ordinary integer product, and then you XOR them
|
||||||
|
* together instead.
|
||||||
*
|
*
|
||||||
* - Compile with INITFUNC defined. Will generate an uninitialised
|
* With a 4-bit multiplier, the two kinds of multiplication coincide
|
||||||
* array as the table, and as well as crc32_compute(buf,len) it
|
* provided the multiplicand has no two set bits at positions
|
||||||
* will also generate void crc32_init(void) which sets up the
|
* differing by less than 4, because then no two copies of the
|
||||||
* table at run time. Useful if binary size is important.
|
* multiplier can overlap to generate a carry. So I break up the
|
||||||
*
|
* intended multiplicand K = 0xEDB88320 >> 3 into three sub-constants
|
||||||
* - Compile with GENPROGRAM defined. Will create a standalone
|
* a,b,c with that property, such that a^b^c = K. Then I can multiply
|
||||||
* program that does the initialisation and outputs the table as
|
* m by each of them separately, and XOR together the results.
|
||||||
* C code.
|
|
||||||
*/
|
*/
|
||||||
|
static inline uint32_t crc32_shift_4(uint32_t v)
|
||||||
#define POLY (0xEDB88320L)
|
{
|
||||||
|
const uint32_t a = 0x11111044, b = 0x08840020, c = 0x04220000;
|
||||||
#ifdef GENPROGRAM
|
uint32_t m = v & 0xF;
|
||||||
#define INITFUNC /* the gen program needs the init func :-) */
|
return (v >> 4) ^ (a*m) ^ (b*m) ^ (c*m);
|
||||||
#endif
|
}
|
||||||
|
|
||||||
#ifdef INITFUNC
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This variant of the code generates the table at run-time from an
|
* The 8-bit shift you need every time you absorb an input byte,
|
||||||
* init function.
|
* implemented simply by iterating the 4-bit shift twice.
|
||||||
*/
|
*/
|
||||||
static uint32_t crc32_table[256];
|
static inline uint32_t crc32_shift_8(uint32_t v)
|
||||||
|
|
||||||
void crc32_init(void)
|
|
||||||
{
|
{
|
||||||
uint32_t crcword;
|
return crc32_shift_4(crc32_shift_4(v));
|
||||||
int i;
|
|
||||||
|
|
||||||
for (i = 0; i < 256; i++) {
|
|
||||||
uint32_t newbyte, x32term;
|
|
||||||
int j;
|
|
||||||
crcword = 0;
|
|
||||||
newbyte = i;
|
|
||||||
for (j = 0; j < 8; j++) {
|
|
||||||
x32term = (crcword ^ newbyte) & 1;
|
|
||||||
crcword = (crcword >> 1) ^ (x32term * POLY);
|
|
||||||
newbyte >>= 1;
|
|
||||||
}
|
}
|
||||||
crc32_table[i] = crcword;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This variant of the code has the data already prepared.
|
* Update an existing hash value with extra bytes of data.
|
||||||
*/
|
*/
|
||||||
static const uint32_t crc32_table[256] = {
|
uint32_t crc32_update(uint32_t crc, ptrlen data)
|
||||||
0x00000000L, 0x77073096L, 0xEE0E612CL, 0x990951BAL,
|
|
||||||
0x076DC419L, 0x706AF48FL, 0xE963A535L, 0x9E6495A3L,
|
|
||||||
0x0EDB8832L, 0x79DCB8A4L, 0xE0D5E91EL, 0x97D2D988L,
|
|
||||||
0x09B64C2BL, 0x7EB17CBDL, 0xE7B82D07L, 0x90BF1D91L,
|
|
||||||
0x1DB71064L, 0x6AB020F2L, 0xF3B97148L, 0x84BE41DEL,
|
|
||||||
0x1ADAD47DL, 0x6DDDE4EBL, 0xF4D4B551L, 0x83D385C7L,
|
|
||||||
0x136C9856L, 0x646BA8C0L, 0xFD62F97AL, 0x8A65C9ECL,
|
|
||||||
0x14015C4FL, 0x63066CD9L, 0xFA0F3D63L, 0x8D080DF5L,
|
|
||||||
0x3B6E20C8L, 0x4C69105EL, 0xD56041E4L, 0xA2677172L,
|
|
||||||
0x3C03E4D1L, 0x4B04D447L, 0xD20D85FDL, 0xA50AB56BL,
|
|
||||||
0x35B5A8FAL, 0x42B2986CL, 0xDBBBC9D6L, 0xACBCF940L,
|
|
||||||
0x32D86CE3L, 0x45DF5C75L, 0xDCD60DCFL, 0xABD13D59L,
|
|
||||||
0x26D930ACL, 0x51DE003AL, 0xC8D75180L, 0xBFD06116L,
|
|
||||||
0x21B4F4B5L, 0x56B3C423L, 0xCFBA9599L, 0xB8BDA50FL,
|
|
||||||
0x2802B89EL, 0x5F058808L, 0xC60CD9B2L, 0xB10BE924L,
|
|
||||||
0x2F6F7C87L, 0x58684C11L, 0xC1611DABL, 0xB6662D3DL,
|
|
||||||
0x76DC4190L, 0x01DB7106L, 0x98D220BCL, 0xEFD5102AL,
|
|
||||||
0x71B18589L, 0x06B6B51FL, 0x9FBFE4A5L, 0xE8B8D433L,
|
|
||||||
0x7807C9A2L, 0x0F00F934L, 0x9609A88EL, 0xE10E9818L,
|
|
||||||
0x7F6A0DBBL, 0x086D3D2DL, 0x91646C97L, 0xE6635C01L,
|
|
||||||
0x6B6B51F4L, 0x1C6C6162L, 0x856530D8L, 0xF262004EL,
|
|
||||||
0x6C0695EDL, 0x1B01A57BL, 0x8208F4C1L, 0xF50FC457L,
|
|
||||||
0x65B0D9C6L, 0x12B7E950L, 0x8BBEB8EAL, 0xFCB9887CL,
|
|
||||||
0x62DD1DDFL, 0x15DA2D49L, 0x8CD37CF3L, 0xFBD44C65L,
|
|
||||||
0x4DB26158L, 0x3AB551CEL, 0xA3BC0074L, 0xD4BB30E2L,
|
|
||||||
0x4ADFA541L, 0x3DD895D7L, 0xA4D1C46DL, 0xD3D6F4FBL,
|
|
||||||
0x4369E96AL, 0x346ED9FCL, 0xAD678846L, 0xDA60B8D0L,
|
|
||||||
0x44042D73L, 0x33031DE5L, 0xAA0A4C5FL, 0xDD0D7CC9L,
|
|
||||||
0x5005713CL, 0x270241AAL, 0xBE0B1010L, 0xC90C2086L,
|
|
||||||
0x5768B525L, 0x206F85B3L, 0xB966D409L, 0xCE61E49FL,
|
|
||||||
0x5EDEF90EL, 0x29D9C998L, 0xB0D09822L, 0xC7D7A8B4L,
|
|
||||||
0x59B33D17L, 0x2EB40D81L, 0xB7BD5C3BL, 0xC0BA6CADL,
|
|
||||||
0xEDB88320L, 0x9ABFB3B6L, 0x03B6E20CL, 0x74B1D29AL,
|
|
||||||
0xEAD54739L, 0x9DD277AFL, 0x04DB2615L, 0x73DC1683L,
|
|
||||||
0xE3630B12L, 0x94643B84L, 0x0D6D6A3EL, 0x7A6A5AA8L,
|
|
||||||
0xE40ECF0BL, 0x9309FF9DL, 0x0A00AE27L, 0x7D079EB1L,
|
|
||||||
0xF00F9344L, 0x8708A3D2L, 0x1E01F268L, 0x6906C2FEL,
|
|
||||||
0xF762575DL, 0x806567CBL, 0x196C3671L, 0x6E6B06E7L,
|
|
||||||
0xFED41B76L, 0x89D32BE0L, 0x10DA7A5AL, 0x67DD4ACCL,
|
|
||||||
0xF9B9DF6FL, 0x8EBEEFF9L, 0x17B7BE43L, 0x60B08ED5L,
|
|
||||||
0xD6D6A3E8L, 0xA1D1937EL, 0x38D8C2C4L, 0x4FDFF252L,
|
|
||||||
0xD1BB67F1L, 0xA6BC5767L, 0x3FB506DDL, 0x48B2364BL,
|
|
||||||
0xD80D2BDAL, 0xAF0A1B4CL, 0x36034AF6L, 0x41047A60L,
|
|
||||||
0xDF60EFC3L, 0xA867DF55L, 0x316E8EEFL, 0x4669BE79L,
|
|
||||||
0xCB61B38CL, 0xBC66831AL, 0x256FD2A0L, 0x5268E236L,
|
|
||||||
0xCC0C7795L, 0xBB0B4703L, 0x220216B9L, 0x5505262FL,
|
|
||||||
0xC5BA3BBEL, 0xB2BD0B28L, 0x2BB45A92L, 0x5CB36A04L,
|
|
||||||
0xC2D7FFA7L, 0xB5D0CF31L, 0x2CD99E8BL, 0x5BDEAE1DL,
|
|
||||||
0x9B64C2B0L, 0xEC63F226L, 0x756AA39CL, 0x026D930AL,
|
|
||||||
0x9C0906A9L, 0xEB0E363FL, 0x72076785L, 0x05005713L,
|
|
||||||
0x95BF4A82L, 0xE2B87A14L, 0x7BB12BAEL, 0x0CB61B38L,
|
|
||||||
0x92D28E9BL, 0xE5D5BE0DL, 0x7CDCEFB7L, 0x0BDBDF21L,
|
|
||||||
0x86D3D2D4L, 0xF1D4E242L, 0x68DDB3F8L, 0x1FDA836EL,
|
|
||||||
0x81BE16CDL, 0xF6B9265BL, 0x6FB077E1L, 0x18B74777L,
|
|
||||||
0x88085AE6L, 0xFF0F6A70L, 0x66063BCAL, 0x11010B5CL,
|
|
||||||
0x8F659EFFL, 0xF862AE69L, 0x616BFFD3L, 0x166CCF45L,
|
|
||||||
0xA00AE278L, 0xD70DD2EEL, 0x4E048354L, 0x3903B3C2L,
|
|
||||||
0xA7672661L, 0xD06016F7L, 0x4969474DL, 0x3E6E77DBL,
|
|
||||||
0xAED16A4AL, 0xD9D65ADCL, 0x40DF0B66L, 0x37D83BF0L,
|
|
||||||
0xA9BCAE53L, 0xDEBB9EC5L, 0x47B2CF7FL, 0x30B5FFE9L,
|
|
||||||
0xBDBDF21CL, 0xCABAC28AL, 0x53B39330L, 0x24B4A3A6L,
|
|
||||||
0xBAD03605L, 0xCDD70693L, 0x54DE5729L, 0x23D967BFL,
|
|
||||||
0xB3667A2EL, 0xC4614AB8L, 0x5D681B02L, 0x2A6F2B94L,
|
|
||||||
0xB40BBE37L, 0xC30C8EA1L, 0x5A05DF1BL, 0x2D02EF8DL
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef GENPROGRAM
|
|
||||||
int main(void)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
|
|
||||||
crc32_init();
|
|
||||||
for (i = 0; i < 256; i++) {
|
|
||||||
printf("%s0x%08lXL%s",
|
|
||||||
(i % 4 == 0 ? " " : " "),
|
|
||||||
crc32_table[i],
|
|
||||||
(i % 4 == 3 ? (i == 255 ? "\n" : ",\n") : ","));
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
uint32_t crc32_update(uint32_t crcword, ptrlen data)
|
|
||||||
{
|
{
|
||||||
const uint8_t *p = (const uint8_t *)data.ptr;
|
const uint8_t *p = (const uint8_t *)data.ptr;
|
||||||
for (size_t len = data.len; len-- > 0 ;) {
|
for (size_t len = data.len; len-- > 0 ;)
|
||||||
uint32_t newbyte = *p++;
|
crc = crc32_shift_8(crc ^ *p++);
|
||||||
newbyte ^= crcword & 0xFFL;
|
return crc;
|
||||||
crcword = (crcword >> 8) ^ crc32_table[newbyte];
|
|
||||||
}
|
|
||||||
return crcword;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
Loading…
Reference in New Issue
Block a user