putty-source/sshbn.h

/*
 * sshbn.h: the assorted conditional definitions of BignumInt and
 * multiply macros used throughout the bignum code to treat numbers as
 * arrays of the most conveniently sized word for the target machine.
 * Exported so that other code (e.g. poly1305) can use it too.
 */

/*
 * Each branch of the conditional below defines the same set of names:
 *
 *   BignumInt         unsigned type holding one bignum word
 *   BignumDblInt      unsigned type in which MUL_WORD computes its product
 *   BIGNUM_INT_MASK   a word with every bit set
 *   BIGNUM_TOP_BIT    a word with only its most significant bit set
 *   BIGNUM_INT_BITS   number of bits in a word
 *   MUL_WORD(w1, w2)  w1 * w2, with w1 widened to BignumDblInt first
 *
 * MUL_WORD parenthesises both of its parameters, so that arguments
 * which are themselves expressions (e.g. MUL_WORD(a + b, c)) are
 * converted and multiplied as a whole rather than being torn apart by
 * operator precedence.
 */
#if defined __SIZEOF_INT128__
/* gcc and clang both provide a __uint128_t type on 64-bit targets
 * (and, when they do, indicate its presence by the above macro),
 * using the same 'two machine registers' kind of code generation that
 * 32-bit targets use for 64-bit ints. If we have one of these, we can
 * use a 64-bit BignumInt and a 128-bit BignumDblInt. */
typedef unsigned long long BignumInt;
typedef __uint128_t BignumDblInt;
#define BIGNUM_INT_MASK  0xFFFFFFFFFFFFFFFFULL
#define BIGNUM_TOP_BIT   0x8000000000000000ULL
#define BIGNUM_INT_BITS  64
#define MUL_WORD(w1, w2) ((BignumDblInt)(w1) * (w2))
#elif defined __GNUC__ && defined __i386__
typedef unsigned long BignumInt;
typedef unsigned long long BignumDblInt;
#define BIGNUM_INT_MASK  0xFFFFFFFFUL
#define BIGNUM_TOP_BIT   0x80000000UL
#define BIGNUM_INT_BITS  32
#define MUL_WORD(w1, w2) ((BignumDblInt)(w1) * (w2))
#elif defined _MSC_VER && defined _M_IX86
typedef unsigned __int32 BignumInt;
typedef unsigned __int64 BignumDblInt;
#define BIGNUM_INT_MASK  0xFFFFFFFFUL
#define BIGNUM_TOP_BIT   0x80000000UL
#define BIGNUM_INT_BITS  32
#define MUL_WORD(w1, w2) ((BignumDblInt)(w1) * (w2))
#elif defined _LP64
/* 64-bit architectures can do 32x32->64 chunks at a time */
typedef unsigned int BignumInt;
typedef unsigned long BignumDblInt;
#define BIGNUM_INT_MASK  0xFFFFFFFFU
#define BIGNUM_TOP_BIT   0x80000000U
#define BIGNUM_INT_BITS  32
#define MUL_WORD(w1, w2) ((BignumDblInt)(w1) * (w2))
#elif defined _LLP64
/* 64-bit architectures in which unsigned long is 32 bits, not 64 */
typedef unsigned long BignumInt;
typedef unsigned long long BignumDblInt;
#define BIGNUM_INT_MASK  0xFFFFFFFFUL
#define BIGNUM_TOP_BIT   0x80000000UL
#define BIGNUM_INT_BITS  32
#define MUL_WORD(w1, w2) ((BignumDblInt)(w1) * (w2))
#else
/* Fallback for all other cases */
typedef unsigned short BignumInt;
typedef unsigned long BignumDblInt;
#define BIGNUM_INT_MASK  0xFFFFU
#define BIGNUM_TOP_BIT   0x8000U
#define BIGNUM_INT_BITS  16
#define MUL_WORD(w1, w2) ((BignumDblInt)(w1) * (w2))
#endif

/* Word size in bytes, derived from whichever BIGNUM_INT_BITS was
 * selected above. */
#define BIGNUM_INT_BYTES (BIGNUM_INT_BITS / 8)
Move BignumInt definitions into a header file. This allows files other than sshbn.c to work with the primitives necessary to build multi-word arithmetic functions satisfying all of PuTTY's portability constraints. 2015-06-06 13:52:29 +00:00			`/*`
			`* sshbn.h: the assorted conditional definitions of BignumInt and`
Rewrite the core divide function to not use DIVMOD_WORD. DIVMOD_WORD is a portability hazard, because implementing it requires either a way to get direct access to the x86 DIV instruction or equivalent (be it inline assembler or a compiler intrinsic), or else an integer type we can use as BignumDblInt. But I'm starting to think about porting to 64-bit Visual Studio with a 64-bit BignumInt, and in that situation neither of those options will be available. I could write a piece of _out_-of-line x86-64 assembler in a separate source file and put a function call in DIVMOD_WORD, but instead I've decided to solve the problem in a more futureproof way: remove DIVMOD_WORD totally and write a division function that doesn't need it at all, solving not only today's porting headache but all future ones in this area. The new implementation works by precomputing (a good enough approximation to) the leading word of the reciprocal of the modulus, and then getting each word of quotient by multiplying by that reciprocal, where we previously used DIVMOD_WORD to divide by the leading word of the actual modulus. The reciprocal itself is computed outside internal_mod() and passed in as a parameter, allowing me to save time by only computing it once when I'm about to do a modpow. To some extent this complicates the implementation: the advantage of DIVMOD_WORD was that it yielded a full word q of quotient every time it was used, so the subtraction of qm from the input could be done in a nicely word-aligned way. But the reciprocal multiply approach yields _almost_ a full word of quotient, because you have to make the reciprocal a bit short to avoid overflow at multiplication time. For a start, this means we have to do fractionally more iterations of the main loop; but more painfully, we can no longer depend on the subtraction of qm at every step being word-aligned, and instead we have to be prepared to do it at any bit shift. 
But the flip side is that once we've implemented that, the rest of the algorithm becomes a lot less full of horrible special cases: in particular, we can now completely throw away the horribleness at all the call sites where we shift the modulus up by a fractional word to set its top bit, and then have to do a little dance to get the last few bits of quotient involving a second call to internal_mod. So there are points both for and against the new implementation in simplicity terms; but I think on balance it's more comprehensible than the old one, and a quick timing test suggests it also ends up a touch faster overall - the new testbn gets through the output of testdata/bignum.py in 4.034s where the old one took 4.392s. 2015-12-13 14:46:43 +00:00			`* multiply macros used throughout the bignum code to treat numbers as`
			`* arrays of the most conveniently sized word for the target machine.`
			`* Exported so that other code (e.g. poly1305) can use it too.`
Move BignumInt definitions into a header file. This allows files other than sshbn.c to work with the primitives necessary to build multi-word arithmetic functions satisfying all of PuTTY's portability constraints. 2015-06-06 13:52:29 +00:00			`*/`

Use 64-bit BignumInt wherever __uint128_t is available. gcc and clang both provide a type called __uint128_t when compiling for 64-bit targets, code-generated more or less similarly to the way 64-bit long longs are handled on 32-bit targets (spanning two registers, using ADD/ADC, that sort of thing). Where this is available (and they also provide a handy macro to make it easy to detect), we should obviously use it, so that we can handle bignums a larger chunk at a time and make use of the full width of the hardware's multiplier. Preliminary benchmarking using 'testbn' suggests a factor of about 2.5 improvement. I've added the new possibility to the ifdefs in sshbn.h, and also re-run contrib/make1305.py to generate a set of variants of the poly1305 arithmetic for the new size of BignumInt. 2015-06-08 18:24:58 +00:00			`#if defined __SIZEOF_INT128__`
			`/* gcc and clang both provide a __uint128_t type on 64-bit targets`
			`* (and, when they do, indicate its presence by the above macro),`
			`* using the same 'two machine registers' kind of code generation that`
			`* 32-bit targets use for 64-bit ints. If we have one of these, we can`
			`* use a 64-bit BignumInt and a 128-bit BignumDblInt. */`
Fix __uint128_t compile error on MinGW. MinGW has __uint128_t, but not __uint64_t. 2015-08-11 07:43:34 +00:00			`typedef unsigned long long BignumInt;`
Use 64-bit BignumInt wherever __uint128_t is available. gcc and clang both provide a type called __uint128_t when compiling for 64-bit targets, code-generated more or less similarly to the way 64-bit long longs are handled on 32-bit targets (spanning two registers, using ADD/ADC, that sort of thing). Where this is available (and they also provide a handy macro to make it easy to detect), we should obviously use it, so that we can handle bignums a larger chunk at a time and make use of the full width of the hardware's multiplier. Preliminary benchmarking using 'testbn' suggests a factor of about 2.5 improvement. I've added the new possibility to the ifdefs in sshbn.h, and also re-run contrib/make1305.py to generate a set of variants of the poly1305 arithmetic for the new size of BignumInt. 2015-06-08 18:24:58 +00:00			`typedef __uint128_t BignumDblInt;`
			`#define BIGNUM_INT_MASK 0xFFFFFFFFFFFFFFFFULL`
			`#define BIGNUM_TOP_BIT 0x8000000000000000ULL`
			`#define BIGNUM_INT_BITS 64`
			`#define MUL_WORD(w1, w2) ((BignumDblInt)w1 * w2)`
			`#elif defined __GNUC__ && defined __i386__`
Move BignumInt definitions into a header file. This allows files other than sshbn.c to work with the primitives necessary to build multi-word arithmetic functions satisfying all of PuTTY's portability constraints. 2015-06-06 13:52:29 +00:00			`typedef unsigned long BignumInt;`
			`typedef unsigned long long BignumDblInt;`
			`#define BIGNUM_INT_MASK 0xFFFFFFFFUL`
			`#define BIGNUM_TOP_BIT 0x80000000UL`
			`#define BIGNUM_INT_BITS 32`
			`#define MUL_WORD(w1, w2) ((BignumDblInt)w1 * w2)`
			`#elif defined _MSC_VER && defined _M_IX86`
			`typedef unsigned __int32 BignumInt;`
			`typedef unsigned __int64 BignumDblInt;`
			`#define BIGNUM_INT_MASK 0xFFFFFFFFUL`
			`#define BIGNUM_TOP_BIT 0x80000000UL`
			`#define BIGNUM_INT_BITS 32`
			`#define MUL_WORD(w1, w2) ((BignumDblInt)w1 * w2)`
			`#elif defined _LP64`
			`/* 64-bit architectures can do 32x32->64 chunks at a time */`
			`typedef unsigned int BignumInt;`
			`typedef unsigned long BignumDblInt;`
			`#define BIGNUM_INT_MASK 0xFFFFFFFFU`
			`#define BIGNUM_TOP_BIT 0x80000000U`
			`#define BIGNUM_INT_BITS 32`
			`#define MUL_WORD(w1, w2) ((BignumDblInt)w1 * w2)`
			`#elif defined _LLP64`
			`/* 64-bit architectures in which unsigned long is 32 bits, not 64 */`
			`typedef unsigned long BignumInt;`
			`typedef unsigned long long BignumDblInt;`
			`#define BIGNUM_INT_MASK 0xFFFFFFFFUL`
			`#define BIGNUM_TOP_BIT 0x80000000UL`
			`#define BIGNUM_INT_BITS 32`
			`#define MUL_WORD(w1, w2) ((BignumDblInt)w1 * w2)`
			`#else`
			`/* Fallback for all other cases */`
			`typedef unsigned short BignumInt;`
			`typedef unsigned long BignumDblInt;`
			`#define BIGNUM_INT_MASK 0xFFFFU`
			`#define BIGNUM_TOP_BIT 0x8000U`
			`#define BIGNUM_INT_BITS 16`
			`#define MUL_WORD(w1, w2) ((BignumDblInt)w1 * w2)`
			`#endif`

			`#define BIGNUM_INT_BYTES (BIGNUM_INT_BITS / 8)`