mirror of
https://git.tartarus.org/simon/putty.git
synced 2025-01-10 01:48:00 +00:00
445395c9d3
gcc and clang both provide a type called __uint128_t when compiling
for 64-bit targets, code-generated more or less similarly to the way
64-bit long longs are handled on 32-bit targets (spanning two
registers, using ADD/ADC, that sort of thing). Where this is available
(and they also provide a handy macro to make it easy to detect), we
should obviously use it, so that we can handle bignums a larger chunk
at a time and make use of the full width of the hardware's multiplier.
Preliminary benchmarking using 'testbn' suggests a factor of about 2.5
improvement.
I've added the new possibility to the ifdefs in sshbn.h, and also
re-run contrib/make1305.py to generate a set of variants of the
poly1305 arithmetic for the new size of BignumInt.
(cherry picked from commit f8b27925ee)
Conflicts:
sshccp.c
Cherry-picker's notes: the conflict arose because the original commit
also added new 64-bit autogenerated forms of dedicated Poly1305
arithmetic, which doesn't exist on this branch.
110 lines
4.0 KiB
C
110 lines
4.0 KiB
C
/*
|
|
* sshbn.h: the assorted conditional definitions of BignumInt and
|
|
* multiply/divide macros used throughout the bignum code to treat
|
|
* numbers as arrays of the most conveniently sized word for the
|
|
* target machine. Exported so that other code (e.g. poly1305) can use
|
|
* it too.
|
|
*/
|
|
|
|
/*
|
|
* Usage notes:
|
|
* * Do not call the DIVMOD_WORD macro with expressions such as array
|
|
* subscripts, as some implementations object to this (see below).
|
|
* * Note that none of the division methods below will cope if the
|
|
* quotient won't fit into BIGNUM_INT_BITS. Callers should be careful
|
|
* to avoid this case.
|
|
* If this condition occurs, in the case of the x86 DIV instruction,
|
|
* an overflow exception will occur, which (according to a correspondent)
|
|
* will manifest on Windows as something like
|
|
* 0xC0000095: Integer overflow
|
|
* The C variant won't give the right answer, either.
|
|
*/
|
|
|
|
#if defined __SIZEOF_INT128__
|
|
/*
 * 64-bit BignumInt with a 128-bit BignumDblInt.
 *
 * gcc and clang both provide a __uint128_t type on 64-bit targets
 * (and, when they do, indicate its presence by the __SIZEOF_INT128__
 * macro tested above), using the same 'two machine registers' kind of
 * code generation that 32-bit targets use for 64-bit ints. If we have
 * one of these, we can use a 64-bit BignumInt and a 128-bit
 * BignumDblInt.
 *
 * NOTE(review): __uint64_t is presumably supplied by a system header
 * included before this one -- confirm, or switch to a standard type.
 */
typedef __uint64_t BignumInt;
typedef __uint128_t BignumDblInt;
#define BIGNUM_INT_MASK 0xFFFFFFFFFFFFFFFFULL
#define BIGNUM_TOP_BIT 0x8000000000000000ULL
#define BIGNUM_INT_BITS 64

/*
 * MUL_WORD(w1, w2): double-width product of two words. The first
 * operand is widened to BignumDblInt before multiplying. Both
 * arguments are parenthesised so that a compound expression such as
 * a+b is cast and multiplied as a unit.
 */
#define MUL_WORD(w1, w2) ((BignumDblInt)(w1) * (w2))

/*
 * DIVMOD_WORD(q, r, hi, lo, w): divide the double word hi:lo by w,
 * storing the quotient in q and the remainder in r. The quotient is
 * narrowed on assignment, so the caller must ensure it fits in
 * BIGNUM_INT_BITS. The temporary has a long, macro-specific name so
 * that it cannot capture a caller variable passed as an argument.
 */
#define DIVMOD_WORD(q, r, hi, lo, w) do { \
    BignumDblInt divmod_word_n_ = \
        (((BignumDblInt)(hi)) << BIGNUM_INT_BITS) | (lo); \
    (q) = divmod_word_n_ / (w); \
    (r) = divmod_word_n_ % (w); \
} while (0)
|
|
#elif defined __GNUC__ && defined __i386__
|
|
/*
 * 32-bit BignumInt for gcc on i386, with a 64-bit BignumDblInt and
 * division done by the hardware DIV instruction via inline assembly.
 */
typedef unsigned long BignumInt;
typedef unsigned long long BignumDblInt;
#define BIGNUM_INT_MASK 0xFFFFFFFFUL
#define BIGNUM_TOP_BIT 0x80000000UL
#define BIGNUM_INT_BITS 32

/*
 * MUL_WORD(w1, w2): double-width product of two words. The first
 * operand is widened to BignumDblInt before multiplying. Both
 * arguments are parenthesised so that a compound expression such as
 * a+b is cast and multiplied as a unit.
 */
#define MUL_WORD(w1, w2) ((BignumDblInt)(w1) * (w2))

/*
 * DIVMOD_WORD(q, r, hi, lo, w): issue "div" on operand w with hi
 * loaded via the "d" constraint and lo via the "a" constraint; the
 * quotient is read back into q from "a" and the remainder into r
 * from "d". The asm operand syntax already wraps each argument in
 * parentheses, so none are added here.
 */
#define DIVMOD_WORD(q, r, hi, lo, w) \
    __asm__("div %2" : \
            "=d" (r), "=a" (q) : \
            "r" (w), "d" (hi), "a" (lo))
|
|
#elif defined _MSC_VER && defined _M_IX86
|
|
typedef unsigned __int32 BignumInt;
|
|
typedef unsigned __int64 BignumDblInt;
|
|
#define BIGNUM_INT_MASK 0xFFFFFFFFUL
|
|
#define BIGNUM_TOP_BIT 0x80000000UL
|
|
#define BIGNUM_INT_BITS 32
|
|
#define MUL_WORD(w1, w2) ((BignumDblInt)w1 * w2)
|
|
/* Note: MASM interprets array subscripts in the macro arguments as
|
|
* assembler syntax, which gives the wrong answer. Don't supply them.
|
|
* <http://msdn2.microsoft.com/en-us/library/bf1dw62z.aspx> */
|
|
#define DIVMOD_WORD(q, r, hi, lo, w) do { \
|
|
__asm mov edx, hi \
|
|
__asm mov eax, lo \
|
|
__asm div w \
|
|
__asm mov r, edx \
|
|
__asm mov q, eax \
|
|
} while(0)
|
|
#elif defined _LP64
|
|
/* 64-bit architectures can do 32x32->64 chunks at a time */
typedef unsigned int BignumInt;
typedef unsigned long BignumDblInt;
#define BIGNUM_INT_MASK 0xFFFFFFFFU
#define BIGNUM_TOP_BIT 0x80000000U
#define BIGNUM_INT_BITS 32

/*
 * MUL_WORD(w1, w2): double-width product of two words. The first
 * operand is widened to BignumDblInt before multiplying. Both
 * arguments are parenthesised so that a compound expression such as
 * a+b is cast and multiplied as a unit.
 */
#define MUL_WORD(w1, w2) ((BignumDblInt)(w1) * (w2))

/*
 * DIVMOD_WORD(q, r, hi, lo, w): divide the double word hi:lo by w,
 * storing the quotient in q and the remainder in r. The quotient is
 * narrowed on assignment, so the caller must ensure it fits in
 * BIGNUM_INT_BITS. The temporary has a long, macro-specific name so
 * that it cannot capture a caller variable passed as an argument.
 */
#define DIVMOD_WORD(q, r, hi, lo, w) do { \
    BignumDblInt divmod_word_n_ = \
        (((BignumDblInt)(hi)) << BIGNUM_INT_BITS) | (lo); \
    (q) = divmod_word_n_ / (w); \
    (r) = divmod_word_n_ % (w); \
} while (0)
|
|
#elif defined _LLP64
|
|
/* 64-bit architectures in which unsigned long is 32 bits, not 64 */
typedef unsigned long BignumInt;
typedef unsigned long long BignumDblInt;
#define BIGNUM_INT_MASK 0xFFFFFFFFUL
#define BIGNUM_TOP_BIT 0x80000000UL
#define BIGNUM_INT_BITS 32

/*
 * MUL_WORD(w1, w2): double-width product of two words. The first
 * operand is widened to BignumDblInt before multiplying. Both
 * arguments are parenthesised so that a compound expression such as
 * a+b is cast and multiplied as a unit.
 */
#define MUL_WORD(w1, w2) ((BignumDblInt)(w1) * (w2))

/*
 * DIVMOD_WORD(q, r, hi, lo, w): divide the double word hi:lo by w,
 * storing the quotient in q and the remainder in r. The quotient is
 * narrowed on assignment, so the caller must ensure it fits in
 * BIGNUM_INT_BITS. The temporary has a long, macro-specific name so
 * that it cannot capture a caller variable passed as an argument.
 */
#define DIVMOD_WORD(q, r, hi, lo, w) do { \
    BignumDblInt divmod_word_n_ = \
        (((BignumDblInt)(hi)) << BIGNUM_INT_BITS) | (lo); \
    (q) = divmod_word_n_ / (w); \
    (r) = divmod_word_n_ % (w); \
} while (0)
|
|
#else
|
|
/* Fallback for all other cases: 16-bit words with an unsigned long
 * as the double-width type. */
typedef unsigned short BignumInt;
typedef unsigned long BignumDblInt;
#define BIGNUM_INT_MASK 0xFFFFU
#define BIGNUM_TOP_BIT 0x8000U
#define BIGNUM_INT_BITS 16

/*
 * MUL_WORD(w1, w2): double-width product of two words. The first
 * operand is widened to BignumDblInt before multiplying. Both
 * arguments are parenthesised so that a compound expression such as
 * a+b is cast and multiplied as a unit.
 */
#define MUL_WORD(w1, w2) ((BignumDblInt)(w1) * (w2))

/*
 * DIVMOD_WORD(q, r, hi, lo, w): divide the double word hi:lo by w,
 * storing the quotient in q and the remainder in r. The quotient is
 * narrowed on assignment, so the caller must ensure it fits in
 * BIGNUM_INT_BITS. The temporary has a long, macro-specific name so
 * that it cannot capture a caller variable passed as an argument.
 */
#define DIVMOD_WORD(q, r, hi, lo, w) do { \
    BignumDblInt divmod_word_n_ = \
        (((BignumDblInt)(hi)) << BIGNUM_INT_BITS) | (lo); \
    (q) = divmod_word_n_ / (w); \
    (r) = divmod_word_n_ % (w); \
} while (0)
|
|
#endif
|
|
|
|
/* Size of one BignumInt word in bytes, derived from whichever
 * BIGNUM_INT_BITS value was selected by the conditionals above. */
#define BIGNUM_INT_BYTES (BIGNUM_INT_BITS / 8)
|