Relegate BignumDblInt to an implementation detail of sshbn.h.

As I mentioned in the previous commit, I'm going to want PuTTY to be able to run sensibly when compiled with 64-bit Visual Studio, including handling bignums in 64-bit chunks for speed. Unfortunately, 64-bit VS does not provide any type we can use as BignumDblInt in that situation (unlike 64-bit gcc and clang, which give us __uint128_t). The only facilities it provides are compiler intrinsics to access an add-with-carry operation and a 64x64->128 multiplication (the latter delivering its product in two separate 64-bit output chunks). Hence, here's a substantial rework of the bignum code to make it implement everything in terms of _those_ primitives, rather than depending throughout on having BignumDblInt available to use ad-hoc. BignumDblInt does still exist, for the moment, but now it's an internal implementation detail of sshbn.h, only declared inside a new set of macros implementing arithmetic primitives, and not accessible to any code outside sshbn.h (which confirms that I really did catch all uses of it and remove them). The resulting code is surprisingly nice-looking, actually. You'd expect more hassle and roundabout circumlocutions when you drop down to using a more basic set of primitive operations, but actually, in many cases it's turned out shorter to write things in terms of the new BignumADC and BignumMUL macros - because almost all my uses of BignumDblInt were implementing those operations anyway, taking several lines at a time, and now they can do each thing in just one line. The biggest headache was Poly1305: I wasn't able to find any sensible way to adapt the existing Python script that generates the various per-int-size implementations of arithmetic mod 2^130-5, and so I had to rewrite it from scratch instead, with nothing in common with the old version beyond a handful of comments. But even that seems to have worked out nicely: the new version has much more legible descriptions of the high-level algorithms, by virtue of having a 'Multiprecision' type which wraps up the division into words, and yet Multiprecision's range analysis allows it to automatically drop out special cases such as multiplication by 5 being much easier than multiplication by another multi-word integer.
2025-07-07 06:22:47 -05:00 · 2015-12-16 14:12:26 +00:00
parent 482b4ab872
commit c2ec13c7e9
4 changed files with 1159 additions and 1209 deletions
--- a/sshbn.h
+++ b/sshbn.h
@ -3,58 +3,171 @@
 * multiply macros used throughout the bignum code to treat numbers as
 * arrays of the most conveniently sized word for the target machine.
 * Exported so that other code (e.g. poly1305) can use it too.
+ *
+ * This file must export, in whatever ifdef branch it ends up in:
+ *
+ *  - two types: 'BignumInt' and 'BignumCarry'. BignumInt is an
+ *    unsigned integer type which will be used as the base word size
+ *    for all bignum operations. BignumCarry is an unsigned integer
+ *    type used to hold the carry flag taken as input and output by
+ *    the BignumADC macro (see below).
+ *
+ *  - four constant macros: BIGNUM_INT_BITS, BIGNUM_INT_BYTES,
+ *    BIGNUM_TOP_BIT, BIGNUM_INT_MASK. These should be more or less
+ *    self-explanatory, but just in case, they give the number of bits
+ *    in BignumInt, the number of bytes that works out to, the
+ *    BignumInt value consisting of only the top bit, and the
+ *    BignumInt value with all bits set.
+ *
+ *  - four statement macros: BignumADC, BignumMUL, BignumMULADD,
+ *    BignumMULADD2. These do various kinds of multi-word arithmetic,
+ *    and all produce two output values.
+ *     * BignumADC(ret,retc,a,b,c) takes input BignumInt values a,b
+ *       and a BignumCarry c, and outputs a BignumInt ret = a+b+c and
+ *       a BignumCarry retc which is the carry off the top of that
+ *       addition.
+ *     * BignumMUL(rh,rl,a,b) returns the two halves of the
+ *       double-width product a*b.
+ *     * BignumMULADD(rh,rl,a,b,addend) returns the two halves of the
+ *       double-width value a*b + addend.
+ *     * BignumMULADD2(rh,rl,a,b,addend1,addend2) returns the two
+ *       halves of the double-width value a*b + addend1 + addend2.
+ *
+ * Every branch of the main ifdef below defines the type BignumInt and
+ * the value BIGNUM_INT_BITS. The other three constant macros are
+ * filled in by common code further down.
+ *
+ * Most branches also define a macro DEFINE_BIGNUMDBLINT containing a
+ * typedef statement which declares a type _twice_ the length of a
+ * BignumInt. This causes the common code further down to produce a
+ * default implementation of the four statement macros in terms of
+ * that double-width type, and also to defined BignumCarry to be
+ * BignumInt.
+ *
+ * However, if a particular compile target does not have a type twice
+ * the length of the BignumInt you want to use but it does provide
+ * some alternative means of doing add-with-carry and double-word
+ * multiply, then the ifdef branch in question can just define
+ * BignumCarry and the four statement macros itself, and that's fine
+ * too.
 */

 #if defined __SIZEOF_INT128__
-/* gcc and clang both provide a __uint128_t type on 64-bit targets
- * (and, when they do, indicate its presence by the above macro),
- * using the same 'two machine registers' kind of code generation that
- * 32-bit targets use for 64-bit ints. If we have one of these, we can
- * use a 64-bit BignumInt and a 128-bit BignumDblInt. */
-typedef unsigned long long BignumInt;
-typedef __uint128_t BignumDblInt;
-#define BIGNUM_INT_MASK  0xFFFFFFFFFFFFFFFFULL
-#define BIGNUM_TOP_BIT   0x8000000000000000ULL
-#define BIGNUM_INT_BITS  64
-#define MUL_WORD(w1, w2) ((BignumDblInt)w1 * w2)
-#elif defined __GNUC__ && defined __i386__
-typedef unsigned long BignumInt;
-typedef unsigned long long BignumDblInt;
-#define BIGNUM_INT_MASK  0xFFFFFFFFUL
-#define BIGNUM_TOP_BIT   0x80000000UL
-#define BIGNUM_INT_BITS  32
-#define MUL_WORD(w1, w2) ((BignumDblInt)w1 * w2)
+
+  /*
+   * 64-bit BignumInt using gcc/clang style 128-bit BignumDblInt.
+   *
+   * gcc and clang both provide a __uint128_t type on 64-bit targets
+   * (and, when they do, indicate its presence by the above macro),
+   * using the same 'two machine registers' kind of code generation
+   * that 32-bit targets use for 64-bit ints.
+   */
+
+  typedef unsigned long long BignumInt;
+  #define BIGNUM_INT_BITS 64
+  #define DEFINE_BIGNUMDBLINT typedef __uint128_t BignumDblInt
+
+#elif defined __GNUC__ || defined _LLP64 || __STDC__ >= 199901L
+
+  /* 32-bit BignumInt, using C99 unsigned long long as BignumDblInt */
+
+  typedef unsigned int BignumInt;
+  #define BIGNUM_INT_BITS 32
+  #define DEFINE_BIGNUMDBLINT typedef unsigned long long BignumDblInt
+
 #elif defined _MSC_VER && defined _M_IX86
-typedef unsigned __int32 BignumInt;
-typedef unsigned __int64 BignumDblInt;
-#define BIGNUM_INT_MASK  0xFFFFFFFFUL
-#define BIGNUM_TOP_BIT   0x80000000UL
-#define BIGNUM_INT_BITS  32
-#define MUL_WORD(w1, w2) ((BignumDblInt)w1 * w2)
+
+  /* 32-bit BignumInt, using Visual Studio __int64 as BignumDblInt */
+
+  typedef unsigned int BignumInt;
+  #define BIGNUM_INT_BITS  32
+  #define DEFINE_BIGNUMDBLINT typedef unsigned __int64 BignumDblInt
+
 #elif defined _LP64
-/* 64-bit architectures can do 32x32->64 chunks at a time */
-typedef unsigned int BignumInt;
-typedef unsigned long BignumDblInt;
-#define BIGNUM_INT_MASK  0xFFFFFFFFU
-#define BIGNUM_TOP_BIT   0x80000000U
-#define BIGNUM_INT_BITS  32
-#define MUL_WORD(w1, w2) ((BignumDblInt)w1 * w2)
-#elif defined _LLP64
-/* 64-bit architectures in which unsigned long is 32 bits, not 64 */
-typedef unsigned long BignumInt;
-typedef unsigned long long BignumDblInt;
-#define BIGNUM_INT_MASK  0xFFFFFFFFUL
-#define BIGNUM_TOP_BIT   0x80000000UL
-#define BIGNUM_INT_BITS  32
-#define MUL_WORD(w1, w2) ((BignumDblInt)w1 * w2)
+
+  /*
+   * 32-bit BignumInt, using unsigned long itself as BignumDblInt.
+   *
+   * Only for platforms where long is 64 bits, of course.
+   */
+
+  typedef unsigned int BignumInt;
+  #define BIGNUM_INT_BITS  32
+  #define DEFINE_BIGNUMDBLINT typedef unsigned long BignumDblInt
+
 #else
-/* Fallback for all other cases */
-typedef unsigned short BignumInt;
-typedef unsigned long BignumDblInt;
-#define BIGNUM_INT_MASK  0xFFFFU
-#define BIGNUM_TOP_BIT   0x8000U
-#define BIGNUM_INT_BITS  16
-#define MUL_WORD(w1, w2) ((BignumDblInt)w1 * w2)
+
+  /*
+   * 16-bit BignumInt, using unsigned long as BignumDblInt.
+   *
+   * This is the final fallback for real emergencies: C89 guarantees
+   * unsigned short/long to be at least the required sizes, so this
+   * should work on any C implementation at all. But it'll be
+   * noticeably slow, so if you find yourself in this case you
+   * probably want to move heaven and earth to find an alternative!
+   */
+
+  typedef unsigned short BignumInt;
+  #define BIGNUM_INT_BITS  16
+  #define DEFINE_BIGNUMDBLINT typedef unsigned long BignumDblInt
+
 #endif

+/*
+ * Common code across all branches of that ifdef: define the three
+ * easy constant macros in terms of BIGNUM_INT_BITS.
+ */
 #define BIGNUM_INT_BYTES (BIGNUM_INT_BITS / 8)
+#define BIGNUM_TOP_BIT (((BignumInt)1) << (BIGNUM_INT_BITS-1))
+#define BIGNUM_INT_MASK (BIGNUM_TOP_BIT | (BIGNUM_TOP_BIT-1))
+
+/*
+ * Common code across _most_ branches of the ifdef: define a set of
+ * statement macros in terms of the BignumDblInt type provided. In
+ * this case, we also define BignumCarry to be the same thing as
+ * BignumInt, for simplicity.
+ */
+#ifdef DEFINE_BIGNUMDBLINT
+
+  typedef BignumInt BignumCarry;
+  #define BignumADC(ret, retc, a, b, c) do                        \
+      {                                                           \
+          DEFINE_BIGNUMDBLINT;                                    \
+          BignumDblInt ADC_temp = (BignumInt)(a);                 \
+          ADC_temp += (BignumInt)(b);                             \
+          ADC_temp += (c);                                        \
+          (ret) = (BignumInt)ADC_temp;                            \
+          (retc) = (BignumCarry)(ADC_temp >> BIGNUM_INT_BITS);    \
+      } while (0)
+  
+  #define BignumMUL(rh, rl, a, b) do                              \
+      {                                                           \
+          DEFINE_BIGNUMDBLINT;                                    \
+          BignumDblInt MUL_temp = (BignumInt)(a);                 \
+          MUL_temp *= (BignumInt)(b);                             \
+          (rh) = (BignumInt)(MUL_temp >> BIGNUM_INT_BITS);        \
+          (rl) = (BignumInt)(MUL_temp);                           \
+      } while (0)
+  
+  #define BignumMULADD(rh, rl, a, b, addend) do                   \
+      {                                                           \
+          DEFINE_BIGNUMDBLINT;                                    \
+          BignumDblInt MUL_temp = (BignumInt)(a);                 \
+          MUL_temp *= (BignumInt)(b);                             \
+          MUL_temp += (BignumInt)(addend);                        \
+          (rh) = (BignumInt)(MUL_temp >> BIGNUM_INT_BITS);        \
+          (rl) = (BignumInt)(MUL_temp);                           \
+      } while (0)
+  
+  #define BignumMULADD2(rh, rl, a, b, addend1, addend2) do        \
+      {                                                           \
+          DEFINE_BIGNUMDBLINT;                                    \
+          BignumDblInt MUL_temp = (BignumInt)(a);                 \
+          MUL_temp *= (BignumInt)(b);                             \
+          MUL_temp += (BignumInt)(addend1);                       \
+          MUL_temp += (BignumInt)(addend2);                       \
+          (rh) = (BignumInt)(MUL_temp >> BIGNUM_INT_BITS);        \
+          (rl) = (BignumInt)(MUL_temp);                           \
+      } while (0)
+
+#endif /* DEFINE_BIGNUMDBLINT */