From 445395c9d31dd49424cd5a01f8aa3f75a69cfe67 Mon Sep 17 00:00:00 2001
From: Simon Tatham <anakin@pobox.com>
Date: Mon, 8 Jun 2015 19:24:58 +0100
Subject: [PATCH] Use 64-bit BignumInt wherever __uint128_t is available.

gcc and clang both provide a type called __uint128_t when compiling
for 64-bit targets, code-generated more or less similarly to the way
64-bit long longs are handled on 32-bit targets (spanning two
registers, using ADD/ADC, that sort of thing). Where this is available
(and they also provide a handy macro to make it easy to detect), we
should obviously use it, so that we can handle bignums a larger chunk
at a time and make use of the full width of the hardware's multiplier.
Preliminary benchmarking using 'testbn' suggests a factor of about 2.5
improvement.

I've added the new possibility to the ifdefs in sshbn.h, and also
re-run contrib/make1305.py to generate a set of variants of the
poly1305 arithmetic for the new size of BignumInt.

(cherry picked from commit f8b27925eee6a37df107a7cd2e718e997a52516e)

Conflicts:
	sshccp.c

Cherry-picker's notes: the conflict arose because the original commit
also added new 64-bit autogenerated forms of dedicated Poly1305
arithmetic, which doesn't exist on this branch.
---
 sshbn.h | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/sshbn.h b/sshbn.h
index 3d15b948..a043241e 100644
--- a/sshbn.h
+++ b/sshbn.h
@@ -20,7 +20,24 @@
  *    The C variant won't give the right answer, either.
  */
 
-#if defined __GNUC__ && defined __i386__
+#if defined __SIZEOF_INT128__
+/* gcc and clang both provide a __uint128_t type on 64-bit targets
+ * (and, when they do, indicate its presence by the above macro),
+ * using the same 'two machine registers' kind of code generation that
+ * 32-bit targets use for 64-bit ints. If we have one of these, we can
+ * use a 64-bit BignumInt and a 128-bit BignumDblInt. */
+typedef __uint64_t BignumInt;          /* one bignum word: 64 bits */
+typedef __uint128_t BignumDblInt;      /* double-width intermediate: 128 bits */
+#define BIGNUM_INT_MASK  0xFFFFFFFFFFFFFFFFULL
+#define BIGNUM_TOP_BIT   0x8000000000000000ULL
+#define BIGNUM_INT_BITS  64
+#define MUL_WORD(w1, w2) ((BignumDblInt)(w1) * (w2)) /* widen first, so the product is computed in BignumDblInt */
+#define DIVMOD_WORD(q, r, hi, lo, w) do { /* q = (hi:lo) / w, r = (hi:lo) % w; arguments may be evaluated more than once */ \
+    BignumDblInt divmod_n_ = (((BignumDblInt)(hi)) << BIGNUM_INT_BITS) | (lo); /* name chosen to avoid capturing a caller's 'n' */ \
+    (q) = divmod_n_ / (w); \
+    (r) = divmod_n_ % (w); \
+} while (0)
+#elif defined __GNUC__ && defined __i386__
 typedef unsigned long BignumInt;
 typedef unsigned long long BignumDblInt;
 #define BIGNUM_INT_MASK  0xFFFFFFFFUL